Skip to content

Commit 529d16a

Browse files
committed
production code finalised
0 parents  commit 529d16a

15 files changed

+2062
-0
lines changed

__init__.py

Whitespace-only changes.

scripts/__init__.py

Whitespace-only changes.

scripts/export/__init__.py

Whitespace-only changes.

scripts/export/fasta_gen_handler.py

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#!/usr/bin/python
2+
'''
3+
Created on 30 Mar 2016
4+
5+
@author: ikalvari
6+
7+
Description: Calls fasta_generator to generate fasta files for all Rfam
8+
families in rfam_live
9+
10+
Comments: It is a prerequisite that the sequence file is indexed using
11+
esl-sfetch --index option
12+
'''
13+
14+
# ---------------------------------IMPORTS-------------------------------------
15+
16+
import os
17+
import sys
18+
import subprocess
19+
from utils import RfamDB
20+
21+
# -----------------------------------------------------------------------------
22+
23+
# LSF job group written into every generated script via the "#BSUB -g" line
LSF_GROUP = "/rfam_fa"
24+
25+
# -----------------------------------------------------------------------------
26+
27+
28+
def fasta_gen_handler(seq_file, out_dir):
    '''
    The purpose of this function is to handle the fasta generation process:
    fetch all family accessions from the database, generate an individual
    shell script for each family and submit it to the cluster.

    seq_file: Path to the input sequence file (e.g. rfamseq11.fa)
    out_dir: The output directory where the fasta files will be generated.
             The shell scripts are written to its "scripts" sub-directory,
             which is created if it does not exist.
    '''

    # fetch family accessions; the cursor and the connection are released
    # even if the query fails
    cnx = RfamDB.connect()

    try:
        cursor = cnx.cursor(buffered=True)

        try:
            cursor.execute("SELECT rfam_acc FROM family")
            families = cursor.fetchall()

        finally:
            cursor.close()

    finally:
        RfamDB.disconnect(cnx)

    # create scripts dir within output directory
    scripts_dir = os.path.join(out_dir, "scripts")

    if not os.path.exists(scripts_dir):
        os.mkdir(scripts_dir)

    for fam in families:

        # 1. Generate script file
        sh_path = shell_script_generator(
            seq_file, str(fam[0]), out_dir, scripts_dir)

        # 2. submit job under group
        # Equivalent to "bsub < sh_path", but the script is handed to bsub on
        # stdin directly instead of through a shell, so paths containing
        # spaces or shell metacharacters cannot break the command.
        with open(sh_path) as script:
            subprocess.call(["bsub"], stdin=script)
66+
67+
# -----------------------------------------------------------------------------
68+
69+
70+
def shell_script_generator(seq_file, rfam_acc, fa_outdir, out_dir=None):
    '''
    Generates family specific shell scripts to split fasta generation into
    individual jobs.

    seq_file: The path to sequence file (e.g. rfamseq11.fa)
    rfam_acc: Family accession; also used as the script name (<rfam_acc>.sh)
    fa_outdir: Output directory passed on to fasta_generator.py
    out_dir: Directory in which the shell script is written. If None, the
             script is written to fa_outdir

    Returns the path to the generated shell script
    '''

    # If no specific directory is provided for the shell scripts, generate them
    # in the fa output directory
    script_dir = fa_outdir if out_dir is None else out_dir
    file_path = os.path.join(script_dir, rfam_acc + '.sh')

    err_dir = "/nfs/research2/nobackup/rfamp/fa_gen_err"

    # "%%J" yields a literal "%J" in the written directive. Each directive is
    # kept in a single literal: a backslash continuation inside the string
    # would embed the next line's leading whitespace in the directive.
    copy_out = "#BSUB -f \"%s/%s.out < /tmp/%%J.out\"\n" % (err_dir, rfam_acc)
    copy_err = "#BSUB -f \"%s/%s.err < /tmp/%%J.err\"\n" % (err_dir, rfam_acc)

    script_lines = [
        "#!/bin/csh\n",
        "#BSUB -M 8000\n",
        "#BSUB -R \"rusage[mem=8000,tmp=1000]\"\n",
        "#BSUB -o \"/tmp/%J.out\"\n",
        "#BSUB -e \"/tmp/%J.err\"\n",
        copy_out,
        copy_err,
        "#BSUB -Ep \"rm /tmp/$LSB_JOBID.*\"\n",
        "#BSUB -g %s \n\n" % (LSF_GROUP),
        "/nfs/research2/nobackup/rfamp/code/fasta_generator.py %s %s %s \n" %
        (seq_file, rfam_acc, fa_outdir),
    ]

    # the context manager closes the file even if a write fails
    with open(file_path, 'w') as fp:
        fp.writelines(script_lines)

    return file_path
110+
111+
# -----------------------------------------------------------------------------
112+
113+
114+
def usage():
    '''
    Displays information on how to run fasta_gen_handler
    '''

    # print(...) with a single string argument produces the same output under
    # Python 2 and Python 3, whereas the bare print statement is a syntax
    # error under Python 3.
    print("\nUsage:\n------")

    print("\npython fasta_gen_handler.py seq_file out_dir")

    print("\nseq_file: Path to sequence for sequence export (e.g. rfamseq11.fa)")
    print("out_dir: The path to the output directory")
    print("\n-h option to display usage\n")
126+
127+
# -----------------------------------------------------------------------------
128+
129+
if __name__ == '__main__':

    # minor input checks
    # Work on the arguments without the script name, so that running the
    # script with no arguments displays the usage instead of raising an
    # IndexError on sys.argv[1].
    args = sys.argv[1:]

    if args and args[0] == "-h":
        usage()
        sys.exit()

    elif len(args) == 2:
        seq_file, out_dir = args

        # only start the process if both paths exist
        if os.path.isfile(seq_file) and os.path.isdir(out_dir):
            fasta_gen_handler(seq_file, out_dir)

        else:
            print("\nIncorrect Input.")
            usage()

    else:
        usage()

0 commit comments

Comments
 (0)