Skip to content

Commit f68df07

Browse files
author
Mark Fiers
committed
identifies NNN regions (gaps)
1 parent 1dcaa04 commit f68df07

File tree

1 file changed

+75
-0
lines changed

1 file changed

+75
-0
lines changed

hagfish_gapfinder

+75
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#!/usr/bin/env python
2+
import os
3+
import sys
4+
import math
5+
import pickle
6+
import jinja2
7+
8+
import numpy as np
9+
10+
import logging
11+
import optparse
12+
13+
from hagfish_file_util import *
14+
15+
## Arguments: General options
16+
parser = optparse.OptionParser()
17+
18+
parser.add_option('-f', dest='fasta',
19+
help='reference fasta file')
20+
parser.add_option('-v', dest='verbose', action="count",
21+
help='Show debug information')
22+
23+
options, args = parser.parse_args()
24+
25+
l = logging.getLogger('hagfish')
26+
27+
handler = logging.StreamHandler()
28+
logmark = chr(27) + '[0;37;44mHAGFISH' + \
29+
chr(27) + '[0m '
30+
31+
formatter = logging.Formatter(
32+
logmark + '%(levelname)-6s %(message)s')
33+
34+
handler.setFormatter(formatter)
35+
l.addHandler(handler)
36+
37+
if options.verbose >= 2:
38+
l.setLevel(logging.DEBUG)
39+
elif options.verbose == 1:
40+
l.setLevel(logging.INFO)
41+
else:
42+
l.setLevel(logging.WARNING)
43+
44+
if not options.fasta:
45+
l.critical("Must specify a fasta input file")
46+
sys.exit(-1)
47+
48+
if __name__ == '__main__':
49+
50+
if not os.path.exists('gaps'):
51+
os.makedirs('gaps')
52+
53+
#read an arbitrary seqId file
54+
for f in os.listdir('seqInfo'):
55+
if '.seqinfo' in f:
56+
seqInfoFile = os.path.join('seqInfo', f)
57+
break
58+
else:
59+
l.critical("cannot find a seqInfo file")
60+
sys.exit(-1)
61+
62+
l.info("reading %s for seqinfo" % seqInfoFile)
63+
with open(seqInfoFile) as F:
64+
seqInfo = pickle.load(F)
65+
66+
l.info("discovered %d sequences" % len(seqInfo))
67+
68+
seqslist = set(seqInfo.keys())
69+
70+
for seqId, seq in fastareader(options.fasta):
71+
l.info('Processing %s (%d nt)' % (seqId, len(seq)))
72+
73+
nns = np.array(np.array(list(seq)) == 'n', 'b')
74+
np_savez(os.path.join('gaps', seqId), nns = nns )
75+

0 commit comments

Comments
 (0)