Skip to content

Commit 995aec4

Browse files
author
Mark Fiers
committed
generate wiggle formatted plots
1 parent a0abfc8 commit 995aec4

File tree

1 file changed

+153
-0
lines changed

1 file changed

+153
-0
lines changed

hagfish_wiggle

+153
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
#!/usr/bin/env python
2+
3+
import os
4+
import sys
5+
import pickle
6+
7+
import numpy as np
8+
9+
import logging
10+
import optparse
11+
12+
from hagfish_file_util import *
13+
14+
## Arguments: General options
15+
parser = optparse.OptionParser()
16+
parser.set_defaults(binSize=500)
17+
parser.add_option('-v', dest='verbose', action="count",
18+
help='Show debug information')
19+
parser.add_option('-b', dest='binSize', type='int',
20+
help='binSize')
21+
parser.add_option('-s', dest='seqId',
22+
help='seqId to use in the output..')
23+
24+
options, args = parser.parse_args()
25+
26+
l = logging.getLogger('hagfish')
27+
handler = logging.StreamHandler()
28+
logmark = chr(27) + '[0;37;44mHAGFISH' + \
29+
chr(27) + '[0m '
30+
31+
formatter = logging.Formatter(
32+
logmark + '%(levelname)-6s %(message)s')
33+
34+
handler.setFormatter(formatter)
35+
l.addHandler(handler)
36+
37+
if options.verbose >= 2:
38+
l.setLevel(logging.DEBUG)
39+
elif options.verbose == 1:
40+
l.setLevel(logging.INFO)
41+
else:
42+
l.setLevel(logging.WARNING)
43+
44+
45+
def generate_histogram(F,
46+
cptype,
47+
category,
48+
seqId,
49+
group,
50+
data,
51+
bin):
52+
53+
if category == 'low':
54+
color = '001,086,102'
55+
elif category == 'ok':
56+
color = '000,125,028'
57+
else:
58+
color = '166,16,0'
59+
60+
F.write('track name="Hf_%s_%s_%s" description="Hagfish wiggle plot %s %s for %s" color=%s\n' %
61+
(cptype, category, seqId, cptype, category, seqId, color))
62+
F.write("fixedStep chrom=%s start=1 step=%d\n"% (
63+
seqId, bin))
64+
65+
66+
for r in range(0, len(data)+bin-1,bin):
67+
score = np.average(data[r:r+bin-1])
68+
if not np.isnan(score):
69+
F.write("%.2f\n" % score)
70+
71+
if __name__ == '__main__':
72+
73+
#read an arbitrary seqId file
74+
for f in os.listdir('seqInfo'):
75+
if '.seqinfo' in f:
76+
seqInfoFile = os.path.join('seqInfo', f)
77+
break
78+
else:
79+
l.critical("cannot find a seqInfo file")
80+
sys.exit(-1)
81+
82+
l.info("reading %s to get seqinfo" % seqInfoFile)
83+
with open(seqInfoFile) as F:
84+
seqInfo = pickle.load(F)
85+
86+
l.info("discovered %d sequences" % len(seqInfo))
87+
88+
binSize = options.binSize
89+
if len(args) > 0:
90+
seqIds = args
91+
else:
92+
seqIds = seqInfo.keys()
93+
94+
if not os.path.exists('wiggle'):
95+
os.mkdir('wiggle')
96+
97+
if options.seqId: assert(len(seqIds) == 1)
98+
99+
for seqId in seqIds:
100+
l.info("Processing %s" % seqId)
101+
102+
if options.seqId:
103+
outSeqId = options.seqId
104+
else:
105+
outSeqId = seqId
106+
107+
l.info("Outputting %s" % outSeqId)
108+
109+
FG = open(os.path.join('wiggle', '%s.icp.ok.WIG' % (outSeqId)), 'w')
110+
FS = open(os.path.join('wiggle', '%s.icp.low.WIG' % (outSeqId)), 'w')
111+
FL = open(os.path.join('wiggle', '%s.icp.high.WIG' % (outSeqId)), 'w')
112+
FC = open(os.path.join('wiggle', '%s.icp.score.WIG' % (outSeqId)), 'w')
113+
114+
FGE = open(os.path.join('wiggle', '%s.ecp.ok.WIG' % (outSeqId)), 'w')
115+
FSE = open(os.path.join('wiggle', '%s.ecp.low.WIG' % (outSeqId)), 'w')
116+
FLE = open(os.path.join('wiggle', '%s.ecp.high.WIG' % (outSeqId)), 'w')
117+
118+
file_base = os.path.join('combined', seqId)
119+
r_ok = np_load(file_base, 'r_ok')
120+
r_low = np_load(file_base, 'r_low')
121+
r_high = np_load(file_base, 'r_high')
122+
r_ok_ends = np_load(file_base, 'r_ok_ends')
123+
r_low_ends = np_load(file_base, 'r_low_ends')
124+
r_high_ends = np_load(file_base, 'r_high_ends')
125+
126+
l.info("read %d datapoints" % len(r_ok))
127+
median = np.median(r_ok)
128+
score = 1 - 2 * np.exp(-1 * (r_ok / median)) \
129+
+ np.exp(-1 * ((r_ok + r_low + r_high)/median))
130+
131+
seqLen = len(r_ok)
132+
l.info("sequence length of %s is %d" % (seqId, seqLen))
133+
134+
135+
generate_histogram(FG,'ICP', 'ok', outSeqId, 'ok', r_ok, binSize)
136+
generate_histogram(FS,'ICP', 'low', outSeqId, 'short', r_low, binSize)
137+
generate_histogram(FL,'ICP', 'high', outSeqId, 'long', r_high, binSize)
138+
139+
generate_histogram(FGE,'ECP', 'ok', outSeqId, 'ok_ends', r_ok_ends, binSize)
140+
generate_histogram(FSE,'ECP', 'low', outSeqId, 'short_ends', r_low_ends, binSize)
141+
generate_histogram(FLE,'ECP', 'high', outSeqId, 'long_ends', r_high_ends, binSize)
142+
143+
generate_histogram(FC,'ICP', 'Score', outSeqId, 'score', score, binSize)
144+
145+
FG.close()
146+
FS.close()
147+
FL.close()
148+
FC.close()
149+
150+
FGE.close()
151+
FSE.close()
152+
FLE.close()
153+

0 commit comments

Comments
 (0)