Skip to content

Commit 90ab35f

Browse files
author
Mark Fiers
committed
bugfixes
1 parent b89b94f commit 90ab35f

6 files changed

+351
-196
lines changed

hagfishUtils.py

+52-13
Original file line numberDiff line numberDiff line change
@@ -59,34 +59,23 @@ def getHagfishOptparser():
5959
help='Show debug information')
6060
return parser
6161

62-
def addPlotParameters(parser):
62+
63+
def addBasePlotParameters(parser):
6364
parser.set_defaults(ntPerBand=-1)
6465

6566
parser.add_option('-n', dest='ntPerBand',
6667
help='no nucleotides per band')
6768

68-
parser.add_option('-i', dest='inputFile',
69-
help='input file with the coverage data (npz, if not specified, '+
70-
'the input file name will be inferred from the sequence Id')
71-
7269
parser.set_defaults(imageWidth=1000)
7370
parser.set_defaults(bandHeight=200)
7471
parser.add_option('-W', dest='imageWidth', type='int', help='imageWidth (in px)')
7572
parser.add_option('-H', dest='bandHeight', type='int', help='bandHeight (in px)')
7673

77-
parser.set_defaults(yfrac=0.98)
78-
parser.add_option('-Y', dest='yfrac', type='float', help='percentage of the plotted'
79-
'fraction that must fall inside the Y boundaries of the graph - use'
80-
'this to scale the y axis')
81-
parser.add_option('--ymax', dest='ymax',
82-
help='Alternatively, set a max value for the y axis')
83-
8474
parser.add_option('-s', dest='start',
8575
help='Start position (nt) of the plot')
8676
parser.add_option('-e', dest='stop',
8777
help='Stop position (nt) of the plot')
8878

89-
9079
parser.add_option('-o', dest='outfile',
9180
help='Output file name')
9281

@@ -98,9 +87,24 @@ def addPlotParameters(parser):
9887
parser.add_option('--dpi', dest='dpi', type='int',
9988
help='dpi of the image, pixel calculations are based on dpi 100, setting dpi to 200 will double the x/y pixel size of your image)')
10089

90+
91+
def addPlotParameters(parser):
92+
addBasePlotParameters(parser)
93+
parser.add_option('-i', dest='inputFile',
94+
help='input file with the coverage data (npz, if not specified, '+
95+
'the input file name will be inferred from the sequence Id')
96+
97+
parser.set_defaults(yfrac=0.98)
98+
parser.add_option('-Y', dest='yfrac', type='float', help='percentage of the plotted'
99+
'fraction that must fall inside the Y boundaries of the graph - use'
100+
'this to scale the y axis')
101+
parser.add_option('--ymax', dest='ymax',
102+
help='Alternatively, set a max value for the y axis')
101103
parser.add_option('-Q', dest='quick', action='store_true',
102104
help='Create a "light" version of this graph (if implemented)')
103105

106+
def addBinPlotParameters(parser):
107+
addBasePlotParameters(parser)
104108

105109
class hagfishData:
106110

@@ -167,6 +171,41 @@ def __init__(self, options, args, seqId = None, inputFile=None):
167171
#self.z = np.zeros_like(self.x)
168172
self.vectors.append('x')
169173

174+
175+
class hagfishBinData:
176+
177+
def __init__(self, options, args, seqId = None, inputFile=None):
178+
179+
if seqId:
180+
self.seqId = seqId
181+
else:
182+
self.seqId = args[0]
183+
184+
self.options = options
185+
186+
self.l = getLogger('data', options.verbose)
187+
self.l.info("Loading sequence: %s" % self.seqId)
188+
189+
if not inputFile:
190+
l.critical("need to provide an input file")
191+
sys.exit()
192+
self.inputFile = inputFile
193+
194+
self.l.info('loading %s' % self.inputFile)
195+
196+
self.data = np.load(self.inputFile)
197+
self.bins = self.data['bins']
198+
self.binSize = self.data['binSize']
199+
200+
self.seqLen = len(self.data['bins']) * self.binSize
201+
202+
203+
self.x = np.arange(0, self.seqLen, dtype="int")
204+
self.l.info("discovered bin sequence of %d nt" % self.seqLen)
205+
206+
#self.z = np.zeros_like(self.x)
207+
#self.vectors.append('x')
208+
170209
class hagfishPlot:
171210

172211
def __init__(self, options, data, title=None, data2=None, ymax=None):

hagfish_circos

+16
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@ if __name__ == '__main__':
9595
FL = open(os.path.join('circos', '%s.long.txt' % (outSeqId)), 'w')
9696
FC = open(os.path.join('circos', '%s.score.txt' % (outSeqId)), 'w')
9797

98+
FGE = open(os.path.join('circos', '%s.ok_ends.txt' % (outSeqId)), 'w')
99+
FSE = open(os.path.join('circos', '%s.short_ends.txt' % (outSeqId)), 'w')
100+
FLE = open(os.path.join('circos', '%s.long_ends.txt' % (outSeqId)), 'w')
101+
98102
coverageFile = os.path.join('combined', '%s.combined.coverage.npz' % seqId)
99103

100104
if not os.path.exists(coverageFile):
@@ -106,6 +110,9 @@ if __name__ == '__main__':
106110
r_ok = data['r_ok']
107111
r_low = data['r_low']
108112
r_high = data['r_high']
113+
r_ok_ends = data['r_ok_ends']
114+
r_low_ends = data['r_low_ends']
115+
r_high_ends = data['r_high_ends']
109116

110117
l.info("read %d datapoints" % len(r_ok))
111118
median = np.median(r_ok)
@@ -119,10 +126,19 @@ if __name__ == '__main__':
119126
generate_histogram(FG, outSeqId, 'ok', r_ok, binSize)
120127
generate_histogram(FS, outSeqId, 'short', r_low, binSize)
121128
generate_histogram(FL, outSeqId, 'long', r_high, binSize)
129+
130+
generate_histogram(FGE, outSeqId, 'ok_ends', r_ok_ends, binSize)
131+
generate_histogram(FSE, outSeqId, 'short_ends', r_low_ends, binSize)
132+
generate_histogram(FLE, outSeqId, 'long_ends', r_high_ends, binSize)
133+
122134
generate_histogram(FC, outSeqId, 'score', score, binSize)
123135

124136
FG.close()
125137
FS.close()
126138
FL.close()
127139
FC.close()
128140

141+
FGE.close()
142+
FSE.close()
143+
FLE.close()
144+

hagfish_coverage_combine

+34-32
Original file line numberDiff line numberDiff line change
@@ -71,44 +71,46 @@ if __name__ == '__main__':
7171
for inputDir in os.listdir('coverage'):
7272
inputFile = os.path.join('coverage', inputDir, seqId + '.coverage.npz')
7373
if not os.path.exists(inputFile):
74-
l.debug("skiping %s (%d nt)" % (seqId, seqInfo[seqId]['length']))
74+
l.debug("skipping %s (%d nt)" % (seqId, seqInfo[seqId]['length']))
7575
break
7676
bamBase = inputDir
7777

7878
l.info("processing %s (%s)" % (bamBase,inputFile))
7979

8080
#read the coverage plots
81-
data = np.load(inputFile)
82-
if r_ok == None:
83-
r_ok = data['r_ok']
84-
r_high = data['r_high']
85-
r_low = data['r_low']
86-
87-
r_ok_ends = data['r_ok_ends']
88-
r_high_ends = data['r_high_ends']
89-
r_low_ends = data['r_low_ends']
90-
91-
r_low_binned = data['r_low_binned']
92-
r_high_binned = data['r_high_binned']
93-
94-
bins_low = data['bins_low']
95-
bins_high = data['bins_high']
96-
else:
97-
if (not list(bins_low) == list(data['bins_low'])) or \
98-
(not list(bins_high) == list(data['bins_high'])):
99-
l.critical("different bins!")
100-
sys.exit()
101-
102-
r_ok += data['r_ok']
103-
r_high += data['r_high']
104-
r_low += data['r_low']
105-
106-
r_ok_ends += data['r_ok_ends']
107-
r_high_ends += data['r_high_ends']
108-
r_low_ends += data['r_low_ends']
109-
110-
r_low_binned += data['r_low_binned']
111-
r_high_binned += data['r_high_binned']
81+
with open(inputFile) as F:
82+
data = np.load(F)
83+
84+
if r_ok == None:
85+
r_ok = data['r_ok']
86+
r_high = data['r_high']
87+
r_low = data['r_low']
88+
89+
r_ok_ends = data['r_ok_ends']
90+
r_high_ends = data['r_high_ends']
91+
r_low_ends = data['r_low_ends']
92+
93+
r_low_binned = data['r_low_binned']
94+
r_high_binned = data['r_high_binned']
95+
96+
bins_low = data['bins_low']
97+
bins_high = data['bins_high']
98+
else:
99+
if (not list(bins_low) == list(data['bins_low'])) or \
100+
(not list(bins_high) == list(data['bins_high'])):
101+
l.critical("different bins!")
102+
sys.exit()
103+
104+
r_ok += data['r_ok']
105+
r_high += data['r_high']
106+
r_low += data['r_low']
107+
108+
r_ok_ends += data['r_ok_ends']
109+
r_high_ends += data['r_high_ends']
110+
r_low_ends += data['r_low_ends']
111+
112+
r_low_binned += data['r_low_binned']
113+
r_high_binned += data['r_high_binned']
112114

113115
l.info("Max low: %d / Max ok: %d / Max high: %d" % (
114116
np.max(r_low), np.max(r_ok), np.max(r_high)))

0 commit comments

Comments
 (0)