Skip to content

Commit 57ccf6c

Browse files
author
Mark Fiers
committed
minor fixes
1 parent daebce3 commit 57ccf6c

File tree

3 files changed

+79
-52
lines changed

3 files changed

+79
-52
lines changed

hagfish_file_util.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def np_savez(file_base, **kwargs):
2020
for k in kwargs:
2121
file_name = '%s.%s.gz' % (file_base, k)
2222
F = gzip.open(file_name, 'w')
23-
cPickle.dump(kwargs[k], F)
23+
cPickle.dump(kwargs[k], F,0)
2424
F.close()
2525

2626
def np_exists(file_base, what):

hagfish_report

+34-28
Original file line numberDiff line numberDiff line change
@@ -93,12 +93,13 @@ if __name__ == '__main__':
9393
l.info("quite a few sequences (%d) :( might take a while!" % len(seqInfo))
9494

9595
cum_r_ok = np.array([])
96-
cum_r_high = np.array([])
97-
cum_r_low = np.array([])
98-
9996
cum_r_ok_ends = np.array([])
100-
cum_r_high_ends = np.array([])
101-
cum_r_low_ends = np.array([])
97+
98+
if not options.onlyok:
99+
cum_r_high = np.array([])
100+
cum_r_low = np.array([])
101+
cum_r_high_ends = np.array([])
102+
cum_r_low_ends = np.array([])
102103

103104
l.info("Start parsing %d sequences" % len(seqs_to_parse))
104105

@@ -129,12 +130,13 @@ if __name__ == '__main__':
129130

130131
try:
131132
r_ok = np_load(file_base, 'r_ok')
132-
r_high = np_load(file_base, 'r_high')
133-
r_low = np_load(file_base, 'r_low')
134-
135133
r_ok_ends = np_load(file_base, 'r_ok_ends')
136-
r_high_ends = np_load(file_base, 'r_high_ends')
137-
r_low_ends = np_load(file_base, 'r_low_ends')
134+
if True: #not options.onlyok:
135+
r_high = np_load(file_base, 'r_high')
136+
r_low = np_load(file_base, 'r_low')
137+
r_high_ends = np_load(file_base, 'r_high_ends')
138+
r_low_ends = np_load(file_base, 'r_low_ends')
139+
138140
if GAP:
139141
nons += len(np.flatnonzero(np_load(gap_base, 'nns')))
140142

@@ -148,11 +150,12 @@ if __name__ == '__main__':
148150
#add to the cumulative plot
149151
if seqLen >= options.cum_seqlen_cutoff:
150152
cum_r_ok = np.concatenate((cum_r_ok, r_ok))
151-
cum_r_high = np.concatenate((cum_r_high, r_high))
152-
cum_r_low = np.concatenate((cum_r_low, r_low))
153153
cum_r_ok_ends = np.concatenate((cum_r_ok_ends, r_ok_ends))
154-
cum_r_high_ends = np.concatenate((cum_r_high_ends, r_high_ends))
155-
cum_r_low_ends = np.concatenate((cum_r_low_ends, r_low_ends))
154+
if True: #not options.onlyok:
155+
cum_r_high = np.concatenate((cum_r_high, r_high))
156+
cum_r_low = np.concatenate((cum_r_low, r_low))
157+
cum_r_high_ends = np.concatenate((cum_r_high_ends, r_high_ends))
158+
cum_r_low_ends = np.concatenate((cum_r_low_ends, r_low_ends))
156159

157160
seqLen = totalSeqLen
158161

@@ -176,7 +179,11 @@ if __name__ == '__main__':
176179
l.debug("Calculated score: min %s, max %s" % (np.min(score), np.max(score)))
177180

178181
#determine what the bins are
179-
maxx = max(max(cum_r_ok), max(cum_r_high), max(cum_r_low))
182+
if False: # True: #options.onlyok:
183+
maxx = max(max(cum_r_ok))
184+
else:
185+
maxx = max(max(cum_r_ok), max(cum_r_high), max(cum_r_low))
186+
180187
maxx = 1500 * ( ( maxx / 1000 ) + 1 )
181188

182189
#bins = np.array([0,1,2,3,4] + range(5, int(maxx)))
@@ -189,24 +196,23 @@ if __name__ == '__main__':
189196

190197
l.debug("Bins %s" % bins)
191198
ok_hist, oe = np.histogram(cum_r_ok, bins = bins)
192-
high_hist, he = np.histogram(cum_r_high, bins = bins)
193-
low_hist, le = np.histogram(cum_r_low, bins = bins)
194-
195199
ok_hist_ends, _oee = np.histogram(cum_r_ok_ends, bins = bins)
196-
high_hist_ends, _hee = np.histogram(cum_r_high_ends, bins = bins)
197-
low_hist_ends, _lee = np.histogram(cum_r_low_ends, bins = bins)
198-
199-
hist_edges = oe
200-
201200
rep_ok_hist_ends, roee = np.histogram(cum_r_ok_ends, bins = rBins)
202-
rep_high_hist_ends, rhee = np.histogram(cum_r_high_ends, bins = rBins)
203-
rep_low_hist_ends, rlee = np.histogram(cum_r_low_ends, bins = rBins)
204-
205201
rep_ok_hist, roe = np.histogram(cum_r_ok, bins = rBins)
206-
rep_high_hist, rhe = np.histogram(cum_r_high, bins = rBins)
207-
rep_low_hist, rle = np.histogram(cum_r_low, bins = rBins)
208202

209203

204+
if True: # not options.onlyok:
205+
high_hist, he = np.histogram(cum_r_high, bins = bins)
206+
low_hist, le = np.histogram(cum_r_low, bins = bins)
207+
high_hist_ends, _hee = np.histogram(cum_r_high_ends, bins = bins)
208+
low_hist_ends, _lee = np.histogram(cum_r_low_ends, bins = bins)
209+
rep_high_hist_ends, rhee = np.histogram(cum_r_high_ends, bins = rBins)
210+
rep_low_hist_ends, rlee = np.histogram(cum_r_low_ends, bins = rBins)
211+
rep_high_hist, rhe = np.histogram(cum_r_high, bins = rBins)
212+
rep_low_hist, rle = np.histogram(cum_r_low, bins = rBins)
213+
214+
215+
hist_edges = oe
210216
rep_hist_edges = roee
211217

212218
#print coverage distribution plot

test/run_test.sh

+44-23
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,22 @@
33
mkdir -p demo_run 2>/dev/null
44
cd demo_run
55

6-
if [[ ! -f demo_1.fq ]]
6+
if [[ ! -f set1_1.fq ]]
77
then
88
echo 'unpacking demo fq'
9-
cp ../demo_1.fq.bz2 .
10-
cp ../demo_2.fq.bz2 .
11-
bunzip2 demo_1.fq.bz2
12-
bunzip2 demo_2.fq.bz2
9+
cp ../set1_1.fq.bz2 .
10+
cp ../set1_2.fq.bz2 .
11+
bunzip2 set1_1.fq.bz2
12+
bunzip2 set1_2.fq.bz2
1313
fi
1414

15-
if [[ ! -f alter_1.fq ]]
15+
if [[ ! -f set2_1.fq ]]
1616
then
1717
echo 'unpacking alternative fq'
18-
cp ../alter_1.fq.bz2 .
19-
cp ../alter_2.fq.bz2 .
20-
bunzip2 alter_1.fq.bz2
21-
bunzip2 alter_2.fq.bz2
18+
cp ../set2_1.fq.bz2 .
19+
cp ../set2_2.fq.bz2 .
20+
bunzip2 set2_1.fq.bz2
21+
bunzip2 set2_2.fq.bz2
2222
fi
2323

2424
if [[ ! -f db.1.ebwt ]]
@@ -27,30 +27,51 @@ then
2727
bowtie-build ../test.fasta db
2828
fi
2929

30-
if [[ ! -f "demo.sam" ]]
30+
if [[ ! -f "set1.sam" ]]
3131
then
3232
echo 'running bowtie'
3333
bowtie -I 1 -X 100000 -k 4 -S -p 4 --strata --best \
34-
db -1 demo_1.fq -2 demo_2.fq demo.sam
34+
db -1 set1_1.fq -2 set1_2.fq set1.sam
3535
fi
3636

37-
if [[ ! -f "alter.sam" ]]
37+
if [[ ! -f "set2.sam" ]]
3838
then
3939
echo 'running bowtie'
4040
bowtie -I 1 -X 100000 -k 4 -S -p 4 --strata --best \
41-
db -1 alter_1.fq -2 alter_2.fq alter.sam
41+
db -1 set2_1.fq -2 set2_2.fq set2.sam
4242
fi
4343

44-
echo 'running hagfish'
45-
hagfish_extract -S -v --low 210 --high 365 demo.sam
46-
hagfish_extract -S -v --low 210 --high 365 alter.sam
47-
hagfish_coverage_combine -v
48-
hagfish_gapfinder -v -f ../test.fasta
44+
echo 'running hagfish - if necessary'
4945

50-
echo 'and plot'
51-
hagfish_cplot2 --ymax 400 -n 6e4 contig
52-
hagfish_blockplot -n 6e4 contig
46+
[[ -d "readpairs/set1" ]] || \
47+
hagfish_extract -S -vv --low 210 --high 365 set1.sam
48+
[[ -d "readpairs/set2" ]] || \
49+
hagfish_extract -S -vv --low 210 --high 365 set2.sam
50+
[[ -d "combined" ]] || hagfish_coverage_combine -v
51+
[[ -d "gaps" ]] || hagfish_gapfinder -v -f ../test.fasta
5352

54-
hagfish_compplot2 -n 6e4 -l demo -L alter
53+
echo 'and plot!'
54+
55+
c1='-n 6e4 --dpi 400'
56+
c2='-n 15e3 -e 15e3 --dpi 400'
57+
cp='--ymax 400 -S -H 400'
58+
59+
hagfish_cplot2 $c1 $cp -l set1 contig
60+
hagfish_cplot2 $c1 $cp -l set2 contig
61+
62+
hagfish_cplot2 $c2 $cp -l set1 contig
63+
hagfish_cplot2 $c2 $cp -l set2 contig
64+
65+
hagfish_blockplot $c1 -l set1 contig
66+
hagfish_blockplot $c1 -l set2 contig
67+
68+
hagfish_blockplot $c2 -l set1 contig
69+
hagfish_blockplot $c2 -l set2 contig
70+
71+
hagfish_compplot2 $c1 $cp -l set1 -L set2 contig
72+
hagfish_compplot2 $c2 $cp -l set1 -L set2 contig
73+
74+
hagfish_blockcompplot2 $c1 -l set1 -L set2 contig
75+
hagfish_blockcompplot2 $c2 -l set1 -L set2 contig
5576

5677

0 commit comments

Comments
 (0)