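minor fixes: pickle with protocol 0 in np_savez, groundwork for an only-ok mode in hagfish_report, test script switched to set1/set2 inputs and skips steps already done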

Mark Fiers · Mark Fiers · commit 57ccf6ced946 · 2012-07-30T20:19:23.000+12:00
diff --git a/hagfish_file_util.py b/hagfish_file_util.py
@@ -20,7 +20,7 @@ def np_savez(file_base, **kwargs):
     for k in kwargs:
         file_name = '%s.%s.gz' % (file_base, k)
         F = gzip.open(file_name, 'w')
-        cPickle.dump(kwargs[k], F)
+        cPickle.dump(kwargs[k], F,0)
         F.close()
 
 def np_exists(file_base, what):
diff --git a/hagfish_report b/hagfish_report
@@ -93,12 +93,13 @@ if __name__ == '__main__':
         l.info("quite a few sequences (%d) :( might take a while!" % len(seqInfo))
 
     cum_r_ok = np.array([])
-    cum_r_high = np.array([])
-    cum_r_low = np.array([])
-
     cum_r_ok_ends = np.array([])
-    cum_r_high_ends = np.array([])
-    cum_r_low_ends = np.array([])
+
+    if True: # not options.onlyok: high/low arrays are still used unconditionally below
+        cum_r_high = np.array([])
+        cum_r_low = np.array([])
+        cum_r_high_ends = np.array([])
+        cum_r_low_ends = np.array([])
 
     l.info("Start parsing %d sequences" % len(seqs_to_parse))
 
@@ -129,12 +130,13 @@ if __name__ == '__main__':
             
         try:
             r_ok = np_load(file_base, 'r_ok')
-            r_high = np_load(file_base, 'r_high')
-            r_low = np_load(file_base, 'r_low')
-
             r_ok_ends = np_load(file_base, 'r_ok_ends')
-            r_high_ends = np_load(file_base, 'r_high_ends')
-            r_low_ends = np_load(file_base, 'r_low_ends')
+            if True: #not options.onlyok:
+                r_high = np_load(file_base, 'r_high')
+                r_low = np_load(file_base, 'r_low')
+                r_high_ends = np_load(file_base, 'r_high_ends')
+                r_low_ends = np_load(file_base, 'r_low_ends')
+
             if GAP:
                 nons += len(np.flatnonzero(np_load(gap_base, 'nns')))
 
@@ -148,11 +150,12 @@ if __name__ == '__main__':
         #add to the cumulative plot
         if seqLen >= options.cum_seqlen_cutoff: 
             cum_r_ok = np.concatenate((cum_r_ok, r_ok))
-            cum_r_high = np.concatenate((cum_r_high, r_high))
-            cum_r_low = np.concatenate((cum_r_low, r_low))
             cum_r_ok_ends = np.concatenate((cum_r_ok_ends, r_ok_ends))
-            cum_r_high_ends = np.concatenate((cum_r_high_ends, r_high_ends))
-            cum_r_low_ends = np.concatenate((cum_r_low_ends, r_low_ends))
+            if True: #not options.onlyok:
+                cum_r_high = np.concatenate((cum_r_high, r_high))
+                cum_r_low = np.concatenate((cum_r_low, r_low))
+                cum_r_high_ends = np.concatenate((cum_r_high_ends, r_high_ends))
+                cum_r_low_ends = np.concatenate((cum_r_low_ends, r_low_ends))
 
     seqLen = totalSeqLen
 
@@ -176,7 +179,11 @@ if __name__ == '__main__':
     l.debug("Calculated score: min %s, max %s" % (np.min(score), np.max(score)))
 
     #determine what the bins are
-    maxx = max(max(cum_r_ok), max(cum_r_high), max(cum_r_low))
+    if False: # True: #options.onlyok:
+        maxx = max(cum_r_ok)  # one max() only: max(max(...)) would call max() on a scalar
+    else:
+        maxx = max(max(cum_r_ok), max(cum_r_high), max(cum_r_low))
+
     maxx = 1500 * ( ( maxx / 1000 ) + 1 )             
     
     #bins =  np.array([0,1,2,3,4] + range(5, int(maxx)))
@@ -189,24 +196,23 @@ if __name__ == '__main__':
 
     l.debug("Bins %s" % bins)
     ok_hist, oe = np.histogram(cum_r_ok, bins = bins)
-    high_hist, he = np.histogram(cum_r_high, bins = bins)
-    low_hist, le = np.histogram(cum_r_low, bins = bins)
-
     ok_hist_ends, _oee = np.histogram(cum_r_ok_ends, bins = bins)
-    high_hist_ends, _hee = np.histogram(cum_r_high_ends, bins = bins)
-    low_hist_ends, _lee = np.histogram(cum_r_low_ends, bins = bins)
-
-    hist_edges = oe
-
     rep_ok_hist_ends, roee = np.histogram(cum_r_ok_ends, bins = rBins)
-    rep_high_hist_ends, rhee = np.histogram(cum_r_high_ends, bins = rBins)
-    rep_low_hist_ends, rlee = np.histogram(cum_r_low_ends, bins = rBins)
-
     rep_ok_hist, roe = np.histogram(cum_r_ok, bins = rBins)
-    rep_high_hist, rhe = np.histogram(cum_r_high, bins = rBins)
-    rep_low_hist, rle = np.histogram(cum_r_low, bins = rBins)
 
 
+    if True: # not options.onlyok:
+        high_hist, he = np.histogram(cum_r_high, bins = bins)
+        low_hist, le = np.histogram(cum_r_low, bins = bins)
+        high_hist_ends, _hee = np.histogram(cum_r_high_ends, bins = bins)
+        low_hist_ends, _lee = np.histogram(cum_r_low_ends, bins = bins)
+        rep_high_hist_ends, rhee = np.histogram(cum_r_high_ends, bins = rBins)
+        rep_low_hist_ends, rlee = np.histogram(cum_r_low_ends, bins = rBins)
+        rep_high_hist, rhe = np.histogram(cum_r_high, bins = rBins)
+        rep_low_hist, rle = np.histogram(cum_r_low, bins = rBins)
+
+
+    hist_edges = oe
     rep_hist_edges = roee
 
     #print coverage distribution plot
diff --git a/test/run_test.sh b/test/run_test.sh
@@ -3,22 +3,22 @@
 mkdir -p demo_run 2>/dev/null
 cd demo_run
 
-if [[ ! -f demo_1.fq ]]
+if [[ ! -f set1_1.fq ]]
 then
     echo 'unpacking demo fq'
-    cp ../demo_1.fq.bz2 .
-    cp ../demo_2.fq.bz2 .
-    bunzip2 demo_1.fq.bz2
-    bunzip2 demo_2.fq.bz2
+    cp ../set1_1.fq.bz2 .
+    cp ../set1_2.fq.bz2 .
+    bunzip2 set1_1.fq.bz2
+    bunzip2 set1_2.fq.bz2
 fi
 
-if [[ ! -f alter_1.fq ]]
+if [[ ! -f set2_1.fq ]]
 then
     echo 'unpacking alternative fq'
-    cp ../alter_1.fq.bz2 .
-    cp ../alter_2.fq.bz2 .
-    bunzip2 alter_1.fq.bz2
-    bunzip2 alter_2.fq.bz2
+    cp ../set2_1.fq.bz2 .
+    cp ../set2_2.fq.bz2 .
+    bunzip2 set2_1.fq.bz2
+    bunzip2 set2_2.fq.bz2
 fi
 
 if [[ ! -f db.1.ebwt ]]
@@ -27,30 +27,51 @@ then
     bowtie-build ../test.fasta db
 fi
 
-if [[ ! -f "demo.sam" ]]
+if [[ ! -f "set1.sam" ]]
 then
     echo 'running bowtie'
     bowtie -I 1 -X 100000 -k 4 -S -p 4 --strata --best \
-        db -1 demo_1.fq -2 demo_2.fq demo.sam
+        db -1 set1_1.fq -2 set1_2.fq set1.sam
 fi
 
-if [[ ! -f "alter.sam" ]]
+if [[ ! -f "set2.sam" ]]
 then
     echo 'running bowtie'
     bowtie -I 1 -X 100000 -k 4 -S -p 4 --strata --best \
-        db -1 alter_1.fq -2 alter_2.fq alter.sam
+        db -1 set2_1.fq -2 set2_2.fq set2.sam
 fi
 
-echo 'running hagfish'
-hagfish_extract -S -v --low 210 --high 365 demo.sam 
-hagfish_extract -S -v --low 210 --high 365 alter.sam 
-hagfish_coverage_combine -v
-hagfish_gapfinder -v -f ../test.fasta 
+echo 'running hagfish - if necessary'
 
-echo 'and plot'
-hagfish_cplot2 --ymax 400 -n 6e4 contig
-hagfish_blockplot -n 6e4 contig
+[[ -d "readpairs/set1" ]] || \
+    hagfish_extract -S -vv --low 210 --high 365 set1.sam 
+[[ -d "readpairs/set2" ]] || \
+    hagfish_extract -S -vv --low 210 --high 365 set2.sam 
+[[ -d "combined" ]] || hagfish_coverage_combine -v
+[[ -d "gaps" ]] || hagfish_gapfinder -v -f ../test.fasta 
 
-hagfish_compplot2 -n 6e4 -l demo -L alter
+echo 'and plot!'
+
+c1='-n 6e4 --dpi 400'
+c2='-n 15e3 -e 15e3 --dpi 400'
+cp='--ymax 400 -S -H 400'
+
+hagfish_cplot2 $c1 $cp -l set1  contig 
+hagfish_cplot2 $c1 $cp -l set2 contig
+
+hagfish_cplot2 $c2 $cp -l set1  contig
+hagfish_cplot2 $c2 $cp -l set2 contig
+
+hagfish_blockplot $c1 -l set1 contig
+hagfish_blockplot $c1 -l set2 contig
+
+hagfish_blockplot $c2 -l set1  contig
+hagfish_blockplot $c2 -l set2  contig
+
+hagfish_compplot2 $c1 $cp -l set1 -L set2 contig
+hagfish_compplot2 $c2 $cp -l set1 -L set2 contig
+
+hagfish_blockcompplot2 $c1 -l set1 -L set2 contig
+hagfish_blockcompplot2 $c2 -l set1 -L set2 contig