mfiers
diff --git a/‎hagfishUtils.py
+52-13 b/‎hagfishUtils.py
+52-13
diff --git a/‎hagfish_circos
+16 b/‎hagfish_circos
+16
diff --git a/‎hagfish_coverage_combine
+34-32 b/‎hagfish_coverage_combine
+34-32
@@ -59,34 +59,23 @@ def getHagfishOptparser():
                       help='Show debug information')
     return parser
 
-def addPlotParameters(parser):
+
+def addBasePlotParameters(parser):
     parser.set_defaults(ntPerBand=-1)
 
     parser.add_option('-n', dest='ntPerBand',
                       help='no nucleotides per band')
 
-    parser.add_option('-i', dest='inputFile',
-                      help='input file with the coverage data (npz, if not specified, '+
-                      'the input file name will be inferred from the sequence Id')
-
     parser.set_defaults(imageWidth=1000)
     parser.set_defaults(bandHeight=200)
     parser.add_option('-W', dest='imageWidth', type='int', help='imageWidth (in px)')
     parser.add_option('-H', dest='bandHeight', type='int', help='bandHeight (in px)')
 
-    parser.set_defaults(yfrac=0.98)
-    parser.add_option('-Y', dest='yfrac', type='float', help='percentage of the plotted'
-                      'fraction that must fall inside the Y boundaries of the graph - use'
-                      'this to scale the y axis')
-    parser.add_option('--ymax', dest='ymax',
-                      help='Alternatively, set a max value for the y axis')
-
     parser.add_option('-s', dest='start',
                       help='Start position (nt) of the plot')
     parser.add_option('-e', dest='stop',
                       help='Stop position (nt) of the plot')
 
-
     parser.add_option('-o', dest='outfile',
                       help='Output file name')
 
@@ -98,9 +87,24 @@ def addPlotParameters(parser):
     parser.add_option('--dpi', dest='dpi', type='int',
                       help='dpi of the image, pixel calculations are based on dpi 100, setting dpi to 200 will double the x/y pixel size of your image)')
 
+
+def addPlotParameters(parser):
+    """Register the base plot options plus -i, -Y, --ymax and -Q on parser."""
+    addBasePlotParameters(parser)
+    parser.add_option('-i', dest='inputFile',
+                      help='input file with the coverage data (npz, if not specified, '+
+                      'the input file name will be inferred from the sequence Id')
+    parser.set_defaults(yfrac=0.98)
+    parser.add_option('-Y', dest='yfrac', type='float', help='percentage of the plotted '
+                      'fraction that must fall inside the Y boundaries of the graph - use '
+                      'this to scale the y axis')
+    parser.add_option('--ymax', dest='ymax',
+                      help='Alternatively, set a max value for the y axis')
     parser.add_option('-Q', dest='quick', action='store_true',
                       help='Create a "light" version of this graph (if implemented)')
 
+def addBinPlotParameters(parser):
+    addBasePlotParameters(parser)  # only the base options; none of -i/-Y/--ymax/-Q
 
 class hagfishData:
 
@@ -167,6 +171,41 @@ def __init__(self, options, args, seqId = None, inputFile=None):
         #self.z = np.zeros_like(self.x)
         self.vectors.append('x')
 
+
+class hagfishBinData:
+    """Binned sequence data read with np.load (keys 'bins' and 'binSize')."""
+
+    def __init__(self, options, args, seqId = None, inputFile=None):
+        """Load the binned data of one sequence.
+
+        options   -- parsed options; options.verbose is handed to getLogger
+        args      -- positional arguments; args[0] is used when seqId is unset
+        seqId     -- sequence id (takes precedence over args[0])
+        inputFile -- file for np.load; required, exits with status 1 if unset
+        """
+        if seqId:
+            self.seqId = seqId
+        else:
+            self.seqId = args[0]
+
+        self.options = options
+        self.l = getLogger('data', options.verbose)
+        self.l.info("Loading sequence: %s" % self.seqId)
+
+        if not inputFile:
+            # use the instance logger, like every other log call in this class
+            self.l.critical("need to provide an input file")
+            sys.exit(1)
+        self.inputFile = inputFile
+        self.l.info('loading %s' % self.inputFile)
+
+        self.data = np.load(self.inputFile)
+        self.bins = self.data['bins']
+        self.binSize = self.data['binSize']
+        self.seqLen = len(self.bins) * self.binSize
+        self.x = np.arange(0, self.seqLen, dtype="int")
+        self.l.info("discovered bin sequence of %d nt" % self.seqLen)
+
 class hagfishPlot:
 
     def __init__(self, options, data, title=None, data2=None, ymax=None):
 
@@ -95,6 +95,10 @@ if __name__ == '__main__':
         FL = open(os.path.join('circos', '%s.long.txt' % (outSeqId)), 'w')
         FC = open(os.path.join('circos', '%s.score.txt' % (outSeqId)), 'w')
 
+        FGE = open(os.path.join('circos', '%s.ok_ends.txt' % (outSeqId)), 'w')
+        FSE = open(os.path.join('circos', '%s.short_ends.txt' % (outSeqId)), 'w')
+        FLE = open(os.path.join('circos', '%s.long_ends.txt' % (outSeqId)), 'w')
+        
         coverageFile = os.path.join('combined', '%s.combined.coverage.npz' % seqId)
 
         if not os.path.exists(coverageFile):
@@ -106,6 +110,9 @@ if __name__ == '__main__':
         r_ok = data['r_ok']
         r_low = data['r_low']
         r_high = data['r_high']
+        r_ok_ends = data['r_ok_ends']
+        r_low_ends = data['r_low_ends']
+        r_high_ends = data['r_high_ends']
 
         l.info("read %d datapoints" % len(r_ok))
         median = np.median(r_ok)
@@ -119,10 +126,19 @@ if __name__ == '__main__':
         generate_histogram(FG, outSeqId, 'ok', r_ok, binSize)
         generate_histogram(FS, outSeqId, 'short', r_low, binSize)
         generate_histogram(FL, outSeqId, 'long', r_high, binSize)
+
+        generate_histogram(FGE, outSeqId, 'ok_ends', r_ok_ends, binSize)
+        generate_histogram(FSE, outSeqId, 'short_ends', r_low_ends, binSize)
+        generate_histogram(FLE, outSeqId, 'long_ends', r_high_ends, binSize)
+
         generate_histogram(FC, outSeqId, 'score', score, binSize)
 
         FG.close()
         FS.close()
         FL.close()
         FC.close()
 
+        FGE.close()
+        FSE.close()
+        FLE.close()
+
@@ -71,44 +71,46 @@ if __name__ == '__main__':
         for inputDir in os.listdir('coverage'):
             inputFile = os.path.join('coverage', inputDir, seqId + '.coverage.npz')
             if not os.path.exists(inputFile):
-                l.debug("skiping %s (%d nt)" % (seqId, seqInfo[seqId]['length']))
+                l.debug("skipping %s (%d nt)" % (seqId, seqInfo[seqId]['length']))
                 break
             bamBase = inputDir
 
             l.info("processing %s (%s)" % (bamBase,inputFile))
 
             #read the coverage plots
-            data = np.load(inputFile)
-            if r_ok == None:
-                r_ok = data['r_ok']
-                r_high = data['r_high']
-                r_low = data['r_low']
-
-                r_ok_ends = data['r_ok_ends']
-                r_high_ends = data['r_high_ends']
-                r_low_ends = data['r_low_ends']
-
-                r_low_binned = data['r_low_binned']
-                r_high_binned = data['r_high_binned']
-
-                bins_low = data['bins_low']
-                bins_high = data['bins_high']
-            else:
-                if (not list(bins_low) == list(data['bins_low'])) or \
-                    (not list(bins_high) == list(data['bins_high'])):
-                    l.critical("different bins!")
-                    sys.exit()
-
-                r_ok += data['r_ok']
-                r_high += data['r_high']
-                r_low += data['r_low']
-
-                r_ok_ends += data['r_ok_ends']
-                r_high_ends += data['r_high_ends']
-                r_low_ends += data['r_low_ends']
-
-                r_low_binned += data['r_low_binned']
-                r_high_binned += data['r_high_binned']
+            # binary mode: the npz archive is not text
+            with open(inputFile, 'rb') as F:
+                data = np.load(F)
+                # identity test: `== None` on an array compares element-wise
+                if r_ok is None:
+                    # no accumulators yet: adopt the arrays of this file
+                    r_ok = data['r_ok']
+                    r_high = data['r_high']
+                    r_low = data['r_low']
+
+                    r_ok_ends = data['r_ok_ends']
+                    r_high_ends = data['r_high_ends']
+                    r_low_ends = data['r_low_ends']
+
+                    r_low_binned = data['r_low_binned']
+                    r_high_binned = data['r_high_binned']
+
+                    bins_low = data['bins_low']
+                    bins_high = data['bins_high']
+                else:
+                    # the bins must match the ones already loaded before adding
+                    if (not list(bins_low) == list(data['bins_low'])) or \
+                        (not list(bins_high) == list(data['bins_high'])):
+                        l.critical("different bins!")
+                        sys.exit(1)
+                    r_ok += data['r_ok']
+                    r_high += data['r_high']
+                    r_low += data['r_low']
+                    r_ok_ends += data['r_ok_ends']
+                    r_high_ends += data['r_high_ends']
+                    r_low_ends += data['r_low_ends']
+                    r_low_binned += data['r_low_binned']
+                    r_high_binned += data['r_high_binned']
 
             l.info("Max low: %d / Max ok: %d / Max high: %d" % (
                     np.max(r_low), np.max(r_ok), np.max(r_high)))