Skip to content

Commit 9b6402b

Browse files
author
zhangrengang
committed
clean code; add comments x2; v1.2.0
1 parent 350076d commit 9b6402b

10 files changed

+133
-163
lines changed

soi/WGDI.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616

1717

1818
class TPM:
19+
'''Parser for TMP table'''
20+
1921
def __init__(self, tpmfile):
2022
self.tpmfile = tpmfile
2123

@@ -47,6 +49,8 @@ def mean(self):
4749

4850

4951
class AK:
52+
'''Parser for WGDI ancestor.txt'''
53+
5054
def __init__(self, akfile):
5155
self.akfile = akfile
5256

@@ -382,13 +386,15 @@ def group_genes(genes, d_genes, max_dist=50):
382386

383387

384388
class Alignment:
389+
'''Parser for WGDI -a output (alignment file)'''
390+
385391
def __init__(self, alignment, indice=None, idmap=None, colnames=None, **kargs):
386392
self.alignment = alignment
387393
self.indice = self.parse_indice(indice) # cut columns
388394
if idmap is None and colnames is not None:
389395
idmap = colnames
390396
self.idmap = self.parse_idmap(idmap) # idx - name * unique ID
391-
self.colnames = self.parse_colnames(colnames) # names * repeat allowed
397+
self.colnames = self.parse_colnames(colnames) # names * repeat allowed
392398

393399
def __iter__(self):
394400
return self._parse()

soi/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version = '1.2'
1+
version = '1.2.0'

soi/colors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ class HexColors:
1515
'''get default colors'''
1616

1717
def __init__(self, colors_hex=None):
18-
if colors_hex is None:
18+
if colors_hex is None: # HEX colors
1919
colors_hex = COLORS_HEX
2020
elif isinstance(colors_hex, str):
2121
colors_hex = colors_hex.split(',')

soi/depth_bins_plot.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@
33
from math import ceil
44
from collections import OrderedDict
55
import matplotlib.pyplot as plt
6-
# from xopen import xopen as open
76
from .small_tools import parse_kargs
8-
7+
from .small_tools import open_file as open
98

109
def main(inDepth=sys.argv[1], vvlines=None, order=None,
1110
window_size=50000, window_step=25000, minlength=0.005,

soi/dot_plotter.py

+65-62
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
#!/bin/env python
22
# coding: utf-8
3-
from .RunCmdsMP import logger
4-
from .WGDI import AK
5-
from .ploidy_plotter import add_ploidy_opts, get_ploidy, plot_bars
6-
from .mcscan import Collinearity, Gff, XCollinearity
73
import logging
84
import argparse
95
import sys
@@ -17,9 +13,11 @@
1713

1814
mpl.use("Agg")
1915
mpl.rcParams['pdf.fonttype'] = 42
20-
# mpl.rcParams['ps.fonttype'] = 42
21-
# mpl.rcParams['font.family'] = 'sans-serif'
22-
# mpl.rcParams['font.sans-serif'] = 'Arial'
16+
17+
from .RunCmdsMP import logger
18+
from .WGDI import AK
19+
from .ploidy_plotter import add_ploidy_opts, get_ploidy, plot_bars
20+
from .mcscan import Collinearity, Gff, XCollinearity
2321

2422

2523
def dotplot_args(parser):
@@ -34,13 +32,13 @@ def dotplot_args(parser):
3432
parser.add_argument('--format', metavar='FORMAT', action='append', default=['pdf', 'png'],
3533
help="output figure format [default=%(default)s]")
3634
parser.add_argument('--homology', action='store_true', default=False,
37-
help="`-s` is in homology format (gene1<tab>gene2). [default=%(default)s]")
35+
help=argparse.SUPPRESS) #"`-s` is in homology format (gene1<tab>gene2). [default=%(default)s]")
3836
parser.add_argument('--number-plots', action='store_true', default=False,
3937
help="number subplots with (a-d). [default=%(default)s]")
4038
parser.add_argument('--min-block', metavar='INT', type=int, default=None,
4139
help="min gene number in a block. [default=%(default)s]")
42-
parser.add_argument('--min-same-block', metavar='INT', type=int, default=25,
43-
help="min gene number in a block on the same chromosome. [default=%(default)s]")
40+
parser.add_argument('--min-same-block', metavar='INT', type=int, default=None,
41+
help=argparse.SUPPRESS) # "min gene number in a block on the same chromosome. [default=%(default)s]")
4442
parser.add_argument('--min-dist', dest='tandem_dist', metavar='INT', type=int, default=None,
4543
help="remove tandem with distance shorter than this value. [default=%(default)s]")
4644
parser.add_argument('--plot-dot', action='store_true', default=None,
@@ -126,14 +124,14 @@ def dotplot_args(parser):
126124
group_ks.add_argument('--hist-ylim', type=float, default=None,
127125
help=argparse.SUPPRESS) # "max y axis of Ks histgram. [default=%(default)s]")
128126

129-
group_ploidy = parser.add_argument_group('ploidy plot',
127+
group_ploidy = parser.add_argument_group('Ploidy plot',
130128
'options to plot relative ploidy (synteny depth)')
131129
group_ploidy.add_argument('--plot-ploidy', action='store_true', default=False,
132130
help="plot relative ploidy. [default=%(default)s]")
133131
add_ploidy_opts(group_ploidy)
134132

135133
group_bin = parser.add_argument_group(
136-
'plot Ks by bins', 'options to plot binned Ks')
134+
'Plot Ks by bins', 'options to plot binned Ks')
137135
group_bin.add_argument('--plot-bin', action='store_true', default=False,
138136
help="plot binned Ks. [default=%(default)s]")
139137

@@ -169,7 +167,6 @@ def makeArgparse():
169167
args = parser.parse_args(['-h'])
170168
else:
171169
args = parser.parse_args()
172-
173170
return args
174171

175172

@@ -207,7 +204,8 @@ def main(args):
207204
lower_ks=args.lower_ks, upper_ks=args.upper_ks,
208205
cluster=args.cluster, diagonal=args.diagonal, gene_axis=args.gene_axis,
209206
matrix=args.matrix, min_same_block=args.min_same_block,
210-
min_block=args.min_block, tandem_dist=args.tandem_dist, **ks_args)
207+
min_block=args.min_block, tandem_dist=args.tandem_dist,
208+
ploidy=args.plot_ploidy, **ks_args)
211209

212210
logger.info('{} blocks to plot'.format(len(blocks)))
213211
# positions of chromosome labels
@@ -255,49 +253,6 @@ def main(args):
255253
)
256254

257255

258-
def is_mcscan_style(labels):
259-
matches = [re.compile(
260-
r'[A-Za-z]{2}\d{1,5}[A-Za-z]*$').match(lab) for lab in labels]
261-
return all(matches)
262-
263-
264-
def match_paptern(lab, pattern):
265-
match = re.compile(pattern).match(lab)
266-
if match:
267-
return match.groups()[0]
268-
else:
269-
return
270-
271-
272-
def is_same_prefix(labels):
273-
matches = [match_paptern(lab, r'(\D+)') for lab in labels]
274-
matches = list(set(matches))
275-
if len(matches) == 1 and matches[0] is not None:
276-
return len(matches[0])
277-
else:
278-
return False
279-
280-
281-
def is_same_prefix2(labels):
282-
lst_labels = list(zip(*labels))
283-
for i, strs in enumerate(lst_labels):
284-
if len(set(strs)) > 1:
285-
if i > 0: # retrieve number preifx
286-
for j in range(i-1, 0, -1):
287-
strj = lst_labels[j][0]
288-
if not re.compile(r'[1-9]').match(strj):
289-
return j+1
290-
return i
291-
292-
293-
def _remove_prefix(labels):
294-
'''remove prefix of chromosome id'''
295-
same_prefix = is_same_prefix2(labels)
296-
if same_prefix:
297-
return [label[same_prefix:] for label in labels]
298-
return labels
299-
300-
301256
def plot_blocks(blocks, outplots, ks=None, max_ks=None, ks_hist=False, ks_cmap=None,
302257
clip_ks=None, min_block=None, ks_step=0.02,
303258
xlabels=None, ylabels=None, xpositions=None, ypositions=None,
@@ -553,6 +508,7 @@ def plot_collapse(kXs, kYs, Ks, xlabels, ylabels, xpositions, ypositions,
553508

554509

555510
def plot_label(ax, label, **kargs):
511+
'''a/b/c/d labels'''
556512
xmin, xmax = ax.get_xlim()
557513
ymin, ymax = ax.get_ylim()
558514
xoffset = (xmax-xmin) / 60
@@ -565,6 +521,7 @@ def plot_label(ax, label, **kargs):
565521

566522
def plot_fold(ax, titles, ref_coord_paths, ref_coord_graph, qry_coord_graph,
567523
rq_ortholog_graph, **kargs):
524+
'''c/d subplots'''
568525
d_fold = get_ploidy(ref_coord_paths, ref_coord_graph,
569526
qry_coord_graph, rq_ortholog_graph, **kargs)
570527
data = [np.array(sorted(d_fold.items()))]
@@ -573,6 +530,7 @@ def plot_fold(ax, titles, ref_coord_paths, ref_coord_graph, qry_coord_graph,
573530

574531
def _histgram(ax, allKs, cmap=None, xlim=None, ylim=None, bins=100, normed=False,
575532
xlabel='Ks', ylabel=' of syntenic gene pairs', fontsize=None, output_hist=False):
533+
'''b subplot'''
576534
if cmap is None:
577535
cmap = cm.jet
578536
allKs = [v for v in allKs if v >= 0 and v is not None]
@@ -611,6 +569,49 @@ def _histgram(ax, allKs, cmap=None, xlim=None, ylim=None, bins=100, normed=False
611569
return xlim, ylim
612570

613571

572+
def is_mcscan_style(labels):
573+
matches = [re.compile(
574+
r'[A-Za-z]{2}\d{1,5}[A-Za-z]*$').match(lab) for lab in labels]
575+
return all(matches)
576+
577+
578+
def match_paptern(lab, pattern):
579+
match = re.compile(pattern).match(lab)
580+
if match:
581+
return match.groups()[0]
582+
else:
583+
return
584+
585+
586+
def is_same_prefix(labels):
587+
matches = [match_paptern(lab, r'(\D+)') for lab in labels]
588+
matches = list(set(matches))
589+
if len(matches) == 1 and matches[0] is not None:
590+
return len(matches[0])
591+
else:
592+
return False
593+
594+
595+
def is_same_prefix2(labels):
596+
lst_labels = list(zip(*labels))
597+
for i, strs in enumerate(lst_labels):
598+
if len(set(strs)) > 1:
599+
if i > 0: # retrieve number preifx
600+
for j in range(i-1, 0, -1):
601+
strj = lst_labels[j][0]
602+
if not re.compile(r'[1-9]').match(strj):
603+
return j+1
604+
return i
605+
606+
607+
def _remove_prefix(labels):
608+
'''remove the same prefix of chromosome id'''
609+
same_prefix = is_same_prefix2(labels)
610+
if same_prefix:
611+
return [label[same_prefix:] for label in labels]
612+
return labels
613+
614+
614615
def parse_hvlines(bedfile, min_span=10):
615616
lines = []
616617
for line in open(bedfile):
@@ -712,7 +713,7 @@ def parse_collinearity(collinearity, gff, chrs1, chrs2, kaks, homology,
712713
hide_blocks=None, use_median=False, lower_ks=None, upper_ks=None,
713714
cluster=False, diagonal=False, gene_axis=False, source=None, use_frac=False,
714715
ofdir=None, of_ratio=0, of_color=False, tandem_dist=None, min_block=None,
715-
matrix=None, min_same_block=None, **ks_args):
716+
matrix=None, min_same_block=None, ploidy=None, **ks_args):
716717
blocks = XCollinearity(collinearity, orthologs=ofdir, gff=gff, kaks=kaks,
717718
homology=homology, source=source, **ks_args)
718719
chrs1s, chrs2s = set(chrs1), set(chrs2)
@@ -757,16 +758,18 @@ def parse_collinearity(collinearity, gff, chrs1, chrs2, kaks, homology,
757758
if use_median:
758759
ks = [rc.median_ks] * len(ks)
759760

760-
# use OI
761-
if ofdir:
762-
ratio = rc.oi
761+
# use OI or fractionation_rate
762+
if ofdir or use_frac:
763763
if use_frac:
764764
ratio = rc.fractionation_rate(both=True)
765+
else:
766+
ratio = rc.oi
765767
if not ratio > of_ratio:
766768
continue
767769
if of_color:
768770
ks = [ratio] * len(genes1)
769-
ortholog_graph.add_edges_from(rc.pairs)
771+
if ploidy:
772+
ortholog_graph.add_edges_from(rc.pairs)
770773
try:
771774
d_blocks[(chr1, chr2)] += [(genes1, genes2, ks)]
772775
except KeyError:

soi/karyotype.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ def akr(collinearity, gff, anc, spsd, rounds=3):
1010
'''To refine ancestral karyotype:
1111
1. to prune non-syntenic and tandemly repeated genes;
1212
2. to add genes that show synteny in other genomes. '''
13+
1314
synG = ColinearGroups(collinearity,
1415
spsd=spsd, nosamechr=True, noparalog=False).to_graph()
1516
gffG = Gff(gff).to_graph()
@@ -41,18 +42,16 @@ def akr(collinearity, gff, anc, spsd, rounds=3):
4142
print(set(ancG.nodes) - set(d_ancs))
4243
ancG.remove_internals(set(ancG.nodes) - set(d_ancs))
4344
logger.info((len(ancG), len(list(ancG.starts))))
44-
# with open('sny.{}.gfa'.format('x'), 'w') as fout:
45-
# ancG.to_gfa(fout)
45+
4646
# insert syntenic nodes
4747
np, nn = insert_syn(ancG, gffG, d_ancs, synG, TANDEM=TANDEM)
4848
logger.info('inserted {} paths, {} nodes'.format(np, nn))
4949
logger.info((len(ancG), len(list(ancG.starts))))
5050

5151
with open('sny.{}.gfa'.format(i), 'w') as fout:
5252
ancG.to_gfa(fout)
53+
5354
# final remove non-syntenic nodes
54-
# d_ancs = process_synG(synG, ancG, TANDEM=TANDEM)
55-
# ancG.remove_internals(set(ancG.nodes) - set(d_ancs))
5655
logger.info(len(ancG))
5756
ancG.to_wgdi(anc+'.akr')
5857
ancG.to_idmap()

0 commit comments

Comments
 (0)