Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
Benja1972 committed Jun 5, 2019
1 parent a38252d commit 8499815
Show file tree
Hide file tree
Showing 9 changed files with 1,262 additions and 930 deletions.
449 changes: 367 additions & 82 deletions .ipynb_checkpoints/EnrichrLib_tutorial-checkpoint.ipynb

Large diffs are not rendered by default.

608 changes: 322 additions & 286 deletions .ipynb_checkpoints/TLX3_Project_Story_2019-checkpoint.ipynb

Large diffs are not rendered by default.

103 changes: 62 additions & 41 deletions Density_mutation.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,78 +151,99 @@ def hypmut_bw(vcf,bw,step=1000,nstep=50,shr=0.8,genome='mm9'):
]


for trck in trck_list:
bw = join(DATADIR,trck)

hyp_bdd = hypmut_bw(vcf,bw, step=600, nstep=150,shr=0.85)
hyp_bdd.saveas(join(WGS,'Hypermut_TLX3_WGS_'+trck.split('/')[-1]+'.bed'))

#~ for trck in trck_list:
#~ bw = join(DATADIR,trck)

#~ hyp_bdd = hypmut_bw(vcf,bw, step=600, nstep=150,shr=0.85)
#~ hyp_bdd.saveas(join(WGS,'Hypermut_TLX3_WGS_'+trck.split('/')[-1]+'.bed'))




res_trck_l = [
'Hypermut_TLX3_WGS_TLX3_H3K4me1_mm9.bw.bed',
'Hypermut_TLX3_WGS_TLX3_H3K4me2_mm9.bw.bed',
'Hypermut_TLX3_WGS_TLX3_H3K4me3_mm9.bw.bed',
'Hypermut_TLX3_WGS_TLX3_H3K9ac_mm9.bw.bed',
'Hypermut_TLX3_WGS_TLX3_H3K9me3_mm9.bw.bed',
'Hypermut_TLX3_WGS_TLX3_H3K27ac_mm9.bw.bed',
'Hypermut_TLX3_WGS_TLX3_H3K27me3_mm9.bw.bed',
'Hypermut_TLX3_WGS_TLX3_H3K36me3_mm9.bw.bed',
'Hypermut_TLX3_WGS_TLX3_POLII_mm9.bw.bed',
'Hypermut_TLX3_WGS_TLX3_TLX3_mm9.bw.bed'
]


#~ mm9 = pb.chromsizes('mm9')
allb = pb.BedTool(join(WGS,res_trck_l[0]))

#~ hyp_cor= pd.DataFrame()
for trck in res_trck_l[1:]:
btr = pb.BedTool(join(WGS,trck))
allb = allb+btr




#~ bin_bed = bed_bins(step)
#~ hyp_cor= pd.DataFrame()

# Figure
step=2000

bin_bed = bed_bins(step)



#~ bed_out = variants_bed_counts(vcf,bin_bed)


#~ out = bed_out.to_dataframe()
#~ out['density'] = out['score']/step
bed_out = variants_bed_counts(vcf,bin_bed)


out = bed_out.to_dataframe()
out['density'] = out['score']/step


#~ out_chr = out[out['chrom']==chrom]
chrom='chr1'

#~ x = out_chr['start'].values + step/2
#~ y = out_chr['density'].values
out_chr = out[out['chrom']==chrom]

#~ yv = savgol_filter(y,13,5)
#~ yv = y.copy()
x = out_chr['start'].values + step/2
y = out_chr['density'].values

#yv = savgol_filter(y,13,5)
yv = y.copy()


trck = trck_list[5]

bw = join(DATADIR,trck)

mm9 = pb.chromsizes('mm9')

#~ st = mm9[chrom][0]
#~ end = mm9[chrom][1]
st = mm9[chrom][0]
end = mm9[chrom][1]

#~ vl = gs.countFragmentsInRegions_worker(chrom,
#~ int(st),
#~ int(end),
#~ [bw],
#~ stepSize=step,
#~ binLength=step,
#~ save_data=False)
vl = gs.countFragmentsInRegions_worker(chrom,
int(st),
int(end),
[bw],
stepSize=step,
binLength=step,
save_data=False)


#~ xb = np.arange(st,end,step)[:-1]
#~ yb = np.squeeze(vl[0])[:-1]
xb = np.arange(st,end,step)[:-1]
yb = np.squeeze(vl[0])[:-1]


#~ # Figures
#~ f, ax = plt.subplots(figsize=(26.5, 6.5))
f, ax = plt.subplots(figsize=(26.5, 6.5))

#~ ax.plot(x,yv,'--.', color='b', MarkerSize=0.8, LineWidth=0.1)

#~ ax1 = ax.twinx()
#~ ax1.plot(xb,yb, '--.',color='r', MarkerSize=0.8, LineWidth=0.1)
ax.plot(x,yv,'--.', color='b', MarkerSize=0.8, LineWidth=0.1)
ax.set_ylabel('Hypermutation density')

ax1 = ax.twinx()
ax1.plot(xb,yb, '--.',color='r', MarkerSize=0.8, LineWidth=0.1)

ax1.set_ylabel(trck.split('/')[-1])


#~ corr = np.corrcoef(yv,yb)
Expand All @@ -235,19 +256,19 @@ def hypmut_bw(vcf,bw,step=1000,nstep=50,shr=0.8,genome='mm9'):



#~ nstep = 50
#~ x_r = x[:-nstep]
nstep = 50
x_r = x[:-nstep]

#~ cr_f = np.zeros(x_r.shape)
cr_f = np.zeros(x_r.shape)

#~ for i in range(len(x_r)):
#~ cr_f[i] = np.corrcoef(yv[i:i+nstep],yb[i:i+nstep])[0,1]
for i in range(len(x_r)):
cr_f[i] = np.corrcoef(yv[i:i+nstep],yb[i:i+nstep])[0,1]


#~ f3, ax3 = plt.subplots()
#~ ax3.plot(x_r,cr_f, '--.',color='r', MarkerSize=0.8, LineWidth=0.1)
f3, ax3 = plt.subplots()
ax3.plot(x_r,cr_f, '--.',color='r', MarkerSize=0.8, LineWidth=0.1)

#~ plt.show()
plt.show()

# Pocessing

Expand Down
92 changes: 63 additions & 29 deletions EnrichRLib.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.cm import get_cmap
import scipy.stats as st
from scipy.cluster.hierarchy import fcluster, linkage
import networkx as nx
Expand Down Expand Up @@ -231,25 +232,25 @@ def cluster_jacc(enr):
return enr


def sigmoid(x):
return 1 / (1 + np.exp(-x))


def make_graph(setR, enr, kappa=0.4, draw=False, palette='tab20'):
def make_graph(setR, enr_in, kappa=0.4):
setR = set(setR)
## Cluster
enr = cluster(setR,enr)

## Network
terms = list(enr.index)
terms = list(enr_in.index)
n = len(terms)
col = ['source', 'target', 'dist', 'comm_genes']
nt = pd.DataFrame(columns=col)


for i in range(len(terms)):
ter1 = terms[i]
set1 = set(list(enr['term_genes'].loc[ter1]))
set1 = set(list(enr_in['term_genes'].loc[ter1]))
for j in range(i):
ter2 = terms[j]
set2 = set(list(enr['term_genes'].loc[ter2]))
set2 = set(list(enr_in['term_genes'].loc[ter2]))
cm = list(setR & set1 & set2)
rw = pd.DataFrame([[ter1, ter2, coeff_kappa(setR,set1,set2), cm]], columns=col)
nt = nt.append(rw, ignore_index=True)
Expand All @@ -258,36 +259,69 @@ def make_graph(setR, enr, kappa=0.4, draw=False, palette='tab20'):
nt = nt[nt['dist']>kappa]

ls = list(set(list(nt['source']))|set(list(nt['target'])))
nt_tb = enr[enr.index.isin(ls)]
enr_out = enr_in[enr_in.index.isin(ls)]

## Cluster
enr_out = cluster(setR,enr_out)
enr_out.sort_values('cluster', axis=0, inplace = True)

## Graph
G = nx.Graph()

for tr in nt_tb.index:
for tr in enr_out.index:
G.add_node( tr,
genes = nt_tb.loc[tr]['genes'],
num_list = nt_tb.loc[tr]['num_list'],
num_term = nt_tb.loc[tr]['num_term'],
pVal = nt_tb.loc[tr]['p-Val'],
padj = nt_tb.loc[tr]['p-adj'],
mlog10pVal = nt_tb.loc[tr]['-log10(p-Val)'],
cluster = nt_tb.loc[tr]['cluster'])
genes = enr_out.loc[tr]['genes'],
num_list = enr_out.loc[tr]['num_list'],
num_term = enr_out.loc[tr]['num_term'],
ratio = enr_out.loc[tr]['num_list']/enr_out.loc[tr]['num_term'],
pVal = enr_out.loc[tr]['p-Val'],
padj = enr_out.loc[tr]['p-adj'],
mlog10pVal = enr_out.loc[tr]['-log10(p-Val)'],
cluster = enr_out.loc[tr]['cluster'])

for nm in nt.index:
G.add_edge( nt.loc[nm]['source'],
nt.loc[nm]['target'],
weight = nt.loc[nm]['dist'])

if draw:
cl = [G.nodes[v]['cluster'] for v in G]
sz = [G.nodes[v]['mlog10pVal']*100 for v in G]

f, ax = plt.subplots(figsize=(10, 10))

nx.draw(G, pos=nx.spring_layout(G),
with_labels = True,
node_color = cl,
node_size = sz,
font_size=8,
cmap = palette)
return nt, nt_tb, G
return G, enr_out , nt



def draw_graph(G,spring = 1.0, pval_prcnt=0.8, palette='tab20'):
cl = [float(G.nodes[v]['cluster']) for v in G]
sz = [G.nodes[v]['mlog10pVal']*100 for v in G]
ec = [G.edges[u, v]['weight'] for u,v in G.edges]
for u,v in G.edges:
G.edges[u, v]['spring'] = sigmoid(G.edges[u, v]['weight'])*(spring/500)

kappa = min(ec)
mpValshr=pval_prcnt*max(sz)/100
lb_M = {v:v for v in G if G.nodes[v]['mlog10pVal']>mpValshr}
lb_m = {v:v for v in G if G.nodes[v]['mlog10pVal']<=mpValshr}

f, ax = plt.subplots(figsize=(15, 15))
pos=nx.spring_layout(G,k=0.1, weight = 'spring')


#~ cmap = get_cmap(name='viridis')
#~ colors = cmap(np.linspace(0, 1, max(cl)))
nx.draw(G, pos=pos,
node_color = cl,
vmin = 1.,
vmax = 20., ##FIX hardcoded number for tab20
node_size = sz,
edge_color = ec,
edge_vmin = 0.6*kappa,
edge_vmax = 1.2+kappa,
edge_cmap =plt.cm.Greys,
cmap = palette)

nx.draw_networkx_labels(G, pos=pos,
labels = lb_M,
font_size=14,
font_weight='bold')

nx.draw_networkx_labels(G, pos=pos,
labels = lb_m,
font_size=6)
296 changes: 143 additions & 153 deletions EnrichrLib_tutorial.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit 8499815

Please sign in to comment.