-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.py
163 lines (136 loc) · 5.83 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# example Asian-Papuan demography
# originally defined in Sanderson et al. 2015: https://doi.org/10.1534/genetics.115.176842
import msprime
import libsequence.polytable as pt
import libsequence.summstats as sstats
# basic variables
generation_time = 25
length = 1e4
recombination_rate = 2e-8
mutation_rate = 2e-8
# sample sizes
S_asian = 100
S_papuan = 100
S_admixed = 100
# effective sizes
N_ancestral = 10500
N_asian = 2050
N_papuan = 800
N_admixed = 1425
# demography times in years
T_ancestral = 50000 / generation_time
T_admixture = 4000 / generation_time
# asian admixture proportion
P_admixture = 0.5
# migration rates
m_AS_PA = 0
m_AS_AD = 0
m_PA_AD = 0
# population IDs: asian=0, papuan=1, admixed=2
population_configurations = [
msprime.PopulationConfiguration(sample_size=S_asian, initial_size=N_asian),
msprime.PopulationConfiguration(sample_size=S_papuan, initial_size=N_papuan),
msprime.PopulationConfiguration(sample_size=S_admixed, initial_size=N_admixed)
]
# migration
migration_matrix = [
[ 0, m_AS_PA, m_AS_AD],
[m_AS_PA, 0, m_PA_AD],
[m_AS_AD, m_PA_AD, 0],
]
# demography
demographic_events = [
# Admixed deme merges to Asians and Papuans
msprime.MassMigration(time=T_admixture, source=2, destination=0, proportion=P_admixture),
msprime.MassMigration(time=T_admixture, source=2, destination=1, proportion=1.0),
msprime.MigrationRateChange(time=T_admixture, rate=0),
msprime.MigrationRateChange(time=T_admixture, rate=m_AS_PA, matrix_index=(0, 1)),
msprime.MigrationRateChange(time=T_admixture, rate=m_AS_PA, matrix_index=(1, 0)),
msprime.PopulationParametersChange(time=T_admixture, initial_size=N_asian, growth_rate=0, population_id=0),
msprime.PopulationParametersChange(time=T_admixture, initial_size=N_papuan, growth_rate=0, population_id=1),
# Asian and Papuan demes merge
msprime.MassMigration(time=T_ancestral, source=1, destination=0, proportion=1.0),
msprime.MigrationRateChange(time=T_ancestral, rate=0),
msprime.PopulationParametersChange(time=T_ancestral, initial_size=N_ancestral, growth_rate=0, population_id=0)
]
# check demography (human readable only)
dp = msprime.DemographyDebugger(
Ne=N_ancestral,
population_configurations=population_configurations,
migration_matrix=migration_matrix,
demographic_events=demographic_events)
dp.print_history()
# simulate demography (one replicate only)
sims = msprime.simulate(
length=length,
recombination_rate=recombination_rate,
mutation_rate=mutation_rate,
Ne=N_ancestral,
population_configurations=population_configurations,
migration_matrix=migration_matrix,
demographic_events=demographic_events)
# get population sample lists
asians = sims.get_samples(population_id=0)
papuans = sims.get_samples(population_id=1)
admixed = sims.get_samples(population_id=2)
pops = {'Asians': asians, 'Papuans': papuans, 'Admixed': admixed}
# temp: sanity check variants (and population subsetting)
for v in sims.variants():
print(v.index, v.position, v.genotypes, sep="\t")
print("\n")
for i in pops.keys():
for v in sims.variants():
print(v.index, v.position, v.genotypes[pops[i]], sep="\t")
print("\n")
# use subsetting to access populations
sd = pt.SimData([(v.position, v.genotypes) for v in sims.variants(as_bytes=True)])
ps = sstats.PolySIM(sd)
print("Everyone:", ps.tajimasd(),' ',ps.thetaw(),' ',ps.thetapi())
for i in pops.keys():
sd = pt.SimData([(v.position, v.genotypes[min(pops[i]):max(pops[i])+1]) for v in sims.variants(as_bytes=True)])
ps = sstats.PolySIM(sd)
print(i, ":", ps.tajimasd(),' ',ps.thetaw(),' ',ps.thetapi())
# run replicates (example 1 - summaries on whole dataset only)
num_replicates = 3
replicates = msprime.simulate(
length=length,
recombination_rate=recombination_rate,
mutation_rate=mutation_rate,
Ne=N_ancestral,
population_configurations=population_configurations,
migration_matrix=migration_matrix,
demographic_events=demographic_events,
num_replicates=num_replicates)
for j, sim in enumerate(replicates):
sd = pt.SimData([(v.position, v.genotypes) for v in sim.variants(as_bytes=True)])
ps = sstats.PolySIM(sd)
print(j+1, '\t', ps.tajimasd(), '\t', ps.thetaw(), '\t', ps.thetapi())
# run replicates (example 2 - summaries per population)
num_replicates = 5
significant_digits = 4
replicates = msprime.simulate(
length=length,
recombination_rate=recombination_rate,
mutation_rate=mutation_rate,
Ne=N_ancestral,
population_configurations=population_configurations,
migration_matrix=migration_matrix,
demographic_events=demographic_events,
num_replicates=num_replicates)
print("rep", '\t', "pi_all", '\t', "pi_asian", '\t', "pi_papuan", '\t', "pi_admixed")
for j, sim in enumerate(replicates):
pi = []
# get population sample lists
#if j == 0:
asians = sim.get_samples(population_id=0)
papuans = sim.get_samples(population_id=1)
admixed = sim.get_samples(population_id=2)
pops = {'Asians': asians, 'Papuans': papuans, 'Admixed': admixed}
sd = pt.SimData([(v.position, v.genotypes) for v in sim.variants(as_bytes=True)])
ps = sstats.PolySIM(sd)
pi.append( round(ps.thetapi(), significant_digits) )
for i in pops.keys():
sd = pt.SimData([(v.position, v.genotypes[min(pops[i]):max(pops[i])+1]) for v in sim.variants(as_bytes=True)])
ps = sstats.PolySIM(sd)
pi.append( round(ps.thetapi(), significant_digits) )
print(j+1, *pi, sep='\t')