Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
recombination_rate=1e-6
mutation_rate=1e-6
ts = msprime.simulate(10000,
Ne=5000, length=10000, recombination_rate=recombination_rate)
#or try generating an identical neutral ftprime simulation using
# python3 ./examples/examples.py -T 10000 -N 5000 -r 1e-6 -L 10000 -a 0.0001 -b 0.0001 -k 5000 -t neutral_ts
T_W, T_pi, T_H, D, FayWuH = [], [], [], [], []
incrementalSFS(ts, save_estimates)
plt = ax1
plt.plot(*zip(*D), marker="")
plt.plot(*zip(*FayWuH), marker="")
plt.legend(["Tajima's D", "Fay & Wu's H"])
plt.set_xlim(0,ts.get_sequence_length())
plt.set_title("msprime simulation")
#plt.set_ylim(-15000,15000)
ts = msprime.load("../ftprime/neutral_ts")
T_W, T_pi, T_H, D, FayWuH = [], [], [], [], []
incrementalSFS(ts, save_estimates)
plt = ax2
plt.plot(*zip(*D), marker="")
plt.plot(*zip(*FayWuH), marker="")
plt.legend(["Tajima's D", "Fay & Wu's H"])
plt.set_xlim(0,ts.get_sequence_length())
plt.set_title("ftprime equivalent (after 1000 generations)")
fig.savefig("SelectionMeasuresOnNeutral.pdf")
#selective example
#e.g. from python3 ./examples/selective_sweep.py -N 10000 -r 1e-8 -L 100000 -k 10000 -s 0.05 -of 0.1 -of 0.5 -of 0.8 -of 1 -of 1 200 -of 1 600 -d 20 -v
fig, ((ax1, ax2, ax3),(ax4, ax5,ax6)) = pyplot.subplots(2, 3, sharey=True, figsize=(21, 11))
for f1, f2, plt in zip(
["0.1","0.5","0.8","1","1","1"],
def test_single_locus_example_recombination(self):
from_ts = msprime.load("tests/data/SLiM/single-locus-example.trees")
ts = self.finish_simulation(from_ts, recombination_rate=0.1, seed=1)
self.verify_completed(from_ts, ts)
def verify_dump_load(self, tree_sequence):
"""
Dump the tree sequence and verify we can load again from the same
file.
"""
tree_sequence.dump(self.temp_file)
other = msprime.load(self.temp_file)
self.assertIsNotNone(other.file_uuid)
records = list(tree_sequence.edges())
other_records = list(other.edges())
self.assertEqual(records, other_records)
haplotypes = list(tree_sequence.haplotypes())
other_haplotypes = list(other.haplotypes())
self.assertEqual(haplotypes, other_haplotypes)
def main(args):
nhaps = map(int, args.nhaps.split(','))
recomb = args.recomb_map
ncausal = args.ncausal
# generate/load coalescent simulations
if args.tree is None:
(pop_config, mig_mat, demog) = out_of_africa(nhaps)
simulation = simulate_ooa(pop_config, mig_mat, demog, recomb)
simulation.dump(args.out+ '_nhaps_' + '_'.join(map(str, nhaps)) + '.hdf5', True)
else:
simulation = msprime.load(args.tree)
eprint(simulation)
eprint('Number of haplotypes: ' + ','.join(map(str, nhaps)))
eprint('Number of trees: ' + str(simulation.get_num_trees()))
eprint('Number of mutations: ' + str(simulation.get_num_mutations()))
eprint('Sequence length: ' + str(simulation.get_sequence_length()))
prs_true = true_prs(simulation, args.ncausal, args.h2, nhaps, args.out)
cases_diploid, controls_diploid, prs_norm, environment = case_control(prs_true, args.h2, nhaps, args.prevalence, args.ncontrols, args.out)
summary_stats, cases_haploid, controls_haploid = run_gwas(simulation, cases_diploid, controls_diploid, args.p_threshold, args.cc_maf)
clumped_snps, usable_positions = clump_variants(simulation, summary_stats, nhaps, args.r2, args.window_size)
prs_infer = infer_prs(simulation, nhaps, clumped_snps, summary_stats, usable_positions, args.h2, args.ncausal, args.out)
write_summaries(args.out, prs_true, prs_infer, nhaps, cases_diploid, controls_diploid, args.h2, args.ncausal, environment)
def tsfile_to_ARGweaver_in(trees, ARGweaver_filehandle):
"""
take a .trees file, and convert it into an input file suitable for ARGweaver
Returns the simulation parameters (Ne, mu, r) used to create the .trees file
"""
logging.info("== Saving to ARGweaver input format ==")
try:
ts = msprime.load(trees.name) #trees is a fh
except AttributeError:
ts = msprime.load(trees)
ts_to_ARGweaver_in(ts, ARGweaver_filehandle)
#here we should extract the /provenance information from the .trees file and return
# {'Ne':XXX, 'mutation_rate':XXX, 'recombination_rate':XXX}
#but this information is currently not encoded in the .trees file (listed as TODO)
return {'Ne':None, 'mutation_rate':None, 'recombination_rate':None}
def build_profile_inputs(n, num_megabases):
L = num_megabases * 10 ** 6
input_file = "tmp__NOBACKUP__/profile-n={}-m={}.input.trees".format(
n, num_megabases
)
if os.path.exists(input_file):
ts = msprime.load(input_file)
else:
ts = msprime.simulate(
n,
length=L,
Ne=10 ** 4,
recombination_rate=1e-8,
mutation_rate=1e-8,
random_seed=10,
)
print(
"Ran simulation: n = ",
n,
" num_sites = ",
ts.num_sites,
"num_trees =",
ts.num_trees,
msprime.PopulationConfiguration(sample_size=1000)],
demographic_events=[
msprime.MassMigration(time=t, source=1, destination=0),
msprime.MassMigration(time=t, source=2, destination=0),
msprime.MassMigration(time=t, source=3, destination=0),
msprime.MassMigration(time=t, source=4, destination=0)],
length=100 * 1e6,
recombination_rate=2e-8,
mutation_rate=2e-8,
random_seed=1)
ts.dump("populations.hdf5")
print(
ts.get_sample_size(), ts.get_num_trees(),
ts.get_num_mutations())
else:
ts = msprime.load("populations.hdf5")
before = time.clock()
R = 1
for i in range(R):
for j in range(5):
samples = ts.get_samples(population_id=j)
pi = ts.get_pairwise_diversity(samples)
# pi2 = ts.get_pairwise_diversity2(samples)
# print(j, pi, pi2, pi == pi2)
# print(j, pi2)
duration = time.clock() - before
print("duration = ", duration, " per call = ", duration / (5 * R))
shared_recombinations, num_threads=1, inject_real_ancestors_from_ts_fn=None, rho=None, error_probability=None):
with tempfile.NamedTemporaryFile("w+") as ts_out:
cmd = [sys.executable, tsinfer_executable, sample_fn, "--length", str(int(length))]
if rho is not None:
cmd += ["--recombination-rate", str(rho)]
if error_probability is not None:
cmd += ["--error-probability", str(error_probability)]
cmd += ["--threads", str(num_threads), ts_out.name]
if inject_real_ancestors_from_ts_fn:
logging.debug("Injecting real ancestors constructed from {}".format(
inject_real_ancestors_from_ts_fn))
cmd.extend(["--inject-real-ancestors-from-ts", inject_real_ancestors_from_ts_fn])
if shared_recombinations:
cmd.append("--shared-recombinations")
cpu_time, memory_use = time_cmd(cmd)
ts_simplified = msprime.load(ts_out.name)
return ts_simplified, cpu_time, memory_use
def examine():
ts = msprime.load("tmp__NOBACKUP__/bottleneck-example.hdf5")
print("num_records = ", ts.get_num_records())
non_binary_records = 0
max_record_length = 0
for r in ts.records():
if len(r.children) > 2:
non_binary_records +=1
max_record_length = max(max_record_length, len(r.children))
print("non_binary_records = ", non_binary_records)
print("max_record_length = ", max_record_length)
num_nodes = collections.Counter()
num_trees = 0
for t in ts.trees():
num_nodes[len(list(t.nodes(t.get_root())))] += 1
num_trees += 1
print("num_trees = ", num_trees)
for k, v in num_nodes.items():
for node in tree.nodes():
if node in colours:
for c in tree.get_children(node):
ret_colours[c]=colours[node]
return ret_colours
#from http://www.internationalgenome.org
pop_cols = {"African":"yellow", "American": "red", "East Asian":"green","European":"blue", "South Asian":"purple"}
sample_cols = {}
with open("/Volumes/SDdisk/1000G/igsr_samples.tsv") as tsvfile:
reader = csv.DictReader(tsvfile, delimiter='\t')
for row in reader:
if 'phase 1' in row['Data collections']:
sample_cols[row['Sample name']]= pop_cols[row['Superpopulation name']]
ts = msprime.load(args.msprime_infile).simplify()
#sample names are not stored in msprime files, so we need to get them from the base file
#sample names in the base file have an extra character ('a' or 'b') appended
with h5py.File(args.base_file, "r") as f:
data = f['data']
samples = data['samples']
sample_colours = {i:sample_cols[str(d, 'utf8')[:-1]] for i,d in enumerate(data['samples']) if str(d, 'utf8')[-1] in 'ab'}
for i, t in enumerate(ts.trees()):
if i==0:
start = t.get_interval()[1]
branch_colours = {},{}
node_colours=sample_colours.copy()
percolate_unambiguous_colours(t, node_colours, t.get_root(), )
branch_colours = colour_children(t, node_colours)
file = os.path.join(args.outdir,"tree{:05d}.svg".format(i))
svg=t.draw(file,