Skip to content

Commit 1b064b3

Browse files
committed
Update dadi-cli LowPass integration to automatically generate a dictionary with population IDs and the max poptential sequenced smaples using dadi.make_data_dict_vcf's extract_ploidy argument
1 parent d152b21 commit 1b064b3

File tree

9 files changed

+35
-23
lines changed

9 files changed

+35
-23
lines changed

dadi_cli/GenerateFs.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,19 +90,25 @@ def generate_fs(
9090
# multiply number of individuals subsamples by the ploidy to get sample size
9191
projections = [individuals*ploidy for individuals in subsample]
9292
print(projections, ploidy, subsample)
93+
elif calc_coverage:
94+
dd, ploidy = dadi.Misc.make_data_dict_vcf(vcf_filename=vcf, popinfo_filename=pop_info, calc_coverage=calc_coverage, extract_ploidy=True)
95+
import collections
96+
nseq = collections.defaultdict(int)
97+
for line in open(pop_info).readlines():
98+
nseq[line.strip().split()[-1]] += 1 * ploidy
9399
else:
94100
dd = dadi.Misc.make_data_dict_vcf(vcf_filename=vcf, popinfo_filename=pop_info, calc_coverage=calc_coverage)
95101

96102
# Moved this lower, since using subsamples make projections not required
97103
if len(pop_ids) != len(projections):
98104
raise ValueError("The lengths of `pop_ids` and `projections` must match.")
99105

100-
if calc_coverage:
106+
if calc_coverage:
101107
import pickle
102108
import dadi.LowPass.LowPass as lp
103109
cov_dist = lp.compute_cov_dist(dd, pop_ids)
104110
print(f"\nSaving coverage distribution data in pickle named:\n{output}.coverage.pickle\n")
105-
pickle.dump([cov_dist, calc_coverage], open(f"{output}.coverage.pickle","wb"))
111+
pickle.dump([cov_dist, nseq], open(f"{output}.coverage.pickle","wb"))
106112

107113
if bootstrap is None:
108114
fs = dadi.Spectrum.from_data_dict(

dadi_cli/InferDFE.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,15 @@ def infer_dfe(
117117
from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov
118118
except ModuleNotFoundError:
119119
raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n")
120-
# nseq = [int(ele) for ele in cov_args[1:]]
120+
nseq = []
121+
for pop in fs.pop_ids:
122+
nseq.append(cov_args[1][pop])
121123
if cov_inbreeding == []:
122124
Fx = None
123125
else:
124126
Fx = cov_inbreeding
125127

126-
func = func_cov(func, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx)
128+
func = func_cov(func, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx)
127129

128130
p0_len = len(p0)
129131
# lower_bounds = convert_to_None(lower_bounds, p0_len)

dadi_cli/InferDM.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,14 +90,15 @@ def infer_demography(
9090
from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov
9191
except ModuleNotFoundError:
9292
raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n")
93-
# nseq = [int(ele) for ele in cov_args[1]]
94-
# cov_args[1] should be a list with ints, so no need to convert to int
93+
nseq = []
94+
for pop in fs.pop_ids:
95+
nseq.append(cov_args[1][pop])
9596
if cov_inbreeding == []:
9697
Fx = None
9798
else:
9899
Fx = cov_inbreeding
99100

100-
func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx)
101+
func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx)
101102

102103
p0_len = len(p0)
103104
if fixed_params == -1:
@@ -215,13 +216,15 @@ def infer_global_opt(
215216
import pickle
216217
except ModuleNotFoundError:
217218
raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n")
218-
# nseq = [int(ele) for ele in cov_args[1:]]
219+
nseq = []
220+
for pop in fs.pop_ids:
221+
nseq.append(cov_args[1][pop])
219222
if cov_inbreeding == []:
220223
Fx = None
221224
else:
222225
Fx = cov_inbreeding
223226

224-
func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx)
227+
func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx)
225228

226229
p0_len = len(p0)
227230
if fixed_params == -1:

dadi_cli/Plot.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,13 +183,15 @@ def plot_fitted_demography(
183183
from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov
184184
except ModuleNotFoundError:
185185
raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n")
186-
# nseq = [int(ele) for ele in cov_args[1:]]
186+
nseq = []
187+
for pop in fs.pop_ids:
188+
nseq.append(cov_args[1][pop])
187189
if cov_inbreeding == []:
188190
Fx = None
189191
else:
190192
Fx = cov_inbreeding
191193

192-
func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx)
194+
func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx)
193195

194196
model = func_ex(popt, ns, pts_l)
195197

@@ -304,13 +306,15 @@ def plot_fitted_dfe(
304306
from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov
305307
except ModuleNotFoundError:
306308
raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n")
307-
# nseq = [int(ele) for ele in cov_args[1:]]
309+
nseq = []
310+
for pop in fs.pop_ids:
311+
nseq.append(cov_args[1][pop])
308312
if cov_inbreeding == []:
309313
Fx = None
310314
else:
311315
Fx = cov_inbreeding
312316

313-
func = func_cov(func, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx)
317+
func = func_cov(func, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx)
314318
# Get expected SFS for MLE
315319
if (cache1d != None) and (cache2d != None):
316320
model = func(sele_popt, None, spectra1d, spectra2d, pdf, pdf2, theta, None)

dadi_cli/parsers/generate_fs_parsers.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -173,13 +173,10 @@ def add_generate_fs_parsers(subparsers: argparse.ArgumentParser) -> None:
173173

174174
parser.add_argument(
175175
"--calc-coverage",
176-
nargs="+",
177176
default=False,
178-
type=list,
177+
action="store_true",
179178
dest="calc_coverage",
180-
help="Pass in the total number of sequenced haploids (nseq) for each population:\n"+
181-
"ex. --calc-coverage 22 12, if the total number a sequenced haploids was 22 for population 1 and 12 for population 2.\n" +
182-
"And store coverage information of sites and nseq in <output>.coverage.pickle object. Default: False.",
179+
help="Store coverage information of sites and total haploids sequenced per population in <output>.coverage.pickle object. Default: False.",
183180
)
184181
add_output_argument(parser)
185182
add_seed_argument(parser)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "dadi-cli"
7-
version = "0.9.10"
7+
version = "0.9.11"
88
requires-python = ">=3.9"
99
description = "A command line interface for dadi"
1010
readme = "README.md"
14 Bytes
Binary file not shown.

tests/test_GenerateFs.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -424,13 +424,13 @@ def test_generate_fs_lowpass_dd(data):
424424
bootstrap=None,
425425
chunk_size=None,
426426
masking="",
427-
calc_coverage=[20, 20],
427+
calc_coverage=True,
428428
seed=None,
429429
)
430430
import pickle
431431
cov_dist, nseq = pickle.load(open("tests/test_results/cov-test.fs.coverage.pickle", "rb"))
432432

433-
assert nseq == [20, 20]
433+
assert nseq == {'pop1': 20, 'pop2': 20}
434434

435435
def test_generate_fs_lowpass_fs(data):
436436
generate_fs(
@@ -445,7 +445,7 @@ def test_generate_fs_lowpass_fs(data):
445445
bootstrap=None,
446446
chunk_size=None,
447447
masking="",
448-
calc_coverage=[20, 20],
448+
calc_coverage=True,
449449
seed=None,
450450
)
451451
dadi_cli_fs = dadi.Spectrum.from_file("tests/test_results/cov-test.fs")

tests/test_main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def gen_fs_args():
6060
gen_fs_args.pop_info = "tests/example_data/LowPass-files/cov_popfile_2D.txt"
6161
gen_fs_args.pop_ids = ["pop1", "pop2"]
6262
gen_fs_args.polarized = False
63-
gen_fs_args.calc_coverage = [20, 20]
63+
gen_fs_args.calc_coverage = True
6464
_run_generate_fs(gen_fs_args)
6565
os.remove(gen_fs_args.output)
6666
os.remove(gen_fs_args.output+".coverage.pickle")

0 commit comments

Comments
 (0)