diff --git a/docs/parameter_tables/PruDul/AlmondHist_1V16.csv b/docs/parameter_tables/PruDul/AlmondHist_1V16.csv new file mode 100644 index 000000000..26412cde8 --- /dev/null +++ b/docs/parameter_tables/PruDul/AlmondHist_1V16.csv @@ -0,0 +1,31 @@ +Population size,134679,Ancestral population size +Population size,134679,Pop. size during 1st time interval +Population size,117577,Pop. size during 2nd time interval +Population size,100475,Pop. size during 3rd time interval +Population size,84798,Pop. size during 4th time interval +Population size,70546,Pop. size during 5th time interval +Population size,58432,Pop. size during 6th time interval +Population size,49168,Pop. size during 7th time interval +Population size,43467,Pop. size during 8th time interval +Population size,44180,Pop. size during 9th time interval +Population size,49881,Pop. size during 10th time interval +Population size,66270,Pop. size during 11th time interval +Population size,111163,Pop. size during 12th time interval +Population size,220902,Pop. size during 13th time interval +Population size,216627,Pop. size during 14th time interval +Time (yrs.),59366,Begining of 1st time interval +Time (yrs.),49651,Begining of 2nd time interval +Time (yrs.),41325,Begining of 3rd time interval +Time (yrs.),34063,Begining of 4th time interval +Time (yrs.),27673,Begining of 5th time interval +Time (yrs.),21943,Begining of 6th time interval +Time (yrs.),16819,Begining of 7th time interval +Time (yrs.),14409,Begining of 8th time interval +Time (yrs.),12047,Begining of 9th time interval +Time (yrs.),9879,Begining of 10th time interval +Time (yrs.),7754,Begining of 11th time interval +Time (yrs.),5685,Begining of 12th time interval +Time (yrs.),3726,Begining of 13th time interval +Time (yrs.),1821,Begining of 14th time interval +Generation time (yrs.),10,Average generation interval +Mutation rate,1.00E-08,Per-base per-generation mutation rate diff --git a/stdpopsim/catalog/PruDul/__init__.py b/stdpopsim/catalog/PruDul/__init__.py new file mode 100644 index 000000000..d4a26c430 --- /dev/null +++ b/stdpopsim/catalog/PruDul/__init__.py @@ -0,0 +1,7 @@ +""" +Catalog definitions for PruDul (Ensembl ID='prunus_dulcis') +""" + +from . import species # noqa: F401 + +# from . import demographic_models diff --git a/stdpopsim/catalog/PruDul/demographic_models.py b/stdpopsim/catalog/PruDul/demographic_models.py new file mode 100644 index 000000000..c505eef51 --- /dev/null +++ b/stdpopsim/catalog/PruDul/demographic_models.py @@ -0,0 +1,92 @@ +import msprime +import stdpopsim +import numpy as np + +_species = stdpopsim.get_species("PruDul") + + +def _pop1_almond(): + # the size during the interval times[k] to times[k+1] = sizes[k] + times = np.array( + [ + 1821, + 3726, + 5685, + 7754, + 9879, + 12047, + 14409, + 16819, + 21943, + 27673, + 34063, + 41325, + 49651, + 59366, + ] + ) + sizes = np.array( + [ + 216627, + 220902, + 111163, + 66270, + 49881, + 44180, + 43467, + 49168, + 58432, + 70546, + 84798, + 100475, + 117577, + 134679, + ] + ) + + demographic_events = [] + for sz, t in zip(sizes, times): + demographic_events.append( + msprime.PopulationParametersChange(time=t, initial_size=sz, population_id=0) + ) + + populations = [ + stdpopsim.Population( + id="Prunus_dulcis", + description="Prunus dulcis population", + ) + ] + + return stdpopsim.DemographicModel( + id="AlmondHist_1V16", + description="Prunus_dulcis_population", + long_description=""" + This model comes from MSMC using five + individuals (PD03, PD04, PD05, PD06, and PD07) from Turkey, + Iran, Pakistan and Italy. + The model is estimated with 14 time periods. + depiction of the model can be found in Figure S8a of the paper. + Data points for population size over time were + obtained by digitizing line plot Figure S8a. + """, + populations=populations, + citations=[ + stdpopsim.Citation( + author="Velasco et al.", + year=2016, + doi="https://doi.org/10.1534/g3.116.032672", + reasons={stdpopsim.CiteReason.DEM_MODEL}, + ) + ], + generation_time=10, + mutation_rate=1e-8, + demographic_events=demographic_events, + population_configurations=[ + msprime.PopulationConfiguration( + initial_size=sizes[0], metadata=populations[0].asdict() + ) + ], + ) + + +_species.add_demographic_model(_pop1_almond()) diff --git a/stdpopsim/catalog/PruDul/genome_data.py b/stdpopsim/catalog/PruDul/genome_data.py new file mode 100644 index 000000000..0862858f9 --- /dev/null +++ b/stdpopsim/catalog/PruDul/genome_data.py @@ -0,0 +1,27 @@ +# # File generated from averaged phased haplotypes FASTAs. Do not edit. +# Genome assembly Castanera et al. 2024 https://doi.org/10.1093/hr/uhae106 +# https://zenodo.org/records/10829948 +# https://www.ebi.ac.uk/ena/browser/view/ERP158378 + +# Mitochondrial and chloroplast assembly +# D'Amico-Willman et al. 2022 +# https://doi.org/10.1093/g3journal/jkac065 + +data = { + "assembly_accession": "PRJEB73607", + "assembly_name": "Texas v.3.0", + "chromosomes": { + "1": {"length": 50452767, "synonyms": []}, + "2": {"length": 30972344, "synonyms": []}, + "3": {"length": 30498384, "synonyms": []}, + "4": {"length": 28068711, "synonyms": []}, + "5": {"length": 22241271, "synonyms": []}, + "6": {"length": 32371708, "synonyms": []}, + "7": {"length": 24859559, "synonyms": []}, + "8": {"length": 27108964, "synonyms": []}, + "MT": {"length": 444092, "synonyms": []}, + "Pt": {"length": 142856, "synonyms": []}, + }, + "assembly_source": "manual", + "assembly_build_version": "3", +} diff --git a/stdpopsim/catalog/PruDul/species.py b/stdpopsim/catalog/PruDul/species.py new file mode 100644 index 000000000..10c7e972e --- /dev/null +++ b/stdpopsim/catalog/PruDul/species.py @@ -0,0 +1,122 @@ +import stdpopsim + +from . import genome_data + +# Recombination rates +# From linkage map in Mas-Gomez et al. (2025) Table 1 +# (divided the genetic length per chromosome in cM with physical length) +_recombination_rate = { + "1": 1.97e-8, + "2": 1.51e-8, + "3": 1.89e-8, + "4": 2.22e-8, + "5": 2.54e-8, + "6": 1.98e-8, + "7": 2.61e-8, + "8": 2.12e-8, + "MT": 0, + "Pt": 0, +} + +# Generic and chromosome-specific ploidy +_species_ploidy = 2 +_ploidy = { + "1": _species_ploidy, + "2": _species_ploidy, + "3": _species_ploidy, + "4": _species_ploidy, + "5": _species_ploidy, + "6": _species_ploidy, + "7": _species_ploidy, + "8": _species_ploidy, + "MT": 1, + "Pt": 1, +} + + +# Evolutionary Genomics of Peach and Almond Domestication +_VelascoEtAl = stdpopsim.Citation( + doi="https://doi.org/10.1534/g3.116.032672", + year=2016, + author="Velasco et al.", + reasons={ + stdpopsim.CiteReason.GEN_TIME, # page 3987 + stdpopsim.CiteReason.POP_SIZE, # Figure S8, page 3987-8 + stdpopsim.CiteReason.MUT_RATE, # page 3987 + }, +) + +# A phased genome of the highly heterozygous ‘Texas’ almond +# uncovers patterns of allele-specific expression linked to +# heterozygous structural variants +_CastaneraEtAl = stdpopsim.Citation( + doi="https://doi.org/10.1093/hr/uhae106", + year=2024, + author="Castanera et al.", + reasons={stdpopsim.CiteReason.ASSEMBLY}, +) + +# Integration of linkage mapping, QTL analysis, RNA-Seq data, and +# Genome-Wide Association Studies (GWAS) to explore relative +# flowering traits in almond +_MasGomezEtAl = stdpopsim.Citation( + doi="https://doi.org/10.1016/j.hpj.2025.04.013", + year=2025, + author="Mas-Gomez et al.", + reasons={stdpopsim.CiteReason.REC_RATE}, # Table 1 +) + +# Whole-genome sequence and methylome profiling of the almond +# [Prunus dulcis (Mill.) D.A. Webb] cultivar ‘Nonpareil’ +_DAmicoWillmanEtal = stdpopsim.Citation( + doi="https://doi.org/10.1093/g3journal/jkac065", + year=2022, + author="D'Amico-Willman et al.", + reasons={stdpopsim.CiteReason.ASSEMBLY}, # mitochondrial and chloroplast +) + +_overall_rate = 1e-8 # per generation, Velasco et al. (2016), page 3987 +_mutation_rate = { + "1": _overall_rate, + "2": _overall_rate, + "3": _overall_rate, + "4": _overall_rate, + "5": _overall_rate, + "6": _overall_rate, + "7": _overall_rate, + "8": _overall_rate, + "MT": _overall_rate, + "Pt": _overall_rate, +} + + +_genome = stdpopsim.Genome.from_data( + genome_data.data, + recombination_rate=_recombination_rate, + mutation_rate=_mutation_rate, + ploidy=_ploidy, + citations=[ + _VelascoEtAl, # MUT_RATE + _MasGomezEtAl, # REC_RATE + _CastaneraEtAl, # ASSEMBLY + _DAmicoWillmanEtal, # ASSEMBLY + ], +) +stdpopsim.utils.append_common_synonyms(_genome) + +_species = stdpopsim.Species( + id="PruDul", + ensembl_id="prunus_dulcis", + name="Prunus dulcis", + common_name="Prunus dulcis", + genome=_genome, + generation_time=10, # Velasco et al. (2016), page 3987 + ploidy=_species_ploidy, + population_size=216627, # Velasco et al. (2016), Figure S8, page 3987-8 + citations=[ + _VelascoEtAl, # GEN_TIME, POP_SIZE + ], + separate_sexes=False, +) + +stdpopsim.register_species(_species) diff --git a/tests/test_PruDul.py b/tests/test_PruDul.py new file mode 100644 index 000000000..52a8b46d9 --- /dev/null +++ b/tests/test_PruDul.py @@ -0,0 +1,104 @@ +import pytest + +import stdpopsim +from tests import test_species + + +class TestSpeciesData(test_species.SpeciesTestBase): + + species = stdpopsim.get_species("PruDul") + + def test_ensembl_id(self): + assert self.species.ensembl_id == "prunus_dulcis" + + def test_name(self): + assert self.species.name == "Prunus dulcis" + + def test_common_name(self): + assert self.species.common_name == "Prunus dulcis" + + def test_assembly_source(self): + assert self.species.genome.assembly_source == "manual" + + def test_assembly_build_version(self): + assert self.species.genome.assembly_build_version == "3" + + # Effective population size (Velasco et al., 2016: Figure S8, page 3987-8) + # https://doi.org/10.1534/g3.116.032672 + def test_qc_population_size(self): + assert self.species.population_size == 216627 + + # Generation time/interal (Velasco et al., 2016: Figure S8, page 3987) + # https://doi.org/10.1534/g3.116.032672 + def test_qc_generation_time(self): + assert self.species.generation_time == 10 + + +class TestGenomeData(test_species.GenomeTestBase): + + genome = stdpopsim.get_species("PruDul").genome + + # Recombination rates from linkage map in Mas-Gomez et al. (2025) Table 1 + # and physical lengths from genome assembly + # in Castanera et al. (2024) - file genome_data.py + # https://doi.org/10.1016/j.hpj.2025.04.013 + @pytest.mark.parametrize( + ["name", "rate"], + { + # Morgans / n_base_pairs + "1": 1.97e-8, # 99.41 / 50452767 = 0.00000197 + "2": 1.51e-8, # 46.88 / 30972344 = 0.00000151 + "3": 1.89e-8, # 57.52 / 30498384 = 0.00000189 + "4": 2.22e-8, # 62.41 / 28068711 = 0.00000222 + "5": 2.54e-8, # 56.54 / 22241271 = 0.00000254 + "6": 1.98e-8, # 64.19 / 32371708 = 0.00000198 + "7": 2.61e-8, # 64.98 / 24859559 = 0.00000261 + "8": 2.12e-8, # 57.36 / 27108964 = 0.00000212 + "MT": 0, # no recombination + "Pt": 0, # no recombination + }.items(), + ) + def test_recombination_rate(self, name, rate): + assert rate == pytest.approx( + self.genome.get_chromosome(name).recombination_rate + ) + + # Mutation rate per site per generation + # Velasco et al. (2016), page 3987 + # https://doi.org/10.1534/g3.116.032672 + @pytest.mark.parametrize( + ["name", "rate"], + { + "1": 1e-8, + "2": 1e-8, + "3": 1e-8, + "4": 1e-8, + "5": 1e-8, + "6": 1e-8, + "7": 1e-8, + "8": 1e-8, + "MT": 1e-8, + "Pt": 1e-8, + }.items(), + ) + def test_mutation_rate(self, name, rate): + assert rate == pytest.approx(self.genome.get_chromosome(name).mutation_rate) + + # Ploidy - almond is diploid + @pytest.mark.parametrize( + ["name", "ploidy"], + { + "1": 2, + "2": 2, + "3": 2, + "4": 2, + "5": 2, + "6": 2, + "7": 2, + "8": 2, + "MT": 1, + "Pt": 1, + }.items(), + ) + def test_chromosome_ploidy(self, name, ploidy): + assert ploidy == self.genome.get_chromosome(name).ploidy