Skip to content
4 changes: 2 additions & 2 deletions stdpopsim/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -829,9 +829,9 @@ def run_simulation(args):
intervals_summary_str = f"[{left}, {right})"

dfe = species.get_dfe(args.dfe)
contig.add_dfe(
contig.add_dme(
intervals=intervals,
DFE=dfe,
DME=dfe,
)
logger.info(
f"Applying selection under the DFE model {dfe.id} "
Expand Down
163 changes: 110 additions & 53 deletions stdpopsim/genomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,12 +250,13 @@ class Contig:
"""
Class representing a contiguous region of genome that is to be simulated.
This contains the information about mutation rates, distributions of
fitness effects (DFEs), gene conversion rates and recombination rates
mutational effects (DMEs), gene conversion rates and recombination rates
that are needed to simulate this region.

Information about targets of selection are contained in the ``dfe_list``
and ``interval_list``. These must be of the same length, and the k-th DFE
applies to the k-th interval; see :meth:`.add_dfe` for more information.
Information about variants that affect traits
or fitness are contained in the ``dme_list``
and ``interval_list``. These must be of the same length, and the k-th DME
applies to the k-th interval; see :meth:`.add_dme` for more information.

One attribute of the Contig is ``species``, which is necessary to pass
on aspects of life history important for simulation. However, generic
Expand Down Expand Up @@ -299,12 +300,13 @@ class Contig:
:ivar exclude: If True, ``mask_intervals`` specify regions to exclude. If False,
``mask_intervals`` specify regions in keep.
:vartype exclude: bool
:ivar dfe_list: A list of :class:`.DFE` objects.
By default, the only DFE is completely neutral.
:vartype dfe_list: list
:ivar interval_list: A list of :class:`np.array` objects containing integers.
By default, the inital interval list spans the whole chromosome with the
neutral DFE.
These specify where each DME provided in dme_list is applied.
By default, the inital interval list spans the whole chromosome with
DME where variants have no effects on traits or fitness.
Comment on lines +305 to +306
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
By default, the inital interval list spans the whole chromosome with
DME where variants have no effects on traits or fitness.
By default, the initial interval list spans the whole chromosome with
a DME whose variants have no effects on traits or fitness.

:ivar dme_list: A list of :class:`.DME` objects.
By default, the only DME is completely neutral and affects no traits.
:vartype dme_list: list
:ivar coordinates: The location of the contig on a named chromosome,
as a tuple of the form `(chromosome, left, right)`. If `None`, the contig
is assumed to be generic (i.e. it does not inherit a coordinate system
Expand Down Expand Up @@ -339,18 +341,29 @@ class Contig:
genetic_map = attr.ib(default=None)
inclusion_mask = attr.ib(default=None)
exclusion_mask = attr.ib(default=None)
dfe_list = attr.ib(factory=list)
dme_list = attr.ib(factory=list)
_dfe_list = attr.ib(factory=list, alias="dfe_list")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the thinking here? How will this work? (I don't know quite what alias does.)

interval_list = attr.ib(factory=list)
coordinates = attr.ib(default=None, type=tuple)
species = attr.ib(default=None, kw_only=True)

def __attrs_post_init__(self):
if self._dfe_list and self.dme_list:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We like to make things explicit rather than relying on truthiness of things (eg is an empty list True or False? I'd have to check), so how about

Suggested change
if self._dfe_list and self.dme_list:
if self._dfe_list is not None and self.dme_list is not None:

raise ValueError("Cannot specify both dme_list and dfe_list.")
if len(self._dfe_list) > 0:
warnings.warn(
stdpopsim.DeprecatedFeatureWarning(
"Use of dfe_list is deprecated. Please use dme_list."
)
)
self.dme_list = self._dfe_list

if self.coordinates is None:
self.coordinates = (None, 0, int(self.length))
_, left, right = self.coordinates
self.add_dfe(
self.add_dme(
np.array([[left, right]]),
stdpopsim.dfe.neutral_dfe(),
stdpopsim.traits.neutral_dfe(),
)
if self.species is None:
self.species = stdpopsim.species.Species(
Expand Down Expand Up @@ -681,29 +694,37 @@ def origin(self):
return f"{chromosome}:{left}-{right}"

def dfe_breakpoints(self, *, relative_coordinates=None):
warnings.warn(
stdpopsim.DeprecatedFeatureWarning(
"dfe_breakpoints is deprecated. Please use dme_breakpoints."
)
)
return self.dme_breakpoints(relative_coordinates=relative_coordinates)

def dme_breakpoints(self, *, relative_coordinates=None):
"""
Returns two things: the sorted vector of endpoints of all intervals across
all DFEs in the contig, and a vector of integer labels for these intervals,
saying which DFE goes with which interval.
all DMEs in the contig, and a vector of integer labels for these intervals,
saying which DME goes with which interval.
This provides a complementary method to tell which bit of the contig
has which DFE attached, which may be more convenient than the list of two-column
has which DME attached, which may be more convenient than the list of two-column
arrays provided by interval_list.

Suppose there are n+1 unique interval endpoints across all the DFE intervals
Suppose there are n+1 unique interval endpoints across all the DME intervals
in :attr:`.interval_list`. (If the intervals in that list
cover the whole genome, the number of intervals is n.)
This method returns a tuple of two things: ``breaks, dfe_labels``.
This method returns a tuple of two things: ``breaks, dme_labels``.
"breaks" is the array containing those n+1 unique endpoints, in increasing order,
and "dfe" is the array of length n containing the index of the DFE
and "dme" is the array of length n containing the index of the DME
the applies to that interval. So, ``breaks[0]`` is always 0, and
``breaks[n+1]`` is always the length of the contig, and
``dfe_labels[k] = j`` if
``dme_labels[k] = j`` if
``[breaks[k], breaks[k+1]]`` is an interval in ``contig.interval_list[j]``,
i.e., if ``contig.dfe_list[j]`` applies to the interval starting at
``breaks[k]``. Some intervals may not be covered by a DFE, in which
i.e., if ``contig.dme_list[j]`` applies to the interval starting at
``breaks[k]``. Some intervals may not be covered by a DME, in which
case they will have the label ``-1`` (beware of python indexing!).

:return: A tuple (breaks, dfe_labels).
:return: A tuple (breaks, dme_labels).
"""
if relative_coordinates is not None:
warnings.warn(
Expand All @@ -717,27 +738,63 @@ def dfe_breakpoints(self, *, relative_coordinates=None):
breaks = np.unique(
np.vstack(self.interval_list + [[[0, int(self.length)]]]) # also sorted
)
dfe_labels = np.full(len(breaks) - 1, -1, dtype="int")
dme_labels = np.full(len(breaks) - 1, -1, dtype="int")
for j, intervals in enumerate(self.interval_list):
dfe_labels[np.isin(breaks[:-1], intervals[:, 0], assume_unique=True)] = j
return breaks, dfe_labels
dme_labels[np.isin(breaks[:-1], intervals[:, 0], assume_unique=True)] = j
return breaks, dme_labels

def clear_dfes(self):
warnings.warn(
stdpopsim.DeprecatedFeatureWarning(
"The original_coordinates property is deprecated, "
"since contigs are no longer shifted to be relative to their "
"left endpoint: use Contig.coordinates instead."
)
)
self.clear_dmes()

def clear_dmes(self):
"""
Removes all DFEs from the contig (as well as the corresponding list of
Removes all DMEs from the contig (as well as the corresponding list of
intervals).
"""
self.dfe_list = []
self.dme_list = []
self.interval_list = []

@property
def dfe_list(self):
warnings.warn(
stdpopsim.DeprecatedFeatureWarning(
"contig.dfe_list is deprecated. Please use contig.dme_list"
)
)
return self.dme_list

@dfe_list.setter
def dfe_list(self, value):
warnings.warn(
stdpopsim.DeprecatedFeatureWarning(
"contig.dfe_list is deprecated. Please use contig.dme_list"
)
)
self.dme_list = value

def add_dfe(self, intervals, DFE):
warnings.warn(
stdpopsim.DeprecatedFeatureWarning(
"add_dfe is deprecated. Please use add_dme."
)
)
self.add_dme(intervals, DFE)

def add_dme(self, intervals, DME):
"""
Adds the provided DFE to the contig, applying to the regions of the
Adds the provided DME to the contig, applying to the regions of the
contig specified in ``intervals``. These intervals are also *removed*
from the intervals of any previously-present DFEs - in other words,
more recently-added DFEs take precedence. Each DFE added in this way
from the intervals of any previously-present DMEs - in other words,
more recently-added DMEs take precedence. Each DME added in this way
carries its own MutationTypes: no mutation types are shared between
DFEs added by different calls to ``add_dfe( )``, even if the same DFE
DMEs added by different calls to ``add_dme( )``, even if the same DME
object is added more than once.

For instance, if we do
Expand All @@ -746,26 +803,26 @@ def add_dfe(self, intervals, DFE):

a1 = np.array([[0, 100]])
a2 = np.array([[50, 120]])
contig.add_dfe(a1, dfe1)
contig.add_dfe(a2, dfe2)
contig.add_dme(a1, dme1)
contig.add_dme(a2, dme2)

then ``dfe1`` applies to the region from 0 to 50 (including 0 but not
50) and ``dfe2`` applies to the region from 50 to 120 (including 50 but
then ``dme1`` applies to the region from 0 to 50 (including 0 but not
50) and ``dme2`` applies to the region from 50 to 120 (including 50 but
not 120).

Any of the ``intervals`` that fall outside of the contig will be
clipped to the contig boundaries. If no ``intervals`` overlap the
contig, an "empty" DFE will be added with a warning.
contig, an "empty" DME will be added with a warning.

:param array intervals: A valid set of intervals.
:param DFE dfe: A DFE object.
:param DME dme: A DME object.
"""
_, left, right = self.coordinates
intervals = stdpopsim.utils.clip_intervals(intervals, left, right)
stdpopsim.utils._check_intervals_validity(intervals, start=left, end=right)
for j, ints in enumerate(self.interval_list):
self.interval_list[j] = stdpopsim.utils.mask_intervals(ints, intervals)
self.dfe_list.append(DFE)
self.dme_list.append(DME)
self.interval_list.append(intervals)

def add_single_site(
Expand Down Expand Up @@ -828,53 +885,53 @@ def add_single_site(
description=description,
long_description=long_description,
)
self.add_dfe(
self.add_dme(
intervals=np.array([[coordinate, coordinate + 1]], dtype="int"),
DFE=dfe,
DME=dfe,
)

@property
def is_neutral(self):
"""
Returns True if the contig has no non-neutral mutation types.
"""
return all(mt.is_neutral for d in self.dfe_list for mt in d.mutation_types)
return all(mt.is_neutral for d in self.dme_list for mt in d.mutation_types)

def mutation_types(self):
"""
Provides information about the MutationTypes assigned to this Contig,
along with information about which DFE they correspond to. This is
along with information about which DME they correspond to. This is
useful because when simulating with SLiM, the mutation types are
assigned numeric IDs in the order provided here (e.g., in the order
encountered when iterating over the DFEs); this method provides an easy
way to map back from their numeric ID to the DFE that each MutationType
encountered when iterating over the DMEs); this method provides an easy
way to map back from their numeric ID to the DME that each MutationType
corresponds to.

This method returns a list of dictionaries of length equal to the
number of MutationTypes in all DFEs of the contig, each dictionary
containing three things: ``"dfe_id"``: the ID of the DFE this
number of MutationTypes in all DMEs of the contig, each dictionary
containing three things: ``"dme_id"``: the ID of the DME this
MutationType comes from; ``mutation_type``: the mutation type; and
``id``: the index in the list.

For instance, if ``muts`` is a list of mutation objects in a SLiM tree
sequence, then the following code will print the IDs of the DFEs that
sequence, then the following code will print the IDs of the DMEs that
each comes from:

.. code-block:: python

mut_types = contig.mutation_types()
for m in muts:
dfe_ids = [mut_types[k]["dfe_id"] for md in m.metadata["muation_list"]]
print(f"Mutation {m.id} has mutations from DFE(s) {','.join(dfe_ids)}")
dme_ids = [mut_types[k]["dme_id"] for md in m.metadata["muation_list"]]
print(f"Mutation {m.id} has mutations from DME(s) {','.join(dme_ids)}")

"""
id = 0
mut_types = []
for d in self.dfe_list:
for d in self.dme_list:
for mt in d.mutation_types:
mut_types.append(
{
"dfe_id": d.id,
"dme_id": d.id,
"mutation_type": mt,
"id": id,
}
Expand All @@ -888,7 +945,7 @@ def __str__(self):
"Contig(length={:.2G}, mutation_rate={:.2G}, "
"recombination_rate={:.2G}, bacterial_recombination={}, "
"gene_conversion_fraction={}, gene_conversion_length={}, "
"genetic_map={}, origin={}, dfe_list={}, "
"genetic_map={}, origin={}, dme_list={}, "
"interval_list={}, species={})"
).format(
self.length,
Expand All @@ -899,7 +956,7 @@ def __str__(self):
self.gene_conversion_length,
gmap,
self.origin,
self.dfe_list,
self.dme_list,
self.interval_list,
self.species.id,
)
Expand Down
20 changes: 10 additions & 10 deletions stdpopsim/slim_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ def get_slim_mutation_rate_map(contig):
"""
Returns a tuple with the breakpoints and rates of mutations for SLiM.
"""
breaks, dfe_labels = contig.dfe_breakpoints() # beware -1 labels
breaks, dfe_labels = contig.dme_breakpoints() # beware -1 labels
slim_fractions = np.array(
[
sum(
Expand All @@ -771,7 +771,7 @@ def get_slim_mutation_rate_map(contig):
if (not mt.is_neutral)
]
)
for d in contig.dfe_list
for d in contig.dme_list
]
+ [0]
) # append 0 for the -1 labels
Expand Down Expand Up @@ -805,16 +805,16 @@ def _enum_dfe_and_intervals(contig):
function we can ensure these two steps assigns numeric ids to the DFEs in the
same way.
"""
assert len(contig.dfe_list) == len(contig.interval_list)
for i, (d, ints) in enumerate(zip(contig.dfe_list, contig.interval_list)):
assert len(contig.dme_list) == len(contig.interval_list)
for i, (d, ints) in enumerate(zip(contig.dme_list, contig.interval_list)):
yield i, d, ints


def _dfe_to_mtypes(contig):
"""
Assigns mutation type ids to each of the mutation types contained in this
`Contig`. This function will return a dictionary with `len(contig.dfe_list)`
elements, in which the position of the DFE in `contig.dfe_list` is the key.
`Contig`. This function will return a dictionary with `len(contig.dme_list)`
elements, in which the position of the DFE in `contig.dme_list` is the key.
For each DFE, the dictionary holds a list of tuples that contain the
assigned mutation type ids (used in SLiM) and the `MutationType` object.
This is necessary so that we use the same numeric ids to the mutation types
Expand Down Expand Up @@ -863,7 +863,7 @@ def _get_json(obj):
schema = tables.metadata_schema.asdict()
metadata = tables.metadata
schema["properties"].update(_raw_stdpopsim_top_level_schema)
dfes = _get_json(contig.dfe_list)
dfes = _get_json(contig.dme_list)
dfe_to_mtypes = _dfe_to_mtypes(contig)
for i, d, ints in _enum_dfe_and_intervals(contig):
dfes[i]["intervals"] = ints.tolist()
Expand Down Expand Up @@ -1048,7 +1048,7 @@ def fix_time(event):
coordinate = None
if hasattr(ee, "single_site_id"):
dfe_index = [
i for i, x in enumerate(contig.dfe_list) if x.id == ee.single_site_id
i for i, x in enumerate(contig.dme_list) if x.id == ee.single_site_id
]
if len(dfe_index) != 1:
raise ValueError(
Expand Down Expand Up @@ -1640,7 +1640,7 @@ def simulate(
raise ValueError("slim_scaling_factor must be positive")
if slim_burn_in < 0:
raise ValueError("slim_burn_in must be non-negative")
if len(contig.dfe_list) == 0:
if len(contig.dme_list) == 0:
raise ValueError("SLiM requires at least one DFE.")

if slim_scaling_factor != 1:
Expand Down Expand Up @@ -1923,7 +1923,7 @@ def _recap_and_rescale(
ts = self._simplify_remembered(ts)

# Adding neutral mutations to simulation and recapitation periods
breaks, dfe_labels = contig.dfe_breakpoints() # beware -1 labels
breaks, dfe_labels = contig.dme_breakpoints() # beware -1 labels
for i, dfe in enumerate(ts.metadata["stdpopsim"]["DFEs"]):
assert len(dfe["proportions"]) == len(dfe["mutation_types"])
for prop, mt in zip(dfe["proportions"], dfe["mutation_types"]):
Expand Down
Loading
Loading