Albrja/mic-5664/update-rate-to-probability (#620)

albrja · web-flow · commit b3d90d51a39a · 2025-05-19T09:41:30.000-07:00
Albrja/mic-5664/update-rate-to-probability Update utility functions for rate to probability and probability to rate conversions - *Category*: Feature - *JIRA issue*: https://jira.ihme.washington.edu/browse/MIC-5664 Changes and notes -updates for rate and probability conversions -Discussion and requested change can be found here: ihmeuw/vivarium_research#1391 ### Testing
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,3 +1,7 @@
+**3.4.0 - 05/19/25**
+
+  - Feature: Update utility functions for rate and probability conversions
+
 **3.3.22 - 05/05/25**
 
   - Add py.typed marker
diff --git a/docs/source/tutorials/exploration.rst b/docs/source/tutorials/exploration.rst
@@ -112,6 +112,8 @@ configuration by simply printing it.
             component_configs: 0
         additional_seed:
             component_configs: None
+        rate_conversion_type:
+            component_configs: linear
     time:
         start:
             year:
@@ -202,10 +204,12 @@ just those subsets if we like.
        component_configs: 0
    additional_seed:
        component_configs: None
+   rate_conversion_type:
+       component_configs: linear
 
 This subset of configuration data contains more keys.  All of the keys in
-our example here (key_columns, map_size, random_seed, and additional_seed)
-point directly to values. We can access these values from the simulation
+our example here (key_columns, map_size, random_seed, additional_seed,
+and rate_conversion_type) point directly to values. We can access these values from the simulation
 as well.
 
 .. testcode::
@@ -214,6 +218,7 @@ as well.
    print(sim.configuration.randomness.map_size)
    print(sim.configuration.randomness.random_seed)
    print(sim.configuration.randomness.additional_seed)
+   print(sim.configuration.randomness.rate_conversion_type)
 
 
 .. testoutput::
@@ -222,6 +227,7 @@ as well.
    1000000
    0
    None
+   linear
 
 However, we can no longer modify the configuration since the simulation
 has already been setup.
@@ -252,6 +258,8 @@ should be one more layer of keys.
        component_configs: 0
    additional_seed:
        component_configs: None
+   rate_conversion_type:
+       component_configs: linear
 
 This last layer reflects a priority level in the way simulation configuration
 is managed. The ``component_configs`` under ``map_size``, ``random_seed``, and
diff --git a/src/vivarium/framework/randomness/manager.py b/src/vivarium/framework/randomness/manager.py
@@ -7,7 +7,7 @@
 from __future__ import annotations
 
 from collections.abc import Callable
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Literal
 
 import pandas as pd
 
@@ -33,6 +33,7 @@ class RandomnessManager(Manager):
             "key_columns": [],
             "random_seed": 0,
             "additional_seed": None,
+            "rate_conversion_type": "linear",
         }
     }
 
@@ -42,6 +43,7 @@ def __init__(self) -> None:
         self._key_columns: list[str] = []
         self._key_mapping_: IndexMap | None = None
         self._decision_points: dict[str, RandomnessStream] = dict()
+        self._rate_conversion_type: Literal["linear", "exponential"] = "linear"
 
     @property
     def name(self) -> str:
@@ -74,7 +76,7 @@ def setup(self, builder: Builder) -> None:
         pop_size = builder.configuration.population.population_size
         map_size = max(map_size, 10 * pop_size)
         self._key_mapping_ = IndexMap(self._key_columns, map_size)
-
+        self._rate_conversion_type = builder.configuration.randomness.rate_conversion_type
         self.resources = builder.resources
         self._add_constraint = builder.lifecycle.add_constraint
         self._add_constraint(self.get_seed, restrict_during=["initialization"])
@@ -95,6 +97,7 @@ def get_randomness_stream(
         decision_point: str,
         component: Component | None,
         initializes_crn_attributes: bool = False,
+        rate_conversion_type: Literal["linear", "exponential"] = "linear",
     ) -> RandomnessStream:
         """Provides a new source of random numbers for the given decision point.
 
@@ -112,6 +115,10 @@ def get_randomness_stream(
             in the Common Random Number framework. These streams cannot be
             copied and should only be used to generate the state table columns
             specified in ``builder.configuration.randomness.key_columns``.
+        rate_conversion_type
+            The type of conversion to use. Default is "linear" for a simple
+            multiplication of rate and time_scaling_factor. The other option is
+            "exponential".
 
         Returns
         -------
@@ -126,7 +133,7 @@ def get_randomness_stream(
             with the same identifier.
         """
         stream = self._get_randomness_stream(
-            decision_point, component, initializes_crn_attributes
+            decision_point, component, initializes_crn_attributes, rate_conversion_type
         )
         if not initializes_crn_attributes:
             # We need the key columns to be created before this stream can be called.
@@ -152,6 +159,7 @@ def _get_randomness_stream(
         decision_point: str,
         component: Component | None,
         initializes_crn_attributes: bool = False,
+        rate_conversion_type: Literal["linear", "exponential"] = "linear",
     ) -> RandomnessStream:
         if decision_point in self._decision_points:
             raise RandomnessError(
@@ -165,6 +173,7 @@ def _get_randomness_stream(
             index_map=self._key_mapping,
             component=component,
             initializes_crn_attributes=initializes_crn_attributes,
+            rate_conversion_type=rate_conversion_type,
         )
         self._decision_points[decision_point] = stream
         return stream
diff --git a/src/vivarium/framework/randomness/stream.py b/src/vivarium/framework/randomness/stream.py
@@ -28,7 +28,7 @@
 
 import hashlib
 from collections.abc import Callable
-from typing import TYPE_CHECKING, Any, Protocol, TypeVar
+from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar
 
 import numpy as np
 import numpy.typing as npt
@@ -102,6 +102,7 @@ def __init__(
         # TODO [MIC-5452]: all resources should have a component
         component: Component | None = None,
         initializes_crn_attributes: bool = False,
+        rate_conversion_type: Literal["linear", "exponential"] = "linear",
     ):
         super().__init__("stream", key, component)
         self.key = key
@@ -114,6 +115,15 @@ def __init__(
         """A key-index mapping with a vectorized hash and vectorized lookups."""
         self.initializes_crn_attributes = initializes_crn_attributes
         """A boolean indicating whether the stream is used to initialize CRN attributes."""
+        self.rate_conversion_type = rate_conversion_type
+        """The type of rate conversion to use when converting rates to probabilities.
+        Allowable types are 'linear' or 'exponential'.
+        """
+        if self.rate_conversion_type not in ["linear", "exponential"]:
+            raise ValueError(
+                f"Rate conversion type {self.rate_conversion_type} is not implemented. "
+                "Allowable types are 'linear' or 'exponential'."
+            )
 
     def _key(self, additional_key: Any = None) -> str:
         """Construct a hashable key from this object's state.
@@ -224,7 +234,9 @@ def filter_for_rate(
             The return type will be the same as type(population).
         """
         return self.filter_for_probability(
-            population, rate_to_probability(rate), additional_key
+            population,
+            rate_to_probability(rate, rate_conversion_type=self.rate_conversion_type),
+            additional_key,
         )
 
     def filter_for_probability(
diff --git a/src/vivarium/framework/utilities.py b/src/vivarium/framework/utilities.py
@@ -11,9 +11,10 @@
 from bdb import BdbQuit
 from collections.abc import Callable, Sequence
 from importlib import import_module
-from typing import Any, TypeVar
+from typing import Any, Literal, TypeVar
 
 import numpy as np
+from loguru import logger
 
 from vivarium.types import NumberLike, NumericArray, Timedelta
 
@@ -28,20 +29,91 @@ def to_yearly(value: TimeValue, time_step: Timedelta) -> TimeValue:
     return value / (time_step.total_seconds() / (60 * 60 * 24 * 365.0))
 
 
-def rate_to_probability(rate: Sequence[float] | NumberLike) -> NumericArray:
-    # encountered underflow from rate > 30k
-    # for rates greater than 250, exp(-rate) evaluates to 1e-109
-    # beware machine-specific floating point issues
+def rate_to_probability(
+    rate: Sequence[float] | NumberLike,
+    time_scaling_factor: float | int = 1.0,
+    rate_conversion_type: Literal["linear", "exponential"] = "linear",
+) -> NumericArray:
+    """Converts a rate to a probability.
+
+    Parameters
+    ----------
+    rate
+        The rate to convert to a probability.
+    time_scaling_factor
+        The time factor to scale the rates by. This is usually the time step.
+    rate_conversion_type
+        The type of conversion to use. Default is "linear" for a simple
+        multiplication of rate and time_scaling_factor, clipped to 1.0. The
+        other option is "exponential", which computes
+        ``1 - exp(-rate * time_scaling_factor)`` and should be used for
+        continuous time event driven models.
+
+    Returns
+    -------
+        An array of floats representing the probability of the converted rates
+
+    Raises
+    ------
+    ValueError
+        If ``rate_conversion_type`` is not "linear" or "exponential".
+    """
+    if rate_conversion_type not in ["linear", "exponential"]:
+        raise ValueError(
+            f"Rate conversion type {rate_conversion_type} is not implemented. "
+            "Allowable types are 'linear' or 'exponential'."
+        )
+
+    # Convert to an array *before* multiplying so that plain sequences (e.g. a
+    # list) are scaled element-wise instead of raising or being repeated. The
+    # outer np.array keeps scalar input as a 0-d array, which supports the
+    # masked assignments below.
+    scaled_rate = np.array(np.asarray(rate) * time_scaling_factor)
+
+    probability: NumericArray
+    if rate_conversion_type == "linear":
+        # NOTE: The default behavior for randomness streams is to use a rate that is already
+        # scaled to the time step which is why the default time scaling factor is 1.0.
+        probability = scaled_rate
+
+        # Clip to 1.0 if the probability is greater than 1.0.
+        exceeds_one = probability > 1.0
+        if exceeds_one.any():
+            probability[exceeds_one] = 1.0
+            logger.warning(
+                "The rate to probability conversion resulted in a probability greater than 1.0. "
+                "The probability has been clipped to 1.0 and indicates the rate is too high. "
+            )
+    else:
+        # encountered underflow from rate > 30k
+        # for rates greater than 250, exp(-rate) evaluates to 1e-109
+        # beware machine-specific floating point issues
+        # The cap is applied to the scaled exponent (rate * time_scaling_factor),
+        # not the raw rate, so a small time_scaling_factor does not distort
+        # large rates. With the default factor of 1.0 this is unchanged.
+        scaled_rate[scaled_rate > 250] = 250.0
+        probability = 1 - np.exp(-scaled_rate)
+
-    rate = np.array(rate)
-    rate[rate > 250] = 250.0
-    probability: NumericArray = 1 - np.exp(-rate)
     return probability
 
 
-def probability_to_rate(probability: Sequence[float] | NumberLike) -> NumericArray:
-    probability = np.array(probability)
-    rate: NumericArray = -np.log(1 - probability)
+def probability_to_rate(
+    probability: Sequence[float] | NumberLike,
+    time_scaling_factor: float | int = 1.0,
+    rate_conversion_type: Literal["linear", "exponential"] = "linear",
+) -> NumericArray:
+    """Converts a probability to a rate.
+
+    Parameters
+    ----------
+    probability
+        The probability to convert to a rate.
+    time_scaling_factor
+        The time factor to scale the probability by. This is usually the time step.
+    rate_conversion_type
+        The type of conversion to use. Default is "linear" for a simple division
+        of probability by time_scaling_factor. The other option is "exponential",
+        which computes ``-log(1 - probability) / time_scaling_factor`` and should
+        be used for continuous time event driven models.
+
+    Returns
+    -------
+        An array of floats representing the rate of the converted probabilities
+
+    Raises
+    ------
+    ValueError
+        If ``rate_conversion_type`` is not "linear" or "exponential".
+    """
+    # NOTE: The default behavior for randomness streams is to use a rate that is already
+    # scaled to the time step which is why the default time scaling factor is 1.0.
+    if rate_conversion_type not in ["linear", "exponential"]:
+        raise ValueError(
+            f"Rate conversion type {rate_conversion_type} is not implemented. "
+            "Allowable types are 'linear' or 'exponential'."
+        )
+    # Convert first so that plain sequences (e.g. a list) support the arithmetic below.
+    probability = np.asarray(probability)
+    if rate_conversion_type == "linear":
+        rate = np.array(probability / time_scaling_factor)
+    else:
+        # Divide by the time scaling factor so this inverts
+        # 1 - exp(-rate * time_scaling_factor); unchanged for the default of 1.0.
+        rate: NumericArray = -np.log(1 - probability) / time_scaling_factor
     return rate
 
 
diff --git a/tests/framework/randomness/test_manager.py b/tests/framework/randomness/test_manager.py
@@ -23,6 +23,7 @@ def test_randomness_manager_get_randomness_stream() -> None:
     rm._clock_ = mock_clock
     rm._key_columns = ["age", "sex"]
     rm._key_mapping_ = IndexMap(["age", "sex"])
+    rm._rate_conversion_type = "linear"
     stream = rm._get_randomness_stream("test", component)
 
     assert stream.key == "test"
diff --git a/tests/framework/randomness/test_stream.py b/tests/framework/randomness/test_stream.py
diff --git a/tests/framework/test_utilities.py b/tests/framework/test_utilities.py