Skip to content

Commit 28e9974

Browse files
Raise if bin index is out of range (#540)
1 parent 6cdc00c commit 28e9974

File tree

3 files changed

+63
-12
lines changed

3 files changed

+63
-12
lines changed

CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
**4.3.7 - 09/10/25**
2+
3+
- Bugfix: raise an error if the simulation start year is earlier than the earliest year in the population data.
4+
15
**4.3.6 - 08/28/25**
26

37
- Feature: ScaledPopulation can handle multiple year data inputs.

src/vivarium_public_health/population/base_population.py

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ def get_demographic_proportions_for_creation_time(
166166
demographic_proportions, year: int
167167
) -> pd.DataFrame:
168168
reference_years = sorted(set(demographic_proportions.year_start))
169-
ref_year_index = np.digitize(year, reference_years).item() - 1
169+
ref_year_index = _find_bin_start_index(year, reference_years)
170170
return demographic_proportions[
171171
demographic_proportions.year_start == reference_years[ref_year_index]
172172
]
@@ -257,22 +257,22 @@ def _format_data_inputs(
257257
if "year_start" not in scaling_factor.index.names:
258258
return population_structure, scaling_factor
259259

260-
# Subset to start year of simulation or closest year
260+
# Subset the population structure and scaling factors to the simulation
261+
# start year. If the data does not contain the exact simulation start
262+
# year, subset to the closest year less than the simulation start year.
261263
pop_reference_years = sorted(
262264
set(population_structure.index.get_level_values("year_start"))
263265
)
264-
pop_year_index = np.digitize(year, pop_reference_years).item() - 1
265-
scale_reference_years = sorted(
266-
set(scaling_factor.index.get_level_values("year_start"))
267-
)
268-
scale_year_index = np.digitize(year, scale_reference_years).item() - 1
269-
# Subset to start year of simulation or earliest year. E.g. if start year = 2021 and pop
270-
# structure has 2021, we will subset to 2021. If pop structure minimum year is 2025, we
271-
# will subset to 2025.
266+
pop_year_index = _find_bin_start_index(year, pop_reference_years)
272267
population_structure = population_structure.loc[
273268
population_structure.index.get_level_values("year_start")
274269
== pop_reference_years[pop_year_index]
275270
]
271+
272+
scale_reference_years = sorted(
273+
set(scaling_factor.index.get_level_values("year_start"))
274+
)
275+
scale_year_index = _find_bin_start_index(year, scale_reference_years)
276276
scaling_factor = scaling_factor.loc[
277277
scaling_factor.index.get_level_values("year_start")
278278
== scale_reference_years[scale_year_index]
@@ -527,3 +527,31 @@ def _assign_demography_with_age_bounds(
527527
)
528528
register_simulants(simulants[list(key_columns)])
529529
return simulants
530+
531+
532+
def _find_bin_start_index(value: int, sorted_reference_values: list[int]) -> int:
533+
"""Finds the index of the closest reference value less than or equal to the provided value.
534+
535+
Parameters
536+
----------
537+
value
538+
The value for which to find the closest reference value.
539+
sorted_reference_values
540+
A sorted list of reference values.
541+
542+
Returns
543+
-------
544+
The index of the closest reference value less than or equal to the provided value.
545+
546+
Raises
547+
------
548+
ValueError
549+
If the provided value is less than the minimum reference value.
550+
"""
551+
ref_value_index = np.digitize(value, sorted_reference_values).item() - 1
552+
if ref_value_index < 0:
553+
raise ValueError(
554+
f"The provided value {value} is less than the minimum reference value "
555+
f"{min(sorted_reference_values)}."
556+
)
557+
return ref_value_index

tests/population/test_base_population.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import numpy as np
55
import pandas as pd
66
import pytest
7-
from layered_config_tree import LayeredConfigTree
87
from vivarium import InteractiveContext
98
from vivarium.testing_utilities import get_randomness
109
from vivarium_testing_utils import FuzzyChecker
@@ -296,7 +295,11 @@ def test_scaled_population(
296295
"population_size": 1_000_000,
297296
"include_sex": "Both",
298297
},
299-
"time": {"step_size": 1},
298+
"time": {
299+
"step_size": 1,
300+
# Update the start year to fall within the population structure data.
301+
"start": {"year": 2021},
302+
},
300303
},
301304
layer="override",
302305
)
@@ -470,6 +473,22 @@ def test_scaled_population__format_data_inputs(
470473
pd.testing.assert_frame_equal(data, expected)
471474

472475

476+
def test__find_bin_start_index():
    """Check bin lookup on bin edges, inside bins, above the last bin, and below the first."""
    sorted_values = [10, 20, 30]

    # Maps each probe value to the index of the bin it is expected to land in.
    expected_index_by_value = {
        10: 0,
        19: 0,
        20: 1,
        29: 1,
        30: 2,
        99999: 2,
    }
    for probe, expected_index in expected_index_by_value.items():
        assert bp._find_bin_start_index(probe, sorted_values) == expected_index

    # Edge case: a value below the smallest reference value must raise.
    expected_message = "The provided value 9 is less than the minimum reference value 10."
    with pytest.raises(ValueError, match=expected_message):
        bp._find_bin_start_index(9, sorted_values)
490+
491+
473492
def _check_population(simulants, initial_age, step_size, include_sex):
474493
assert len(simulants) == len(simulants.age.unique())
475494
assert simulants.age.min() > initial_age

0 commit comments

Comments
 (0)