@@ -166,7 +166,7 @@ def get_demographic_proportions_for_creation_time(
166166 demographic_proportions , year : int
167167 ) -> pd .DataFrame :
168168 reference_years = sorted (set (demographic_proportions .year_start ))
169- ref_year_index = np . digitize (year , reference_years ). item () - 1
169+ ref_year_index = _find_bin_start_index (year , reference_years )
170170 return demographic_proportions [
171171 demographic_proportions .year_start == reference_years [ref_year_index ]
172172 ]
@@ -257,22 +257,22 @@ def _format_data_inputs(
257257 if "year_start" not in scaling_factor .index .names :
258258 return population_structure , scaling_factor
259259
260- # Subset to start year of simulation or closest year
260+ # Subset the population structure and scaling factors to the simulation
261+ # start year. If the data does not contain the exact simulation start
262+ # year, subset to the closest year less than the simulation start year.
261263 pop_reference_years = sorted (
262264 set (population_structure .index .get_level_values ("year_start" ))
263265 )
264- pop_year_index = np .digitize (year , pop_reference_years ).item () - 1
265- scale_reference_years = sorted (
266- set (scaling_factor .index .get_level_values ("year_start" ))
267- )
268- scale_year_index = np .digitize (year , scale_reference_years ).item () - 1
269- # Subset to start year of simulation or earliest year. E.g. if start year = 2021 and pop
270- # structure has 2021, we will subset to 2021. If pop structure minimum year is 2025, we
271- # will subset to 2025.
266+ pop_year_index = _find_bin_start_index (year , pop_reference_years )
272267 population_structure = population_structure .loc [
273268 population_structure .index .get_level_values ("year_start" )
274269 == pop_reference_years [pop_year_index ]
275270 ]
271+
272+ scale_reference_years = sorted (
273+ set (scaling_factor .index .get_level_values ("year_start" ))
274+ )
275+ scale_year_index = _find_bin_start_index (year , scale_reference_years )
276276 scaling_factor = scaling_factor .loc [
277277 scaling_factor .index .get_level_values ("year_start" )
278278 == scale_reference_years [scale_year_index ]
@@ -527,3 +527,31 @@ def _assign_demography_with_age_bounds(
527527 )
528528 register_simulants (simulants [list (key_columns )])
529529 return simulants
530+
531+
532+ def _find_bin_start_index (value : int , sorted_reference_values : list [int ]) -> int :
533+ """Finds the index of the closest reference value less than or equal to the provided value.
534+
535+ Parameters
536+ ----------
537+ value
538+ The value for which to find the closest reference value.
539+ sorted_reference_values
540+ A sorted list of reference values.
541+
542+ Returns
543+ -------
544+ The index of the closest reference value less than or equal to the provided value.
545+
546+ Raises
547+ ------
548+ ValueError
549+ If the provided value is less than the minimum reference value.
550+ """
551+ ref_value_index = np .digitize (value , sorted_reference_values ).item () - 1
552+ if ref_value_index < 0 :
553+ raise ValueError (
554+ f"The provided value { value } is less than the minimum reference value "
555+ f"{ min (sorted_reference_values )} ."
556+ )
557+ return ref_value_index
0 commit comments