Skip to content

Commit 876abd3

Browse files
Add the facility active proportion field (#71)
## Nate's Summary

Added the new field, tests, and docstrings. I think I got it into all the necessary places, but it's been a few months since I worked with this repo, so I may have forgotten something.

## Copilot's Summary

This pull request introduces a new parameter, `facility_active_proportion`, to the EpiNow2 configuration generation pipeline. This parameter represents the minimum proportion of days a facility must be active during the modeling period and is integrated across multiple functions, constants, and tests to ensure proper handling and validation.

### Core Feature Addition:

* Introduced `facility_active_proportion` as a new required argument to the configuration generation functions (`generate_config`, `generate_rerun_config`, `generate_backfill_config`, and `generate_task_configs`). It is validated to ensure it is a number between 0 and 1 (inclusive). The default value of 1.0 is not a function default; it is applied when the `facility_active_proportion` environment variable is unset and in the default configuration constants. [[1]](diffhunk://#diff-d56536f6759432dcf08d9f3961d8344b4e7014d136f4c4450c4069e829e86926R44) [[2]](diffhunk://#diff-d56536f6759432dcf08d9f3961d8344b4e7014d136f4c4450c4069e829e86926R164) [[3]](diffhunk://#diff-d56536f6759432dcf08d9f3961d8344b4e7014d136f4c4450c4069e829e86926R348) [[4]](diffhunk://#diff-ab59143b1ba343d18c0ed85e6e50cacaa76d64fad93c142b8f349b5e61db04f8R348)

### Configuration and Constants Updates:

* Added `facility_active_proportion` to the default configuration constants and included it in the list of required parameters for task generation. [[1]](diffhunk://#diff-1e19d89684d25107f8db5de423069387a48db2ce4ea6bc4421388b450e8c3938R16) [[2]](diffhunk://#diff-1e19d89684d25107f8db5de423069387a48db2ce4ea6bc4421388b450e8c3938R97)
* Updated the `extract_user_args` function to parse `facility_active_proportion` from environment variables, with error handling for invalid formats. [[1]](diffhunk://#diff-ab59143b1ba343d18c0ed85e6e50cacaa76d64fad93c142b8f349b5e61db04f8R27-R39) [[2]](diffhunk://#diff-ab59143b1ba343d18c0ed85e6e50cacaa76d64fad93c142b8f349b5e61db04f8R97)

### Validation Enhancements:

* Extended the `validate_args` function to include validation logic for `facility_active_proportion`, ensuring it is a number (int or float) within the acceptable range of 0 to 1. [[1]](diffhunk://#diff-ab59143b1ba343d18c0ed85e6e50cacaa76d64fad93c142b8f349b5e61db04f8R204) [[2]](diffhunk://#diff-ab59143b1ba343d18c0ed85e6e50cacaa76d64fad93c142b8f349b5e61db04f8R278-R287)

### Test Coverage:

* Updated existing test cases and added new assertions to verify that `facility_active_proportion` is correctly passed, validated, and included in generated configurations. [[1]](diffhunk://#diff-d5d888969fd8e06ca26b226838d662a18b517912f4df62b41e87e8f8a2e1d9ffR50) [[2]](diffhunk://#diff-d5d888969fd8e06ca26b226838d662a18b517912f4df62b41e87e8f8a2e1d9ffR286) [[3]](diffhunk://#diff-8c80c5198a3e12b29443be1e74e33e55e5bd044c05c5d81c6ac2e5cf1acb6e2eR49-R53)
* Added tests to ensure the default value of `facility_active_proportion` is correctly applied when not explicitly provided. [[1]](diffhunk://#diff-8c80c5198a3e12b29443be1e74e33e55e5bd044c05c5d81c6ac2e5cf1acb6e2eR49-R53) [[2]](diffhunk://#diff-8c80c5198a3e12b29443be1e74e33e55e5bd044c05c5d81c6ac2e5cf1acb6e2eR83-R87)

---------

Co-authored-by: Adam Howes <adamthowes@gmail.com>
1 parent 5a94120 commit 876abd3

8 files changed

Lines changed: 121 additions & 6 deletions

File tree

pipelines/epinow2/generate_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,5 @@
2929
output_container=user_args["output_container"],
3030
task_exclusions=user_args.get("task_exclusions"),
3131
exclusions=user_args.get("exclusions"),
32+
facility_active_proportion=user_args["facility_active_proportion"],
3233
)

pipelines/epinow2/generate_rerun_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,5 @@
3333
as_of_date=user_args["as_of_date"],
3434
output_container=user_args["output_container"],
3535
data_exclusions_path=data_exclusions_path,
36+
facility_active_proportion=user_args["facility_active_proportion"],
3637
)

src/cfa_config_generator/utils/epinow2/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
"config_version": "1.0",
1414
"quantile_width": [0.5, 0.95],
1515
"model": "EpiNow2",
16+
"facility_active_proportion": 1.0,
1617
}
1718

1819
all_states = (
@@ -93,6 +94,7 @@
9394
"exclusions",
9495
"quantile_width",
9596
"model",
97+
"facility_active_proportion",
9698
)
9799

98100
sample_task = {
@@ -139,4 +141,5 @@
139141
"config_version": "1.0",
140142
"quantile_width": [0.5, 0.95],
141143
"model": "EpiNow2",
144+
"facility_active_proportion": 1.0,
142145
}

src/cfa_config_generator/utils/epinow2/driver_functions.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ def generate_config(
4141
job_id: str,
4242
as_of_date: str,
4343
output_container: str,
44+
facility_active_proportion: float,
4445
task_exclusions: str | None = None,
4546
exclusions: dict | None = None,
4647
):
@@ -77,6 +78,9 @@ def generate_config(
7778
exclusions: dict | None
7879
Dictionary with keys 'path' and 'blob_storage_container' for the exclusions file.
7980
If provided, this will be used to generate the task exclusions string.
81+
facility_active_proportion: float
82+
Minimum proportion of days a facility must be active during the modeling period.
83+
Must be a number between 0 and 1 (inclusive).
8084
8185
Returns
8286
-------
@@ -108,6 +112,7 @@ def generate_config(
108112
output_container=output_container,
109113
task_exclusions=task_exclusions,
110114
exclusions=exclusions,
115+
facility_active_proportion=facility_active_proportion,
111116
)
112117

113118
# Generate task-specific configs
@@ -156,6 +161,7 @@ def generate_rerun_config(
156161
job_id: str,
157162
as_of_date: str,
158163
output_container: str,
164+
facility_active_proportion: float,
159165
data_exclusions_path: str | None = None,
160166
):
161167
"""
@@ -193,6 +199,9 @@ def generate_rerun_config(
193199
Path to the data exclusion CSV file. If in Blob, use form
194200
`az://<container-name>/<path>`. Defaults to
195201
`az://nssp-etl/outliers-v2/<report_date>.csv` if None or empty.
202+
facility_active_proportion: float
203+
Minimum proportion of days a facility must be active during the modeling period.
204+
Must be a number between 0 and 1 (inclusive).
196205
Returns
197206
-------
198207
None
@@ -293,6 +302,7 @@ def generate_rerun_config(
293302
output_container=output_container,
294303
task_exclusions=task_excl_str,
295304
exclusions=excl_field,
305+
facility_active_proportion=facility_active_proportion,
296306
)
297307

298308
# Generate task-specific configs
@@ -335,6 +345,7 @@ def generate_backfill_config(
335345
backfill_name: str,
336346
as_of_dates: list[str],
337347
output_container: str,
348+
facility_active_proportion: float,
338349
task_exclusions: str | None = None,
339350
) -> list[str]:
340351
"""
@@ -388,6 +399,9 @@ def generate_backfill_config(
388399
Blob storage container to store output.
389400
task_exclusions: str | None
390401
Comma separated state:disease pair to exclude from model run.
402+
facility_active_proportion: float
403+
Minimum proportion of days a facility must be active during the modeling period.
404+
Must be a number between 0 and 1 (inclusive).
391405
392406
Returns
393407
-------
@@ -475,6 +489,7 @@ def generate_backfill_config(
475489
output_container=output_container,
476490
task_exclusions=task_exclusions,
477491
exclusions=exclusions_dict.get(rep_date, None),
492+
facility_active_proportion=facility_active_proportion,
478493
)
479494
logger.info(
480495
f"Successfully generated config for {job_id} with report date {rep_date.isoformat()}"

src/cfa_config_generator/utils/epinow2/functions.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,19 @@ def extract_user_args(as_of_date: str) -> dict[str, Any]:
2424
state = os.getenv("state") or "all"
2525
disease = os.getenv("disease") or "all"
2626

27+
# Handle facility_active_proportion
28+
facility_active_proportion_str = os.getenv("facility_active_proportion")
29+
try:
30+
facility_active_proportion = (
31+
float(facility_active_proportion_str)
32+
if facility_active_proportion_str
33+
else 1.0
34+
)
35+
except ValueError:
36+
raise ValueError(
37+
f"Invalid facility_active_proportion format: {facility_active_proportion_str}. Must be a float."
38+
)
39+
2740
# Handle report_date
2841
report_date_str = os.getenv("report_date")
2942
try:
@@ -81,6 +94,7 @@ def extract_user_args(as_of_date: str) -> dict[str, Any]:
8194
"job_id": job_id,
8295
"as_of_date": as_of_date,
8396
"output_container": output_container,
97+
"facility_active_proportion": facility_active_proportion,
8498
}
8599

86100

@@ -187,6 +201,7 @@ def validate_args(
187201
job_id: str,
188202
as_of_date: str,
189203
output_container: str,
204+
facility_active_proportion: float,
190205
task_exclusions: str | None = None,
191206
exclusions: dict | None = None,
192207
) -> dict:
@@ -207,6 +222,8 @@ def validate_args(
207222
task_exclusions: comma separated state:disease pairs to exclude
208223
exclusions: A dictionary with `path` and `blob_storage_container` keys
209224
to specify the path to the exclusions file and its container.
225+
facility_active_proportion: Minimum proportion of days a facility must be active
226+
during the modeling period. Must be a number between 0 and 1 (inclusive).
210227
Returns:
211228
A dictionary of sanitized arguments.
212229
"""
@@ -258,6 +275,16 @@ def validate_args(
258275
f"as_of_date must be a string. Got {type(as_of_date)} instead."
259276
)
260277

278+
# Check that the facility_active_proportion is a number between 0 and 1
279+
if not isinstance(facility_active_proportion, (int, float)):
280+
raise ValueError(
281+
f"facility_active_proportion must be a number. Got {type(facility_active_proportion)} instead."
282+
)
283+
if facility_active_proportion < 0 or facility_active_proportion > 1:
284+
raise ValueError(
285+
f"facility_active_proportion must be between 0 and 1. Got {facility_active_proportion} instead."
286+
)
287+
261288
args_dict["reference_dates"] = reference_dates
262289
args_dict["report_date"] = report_date
263290
args_dict["data_path"] = data_path
@@ -267,6 +294,7 @@ def validate_args(
267294
args_dict["as_of_date"] = as_of_date
268295
args_dict["exclusions"] = exclusions
269296
args_dict["output_container"] = output_container
297+
args_dict["facility_active_proportion"] = facility_active_proportion
270298
return args_dict
271299

272300

@@ -317,6 +345,7 @@ def generate_task_configs(
317345
production_date: date,
318346
job_id: str,
319347
output_container: str,
348+
facility_active_proportion: float,
320349
task_exclusions: dict[str, list[str]] | None = None,
321350
exclusions: str | None = None,
322351
) -> tuple[list[dict], str]:
@@ -336,6 +365,9 @@ def generate_task_configs(
336365
output_container: Azure container for output
337366
task_exclusions: dictionary of state:disease pairs to exclude
338367
exclusions: a path to exclusions csv
368+
facility_active_proportion: Minimum proportion of days a facility must be active
369+
during the modeling period. Must be a number between 0 and 1 (inclusive).
370+
339371
Returns:
340372
A list of configuration objects and the job_id.
341373
"""
@@ -347,6 +379,7 @@ def generate_task_configs(
347379
**shared_params,
348380
"job_id": job_id,
349381
"task_id": generate_task_id(state=s, disease=d),
382+
"facility_active_proportion": facility_active_proportion,
350383
"exclusions": exclusions or {"path": None},
351384
"min_reference_date": min(reference_dates).isoformat(),
352385
"max_reference_date": max(reference_dates).isoformat(),

tests/test_args.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def test_extract_user_args(monkeypatch):
4747
"job_id": generate_default_job_id(as_of_date=as_of_date),
4848
"as_of_date": as_of_date,
4949
"output_container": "test-container",
50+
"facility_active_proportion": 1.0,
5051
}
5152

5253
extracted_args = extract_user_args(as_of_date=as_of_date)
@@ -68,7 +69,6 @@ def test_validate_args_default():
6869
validated_args = validate_args(
6970
state="all",
7071
disease="all",
71-
exclusions=None,
7272
report_date=report_date,
7373
production_date=production_date,
7474
reference_dates=[min_reference_date, max_reference_date],
@@ -77,6 +77,8 @@ def test_validate_args_default():
7777
job_id="test-job-id",
7878
as_of_date=as_of_date,
7979
output_container="test-container",
80+
facility_active_proportion=1.0,
81+
exclusions=None,
8082
)
8183
assert validated_args == {
8284
"state": list(nssp_valid_states),
@@ -90,6 +92,7 @@ def test_validate_args_default():
9092
"job_id": "test-job-id",
9193
"as_of_date": as_of_date,
9294
"output_container": "test-container",
95+
"facility_active_proportion": 1.0,
9396
}
9497

9598

@@ -108,6 +111,7 @@ def test_invalid_state():
108111
job_id="test-job-id",
109112
as_of_date=generate_timestamp(),
110113
output_container="test-container",
114+
facility_active_proportion=1.0,
111115
)
112116

113117

@@ -127,6 +131,7 @@ def test_invalid_disease():
127131
job_id="test-job-id",
128132
as_of_date=generate_timestamp(),
129133
output_container="test-container",
134+
facility_active_proportion=1.0,
130135
)
131136

132137

@@ -152,6 +157,7 @@ def test_invalid_reference_date_logic():
152157
job_id="test-job-id",
153158
as_of_date=generate_timestamp(),
154159
output_container="test-container",
160+
facility_active_proportion=1.0,
155161
)
156162

157163

@@ -173,8 +179,9 @@ def test_invalid_disease_exclusion():
173179
production_date=today,
174180
job_id="test-job-id",
175181
as_of_date=as_of_date,
176-
task_exclusions=task_exclusions,
177182
output_container="test-container",
183+
facility_active_proportion=1.0,
184+
task_exclusions=task_exclusions,
178185
)
179186

180187

@@ -276,4 +283,5 @@ def test_generate_backfill_bad_lists():
276283
backfill_name=backfill_name,
277284
as_of_dates=as_of_dates,
278285
output_container=output_container,
286+
facility_active_proportion=1.0,
279287
)

tests/test_config_generation.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def test_default_config_set():
3434
job_id="test-job-id",
3535
as_of_date=as_of_date,
3636
output_container="test-container",
37+
facility_active_proportion=1.0,
3738
exclusions=None,
3839
task_exclusions=None,
3940
)
@@ -45,6 +46,11 @@ def test_default_config_set():
4546
)
4647
assert len(task_configs) == total_tasks_expected
4748

49+
# Test that facility_active_proportion is present with default value
50+
for config in task_configs:
51+
assert "facility_active_proportion" in config
52+
assert config["facility_active_proportion"] == 1.0
53+
4854

4955
def test_single_geo_disease_set():
5056
"""Tests that a single geography-disease combination returns a single task."""
@@ -64,6 +70,7 @@ def test_single_geo_disease_set():
6470
job_id="test-job-id",
6571
as_of_date=as_of_date,
6672
output_container="test-container",
73+
facility_active_proportion=1.0,
6774
exclusions=None,
6875
task_exclusions=None,
6976
)
@@ -73,6 +80,11 @@ def test_single_geo_disease_set():
7380
total_tasks_expected = 1
7481
assert len(task_configs) == total_tasks_expected
7582

83+
# Test that facility_active_proportion is present with default value
84+
config = task_configs[0]
85+
assert "facility_active_proportion" in config
86+
assert config["facility_active_proportion"] == 1.0
87+
7688

7789
@pytest.mark.parametrize(
7890
"report_dates, time_span, expected_ref_dates",
@@ -133,3 +145,26 @@ def test_gen_ref_date_tuples(report_dates, time_span, expected_ref_dates):
133145
"""
134146
got = generate_ref_date_tuples(report_dates=report_dates, delta=time_span)
135147
assert expected_ref_dates == got
148+
149+
150+
def test_facility_active_proportion_in_shared_params():
151+
"""Test that facility_active_proportion is included in shared_params with correct default value."""
152+
from cfa_config_generator.utils.epinow2.constants import shared_params
153+
154+
assert "facility_active_proportion" in shared_params
155+
assert shared_params["facility_active_proportion"] == 1.0
156+
157+
158+
def test_facility_active_proportion_modifiable():
159+
"""Test that facility_active_proportion is included in modifiable_params for CLI usage."""
160+
from cfa_config_generator.utils.epinow2.constants import modifiable_params
161+
162+
assert "facility_active_proportion" in modifiable_params
163+
164+
165+
def test_sample_task_facility_active_proportion():
166+
"""Test that sample_task includes facility_active_proportion with correct default value."""
167+
from cfa_config_generator.utils.epinow2.constants import sample_task
168+
169+
assert "facility_active_proportion" in sample_task
170+
assert sample_task["facility_active_proportion"] == 1.0

0 commit comments

Comments
 (0)