Refactor sarix tests to create configs with helpers

lshandross · lshandross · commit 2db039fe2a9d · 2025-11-13T12:24:12.000-05:00
diff --git a/tests/integration/test_sarix.py b/tests/integration/test_sarix.py
@@ -11,56 +11,48 @@
 
 
 def test_sarix_nhsn(tmp_path):
-    model_config = SimpleNamespace(
-        model_class = "sarix",
-        model_name = "sarix_nhsn_p6_4rt_thetashared_sigmanone",
-        
-        # data sources and adjustments for reporting issues
-        sources = ["nhsn"],
-        
-        # fit locations separately or jointly
-        fit_locations_separately = False,
-        
-        # SARI model parameters
-        p = 6,
-        P = 0,
-        d = 0,
-        D = 0,
-        season_period = 1,
-
-        # power transform applied to surveillance signals
-        power_transform = "4rt",
+    date = datetime.date.fromisoformat("2024-01-06")
+    fips_codes = ["US", "01", "02", "04", "05", "06", "08", "09", "10", "11",
+                "12", "13", "15", "16", "17", "18", "19", "20", "21", "22",
+                "23", "24", "25", "26", "27", "28", "29", "30", "31", "32",
+                "33", "34", "35", "36", "37", "38", "39", "40", "41", "42",
+                "44", "45", "46", "47", "48", "49", "50", "51", "53", "54",
+                "55", "56", "72"]
+    model_config = create_test_sarix_model_config(main_source=["nhsn"])
+    run_config = create_test_sarix_run_config(ref_date=date, states=fips_codes, hsas=[], tmp_path=tmp_path)
+    
+    # patch the `_np_percentile()` helper function to return the same values to make the tests reproducible across OSs
+    with patch("idmodels.sarix._np_percentile", return_value=_np_percentile_val()):
+        model = SARIXModel(model_config)
+        model.run(run_config)
 
-        # sharing of information about parameters
-        theta_pooling="shared",
-        sigma_pooling="none",
-        
-        # covariates
-        x = []
+    actual_df = pd.read_csv(
+        run_config.output_root / f"UMass-{model_config.model_name}" / 
+        f"{str(run_config.ref_date)}-UMass-{model_config.model_name}.csv"
     )
-
-    run_config = SimpleNamespace(
-        disease="flu",
-        ref_date=datetime.date.fromisoformat("2024-01-06"),
-        output_root=tmp_path / "model-output",
-        artifact_store_root=tmp_path / "artifact-store",
-        save_feat_importance=False,
-        locations=["US", "01", "02", "04", "05", "06", "08", "09", "10", "11",
-                   "12", "13", "15", "16", "17", "18", "19", "20", "21", "22",
-                   "23", "24", "25", "26", "27", "28", "29", "30", "31", "32",
-                   "33", "34", "35", "36", "37", "38", "39", "40", "41", "42",
-                   "44", "45", "46", "47", "48", "49", "50", "51", "53", "54",
-                   "55", "56", "72"],
-        max_horizon=3,
-        q_levels = [0.025, 0.50, 0.975],
-        q_labels = ["0.025", "0.5", "0.975"],
-        num_warmup = 200,
-        num_samples = 200,
-        num_chains = 1
+    expected_df = pd.read_csv(
+        Path("tests") / "integration" / "data" /
+        f"UMass-{model_config.model_name}" / 
+        f"{str(run_config.ref_date)}-UMass-{model_config.model_name}.csv"
     )
+    assert_frame_equal(actual_df, expected_df)
+
 
+def test_sarix_nssp(tmp_path):
+    date = datetime.date.fromisoformat("2025-09-27")
+    # Missouri (29) does not submit to NSSP; code "72" from the NHSN list is also omitted here
+    fips_codes = ["US", "01", "02", "04", "05", "06", "08", "09", "10", "11",
+                "12", "13", "15", "16", "17", "18", "19", "20", "21", "22",
+                "23", "24", "25", "26", "27", "28", "30", "31", "32",
+                "33", "34", "35", "36", "37", "38", "39", "40", "41", "42",
+                "44", "45", "46", "47", "48", "49", "50", "51", "53", "54",
+                "55", "56"]
+    model_config = create_test_sarix_model_config(main_source=["nssp"])
+    run_config = create_test_sarix_run_config(ref_date=date, states=fips_codes, hsas=[], tmp_path=tmp_path)
+    
     # patch the `_np_percentile()` helper function return the same values to make the tests reproducible across OSs
-    with patch("idmodels.sarix._np_percentile", return_value=_np_percentile_val()):
+    # nssp data only covers 51 locations
+    with patch("idmodels.sarix._np_percentile", return_value=_np_percentile_val()[:, 0:51, :]):
         model = SARIXModel(model_config)
         model.run(run_config)
 
@@ -75,14 +67,15 @@ def test_sarix_nhsn(tmp_path):
     )
     assert_frame_equal(actual_df, expected_df)
 
+    # hsas=["25", "150"]
 
-def test_sarix_nssp(tmp_path):
+def create_test_sarix_model_config(main_source):
     model_config = SimpleNamespace(
         model_class = "sarix",
-        model_name = "sarix_nssp_p6_4rt_thetashared_sigmanone",
+        model_name = "sarix_" + main_source[0] + "_p6_4rt_thetashared_sigmanone",
         
         # data sources and adjustments for reporting issues
-        sources = ["nssp"],
+        sources = main_source,
         
         # fit locations separately or jointly
         fit_locations_separately = False,
@@ -104,44 +97,26 @@ def test_sarix_nssp(tmp_path):
         # covariates
         x = []
     )
+    return model_config
 
+def create_test_sarix_run_config(ref_date, states, hsas, tmp_path):
     run_config = SimpleNamespace(
         disease="flu",
-        ref_date=datetime.date.fromisoformat("2025-09-27"),
+        ref_date=ref_date,
         output_root=tmp_path / "model-output",
         artifact_store_root=tmp_path / "artifact-store",
         save_feat_importance=False,
-        locations=["US", "01", "02", "04", "05", "06", "08", "09", "10", "11",
-                   "12", "13", "15", "16", "17", "18", "19", "20", "21", "22",
-                   "23", "24", "25", "26", "27", "28", "29", "30", "31", "32",
-                   "33", "34", "35", "36", "37", "38", "39", "40", "41", "42",
-                   "44", "45", "46", "47", "48", "49", "50", "51", "53", "54",
-                   "55", "56", "72"],
+        states=states,
+        hsas = hsas,
         max_horizon=3,
         q_levels = [0.025, 0.50, 0.975],
         q_labels = ["0.025", "0.5", "0.975"],
         num_warmup = 200,
         num_samples = 200,
         num_chains = 1
     )
-
-    # patch the `_np_percentile()` helper function return the same values to make the tests reproducible across OSs
-    # nssp data only covers 51 locations
-    with patch("idmodels.sarix._np_percentile", return_value=_np_percentile_val()[:, 0:51, :]):
-        model = SARIXModel(model_config)
-        model.run(run_config)
-
-    actual_df = pd.read_csv(
-        run_config.output_root / f"UMass-{model_config.model_name}" / 
-        f"{str(run_config.ref_date)}-UMass-{model_config.model_name}.csv"
-    )
-    expected_df = pd.read_csv(
-        Path("tests") / "integration" / "data" /
-        f"UMass-{model_config.model_name}" / 
-        f"{str(run_config.ref_date)}-UMass-{model_config.model_name}.csv"
-    )
-    assert_frame_equal(actual_df, expected_df)
-
+    return run_config
+    
 def _np_percentile_val():
     return numpy.array(
         [[[2.22541624e-01, 1.82324940e-01, 1.27709944e-01],