From e5118500a94939628fa1f94d11a8ce1efc6fdf34 Mon Sep 17 00:00:00 2001 From: Scott Olesen Date: Tue, 30 Dec 2025 11:17:17 -0500 Subject: [PATCH 1/2] Do not rerun everything every time --- Makefile | 41 ++++++++++++++++++++-------------------- scripts/describe_data.py | 3 +++ scripts/diagnostics.py | 21 ++++++++++++++------ scripts/eval.py | 16 ++++++---------- scripts/fit.py | 4 +--- scripts/forecast.py | 13 ++++++------- scripts/viz.py | 18 ++++++++++-------- 7 files changed, 62 insertions(+), 54 deletions(-) diff --git a/Makefile b/Makefile index 007e6a7a..13b03fc4 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,17 @@ RUN_ID = test -CONFIG = scripts/config.yaml -SETTINGS = output/settings/$(RUN_ID)/ + RAW_DATA = data/raw.parquet -DATA = output/data/$(RUN_ID)/nis.parquet -FITS = output/fits/$(RUN_ID)/ -DIAGNOSTICS = output/diagnostics/$(RUN_ID)/ -FORECASTS = output/forecasts/$(RUN_ID)/ -SCORES = output/scores/$(RUN_ID)/ -DATA_PLOT = output/diagnostics/$(RUN_ID)/data_national.png +CONFIG = scripts/config.yaml + +OUTPUT_DIR = output/$(RUN_ID) +CONFIG_COPY = $(OUTPUT_DIR)/config.yaml +DATA = $(OUTPUT_DIR)/nis.parquet +FITS = $(OUTPUT_DIR)/fits.pkl +FORECASTS = $(OUTPUT_DIR)/forecasts.parquet +DIAGNOSTICS = $(OUTPUT_DIR)/diagnostics/status.txt +SCORES = $(OUTPUT_DIR)/scores.parquet + +DATA_PLOT = $(OUTPUT_DIR)/plots/data_national.png .PHONY: clean viz @@ -16,32 +20,29 @@ all: $(SETTINGS) $(DATA) $(FITS) $(DIAGNOSTICS) $(FORECASTS) $(SCORES) $(DATA_PL viz: streamlit run scripts/viz.py -- \ - --obs=$(DATA) --pred=$(FORECASTS) --score=$(SCORES) --config=$(CONFIG) + --data=$(DATA) --forecasts=$(FORECASTS) --scores=$(SCORES) --config=$(CONFIG) $(SCORES): scripts/eval.py $(FORECASTS) $(DATA) - python $< \ - --pred=$(FORECASTS) --obs=$(DATA) --config=$(CONFIG) \ - --output=$@ + python $< --forecasts=$(FORECASTS) --data=$(DATA) --config=$(CONFIG) --output=$@ $(FORECASTS): scripts/forecast.py $(DATA) $(FITS) $(CONFIG) - python $< --data=$(DATA) --models=$(FITS) 
--config=$(CONFIG) \ - --output=$@ + python $< --data=$(DATA) --fits=$(FITS) --config=$(CONFIG) --output=$@ $(DIAGNOSTICS): scripts/diagnostics.py $(FITS) $(CONFIG) - python $< --input=$(FITS) --config=$(CONFIG) --output=$@ + python $< --fits=$(FITS) --config=$(CONFIG) --output=$@ $(FITS): scripts/fit.py $(DATA) $(CONFIG) python $< --data=$(DATA) --config=$(CONFIG) --output=$@ $(DATA_PLOT): scripts/describe_data.py $(DATA) - python $< --input=$(DATA) --output_dir=output/diagnostics/$(RUN_ID)/ + python $< --input=$(DATA) --output_dir=$(OUTPUT_DIR)/plots $(DATA): scripts/preprocess.py $(RAW_DATA) $(CONFIG) python $< --config=$(CONFIG) --input=$(RAW_DATA) --output=$@ -$(SETTINGS): $(CONFIG) - mkdir -p $(SETTINGS) - cp $(CONFIG) $(SETTINGS) +$(CONFIG_COPY): $(CONFIG) + mkdir -p $(OUTPUT_DIR) + cp $(CONFIG) $(CONFIG_COPY) clean: - rm -r $(SETTINGS) $(DATA) $(FITS) $(DIAGNOSTICS) $(FORECASTS) $(SCORES) + rm -rf $(OUTPUT_DIR) diff --git a/scripts/describe_data.py b/scripts/describe_data.py index f22850a7..2e55e6a3 100644 --- a/scripts/describe_data.py +++ b/scripts/describe_data.py @@ -13,6 +13,9 @@ data = pl.read_parquet(args.input) out_dir = Path(args.output_dir) + # ensure output directory exists + out_dir.mkdir(parents=True, exist_ok=True) + # national, every month, every season alt.Chart( data.filter(pl.col("geography_type") == pl.lit("nation")) diff --git a/scripts/diagnostics.py b/scripts/diagnostics.py index 2f28932d..326d21b6 100644 --- a/scripts/diagnostics.py +++ b/scripts/diagnostics.py @@ -89,16 +89,25 @@ def select_model_to_diagnose( if __name__ == "__main__": p = argparse.ArgumentParser() p.add_argument("--config", help="config file") - p.add_argument("--input", help="fitted model directory") - p.add_argument("--output", help="output directory") + p.add_argument("--fits", help="fits pickle") + p.add_argument( + "--output", help="output status file; other files put in the same directory" + ) args = p.parse_args() with open(args.config, "r") as f: config = 
yaml.safe_load(f) - with open(Path(args.input, "model_fits.pkl"), "rb") as f: + with open(args.fits, "rb") as f: models = pickle.load(f) - Path(args.output).mkdir(parents=True, exist_ok=True) - diagnostic_plot(models, config, args.output) - diagnostic_table(models, config, args.output) + output_dir = Path(args.output).parent + output_dir.mkdir(parents=True, exist_ok=True) + + # write the other plots to the same folder + diagnostic_plot(models, config, output_dir) + diagnostic_table(models, config, output_dir) + + # write the status file + with open(args.output, "w") as f: + f.write(dt.datetime.now().isoformat()) diff --git a/scripts/eval.py b/scripts/eval.py index c51fcf97..7dc291ad 100644 --- a/scripts/eval.py +++ b/scripts/eval.py @@ -1,5 +1,4 @@ import argparse -from pathlib import Path import polars as pl import yaml @@ -97,18 +96,15 @@ def eval_all_forecasts( if __name__ == "__main__": p = argparse.ArgumentParser() p.add_argument("--config", help="config file", required=True) - p.add_argument("--pred", help="forecast data directory", required=True) - p.add_argument("--obs", help="observed data", required=True) - p.add_argument("--output", help="output directory", required=True) + p.add_argument("--forecasts", help="forecasts parquet", required=True) + p.add_argument("--data", help="observed data", required=True) + p.add_argument("--output", help="output scores parquet", required=True) args = p.parse_args() with open(args.config) as f: config = yaml.safe_load(f) - pred = pl.read_parquet(Path(args.pred, "forecasts.parquet")) - data = pl.read_parquet(args.obs) + pred = pl.read_parquet(args.forecasts) + data = pl.read_parquet(args.data) - Path(args.output).mkdir(parents=True, exist_ok=True) - eval_all_forecasts(data, pred, config).write_parquet( - Path(args.output, "scores.parquet") - ) + eval_all_forecasts(data, pred, config).write_parquet(args.output) diff --git a/scripts/fit.py b/scripts/fit.py index 687c86c9..98ebd0f0 100644 --- a/scripts/fit.py +++ 
b/scripts/fit.py @@ -1,7 +1,6 @@ import argparse import datetime as dt import pickle as pkl -from pathlib import Path from typing import Any, Dict, List, Type import numpyro @@ -98,6 +97,5 @@ def fit_model( all_models = fit_all_models(input_data, config) - Path(args.output).mkdir(parents=True, exist_ok=True) - with open(Path(args.output, "model_fits.pkl"), "wb") as f: + with open(args.output, "wb") as f: pkl.dump(all_models, f) diff --git a/scripts/forecast.py b/scripts/forecast.py index 31b2bc16..d8fb3336 100644 --- a/scripts/forecast.py +++ b/scripts/forecast.py @@ -100,8 +100,8 @@ def run_all_forecasts( p = argparse.ArgumentParser() p.add_argument("--config", help="config file", required=True) p.add_argument("--data", help="input data", required=True) - p.add_argument("--models", help="fitted model directory", required=True) - p.add_argument("--output", help="output directory", required=True) + p.add_argument("--fits", required=True) + p.add_argument("--output", help="forecasts parquet", required=True) args = p.parse_args() with open(args.config, "r") as f: @@ -109,11 +109,10 @@ def run_all_forecasts( input_data = iup.CumulativeUptakeData(pl.read_parquet(args.data)) - with open(Path(args.models, "model_fits.pkl"), "rb") as f: + with open(args.fits, "rb") as f: models = pickle.load(f) - output = run_all_forecasts(input_data, models, config) + postchecks, forecasts = run_all_forecasts(input_data, models, config) - Path(args.output).mkdir(parents=True, exist_ok=True) - output[0].write_parquet(Path(args.output, "postchecks.parquet")) - output[1].write_parquet(Path(args.output, "forecasts.parquet")) + forecasts.write_parquet(args.output) + postchecks.write_parquet(Path(args.output).parent / "postchecks.parquet") diff --git a/scripts/viz.py b/scripts/viz.py index c89fa9b7..becf19d1 100644 --- a/scripts/viz.py +++ b/scripts/viz.py @@ -1,5 +1,4 @@ import argparse -from pathlib import Path from typing import Any, Dict, List import altair as alt @@ -412,8 +411,8 @@ def 
layer_with_facets(charts: List, encodings: Dict): if __name__ == "__main__": p = argparse.ArgumentParser() - p.add_argument("--obs", help="observed data", required=True) - p.add_argument("--pred", help="forecasts", required=True) - p.add_argument("--score", help="score metrics", required=True) + p.add_argument("--data", help="observed data", required=True) + p.add_argument("--forecasts", help="forecasts", required=True) + p.add_argument("--scores", help="score metrics", required=True) p.add_argument("--config", help="config yaml file", required=True) args = p.parse_args() @@ -421,16 +420,19 @@ def layer_with_facets(charts: List, encodings: Dict): @st.cache_data def load_data(): return { - "observed": pl.read_parquet(args.obs), - "forecasts": pl.read_parquet(Path(args.pred, "forecasts.parquet")), + "observed": pl.read_parquet(args.data), + "forecasts": pl.read_parquet(args.forecasts), } @st.cache_data def load_scores(): - return pl.read_parquet(Path(args.score, "scores.parquet")) + return pl.read_parquet(args.scores) @st.cache_data def load_config(): - return yaml.safe_load(open(args.config, "r")) + with open(args.config) as f: + config = yaml.safe_load(f) + + return config app() From e1d05c25f478517e8db957ae83844a46dbc7e2bb Mon Sep 17 00:00:00 2001 From: Scott Olesen Date: Tue, 30 Dec 2025 11:20:54 -0500 Subject: [PATCH 2/2] Increment version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bdb40f60..dbe3b784 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "iup" -version = "0.1.1" +version = "0.1.2" description = "" authors = [ { name = "Scott Olesen", email = "ulp7@cdc.gov" },