Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 21 additions & 20 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
RUN_ID = test
CONFIG = scripts/config.yaml
SETTINGS = output/settings/$(RUN_ID)/

RAW_DATA = data/raw.parquet
DATA = output/data/$(RUN_ID)/nis.parquet
FITS = output/fits/$(RUN_ID)/
DIAGNOSTICS = output/diagnostics/$(RUN_ID)/
FORECASTS = output/forecasts/$(RUN_ID)/
SCORES = output/scores/$(RUN_ID)/
DATA_PLOT = output/diagnostics/$(RUN_ID)/data_national.png
CONFIG = scripts/config.yaml

OUTPUT_DIR = output/$(RUN_ID)
CONFIG_COPY = $(OUTPUT_DIR)/config.yaml
DATA = $(OUTPUT_DIR)/nis.parquet
FITS = $(OUTPUT_DIR)/fits.pkl
FORECASTS = $(OUTPUT_DIR)/forecasts.parquet
DIAGNOSTICS = $(OUTPUT_DIR)/diagnostics/status.txt
SCORES = $(OUTPUT_DIR)/scores.parquet

DATA_PLOT = $(OUTPUT_DIR)/plots/data_national.png


.PHONY: clean viz
Expand All @@ -16,32 +20,29 @@ all: $(SETTINGS) $(DATA) $(FITS) $(DIAGNOSTICS) $(FORECASTS) $(SCORES) $(DATA_PL

viz:
streamlit run scripts/viz.py -- \
--obs=$(DATA) --pred=$(FORECASTS) --score=$(SCORES) --config=$(CONFIG)
--data=$(DATA) --forecasts=$(FORECASTS) --score=$(SCORES) --config=$(CONFIG)

$(SCORES): scripts/eval.py $(FORECASTS) $(DATA)
python $< \
--pred=$(FORECASTS) --obs=$(DATA) --config=$(CONFIG) \
--output=$@
python $< --forecasts=$(FORECASTS) --data=$(DATA) --config=$(CONFIG) --output=$@

$(FORECASTS): scripts/forecast.py $(DATA) $(FITS) $(CONFIG)
python $< --data=$(DATA) --models=$(FITS) --config=$(CONFIG) \
--output=$@
python $< --data=$(DATA) --fits=$(FITS) --config=$(CONFIG) --output=$@

$(DIAGNOSTICS): scripts/diagnostics.py $(FITS) $(CONFIG)
python $< --input=$(FITS) --config=$(CONFIG) --output=$@
python $< --fits=$(FITS) --config=$(CONFIG) --output=$@

$(FITS): scripts/fit.py $(DATA) $(CONFIG)
python $< --data=$(DATA) --config=$(CONFIG) --output=$@

$(DATA_PLOT): scripts/describe_data.py $(DATA)
python $< --input=$(DATA) --output_dir=output/diagnostics/$(RUN_ID)/
python $< --input=$(DATA) --output_dir=$(OUTPUT_DIR)/plots

$(DATA): scripts/preprocess.py $(RAW_DATA) $(CONFIG)
python $< --config=$(CONFIG) --input=$(RAW_DATA) --output=$@

$(SETTINGS): $(CONFIG)
mkdir -p $(SETTINGS)
cp $(CONFIG) $(SETTINGS)
$(CONFIG_COPY): $(CONFIG)
mkdir -p $(OUTPUT_DIR)
cp $(CONFIG) $(CONFIG_COPY)

clean:
rm -r $(SETTINGS) $(DATA) $(FITS) $(DIAGNOSTICS) $(FORECASTS) $(SCORES)
rm -rf $(OUTPUT_DIR)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "iup"
version = "0.1.1"
version = "0.1.2"
description = ""
authors = [
{ name = "Scott Olesen", email = "ulp7@cdc.gov" },
Expand Down
3 changes: 3 additions & 0 deletions scripts/describe_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
data = pl.read_parquet(args.input)
out_dir = Path(args.output_dir)

# ensure output directory exists
out_dir.mkdir(parents=True, exist_ok=True)

# national, every month, every season
alt.Chart(
data.filter(pl.col("geography_type") == pl.lit("nation"))
Expand Down
21 changes: 15 additions & 6 deletions scripts/diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,25 @@ def select_model_to_diagnose(
if __name__ == "__main__":
p = argparse.ArgumentParser()
p.add_argument("--config", help="config file")
p.add_argument("--input", help="fitted model directory")
p.add_argument("--output", help="output directory")
p.add_argument("--fits", help="fits pickle")
p.add_argument(
"--output", help="output status file; other files put in the same directory"
)
args = p.parse_args()

with open(args.config, "r") as f:
config = yaml.safe_load(f)

with open(Path(args.input, "model_fits.pkl"), "rb") as f:
with open(args.fits, "rb") as f:
models = pickle.load(f)

Path(args.output).mkdir(parents=True, exist_ok=True)
diagnostic_plot(models, config, args.output)
diagnostic_table(models, config, args.output)
output_dir = Path(args.output).parent
output_dir.mkdir(parents=True, exist_ok=True)

# write the diagnostic plot and table alongside the status file
diagnostic_plot(models, config, output_dir)
diagnostic_table(models, config, output_dir)

# write the status file
with open(args.output, "w") as f:
f.write(dt.datetime.now().isoformat())
16 changes: 6 additions & 10 deletions scripts/eval.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import argparse
from pathlib import Path

import polars as pl
import yaml
Expand Down Expand Up @@ -97,18 +96,15 @@ def eval_all_forecasts(
if __name__ == "__main__":
p = argparse.ArgumentParser()
p.add_argument("--config", help="config file", required=True)
p.add_argument("--pred", help="forecast data directory", required=True)
p.add_argument("--obs", help="observed data", required=True)
p.add_argument("--output", help="output directory", required=True)
p.add_argument("--forecasts", help="forecasts parquet", required=True)
p.add_argument("--data", help="observed data", required=True)
p.add_argument("--output", help="output scores parquet", required=True)
args = p.parse_args()

with open(args.config) as f:
config = yaml.safe_load(f)

pred = pl.read_parquet(Path(args.pred, "forecasts.parquet"))
data = pl.read_parquet(args.obs)
pred = pl.read_parquet(args.forecasts)
data = pl.read_parquet(args.data)

Path(args.output).mkdir(parents=True, exist_ok=True)
eval_all_forecasts(data, pred, config).write_parquet(
Path(args.output, "scores.parquet")
)
eval_all_forecasts(data, pred, config).write_parquet(args.output)
4 changes: 1 addition & 3 deletions scripts/fit.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import argparse
import datetime as dt
import pickle as pkl
from pathlib import Path
from typing import Any, Dict, List, Type

import numpyro
Expand Down Expand Up @@ -98,6 +97,5 @@ def fit_model(

all_models = fit_all_models(input_data, config)

Path(args.output).mkdir(parents=True, exist_ok=True)
with open(Path(args.output, "model_fits.pkl"), "wb") as f:
with open(args.output, "wb") as f:
pkl.dump(all_models, f)
13 changes: 6 additions & 7 deletions scripts/forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,20 +100,19 @@ def run_all_forecasts(
p = argparse.ArgumentParser()
p.add_argument("--config", help="config file", required=True)
p.add_argument("--data", help="input data", required=True)
p.add_argument("--models", help="fitted model directory", required=True)
p.add_argument("--output", help="output directory", required=True)
p.add_argument("--fits", help="fits pickle", required=True)
p.add_argument("--output", help="forecasts parquet", required=True)
args = p.parse_args()

with open(args.config, "r") as f:
config = yaml.safe_load(f)

input_data = iup.CumulativeUptakeData(pl.read_parquet(args.data))

with open(Path(args.models, "model_fits.pkl"), "rb") as f:
with open(args.fits, "rb") as f:
models = pickle.load(f)

output = run_all_forecasts(input_data, models, config)
postchecks, forecasts = run_all_forecasts(input_data, models, config)

Path(args.output).mkdir(parents=True, exist_ok=True)
output[0].write_parquet(Path(args.output, "postchecks.parquet"))
output[1].write_parquet(Path(args.output, "forecasts.parquet"))
forecasts.write_parquet(args.output)
postchecks.write_parquet(Path(args.output).parent / "postchecks.parquet")
16 changes: 9 additions & 7 deletions scripts/viz.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import argparse
from pathlib import Path
from typing import Any, Dict, List

import altair as alt
Expand Down Expand Up @@ -412,25 +411,28 @@ def layer_with_facets(charts: List, encodings: Dict):

if __name__ == "__main__":
p = argparse.ArgumentParser()
p.add_argument("--obs", help="observed data", required=True)
p.add_argument("--pred", help="forecasts", required=True)
p.add_argument("--data", help="observed data", required=True)
p.add_argument("--forecasts", help="forecasts", required=True)
p.add_argument("--score", help="score metrics", required=True)
p.add_argument("--config", help="config yaml file", required=True)
args = p.parse_args()

@st.cache_data
def load_data():
return {
"observed": pl.read_parquet(args.obs),
"forecasts": pl.read_parquet(Path(args.pred, "forecasts.parquet")),
"observed": pl.read_parquet(args.data),
"forecasts": pl.read_parquet(args.forecasts),
}

@st.cache_data
def load_scores():
return pl.read_parquet(Path(args.score, "scores.parquet"))
return pl.read_parquet(args.score)

@st.cache_data
def load_config():
return yaml.safe_load(open(args.config, "r"))
with open(args.config) as f:
config = yaml.safe_load(f)

return config

app()