Skip to content

Commit 9865ec2

Browse files
Add --clean-frs-base for per-year clean FRS, update Modal deployment
- Add --clean-frs-base flag: loads clean CSVs from base/YYYY/ dirs, falls back to latest available year + uprating for projected years
- Update Modal app to use single volume with per-year clean FRS
- Simplify upload script for clean FRS base directory structure
- Extract all 30 years of clean CSVs into data/frs_clean_all/
1 parent 353b78f commit 9865ec2

File tree

3 files changed

+70
-44
lines changed

3 files changed

+70
-44
lines changed

api/modal_app.py

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,24 @@
33
44
Architecture:
55
- Rust binary is compiled at image build time (baked into the image layer).
6-
- A Modal Volume (`policyengine-uk-frs`) holds the clean FRS microdata CSVs.
7-
Upload once with: python api/upload_frs.py <path-to-frs-clean-dir>
6+
- Modal Volume (`policyengine-uk-frs`) holds per-year clean FRS CSVs (subdirectories 1994/ through 2023/).
7+
Upload with: python api/upload_frs.py data/frs_clean_all
88
- FastAPI app is served via modal.asgi_app().
99
1010
Deploy:
1111
modal deploy api/modal_app.py
1212
1313
Serve locally (with hot-reload):
1414
modal serve api/modal_app.py
15-
16-
Upload FRS data to the volume:
17-
python api/upload_frs.py data/frs_clean
1815
"""
1916

2017
import modal
2118

2219
# ---------------------------------------------------------------------------
23-
# Volume for FRS clean microdata (persons.csv / benunits.csv / households.csv)
20+
# Volumes
2421
# ---------------------------------------------------------------------------
2522
frs_volume = modal.Volume.from_name("policyengine-uk-frs", create_if_missing=True)
26-
FRS_MOUNT_PATH = "/data/frs_clean"
23+
FRS_MOUNT = "/data/frs_clean"
2724

2825
# ---------------------------------------------------------------------------
2926
# Image — Debian base, install Rust toolchain, clone repo, compile binary
@@ -38,7 +35,6 @@
3835
)
3936
.pip_install("fastapi>=0.115", "uvicorn[standard]>=0.30", "pydantic>=2.0")
4037
# Copy the repo source into the image (exclude FRS data — it stays on the Volume)
41-
# The .dockerignore / ignore list ensures data/frs_clean never enters the image.
4238
.add_local_dir(".", remote_path="/app", copy=True,
4339
ignore=["data/", "target/", ".git/", "app/node_modules/", "app/.next/"])
4440
.run_commands(
@@ -57,7 +53,7 @@
5753

5854

5955
# ---------------------------------------------------------------------------
60-
# FastAPI application (identical logic to api/main.py but paths adjusted)
56+
# FastAPI application
6157
# ---------------------------------------------------------------------------
6258
def _make_fastapi_app():
6359
import json
@@ -70,9 +66,8 @@ def _make_fastapi_app():
7066
from pydantic import BaseModel
7167

7268
RUST_BINARY = "policyengine-uk"
73-
CLEAN_FRS_DIR = FRS_MOUNT_PATH
74-
PARAMETERS_DIR = "/app/parameters"
75-
AVAILABLE_YEARS = [2023, 2024, 2025, 2026, 2027, 2028, 2029]
69+
FRS_BASE_DIR = FRS_MOUNT
70+
AVAILABLE_YEARS = list(range(1994, 2030))
7671

7772
fastapi_app = FastAPI(title="PolicyEngine UK API")
7873

@@ -87,8 +82,8 @@ def _make_fastapi_app():
8782
params_cache: dict[int, dict] = {}
8883

8984
def _data_args() -> list[str]:
90-
if os.path.isdir(CLEAN_FRS_DIR) and os.listdir(CLEAN_FRS_DIR):
91-
return ["--clean-frs", CLEAN_FRS_DIR]
85+
if os.path.isdir(FRS_BASE_DIR) and os.listdir(FRS_BASE_DIR):
86+
return ["--clean-frs-base", FRS_BASE_DIR]
9287
return []
9388

9489
def run_simulation(year: int, reform_json: Optional[str] = None) -> dict:
@@ -133,7 +128,7 @@ async def cache_baselines():
133128
REFORM_SECTIONS = [
134129
"income_tax", "national_insurance", "universal_credit",
135130
"child_benefit", "benefit_cap", "housing_benefit",
136-
"tax_credits", "scottish_child_payment",
131+
"tax_credits", "council_tax_reduction", "scottish_child_payment",
137132
"pension_credit", "state_pension",
138133
]
139134

@@ -146,6 +141,7 @@ class SimulateRequest(BaseModel):
146141
benefit_cap: Optional[dict[str, Any]] = None
147142
housing_benefit: Optional[dict[str, Any]] = None
148143
tax_credits: Optional[dict[str, Any]] = None
144+
council_tax_reduction: Optional[dict[str, Any]] = None
149145
scottish_child_payment: Optional[dict[str, Any]] = None
150146
pension_credit: Optional[dict[str, Any]] = None
151147
state_pension: Optional[dict[str, Any]] = None
@@ -159,6 +155,7 @@ class SimulateMultiYearRequest(BaseModel):
159155
benefit_cap: Optional[dict[str, Any]] = None
160156
housing_benefit: Optional[dict[str, Any]] = None
161157
tax_credits: Optional[dict[str, Any]] = None
158+
council_tax_reduction: Optional[dict[str, Any]] = None
162159
scottish_child_payment: Optional[dict[str, Any]] = None
163160
pension_credit: Optional[dict[str, Any]] = None
164161
state_pension: Optional[dict[str, Any]] = None
@@ -219,19 +216,19 @@ async def health():
219216
return {
220217
"status": "ok",
221218
"binary": bool(shutil.which("policyengine-uk")),
222-
"frs_data": os.path.isdir(CLEAN_FRS_DIR) and bool(os.listdir(CLEAN_FRS_DIR)),
223-
"frs_path": CLEAN_FRS_DIR,
219+
"frs_data": os.path.isdir(FRS_BASE_DIR) and bool(os.listdir(FRS_BASE_DIR)),
220+
"cached_years": sorted(baseline_cache.keys()),
224221
}
225222

226223
return fastapi_app
227224

228225

229226
@app.function(
230-
volumes={FRS_MOUNT_PATH: frs_volume},
231-
# Startup caches all 7 year baselines — give it enough RAM
232-
memory=4096,
227+
volumes={FRS_MOUNT: frs_volume},
228+
# Startup caches baselines for 36 years (1994–2029) — needs time and memory
229+
memory=8192,
233230
cpu=4,
234-
timeout=300,
231+
timeout=600,
235232
# EU West (Ireland) for lower latency from UK callers
236233
region="eu-west-1",
237234
)

api/upload_frs.py

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
"""
2-
Upload clean FRS CSV files to the Modal Volume.
2+
Upload clean FRS data to the Modal Volume.
33
44
Usage:
5-
python api/upload_frs.py <path-to-frs-clean-dir>
5+
python api/upload_frs.py data/frs_clean_all
66
7-
The directory should contain: persons.csv, benunits.csv, households.csv
8-
(generated by: policyengine-uk --extract-frs <output-dir> --frs <raw-frs-dir>)
7+
The directory should contain per-year subdirectories (1994/, 1995/, ..., 2023/)
8+
each with persons.csv, benunits.csv, households.csv.
9+
10+
Generate with:
11+
for year in $(seq 1994 2023); do
12+
./target/release/policyengine-uk-rust --year $year --frs <tab-dir> --extract-frs data/frs_clean_all/$year
13+
done
914
1015
This only needs to be run once (or when the FRS data changes).
1116
"""
@@ -14,36 +19,34 @@
1419
from pathlib import Path
1520
import modal
1621

17-
FRS_VOLUME_NAME = "policyengine-uk-frs"
18-
REMOTE_PATH = "/data/frs_clean"
19-
EXPECTED_FILES = {"persons.csv", "benunits.csv", "households.csv"}
22+
VOLUME_NAME = "policyengine-uk-frs"
2023

2124

2225
def upload(local_dir: Path) -> None:
23-
missing = EXPECTED_FILES - {f.name for f in local_dir.iterdir() if f.is_file()}
24-
if missing:
25-
print(f"Error: missing files in {local_dir}: {missing}", file=sys.stderr)
26+
year_dirs = sorted(d for d in local_dir.iterdir() if d.is_dir() and d.name.isdigit())
27+
if not year_dirs:
28+
print(f"Error: no year directories found in {local_dir}", file=sys.stderr)
2629
sys.exit(1)
2730

28-
volume = modal.Volume.from_name(FRS_VOLUME_NAME, create_if_missing=True)
31+
volume = modal.Volume.from_name(VOLUME_NAME, create_if_missing=True)
32+
print(f"Uploading {len(year_dirs)} years from {local_dir} → Modal Volume '{VOLUME_NAME}'")
2933

30-
# Files are stored at the volume root (e.g. "persons.csv").
31-
# The volume is mounted at FRS_MOUNT_PATH in the container, so
32-
# container path /data/frs_clean/persons.csv == volume root persons.csv.
33-
print(f"Uploading FRS clean CSVs from {local_dir} → Modal Volume '{FRS_VOLUME_NAME}' (root)")
3434
with volume.batch_upload(force=True) as batch:
35-
for csv_file in local_dir.glob("*.csv"):
36-
batch.put_file(str(csv_file), csv_file.name)
37-
print(f" {csv_file.name} → /{csv_file.name}")
35+
for year_dir in year_dirs:
36+
csvs = list(year_dir.glob("*.csv"))
37+
if not csvs:
38+
print(f" WARNING: no CSVs in {year_dir.name}, skipping")
39+
continue
40+
print(f" {year_dir.name}: {len(csvs)} files")
41+
for f in csvs:
42+
batch.put_file(str(f), f"{year_dir.name}/{f.name}")
3843

39-
print("Done. Volume contents:")
40-
for entry in volume.listdir("/"):
41-
print(f" {entry.path}")
44+
print("Done.")
4245

4346

4447
if __name__ == "__main__":
4548
if len(sys.argv) != 2:
46-
print(f"Usage: python {sys.argv[0]} <frs-clean-dir>", file=sys.stderr)
49+
print(f"Usage: python {sys.argv[0]} <frs-clean-base-dir>", file=sys.stderr)
4750
sys.exit(1)
4851

4952
local_dir = Path(sys.argv[1])

src/main.rs

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,12 @@ struct Cli {
6262
#[arg(long)]
6363
clean_frs: Option<PathBuf>,
6464

65+
/// Base directory containing per-year clean FRS subdirectories (YYYY/).
66+
/// Each subdirectory has persons.csv, benunits.csv, households.csv.
67+
/// Falls back to latest available year and uprates if needed.
68+
#[arg(long)]
69+
clean_frs_base: Option<PathBuf>,
70+
6571
/// Output format: "pretty" (default) or "json" for machine-readable output
6672
#[arg(long, default_value = "pretty")]
6773
output: String,
@@ -256,7 +262,27 @@ fn main() -> anyhow::Result<()> {
256262
}
257263

258264
// Load dataset
259-
let dataset = if let Some(clean_path) = &cli.clean_frs {
265+
let dataset = if let Some(base) = &cli.clean_frs_base {
266+
// Per-year clean FRS directories: base/YYYY/
267+
let year_dir = base.join(cli.year.to_string());
268+
if year_dir.is_dir() {
269+
if !json_mode { println!(" {} Loading clean FRS {}/{}...", "▸".bright_cyan(), cli.year, (cli.year + 1) % 100); }
270+
load_clean_frs(&year_dir)?
271+
} else {
272+
// Find latest available year and uprate
273+
let latest = (1994..=cli.year).rev()
274+
.find(|y| base.join(y.to_string()).is_dir())
275+
.ok_or_else(|| anyhow::anyhow!("No clean FRS data found in {}", base.display()))?;
276+
if !json_mode {
277+
println!(" {} Loading clean FRS {}/{} and uprating to {}/{}...",
278+
"▸".bright_cyan(), latest, (latest + 1) % 100,
279+
cli.year, (cli.year + 1) % 100);
280+
}
281+
let mut ds = load_clean_frs(&base.join(latest.to_string()))?;
282+
ds.uprate_to(cli.year);
283+
ds
284+
}
285+
} else if let Some(clean_path) = &cli.clean_frs {
260286
if !json_mode { println!(" {} Loading clean FRS from {}...", "▸".bright_cyan(), clean_path.display()); }
261287
load_clean_frs(clean_path)?
262288
} else if let Some(frs_path) = &cli.frs {

0 commit comments

Comments
 (0)