Skip to content

Commit 9ac29eb

Browse files
dshkol and claude committed
Add config-driven table fetcher for easier expansion
- table_configs.json: Defines extraction rules for 12 CANSIM tables
- fetch_table.R: Simple config-driven fetcher that uses the config

Configured tables:
- CPI (18-10-0004), LFS (14-10-0287), Manufacturing (16-10-0047)
- Retail (20-10-0008), Housing starts (34-10-0158), NHPI (18-10-0205)
- GDP (36-10-0434), Airlines (23-10-0079), Trade (12-10-0011)
- Food services (21-10-0019), Electricity (25-10-0015), Wholesale (20-10-0074)

Usage: Rscript r-tools/fetch_table.R <table-number> [output_dir]

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 86b9f56 commit 9ac29eb

2 files changed

Lines changed: 245 additions & 0 deletions

File tree

r-tools/fetch_table.R

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#!/usr/bin/env Rscript
2+
# Config-driven CANSIM data fetcher
3+
4+
library(cansim)
5+
library(dplyr)
6+
library(jsonlite)
7+
# Command-line arguments ----
# <table-number> is required; [output_dir] is optional and falls back to
# "output" when it is not supplied.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) {
  stop("Usage: Rscript fetch_table.R <table-number> [output_dir]")
}
table_number <- args[1]
output_dir <- if (length(args) >= 2) args[2] else "output"
11+
# Load config ----
# table_configs.json is looked up, in order, relative to the repository root,
# in the current working directory, and next to this script.
config_paths <- c(
  "r-tools/table_configs.json",
  "table_configs.json"
)

# Rscript passes the script location as "--file=<path>". The prefix must be
# removed before the path is resolved; otherwise the directory becomes
# "--file=<dir>" and the lookup can never succeed. When the script is not
# started through Rscript (e.g. sourced interactively) there is no such
# argument and this candidate is simply left out.
file_arg <- grep("^--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
if (length(file_arg) > 0) {
  script_path <- sub("^--file=", "", file_arg[1])
  script_dir <- dirname(normalizePath(script_path, mustWork = FALSE))
  config_paths <- c(config_paths, file.path(script_dir, "table_configs.json"))
}

# The first candidate that exists wins.
existing_paths <- config_paths[file.exists(config_paths)]
config_file <- if (length(existing_paths) > 0) existing_paths[1] else NULL
if (is.null(config_file)) stop("Cannot find table_configs.json")

configs <- fromJSON(config_file)
28+
# Select the requested table ----
# An unknown table number aborts the run after listing every table that the
# config file does define.
known_tables <- names(configs)
if (!(table_number %in% known_tables)) {
  cat("Table", table_number, "not in config. Available:\n")
  for (known in known_tables) {
    cat(" -", known, ":", configs[[known]]$name, "\n")
  }
  stop("Add table config first")
}

config <- configs[[table_number]]
cat("Fetching:", config$name, "\n")
37+
# Fetch data ----
data <- get_cansim(table_number)
cat("Downloaded", nrow(data), "rows\n")

# Apply filters ----
# Each entry of config$filters maps a column name to a literal substring the
# column value must contain (fixed = TRUE, so no regex interpretation).
filtered <- data
for (col in names(config$filters)) {
  if (!(col %in% names(filtered))) {
    # A filter that is dropped silently can leave several series mixed
    # together, so report the mismatch instead of skipping it quietly.
    warning(
      "Filter column '", col, "' not found in table ", table_number,
      "; filter skipped",
      call. = FALSE
    )
    next
  }
  val <- config$filters[[col]]
  filtered <- filtered %>% filter(grepl(val, .data[[col]], fixed = TRUE))
  cat("Filtered", col, "->", nrow(filtered), "rows\n")
}

filtered <- filtered %>% arrange(Date)

# The steps that follow take the last 13 rows as one time series. More than
# one row per date means the filters did not narrow the table down to a
# single series, so the derived figures would mix series.
if (anyDuplicated(filtered$Date) > 0) {
  warning(
    "Filters for table ", table_number,
    " leave more than one row per date; results may mix several series",
    call. = FALSE
  )
}
53+
# Get time series ----
# Keep the 13 most recent rows and divide the values by the configured scale
# (1 when the config has none).
# NOTE(review): reading row 1 vs row 13 as a year-over-year comparison assumes
# one row per month after filtering - confirm for each configured table.
scale <- if (!is.null(config$scale)) config$scale else 1
ts <- filtered %>%
  tail(13) %>%
  select(date = REF_DATE, value = VALUE) %>%
  mutate(value = value / scale)

# Without any rows there is nothing to report; fail with a clear message
# rather than printing and writing empty results.
if (nrow(ts) == 0) {
  stop(
    "No rows left after filtering table ", table_number,
    "; check the filters in table_configs.json",
    call. = FALSE
  )
}

# Percentage change from `base` to `current`, rounded to two decimals.
pct_change <- function(current, base) {
  round((current - base) / base * 100, 2)
}

latest <- tail(ts, 1)
prev <- head(tail(ts, 2), 1)
yoy <- head(ts, 1)

# A change is only meaningful when its baseline row exists: with a single row
# `prev` is the latest row itself, and with fewer than 13 rows `yoy` is not
# 12 periods back. Report NA in those cases instead of a misleading number.
mom_pct <- if (nrow(ts) >= 2) pct_change(latest$value, prev$value) else NA_real_
yoy_pct <- if (nrow(ts) >= 13) pct_change(latest$value, yoy$value) else NA_real_

scale_label <- if (!is.null(config$scale_label)) config$scale_label else ""
cat("\nLatest:", latest$date, "-", round(latest$value, 1), scale_label, "\n")
cat("MoM:", mom_pct, "%\n")
cat("YoY:", yoy_pct, "%\n")
72+
# Output ----
# Assemble the result: table metadata from the config, the headline figures
# for the latest period, and the scaled series they were derived from.
output <- list(
  metadata = list(
    table_number = table_number,
    name = config$name,
    headline = config$headline,
    unit = config$unit,
    scale_label = scale_label
  ),
  latest = list(
    ref_date = latest$date,
    value = round(latest$value, 2),
    mom_pct_change = mom_pct,
    yoy_pct_change = yoy_pct
  ),
  time_series = ts %>% mutate(value = round(value, 2))
)

dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

# The file name replaces the hyphens of the table number with underscores,
# e.g. "18-10-0004" becomes data_18_10_0004.json.
output_file <- file.path(
  output_dir,
  paste0("data_", gsub("-", "_", table_number), ".json")
)

# na = "null": by default jsonlite encodes a missing number as the string
# "NA", which puts a string into a numeric field. Writing JSON null keeps the
# numeric fields type-consistent for consumers.
write_json(output, output_file, pretty = TRUE, auto_unbox = TRUE, na = "null")
cat("Written to:", output_file, "\n")

r-tools/table_configs.json

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
{
2+
"18-10-0004": {
3+
"name": "Consumer Price Index",
4+
"headline": "Consumer prices",
5+
"unit": "index",
6+
"scale": 1,
7+
"filters": {
8+
"GEO": "Canada",
9+
"Products and product groups": "All-items"
10+
},
11+
"breakdown_dimension": "Products and product groups",
12+
"breakdown_values": ["Food", "Shelter", "Transportation", "Health and personal care"],
13+
"provincial": true
14+
},
15+
"14-10-0287": {
16+
"name": "Labour Force Survey",
17+
"headline": "Unemployment rate",
18+
"unit": "percent",
19+
"scale": 1,
20+
"filters": {
21+
"GEO": "Canada",
22+
"Labour force characteristics": "Unemployment rate",
23+
"Sex": "Both sexes",
24+
"Age group": "15 years and over"
25+
},
26+
"breakdown_dimension": "Labour force characteristics",
27+
"provincial": true
28+
},
29+
"16-10-0047": {
30+
"name": "Manufacturing Sales",
31+
"headline": "Manufacturing sales",
32+
"unit": "dollars",
33+
"scale": 1000000,
34+
"scale_label": "billions",
35+
"filters": {
36+
"Seasonal adjustment": "Seasonally adjusted",
37+
"North American Industry Classification System (NAICS)": "Manufacturing [31-33]"
38+
},
39+
"value_filter": "Sales of goods manufactured",
40+
"breakdown_dimension": "North American Industry Classification System (NAICS)"
41+
},
42+
"20-10-0008": {
43+
"name": "Retail Trade",
44+
"headline": "Retail sales",
45+
"unit": "dollars",
46+
"scale": 1000000,
47+
"scale_label": "billions",
48+
"filters": {
49+
"GEO": "Canada",
50+
"North American Industry Classification System (NAICS)": "Retail trade [44-45]",
51+
"Adjustments": "Seasonally adjusted"
52+
},
53+
"breakdown_dimension": "North American Industry Classification System (NAICS)"
54+
},
55+
"34-10-0158": {
56+
"name": "Housing Starts",
57+
"headline": "Housing starts",
58+
"unit": "units (SAAR, thousands)",
59+
"scale": 1,
60+
"scale_label": "thousands",
61+
"filters": {
62+
"GEO": "Canada"
63+
},
64+
"breakdown_dimension": "GEO",
65+
"note": "Values are seasonally adjusted annual rates (SAAR) in thousands"
66+
},
67+
"18-10-0205": {
68+
"name": "New Housing Price Index",
69+
"headline": "New housing prices",
70+
"unit": "index",
71+
"scale": 1,
72+
"filters": {
73+
"GEO": "Canada",
74+
"New housing price indexes": "Total (house and land)"
75+
},
76+
"breakdown_dimension": "GEO"
77+
},
78+
"36-10-0434": {
79+
"name": "GDP by Industry",
80+
"headline": "Real GDP",
81+
"unit": "dollars",
82+
"scale": 1000000,
83+
"scale_label": "billions",
84+
"filters": {
85+
"North American Industry Classification System (NAICS)": "All industries [T001]",
86+
"Seasonal adjustment": "Seasonally adjusted at annual rates",
87+
"Prices": "Chained (2017) dollars"
88+
},
89+
"breakdown_dimension": "North American Industry Classification System (NAICS)"
90+
},
91+
"23-10-0079": {
92+
"name": "Airline Statistics",
93+
"headline": "Airline passengers",
94+
"unit": "thousands of passengers",
95+
"scale": 1,
96+
"scale_label": "thousands",
97+
"filters": {
98+
"Operational and financial statistics": "Passengers"
99+
},
100+
"breakdown_dimension": "Operational and financial statistics"
101+
},
102+
"12-10-0011": {
103+
"name": "International Trade",
104+
"headline": "Merchandise exports",
105+
"unit": "dollars",
106+
"scale": 1000000,
107+
"scale_label": "billions",
108+
"filters": {
109+
"Trade": "Export",
110+
"Principal trading partners": "All countries",
111+
"Seasonal adjustment": "Seasonally adjusted"
112+
},
113+
"breakdown_dimension": "Principal trading partners"
114+
},
115+
"21-10-0019": {
116+
"name": "Food Services",
117+
"headline": "Food services sales",
118+
"unit": "dollars",
119+
"scale": 1000000,
120+
"scale_label": "billions",
121+
"filters": {
122+
"GEO": "Canada",
123+
"North American Industry Classification System (NAICS)": "Food services and drinking places [722]"
124+
},
125+
"breakdown_dimension": "North American Industry Classification System (NAICS)"
126+
},
127+
"25-10-0015": {
128+
"name": "Electric Power Generation",
129+
"headline": "Electric power generation",
130+
"unit": "gigawatt hours",
131+
"scale": 1000,
132+
"scale_label": "terawatt hours",
133+
"filters": {
134+
"GEO": "Canada",
135+
"Type of electricity generation": "Total electricity generation"
136+
},
137+
"breakdown_dimension": "Type of electricity generation"
138+
},
139+
"20-10-0074": {
140+
"name": "Wholesale Trade",
141+
"headline": "Wholesale sales",
142+
"unit": "dollars",
143+
"scale": 1000000,
144+
"scale_label": "billions",
145+
"filters": {
146+
"GEO": "Canada",
147+
"North American Industry Classification System (NAICS)": "Wholesale trade [41]"
148+
},
149+
"breakdown_dimension": "North American Industry Classification System (NAICS)"
150+
}
151+
}

0 commit comments

Comments
 (0)