Skip to content

Commit 2daf176

Browse files
committed
fix: match consultations schema exactly - drop id, loaded_at
The MariaDB consultations table now matches the original schema exactly: source, name, description, status, agency, tags, region, url, publishdate, expirydate. This removes the extraneous id and loaded_at columns that had been added since the Python-to-SQL migration. It also makes the target table name configurable via the Helm chart (the mysql.table value).
1 parent 7415586 commit 2daf176

6 files changed

Lines changed: 21 additions & 20 deletions

File tree

.github/workflows/ci-nightly.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
name: Nightly CI
22

33
on:
4+
push:
5+
branches: [main]
46
schedule:
57
- cron: '0 14 * * *' # 10 PM AWST
68
workflow_dispatch:

chart/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ apiVersion: v2
22
name: harvest-consultations
33
description: DuckDB harvest pipeline for WA government consultation data
44
type: application
5-
version: 0.5.0
5+
version: 0.5.3
66
appVersion: "1.5.2"

chart/harvest.sql

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -218,17 +218,15 @@ FROM engagementhq_raw;
218218
CREATE OR REPLACE TABLE consultations_final AS
219219
SELECT
220220
source,
221-
id,
222221
name,
223222
description,
224223
status,
225-
tags,
226224
agency,
225+
tags,
227226
region,
228227
url,
229228
publishdate,
230-
expirydate,
231-
CURRENT_TIMESTAMP AS loaded_at
229+
expirydate
232230
FROM (
233231
SELECT * FROM engagementhq_std
234232
UNION ALL BY NAME
@@ -246,5 +244,5 @@ ORDER BY source, status;
246244
-- Matches old/harvest.py export behaviour: replace the whole output table.
247245
-- ============================================================================
248246
ATTACH '' AS mysqldb (TYPE mysql);
249-
CREATE OR REPLACE TABLE mysqldb.consultations AS
247+
CREATE OR REPLACE TABLE mysqldb.{{ .Values.mysql.table }} AS
250248
SELECT * FROM consultations_final;

chart/templates/configmap.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ metadata:
77
{{- include "harvest-consultations.labels" . | nindent 4 }}
88
data:
99
harvest.sql: |
10-
{{ .Files.Get "harvest.sql" | indent 4 }}
10+
{{ tpl (.Files.Get "harvest.sql") . | indent 4 }}

chart/values.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
mysql:
22
host: mariadb
33
database: harvest
4+
table: consultations
45

56
mariadb:
67
# rootPassword is only used for MariaDB initialization; the application and

justfile

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
ns := "harvest-consultations"
2-
helmHost := "mariadb"
2+
mysqlHost := "mariadb"
3+
table := "consultations"
34

45
default:
56
just --choose
@@ -13,16 +14,13 @@ kind-up:
1314
kind get clusters | grep -q harvest || kind create cluster --name harvest
1415
helm upgrade --install harvest chart \
1516
--namespace {{ns}} --create-namespace \
16-
--set mysql.host={{helmHost}}
17+
--set mysql.host={{mysqlHost}} \
18+
--set mysql.table={{table}}
1719

1820
# Forward mariadb from k8s cluster
1921
mariadb-svc: kind-up
2022
ss -ltpn | grep 3306 || kubectl port-forward service/mariadb 3306:3306 -n {{ns}} & sleep 1
2123

22-
# Run the DuckDB pipeline locally
23-
run:
24-
duckdb -c ".read chart/harvest.sql"
25-
2624
# Create a one-off test job in the cluster
2725
test: kind-up
2826
kubectl delete job test -n {{ns}} --ignore-not-found
@@ -32,7 +30,8 @@ test: kind-up
3230
helm-install:
3331
helm upgrade --install harvest chart \
3432
--namespace {{ns}} --create-namespace \
35-
--set mysql.host={{helmHost}}
33+
--set mysql.host={{mysqlHost}} \
34+
--set mysql.table={{table}}
3635

3736
# Package helm chart
3837
helm-package:
@@ -52,7 +51,8 @@ ci-test:
5251
echo "=== CI: installing helm chart ==="
5352
helm upgrade --install harvest chart \
5453
--namespace {{ns}} --create-namespace \
55-
--set mysql.host={{helmHost}}
54+
--set mysql.host={{mysqlHost}} \
55+
--set mysql.table={{table}}
5656
5757
echo "=== CI: waiting for MariaDB (up to 5 min) ==="
5858
kubectl rollout status statefulset/mariadb -n {{ns}} --timeout=300s
@@ -68,16 +68,16 @@ ci-test:
6868
exit 1
6969
}
7070
71-
echo "=== CI: dumping consultations table ==="
71+
echo "=== CI: dumping {{table}} table ==="
7272
POD=$(kubectl get pod -l app=mariadb -n {{ns}} -o jsonpath='{.items[0].metadata.name}')
7373
mkdir -p dist
7474
kubectl exec -n {{ns}} "$POD" -- \
75-
sh -c 'MYSQL_PWD="$MARIADB_PASSWORD" exec mariadb-dump -u"$MARIADB_USER" -h 127.0.0.1 harvest consultations' \
76-
| gzip > dist/consultations.sql.gz
75+
sh -c 'MYSQL_PWD="$MARIADB_PASSWORD" exec mariadb-dump -u"$MARIADB_USER" -h 127.0.0.1 harvest {{table}}' \
76+
| gzip > dist/{{table}}.sql.gz
7777
7878
echo "=== CI: validating dump ==="
79-
gunzip -c dist/consultations.sql.gz | head -20 || true
80-
ROWS=$(gunzip -c dist/consultations.sql.gz | grep -c 'INSERT INTO' || echo 0)
79+
gunzip -c dist/{{table}}.sql.gz | head -20 || true
80+
ROWS=$(gunzip -c dist/{{table}}.sql.gz | grep -c 'INSERT INTO' || echo 0)
8181
echo "Rows found: $ROWS"
8282
if [ "$ROWS" -eq 0 ]; then
8383
echo "ERROR: dump contains no INSERT statements"

0 commit comments

Comments
 (0)