
Commit f019784

Merge pull request #1 from thatapicompany/aden/improve-test-coverage
Aden/improve test coverage
2 parents ed98c0f + 7eff430 commit f019784

20 files changed: +1047 −41 lines changed

.github/workflows/production.yml

Lines changed: 52 additions & 0 deletions

@@ -0,0 +1,52 @@
+name: Build, Test, and Deploy to Cloud Run
+
+on:
+  push:
+    branches:
+      - main
+      - master
+
+jobs:
+  build-test-deploy:
+    runs-on: ubuntu-latest
+    env:
+      PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
+      GAR_LOCATION: ${{ secrets.GAR_LOCATION }} # e.g. us-central1
+      SERVICE: ${{ secrets.CLOUD_RUN_SERVICE }}
+      REGION: ${{ secrets.CLOUD_RUN_REGION }}
+      IMAGE: ${{ secrets.GAR_LOCATION }}-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/${{ secrets.GAR_REPO }}/${{ secrets.CLOUD_RUN_SERVICE }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Run tests
+        run: npm test
+
+      - name: Set up Google Cloud SDK
+        uses: google-github-actions/auth@v2
+        with:
+          credentials_json: ${{ secrets.GCP_SA_KEY }}
+
+      - name: Configure Docker for Artifact Registry
+        run: gcloud auth configure-docker $GAR_LOCATION-docker.pkg.dev
+
+      - name: Build and push Docker image
+        run: |
+          docker build -t $IMAGE:$GITHUB_SHA .
+          docker push $IMAGE:$GITHUB_SHA
+
+      - name: Deploy to Cloud Run
+        uses: google-github-actions/deploy-cloudrun@v2
+        with:
+          service: ${{ env.SERVICE }}
+          image: ${{ env.IMAGE }}:$GITHUB_SHA
+          region: ${{ env.REGION }}
+          project_id: ${{ env.PROJECT_ID }}
.github/workflows/deploy.yml renamed to .github/workflows/test-build.yml

Lines changed: 1 addition & 0 deletions

@@ -2,6 +2,7 @@ name: Default test
 on:
   push:
     branches-ignore:
+      - 'main'
       - 'master'
      - 'staging'

Dockerfile

Lines changed: 19 additions & 22 deletions

@@ -1,34 +1,31 @@
-FROM node:17-stretch
 
+# ---- Build Stage ----
+FROM node:22-alpine AS build
 WORKDIR /app
 
+# Install dependencies
 COPY package*.json ./
+RUN npm ci --omit=dev
 
-RUN npm install
-RUN git --version
-
+# Copy source files and build
+COPY tsconfig.json tsconfig.build.json ./
 COPY src ./src
-RUN ls ./
-
-#ARG NODE_ENV=prod
-#ENV NODE_ENV=${NODE_ENV}
-
-#COPY tools ./tools
+RUN npm run build
 
-# placeholders for validation
-RUN [ "touch",".env"]
-RUN [ "touch",".env.staging"]
-RUN [ "touch",".env.dev"]
-RUN [ "touch",".env.production"]
-
-COPY tsconfig.json .
-COPY tsconfig.build.json .
+# ---- Production Stage ----
+FROM node:22-alpine
+WORKDIR /app
 
-RUN [ "npm", "run", "build"]
+# Only copy built files and production dependencies
+COPY --from=build /app/node_modules ./node_modules
+COPY --from=build /app/dist ./dist
+COPY package*.json ./
 
+# Set environment variables
+ENV NODE_ENV=production
+ENV PORT=8080
 
 EXPOSE 8080
 
-CMD [ "npm", "run", "start" ]
-# COPY entrypoint.sh .
-# ENTRYPOINT [ "/app/entrypoint.sh" ]
+# Start the app
+CMD ["npm", "run", "start:prod"]

etl/export-from-bq-to-pg.md

Lines changed: 144 additions & 0 deletions

#### Export from BigQuery to GCS (CSV)

CSV is the easiest format to bulk-load into Postgres; export it in shards.

```sql
EXPORT DATA OPTIONS (
  uri='gs://YOUR_BUCKET/overture/place_min_*.csv',
  format='CSV',
  overwrite=true,
  header=true,
  field_delimiter=','
)
AS
SELECT * FROM (
  WITH flat AS (
    SELECT
      id,
      ST_X(geometry) AS lon,
      ST_Y(geometry) AS lat,
      names.primary AS name,
      categories.primary AS category,
      websites.list[SAFE_OFFSET(0)].element AS website,
      phones.list[SAFE_OFFSET(0)].element AS phone,
      emails.list[SAFE_OFFSET(0)].element AS email,
      socials.list[SAFE_OFFSET(0)].element AS social,
      addresses.list[SAFE_OFFSET(0)].element.freeform AS address,
      addresses.list[SAFE_OFFSET(0)].element.locality AS locality,
      addresses.list[SAFE_OFFSET(0)].element.postcode AS postcode,
      addresses.list[SAFE_OFFSET(0)].element.country AS country,
      brand.wikidata AS wikidata,
      confidence,
      sources.list[SAFE_OFFSET(0)].element.dataset AS src_dataset,
      sources.list[SAFE_OFFSET(0)].element.record_id AS src_record_id,
      TIMESTAMP(sources.list[SAFE_OFFSET(0)].element.update_time) AS src_update_time
    FROM `bigquery-public-data.overture_maps.place`
  )
  SELECT * FROM flat
);
```

#### Load into Postgres

```bash
# pull the CSV shards from GCS to a machine that has psql
gcloud storage cp gs://YOUR_BUCKET/overture/place_min_*.csv .

# bulk load each shard; the column list matches the CSV column order produced by the export above
for f in place_min_*.csv; do
  psql "$PGURL" -c "\copy place_min(id,lon,lat,name,category,website,phone,email,social,address,locality,postcode,country,wikidata,confidence,src_dataset,src_record_id,src_update_time) FROM '$f' CSV HEADER"
done
```

#### Incremental updates (recommended)

Overture refreshes regularly; avoid full reloads.

BigQuery delta extract (only rows updated since your last successful run):

```sql
DECLARE last_run TIMESTAMP DEFAULT TIMESTAMP('2025-08-01 00:00:00+00'); -- replace via param/metadata

EXPORT DATA OPTIONS (
  uri='gs://YOUR_BUCKET/overture/delta/place_min_@{run_date}_*.csv',
  format='CSV', overwrite=true, header=true
)
AS
SELECT *
FROM (
  -- same SELECT as the full export above
)
WHERE src_update_time >= last_run;
```
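
The last_run value has to come from somewhere. A minimal sketch of one option is a small watermark table on the Postgres side; the etl_watermark table and job name here are illustrative assumptions, not part of this commit:

```sql
-- Hypothetical watermark table; not part of this commit.
CREATE TABLE IF NOT EXISTS etl_watermark (
  job_name text PRIMARY KEY,
  last_run timestamptz NOT NULL
);

-- read the current watermark and substitute it into the BigQuery DECLARE above
SELECT last_run FROM etl_watermark WHERE job_name = 'overture_place_min';

-- advance the watermark only after the load has succeeded
INSERT INTO etl_watermark (job_name, last_run)
VALUES ('overture_place_min', now())
ON CONFLICT (job_name) DO UPDATE SET last_run = EXCLUDED.last_run;
```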

Postgres upsert via a staging table:

```sql
CREATE TEMP TABLE place_min_stg (LIKE place_min);

\copy place_min_stg(id,lon,lat,name,category,website,phone,email,social,address,locality,postcode,country,wikidata,confidence,src_dataset,src_record_id,src_update_time) FROM 'delta_files.csv' CSV HEADER;

INSERT INTO place_min AS t (
  id,lon,lat,name,category,website,phone,email,social,address,locality,
  postcode,country,wikidata,confidence,src_dataset,src_record_id,src_update_time
)
SELECT
  id,lon,lat,name,category,website,phone,email,social,address,locality,
  postcode,country,wikidata,confidence,src_dataset,src_record_id,src_update_time
FROM place_min_stg
ON CONFLICT (id) DO UPDATE
SET
  name = EXCLUDED.name,
  category = EXCLUDED.category,
  lon = EXCLUDED.lon,
  lat = EXCLUDED.lat,
  website = EXCLUDED.website,
  phone = EXCLUDED.phone,
  email = EXCLUDED.email,
  social = EXCLUDED.social,
  address = EXCLUDED.address,
  locality = EXCLUDED.locality,
  postcode = EXCLUDED.postcode,
  country = EXCLUDED.country,
  wikidata = EXCLUDED.wikidata,
  confidence = EXCLUDED.confidence,
  src_dataset = EXCLUDED.src_dataset,
  src_record_id = EXCLUDED.src_record_id,
  src_update_time = EXCLUDED.src_update_time
WHERE t.src_update_time IS NULL OR EXCLUDED.src_update_time > t.src_update_time;
```

#### Sanity checks & sample query

```sql
-- record counts
SELECT COUNT(*) FROM place_min;

-- quick Vienna radius test (500 m)
SELECT id, name, category
FROM place_min
WHERE ST_DWithin(
  geom,
  ST_SetSRID(ST_MakePoint(16.3738, 48.2082), 4326)::geography,
  500
)
ORDER BY confidence DESC NULLS LAST
LIMIT 50;
```

#### Notes & options

PostGIS type: I used geography(Point,4326) for easy meters-based ST_DWithin. If you prefer geometry(Point,4326), cast with geography(geom) when you need distances in meters, or transform appropriately.
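
The commit doesn't include the table DDL itself; a minimal sketch consistent with the load and the sample query above could look like this (the generated geom column and the type choices are assumptions, not part of the commit):

```sql
-- Hypothetical DDL; not part of this commit. Requires PostGIS.
CREATE EXTENSION IF NOT EXISTS postgis;

CREATE TABLE IF NOT EXISTS place_min (
  id              text PRIMARY KEY,   -- ON CONFLICT (id) in the upsert relies on this
  lon             double precision,
  lat             double precision,
  name            text,
  category        text,
  website         text,
  phone           text,
  email           text,
  social          text,
  address         text,
  locality        text,
  postcode        text,
  country         text,
  wikidata        text,
  confidence      numeric,
  src_dataset     text,
  src_record_id   text,
  src_update_time timestamptz,
  -- geography point derived from lon/lat; used by the ST_DWithin sample query
  geom geography(Point, 4326)
       GENERATED ALWAYS AS (ST_SetSRID(ST_MakePoint(lon, lat), 4326)::geography) STORED
);

CREATE INDEX IF NOT EXISTS place_min_geom_gix ON place_min USING gist (geom);
```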

Multiple values: I picked the first website/phone/social/address. If you want arrays, we can keep them as Postgres text[] (and flatten in BQ with ARRAY_TO_STRING(..., '|')).
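
For the array route, a rough sketch of both halves (illustrative only; the flattened website expression below is a variant of the export SELECT, not what the commit ships):

```sql
-- BigQuery side: flatten every website into one pipe-delimited string
-- instead of picking only the first element.
SELECT
  id,
  ARRAY_TO_STRING(
    ARRAY(SELECT w.element FROM UNNEST(websites.list) AS w),
    '|'
  ) AS website
FROM `bigquery-public-data.overture_maps.place`
LIMIT 10;

-- Postgres side: split the delimited string back out into a text[] column after loading.
ALTER TABLE place_min ADD COLUMN IF NOT EXISTS websites text[];
UPDATE place_min
SET websites = string_to_array(website, '|')
WHERE website IS NOT NULL AND website <> '';
```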

Performance: On 20–30M rows, COPY + ON CONFLICT is still very fast. For huge refreshes, consider:

- Load deltas into a permanent place_min_delta table and run a single INSERT ... ON CONFLICT (see the sketch below).

- Periodic VACUUM (ANALYZE) on place_min.
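
A minimal sketch of that first option (the place_min_delta name comes from the bullet; the rest reuses the column list and upsert shown earlier):

```sql
-- Hypothetical permanent delta table; not part of this commit.
CREATE TABLE IF NOT EXISTS place_min_delta (LIKE place_min);

-- \copy each delta shard into place_min_delta using the same column list as the
-- staging load above, then run the same INSERT ... ON CONFLICT upsert once,
-- reading FROM place_min_delta instead of the TEMP table.

TRUNCATE place_min_delta;     -- reset once the batch has been applied
VACUUM (ANALYZE) place_min;   -- the periodic maintenance from the second bullet
```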

Automation: Wrap steps in a small script (Cloud Build, GitLab CI, or a cron on a VM).

etl/export-to-gcs.sql

Lines changed: 33 additions & 0 deletions

@@ -0,0 +1,33 @@
+
+EXPORT DATA OPTIONS (
+  uri='gs://YOUR-BUCKET/overture/place_min_*.csv',
+  format='CSV',
+  overwrite=true,
+  header=true,
+  field_delimiter=','
+)
+AS
+SELECT * FROM (
+  WITH flat AS (
+    SELECT
+      id,
+      ST_X(geometry) AS lon,
+      ST_Y(geometry) AS lat,
+      names.primary AS name,
+      categories.primary AS category,
+      websites.list[SAFE_OFFSET(0)].element AS website,
+      phones.list[SAFE_OFFSET(0)].element AS phone,
+      emails.list[SAFE_OFFSET(0)].element AS email,
+      socials.list[SAFE_OFFSET(0)].element AS social,
+      addresses.list[SAFE_OFFSET(0)].element.freeform AS address,
+      addresses.list[SAFE_OFFSET(0)].element.locality AS locality,
+      addresses.list[SAFE_OFFSET(0)].element.postcode AS postcode,
+      addresses.list[SAFE_OFFSET(0)].element.country AS country,
+      sources.list[SAFE_OFFSET(0)].element.dataset AS src_dataset,
+      sources.list[SAFE_OFFSET(0)].element.record_id AS src_record_id,
+      TIMESTAMP(sources.list[SAFE_OFFSET(0)].element.update_time) AS src_update_time,
+      confidence
+    FROM `bigquery-public-data.overture_maps.place`
+  )
+  SELECT * FROM flat
+);
