Skip to content

Commit 33b6181

Browse files
authored
Feature/handle full refresh (#245) (#247)
* fix issue with earthquake staging model * update workflow to handle incremental models * update get artifacts script to use newer github actions syntax * update workflow to run for any branch name
1 parent e97c932 commit 33b6181

File tree

9 files changed

+159
-40
lines changed

9 files changed

+159
-40
lines changed

.github/workflows/10_feature_airflow_checks.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ jobs:
3737
name: Pull Request Airflow Tests
3838
runs-on: ubuntu-latest
3939
container: datacoves/ci-airflow-dbt-snowflake:3.4
40+
needs: [validate-branch]
4041

4142
env:
4243
AIRFLOW__CORE__DAGS_FOLDER: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/orchestrate/dags
@@ -83,17 +84,19 @@ jobs:
8384

8485
airflow-job-status:
8586
runs-on: ubuntu-latest
86-
needs: [airflow]
87+
needs: [validate-branch, airflow]
8788
if: always()
8889
steps:
8990
- name: Comment PR with Airflow status
9091
uses: thollander/actions-comment-pull-request@v2
9192
with:
9293
message: |
9394
## 🎯 Airflow Workflow Status
94-
- **Job**: Pull Request Airflow Tests → **${{ needs.airflow.result }}**
95+
- **Branch Validation**: ${{ needs.validate-branch.result }}
96+
- **Airflow Validations**: ${{ needs.airflow.result }}
9597
96-
${{ needs.airflow.result == 'skipped' && '⏭️ _Skipped because no orchestrate/ files were changed_' || '' }}
98+
${{ needs.validate-branch.result == 'failure' && '❌ _Branch validation failed_' || '' }}
99+
${{ needs.airflow.result == 'skipped' && '⏭️ _Airflow validations skipped_' || '' }}
97100
${{ needs.airflow.result == 'success' && '✅ _All Airflow validations passed_' || '' }}
98101
${{ needs.airflow.result == 'failure' && '❌ _Airflow validations failed_' || '' }}
99102
comment_tag: airflow-workflow-status

.github/workflows/10_feature_dbt_checks.yml

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ jobs:
3636
dbt:
3737
name: Pull Request dbt Tests
3838
runs-on: ubuntu-latest
39+
needs: [validate-branch]
3940

4041
# Set environment variables in
4142
# https://github.com/<your org>/<your repo>/settings/variables/actions
@@ -71,6 +72,9 @@ jobs:
7172
# cannot be applied when using the Datacoves permifrost security model.
7273
DATACOVES__DROP_DB_ON_FAIL: ${{ vars.DATACOVES__DROP_DB_ON_FAIL }}
7374

75+
# Full refresh control variables
76+
FULL_REFRESH_FLAG: ${{ contains(github.event.pull_request.labels.*.name, 'full-refresh') && '--full-refresh' || '' }}
77+
7478
steps:
7579
- name: Checkout branch
7680
uses: actions/[email protected]
@@ -88,12 +92,17 @@ jobs:
8892
run: "dbt deps"
8993

9094
- name: Create PR database
91-
run: "dbt --no-write-json run-operation create_database"
95+
run: dbt --no-write-json run-operation create_database
9296

9397
- name: Get prod manifest
9498
id: prod_manifest
9599
run: "../automate/dbt/get_artifacts.sh"
96100

101+
- name: Clone incremental models that are directly or indirectly affected by the change
102+
run: |
103+
dbt clone -s state:modified+,config.materialized:incremental,state:old --state logs
104+
dbt clone -s state:modified+,config.materialized:snapshot,state:old --state logs
105+
97106
##### Governance Checks
98107
# this first runs dbt but creates empty tables, this is enough to then run the hooks and fail fast
99108

@@ -107,13 +116,13 @@ jobs:
107116

108117
# There is an issue with --empty and dynamic tables so need to exclude them
109118
- name: Governance run of dbt with EMPTY models using slim mode
110-
if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' && contains(github.event.pull_request.labels.*.name, 'full-refresh') != true }}
111-
run: "dbt build --fail-fast --defer --state logs --select state:modified+ --empty --exclude config.materialized:dynamic_table"
119+
if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' }}
120+
run: "dbt build --fail-fast --defer --state logs --select state:modified+ --empty --exclude config.materialized:dynamic_table ${{ env.FULL_REFRESH_FLAG }}"
112121

113122
# There is an issue with --empty and dynamic tables so need to exclude them
114123
- name: Governance run of dbt with EMPTY models using full run
115-
if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' || contains(github.event.pull_request.labels.*.name, 'full-refresh') }}
116-
run: "dbt build --fail-fast --empty --exclude config.materialized:dynamic_table"
124+
if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' }}
125+
run: "dbt build --fail-fast --empty --exclude config.materialized:dynamic_table ${{ env.FULL_REFRESH_FLAG }}"
117126

118127
- name: Generate Docs Combining Prod and branch catalog.json
119128
run: "dbt-coves generate docs --merge-deferred --state logs"
@@ -123,12 +132,12 @@ jobs:
123132

124133
##### Real dbt run given that we passed governance checks
125134
- name: Run dbt build slim mode
126-
if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' && contains(github.event.pull_request.labels.*.name, 'full-refresh') != true }}
127-
run: "dbt build --fail-fast --defer --state logs --select state:modified+"
135+
if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' }}
136+
run: "dbt build --fail-fast --defer --state logs --select state:modified+ ${{ env.FULL_REFRESH_FLAG }}"
128137

129138
- name: Run dbt build full run
130-
if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' || contains(github.event.pull_request.labels.*.name, 'full-refresh') }}
131-
run: "dbt build --fail-fast"
139+
if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' }}
140+
run: "dbt build --fail-fast ${{ env.FULL_REFRESH_FLAG }}"
132141

133142
- name: Grant access to PR database
134143
id: grant-access-to-database
@@ -142,17 +151,19 @@ jobs:
142151

143152
dbt-job-status:
144153
runs-on: ubuntu-latest
145-
needs: [dbt]
154+
needs: [validate-branch, dbt]
146155
if: always()
147156
steps:
148157
- name: Comment PR with dbt status
149158
uses: thollander/actions-comment-pull-request@v2
150159
with:
151160
message: |
152161
## 🧪 dbt Workflow Status
153-
- **Job**: Pull Request dbt Tests → **${{ needs.dbt.result }}**
162+
- **Branch Validation**: ${{ needs.validate-branch.result }}
163+
- **dbt Validations**: ${{ needs.dbt.result }}
154164
155-
${{ needs.dbt.result == 'skipped' && '⏭️ _Skipped because no transform/ files were changed_' || '' }}
165+
${{ needs.validate-branch.result == 'failure' && '❌ _Branch validation failed_' || '' }}
166+
${{ needs.dbt.result == 'skipped' && '⏭️ _dbt validations skipped_' || '' }}
156167
${{ needs.dbt.result == 'success' && '✅ _All dbt validations passed_' || '' }}
157168
${{ needs.dbt.result == 'failure' && '❌ _dbt validations failed_' || '' }}
158169
comment_tag: dbt-workflow-status

.github/workflows/20_release_dbt_checks.yml

Lines changed: 99 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@ name: "20: 🏎️ dbt Release Branch Validations"
22

33
on: # yamllint disable-line rule:truthy
44
pull_request:
5-
branches:
6-
- main
5+
types: [opened, synchronize, labeled, unlabeled]
6+
# No branch restrictions for validate-branch job to run on all PRs
7+
# dbt job will be conditionally run based on target branch
78
paths:
89
- transform/**/*
910

@@ -16,6 +17,83 @@ concurrency:
1617
cancel-in-progress: true
1718

1819
jobs:
20+
add-deploy-marker:
21+
if: github.event.action == 'labeled' && github.event.label.name == 'full-refresh'
22+
runs-on: ubuntu-latest
23+
name: Add Deploy Marker to PR Description
24+
25+
steps:
26+
- name: Add deploy marker to PR description
27+
uses: actions/github-script@v6
28+
with:
29+
script: |
30+
const { data: pullRequest } = await github.rest.pulls.get({
31+
owner: context.repo.owner,
32+
repo: context.repo.repo,
33+
pull_number: context.issue.number
34+
});
35+
36+
let body = pullRequest.body || '';
37+
38+
// Check if marker already exists
39+
if (!body.includes('[deploy:full-refresh]')) {
40+
// Add marker to the end of the description
41+
const newBody = body + '\n\n[deploy:full-refresh]';
42+
43+
await github.rest.pulls.update({
44+
owner: context.repo.owner,
45+
repo: context.repo.repo,
46+
pull_number: context.issue.number,
47+
body: newBody
48+
});
49+
50+
// Also add a comment explaining what happened
51+
await github.rest.issues.createComment({
52+
owner: context.repo.owner,
53+
repo: context.repo.repo,
54+
issue_number: context.issue.number,
55+
body: '🚀 **Full-refresh deployment marker added to PR description**\n\nThe `[deploy:full-refresh]` marker has been automatically added to this PR description. When merged, this will trigger a full-refresh deployment.'
56+
});
57+
}
58+
59+
remove-deploy-marker:
60+
if: github.event.action == 'unlabeled' && github.event.label.name == 'full-refresh'
61+
runs-on: ubuntu-latest
62+
name: Remove Deploy Marker from PR Description
63+
64+
steps:
65+
- name: Remove deploy marker from PR description
66+
uses: actions/github-script@v6
67+
with:
68+
script: |
69+
const { data: pullRequest } = await github.rest.pulls.get({
70+
owner: context.repo.owner,
71+
repo: context.repo.repo,
72+
pull_number: context.issue.number
73+
});
74+
75+
let body = pullRequest.body || '';
76+
77+
// Remove the marker if it exists
78+
if (body.includes('[deploy:full-refresh]')) {
79+
const newBody = body.replace(/\n*\[deploy:full-refresh\]\n*/g, '').trim();
80+
81+
await github.rest.pulls.update({
82+
owner: context.repo.owner,
83+
repo: context.repo.repo,
84+
pull_number: context.issue.number,
85+
body: newBody
86+
});
87+
88+
// Add a comment explaining what happened
89+
await github.rest.issues.createComment({
90+
owner: context.repo.owner,
91+
repo: context.repo.repo,
92+
issue_number: context.issue.number,
93+
body: '✅ **Full-refresh deployment marker removed**\n\nThe `[deploy:full-refresh]` marker has been removed from this PR description. Normal deployment will occur on merge.'
94+
});
95+
}
96+
1997
validate-branch:
2098
runs-on: ubuntu-latest
2199
name: Validate Branch Names and Merge Rules and assure branch has changes from main
@@ -34,8 +112,10 @@ jobs:
34112
run: automate/git/branch_validator.py
35113

36114
dbt:
115+
if: github.base_ref == 'main' && github.event.action != 'unlabeled' && (github.event.action != 'labeled' || github.event.label.name == 'full-refresh')
37116
name: Pull Request dbt Tests
38117
runs-on: ubuntu-latest
118+
needs: [validate-branch]
39119

40120
# Set environment variables in
41121
# https://github.com/<your org>/<your repo>/settings/variables/actions
@@ -44,7 +124,7 @@ jobs:
44124
# https://github.com/<org>/<repo>/settings/environments
45125
# environment: PR_ENV
46126

47-
# most people should use this one
127+
# Most people should use this docker image
48128
container: datacoves/ci-basic-dbt-snowflake:3.4
49129

50130
defaults:
@@ -74,6 +154,9 @@ jobs:
74154
# cannot be applied when using the Datacoves permifrost security model.
75155
DATACOVES__DROP_DB_ON_FAIL: ${{ vars.DATACOVES__DROP_DB_ON_FAIL }}
76156

157+
# Full refresh control variables
158+
FULL_REFRESH_FLAG: ${{ contains(github.event.pull_request.labels.*.name, 'full-refresh') && '--full-refresh' || '' }}
159+
77160
steps:
78161
- name: Checkout branch
79162
uses: actions/[email protected]
@@ -123,12 +206,12 @@ jobs:
123206

124207
##### Real dbt run given that we passed governance checks
125208
- name: Run dbt build slim mode
126-
if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' && contains(github.event.pull_request.labels.*.name, 'full-refresh') != true }}
127-
run: "dbt build --fail-fast --defer --state logs --select state:modified+"
209+
if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' }}
210+
run: "dbt build --fail-fast --defer --state logs --select state:modified+ ${{ env.FULL_REFRESH_FLAG }}"
128211

129212
- name: Run dbt build full run
130-
if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' || contains(github.event.pull_request.labels.*.name, 'full-refresh') }}
131-
run: "dbt build --fail-fast"
213+
if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' }}
214+
run: "dbt build --fail-fast ${{ env.FULL_REFRESH_FLAG }}"
132215

133216
- name: Grant access to QA_TEMP database
134217
if: steps.check_qa_created_today.outputs.qa_created_today != 'true'
@@ -141,6 +224,7 @@ jobs:
141224
dbt --no-write-json run-operation swap_database --args '{db1: ${{ env.DATACOVES__MAIN__DATABASE }}, db2: ${{ env.DATACOVES__MAIN__DATABASE_QA }}, create_missing_db: true}'
142225
dbt --no-write-json run-operation drop_recreate_db --args '{db_name: ${{ env.DATACOVES__MAIN__DATABASE }}, recreate: False}'
143226
227+
144228
# # We drop the database when there is a failure to grant access to the db because
145229
# # most likely the schema was not set properly in dbt_project.yml so models built to default schema
146230
# - name: Drop PR database on Failure to grant security access
@@ -149,17 +233,21 @@ jobs:
149233

150234
dbt-job-status:
151235
runs-on: ubuntu-latest
152-
needs: [dbt]
236+
needs: [add-deploy-marker, remove-deploy-marker, validate-branch, dbt]
153237
if: always()
154238
steps:
155-
- name: Comment PR with dbt status
239+
- name: Comment PR with workflow status
156240
uses: thollander/actions-comment-pull-request@v2
157241
with:
158242
message: |
159243
## 🧪 dbt Workflow Status
160-
- **Job**: Pull Request dbt Tests → **${{ needs.dbt.result }}**
244+
- **Branch Validation**: ${{ needs.validate-branch.result }}
245+
- **Deploy Marker Management**: ${{ (needs.add-deploy-marker.result != 'skipped' && needs.add-deploy-marker.result) || (needs.remove-deploy-marker.result != 'skipped' && needs.remove-deploy-marker.result) || 'skipped' }}
246+
- **dbt Validations**: ${{ needs.dbt.result }}
161247
162-
${{ needs.dbt.result == 'skipped' && '⏭️ _Skipped because no transform/ files were changed_' || '' }}
248+
${{ needs.validate-branch.result == 'failure' && '❌ _Branch validation failed_' || '' }}
249+
${{ needs.dbt.result == 'skipped' && '⏭️ _dbt validations skipped_' || '' }}
163250
${{ needs.dbt.result == 'success' && '✅ _All dbt validations passed_' || '' }}
164251
${{ needs.dbt.result == 'failure' && '❌ _dbt validations failed_' || '' }}
252+
${{ (needs.add-deploy-marker.result == 'success' || needs.remove-deploy-marker.result == 'success') && '📝 _Deploy marker updated in PR description_' || '' }}
165253
comment_tag: dbt-workflow-status

.github/workflows/30_deploy_changes_to_production.yml

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,15 +73,27 @@ jobs:
7373
- name: Refresh dynamic tables before blue-green
7474
run: "dbt run -s materialization config.materialized:dynamic_table --full-refresh"
7575

76+
- name: Check for full-refresh marker in merge commit message
77+
id: check-full-refresh
78+
run: |
79+
# Check the latest commit message for the deployment marker
80+
if git log -1 --pretty=%B | grep -q "\[deploy:full-refresh\]"; then
81+
echo "full_refresh=true" >> $GITHUB_OUTPUT
82+
echo "Found '[deploy:full-refresh]' marker in merge commit message, will run with --full-refresh"
83+
else
84+
echo "full_refresh=false" >> $GITHUB_OUTPUT
85+
echo "No full-refresh deployment marker found in merge commit message, running normal deployment"
86+
fi
87+
7688
- name: Run blue / green deployment
77-
if: contains(github.event.pull_request.labels.*.name, 'full-refresh') != true
89+
if: steps.check-full-refresh.outputs.full_refresh != 'true'
7890
id: run-blue-green
7991
env:
8092
MANIFEST_FOUND: ${{ steps.prod-manifest.outputs.manifest_found }}
8193
run: "dbt-coves blue-green --defer"
8294

8395
- name: Run blue / green deployment with full-refresh
84-
if: contains(github.event.pull_request.labels.*.name, 'full-refresh')
96+
if: steps.check-full-refresh.outputs.full_refresh == 'true'
8597
id: run-blue-green-full-refresh
8698
env:
8799
MANIFEST_FOUND: ${{ steps.prod-manifest.outputs.manifest_found }}

automate/dbt/get_artifacts.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@ if [ $LINES_IN_MANIFEST -eq 0 ]
1414
then
1515
echo "Manifest not found in Snowflake stage, contact the Snowflake administrator to load a updated manifest to snowflake."
1616
# This is used by github actions
17-
echo "::set-output name=manifest_found::false"
17+
echo "manifest_found=false" >> $GITHUB_OUTPUT
1818

1919
# This is used by Jenkins
2020
# echo "false" > temp_MANIFEST_FOUND.txt
2121
else
2222
echo "Updated manifest from production"
2323

2424
# This is used by github actions
25-
echo "::set-output name=manifest_found::true"
25+
echo "manifest_found=true" >> $GITHUB_OUTPUT
2626

2727
# This is used by Jenkins
2828
# echo "true" > temp_MANIFEST_FOUND.txt

automate/git/branch_validator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def get_commit_count(source_branch, github_token, repository):
3737
print(f"✅ The {source_branch} is up to date with the main branch.")
3838
else:
3939
print(f"❌ There are {commits_behind} commit(s) in the main branch that are not in Source branch: {source_branch}. Pull main into the {source_branch}")
40+
sys.exit(1)
4041

4142
except requests.exceptions.RequestException as e:
4243
raise GitCommandError(f"❌ Failed to compare branches via GitHub API: {e}")

transform/models/L1_inlets/country_geo/stg_country_polygons.sql

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ with raw_source as (
88
final as (
99

1010
select
11-
features:properties:ADMIN::STRING as country_name,
12-
features:properties:ISO_A2::STRING as country_code_2,
13-
features:properties:ISO_A3::STRING as country_code_3,
14-
features:type::STRING as feature_type,
15-
features:geometry:type::STRING as geometry_type,
16-
TRY_TO_GEOGRAPHY(features:geometry) as geography,
11+
name as country_name,
12+
iso3166_1_alpha_2 as country_code_2,
13+
iso3166_1_alpha_3 as country_code_3,
14+
type as feature_type,
15+
geometry_type,
16+
try_to_geography(geometry) as geography,
1717
features as raw_geojson
1818
from raw_source
1919

@@ -26,8 +26,8 @@ select
2626
feature_type,
2727
geometry_type,
2828
geography,
29-
case when geography is not NULL then ST_AREA(geography) end as area_m2,
30-
case when geography is not NULL then ST_PERIMETER(geography) end as perimeter_m,
31-
case when geography is not NULL then ST_CENTROID(geography) end as centroid,
29+
case when geography is not null then st_area(geography) end as area_m2,
30+
case when geography is not null then st_perimeter(geography) end as perimeter_m,
31+
case when geography is not null then st_centroid(geography) end as centroid,
3232
raw_geojson
3333
from final

0 commit comments

Comments
 (0)