# Workflow as shown in pull request #44: "Feature/update dags and dbt api"

# Pull-request validation workflow for the dbt project under transform/.
#
# Jobs:
#   validate-branch  - runs automate/git/branch_validator.py against the PR branches.
#   dbt              - runs dbt deps / compile and pushes the resulting artifacts.
#   dbt-job-status   - posts (or updates) a PR comment summarising the two results.
name: "10: 🧪 dbt Validations & Governance Checks"

on:  # yamllint disable-line rule:truthy
  pull_request:
    branches:
      - 'release/**/*'
    paths:
      - transform/**/*

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# This cancels a run if another change is pushed to the same branch
concurrency:
  group: transform-${{ github.ref }}
  cancel-in-progress: true

jobs:
  validate-branch:
    runs-on: ubuntu-latest
    name: Validate Branch Names and Merge Rules and assure branch has changes from main
    # Branch names are passed through the environment rather than being
    # interpolated into the command line.
    env:
      SOURCE_BRANCH: ${{ github.head_ref }}
      TARGET_BRANCH: ${{ github.base_ref }}
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      GITHUB_REPOSITORY: ${{ github.repository }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Validate current branch
        run: automate/git/branch_validator.py

  dbt:
    name: Pull Request dbt Tests
    runs-on: ubuntu-latest
    needs: [validate-branch]

    # Set environment variables in
    # https://github.com/<your org>/<your repo>/settings/variables/actions
    #
    # Alternatively, You can define multiple ENV for different workflows.
    # https://github.com/<org>/<repo>/settings/environments
    # environment: PR_ENV

    # most people should use this one
    container: datacoves/ci-basic-dbt-snowflake:4.1

    defaults:
      run:
        working-directory: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/transform

    env:
      DBT_PROFILES_DIR: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/automate/dbt
      DATACOVES__DBT_HOME: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/transform
      DATACOVES__YAML_DAGS_FOLDER: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/schedule

      DATACOVES__MAIN__ACCOUNT: ${{ vars.DATACOVES__MAIN__ACCOUNT }}

      # The PR number is appended to the configured database name.
      DATACOVES__MAIN__DATABASE: ${{ vars.DATACOVES__MAIN__DATABASE }}_PR_${{ github.event.number }}
      DATACOVES__MAIN__SCHEMA: ${{ vars.DATACOVES__MAIN__SCHEMA }}

      DATACOVES__MAIN__ROLE: ${{ vars.DATACOVES__MAIN__ROLE }}
      DATACOVES__MAIN__WAREHOUSE: ${{ vars.DATACOVES__MAIN__WAREHOUSE }}

      DATACOVES__MAIN__USER: ${{ vars.DATACOVES__MAIN__USER }}
      DATACOVES__MAIN__PRIVATE_KEY: ${{ secrets.DATACOVES__MAIN__PRIVATE_KEY }}

      # This is used by datacoves to drop the test database if permissions
      # cannot be applied when using the Datacoves permifrost security model.
      DATACOVES__DROP_DB_ON_FAIL: ${{ vars.DATACOVES__DROP_DB_ON_FAIL }}

      # Full refresh control variables
      # Resolves to '--full-refresh' when the PR carries the 'full-refresh'
      # label, and to an empty string otherwise.
      FULL_REFRESH_FLAG: ${{ contains(github.event.pull_request.labels.*.name, 'full-refresh') && '--full-refresh' || '' }}

      # Needed for dbt-api
      DATACOVES__API_ENDPOINT: ${{ vars.DATACOVES__API_ENDPOINT }}
      DATACOVES__API_TOKEN: ${{ secrets.DATACOVES__API_TOKEN }}
      DATACOVES__ACCOUNT_ID: ${{ vars.DATACOVES__ACCOUNT_ID }}
      DATACOVES__PROJECT_SLUG: ${{ vars.DATACOVES__PROJECT_SLUG }}
      DATACOVES__ENVIRONMENT_SLUG: ${{ vars.DATACOVES__ENVIRONMENT_SLUG }}

    steps:
      - name: Checkout branch
        # Same major version as the checkout step in validate-branch.
        uses: actions/checkout@v4
        with:
          # Full history is fetched; the diff against origin/<base> below
          # relies on the base branch being available locally.
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Set Secure Directory
        run: git config --global --add safe.directory /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}

      - name: List of files changed
        run: "git diff origin/${{ github.event.pull_request.base.ref }} HEAD --name-status"

      # Besides installing packages, this step also compiles the project and
      # runs the artifact push script.
      - name: Install dbt packages
        run: |
          dbt deps
          dbt compile
          ../automate/dbt/push_dbt_artifacts.py

      # - name: Install dbt packages
      #   run: "dbt deps"

      # - name: Create PR database
      #   run: dbt --no-write-json run-operation create_database

      # - name: Get prod manifest
      #   id: prod_manifest
      #   run: "../automate/dbt/get_artifacts.sh"

      # - name: Clone incremental models that are directly or indirectly affected by the change
      #   if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' }}
      #   run: |
      #     dbt clone -s state:modified+,config.materialized:incremental,state:old --state logs
      #     dbt clone -s state:modified+,config.materialized:snapshot,state:old --state logs

      # ##### Governance Checks
      # # this first runs dbt but creates empty tables, this is enough to then run the hooks and fail fast

      # # We need to run observe model so that post hook works
      # - name: Run Observe Model
      #   run: "dbt build --fail-fast -s L1_inlets.observe"

      # # There is an issue with --empty and dynamic tables so need to run them by themselves
      # - name: Governance run of dynamic tables
      #   if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' }}
      #   run: "dbt build --fail-fast -s config.materialized:dynamic_table stg_test_failures resource_type:seed"

      # # There is an issue with --empty and dynamic tables so need to run them by themselves
      # - name: Governance run of dynamic tables
      #   if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' }}
      #   run: "dbt build --fail-fast -s config.materialized:dynamic_table stg_test_failures --defer --state logs"

      # # There is an issue with --empty and dynamic tables so need to exclude them
      # - name: Governance run of dbt with EMPTY models using slim mode
      #   if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' }}
      #   run: "dbt build --fail-fast --defer --state logs --select state:modified+ --empty --exclude config.materialized:dynamic_table ${{ env.FULL_REFRESH_FLAG }}"

      # # There is an issue with --empty and dynamic tables so need to exclude
      # - name: Governance run of dbt with EMPTY models using full run
      #   if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' }}
      #   run: "dbt build --fail-fast --empty --exclude config.materialized:dynamic_table ${{ env.FULL_REFRESH_FLAG }}"

      # - name: Generate Docs Combining Prod and branch catalog.json
      #   if: ${{ steps.prod_manifest.outputs.catalog_found == 'true' }}
      #   run: "dbt-coves generate docs --merge-deferred --state logs"

      # - name: Generate dbt Docs
      #   if: ${{ steps.prod_manifest.outputs.catalog_found == 'false' }}
      #   run: "dbt docs generate"

      # - name: Run governance checks
      #   run: "pre-commit run --from-ref origin/${{ github.event.pull_request.base.ref }} --to-ref HEAD"

      # ##### Real dbt run given that we passed governance checks
      # - name: Run dbt build slim mode
      #   if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' }}
      #   run: "dbt build --fail-fast --defer --state logs --select state:modified+ ${{ env.FULL_REFRESH_FLAG }}"

      # - name: Run dbt build full run
      #   if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' }}
      #   run: "dbt build --fail-fast ${{ env.FULL_REFRESH_FLAG }}"

      # - name: Grant access to PR database
      #   id: grant-access-to-database
      #   run: "dbt --no-write-json run-operation grant_access_to_pr_database"

      # # We drop the database when there is a failure to grant access to the db because
      # # most likely the schema was not set properly in dbt_project.yml so models built to default schema
      # - name: Drop PR database on Failure to grant security access
      #   if: always() && (env.DATACOVES__DROP_DB_ON_FAIL == 'true') && (steps.grant-access-to-database.outcome == 'failure')
      #   run: "dbt --no-write-json run-operation drop_recreate_db --args '{db_name: ${{env.DATACOVES__MAIN__DATABASE}}, recreate: False}'"  # yamllint disable-line rule:line-length

  dbt-job-status:
    runs-on: ubuntu-latest
    needs: [validate-branch, dbt]
    # always() keeps this job running when an upstream job failed or was
    # skipped, so the status is still reported. The event check skips manual
    # (workflow_dispatch) runs, which have no pull request to comment on.
    if: always() && github.event_name == 'pull_request'
    # The comment action writes to the pull request, so the job token needs
    # write access even when the repository default is read-only.
    permissions:
      pull-requests: write
    steps:
      - name: Comment PR with dbt status
        uses: thollander/actions-comment-pull-request@v2
        with:
          message: |
            ## 🧪 dbt Workflow Status
            - **Branch Validation**: ${{ needs.validate-branch.result }}
            - **dbt Validations**: ${{ needs.dbt.result }}
            ${{ needs.validate-branch.result == 'failure' && '❌ _Branch validation failed_' || '' }}
            ${{ needs.dbt.result == 'skipped' && '⏭️ _dbt validations skipped_' || '' }}
            ${{ needs.dbt.result == 'success' && '✅ _All dbt validations passed_' || '' }}
            ${{ needs.dbt.result == 'failure' && '❌ _dbt validations failed_' || '' }}
          # A stable tag lets the action find and update its earlier comment
          # instead of adding a new one on every run.
          comment_tag: dbt-workflow-status