Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/05_branch_validation.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: "05: 🪾 Branch Validation"
name: "05: 🪾 Branch Validations"

on:
pull_request:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# orchestrate-workflow.yml
name: 🎯 Airflow Validations
name: "10: 🎯 Airflow Validations"

on: # yamllint disable-line rule:truthy
pull_request:
branches:
- 'feature/**'
paths:
- orchestrate/*
- orchestrate/**/*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
name: 🔄 dbt Validations & Governance Checks"
name: "10: 🧪 dbt Validations & Governance Checks"

on: # yamllint disable-line rule:truthy
pull_request:
branches:
- 'feature/**'
paths:
- transform/*
- transform/**/*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
name: 🗑️ Drop test databases
name: "20: 🗑️ Drop test databases"

on: # yamllint disable-line rule:truthy
pull_request:
branches:
- 'feature/**'
types:
- closed

Expand Down
107 changes: 107 additions & 0 deletions .github/workflows/20_release_dbt_checks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
name: "20: ✅ dbt Release Branch Validations"

# Builds and tests dbt changes for pull requests that target a release branch:
# production is cloned into a temporary QA database, dbt is built against it,
# and the result is swapped in as the QA database.
on:  # yamllint disable-line rule:truthy
  pull_request:
    branches:
      - 'release/**'
    paths:
      - transform/*
      - transform/**/*

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# This cancels a run if another change is pushed to the same branch
concurrency:
  group: transform-${{ github.ref }}
  cancel-in-progress: true

jobs:
  dbt:
    name: Pull Request dbt Tests
    runs-on: ubuntu-latest

    # Set environment variables in
    # https://github.com/<your org>/<your repo>/settings/variables/actions
    #
    # Alternatively, You can define multiple ENV for different workflows.
    # https://github.com/<org>/<repo>/settings/environments
    # environment: PR_ENV

    # most people should use this one
    container: datacoves/ci-basic-dbt-snowflake:3.4

    defaults:
      run:
        working-directory: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/transform

    env:
      DBT_PROFILES_DIR: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/automate/dbt
      DATACOVES__DBT_HOME: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/transform
      DATACOVES__YAML_DAGS_FOLDER: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/schedule

      DATACOVES__MAIN__ACCOUNT: ${{ vars.DATACOVES__MAIN__ACCOUNT }}

      # Only vars.DATACOVES__MAIN__DATABASE is read from the repository
      # variables; the _PROD / _QA / _QA_TEMP names below are derived from it
      # and exist only as job-level env values.
      DATACOVES__MAIN__DATABASE_PROD: ${{ vars.DATACOVES__MAIN__DATABASE }}
      DATACOVES__MAIN__DATABASE: ${{ vars.DATACOVES__MAIN__DATABASE }}_QA_TEMP
      DATACOVES__MAIN__DATABASE_QA: ${{ vars.DATACOVES__MAIN__DATABASE }}_QA

      DATACOVES__MAIN__SCHEMA: ${{ vars.DATACOVES__MAIN__SCHEMA }}

      DATACOVES__MAIN__ROLE: ${{ vars.DATACOVES__MAIN__ROLE }}
      DATACOVES__MAIN__WAREHOUSE: ${{ vars.DATACOVES__MAIN__WAREHOUSE }}

      DATACOVES__MAIN__USER: ${{ vars.DATACOVES__MAIN__USER }}
      DATACOVES__MAIN__PRIVATE_KEY: ${{ secrets.DATACOVES__MAIN__PRIVATE_KEY }}

      # This is used by datacoves to drop the test database if permissions
      # cannot be applied when using the Datacoves permifrost security model.
      DATACOVES__DROP_DB_ON_FAIL: ${{ vars.DATACOVES__DROP_DB_ON_FAIL }}

    steps:
      - name: Checkout branch
        # NOTE(review): the original version pin was unreadable in the
        # reviewed source; v4 is a valid tag - confirm the intended pin.
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Set Secure Directory
        run: git config --global --add safe.directory /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}

      - name: List of files changed
        run: "git diff origin/${{ github.event.pull_request.base.ref }} HEAD --name-status"

      - name: Install dbt packages
        run: "dbt deps"

      # The database names must come from the job `env` context. In the `vars`
      # context DATACOVES__MAIN__DATABASE is the production database name, and
      # the _PROD / _QA names are only defined in the env block above.
      - name: Clone Prod database
        run: "dbt --no-write-json run-operation clone_database --args '{source_db: ${{ env.DATACOVES__MAIN__DATABASE_PROD }}, target_db: ${{ env.DATACOVES__MAIN__DATABASE }}}'"  # yamllint disable-line rule:line-length

      - name: Get prod manifest
        id: prod_manifest
        run: "../automate/dbt/get_artifacts.sh"

      ##### Real dbt run given that we passed governance checks
      # Slim build: only when a prod manifest was found and the PR is not
      # labelled 'full-refresh'.
      - name: Run dbt build slim mode
        if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' && contains(github.event.pull_request.labels.*.name, 'full-refresh') != true }}
        run: "dbt build --fail-fast --defer --state logs --select state:modified+"

      # Full build: no prod manifest, or the PR is labelled 'full-refresh'.
      - name: Run dbt build full run
        if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' || contains(github.event.pull_request.labels.*.name, 'full-refresh') }}
        run: "dbt build --fail-fast"

      - name: Grant access to QA_TEMP database
        id: grant-access-to-database
        run: "dbt --no-write-json run-operation grant_access_to_pr_database"

      # db1 is the freshly built QA_TEMP database, db2 is the current QA
      # database; after the swap the database named QA_TEMP holds the old QA
      # contents and is dropped.
      - name: Swap QA with QA_TEMP database and drop old QA db
        run: |
          dbt --no-write-json run-operation swap_database --args '{db1: ${{ env.DATACOVES__MAIN__DATABASE }}, db2: ${{ env.DATACOVES__MAIN__DATABASE_QA }}}'
          dbt --no-write-json run-operation drop_recreate_db --args '{db_name: ${{ env.DATACOVES__MAIN__DATABASE }}, recreate: False}'

      # # We drop the database when there is a failure to grant access to the db because
      # # most likely the schema was not set properly in dbt_project.yml so models built to default schema
      # - name: Drop PR database on Failure to grant security access
      #   if: always() && (env.DATACOVES__DROP_DB_ON_FAIL == 'true') && (steps.grant-access-to-database.outcome == 'failure')
      #   run: "dbt --no-write-json run-operation drop_recreate_db --args '{db_name: ${{env.DATACOVES__MAIN__DATABASE}}, recreate: False}'" # yamllint disable-line rule:line-length
2 changes: 1 addition & 1 deletion .github/workflows/30_deploy_changes_to_production.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: 🚀 Deploy to Production
name: "30: 🚀 Deploy to Production"

on: # yamllint disable-line rule:truthy
push:
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ scripts/
# VS Code
.vscode/

# Duckdb files
*.duckdb

# logs
*.log
logs/
Expand Down
2 changes: 0 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ repos:
- id: check-model-has-description
- id: check-model-has-properties-file
- id: check-model-has-all-columns
- id: check-database-casing-consistency
always_run: true

- repo: https://github.com/sqlfluff/sqlfluff
# this is the version of sqlfluff, needs to be updated when using a new sqlfluff version (pip show sqlfluff)
Expand Down
6 changes: 6 additions & 0 deletions automate/dbt/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,9 @@ default:
private_key: "{{ env_var('DATACOVES__MAIN__PRIVATE_KEY') }}"
role: "{{ env_var('DATACOVES__MAIN__ROLE') }}"
warehouse: "{{ env_var('DATACOVES__MAIN__WAREHOUSE') }}"

# DATACOVES__MAIN__PRIVATE_KEY must be set in GitHub Actions as a P8-format private key that looks like this:
# -----BEGIN PRIVATE KEY-----
# MIIEugIBYV.....
# .......PcCnSg=
# -----END PRIVATE KEY-----
116 changes: 116 additions & 0 deletions automate/git/branch_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#!/usr/bin/env python3

import os
import sys
import requests

class ValidationError(Exception):
    """Raised when a source/target branch pair breaks the branch naming or merge-order rules."""

class GitCommandError(Exception):
    """Raised when comparing branches through the GitHub API fails or the response cannot be parsed."""

def get_commit_count(source_branch, github_token, repository, target_branch="main"):
    """
    Report how many commits ``source_branch`` is behind ``target_branch``.

    Calls the GitHub compare endpoint with ``source_branch`` as the base and
    ``target_branch`` as the head, so the ``ahead_by`` field of the response
    is the number of commits present in the target but missing from the
    source. The outcome is printed and the count is returned.

    Args:
        source_branch: Branch being checked.
        github_token: Token sent in the ``Authorization`` header.
        repository: Repository path interpolated after ``/repos/`` in the
            API URL (e.g. ``owner/name``).
        target_branch: Branch to compare against. Defaults to ``"main"``.

    Returns:
        int: Number of commits in ``target_branch`` that are not in
        ``source_branch`` (0 when the response has no ``ahead_by`` field).

    Raises:
        GitCommandError: If the HTTP request fails or the response cannot
            be parsed.
    """
    # Without a timeout, requests waits indefinitely on a stalled connection,
    # which would hang the CI job.
    request_timeout_seconds = 30

    try:
        # GitHub API endpoint for comparing branches (base...head)
        url = f"https://api.github.com/repos/{repository}/compare/{source_branch}...{target_branch}"

        headers = {
            "Authorization": f"token {github_token}",
            "Accept": "application/vnd.github.v3+json"
        }

        response = requests.get(url, headers=headers, timeout=request_timeout_seconds)
        response.raise_for_status()

        data = response.json()

        # ahead_by tells us how many commits the target is ahead of source
        # (i.e., how many commits source is behind the target)
        commits_behind = data.get("ahead_by", 0)

        if commits_behind == 0:
            print(f"✅ The {source_branch} is up to date with the {target_branch} branch.")
        else:
            print(f"❌ There are {commits_behind} commit(s) in the {target_branch} branch that are not in Source branch: {source_branch}. Pull {target_branch} into the {source_branch}")

        return commits_behind

    except requests.exceptions.RequestException as e:
        raise GitCommandError(f"❌ Failed to compare branches via GitHub API: {e}") from e
    except (KeyError, ValueError) as e:
        raise GitCommandError(f"❌ Failed to parse GitHub API response: {e}") from e

def main():
    """
    Validate the source/target branches of a pull request.

    Reads GITHUB_TOKEN, GITHUB_REPOSITORY, SOURCE_BRANCH and TARGET_BRANCH
    from the environment, reports how far the source branch is behind main,
    and then checks the branch naming and merge-order rules.

    Exits with status 1 when a required environment variable is missing or
    the GitHub comparison fails.

    Raises:
        ValidationError: Branch naming or merge-order rules not satisfied
    """
    # Get GitHub API credentials
    github_token = os.environ.get("GITHUB_TOKEN")
    repository = os.environ.get("GITHUB_REPOSITORY")

    # Retrieve branch names from environment variables
    source_branch = os.environ.get("SOURCE_BRANCH")
    target_branch = os.environ.get("TARGET_BRANCH")
    print(f"Source Branch: {source_branch}")
    print(f"Target Branch: {target_branch}")

    if not source_branch or not target_branch:
        print("❌ ERROR: SOURCE_BRANCH and TARGET_BRANCH environment variables must be set.")
        sys.exit(1)

    if not github_token or not repository:
        print("❌ ERROR: GITHUB_TOKEN and GITHUB_REPOSITORY environment variables must be set.")
        sys.exit(1)

    # Get the commit count for changes in target that are not in source_branch.
    # The call only reports the count; only a failed comparison aborts here.
    try:
        get_commit_count(source_branch, github_token, repository)
    except GitCommandError as e:
        print(e)
        sys.exit(1)

    # The naming and ordering rules are only checked when the branches differ.
    if source_branch != target_branch:
        print(
            "Validating source branch: "
            + source_branch
            + " with target branch: "
            + target_branch
        )
        # Prefix checks are case-insensitive.
        if not source_branch.lower().startswith(("feature", "release")):
            raise ValidationError(
                "❌ Source branch must start with 'feature' or 'release'"
            )

        if not target_branch.lower().startswith(("feature", "release", "main")):
            raise ValidationError(
                "❌ Target branch must start with 'feature', 'release' or 'main'"
            )

        # check the correct order
        # feature -> feature or release
        if source_branch.lower().startswith(("feature")):
            if not target_branch.lower().startswith(("feature", "release")):
                raise ValidationError(
                    "❌ Feature branch can only be merged to another feature branch or a release branch"
                )

        # release -> main
        if source_branch.lower().startswith(("release")):
            if not target_branch.lower().startswith(("main")):
                raise ValidationError(
                    "❌ Release branch can only be merged to the main branch"
                )

    # NOTE(review): nesting reconstructed from a flattened view - confirm this
    # message is meant to print even when source and target are the same branch.
    print("✅ Branch validated!")

if __name__ == "__main__":
    # Script entry point: any exception escaping main() (for example a
    # ValidationError) is printed and reported as exit status 1.
    try:
        main()
    except Exception as ex:
        print(ex)
        # Use sys.exit: the bare exit() helper is injected by the site module
        # for interactive use and is not guaranteed to exist (e.g. python -S).
        sys.exit(1)
Loading
Loading