66 changes: 58 additions & 8 deletions .github/workflows/main_pr.yml
@@ -2,6 +2,13 @@ name: Pull Request trigger

on:
pull_request:
workflow_dispatch:
inputs:
components:
description: 'Components to test (comma-separated: dbt, spark_dataproc, hive_dataproc, dataplex, scenarios, or "all")'
required: false
default: 'all'
type: string


permissions:
@@ -19,10 +26,12 @@ jobs:
run_scenarios: ${{ steps.get-changed.outputs.scenarios_changed }}
run_spark_dataproc: ${{ steps.get-changed.outputs.spark_dataproc_changed }}
run_hive_dataproc: ${{ steps.get-changed.outputs.hive_dataproc_changed }}
run_dbt: ${{ steps.get-changed.outputs.dbt_changed }}
ol_release: ${{ steps.get-release.outputs.openlineage_release }}
any_run: ${{ steps.get-changed.outputs.any_changed }}
spark_matrix: ${{ steps.set-matrix-values.outputs.spark_dataproc_matrix }}
hive_matrix: ${{ steps.set-matrix-values.outputs.hive_dataproc_matrix }}
dbt_matrix: ${{ steps.set-matrix-values.outputs.dbt_matrix }}
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -47,18 +56,46 @@ jobs:
fi
}

CHANGED_FILES=$(gh pr diff ${{ github.event.pull_request.number }} --name-only)
if [[ -n "$CHANGED_FILES" ]]; then
echo "changes=$(echo "$CHANGED_FILES" | jq -R -s -c 'split("\n")[:-1]')" >> $GITHUB_OUTPUT
check_component() {
local component=$1
local output=$2
if [[ "$COMPONENTS" == "all" ]] || echo "$COMPONENTS" | grep -qw "$component"; then
echo "$output=true" >> $GITHUB_OUTPUT
echo "true"
fi
}

# Handle workflow_dispatch (manual trigger)
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
COMPONENTS="${{ github.event.inputs.components }}"
echo "Manual trigger - testing components: $COMPONENTS"

scenarios=$(check_path "consumer/scenarios/" "scenarios_changed")
dataplex=$(check_path "consumer/consumers/dataplex/" "dataplex_changed")
spark_dataproc=$(check_path "producer/spark_dataproc/" "spark_dataproc_changed")
hive_dataproc=$(check_path "producer/hive_dataproc/" "hive_dataproc_changed")
scenarios=$(check_component "scenarios" "scenarios_changed")
dataplex=$(check_component "dataplex" "dataplex_changed")
spark_dataproc=$(check_component "spark_dataproc" "spark_dataproc_changed")
hive_dataproc=$(check_component "hive_dataproc" "hive_dataproc_changed")
dbt=$(check_component "dbt" "dbt_changed")

if [[ $scenarios || $dataplex || $spark_dataproc || $hive_dataproc ]]; then
if [[ $scenarios || $dataplex || $spark_dataproc || $hive_dataproc || $dbt ]]; then
echo "any_changed=true" >> $GITHUB_OUTPUT
fi

# Handle pull_request (PR trigger)
else
CHANGED_FILES=$(gh pr diff ${{ github.event.pull_request.number }} --name-only)
if [[ -n "$CHANGED_FILES" ]]; then
echo "changes=$(echo "$CHANGED_FILES" | jq -R -s -c 'split("\n")[:-1]')" >> $GITHUB_OUTPUT

scenarios=$(check_path "consumer/scenarios/" "scenarios_changed")
dataplex=$(check_path "consumer/consumers/dataplex/" "dataplex_changed")
spark_dataproc=$(check_path "producer/spark_dataproc/" "spark_dataproc_changed")
hive_dataproc=$(check_path "producer/hive_dataproc/" "hive_dataproc_changed")
dbt=$(check_path "producer/dbt/" "dbt_changed")

if [[ $scenarios || $dataplex || $spark_dataproc || $hive_dataproc || $dbt ]]; then
echo "any_changed=true" >> $GITHUB_OUTPUT
fi
fi
fi
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -94,6 +131,7 @@ jobs:

echo "spark_dataproc_matrix=$(get_matrix spark_dataproc)" >> $GITHUB_OUTPUT
echo "hive_dataproc_matrix=$(get_matrix hive_dataproc)" >> $GITHUB_OUTPUT
echo "dbt_matrix=$(get_matrix dbt)" >> $GITHUB_OUTPUT


######## COMPONENT VALIDATION ########
@@ -145,6 +183,17 @@ jobs:
component_release: ${{ matrix.component_version }}
get-latest-snapshots: 'false'

dbt:
needs: initialize_workflow
if: ${{ needs.initialize_workflow.outputs.run_dbt == 'true' }}
uses: ./.github/workflows/producer_dbt.yml
strategy:
matrix: ${{ fromJson(needs.initialize_workflow.outputs.dbt_matrix) }}
with:
dbt_release: ${{ matrix.component_version }}
ol_release: ${{ matrix.openlineage_versions }}
get-latest-snapshots: 'false'

######## COLLECTION OF REPORTS AND EXECUTE APPROPRIATE ACTIONS ########

collect-and-compare-reports:
@@ -153,6 +202,7 @@ jobs:
- scenarios
- dataplex
- hive_dataproc
- dbt
if: ${{ !failure() && needs.initialize_workflow.outputs.any_run == 'true'}}
uses: ./.github/workflows/collect_and_compare_reports.yml
with:
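Note on the new `workflow_dispatch` path added above: the main PR workflow can now be started by hand, with the `components` input selecting which suites run ("all" or a comma-separated list; `check_component` matches whole words). A minimal sketch of a manual trigger via the GitHub CLI — the workflow filename and the example component lists are taken from this diff, everything else is illustrative:

```bash
# Hypothetical manual runs of the PR workflow (filename assumed from the diff above).
# "components" accepts "all" or a comma-separated list; check_component greps for
# whole-word matches, so "dbt,spark_dataproc" would enable only those two producer jobs.
gh workflow run main_pr.yml -f components=all
gh workflow run main_pr.yml -f components=dbt,spark_dataproc

# Inspect the most recent run and see which component jobs ran or were skipped.
gh run list --workflow=main_pr.yml --limit 1
```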
111 changes: 111 additions & 0 deletions .github/workflows/producer_dbt.yml
@@ -0,0 +1,111 @@
name: dbt Producer

on:
workflow_call:
inputs:
dbt_release:
description: "release of dbt-core to use"
type: string
ol_release:
description: "release tag of OpenLineage to use"
type: string
get-latest-snapshots:
description: "Should the artifact be downloaded from maven repo or circleci"
type: string
workflow_dispatch:
inputs:
dbt_release:
description: "release of dbt-core to use"
type: string
default: "1.8.0"
ol_release:
description: "release tag of OpenLineage to use"
type: string
default: "1.23.0"
get-latest-snapshots:
description: "Should the artifact be downloaded from maven repo or circleci"
type: string
default: "false"

jobs:
run-dbt-tests:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Initialize tests
id: init
run: |
scenarios=$(./scripts/get_valid_test_scenarios.sh "producer/dbt/scenarios/" ${{ inputs.dbt_release }} ${{ inputs.ol_release }} )
if [[ "$scenarios" != "" ]]; then
echo "scenarios=$scenarios" >> $GITHUB_OUTPUT
echo "Found scenarios: $scenarios"
else
echo "No valid scenarios found for dbt ${{ inputs.dbt_release }} and OL ${{ inputs.ol_release }}"
fi

- name: Set up Python 3.12
if: ${{ steps.init.outputs.scenarios }}
uses: actions/setup-python@v5
with:
python-version: "3.12"

- name: Install dbt dependencies
if: ${{ steps.init.outputs.scenarios }}
run: |
python -m pip install --upgrade pip
pip install dbt-core==${{ inputs.dbt_release }}
pip install dbt-duckdb
pip install openlineage-dbt==${{ inputs.ol_release }}
pip install -r producer/dbt/test_runner/requirements.txt

- name: Set producer output event dir
if: ${{ steps.init.outputs.scenarios }}
id: set-producer-output
run: |
echo "event_dir=/tmp/dbt-events-$(date +%s%3N)" >> $GITHUB_OUTPUT

- name: Run dbt scenarios and create OL events
if: ${{ steps.init.outputs.scenarios }}
id: run-producer
continue-on-error: true
run: |
set -e
IFS=';' read -ra scenarios <<< "${{ steps.init.outputs.scenarios }}"

for scenario in "${scenarios[@]}"
do
echo "Running dbt scenario: $scenario"

if ! python3 producer/dbt/test_runner/cli.py run-scenario \
--scenario "$scenario" \
--output-dir "${{ steps.set-producer-output.outputs.event_dir }}"
then
echo "Error: dbt scenario failed: $scenario"
exit 1
fi

echo "Finished running scenario: $scenario"
done

echo "Finished running all scenarios"

- name: Validation
if: ${{ steps.init.outputs.scenarios }}
uses: ./.github/actions/run_event_validation
with:
component: 'dbt'
producer-dir: 'producer/dbt'
release_tags: ${{ inputs.get-latest-snapshots == 'true' && 'main' || inputs.ol_release }}
ol_release: ${{ inputs.ol_release }}
component_release: ${{ inputs.dbt_release }}
event-directory: ${{ steps.set-producer-output.outputs.event_dir }}
target-path: 'dbt-${{inputs.dbt_release}}-${{inputs.ol_release}}-report.json'

- uses: actions/upload-artifact@v4
if: ${{ steps.init.outputs.scenarios }}
with:
name: dbt-${{inputs.dbt_release}}-${{inputs.ol_release}}-report
path: dbt-${{inputs.dbt_release}}-${{inputs.ol_release}}-report.json
retention-days: 1
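For reference, the `run-dbt-tests` job above can be approximated locally. The sketch below simply restates the workflow's own install and run steps outside of Actions; the version pins are illustrative defaults from the `workflow_dispatch` inputs, and `<scenario-name>` is a placeholder for whatever `get_valid_test_scenarios.sh` would return:

```bash
# Local approximation of the run-dbt-tests job (versions illustrative; flags mirror
# the "Run dbt scenarios and create OL events" step in producer_dbt.yml).
python -m pip install --upgrade pip
pip install dbt-core==1.8.0 dbt-duckdb openlineage-dbt==1.23.0
pip install -r producer/dbt/test_runner/requirements.txt

# Unique output directory for the generated OpenLineage events, as in the workflow.
EVENT_DIR=/tmp/dbt-events-$(date +%s%3N)

python3 producer/dbt/test_runner/cli.py run-scenario \
  --scenario <scenario-name> \
  --output-dir "$EVENT_DIR"
```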
17 changes: 16 additions & 1 deletion .gitignore
@@ -6,6 +6,9 @@ __pycache__/
# C extensions
*.so

#Status files and documentation
Status/

# Distribution / packaging
.Python
build/
@@ -164,4 +167,16 @@ cython_debug/
.idea/

ignored/
bin/
bin/

# OpenLineage event files generated during local testing
openlineage_events.jsonl
*/openlineage_events.jsonl
**/events/openlineage_events.jsonl

# Virtual environments
venv/
test_venv/
*/venv/
*/test_venv/
**/test_venv/
4 changes: 4 additions & 0 deletions generated-files/releases.json
@@ -7,6 +7,10 @@
"name": "spark_dataproc",
"latest_version": ""
},
{
"name": "dbt",
"latest_version": "1.8.0"
},
{
"name": "openlineage",
"latest_version": "1.39.0"