Skip to content

Commit 9d319ac

Browse files
author
roller100 (BearingNode)
committed
feat(dbt): Add GitHub Actions workflow integration
- Add producer_dbt.yml workflow for automated CI/CD testing
- Add run-scenario command to CLI for per-scenario event generation
- Update releases.json to include dbt version tracking
- Fix requirements.txt syntax for pip compatibility

The workflow follows the official OpenLineage compatibility test framework:
- Uses get_valid_test_scenarios.sh for version-based scenario filtering
- Generates events in per-scenario directories as individual JSON files
- Integrates with run_event_validation action for syntax/semantic validation
- Produces standardized test reports for compatibility tracking

This addresses Steering Committee feedback on PR OpenLineage#180 to integrate dbt producer tests with GitHub Actions workflows.
1 parent e600d8f commit 9d319ac

File tree

4 files changed

+221
-7
lines changed

4 files changed

+221
-7
lines changed

.github/workflows/producer_dbt.yml

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# Reusable workflow for the dbt producer.
#
# It resolves the scenarios that are valid for the requested dbt-core /
# OpenLineage releases, runs each of them through the test runner CLI to
# generate OpenLineage events, validates those events with the shared
# run_event_validation action, and uploads the resulting report.
name: dbt Producer

on:
  workflow_call:
    inputs:
      dbt_release:
        description: "release of dbt-core to use"
        type: string
      ol_release:
        description: "release tag of OpenLineage to use"
        type: string
      get-latest-snapshots:
        description: "Should the artifact be downloaded from maven repo or circleci"
        type: string

jobs:
  run-dbt-tests:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Workflow inputs and step outputs are handed to the shell through `env:`
      # rather than interpolated into the script text, so their content can
      # never be parsed as shell syntax.
      - name: Initialize tests
        id: init
        env:
          DBT_RELEASE: ${{ inputs.dbt_release }}
          OL_RELEASE: ${{ inputs.ol_release }}
        run: |
          scenarios=$(./scripts/get_valid_test_scenarios.sh "producer/dbt/scenarios/" "$DBT_RELEASE" "$OL_RELEASE")
          if [[ -n "$scenarios" ]]; then
            echo "scenarios=$scenarios" >> "$GITHUB_OUTPUT"
            echo "Found scenarios: $scenarios"
          else
            echo "No valid scenarios found for dbt $DBT_RELEASE and OL $OL_RELEASE"
          fi

      # Every following step is skipped when no scenario applies to this
      # dbt / OpenLineage combination (the `scenarios` output is then unset).
      - name: Set up Python 3.12
        if: ${{ steps.init.outputs.scenarios }}
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install dbt dependencies
        if: ${{ steps.init.outputs.scenarios }}
        env:
          DBT_RELEASE: ${{ inputs.dbt_release }}
          OL_RELEASE: ${{ inputs.ol_release }}
        run: |
          python -m pip install --upgrade pip
          # Single resolver run: installing the adapter in a separate command
          # could silently replace the pinned dbt-core with another version.
          pip install \
            "dbt-core==${DBT_RELEASE}" \
            dbt-duckdb \
            "openlineage-dbt==${OL_RELEASE}" \
            -r producer/dbt/test_runner/requirements.txt

      - name: Set producer output event dir
        if: ${{ steps.init.outputs.scenarios }}
        id: set-producer-output
        run: |
          echo "event_dir=/tmp/dbt-events-$(date +%s%3N)" >> "$GITHUB_OUTPUT"

      # continue-on-error is kept from the original so that the validation and
      # upload steps below still run after a producer failure.
      - name: Run dbt scenarios and create OL events
        if: ${{ steps.init.outputs.scenarios }}
        id: run-producer
        continue-on-error: true
        env:
          SCENARIOS: ${{ steps.init.outputs.scenarios }}
          EVENT_DIR: ${{ steps.set-producer-output.outputs.event_dir }}
        run: |
          set -e
          # The scenario list is a single ';'-separated string.
          IFS=';' read -ra scenarios <<< "$SCENARIOS"

          for scenario in "${scenarios[@]}"
          do
            echo "Running dbt scenario: $scenario"

            if ! python3 producer/dbt/test_runner/cli.py run-scenario \
              --scenario "$scenario" \
              --output-dir "$EVENT_DIR"
            then
              echo "Error: dbt scenario failed: $scenario"
              exit 1
            fi

            echo "Finished running scenario: $scenario"
          done

          echo "Finished running all scenarios"

      - name: Validation
        if: ${{ steps.init.outputs.scenarios }}
        uses: ./.github/actions/run_event_validation
        with:
          component: 'dbt'
          producer-dir: 'producer/dbt'
          release_tags: ${{ inputs.get-latest-snapshots == 'true' && 'main' || inputs.ol_release }}
          ol_release: ${{ inputs.ol_release }}
          component_release: ${{ inputs.dbt_release }}
          event-directory: ${{ steps.set-producer-output.outputs.event_dir }}
          target-path: 'dbt-${{inputs.dbt_release}}-${{inputs.ol_release}}-report.json'

      - uses: actions/upload-artifact@v4
        if: ${{ steps.init.outputs.scenarios }}
        with:
          name: dbt-${{inputs.dbt_release}}-${{inputs.ol_release}}-report
          path: dbt-${{inputs.dbt_release}}-${{inputs.ol_release}}-report.json
          retention-days: 1

generated-files/releases.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
"name": "spark_dataproc",
88
"latest_version": ""
99
},
10+
{
11+
"name": "dbt",
12+
"latest_version": "1.8.0"
13+
},
1014
{
1115
"name": "openlineage",
1216
"latest_version": "1.39.0"

producer/dbt/test_runner/cli.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,5 +166,123 @@ def validate_events(events_file, spec_dir):
166166
exit(1)
167167

168168

169+
@cli.command()
@click.option('--scenario', required=True, help='Scenario name to run')
@click.option('--output-dir', required=True, help='Output directory for events')
def run_scenario(scenario, output_dir):
    """Run a specific scenario for CI/CD workflow using dbt-ol wrapper"""
    # NOTE: the docstring above is what click prints as the command's --help
    # text, so the extended documentation lives in this comment instead.
    #
    # Steps:
    #   1. Check that <producer>/scenarios/<scenario> exists.
    #   2. Point <producer>/runner/openlineage.yml at a file transport that
    #      writes into <output_dir>/<scenario>/.
    #   3. Run `dbt-ol seed`, `dbt-ol run` (both must succeed) and
    #      `dbt-ol test` (failures are reported but tolerated).
    #   4. Rename the emitted event files to event_001.json, event_002.json, ...
    #   5. Put openlineage.yml back the way it was found.
    #
    # The process exits with status 0 on success and 1 when the scenario is
    # missing, a required dbt-ol command fails, or any other error occurs.
    import shutil
    import subprocess
    import sys

    import yaml

    click.echo(f"🚀 Running scenario: {scenario}")
    click.echo(f"📁 Output directory: {output_dir}\n")

    producer_dir = Path(__file__).parent.parent

    # Validate scenario exists
    scenario_path = producer_dir / "scenarios" / scenario
    if not scenario_path.exists():
        click.echo(f"❌ Scenario not found: {scenario}")
        sys.exit(1)

    # Directory holding the dbt project, profiles and openlineage.yml
    runner_dir = producer_dir / "runner"

    # Scenario-specific output directory (parents=True also creates output_dir)
    scenario_output_dir = Path(output_dir) / scenario
    scenario_output_dir.mkdir(parents=True, exist_ok=True)

    # Base name handed to the file transport for this run
    temp_events_file = scenario_output_dir / "openlineage_events.jsonl"

    openlineage_config = runner_dir / "openlineage.yml"
    openlineage_backup = runner_dir / "openlineage.yml.backup"
    had_original_config = openlineage_config.exists()
    config_written = False

    def run_dbt_ol(subcommand, check):
        """Run one dbt-ol subcommand against the runner project."""
        click.echo(f"🔨 Running dbt-ol {subcommand}...")
        return subprocess.run(
            ['dbt-ol', subcommand,
             '--project-dir', str(runner_dir),
             '--profiles-dir', str(runner_dir),
             '--vars', f'scenario: {scenario}',
             '--no-version-check'],
            cwd=runner_dir,
            check=check,
        )

    exit_code = 0
    try:
        # Backup original config
        if had_original_config:
            shutil.copy(openlineage_config, openlineage_backup)

        # Update config to write to our output directory
        config = {
            'transport': {
                'type': 'file',
                'log_file_path': str(temp_events_file.absolute()),
                'append': False,
            }
        }

        # Flag is set before writing so a partially written file is cleaned up
        config_written = True
        with open(openlineage_config, 'w') as f:
            yaml.dump(config, f)

        click.echo("📝 Updated OpenLineage configuration")

        # seed and run must succeed; a non-zero exit raises CalledProcessError
        run_dbt_ol('seed', check=True)
        run_dbt_ol('run', check=True)

        # Failing dbt tests still emit events, so they do not abort the run
        test_result = run_dbt_ol('test', check=False)
        if test_result.returncode != 0:
            click.echo("⚠️ dbt test had failures (continuing to capture events)")

        # The file transport creates individual JSON files with timestamps;
        # sorting by name is what fixes the order of the sequential numbering.
        event_files = sorted(
            scenario_output_dir.glob("openlineage_events.jsonl-*.json")
        )

        if event_files:
            click.echo(f"📋 Generated {len(event_files)} OpenLineage events")

            # Rename to sequential format
            for i, event_file in enumerate(event_files, 1):
                event_file.rename(scenario_output_dir / f"event_{i:03d}.json")

            click.echo(f"✅ Events written to {scenario_output_dir}")
        else:
            click.echo(f"⚠️ No events generated in {scenario_output_dir}")

    except subprocess.CalledProcessError as e:
        click.echo(f"❌ dbt command failed: {e}")
        # Output is only populated when the child's stdout was captured
        if e.output:
            output = e.output.decode() if isinstance(e.output, bytes) else e.output
            click.echo(f" Output: {output}")
        exit_code = 1
    except Exception as e:
        # Top-level boundary of the command: report and fail the process
        click.echo(f"❌ Error running scenario: {e}")
        exit_code = 1
    finally:
        # Restore original config
        if openlineage_backup.exists():
            shutil.move(openlineage_backup, openlineage_config)
            click.echo("🔄 Restored original OpenLineage configuration")
        elif config_written and not had_original_config and openlineage_config.exists():
            # There was no config before this run: remove the generated one
            # so the runner directory is left exactly as it was found.
            openlineage_config.unlink()

    sys.exit(exit_code)
286+
169287
if __name__ == '__main__':
170288
cli()

producer/dbt/test_runner/requirements.txt

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,5 @@
1-
#!/usr/bin/env python3
2-
"""
3-
OpenLineage dbt Producer Test Dependencies
4-
5-
Install required dependencies for test runner:
6-
pip install -r requirements.txt
7-
"""
1+
# OpenLineage dbt Producer Test Dependencies
2+
# Install: pip install -r requirements.txt
83

94
# Core dependencies for test runner
105
pyyaml>=6.0

0 commit comments

Comments
 (0)