feat(dbt): Add GitHub Actions workflow integration

roller100 (BearingNode) · roller100 (BearingNode) · commit 9d319ac23f54 · 2025-11-18T11:45:14.000Z
- Add producer_dbt.yml workflow for automated CI/CD testing - Add run-scenario command to CLI for per-scenario event generation - Update releases.json to include dbt version tracking - Fix requirements.txt syntax for pip compatibility The workflow follows the official OpenLineage compatibility test framework: - Uses get_valid_test_scenarios.sh for version-based scenario filtering - Generates events in per-scenario directories as individual JSON files - Integrates with run_event_validation action for syntax/semantic validation - Produces standardized test reports for compatibility tracking This addresses Steering Committee feedback on PR OpenLineage#180 to integrate dbt producer tests with GitHub Actions workflows.
diff --git a/.github/workflows/producer_dbt.yml b/.github/workflows/producer_dbt.yml
@@ -0,0 +1,113 @@
+name: dbt Producer
+
+# Reusable workflow: runs the dbt producer scenarios to generate OpenLineage
+# events, validates them, and uploads the resulting report as an artifact.
+on:
+  workflow_call:
+    inputs:
+      dbt_release:
+        description: "release of dbt-core to use"
+        type: string
+      ol_release:
+        description: "release tag of OpenLineage to use"
+        type: string
+      get-latest-snapshots:
+        description: "Should the artifact be downloaded from maven repo or circleci"
+        type: string
+
+jobs:
+  run-dbt-tests:
+    runs-on: ubuntu-latest
+    # Inputs reach the shell through environment variables instead of being
+    # interpolated into `run:` scripts, so they are never parsed as shell code.
+    env:
+      DBT_RELEASE: ${{ inputs.dbt_release }}
+      OL_RELEASE: ${{ inputs.ol_release }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # Publishes a `scenarios` output only when at least one scenario matches
+      # the requested releases; every later step is gated on that output.
+      - name: Initialize tests
+        id: init
+        run: |
+          scenarios=$(./scripts/get_valid_test_scenarios.sh "producer/dbt/scenarios/" "$DBT_RELEASE" "$OL_RELEASE")
+          if [[ -n "$scenarios" ]]; then
+            echo "scenarios=$scenarios" >> "$GITHUB_OUTPUT"
+            echo "Found scenarios: $scenarios"
+          else
+            echo "No valid scenarios found for dbt $DBT_RELEASE and OL $OL_RELEASE"
+          fi
+
+      - name: Set up Python 3.12
+        if: ${{ steps.init.outputs.scenarios }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      # A single pip invocation resolves all requirements together, so the
+      # pinned dbt-core release cannot be replaced by a later install.
+      - name: Install dbt dependencies
+        if: ${{ steps.init.outputs.scenarios }}
+        run: |
+          python -m pip install --upgrade pip
+          pip install \
+            "dbt-core==$DBT_RELEASE" \
+            dbt-duckdb \
+            "openlineage-dbt==$OL_RELEASE" \
+            -r producer/dbt/test_runner/requirements.txt
+
+      - name: Set producer output event dir
+        if: ${{ steps.init.outputs.scenarios }}
+        id: set-producer-output
+        run: |
+          echo "event_dir=/tmp/dbt-events-$(date +%s%3N)" >> "$GITHUB_OUTPUT"
+
+      # continue-on-error lets Validation run even when a scenario fails.
+      - name: Run dbt scenarios and create OL events
+        if: ${{ steps.init.outputs.scenarios }}
+        id: run-producer
+        continue-on-error: true
+        env:
+          SCENARIOS: ${{ steps.init.outputs.scenarios }}
+          EVENT_DIR: ${{ steps.set-producer-output.outputs.event_dir }}
+        run: |
+          set -e
+          IFS=';' read -ra scenarios <<< "$SCENARIOS"
+
+          for scenario in "${scenarios[@]}"
+          do
+              echo "Running dbt scenario: $scenario"
+
+              if ! python3 producer/dbt/test_runner/cli.py run-scenario \
+                --scenario "$scenario" \
+                --output-dir "$EVENT_DIR"
+              then
+                echo "Error: dbt scenario failed: $scenario"
+                exit 1
+              fi
+
+              echo "Finished running scenario: $scenario"
+          done
+
+          echo "Finished running all scenarios"
+
+      - name: Validation
+        if: ${{ steps.init.outputs.scenarios }}
+        uses: ./.github/actions/run_event_validation
+        with:
+          component: 'dbt'
+          producer-dir: 'producer/dbt'
+          release_tags: ${{ inputs.get-latest-snapshots == 'true' && 'main' || inputs.ol_release }}
+          ol_release: ${{ inputs.ol_release }}
+          component_release: ${{ inputs.dbt_release }}
+          event-directory: ${{ steps.set-producer-output.outputs.event_dir }}
+          target-path: 'dbt-${{inputs.dbt_release}}-${{inputs.ol_release}}-report.json'
+
+      - uses: actions/upload-artifact@v4
+        if: ${{ steps.init.outputs.scenarios }}
+        with:
+          name: dbt-${{inputs.dbt_release}}-${{inputs.ol_release}}-report
+          path: dbt-${{inputs.dbt_release}}-${{inputs.ol_release}}-report.json
+          retention-days: 1
diff --git a/generated-files/releases.json b/generated-files/releases.json
@@ -7,6 +7,10 @@
     "name": "spark_dataproc",
     "latest_version": ""
   },
+  {
+    "name": "dbt",
+    "latest_version": "1.8.0"
+  },
   {
     "name": "openlineage",
     "latest_version": "1.39.0"
diff --git a/producer/dbt/test_runner/cli.py b/producer/dbt/test_runner/cli.py
@@ -166,5 +166,127 @@ def validate_events(events_file, spec_dir):
         exit(1)
 
 
+@cli.command()
+@click.option('--scenario', required=True, help='Scenario name to run')
+@click.option('--output-dir', required=True, help='Output directory for events')
+def run_scenario(scenario, output_dir):
+    """Run a specific scenario for CI/CD workflow using dbt-ol wrapper.
+
+    Temporarily rewrites the runner's ``openlineage.yml`` to use a file
+    transport writing under ``<output_dir>/<scenario>/``, runs ``dbt-ol``
+    ``seed``, ``run`` and ``test``, then renames the emitted event files to
+    ``event_001.json``, ``event_002.json``, ...
+
+    Args:
+        scenario: Name of the scenario; must exist under ``scenarios``.
+        output_dir: Directory that receives one sub-directory per scenario.
+
+    Exits with status 1 when the scenario directory is missing, when
+    ``seed`` or ``run`` fails, or on any other error; exits with status 0
+    otherwise, including when ``test`` fails or no events were produced.
+    """
+    import shutil
+    import subprocess
+    import sys
+
+    import yaml
+
+    click.echo(f"🚀 Running scenario: {scenario}")
+    click.echo(f"📁 Output directory: {output_dir}\n")
+
+    # Validate scenario exists
+    scenario_path = Path(__file__).parent.parent / "scenarios" / scenario
+    if not scenario_path.exists():
+        click.echo(f"❌ Scenario not found: {scenario}")
+        sys.exit(1)
+
+    # Path to runner directory
+    runner_dir = Path(__file__).parent.parent / "runner"
+
+    # Create scenario-specific output directory; parents=True also creates
+    # output_dir itself when it is missing.
+    scenario_output_dir = Path(output_dir) / scenario
+    scenario_output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Temporary events file for this run
+    temp_events_file = scenario_output_dir / "openlineage_events.jsonl"
+
+    openlineage_config = runner_dir / "openlineage.yml"
+    openlineage_backup = runner_dir / "openlineage.yml.backup"
+    # Remember whether a config existed so cleanup can remove a generated one.
+    had_original_config = openlineage_config.exists()
+
+    # Arguments shared by every dbt-ol invocation
+    common_args = [
+        '--project-dir', str(runner_dir), '--profiles-dir', str(runner_dir),
+        '--vars', f'scenario: {scenario}', '--no-version-check',
+    ]
+
+    try:
+        # Backup original config
+        if had_original_config:
+            shutil.copy(openlineage_config, openlineage_backup)
+
+        # Update config to write to our output directory
+        config = {
+            'transport': {
+                'type': 'file',
+                'log_file_path': str(temp_events_file.absolute()),
+                'append': False
+            }
+        }
+
+        with open(openlineage_config, 'w') as f:
+            yaml.dump(config, f)
+
+        click.echo("📝 Updated OpenLineage configuration")
+
+        # Run dbt-ol commands (wrapper that emits OpenLineage events).
+        # seed and run must succeed; test failures are tolerated so the
+        # events are still captured.
+        for command, must_succeed in (('seed', True), ('run', True), ('test', False)):
+            click.echo(f"🔨 Running dbt-ol {command}...")
+            result = subprocess.run(
+                ['dbt-ol', command, *common_args],
+                cwd=runner_dir,
+                check=must_succeed,
+            )
+            if result.returncode != 0:
+                click.echo("⚠️  dbt test had failures (continuing to capture events)")
+
+        # The file transport creates individual JSON files with timestamps
+        # Find and rename them to sequential format
+        event_files = sorted(scenario_output_dir.glob("openlineage_events.jsonl-*.json"))
+
+        if event_files:
+            click.echo(f"📋 Generated {len(event_files)} OpenLineage events")
+
+            # Rename to sequential format
+            for i, event_file in enumerate(event_files, 1):
+                event_file.rename(scenario_output_dir / f"event_{i:03d}.json")
+
+            click.echo(f"✅ Events written to {scenario_output_dir}")
+        else:
+            click.echo(f"⚠️  No events generated in {scenario_output_dir}")
+
+    except subprocess.CalledProcessError as e:
+        # Output is not captured above, so only the exception is reported.
+        click.echo(f"❌ dbt command failed: {e}")
+        sys.exit(1)
+    except Exception as e:
+        click.echo(f"❌ Error running scenario: {e}")
+        sys.exit(1)
+    finally:
+        if openlineage_backup.exists():
+            # Restore original config
+            shutil.move(openlineage_backup, openlineage_config)
+            click.echo("🔄 Restored original OpenLineage configuration")
+        elif not had_original_config:
+            # No config existed before this run; remove the generated one.
+            openlineage_config.unlink(missing_ok=True)
+
+    sys.exit(0)
+
+
 if __name__ == '__main__':
     cli()
diff --git a/producer/dbt/test_runner/requirements.txt b/producer/dbt/test_runner/requirements.txt
@@ -1,10 +1,5 @@
-#!/usr/bin/env python3
-"""
-OpenLineage dbt Producer Test Dependencies
-
-Install required dependencies for test runner:
-pip install -r requirements.txt
-"""
+# OpenLineage dbt Producer Test Dependencies
+# Install: pip install -r requirements.txt
 
 # Core dependencies for test runner
 pyyaml>=6.0