|
| 1 | +#!/usr/bin/env bash |
| 2 | +# Demo script for dbt-dag-opt. Designed to be recorded (asciinema / QuickTime). |
| 3 | +# |
| 4 | +# Runs against a synthetic 24-model dbt project under tests/fixtures/demo_project/ |
| 5 | +# — a baseball analytics warehouse with 4 threads, ~7.5 min wall-clock, and one |
| 6 | +# shared bottleneck (int_game_events) sitting on three of the top longest paths. |
| 7 | +# |
| 8 | +# Usage: |
| 9 | +# ./scripts/demo.sh |
| 10 | +# |
| 11 | +# Each command is echoed in bold before it runs, with a narration hint above. |
| 12 | +# Pause between commands by setting PAUSE=2 (default) or call with PAUSE=0 to |
| 13 | +# rush through for a dry-run. |
| 14 | + |
# Strict mode: abort on errors, on unset variables, and on a failure in any
# pipeline stage.
set -euo pipefail

# Seconds slept before and after each demo command; override from the
# environment (PAUSE=0 skips the waits).
PAUSE="${PAUSE:-2}"

# Absolute path of the parent of the directory that contains this script.
# CDPATH is cleared and cd's stdout is discarded so an exported CDPATH can
# neither redirect the cd nor leak an extra line into the captured value;
# `--` keeps a path starting with a dash from being read as an option.
ROOT="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." >/dev/null && pwd)"

# Fixture inputs passed to every dbt-dag-opt invocation in the demo.
MANIFEST="$ROOT/tests/fixtures/demo_project/manifest.json"
RUN_RESULTS="$ROOT/tests/fixtures/demo_project/run_results.json"
| 21 | + |
# Print all arguments, joined into one string, in bold (ANSI SGR 1), then
# reset attributes and end the line.
bold() {
  local -r sgr_on=$'\033[1m' sgr_off=$'\033[0m'
  printf '%s%s%s\n' "$sgr_on" "$*" "$sgr_off"
}
# Print all arguments, joined into one string, in faint/dim text (ANSI SGR 2),
# then reset attributes and end the line. Used for narration hints.
dim() {
  local -r sgr_on=$'\033[2m' sgr_off=$'\033[0m'
  printf '%s%s%s\n' "$sgr_on" "$*" "$sgr_off"
}
# Print a section heading: a blank line, the arguments (joined into one
# string) in bold cyan behind a "▌ " marker, then another blank line.
section() {
  printf '\n\033[1;36m▌ %s\033[0m\n\n' "$*"
}
# Show a command line, then execute it, pausing before and after.
# Globals:   PAUSE (read) - duration handed to sleep around the command
# Arguments: $@ - the command line; multiple arguments are joined into a
#                 single string (with spaces under the default IFS)
# Outputs:   "$ <command>" via bold(), the command's own output, then a
#            blank line
run() {
  # Join once so the text shown on screen is exactly the text that is
  # evaluated (the original displayed "$*" but evaluated "$@").
  local demo_cmd="$*"

  bold "\$ $demo_cmd"
  sleep "$PAUSE"
  # eval rather than direct execution: the command arrives as one string that
  # may contain pipes and embedded quotes which the shell must interpret.
  # Only pass strings built inside this script, never untrusted input.
  eval "$demo_cmd"
  echo
  sleep "$PAUSE"
}
| 36 | + |
# ---------------------------------------------------------------------------
# Demo walkthrough. Each numbered section prints a heading, optional dim
# narration, and (except section 2, which is narration only) one command
# executed through run().
# ---------------------------------------------------------------------------

# Every command below is launched with `uv run`, and section 8 pipes into
# `jq`. Check for both up front so a missing tool stops the script before any
# output is produced instead of part-way through a recording.
for tool in uv jq; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf 'demo.sh: required command not found: %s\n' "$tool" >&2
    exit 127
  fi
done

# Input flags shared by every invocation. The embedded double quotes are kept
# in the string handed to run(), so each path is shown quoted on screen and is
# treated as a single word when run() evaluates the command.
io_args="--manifest \"$MANIFEST\" --run-results \"$RUN_RESULTS\""

section "1 · Which paths through the DAG are actually slow?"
dim "analyze uses manifest + run_results to compute the critical path — the"
dim "longest cumulative chain of model execution times. That's the bound on"
dim "how fast your pipeline could possibly run."
run "uv run dbt-dag-opt analyze $io_args --top 5"

# Narration only: comments on the output of the previous command.
section "2 · The Bottleneck column names the slowest model on each path"
dim "Watch for a model that appears as the bottleneck on MULTIPLE rows — that's"
dim "shared-node leverage. Optimizing it speeds up several paths at once."

section "3 · Drill into the full chain with --show-path"
run "uv run dbt-dag-opt analyze $io_args --top 3 --show-path"

section "4 · What actually happened? (replay reconstructs the observed schedule)"
dim "replay reads thread_id + timing from run_results to reconstruct the"
dim "per-thread Gantt, identify the observed critical path, and attribute"
dim "every idle gap to the upstream model a thread was waiting on."
run "uv run dbt-dag-opt replay $io_args --top-idle-gaps 5"

section "5 · Put a price on it: --warehouse-size translates wall-clock to dollars"
dim "Four framed numbers:"
dim " • Run cost — what this run billed"
dim " • Critical-path floor — the irreducible cost of your slowest chain"
dim " • Headroom — run − floor; prize for better parallelization"
dim " • Idle cost — \$ equivalent of thread-idle warehouse-seconds"
run "uv run dbt-dag-opt replay $io_args --warehouse-size L --top-idle-gaps 3"

section "6 · Change the warehouse, change the bill (same run, XL)"
dim "Doubling warehouse size doubles the rate. Same wall-clock, 2x cost."
run "uv run dbt-dag-opt replay $io_args --warehouse-size XL --top-idle-gaps 0"

section "7 · Non-Snowflake adapters: pass --credits-per-hour directly"
dim "Databricks, BigQuery, Redshift — pass the cost/hour your adapter charges."
run "uv run dbt-dag-opt replay $io_args --credits-per-hour 12 --rate-per-credit 1.5 --top-idle-gaps 0"

section "8 · Machine-readable: --format json"
dim "Everything in the text output is also in JSON — pipe to jq for dashboards,"
dim "Slack alerts, or CI annotations."
run "uv run dbt-dag-opt replay $io_args --warehouse-size L --format json | jq '.cost'"

# Closing summary; the figures are fixed narration text, not computed here.
section "Wrap"
dim "Three takeaways from this run:"
dim " 1. int_game_events is the shared bottleneck on 3 of the top 5 paths."
dim " 2. 5% of the bill is pure parallelism headroom (small — DAG is well-shaped)."
dim " 3. 30% of warehouse-seconds are idle threads — you're overprovisioned"
dim " on thread count for this DAG shape. Consider --threads 2 next run."
echo
bold "pip install dbt-dag-opt"
echo
0 commit comments