feat(replay-vision): API validation + lens_result row column #159416
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This workflow runs all of our dagster tests.
name: Dagster CI

on:
  push:
    branches:
      - master
  pull_request:

# Cancel superseded runs on PRs only; master pushes always run to completion.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

permissions:
  contents: read
  pull-requests: read

# Shared env for every job: test-only credentials and localhost service URLs
# matching the docker-compose stack started below.
env:
  SECRET_KEY: '6b01eee4f945ca25045b5aab440b953461faf08693a9abbf1166dc7c6b9772da' # unsafe - for testing only
  DATABASE_URL: 'postgres://posthog:posthog@localhost:5432/posthog'
  REDIS_URL: 'redis://localhost'
  CLICKHOUSE_HOST: 'localhost'
  CLICKHOUSE_SECURE: 'False'
  CLICKHOUSE_VERIFY: 'False'
  TEST: 1
  OBJECT_STORAGE_ENABLED: 'True'
  OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000'
  OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user'
  OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password'
  # tests would intermittently fail in GH actions
  # with exit code 134 _after passing_ all tests
  # this appears to fix it
  # absolute wild tbh https://stackoverflow.com/a/75503402
  DISPLAY: ':99.0'
  # this is a fake key so this workflow can run for external contributors as they do not have access to secrets (that we don't need here)
  OIDC_RSA_PRIVATE_KEY: ${{ vars.OIDC_RSA_FAKE_PRIVATE_KEY }}
  RUNS_ON_INTERNAL_PR: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.fork == false }}
jobs:
  # Verify the path filter below stays in sync with what dagster files
  # actually import — prevents silent drift where a new import slips in
  # without a matching filter entry.
  validate-paths:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    name: Validate dagster path filter coverage
    steps:
      # NOTE(review): checkout is tag-pinned (@v6) while other actions in this
      # workflow are SHA-pinned — consider pinning to a commit SHA for consistency.
      - uses: actions/checkout@v6
        with:
          clean: false
      - uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0
        with:
          version: '0.10.2'
      - run: uv run .github/scripts/check-dagster-paths.py
  # Job to decide if we should run dagster ci
  # See https://github.com/dorny/paths-filter#conditional-execution for more details
  changes:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    name: Determine need to run dagster checks
    # Set job outputs to values from filter step
    outputs:
      # Defaults to 'true' so master pushes (where the filter step is
      # skipped and its output is empty) always run the full suite.
      dagster: ${{ steps.filter.outputs.dagster || 'true' }}
      oldest_supported: ${{ steps.read-versions.outputs.oldest_supported }}
      matrix_include: ${{ steps.build-matrix.outputs.include || '[]' }}
      schema_cache_key: ${{ steps.schema-key.outputs.key }}
    steps:
      # For pull requests it's not necessary to checkout the code, but we
      # also want this to run on master so we need to checkout.
      # fetch-depth=1000 + blob:none mirrors ci-backend's turbo-discover so
      # HEAD^2 (PR branch tip) is reachable for the merge-base step below
      # without the cost of fetching blobs.
      - uses: actions/checkout@v6
        with:
          fetch-depth: 1000
          filter: blob:none
          clean: false
      # App token is only minted for same-repo PRs; fork PRs (no secret
      # access) fall back to github.token in the filter step below.
      - uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
        id: app-token
        if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
        with:
          client-id: ${{ secrets.GH_APP_POSTHOG_PATHS_FILTER_APP_ID }}
          private-key: ${{ secrets.GH_APP_POSTHOG_PATHS_FILTER_PRIVATE_KEY }}
      - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
        id: filter
        if: github.event_name != 'push' # Run all tests on master push
        with:
          token: ${{ steps.app-token.outputs.token || github.token }}
          filters: |
            dagster:
              # --- DAG code itself ---
              - 'posthog/dags/**'
              - 'products/*/dags/**'
              # --- posthog submodules that DAGs actually import ---
              # (verified by tracing imports of all 148 dagster Python files)
              - 'posthog/clickhouse/**'
              - 'posthog/models/**'
              - 'posthog/hogql/**'
              - 'posthog/hogql_queries/**'
              - 'posthog/settings/**'
              - 'posthog/kafka_client/**'
              - 'posthog/schema.py'
              - 'posthog/utils.py'
              - 'posthog/errors.py'
              - 'posthog/exceptions_capture.py'
              - 'posthog/metrics.py'
              - 'posthog/git.py'
              - 'posthog/llm/**'
              - 'posthog/person_db_router.py'
              - 'posthog/redis.py'
              - 'posthog/event_usage.py'
              - 'posthog/products.py'
              - 'posthog/cloud_utils.py'
              - 'posthog/ducklake/**'
              # Test infrastructure used by dagster tests
              - 'posthog/conftest.py'
              - 'posthog/test/**'
              # --- ee modules that DAGs import ---
              - 'ee/billing/**'
              - 'ee/clickhouse/**'
              - 'ee/hogai/**'
              # --- Only the 8 products that DAGs actually import from ---
              - 'products/data_warehouse/backend/**/*.py'
              - 'products/error_tracking/backend/**/*.py'
              - 'products/event_definitions/backend/**/*.py'
              - 'products/growth/backend/**/*.py'
              - 'products/llm_analytics/backend/**/*.py'
              - 'products/posthog_ai/backend/**/*.py'
              - 'products/revenue_analytics/backend/**/*.py'
              - 'products/web_analytics/backend/**/*.py'
              # Make sure we run if someone is explicitly change the workflow
              - .github/workflows/ci-dagster.yml
              - .github/clickhouse-versions.json
              # Composite action used to install sqlx-cli
              - '.github/actions/setup-sqlx-cli/**'
              # Docker helper scripts invoked in CI setup
              - bin/ci-wait-for-docker
              - bin/wait-for-docker
              # We use docker compose for tests, make sure we rerun on
              # changes to docker-compose.dev.yml e.g. dependency
              # version changes
              - docker-compose.base.yml
              - docker-compose.dev.yml
              - docker-compose.profiles.yml
              # Database init scripts used by docker compose
              - 'docker/postgres-init-scripts/**'
              # Persons DB migrations (sqlx migrate run)
              - 'rust/persons_migrations/**'
              # Django entry point + pytest config
              - manage.py
              - pytest.ini
              - pyproject.toml
              - uv.lock
| - name: Read ClickHouse versions from JSON | |
| id: read-versions | |
| if: github.event_name == 'push' || steps.filter.outputs.dagster == 'true' | |
| run: | | |
| oldest_supported=$(jq -r '.oldest_supported' .github/clickhouse-versions.json) | |
| if [ -z "$oldest_supported" ] || [ "$oldest_supported" = "null" ]; then | |
| echo "::error::No oldest_supported version found in .github/clickhouse-versions.json" | |
| exit 1 | |
| fi | |
| echo "oldest_supported=[\"$oldest_supported\"]" >> $GITHUB_OUTPUT | |
| - name: Build sharded test matrix | |
| id: build-matrix | |
| if: github.event_name == 'push' || steps.filter.outputs.dagster == 'true' | |
| env: | |
| OLDEST_SUPPORTED: ${{ steps.read-versions.outputs.oldest_supported }} | |
| run: | | |
| # :NOTE: Keep shard count in sync with observed run times. | |
| # Target: ~15 min per shard. Current: 3 shards for ~30 min total. | |
| # Consult #team-devex before changing. | |
| shards=3 | |
| # Parse the oldest_supported from the JSON array format | |
| image=$(echo "$OLDEST_SUPPORTED" | jq -r '.[0]') | |
| include=$(jq -cn --arg image "$image" --argjson shards "$shards" ' | |
| [range(1; $shards + 1) | { | |
| "clickhouse-server-image": $image, | |
| concurrency: $shards, | |
| group: . | |
| }] | |
| ') | |
| echo "include=$include" >> "$GITHUB_OUTPUT" | |
| echo "Dagster matrix: $shards shards" | |
| - name: Fetch base branch for merge-base computation | |
| if: github.event_name == 'pull_request' | |
| env: | |
| BASE_REF: ${{ github.event.pull_request.base.ref }} | |
| # Scoped, blob-less, no-tags — matches ci-backend's pattern. | |
| # Without an explicit refspec, `git fetch --deepen` would fall back | |
| # to remote.origin.fetch and pull every branch. | |
| run: git fetch --no-tags --depth=1000 --filter=blob:none origin "$BASE_REF:refs/remotes/origin/$BASE_REF" | |
| - name: Compute schema cache key from merge-base | |
| id: schema-key | |
| if: github.event_name == 'pull_request' | |
| env: | |
| BASE_REF: ${{ github.event.pull_request.base.ref }} | |
| run: | | |
| # HEAD is the synthetic merge commit; HEAD^2 is the PR branch tip. | |
| MERGE_BASE=$(git merge-base HEAD^2 "origin/${BASE_REF}" 2>/dev/null || echo "") | |
| if [ -n "$MERGE_BASE" ]; then | |
| echo "key=posthog-schema-master-${MERGE_BASE}" >> $GITHUB_OUTPUT | |
| else | |
| echo "key=" >> $GITHUB_OUTPUT | |
| echo "::notice::merge-base not found (branch too stale?) — schema cache will be skipped" | |
| fi | |
  dagster:
    name: Dagster tests (${{ matrix.group }}/${{ matrix.concurrency }})
    needs: [changes]
    timeout-minutes: 40
    strategy:
      # Let every shard finish so one flaky shard doesn't hide other results.
      fail-fast: false
      matrix:
        include: ${{ fromJson(needs.changes.outputs.matrix_include || '[]') }}
    if: needs.changes.outputs.dagster == 'true'
    runs-on: depot-ubuntu-latest
    steps:
      - name: 'Checkout repo'
        uses: actions/checkout@v6
        with:
          fetch-depth: 1
          clean: false
      - name: Clean up data directories with container permissions
        run: |
          # Use docker to clean up files created by containers
          [ -d "data" ] && docker run --rm -v "$(pwd)/data:/data" alpine sh -c "rm -rf /data/seaweedfs /data/minio" || true
        continue-on-error: true
      - name: Start stack with Docker Compose
        env:
          COMPOSE_FILE: docker-compose.dev.yml:docker-compose.profiles.yml
          CLICKHOUSE_SERVER_IMAGE: ${{ matrix.clickhouse-server-image }}
        run: |
          bin/ci-wait-for-docker launch --down
      - name: Wait for Docker services
        env:
          COMPOSE_FILE: docker-compose.dev.yml:docker-compose.profiles.yml
          CLICKHOUSE_SERVER_IMAGE: ${{ matrix.clickhouse-server-image }}
        run: bin/ci-wait-for-docker wait
      - name: Set up Python
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version-file: 'pyproject.toml'
      - name: Install uv
        id: setup-uv
        uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0
        with:
          version: '0.10.2' # pinned: unpinned setup-uv calls GH API on every job, exhausts rate limit
          enable-cache: true
          cache-dependency-glob: uv.lock
          # Only write the cache from master to avoid PR-branch cache churn.
          save-cache: ${{ github.ref == 'refs/heads/master' }}
| - name: Install SAML (python3-saml) dependencies | |
| if: steps.setup-uv.outputs.cache-hit != 'true' | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install libxml2-dev libxmlsec1-dev libxmlsec1-openssl | |
| - name: Install python dependencies | |
| shell: bash | |
| run: | | |
| UV_PROJECT_ENVIRONMENT=$pythonLocation uv sync --frozen --dev | |
| - name: Install Rust | |
| uses: dtolnay/rust-toolchain@0b1efabc08b657293548b77fb76cc02d26091c7e | |
| with: | |
| toolchain: stable | |
| components: cargo | |
| - name: Cache Rust dependencies | |
| uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1 | |
| with: | |
| shared-key: 'v2-rust-backend' | |
| workspaces: rust | |
| save-if: ${{ github.ref == 'refs/heads/master' }} | |
| - name: Install sqlx-cli | |
| uses: ./.github/actions/setup-sqlx-cli | |
| - name: Add service hostnames to /etc/hosts | |
| run: sudo echo "127.0.0.1 db redis7 kafka clickhouse clickhouse-coordinator objectstorage seaweedfs temporal" | sudo tee -a /etc/hosts | |
| - name: Create Dagster test database | |
| run: | | |
| # Ensure the test_dagster database exists for Dagster's PostgreSQL-backed | |
| # event log / run storage (avoids SQLite locking issues in tests). | |
| # The init script in docker/postgres-init-scripts/ handles this on fresh | |
| # containers, but this step is a safety net. | |
| docker compose -f docker-compose.dev.yml exec -T db \ | |
| psql -U posthog -tAc "SELECT 1 FROM pg_database WHERE datname='test_dagster'" | grep -q 1 || \ | |
| docker compose -f docker-compose.dev.yml exec -T db \ | |
| psql -U posthog -c "CREATE DATABASE test_dagster;" | |
      - name: Restore schema cache from master
        # Skips itself when the `changes` job produced no merge-base key.
        if: ${{ github.event_name == 'pull_request' && needs.changes.outputs.schema_cache_key != '' }}
        uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5
        id: schema-cache
        with:
          path: schema.sql.gz
          key: ${{ needs.changes.outputs.schema_cache_key }}
      - name: Prime test_posthog and posthog from cached schema (PR)
        # Primes both test_posthog (used by pytest --reuse-db) and the prod-style
        # posthog db (read by migrate_clickhouse → InstanceSetting). pytest's
        # keepdb=True path layers any PR-added migrations on top.
        # On cache miss, fall through to a full migrate so migrate_clickhouse works.
        if: ${{ github.event_name == 'pull_request' }}
        run: |
          if [ ! -f schema.sql.gz ]; then
            echo "::notice::Schema cache miss — falling through to full migrate"
            python manage.py migrate
            exit 0
          fi
          mkdir -p .postgres-backups
          mv schema.sql.gz .postgres-backups/schema-latest.sql.gz
          ./bin/hogli db:restore-schema-fresh
          # Load the same dump into the prod-style `posthog` db as well.
          gunzip -c .postgres-backups/schema-latest.sql.gz | \
          docker compose -f docker-compose.dev.yml exec -T db psql -q -U posthog posthog
      - name: Run Django migrations (master)
        # Master pushes don't restore from cache (the artifact-producing run
        # is in ci-backend, not here). Run a full migrate so migrate_clickhouse
        # can read posthog_instancesetting from a populated posthog db.
        if: ${{ github.event_name != 'pull_request' }}
        run: python manage.py migrate
      - name: Run persons migrations
        # Persons store lives in a separate database with sqlx-managed
        # (non-Django) migrations.
        run: |
          DATABASE_URL="postgres://posthog:posthog@localhost:5432/posthog_persons" \
          sqlx database create
          DATABASE_URL="postgres://posthog:posthog@localhost:5432/posthog_persons" \
          sqlx migrate run --source rust/persons_migrations/
      - name: Run clickhouse migrations
        run: python manage.py migrate_clickhouse
      - name: Run Dagster tests
        # NOTE(review): `products/**/dags` relies on shell globbing; without
        # `shopt -s globstar`, bash expands `**` like `*` (one level only) —
        # confirm dags never nest deeper than products/<name>/dags.
        run: |
          pytest posthog/dags products/**/dags \
            --reuse-db \
            --splits ${{ matrix.concurrency }} --group ${{ matrix.group }} \
            --durations=100 --durations-min=1.0 --store-durations \
            --splitting-algorithm=duration_based_chunks \
            --reruns 2 --reruns-delay 1 \
            -r fEsxX \
            --junitxml=junit-dagster-${{ matrix.group }}.xml
      - name: Upload test results
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
        if: always()
        with:
          name: junit-results-dagster-${{ matrix.group }}
          path: junit-*.xml
      - name: Upload updated timing data as artifacts
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
        # Timing data is only refreshed from master; always() keeps it flowing
        # even when a shard fails.
        if: ${{ github.ref == 'refs/heads/master' && always() }}
        with:
          name: timing_data-Dagster-${{ matrix.group }}
          path: .test_durations
          include-hidden-files: true
          retention-days: 2
| # Job just to collate the status of the matrix jobs for requiring passing status | |
| dagster_tests: | |
| needs: [dagster] | |
| name: Dagster Tests Pass | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 5 | |
| if: always() | |
| steps: | |
| - name: Check matrix outcome | |
| run: | | |
| # The `needs.dagster.result` will be 'success' only if all jobs in the matrix succeeded. | |
| # Otherwise, it will be 'failure'. | |
| if [[ "${{ needs.dagster.result }}" != "success" && "${{ needs.dagster.result }}" != "skipped" ]]; then | |
| echo "One or more jobs in the Dagster test matrix failed." | |
| exit 1 | |
| fi | |
| echo "All checks passed." |