Skip to content

fix(signals): evict stale db connections in temporal activities #284974

fix(signals): evict stale db connections in temporal activities

fix(signals): evict stale db connections in temporal activities #284974

Workflow file for this run

# Backend CI: this workflow runs all of our backend django tests.
#
# If these tests get too slow, look at increasing concurrency and re-timing the tests by
# manually dispatching the .github/workflows/ci-backend-update-test-timing.yml action
name: Backend CI
on:
  push:
    branches: [master]
  workflow_dispatch:
    inputs:
      clickhouseServerVersion:
        description: ClickHouse server version. Leave blank for default
        type: string
  pull_request:
  merge_group:
# Runs are grouped per workflow + branch/ref; only pull_request runs cancel an in-progress predecessor.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
env:
  SECRET_KEY: '6b01eee4f945ca25045b5aab440b953461faf08693a9abbf1166dc7c6b9772da' # unsafe - for testing only
  DATABASE_URL: 'postgres://posthog:posthog@localhost:5432/posthog'
  REDIS_URL: 'redis://localhost'
  CLICKHOUSE_HOST: 'localhost'
  CLICKHOUSE_SECURE: 'False'
  CLICKHOUSE_VERIFY: 'False'
  # Restricted `autoresearch` user defined in docker/clickhouse/users-dev.xml,
  # pointed at the CI test DB (posthog_test, not `default` as in regular dev).
  CLICKHOUSE_TEST_CLUSTER_HOST: 'localhost'
  CLICKHOUSE_TEST_CLUSTER_DATABASE: 'posthog_test'
  CLICKHOUSE_TEST_CLUSTER_USER: 'autoresearch'
  CLICKHOUSE_TEST_CLUSTER_PASSWORD: 'autoresearchpass'
  CLICKHOUSE_TEST_CLUSTER_SECURE: 'False'
  CLICKHOUSE_TEST_CLUSTER_VERIFY: 'False'
  TEST: 1
  # Empty unless the workflow was dispatched manually with a ClickHouse version input
  CLICKHOUSE_SERVER_IMAGE_VERSION: ${{ github.event.inputs.clickhouseServerVersion || '' }}
  CLICKHOUSE_COMPAT_PYTEST_TARGETS: 'posthog/clickhouse ee/clickhouse'
  OBJECT_STORAGE_ENABLED: 'True'
  OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000'
  OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user'
  OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password'
  UV_HTTP_TIMEOUT: 120
  # tests would intermittently fail in GH actions
  # with exit code 134 _after passing_ all tests
  # this appears to fix it
  # absolute wild tbh https://stackoverflow.com/a/75503402
  DISPLAY: ':99.0'
  # this is a fake key so this workflow can run for external contributors as they do not have access to secrets (that we don't need here)
  OIDC_RSA_PRIVATE_KEY: ${{ vars.OIDC_RSA_FAKE_PRIVATE_KEY }}
  # 'true' for every non-PR event and for PRs whose head repo is not a fork
  RUNS_ON_INTERNAL_PR: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.fork == false }}
  SANDBOX_JWT_PRIVATE_KEY: ${{ vars.OIDC_RSA_FAKE_PRIVATE_KEY }}
permissions:
  contents: read
  pull-requests: write
jobs:
jobs:
# Job to decide if we should run backend ci
# See https://github.com/dorny/paths-filter#conditional-execution for more details
changes:
  runs-on: ubuntu-latest
  timeout-minutes: 5
  name: Determine need to run backend and migration checks
  # Set job outputs to values from filter step.
  # The filter step is skipped on `push`, leaving its outputs empty, so the
  # `|| 'true'` fallbacks turn every check on for pushes to master.
  outputs:
    backend: ${{ steps.filter.outputs.backend || 'true' }}
    backend_files: ${{ steps.filter.outputs.backend_files }}
    migrations: ${{ steps.filter.outputs.migrations || 'true' }}
    migrations_files: ${{ steps.filter.outputs.migrations_files }}
    tasks_temporal: ${{ steps.filter.outputs.tasks_temporal || 'true' }}
    openapi_types: ${{ steps.filter.outputs.openapi_types || 'true' }}
    legacy: ${{ steps.filter.outputs.legacy || 'true' }}
  steps:
    # For pull requests it's not necessary to checkout the code, but we
    # also want this to run on master so we need to checkout
    - uses: actions/checkout@v6
      with:
        clean: false
    - name: Clean up data directories with container permissions
      run: |
        # Use docker to clean up files created by containers
        [ -d "data" ] && docker run --rm -v "$(pwd)/data:/data" alpine sh -c "rm -rf /data/seaweedfs /data/minio" || true
      continue-on-error: true
    # App token is only minted for same-repo PRs; everything else falls back to github.token below
    - uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
      id: app-token
      if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
      with:
        client-id: ${{ secrets.GH_APP_POSTHOG_PATHS_FILTER_APP_ID }}
        private-key: ${{ secrets.GH_APP_POSTHOG_PATHS_FILTER_PRIVATE_KEY }}
    - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
      id: filter
      if: github.event_name != 'push' # Run all tests on master push
      with:
        token: ${{ steps.app-token.outputs.token || github.token }}
        list-files: 'escape'
        filters: |
          # Non-product backend code — when only products/ change,
          # turbo-discover uses Turbo query affectedness to detect changed
          # products and decide whether Django runs.
          # Defined once and anchored so `backend` reuses it instead of
          # keeping a second, hand-synced copy of the same list.
          legacy: &legacy
            # Avoid running backend tests for irrelevant changes
            # NOTE: we are at risk of missing a dependency here. We could make
            # the dependencies more clear if we separated the backend/frontend
            # code completely
            # really we should ignore ee/frontend/** but dorny doesn't support that
            # - '!ee/frontend/**'
            # including the negated rule appears to work
            # but makes it always match because the checked file always isn't `ee/frontend/**` 🙈
            - 'ee/**/*'
            - 'common/__init__.py'
            - 'common/hogql_parser/**'
            - 'common/hogvm/**'
            - 'common/ingestion/**'
            - 'common/migration_utils/**'
            - 'common/plugin_transpiler/**'
            - 'posthog/**/*'
            - bin/build-schema-latest-versions.py
            - bin/build-taxonomy-json.py
            - bin/check_uv_python_compatibility.py
            - bin/find_python_dependencies.py
            - bin/unit_metrics.py
            - pyproject.toml
            - uv.lock
            - requirements.txt
            - requirements-dev.txt
            - mypy.ini
            - pytest.ini
            - .test_durations # Used for pytest-split sharding
            # generates posthog/schema.py
            - frontend/src/queries/schema.json
            # Operator parity check
            - rust/feature-flags/src/properties/property_models.rs
            - frontend/src/products.json # Loaded at runtime by posthog/products.py
            - 'products/*/manifest.tsx' # Generates products.json
            # Make sure we run if someone is explicitly changing the workflow
            - .github/workflows/ci-backend.yml
            - .github/clickhouse-versions.json
            # We use docker compose for tests, make sure we rerun on
            # changes to docker-compose.dev.yml e.g. dependency
            # version changes
            - docker-compose.dev.yml
            - docker-compose.profiles.yml
            - docker-compose.base.yml
            - bin/wait-for-docker
            - bin/ci-wait-for-docker
            - frontend/public/email/*
            - 'docker/clickhouse/**'
          # Everything from legacy PLUS products/**/backend/**/*
          backend:
            - *legacy
            - 'products/**/backend/**/*'
          migrations:
            - 'docker/clickhouse/**'
            - 'posthog/migrations/*.py'
            - 'products/*/backend/migrations/*.py'
            - 'products/*/migrations/*.py' # Legacy structure
            - 'rust/persons_migrations/*.sql'
            - 'rust/bin/migrate-persons'
          tasks_temporal:
            - 'products/tasks/backend/temporal/**/*'
          openapi_types:
            # Generated OpenAPI types - validate they match schema
            - 'frontend/src/generated/**/*'
            - 'products/*/frontend/generated/**/*'
            - 'services/mcp/src/generated/**/*'
            - 'services/mcp/src/api/generated.ts'
            # Generation tooling - changes here could affect output
            - 'tools/openapi-codegen/**/*'
            - 'services/mcp/scripts/lib/**'
            - 'frontend/bin/generate-openapi-types.mjs'
            - 'frontend/src/lib/api-orval-mutator.ts'
            - 'services/mcp/scripts/**/*'
            - 'services/mcp/definitions/**/*.yaml'
            - 'products/*/mcp/**/*.yaml'
            - 'services/mcp/src/tools/generated/**/*'
            - 'services/mcp/schema/generated-tool-definitions.json'
            - 'services/mcp/schema/tool-definitions-all.json'
# Decides whether snapshot tests run in CHECK mode (snapshots must match) or
# UPDATE mode (snapshots may be rewritten). Exposes the decision as the `mode` output.
detect-snapshot-mode:
  name: Detect snapshot mode
  runs-on: ubuntu-latest
  timeout-minutes: 5
  needs: [changes]
  if: needs.changes.outputs.backend == 'true'
  outputs:
    mode: ${{ steps.detect.outputs.mode }}
  steps:
    - name: Detect mode
      id: detect
      env:
        # Empty on non-PR events, which is why the fork test below also checks for non-empty
        PR_REPO: ${{ github.event.pull_request.head.repo.full_name }}
        REPO: ${{ github.repository }}
        HAS_NO_SNAPSHOT_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'no-snapshot-update') }}
        AUTHOR: ${{ github.actor }}
      run: |
        if [ -n "$PR_REPO" ] && [ "$PR_REPO" != "$REPO" ]; then
          echo "mode=check" >> "$GITHUB_OUTPUT"
          echo "Fork detected - running in CHECK mode (no commits allowed)"
        elif [ "$HAS_NO_SNAPSHOT_LABEL" = "true" ]; then
          echo "mode=check" >> "$GITHUB_OUTPUT"
          echo "::notice::🔍 Running in CHECK mode - 'no-snapshot-update' label detected"
        else
          echo "Workflow triggered by: $AUTHOR"
          # Dependabot is excluded - it creates new PRs that may need snapshot updates
          # Other bots (github-actions, posthog-bot) commit snapshots and must use CHECK mode to avoid loops
          if [[ "$AUTHOR" != "dependabot[bot]" ]] && \
             { [[ "$AUTHOR" == *"github-actions"* ]] || [[ "$AUTHOR" == *"[bot]"* ]] || [[ "$AUTHOR" == "posthog-bot" ]]; }; then
            echo "mode=check" >> "$GITHUB_OUTPUT"
            echo "::notice::🔍 Running in CHECK mode - snapshots must match exactly"
          else
            echo "mode=update" >> "$GITHUB_OUTPUT"
            echo "::notice::🔄 Running in UPDATE mode - snapshots can be updated"
          fi
        fi
# Fast pre-job: determines which products need testing and if Django should run
# Only needs pnpm + node — no Python, Docker, or services
# Runs on depot to match the product-test runner environment
turbo-discover:
  needs: changes
  if: needs.changes.outputs.backend == 'true'
  runs-on: depot-ubuntu-latest
  timeout-minutes: 20
  name: Discover product tests
  outputs:
    run_legacy: ${{ steps.discover.outputs.run_legacy }}
    matrix: ${{ steps.discover.outputs.matrix }}
    # Empty outside pull requests (the schema-key step is skipped) and when no merge-base is found
    schema_cache_key: ${{ steps.schema-key.outputs.key }}
  steps:
    - uses: actions/checkout@v6
      with:
        fetch-depth: 1000
        filter: blob:none
    - name: Fetch current PR base for Turbo affected diff
      if: github.event_name == 'pull_request'
      env:
        BASE_REF: ${{ github.event.pull_request.base.ref }}
      run: git fetch --no-tags --depth=1000 --filter=blob:none origin "$BASE_REF:refs/remotes/origin/$BASE_REF"
    - name: Compute schema cache key from merge-base
      id: schema-key
      if: github.event_name == 'pull_request'
      env:
        BASE_REF: ${{ github.event.pull_request.base.ref }}
      run: |
        # HEAD is the synthetic merge commit; HEAD^2 is the PR branch tip.
        # The fetch-depth:1000 checkout + base-ref fetch above ensure the
        # full ancestry needed to find the divergence point is available.
        MERGE_BASE=$(git merge-base HEAD^2 "origin/${BASE_REF}" 2>/dev/null || echo "")
        if [ -n "$MERGE_BASE" ]; then
          echo "key=posthog-schema-master-${MERGE_BASE}" >> "$GITHUB_OUTPUT"
        else
          echo "key=" >> "$GITHUB_OUTPUT"
          echo "::notice::merge-base not found (branch too stale?) — schema cache will be skipped"
        fi
    - name: Setup pnpm
      uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
    - name: Setup Node.js
      uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
      with:
        node-version-file: .nvmrc
        cache: pnpm
        cache-dependency-path: |
          pnpm-lock.yaml
          .github/workflows/ci-backend.yml
    - name: Install pnpm dependencies
      run: pnpm install --frozen-lockfile --filter=@posthog/root
    - name: Discover products to test
      id: discover
      env:
        # On pushes to master, always run everything.
        # On PRs, use the path filter to detect legacy changes.
        LEGACY_CHANGED: ${{ github.event_name != 'pull_request' || needs.changes.outputs.legacy }}
        TURBO_SCM_BASE: ${{ github.event_name == 'pull_request' && format('origin/{0}', github.event.pull_request.base.ref) || '' }}
        TURBO_SCM_HEAD: ${{ github.sha }}
      run: |
        # turbo-discover.js uses Turbo's Git affectedness to detect
        # changed products. Non-isolated product changes trigger the
        # full suite (all products + Django).
        RESULT=$(node .github/scripts/turbo-discover.js)
        echo "Result: $RESULT"
        # Publish the `.matrix` and `.run_legacy` fields of the script's JSON result as step outputs
        echo "matrix=$(echo "$RESULT" | jq -c '.matrix')" >> "$GITHUB_OUTPUT"
        echo "run_legacy=$(echo "$RESULT" | jq -r '.run_legacy')" >> "$GITHUB_OUTPUT"
# Runs product tests in parallel — one matrix job per group
# Each job gets its own runner + Docker stack, so no shared DB conflicts
# Small products (< 50 tests) are grouped into a single job to avoid setup overhead
turbo-tests:
  needs: [changes, turbo-discover, detect-snapshot-mode]
  # always(): evaluate this condition even if a needed job was skipped or failed;
  # the job still requires a successful discover run with a non-empty matrix.
  if: >-
    always() &&
    needs.turbo-discover.result == 'success' &&
    needs.turbo-discover.outputs.matrix != '[]' &&
    needs.turbo-discover.outputs.matrix != ''
  runs-on: depot-ubuntu-latest
  timeout-minutes: 30
  name: Product tests (${{ matrix.group }})
  strategy:
    fail-fast: false
    matrix:
      include: ${{ fromJson(needs.turbo-discover.outputs.matrix) }}
  steps:
    - uses: actions/checkout@v6
    # Start Docker early (before dependency installs) so containers can pull
    # images and initialize while we install deps. This matches the pattern
    # used by core-tests/django which achieves ~1s wait times.
    - name: Clean up data directories
      run: |
        [ -d "data" ] && docker run --rm -v "$(pwd)/data:/data" alpine sh -c "rm -rf /data/seaweedfs /data/minio" || true
      continue-on-error: true
    - name: Start services
      env:
        COMPOSE_FILE: docker-compose.dev.yml:docker-compose.profiles.yml
        COMPOSE_PROFILES: temporal,azure
        CLICKHOUSE_SERVER_IMAGE: clickhouse/clickhouse-server:26.3.9.8
      run: |
        cp posthog/user_scripts/latest_user_defined_function.xml docker/clickhouse/user_defined_function.xml
        bin/ci-wait-for-docker launch --background --down \
          db redis7 clickhouse zookeeper kafka objectstorage feature-flags \
          temporal elasticsearch objectstorage-azure
    - name: Setup pnpm
      uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
    - name: Setup Node.js
      uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
      with:
        node-version-file: .nvmrc
        cache: pnpm
        cache-dependency-path: |
          pnpm-lock.yaml
          .github/workflows/ci-backend.yml
    - name: Install pnpm dependencies
      run: pnpm install --frozen-lockfile --filter=@posthog/root
    - name: Set up Python
      # Pinned to the same commit the repo-checks and check-migrations jobs use,
      # instead of the floating `v6` tag.
      uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
      with:
        python-version: 3.12.12
        token: ${{ secrets.POSTHOG_BOT_PAT }}
    - name: Install uv
      id: setup-uv
      # Same setup-uv release as the other jobs in this workflow
      uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0
      with:
        version: '0.10.2' # pinned: unpinned setup-uv calls GH API on every job, exhausts rate limit
        enable-cache: true
        cache-dependency-glob: uv.lock
        save-cache: ${{ github.ref == 'refs/heads/master' }}
    - name: Install SAML dependencies
      if: steps.setup-uv.outputs.cache-hit != 'true'
      run: sudo apt-get update && sudo apt-get install -y libxml2-dev libxmlsec1-dev libxmlsec1-openssl
    - name: Install Rust
      uses: dtolnay/rust-toolchain@0b1efabc08b657293548b77fb76cc02d26091c7e
      with:
        toolchain: 1.91.1
        components: cargo
    - name: Cache Rust dependencies
      uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
      with:
        shared-key: 'v2-rust-backend'
        workspaces: rust
        save-if: ${{ github.ref == 'refs/heads/master' }}
    - name: Install sqlx-cli
      uses: ./.github/actions/setup-sqlx-cli
    - name: Install Python dependencies
      run: UV_PROJECT_ENVIRONMENT=$pythonLocation uv sync --frozen --dev
    - name: Add service hostnames to /etc/hosts
      run: echo "127.0.0.1 db redis7 kafka clickhouse clickhouse-coordinator objectstorage temporal" | sudo tee -a /etc/hosts
    - name: Set up needed files
      run: |
        mkdir -p frontend/dist
        touch frontend/dist/index.html frontend/dist/layout.html frontend/dist/exporter.html
        ./bin/download-mmdb
    - name: Wait for Docker services
      env:
        COMPOSE_FILE: docker-compose.dev.yml:docker-compose.profiles.yml
        COMPOSE_PROFILES: temporal,azure
        CLICKHOUSE_SERVER_IMAGE: clickhouse/clickhouse-server:26.3.9.8
      run: bin/ci-wait-for-docker wait
    - name: Restore schema cache from master
      if: ${{ github.event_name == 'pull_request' && needs.turbo-discover.outputs.schema_cache_key != '' }}
      uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5
      with:
        path: schema.sql.gz
        key: ${{ needs.turbo-discover.outputs.schema_cache_key }}
    - name: Prime test_posthog from cached schema
      # No-op on cache miss; pytest --reuse-db falls back to a full migrate.
      if: ${{ github.event_name == 'pull_request' }}
      run: |
        if [ ! -f schema.sql.gz ]; then
          echo "::notice::Schema cache miss — pytest --reuse-db will run full migrations"
          exit 0
        fi
        mkdir -p .postgres-backups
        mv schema.sql.gz .postgres-backups/schema-latest.sql.gz
        ./bin/hogli db:restore-test-db
    - name: Register Temporal search attributes
      run: |
        bin/wait-for-docker temporal
        python manage.py register_temporal_search_attributes
    - name: Run product tests
      # --force: discover already decided this product needs testing, skip turbo cache
      # --log-order=stream: stream pytest output live instead of buffering until completion
      # pytest_args: optional pytest-split flags for sharded products (e.g. "-- --splits 3 --group 1")
      env:
        # --reuse-db: keep the test database between sequential product runs to avoid
        # ClickHouse drop/create race conditions with ReplicatedMergeTree ZK metadata.
        # On master, also collect timing data for pytest-split sharding.
        PYTEST_ADDOPTS: >-
          --reuse-db
          ${{ needs.detect-snapshot-mode.outputs.mode == 'update' && '--snapshot-update --snapshot-warn-unused' || '' }}
          ${{ github.ref == 'refs/heads/master' && '--store-durations --durations-path ../../.test_durations' || '' }}
      run: pnpm turbo run backend:test ${{ matrix.filters }} --concurrency=1 --output-logs=full --force --log-order=stream ${{ matrix.pytest_args }}
    - name: Upload timing data
      uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
      if: ${{ github.ref == 'refs/heads/master' }}
      with:
        name: timing_data-Products-${{ strategy.job-index }}
        path: .test_durations
        include-hidden-files: true
        retention-days: 2
    # The three snapshot steps below run even after a test failure (always()),
    # but only in UPDATE mode and only for PRs whose head repo is PostHog/posthog.
    - name: Verify new snapshots for flakiness
      if: ${{ always() && needs.detect-snapshot-mode.outputs.mode == 'update' && github.event.pull_request.head.repo.full_name == 'PostHog/posthog' }}
      shell: bash
      run: |
        .github/scripts/verify-new-snapshots.sh
    - name: Generate snapshot patch
      if: ${{ always() && needs.detect-snapshot-mode.outputs.mode == 'update' && github.event.pull_request.head.repo.full_name == 'PostHog/posthog' }}
      shell: bash
      run: |
        mkdir -p /tmp/patches
        # Intent-to-add so brand-new .ambr files show up in `git diff`
        git add -N '*.ambr' || true
        if ! git diff --quiet '*.ambr' 2>/dev/null; then
          git diff --binary --full-index '*.ambr' > /tmp/patches/backend-Products-${{ strategy.job-index }}.patch
          echo "Generated patch with $(wc -l < /tmp/patches/backend-Products-${{ strategy.job-index }}.patch) lines"
        else
          echo "No snapshot changes to patch"
        fi
    - name: Upload snapshot patch
      if: ${{ always() && needs.detect-snapshot-mode.outputs.mode == 'update' && github.event.pull_request.head.repo.full_name == 'PostHog/posthog' }}
      uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
      with:
        name: snapshot-patch-Products-${{ strategy.job-index }}
        path: /tmp/patches/
        if-no-files-found: ignore
        retention-days: 1
# Lightweight repo-wide checks that only need Python + uv (no Docker/DB).
# Consolidates checks that previously each spun up their own runner.
repo-checks:
  name: Repo checks (depot-ubuntu-latest)
  runs-on: depot-ubuntu-latest
  timeout-minutes: 10
  needs: [changes]
  if: needs.changes.outputs.backend == 'true'
  steps:
    - uses: actions/checkout@v6
    - name: Set up Python
      uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
      with:
        python-version: 3.12.12
    - name: Install uv
      uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0
      with:
        version: '0.10.2' # pinned: unpinned setup-uv calls GH API on every job, exhausts rate limit
    - name: Install Python dependencies
      # Sync straight into the interpreter that setup-python installed
      run: UV_PROJECT_ENVIRONMENT=$pythonLocation uv sync --frozen --dev
    # Scaffold a throwaway product first, then lint every product's structure
    - name: Bootstrap scaffold product
      run: ./bin/hogli product:bootstrap spline_reticulator --non-interactive
    - name: Lint product structure
      run: ./bin/hogli product:lint --all
    - name: Check version specifiers
      run: python .github/scripts/check-version-specifiers.py
    - name: Check IDOR model coverage
      run: python .github/scripts/check-idor-model-coverage.py
    - name: Check operator parity
      run: python .github/scripts/check-operator-parity.py
    - name: Check module boundaries (tach)
      run: tach check --dependencies --interfaces
    - name: Check product facade enforcement (import-linter)
      run: lint-imports
# Migration validation and OpenAPI type generation.
# This job needs Docker + DB — it checks out master first to run baseline
# migrations, then checks out the PR branch. All steps after the PR checkout
# require Django + DB. Lightweight checks belong in repo-checks above.
check-migrations:
needs: [changes]
if: needs.changes.outputs.backend == 'true' || needs.changes.outputs.openapi_types == 'true'
timeout-minutes: 20
# checks: write is needed so the "Publish Migration risk check" step
# can POST to /repos/.../check-runs. Workflow-level perms only grant
# contents:read + pull-requests:write.
permissions:
contents: read
pull-requests: write
checks: write
name: Validate migrations and OpenAPI types
runs-on: depot-ubuntu-latest
steps:
# clean: false keeps untracked files already in the workspace across this checkout
- uses: actions/checkout@v6
  with:
    clean: false
- name: Clean up data directories with container permissions
  continue-on-error: true
  run: |
    # Use docker to clean up files created by containers
    [ -d "data" ] && docker run --rm -v "$(pwd)/data:/data" alpine sh -c "rm -rf /data/seaweedfs /data/minio" || true
# Launched in the background here; a later step in this job waits for the services.
- name: Stop/Start stack with Docker Compose
  env:
    COMPOSE_FILE: docker-compose.dev.yml:docker-compose.profiles.yml
    COMPOSE_PROFILES: temporal,azure
  run: |
    bin/ci-wait-for-docker launch --background --down
- name: Set up Python
  uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
  with:
    python-version: 3.12.12
- name: Install uv
  id: setup-uv
  uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0
  with:
    version: '0.10.2' # pinned: unpinned setup-uv calls GH API on every job, exhausts rate limit
    enable-cache: true
    cache-dependency-glob: uv.lock
    # Only runs on master write the uv cache
    save-cache: ${{ github.ref == 'refs/heads/master' }}
# System libraries for python3-saml; skipped when the uv cache was restored.
- name: Install SAML (python3-saml) dependencies
  if: steps.setup-uv.outputs.cache-hit != 'true'
  run: |
    sudo apt-get update
    # -y: answer the confirmation prompt automatically, matching the SAML
    # install step in the product-tests job.
    sudo apt-get install -y libxml2-dev libxmlsec1-dev libxmlsec1-openssl
- name: Install Rust
  uses: dtolnay/rust-toolchain@0b1efabc08b657293548b77fb76cc02d26091c7e
  with:
    toolchain: 1.91.1
    components: cargo
- name: Cache Rust dependencies
  uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
  with:
    shared-key: 'v2-rust-backend'
    workspaces: rust
    save-if: ${{ github.ref == 'refs/heads/master' }}
- name: Install sqlx-cli
  uses: ./.github/actions/setup-sqlx-cli
- name: Preserve CI Docker helper during rollout
  run: |
    # Temporary during rollout: this job checks out `master`,
    # which does not include the latest Docker wait helpers yet.
    # Stash both helpers in /tmp so they survive that checkout.
    cp bin/ci-wait-for-docker /tmp/ci-wait-for-docker
    chmod +x /tmp/ci-wait-for-docker
    cp bin/wait-for-docker /tmp/wait-for-docker
    chmod +x /tmp/wait-for-docker
# First running migrations from master, to simulate the real-world scenario
- name: Checkout master
  uses: actions/checkout@v6
  with:
    ref: master
    clean: false
- name: Restore CI Docker helper during rollout
  run: |
    # Temporary during rollout: restore the helpers after the
    # checkout to `master` removes them from the workspace.
    cp /tmp/ci-wait-for-docker bin/ci-wait-for-docker
    chmod +x bin/ci-wait-for-docker
    cp /tmp/wait-for-docker bin/wait-for-docker
    chmod +x bin/wait-for-docker
# master's dependencies go into their own venv (.venv-master), separate from
# the environment the PR's dependencies are installed into further down.
- name: Install python dependencies for master
  run: |
    UV_PROJECT_ENVIRONMENT=.venv-master uv sync --frozen --dev
- name: Wait for Docker services
  env:
    COMPOSE_FILE: docker-compose.dev.yml:docker-compose.profiles.yml
    COMPOSE_PROFILES: temporal,azure
  run: bin/ci-wait-for-docker wait
- name: Run migrations up to master
  run: |
    # Run Django migrations first (excluding managed=False models)
    .venv-master/bin/python manage.py migrate
    # Then run persons migrations using sqlx; comment out until we've merged
    # DATABASE_URL="postgres://posthog:posthog@localhost:5432/posthog_persons" \
    #   sqlx database create
    # DATABASE_URL="postgres://posthog:posthog@localhost:5432/posthog_persons" \
    #   sqlx migrate run --source rust/persons_migrations/
# Get app token early so it can be passed to checkout below.
# This sets the git extraheader to the app token instead of GITHUB_TOKEN,
# which is required for the OpenAPI type commit push to trigger CI.
- name: Get app token for OpenAPI type commits
  id: openapi-app-token
  if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == 'PostHog/posthog'
  uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
  with:
    client-id: ${{ secrets.GH_APP_POSTHOG_TESTS_APP_ID }}
    private-key: ${{ secrets.GH_APP_POSTHOG_TESTS_PRIVATE_KEY }}
# Now we can consider this PR's migrations
- name: Checkout this PR
  uses: actions/checkout@v6
  with:
    # For same-repo PRs, checkout the actual branch (not the merge commit)
    # so OpenAPI type generation can be committed directly. Fork PRs fall
    # back to the default merge commit (auto-commit bails out for forks).
    ref: ${{ (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == 'PostHog/posthog') && github.event.pull_request.head.ref || github.ref }}
    clean: false
    # Use app token so the git extraheader is set correctly for the OpenAPI
    # type commit push later — GITHUB_TOKEN pushes don't trigger CI.
    token: ${{ steps.openapi-app-token.outputs.token || github.token }}
- name: Install python dependencies for this PR
  run: |
    UV_PROJECT_ENVIRONMENT=$pythonLocation uv sync --frozen --dev
# For every migration file that is new in this PR, render its SQL with
# `manage.py sqlmigrate` and keep one "## Migration SQL Changes" PR comment up to
# date (created, updated, or deleted once no new migrations remain).
- name: Check migrations and post SQL comment
  if: github.event_name == 'pull_request' && needs.changes.outputs.migrations == 'true'
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    CHANGED_FILES: ${{ needs.changes.outputs.migrations_files }}
    BASE_SHA: ${{ github.event.pull_request.base.sha }}
  run: |
    # If no migration files changed, exit
    if [ -z "$CHANGED_FILES" ]; then
      echo "No migration files changed"
      exit 0
    fi
    if [ -z "$BASE_SHA" ]; then
      echo "::warning::BASE_SHA is empty — all changed migrations will be treated as new"
    else
      # Ensure the base commit is available for comparison
      git fetch --no-tags --prune --depth=1 origin "$BASE_SHA" || echo "::warning::Could not fetch base SHA $BASE_SHA — all changed migrations will be shown as new"
    fi
    # Initialize comment body for SQL changes
    COMMENT_BODY="## Migration SQL Changes\n\nHey 👋, we've detected some migrations on this PR. Here's the SQL output for each migration, make sure they make sense:\n\n"
    HAS_NEW_MIGRATIONS=false
    # Process each changed migration file (excluding Rust migrations).
    # $CHANGED_FILES is deliberately unquoted: it is a whitespace-separated list.
    for file in $CHANGED_FILES; do
      # Skip Rust migrations as they're handled separately by sqlx
      if [[ $file =~ rust/persons_migrations ]]; then
        continue
      fi
      if [[ $file =~ migrations/([0-9]+)_ ]]; then
        migration_number="${BASH_REMATCH[1]}"
        # Get app name by looking at the directory structure
        # For new structure products/user_interviews/backend/migrations, we want user_interviews
        # For old structure products/user_interviews/migrations, we want user_interviews
        if [[ $file =~ products/([^/]+)/backend/migrations/ ]]; then
          app_name="${BASH_REMATCH[1]}"
        else
          app_name=$(echo "$file" | sed -E 's|^([^/]+/)*([^/]+)/migrations/.*|\2|')
        fi
        # Only show SQL for new migrations, not modifications to existing ones.
        # Guard on BASE_SHA: with an empty SHA the object name collapses to ":<path>",
        # which git resolves against the index, so every checked-out file would
        # look like it already exists on the base branch.
        if [ -n "$BASE_SHA" ] && git cat-file -e "$BASE_SHA:$file" 2>/dev/null; then
          echo "Skipping $file (already exists on base branch)"
          continue
        fi
        HAS_NEW_MIGRATIONS=true
        echo "Checking migration $migration_number for app $app_name"
        # Get SQL output
        SQL_OUTPUT=$(python manage.py sqlmigrate "$app_name" "$migration_number")
        # Add to comment body
        COMMENT_BODY+="#### [\`$file\`](https:\/\/github.com\/${{ github.repository }}\/blob\/${{ github.sha }}\/$file)\n\`\`\`sql\n$SQL_OUTPUT\n\`\`\`\n\n"
      fi
    done
    # Get existing comments (needed for both update and cleanup).
    # per_page=100 (the API maximum) instead of the default 30, so the marker
    # comment is still found on PRs with a long discussion.
    COMMENTS=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \
      -H "Accept: application/vnd.github.v3+json" \
      "https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments?per_page=100")
    # Extract comment ID if exists
    SQL_COMMENT_ID=$(echo "$COMMENTS" | jq -r '.[] | select(.body | startswith("## Migration SQL Changes")) | .id' | head -1)
    # If no new migrations, clean up any stale comment and exit
    if [ "$HAS_NEW_MIGRATIONS" = false ]; then
      echo "No new migrations to show (all changed files already exist on base branch)"
      if [ -n "$SQL_COMMENT_ID" ]; then
        echo "Deleting stale SQL comment $SQL_COMMENT_ID"
        curl -X DELETE \
          -H "Authorization: token $GITHUB_TOKEN" \
          -H "Accept: application/vnd.github.v3+json" \
          "https://api.github.com/repos/${{ github.repository }}/issues/comments/$SQL_COMMENT_ID"
      fi
      exit 0
    fi
    # Add timestamp and commit SHA to SQL changes
    TIMESTAMP=$(date -u '+%Y-%m-%d %H:%M UTC')
    COMMIT_SHA="${{ github.event.pull_request.head.sha }}"
    COMMIT_SHORT="${COMMIT_SHA:0:7}"
    COMMENT_BODY+="\n*Last updated: $TIMESTAMP ([${COMMIT_SHORT}](https://github.com/${{ github.repository }}/commit/${COMMIT_SHA}))*"
    # Convert \n into actual newlines
    COMMENT_BODY=$(printf '%b' "$COMMENT_BODY")
    # jq builds the JSON payload so the body is escaped correctly
    COMMENT_BODY_JSON=$(jq -n --arg body "$COMMENT_BODY" '{body: $body}')
    if [ -n "$SQL_COMMENT_ID" ]; then
      # Update existing comment
      echo "Updating existing SQL comment $SQL_COMMENT_ID"
      curl -X PATCH \
        -H "Authorization: token $GITHUB_TOKEN" \
        -H "Accept: application/vnd.github.v3+json" \
        "https://api.github.com/repos/${{ github.repository }}/issues/comments/$SQL_COMMENT_ID" \
        -d "$COMMENT_BODY_JSON"
    else
      # Post new SQL comment to PR
      echo "Posting new SQL comment to PR"
      curl -X POST \
        -H "Authorization: token $GITHUB_TOKEN" \
        -H "Accept: application/vnd.github.v3+json" \
        "https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments" \
        -d "$COMMENT_BODY_JSON"
    fi
# Runs `manage.py analyze_migration_risk`, mirrors its report into a single
# "## 🔍 Migration Risk Analysis" PR comment (created, updated, or deleted), and
# finally fails the step with the analyzer's exit code if it was non-zero.
- name: Run migration risk analysis and post comment
  if: github.event_name == 'pull_request'
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    # Get risk analysis for all unapplied migrations (including third-party).
    # --output-json gives downstream consumers (the check-run step
    # below, and any other tool) a stable structured shape to read.
    set +e  # Don't exit immediately on error
    RISK_ANALYSIS=$(python manage.py analyze_migration_risk --fail-on-blocked --output-json migration_analysis.json 2>/dev/null)
    EXIT_CODE=$?
    set -e  # Re-enable exit on error
    # Save analysis to file for artifact upload
    if [ -n "$RISK_ANALYSIS" ]; then
      echo "$RISK_ANALYSIS" > migration_analysis.md
    fi
    # Get existing comments.
    # per_page=100 (the API maximum) instead of the default 30, so an existing
    # analysis comment is still found on PRs with a long discussion rather
    # than a duplicate being posted.
    COMMENTS=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \
      -H "Accept: application/vnd.github.v3+json" \
      "https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments?per_page=100")
    # Extract comment ID if exists
    COMMENT_ID=$(echo "$COMMENTS" | jq -r '.[] | select(.body | startswith("## 🔍 Migration Risk Analysis")) | .id' | head -1)
    # Only analyzer output that contains a "Summary:" line is posted
    if [ -n "$RISK_ANALYSIS" ] && echo "$RISK_ANALYSIS" | grep -q "Summary:"; then
      # Add timestamp and commit SHA to analysis
      TIMESTAMP=$(date -u '+%Y-%m-%d %H:%M UTC')
      COMMIT_SHA="${{ github.event.pull_request.head.sha }}"
      COMMIT_SHORT="${COMMIT_SHA:0:7}"
      RISK_COMMENT="## 🔍 Migration Risk Analysis\n\nWe've analyzed your migrations for potential risks.\n\n$RISK_ANALYSIS\n\n*Last updated: $TIMESTAMP ([${COMMIT_SHORT}](https://github.com/${{ github.repository }}/commit/${COMMIT_SHA}))*"
      RISK_COMMENT=$(printf '%b' "$RISK_COMMENT")
      RISK_COMMENT_JSON=$(jq -n --arg body "$RISK_COMMENT" '{body: $body}')
      if [ -n "$COMMENT_ID" ]; then
        # Update existing comment
        echo "Updating existing risk analysis comment $COMMENT_ID"
        curl -X PATCH \
          -H "Authorization: token $GITHUB_TOKEN" \
          -H "Accept: application/vnd.github.v3+json" \
          "https://api.github.com/repos/${{ github.repository }}/issues/comments/$COMMENT_ID" \
          -d "$RISK_COMMENT_JSON"
      else
        # Create new comment if none exists
        echo "Posting new risk analysis comment to PR"
        curl -X POST \
          -H "Authorization: token $GITHUB_TOKEN" \
          -H "Accept: application/vnd.github.v3+json" \
          "https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments" \
          -d "$RISK_COMMENT_JSON"
      fi
    elif [ -n "$COMMENT_ID" ]; then
      # No migrations to analyze but comment exists - delete it
      echo "Deleting risk analysis comment (no migrations to analyze)"
      curl -X DELETE \
        -H "Authorization: token $GITHUB_TOKEN" \
        -H "Accept: application/vnd.github.v3+json" \
        "https://api.github.com/repos/${{ github.repository }}/issues/comments/$COMMENT_ID"
    else
      echo "No migrations to analyze and no existing comment"
    fi
    # Fail the job if there were blocked migrations
    if [ "$EXIT_CODE" -ne 0 ]; then
      exit "$EXIT_CODE"
    fi
- name: Publish Migration risk check
  # Migration risk classification is published as a GitHub check on
  # the head commit so it shows up in the PR UI alongside CI checks
  # and so any tool that already reads check_runs (review bots,
  # branch-protection rules, dashboards) can consume it without
  # parsing the comment. The check is a CI feature; consumers and
  # the analyzer are decoupled.
  #
  # Always published — even with zero migrations to analyze, when
  # the analyzer crashed, or when it left behind JSON that cannot be
  # parsed — so every PR ends up with a definitive verdict on its
  # head SHA. Consumers can then treat "no completed check yet"
  # purely as "CI hasn't finished" and don't need a fallback
  # heuristic.
  #
  # Skipped on PRs from forks: GITHUB_TOKEN is read-only for fork
  # PRs regardless of the workflow's `permissions:` block, so the
  # check-runs POST 403s. The analyzer itself still runs in earlier
  # steps and fails the job on Blocked migrations; reviewers can
  # read the verdict in the uploaded migration-analysis artifact.
  if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == false
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    HEAD_SHA: ${{ github.event.pull_request.head.sha }}
    REPO: ${{ github.repository }}
  run: |
    if [ ! -f migration_analysis.json ]; then
      # Analyzer crashed before writing JSON. Publish a failure
      # check so stamphog (or any consumer) gets a definitive
      # verdict instead of looping in "wait for the check."
      echo "No migration_analysis.json — analyzer didn't run; publishing failure check"
      jq -n \
        --arg name "Migration risk" \
        --arg head_sha "$HEAD_SHA" \
        '{name: $name, head_sha: $head_sha, status: "completed",
          conclusion: "failure",
          output: {title: "❌ Analyzer failed",
                   summary: "<!-- stamphog:v1 [] -->\nMigration analyzer did not produce migration_analysis.json. Re-run the Backend CI job; if it keeps failing, inspect the analyzer step logs."}}' \
        | gh api "repos/$REPO/check-runs" --method POST --input -
      exit 0
    fi
    # A failing command substitution aborts the script when errexit is
    # on (GitHub's default bash invocation enables it), which would skip
    # the publish below. Fall back to a sentinel so malformed JSON lands
    # in the catch-all branch and still produces a failure check.
    MAX_LEVEL=$(jq -r '.max_level // "none"' migration_analysis.json 2>/dev/null || echo "unparseable")
    case "$MAX_LEVEL" in
      "Safe") CONCLUSION=success; TITLE="✅ All migrations safe" ;;
      "Needs Review") CONCLUSION=neutral; TITLE="⚠️ Needs review" ;;
      "Blocked") CONCLUSION=failure; TITLE="❌ Blocked migrations" ;;
      "none"|"null")
        # Zero Django migrations to analyze. Publish success so
        # PRs that touch only ClickHouse/async/rbac migrations
        # (which the analyzer doesn't cover) don't leave
        # consumers waiting on a check that would never come.
        CONCLUSION=success
        TITLE="✅ No Django migrations to analyze"
        ;;
      *)
        echo "Unknown max_level '$MAX_LEVEL' — publishing failure"
        CONCLUSION=failure
        TITLE="❌ Unknown analyzer output"
        ;;
    esac
    # Embed the analyzed file paths in the summary as a hidden
    # marker. Stamphog parses this to scope its deny-list bypass
    # to exactly the files the analyzer classified — heuristics
    # over directory names produce false bypasses for unrelated
    # systems (ClickHouse, async migrations) that share the
    # `migrations/` directory naming.
    ANALYZED_PATHS=$(jq -c '[.migrations[].file_path | select(. != null)]' migration_analysis.json 2>/dev/null || echo '[]')
    MARKER="<!-- stamphog:v1 ${ANALYZED_PATHS} -->"
    # Truncate the human report to fit the check-run output
    # limit (~64KB); the full markdown is also posted as a PR
    # comment for humans. The marker stays at the top so it's
    # never lost to truncation.
    REPORT_BODY=$(head -c 59000 migration_analysis.md 2>/dev/null || echo "See PR comment")
    SUMMARY=$(printf '%s\n%s' "$MARKER" "$REPORT_BODY")
    jq -n \
      --arg name "Migration risk" \
      --arg head_sha "$HEAD_SHA" \
      --arg conclusion "$CONCLUSION" \
      --arg title "$TITLE" \
      --arg summary "$SUMMARY" \
      '{name: $name, head_sha: $head_sha, status: "completed",
        conclusion: $conclusion,
        output: {title: $title, summary: $summary}}' \
      | gh api "repos/$REPO/check-runs" --method POST --input -
- name: Upload migration analysis artifact
if: always() && github.event_name == 'pull_request'
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: migration-analysis
path: |
migration_analysis.md
migration_analysis.json
if-no-files-found: ignore
- name: Run migrations for this PR
run: |
# Run Django migrations first (excluding managed=False models)
python manage.py migrate
# Then run persons migrations using sqlx
DATABASE_URL="postgres://posthog:posthog@localhost:5432/posthog_persons" \
sqlx migrate run --source rust/persons_migrations/
- name: Dump migrated schema
  # Produces schema.sql.gz on push builds: the schema DDL followed by the
  # django_migrations rows, so Django knows which migrations are applied.
  if: github.event_name == 'push'
  run: |
    set -eo pipefail
    # pg_dump runs inside the db container to ensure version match
    # (host has pg_dump 16, container has 15).
    pg_dump_in_db() {
      docker compose -f docker-compose.dev.yml exec -T db pg_dump "$@" -U posthog posthog
    }
    {
      pg_dump_in_db --schema-only --clean --if-exists &&
        pg_dump_in_db --data-only --table=django_migrations
    } | gzip > schema.sql.gz
    # Fail the step if the archive is not a valid gzip stream.
    gunzip -t schema.sql.gz
- name: Upload migrated schema artifact
if: github.event_name == 'push'
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: migrated-schema
path: schema.sql.gz
retention-days: 90
- name: Save schema to Actions cache for PR shards
# Seeds a Postgres schema cache consumed by PR test jobs (turbo-tests,
# django, dagster). PR jobs restore using the merge-base SHA as the key
# so they always get a schema from their exact branch point rather than
# the newest master. LRU eviction handles cleanup.
if: github.event_name == 'push'
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5
with:
path: schema.sql.gz
key: posthog-schema-master-${{ github.sha }}
- name: Check migrations
# Runs two checks on non-push events: the sqlx status of the persons
# migrations, then Django's missing-migration check.
# Skip migration safety check on master push (no migration_files from path filter)
if: github.event_name != 'push'
env:
# NOTE(review): MIGRATIONS_FILES is exported here but neither command in the
# script below references it explicitly — confirm something still reads it
# from the environment before keeping or removing it.
MIGRATIONS_FILES: ${{ needs.changes.outputs.migrations_files }}
run: |
# Report the state of the persons migrations against the posthog_persons database.
DATABASE_URL="postgres://posthog:posthog@localhost:5432/posthog_persons" \
sqlx migrate info --source rust/persons_migrations/
# Dry run only: reports model changes that lack a migration without writing files.
python manage.py makemigrations --check --dry-run
- name: Check CH migrations
  run: |
    # Pipe every file added under posthog/clickhouse/migrations/ on this
    # branch (relative to origin/master, README excluded) into the safety
    # check, one path per line on stdin.
    # --diff-filter=A with --name-only yields exactly the added paths, so
    # the match can be anchored to the start of the path and no status
    # column has to be stripped.
    git diff --name-only --diff-filter=A origin/master..HEAD \
      | grep '^posthog/clickhouse/migrations/' \
      | grep -v README \
      | python manage.py test_ch_migrations_are_safe
- name: Render CH migration SQL per cloud environment
  # A ClickHouse migration may assemble a different `operations` list at
  # import time depending on `settings.CLOUD_DEPLOYMENT` (cloud-only Kafka
  # tables, for example). This step prints the SQL each environment would
  # run, one log group per environment, so reviewers can compare them and
  # check that gated branches contain no ON CLUSTER or other forbidden
  # patterns. Rendering failures are tolerated (`|| true`).
  if: github.event_name == 'pull_request'
  run: |
    touched=$(git diff --name-only --diff-filter=AM origin/master..HEAD | grep '^posthog/clickhouse/migrations/[0-9]' | grep -v __pycache__ || true)
    if [ -n "$touched" ]; then
      # The empty entry renders the SQL with CLOUD_DEPLOYMENT unset.
      for target in '' US EU DEV; do
        echo "::group::CLOUD_DEPLOYMENT=${target:-<unset>}"
        # $touched is deliberately unquoted: one argument per changed file.
        CLOUD_DEPLOYMENT="${target}" python manage.py print_ch_migration_sql ${touched} || true
        echo "::endgroup::"
      done
    else
      echo "No ClickHouse migrations changed."
    fi
- name: Install pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
- name: Fix node-gyp permissions
run: chmod +x ~/setup-pnpm/node_modules/.pnpm/pnpm@*/node_modules/pnpm/dist/node_modules/node-gyp/gyp/gyp_main.py
- name: Set up Node.js
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
with:
node-version-file: .nvmrc
cache: pnpm
cache-dependency-path: |
pnpm-lock.yaml
.github/workflows/ci-backend.yml
- name: Install package.json dependencies with pnpm
env:
npm_config_fetch_retries: 3
npm_config_fetch_retry_mintimeout: 10000
npm_config_fetch_retry_maxtimeout: 60000
run: pnpm --filter=@posthog/root --filter=@posthog/frontend... --filter=@posthog/mcp... install --frozen-lockfile
- name: Add OpenAPI Problem Matcher
run: echo "::add-matcher::.github/openapi-problem-matcher.json"
- name: Check and update OpenAPI types
# Regenerates the OpenAPI-derived files and compares them with the checkout.
# Sets the `needs-commit` step output, which the following commit step reads:
# it is 'true' only when the files are stale, the run is a pull_request from
# PostHog/posthog, and the branch head still equals HEAD_SHA.
id: openapi-check
env:
EVENT_NAME: ${{ github.event_name }}
HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }}
BRANCH: ${{ github.event.pull_request.head.ref }}
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
PR_NUMBER: ${{ github.event.pull_request.number }}
GH_TOKEN: ${{ steps.openapi-app-token.outputs.token }}
run: |
./bin/hogli build:openapi
pnpm --filter=@posthog/mcp run scaffold-yaml -- --sync-all
# NOTE(review): plain `git diff` only reports changes to tracked files, so a
# brand-new generated file would not make this check fail — confirm whether
# that case needs covering.
if git diff --exit-code; then
echo "OpenAPI types are up to date"
echo "needs-commit=false" >> $GITHUB_OUTPUT
exit 0
fi
echo "OpenAPI types are out of date"
# On non-PR builds or fork PRs, fail with instructions
if [ "$EVENT_NAME" != "pull_request" ] || \
[ "$HEAD_REPO" != "PostHog/posthog" ]; then
echo ""
echo "::error::OpenAPI types are out of date!"
echo ""
echo "The TypeScript API types in products/*/frontend/generated/ are auto-generated"
echo "from Django serializers and views. When you modify the backend API, you need"
echo "to regenerate these types."
echo ""
echo "To fix, run locally: hogli build:openapi"
echo "Then commit the updated generated files."
echo ""
echo "More info: https://posthog.com/handbook/engineering/type-system"
echo ""
echo "Questions? #team-devex on Slack"
exit 1
fi
echo "::notice::Committing updated OpenAPI types to PR branch"
# Verify branch hasn't advanced since CI started
CURRENT_SHA=$(git ls-remote origin "refs/heads/$BRANCH" | cut -f1)
if [ "$CURRENT_SHA" != "$HEAD_SHA" ]; then
echo "::error::Branch advanced during workflow ($HEAD_SHA -> $CURRENT_SHA) — cannot auto-commit OpenAPI types."
echo ""
echo "OpenAPI types are out of date and could not be auto-committed because the branch"
echo "was updated while this workflow was running. Please run locally and push:"
echo ""
echo " hogli build:openapi"
echo ""
echo "Then commit the updated generated files."
echo "needs-commit=false" >> $GITHUB_OUTPUT
exit 1
fi
# Disable auto-merge before pushing to prevent unreviewed code from merging
# A failure of the disable call is tolerated and only logged.
gh pr merge --disable-auto "$PR_NUMBER" || echo "Auto-merge was not enabled"
echo "needs-commit=true" >> $GITHUB_OUTPUT
- name: Commit OpenAPI types via GitHub API (signed)
if: steps.openapi-check.outputs.needs-commit == 'true'
uses: planetscale/ghcommit-action@25309d8005ac7c3bcd61d3fe19b69e0fe47dbdde # v0.2.20
with:
commit_message: 'chore: update OpenAPI generated types'
repo: ${{ github.repository }}
branch: ${{ github.event.pull_request.head.ref }}
file_pattern: 'frontend/src/generated/** products/*/frontend/generated/** products/*/mcp/*.yaml services/mcp/definitions/*.yaml services/mcp/src/api/generated.ts services/mcp/src/generated/** services/mcp/schema/generated-tool-definitions.json services/mcp/schema/tool-definitions-all.json services/mcp/src/tools/generated/**'
env:
GITHUB_TOKEN: ${{ steps.openapi-app-token.outputs.token }}
build_django_matrix:
  # Assembles the `include` list consumed by the django job's matrix:
  # Core, Core with persons-on-events, Temporal and ClickHouse compat rows.
  name: Build Django matrix
  needs: [changes, get_clickhouse_versions]
  if: needs.changes.outputs.backend == 'true'
  runs-on: ubuntu-latest
  timeout-minutes: 5
  outputs:
    include: ${{ steps.build.outputs.include }}
  steps:
    - name: Build matrix include list
      id: build
      env:
        OLDEST_SUPPORTED_IMAGE: ${{ needs.get_clickhouse_versions.outputs.oldest_supported_image }}
        COMPAT_MATRIX_JSON: ${{ needs.get_clickhouse_versions.outputs.compat_matrix }}
      run: |
        # :NOTE: Keep shard counts/group ranges in sync with historical Django matrix tuning.
        # Consult #team-devex before changing.

        # shard_rows SEGMENT PERSON_ON_EVENTS SHARDS KEY_PREFIX
        # Prints a compact JSON array with one non-compat matrix row per
        # shard group 1..SHARDS, all on the oldest supported ClickHouse image.
        shard_rows() {
          jq -cn \
            --arg image "$OLDEST_SUPPORTED_IMAGE" \
            --arg segment "$1" \
            --argjson poe "$2" \
            --argjson shards "$3" \
            --arg prefix "$4" '
            [range(1; $shards + 1) | {
              segment: $segment,
              "person-on-events": $poe,
              "python-version": "3.12.12",
              "clickhouse-server-image": $image,
              concurrency: $shards,
              group: .,
              artifact_key: ($prefix + (.|tostring)),
              compat: false
            }]
          '
        }

        core=$(shard_rows Core false 38 core-)
        core_persons_on_events=$(shard_rows Core true 7 core-poe-)
        temporal=$(shard_rows Temporal false 7 temporal-)

        # Compat rows arrive pre-expanded (image x shard group); an empty or
        # unset input is treated as an empty list. Each row gains the shared
        # Core fields and a 1-based artifact key.
        compat=$(jq -cn --argjson rows "${COMPAT_MATRIX_JSON:-[]}" '
          [
            $rows
            | to_entries[]
            | .value + {
                segment: "Core",
                "person-on-events": false,
                "python-version": "3.12.12",
                compat: true,
                artifact_key: ("compat-" + ((.key + 1)|tostring))
              }
          ]
        ')

        include=$(jq -cn \
          --argjson a "$core" \
          --argjson b "$core_persons_on_events" \
          --argjson c "$temporal" \
          --argjson d "$compat" \
          '$a + $b + $c + $d')
        echo "include=$include" >> "$GITHUB_OUTPUT"
        echo "Django matrix size: $(jq -r 'length' <<< "$include")"
django:
needs: [changes, turbo-discover, detect-snapshot-mode, get_clickhouse_versions, build_django_matrix]
# Run legacy pytest if:
# 1. Legacy code directly changed (ee/, posthog/)
# 2. OR product changes affect legacy code (turbo-discover outputs run_legacy=true)
# 3. OR turbo-discover itself failed (conservative: run Django on detection failure)
if: |
always() &&
needs.changes.outputs.backend == 'true' &&
needs.build_django_matrix.result == 'success' &&
(needs.changes.outputs.legacy == 'true' ||
needs.turbo-discover.outputs.run_legacy == 'true' ||
(needs.turbo-discover.result != 'success' && needs.turbo-discover.result != 'skipped'))
# increase for tmate testing
timeout-minutes: 30
name: Django tests – ${{ matrix.segment }}${{ matrix.compat && ' compat' || '' }} (persons-on-events ${{ matrix.person-on-events && 'on' || 'off' }}), Py ${{ matrix.python-version }}, ${{ matrix.clickhouse-server-image }} (${{matrix.group}}/${{ matrix.concurrency }})
# Runner type is performance-critical — consult #team-devex before changing
runs-on: depot-ubuntu-latest
strategy:
fail-fast: false
matrix:
include: ${{ fromJson(needs.build_django_matrix.outputs.include) }}
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 1
repository: ${{ github.event.pull_request.head.repo.full_name }}
ref: ${{ github.event.pull_request.head.ref }}
lfs: true
clean: false
- name: Clean up data directories with container permissions
run: |
# Use docker to clean up files created by containers
[ -d "data" ] && docker run --rm -v "$(pwd)/data:/data" alpine sh -c "rm -rf /data/seaweedfs /data/minio" || true
continue-on-error: true
- name: 'Safeguard: ensure no stray Python modules at product root'
  # Fails the job when, within two levels of products/, there is a *.py
  # file (other than __init__.py / conftest.py) or a `migrations`
  # directory whose path is not under a backend/ folder.
  run: |
    echo "Checking that products/* only contain backend/, frontend/, or shared/ as Python code roots..."
    # -maxdepth is a global find option, so it goes before the tests (GNU
    # find warns when it follows them). The two alternatives are grouped
    # explicitly instead of relying on -o binding looser than implicit AND.
    BAD_FILES=$(find products -maxdepth 2 \
      \( -type f -name "*.py" ! -path "*/backend/*" ! -name "__init__.py" ! -name "conftest.py" \) \
      -o \
      \( -type d -name "migrations" ! -path "*/backend/*" \))
    if [ -n "$BAD_FILES" ]; then
      echo "❌ Found Python code or migrations outside backend/:"
      echo "$BAD_FILES"
      echo "Please move these into the appropriate backend/ folder."
      exit 1
    fi
    echo "✅ No stray Python files or migrations found at product roots."
# Pre-tests
# Copies the fully versioned UDF xml file for use in CI testing
- name: Stop/Start stack with Docker Compose
shell: bash
env:
COMPOSE_FILE: docker-compose.dev.yml:docker-compose.profiles.yml
COMPOSE_PROFILES: temporal,azure
CLICKHOUSE_SERVER_IMAGE: ${{ matrix.clickhouse-server-image || needs.get_clickhouse_versions.outputs.oldest_supported_image }}
WAIT_FOR_DOCKER_LAUNCH_RETRIES: 3
WAIT_FOR_DOCKER_LAUNCH_RETRY_DELAY: 5
run: |
cp posthog/user_scripts/latest_user_defined_function.xml docker/clickhouse/user_defined_function.xml
bin/ci-wait-for-docker launch --background --down-all-profiles
- name: Add service hostnames to /etc/hosts
shell: bash
run: echo "127.0.0.1 db redis7 kafka clickhouse clickhouse-coordinator objectstorage seaweedfs temporal" | sudo tee -a /etc/hosts
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: ${{ matrix.python-version }}
- name: Install uv
id: setup-uv-tests
uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0
with:
version: '0.10.2' # pinned: unpinned setup-uv calls GH API on every job, exhausts rate limit
enable-cache: true
cache-dependency-glob: uv.lock
save-cache: ${{ github.ref == 'refs/heads/master' }}
- name: Install SAML (python3-saml) dependencies
  # Installs the libxml2/xmlsec1 system packages; skipped when the uv
  # cache was restored.
  if: ${{ needs.changes.outputs.backend == 'true' && steps.setup-uv-tests.outputs.cache-hit != 'true' }}
  shell: bash
  run: |
    # -y keeps the install non-interactive without depending on the
    # runner image's apt configuration.
    sudo apt-get update && sudo apt-get install -y libxml2-dev libxmlsec1-dev libxmlsec1-openssl
- name: Install Rust
if: needs.changes.outputs.backend == 'true'
uses: dtolnay/rust-toolchain@0b1efabc08b657293548b77fb76cc02d26091c7e
with:
toolchain: 1.91.1
components: cargo
- name: Cache Rust dependencies
if: needs.changes.outputs.backend == 'true'
uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
with:
shared-key: 'v2-rust-backend'
workspaces: rust
save-if: ${{ github.ref == 'refs/heads/master' }}
- name: Install sqlx-cli
if: needs.changes.outputs.backend == 'true'
uses: ./.github/actions/setup-sqlx-cli
- name: Determine if hogql-parser has changed compared to master
  # Sets the `changed` step output to "true" or "false" depending on
  # whether common/hogql_parser/ differs between HEAD and origin/master.
  shell: bash
  id: hogql-parser-diff
  run: |
    git fetch --no-tags --prune --depth=1 origin master
    # Any non-zero status from `git diff --quiet` (differences found, or
    # the diff itself failing) counts as "changed".
    if git diff --quiet HEAD origin/master -- common/hogql_parser/; then
      changed=false
    else
      changed=true
    fi
    echo "changed=$changed" >> "$GITHUB_OUTPUT"
# tests would intermittently fail in GH actions
# with exit code 134 _after passing_ all tests
# this appears to fix it
# absolute wild tbh https://stackoverflow.com/a/75503402
# Cache Qt library packages to reduce network dependency and flakiness
# Using cache-apt-pkgs-action instead of setup-qt-libs to avoid network issues
- name: Cache and install Qt libraries
uses: awalsh128/cache-apt-pkgs-action@acb598e5ddbc6f68a970c5da0688d2f3a9f04d05 # v1.6.0
with:
packages: libegl1 libdbus-1-3 libxkbcommon-x11-0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0 libxcb-render-util0 libxcb-xinerama0 libxcb-xinput0 libxcb-xfixes0 x11-utils libxcb-cursor0 libopengl0
version: 1.0
- name: Install Python dependencies
shell: bash
run: |
UV_PROJECT_ENVIRONMENT=$pythonLocation uv sync --frozen --dev
- name: Install the working version of hogql-parser
  # Builds the ANTLR4 C++ runtime from source and pip-installs
  # common/hogql_parser from the checkout. Runs only when the
  # hogql-parser-diff step reported a change.
  if: ${{ needs.changes.outputs.backend == 'true' && steps.hogql-parser-diff.outputs.changed == 'true' }}
  shell: bash
  # This is not cached currently, as it's important to build the current HEAD version of hogql-parser if it has
  # changed (requirements.txt has the already-published version)
  run: |
    # -y keeps the install non-interactive without depending on the
    # runner image's apt configuration.
    sudo apt-get install -y unzip cmake curl uuid pkg-config
    # Primary download location first, GitHub-hosted copy as fallback.
    curl --fail --location https://www.antlr.org/download/antlr4-cpp-runtime-4.13.1-source.zip --output antlr4-source.zip || curl --fail --location https://raw.githubusercontent.com/antlr/website-antlr4/gh-pages/download/antlr4-cpp-runtime-4.13.1-source.zip --output antlr4-source.zip
    # Check that the downloaded archive is the expected runtime - a security measure
    # NOTE(review): MD5 is not collision-resistant; consider pinning a
    # SHA-256 digest once a trusted value is available.
    antlr_known_md5sum="c875c148991aacd043f733827644a76f"
    antlr_found_md5sum="$(md5sum antlr4-source.zip | cut -d' ' -f1)"
    if [[ "$antlr_known_md5sum" != "$antlr_found_md5sum" ]]; then
      echo "Unexpected MD5 sum of antlr4-source.zip!"
      echo "Known: $antlr_known_md5sum"
      echo "Found: $antlr_found_md5sum"
      exit 64
    fi
    unzip antlr4-source.zip -d antlr4-source && cd antlr4-source
    cmake .
    # Stage the install under ./out, then copy headers and shared
    # libraries into the system locations.
    DESTDIR=out make install
    sudo cp -r out/usr/local/include/antlr4-runtime /usr/include/
    sudo cp out/usr/local/lib/libantlr4-runtime.so* /usr/lib/
    sudo ldconfig
    cd ..
    pip install ./common/hogql_parser
- name: Set up needed files
shell: bash
run: |
mkdir -p frontend/dist
touch frontend/dist/index.html
touch frontend/dist/layout.html
touch frontend/dist/exporter.html
./bin/download-mmdb
- name: Wait for Docker services
shell: bash
# Read-only health poll for core services only. Can't use --wait here
# because it blocks on ALL project containers including temporal, which
# boots slowly (auto-setup runs DB migrations). Temporal was started in
# background above and will be ready by the time temporal tests run.
env:
COMPOSE_FILE: docker-compose.dev.yml:docker-compose.profiles.yml
COMPOSE_PROFILES: temporal,azure
CLICKHOUSE_SERVER_IMAGE: ${{ matrix.clickhouse-server-image || needs.get_clickhouse_versions.outputs.oldest_supported_image }}
run: bin/ci-wait-for-docker wait
- name: Restore schema cache from master
if: ${{ github.event_name == 'pull_request' && needs.turbo-discover.outputs.schema_cache_key != '' }}
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5
with:
path: schema.sql.gz
key: ${{ needs.turbo-discover.outputs.schema_cache_key }}
- name: Prime test_posthog from cached schema
# No-op on cache miss; pytest --reuse-db falls back to a full migrate.
if: ${{ github.event_name == 'pull_request' }}
run: |
if [ ! -f schema.sql.gz ]; then
echo "::notice::Schema cache miss — pytest --reuse-db will run full migrations"
exit 0
fi
mkdir -p .postgres-backups
mv schema.sql.gz .postgres-backups/schema-latest.sql.gz
./bin/hogli db:restore-test-db
- name: Determine if --snapshot-update should be on
# UPDATE mode: human commits - update snapshots
# CHECK mode: bot commits (after snapshot update) - verify snapshots match exactly
# persons-on-events: always update (we ignore snapshot divergence there)
if: ${{ needs.changes.outputs.backend == 'true' && (needs.detect-snapshot-mode.outputs.mode == 'update' || matrix.person-on-events) }}
shell: bash
# --snapshot-warn-unused: pytest-split shards individual tests across
# runners, so each shard only exercises a subset of snapshots per file.
# Without this flag, --snapshot-update deletes "unused" snapshots that
# belong to other shards, causing cross-shard data loss.
run: echo "PYTEST_ARGS=--snapshot-update --snapshot-warn-unused" >> $GITHUB_ENV
- name: Add snapshot flags for compat subset runs
# Compat runs a subset and doesn't own snapshots (main Django rows do).
# Warn on unused snapshots to avoid version-specific false negatives.
if: ${{ needs.changes.outputs.backend == 'true' && matrix.compat }}
shell: bash
run: echo "PYTEST_ARGS=--snapshot-update --snapshot-warn-unused" >> $GITHUB_ENV
# Tests
- name: Log test environment diagnostics
if: ${{ needs.changes.outputs.backend == 'true' && matrix.segment == 'Core' }}
shell: bash
env:
SHARD_GROUP: ${{ matrix.group }}
SHARD_CONCURRENCY: ${{ matrix.concurrency }}
CLICKHOUSE_IMAGE: ${{ matrix.clickhouse-server-image }}
COMMIT_SHA: ${{ github.sha }}
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
run: |
echo "=== Test Environment Diagnostics ==="
echo "Shard: $SHARD_GROUP/$SHARD_CONCURRENCY"
echo "Python version: $(python --version)"
echo "ClickHouse version: $CLICKHOUSE_IMAGE"
echo "Commit: $COMMIT_SHA"
echo "Branch: $BRANCH_NAME"
echo "Runner: $(uname -a)"
echo "Memory: $(free -h | head -2)"
echo "Disk: $(df -h / | tail -1)"
echo "Docker containers:"
docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" || true
echo "==================================="
- name: Run Core tests
# Runs this shard's slice of the Core pytest suite (rows with segment 'Core').
# Test targets depend on the matrix row:
#   compat            -> env.CLICKHOUSE_COMPAT_PYTEST_TARGETS only
#   person-on-events  -> the clickhouse/queries/insight/dashboard paths listed below, plus ee/clickhouse/
#   otherwise         -> posthog plus ee/
# For person-on-events rows, posthog/hogql_queries and posthog/hogql are also ignored.
id: run-core-tests
if: ${{ needs.changes.outputs.backend == 'true' && matrix.segment == 'Core' }}
env:
PERSON_ON_EVENTS_V2_ENABLED: ${{ matrix.person-on-events && 'true' || 'false' }}
shell: bash
run: | # async_migrations covered in ci-async-migrations.yml
# Turn errexit off so pytest's exit status can be inspected below.
set +e
pytest -v --tb=short --reuse-db -o junit_duration_report=call ${{
matrix.compat
&& env.CLICKHOUSE_COMPAT_PYTEST_TARGETS
|| (
matrix.person-on-events
&& './posthog/clickhouse/ ./posthog/queries/ ./posthog/api/test/test_insight* ./posthog/api/test/dashboards/test_dashboard.py'
|| 'posthog'
)
}} ${{ matrix.compat && '' || (matrix.person-on-events && 'ee/clickhouse/' || 'ee/') }} -m "not async_migrations" \
--ignore=posthog/temporal \
--ignore=posthog/dags \
--ignore=common/hogvm/python/test \
${{ matrix.person-on-events && '--ignore=posthog/hogql_queries' || '' }} \
${{ matrix.person-on-events && '--ignore=posthog/hogql' || '' }} \
--splits ${{ matrix.concurrency }} --group ${{ matrix.group }} \
--durations=1000 --durations-min=1.0 --store-durations \
--splitting-algorithm=duration_based_chunks \
--reruns 2 --reruns-delay 1 \
-r fEsxX \
--junitxml=junit-core.xml \
$PYTEST_ARGS
exit_code=$?
set -e
# Exit status 5 is mapped to success; every other status is propagated unchanged.
if [ $exit_code -eq 5 ]; then
echo "No tests collected for this shard, this is expected when splitting tests"
exit 0
else
exit $exit_code
fi
# Uncomment this code to create an ssh-able console so you can debug issues with github actions
# (Consider changing the timeout in ci-backend.yml to have more time)
# - name: Setup tmate session
# if: failure()
# uses: mxschmitt/action-tmate@v3
- name: Log test environment diagnostics
if: ${{ needs.changes.outputs.backend == 'true' && matrix.segment == 'Temporal' }}
shell: bash
env:
SHARD_GROUP: ${{ matrix.group }}
SHARD_CONCURRENCY: ${{ matrix.concurrency }}
COMMIT_SHA: ${{ github.sha }}
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
run: |
echo "=== Test Environment Diagnostics ==="
echo "Shard: $SHARD_GROUP/$SHARD_CONCURRENCY"
echo "Python version: $(python --version)"
echo "Commit: $COMMIT_SHA"
echo "Branch: $BRANCH_NAME"
echo "Runner: $(uname -a)"
echo "Memory: $(free -h | head -2)"
echo "Disk: $(df -h / | tail -1)"
echo "Docker containers:"
docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" || true
echo "==================================="
- name: Run Temporal tests
  # Runs this shard's slice of the Temporal pytest targets (rows with
  # segment 'Temporal'). The Modal credentials are passed through only
  # when the `tasks_temporal` change filter is 'true'; otherwise they are
  # empty strings.
  id: run-temporal-tests
  if: ${{ needs.changes.outputs.backend == 'true' && matrix.segment == 'Temporal' }}
  shell: bash
  env:
    AWS_S3_ALLOW_UNSAFE_RENAME: 'true'
    MODAL_TOKEN_ID: ${{ needs.changes.outputs.tasks_temporal == 'true' && secrets.MODAL_TOKEN_ID || '' }}
    MODAL_TOKEN_SECRET: ${{ needs.changes.outputs.tasks_temporal == 'true' && secrets.MODAL_TOKEN_SECRET || '' }}
  run: |
    # errexit is suspended around pytest so its status can be examined.
    set +e
    pytest -v --tb=short --reuse-db -o junit_duration_report=call \
      --timeout=600 --timeout-method=thread \
      posthog/temporal \
      products/batch_exports/backend/tests/temporal \
      products/tasks/backend/temporal \
      -m "not async_migrations" \
      --splits ${{ matrix.concurrency }} --group ${{ matrix.group }} \
      --durations=100 --durations-min=1.0 --store-durations \
      --splitting-algorithm=duration_based_chunks \
      --reruns 2 --reruns-delay 1 \
      -r fEsxX \
      --junitxml=junit-temporal.xml \
      $PYTEST_ARGS
    pytest_status=$?
    set -e
    # Any status other than 5 is passed straight through.
    if [ $pytest_status -ne 5 ]; then
      exit $pytest_status
    fi
    echo "No tests collected for this shard, this is expected when splitting tests"
    exit 0
# Post tests
- name: Show docker compose logs on failure
  # Prints the compose service logs when the job failed and neither test
  # step's outcome is 'failure', i.e. something other than the tests
  # themselves broke. Only step ids defined in this job are referenced.
  if: failure() && (needs.changes.outputs.backend == 'true' && steps.run-core-tests.outcome != 'failure' && steps.run-temporal-tests.outcome != 'failure')
  shell: bash
  run: docker compose -f docker-compose.dev.yml logs
- name: Upload updated timing data as artifacts
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
# Only upload timing data on main branch to avoid artifact bloat from PRs
if: ${{ github.ref == 'refs/heads/master' && needs.changes.outputs.backend == 'true' && !matrix.person-on-events && matrix.clickhouse-server-image == needs.get_clickhouse_versions.outputs.oldest_supported_image }}
with:
name: timing_data-${{ matrix.segment }}-${{ matrix.group }}
path: .test_durations
include-hidden-files: true
retention-days: 2
- name: Verify new snapshots for flakiness
# Only in UPDATE mode - CHECK mode doesn't update snapshots
if: ${{ needs.detect-snapshot-mode.outputs.mode == 'update' && github.event.pull_request.head.repo.full_name == 'PostHog/posthog' && needs.changes.outputs.backend == 'true' && !matrix.person-on-events && !matrix.compat }}
shell: bash
run: |
.github/scripts/verify-new-snapshots.sh
- name: Generate snapshot patch
  # UPDATE mode only (CHECK mode verifies snapshots match exactly).
  # Writes a binary git patch of all changed .ambr snapshot files to
  # /tmp/patches/, named after this row's segment and shard group.
  if: ${{ needs.detect-snapshot-mode.outputs.mode == 'update' && github.event.pull_request.head.repo.full_name == 'PostHog/posthog' && needs.changes.outputs.backend == 'true' && !matrix.person-on-events && !matrix.compat }}
  shell: bash
  run: |
    patch_file=/tmp/patches/backend-${{ matrix.segment }}-${{ matrix.group }}.patch
    mkdir -p /tmp/patches
    # Mark new .ambr files as intent-to-add so `git diff` reports them too.
    git add -N '*.ambr' || true
    if git diff --quiet '*.ambr' 2>/dev/null; then
      echo "No snapshot changes to patch"
    else
      git diff --binary --full-index '*.ambr' > "$patch_file"
      echo "Generated patch with $(wc -l < "$patch_file") lines"
    fi
- name: Upload snapshot patch
# Only in UPDATE mode
if: ${{ needs.detect-snapshot-mode.outputs.mode == 'update' && github.event.pull_request.head.repo.full_name == 'PostHog/posthog' && needs.changes.outputs.backend == 'true' && !matrix.person-on-events && !matrix.compat }}
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: snapshot-patch-${{ matrix.segment }}-${{ matrix.group }}
path: /tmp/patches/
if-no-files-found: ignore
retention-days: 1
- name: Archive email renders
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
if: needs.changes.outputs.backend == 'true' && matrix.segment == 'Core' && !matrix.person-on-events && !matrix.compat
with:
name: email_renders-${{ github.sha }}-${{ github.run_attempt }}-${{ matrix.segment }}-${{ matrix.person-on-events }}-${{ matrix.group }}
path: posthog/tasks/test/__emails__
retention-days: 1
- name: Upload test results
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
if: always()
with:
name: junit-results-backend-${{ matrix.artifact_key }}
path: junit-*.xml
# Resolve the ClickHouse server versions used to build the Django test matrix
get_clickhouse_versions:
# Reads .github/clickhouse-versions.json and publishes the oldest supported
# ClickHouse image plus a compat matrix (every other version x shard group)
# as job outputs.
name: Get ClickHouse versions
needs: [changes]
if: needs.changes.outputs.backend == 'true'
runs-on: ubuntu-latest
timeout-minutes: 5
outputs:
# Oldest supported version for main Django tests (JSON array for matrix)
oldest_supported: ${{ steps.read-versions.outputs.oldest_supported }}
# Oldest supported version as plain string (for comparisons)
oldest_supported_image: ${{ steps.read-versions.outputs.oldest_supported_image }}
# Fully expanded compat matrix (version x shard group)
compat_matrix: ${{ steps.read-versions.outputs.compat_matrix }}
steps:
# NOTE(review): this checkout is referenced by tag (v6) while most other
# actions in this file are pinned to a commit SHA — confirm this is intended.
# Only the versions file is checked out (sparse checkout, non-cone mode).
- uses: actions/checkout@v6
with:
sparse-checkout: .github/clickhouse-versions.json
sparse-checkout-cone-mode: false
- name: Read ClickHouse versions from JSON
id: read-versions
run: |
# Number of shard groups generated for each compat version.
compat_shards="2"
echo "Using $compat_shards shard(s) per compat version"
# Oldest supported version for main Django tests (for max compatibility, as JSON array for matrix)
oldest_supported=$(jq -r '.oldest_supported' .github/clickhouse-versions.json)
if [ -z "$oldest_supported" ] || [ "$oldest_supported" = "null" ]; then
echo "::error::No oldest_supported version found in .github/clickhouse-versions.json"
exit 1
fi
echo "oldest_supported=[\"$oldest_supported\"]" >> $GITHUB_OUTPUT
echo "oldest_supported_image=$oldest_supported" >> $GITHUB_OUTPUT
echo "Oldest supported version for Django tests: $oldest_supported"
# Read all unique versions so we can derive compat coverage.
all=$(jq -c '[.[]] | unique' .github/clickhouse-versions.json)
if [ "$all" = "[]" ] || [ -z "$all" ]; then
echo "::error::No versions found in .github/clickhouse-versions.json"
exit 1
fi
echo "All CH versions found: $all"
# Compat coverage is only needed for non-oldest versions.
compat_versions=$(jq -c --arg oldest "$oldest_supported" '[.[]] | unique | map(select(. != $oldest))' .github/clickhouse-versions.json)
echo "Compat versions (excluding oldest): $compat_versions"
compat_count=$(jq -r 'length' <<< "$compat_versions")
# NOTE(review): compat_count is written as a step output but is not exposed
# in this job's `outputs:` mapping — confirm whether anything consumes it.
echo "compat_count=$compat_count" >> $GITHUB_OUTPUT
if [ "$compat_count" -eq 0 ]; then
echo "compat_matrix=[]" >> $GITHUB_OUTPUT
echo "No non-oldest CH versions found — compat tests provide no additional coverage, skipping"
else
# One row per (version, shard group) pair, with groups numbered 1..compat_shards.
compat_matrix=$(jq -cn --argjson versions "$compat_versions" --argjson shards "$compat_shards" '
[
$versions[] as $version
| range(1; $shards + 1) as $group
| {
"clickhouse-server-image": $version,
"concurrency": $shards,
"group": $group
}
]
')
echo "compat_matrix=$compat_matrix" >> $GITHUB_OUTPUT
echo "Compat matrix: $compat_matrix"
fi
# Aggregate and commit snapshot changes from all matrix jobs
handle-snapshots:
  name: Commit snapshot changes
  needs: [changes, detect-snapshot-mode, django, turbo-tests]
  # Only in UPDATE mode - CHECK mode verifies snapshots match exactly
  # Run even if some matrix jobs failed (to commit snapshots from passing jobs)
  if: ${{ always() && needs.detect-snapshot-mode.outputs.mode == 'update' && needs.changes.outputs.backend == 'true' && github.event.pull_request.head.repo.full_name == 'PostHog/posthog' }}
  runs-on: ubuntu-latest
  timeout-minutes: 5
  permissions:
    contents: write
    pull-requests: write
  steps:
    # Use GitHub app token so Actions run after committing updated snapshots
    - name: Get app token
      id: app-token
      uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
      with:
        client-id: ${{ secrets.GH_APP_POSTHOG_TESTS_APP_ID }}
        private-key: ${{ secrets.GH_APP_POSTHOG_TESTS_PRIVATE_KEY }}
    # Check out the PR head commit using the app token obtained above.
    - name: Checkout
      uses: actions/checkout@v6
      with:
        ref: ${{ github.event.pull_request.head.sha }}
        repository: ${{ github.event.pull_request.head.repo.full_name }}
        token: ${{ steps.app-token.outputs.token }}
        fetch-depth: 1
    # A failed download is tolerated here; the next step inspects its outcome.
    - name: Download all snapshot patches
      id: download-patches
      continue-on-error: true
      uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
      with:
        pattern: snapshot-patch-*
        path: /tmp/snapshot-patches
        merge-multiple: true
    - name: Check for patches
      id: check-patches
      run: |
        # No patches if the download step failed or produced no directory.
        if [ "${{ steps.download-patches.outcome }}" == "failure" ] || [ ! -d /tmp/snapshot-patches ]; then
          echo "has-patches=false" >> "$GITHUB_OUTPUT"
          echo "No snapshot patches found"
          exit 0
        fi
        # Only non-empty *.patch files count as snapshot changes.
        if [ -n "$(find /tmp/snapshot-patches -name '*.patch' -type f -size +0c 2>/dev/null)" ]; then
          echo "has-patches=true" >> "$GITHUB_OUTPUT"
          echo "Found patches:"
          ls -la /tmp/snapshot-patches/
        else
          echo "has-patches=false" >> "$GITHUB_OUTPUT"
          echo "Patch files empty or missing - no snapshot changes"
        fi
    - name: Commit snapshots
      if: steps.check-patches.outputs.has-patches == 'true'
      uses: ./.github/actions/commit-snapshots
      with:
        workflow-type: backend
        patch-path: /tmp/snapshot-patches
        snapshot-path: '.'
        commit-message: 'test(backend): update query snapshots'
        pr-number: ${{ github.event.pull_request.number }}
        repository: ${{ github.repository }}
        commit-sha: ${{ github.event.pull_request.head.sha }}
        branch-name: ${{ github.event.pull_request.head.ref }}
        github-token: ${{ steps.app-token.outputs.token }}
# Gate job: collates the results of the jobs it needs so a single passing status can be required.
# It must depend on handle-snapshots so auto-merge cannot happen before snapshot commits complete.
django_tests:
  name: Django Tests Pass
  needs: [django, check-migrations, async-migrations, turbo-discover, turbo-tests, handle-snapshots, repo-checks]
  if: always()
  runs-on: ubuntu-latest
  timeout-minutes: 5
  steps:
    - name: Check matrix outcome
      run: |
        # Fail this job with the given message unless the result is 'success' or 'skipped'.
        require_passed() {
          local result="$1"
          local message="$2"
          if [[ "$result" != "success" && "$result" != "skipped" ]]; then
            echo "$message"
            exit 1
          fi
        }

        # The django matrix result is 'success' only if every job in the matrix succeeded.
        require_passed "${{ needs.django.result }}" "One or more jobs in the Django test matrix failed."
        # Migration validation
        require_passed "${{ needs.check-migrations.result }}" "Migration checks failed."
        # Async migrations
        require_passed "${{ needs.async-migrations.result }}" "Async migrations tests failed."
        # Repo checks (product lint, IDOR, operator parity, tach)
        require_passed "${{ needs.repo-checks.result }}" "Repo checks failed."
        # Turbo discover
        require_passed "${{ needs.turbo-discover.result }}" "Turbo discover failed."
        # Turbo tests (products)
        require_passed "${{ needs.turbo-tests.result }}" "Turbo tests failed."

        # handle-snapshots is looser: only an explicit 'failure' blocks the gate.
        if [[ "${{ needs.handle-snapshots.result }}" == "failure" ]]; then
          echo "Snapshot commit job failed."
          exit 1
        fi
        echo "All checks passed."
test-selection-verdict:
  name: Test selection verdict
  needs: [django, turbo-tests, changes]
  # Pull requests only; always() lets it run whatever the needed jobs' results were.
  if: always() && github.event_name == 'pull_request'
  runs-on: ubuntu-latest
  timeout-minutes: 10
  permissions:
    contents: read
  steps:
    - uses: actions/checkout@v6
      with:
        fetch-depth: 1000
        filter: blob:none
    - name: Install uv
      uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0
      with:
        version: '0.10.2'
    # A failed artifact download does not fail the job.
    - name: Download JUnit artifacts
      continue-on-error: true
      uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
      with:
        pattern: junit-results-backend-*
        path: /tmp/junit-results/
    - name: Run test selection and verdict
      env:
        BASE_REF: ${{ github.event.pull_request.base.ref }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
        PR_SHA: ${{ github.event.pull_request.head.sha }}
        PR_BRANCH: ${{ github.event.pull_request.head.ref }}
      run: |
        set -euo pipefail
        verdict_dir=/tmp/verdict
        mkdir -p "$verdict_dir"
        # Diff against the base branch's *current* tip rather than pull_request.base.sha.
        # base.sha is recorded at webhook time; once the PR branch merges a newer master
        # it is stale, and `base.sha...HEAD` would then include every merged-in master change.
        git fetch --no-tags --depth=1000 --filter=blob:none origin "$BASE_REF:refs/remotes/origin/$BASE_REF"
        # Step 1: write the selection output to selection.json.
        uv run tools/snob_backend_test_selection_shadow.py --base-ref "origin/$BASE_REF" --pretty \
          > "$verdict_dir/selection.json"
        # Step 2: run the verdict tool on the selection and the downloaded JUnit results.
        uv run tools/test_selection_verdict.py "$verdict_dir/selection.json" /tmp/junit-results/ \
          --summary-path "$GITHUB_STEP_SUMMARY" --pretty \
          > "$verdict_dir/verdict.json"
    # Uploaded even when the previous step failed.
    - name: Upload verdict
      if: always()
      uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
      with:
        name: test-selection-verdict-pr${{ github.event.pull_request.number }}
        path: /tmp/verdict/
        retention-days: 90
async-migrations:
  name: Async migrations tests - ${{ matrix.clickhouse-server-image }} (depot-ubuntu-latest)
  needs: [changes, turbo-discover, get_clickhouse_versions]
  strategy:
    fail-fast: false
    matrix:
      # One-element JSON array produced by get_clickhouse_versions.
      clickhouse-server-image: ${{ fromJson(needs.get_clickhouse_versions.outputs.oldest_supported) }}
  # Run if legacy code changed, product changes affect legacy, or turbo-discover failed
  if: |
    always() &&
    needs.changes.outputs.backend == 'true' &&
    (needs.changes.outputs.legacy == 'true' ||
    needs.turbo-discover.outputs.run_legacy == 'true' ||
    (needs.turbo-discover.result != 'success' && needs.turbo-discover.result != 'skipped'))
  runs-on: depot-ubuntu-latest
  timeout-minutes: 30
  steps:
    - name: 'Checkout repo'
      uses: actions/checkout@v6
      with:
        fetch-depth: 1
        clean: false
    - name: Clean up data directories with container permissions
      run: |
        # Use docker to clean up files created by containers
        [ -d "data" ] && docker run --rm -v "$(pwd)/data:/data" alpine sh -c "rm -rf /data/seaweedfs /data/minio" || true
      continue-on-error: true
    # Launch the stack in the background; a later step waits for it to be ready.
    - name: Start stack with Docker Compose
      env:
        COMPOSE_FILE: docker-compose.dev.yml:docker-compose.profiles.yml
        COMPOSE_PROFILES: temporal,azure
        CLICKHOUSE_SERVER_IMAGE: ${{ matrix.clickhouse-server-image || needs.get_clickhouse_versions.outputs.oldest_supported_image }}
      run: |
        bin/ci-wait-for-docker launch --background --down
    - name: Set up Python
      uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
      with:
        python-version-file: 'pyproject.toml'
    - name: Install uv
      id: setup-uv-async
      uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0
      with:
        version: '0.10.2' # pinned: unpinned setup-uv calls GH API on every job, exhausts rate limit
        enable-cache: true
        cache-dependency-glob: uv.lock
        # Only master runs write the cache.
        save-cache: ${{ github.ref == 'refs/heads/master' }}
    # Skipped when the uv cache was hit.
    - name: Install SAML (python3-saml) dependencies
      if: steps.setup-uv-async.outputs.cache-hit != 'true'
      run: |
        sudo apt-get update
        # -y: never wait on a confirmation prompt, whatever the runner image's apt config is.
        sudo apt-get install -y libxml2-dev libxmlsec1-dev libxmlsec1-openssl
    - name: Install Rust
      uses: dtolnay/rust-toolchain@0b1efabc08b657293548b77fb76cc02d26091c7e
      with:
        toolchain: 1.91.1
        components: cargo
    - name: Cache Rust dependencies
      uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
      with:
        shared-key: 'v2-rust-backend'
        workspaces: rust
        # Only master runs write the cache.
        save-if: ${{ github.ref == 'refs/heads/master' }}
    - name: Install sqlx-cli
      uses: ./.github/actions/setup-sqlx-cli
    - name: Install python dependencies
      shell: bash
      run: |
        UV_PROJECT_ENVIRONMENT=$pythonLocation uv sync --frozen --dev
    # Only `tee` needs root to append to /etc/hosts; `echo` runs unprivileged.
    - name: Add service hostnames to /etc/hosts
      run: echo "127.0.0.1 db redis7 kafka clickhouse clickhouse-coordinator objectstorage seaweedfs temporal" | sudo tee -a /etc/hosts
    # Create empty frontend build files.
    - name: Set up needed files
      run: |
        mkdir -p frontend/dist
        touch frontend/dist/index.html
        touch frontend/dist/layout.html
        touch frontend/dist/exporter.html
    # Same compose environment as the launch step above.
    - name: Wait for Docker services
      shell: bash
      env:
        COMPOSE_FILE: docker-compose.dev.yml:docker-compose.profiles.yml
        COMPOSE_PROFILES: temporal,azure
        CLICKHOUSE_SERVER_IMAGE: ${{ matrix.clickhouse-server-image || needs.get_clickhouse_versions.outputs.oldest_supported_image }}
      run: bin/ci-wait-for-docker wait
    - name: Run async migrations tests
      run: |
        pytest -m "async_migrations" --reruns 2 --reruns-delay 1 --junitxml=junit.xml
    # Uploaded even when the tests failed.
    - name: Upload test results
      uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
      if: always()
      with:
        name: junit-results-async-migrations
        path: junit.xml
calculate-running-time:
  name: Calculate running time
  needs: [django_tests, async-migrations]
  # Run on pull requests to PostHog/posthog + on PostHog/posthog outside of PRs -
  # but never on forks or Dependabot (no secrets access)
  if: >-
    always() && github.actor != 'dependabot[bot]' && (
    (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == 'PostHog/posthog') ||
    (github.event_name != 'pull_request' && github.repository == 'PostHog/posthog'))
  runs-on: ubuntu-latest
  timeout-minutes: 5
  steps:
    # Every step is limited to the first run attempt.
    - name: Get telemetry app token
      id: telemetry-app-token
      if: github.run_attempt == '1'
      uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
      with:
        client-id: ${{ secrets.GH_APP_TELEMETRY_APP_ID }}
        private-key: ${{ secrets.GH_APP_TELEMETRY_PRIVATE_KEY }}
    - name: Capture running time to PostHog
      if: github.run_attempt == '1'
      uses: PostHog/posthog-github-action@58dea254b598fb5d469c0699c98af8288a7f7650 # v1.2.0
      with:
        posthog-token: ${{ secrets.POSTHOG_API_TOKEN }}
        github-token: ${{ steps.telemetry-app-token.outputs.token }}
        event: 'posthog-ci-running-time'
        capture-run-duration: true
        capture-job-durations: true
        status-job: 'Django Tests Pass'
        runner: 'depot'
    # Same capture with the DevEx project token; a failure here does not fail the job.
    - name: Capture running time to DevEx PostHog
      if: github.run_attempt == '1'
      continue-on-error: true
      uses: PostHog/posthog-github-action@58dea254b598fb5d469c0699c98af8288a7f7650 # v1.2.0
      with:
        posthog-token: ${{ secrets.POSTHOG_DEVEX_PROJECT_API_TOKEN }}
        github-token: ${{ steps.telemetry-app-token.outputs.token }}
        event: 'posthog-ci-running-time'
        capture-run-duration: true
        capture-job-durations: true
        status-job: 'Django Tests Pass'
        runner: 'depot'
report-test-timings:
  name: Report per-test traces
  needs: [django_tests]
  runs-on: ubuntu-latest
  # A failure in this job does not fail the workflow run.
  continue-on-error: true
  timeout-minutes: 10
  # Master pushes always; PRs only when labelled `capture-test-timings`. Skip forks (no secrets access).
  if: >-
    always() &&
    github.repository == 'PostHog/posthog' &&
    (
    (github.event_name != 'pull_request' && github.ref == 'refs/heads/master') ||
    (github.event_name == 'pull_request' &&
    github.event.pull_request.head.repo.full_name == 'PostHog/posthog' &&
    contains(github.event.pull_request.labels.*.name, 'capture-test-timings'))
    )
  steps:
    # Labelled PRs run the trusted base parser, so the same PR can't swap the script that receives secrets.
    - uses: actions/checkout@v6
      if: github.run_attempt == '1'
      with:
        # PRs check out the base commit; every other event checks out the triggering commit.
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || github.sha }}
        fetch-depth: 1
    # Later steps are gated on the reporter script existing on the checked-out ref.
    - name: Check for trusted timing reporter
      id: timing-reporter
      if: github.run_attempt == '1'
      shell: bash
      run: |
        if [[ -f .github/scripts/report_test_timings.py ]]; then
          echo "available=true" >> "$GITHUB_OUTPUT"
        else
          echo "available=false" >> "$GITHUB_OUTPUT"
          echo "Trusted timing reporter is not available on the checked-out ref; skipping emit."
        fi
    - uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0
      if: github.run_attempt == '1' && steps.timing-reporter.outputs.available == 'true'
      with:
        version: '0.10.2' # pinned: unpinned setup-uv calls GH API on every job, exhausts rate limit
    # Same download-artifact pin as the other jobs in this workflow.
    - name: Download junit artifacts
      if: github.run_attempt == '1' && steps.timing-reporter.outputs.available == 'true'
      uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
      with:
        path: ./junit-artifacts
        pattern: junit-results-*
    # A failure of the reporter script does not fail the job.
    - name: Emit per-test traces
      if: github.run_attempt == '1' && steps.timing-reporter.outputs.available == 'true'
      continue-on-error: true
      env:
        POSTHOG_DEVEX_PROJECT_API_TOKEN: ${{ secrets.POSTHOG_DEVEX_PROJECT_API_TOKEN }}
        # Falls back to the US endpoint when the repository variable is unset.
        POSTHOG_OTLP_TRACES_ENDPOINT: ${{ vars.POSTHOG_OTLP_TRACES_ENDPOINT || 'https://us.i.posthog.com/i/v1/traces' }}
      run: |
        uv run --script .github/scripts/report_test_timings.py \
          --min-duration-seconds=0.5 \
          ./junit-artifacts