docs(16): archive Phase 16 planning artifacts (#50) #139
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# .github/workflows/ci.yml
# Single source of truth for Phase 1 CI. Every `run:` step invokes `just <recipe>`
# exclusively (D-10 / FOUND-12). No inline `cargo` / `docker` / `rustup` / `sqlx` /
# `npm` / `npx` commands.
name: ci

on:
  pull_request:
  push:
    branches: [main]

# One in-flight run per ref; superseded runs are cancelled to save runner minutes.
concurrency:
  group: ci-${{ github.ref }}
  cancel-in-progress: true

# Top-level permissions: read-only by default. `packages: write` is scoped
# per-job to the `image` job only (T-01-13).
permissions:
  contents: read

jobs:
  lint:
    name: lint (fmt + clippy + openssl-sys guard)
    runs-on: ubuntu-latest
    timeout-minutes: 15
    permissions:
      contents: read
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          components: rustfmt, clippy
      - uses: Swatinem/rust-cache@v2
      - uses: extractions/setup-just@v2
      - run: just fmt-check
      - run: just clippy
      # `just openssl-check` depends on `just install-targets` and loops over
      # native + amd64-musl + arm64-musl in a single run. One lint-job invocation
      # covers every target CI ships (Pitfall 14 -- 01-RESEARCH.md S14).
      - run: just openssl-check
      # Phase 13 OBS-05 structural parity guard — p50/p95 percentile is computed
      # in Rust via src/web/stats.rs::percentile, never via SQL-native
      # percentile_cont / percentile_disc / median() (even on Postgres). This
      # step permanently prevents any future PR from introducing SQL-native
      # percentile into src/. See justfile recipe for the grep pattern + rationale.
      - run: just grep-no-percentile-cont
      # Phase 15 / FOUND-16. cargo-deny supply-chain check (advisories +
      # licenses + duplicate-versions). Non-blocking on rc.1 per D-09 — the
      # step is marked continue-on-error: true so a transient advisory or
      # transitive duplicate-version finding cannot redden CI in v1.2 hands.
      # Promoted to blocking (single-line removal of continue-on-error)
      # before final v1.2.0 ships in Phase 24. Pairs with deny.toml's
      # `bans.multiple-versions = "warn"` for two-layer non-blocking (D-10).
      - uses: taiki-e/install-action@v2
        with:
          tool: cargo-deny
      - run: just deny
        continue-on-error: true

  # Test matrix: arch dimension only.
  # Both SQLite and Postgres backends are exercised in every cell via
  # testcontainers-modules::postgres (integration tests in tests/schema_parity.rs
  # and tests/db_pool_postgres.rs boot a real Postgres container). A per-cell
  # `db` dimension would be cosmetic -- see ci.yml design notes in
  # .planning/phases/01-foundation-security-posture-persistence-base/01-RESEARCH.md S10.
  test:
    name: test ${{ matrix.arch }}
    runs-on: ubuntu-latest
    timeout-minutes: 30
    permissions:
      contents: read
    strategy:
      fail-fast: false
      matrix:
        arch: [amd64, arm64]
    env:
      # Use the checked-in sqlx query metadata instead of a live database at compile time.
      SQLX_OFFLINE: "true"
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
        with:
          # Separate cache per matrix cell so amd64/arm64 artifacts don't collide.
          key: ${{ matrix.arch }}
      - uses: extractions/setup-just@v2
      - uses: taiki-e/install-action@v2
        with:
          tool: nextest,cargo-zigbuild
      - name: Install cross-compile targets (arm64 cells only)
        if: matrix.arch == 'arm64'
        run: just install-targets
      # Pre-pull testcontainers images from Google's public Docker Hub mirror
      # and retag locally. Avoids the anonymous Docker Hub pull rate limit
      # (100/6h per IP) that intermittently breaks the test matrix, since both
      # amd64 and arm64 cells run on ubuntu-latest and pull the same images.
      # mirror.gcr.io has much higher anonymous rate limits and is transparent
      # to testcontainers-rs because bollard finds the retagged image locally
      # before attempting an upstream pull.
      - name: Pre-pull testcontainers images via mirror.gcr.io
        run: |
          set -euo pipefail
          for image in postgres:11-alpine alpine:latest; do
            echo "::group::Pre-pull ${image}"
            docker pull "mirror.gcr.io/library/${image}"
            docker tag "mirror.gcr.io/library/${image}" "${image}"
            echo "::endgroup::"
          done
      - run: just nextest
      - run: just schema-diff

  image:
    name: multi-arch docker image
    runs-on: ubuntu-latest
    needs: [lint, test]
    timeout-minutes: 45
    # Per-job permissions: scoped `packages: write` for GHCR push (T-01-13).
    permissions:
      contents: read
      packages: write
    steps:
      - uses: actions/checkout@v4
      - uses: docker/setup-qemu-action@v3
      - uses: docker/setup-buildx-action@v3
      - uses: extractions/setup-just@v2
      # PR path: build both platforms, load no push
      - name: just image (PR -- build only)
        if: github.event_name == 'pull_request'
        run: just image
      # main path: :main floating tag is owned by main-build.yml (OPS-10).
      # ci.yml's image job stays PR-only — verifies multi-arch BUILD works
      # without pushing. On push-to-main this job becomes a no-op because the
      # only remaining step (`just image (PR -- build only)` above) is gated on
      # `github.event_name == 'pull_request'`. DO NOT re-add a main-push step
      # here — that is what caused the pre-Phase-12.1 :latest divergence.

  compose-smoke:
    name: quickstart compose smoke (${{ matrix.compose }})
    runs-on: ubuntu-latest
    timeout-minutes: 20
    permissions:
      contents: read
    # Phase 8 D-18..D-22: matrix over both quickstart compose files. Each axis
    # boots the full stack, triggers Run Now on every example job via the API,
    # and asserts all four reach status=success within 120s. Extends the Phase 6
    # gap-closure compose-smoke job rather than adding a second job (D-22 —
    # single compose up/down cycle per axis).
    strategy:
      fail-fast: false
      matrix:
        compose:
          - docker-compose.yml
          - docker-compose.secure.yml
    steps:
      - uses: actions/checkout@v4
      - name: Install jq
        run: |
          if ! command -v jq >/dev/null 2>&1; then
            sudo apt-get update -qq
            sudo apt-get install -y --no-install-recommends jq
          fi
          jq --version
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Build local cronduit:ci image from PR checkout
        uses: docker/build-push-action@v6
        with:
          context: .
          file: Dockerfile
          platforms: linux/amd64
          push: false
          load: true
          tags: cronduit:ci
          cache-from: type=gha,scope=cronduit-ci-smoke
          cache-to: type=gha,mode=max,scope=cronduit-ci-smoke
      - name: Point compose at locally-built cronduit:ci image
        env:
          COMPOSE_FILE: ${{ matrix.compose }}
        run: |
          # Point the smoke test at the just-built cronduit:ci image so it
          # exercises PR code, not stale ghcr :latest. Two paths:
          # (a) Compose file uses ${CRONDUIT_IMAGE:-...} — set CRONDUIT_IMAGE
          # env var; docker compose substitution does the rest.
          # (b) Compose file uses literal image: ... — sed-rewrite in place.
          # Never committed either way.
          if grep -qE '\$\{CRONDUIT_IMAGE(:-[^}]*)?\}' "examples/${COMPOSE_FILE}"; then
            echo "Compose file uses CRONDUIT_IMAGE env var — exporting CRONDUIT_IMAGE=cronduit:ci"
            echo "CRONDUIT_IMAGE=cronduit:ci" >> "$GITHUB_ENV"
          else
            sed -i "s|ghcr.io/simplicityguy/cronduit:latest|cronduit:ci|g" "examples/${COMPOSE_FILE}"
            count=$(grep -c 'image: cronduit:ci' "examples/${COMPOSE_FILE}" || true)
            if [ "$count" -ne 1 ]; then
              echo "ERROR: expected exactly 1 'image: cronduit:ci' line, found ${count}"
              cat "examples/${COMPOSE_FILE}"
              exit 1
            fi
          fi
      - name: Derive DOCKER_GID from runner's docker.sock
        # Explicit derivation makes the default-compose axis robust against
        # ubuntu-latest image bumps that change the docker group GID. The
        # secure-compose axis doesn't need this (its socket-proxy runs as
        # root in its own container), but exporting DOCKER_GID unconditionally
        # is a no-op there since nothing reads it.
        run: |
          DG=$(stat -c %g /var/run/docker.sock)
          echo "DOCKER_GID=${DG}" >> "$GITHUB_ENV"
          echo "derived DOCKER_GID=${DG}"
      - name: docker compose up -d
        working-directory: examples
        env:
          COMPOSE_FILE: ${{ matrix.compose }}
        run: docker compose -f "${COMPOSE_FILE}" up -d
      - name: Wait for /health (max 30s)
        env:
          COMPOSE_FILE: ${{ matrix.compose }}
        run: |
          set -eu
          # Poll once per second; exit 0 on the first successful response.
          for i in $(seq 1 30); do
            if curl -sSf http://localhost:8080/health >/tmp/health.json 2>/dev/null; then
              echo "health responded after ${i}s"
              cat /tmp/health.json
              exit 0
            fi
            sleep 1
          done
          echo "ERROR: /health never responded after 30s"
          docker compose -f "examples/${COMPOSE_FILE}" logs
          exit 1
      - name: Assert /health body contains status:ok
        run: |
          set -eu
          body=$(curl -sSf http://localhost:8080/health) || {
            echo "ERROR: /health curl failed with exit $?"
            exit 1
          }
          echo "health body: $body"
          echo "$body" | grep -q '"status":"ok"' || {
            echo "ERROR: /health body missing status:ok"
            exit 1
          }
      - name: Assert dashboard lists all four quickstart jobs
        run: |
          set -eu
          dash=$(curl -sSf http://localhost:8080/)
          for job in echo-timestamp http-healthcheck disk-usage hello-world; do
            echo "$dash" | grep -q "$job" || {
              echo "ERROR: dashboard missing job: $job"
              echo "---- full dashboard body ----"
              echo "$dash"
              exit 1
            }
          done
          echo "all four jobs present on dashboard"
      - name: Trigger Run Now on every example job and assert success within 120s
        env:
          COMPOSE_FILE: ${{ matrix.compose }}
        run: |
          set -eu
          BASE="http://localhost:8080"
          JOBS="echo-timestamp http-healthcheck disk-usage hello-world"
          BUDGET_SECS=120
          POLL_INTERVAL=2
          COOKIE_JAR=$(mktemp)
          # Prime the CSRF cookie by GETting the dashboard. The
          # ensure_csrf_cookie middleware sets the `cronduit_csrf` cookie on
          # first response; subsequent POSTs must echo that value in both the
          # cookie header and the `csrf_token` form field (validate_csrf
          # requires byte-equal non-empty strings).
          curl -sSf -c "${COOKIE_JAR}" -o /dev/null "${BASE}/"
          # Netscape cookie-jar format: field 6 = cookie name, field 7 = value.
          CSRF_TOKEN=$(awk '$6 == "cronduit_csrf" { print $7 }' "${COOKIE_JAR}")
          if [ -z "${CSRF_TOKEN}" ]; then
            echo "ERROR: cronduit_csrf cookie not set after GET /"
            cat "${COOKIE_JAR}"
            exit 1
          fi
          echo "primed CSRF cookie (len=${#CSRF_TOKEN})"
          # Fetch all jobs once and map name -> id (the Run Now API takes id, not name).
          jobs_json=$(curl -sSf "${BASE}/api/jobs")
          echo "---- /api/jobs body ----"
          echo "$jobs_json" | jq '.'
          for name in $JOBS; do
            id=$(echo "$jobs_json" | jq -r --arg n "$name" '.[] | select(.name == $n) | .id')
            if [ -z "$id" ] || [ "$id" = "null" ]; then
              echo "ERROR: could not resolve job id for '$name' from /api/jobs"
              exit 1
            fi
            echo "triggering Run Now for ${name} (id=${id})"
            curl -sSf -b "${COOKIE_JAR}" \
              -H "Content-Type: application/x-www-form-urlencoded" \
              --data-urlencode "csrf_token=${CSRF_TOKEN}" \
              -X POST "${BASE}/api/jobs/${id}/run" -o /dev/null || {
                echo "ERROR: POST /api/jobs/${id}/run failed for ${name}"
                exit 1
              }
          done
          echo "polling run history for every job (per-job budget ${BUDGET_SECS}s, interval ${POLL_INTERVAL}s)"
          for name in $JOBS; do
            deadline=$(( $(date +%s) + BUDGET_SECS )) # per-job budget: avoids first slow job starving later ones
            id=$(echo "$jobs_json" | jq -r --arg n "$name" '.[] | select(.name == $n) | .id')
            while :; do
              now=$(date +%s)
              if [ "$now" -ge "$deadline" ]; then
                echo "ERROR: job '${name}' did not reach status=success within ${BUDGET_SECS}s"
                echo "---- latest runs for ${name} ----"
                curl -sSf "${BASE}/api/jobs/${id}/runs?limit=5" | jq '.'
                exit 1
              fi
              latest=$(curl -sSf "${BASE}/api/jobs/${id}/runs?limit=1" | jq -r '.[0].status // empty')
              case "$latest" in
                success)
                  echo "job '${name}' reached status=success"
                  break
                  ;;
                failed|timeout|cancelled)
                  echo "ERROR: job '${name}' reached terminal status=${latest} (expected success)"
                  curl -sSf "${BASE}/api/jobs/${id}/runs?limit=5" | jq '.'
                  exit 1
                  ;;
                running|scheduled|"")
                  sleep "$POLL_INTERVAL"
                  ;;
                *)
                  echo "WARN: unknown status='${latest}' for ${name}, continuing poll"
                  sleep "$POLL_INTERVAL"
                  ;;
              esac
            done
          done
          echo "all four example jobs reached status=success within ${BUDGET_SECS}s"
      - name: Dump diagnostics on failure
        if: failure()
        env:
          COMPOSE_FILE: ${{ matrix.compose }}
        run: |
          echo "::group::cronduit logs (tail 200)"
          docker compose -f "examples/${COMPOSE_FILE}" logs cronduit --tail=200 || true
          echo "::endgroup::"
          echo "::group::dockerproxy logs (tail 50, secure axis only)"
          docker compose -f "examples/${COMPOSE_FILE}" logs dockerproxy --tail=50 2>/dev/null || echo "(no dockerproxy service in this axis)"
          echo "::endgroup::"
          echo "::group::run history tail per job"
          for name in echo-timestamp http-healthcheck disk-usage hello-world; do
            echo "---- ${name} ----"
            jobs_json=$(curl -sSf "http://localhost:8080/api/jobs" 2>/dev/null || echo "[]")
            id=$(echo "$jobs_json" | jq -r --arg n "$name" '.[] | select(.name == $n) | .id' 2>/dev/null || true)
            if [ -n "$id" ] && [ "$id" != "null" ]; then
              curl -sSf "http://localhost:8080/api/jobs/${id}/runs?limit=5" 2>/dev/null | jq '.' || echo "(unreachable)"
            else
              echo "(id not found)"
            fi
          done
          echo "::endgroup::"
          echo "::group::cronduit_docker_reachable gauge"
          curl -sSf http://localhost:8080/metrics 2>/dev/null | grep cronduit_docker_reachable || echo "(gauge unavailable)"
          echo "::endgroup::"
      - name: Tear down compose stack
        if: always()
        working-directory: examples
        env:
          COMPOSE_FILE: ${{ matrix.compose }}
        run: docker compose -f "${COMPOSE_FILE}" down -v