Skip to content

feat(pipeline): harvest related legislation discovered during enrichm… #3854

feat(pipeline): harvest related legislation discovered during enrichm…

feat(pipeline): harvest related legislation discovered during enrichm… #3854

Workflow file for this run

---
# Continuous-integration workflow: runs on pushes to main and on pull requests
# that target main.
name: CI
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
# Workflow-wide default for the GITHUB_TOKEN; jobs that need more declare their
# own `permissions` block.
permissions:
  contents: read
env:
  CARGO_TERM_COLOR: always
  # Deny the rustc `warnings` lint group, so any compiler warning fails the build.
  RUSTFLAGS: "-Dwarnings"
jobs:
changes:
  # Computes one boolean output per area of the repository; downstream jobs
  # read these through `needs.changes.outputs.<name>` to decide whether to run.
  name: Detect changes
  runs-on: ubuntu-latest
  timeout-minutes: 5
  outputs:
    ci: ${{ steps.filter.outputs.ci }}
    admin: ${{ steps.filter.outputs.admin }}
    editor-api: ${{ steps.filter.outputs.editor-api }}
    docs: ${{ steps.filter.outputs.docs }}
  steps:
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
    - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d  # v4.0.1
      id: filter
      with:
        # The dependency manifests (packages/Cargo.toml, packages/Cargo.lock,
        # package.json, package-lock.json) are listed explicitly: the jobs
        # below key their caches on these lockfiles and install from them, so
        # a dependency-only change must still trigger the builds and tests.
        # NOTE(review): packages/Cargo.toml and the root package.json are
        # assumed to sit next to their lockfiles — confirm.
        filters: |
          ci:
            - packages/corpus/**
            - packages/engine/**
            - packages/harvester/**
            - packages/pipeline/**
            - packages/Cargo.toml
            - packages/Cargo.lock
            - frontend/**
            - packages/frontend-shared/**
            - package.json
            - package-lock.json
            - bdd/**
            - corpus/regulation/**
            - schema/**
            - script/**
            - Justfile
            - .pre-commit-config.yaml
            - .yamllint
            - .github/workflows/ci.yml
          admin:
            - packages/admin/**
            - packages/frontend-shared/**
            - packages/Cargo.toml
            - packages/Cargo.lock
            - package.json
            - package-lock.json
            - Justfile
            - .github/workflows/ci.yml
          editor-api:
            - packages/editor-api/**
            - packages/corpus/**
            - packages/pipeline/**
            - packages/harvester/**
            - packages/Cargo.toml
            - packages/Cargo.lock
            - Justfile
            - .github/workflows/ci.yml
          docs:
            - docs/**
            - Justfile
            - .github/workflows/ci.yml
protect-schema:
  # Fails a pull request that modifies, deletes, renames or re-types any file
  # inside a schema/vX.Y.Z/ directory that already exists on main.
  name: Protect schema versions
  needs: changes
  runs-on: ubuntu-latest
  timeout-minutes: 5
  # The check diffs the PR head against origin/main, so it only applies to PRs.
  if: github.event_name == 'pull_request'
  steps:
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
      if: needs.changes.outputs.ci == 'true'
      with:
        # Full history, so the three-dot diff below can find the merge base.
        fetch-depth: 0
    - name: Check for modifications to released schema versions
      if: needs.changes.outputs.ci == 'true'
      run: |
        # Get list of versioned schema directories that exist on main
        git fetch origin main
        EXISTING_VERSIONS=$(git ls-tree -d --name-only origin/main schema/ 2>/dev/null \
          | grep -E '^schema/v[0-9]+\.[0-9]+\.[0-9]+$' || true)
        if [ -z "$EXISTING_VERSIONS" ]; then
          echo "No existing schema versions on main, skipping check"
          exit 0
        fi
        echo "Existing schema versions on main:"
        echo "$EXISTING_VERSIONS"
        echo ""
        # Check if any of these directories have modifications in this PR
        VIOLATIONS=""
        for version_dir in $EXISTING_VERSIONS; do
          # --no-renames makes git report a rename as D(old) + A(new) instead
          # of a single R line, so renaming a released file is caught as a
          # deletion. T (type change, e.g. file -> symlink) is also a
          # modification of released content. Pure additions (A) stay allowed.
          CHANGES=$(git diff --no-renames --name-status origin/main...HEAD -- "$version_dir" \
            | grep -E '^[MDT][[:space:]]' || true)
          if [ -n "$CHANGES" ]; then
            VIOLATIONS="${VIOLATIONS}"$'\n'"${CHANGES}"
          fi
        done
        if [ -n "$VIOLATIONS" ]; then
          echo "Schema Protection Error!"
          echo "=================================================="
          echo ""
          echo "Cannot modify or delete files in released schema versions:"
          echo "$VIOLATIONS"
          echo ""
          echo "Schema versions are immutable once released to main."
          echo "To make changes, create a new schema version directory."
          echo "(schema/latest/ can be updated freely)"
          exit 1
        fi
        echo "Schema protection check passed"
    - name: Skip (no relevant changes)
      if: needs.changes.outputs.ci != 'true'
      run: echo "No schema changes, skipping"
provenance-checks:
  # Consistency checks between the schema directories, the engine's embedded
  # schema table, the corpus files and the engine crate version.
  name: Provenance checks (RFC-013)
  needs: changes
  runs-on: ubuntu-latest
  timeout-minutes: 5
  # Job-level gate: when the `ci` filter did not match, the whole job is
  # skipped, so the steps need no per-step guard and no "Skip" placeholder.
  if: needs.changes.outputs.ci == 'true'
  steps:
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
      with:
        # Full history, so the engine version step can diff against origin/main.
        fetch-depth: 0
    - name: Check schema versions registered in schema.rs
      run: |
        # Every schema/vX.Y.Z/ directory must appear in the embedded schema table.
        VALIDATE_RS="packages/engine/src/schema.rs"
        MISSING=""
        for dir in schema/v*/; do
          version=$(basename "$dir")
          # -F: match the quoted version literally; as a regex the dots in
          # "v1.2.3" would match any character.
          if ! grep -qF -- "\"$version\"" "$VALIDATE_RS"; then
            MISSING="$MISSING $version\n"
          fi
        done
        if [ -n "$MISSING" ]; then
          echo "ERROR: Schema versions not registered in schema.rs:"
          # %b expands the \n separators without using the data as a format.
          printf '%b' "$MISSING"
          exit 1
        fi
        echo "All schema versions registered in schema.rs"
    - name: Check corpus schema references
      run: |
        # Every $schema URL in corpus YAML must reference a known schema version
        KNOWN_VERSIONS=$(ls -d schema/v*/ | xargs -n1 basename)
        FAILED=false
        # Read paths line by line so a file name containing spaces is not split.
        while IFS= read -r file; do
          SCHEMA_URL=$(grep '^\$schema:' "$file" | head -1 | sed 's/\$schema: *//')
          # Handle multi-line $schema with >-: the URL is then expected on
          # line 2 or line 3 of the file.
          if [ -z "$SCHEMA_URL" ] || [ "$SCHEMA_URL" = ">-" ]; then
            SCHEMA_URL=$(sed -n '2p' "$file" | sed 's/^ *//' | grep 'schema.json' || true)
            if [ -z "$SCHEMA_URL" ]; then
              SCHEMA_URL=$(sed -n '3p' "$file" | sed 's/^ *//' | grep 'schema.json' || true)
            fi
          fi
          if [ -z "$SCHEMA_URL" ]; then
            echo "WARN: $file has no \$schema URL"
            continue
          fi
          FOUND=false
          for ver in $KNOWN_VERSIONS; do
            # -F: literal substring match (dots are not wildcards).
            if echo "$SCHEMA_URL" | grep -qF -- "$ver"; then
              FOUND=true
              break
            fi
          done
          if [ "$FOUND" = "false" ]; then
            echo "ERROR: $file references unknown schema version: $SCHEMA_URL"
            FAILED=true
          fi
        done < <(find corpus/regulation -name "*.yaml")
        if [ "$FAILED" = "true" ]; then
          exit 1
        fi
        echo "All corpus schema references valid"
    - name: Check schema/latest symlink
      run: |
        # schema/latest must be a symlink whose target is the highest
        # version directory name (version-sorted).
        TARGET=$(readlink schema/latest)
        HIGHEST=$(ls -d schema/v*/ | xargs -n1 basename | sort -V | tail -1)
        if [ "$TARGET" != "$HIGHEST" ]; then
          echo "ERROR: schema/latest points to $TARGET but highest version is $HIGHEST"
          exit 1
        fi
        echo "schema/latest correctly points to $HIGHEST"
    - name: Check engine version bump on source changes
      if: github.event_name == 'pull_request'
      run: |
        git fetch origin main
        # Check if engine source files changed (excluding src/bin/ and wasm.rs)
        ENGINE_CHANGES=$(git diff --name-only origin/main...HEAD -- \
          'packages/engine/src/*.rs' \
          'packages/engine/src/**/*.rs' \
          ':!packages/engine/src/bin/*' \
          ':!packages/engine/src/wasm.rs' || true)
        if [ -z "$ENGINE_CHANGES" ]; then
          echo "No engine source changes, skipping version check"
          exit 0
        fi
        # Compare engine version between main and this branch
        MAIN_VERSION=$(git show origin/main:packages/engine/Cargo.toml 2>/dev/null \
          | grep '^version' | head -1 | sed 's/.*"\(.*\)"/\1/')
        PR_VERSION=$(grep '^version' packages/engine/Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
        if [ "$MAIN_VERSION" = "$PR_VERSION" ]; then
          echo "WARNING: Engine source files changed but version is still $PR_VERSION"
          echo "Changed files:"
          echo "$ENGINE_CHANGES"
          echo ""
          echo "Consider bumping the engine version in packages/engine/Cargo.toml"
          # Warning only, not a hard failure (yet)
        else
          echo "Engine version bumped: $MAIN_VERSION -> $PR_VERSION"
        fi
cross-law-integrity:
  # Runs the cross-law integrity script over corpus/regulation; skipped as a
  # whole when the `ci` filter did not match.
  name: Cross-law integrity
  needs: changes
  runs-on: ubuntu-latest
  timeout-minutes: 5
  if: needs.changes.outputs.ci == 'true'
  steps:
    # Same pinned commit as every other checkout in this workflow; the
    # version annotation now matches them.
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
    - uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1  # v6
      with:
        python-version: '3.x'
    - name: Install pyyaml
      run: pip install pyyaml
    # Fail-closed gate over the corpus: MISPLACED / DANGLING / PLAIN-PARAM
    # source bindings and IMPL-DANGLING / IMPL-NO-DATE implements declarations
    # are all modelling errors. The script exits non-zero on any finding.
    - name: Check cross-law binding integrity
      run: |
        python3 script/cross-law-integriteit.py corpus/regulation
pre-commit:
  # Runs the repository's pre-commit hooks on every push / PR. This job has no
  # `needs`, so it is not gated by the change filters.
  name: Pre-commit
  runs-on: ubuntu-latest
  timeout-minutes: 15
  env:
    # Compilations are cached across runs by sccache; the sccache-action step
    # below wires up the GHA backend. sccache requires incremental to be off.
    RUSTC_WRAPPER: sccache
    SCCACHE_GHA_ENABLED: 'true'
    CARGO_INCREMENTAL: '0'
  steps:
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
    - uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1  # v6
      with:
        python-version: "3.12"
    - name: Install just
      uses: extractions/setup-just@53165ef7e734c5c07cb06b3c8e7b647c5aa16db3  # v4.0.0
    - name: Install Rust toolchain
      uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7  # stable
      with:
        components: rustfmt, clippy
    - name: Install mold linker
      run: sudo apt-get update && sudo apt-get install -y mold
    - name: Run sccache
      uses: mozilla-actions/sccache-action@9e7fa8a12102821edf02ca5dbea1acd0f89a2696  # v0.0.10
    - name: Cache cargo registry (shared)
      uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8  # v6.0.0
      with:
        path: |
          ~/.cargo/registry
          ~/.cargo/git
        key: ${{ runner.os }}-cargo-registry-${{ hashFiles('packages/Cargo.lock') }}
        restore-keys: ${{ runner.os }}-cargo-registry-
    # Leak-guard layer 2: populate the git-ignored supplement file with the
    # case-/sector-specific patterns from a secret, so the skills-no-casus hook
    # runs at full strength in CI without those patterns living in the public
    # repo. The value is never logged. When the secret is missing (e.g. PRs
    # from forks, which have no access to secrets) only layer 1 runs — and
    # that is logged explicitly, so a green run never implies "fully scanned".
    - name: Provision leak-guard supplement (laag 2)
      env:
        SKILLS_CASUS_DENYLIST: ${{ secrets.SKILLS_CASUS_DENYLIST }}
      run: |
        # No secret available: warn and leave the supplement file absent.
        if [ -z "${SKILLS_CASUS_DENYLIST:-}" ]; then
          echo "WAARSCHUWING: geen SKILLS_CASUS_DENYLIST secret — alleen laag 1 (domein-loze vorm) draait. Casus-/sector-patronen worden in deze run NIET gescand."
          exit 0
        fi
        printf '%s\n' "$SKILLS_CASUS_DENYLIST" > script/.skills-casus-denylist.local
        echo "Leak-guard laag 2 voorzien vanuit secret (volle sterkte)."
    - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd  # v3.0.1
test:
  # Runs `just test-all`, then the frontend and shared-package test suites.
  # The job always starts; every real step is guarded by the `ci` change
  # filter, and a placeholder step runs instead when nothing relevant changed.
  name: Test
  needs: changes
  runs-on: ubuntu-latest
  timeout-minutes: 15
  env:
    RUSTC_WRAPPER: sccache
    SCCACHE_GHA_ENABLED: 'true'
    CARGO_INCREMENTAL: '0'
  steps:
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
      if: needs.changes.outputs.ci == 'true'
    - name: Install just
      if: needs.changes.outputs.ci == 'true'
      uses: extractions/setup-just@53165ef7e734c5c07cb06b3c8e7b647c5aa16db3  # v4.0.0
    - name: Install Rust toolchain
      if: needs.changes.outputs.ci == 'true'
      uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7  # stable
    - name: Install mold linker
      if: needs.changes.outputs.ci == 'true'
      run: sudo apt-get update && sudo apt-get install -y mold
    - name: Run sccache
      if: needs.changes.outputs.ci == 'true'
      uses: mozilla-actions/sccache-action@9e7fa8a12102821edf02ca5dbea1acd0f89a2696  # v0.0.10
    - name: Cache cargo registry (shared)
      if: needs.changes.outputs.ci == 'true'
      uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8  # v6.0.0
      with:
        path: |
          ~/.cargo/registry
          ~/.cargo/git
        key: ${{ runner.os }}-cargo-registry-${{ hashFiles('packages/Cargo.lock') }}
        restore-keys: ${{ runner.os }}-cargo-registry-
    - name: Run all tests
      if: needs.changes.outputs.ci == 'true'
      run: just test-all
    - name: Setup Node.js
      if: needs.changes.outputs.ci == 'true'
      uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e  # v6
      with:
        node-version: "22"
        registry-url: https://npm.pkg.github.com
        cache: npm
        cache-dependency-path: package-lock.json
    - name: Install frontend dependencies
      if: needs.changes.outputs.ci == 'true'
      run: npm ci
    - name: BDD grammar codegen is in sync
      if: needs.changes.outputs.ci == 'true'
      run: |
        # Regenerate the grammar, then fail if the committed file differs.
        node bdd/codegen/gen-js.mjs
        if ! git diff --exit-code frontend/src/gherkin/grammar.generated.js; then
          echo "::error::grammar.generated.js is stale — run 'just bdd-codegen' and commit the result"
          exit 1
        fi
    - name: Run frontend tests
      if: needs.changes.outputs.ci == 'true'
      # The workspace is addressed by directory (`-w frontend`), not by package
      # name, so renaming the `name` field cannot silently skip the tests.
      run: npm test -w frontend
    - name: Run shared-package tests
      if: needs.changes.outputs.ci == 'true'
      run: npm test -w packages/frontend-shared
    - name: Skip (no relevant changes)
      if: needs.changes.outputs.ci != 'true'
      run: echo "No relevant changes, skipping tests"
wasm:
  # Builds the engine crate for wasm32-unknown-unknown with the `wasm`
  # feature. Every real step is guarded by the `ci` change filter.
  name: WASM Build
  needs: changes
  runs-on: ubuntu-latest
  timeout-minutes: 15
  env:
    RUSTC_WRAPPER: sccache
    SCCACHE_GHA_ENABLED: 'true'
    CARGO_INCREMENTAL: '0'
  steps:
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
      if: needs.changes.outputs.ci == 'true'
    - name: Install Rust toolchain
      if: needs.changes.outputs.ci == 'true'
      uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7  # stable
      with:
        targets: wasm32-unknown-unknown
    - name: Run sccache
      if: needs.changes.outputs.ci == 'true'
      uses: mozilla-actions/sccache-action@9e7fa8a12102821edf02ca5dbea1acd0f89a2696  # v0.0.10
    - name: Cache cargo registry (shared)
      if: needs.changes.outputs.ci == 'true'
      uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8  # v6.0.0
      with:
        path: |
          ~/.cargo/registry
          ~/.cargo/git
        key: ${{ runner.os }}-cargo-registry-${{ hashFiles('packages/Cargo.lock') }}
        restore-keys: ${{ runner.os }}-cargo-registry-
    # No mold install in this job: wasm32 links with rust-lld, so the default
    # RUSTFLAGS (-Dwarnings, without a mold link-arg) is right for this target.
    - name: Build for WASM
      if: needs.changes.outputs.ci == 'true'
      working-directory: packages/engine
      run: cargo build --target wasm32-unknown-unknown --features wasm
    - name: Skip (no relevant changes)
      if: needs.changes.outputs.ci != 'true'
      run: echo "No relevant changes, skipping WASM build"
# Security audit: installs a checksum-verified cargo-deny binary plus the npm
# workspace dependencies, then runs `just audit`. This job has no `needs`, so
# it is not gated by the change filters.
audit:
  name: Security Audit
  runs-on: ubuntu-latest
  timeout-minutes: 30
  permissions:
    contents: read
    packages: read
  steps:
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
    - name: Install Rust toolchain
      uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7  # stable
    - name: Cache cargo registry (shared)
      uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8  # v6.0.0
      with:
        path: |
          ~/.cargo/registry
          ~/.cargo/git
        key: ${{ runner.os }}-cargo-registry-${{ hashFiles('packages/Cargo.lock') }}
        restore-keys: ${{ runner.os }}-cargo-registry-
    - name: Install cargo-deny
      run: |
        CARGO_DENY_VERSION="0.19.0"
        CARGO_DENY_SHA256="0e8c2aa59128612c90d9e09c02204e912f29a5b8d9a64671b94608cbe09e064f"
        # -f: fail on an HTTP error instead of saving the error page, which
        # would otherwise only surface as a confusing checksum mismatch.
        curl -fsSL "https://github.com/EmbarkStudios/cargo-deny/releases/download/${CARGO_DENY_VERSION}/cargo-deny-${CARGO_DENY_VERSION}-x86_64-unknown-linux-musl.tar.gz" \
          -o /tmp/cargo-deny.tar.gz
        # Canonical `sha256sum -c` input: digest, two spaces, file name.
        echo "${CARGO_DENY_SHA256}  /tmp/cargo-deny.tar.gz" | sha256sum -c -
        # Extract only the binary, dropping the archive's top-level directory.
        tar -xz --strip-components=1 -C /usr/local/bin \
          -f /tmp/cargo-deny.tar.gz \
          "cargo-deny-${CARGO_DENY_VERSION}-x86_64-unknown-linux-musl/cargo-deny"
    - name: Setup Node.js
      uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e  # v6
      with:
        node-version: '22'
        registry-url: https://npm.pkg.github.com
        cache: 'npm'
        cache-dependency-path: package-lock.json
    - name: Install workspace dependencies
      run: npm ci
    - name: Install just
      uses: extractions/setup-just@53165ef7e734c5c07cb06b3c8e7b647c5aa16db3  # v4.0.0
    - name: Run security audit
      run: just audit
# A single matrixed job covers the per-package fmt/lint/check pipelines, which
# differ only in the names of their `just <package>-*` recipes. The job starts
# when either package changed; each step is additionally guarded by that
# package's own change filter, so the matrix leg of an unchanged package is a
# no-op (all its steps skip). An admin-only change therefore does not build
# editor-api, and vice versa.
package-checks:
  name: Package checks (${{ matrix.package }})
  needs: changes
  if: needs.changes.outputs.admin == 'true' || needs.changes.outputs['editor-api'] == 'true'
  runs-on: ubuntu-latest
  timeout-minutes: 15
  permissions:
    contents: read
    # The admin leg needs this to install @nldd packages from
    # npm.pkg.github.com; for editor-api it is harmless (read-only).
    packages: read
  strategy:
    fail-fast: false
    matrix:
      package:
        - admin
        - editor-api
  env:
    RUSTC_WRAPPER: sccache
    SCCACHE_GHA_ENABLED: 'true'
    CARGO_INCREMENTAL: '0'
  steps:
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
      if: needs.changes.outputs[matrix.package] == 'true'
    - name: Install Rust toolchain
      if: needs.changes.outputs[matrix.package] == 'true'
      uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7  # stable
      with:
        components: rustfmt, clippy
    - name: Cache cargo registry (shared)
      if: needs.changes.outputs[matrix.package] == 'true'
      uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8  # v6.0.0
      with:
        path: |
          ~/.cargo/registry
          ~/.cargo/git
        key: ${{ runner.os }}-cargo-registry-${{ hashFiles('packages/Cargo.lock') }}
        restore-keys: ${{ runner.os }}-cargo-registry-
    - name: Install mold linker
      if: needs.changes.outputs[matrix.package] == 'true'
      run: sudo apt-get update && sudo apt-get install -y mold
    - name: Run sccache
      if: needs.changes.outputs[matrix.package] == 'true'
      uses: mozilla-actions/sccache-action@9e7fa8a12102821edf02ca5dbea1acd0f89a2696  # v0.0.10
    - name: Install just
      if: needs.changes.outputs[matrix.package] == 'true'
      uses: extractions/setup-just@53165ef7e734c5c07cb06b3c8e7b647c5aa16db3  # v4.0.0
    - name: Check formatting
      if: needs.changes.outputs[matrix.package] == 'true'
      run: just ${{ matrix.package }}-fmt
    - name: Run clippy
      if: needs.changes.outputs[matrix.package] == 'true'
      run: just ${{ matrix.package }}-lint
    - name: Run cargo check
      if: needs.changes.outputs[matrix.package] == 'true'
      run: just ${{ matrix.package }}-check
    # Everything from here to the skip step is admin-only: editor-api has no
    # `just editor-api-test` recipe and no frontend build of its own.
    - name: Run tests
      if: matrix.package == 'admin' && needs.changes.outputs.admin == 'true'
      run: just admin-test
    - name: Setup Node.js
      if: matrix.package == 'admin' && needs.changes.outputs.admin == 'true'
      uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e  # v6
      with:
        node-version: "22"
        registry-url: https://npm.pkg.github.com
        cache: npm
        cache-dependency-path: package-lock.json
    - name: Install admin frontend dependencies
      if: matrix.package == 'admin' && needs.changes.outputs.admin == 'true'
      run: npm ci
    - name: Build admin frontend
      if: matrix.package == 'admin' && needs.changes.outputs.admin == 'true'
      # The workspace is addressed by directory rather than by package name.
      run: npm run build -w packages/admin/frontend-src
    - name: Skip (no relevant changes)
      if: needs.changes.outputs[matrix.package] != 'true'
      run: echo "No ${{ matrix.package }} changes, skipping"
docs-a11y:
  # Builds the docs site and runs the accessibility gate over it. The job
  # always starts; every real step (including the checkout) is guarded by the
  # `docs` change filter, and a placeholder step runs when docs are unchanged.
  name: Docs accessibility gate
  needs: changes
  runs-on: ubuntu-latest
  timeout-minutes: 15
  steps:
    # Guarded like the later steps: the skip path below only echoes a message
    # and does not need a working tree.
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
      if: needs.changes.outputs.docs == 'true'
    - name: Install just
      if: needs.changes.outputs.docs == 'true'
      uses: extractions/setup-just@53165ef7e734c5c07cb06b3c8e7b647c5aa16db3  # v4.0.0
    - name: Setup Node.js
      if: needs.changes.outputs.docs == 'true'
      uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e  # v6
      with:
        node-version: '22'
        cache: 'npm'
        cache-dependency-path: docs/package-lock.json
    - name: Install docs dependencies
      if: needs.changes.outputs.docs == 'true'
      working-directory: docs
      run: npm ci
    # rehype-mermaid renders diagrams with a headless Chromium at build time,
    # and pa11y drives Chromium too — install it (with system deps) the same
    # way docs/Dockerfile does.
    - name: Install Playwright Chromium
      if: needs.changes.outputs.docs == 'true'
      working-directory: docs
      run: npx playwright install --with-deps chromium
    # `just docs-a11y` builds the site, regenerates .pa11yci from the build,
    # then runs pa11y-ci (htmlcs + axe) over every page. Non-zero exit fails.
    - name: Run accessibility gate
      if: needs.changes.outputs.docs == 'true'
      run: just docs-a11y
    - name: Skip (no relevant changes)
      if: needs.changes.outputs.docs != 'true'
      run: echo "No docs changes, skipping accessibility gate"