diff --git a/.ci/benchmarks/baseline.json b/.ci/benchmarks/baseline.json
new file mode 100644
index 0000000..e30304e
--- /dev/null
+++ b/.ci/benchmarks/baseline.json
@@ -0,0 +1,3 @@
+{
+  "benchmarks": []
+}
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 0000000..1e270da
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,43 @@
+# Runs the CPU benchmark suite and gates on median-latency regressions.
+name: Benchmark Regression
+
+on:
+  pull_request:
+    branches: [main, master]
+  schedule:
+    - cron: "30 2 * * *"  # daily at 02:30 UTC
+  workflow_dispatch:
+
+jobs:
+  benchmark-cpu:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: "pip"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+          pip install pytest pytest-benchmark
+
+      - name: Run benchmark suite
+        run: pytest tests -m "benchmark" --benchmark-json benchmark.json -q
+
+      # Second argument is the maximum allowed regression as a ratio (0.05 = 5%).
+      - name: Enforce regression threshold
+        run: python scripts/ci/check_benchmark_regression.py benchmark.json 0.05
+
+      # Upload even when the gate above fails, so the regressed report can be inspected.
+      - name: Upload benchmark report
+        if: ${{ !cancelled() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-report
+          path: benchmark.json
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 0000000..82da6b4
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,38 @@
+# Builds the MkDocs site in strict mode whenever a documentation input changes.
+name: Docs
+
+on:
+  pull_request:
+    paths:
+      - "docs/**"
+      - "README.md"
+      - "mkdocs.yml"
+      - ".github/workflows/docs.yml"
+  push:
+    branches: [main, master]
+    paths:
+      - "docs/**"
+      - "README.md"
+      - "mkdocs.yml"
+      - ".github/workflows/docs.yml"
+  workflow_dispatch:
+
+jobs:
+  docs-build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install docs dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install mkdocs mkdocs-material
+
+      - name: Build docs
+        run: mkdocs build --strict
diff --git a/.github/workflows/gpu-hardware.yml b/.github/workflows/gpu-hardware.yml
new file mode 100644
index 0000000..ff90223
--- /dev/null
+++ b/.github/workflows/gpu-hardware.yml
@@ -0,0 +1,66 @@
+name: GPU Hardware Validation
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 1 * * 0"  # Sundays at 01:00 UTC
+
+jobs:
+  gpu-smoke:
+    name: GPU smoke (${{ matrix.runner_label }})
+    runs-on: [self-hosted, linux, x64, gpu, "${{ matrix.runner_label }}"]  # runner must carry all of these labels
+    strategy:
+      fail-fast: false  # let the remaining GPU types finish when one fails
+      matrix:
+        include:
+          - runner_label: a100
+            cuda_version: "11.8"
+            gpu_arch: sm_80
+          - runner_label: h100
+            cuda_version: "12.1"
+            gpu_arch: sm_90
+          - runner_label: rtx4090
+            cuda_version: "12.4"
+            gpu_arch: sm_89
+
+    env:  # read by the inline script in "Verify hardware runtime"
+      EXPECTED_CUDA: ${{ matrix.cuda_version }}
+      EXPECTED_ARCH: ${{ matrix.gpu_arch }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+          pip install pytest
+
+      - name: Verify hardware runtime  # NOTE(review): prints the expected values only; nothing is compared
+        run: |
+          nvidia-smi
+          python - <<'PY'
+          import os
+          print(f"Expected CUDA: {os.environ['EXPECTED_CUDA']}")
+          print(f"Expected arch: {os.environ['EXPECTED_ARCH']}")
+          PY
+
+      - name: GPU integration and numerical tests
+        run: |
+          pytest tests -m "gpu or numerical" -q
+
+      - name: Performance regression check  # NOTE(review): records timings only; no threshold is enforced here
+        run: |
+          pytest tests -m "benchmark" --benchmark-json benchmark-${{ matrix.runner_label }}.json -q
+
+      - name: Upload benchmark artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-${{ matrix.runner_label }}
+          path: benchmark-${{ matrix.runner_label }}.json
diff --git a/.github/workflows/hpc-matrix.yml b/.github/workflows/hpc-matrix.yml
new file mode 100644
index 0000000..07f791b
--- /dev/null
+++ b/.github/workflows/hpc-matrix.yml
@@ -0,0 +1,77 @@
+name: HPC Matrix
+
+on:
+  pull_request:
+    branches: [main, master]
+  push:
+    branches: [main, master]
+  schedule:
+    - cron: "0 3 * * *"  # daily at 03:00 UTC
+  workflow_dispatch:
+
+jobs:
+  matrix-cpu:
+    name: CPU matrix (py${{ matrix.python-version }} / torch${{ matrix.torch-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false  # report every failing combination, not just the first
+      matrix:  # 4 Python versions x 2 torch versions = 8 jobs
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        torch-version: ["2.2.2", "2.3.1"]
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: "pip"
+
+      - name: Install dependencies  # the torch pin is installed after the project, so the matrix version wins
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+          pip install "torch==${{ matrix.torch-version }}" --index-url https://download.pytorch.org/whl/cpu
+          pip install pytest pytest-cov mypy ruff
+
+      - name: Lint and type checks
+        run: |
+          ruff check .
+          ruff format --check .
+          mypy kernels implementations
+
+      - name: Unit tests  # fails when combined coverage is below 85%
+        run: pytest -m "not gpu" --cov=kernels --cov=implementations --cov-fail-under=85
+
+      - name: Integration tests
+        run: pytest tests -m "integration and not gpu" -q
+
+  cuda-compat:
+    name: CUDA compatibility (containerized)
+    runs-on: ubuntu-latest  # NOTE(review): presumably no GPU on this runner; confirm only import/fallback paths are expected to run
+    strategy:
+      fail-fast: false  # check every CUDA image even if one fails
+      matrix:
+        cuda-tag: ["11.8.0-runtime-ubuntu22.04", "12.1.1-runtime-ubuntu22.04"]
+
+    container:
+      image: nvidia/cuda:${{ matrix.cuda-tag }}  # the steps below run inside this image
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install Python and tooling  # installs python3 and pip via apt inside the container
+        run: |
+          apt-get update
+          apt-get install -y python3 python3-pip
+          python3 -m pip install --upgrade pip
+          pip install -e .
+          pip install pytest
+
+      - name: Import and fallback smoke check
+        run: |
+          python3 -c "import kernels; print('kernels import ok')"
+          pytest tests -m "fallback" -q
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8433e3a..c40fd68 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,10 @@ The format is based on Keep a Changelog, and this project adheres to Semantic Ve
 
 ### Added
 
+- Added a production-grade HPC CI/CD architecture reference document covering deterministic build strategy, GPU matrix validation, release gating, and scaling guidance.
+- Added new GitHub Actions workflows for expanded HPC matrix validation, dedicated GPU hardware testing, benchmark regression gating, and docs build verification.
+- Added a benchmark regression check utility script with baseline support to enforce latency performance thresholds in CI.
+- Added MkDocs configuration to enable strict documentation build checks in CI.
 - Added a scheduled dependency health workflow that validates installation integrity, runs `pip check`, audits vulnerabilities with `pip-audit`, and verifies package imports across Python 3.9-3.12.
 - Added a weekly dependency canary workflow that upgrades core quality tooling to latest versions and runs lint, type checks, tests, and package build validation.
 - Expanded CI workflow to run a Python 3.9-3.12 matrix with linting, format checks, type checking, security scanning, coverage-enforced tests, smoke verification, and package build validation.
@@ -27,6 +31,10 @@ The format is based on Keep a Changelog, and this project adheres to Semantic Ve
 
 ### Added
 
+- Added a production-grade HPC CI/CD architecture reference document covering deterministic build strategy, GPU matrix validation, release gating, and scaling guidance.
+- Added new GitHub Actions workflows for expanded HPC matrix validation, dedicated GPU hardware testing, benchmark regression gating, and docs build verification.
+- Added a benchmark regression check utility script with baseline support to enforce latency performance thresholds in CI.
+- Added MkDocs configuration to enable strict documentation build checks in CI.
 - Initial kernel implementation with deterministic state machine
 - Core types: KernelState, KernelRequest, KernelReceipt, Decision, ReceiptStatus
 - Append-only audit ledger with hash-chained entries
diff --git a/README.md b/README.md
index ef32f08..0ee3032 100644
--- a/README.md
+++ b/README.md
@@ -150,6 +150,7 @@ python -m kernels --help
 |----------|---------|
 | [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) | Component boundaries and data flows |
 | [docs/THREAT_MODEL.md](docs/THREAT_MODEL.md) | Adversary model + mitigations |
+| [docs/pipelines/HPC_CICD_ARCHITECTURE.md](docs/pipelines/HPC_CICD_ARCHITECTURE.md) | Production-grade CI/CD architecture for kernel projects |
 | [docs/FAQ.md](docs/FAQ.md) | Usage clarifications |
 
 ---
diff --git a/docs/README.md b/docs/README.md
index c79de06..7a8a4f1 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -8,6 +8,7 @@ Documentation for understanding and integrating KERNELS.
 |----------|---------|
 | [ARCHITECTURE.md](ARCHITECTURE.md) | Component boundaries and data flows |
 | [THREAT_MODEL.md](THREAT_MODEL.md) | Adversary model + mitigations |
+| [pipelines/HPC_CICD_ARCHITECTURE.md](pipelines/HPC_CICD_ARCHITECTURE.md) | Advanced CI/CD design for deterministic kernel infrastructure |
 | [FAQ.md](FAQ.md) | Usage clarifications and non-goals |
 
 ## Getting Started
diff --git a/docs/pipelines/HPC_CICD_ARCHITECTURE.md b/docs/pipelines/HPC_CICD_ARCHITECTURE.md
new file mode 100644
index 0000000..6c66301
--- /dev/null
+++ b/docs/pipelines/HPC_CICD_ARCHITECTURE.md
@@ -0,0 +1,117 @@
+# HPC CI/CD Architecture for KERNELS
+
+This document defines a production-grade CI/CD design for compute-kernel style
+repositories where correctness, determinism, and hardware compatibility are
+first-class release gates.
+
+## Pipeline Layers
+
+```text
+Commit
+  -> Static Analysis
+  -> Deterministic Build Matrix
+  -> CPU + GPU Test Matrix
+  -> Security / Supply Chain
+  -> Numerical Validation
+  -> Performance Regression Validation
+  -> Artifact Packaging + Attestation
+  -> Release + Registry Publish
+  -> Production Telemetry Hooks
+```
+
+## Workflow Set
+
+| Workflow | Trigger | Purpose |
+|----------|---------|---------|
+| `ci.yml` | PR + push | Fast quality checks, unit tests, package build |
+| `hpc-matrix.yml` | PR + push + nightly | Expanded matrix with PyTorch/CUDA compatibility checks |
+| `gpu-hardware.yml` | weekly + manual + release candidate | Hardware validation on self-hosted GPU runners |
+| `security.yml` | PR + push + weekly | SAST, dependency audit, secret scan |
+| `benchmark.yml` | PR + nightly | Performance baselines and regression thresholds |
+| `release.yml` | tags | Build, verify, and publish release artifacts |
+| `docs.yml` | docs changes + release | Build and publish documentation |
+
+## Deterministic Build Strategy
+
+Determinism is enforced by:
+
+1. Pinning lock files and toolchain versions.
+2. Building wheels in isolated environments (`python -m build`).
+3. Verifying metadata (`twine check`).
+4. Capturing SBOM and provenance attestations.
+
+## Build Matrix Recommendation
+
+| Axis | Values |
+|------|--------|
+| Python | 3.9, 3.10, 3.11, 3.12 |
+| PyTorch | Supported minor versions |
+| CUDA | 11.8, 12.1, 12.4 |
+| GPU arch | sm_80, sm_86, sm_89, sm_90 |
+| OS | ubuntu-latest, self-hosted GPU Linux |
+
+## Validation Gates
+
+### 1. Static Quality Gate
+
+- `ruff check .`
+- `ruff format --check .`
+- `mypy kernels implementations`
+
+### 2. Correctness Gate
+
+- Unit tests with coverage threshold.
+- Integration tests for runtime loading and fallback behavior.
+- Numerical equivalence checks against reference implementations.
+
+### 3. Hardware Gate
+
+- Run kernel smoke tests on A100 / H100 / RTX-class runners.
+- Enforce CPU fallback tests in every PR.
+
+### 4. Security Gate
+
+- CodeQL
+- `pip-audit`
+- `safety`
+- `gitleaks`
+- Optional Semgrep policy pack
+
+### 5. Performance Gate
+
+- Pytest benchmark suite with historical comparison.
+- Fail CI if median latency regresses beyond threshold (default 5%).
+
+## Release Controls
+
+Release jobs should execute only after all mandatory checks pass:
+
+- Lint / type / tests
+- Security scans
+- Benchmark regression check
+- Docs build
+
+On tag:
+
+1. Build source + wheel distributions.
+2. Generate SBOM (`syft`) and vulnerability report (`grype` or `trivy`).
+3. Publish to PyPI.
+4. Publish container image.
+5. Attach benchmark + security artifacts to GitHub release.
+
+## Rollback Policy
+
+If release validation fails for correctness or benchmark thresholds:
+
+- Mark release candidate as failed.
+- Prevent publication jobs from running.
+- Emit structured summary and incident artifact.
+
+## Scaling Guidance
+
+For large OSS adoption:
+
+- Split fast and slow workflows; protect PR latency.
+- Use distributed/self-hosted GPU pools by architecture label.
+- Cache Python deps, build layers, and benchmark baselines.
+- Run nightly deep validation for expensive fuzz and hardware tests.
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..f8c29c7
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,15 @@
+site_name: KERNELS
+site_description: Deterministic control planes and CI/CD governance documentation.
+repo_url: https://github.com/ayais12210-hub/kernels
+
+theme:
+  name: material  # provided by the mkdocs-material package
+
+nav:  # paths are resolved relative to docs_dir (MkDocs default: docs/)
+  - Home: README.md
+  - Architecture: ARCHITECTURE.md
+  - Threat Model: THREAT_MODEL.md
+  - Pipelines:  # NOTE(review): PHASES.md / GATES.md are not part of this change; confirm they exist or the strict build fails
+      - Phases: pipelines/PHASES.md
+      - Gates: pipelines/GATES.md
+      - HPC CI/CD Architecture: pipelines/HPC_CICD_ARCHITECTURE.md
diff --git a/scripts/ci/check_benchmark_regression.py b/scripts/ci/check_benchmark_regression.py
new file mode 100644
index 0000000..b5f32fe
--- /dev/null
+++ b/scripts/ci/check_benchmark_regression.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+"""Fail CI when benchmark median latency regresses above a threshold.
+
+Usage:
+  python scripts/ci/check_benchmark_regression.py <current-json> <max-regression-ratio>
+
+The baseline is read from `.ci/benchmarks/baseline.json`, relative to the working directory.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+
+BASELINE_PATH = Path('.ci/benchmarks/baseline.json')  # relative path: resolved against the CWD
+
+
+def _load(path: Path) -> dict:
+    """Read the UTF-8 file at ``path`` and return its parsed JSON content."""
+    return json.loads(path.read_text(encoding='utf-8'))
+
+
+def _median_map(payload: dict) -> dict[str, float]:
+    """Map each benchmark's name to its median, skipping incomplete entries."""
+    medians: dict[str, float] = {}
+    for entry in payload.get('benchmarks', []):
+        label = entry.get('name')
+        value = entry.get('stats', {}).get('median')
+        # An entry is usable only when both the name and the median are present.
+        if label is not None and value is not None:
+            medians[str(label)] = float(value)
+    return medians
+
+
+def main() -> int:
+    """Run the gate: 0 = pass/skip, 1 = regression or missing benchmark, 2 = bad usage."""
+    if len(sys.argv) != 3:
+        print('Usage: check_benchmark_regression.py <current-json> <max-regression-ratio>')
+        return 2
+
+    current_path = Path(sys.argv[1])
+    try:
+        max_regression = float(sys.argv[2])
+    except ValueError:
+        max_regression = float('nan')
+    if max_regression != max_regression:  # NaN never compares greater, so the gate would always pass
+        print(f'Invalid max regression ratio: {sys.argv[2]!r}')
+        return 2
+    if not current_path.exists():
+        print(f'Current benchmark report not found: {current_path}')
+        return 2
+    if not BASELINE_PATH.exists():
+        print(f'Baseline benchmark file not found at {BASELINE_PATH}; skipping regression gate.')
+        return 0
+
+    current = _median_map(_load(current_path))
+    baseline = _median_map(_load(BASELINE_PATH))
+    failures: list[str] = []
+    for name, baseline_median in baseline.items():
+        current_median = current.get(name)
+        if current_median is None:
+            failures.append(f'{name}: missing from current benchmark run')
+        elif baseline_median > 0:  # a non-positive baseline has no meaningful ratio
+            regression = (current_median - baseline_median) / baseline_median
+            if regression > max_regression:
+                failures.append(
+                    f'{name}: baseline={baseline_median:.6f}s current={current_median:.6f}s '
+                    f'regression={regression * 100:.2f}% > {max_regression * 100:.2f}%'
+                )
+
+    if failures:
+        print('Performance regression gate failed:')
+        for failure in failures:
+            print(f'- {failure}')
+        return 1
+
+    print('Benchmark regression gate passed.')
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())