lambdaclass · manuelpuebla · Apr 19, 2026 · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -236,7 +236,7 @@ jobs:
         ./generated/verify_assembly.sh
 
   benchmark-validation:
-    name: Benchmark Validation (v3.16.0)
+    name: Benchmark Validation (v3.19.0 — Rust primary)
     runs-on: ubuntu-latest
     needs: build
 
@@ -266,13 +266,23 @@ jobs:
     - name: Build bench executable
       run: lake build bench
 
-    - name: C scalar validation (standard DFT)
+    - name: Scalar validation (standard DFT, Rust + C)
       run: |
-        echo "=== C Scalar Validation: Standard DFT ==="
+        echo "=== Scalar Validation: Standard DFT — Rust + C co-gate ==="
         # v3.17.0 N317.1: --use-standard removed (default=True since N317.8).
+        # v3.19.0 N319.3.1: Rust promoted to primary; both langs co-validated.
+        # NOTE: --langs accepts comma list ("c,rust"), the literal "both" is not
+        # split by the harness despite what --help suggests.
         python3 Tests/benchmark/benchmark.py \
-          --validation-only --fields babybear,goldilocks --sizes 14
-        echo "C scalar validation PASS"
+          --validation-only --langs c,rust --fields babybear,goldilocks --sizes 14
+        echo "Rust + C scalar validation PASS"
+
+    # v3.19.0 N319.4 (Option B++): arm-neon SIMD validation was DEFERRED because
+    # emitSIMDNTTC used ref_dit (legacy) convention while Python reference uses
+    # DFT standard — first-element divergence was immediate. v3.20.a fixes the
+    # convention (stages.reverse + bitrev prelude), so arm-neon validation is
+    # now sound. See sibling job `arm-neon-validation` below which runs on an
+    # ARM64 runner (NEON intrinsics don't compile on ubuntu-latest x86).
 
     - name: Oracle validation (TRZK vs Plonky3 real)
       run: |
@@ -290,10 +300,131 @@ jobs:
           --fields goldilocks,babybear --sizes 3,6,8,10,14
         echo "Differential fuzzing PASS"
 
+  arm-neon-validation:
+    name: arm-neon Validation (v3.20.a — DFT standard migration)
+    # v3.20.a: GitHub Actions ARM64 runner. TRZK emitSIMDNTTC emits NEON intrinsics
+    # (vdupq_n_u32, vmull_u32, etc.) that require `arm_neon.h` + aarch64 target —
+    # not available on ubuntu-latest (x86). This sibling job isolates the ARM-
+    # specific correctness gate introduced by v3.20.a (§14.11.a addendum:
+    # correctness gap §8c closed, gate re-defined; 820 μs perf target deferred
+    # to v3.20.b B3.5, see BENCHMARKS.md §8d).
+    #
+    # SCOPE INTENCIONAL: esta job corre SOLO el `benchmark.py --validation-only
+    # --hardware arm-neon` step (BabyBear N=14 byte-equivalence vs DFT standard).
+    # `differential_fuzz.py` NO corre aquí — ya se ejecuta en el job
+    # `benchmark-validation` (x86, ubuntu-latest) con 1150/1150 PASS cubriendo
+    # TRZK-C vs Plonky3 vs Python naive. La semántica cross-platform queda
+    # validada por ese fuzz triangular; el ARM job solo necesita certificar que
+    # el codegen NEON produce output byte-igual al scalar DFT standard para el
+    # mismo input. Correr fuzz también en ARM sería redundante y duplicaría
+    # ~15 min de CI sin nueva cobertura (la dif entre arm-neon y Plonky3/naive
+    # es exactamente lo que `--validation-only --hardware arm-neon` ya chequea
+    # contra la referencia Python). NO "arreglar" la ausencia agregando fuzz
+    # aquí sin revisar este comentario primero.
+    runs-on: ubuntu-24.04-arm
+    needs: build
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Install elan
+      run: |
+        curl https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh -sSf | sh -s -- -y --default-toolchain none
+        echo "$HOME/.elan/bin" >> $GITHUB_PATH
+
+    - name: Cache Lake packages
+      uses: actions/cache@v4
+      with:
+        path: |
+          ~/.elan
+          .lake
+        key: ${{ runner.os }}-arm64-lake-${{ hashFiles('lean-toolchain', 'lakefile.lean', 'lake-manifest.json') }}
+        restore-keys: |
+          ${{ runner.os }}-arm64-lake-
+
+    - name: Install dependencies
+      run: |
+        sudo apt-get update && sudo apt-get install -y clang python3 rustc cargo
+
+    - name: Build bench executable
+      run: lake build bench
+
+    - name: arm-neon C validation (DFT standard, N=14)
+      run: |
+        echo "=== arm-neon Validation: TRZK SIMD = scalar DFT standard ==="
+        # v3.20.a correctness gate: emitSIMDNTTC output must be byte-equivalent
+        # to emitCFromPlanStandard (scalar arm-scalar) for the same input. Prior
+        # to v3.20.a this FAILed at index[0]; post-migration passes 3/3 locally.
+        python3 Tests/benchmark/benchmark.py \
+          --validation-only --hardware arm-neon --langs c \
+          --fields babybear --sizes 14
+        echo "arm-neon C validation PASS"
+
+  # v3.20.b B6.3 — Batch NTT correctness validation
+  batch-validation:
+    name: Batch NTT Validation (v3.20.b — emitCFromPlanBatch differential fuzz)
+    # v3.20.b B6.3 sibling job: asserts TRZK batch NTT emission
+    # (emitCFromPlanBatch, B4 loop-wrapping path — production default
+    # post-B4.5 MVP escape) produces byte-equivalent output to B independent
+    # single-vector invocations. Same C binary comparison (avoids cross-language
+    # variance). 9000/9000 PASS baseline achieved locally (9 combos × 1000 iters).
+    #
+    # SCOPE: differential_fuzz.py --mode batch — C-level comparison of
+    # batch_fn(data_base, tw, B) vs B × single_fn(data_base + b*N, tw). Tests
+    # offset arithmetic in batchOffsetAssign + outer for-loop semantics in
+    # lowerNTTFromPlanBatch (B4 delivery). Does NOT test packed dispatch
+    # (B4.5 MVP escape — packed path is opt-in only, not wired in production).
+    #
+    # BRANCH FILTER: inherits from push/pull_request triggers at workflow root.
+    # NOTE: feat/v3.19-simd (current v3.20.b stack) may not match the filter
+    # at root; that's a separate config issue (branch coverage for feature
+    # stacks) and not blocking for this job's semantic correctness. When the
+    # branch lands on main/master via PR#23, this job activates automatically.
+    runs-on: ubuntu-24.04-arm
+    needs: build
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Install elan
+      run: |
+        curl https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh -sSf | sh -s -- -y --default-toolchain none
+        echo "$HOME/.elan/bin" >> $GITHUB_PATH
+
+    - name: Cache Lake packages
+      uses: actions/cache@v4
+      with:
+        path: |
+          ~/.elan
+          .lake
+        key: ${{ runner.os }}-arm64-lake-${{ hashFiles('lean-toolchain', 'lakefile.lean', 'lake-manifest.json') }}
+        restore-keys: |
+          ${{ runner.os }}-arm64-lake-
+
+    - name: Install dependencies
+      run: |
+        sudo apt-get update && sudo apt-get install -y clang python3
+
+    - name: Build bench executable
+      run: lake build bench
+
+    - name: Differential fuzz batch mode (9000/9000 target)
+      run: |
+        echo "=== Batch Validation: TRZK batch = B × single-vector ==="
+        # v3.20.b B6.2 gate: 1000 iter × 3 sizes × 3 widths = 9000 iters.
+        # Compile flag: -mcpu=apple-m1 (actually emit_code harness compiles
+        # on target ARM64; runner is ubuntu-24.04-arm so native is OK).
+        python3 Tests/benchmark/differential_fuzz.py \
+          --mode batch --seed 42 \
+          --sizes 8,10,14 --batch-width 4,8,16 --iters 1000
+        echo "Batch validation PASS"
+
   summary:
     name: CI Summary
     runs-on: ubuntu-latest
-    needs: [build, phase0-tests, goldilocks-tests, sanitizers, avx2-tests, avx2-qa, benchmark-validation]
+    needs: [build, phase0-tests, goldilocks-tests, sanitizers, avx2-tests, avx2-qa, benchmark-validation, arm-neon-validation, batch-validation]
     if: always()
 
     steps:
@@ -310,6 +441,8 @@ jobs:
         echo "AVX2 Tests:       ${{ needs.avx2-tests.result }}"
         echo "AVX2 QA Suite:    ${{ needs.avx2-qa.result }}"
         echo "Bench Validation: ${{ needs.benchmark-validation.result }}"
+        echo "arm-neon Val:     ${{ needs.arm-neon-validation.result }}"
+        echo "Batch Val:        ${{ needs.batch-validation.result }}"
         echo ""
         if [ "${{ needs.build.result }}" == "success" ] && \
            [ "${{ needs.phase0-tests.result }}" == "success" ] && \

diff --git a/.gitignore b/.gitignore
@@ -23,6 +23,7 @@ Cargo.lock
 # Benchmark output (reproducible artifacts, not committed)
 Tests/benchmark/output/latest/
 Tests/benchmark/output/history/
+Tests/benchmark/output/v3.19_*.json
 
 # Research & design documents (local only, not for remote)
 research/