opensensor
diff --git a/‎benchmarks/basecalling/run_pod5.sh‎
Lines changed: 58 additions & 0 deletions b/‎benchmarks/basecalling/run_pod5.sh‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎benchmarks/crispr/run_chr.sh‎
Lines changed: 78 additions & 0 deletions b/‎benchmarks/crispr/run_chr.sh‎
Lines changed: 78 additions & 0 deletions
diff --git a/‎docs/ENERGY_METHODOLOGY.md‎
Lines changed: 108 additions & 20 deletions b/‎docs/ENERGY_METHODOLOGY.md‎
Lines changed: 108 additions & 20 deletions
diff --git a/‎reference/basecalling/bionpu-reference.fastq‎
Lines changed: 40 additions & 0 deletions b/‎reference/basecalling/bionpu-reference.fastq‎
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# bionpu — AIE2P-accelerated genomics with reference-equivalence verification.
+# Copyright (C) 2026 OpenSensor / Matt Davis <matt@opensensor.io>
+# SPDX-License-Identifier: GPL-3.0-only
+#
+# Basecall a pod5 read set on the NPU and verify byte-equality against
+# a Dorado reference FASTQ.
+#
+# Usage:
+#   benchmarks/basecalling/run_pod5.sh <pod5_path>
+#
+# Status: v0.1 ships a skeleton — the NPU basecalling pipeline lives in
+# the bionpu source tree (src/bionpu/kernels/basecalling/) but the CLI
+# wrapper that drives the full streaming pipeline is v0.2 scope. For
+# v0.1 the supported flow is to run the per-kernel make targets
+# manually and then verify byte-equality with `bionpu verify
+# basecalling`.
+
+set -euo pipefail
+
+POD5="${1:-}"  # empty when no argument was given; rejected below
+# "--" stops a pod5 path starting with "-" from being read as an option.
+OUT_DIR="benchmarks/results/basecalling/$(basename -- "${POD5%.pod5}" 2>/dev/null || echo unknown)"
+if [[ -z "${POD5}" ]]; then
+    cat >&2 <<EOF
+Usage: $0 <pod5_path>
+
+  pod5_path   Path to a pod5 read set (Nanopore raw signal).
+
+Pre-computed reference FASTQs are at reference/basecalling/.
+EOF
+    exit 1  # a missing argument is a usage error; the text above goes to stderr
+fi
+
+REF_FASTQ="reference/basecalling/dorado-reference.fastq"
+NPU_FASTQ="${OUT_DIR}/npu.fastq"  # only named in the hint below; never written here
+
+mkdir -p "${OUT_DIR}"
+
+echo "==> $0 ${POD5}"
+echo "    output dir:    ${OUT_DIR}"
+echo "    reference:     ${REF_FASTQ}"
+
+cat <<EOF
+
+[v0.1 placeholder]
+The end-to-end driver is v0.2 scope. For v0.1, run the kernels
+manually and then call:
+
+    bionpu verify basecalling "${NPU_FASTQ}" "${REF_FASTQ}"
+
+The kernels live at:
+    src/bionpu/kernels/basecalling/{conv_stem,lstm_cell_*,linear_projection,...}
+
+The Dorado reference FASTQ is committed at reference/basecalling/ when
+it has been generated on a host with Dorado available (the build is
+not redistributable; see Dorado's license).
+EOF
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+# bionpu — AIE2P-accelerated genomics with reference-equivalence verification.
+# Copyright (C) 2026 OpenSensor / Matt Davis <matt@opensensor.io>
+# SPDX-License-Identifier: GPL-3.0-only
+#
+# Run a CRISPR off-target scan against a target chromosome and verify
+# byte-equality against a Cas-OFFinder reference.
+#
+# Usage:
+#   benchmarks/crispr/run_chr.sh <chr> [<guides_file>]
+#
+# Example:
+#   benchmarks/crispr/run_chr.sh chr22 reference/crispr/guides_chr22.txt
+#
+# Status: v0.1 ships a skeleton — the NPU scan invocation lives in the
+# bionpu source tree (src/bionpu/kernels/crispr/) but the CLI wrapper
+# that drives a full chromosome scan from the command line is v0.2
+# scope. For v0.1 the supported flow is to run the per-kernel make
+# target manually and then verify byte-equality with `bionpu verify
+# crispr`.
+
+set -euo pipefail
+
+CHR="${1:-}"  # empty when no argument was given; rejected below
+GUIDES="${2:-reference/crispr/guides_${CHR}.txt}"  # optional second argument
+OUT_DIR="benchmarks/results/crispr/${CHR}"
+
+if [[ -z "${CHR}" ]]; then
+    cat >&2 <<EOF
+Usage: $0 <chr> [<guides_file>]
+
+  chr           Target chromosome (chr1 ... chr22, chrX, chrY).
+  guides_file   Newline-separated list of 20-nt guide spacers.
+                Defaults to reference/crispr/guides_<chr>.txt.
+
+Example:
+    $0 chr22
+
+End-to-end pipeline (v0.2 scope; v0.1 ships a manual workflow):
+    1. Scan <chr> with the NPU PAM filter + match kernel.
+    2. Run cas-offinder on the same input as the CPU reference.
+    3. bionpu verify crispr <npu.tsv> <ref.tsv>
+EOF
+    exit 1  # a missing argument is a usage error; the text above goes to stderr
+fi
+
+REF_TSV="reference/crispr/casoffinder-${CHR}-canonical.tsv"
+NPU_TSV="${OUT_DIR}/npu.tsv"  # only named in the hint below; never written here
+
+mkdir -p "${OUT_DIR}"
+
+echo "==> $0 ${CHR}"
+echo "    guides:        ${GUIDES}"
+echo "    reference TSV: ${REF_TSV}"
+echo "    output dir:    ${OUT_DIR}"
+
+cat <<EOF
+
+[v0.1 placeholder]
+The end-to-end driver is v0.2 scope. For v0.1, run the kernels
+manually (one-time, ~30 s build) and then call:
+
+    bionpu verify crispr "${NPU_TSV}" "${REF_TSV}"
+
+The verify command exits 0 on byte-equality and 1 on divergence.
+
+The kernels live at:
+    src/bionpu/kernels/crispr/{pam_filter,match_multitile_memtile,...}
+
+The CPU reference is built from cas-offinder; pre-computed canonical
+TSVs are at reference/crispr/.
+
+When the v0.2 driver lands, this script will:
+    1. Build (or use cached) NPU artifacts for the kernels.
+    2. Dispatch the scan against \${CHR} via bionpu.dispatch.
+    3. Run cas-offinder on the same input.
+    4. Call bionpu verify crispr ... and exit with its return code.
+EOF
@@ -1,22 +1,110 @@
 # Energy methodology
 
-> Status: shell — populated from `bionpu/bench/POWER_DOMAINS.md` and
-> `bionpu/bench/energy/SANITY-LOG.md` during the v0.1 extraction. Until
-> filled, the v0.1 `bench` numbers in this repo should be treated as
-> wall-clock only.
-
-This document will cover:
-
-- AMD RAPL counter access path (`/sys/class/powercap/intel-rapl:*` on
-  Ryzen-AI HX systems; the AMD-specific `package-0` / `package-1`
-  domain layout).
-- Sustained-load measurement — pre-warmup window, measurement window,
-  drift-detection window — the three-phase shape that distinguishes
-  steady-state energy from cold-start spikes.
-- Spec-bracketing assumptions — what TDP range we assume the package
-  is in, how we cross-check against the documented Ryzen-AI 9 HX SKU
-  TDP envelope, where the assumption fails.
-- NPU-specific power accounting — what's measurable today vs what is
-  inferred from the package counter delta with NPU idle vs NPU active.
-- Reproducibility envelope — what hardware revisions / firmware
-  versions / governor settings the documented numbers are valid for.
+This document is the public-facing methodology for the energy figures
+reported in `benchmarks/results/`. It exists because cross-device
+energy comparisons (CPU vs GPU vs NPU joules-per-Mbp / joules-per-scan)
+are easy to misuse — every device's "energy" is a different rail with
+different includes, different sampling rates, and different known
+instrumentation gaps. We document those explicitly so a reader can
+decide whether the comparison is honest.
+
+## TL;DR
+
+| Device | Counter source | Includes | Excludes | Sampling |
+|---|---|---|---|---|
+| **CPU** | `/sys/class/powercap/{intel-rapl,amd-rapl-msr}:0/energy_uj` (RAPL) | All CPU cores + L3 / uncore on the Zen 5 package | DRAM (no separate AMD RAPL DRAM domain), discrete GPU, NPU subdomain, platform IO | ≥10 Hz, monotonic counter integrated start-to-end |
+| **GPU** | `nvidia-smi --query-gpu=power.draw,total_energy_consumption` | Compute cores + GDDR/HBM memory + PCIe interface (board side) + VRMs | Host CPU, host DRAM, NPU | ~1 Hz (driver-reported); prefer driver-integrated `total_energy_consumption` over trapezoidal-integrated `power.draw` |
+| **NPU** | `xrt-smi examine -r platform` (firmware-internal estimate) | AIE compute tiles in the active hardware-context partition | Host SoC package (CPU; on the RAPL rail), host DRAM, Radeon iGPU on the same package, platform IO outside the AIE partition | 10 Hz (capped by ~40 ms `xrt-smi` invocation cost); trapezoidal-integrated to joules |
+
+A figure caption that compares any two of these without listing the
+includes / excludes is not honest enough to publish.
+
+## Reference documents
+
+The full methodology lives in three places in this repo:
+
+1. **[`src/bionpu/bench/POWER_DOMAINS.md`](../src/bionpu/bench/POWER_DOMAINS.md)**
+   — exhaustive per-device specification: rail name, target hardware,
+   includes / excludes, sampling rate, source path, fallback source,
+   known issues, cross-compare caveats. Front-matter is mechanically
+   lint-checked so every device entry is fully populated.
+
+2. **[`src/bionpu/bench/energy/SANITY-LOG.md`](../src/bionpu/bench/energy/SANITY-LOG.md)**
+   — the calibration log. Records the host system the numbers were
+   measured on, the kernel + module versions, the probe results
+   (which counters are AVAILABLE / UNAVAILABLE on this host), and
+   the resolution paths for the UNAVAILABLE cases. Future calibration
+   runs append; never overwrite.
+
+3. **[`src/bionpu/bench/UNITS.md`](../src/bionpu/bench/UNITS.md)** —
+   units convention (J vs Wh vs J/Mbp), measurement passport schema,
+   and the rules for combining same-rail / cross-rail figures.
+
+## Sustained-load measurement
+
+Every benchmark in `benchmarks/` measures energy across three
+windows, in this order:
+
+1. **Pre-warmup** (`pre_warmup_seconds`, default 10 s): host runs the
+   workload at full duty cycle to bring caches, governors, NPU
+   firmware, and GPU clocks to their steady-state. Energy in this
+   window is **not** counted.
+
+2. **Measurement** (`measurement_seconds`, default 30 s): the actual
+   integration window. The energy counter is sampled at the start
+   boundary, sampled again at the end boundary, and sampled at
+   least once mid-window to detect counter wraparound.
+
+3. **Drift-detection** (`drift_seconds`, default 5 s): a
+   final-window sample taken `drift_seconds` after the measurement
+   window ends. If the average power in the drift window deviates
+   from the measurement-window average by more than the drift
+   threshold (default 5 %), the measurement passport flags the run
+   as `drift_detected: true` and the published number is the
+   measurement-window value with a drift-warning annotation.
+
+This three-phase shape distinguishes steady-state energy from
+cold-start spikes. Almost all "the NPU uses X joules" claims that
+report a single-shot wall-time figure conflate the warmup transient
+with the steady state, sometimes skewing the number by 2-3×.
+
+## Spec-bracketing assumptions
+
+The published energy numbers are reported alongside the
+manufacturer-spec TDP envelope of the device's silicon, so a reader
+can check whether the measurement falls in a plausible range:
+
+- **CPU** — Ryzen AI 9 HX nominal 28-54 W TDP envelope.
+- **GPU** — per-board TGP from the OEM, recorded per run in the
+  measurement passport.
+- **NPU** — AIE2P partition at sustained load typically falls in
+  the 1.5-3.5 W range; published measurements outside this band
+  are flagged as out-of-spec and require a calibration entry in
+  `SANITY-LOG.md` before publication.
+
+The spec envelope is a sanity gate, not a target. A measurement that
+falls in-band is not automatically valid; a measurement that falls
+out-of-band is not automatically wrong (silicon binning,
+firmware-state, or governor changes can move the steady-state
+envelope by ±20 %). The bracket is published so readers can
+challenge the number.
+
+## When a counter is UNAVAILABLE
+
+Per the rules in `POWER_DOMAINS.md`, if any counter probe fails
+(permission denied, sysfs path missing, driver too old) the harness
+emits a measurements record with that device's energy field set to
+`null` and a `reason_unavailable` string. The harness MUST NEVER
+fabricate a reading. A run with an UNAVAILABLE counter still records
+wall-time; the published comparison just drops that device from the
+energy column with a footnote pointing at the sanity-log entry that
+explains why.
+
+## Reproducibility envelope
+
+The numbers in `benchmarks/results/` are valid for the host
+configuration recorded at the head of `SANITY-LOG.md`. A different
+host (different kernel, different driver, different governor) is a
+different measurement. We do not ship "expected energy" thresholds
+that other hosts must hit; we ship the reproducible measurement
+**method** so other hosts can produce their own numbers.
@@ -0,0 +1,40 @@
+@8a391d95-9203-5f59-b83f-220eed61908d
+GA
++
+!!
+@17548974-2486-5717-afde-9739ce6fa468
+AAAAA
++
+!!!!!
+@61b9e826-e529-527f-97f1-0dcbb7ba1a8f
+GA
++
+!!
+@dc04946a-59c2-508d-8f73-f5d2b460b2eb
+GAT
++
+!!!
+@7308d02e-b0ba-5be8-9bcc-7bf8c91b5593
+A
++
+!
+@3ecff3e5-b271-5cc3-ad06-a11158b4da57
+AAAA
++
+!!!!
+@4a3ed9d0-0ddf-5842-9912-633ef6d3f640
+GA
++
+!!
+@106871d5-c50a-5c28-9979-801d7caed7b1
+A
++
+!
+@e63163dc-c8b3-5574-afb5-a3922bc64074
+G
++
+!
+@2c8cc84d-40b6-5540-9a6a-089660505fd7
+GA
++
+!!