From 2a691af493e3fbf680e54c1501aeec912b5b2037 Mon Sep 17 00:00:00 2001
From: Fernando Sobreira <fbsobreira@gmail.com>
Date: Thu, 7 May 2026 19:57:34 -0400
Subject: [PATCH 1/6] [KLC-2387] add validator CPU preflight and benchmark
 crypto category

Adds a startup-time SHA-256 throughput gate for validator nodes plus a
crypto benchmark category in klever-benchmark. The preflight refuses to
start a validator measuring below 800 MB/s on a 200 ms self-bench (the
floor below which leader-mode TX processing exceeds the 425 ms protocol
hardware-tolerance window). Operators control enforcement via
preferences.enforceCpuPreflight (defaults to true, including for
existing configs that omit the key); KLEVER_SKIP_CPU_CHECK=1 is an
emergency bypass.

The standalone klever-benchmark gains a Crypto category (SHA-256 1K/16K,
Blake2b, Keccak-256, Ed25519 verify) with CPU feature attestation and a
hard grade-F veto below 500 MB/s SHA-256. JSON output exposes vetoed
and veto_reason for fleet tooling.

Field validation across the validator fleet confirms the SHA-NI gap:
non-SHA-NI guests measure 244-283 MB/s while EPYC hosts measure
1474-1674 MB/s, matching the ~5x ratio that motivated the gate.
---
 cmd/benchmark/CLI.md        |  39 +++++-
 cmd/benchmark/crypto.go     | 200 +++++++++++++++++++++++++++++
 cmd/benchmark/main.go       |   2 +
 cmd/benchmark/report.go     | 182 +++++++++++++++++++++++----
 cmd/benchmark/runner.go     |  11 ++
 cmd/benchmark/score.go      |  90 +++++++++++--
 cmd/benchmark/score_test.go | 181 ++++++++++++++++++++++++++
 cmd/node/PREFLIGHT.md       | 108 ++++++++++++++++
 cmd/node/preflight.go       | 164 ++++++++++++++++++++++++
 cmd/node/preflight_test.go  | 244 ++++++++++++++++++++++++++++++++++++
 cmd/node/startup.go         |  11 ++
 config/node/config.yaml     |  11 ++
 config/prefsConfig.go       |  21 ++++
 13 files changed, 1225 insertions(+), 39 deletions(-)
 create mode 100644 cmd/benchmark/crypto.go
 create mode 100644 cmd/benchmark/score_test.go
 create mode 100644 cmd/node/PREFLIGHT.md
 create mode 100644 cmd/node/preflight.go
 create mode 100644 cmd/node/preflight_test.go

diff --git a/cmd/benchmark/CLI.md b/cmd/benchmark/CLI.md
index 9eec2114..727c4668 100644
--- a/cmd/benchmark/CLI.md
+++ b/cmd/benchmark/CLI.md
@@ -17,6 +17,7 @@ Usage of benchmark:
   -skip-kv            Skip KV store benchmark
   -skip-memory        Skip memory bandwidth and latency benchmark
   -skip-bignum        Skip big-number / FPU benchmark
+  -skip-crypto        Skip crypto benchmark (SHA-256/Blake2b/Keccak/Ed25519)
   -output string      Output format: text or json (default: "text")
   -verbose            Enable verbose logging
   -version            Print version and exit
@@ -34,6 +35,7 @@ Usage of benchmark:
 | **KV Store** | In-memory state-access patterns (80/20 read-write) | ops/s |
 | **Memory** | DRAM bandwidth, random latency, allocator speed | GB/s, ns, M allocs/s |
 | **BigNum / FPU** | 2048-bit modexp/modmul and float64 transcendentals | ops/s |
+| **Crypto / Hashing** | SHA-256 / Blake2b / Keccak-256 / Ed25519 throughput + CPU feature flags | MB/s, ops/s |
 
 ---
 
@@ -57,22 +59,49 @@ Skipped categories are excluded from the denominator so the grade stays fair.
 
 | Category | Weight |
 |----------|--------|
-| Goroutine (CPU) | 200 |
 | Disk I/O | 200 |
 | KV Store | 200 |
-| Network | 150 |
-| Memory | 150 |
-| BigNum / FPU | 100 |
+| Crypto / Hashing | 200 |
+| Goroutine (CPU) | 150 |
+| Network | 100 |
+| Memory | 100 |
+| BigNum / FPU | 50 |
 
 | Grade | % of enabled max | Description |
 |-------|-----------------|-------------|
 | **S** | ≥ 90 % | Elite — top-tier validator hardware |
 | **A** | ≥ 75 % | Excellent — production-ready for high-traffic networks |
 | **B** | ≥ 60 % | Good — suitable for standard validator operation |
-| **C** | ≥ 45 % | Acceptable — meets minimum validator requirements |
+| **C** | ≥ 45 % | Acceptable — meets minimum validator requirements; consider a hardware upgrade |
 | **D** | ≥ 30 % | Marginal — several metrics below recommended levels |
 | **F** | < 30 % | Insufficient — does not meet validator requirements |
 
+### Hard veto: SHA-256 throughput floor
+
+Klever's TX hot path hashes SHA-256 per-TX, per-header, and per-state-entry.
+The protocol tolerates some hardware variance — a leader has a 500 ms
+baseline timeout with a 425 ms lower bound below which validators
+attribute leader failure to weak hardware. To prevent operators from
+deploying nodes that cannot consistently process TXs as leader within
+that window, the Crypto category applies a **hard veto** on the measured
+SHA-256 throughput: hosts whose 16 KiB SHA-256 throughput is below
+**500 MB/s** have their overall grade capped at **F** regardless of total
+points.
+
+The veto is grounded in the measurement, not in any specific CPU feature
+flag. SHA-NI absence is the most common cause of low throughput in
+practice (Skylake-X / Cascade Lake / Haswell on amd64), and the report
+calls this out informationally. The text report highlights the reason;
+the JSON report sets `score.grade = "F"`, populates `score.vetoed=true`
+and `score.veto_reason`, and exposes the underlying CPU flags under
+`crypto.cpu_features`.
+
+**Note on validator startup:** the `klever-node` validator binary applies
+a *stricter* CPU preflight at startup, requiring ≥ 800 MB/s on a 200 ms
+self-bench (vs the 500 MB/s benchmark veto). A host can pass this
+benchmark with a non-`F` grade and still be refused by validator
+startup. See `cmd/node/PREFLIGHT.md` for the rationale.
+
 ---
 
 ## Examples
diff --git a/cmd/benchmark/crypto.go b/cmd/benchmark/crypto.go
new file mode 100644
index 00000000..2217ae10
--- /dev/null
+++ b/cmd/benchmark/crypto.go
@@ -0,0 +1,200 @@
+package main
+
+// RunCryptoBenchmark measures the cryptographic throughput a Kleverchain
+// validator depends on for consensus, transaction hashing, and signature
+// verification. The hashing primitives matter most — Klever's TX hot path
+// hashes SHA-256 per-TX, per-header, and per-state-entry, and the difference
+// between SHA-NI-equipped and SHA-NI-deficient hardware is roughly 5–6× on
+// SHA-256 throughput, which translates almost linearly into wall-time on
+// smart-contract transactions.
+//
+// Metrics:
+//
+//   - SHA256MBps      : 1 KiB blocks; sensitive to SHA-NI startup cost.
+//   - SHA256LargeMBps : 16 KiB blocks; matches the openssl-speed reference
+//     used during the original validator-fleet investigation, and reveals
+//     sustained SHA-NI throughput separately from the small-block path.
+//   - Blake2bMBps     : 16 KiB blocks; exercises the AVX2 path in
+//     golang.org/x/crypto/blake2b.
+//   - Keccak256MBps   : 16 KiB blocks; pure-Go reference (no SIMD), used
+//     as a sanity check — Keccak should be roughly identical across CPUs
+//     of the same generation, so a large gap here means non-CPU factors
+//     are at play (frequency cap, thermal throttle, hypervisor masking).
+//   - Ed25519VerifyOpsPerSec : stdlib Ed25519 signature verification,
+//     SHA-512-bound; complements the SHA-256 numbers.
+//
+// CPU feature attestation is reported alongside the throughput numbers so
+// operators can correlate measured perf to the underlying instruction set.
+//
+// BLS-verify (MCL/herumi) is intentionally not measured here — wiring MCL
+// into the benchmark binary would force a CGO dependency on operators that
+// just want to grade their hardware. The AVX-512 IFMA flag is reported
+// instead as a proxy for the BLS-pairing fast path; that flag has a tight
+// 1.5× correlation with herumi/MCL pairing throughput.
+
+import (
+	"crypto/ed25519"
+	"crypto/rand"
+	"crypto/sha256"
+	"fmt"
+	"hash"
+	"os"
+	"runtime"
+	"strings"
+	"time"
+
+	"github.com/klauspost/cpuid/v2"
+	"golang.org/x/crypto/blake2b"
+	"golang.org/x/crypto/sha3"
+)
+
+const (
+	cryptoBenchDuration = 2 * time.Second
+	cryptoSmallBlock    = 1024      // 1 KiB
+	cryptoLargeBlock    = 16 * 1024 // 16 KiB — matches openssl-speed reference
+)
+
+// CryptoResult holds the per-primitive throughput and CPU feature flags.
+type CryptoResult struct {
+	SHA256MBps             float64 // SHA-256 throughput on 1 KiB blocks
+	SHA256LargeMBps        float64 // SHA-256 throughput on 16 KiB blocks
+	Blake2bMBps            float64 // Blake2b-512 throughput on 16 KiB blocks
+	Keccak256MBps          float64 // Keccak-256 throughput on 16 KiB blocks
+	Ed25519VerifyOpsPerSec float64 // Ed25519 signature verifications per second
+
+	// CPU feature flags — reported but not directly scored. The hard veto
+	// in score.go fires on the measured SHA256LargeMBps, not on HasSHA_NI,
+	// so every flag here is informational; AVX-512 IFMA in particular
+	// indicates whether the BLS pairing fast path is available.
+	HasSHA_NI     bool
+	HasAVX512IFMA bool
+	HasVAES       bool
+	HasGFNI       bool
+}
+
+// RunCryptoBenchmark executes all crypto sub-benchmarks and detects CPU
+// feature flags. Returns a populated CryptoResult on success.
+func RunCryptoBenchmark() (*CryptoResult, error) {
+	r := &CryptoResult{
+		HasSHA_NI:     hasSHAAcceleration(),
+		HasAVX512IFMA: cpuid.CPU.Has(cpuid.AVX512IFMA),
+		HasVAES:       cpuid.CPU.Has(cpuid.VAES),
+		HasGFNI:       cpuid.CPU.Has(cpuid.GFNI),
+	}
+
+	clearLine("  Crypto: SHA-256 1 KiB blocks (%s)...", cryptoBenchDuration)
+	r.SHA256MBps = benchHashMBps(sha256.New, cryptoSmallBlock)
+
+	clearLine("  Crypto: SHA-256 16 KiB blocks (%s)...", cryptoBenchDuration)
+	r.SHA256LargeMBps = benchHashMBps(sha256.New, cryptoLargeBlock)
+
+	clearLine("  Crypto: Blake2b 16 KiB blocks (%s)...", cryptoBenchDuration)
+	r.Blake2bMBps = benchHashMBps(newBlake2b512, cryptoLargeBlock)
+
+	clearLine("  Crypto: Keccak-256 16 KiB blocks (%s)...", cryptoBenchDuration)
+	r.Keccak256MBps = benchHashMBps(sha3.NewLegacyKeccak256, cryptoLargeBlock)
+
+	clearLine("  Crypto: Ed25519 verify (%s)...", cryptoBenchDuration)
+	ops, err := benchEd25519Verify()
+	if err != nil {
+		return nil, fmt.Errorf("ed25519 verify: %w", err)
+	}
+	r.Ed25519VerifyOpsPerSec = ops
+
+	fmt.Fprintf(os.Stderr, "  %s\r", strings.Repeat(" ", 60))
+	return r, nil
+}
+
+// hasSHAAcceleration reports whether SHA-256 hardware acceleration is
+// available on the current architecture: SHA-NI on amd64, ARMv8 SHA2 on
+// arm64. Returns false on every other architecture.
+func hasSHAAcceleration() bool {
+	switch runtime.GOARCH {
+	case "amd64":
+		return cpuid.CPU.Has(cpuid.SHA)
+	case "arm64":
+		return cpuid.CPU.Has(cpuid.SHA2)
+	default:
+		return false
+	}
+}
+
+// newBlake2b512 returns a fresh Blake2b-512 hash. Wrapped to match the
+// stdlib `func() hash.Hash` constructor signature so it can be passed to
+// benchHashMBps directly. blake2b.New512(nil) only errors on a non-nil key
+// of invalid length; passing nil never errors.
+func newBlake2b512() hash.Hash {
+	h, err := blake2b.New512(nil)
+	if err != nil {
+		panic(fmt.Sprintf("blake2b.New512(nil) unexpectedly errored: %v", err))
+	}
+	return h
+}
+
+// benchHashMBps hashes blockSize-byte buffers for cryptoBenchDuration and
+// returns sustained throughput in MB/s (binary: bytes / 1024² per second).
+//
+// The buffer is seeded once with crypto/rand so data-dependent hash
+// implementations (Blake2b, Keccak) measure realistic throughput rather
+// than the all-zero special case. SHA-256's amd64/arm64 fast paths are
+// data-independent, so seeding does not alter their numbers.
+//
+// The hot loop checks the deadline once per innerLoop iterations to
+// minimise time.Now() syscall overhead on fast hosts.
+func benchHashMBps(newHash func() hash.Hash, blockSize int) float64 {
+	buf := make([]byte, blockSize)
+	if _, err := rand.Read(buf); err != nil {
+		for i := range buf {
+			buf[i] = byte(i)
+		}
+	}
+	h := newHash()
+	deadline := time.Now().Add(cryptoBenchDuration)
+	start := time.Now()
+	var bytes int64
+	const innerLoop = 256
+	for time.Now().Before(deadline) {
+		for range innerLoop {
+			h.Reset()
+			_, _ = h.Write(buf)
+			_ = h.Sum(nil)
+		}
+		bytes += int64(blockSize) * innerLoop
+	}
+	elapsed := time.Since(start).Seconds()
+	if elapsed <= 0 {
+		return 0
+	}
+	return float64(bytes) / (1024 * 1024) / elapsed
+}
+
+// benchEd25519Verify generates a key pair + signs once, then measures how
+// many verifications per second the host can sustain. Verify is the path
+// that runs on every TX received by the validator, so it is more relevant
+// to fleet behavior than Sign.
+func benchEd25519Verify() (float64, error) {
+	pub, priv, err := ed25519.GenerateKey(rand.Reader)
+	if err != nil {
+		return 0, err
+	}
+	msg := make([]byte, 64)
+	if _, err := rand.Read(msg); err != nil {
+		return 0, err
+	}
+	sig := ed25519.Sign(priv, msg)
+
+	deadline := time.Now().Add(cryptoBenchDuration)
+	start := time.Now()
+	var count int64
+	for time.Now().Before(deadline) {
+		if !ed25519.Verify(pub, msg, sig) {
+			return 0, fmt.Errorf("ed25519 self-verify failed unexpectedly")
+		}
+		count++
+	}
+	elapsed := time.Since(start).Seconds()
+	if elapsed <= 0 {
+		return 0, nil
+	}
+	return float64(count) / elapsed, nil
+}
diff --git a/cmd/benchmark/main.go b/cmd/benchmark/main.go
index 1d59d9c3..ff8f6d54 100644
--- a/cmd/benchmark/main.go
+++ b/cmd/benchmark/main.go
@@ -31,6 +31,7 @@ func main() {
 		skipKV        = flag.Bool("skip-kv", false, "Skip KV store benchmark")
 		skipMemory    = flag.Bool("skip-memory", false, "Skip memory bandwidth and latency benchmark")
 		skipBigNum    = flag.Bool("skip-bignum", false, "Skip big-number / FPU benchmark")
+		skipCrypto    = flag.Bool("skip-crypto", false, "Skip crypto benchmark (SHA-256/Blake2b/Keccak/Ed25519)")
 		outputFmt     = flag.String("output", "text", "Output format: text or json")
 		verbose       = flag.Bool("verbose", false, "Enable verbose logging")
 		version       = flag.Bool("version", false, "Print version and exit")
@@ -96,6 +97,7 @@ func main() {
 		SkipKV:        *skipKV,
 		SkipMemory:    *skipMemory,
 		SkipBigNum:    *skipBigNum,
+		SkipCrypto:    *skipCrypto,
 		OutputFmt:     *outputFmt,
 	}
 
diff --git a/cmd/benchmark/report.go b/cmd/benchmark/report.go
index c3934228..7195c17e 100644
--- a/cmd/benchmark/report.go
+++ b/cmd/benchmark/report.go
@@ -32,6 +32,7 @@ type BenchmarkResults struct {
 	KVResult        *KVResult
 	MemoryResult    *MemoryResult
 	BigNumResult    *BigNumResult
+	CryptoResult    *CryptoResult
 }
 
 // ---------------------------------------------------------------------------
@@ -123,6 +124,25 @@ const (
 	bigFloat64FailOps = 1_000_000.0
 	bigIntDivPassOps  = 13_000_000.0
 	bigIntDivFailOps  = 3_000_000.0
+
+	// Crypto thresholds (calibrated against the validator-fleet investigation:
+	// AMD EPYC Zen4 with SHA-NI hits ~1740 MB/s at 16 KiB; Intel Skylake-IBRS
+	// without SHA-NI sits at ~310 MB/s on the same blocks. The fail floor is
+	// set above the Skylake number so any SHA-NI-deficient amd64 host fails.)
+	cryptoSHA256SmallPassMBps = 1_200.0 // SHA-256 on 1 KiB blocks
+	cryptoSHA256SmallFailMBps = 500.0
+	cryptoSHA256LargePassMBps = 1_500.0 // SHA-256 on 16 KiB blocks
+	cryptoSHA256LargeFailMBps = 600.0
+	cryptoBlake2bPassMBps     = 700.0 // Blake2b-512 on 16 KiB blocks (AVX2)
+	cryptoBlake2bFailMBps     = 300.0
+	// Pure-Go Keccak (no SIMD path); calibrated against three healthy AMD
+	// Zen2/Zen4 chips that landed in the 295–390 MB/s range. NOTE(review):
+	// the 350 MB/s pass threshold sits inside that range, so the slowest
+	// healthy chip (295 MB/s) still shows WARN — confirm the intended value.
+	cryptoKeccak256PassMBps    = 350.0
+	cryptoKeccak256FailMBps    = 100.0
+	cryptoEd25519VerifyPassOps = 12_000.0 // Ed25519.Verify (SHA-512-bound)
+	cryptoEd25519VerifyFailOps = 5_000.0
 )
 
 type verdict int
@@ -245,6 +265,27 @@ func memoryVerdict(r *MemoryResult) verdict {
 	return verdictPass
 }
 
+func cryptoVerdict(r *CryptoResult) verdict {
+	if r == nil {
+		return verdictSkip
+	}
+	if r.SHA256MBps < cryptoSHA256SmallFailMBps ||
+		r.SHA256LargeMBps < cryptoSHA256LargeFailMBps ||
+		r.Blake2bMBps < cryptoBlake2bFailMBps ||
+		r.Keccak256MBps < cryptoKeccak256FailMBps ||
+		r.Ed25519VerifyOpsPerSec < cryptoEd25519VerifyFailOps {
+		return verdictFail
+	}
+	if r.SHA256MBps < cryptoSHA256SmallPassMBps ||
+		r.SHA256LargeMBps < cryptoSHA256LargePassMBps ||
+		r.Blake2bMBps < cryptoBlake2bPassMBps ||
+		r.Keccak256MBps < cryptoKeccak256PassMBps ||
+		r.Ed25519VerifyOpsPerSec < cryptoEd25519VerifyPassOps {
+		return verdictWarn
+	}
+	return verdictPass
+}
+
 func bigNumVerdict(r *BigNumResult) verdict {
 	if r == nil {
 		return verdictSkip
@@ -306,6 +347,7 @@ func printText(results *BenchmarkResults) {
 	kv := kvVerdict(results.KVResult)
 	mv := memoryVerdict(results.MemoryResult)
 	bv := bigNumVerdict(results.BigNumResult)
+	cv := cryptoVerdict(results.CryptoResult)
 	sc := ComputeScore(results)
 
 	fmt.Println()
@@ -315,6 +357,10 @@ func printText(results *BenchmarkResults) {
 	fmt.Println(sep)
 	fmt.Printf("  System : %s/%s   CPUs: %d   Go: %s\n",
 		si.GOOS, si.GOARCH, si.CPUs, si.GoVersion)
+	if c := results.CryptoResult; c != nil {
+		fmt.Printf("  CPU    : SHA-NI=%s  AVX-512 IFMA=%s  VAES=%s  GFNI=%s\n",
+			yesNo(c.HasSHA_NI), yesNo(c.HasAVX512IFMA), yesNo(c.HasVAES), yesNo(c.HasGFNI))
+	}
 	fmt.Println(sep)
 
 	if results.GoroutineResult != nil {
@@ -335,6 +381,9 @@ func printText(results *BenchmarkResults) {
 	if results.BigNumResult != nil {
 		printBigNumSection(results.BigNumResult, bv, sep)
 	}
+	if results.CryptoResult != nil {
+		printCryptoSection(results.CryptoResult, cv, sep)
+	}
 
 	printScoreSection(sc, sep)
 	fmt.Println()
@@ -509,6 +558,49 @@ func printBigNumSection(r *BigNumResult, v verdict, sep string) {
 	fmt.Println(sep)
 }
 
+func printCryptoSection(r *CryptoResult, v verdict, sep string) {
+	fmt.Printf("  CRYPTO / HASHING   %s %s\n", v.Icon(), v)
+	fmt.Println()
+
+	s256V := metricVerdict(r.SHA256MBps, cryptoSHA256SmallPassMBps, cryptoSHA256SmallFailMBps)
+	s256LV := metricVerdict(r.SHA256LargeMBps, cryptoSHA256LargePassMBps, cryptoSHA256LargeFailMBps)
+	b2V := metricVerdict(r.Blake2bMBps, cryptoBlake2bPassMBps, cryptoBlake2bFailMBps)
+	kV := metricVerdict(r.Keccak256MBps, cryptoKeccak256PassMBps, cryptoKeccak256FailMBps)
+	edV := metricVerdict(r.Ed25519VerifyOpsPerSec, cryptoEd25519VerifyPassOps, cryptoEd25519VerifyFailOps)
+
+	fmt.Printf("  %-32s  %7.1f MB/s  %s  (pass≥%.0f, fail<%.0f MB/s)\n",
+		"SHA-256 (1 KiB blocks):", r.SHA256MBps, s256V.Icon(),
+		cryptoSHA256SmallPassMBps, cryptoSHA256SmallFailMBps)
+	fmt.Printf("  %-32s  %7.1f MB/s  %s  (pass≥%.0f, fail<%.0f MB/s)\n",
+		"SHA-256 (16 KiB blocks):", r.SHA256LargeMBps, s256LV.Icon(),
+		cryptoSHA256LargePassMBps, cryptoSHA256LargeFailMBps)
+	fmt.Printf("  %-32s  %7.1f MB/s  %s  (pass≥%.0f, fail<%.0f MB/s)\n",
+		"Blake2b-512 (16 KiB):", r.Blake2bMBps, b2V.Icon(),
+		cryptoBlake2bPassMBps, cryptoBlake2bFailMBps)
+	fmt.Printf("  %-32s  %7.1f MB/s  %s  (pass≥%.0f, fail<%.0f MB/s)\n",
+		"Keccak-256 (16 KiB):", r.Keccak256MBps, kV.Icon(),
+		cryptoKeccak256PassMBps, cryptoKeccak256FailMBps)
+	fmt.Printf("  %-32s  %s  %s  (pass≥%.0fK, fail<%.0fK ops/s)\n",
+		"Ed25519 verify:", humanOps(r.Ed25519VerifyOpsPerSec), edV.Icon(),
+		cryptoEd25519VerifyPassOps/1000, cryptoEd25519VerifyFailOps/1000)
+
+	if runtime.GOARCH == "amd64" && !r.HasSHA_NI {
+		fmt.Println()
+		fmt.Println("  ! CPU lacks SHA-NI; this is the most common cause of low SHA-256 throughput.")
+		fmt.Println("  ! If the throughput numbers above are below the pass thresholds, migrate to")
+		fmt.Println("  ! AMD Zen, Intel Ice Lake-SP+, or modern ARM (with ARMv8 SHA2).")
+	}
+	fmt.Println()
+	fmt.Println(sep)
+}
+
+func yesNo(b bool) string {
+	if b {
+		return "yes"
+	}
+	return "no"
+}
+
 // ---------------------------------------------------------------------------
 // Score section
 // ---------------------------------------------------------------------------
@@ -516,6 +608,9 @@ func printBigNumSection(r *BigNumResult, v verdict, sep string) {
 func printScoreSection(s BenchmarkScore, sep string) {
 	fmt.Printf("  SCORE : %d / %d   Grade: %s   %s\n",
 		s.Total, s.MaxTotal, s.Grade, scoreGradeSummary(s.Grade))
+	if s.Vetoed {
+		fmt.Printf("  ! Hard veto: %s\n", s.VetoedReason)
+	}
 	fmt.Println()
 
 	printScoreRow("Goroutine (CPU)", s.Goroutine, weightGoroutine)
@@ -524,6 +619,7 @@ func printScoreSection(s BenchmarkScore, sep string) {
 	printScoreRow("KV Store", s.KV, weightKV)
 	printScoreRow("Memory", s.Memory, weightMemory)
 	printScoreRow("BigNum / FPU", s.BigNum, weightBigNum)
+	printScoreRow("Crypto / Hashing", s.Crypto, weightCrypto)
 }
 
 func printScoreRow(name string, c CategoryScore, weight int) {
@@ -599,7 +695,7 @@ func verdictSummary(v verdict) string {
 	case verdictPass:
 		return "This node meets Kleverchain validator requirements."
 	case verdictWarn:
-		return "Performance is below recommended levels; review individual sections before deploying."
+		return "Performance meets minimum requirements but is below recommended levels — consider a hardware upgrade and review individual sections before deploying."
 	case verdictFail:
 		return "This node does NOT meet Kleverchain validator requirements."
 	default:
@@ -620,6 +716,7 @@ type jsonReport struct {
 	KV             *jsonKV        `json:"kv,omitempty"`
 	Memory         *jsonMemory    `json:"memory,omitempty"`
 	BigNum         *jsonBigNum    `json:"bignum,omitempty"`
+	Crypto         *jsonCrypto    `json:"crypto,omitempty"`
 	Score          jsonScore      `json:"score"`
 	OverallVerdict string         `json:"overall_verdict"`
 }
@@ -632,16 +729,19 @@ type jsonCategoryScore struct {
 }
 
 type jsonScore struct {
-	Total     int               `json:"total"`
-	MaxTotal  int               `json:"max_total"`
-	Pct       float64           `json:"pct"`
-	Grade     string            `json:"grade"`
-	Goroutine jsonCategoryScore `json:"goroutine"`
-	Disk      jsonCategoryScore `json:"disk"`
-	Network   jsonCategoryScore `json:"network"`
-	KV        jsonCategoryScore `json:"kv"`
-	Memory    jsonCategoryScore `json:"memory"`
-	BigNum    jsonCategoryScore `json:"bignum"`
+	Total        int               `json:"total"`
+	MaxTotal     int               `json:"max_total"`
+	Pct          float64           `json:"pct"`
+	Grade        string            `json:"grade"`
+	Vetoed       bool              `json:"vetoed"`
+	VetoedReason string            `json:"veto_reason,omitempty"`
+	Goroutine    jsonCategoryScore `json:"goroutine"`
+	Disk         jsonCategoryScore `json:"disk"`
+	Network      jsonCategoryScore `json:"network"`
+	KV           jsonCategoryScore `json:"kv"`
+	Memory       jsonCategoryScore `json:"memory"`
+	BigNum       jsonCategoryScore `json:"bignum"`
+	Crypto       jsonCategoryScore `json:"crypto"`
 }
 
 type jsonGoroutineLevel struct {
@@ -700,6 +800,23 @@ type jsonBigNum struct {
 	Verdict          string  `json:"verdict"`
 }
 
+type jsonCPUFeatures struct {
+	HasSHA_NI     bool `json:"sha_ni"`
+	HasAVX512IFMA bool `json:"avx512_ifma"`
+	HasVAES       bool `json:"vaes"`
+	HasGFNI       bool `json:"gfni"`
+}
+
+type jsonCrypto struct {
+	SHA256MBps             float64         `json:"sha256_1k_mbps"`
+	SHA256LargeMBps        float64         `json:"sha256_16k_mbps"`
+	Blake2bMBps            float64         `json:"blake2b_16k_mbps"`
+	Keccak256MBps          float64         `json:"keccak256_16k_mbps"`
+	Ed25519VerifyOpsPerSec float64         `json:"ed25519_verify_ops_per_sec"`
+	CPUFeatures            jsonCPUFeatures `json:"cpu_features"`
+	Verdict                string          `json:"verdict"`
+}
+
 func printJSON(results *BenchmarkResults) {
 	gv := goroutineVerdict(results.GoroutineResult)
 	dv := diskVerdict(results.DiskResult)
@@ -707,8 +824,9 @@ func printJSON(results *BenchmarkResults) {
 	kv := kvVerdict(results.KVResult)
 	mv := memoryVerdict(results.MemoryResult)
 	bv := bigNumVerdict(results.BigNumResult)
+	cv := cryptoVerdict(results.CryptoResult)
 	sc := ComputeScore(results)
-	ov := gradeToVerdict(sc.Grade, overallVerdict(gv, dv, nv, kv, mv, bv))
+	ov := gradeToVerdict(sc.Grade, overallVerdict(gv, dv, nv, kv, mv, bv, cv))
 
 	report := jsonReport{
 		RunAt:          results.RunAt.Format(time.RFC3339),
@@ -786,17 +904,37 @@ func printJSON(results *BenchmarkResults) {
 		}
 	}
 
+	if r := results.CryptoResult; r != nil {
+		report.Crypto = &jsonCrypto{
+			SHA256MBps:             r.SHA256MBps,
+			SHA256LargeMBps:        r.SHA256LargeMBps,
+			Blake2bMBps:            r.Blake2bMBps,
+			Keccak256MBps:          r.Keccak256MBps,
+			Ed25519VerifyOpsPerSec: r.Ed25519VerifyOpsPerSec,
+			CPUFeatures: jsonCPUFeatures{
+				HasSHA_NI:     r.HasSHA_NI,
+				HasAVX512IFMA: r.HasAVX512IFMA,
+				HasVAES:       r.HasVAES,
+				HasGFNI:       r.HasGFNI,
+			},
+			Verdict: cv.String(),
+		}
+	}
+
 	report.Score = jsonScore{
-		Total:     sc.Total,
-		MaxTotal:  sc.MaxTotal,
-		Pct:       sc.Pct,
-		Grade:     sc.Grade,
-		Goroutine: jsonCategoryScore{Points: sc.Goroutine.Points, Max: sc.Goroutine.Max, Pct: sc.Goroutine.Pct(), Skipped: sc.Goroutine.Skipped},
-		Disk:      jsonCategoryScore{Points: sc.Disk.Points, Max: sc.Disk.Max, Pct: sc.Disk.Pct(), Skipped: sc.Disk.Skipped},
-		Network:   jsonCategoryScore{Points: sc.Network.Points, Max: sc.Network.Max, Pct: sc.Network.Pct(), Skipped: sc.Network.Skipped},
-		KV:        jsonCategoryScore{Points: sc.KV.Points, Max: sc.KV.Max, Pct: sc.KV.Pct(), Skipped: sc.KV.Skipped},
-		Memory:    jsonCategoryScore{Points: sc.Memory.Points, Max: sc.Memory.Max, Pct: sc.Memory.Pct(), Skipped: sc.Memory.Skipped},
-		BigNum:    jsonCategoryScore{Points: sc.BigNum.Points, Max: sc.BigNum.Max, Pct: sc.BigNum.Pct(), Skipped: sc.BigNum.Skipped},
+		Total:        sc.Total,
+		MaxTotal:     sc.MaxTotal,
+		Pct:          sc.Pct,
+		Grade:        sc.Grade,
+		Vetoed:       sc.Vetoed,
+		VetoedReason: sc.VetoedReason,
+		Goroutine:    jsonCategoryScore{Points: sc.Goroutine.Points, Max: sc.Goroutine.Max, Pct: sc.Goroutine.Pct(), Skipped: sc.Goroutine.Skipped},
+		Disk:         jsonCategoryScore{Points: sc.Disk.Points, Max: sc.Disk.Max, Pct: sc.Disk.Pct(), Skipped: sc.Disk.Skipped},
+		Network:      jsonCategoryScore{Points: sc.Network.Points, Max: sc.Network.Max, Pct: sc.Network.Pct(), Skipped: sc.Network.Skipped},
+		KV:           jsonCategoryScore{Points: sc.KV.Points, Max: sc.KV.Max, Pct: sc.KV.Pct(), Skipped: sc.KV.Skipped},
+		Memory:       jsonCategoryScore{Points: sc.Memory.Points, Max: sc.Memory.Max, Pct: sc.Memory.Pct(), Skipped: sc.Memory.Skipped},
+		BigNum:       jsonCategoryScore{Points: sc.BigNum.Points, Max: sc.BigNum.Max, Pct: sc.BigNum.Pct(), Skipped: sc.BigNum.Skipped},
+		Crypto:       jsonCategoryScore{Points: sc.Crypto.Points, Max: sc.Crypto.Max, Pct: sc.Crypto.Pct(), Skipped: sc.Crypto.Skipped},
 	}
 
 	enc := json.NewEncoder(os.Stdout)
diff --git a/cmd/benchmark/runner.go b/cmd/benchmark/runner.go
index b11b7f00..796fac83 100644
--- a/cmd/benchmark/runner.go
+++ b/cmd/benchmark/runner.go
@@ -19,6 +19,7 @@ type Config struct {
 	SkipKV        bool
 	SkipMemory    bool
 	SkipBigNum    bool
+	SkipCrypto    bool
 	OutputFmt     string
 }
 
@@ -102,5 +103,15 @@ func (r *Runner) Run() (*BenchmarkResults, error) {
 		results.BigNumResult = br
 	}
 
+	if !r.cfg.SkipCrypto {
+		fmt.Fprintf(os.Stderr, "Running crypto benchmark (SHA-256/Blake2b/Keccak/Ed25519, %s/primitive)...\n",
+			cryptoBenchDuration)
+		cr, err := RunCryptoBenchmark()
+		if err != nil {
+			return nil, fmt.Errorf("crypto benchmark: %w", err)
+		}
+		results.CryptoResult = cr
+	}
+
 	return results, nil
 }
diff --git a/cmd/benchmark/score.go b/cmd/benchmark/score.go
index 40329fe9..41c50737 100644
--- a/cmd/benchmark/score.go
+++ b/cmd/benchmark/score.go
@@ -13,27 +13,47 @@ package main
 //
 // Point weights (total = 1000 when all categories are enabled):
 //
-//	Goroutine (CPU scalability)  200
 //	Disk I/O                     200
 //	KV Store (state access)      200
-//	Network (P2P stack)          150
-//	Memory (DRAM + allocator)    150
-//	BigNum / FPU                 100
+//	Crypto / Hashing             200   ← consensus + TX hashing
+//	Goroutine (CPU scalability)  150
+//	Network (P2P stack)          100
+//	Memory (DRAM + allocator)    100
+//	BigNum / FPU                  50
+//
+// The Crypto category is gated by a hard veto on measured SHA-256
+// throughput: hosts that cannot sustain enough hashing throughput to keep
+// leader-mode TX processing within the protocol's hardware-tolerance
+// window (lowerBound = 425 ms = 85% of the 500 ms baseTimeout) have their
+// overall grade capped at F regardless of total points. SHA-NI absence is
+// the most common reason for low throughput in practice but is not
+// asserted as the sole cause — the veto is grounded in the measured
+// number, not a CPU flag. See ComputeScore for the gating logic.
 //
 // Grade thresholds (% of enabled max):
 //
 //	≥ 90 %  → S   Elite — top-tier validator hardware
 //	≥ 75 %  → A   Excellent — production-ready for high-traffic networks
 //	≥ 60 %  → B   Good — suitable for standard validator operation
-//	≥ 45 %  → C   Acceptable — meets minimum validator requirements
+//	≥ 45 %  → C   Acceptable — meets minimum validator requirements; consider a hardware upgrade
 //	≥ 30 %  → D   Marginal — several metrics below recommended levels
 //	< 30 %  → F   Insufficient — does not meet validator requirements
 
 import (
+	"fmt"
 	"math"
 	"time"
 )
 
+// minLeaderSHA256MBps is the SHA-256 throughput floor below which a node
+// cannot reliably process TXs as leader within the protocol's hardware-
+// tolerance window (lowerBound = 425 ms). Calibrated from field data: a
+// validator measured at ~250 MB/s on 16 KiB blocks took ~600 ms to process
+// a representative SC TX as leader, well above the 425 ms lowerBound.
+// Setting the floor at 500 MB/s gives ~2× margin to the lowerBound; it sits
+// below the 600 MB/s verdict fail floor (cryptoSHA256LargeFailMBps).
+const minLeaderSHA256MBps = 500.0
+
 // ---------------------------------------------------------------------------
 // Excellent thresholds (score = 100)
 // ---------------------------------------------------------------------------
@@ -72,6 +92,14 @@ const (
 	bigModMulExcellentOps  = 2_000_000.0
 	bigFloat64ExcellentOps = 10_000_000.0
 	bigIntDivExcellentOps  = 30_000_000.0
+
+	// Crypto excellent ceilings (matches AMD Zen4 with SHA-NI; openssl-speed
+	// 16 KiB SHA-256 ≈ 1.7 GB/s, Blake2b ≈ 0.9 GB/s; ed25519 stdlib ≈ 30K/s).
+	cryptoSHA256SmallExcellentMBps  = 1_500.0
+	cryptoSHA256LargeExcellentMBps  = 1_800.0
+	cryptoBlake2bExcellentMBps      = 900.0
+	cryptoKeccak256ExcellentMBps    = 600.0
+	cryptoEd25519VerifyExcellentOps = 25_000.0
 )
 
 // ---------------------------------------------------------------------------
@@ -79,12 +107,13 @@ const (
 // ---------------------------------------------------------------------------
 
 const (
-	weightGoroutine = 200
 	weightDisk      = 200
 	weightKV        = 200
-	weightNetwork   = 150
-	weightMemory    = 150
-	weightBigNum    = 100
+	weightCrypto    = 200
+	weightGoroutine = 150
+	weightNetwork   = 100
+	weightMemory    = 100
+	weightBigNum    = 50
 )
 
 // ---------------------------------------------------------------------------
@@ -115,11 +144,16 @@ type BenchmarkScore struct {
 	KV        CategoryScore
 	Memory    CategoryScore
 	BigNum    CategoryScore
+	Crypto    CategoryScore
 
 	Total    int     // sum of enabled category points
 	MaxTotal int     // sum of enabled category maxes
 	Pct      float64 // Total / MaxTotal (0.0–1.0); 0 if nothing enabled
 	Grade    string  // S / A / B / C / D / F
+	// Vetoed is true when a hard requirement failed (measured 16 KiB SHA-256
+	// below minLeaderSHA256MBps). When set, Grade is "F" regardless of points.
+	Vetoed       bool
+	VetoedReason string
 }
 
 // ComputeScore builds a BenchmarkScore from all benchmark results.
@@ -136,8 +170,9 @@ func ComputeScore(r *BenchmarkResults) BenchmarkScore {
 	s.KV = scoreCategory(kvCatScore(r.KVResult), weightKV, r.KVResult == nil)
 	s.Memory = scoreCategory(memoryCatScore(r.MemoryResult), weightMemory, r.MemoryResult == nil)
 	s.BigNum = scoreCategory(bigNumCatScore(r.BigNumResult), weightBigNum, r.BigNumResult == nil)
+	s.Crypto = scoreCategory(cryptoCatScore(r.CryptoResult), weightCrypto, r.CryptoResult == nil)
 
-	for _, c := range []CategoryScore{s.Goroutine, s.Disk, s.Network, s.KV, s.Memory, s.BigNum} {
+	for _, c := range []CategoryScore{s.Goroutine, s.Disk, s.Network, s.KV, s.Memory, s.BigNum, s.Crypto} {
 		s.Total += c.Points
 		s.MaxTotal += c.Max
 	}
@@ -147,6 +182,24 @@ func ComputeScore(r *BenchmarkResults) BenchmarkScore {
 	} else {
 		s.Grade = "N/A"
 	}
+
+	// Hard veto: SHA-256 throughput below the leader-mode floor caps the
+	// grade at F regardless of other category scores. Field investigation
+	// of slow validators showed measured SHA-256 throughput correlates
+	// with leader-mode TX processing time well enough to predict whether
+	// a node will exceed the protocol's hardware-tolerance window. SHA-NI
+	// absence is the most common cause of low throughput on amd64 but is
+	// not asserted as the sole cause — the veto fires on the measurement.
+	if c := r.CryptoResult; c != nil && c.SHA256LargeMBps < minLeaderSHA256MBps {
+		s.Vetoed = true
+		s.VetoedReason = fmt.Sprintf(
+			"SHA-256 throughput %.0f MB/s < %.0f MB/s minimum — node likely cannot sustain "+
+				"leader-mode TX processing within the consensus hardware-tolerance window. "+
+				"Most common cause: missing SHA-NI on amd64 (Skylake-X / Cascade Lake / Haswell)",
+			c.SHA256LargeMBps, minLeaderSHA256MBps)
+		s.Grade = "F"
+	}
+
 	return s
 }
 
@@ -221,6 +274,19 @@ func bigNumCatScore(r *BigNumResult) float64 {
 	)
 }
 
+func cryptoCatScore(r *CryptoResult) float64 {
+	if r == nil {
+		return 0
+	}
+	return mean(
+		normHigh(r.SHA256MBps, cryptoSHA256SmallFailMBps, cryptoSHA256SmallExcellentMBps),
+		normHigh(r.SHA256LargeMBps, cryptoSHA256LargeFailMBps, cryptoSHA256LargeExcellentMBps),
+		normHigh(r.Blake2bMBps, cryptoBlake2bFailMBps, cryptoBlake2bExcellentMBps),
+		normHigh(r.Keccak256MBps, cryptoKeccak256FailMBps, cryptoKeccak256ExcellentMBps),
+		normHigh(r.Ed25519VerifyOpsPerSec, cryptoEd25519VerifyFailOps, cryptoEd25519VerifyExcellentOps),
+	)
+}
+
 // ---------------------------------------------------------------------------
 // Helpers
 // ---------------------------------------------------------------------------
@@ -300,9 +366,9 @@ func scoreGradeSummary(g string) string {
 	case "B":
 		return "Good — suitable for standard validator operation"
 	case "C":
-		return "Below standard — not recommended for production use"
+		return "Acceptable — meets minimum validator requirements; consider a hardware upgrade"
 	case "D":
-		return "Poor — critical subsystems underperform validator requirements"
+		return "Marginal — several metrics below recommended levels"
 	case "N/A":
 		return "No benchmarks were run; all categories were skipped."
 	default:
diff --git a/cmd/benchmark/score_test.go b/cmd/benchmark/score_test.go
new file mode 100644
index 00000000..7b0b0ceb
--- /dev/null
+++ b/cmd/benchmark/score_test.go
@@ -0,0 +1,181 @@
+package main
+
+import (
+	"testing"
+)
+
+// excellentResults returns a fully-populated BenchmarkResults whose values
+// sit at the excellent ceiling for every category — used to verify the
+// max-points totals.
+func excellentResults() *BenchmarkResults {
+	return &BenchmarkResults{
+		GoroutineResult: &GoroutineResult{CPUEfficiency: cpuEffExcellentPct, NumCPU: 8},
+		DiskResult: &DiskResult{
+			SeqWriteMBps:  seqWriteExcellentMBps,
+			SeqReadMBps:   seqReadExcellentMBps,
+			RandWriteIOPS: randWriteExcellentIPS,
+			RandReadIOPS:  randReadExcellentIPS,
+		},
+		NetworkResult: &NetworkResult{
+			LatP50:         1, // 1 ns → effectively 0 µs (excellent)
+			LatP99:         1,
+			ThroughputMBps: netThroughputExcellentMBps,
+		},
+		KVResult: &KVResult{
+			WriteOpsPerSec: kvWriteExcellentOps,
+			ReadOpsPerSec:  kvReadExcellentOps,
+			MixedOpsPerSec: kvMixedExcellentOps,
+		},
+		MemoryResult: &MemoryResult{
+			SeqReadGBps:   memSeqReadExcellentGBps,
+			SeqWriteGBps:  memSeqWriteExcellentGBps,
+			RandLatencyNs: 1, // ≈ 0 ns → excellent
+			AllocMOpsPerS: memAllocExcellentMOps,
+		},
+		BigNumResult: &BigNumResult{
+			ModExpOpsPerSec:  bigModExpExcellentOps,
+			ModMulOpsPerSec:  bigModMulExcellentOps,
+			Float64OpsPerSec: bigFloat64ExcellentOps,
+			IntDivOpsPerSec:  bigIntDivExcellentOps,
+		},
+		CryptoResult: &CryptoResult{
+			SHA256MBps:             cryptoSHA256SmallExcellentMBps,
+			SHA256LargeMBps:        cryptoSHA256LargeExcellentMBps,
+			Blake2bMBps:            cryptoBlake2bExcellentMBps,
+			Keccak256MBps:          cryptoKeccak256ExcellentMBps,
+			Ed25519VerifyOpsPerSec: cryptoEd25519VerifyExcellentOps,
+			HasSHA_NI:              true,
+			HasAVX512IFMA:          true,
+		},
+	}
+}
+
+func TestComputeScore_TotalMaxIs1000WhenAllEnabled(t *testing.T) {
+	s := ComputeScore(excellentResults())
+	if s.MaxTotal != 1000 {
+		t.Fatalf("MaxTotal = %d, want 1000 (rebalance must keep weights summing to 1000)", s.MaxTotal)
+	}
+	if s.Total != 1000 {
+		t.Fatalf("Total = %d, want 1000 with excellent inputs across the board", s.Total)
+	}
+	if s.Grade != "S" {
+		t.Fatalf("Grade = %q, want S with 100%% score", s.Grade)
+	}
+}
+
+func TestComputeScore_CryptoWeightIs200(t *testing.T) {
+	s := ComputeScore(excellentResults())
+	if s.Crypto.Max != 200 {
+		t.Fatalf("Crypto.Max = %d, want 200", s.Crypto.Max)
+	}
+	if s.Crypto.Points != 200 {
+		t.Fatalf("Crypto.Points = %d, want 200 with excellent inputs", s.Crypto.Points)
+	}
+}
+
+func TestComputeScore_RebalancedWeights(t *testing.T) {
+	s := ComputeScore(excellentResults())
+	cases := []struct {
+		name string
+		got  int
+		want int
+	}{
+		{"Disk", s.Disk.Max, 200},
+		{"KV", s.KV.Max, 200},
+		{"Crypto", s.Crypto.Max, 200},
+		{"Goroutine", s.Goroutine.Max, 150},
+		{"Network", s.Network.Max, 100},
+		{"Memory", s.Memory.Max, 100},
+		{"BigNum", s.BigNum.Max, 50},
+	}
+	for _, c := range cases {
+		if c.got != c.want {
+			t.Errorf("%s.Max = %d, want %d", c.name, c.got, c.want)
+		}
+	}
+}
+
+func TestComputeScore_ThroughputVeto_CapsGradeAtF(t *testing.T) {
+	r := excellentResults()
+	// Bench measured below the leader-mode floor (e.g., a Skylake/Haswell
+	// without SHA-NI typically lands around 250 MB/s).
+	r.CryptoResult.SHA256LargeMBps = 250
+
+	s := ComputeScore(r)
+	if !s.Vetoed {
+		t.Fatal("expected Vetoed=true when SHA-256 16K throughput below the floor")
+	}
+	if s.Grade != "F" {
+		t.Fatalf("Grade = %q, want F when veto applies", s.Grade)
+	}
+	if s.VetoedReason == "" {
+		t.Fatal("expected VetoedReason to be populated")
+	}
+	// The numeric score should still be substantial — the veto is a grade-cap,
+	// not a silent zero. Operators get to see how the rest of the system performs.
+	// (The Crypto category itself will score low because of the bad throughput,
+	// but the other six categories were set to excellent in this fixture.)
+	if s.Total < 700 {
+		t.Fatalf("Total = %d, expected non-veto categories to still score normally", s.Total)
+	}
+}
+
+func TestComputeScore_ThroughputVeto_DoesNotApply_AboveFloor(t *testing.T) {
+	r := excellentResults()
+	// Throughput just above the floor — veto must not trigger even though
+	// the host could in principle be a non-SHA-NI amd64.
+	r.CryptoResult.SHA256LargeMBps = minLeaderSHA256MBps + 1
+	r.CryptoResult.HasSHA_NI = false
+
+	s := ComputeScore(r)
+	if s.Vetoed {
+		t.Fatalf("Vetoed must be false when throughput is above floor; reason: %s", s.VetoedReason)
+	}
+	if s.Grade == "F" {
+		t.Fatalf("Grade = F unexpectedly when throughput is above floor")
+	}
+}
+
+func TestComputeScore_ThroughputVeto_NoCryptoResult_NoVeto(t *testing.T) {
+	r := excellentResults()
+	r.CryptoResult = nil // crypto bench was skipped
+
+	s := ComputeScore(r)
+	if s.Vetoed {
+		t.Fatal("Vetoed must be false when CryptoResult is nil (bench skipped)")
+	}
+}
+
+func TestComputeScore_NilResults_GradeF(t *testing.T) {
+	if got := ComputeScore(nil); got.Grade != "F" {
+		t.Fatalf("ComputeScore(nil).Grade = %q, want F", got.Grade)
+	}
+}
+
+func TestComputeScore_AllSkipped_GradeNotApplicable(t *testing.T) {
+	s := ComputeScore(&BenchmarkResults{}) // all category results nil
+	if s.Grade != "N/A" {
+		t.Fatalf("Grade = %q, want N/A when nothing ran", s.Grade)
+	}
+}
+
+func TestScoreGrade_Boundaries(t *testing.T) {
+	cases := []struct {
+		pct  float64
+		want string
+	}{
+		{0.95, "S"},
+		{0.90, "S"},
+		{0.80, "A"},
+		{0.65, "B"},
+		{0.50, "C"},
+		{0.35, "D"},
+		{0.10, "F"},
+		{0.0, "F"},
+	}
+	for _, c := range cases {
+		if got := scoreGrade(c.pct); got != c.want {
+			t.Errorf("scoreGrade(%.2f) = %q, want %q", c.pct, got, c.want)
+		}
+	}
+}
diff --git a/cmd/node/PREFLIGHT.md b/cmd/node/PREFLIGHT.md
new file mode 100644
index 00000000..4617b0bc
--- /dev/null
+++ b/cmd/node/PREFLIGHT.md
@@ -0,0 +1,108 @@
+# Validator CPU Preflight
+
+The validator binary runs a CPU preflight check at startup, immediately after
+loading the BLS signing key. The preflight verifies that the host has
+sufficient SHA-256 hardware acceleration to keep up with consensus and TX
+processing on a production network.
+
+## Why this exists
+
+A field investigation across the Klever validator fleet found a ~5× spread in
+smart-contract TX processing time — ~600 ms on some validators vs ~120 ms on
+peers with otherwise comparable specs. The slow nodes uniformly lacked the
+**SHA-NI** instruction set (Skylake-X / Cascade Lake Xeon and earlier never
+received it), and the SHA-256 throughput delta correlated with the wall-time
+disparity. SHA-NI absence is the most likely contributing cause but was not
+conclusively proven to be the sole cause — the consensus log confirms the
+protocol's own "leader hardware too weak" detection treats this as a
+hardware-class issue regardless of the underlying instruction.
+
+The preflight is grounded in **measured SHA-256 throughput** rather than the
+SHA-NI feature flag. A node whose throughput cannot keep leader-mode TX
+processing within the protocol's 425 ms hardware-tolerance window is
+refused at startup so operators discover the issue before consensus-time
+outliers manifest.
+
+## What it checks
+
+On `amd64` and `arm64`:
+
+1. A 200 ms self-bench measures sustained SHA-256 throughput on 16 KiB
+   blocks. The result must be at least **800 MB/s** for startup to proceed.
+2. Missing SHA-NI (amd64) or ARMv8 SHA2 (arm64) is logged as an
+   informational `Warn` line — it is the most common cause of low SHA-256
+   throughput, and noting it makes the resulting log actionable. Missing
+   the flag never blocks startup on its own; only the bench does.
+3. Missing AVX-512 IFMA on `amd64` is logged as a separate `Warn`
+   (informational) — it indicates that the BLS pairing path is on the
+   ~1.5× slower scalar fallback.
+
+Other architectures are skipped — the preflight is a no-op on `riscv64`,
+`386`, `ppc64le`, etc.
+
+## Behavior modes
+
+The preflight has two layers of failure handling, controlled by the
+`preferences.enforceCpuPreflight` flag in the validator config.
+
+| Flag value | On preflight failure |
+|------------|----------------------|
+| `true` (default) | Returns a non-zero exit code with a clear error message. The validator does not start. |
+| `false` (escape hatch) | Logs the failure as a `Warn` and continues startup. Useful during a coordinated fleet migration when operators need to observe the issue without bricking running nodes. |
+
+Every preflight run logs a single `Info` line with the measured throughput
+(emitted on success, on warn-only failure, and as a precursor to a hard
+failure error):
+
+```text
+INFO  validator CPU preflight measurement  arch=amd64 sha_ni=true avx512_ifma=true sha256_mbps=1742.3
+```
+
+## Override
+
+For emergencies (CI, dev environments, intentional homogeneity tests),
+the env var `KLEVER_SKIP_CPU_CHECK=1` bypasses the preflight entirely.
+The exact value `1` is required — `true`, `yes`, etc. are not honored
+(fail-closed: a typo leaves the preflight active). A loud warning is
+logged on every startup so operators see the bypass in their logs:
+
+```text
+WARN  validator CPU preflight bypassed via env var  env=KLEVER_SKIP_CPU_CHECK risk=consensus latency may exceed peer median; not for production
+```
+
+Do not use this flag in production.
+
+## Migration plan for SHA-NI-deficient hardware
+
+If the preflight refuses to start your validator, migrate to a CPU with
+SHA extensions. Note: the validator startup gate is **stricter** than
+the standalone `klever-benchmark` tool — startup requires ≥ 800 MB/s
+while the benchmark's SHA-256 hard-veto threshold is 500 MB/s. A host
+that earns a non-`F` grade from the benchmark can still fail the
+startup gate; always run the actual validator binary on a candidate
+host before committing to it.
+
+Recommended CPU classes:
+
+- **AMD**: any Zen generation — EPYC Naples, Rome, Milan, Genoa, Turin, or
+  Ryzen / Threadripper equivalents.
+- **Intel**: Ice Lake-SP (3rd-gen Xeon Scalable) or newer. Skylake-X,
+  Cascade Lake, Cooper Lake, and earlier consumer Skylake / Coffee Lake
+  parts do not have SHA-NI.
+- **ARM**: any ARMv8 chip exposing the SHA2 feature flag (i.e., effectively
+  every datacenter ARM CPU since 2018, including AWS Graviton and Apple
+  Silicon).
+
+For Hetzner Cloud specifically: CCX (dedicated AMD EPYC) and CPX (shared
+AMD EPYC) instances always satisfy the preflight. The CX series is a mixed
+Intel/AMD pool, and Skylake-class instances within it do not. Run
+`klever-benchmark --skip-disk --skip-network --skip-kv --skip-memory \
+--skip-goroutine --skip-bignum` on a candidate instance before deploying as
+a validator to confirm.
+
+## Related
+
+- `cmd/node/preflight.go` — the preflight implementation.
+- `cmd/benchmark/CLI.md` — the operator-facing benchmark, which applies a
+  similar SHA-256 throughput veto (at 500 MB/s) and produces a more detailed report.
+- `config/prefsConfig.go` — `EnforceCPUPreflight` is the runtime flag.
diff --git a/cmd/node/preflight.go b/cmd/node/preflight.go
new file mode 100644
index 00000000..42bad3c8
--- /dev/null
+++ b/cmd/node/preflight.go
@@ -0,0 +1,164 @@
+package main
+
+import (
+	"crypto/rand"
+	"crypto/sha256"
+	"fmt"
+	"os"
+	"runtime"
+	"time"
+
+	"github.com/klauspost/cpuid/v2"
+	logger "github.com/klever-io/klever-go-logger"
+)
+
+const (
+	envSkipCPUCheck         = "KLEVER_SKIP_CPU_CHECK"
+	minSHA256ThroughputMBps = 800
+	preflightBenchDuration  = 200 * time.Millisecond
+	benchBlockSize          = 16 * 1024
+)
+
+// cpuInfo captures the CPU features the preflight cares about. It is its own
+// type (rather than reading klauspost/cpuid globals directly inside the
+// preflight) so tests can construct synthetic CPUs on any host.
+type cpuInfo struct {
+	arch          string
+	hasSHA        bool
+	hasAVX512IFMA bool
+}
+
+// detectCPU reads the runtime architecture and the relevant feature bits.
+// Architectures other than amd64 and arm64 are treated as "skip" — preflight
+// is a no-op there.
+func detectCPU() cpuInfo {
+	info := cpuInfo{arch: runtime.GOARCH}
+	switch runtime.GOARCH {
+	case "amd64":
+		info.hasSHA = cpuid.CPU.Has(cpuid.SHA)
+		info.hasAVX512IFMA = cpuid.CPU.Has(cpuid.AVX512IFMA)
+	case "arm64":
+		info.hasSHA = cpuid.CPU.Has(cpuid.SHA2)
+	}
+	return info
+}
+
+// validatorCPUPreflight verifies the host CPU is capable of validator-grade
+// SHA-256 throughput. Returns a non-nil error when the check fails; the call
+// site decides whether to block startup or downgrade to a warning depending
+// on the EnforceCPUPreflight config flag.
+//
+// Outcomes:
+//   - skipped (returns nil) on unsupported architectures or when the
+//     KLEVER_SKIP_CPU_CHECK=1 env var is set;
+//   - failed (returns error) when measured SHA-256 throughput is below
+//     minSHA256ThroughputMBps;
+//   - passed (returns nil) otherwise, with an informational log line that
+//     includes the measured throughput.
+//
+// Missing SHA-NI on amd64 is logged as a Warn but is no longer a hard fail
+// on its own — the field investigation that motivated this preflight could
+// not conclusively prove SHA-NI absence is the sole cause of the observed
+// consensus-time disparity, so the gate is grounded in the measured number
+// instead of the CPU feature flag. SHA-NI absence is the most common cause
+// of low SHA-256 throughput in practice and is called out in the warn line.
+func validatorCPUPreflight(log logger.Logger) error {
+	return validatorCPUPreflightWithInfo(log, detectCPU(), benchSHA256)
+}
+
+// validatorCPUPreflightWithInfo is the test seam for validatorCPUPreflight.
+// Passing the cpuInfo and bench function as parameters keeps the package free
+// of mutable globals while still letting tests cover every branch.
+//
+// The bench is run twice and the maximum is reported, so a single transient
+// throttle event (thermal, hypervisor noisy neighbor) does not refuse startup
+// on a host that would otherwise pass.
+func validatorCPUPreflightWithInfo(
+	log logger.Logger,
+	info cpuInfo,
+	bench func(time.Duration) float64,
+) error {
+	if os.Getenv(envSkipCPUCheck) == "1" {
+		log.Warn("validator CPU preflight bypassed via env var",
+			"env", envSkipCPUCheck,
+			"risk", "consensus latency may exceed peer median; not for production")
+		return nil
+	}
+
+	if info.arch != "amd64" && info.arch != "arm64" {
+		log.Info("validator CPU preflight skipped on unsupported arch", "arch", info.arch)
+		return nil
+	}
+
+	mbps := bench(preflightBenchDuration)
+	if second := bench(preflightBenchDuration); second > mbps {
+		mbps = second
+	}
+	log.Info("validator CPU preflight measurement",
+		"arch", info.arch,
+		"sha_ni", info.hasSHA,
+		"avx512_ifma", info.hasAVX512IFMA,
+		"sha256_mbps", fmt.Sprintf("%.1f", mbps))
+
+	if info.arch == "amd64" && !info.hasAVX512IFMA {
+		log.Warn("CPU lacks AVX-512 IFMA; BLS verify ~1.5x slower than Zen4 peers (informational only)")
+	}
+
+	if !info.hasSHA {
+		log.Warn("CPU lacks SHA-256 hardware acceleration "+
+			"(SHA-NI on amd64 / ARMv8 SHA2 on arm64); "+
+			"this is the most common cause of low SHA-256 throughput",
+			"arch", info.arch)
+	}
+
+	if mbps < minSHA256ThroughputMBps {
+		return fmt.Errorf(
+			"validator CPU preflight failed: measured SHA-256 throughput %.1f MB/s < %d MB/s minimum. "+
+				"This typically indicates missing SHA-NI (Skylake-X / Cascade Lake / Haswell on amd64) "+
+				"or a degraded host (frequency cap, thermal throttle, hypervisor masking). "+
+				"Migrate to AMD Zen, Intel Ice Lake-SP+, or modern ARM with ARMv8 SHA2. "+
+				"Override (NOT for production): %s=1",
+			mbps, minSHA256ThroughputMBps, envSkipCPUCheck)
+	}
+	return nil
+}
+
+// benchSHA256 hashes 16 KiB blocks for d and returns sustained throughput in
+// megabytes per second. Returns 0 on a non-positive duration. The block size
+// matches the openssl-speed reference used during the original investigation
+// so operators can compare numbers directly.
+//
+// The hot loop checks the deadline once per innerLoop iterations so that
+// time.Now() overhead stays negligible on SHA-NI hosts (~3 GB/s ≈ 190K
+// 16 KiB hashes/s).
+func benchSHA256(d time.Duration) float64 {
+	if d <= 0 {
+		return 0
+	}
+	buf := make([]byte, benchBlockSize)
+	if _, err := rand.Read(buf); err != nil {
+		// Deterministic fallback so the bench remains representative of a
+		// non-zero working set even when the system RNG is unavailable.
+		for i := range buf {
+			buf[i] = byte(i)
+		}
+	}
+	h := sha256.New()
+	var bytes int64
+	start := time.Now()
+	deadline := start.Add(d)
+	const innerLoop = 256
+	for time.Now().Before(deadline) {
+		for i := 0; i < innerLoop; i++ {
+			h.Reset()
+			_, _ = h.Write(buf)
+			_ = h.Sum(nil)
+		}
+		bytes += benchBlockSize * innerLoop
+	}
+	elapsed := time.Since(start).Seconds()
+	if elapsed <= 0 {
+		return 0
+	}
+	return float64(bytes) / (1024 * 1024) / elapsed
+}
diff --git a/cmd/node/preflight_test.go b/cmd/node/preflight_test.go
new file mode 100644
index 00000000..6ac25bee
--- /dev/null
+++ b/cmd/node/preflight_test.go
@@ -0,0 +1,244 @@
+package main
+
+import (
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	logger "github.com/klever-io/klever-go-logger"
+)
+
+// recordingLogger is a Logger that captures messages by level so tests can
+// assert which branches of the preflight produced output.
+type recordingLogger struct {
+	mu    sync.Mutex
+	infos []string
+	warns []string
+}
+
+func (r *recordingLogger) record(slot *[]string, msg string) {
+	r.mu.Lock()
+	*slot = append(*slot, msg)
+	r.mu.Unlock()
+}
+
+func (r *recordingLogger) Trace(msg string, _ ...interface{}) {}
+func (r *recordingLogger) Debug(msg string, _ ...interface{}) {}
+func (r *recordingLogger) Info(msg string, _ ...interface{}) {
+	r.record(&r.infos, msg)
+}
+func (r *recordingLogger) Warn(msg string, _ ...interface{}) {
+	r.record(&r.warns, msg)
+}
+func (r *recordingLogger) Error(msg string, _ ...interface{})                {}
+func (r *recordingLogger) LogIfError(_ error, _ ...interface{})              {}
+func (r *recordingLogger) Log(_ logger.LogLevel, _ string, _ ...interface{}) {}
+func (r *recordingLogger) LogLine(_ *logger.LogLine)                         {}
+func (r *recordingLogger) SetLevel(_ logger.LogLevel)                        {}
+func (r *recordingLogger) GetLevel() logger.LogLevel                         { return logger.LogTrace }
+func (r *recordingLogger) IsInterfaceNil() bool                              { return r == nil }
+
+// hasInfo reports whether any captured Info message contains s. The slice
+// is read under the lock to keep the helper race-safe even if a future
+// caller invokes the preflight in a goroutine.
+func (r *recordingLogger) hasInfo(s string) bool {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	for _, m := range r.infos {
+		if strings.Contains(m, s) {
+			return true
+		}
+	}
+	return false
+}
+
+// hasWarn reports whether any captured Warn message contains s.
+func (r *recordingLogger) hasWarn(s string) bool {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	for _, m := range r.warns {
+		if strings.Contains(m, s) {
+			return true
+		}
+	}
+	return false
+}
+
+// fastBench returns a fixed throughput regardless of duration. Useful for
+// asserting on the bench-too-slow path without waiting on the real bench.
+func fastBench(mbps float64) func(time.Duration) float64 {
+	return func(time.Duration) float64 { return mbps }
+}
+
+func TestValidatorCPUPreflight_HappyPath_Amd64(t *testing.T) {
+	t.Setenv(envSkipCPUCheck, "")
+	log := &recordingLogger{}
+	info := cpuInfo{arch: "amd64", hasSHA: true, hasAVX512IFMA: true}
+
+	if err := validatorCPUPreflightWithInfo(log, info, fastBench(2000)); err != nil {
+		t.Fatalf("expected nil error, got: %v", err)
+	}
+	if !log.hasInfo("validator CPU preflight measurement") {
+		t.Fatalf("expected a measurement info log, got infos=%v", log.infos)
+	}
+	if log.hasWarn("AVX-512 IFMA") {
+		t.Fatalf("did not expect AVX-512 IFMA warning when feature is present, got warns=%v", log.warns)
+	}
+	if log.hasWarn("SHA-256 hardware acceleration") {
+		t.Fatalf("did not expect SHA-NI warning when feature is present, got warns=%v", log.warns)
+	}
+}
+
+func TestValidatorCPUPreflight_HappyPath_Arm64(t *testing.T) {
+	t.Setenv(envSkipCPUCheck, "")
+	log := &recordingLogger{}
+	info := cpuInfo{arch: "arm64", hasSHA: true}
+
+	if err := validatorCPUPreflightWithInfo(log, info, fastBench(2000)); err != nil {
+		t.Fatalf("expected nil error on arm64 with fast bench, got: %v", err)
+	}
+	if !log.hasInfo("validator CPU preflight measurement") {
+		t.Fatalf("expected measurement info log, got infos=%v", log.infos)
+	}
+	if log.hasWarn("AVX-512 IFMA") {
+		t.Fatalf("did not expect AVX-512 IFMA warn on arm64, got warns=%v", log.warns)
+	}
+}
+
+func TestValidatorCPUPreflight_HappyPath_Amd64_NoIFMA_Warns(t *testing.T) {
+	t.Setenv(envSkipCPUCheck, "")
+	log := &recordingLogger{}
+	// SHA-NI present but AVX-512 IFMA missing (e.g. AMD Zen 1–3; Skylake-X itself lacks SHA-NI) — should pass with a warn.
+	info := cpuInfo{arch: "amd64", hasSHA: true, hasAVX512IFMA: false}
+
+	if err := validatorCPUPreflightWithInfo(log, info, fastBench(2000)); err != nil {
+		t.Fatalf("expected nil error on Skylake-X-shaped CPU, got: %v", err)
+	}
+	if !log.hasWarn("AVX-512 IFMA") {
+		t.Fatalf("expected AVX-512 IFMA warn, got warns=%v", log.warns)
+	}
+}
+
+func TestValidatorCPUPreflight_MissingSHA_FastBench_Passes_WithWarn(t *testing.T) {
+	// Missing SHA-NI is no longer a hard fail on its own — only the measured
+	// throughput is. A (hypothetical) host without SHA-NI but with somehow
+	// fast-enough SHA-256 should pass with a Warn note.
+	t.Setenv(envSkipCPUCheck, "")
+	log := &recordingLogger{}
+	info := cpuInfo{arch: "amd64", hasSHA: false, hasAVX512IFMA: true}
+
+	if err := validatorCPUPreflightWithInfo(log, info, fastBench(2000)); err != nil {
+		t.Fatalf("expected nil error when bench is fast enough, even without SHA-NI; got: %v", err)
+	}
+	if !log.hasWarn("SHA-256 hardware acceleration") {
+		t.Fatalf("expected SHA-NI absence warn, got warns=%v", log.warns)
+	}
+}
+
+func TestValidatorCPUPreflight_MissingSHA_SlowBench_Errors(t *testing.T) {
+	// Realistic Skylake/Haswell case: no SHA-NI plus low measured throughput.
+	t.Setenv(envSkipCPUCheck, "")
+	log := &recordingLogger{}
+	info := cpuInfo{arch: "amd64", hasSHA: false, hasAVX512IFMA: false}
+
+	err := validatorCPUPreflightWithInfo(log, info, fastBench(250))
+	if err == nil {
+		t.Fatal("expected error when measured throughput is below the floor")
+	}
+	if !strings.Contains(err.Error(), "throughput") {
+		t.Fatalf("error message should mention throughput, got: %v", err)
+	}
+	if !strings.Contains(err.Error(), envSkipCPUCheck) {
+		t.Fatalf("error should mention the override env var, got: %v", err)
+	}
+}
+
+func TestValidatorCPUPreflight_MissingSHA2_Arm64_SlowBench_Errors(t *testing.T) {
+	t.Setenv(envSkipCPUCheck, "")
+	log := &recordingLogger{}
+	info := cpuInfo{arch: "arm64", hasSHA: false}
+
+	err := validatorCPUPreflightWithInfo(log, info, fastBench(200))
+	if err == nil {
+		t.Fatal("expected error when arm64 measured throughput is below the floor")
+	}
+	if !strings.Contains(err.Error(), "throughput") {
+		t.Fatalf("error message should mention throughput, got: %v", err)
+	}
+}
+
+func TestValidatorCPUPreflight_BenchTooSlow_Errors(t *testing.T) {
+	t.Setenv(envSkipCPUCheck, "")
+	log := &recordingLogger{}
+	info := cpuInfo{arch: "amd64", hasSHA: true, hasAVX512IFMA: true}
+
+	err := validatorCPUPreflightWithInfo(log, info, fastBench(minSHA256ThroughputMBps-1))
+	if err == nil {
+		t.Fatal("expected error when measured throughput is below the minimum")
+	}
+	if !strings.Contains(err.Error(), "throughput") {
+		t.Fatalf("error message should mention throughput, got: %v", err)
+	}
+}
+
+func TestValidatorCPUPreflight_EnvBypass_NilEvenWithSlowBench(t *testing.T) {
+	t.Setenv(envSkipCPUCheck, "1")
+	log := &recordingLogger{}
+	info := cpuInfo{arch: "amd64", hasSHA: false}
+
+	if err := validatorCPUPreflightWithInfo(log, info, fastBench(0)); err != nil {
+		t.Fatalf("expected nil error when env bypass is active, got: %v", err)
+	}
+	if !log.hasWarn("bypassed via env var") {
+		t.Fatalf("expected bypass warn log, got warns=%v", log.warns)
+	}
+}
+
+func TestValidatorCPUPreflight_UnsupportedArch_NilAndSkips(t *testing.T) {
+	t.Setenv(envSkipCPUCheck, "")
+	log := &recordingLogger{}
+	info := cpuInfo{arch: "386"}
+
+	if err := validatorCPUPreflightWithInfo(log, info, fastBench(0)); err != nil {
+		t.Fatalf("expected nil error on unsupported arch, got: %v", err)
+	}
+	if !log.hasInfo("unsupported arch") {
+		t.Fatalf("expected unsupported-arch info log, got infos=%v", log.infos)
+	}
+}
+
+func TestBenchSHA256_NonPositiveDuration_ReturnsZero(t *testing.T) {
+	if got := benchSHA256(0); got != 0 {
+		t.Fatalf("benchSHA256(0) = %.2f, want 0", got)
+	}
+	if got := benchSHA256(-time.Second); got != 0 {
+		t.Fatalf("benchSHA256(-1s) = %.2f, want 0", got)
+	}
+}
+
+func TestBenchSHA256_RealMeasurement_Positive(t *testing.T) {
+	// Smoke test: a 100 ms run on any modern CPU should produce a positive
+	// throughput. We do not assert a specific MB/s number to keep the test
+	// portable across CI runners; 100 ms (vs 50 ms) gives heavily-throttled
+	// cgroup runners enough wall time to complete a full inner-loop batch.
+	if got := benchSHA256(100 * time.Millisecond); got <= 0 {
+		t.Fatalf("benchSHA256(100ms) = %.2f, want > 0", got)
+	}
+}
+
+// TestValidatorCPUPreflight_RealEntry_NoPanic exercises the production
+// signature (which calls detectCPU() and the real benchSHA256). It does
+// not assert pass/fail because the result depends on host hardware — it
+// only ensures the wiring between detectCPU, benchSHA256, and the
+// WithInfo seam does not panic and produces a self-consistent outcome.
+func TestValidatorCPUPreflight_RealEntry_NoPanic(t *testing.T) {
+	t.Setenv(envSkipCPUCheck, "1") // bypass so we don't fail on slow CI hosts
+	log := &recordingLogger{}
+	if err := validatorCPUPreflight(log); err != nil {
+		t.Fatalf("env bypass should make validatorCPUPreflight nil, got: %v", err)
+	}
+	if !log.hasWarn("bypassed via env var") {
+		t.Fatalf("expected bypass warn log, got warns=%v", log.warns)
+	}
+}
diff --git a/cmd/node/startup.go b/cmd/node/startup.go
index 5c0115bf..259a8350 100644
--- a/cmd/node/startup.go
+++ b/cmd/node/startup.go
@@ -249,6 +249,17 @@ func startNode(ctx *cli.Context, log logger.Logger, version string) error {
 		return err
 	}
 
+	// Run the CPU preflight before loading the validator's BLS private key:
+	// on a host that fails the gate we want to refuse startup without
+	// having unlocked the key file (defense-in-depth, CWE-316).
+	if err := validatorCPUPreflight(log); err != nil {
+		if cfg.Preferences.ShouldEnforceCPUPreflight() {
+			return err
+		}
+		log.Warn("validator CPU preflight failed (continuing because preferences.enforceCpuPreflight=false)",
+			"error", err.Error())
+	}
+
 	validatorKeyPemFileName := ctx.GlobalString(validatorKeyPemFile.Name)
 	cryptoParamsLoader, err := factory.NewCryptoSigningParamsLoader(
 		validatorPubkeyConverter,
diff --git a/config/node/config.yaml b/config/node/config.yaml
index d066d4ee..32d6627c 100644
--- a/config/node/config.yaml
+++ b/config/node/config.yaml
@@ -60,6 +60,17 @@ preferences:
   # MaxComputableSlots represents the max number of slots computable in a round
   # by the validator statistics processor
   maxComputableSlots: 100
+  # EnforceCPUPreflight refuses to start the validator when measured SHA-256
+  # throughput falls below 800 MB/s on a 200 ms self-bench (stricter than
+  # the standalone klever-benchmark tool, which only hard-vetoes below
+  # 500 MB/s — see cmd/node/PREFLIGHT.md). Hosts lacking SHA-NI hardware
+  # acceleration (Skylake-X / Cascade Lake / Haswell on amd64, or ARM
+  # without ARMv8 SHA2) typically measure ~250 MB/s and will be rejected.
+  # Use KLEVER_SKIP_CPU_CHECK=1 as an emergency override.
+  # Set to false to downgrade the failure to a warning instead of refusing.
+  # If this key is omitted from an existing operator config, enforcement
+  # defaults to true (safe upgrade behavior).
+  enforceCpuPreflight: true
 
 blockSizeThrottleConfig:
   minSizeInBytes: 104857 # 104857 is 10% from 1MB
diff --git a/config/prefsConfig.go b/config/prefsConfig.go
index ab3f3853..6df2c6c2 100644
--- a/config/prefsConfig.go
+++ b/config/prefsConfig.go
@@ -7,4 +7,25 @@ type PreferencesConfig struct {
 	RedundancyLevel          int64  `yaml:"redundancyLevel"`
 	StatusPollingIntervalSec int64  `yaml:"statusPollingIntervalSec"`
 	MaxComputableSlots       uint64 `yaml:"maxComputableSlots"`
+	// EnforceCPUPreflight controls whether the validator refuses to start
+	// when the startup SHA-256 throughput bench falls below the leader-mode
+	// floor. When true (default — including when the YAML key is absent so
+	// existing operator configs upgrade safely), failure aborts startup
+	// with a clear error. When explicitly set to false, the failure is
+	// downgraded to a warning so operators can observe the issue without
+	// bricking a running node — useful during fleet migration.
+	// KLEVER_SKIP_CPU_CHECK=1 bypasses the check entirely.
+	//
+	// Pointer type so an absent YAML key is distinguishable from an
+	// explicit false; use ShouldEnforceCPUPreflight() at call sites.
+	EnforceCPUPreflight *bool `yaml:"enforceCpuPreflight"`
+}
+
+// ShouldEnforceCPUPreflight reports whether a failed CPU preflight must abort
+// startup. A nil EnforceCPUPreflight (YAML key absent) means enforce (true).
+func (p PreferencesConfig) ShouldEnforceCPUPreflight() bool {
+	if flag := p.EnforceCPUPreflight; flag != nil {
+		return *flag
+	}
+	return true
 }

From 2a056dc6e1c41941362805329c17aa47f2501b4e Mon Sep 17 00:00:00 2001
From: Fernando Sobreira <fbsobreira@gmail.com>
Date: Thu, 7 May 2026 21:16:46 -0400
Subject: [PATCH 2/6] [KLC-2387] address PR review feedback and CI lint

- Preallocate digest buffer in benchSHA256 / benchHashMBps to remove
  per-iteration allocations from the hash hot loop (verified: 2039
  allocs/op -> 0 allocs/op). Improves measurement stability on hosts
  where GC pressure could mask SHA-NI throughput.
- Update PREFLIGHT.md: preflight runs immediately *before* loading the
  BLS key (not after), matching the implementation.
- Fix doc drift: BenchmarkScore.Vetoed and CryptoResult.HasSHA_NI now
  correctly describe throughput-based gating rather than SHA-NI feature
  flag. HasSHA_NI doc clarifies the cross-platform shorthand (SHA-NI on
  amd64, ARMv8 SHA2 on arm64).
- Document the relationship between cryptoSHA256LargeFailMBps (600,
  per-metric verdict) and minLeaderSHA256MBps (500, hard grade veto).
- Surface preferences.enforceCpuPreflight=false escape hatch in the
  preflight error message so operators discover the warn-only mode
  without needing to find the docs first.
- go mod tidy: promote klauspost/cpuid/v2 from indirect to direct dep
  since cmd/node/preflight.go imports it directly.
---
 cmd/benchmark/crypto.go | 17 ++++++++++++-----
 cmd/benchmark/report.go |  7 +++++++
 cmd/benchmark/score.go  |  8 ++++++--
 cmd/node/PREFLIGHT.md   | 10 ++++++----
 cmd/node/preflight.go   |  7 +++++--
 go.mod                  |  2 +-
 6 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/cmd/benchmark/crypto.go b/cmd/benchmark/crypto.go
index 2217ae10..af102c32 100644
--- a/cmd/benchmark/crypto.go
+++ b/cmd/benchmark/crypto.go
@@ -62,10 +62,16 @@ type CryptoResult struct {
 	Keccak256MBps          float64 // Keccak-256 throughput on 16 KiB blocks
 	Ed25519VerifyOpsPerSec float64 // Ed25519 signature verifications per second
 
-	// CPU feature flags — reported but not directly scored. The overall
-	// score applies a hard veto when HasSHA_NI is false on amd64 (see
-	// score.go). The other flags are informational; AVX-512 IFMA in
-	// particular indicates whether the BLS pairing fast path is available.
+	// CPU feature flags — reported but not directly scored. The hard veto
+	// in score.go fires on measured SHA-256 throughput, not on these flags
+	// (see BenchmarkScore.Vetoed). The flags are informational and help
+	// operators correlate measured throughput to the underlying ISA.
+	//
+	// HasSHA_NI is the cross-platform shorthand: true means SHA-256
+	// hardware acceleration is available — Intel/AMD SHA-NI on amd64,
+	// ARMv8 SHA2 on arm64 — false on every other architecture.
+	// AVX-512 IFMA on amd64 indicates whether the BLS pairing fast path
+	// is available (~1.5x speedup vs scalar fallback).
 	HasSHA_NI     bool
 	HasAVX512IFMA bool
 	HasVAES       bool
@@ -149,6 +155,7 @@ func benchHashMBps(newHash func() hash.Hash, blockSize int) float64 {
 		}
 	}
 	h := newHash()
+	digest := make([]byte, 0, h.Size())
 	deadline := time.Now().Add(cryptoBenchDuration)
 	start := time.Now()
 	var bytes int64
@@ -157,7 +164,7 @@ func benchHashMBps(newHash func() hash.Hash, blockSize int) float64 {
 		for range innerLoop {
 			h.Reset()
 			_, _ = h.Write(buf)
-			_ = h.Sum(nil)
+			digest = h.Sum(digest[:0])
 		}
 		bytes += int64(blockSize) * innerLoop
 	}
diff --git a/cmd/benchmark/report.go b/cmd/benchmark/report.go
index 7195c17e..72c077ae 100644
--- a/cmd/benchmark/report.go
+++ b/cmd/benchmark/report.go
@@ -132,6 +132,13 @@ const (
 	cryptoSHA256SmallPassMBps = 1_200.0 // SHA-256 on 1 KiB blocks
 	cryptoSHA256SmallFailMBps = 500.0
 	cryptoSHA256LargePassMBps = 1_500.0 // SHA-256 on 16 KiB blocks
+	// cryptoSHA256LargeFailMBps drives the per-metric category verdict
+	// (WARN/FAIL labels in the text report); minLeaderSHA256MBps in score.go
+	// (500 MB/s) drives the hard grade-F veto. The category fail floor is
+	// set slightly above the veto so a host in [500, 600) MB/s shows a
+	// per-metric FAIL label without triggering the grade-cap veto path —
+	// gradeToVerdict still surfaces the overall verdict as FAIL via the
+	// category-fail route, so behavior is consistent across both paths.
 	cryptoSHA256LargeFailMBps = 600.0
 	cryptoBlake2bPassMBps     = 700.0 // Blake2b-512 on 16 KiB blocks (AVX2)
 	cryptoBlake2bFailMBps     = 300.0
diff --git a/cmd/benchmark/score.go b/cmd/benchmark/score.go
index 41c50737..e391cb61 100644
--- a/cmd/benchmark/score.go
+++ b/cmd/benchmark/score.go
@@ -150,8 +150,12 @@ type BenchmarkScore struct {
 	MaxTotal int     // sum of enabled category maxes
 	Pct      float64 // Total / MaxTotal (0.0–1.0); 0 if nothing enabled
 	Grade    string  // S / A / B / C / D / F
-	// Vetoed is true when a hard requirement failed (e.g., missing SHA-NI
-	// on amd64). When set, Grade is forced to "F" regardless of point total.
+	// Vetoed is true when a hard requirement failed — currently only the
+	// measured SHA-256 throughput floor (SHA256LargeMBps < minLeaderSHA256MBps).
+	// SHA-NI absence is the most common cause of low throughput on amd64 but
+	// is not asserted as the sole cause; the gate fires on the measurement,
+	// not on a CPU feature flag. When set, Grade is forced to "F" regardless
+	// of point total. VetoedReason carries an operator-facing explanation.
 	Vetoed       bool
 	VetoedReason string
 }
diff --git a/cmd/node/PREFLIGHT.md b/cmd/node/PREFLIGHT.md
index 4617b0bc..67f01596 100644
--- a/cmd/node/PREFLIGHT.md
+++ b/cmd/node/PREFLIGHT.md
@@ -1,7 +1,8 @@
 # Validator CPU Preflight
 
-The validator binary runs a CPU preflight check at startup, immediately after
-loading the BLS signing key. The preflight verifies that the host has
+The validator binary runs a CPU preflight check at startup, immediately before
+loading the BLS signing key — defense-in-depth so a host that fails the gate
+never unlocks the key file. The preflight verifies that the host has
 sufficient SHA-256 hardware acceleration to keep up with consensus and TX
 processing on a production network.
 
@@ -103,6 +104,7 @@ a validator to confirm.
 ## Related
 
 - `cmd/node/preflight.go` — the preflight implementation.
-- `cmd/benchmark/CLI.md` — the operator-facing benchmark, which applies the
-  same SHA-NI veto and produces a more detailed report.
+- `cmd/benchmark/CLI.md` — the operator-facing benchmark, which applies a
+  measured-throughput veto (SHA-256 < 500 MB/s) — not a SHA-NI feature-bit
+  check — and produces a more detailed report.
 - `config/prefsConfig.go` — `EnforceCPUPreflight` is the runtime flag.
diff --git a/cmd/node/preflight.go b/cmd/node/preflight.go
index 42bad3c8..f546f4c9 100644
--- a/cmd/node/preflight.go
+++ b/cmd/node/preflight.go
@@ -117,7 +117,9 @@ func validatorCPUPreflightWithInfo(
 				"This typically indicates missing SHA-NI (Skylake-X / Cascade Lake / Haswell on amd64) "+
 				"or a degraded host (frequency cap, thermal throttle, hypervisor masking). "+
 				"Migrate to AMD Zen, Intel Ice Lake-SP+, or modern ARM with ARMv8 SHA2. "+
-				"Override (NOT for production): %s=1",
+				"To downgrade this failure to a warning during a coordinated fleet migration, "+
+				"set preferences.enforceCpuPreflight=false in the validator config. "+
+				"Emergency override (NOT for production): %s=1",
 			mbps, minSHA256ThroughputMBps, envSkipCPUCheck)
 	}
 	return nil
@@ -144,6 +146,7 @@ func benchSHA256(d time.Duration) float64 {
 		}
 	}
 	h := sha256.New()
+	digest := make([]byte, 0, h.Size())
 	var bytes int64
 	start := time.Now()
 	deadline := start.Add(d)
@@ -152,7 +155,7 @@ func benchSHA256(d time.Duration) float64 {
 		for i := 0; i < innerLoop; i++ {
 			h.Reset()
 			_, _ = h.Write(buf)
-			_ = h.Sum(nil)
+			digest = h.Sum(digest[:0])
 		}
 		bytes += benchBlockSize * innerLoop
 	}
diff --git a/go.mod b/go.mod
index 761098ec..eb64971a 100644
--- a/go.mod
+++ b/go.mod
@@ -28,6 +28,7 @@ require (
 	github.com/jbenet/goprocess v0.1.4
 	github.com/joho/godotenv v1.4.0
 	github.com/keygen-sh/machineid v1.1.1
+	github.com/klauspost/cpuid/v2 v2.3.0
 	github.com/klever-io/klever-go-logger v1.3.1
 	github.com/libp2p/go-libp2p v0.48.0
 	github.com/libp2p/go-libp2p-kad-dht v0.39.1
@@ -108,7 +109,6 @@ require (
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect
-	github.com/klauspost/cpuid/v2 v2.3.0 // indirect
 	github.com/koron/go-ssdp v0.0.6 // indirect
 	github.com/leodido/go-urn v1.4.0 // indirect
 	github.com/libp2p/go-buffer-pool v0.1.0 // indirect

From 1a88cfa46473ba3e7de126f302ee7bf0724ada09 Mon Sep 17 00:00:00 2001
From: Fernando Sobreira <fbsobreira@gmail.com>
Date: Thu, 7 May 2026 21:27:00 -0400
Subject: [PATCH 3/6] [KLC-2387] extract MB/s metric row format to a constant
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sonar flagged the printf template "  %-32s  %7.1f MB/s  %s
(pass≥%.0f, fail<%.0f MB/s)\n" as duplicated 5 times across
printNetworkSection and printCryptoSection. Centralised as
metricThroughputMBpsRowFmt so future column-width or label-format
changes stay consistent across sections.
---
 cmd/benchmark/report.go | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/cmd/benchmark/report.go b/cmd/benchmark/report.go
index 72c077ae..5f2a0677 100644
--- a/cmd/benchmark/report.go
+++ b/cmd/benchmark/report.go
@@ -345,6 +345,11 @@ func PrintReport(results *BenchmarkResults, format string) {
 
 const reportWidth = 70
 
+// metricThroughputMBpsRowFmt is the printf template for MB/s metric rows
+// (network throughput + every crypto MB/s metric). Centralised so column
+// width and pass/fail label format stay consistent across sections.
+const metricThroughputMBpsRowFmt = "  %-32s  %7.1f MB/s  %s  (pass≥%.0f, fail<%.0f MB/s)\n"
+
 func printText(results *BenchmarkResults) {
 	sep := strings.Repeat("─", reportWidth)
 	si := results.SystemInfo
@@ -479,7 +484,7 @@ func printNetworkSection(r *NetworkResult, v verdict, sep string) {
 		"Latency P50:", p50us, p50v.Icon(), netLatP50PassUs, netLatP50FailUs)
 	fmt.Printf("  %-32s  %8.1f µs  %s  (pass<%.0f, fail≥%.0f µs)\n",
 		"Latency P99:", p99us, p99v.Icon(), netLatP99PassUs, netLatP99FailUs)
-	fmt.Printf("  %-32s  %7.1f MB/s  %s  (pass≥%.0f, fail<%.0f MB/s)\n",
+	fmt.Printf(metricThroughputMBpsRowFmt,
 		"Throughput:", r.ThroughputMBps, thrV.Icon(), netThroughputPassMBps, netThroughputFailMBps)
 
 	fmt.Println()
@@ -575,16 +580,16 @@ func printCryptoSection(r *CryptoResult, v verdict, sep string) {
 	kV := metricVerdict(r.Keccak256MBps, cryptoKeccak256PassMBps, cryptoKeccak256FailMBps)
 	edV := metricVerdict(r.Ed25519VerifyOpsPerSec, cryptoEd25519VerifyPassOps, cryptoEd25519VerifyFailOps)
 
-	fmt.Printf("  %-32s  %7.1f MB/s  %s  (pass≥%.0f, fail<%.0f MB/s)\n",
+	fmt.Printf(metricThroughputMBpsRowFmt,
 		"SHA-256 (1 KiB blocks):", r.SHA256MBps, s256V.Icon(),
 		cryptoSHA256SmallPassMBps, cryptoSHA256SmallFailMBps)
-	fmt.Printf("  %-32s  %7.1f MB/s  %s  (pass≥%.0f, fail<%.0f MB/s)\n",
+	fmt.Printf(metricThroughputMBpsRowFmt,
 		"SHA-256 (16 KiB blocks):", r.SHA256LargeMBps, s256LV.Icon(),
 		cryptoSHA256LargePassMBps, cryptoSHA256LargeFailMBps)
-	fmt.Printf("  %-32s  %7.1f MB/s  %s  (pass≥%.0f, fail<%.0f MB/s)\n",
+	fmt.Printf(metricThroughputMBpsRowFmt,
 		"Blake2b-512 (16 KiB):", r.Blake2bMBps, b2V.Icon(),
 		cryptoBlake2bPassMBps, cryptoBlake2bFailMBps)
-	fmt.Printf("  %-32s  %7.1f MB/s  %s  (pass≥%.0f, fail<%.0f MB/s)\n",
+	fmt.Printf(metricThroughputMBpsRowFmt,
 		"Keccak-256 (16 KiB):", r.Keccak256MBps, kV.Icon(),
 		cryptoKeccak256PassMBps, cryptoKeccak256FailMBps)
 	fmt.Printf("  %-32s  %s  %s  (pass≥%.0fK, fail<%.0fK ops/s)\n",

From dd9a3328baaadb856c1e5f6da0f4f4fbb725aded Mon Sep 17 00:00:00 2001
From: Fernando Sobreira <fbsobreira@gmail.com>
Date: Thu, 7 May 2026 21:28:24 -0400
Subject: [PATCH 4/6] [KLC-2387] add tests for ShouldEnforceCPUPreflight

Sonar flagged the new safe-default accessor as uncovered. The function
is load-bearing for the rollout safety guarantee (absent YAML key must
default to enforce=true so upgrading operators are not silently
downgraded to warn-only), so the absent-key branch is the most
important one to lock down. Test covers all three states: nil pointer,
explicit *true, explicit *false.
---
 config/prefsConfig_test.go | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 config/prefsConfig_test.go

diff --git a/config/prefsConfig_test.go b/config/prefsConfig_test.go
new file mode 100644
index 00000000..53156098
--- /dev/null
+++ b/config/prefsConfig_test.go
@@ -0,0 +1,32 @@
+package config
+
+import "testing"
+
+// TestShouldEnforceCPUPreflight pins the three states of the optional
+// EnforceCPUPreflight pointer: nil (YAML key absent) and *true must both
+// enforce, and only an explicit *false may relax the startup gate to
+// warn-only.
+func TestShouldEnforceCPUPreflight(t *testing.T) {
+	// Addressable values backing the two explicit pointer states.
+	on, off := true, false
+	cases := []struct {
+		name  string
+		flag  *bool
+		want  bool
+		state string // how the pointer is rendered in the failure message
+	}{
+		{"nil pointer (absent YAML key) defaults to true", nil, true, "nil"},
+		{"explicit true returns true", &on, true, "*true"},
+		{"explicit false returns false", &off, false, "*false"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			p := PreferencesConfig{EnforceCPUPreflight: tc.flag}
+			if got := p.ShouldEnforceCPUPreflight(); got != tc.want {
+				t.Fatalf("expected ShouldEnforceCPUPreflight() to return %t when EnforceCPUPreflight is %s",
+					tc.want, tc.state)
+			}
+		})
+	}
+}

From 8fc73baeb88fe9195e0e35c4b9e97b65a0183a4a Mon Sep 17 00:00:00 2001
From: Fernando Sobreira <fbsobreira@gmail.com>
Date: Thu, 7 May 2026 21:34:15 -0400
Subject: [PATCH 5/6] [KLC-2387] address re-review: drop crypto/rand, fix
 comments, soften docs

- preflight.go: replace crypto/rand seed with deterministic init.
  rand.Read can block on entropy starvation during early boot, which
  could delay validator startup before preflight even runs. SHA-256
  amd64/arm64 fast paths are data-independent so the buffer contents
  do not affect measured throughput.
- score.go: stale comment claimed minLeaderSHA256MBps "matches the
  existing fail floor"; actual fail floor is 600 MB/s vs 500 veto.
  Document the intentional gap and explain why behavior stays
  consistent (category-fail path in gradeToVerdict).
- PREFLIGHT.md: soften absolute "always satisfy" claim about Hetzner
  CCX/CPX SKUs to "typically" + emphasise running the benchmark to
  confirm. Cloud SKU hardware can change over time.
---
 cmd/benchmark/score.go |  8 ++++++--
 cmd/node/PREFLIGHT.md  | 12 +++++++-----
 cmd/node/preflight.go  | 13 ++++++-------
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/cmd/benchmark/score.go b/cmd/benchmark/score.go
index e391cb61..f0b64c65 100644
--- a/cmd/benchmark/score.go
+++ b/cmd/benchmark/score.go
@@ -50,8 +50,12 @@ import (
 // tolerance window (lowerBound = 425 ms). Calibrated from field data: a
 // validator measured at ~250 MB/s on 16 KiB blocks took ~600 ms to process
 // a representative SC TX as leader, well above the 425 ms lowerBound.
-// Setting the floor at 500 MB/s gives ~2× margin to the lowerBound and
-// matches the existing fail floor for SHA-256 16 KiB blocks.
+// Setting the floor at 500 MB/s gives ~2× margin to the lowerBound. This
+// veto threshold is deliberately below the per-metric category fail
+// threshold (cryptoSHA256LargeFailMBps = 600 MB/s in report.go) so a
+// host in [500, 600) MB/s shows a per-metric FAIL label without the
+// hard grade-cap firing — the overall verdict still resolves to FAIL via
+// the category-fail path in gradeToVerdict.
 const minLeaderSHA256MBps = 500.0
 
 // ---------------------------------------------------------------------------
diff --git a/cmd/node/PREFLIGHT.md b/cmd/node/PREFLIGHT.md
index 67f01596..f28f8b1d 100644
--- a/cmd/node/PREFLIGHT.md
+++ b/cmd/node/PREFLIGHT.md
@@ -94,12 +94,14 @@ Recommended CPU classes:
   every datacenter ARM CPU since 2018, including AWS Graviton and Apple
   Silicon).
 
-For Hetzner Cloud specifically: CCX (dedicated AMD EPYC) and CPX (shared
-AMD EPYC) instances always satisfy the preflight. The CX series is a mixed
-Intel/AMD pool, and Skylake-class instances within it do not. Run
+For Hetzner Cloud specifically (based on current fleet observations):
+CCX (dedicated AMD EPYC) and CPX (shared AMD EPYC) instances typically
+satisfy the preflight. The CX series is a mixed Intel/AMD pool, and
+Skylake-class instances within it may not. Cloud SKUs and underlying
+hardware can change over time, so always confirm by running
 `klever-benchmark --skip-disk --skip-network --skip-kv --skip-memory \
---skip-goroutine --skip-bignum` on a candidate instance before deploying as
-a validator to confirm.
+--skip-goroutine --skip-bignum` on a candidate instance before
+deploying as a validator.
 
 ## Related
 
diff --git a/cmd/node/preflight.go b/cmd/node/preflight.go
index f546f4c9..584cdea4 100644
--- a/cmd/node/preflight.go
+++ b/cmd/node/preflight.go
@@ -1,7 +1,6 @@
 package main
 
 import (
-	"crypto/rand"
 	"crypto/sha256"
 	"fmt"
 	"os"
@@ -137,13 +136,13 @@ func benchSHA256(d time.Duration) float64 {
 	if d <= 0 {
 		return 0
 	}
+	// Deterministic init — avoids crypto/rand which can block on entropy
+	// starvation during early boot (validator startup may run before the
+	// kernel RNG pool is fully initialised). SHA-256's amd64/arm64 fast
+	// paths are data-independent so the contents do not affect throughput.
 	buf := make([]byte, benchBlockSize)
-	if _, err := rand.Read(buf); err != nil {
-		// Deterministic fallback so the bench remains representative of a
-		// non-zero working set even when the system RNG is unavailable.
-		for i := range buf {
-			buf[i] = byte(i)
-		}
+	for i := range buf {
+		buf[i] = byte(i)
 	}
 	h := sha256.New()
 	digest := make([]byte, 0, h.Size())

From 9bdbaeaac9a32a4898c78390b486caebbc7214ea Mon Sep 17 00:00:00 2001
From: Fernando Sobreira <fbsobreira@gmail.com>
Date: Thu, 7 May 2026 21:54:00 -0400
Subject: [PATCH 6/6] [KLC-2387] make SHA-256 acceleration reporting
 architecture-aware
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CodeRabbit's third pass surfaced a real correctness issue: text
report, JSON output, log keys, and remediation messages all hardcoded
"SHA-NI" — which is the Intel/AMD x86 brand. On arm64 the equivalent
ISA is ARMv8 SHA2, so an operator running on a slow arm64 validator
was being misdirected to migrate from SHA-NI-deficient x86 hardware
when the actual root cause is missing ARMv8 SHA2.

Rename CryptoResult.HasSHA_NI -> HasSHAAccel; JSON tag sha_ni ->
sha_accel; preflight log key sha_ni -> sha_accel. Add shaAccelName(arch)
helper that returns "SHA-NI" / "ARMv8 SHA2" / generic per architecture
and use it in: text CPU section, post-crypto warning, JSON surface,
score VetoedReason, preflight measurement log, preflight Warn line,
preflight failure error.

Qualify the PREFLIGHT.md key-loading guarantee to scope it to the
enforced default path, since enforceCpuPreflight=false and
KLEVER_SKIP_CPU_CHECK=1 deliberately allow startup past failure.

Verified end-to-end:
- amd64 Skylake host: text shows "SHA-NI=no", JSON has "sha_accel":
  false, veto_reason cites "missing SHA-NI on amd64 (Skylake-X /
  Cascade Lake / Haswell)".
- arm64 (M4 Max) local: text shows "ARMv8 SHA2=yes" instead of the
  previous misleading "SHA-NI=yes".
---
 cmd/benchmark/crypto.go     | 40 ++++++++++++++++++++++++++++++++-----
 cmd/benchmark/report.go     | 14 +++++++------
 cmd/benchmark/score.go      |  6 ++++--
 cmd/benchmark/score_test.go |  4 ++--
 cmd/node/PREFLIGHT.md       | 16 ++++++++++-----
 cmd/node/preflight.go       | 40 +++++++++++++++++++++++++++++++++----
 6 files changed, 96 insertions(+), 24 deletions(-)

diff --git a/cmd/benchmark/crypto.go b/cmd/benchmark/crypto.go
index af102c32..f5f901f7 100644
--- a/cmd/benchmark/crypto.go
+++ b/cmd/benchmark/crypto.go
@@ -67,12 +67,13 @@ type CryptoResult struct {
 	// (see BenchmarkScore.Vetoed). The flags are informational and help
 	// operators correlate measured throughput to the underlying ISA.
 	//
-	// HasSHA_NI is the cross-platform shorthand: true means SHA-256
-	// hardware acceleration is available — Intel/AMD SHA-NI on amd64,
-	// ARMv8 SHA2 on arm64 — false on every other architecture.
+	// HasSHAAccel is true when SHA-256 hardware acceleration is available
+	// on the current architecture: Intel/AMD SHA-NI on amd64, ARMv8 SHA2
+	// on arm64. False on every other architecture. Use shaAccelName(arch)
+	// to render an arch-appropriate label in user-facing output.
 	// AVX-512 IFMA on amd64 indicates whether the BLS pairing fast path
 	// is available (~1.5x speedup vs scalar fallback).
-	HasSHA_NI     bool
+	HasSHAAccel   bool
 	HasAVX512IFMA bool
 	HasVAES       bool
 	HasGFNI       bool
@@ -82,7 +83,7 @@ type CryptoResult struct {
 // feature flags. Returns a populated CryptoResult on success.
 func RunCryptoBenchmark() (*CryptoResult, error) {
 	r := &CryptoResult{
-		HasSHA_NI:     hasSHAAcceleration(),
+		HasSHAAccel:   hasSHAAcceleration(),
 		HasAVX512IFMA: cpuid.CPU.Has(cpuid.AVX512IFMA),
 		HasVAES:       cpuid.CPU.Has(cpuid.VAES),
 		HasGFNI:       cpuid.CPU.Has(cpuid.GFNI),
@@ -111,6 +112,35 @@ func RunCryptoBenchmark() (*CryptoResult, error) {
 	return r, nil
 }
 
+// shaAccelName maps a GOARCH-style architecture string to the name operators
+// know its SHA-256 acceleration extension by: "SHA-NI" for amd64 and
+// "ARMv8 SHA2" for arm64. Any other value yields a generic description, so
+// reports never print the x86 brand name on a non-x86 host.
+func shaAccelName(arch string) string {
+	if arch == "amd64" {
+		return "SHA-NI"
+	}
+	if arch == "arm64" {
+		return "ARMv8 SHA2"
+	}
+	// Unknown architecture: fall back to a brand-neutral label.
+	return "SHA-256 hardware acceleration"
+}
+
+// shaCommonCauseSuffix returns the architecture qualifier appended after
+// the shaAccelName label in "most common cause" messages. Architectures
+// other than amd64 and arm64 get "", leaving the generic label unqualified.
+func shaCommonCauseSuffix(arch string) string {
+	if arch == "amd64" {
+		return " on amd64 (Skylake-X / Cascade Lake / Haswell)"
+	}
+	if arch == "arm64" {
+		return " on arm64"
+	}
+	// No arch-specific hint to add.
+	return ""
+}
+
 // hasSHAAcceleration reports whether SHA-256 hardware acceleration is
 // available on the current architecture: SHA-NI on amd64, ARMv8 SHA2 on
 // arm64. Returns false on every other architecture.
diff --git a/cmd/benchmark/report.go b/cmd/benchmark/report.go
index 5f2a0677..036cb474 100644
--- a/cmd/benchmark/report.go
+++ b/cmd/benchmark/report.go
@@ -370,8 +370,9 @@ func printText(results *BenchmarkResults) {
 	fmt.Printf("  System : %s/%s   CPUs: %d   Go: %s\n",
 		si.GOOS, si.GOARCH, si.CPUs, si.GoVersion)
 	if c := results.CryptoResult; c != nil {
-		fmt.Printf("  CPU    : SHA-NI=%s  AVX-512 IFMA=%s  VAES=%s  GFNI=%s\n",
-			yesNo(c.HasSHA_NI), yesNo(c.HasAVX512IFMA), yesNo(c.HasVAES), yesNo(c.HasGFNI))
+		fmt.Printf("  CPU    : %s=%s  AVX-512 IFMA=%s  VAES=%s  GFNI=%s\n",
+			shaAccelName(si.GOARCH), yesNo(c.HasSHAAccel),
+			yesNo(c.HasAVX512IFMA), yesNo(c.HasVAES), yesNo(c.HasGFNI))
 	}
 	fmt.Println(sep)
 
@@ -596,9 +597,10 @@ func printCryptoSection(r *CryptoResult, v verdict, sep string) {
 		"Ed25519 verify:", humanOps(r.Ed25519VerifyOpsPerSec), edV.Icon(),
 		cryptoEd25519VerifyPassOps/1000, cryptoEd25519VerifyFailOps/1000)
 
-	if runtime.GOARCH == "amd64" && !r.HasSHA_NI {
+	if !r.HasSHAAccel && (runtime.GOARCH == "amd64" || runtime.GOARCH == "arm64") {
 		fmt.Println()
-		fmt.Println("  ! CPU lacks SHA-NI; this is the most common cause of low SHA-256 throughput.")
+		fmt.Printf("  ! CPU lacks %s; this is the most common cause of low SHA-256 throughput.\n",
+			shaAccelName(runtime.GOARCH))
 		fmt.Println("  ! If the throughput numbers above are below the pass thresholds, migrate to")
 		fmt.Println("  ! AMD Zen, Intel Ice Lake-SP+, or modern ARM (with ARMv8 SHA2).")
 	}
@@ -813,7 +815,7 @@ type jsonBigNum struct {
 }
 
 type jsonCPUFeatures struct {
-	HasSHA_NI     bool `json:"sha_ni"`
+	HasSHAAccel   bool `json:"sha_accel"`
 	HasAVX512IFMA bool `json:"avx512_ifma"`
 	HasVAES       bool `json:"vaes"`
 	HasGFNI       bool `json:"gfni"`
@@ -924,7 +926,7 @@ func printJSON(results *BenchmarkResults) {
 			Keccak256MBps:          r.Keccak256MBps,
 			Ed25519VerifyOpsPerSec: r.Ed25519VerifyOpsPerSec,
 			CPUFeatures: jsonCPUFeatures{
-				HasSHA_NI:     r.HasSHA_NI,
+				HasSHAAccel:   r.HasSHAAccel,
 				HasAVX512IFMA: r.HasAVX512IFMA,
 				HasVAES:       r.HasVAES,
 				HasGFNI:       r.HasGFNI,
diff --git a/cmd/benchmark/score.go b/cmd/benchmark/score.go
index f0b64c65..567c76a9 100644
--- a/cmd/benchmark/score.go
+++ b/cmd/benchmark/score.go
@@ -203,8 +203,10 @@ func ComputeScore(r *BenchmarkResults) BenchmarkScore {
 		s.VetoedReason = fmt.Sprintf(
 			"SHA-256 throughput %.0f MB/s < %.0f MB/s minimum — node likely cannot sustain "+
 				"leader-mode TX processing within the consensus hardware-tolerance window. "+
-				"Most common cause: missing SHA-NI on amd64 (Skylake-X / Cascade Lake / Haswell)",
-			c.SHA256LargeMBps, minLeaderSHA256MBps)
+				"Most common cause: missing %s%s",
+			c.SHA256LargeMBps, minLeaderSHA256MBps,
+			shaAccelName(r.SystemInfo.GOARCH),
+			shaCommonCauseSuffix(r.SystemInfo.GOARCH))
 		s.Grade = "F"
 	}
 
diff --git a/cmd/benchmark/score_test.go b/cmd/benchmark/score_test.go
index 7b0b0ceb..c532163c 100644
--- a/cmd/benchmark/score_test.go
+++ b/cmd/benchmark/score_test.go
@@ -44,7 +44,7 @@ func excellentResults() *BenchmarkResults {
 			Blake2bMBps:            cryptoBlake2bExcellentMBps,
 			Keccak256MBps:          cryptoKeccak256ExcellentMBps,
 			Ed25519VerifyOpsPerSec: cryptoEd25519VerifyExcellentOps,
-			HasSHA_NI:              true,
+			HasSHAAccel:            true,
 			HasAVX512IFMA:          true,
 		},
 	}
@@ -125,7 +125,7 @@ func TestComputeScore_ThroughputVeto_DoesNotApply_AboveFloor(t *testing.T) {
 	// Throughput just above the floor — veto must not trigger even though
 	// the host could in principle be a non-SHA-NI amd64.
 	r.CryptoResult.SHA256LargeMBps = minLeaderSHA256MBps + 1
-	r.CryptoResult.HasSHA_NI = false
+	r.CryptoResult.HasSHAAccel = false
 
 	s := ComputeScore(r)
 	if s.Vetoed {
diff --git a/cmd/node/PREFLIGHT.md b/cmd/node/PREFLIGHT.md
index f28f8b1d..dae75a8c 100644
--- a/cmd/node/PREFLIGHT.md
+++ b/cmd/node/PREFLIGHT.md
@@ -1,10 +1,16 @@
 # Validator CPU Preflight
 
 The validator binary runs a CPU preflight check at startup, immediately before
-loading the BLS signing key — defense-in-depth so a host that fails the gate
-never unlocks the key file. The preflight verifies that the host has
-sufficient SHA-256 hardware acceleration to keep up with consensus and TX
-processing on a production network.
+loading the BLS signing key. When the preflight is enforced (the default,
+`preferences.enforceCpuPreflight=true`), a host that fails the gate exits
+before the key file is unlocked — defense-in-depth against deploying a
+validator key on hardware that cannot keep up with consensus. The
+warn-only path (`enforceCpuPreflight=false`) and the emergency env
+bypass (`KLEVER_SKIP_CPU_CHECK=1`) deliberately allow startup to
+continue past a failure; both log a loud Warn so the bypass is auditable
+in fleet logs. The preflight measures SHA-256 throughput to verify that the
+host hashes fast enough (in practice: has SHA-256 hardware acceleration) to
+keep up with consensus and TX processing on a production network.
 
 ## Why this exists
 
@@ -56,7 +62,7 @@ Every preflight run logs a single `Info` line with the measured throughput
 failure error):
 
 ```text
-INFO  validator CPU preflight measurement  arch=amd64 sha_ni=true avx512_ifma=true sha256_mbps=1742.3
+INFO  validator CPU preflight measurement  arch=amd64 sha_accel=true avx512_ifma=true sha256_mbps=1742.3
 ```
 
 ## Override
diff --git a/cmd/node/preflight.go b/cmd/node/preflight.go
index 584cdea4..3d42f076 100644
--- a/cmd/node/preflight.go
+++ b/cmd/node/preflight.go
@@ -95,7 +95,7 @@ func validatorCPUPreflightWithInfo(
 	}
 	log.Info("validator CPU preflight measurement",
 		"arch", info.arch,
-		"sha_ni", info.hasSHA,
+		"sha_accel", info.hasSHA,
 		"avx512_ifma", info.hasAVX512IFMA,
 		"sha256_mbps", fmt.Sprintf("%.1f", mbps))
 
@@ -105,7 +105,7 @@ func validatorCPUPreflightWithInfo(
 
 	if !info.hasSHA {
 		log.Warn("CPU lacks SHA-256 hardware acceleration "+
-			"(SHA-NI on amd64 / ARMv8 SHA2 on arm64); "+
+			"("+shaAccelName(info.arch)+"); "+
 			"this is the most common cause of low SHA-256 throughput",
 			"arch", info.arch)
 	}
@@ -113,17 +113,49 @@ func validatorCPUPreflightWithInfo(
 	if mbps < minSHA256ThroughputMBps {
 		return fmt.Errorf(
 			"validator CPU preflight failed: measured SHA-256 throughput %.1f MB/s < %d MB/s minimum. "+
-				"This typically indicates missing SHA-NI (Skylake-X / Cascade Lake / Haswell on amd64) "+
+				"This typically indicates missing %s%s "+
 				"or a degraded host (frequency cap, thermal throttle, hypervisor masking). "+
 				"Migrate to AMD Zen, Intel Ice Lake-SP+, or modern ARM with ARMv8 SHA2. "+
 				"To downgrade this failure to a warning during a coordinated fleet migration, "+
 				"set preferences.enforceCpuPreflight=false in the validator config. "+
 				"Emergency override (NOT for production): %s=1",
-			mbps, minSHA256ThroughputMBps, envSkipCPUCheck)
+			mbps, minSHA256ThroughputMBps,
+			shaAccelName(info.arch),
+			shaAccelArchSuffix(info.arch),
+			envSkipCPUCheck)
 	}
 	return nil
 }
 
+// shaAccelName returns the operator-facing name of the SHA-256 hardware
+// acceleration extension for arch: "SHA-NI" on amd64, "ARMv8 SHA2" on
+// arm64, and a generic label for any other value, so that log lines on
+// arm64 do not misdirect operators to the x86-specific "SHA-NI".
+func shaAccelName(arch string) string {
+	switch arch {
+	case "amd64":
+		return "SHA-NI"
+	case "arm64":
+		return "ARMv8 SHA2"
+	default:
+		return "SHA-256 hardware acceleration"
+	}
+}
+
+// shaAccelArchSuffix returns the arch-specific "common cause" suffix that
+// the preflight error message appends directly after shaAccelName(arch).
+// It carries its own leading space and is empty on unknown archs.
+func shaAccelArchSuffix(arch string) string {
+	switch arch {
+	case "amd64":
+		return " (Skylake-X / Cascade Lake / Haswell on amd64)"
+	case "arm64":
+		return " (any ARMv7 or ARMv8 chip without the SHA2 feature flag)"
+	default:
+		return ""
+	}
+}
+
 // benchSHA256 hashes 16 KiB blocks for d and returns sustained throughput in
 // megabytes per second. Returns 0 on a non-positive duration. The block size
 // matches the openssl-speed reference used during the original investigation