eliza/packages/chip/benchmarks/mlperf/energy.py at 48c2f6a4318b3305f5c380b31aec3919f2f90672 · elizaOS/eliza · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
"""Modeled energy-per-inference for the MLPerf Inference harness (G-7).

Wires ``docs/benchmarks/report-schema.yaml``'s ``energy_joules_per_inference``
field end to end. The energy value is MODELED, never measured:

- Primary source: the deterministic architecture scale model in
  ``compiler/runtime/e1_npu_scale_model.py``. For the two-layer tiny MLP
  the harness sums the per-GEMM modeled energy (``KernelEstimate.energy_nj``)
  over the two vector-by-matrix GEMMs of one forward pass.
- The result is reported with ``provenance: simulator`` and a
  ``calibration.status: blocked-no-calibrated-assets`` block, because the
  pre-silicon model has no measured ground truth to calibrate against.

The silicon power-measurement axis (Joulescope / Monsoon rail
integration) stays BLOCKED; this module never emits ``provenance:
measured`` and never fabricates an instrument reading.
"""

from __future__ import annotations

import sys
from pathlib import Path
from typing import Any

RUNTIME_DIR = Path(__file__).resolve().parents[2] / "compiler" / "runtime"
if str(RUNTIME_DIR) not in sys.path:
    sys.path.insert(0, str(RUNTIME_DIR))

from e1_npu_scale_model import (  # noqa: E402
    MIN_REAL_V1,
    OPEN_2028_FIRST,
    OPEN_2028_SOTA,
    OPEN_2028_STRETCH,
    NpuScaleConfig,
    estimate_gemm_s8,
)

from benchmarks.mlperf.model import NUM_CLASSES, NUM_FEATURES  # noqa: E402

SCALE_CONFIGS: dict[str, NpuScaleConfig] = {
    MIN_REAL_V1.name: MIN_REAL_V1,
    OPEN_2028_FIRST.name: OPEN_2028_FIRST,
    OPEN_2028_STRETCH.name: OPEN_2028_STRETCH,
    OPEN_2028_SOTA.name: OPEN_2028_SOTA,
}

ENERGY_UNITS = "J_per_inference"
GROUND_TRUTH_REFERENCE = (
    "compiler/runtime/e1_npu_scale_model.py "
    "(deterministic architecture scale model; modeled energy, no measured "
    "silicon ground truth available pre-silicon)"
)


def modeled_energy_joules_per_inference(config: NpuScaleConfig) -> float:
    """Sum the scale-model energy of the two vector-by-matrix GEMMs in one forward pass.

    The host-side bias-add + ReLU activation composite is not an NPU GEMM
    command and is excluded from the NPU energy model (it runs on the CPU
    fallback path); only the two NPU GEMM tiles contribute.

    Both GEMMs are issued with M=1 (a single feature row) on the NPU, so
    the modeled shapes are 1xK by KxN, matching the hardware MAC counter.
    """
    gemm0 = estimate_gemm_s8(config, 1, NUM_CLASSES, NUM_FEATURES)
    gemm1 = estimate_gemm_s8(config, 1, NUM_CLASSES, NUM_CLASSES)
    energy_nj = gemm0.energy_nj(config) + gemm1.energy_nj(config)
    return energy_nj / 1e9


def energy_block(
    config: NpuScaleConfig, integration_window_seconds: float, sample_count: int
) -> dict[str, Any]:
    """Build the schema ``energy_joules_per_inference`` object (G-7).

    Matches ``docs/benchmarks/report-schema.yaml`` field-for-field:
    value, units, provenance, instrument, sampling_rate_hz,
    integration_window_seconds, ground_truth_reference, sample_count,
    and a fail-closed calibration sub-block.
    """
    return {
        "value": modeled_energy_joules_per_inference(config),
        "units": ENERGY_UNITS,
        "provenance": "simulator",
        "instrument": "e1_npu_scale_model_v1",
        "sampling_rate_hz": float(config.clock_hz),
        "integration_window_seconds": integration_window_seconds,
        "ground_truth_reference": GROUND_TRUTH_REFERENCE,
        "sample_count": sample_count,
        "calibration": {
            "status": "blocked-no-calibrated-assets",
            "last_calibrated_utc": None,
            "evidence": (
                "Modeled pre-silicon energy. Measured power calibration "
                "(Joulescope/Monsoon rail integration on fabricated silicon) "
                "is BLOCKED until silicon exists."
            ),
        },
    }