@@ -839,7 +839,7 @@ static void init_splash(CHAR16 *stage) {
839839 cls();
840840 UINTN cy = g_h / 2;
841841 /* Title — large centered line. */
842- CHAR16 *title = L"MEMFORGE v0.4.19 ";
842+ CHAR16 *title = L"MEMFORGE v0.4.20 ";
843843 UINTN tx = (g_w - StrLen(title) * g_char_w) / 2;
844844 gfx_draw_str_color(tx, cy - g_char_h * 2, title, COL_ACCENT_HI);
845845 /* Stage indicator — what we're doing right now. */
@@ -943,7 +943,7 @@ static UINTN g_card_cols = 1;
943943 compute_layout(). */
944944static int g_show_cards = 1;
945945
946- /* v0.4.19 — focused cards layout for small screens (g_h < 900).
946+ /* v0.4.20 — focused cards layout for small screens (g_h < 900).
947947 Instead of one full-width row per test (14 rows × ~40 px = 560 px,
948948 which on a 1024×768 screen eats 70% of vertical space and clips the
949949 core panel + footer), we draw:
@@ -1013,7 +1013,7 @@ static void compute_layout(UINTN n_tests) {
10131013 g_card_w = g_inner;
10141014 g_card_row_h = g_compact ? g_char_h : (g_char_h + 16);
10151015
1016- /* v0.4.19 — focused layout on small screens.
1016+ /* v0.4.20 — focused layout on small screens.
10171017 On g_h<900 the per-test card list eats 60-70% of vertical space
10181018 and clips the core panel / footer (YgrecK field report on 1024×768
10191019 Radeon HD 4350). Replace with: 1-row strip of all test dots +
@@ -1227,9 +1227,9 @@ static void render_header(UINT64 elapsed_ms, UINTN done, UINTN total) {
12271227 UINTN cols = g_text_cols;
12281228 if (cols >= 110) {
12291229 SPrint(buf, sizeof(buf),
1230- T(L" MEMFORGE v0.4.19 | %ld.%ld ГБ RAM | %s "
1230+ T(L" MEMFORGE v0.4.20 | %ld.%ld ГБ RAM | %s "
12311231 L"| %s | %02d:%02d | ост ~%02d:%02d | Тесты %d/%d",
1232- L" MEMFORGE v0.4.19 | %ld.%ld GB RAM | %s "
1232+ L" MEMFORGE v0.4.20 | %ld.%ld GB RAM | %s "
12331233 L"| %s | %02d:%02d | ETA ~%02d:%02d | Tests %d/%d"),
12341234 ram_gb_x10 / 10, ram_gb_x10 % 10,
12351235 pass_tag,
@@ -1239,25 +1239,25 @@ static void render_header(UINT64 elapsed_ms, UINTN done, UINTN total) {
12391239 (UINT32)done, (UINT32)total);
12401240 } else if (cols >= 90) {
12411241 SPrint(buf, sizeof(buf),
1242- T(L" MEMFORGE v0.4.19 | %ld.%ld ГБ RAM | %s | %s | %02d:%02d | ост ~%02d:%02d",
1243- L" MEMFORGE v0.4.19 | %ld.%ld GB RAM | %s | %s | %02d:%02d | ETA ~%02d:%02d"),
1242+ T(L" MEMFORGE v0.4.20 | %ld.%ld ГБ RAM | %s | %s | %02d:%02d | ост ~%02d:%02d",
1243+ L" MEMFORGE v0.4.20 | %ld.%ld GB RAM | %s | %s | %02d:%02d | ETA ~%02d:%02d"),
12441244 ram_gb_x10 / 10, ram_gb_x10 % 10,
12451245 pass_tag,
12461246 err_tag,
12471247 secs / 60, secs % 60,
12481248 eta_secs / 60, eta_secs % 60);
12491249 } else if (cols >= 70) {
12501250 SPrint(buf, sizeof(buf),
1251- T(L" MEMFORGE v0.4.19 | %ld.%ld ГБ RAM | %s | %s | %02d:%02d",
1252- L" MEMFORGE v0.4.19 | %ld.%ld GB RAM | %s | %s | %02d:%02d"),
1251+ T(L" MEMFORGE v0.4.20 | %ld.%ld ГБ RAM | %s | %s | %02d:%02d",
1252+ L" MEMFORGE v0.4.20 | %ld.%ld GB RAM | %s | %s | %02d:%02d"),
12531253 ram_gb_x10 / 10, ram_gb_x10 % 10,
12541254 pass_tag,
12551255 err_tag,
12561256 secs / 60, secs % 60);
12571257 } else {
12581258 SPrint(buf, sizeof(buf),
1259- T(L" MEMFORGE v0.4.19 | %s | %s | %02d:%02d",
1260- L" MEMFORGE v0.4.19 | %s | %s | %02d:%02d"),
1259+ T(L" MEMFORGE v0.4.20 | %s | %s | %02d:%02d",
1260+ L" MEMFORGE v0.4.20 | %s | %s | %02d:%02d"),
12611261 pass_tag,
12621262 err_tag,
12631263 secs / 60, secs % 60);
@@ -1783,7 +1783,7 @@ static int dominant_dimm_idx(void) {
17831783 return best;
17841784}
17851785
1786- /* v0.4.19 — detect dual-channel interleave ambiguity.
1786+ /* v0.4.20 — detect dual-channel interleave ambiguity.
17871787 On consumer desktops with dual/quad-channel memory, the iMC interleaves
17881788 addresses between channels at 64-byte (cache-line) granularity. A
17891789 SINGLE bad chip on one stick produces errors that, when mapped through
@@ -1792,7 +1792,7 @@ static int dominant_dimm_idx(void) {
17921792
17931793 Field report from a Habr user (Netac DDR4 kit): same stuck bit
17941794 D[53] was reported 24 times, distributed as A2 (8) + B2 (11) + ? (5).
1795- Pre-v0.4.19 verdict confidently said "REPLACE: DDR4-B2 (HIGH)" — but
1795+ Pre-v0.4.19 verdict confidently said "REPLACE: DDR4-B2 (HIGH)" — but
17961796 physically it's likely ONE bad chip on one of A2/B2, NOT both.
17971797
17981798 This helper returns the list of DIMM indices that each hold >=25% of
@@ -4774,15 +4774,15 @@ static void amd_thermal_probe(void) {
47744774}
47754775
47764776static UINT32 amd_thermal_sample(void) {
4777- /* v0.4.19 — correct decode per Linux k10temp / FreeBSD amdtemp.c:
4777+ /* v0.4.20 — correct decode per Linux k10temp / FreeBSD amdtemp.c:
47784778 SMN 0x59800 (SMU_THM_TCON_CUR_TMP)
47794779 bits [31:21] raw temperature value (11 bits, mask 0x7FF)
47804780 bit 19 TempRangeSel — when SET, scale is -49°C..+206°C
47814781 (subtract 49°C from the raw decode); when CLEAR
47824782 scale is 0..225°C (no offset).
47834783 temp_c = (raw * 0.125) - (range_sel ? 49 : 0)
47844784
4785- Pre-v0.4.19 code was missing both the 0x7FF mask AND the bit-19
4785+ Pre-v0.4.20 code was missing both the 0x7FF mask AND the bit-19
47864786 range adjustment, which inflated readings by ~49°C on Ryzen SKUs
47874787 that report on the -49..206 scale (most Renoir/Cezanne/Zen3+
47884788 desktop parts). Field report on Ryzen 5 4500 showed Tctl=93°C at
@@ -6402,6 +6402,26 @@ static test_def_t g_tests[] = {
64026402};
64036403#define N_TESTS (sizeof(g_tests) / sizeof(g_tests[0]))
64046404
6405+ /* Find the g_tests[] row that runs kernel `k`.
6406+    A kernel_id_t value is NOT a row number: KER_AVX2_SUSTAINED (= 12)
6407+    is row 0 of the table, while row 12 is L3 Cache Stress. Indexing
6408+    g_tests[] by the enum therefore reported AVX2 errors as
6409+    "T=L3 Cache Stress" in the verdict, JSON and log before v0.4.20.
6410+    The table is scanned front to back; the result is the index of
6411+    the first row whose .k equals `k`, or -1 when no row matches.
6412+    Callers must test for -1 before indexing with the result; use
6413+    name_for_kernel() when only the display name is wanted. */
6414+ static int tests_idx_for_kernel(kernel_id_t k) {
6415+     UINTN row = 0;
6416+     while (row < N_TESTS && g_tests[row].k != k)
6417+         row++;
6418+     return (row < N_TESTS) ? (int)row : -1;
6419+ }
6420+ static CHAR16 *name_for_kernel(kernel_id_t k) { /* name of k's g_tests[] row, or a placeholder */
6421+     const int row = tests_idx_for_kernel(k);  /* -1 when k has no row in g_tests[] */
6422+     return (row < 0) ? L"(unknown kernel)" : g_tests[row].name;
6423+ }
6424+
64056425/* Activity row painter — invoked by render_header() on every tick to show
64066426 what test is running, how long it's been on this test, and (critically)
64076427 a per-second countdown when Bit Fade is in its silent wait phase. Lives
@@ -6534,7 +6554,7 @@ typedef struct {
65346554} card_info_t;
65356555static card_info_t g_cards[N_TESTS];
65366556
6537- /* v0.4.19 — Forward decls for focused-mode helpers (defined below
6557+ /* v0.4.20 — Forward decls for focused-mode helpers (defined below
65386558 card_paint so they can share the same color-lookup logic). */
65396559static void card_paint_full(UINTN i);
65406560static void card_strip_paint(UINTN i);
@@ -6648,7 +6668,7 @@ static void card_paint_full(UINTN i) {
66486668 }
66496669}
66506670
6651- /* ---------- Focused-mode card painters (v0.4.19 ) ---------- */
6671+ /* ---------- Focused-mode card painters (v0.4.20) ---------- */
66526672
66536673/* Paint the small status dot for test i in the top strip. The strip is
66546674 one row tall and shows N evenly-spaced dots, one per test. The dot
@@ -7982,7 +8002,7 @@ static void render_simple_verdict(UINT64 total_ms) {
79828002 }
79838003 } else { /* VERDICT_FAIL */
79848004 int didx = dominant_dimm_idx();
7985- /* v0.4.19 — interleave detection.
8005+ /* v0.4.20 — interleave detection.
79868006 If errors are distributed across 2+ DIMMs (typical dual-channel
79878007 interleave hiding a single bad chip behind two DIMM labels),
79888008 we MUST NOT confidently name one DIMM. Verdict instead tells
@@ -8205,8 +8225,8 @@ static void render_summary(UINT64 total_ms) {
82058225 UINTN hrow = (g_hdr_h / 2 - g_char_h / 2) / g_char_h;
82068226 CHAR16 buf[200];
82078227 SPrint(buf, sizeof(buf),
8208- T(L" MEMFORGE v0.4.19 ИТОГИ | %d сек | Ядра %d/%d",
8209- L" MEMFORGE v0.4.19 SUMMARY | %d sec | Cores %d/%d"),
8228+ T(L" MEMFORGE v0.4.20 ИТОГИ | %d сек | Ядра %d/%d",
8229+ L" MEMFORGE v0.4.20 SUMMARY | %d sec | Cores %d/%d"),
82108230 (UINT32)(total_ms / 1000),
82118231 (UINT32)g_n_enabled, (UINT32)g_n_cores);
82128232 say_at_rc(0, hrow, buf);
@@ -8288,18 +8308,23 @@ static void render_summary(UINT64 total_ms) {
82888308 CHAR16 chip[64] = L"";
82898309 if (didx >= 0)
82908310 chip_label_for_bit((UINT32)didx, bp, chip, 64);
8311+ /* v0.4.20 — use SMBIOS Type 17 locator string ("DDR4-B2")
8312+ instead of array-index-based "DIMM%d" which had nothing
8313+ to do with the physical slot label the user sees. */
8314+ CHAR8 *loc = (didx >= 0 && g_dimms[didx].locator[0])
8315+ ? g_dimms[didx].locator : (CHAR8*)"?";
82918316 if (didx >= 0 && chip[0]) {
82928317 /* Full info: DIMM + exact chip designator */
82938318 SPrint(sb, sizeof(sb),
8294- T(L"⚠ Застрял бит D[%d] → DIMM%d , %s: %d ошибок",
8295- L"⚠ Stuck bit D[%d] → DIMM%d , %s: %d errors"),
8296- bp, didx + 1 , chip, stuck_n);
8319+ T(L"⚠ Застрял бит D[%d] → %a , %s: %d ошибок",
8320+ L"⚠ Stuck bit D[%d] → %a , %s: %d errors"),
8321+ bp, loc , chip, stuck_n);
82978322 } else if (didx >= 0) {
82988323 /* DIMM known, exact chip not — say so plainly */
82998324 SPrint(sb, sizeof(sb),
8300- T(L"⚠ Застрял бит D[%d] → DIMM%d (точный чип не определён по SPD): %d ошибок",
8301- L"⚠ Stuck bit D[%d] → DIMM%d (exact chip unknown per SPD): %d errors"),
8302- bp, didx + 1 , stuck_n);
8325+ T(L"⚠ Застрял бит D[%d] → %a (точный чип не определён по SPD): %d ошибок",
8326+ L"⚠ Stuck bit D[%d] → %a (exact chip unknown per SPD): %d errors"),
8327+ bp, loc , stuck_n);
83038328 } else {
83048329 SPrint(sb, sizeof(sb),
83058330 T(L"⚠ Застрял бит D[%d] (планку определить не удалось): %d ошибок",
@@ -8839,7 +8864,7 @@ static void write_json_report(UINT64 total_ms) {
88398864 L"\"at\":{\"t_ms\":%ld,\"temp_c\":%d,\"pkg_w\":%d,"
88408865 L"\"throttle\":%d,\"vid_mv\":%d}}",
88418866 (i > 0) ? "," : "",
8842- g_tests[ r->test].name , r->core + 1,
8867+ name_for_kernel( r->test) , r->core + 1,
88438868 r->phys_addr, r->expected, r->actual, r->xor_mask, r->pass_idx,
88448869 dimm_lab,
88458870 cc.bank_group, cc.bank, cc.row, cc.column,
@@ -9993,7 +10018,7 @@ EFI_STATUS efi_main(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) {
999310018 }
999410019 }
999510020
9996- log_line(L"=== MemForge2 v0.4.19 init ===");
10021+ log_line(L"=== MemForge2 v0.4.20 init ===");
999710022 log_line(L"[WATCHDOG] UEFI 5-min watchdog disabled at app entry");
999810023 /* Show splash IMMEDIATELY so the user sees the program is alive while
999910024 INI parsing, SMBus probes and SMBIOS walk happen. Without this, the
@@ -10038,7 +10063,7 @@ EFI_STATUS efi_main(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) {
1003810063 if (uefi_call_wrapper(g_gop->QueryMode, 4,
1003910064 g_gop, m, &info_sz, &info) != EFI_SUCCESS)
1004010065 continue;
10041- /* v0.4.19 — also log PixelFormat and PixelsPerScanLine
10066+ /* v0.4.20 — also log PixelFormat and PixelsPerScanLine
1004210067 so we can see if a card (e.g. old Radeon HD 4350) only
1004310068 offers BltOnly modes (PixelFormat=3) that prevent
1004410069 direct-fb rendering. */
@@ -10053,7 +10078,7 @@ EFI_STATUS efi_main(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) {
1005310078 log_line(L"[GFX] NO GOP PROTOCOL FOUND — firmware has no UEFI graphics. "
1005410079 L"Falling back to 800x600 default. UI will not render correctly.");
1005510080 }
10056- /* v0.4.19 — MP Services Protocol diagnostic. Without this log it
10081+ /* v0.4.20 — MP Services Protocol diagnostic. Without this log it
1005710082 was impossible to tell from a field report whether multi-core
1005810083 dispatch failed (LocateProtocol error / GetNumberOfProcessors
1005910084 returned 1) or the test was simply running on a single-core
@@ -10668,7 +10693,28 @@ EFI_STATUS efi_main(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) {
1066810693 per-test results to survive that. Cheap (1× per test, not
1066910694 1× per log line). */
1067010695 flush_log_now();
10671- g_summary[i] = r;
10696+ /* v0.4.20 — ACCUMULATE across marathon passes, do not OVERWRITE.
10697+ Pre-v0.4.20 the line was `g_summary[i] = r;` which kept only
10698+ the LAST pass's per-test result. On a 16-hour marathon with
10699+ an intermittent error rate of 1 per pass, that meant the
10700+ final summary table showed "errors: 0" because the most
10701+ recent pass happened to be clean — completely hiding the
10702+ 24 cumulative errors found across earlier passes. Also fed
10703+ into JSON `summary.total_errors: 0` and `verdict: "PASS"`,
10704+ which then misled any automated post-test analyzer. */
10705+ if (g_run_passes_done == 0) {
10706+ /* First pass: initialize with this pass's result */
10707+ g_summary[i] = r;
10708+ } else {
10709+ /* Subsequent passes: accumulate counts; status is "sticky":
10710+ FAIL wins over PASS wins over SKIP. */
10711+ g_summary[i].errors += r.errors;
10712+ g_summary[i].bytes += r.bytes;
10713+ g_summary[i].time_ms += r.time_ms;
10714+ if (r.status == 2) g_summary[i].status = 2; /* FAIL is sticky */
10715+ else if (g_summary[i].status == 0 && r.status == 1)
10716+ g_summary[i].status = 1; /* upgrade SKIP→PASS */
10717+ }
1067210718 /* Bump cumulative error counter shown in the live header. */
1067310719 g_run_total_errors += r.errors;
1067410720
@@ -10763,7 +10809,7 @@ EFI_STATUS efi_main(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) {
1076310809 SPrint(lb, sizeof(lb),
1076410810 L"[ERR] T=%s Core=%d Addr=0x%lx Exp=0x%lx Act=0x%lx XOR=0x%lx DIMM=%s "
1076510811 L"~bg=%d ~bank=%d ~row=0x%lx ~col=0x%x",
10766- g_tests[ r->test].name , r->core + 1,
10812+ name_for_kernel( r->test) , r->core + 1,
1076710813 r->phys_addr, r->expected, r->actual, r->xor_mask,
1076810814 dimm_lab,
1076910815 coords.bank_group, coords.bank, coords.row, coords.column);
0 commit comments