Skip to content

Commit ff98d6c

Browse files
zahclaude
andcommitted
M9.R.42.1: characterise the sgdisk + udev race via REPRO_DISK_DIAG
PHASE A of M9.R.42 — per the M9.R.41.13 close-out + user's standing "diagnose before speculating on fixes" principle: instrument disk_apply.nim with a kernel-state snapshot hook before proposing ANY fix shape. When REPRO_DISK_DIAG=<path> is set, applyDiskLayout appends a labelled snapshot block to <path> around every sgdisk + partprobe call. Each block captures: - /proc/partitions - ls -la /dev/<diskBase>* (the partition device nodes) - ls /sys/class/block - ls /sys/block/<diskBase>/ + ../size - ls /dev/disk/by-partuuid - udevadm settle --timeout=10 + its exit code The snapshots fire at six labelled boundaries per disk: before-table-<diskName> / after-table-<diskName> before-sgdisk-n-<partName> / after-sgdisk-n-<partName> (per part) before-partprobe-<diskName> / after-partprobe-<diskName> This gives the M9.R.41.13 evidence audit the time-series of kernel state needed to determine whether sgdisk's exit-4 false alarm is: (a) udev hasn't drained when our validation runs → fix: udevadm settle (b) sgdisk genuinely buggy in this kernel/virtio combo → fix: sfdisk swap (c) intermittent BLKRRPART race → fix: bounded retry 3 unit tests in t_m9r42_1_disk_diag_hook.nim pin the behaviour: 1. diag OFF (env unset) → no file IO, hot path stays clean 2. snapshotKernelState renders the label/device header + each "$ <cmd>" probe line 3. diag ON wires through applyDiskLayout: every labelled before/after pair lands in the diag file (verified against a 2-partition fixture) Existing t_m9r22b_1 + t_m9r22b_3 tests still pass. This is purely additive: no behavior change when REPRO_DISK_DIAG is unset. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 9a5ec1b commit ff98d6c

2 files changed

Lines changed: 229 additions & 1 deletion

File tree

libs/repro_profile/src/repro_profile/disk_apply.nim

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,16 @@
2020
## All operations go through ``./disk_tools.nim`` so the apply path is
2121
## uniformly dry-run-able (``REPRO_DISK_DRY_RUN=1``) and uniformly
2222
## errors via ``DiskToolError``.
23+
##
24+
## M9.R.42.1: when ``REPRO_DISK_DIAG=<path>`` is set, a kernel-state
25+
## snapshot is appended to ``<path>`` around each sgdisk + partprobe
26+
## call so the udev/devtmpfs/sgdisk race the M9.R.41 close-out
27+
## documented can be characterised end-to-end on the live ISO. The
28+
## snapshot includes ``/proc/partitions``, ``ls /dev/<diskBase>*``,
29+
## ``ls /sys/class/block``, ``/sys/block/<diskBase>``, ``/dev/disk/by-partuuid``,
30+
## and the exit code of ``udevadm settle --timeout=10``.
2331

24-
import std/[algorithm, options, os, osproc, sequtils, strutils, tables]
32+
import std/[algorithm, options, os, osproc, sequtils, strutils, tables, times]
2533

2634
import ./types
2735
import ./disk_tools
@@ -61,6 +69,82 @@ proc newApplyContext*(layout: DiskLayout;
6169
proc recordOperation(ctx: ApplyContext; ex: ExecResult) =
6270
ctx.result.operations.add ex
6371

72+
# ---------------------------------------------------------------------
73+
# M9.R.42.1 — kernel-state diagnostic hook around sgdisk + partprobe.
74+
# ---------------------------------------------------------------------
75+
76+
proc diagPath*(): string {.inline.} =
  ## Destination file for disk-apply diagnostics.
  ##
  ## Yields whatever ``REPRO_DISK_DIAG`` currently holds, or ``""`` when
  ## the variable is not set (diagnostics disabled). Nothing is cached:
  ## the environment is consulted on each call, so a caller may toggle
  ## the variable while the process is running.
  result = getEnv("REPRO_DISK_DIAG", "")
81+
82+
proc diagAppend(text: string) =
  ## Best-effort append of ``text`` to the file named by ``diagPath()``.
  ##
  ## Does nothing when diagnostics are off (empty path). Any catchable
  ## failure while opening or writing is deliberately ignored: the diag
  ## channel exists to inform, and must not be able to abort the apply
  ## driver.
  let target = diagPath()
  if target.len > 0:
    var sink: File
    try:
      # The bool-returning ``open`` reports failure without raising;
      # an unopenable path simply results in nothing being written.
      if open(sink, target, fmAppend):
        try:
          sink.write(text)
        finally:
          sink.close()
    except CatchableError:
      discard
94+
95+
proc diagCmd(label, cmd: string): string =
  ## Run ``cmd`` via ``execCmdEx`` and render a block for the diag file.
  ##
  ## The block consists of a ``$ <cmd>`` header, an ``[exit=N]`` line and
  ## up to ``maxLines`` lines of the captured output, followed by a
  ## ``... (truncated)`` marker when more output was produced.
  ##
  ## ``label`` is accepted for call-site readability but is not rendered;
  ## the header always shows ``cmd`` itself.
  ##
  ## Never raises a ``CatchableError``: a failure to execute the command
  ## is rendered inline as ``[exec-failed: <msg>]``.
  const maxLines = 40
  result = " $ " & cmd & "\n"
  try:
    let (output, exitCode) = execCmdEx(cmd)
    result.add " [exit=" & $exitCode & "]\n"
    # Drop the trailing line terminator(s) before splitting. Otherwise
    # ``splitLines`` yields a final empty element, which would render a
    # blank line after every command and would wrongly flag output of
    # exactly ``maxLines`` lines as truncated.
    let trimmed = output.strip(leading = false, chars = {'\r', '\n'})
    if trimmed.len > 0:
      let lines = trimmed.splitLines()
      for idx, ln in lines:
        if idx >= maxLines:
          result.add " ... (truncated)\n"
          break
        result.add " " & ln & "\n"
  except CatchableError as e:
    result.add " [exec-failed: " & e.msg & "]\n"
112+
113+
proc snapshotKernelState*(label, device: string): string =
  ## Render a labelled, timestamped snapshot of block-device state.
  ##
  ## The block starts with a ``=== M9.R.42.1 SNAPSHOT label=… device=…
  ## ts=… ===`` header and then contains one ``diagCmd`` block for each
  ## of: ``/proc/partitions``, ``ls -la /dev/<base>*``,
  ## ``ls /sys/class/block``, ``ls /sys/block/<base>/``,
  ## ``/sys/block/<base>/size``, ``ls /dev/disk/by-partuuid`` and
  ## ``udevadm settle --timeout=10`` (with its exit status echoed),
  ## where ``<base>`` is the file-name component of ``device``. A final
  ## ``[settle-elapsed-ms=N]`` line records how long the settle probe
  ## took in wall-clock milliseconds.
  ##
  ## Returns the rendered block; nothing is written to disk here.
  ##
  ## NOTE(review): the settle probe itself drains the udev queue, so
  ## taking a snapshot can alter the timing being observed — keep that
  ## in mind when reading an "after" snapshot.
  let base = extractFilename(device)
  result = "\n=== M9.R.42.1 SNAPSHOT label=" & label & " device=" &
    device & " ts=" & $now() & " ===\n"
  # (label, shell command) pairs, rendered in this order.
  # NOTE(review): ``base`` is interpolated into the shell command
  # unquoted; this assumes device names contain no shell metacharacters.
  let probes = [
    ("cat /proc/partitions", "cat /proc/partitions 2>&1"),
    ("ls -la /dev/" & base & "*", "ls -la /dev/" & base & "* 2>&1"),
    ("ls /sys/class/block", "ls /sys/class/block 2>&1"),
    ("ls /sys/block/" & base & "/", "ls /sys/block/" & base & "/ 2>&1"),
    ("cat /sys/block/" & base & "/size",
     "cat /sys/block/" & base & "/size 2>&1"),
    ("ls /dev/disk/by-partuuid", "ls /dev/disk/by-partuuid 2>&1")
  ]
  for probe in probes:
    result.add diagCmd(probe[0], probe[1])
  # Ask udev to drain its queue. A non-zero ``settle-exit`` or a long
  # elapsed time points at udev as the source of a missing
  # /dev/<base>N node.
  let settleStart = epochTime()
  result.add diagCmd("udevadm settle --timeout=10 + status",
                     "udevadm settle --timeout=10 2>&1; echo settle-exit=$?")
  # Clamp at zero in case the wall clock stepped backwards meanwhile.
  let elapsedMs = max(0, int((epochTime() - settleStart) * 1000.0))
  result.add " [settle-elapsed-ms=" & $elapsedMs & "]\n"
141+
142+
proc diagSnapshot*(label, device: string) =
  ## Append a labelled kernel-state snapshot for ``device`` to the file
  ## named by ``REPRO_DISK_DIAG``.
  ##
  ## When the variable is unset this returns immediately: no snapshot is
  ## rendered and no file is touched.
  if diagPath().len > 0:
    diagAppend(snapshotKernelState(label, device))
147+
64148
# Forward declarations for the content-walker so applyDiskLayout can
65149
# call applyContentNode and applyContentNode can call its variants.
66150
proc applyContentNode*(ctx: ApplyContext; device, ctxKey: string;
@@ -124,6 +208,7 @@ proc applyDiskLayout*(layout: DiskLayout;
124208
# (which sgdisk supports via its MBR mode).
125209
for diskName, d in ctx.layout.disks:
126210
let tableKind = if d.`type`.len == 0: "gpt" else: d.`type`
211+
diagSnapshot("before-table-" & diskName, d.device)
127212
if tableKind == "gpt":
128213
# `sgdisk -o` zaps any existing GPT and creates a fresh empty
129214
# GPT in one operation, which avoids the parted-then-sgdisk
@@ -133,6 +218,7 @@ proc applyDiskLayout*(layout: DiskLayout;
133218
else:
134219
# MBR path: parted is the right tool for the label.
135220
ctx.recordOperation(partedMklabel(d.device, tableKind))
221+
diagSnapshot("after-table-" & diskName, d.device)
136222
var num = 1
137223
for pName, p in d.partitions:
138224
let gptType = gptTypeCodeFor(p.`type`)
@@ -148,16 +234,20 @@ proc applyDiskLayout*(layout: DiskLayout;
148234
# aligned sector); subsequent partitions start at "0"
149235
# which is sgdisk's "first available sector".
150236
"0"
237+
diagSnapshot("before-sgdisk-n-" & pName, d.device)
151238
ctx.recordOperation(sgdiskCreatePartition(d.device, num,
152239
startArg, sizeArg, gptType, pName))
240+
diagSnapshot("after-sgdisk-n-" & pName, d.device)
153241
if p.bootable:
154242
ctx.recordOperation(partedSetBootable(d.device, num, true))
155243
inc num
156244
# After writing partitions, ask the kernel to re-read the table
157245
# so /dev/<disk>pN show up for the mkfs / cryptsetup steps.
158246
if findExe("partprobe").len > 0:
247+
diagSnapshot("before-partprobe-" & diskName, d.device)
159248
ctx.recordOperation(execTool("partprobe",
160249
@["partprobe", d.device]))
250+
diagSnapshot("after-partprobe-" & diskName, d.device)
161251

162252
# Step 5 + 6 + 7 + 8: walk each partition's content recursively.
163253
for diskName, d in ctx.layout.disks:
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
## M9.R.42.1 — pin the ``REPRO_DISK_DIAG`` kernel-state snapshot hook.
2+
##
3+
## Spec: ``recipes/reproos-iso/run-evidence/m9r41_complete.txt`` PHASE G
4+
## (the Phase 2 race characterisation handoff).
5+
##
6+
## The M9.R.41.13 close-out documented an sgdisk false-alarm + a
7+
## /dev/vda1 absence race on the M9.R.41 base-rootfs (Debian Trixie
8+
## kernel 6.12.86 + virtio-blk + systemd-udev 257.13). M9.R.41.8-12
9+
## attempted 5 pragmatic workarounds (partprobe + sync, alignment,
10+
## explicit start sector, tolerate exit 4, retry the probe) and all
11+
## five were REVERTED because none actually closed the gap.
12+
##
13+
## M9.R.42.1 starts with characterisation BEFORE proposing a fix:
14+
## ``REPRO_DISK_DIAG=<path>`` causes ``disk_apply.nim`` to append a
15+
## kernel-state snapshot to ``<path>`` before AND after each sgdisk +
16+
## partprobe call. The snapshot captures /proc/partitions, /dev/<base>*,
17+
## /sys/block, /dev/disk/by-partuuid, and an ``udevadm settle`` exit
18+
## so the time-series of state can be inspected post-mortem inside
19+
## the launcher diag tarball.
20+
##
21+
## This test pins:
22+
## 1. The diag hook is OFF when REPRO_DISK_DIAG is unset (the
23+
## hot path stays clean — no file IO, no diag block emitted).
24+
## 2. The diag hook fires when REPRO_DISK_DIAG is set: the diag
25+
## file is created + each labelled snapshot block lands.
26+
## 3. The ``snapshotKernelState`` renderer emits the expected
27+
## label/device header + the ``$ <cmd>`` form per probe.
28+
29+
import std/[os, strutils, tables, unittest]
30+
31+
import repro_profile
32+
import repro_cli_support/disk as cli_disk
33+
34+
const TmpRoot = "build/m9r42_1_tmp"
35+
36+
proc resetDir(sub: string): string =
  ## Give the caller a fresh, empty ``TmpRoot/<sub>`` directory: any
  ## existing directory at that path is removed first, then it is
  ## (re)created. Returns the directory path.
  result = TmpRoot / sub
  if dirExists(result):
    removeDir(result)
  createDir(result)
41+
42+
const FixtureSource = """
43+
import repro_profile
44+
45+
hardware "01M9R42-DIAG":
46+
cpu:
47+
arch: "x86_64"
48+
disko:
49+
disks:
50+
"main":
51+
device: "/dev/loop99"
52+
table: gpt
53+
partitions:
54+
"esp":
55+
kind: esp
56+
size: "512M"
57+
bootable: true
58+
content:
59+
filesystem:
60+
format: "vfat"
61+
mountpoint: "/boot"
62+
"root":
63+
kind: linux
64+
size: "100%"
65+
content:
66+
filesystem:
67+
format: "ext4"
68+
mountpoint: "/"
69+
"""
70+
71+
suite "M9.R.42.1: disk-apply kernel-state diag hook":
72+
73+
setup:
74+
putEnv("REPRO_DISK_DRY_RUN", "1")
75+
76+
teardown:
77+
delEnv("REPRO_DISK_DRY_RUN")
78+
delEnv("REPRO_DISK_DIAG")
79+
80+
test "Test#1: diag OFF when REPRO_DISK_DIAG unset (no file IO)":
81+
delEnv("REPRO_DISK_DIAG")
82+
check diagPath() == ""
83+
# Calling diagSnapshot when off must not create any file.
84+
let dir = resetDir("test1")
85+
let probePath = dir / "should-not-exist.diag"
86+
# Tactically point REPRO_DISK_DIAG at a path and unset it again to
87+
# prove a stale env var doesn't leak.
88+
putEnv("REPRO_DISK_DIAG", probePath)
89+
delEnv("REPRO_DISK_DIAG")
90+
diagSnapshot("test1-label", "/dev/loop99")
91+
check not fileExists(probePath)
92+
93+
test "Test#2: snapshotKernelState renders label + device header":
94+
# Render test: the probes run via execCmdEx but never raise, so the
95+
# block can be introspected even without a real /proc on Windows.
96+
let s = snapshotKernelState("before-sgdisk-n-esp", "/dev/loop99")
97+
check s.contains(
98+
"=== M9.R.42.1 SNAPSHOT label=before-sgdisk-n-esp " &
99+
"device=/dev/loop99 ts=")
100+
# Each probe renders as a "$ <cmd>" line.
101+
check s.contains("$ cat /proc/partitions 2>&1")
102+
check s.contains("$ ls -la /dev/loop99* 2>&1")
103+
check s.contains("$ ls /sys/class/block 2>&1")
104+
check s.contains("$ udevadm settle --timeout=10 2>&1")
105+
106+
test "Test#3: diag ON wires through applyDiskLayout":
107+
let dir = resetDir("test3")
108+
let diagFile = dir / "diag.log"
109+
putEnv("REPRO_DISK_DIAG", diagFile)
110+
check diagPath() == diagFile
111+
# Build a layout fixture via the source path the CLI uses.
112+
let src = dir / "hardware.nim"
113+
writeFile(src, FixtureSource)
114+
# Drive the apply via the CLI under DRY_RUN so the disk tools are
115+
# not really executed. The diag-hook calls still fire around each
116+
# sgdisk + partprobe step (their probes do run via execCmdEx).
117+
let rc = runDiskCommand(@["apply", src, "--confirm"])
118+
check rc == 0
119+
# The diag file must exist and carry at least the snapshot block
120+
# labels we wired in (one BEFORE + one AFTER for the gpt table,
121+
# and one BEFORE + one AFTER for each partition + partprobe).
122+
check fileExists(diagFile)
123+
let body = readFile(diagFile)
124+
check body.contains("label=before-table-main")
125+
check body.contains("label=after-table-main")
126+
check body.contains("label=before-sgdisk-n-esp")
127+
check body.contains("label=after-sgdisk-n-esp")
128+
check body.contains("label=before-sgdisk-n-root")
129+
check body.contains("label=after-sgdisk-n-root")
130+
# partprobe-around snapshots fire only when partprobe is in PATH;
131+
# on the Windows test host findExe returns "" so we expect either
132+
# both partprobe-snapshot blocks or neither — gate on
133+
# before+after consistency.
134+
let hasBeforePartprobe = body.contains(
135+
"label=before-partprobe-main")
136+
let hasAfterPartprobe = body.contains(
137+
"label=after-partprobe-main")
138+
check hasBeforePartprobe == hasAfterPartprobe

0 commit comments

Comments
 (0)