Skip to content

Commit 031b8ce

Browse files
committed
test(hw): add driver-probe-error check to dmesg verification
``assert_no_kernel_faults`` only catches hard kernel faults (panic, oops, BUG, SError). A driver can silently fail to probe — e.g. a bad DT overlay apply, a phandle that doesn't resolve, a clock provider that never appears — and the test reaches the IIO-device assertion with a missing device, producing a confusing "not found" message that obscures the real root cause.

Add ``assert_no_probe_errors(dmesg_txt)`` in ``hw_helpers.py`` that scans for the canonical probe-failure signatures:

- ``probe of <dev> failed with error`` (classic ``device_probe`` return-errno message)
- ``Error applying overlay`` / ``failed to apply overlay`` (DT overlay apply failure)
- ``Error resolving`` (phandle resolution failure)

Reuses ``_DMESG_BENIGN_SUBSTRINGS`` so known-benign probe chatter (firmware loads, ``-EPROBE_DEFER`` retries, ZynqMP early-boot warnings) doesn't fire. ``-EPROBE_DEFER`` was already allowlisted; this commit also adds the raw ``error -517`` string for callers that log the numeric errno instead of the symbolic name.

Wire the new check in next to every existing ``assert_no_kernel_faults`` site:

- ``test_ad9081_zcu102_xsa_hw.py``
- ``test_ad9081_zcu102_system_hw.py``
- ``test_adrv9009_zcu102_hw.py`` (boot dmesg + post-profile-reload dmesg in the outer loop)
- ``test_adrv9371_zc706_hw.py``

Also restructure ``test_fmcdaq3_vcu118_hw.py`` so it actually exercises the new check: switch the fixture from ``target`` to ``board`` (gets teardown power-off for free — VCU118 was previously left powered on) and swap the bespoke ``grep -Ei fail|error`` snippet for ``collect_dmesg`` + the two assertions, matching the four other hw tests.
1 parent 81ea893 commit 031b8ce

6 files changed

Lines changed: 74 additions & 23 deletions

test/hw/hw_helpers.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,8 +294,11 @@ def require_hw_prereqs() -> None:
294294
"failed to load firmware",
295295
# Wifi/USB hotplug noise seen on some Kuiper releases:
296296
"cfg80211: failed to load",
297-
# Harmless driver-level probe deferrals re-tried later:
297+
# Harmless driver-level probe deferrals re-tried later. The kernel
298+
# surfaces these both symbolically (``-EPROBE_DEFER``) and as the
299+
# raw errno (``-517``) depending on the caller.
298300
"EPROBE_DEFER",
301+
"error -517",
299302
# ZynqMP early-boot WARNING: the kernel logs a Call trace through
300303
# gic_of_init / of_irq_init because the RPU-bus interrupt-controller
301304
# cannot be initialized from Linux on ZynqMP. Always benign; the
@@ -346,6 +349,41 @@ def assert_no_kernel_faults(dmesg_txt: str) -> None:
346349
assert not bad, "Kernel fault(s) detected in dmesg:\n" + "\n".join(bad)
347350

348351

352+
# Driver-probe-failure patterns in dmesg. These appear when a probe()
353+
# callback returns a negative errno other than -EPROBE_DEFER (the defer
354+
# path is the normal retry-until-resolved dance and is allowlisted via
355+
# _DMESG_BENIGN_SUBSTRINGS above). Regex, not plain substrings —
356+
# ``probe of <dev> failed with error <N>`` is the canonical kernel
357+
# message. Overlay-apply errors fall in the same bucket because a
358+
# failed overlay almost always cascades into silent probe misses.
359+
_DMESG_PROBE_ERROR_PATTERNS = (
    # Classic driver-core wording:
    #   "<drv>: probe of <dev> failed with error <N>"
    r"probe of \S+ failed with error",
    # Newer kernels reworded the same driver-core message to
    #   "<dev>: probe with driver <drv> failed with error <N>"
    # so both spellings must be recognised or real probe failures slip
    # through on up-to-date kernels.
    r"probe with driver \S+ failed with error",
    # Device-tree overlay apply failures.
    r"Error applying overlay",
    r"failed to apply overlay",
    # phandle resolution failures.
    r"Error resolving",
)


def assert_no_probe_errors(dmesg_txt: str) -> None:
    """Fail the calling test if *dmesg_txt* contains driver-probe errors.

    Complements :func:`assert_no_kernel_faults` — a driver can fail to
    probe without ever producing a kernel fault (e.g. a DT overlay
    apply error, a regulator not showing up, a phandle mismatch).

    Each line of *dmesg_txt* is checked independently:

    * lines containing any entry of :data:`_DMESG_BENIGN_SUBSTRINGS`
      are skipped, so known-benign probe chatter (firmware loads,
      ``-EPROBE_DEFER`` retries, ZynqMP early-boot warnings) does not
      fire;
    * remaining lines matching any regex in
      :data:`_DMESG_PROBE_ERROR_PATTERNS` are collected.

    Raises:
        AssertionError: if at least one offending line was found; the
            message lists every offending line verbatim.
    """
    # Fold every signature into a single alternation so each dmesg line is
    # scanned once instead of once per pattern.
    probe_error_rx = _re.compile(
        "|".join(f"(?:{pattern})" for pattern in _DMESG_PROBE_ERROR_PATTERNS)
    )
    bad: list[str] = [
        line
        for line in dmesg_txt.splitlines()
        # The benign allowlist wins over any error signature on the same line.
        if not any(benign in line for benign in _DMESG_BENIGN_SUBSTRINGS)
        and probe_error_rx.search(line)
    ]
    assert not bad, "Driver probe errors detected in dmesg:\n" + "\n".join(bad)
385+
386+
349387
def shell_out(shell, cmd: str) -> str:
350388
"""Run *cmd* via an ``ADIShellDriver`` and return the output as a string.
351389

test/hw/test_ad9081_zcu102_system_hw.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
acquire_xsa,
4747
assert_jesd_links_data,
4848
assert_no_kernel_faults,
49+
assert_no_probe_errors,
4950
collect_dmesg,
5051
compile_dts_to_dtb,
5152
deploy_and_boot,
@@ -266,6 +267,7 @@ def test_ad9081_zcu102_system_hw(board, built_kernel_image_zynqmp, tmp_path):
266267

267268
# --- 9. Verify: kernel probe + IIO context + JESD DATA state ---
268269
assert_no_kernel_faults(dmesg_txt)
270+
assert_no_probe_errors(dmesg_txt)
269271
assert "AD9081 Rev." in dmesg_txt or "probed ADC AD9081" in dmesg_txt, (
270272
"AD9081 probe signature was not found in kernel dmesg output"
271273
)

test/hw/test_ad9081_zcu102_xsa_hw.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
acquire_xsa,
3636
assert_jesd_links_data,
3737
assert_no_kernel_faults,
38+
assert_no_probe_errors,
3839
collect_dmesg,
3940
compile_dts_to_dtb,
4041
deploy_and_boot,
@@ -184,6 +185,7 @@ def test_ad9081_zcu102_xsa_hw(board, built_kernel_image_zynqmp, tmp_path):
184185

185186
# --- 7. Verify: kernel probe + IIO context + JESD DATA state ---
186187
assert_no_kernel_faults(dmesg_txt)
188+
assert_no_probe_errors(dmesg_txt)
187189
assert "AD9081 Rev." in dmesg_txt or "probed ADC AD9081" in dmesg_txt, (
188190
"AD9081 probe signature was not found in kernel dmesg output"
189191
)

test/hw/test_adrv9009_zcu102_hw.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
acquire_xsa,
3939
assert_jesd_links_data,
4040
assert_no_kernel_faults,
41+
assert_no_probe_errors,
4142
collect_dmesg,
4243
compile_dts_to_dtb,
4344
deploy_and_boot,
@@ -214,6 +215,7 @@ def test_adrv9009_zcu102_hw(board, built_kernel_image_zynqmp, tmp_path):
214215

215216
# --- 7. Verify: kernel probe + IIO context + JESD DATA state ---
216217
assert_no_kernel_faults(dmesg_txt)
218+
assert_no_probe_errors(dmesg_txt)
217219
assert "adrv9009-phy" in dmesg_txt or "Talise" in dmesg_txt, (
218220
"ADRV9009 phy probe signature was not found in kernel dmesg output"
219221
)
@@ -279,5 +281,7 @@ def test_adrv9009_zcu102_hw(board, built_kernel_image_zynqmp, tmp_path):
279281
# relock both links before re-reading sysfs status.
280282
time.sleep(3.0)
281283
assert_jesd_links_data(shell, context=f"after {filename}")
282-
assert_no_kernel_faults(shell_out(shell, "dmesg"))
284+
dmesg = shell_out(shell, "dmesg")
285+
assert_no_kernel_faults(dmesg)
286+
assert_no_probe_errors(dmesg)
283287
print(f" {filename}: RX+TX JESD DATA OK")

test/hw/test_adrv9371_zc706_hw.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
DEFAULT_OUT_DIR,
3737
acquire_xsa,
3838
assert_no_kernel_faults,
39+
assert_no_probe_errors,
3940
collect_dmesg,
4041
compile_dts_to_dtb,
4142
deploy_and_boot,
@@ -137,6 +138,7 @@ def test_adrv9371_zc706_xsa_hw(board, built_kernel_image_zynq, tmp_path):
137138
grep_pattern="ad9371|ad9528|jesd204|mykonos|probe|failed|error",
138139
)
139140
assert_no_kernel_faults(dmesg_txt)
141+
assert_no_probe_errors(dmesg_txt)
140142
assert "ad9371" in dmesg_txt.lower() or "mykonos" in dmesg_txt.lower(), (
141143
"AD9371 driver probe signature not found in dmesg"
142144
)

test/hw/test_fmcdaq3_vcu118_hw.py

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -33,34 +33,37 @@
3333
allow_module_level=True,
3434
)
3535

36+
from test.hw.hw_helpers import ( # noqa: E402
37+
DEFAULT_OUT_DIR,
38+
assert_no_kernel_faults,
39+
assert_no_probe_errors,
40+
collect_dmesg,
41+
)
42+
3643

3744
@pytest.mark.lg_feature(["fmcdaq3", "vcu118"])
38-
def test_fmcdaq3_vcu118_boot_hw(target):
45+
def test_fmcdaq3_vcu118_boot_hw(board):
3946
"""Boot FMCDAQ3+VCU118 with the prebuilt Kuiper image and verify IIO."""
40-
shell = _boot_and_get_shell(target)
41-
_assert_probed_drivers(shell)
42-
_assert_iio_devices(shell)
47+
out_dir = DEFAULT_OUT_DIR
48+
out_dir.mkdir(parents=True, exist_ok=True)
4349

50+
board.transition("shell")
51+
shell = board.target.get_driver("ADIShellDriver")
4452

45-
def _boot_and_get_shell(target):
46-
"""Drive ``BootFabric`` through ``powered_off`` → ``shell`` and return shell."""
47-
strategy = target.get_driver("Strategy")
48-
strategy.transition("powered_off")
49-
strategy.transition("shell")
50-
return target.get_driver("ADIShellDriver")
53+
dmesg_txt = collect_dmesg(
54+
shell,
55+
out_dir,
56+
label="fmcdaq3_vcu118",
57+
grep_pattern="ad9680|ad9152|ad9528|jesd204|probe|failed|error",
58+
)
59+
assert_no_kernel_faults(dmesg_txt)
60+
assert_no_probe_errors(dmesg_txt)
5161

62+
lowered = dmesg_txt.lower()
63+
assert "ad9680" in lowered, "AD9680 driver messages not seen in dmesg"
64+
assert "ad9152" in lowered, "AD9152 driver messages not seen in dmesg"
5265

53-
def _assert_probed_drivers(shell) -> None:
54-
"""Fail unless dmesg shows AD9680 / AD9152 / AD9528 / JESD driver probes."""
55-
out = shell.run_check(
56-
"dmesg | grep -Ei 'ad9680|ad9152|ad9528|jesd204|fail|error' | tail -n 200; true"
57-
)
58-
dmesg = "\n".join(out) if isinstance(out, list) else str(out)
59-
print("\n=== FMCDAQ3 probe-relevant dmesg ===")
60-
print(dmesg)
61-
print("====================================")
62-
assert "ad9680" in dmesg.lower(), "AD9680 driver messages not seen in dmesg"
63-
assert "ad9152" in dmesg.lower(), "AD9152 driver messages not seen in dmesg"
66+
_assert_iio_devices(shell)
6467

6568

6669
def _assert_iio_devices(shell) -> None:

0 commit comments

Comments
 (0)