Skip to content

Commit 030bece

Browse files
kanard38knard38
authored and committed
DAOS-19016 test: add fault injection test for daos_eq_poll() error handling
Add a new pool functional test, PoolAutotestEqPollFITest, that verifies the fix for the stale event pointer dereference in the kv_put() / kv_get() spin loops of src/utils/daos_autotest.c (DAOS-19016).

The test enables fault injection point DAOS_FAULT_EQ_POLL_FAIL (ID 135168) via the YAML fault_list section. This causes daos_eq_poll() to return -DER_HG, exercising the rc < 0 break added by the fix.

Verification:
- daos pool autotest exits with rc == 1 (clean failure, no crash)
- DER_HG(-1020) appears in the stderr output
- the pool remains healthy after the expected autotest failure

Features: autotest
Signed-off-by: Cedric Koch-Hofer <cedric.koch-hofer@hpe.com>
1 parent 227f6b7 commit 030bece

3 files changed

Lines changed: 95 additions & 0 deletions

File tree

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""
2+
(C) Copyright 2026 Hewlett Packard Enterprise Development LP.
3+
4+
SPDX-License-Identifier: BSD-2-Clause-Patent
5+
"""
6+
from apricot import TestWithServers
7+
8+
9+
class PoolAutotestEqPollFITest(TestWithServers):
    """Check that daos pool autotest survives an injected daos_eq_poll() failure.

    Regression coverage for DAOS-19016. The kv_put() and kv_get() spin loops
    in src/utils/daos_autotest.c have to stop polling when daos_eq_poll()
    returns a negative error code, instead of dereferencing the stale event
    pointer (evp).

    The failure is provoked with fault injection point DAOS_FAULT_EQ_POLL_FAIL
    (ID 135168), which makes daos_eq_poll() return -DER_HG. A passing run
    shows:
      - daos pool autotest exiting with rc == 1 (no crash or hang)
      - DER_HG(-1020) reported in the error output

    :avocado: recursive
    """

    def test_pool_autotest_eq_poll_fi(self):
        """Verify daos pool autotest fails cleanly when daos_eq_poll() errors out.

        With DAOS_FAULT_EQ_POLL_FAIL (fault ID 135168) enabled through the
        test YAML, run daos pool autotest against a freshly created pool and
        check that it exits with rc == 1, reports DER_HG(-1020) on stderr, and
        that the pool can still be queried afterwards.

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium
        :avocado: tags=pool,daos_cmd,autotest,fault_injection
        :avocado: tags=test_pool_autotest_eq_poll_fi,PoolAutotestEqPollFITest
        """
        self.log_step("Create a pool")
        self.add_pool()
        self.pool.set_query_data()
        autotest_cmd = self.get_daos_command()

        # The fault itself is switched on by the YAML 'fault_list' entry, so
        # nothing needs to be armed here. A failing autotest is the expected
        # outcome: turn off the command's exit-status exception so the
        # CmdResult comes back to us for inspection.
        self.log_step("Run pool autotest with daos_eq_poll fault injection (DAOS-19016)")
        autotest_cmd.exit_status_exception = False
        result = autotest_cmd.pool_autotest(pool=self.pool.identifier)

        self.log_step("Verify autotest exited with the expected error code")
        exit_code = result.exit_status
        if exit_code != 1:
            # Report an unexpected success separately from any other
            # unexpected exit code.
            if exit_code == 0:
                self.fail(
                    "daos pool autotest succeeded unexpectedly; "
                    "expected it to fail due to DAOS_FAULT_EQ_POLL_FAIL injection")
            self.fail(f"Expected exit code 1, got {exit_code}; stderr: {result.stderr_text}")

        self.log_step("Verify DER_HG(-1020) error in autotest output")
        stderr = result.stderr_text
        if "DER_HG(-1020)" not in stderr:
            self.fail(f"Expected 'DER_HG(-1020)' in autotest stderr; got: {stderr}")
        self.log.info(
            "Fault injection correctly propagated DER_HG(-1020) "
            "without stale event pointer dereference")

        # Query the pool once more after the expected autotest failure.
        self.log_step("Confirm pool is still healthy after the expected autotest failure")
        self.pool.set_query_data()
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Test configuration for the daos_eq_poll() fault injection autotest.
# NOTE(review): nesting reconstructed from the key order of the scraped diff;
# confirm against the committed file.
hosts:
  test_servers: 1
  test_clients: 1

timeout: 300

setup:
  start_servers_once: False

server_config:
  name: daos_server
  engines_per_host: 1
  engines:
    0:
      storage: auto

pool:
  size: 20G

# Fault injection point enabled for this test.
faults:
  fault_list:
    - DAOS_FAULT_EQ_POLL_FAIL

src/tests/ftest/util/fault_config_utils.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""
22
(C) Copyright 2019-2024 Intel Corporation.
3+
(C) Copyright 2026 Hewlett Packard Enterprise Development LP
34
45
SPDX-License-Identifier: BSD-2-Clause-Patent
56
"""
@@ -235,6 +236,12 @@
235236
'probability_y': '100',
236237
'interval': '1',
237238
'max_faults': '1'},
239+
'DAOS_FAULT_EQ_POLL_FAIL': {
240+
'id': '135168',
241+
'probability_x': '1000',
242+
'probability_y': '100',
243+
'interval': '100',
244+
'max_faults': '5'},
238245
}
239246

240247

0 commit comments

Comments
 (0)