Skip to content

Commit a04563b

Browse files
j-ororkerestyled-commitsandy31415raul-marquez-csagemini-code-assist[bot]
authored
Updating app reboot logic to make ACL_2_10 test more reliable and consistent (project-chip#43069)
* Updating app reboot logic to make ACL_2_10 test more reliable and consistent * Re-enabling ACL_2_10 and AVSM tests as the updated reboot logic should make these tests more stable in CI * Restyled by ruff * Increasing reboot duration timeout to allow more time for app reboot to complete in CI * Update test_metadata.yaml Re-adding AVSM 2 18-21 to verify the current fix for the reboot logic works for ACL_2_10. * Update test_metadata.yaml Restoring location of the AVSM tests in test_metadata.yaml to match with master * Reenabled AVSM tests that were disabled in CI by removing them from the tests_metadata.yaml script after reestablishing multiple reboots scenario again in matter_testing module and validating it worked with the camera app locally, added comment that factory reset is being implemented by Raul in PR 42848, moved allow_multiple_restarts var into if check for restart mode in app reboot monitor, changes timeout var name to timeout_sec * Restyled by ruff * Restyled by autopep8 * Resolving linting error with trailing whitespace in test runner * Updating comment in test runner for factory reset functionality placeholder, and updating value written to restart flag file for restart_once functionality * Resolving linting error * Apply suggestions from code review by Andrei Co-authored-by: Andrei Litvin <andy314@gmail.com> * Removed logic for single reboot now that the reboot logic holds firmly until after the app-ready-pattern and restart flag file is removed * Restyled by ruff * align reset and reboot logic * Restyled by ruff * Restyled by autopep8 * Update src/python_testing/matter_testing_infrastructure/matter/testing/matter_testing.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> * Applying suggestions from Cecille, while adding Raul's logic for if check of reset_type var to gate factory_reset_config_removal call in order to avoidr issues with reboot logic * Update scripts/tests/run_python_test.py Co-authored-by: Andrei Litvin <andy314@gmail.com> --------- Co-authored-by: Restyled.io <commits@restyled.io> Co-authored-by: Andrei Litvin <andy314@gmail.com> Co-authored-by: Raul Marquez <rmarquez@csa-iot.org> Co-authored-by: Raul Marquez <130402456+raul-marquez-csa@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 4500157 commit a04563b

3 files changed

Lines changed: 34 additions & 36 deletions

File tree

scripts/tests/run_python_test.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ def main_impl(app: str, factory_reset: bool, factory_reset_app_only: bool, app_a
245245
# Remove app config and storage if factory reset is requested
246246
if factory_reset or factory_reset_app_only:
247247
reset_type = FactoryResetType.AppAndController if factory_reset else FactoryResetType.AppOnly
248-
factory_reset_config_removal(reset_type, app_args, script_args)
248+
factory_reset_config_removal(app_args, script_args, reset_type)
249249

250250
app_manager_ref = None
251251
app_manager_lock = threading.Lock()
@@ -361,6 +361,7 @@ def monitor_app_restart_requests(
361361
app_manager_lock,
362362
config: TestRunConfig,
363363
restart_flag_file):
364+
364365
while True:
365366
# Try to read the restart flag file
366367
if not os.path.exists(restart_flag_file):
@@ -370,16 +371,16 @@ def monitor_app_restart_requests(
370371
with open(restart_flag_file, 'r') as f:
371372
flag_file_content = f.read().strip()
372373

373-
# Successfully read the flag file, remove to prevent multiple restarts
374-
os.unlink(restart_flag_file)
375-
log.info("%s requested by test script", flag_file_content.capitalize())
376-
377374
# Determine reset type and remove app/ctrl config and storage
375+
reset_type = None
378376
if flag_file_content == "factory reset":
379377
reset_type = FactoryResetType.AppAndController
378+
380379
elif flag_file_content == "factory reset app only":
381380
reset_type = FactoryResetType.AppOnly
382-
factory_reset_config_removal(reset_type, config.app_args, config.script_args)
381+
382+
if reset_type:
383+
factory_reset_config_removal(config.app_args, config.script_args, reset_type)
383384

384385
# Restart the app
385386
log.info("Restarting app '%s'...", config.app)
@@ -389,6 +390,10 @@ def monitor_app_restart_requests(
389390
new_app_manager.start()
390391
app_manager_ref[0] = new_app_manager
391392

393+
# Successfully read the flag file, remove to prevent multiple restarts
394+
os.unlink(restart_flag_file)
395+
log.info("%s requested by test script", flag_file_content.capitalize())
396+
392397
# Action complete, continue monitoring for additional restart requests
393398
log.info("%s completed, continuing to monitor for additional requests", flag_file_content.capitalize())
394399

@@ -401,8 +406,8 @@ class FactoryResetType(enum.Enum):
401406
def config_files(self, app_args: str, script_args: str) -> typing.Generator[str, None, None]:
402407
"""Yield paths of config/storage files to remove for this reset type."""
403408

404-
# App config files and KVS
405-
yield from glob.glob('/tmp/chip*')
409+
# App config files and KVS, exclude restart flag file
410+
yield from (f for f in glob.glob('/tmp/chip*') if not os.path.basename(f).startswith('chip_test_restart_app'))
406411
yield from glob.glob('/tmp/repl*')
407412

408413
if match := re.search(r"--KVS (?P<path>[^ ]+)", app_args):
@@ -414,7 +419,7 @@ def config_files(self, app_args: str, script_args: str) -> typing.Generator[str,
414419
yield match.group("path")
415420

416421

417-
def factory_reset_config_removal(reset_type: FactoryResetType, app_args: str, script_args: str):
422+
def factory_reset_config_removal(app_args: str, script_args: str, reset_type: FactoryResetType = None):
418423
"""Handles app factory reset requests by removing configuration and storage files."""
419424
for path in reset_type.config_files(app_args, script_args):
420425
log.info("Removing config/storage file, path: '%s'...", path)

src/python_testing/matter_testing_infrastructure/matter/testing/matter_testing.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,15 +1511,18 @@ async def request_device_reboot(self):
15111511
else:
15121512
try:
15131513
# Create the restart flag file to signal the test runner
1514+
# Allow for multiple reboots like SW update tests do using the "restart" mode
1515+
restart_text = "restart"
15141516
with open(restart_flag_file, "w") as f:
1515-
f.write("restart")
1517+
f.write(restart_text)
15161518
LOGGER.info("Created restart flag file to signal app reboot")
15171519

1518-
# The test runner will automatically wait for the app-ready-pattern before continuing
1519-
1520-
# Expire sessions and re-establish connections
1520+
# Expire sessions before the monitor picks up the flag
15211521
self._expire_sessions_on_all_controllers()
1522-
LOGGER.info("App restart completed successfully")
1522+
1523+
await self.wait_for_restart_flag_file_removal(restart_flag_file, restart_text)
1524+
1525+
LOGGER.info("App reboot completed successfully")
15231526

15241527
except Exception as e:
15251528
LOGGER.error(f"Failed to reboot app: {e}")
@@ -1559,17 +1562,27 @@ async def request_device_factory_reset(self, reset_ctrl: bool = False) -> None:
15591562
f.write(restart_flag_text)
15601563
LOGGER.info("Created restart flag file to signal %s request", restart_flag_text)
15611564

1562-
# The test runner will automatically wait for the app-ready-pattern before continuing
1563-
15641565
# Expire sessions and re-establish connections
15651566
self._expire_sessions_on_all_controllers()
15661567
LOGGER.info("%s request sent successfully", restart_flag_text.capitalize())
15671568

1569+
await self.wait_for_restart_flag_file_removal(restart_flag_file, restart_flag_text)
1570+
15681571
except Exception as e:
15691572
err = f"Failed to {restart_flag_text}: {e}"
15701573
LOGGER.error(err)
15711574
asserts.fail(err)
15721575

1576+
async def wait_for_restart_flag_file_removal(self, restart_flag_file, restart_flag_text, timeout_sec=30.0):
1577+
# Wait for the monitor thread to remove the flag file
1578+
# The monitor deletes the flag file AFTER the restart completes, so this ensures
1579+
# the app has fully rebooted and is ready before we continue
1580+
start_time = time.time()
1581+
while os.path.exists(restart_flag_file):
1582+
if time.time() - start_time > timeout_sec:
1583+
asserts.fail(f"App {restart_flag_text} did not complete within timeout (flag file still exists)")
1584+
await asyncio.sleep(0.1)
1585+
15731586

15741587
def _async_runner(body, self: MatterBaseTest, *args, **kwargs):
15751588
"""Runs an async function within the test's event loop with a timeout.

src/python_testing/test_metadata.yaml

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,6 @@ not_automated:
1313
reason: Code/Test not being used or not shared code for any other tests
1414
- name: TC_AVSMTestBase.py
1515
reason: Shared code for TC_AVSM, not a standalone test
16-
- name: TC_AVSM_2_18.py
17-
reason:
18-
AVSM flaky test in CI.
19-
https://github.com/project-chip/connectedhomeip/issues/42871
20-
- name: TC_AVSM_2_19.py
21-
reason:
22-
AVSM flaky test in CI.
23-
https://github.com/project-chip/connectedhomeip/issues/42871
24-
- name: TC_AVSM_2_20.py
25-
reason:
26-
AVSM flaky test in CI.
27-
https://github.com/project-chip/connectedhomeip/issues/42871
28-
- name: TC_AVSM_2_21.py
29-
reason:
30-
AVSM flaky test in CI.
31-
https://github.com/project-chip/connectedhomeip/issues/42871
3216
- name: TC_BRBINFO_3_1.py
3317
reason:
3418
Attribute is not implemented in the bridge app. Same code as
@@ -210,10 +194,6 @@ not_automated:
210194
reason:
211195
Cluster not finalized. Will re-enable before 1.6TE2. CI is not
212196
supported.
213-
- name: TC_ACL_2_10.py
214-
reason:
215-
Flaky, see
216-
https://github.com/project-chip/connectedhomeip/issues/43056
217197

218198
# This is a list of slow tests (just arbitrarily picked around 20 seconds)
219199
# used in some script reporting for "be patient" messages as well as potentially

0 commit comments

Comments
 (0)