Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 70 additions & 33 deletions app/smc/pytest/recovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,18 @@
import pyluwen
import sys
import time

from pathlib import Path
from twister_harness import DeviceAdapter

# Import tt_boot_fs utilities
sys.path.append(str(Path(__file__).parents[3] / "scripts"))
import yaml

import tt_boot_fs
import tt_fwbundle

from intelhex import IntelHex
from pathlib import Path
from pcie_utils import rescan_pcie
from e2e_smoke import DeviceAdapter

# Import tt_boot_fs utilities
sys.path.append(str(Path(__file__).parents[3] / "scripts"))

logger = logging.getLogger(__name__)

Expand All @@ -27,14 +28,16 @@
ARC_BOOT_STATUS = 0x80030408


def read_boot_status():
def _suffix_from_path(path: Path) -> str:
stem = path.stem
parts = stem.split("tt_boot_fs", 1)
return parts[1] if len(parts) > 1 else ""


def read_boot_status(chip) -> int:
"""
Helper to read the PCIe status register
"""
chips = pyluwen.detect_chips()
if len(chips) == 0:
raise RuntimeError("PCIe card was not detected on this system")
chip = chips[0]
try:
status = chip.axi_read32(ARC_POSTCODE_STATUS)
except Exception:
Expand All @@ -47,6 +50,24 @@ def read_boot_status():
return chip.axi_read32(ARC_BOOT_STATUS)


def check_recovery_active(recovery: bool) -> bool:
    """
    Checks if the recovery firmware is active based on boot status register

    Every chip reported by pyluwen.detect_chips() is checked. The value
    returned by read_boot_status() is masked with 0x78 and must equal 0x8
    when ``recovery`` is True, or 0x0 when ``recovery`` is False.

    Returns True when all detected chips match the expected state; a
    mismatch is reported by a failing assert rather than a False return.
    Raises RuntimeError when no chips are detected.
    """
    chips = pyluwen.detect_chips()
    if len(chips) == 0:
        raise RuntimeError("PCIe card was not detected on this system")

    # Resolve the expectation once so each chip gets the same comparison
    if recovery:
        expected = 0x8
        message = "Recovery firmware should be active"
    else:
        expected = 0x0
        message = "Recovery firmware should not be active"

    for chip in chips:
        assert (read_boot_status(chip) & 0x78) == expected, message
    # Honor the declared bool return type instead of implicitly returning None
    return True


def test_recovery_cmfw(unlaunched_dut: DeviceAdapter):
"""
Tests flashing a bad base CMFW, and makes sure the SMC boots the recovery
Expand All @@ -55,26 +76,43 @@ def test_recovery_cmfw(unlaunched_dut: DeviceAdapter):
"""
# Get the build directory of the DUT
build_dir = unlaunched_dut.device_config.build_dir
# Get the path to base tt_boot_fs.bin
boot_fs = build_dir / "tt_boot_fs.bin"
patched_fs = build_dir / "tt_boot_fs_patched.bin"
assert boot_fs.exists(), f"tt_boot_fs.bin not found at {boot_fs}"
with open(boot_fs, "rb") as f:
bootfs_data = f.read()
fs = tt_boot_fs.BootFs.from_binary(bootfs_data)
# Write copy of tt_boot_fs to new file
with open(patched_fs, "wb") as f:
f.write(bootfs_data)
# Get offset of base CMFW
cmfw_offset = fs.entries["cmfw"].spi_addr
# Write bad data to base CMFW
with open(patched_fs, "r+b") as f:
f.seek(cmfw_offset)
f.write(b"BAD DATA")
# Make bundle from damaged CMFW

board_fs_dict = {}
# Iterate through tt_boot_fs files for various boards in the build dir
for boot_fs in sorted(build_dir.glob("tt_boot_fs*.hex")):
# Create path for patched tt_boot_fs
suffix = _suffix_from_path(boot_fs)
patched_fs = build_dir / f"tt_boot_fs{suffix}_patched.bin"

assert boot_fs.exists(), f"{boot_fs.name} not found at {boot_fs}"
bootfs_data = IntelHex(str(boot_fs)).tobinarray().tobytes()
fs = tt_boot_fs.BootFs.from_binary(bootfs_data)

# Write copy of tt_boot_fs to new file
patched_fs.write_bytes(bootfs_data)

# Corrupt offset of base CMFW (main image)
smc_offset = fs.entries["mainimg"].spi_addr
with open(patched_fs, "r+b") as f:
f.seek(smc_offset)
f.write(b"BAD DATA")
logger.info(
f"Corrupted data at offset {hex(smc_offset)} for tt_boot_fs{suffix}"
)

# Add damaged tt_boot_fs to dict
yaml_suffix = suffix.replace("-", "_")
boot_fs_yaml = build_dir / f"tt_boot_fs{yaml_suffix}.yaml"
with open(str(boot_fs_yaml), "r") as file:
board_name = yaml.safe_load(file)["name"]
board_fs_dict[board_name] = patched_fs
# Create a new fw bundle with the damaged CMFWs
tt_fwbundle.create_fw_bundle(
build_dir / "tt_boot_fs_patched.bundle", [0, 0, 0, 0], {"P100-1": patched_fs}
build_dir / "tt_boot_fs_patched.bundle",
[99, 99, 99, 99],
board_fs_dict,
)

# Flash the damaged CMFW
unlaunched_dut.command = [
unlaunched_dut.west,
Expand All @@ -85,12 +123,13 @@ def test_recovery_cmfw(unlaunched_dut: DeviceAdapter):
"tt_flash",
"--force",
"--no-rebuild",
"--",
"--file",
str(build_dir / "tt_boot_fs_patched.bundle"),
]
unlaunched_dut._flash_and_run()
time.sleep(1)
assert (read_boot_status() & 0x78) == 0x8, "Recovery firmware should be active"
check_recovery_active(True)

# Flash the good CMFW back. Note- this requires an up to date version of tt-flash
unlaunched_dut.command = [
Expand All @@ -105,6 +144,4 @@ def test_recovery_cmfw(unlaunched_dut: DeviceAdapter):
]
unlaunched_dut._flash_and_run()
time.sleep(1)
assert (read_boot_status() & 0x78) == 0x0, (
"Recovery firmware should no longer be active"
)
check_recovery_active(False)
2 changes: 2 additions & 0 deletions app/smc/sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ tests:
extra_overlay_confs:
- recovery.conf
app.recovery-flash:
timeout: 300
sysbuild: true
tags: recovery
harness: pytest
Expand All @@ -34,6 +35,7 @@ tests:
- pytest/recovery.py
pytest_args:
- "--dut-scope=session"
- "--flash-timeout=200"
app.vuart-bh-1x:
tags: e2e
depends_on: bh_1x
Expand Down
44 changes: 30 additions & 14 deletions scripts/ci/run-e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,28 @@ TT_Z_P_ROOT=$(realpath $(dirname $(realpath $0))/../..)
# Prefer Zephyr base from environment, otherwise use the one in this repo
ZEPHYR_BASE=${ZEPHYR_BASE:-$(realpath $TT_Z_P_ROOT/../zephyr)}

# Run Twister with device testing for a single test tag, flashing via tt_flash.
#   $1   - tag handed to twister's --tag option
#   $2   - suffix of the output directory ($ZEPHYR_BASE/twister-<suffix>)
#   $3.. - additional arguments forwarded unchanged to twister
# Reads ZEPHYR_BASE, SMC_BOARD, TT_Z_P_ROOT, CONSOLE_DEV, ASIC_ID and KEYFILE
# from the calling script.
run_twister_test() {
    # Set tag and outdir suffix args and remove from argument list
    local tag=$1
    local outdir_suffix=$2
    shift 2

    # Common Twister execution. Every expansion is quoted so that paths
    # containing whitespace or glob characters reach twister as one word.
    "$ZEPHYR_BASE/scripts/twister" -i -p "$SMC_BOARD" \
        --tag "$tag" -T "$TT_Z_P_ROOT/app" \
        --west-flash="--force,--allow-major-downgrades" \
        --west-runner tt_flash \
        --device-testing -c \
        --device-flash-timeout 240 \
        --device-serial-pty "$TT_Z_P_ROOT/scripts/smc_console.py -d $CONSOLE_DEV -p" \
        --failure-script "$TT_Z_P_ROOT/scripts/smc_test_recovery.py --asic-id $ASIC_ID" \
        --flash-before \
        --outdir "$ZEPHYR_BASE/twister-$outdir_suffix" \
        "--extra-args=SB_CONFIG_BOOT_SIGNATURE_KEY_FILE=\"$KEYFILE\"" \
        -ll DEBUG \
        "$@"
}

function print_help {
echo -n "Usage: $0 [-p <pcie_index>] [-t test_set] [-k <keyfile>] "
echo "<board_name> -- [additional twister args]"
Expand Down Expand Up @@ -55,7 +77,7 @@ export CONSOLE_DEV
export BOARD

if [ -z "$TEST_SET" ]; then
TEST_SET=":e2e-flash"
TEST_SET=":e2e-flash:recovery-flash"
fi

if [ -z "$KEYFILE" ]; then
Expand Down Expand Up @@ -83,19 +105,8 @@ make -C $TT_Z_P_ROOT/scripts/tooling -j$(nproc)

if [[ "$TEST_SET" == *"e2e-flash"* ]]; then
# Run a full flash test, using tt-flash as the runner
$ZEPHYR_BASE/scripts/twister -i -p $SMC_BOARD \
--tag e2e-flash -T $TT_Z_P_ROOT/app \
--west-flash="--force,--allow-major-downgrades" \
--west-runner tt_flash \
--device-testing -c \
--device-flash-timeout 240 \
--device-serial-pty "$TT_Z_P_ROOT/scripts/smc_console.py -d $CONSOLE_DEV -p" \
--failure-script "$TT_Z_P_ROOT/scripts/smc_test_recovery.py --asic-id $ASIC_ID" \
--flash-before \
--outdir $ZEPHYR_BASE/twister-e2e-flash \
--extra-args=SB_CONFIG_BOOT_SIGNATURE_KEY_FILE=\"$KEYFILE\" \
-ll DEBUG \
$@
run_twister_test "e2e-flash" "e2e-flash" "$@"

# Restore a stable DMFW, since the copy flashed by BL2 tests will
# leave the DMC flash in a different state than other tests expect
# We erase the DMC flash first to ensure no old image fragments remain
Expand All @@ -106,3 +117,8 @@ if [[ "$TEST_SET" == *"e2e-flash"* ]]; then
west flash -d $ZEPHYR_BASE/build-dmc --domain dmc
rm -rf $ZEPHYR_BASE/build-dmc
fi

if [[ "$TEST_SET" == *"recovery-flash"* ]]; then
# Run a recovery flash test
run_twister_test "recovery" "recovery-flash" "$@"
fi
Loading