Skip to content

Commit 8d67d0c

Browse files
committed
app: smc: pytest: add e2e tests for power
Add e2e stress test to test power ioctl. Ensure a suitable power delta is achieved and that the device remains active. Needs KMD 2.6.0, otherwise the test is skipped. Signed-off-by: James Growden <jgrowden@tenstorrent.com>
1 parent 2bd1266 commit 8d67d0c

File tree

2 files changed

+84
-12
lines changed

2 files changed

+84
-12
lines changed

app/smc/pytest/e2e_smoke.py

Lines changed: 68 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -98,19 +98,16 @@ def unlaunched_dut(fwbundle):
9898
# Telemetry tags
9999
TAG_CM_FW_VERSION = 29
100100
TAG_DM_APP_FW_VERSION = 26
101+
TAG_TDP = 7
101102

102103
NUM_PD = 16
103104
NUM_VM = 8
104105
NUM_TS = 8
105106

106107

107-
def read_telem(asic_id, telem_idx):
108-
chip = pyluwen.detect_chips()[asic_id]
109-
110-
table_addr = chip.axi_read32(TELEMETRY_DATA_REG_ADDR)
111-
telem = chip.axi_read32(table_addr + telem_idx * 4)
112-
113-
del chip
108+
def read_telem(arc_chip, telem_idx):
    """
    Read one 32-bit telemetry entry from an already-opened chip handle.

    The telemetry table's address is itself read from
    TELEMETRY_DATA_REG_ADDR; the entry is then fetched at a 4-byte stride
    from that base.
    """
    table_base = arc_chip.axi_read32(TELEMETRY_DATA_REG_ADDR)
    entry_addr = table_base + telem_idx * 4
    return arc_chip.axi_read32(entry_addr)
116113

@@ -278,19 +275,21 @@ def upgrade_from_version_test(
278275
wait_arc_boot(0, timeout=20)
279276

280277
time.sleep(0.5)
281-
assert dmfw_version_base == read_telem(0, TAG_DM_APP_FW_VERSION)
282-
assert cmfw_version_base == read_telem(0, TAG_CM_FW_VERSION)
283-
278+
chip = pyluwen.detect_chips()[0]
279+
assert dmfw_version_base == read_telem(chip, TAG_DM_APP_FW_VERSION)
280+
assert cmfw_version_base == read_telem(chip, TAG_CM_FW_VERSION)
281+
del chip
284282
# flash firmware to update to
285283
unlaunched_dut.launch()
286284

287285
time.sleep(0.5)
286+
chip = pyluwen.detect_chips()[0]
288287
assert get_ttzp_version.get_ttzp_version_u32(
289288
TTZP / "app/dmc/VERSION"
290-
) == read_telem(0, TAG_DM_APP_FW_VERSION)
289+
) == read_telem(chip, TAG_DM_APP_FW_VERSION)
291290
assert get_ttzp_version.get_ttzp_version_u32(
292291
TTZP / "app/smc/VERSION"
293-
) == read_telem(0, TAG_CM_FW_VERSION)
292+
) == read_telem(chip, TAG_CM_FW_VERSION)
294293

295294

296295
def pvt_comprehensive_test(arc_chip_dut, asic_id):
@@ -937,3 +936,60 @@ def test_pvt_comprehensive(arc_chip_dut, asic_id):
937936
The expectation is that the SMC response to these messages is 0.
938937
"""
939938
assert 0 == pvt_comprehensive_test(arc_chip_dut, asic_id), "test_pvt_msgs failed"
939+
940+
941+
def power_state_toggle_test(arc_chip_dut, asic_id):
    """
    Test toggling between high and low power states and verify TDP delta.

    Sets the chip to the high power state, reads the TDP telemetry entry,
    switches to the low power state, reads it again, and asserts that the
    difference is greater than 60W. The chip is always put back into the
    high power state before returning, including when a measurement or the
    delta check fails, so a failure here does not leave the device in low
    power for whatever runs next.

    Args:
        arc_chip_dut: Test fixture; not used directly in this function.
        asic_id: Index into ``pyluwen.detect_chips()`` selecting the chip.

    Returns:
        0 on success. A delta at or below the threshold raises
        ``AssertionError`` instead of returning a non-zero value.

    If the first ``set_power_state`` call raises, the test is skipped via
    ``pytest.skip`` on the assumption that the driver lacks the power state
    IOCTL.
    """
    expected_power_delta = 60
    settling_time = 0.5
    arc_chip = pyluwen.detect_chips()[asic_id]

    try:
        logger.info("Setting power state to high")
        arc_chip.set_power_state("high")
    except Exception as e:
        # NOTE(review): any failure of the first call is treated as "no driver
        # support"; narrow this once the exception pyluwen raises is confirmed.
        logger.info(f"No driver support for power state IOCTL: {e}")
        pytest.skip("Driver does not support power state control")

    try:
        time.sleep(settling_time)  # Allow power state to stabilize

        # Measure TDP in high power state
        high_power_tdp = read_telem(arc_chip, TAG_TDP)
        logger.info(f"High power state TDP: {high_power_tdp}W")

        logger.info("Setting power state to low")
        arc_chip.set_power_state("low")
        time.sleep(settling_time)  # Allow power state to stabilize

        # Measure TDP in low power state
        low_power_tdp = read_telem(arc_chip, TAG_TDP)
        logger.info(f"Low power state TDP: {low_power_tdp}W")

        # Calculate delta
        tdp_delta = high_power_tdp - low_power_tdp
        logger.info(f"TDP delta: {tdp_delta}W")

        # Verify delta is greater than 60W
        assert tdp_delta > expected_power_delta, (
            f"TDP delta ({tdp_delta}W) is not greater than {expected_power_delta}W"
        )
    finally:
        # Restore on every exit path so a failed check cannot strand the
        # device in the low power state.
        logger.info("Restoring power state to high")
        arc_chip.set_power_state("high")

    return 0
987+
988+
989+
def test_power_state_toggle(arc_chip_dut, asic_id):
    """
    Smoke test: one high/low power state toggle must yield a TDP delta > 60W.

    Delegates to power_state_toggle_test and requires it to return 0.
    """
    status = power_state_toggle_test(arc_chip_dut, asic_id)
    assert 0 == status, "power_state_toggle_test failed"

app/smc/pytest/e2e_stress.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
voltage_monitors_test,
2121
process_detectors_test,
2222
temperature_sensors_test,
23+
power_state_toggle_test,
2324
)
2425

2526
# Needed to keep ruff from complaining about this "unused import"
@@ -322,6 +323,21 @@ def test_pvt_comprehensive(arc_chip_dut, asic_id):
322323
assert fail_count == 0, f"{test_name} failed {fail_count} times."
323324

324325

326+
def test_power_state_toggle(arc_chip_dut, asic_id):
    """
    Stress test: repeat the power state toggle check and tally failures.

    Runs power_state_toggle_test up to min(MAX_TEST_ITERATIONS, 100) times,
    reports the totals through report_results, and fails if any iteration
    failed. An iteration counts as failed when the helper returns a
    positive count or raises AssertionError.
    """
    test_name = "Power state toggle test"
    total_tries = min(MAX_TEST_ITERATIONS, 100)
    fail_count = 0

    for iteration in range(total_tries):
        try:
            fc = power_state_toggle_test(arc_chip_dut, asic_id)
        except AssertionError as err:
            # The helper reports a bad TDP delta by asserting rather than by
            # returning a count. Tally it here so the remaining iterations
            # still run and report_results sees the full picture.
            logger.error(f"Assertion failed: {err}")
            fc = 1
        if fc > 0:
            logger.error(f"Failed in iteration {iteration}")
            fail_count += fc

    report_results(test_name, fail_count, total_tries)
    assert fail_count == 0, f"{test_name} failed {fail_count} times."
339+
340+
325341
def test_power_virus(arc_chip_dut, asic_id):
326342
"""
327343
- Run the power virus TTX workload (tt-burnin) for 180 seconds

0 commit comments

Comments
 (0)