Skip to content

Commit 6a6af72

Browse files
committed
lib: bh_arc: reset: add cable fault low-power mode
Read the cable power limit from SCRATCH_1 (written by DMC via JTAG). If the power limit is 0, enter cable fault mode to minimize power draw. Add a magic marker in the upper 16 bits of the register for backward compatibility with legacy DMC firmware.

In cable fault mode:
- Keep tensixes, ETH, GDDR, and L2CPU in reset
- Always deassert NOC, system, and PCIe resets (needed for the ARC-PCIe communication path)
- Skip RISC-V and soft reset sequences

This allows the board to remain accessible via PCIe even when the 12V-2x6 power cable is missing or improperly installed.

Signed-off-by: Sherry Li <xiaoruli@tenstorrent.com>
1 parent b0b6428 commit 6a6af72

File tree

7 files changed

+75
-9
lines changed

7 files changed

+75
-9
lines changed

app/dmc/src/main.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,7 @@ static void handle_perst(void)
461461

462462
bharc_disable_i2cbus(&chip->config.arc);
463463
jtag_bootrom_reset_asic(chip);
464+
jtag_bootrom_set_cable_power_limit(chip, chip->data.cable_power_limit);
464465
jtag_bootrom_soft_reset_arc(chip);
465466
jtag_bootrom_teardown(chip);
466467
bharc_enable_i2cbus(&chip->config.arc);

include/tenstorrent/bh_chip.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ struct bh_chip_data {
8787
/* Last seen CM2DM message sequence number, to know if the current message is a repeat. */
8888
uint8_t last_cm2dm_seq_num;
8989
bool last_cm2dm_seq_num_valid;
90+
91+
/* Cable power limit detected at boot, written to scratch register during resets. */
92+
uint16_t cable_power_limit;
9093
};
9194

9295
struct bh_chip {
@@ -139,13 +142,14 @@ extern struct bh_chip BH_CHIPS[BH_CHIP_COUNT];
139142
INIT_STRAP)), \
140143
())}, \
141144
}, \
142-
.auto_reset_timer = Z_TIMER_INITIALIZER( \
143-
BH_CHIPS[idx].auto_reset_timer, bh_chip_auto_reset, NULL), \
145+
.auto_reset_timer = Z_TIMER_INITIALIZER( \
146+
BH_CHIPS[idx].auto_reset_timer, \
147+
bh_chip_auto_reset, NULL), \
144148
},
145149

146150
#define BH_CHIP_PRIMARY_INDEX DT_PROP(DT_PATH(chips), primary)
147151

148-
int jtag_bootrom_reset_sequence(struct bh_chip *chip, bool force_reset);
152+
int jtag_bootrom_reset_sequence(struct bh_chip *chip, bool force_reset, uint16_t cable_power_limit);
149153

150154
void bh_chip_cancel_bus_transfer_set(struct bh_chip *chip);
151155
void bh_chip_cancel_bus_transfer_clear(struct bh_chip *chip);

lib/tenstorrent/bh_arc/reset.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ LOG_MODULE_REGISTER(InitHW, CONFIG_TT_APP_LOG_LEVEL);
4646
static const struct device *const fwtable_dev = DEVICE_DT_GET(DT_NODELABEL(fwtable));
4747
STATUS_ERROR_STATUS0_reg_u error_status0;
4848

49+
/* Cable fault mode: true when DMC reports 0W power limit (no cable or improper installation).
50+
* In this mode, we keep tensixes, ETH, GDDR, and L2CPU in reset to minimize power draw,
51+
* while maintaining the ARC-PCIe NOC path for host communication.
52+
*/
53+
static bool cable_fault_mode;
54+
4955
static const uint8_t kNocRing;
5056
static const uint8_t kNocTlb;
5157
static const uint32_t kSoftReset0Addr = 0xFFB121B0; /* NOC address in each tile */
@@ -68,6 +74,11 @@ static int AssertSoftResets(void)
6874
return 0;
6975
}
7076

77+
/* In cable fault mode, tiles are already in reset - skip NOC writes to them */
78+
if (cable_fault_mode) {
79+
return 0;
80+
}
81+
7182
/* Assert Soft Reset for ERISC, MRISC Tensix (skip L2CPU due to bug) */
7283
bh_soft_reset_all_tensix();
7384

@@ -113,6 +124,11 @@ static int DeassertRiscvResets(void)
113124
return 0;
114125
}
115126

127+
/* In cable fault mode, skip RISC-V deasserts to keep cores in reset */
128+
if (cable_fault_mode) {
129+
return 0;
130+
}
131+
116132
/* Go back to PLL bypass, since RISCV resets need to be deasserted at low speed */
117133
ARRAY_FOR_EACH(pll_devs, i) {
118134
clock_control_configure(pll_devs[i], NULL,
@@ -199,12 +215,37 @@ static int DeassertTileResets(void)
199215
return 0;
200216
}
201217

218+
/* Read cable power limit with magic marker check for backward compatibility.
219+
* - If magic marker present: new DMC, check power limit (0 = cable fault)
220+
* - If magic marker absent: legacy DMC, skip cable fault detection
221+
*/
222+
uint32_t raw_value = ReadReg(DMC_CABLE_POWER_LIMIT_REG_ADDR);
223+
224+
if ((raw_value & CABLE_POWER_LIMIT_MAGIC_MASK) == CABLE_POWER_LIMIT_MAGIC) {
225+
/* New DMC with cable power limit feature */
226+
uint16_t cable_power_limit = raw_value & CABLE_POWER_LIMIT_VALUE_MASK;
227+
228+
LOG_INF("Cable Power Limit: %u", cable_power_limit);
229+
230+
if (cable_power_limit == 0) {
231+
cable_fault_mode = true;
232+
error_status0.f.cable_fault = 1;
233+
LOG_WRN("Cable fault detected (0W power limit). "
234+
"Entering low-power mode - keeping tensixes, ETH, GDDR, L2CPU in "
235+
"reset.");
236+
}
237+
} else {
238+
/* Legacy DMC without cable power limit feature - skip cable fault check */
239+
LOG_INF("Legacy DMC detected (no cable power feature), skipping cable fault check");
240+
}
241+
202242
/* Put all PLLs back into bypass, since tile resets need to be deasserted at low speed */
203243
ARRAY_FOR_EACH(pll_devs, i) {
204244
clock_control_configure(pll_devs[i], NULL,
205245
(void *)CLOCK_CONTROL_TT_BH_CONFIG_BYPASS);
206246
}
207247

248+
/* Always deassert NOC, system, and PCIe resets - needed for ARC-PCIe communication */
208249
RESET_UNIT_GLOBAL_RESET_reg_u global_reset = {.val = RESET_UNIT_GLOBAL_RESET_REG_DEFAULT};
209250

210251
global_reset.f.noc_reset_n = 1;
@@ -213,6 +254,11 @@ static int DeassertTileResets(void)
213254
global_reset.f.ptp_reset_n_refclk = 1;
214255
WriteReg(RESET_UNIT_GLOBAL_RESET_REG_ADDR, global_reset.val);
215256

257+
/* In cable fault mode, keep tensixes, ETH, GDDR, and L2CPU in reset to minimize power */
258+
if (cable_fault_mode) {
259+
return 0;
260+
}
261+
216262
RESET_UNIT_ETH_RESET_reg_u eth_reset = {.val = RESET_UNIT_ETH_RESET_REG_DEFAULT};
217263

218264
eth_reset.f.eth_reset_n = 0x3fff;

lib/tenstorrent/bh_arc/status_reg.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,16 @@
1818

1919
/* SCRATCH_[0-7] */
2020
#define STATUS_POST_CODE_REG_ADDR RESET_UNIT_SCRATCH_REG_ADDR(0)
21-
/* Cable power limit written by DMC via JTAG before ARC boot (0 = cable fault) */
21+
/* Cable power limit written by DMC via JTAG before ARC boot.
22+
* Format: [31:16] = magic marker, [15:0] = power limit in watts
23+
* Magic marker presence indicates DMC supports this feature.
24+
* If magic marker absent (legacy DMC), SMC skips cable fault detection.
25+
* If magic marker present and power_limit=0, cable fault is detected.
26+
*/
2227
#define DMC_CABLE_POWER_LIMIT_REG_ADDR RESET_UNIT_SCRATCH_REG_ADDR(1)
28+
#define CABLE_POWER_LIMIT_MAGIC 0xCAB10000 /* Magic marker in upper 16 bits */
29+
#define CABLE_POWER_LIMIT_MAGIC_MASK 0xFFFF0000
30+
#define CABLE_POWER_LIMIT_VALUE_MASK 0x0000FFFF
2331

2432
/* SCRATCH_RAM[0-63] */
2533
#define STATUS_FW_VERSION_REG_ADDR RESET_UNIT_SCRATCH_RAM_REG_ADDR(0)

lib/tenstorrent/bh_chip/bh_chip.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ int bh_chip_reset_chip(struct bh_chip *chip, bool force_reset)
171171
return ret;
172172
}
173173

174-
ret2 = jtag_bootrom_reset_sequence(chip, force_reset);
174+
ret2 = jtag_bootrom_reset_sequence(chip, force_reset, chip->data.cable_power_limit);
175175

176176
ret = bharc_enable_i2cbus(&chip->config.arc);
177177
if (ret != 0) {

lib/tenstorrent/jtag_bootrom/jtag_bootrom.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -318,10 +318,15 @@ void jtag_bootrom_set_cable_power_limit(struct bh_chip *chip, uint16_t power_lim
318318
#ifdef CONFIG_JTAG_LOAD_BOOTROM
319319
const struct device *dev = chip->config.jtag;
320320

321-
/* Write cable power limit to SCRATCH_1 for SMC to read at boot.
322-
* A value of 0 indicates cable fault (no cable or improper installation).
321+
/* Write cable power limit with magic marker for SMC to detect feature support.
322+
* Format: [31:16] = CABLE_POWER_LIMIT_MAGIC, [15:0] = power_limit
323+
* Legacy SMC will read this as a large positive value (not 0), so safe.
324+
* New SMC checks for magic marker to enable cable fault detection.
325+
* A power_limit of 0 indicates cable fault (no cable or improper installation).
323326
*/
324-
jtag_axi_write32(dev, DMC_CABLE_POWER_LIMIT_REG_ADDR, (uint32_t)power_limit);
327+
uint32_t value = CABLE_POWER_LIMIT_MAGIC | (uint32_t)power_limit;
328+
329+
jtag_axi_write32(dev, DMC_CABLE_POWER_LIMIT_REG_ADDR, value);
325330
#endif
326331
}
327332

lib/tenstorrent/jtag_bootrom/reset.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ const size_t get_bootcode_len(void)
2929
return sizeof(bootcode) / sizeof(uint32_t);
3030
}
3131

32-
int jtag_bootrom_reset_sequence(struct bh_chip *chip, bool force_reset)
32+
int jtag_bootrom_reset_sequence(struct bh_chip *chip, bool force_reset, uint16_t cable_power_limit)
3333
{
3434
const uint32_t *const patch = (const uint32_t *)bootcode;
3535
const size_t patch_len = get_bootcode_len();
@@ -64,6 +64,8 @@ int jtag_bootrom_reset_sequence(struct bh_chip *chip, bool force_reset)
6464
printk("Bootrom verification failed\n");
6565
}
6666

67+
jtag_bootrom_set_cable_power_limit(chip, cable_power_limit);
68+
6769
start = k_uptime_get();
6870

6971
#ifdef CONFIG_JTAG_LOAD_ON_PRESET

0 commit comments

Comments
 (0)