66
77#include "noc2axi.h"
88#include "noc_init.h"
9+ #include "harvesting.h"
910
1011#include <stdint.h>
12+ #include <string.h>
1113
1214#include <tenstorrent/post_code.h>
15+ #include <tenstorrent/spi_flash_buf.h>
1316#include <tenstorrent/sys_init_defines.h>
17+ #include <tenstorrent/tt_boot_fs.h>
18+ #include <zephyr/device.h>
19+ #include <zephyr/drivers/flash.h>
1420#include <zephyr/drivers/misc/bh_fwtable.h>
1521#include <zephyr/drivers/dma.h>
1622#include <zephyr/drivers/dma/dma_tt_bh_noc.h>
1723#include <zephyr/init.h>
24+ #include <zephyr/kernel.h>
25+ #include <zephyr/logging/log.h>
26+ #include <zephyr/sys/util.h>
27+
28+ LOG_MODULE_REGISTER (tensix_init , CONFIG_TT_APP_LOG_LEVEL );
1829
1930#define ARC_NOC0_X 8
2031#define ARC_NOC0_Y 0
2132
33+ #define TENSIX_X_START 2
34+ #define TENSIX_Y_START 2
35+ #define TENSIX_X_END 1
36+ #define TENSIX_Y_END 11
2237#define TENSIX_L1_SIZE (1536 * 1024)
2338
39+ /* Tensix RISC control registers */
40+ #define TRISC0_RESET_PC 0xFFB12228
41+ #define TRISC_RESET_PC_OVERRIDE 0xFFB12234
42+ #define SOFT_RESET_0 0xFFB121B0
43+ #define ALL_RISC_SOFT_RESET 0x47800
44+
45+ /* TRISC0 wipe firmware parameters */
46+ #define TRISC_WIPE_FW_TAG "destwipe"
47+ #define TRISC_WIPE_FW_LOAD_ADDR 0x6000 /* TRISC0_CODE region start */
48+
49+ /* Scratchpad buffer size for SPI transfers */
50+ #define SCRATCHPAD_SIZE CONFIG_TT_BH_ARC_SCRATCHPAD_SIZE
51+
52+ /* Counter location for wipe_dest */
53+ #define COUNTER_TENSIX_X 1
54+ #define COUNTER_TENSIX_Y 2
55+ #define COUNTER_L1_ADDR 0x110000 /* Must match firmware hardcoded value */
56+ #define NUM_TENSIX_ROWS 10
57+ #define WIPE_DEST_TIMEOUT_US 10000 /* 10ms timeout */
58+
2459static const struct device * const fwtable_dev = DEVICE_DT_GET (DT_NODELABEL (fwtable ));
2560static const struct device * const dma_noc = DEVICE_DT_GET (DT_NODELABEL (dma1 ));
61+ static const struct device * const flash = DEVICE_DT_GET_OR_NULL (DT_NODELABEL (spi_flash ));
2662
/* Enable CG_CTRL_EN in each non-harvested Tensix node and set CG hysteresis to 2. */
2864/* This requires NOC init so that broadcast is set up properly. */
@@ -142,13 +178,140 @@ static void wipe_l1(void)
142178 dma_start (dma_noc , 1 );
143179}
144180
181+ /**
182+ * @brief Global synchronization for wipe_dest
183+ *
184+ * This function is used to synchronize the wipe_dest operation across all tensix cores.
185+ * It reads the counter from the chosen tensix core and waits for it to reach the expected count.
186+ * It returns 0 if the counter reached the expected count, -ETIMEDOUT otherwise.
187+ */
188+ static int global_sync (uint8_t ring , uint8_t noc_tlb , uint32_t expected_count )
189+ {
190+ NOC2AXITlbSetup (ring , noc_tlb , COUNTER_TENSIX_X , COUNTER_TENSIX_Y , COUNTER_L1_ADDR );
191+
192+ if (!WAIT_FOR (NOC2AXIRead32 (ring , noc_tlb , COUNTER_L1_ADDR ) >= expected_count ,
193+ WIPE_DEST_TIMEOUT_US , k_busy_wait (10 ))) {
194+ uint32_t actual = NOC2AXIRead32 (ring , noc_tlb , COUNTER_L1_ADDR );
195+
196+ LOG_ERR ("%s: timeout, counter=%u expected=%u" , __func__ , actual , expected_count );
197+ return - ETIMEDOUT ;
198+ }
199+
200+ return 0 ;
201+ }
202+
/**
 * @brief Write a byte buffer to the NOC as a sequence of 32-bit words
 *
 * Every word is issued with NOC2AXIWrite32() on ring 0 / TLB 0, so the caller must have
 * pointed that TLB at the destination before calling. The (src, dst, len) signature is the
 * one wipe_dest() hands to spi_transfer_by_parts() as its per-chunk callback.
 *
 * Words are assembled with memcpy(), so @p src needs no particular alignment and is never
 * read through an incompatible pointer type. If @p len is not a multiple of 4, the trailing
 * 1-3 bytes are zero-padded to a full word and written too (instead of being silently
 * dropped); this overwrites up to 3 bytes past dst + len with zeros.
 *
 * @param src Source bytes
 * @param dst Destination address, forwarded to NOC2AXIWrite32() as the write address
 * @param len Number of bytes to write
 *
 * @return 0 always
 */
static int noc2axi_write32_fw(uint8_t *src, uint8_t *dst, size_t len)
{
	const uint8_t ring = 0;
	const uint8_t noc_tlb = 0;
	const size_t tail_bytes = len % sizeof(uint32_t);
	const size_t full_bytes = len - tail_bytes;
	uintptr_t addr = (uintptr_t)dst;
	uint32_t word;

	for (size_t off = 0; off < full_bytes; off += sizeof(word)) {
		memcpy(&word, src + off, sizeof(word));
		NOC2AXIWrite32(ring, noc_tlb, addr + off, word);
	}

	if (tail_bytes != 0) {
		/* Zero-pad the final partial word so no source byte is lost. */
		word = 0;
		memcpy(&word, src + full_bytes, tail_bytes);
		NOC2AXIWrite32(ring, noc_tlb, addr + full_bytes, word);
	}

	return 0;
}
219+
220+ /**
221+ * @brief Setup the multicast TLB for the unharvested tensix cores
222+ *
223+ * @param addr The address to load the firmware to
224+ */
225+ static inline void setup_tensix_mcast_tlb (uint32_t addr )
226+ {
227+ uint8_t ring = 0 ;
228+ uint8_t noc_tlb = 0 ;
229+
230+ NOC2AXIMulticastTlbSetup (ring , noc_tlb , TENSIX_X_START , TENSIX_Y_START , TENSIX_X_END ,
231+ TENSIX_Y_END , addr , kNoc2AxiOrderingStrict );
232+ }
233+
234+ /**
235+ * @brief Zeros the DEST register of every non-harvested tensix core
236+ *
237+ * The DEST register can only be written by code running on the local TRISC.
238+ * This function loads a wipe firmware from SPI flash to each Tensix's L1,
239+ * runs it on TRISC 0 to clear DEST using 32-bit stores, then puts TRISC 0
240+ * back in reset.
241+ */
242+ static int wipe_dest (void )
243+ {
244+ uint8_t ring = 0 ;
245+ uint8_t noc_tlb = 0 ;
246+ uint8_t wipe_dest_buf [SCRATCHPAD_SIZE ] __aligned (4 );
247+
248+ int rc ;
249+ tt_boot_fs_fd tag_fd ;
250+ size_t image_size ;
251+ size_t spi_address ;
252+
253+ /* Find the TRISC wipe firmware in SPI flash */
254+ rc = tt_boot_fs_find_fd_by_tag (flash , (const uint8_t * )TRISC_WIPE_FW_TAG , & tag_fd );
255+ if (rc < 0 ) {
256+ LOG_ERR ("%s(%s) failed: %d" , "tt_boot_fs_find_fd_by_tag" , TRISC_WIPE_FW_TAG , rc );
257+ return rc ;
258+ }
259+ image_size = tag_fd .flags .f .image_size ;
260+ spi_address = tag_fd .spi_addr ;
261+ LOG_INF ("%s: found %s at 0x%x, size %zu" , __func__ , TRISC_WIPE_FW_TAG , spi_address ,
262+ image_size );
263+
264+ /* Step 1: Zero the completion counter before releasing TRISCs */
265+ NOC2AXITlbSetup (ring , noc_tlb , COUNTER_TENSIX_X , COUNTER_TENSIX_Y , COUNTER_L1_ADDR );
266+ NOC2AXIWrite32 (ring , noc_tlb , COUNTER_L1_ADDR , 0 );
267+
268+ /* Step 2: Load wipe firmware to all non-harvested Tensix L1 using multicast */
269+ setup_tensix_mcast_tlb (TRISC_WIPE_FW_LOAD_ADDR );
270+
271+ /* Round up to ensure all 32-bit writes are complete */
272+ image_size = ROUND_UP (image_size , sizeof (uint32_t ));
273+
274+ rc = spi_transfer_by_parts (
275+ flash , spi_address , image_size , wipe_dest_buf , sizeof (wipe_dest_buf ),
276+ (uint8_t * )(uintptr_t )TRISC_WIPE_FW_LOAD_ADDR , noc2axi_write32_fw );
277+ if (rc < 0 ) {
278+ LOG_ERR ("%s(%s) failed: %d" , "spi_transfer_by_parts" , TRISC_WIPE_FW_TAG , rc );
279+ return rc ;
280+ }
281+ LOG_INF ("%s: firmware loaded" , __func__ );
282+
283+ /* Step 3: Set TRISC 0 reset PC to firmware load address on all Tensix */
284+ setup_tensix_mcast_tlb (TRISC0_RESET_PC );
285+ NOC2AXIWrite32 (ring , noc_tlb , TRISC0_RESET_PC , TRISC_WIPE_FW_LOAD_ADDR );
286+ NOC2AXIWrite32 (ring , noc_tlb , TRISC_RESET_PC_OVERRIDE , 1 );
287+
288+ /* Step 4: Release TRISC 0 from soft reset on all Tensix */
289+ NOC2AXIWrite32 (ring , noc_tlb , SOFT_RESET_0 , ALL_RISC_SOFT_RESET & ~BIT (12 ));
290+
291+ /* Step 5: Wait for all cores to signal completion via atomic counter */
292+ uint32_t expected = POPCOUNT (tile_enable .tensix_col_enabled ) * NUM_TENSIX_ROWS ;
293+ int rc_sync = global_sync (ring , noc_tlb , expected );
294+
295+ if (rc_sync < 0 ) {
296+ return rc_sync ;
297+ }
298+
299+ /* Step 6: Re-assert TRISC 0 soft reset on all Tensix */
300+ setup_tensix_mcast_tlb (SOFT_RESET_0 );
301+ NOC2AXIWrite32 (ring , noc_tlb , SOFT_RESET_0 , ALL_RISC_SOFT_RESET );
302+ NOC2AXIWrite32 (ring , noc_tlb , TRISC_RESET_PC_OVERRIDE , 0 );
303+
304+ LOG_INF ("%s: completed" , __func__ );
305+ return 0 ;
306+ }
307+
145308void TensixInit (void )
146309{
147310 if (!tt_bh_fwtable_get_fw_table (fwtable_dev )-> feature_enable .cg_en ) {
148311 EnableTensixCG ();
149312 }
150313
151- /* wipe_l1() isn 't here because it's only needed on boot & board reset. */
314+ /* wipe_l1()/wipe_dest() aren 't here because they're only needed on boot & board reset. */
152315}
153316
154317static int tensix_init (void )
@@ -162,6 +325,12 @@ static int tensix_init(void)
162325 TensixInit ();
163326
164327 wipe_l1 ();
328+ int rc_wipe_dest = wipe_dest ();
329+
330+ if (rc_wipe_dest < 0 ) {
331+ LOG_ERR ("%s: wipe_dest failed: %d" , __func__ , rc_wipe_dest );
332+ return rc_wipe_dest ;
333+ }
165334
166335 return 0 ;
167336}
0 commit comments