11#ifndef USB4_CONNECT_H
22#define USB4_CONNECT_H
33/*
4- * bank0_8a89 — the USB4 PHY / lane-MODE bring-up engine (THE missing dynamic engine).
5- * Faithful transcription of the ASM2464PD stock firmware (fw_tinygrad.bin) @0x8A89.
4+ * bank0_8a89 @0x8A89 — USB4 PHY / lane-MODE bring-up engine. Faithful transcription
5+ * of the ASM2464PD stock firmware (fw_tinygrad.bin). Arms E764.4 (LINK-MODE-ENABLE)
6+ * and E751 (USB4 link arm), latches the E710/CA06 link-rate into 0x0A9F/0x0A9E, writes
7+ * the per-mode E7E3 descriptor via dd42(), and runs a dynamic service loop that pumps
8+ * PD-RX while bringing up the lanes (waiting on CC33.2 / 0x07ED).
69 *
7- * This is the ONLY function in the whole USB4 path that:
8- * - arms E764 bit4 = LINK-MODE-ENABLE (E764 = (E764 & 0xEF) | 0x10) [the keystone bit]
9- * - arms E751 = 1 (USB4 link arm, inside if(0x0AA0&1))
10- * - latches the E710/CA06 link-rate into 0x0A9F / 0x0A9E
11- * - writes the per-mode E7E3 descriptor via dd42()
12- * - runs the dynamic in-loop service `while(bd6c()){ if(C80A.6) pd_rx_isr(); ... }` that pumps
13- * PD-RX WHILE bringing up the lanes, and waits on link-completion (CC33.2 / 0x07ED).
10+ * Driven per host link-event from the INT0 demux (0e5b -> e94d/e952 -> c9a8 -> 8a89).
11+ * Include AFTER usb4.h and BEFORE sb_router.h.
1412 *
15- * handmade reproduced a byte-faithful STATIC SB block (sb.h) but NEVER ran this engine, so the
16- * link stayed mode0 and the TB4 host never saw a trained USB4 link -> C80A.5 never fired. It is
17- * meant to be RE-DRIVEN per host link-event from the INT0 demux (0e5b -> e94d/e952 -> c9a8 ->
18- * 8a89). usb4_connect_u4 (the inner a3f5 RMW + sb_assert) is called ONLY from inside this wrapper
19- * (8a89 tail @0x8d49) and from the faithful Enter_USB data-msg path (a17f, vdm.h) -- never
20- * standalone from a VDM Accept.
21- *
22- * Included AFTER usb4.h (usb4_connect_u4 fwd-decl, SB_* / P1_* from sb.h, PR(), uart_*, the boot_phy
23- * helpers e7ae/d0d3/phy_cc10_cmd_wait, and pd_rx_isr from pd.h) and BEFORE sb_router.h.
24- *
25- * Helper map (all byte-exact, decompiled from stock):
13+ * Stock helper map (byte-exact):
2614 * bd49() = E710 & 0xE0 bd57() = CA06 & 0x1F bd50() = E716 & 0xFC
27- * bd33() = CC3E & 0xFD bceb(a): a |= 1 bcfe(a): a = (&0xFD)|2 (set bit1)
15+ * bd33() = CC3E & 0xFD bceb(a): a |= 1 bcfe(a): a = (&0xFD)|2
2816 * bd23(a): a = (&0xDF)|0x20 bd3a(a): a = (&0xBF)|0x40 bd65(a): a = (&0x7F)|0x80
2917 * bd2a(a): a &= 0xDF; a &= 0xBF bd5e(a): a = (&0xFB)|0x04
3018 * bce7(v,a): *a = v; E717 = (E717&0xFE)|1
31- * bcf2(): CC3A=(&0xFD)|2; CC38=(&0xFD)|2 bd41(): CC3B &= 0xFD bd14(): CC3A&=0xFD; CC38&=0xFD
32- * 8d6e(): uart_puts; e7ae() (the e7ae C006/C00E PHY-lock busy wait, BOUNDED here)
33- * bd6c(): dcd4(0x0A9D); return 0x0A9D dcd4(m): CA00=(&0xC0)|7; CA0A=2; ( CA0D/CA0E poll, no SE)
34- * af5e() == pd_rx_isr() dd42() == boot_phy_dd42() cd10() == bank0_cd10 (handoff, never returns )
19+ * bcf2(): CC3A=(&0xFD)|2; CC38=(&0xFD)|2 bd41(): CC3B &= 0xFD bd14(): CC3A&=0xFD; CC38&=0xFD
20+ * 8d6e(): uart_puts; e7ae() PHY-lock busy wait (bounded here)
21+ * bd6c(): dcd4(0x0A9D); return 0x0A9D dcd4(m): CA00=(&0xC0)|7; CA0A=2; CA0D/CA0E poll
22+ * af5e() == pd_rx_isr() dd42() == boot_phy_dd42() cd10() == bank0_cd10 (handoff)
3523 */
3624
3725/* tiny single-reg RMW helpers from bank0_8a89's helper block (verbatim) */
@@ -46,30 +34,25 @@ static void u4c_bcf2(void) { REG_TIMER_ENABLE_B = (REG_TIMER_ENABLE_B & 0xFD) |
4634static void u4c_bd41 (void ) { REG_TIMER_CTRL_CC3B &= 0xFD ; }
4735static void u4c_bd14 (void ) { REG_TIMER_ENABLE_B &= 0xFD ; REG_TIMER_ENABLE_A &= 0xFD ; }
4836
49- /* bd6c: pump the link controller (dcd4) then return the mode byte 0x0A9D. Loop continues while
50- * 0x0A9D != 0. dcd4 = CA00=(&0xC0)|7; CA0A=2 (+ a CA0D/CA0E status poll that has no side effect in
51- * the decompile -> reproduced as the two writes). */
37+ /* bd6c: pump the link controller (dcd4 = CA00=(&0xC0)|7; CA0A=2) then return mode byte 0x0A9D. */
5238static uint8_t u4c_bd6c (void ) {
5339 PR (0xCA00 ) = (PR (0xCA00 ) & 0xC0 ) | 0x07 ; /* dcd4 */
5440 PR (0xCA0A ) = 0x02 ;
5541 return PR (0x0A9D );
5642}
5743
58- /* 8d6e: print + e7ae PHY-lock wait. e7ae waits C006[4:0]==0x10 then C00E[2:0]==0 (BOUNDED so a
59- * never-locking PHY can't hang the ISR -- handmade bounds every stock spin). */
44+ /* e7ae: PHY-lock wait — C006[4:0]==0x10 then C00E[2:0]==0 (bounded so it can't hang the ISR). */
/* e7ae (bounded): two-stage PHY-lock wait.
 *
 *   stage 1: poll C006 until its low five bits read 0x10
 *   stage 2: poll C00E until its low three bits read 0
 *
 * Each stage gives up after at most 0x0800 polls, so a PHY that never reaches
 * the expected state cannot stall the caller. No status is returned: a timeout
 * simply falls through to the next stage / the return. */
static void u4c_e7ae_bounded(void)
{
    uint16_t spins;

    for (spins = 0; ; ) {
        if ((PR(0xC006) & 0x1F) == 0x10) {
            break;              /* stage 1 condition met */
        }
        if (++spins >= 0x0800) {
            break;              /* stage 1 budget exhausted */
        }
    }

    for (spins = 0; ; ) {
        if ((PR(0xC00E) & 0x07) == 0x00) {
            break;              /* stage 2 condition met */
        }
        if (++spins >= 0x0800) {
            break;              /* stage 2 budget exhausted */
        }
    }
}
6651
67- /* indicates the engine ran (instrumentation, read from the super-loop)
68- * IRAM-HEADROOM FIX: relocated to XDATA scratch (0x8800..); seeded in main(). */
52+ /* indicates the engine ran (instrumentation, read from the super-loop). XDATA scratch, seeded in main(). */
6953static volatile uint8_t __xdata __at (0x0B51 ) bank0_8a89_entered ;
7054
71- /* ==================================================================================== */
72- /* bank0_8a89 @0x8A89 — verbatim. param = USB4 link mode (0=?,1=USB3.2-tunnel,2=USB4). */
55+ /* bank0_8a89 @0x8A89. param = USB4 link mode (0=?,1=USB3.2-tunnel,2=USB4). */
7356static void bank0_8a89 (uint8_t mode ) {
7457 uint8_t aa0 ; /* 0x0AA0 config byte */
7558 uint8_t a9d ; /* 0x0A9D mode (mutated by the dispatch) */
@@ -98,9 +81,9 @@ static void bank0_8a89(uint8_t mode) {
9881 u4c_e7ae_bounded (); /* e7ae PHY-lock wait */
9982 U4C_BCEB (0xCA81 ); /* bceb(CA81): set bit0 */
10083
101- /* latch link-rate into 0x0A9F / 0x0A9E (pre-arm). Net (per decompile) :
102- * 0x0A9F = (E710 & 0xE0) | 0x1F [bd49()=E710&0xE0, then |0x1F ]
103- * 0x0A9E = (CA06 & 0x1F) | 0x80 [bd57()=CA06&0x1F, then |0x80 ] */
84+ /* latch link-rate into 0x0A9F / 0x0A9E (pre-arm):
85+ * 0x0A9F = (E710 & 0xE0) | 0x1F [bd49()]
86+ * 0x0A9E = (CA06 & 0x1F) | 0x80 [bd57()] */
10487 PR (0x0A9F ) = REG_LINK_WIDTH_E710 & 0x1F ;
10588 PR (0x0A9F ) = (REG_LINK_WIDTH_E710 & 0xE0 ) | 0x1F ;
10689 PR (0x0A9E ) = REG_CPU_MODE_NEXT >> 5 ;
@@ -110,9 +93,9 @@ static void bank0_8a89(uint8_t mode) {
11093 if (aa0 & 0x01 ) { /* 0x0AA0.0 -> arm E751 (USB4 link arm) */
11194 U4C_BD23 (0xE40B ); /* bd23(E40B): set bit5 */
11295 U4C_BD23 (0xC698 ); /* bd23(C698): set bit5 */
113- u4c_bd14 (); /* bd3a()? -> stock bd3a()/ bd14: CC3A/CC38 &=0xFD */
96+ u4c_bd14 (); /* bd14: CC3A/CC38 &=0xFD */
11497 U4C_BCEB (0xCAC4 ); /* bceb(CAC4) */
115- REG_PHY_POLL_E751 = 0x01 ; /* *** E751 = 1 (USB4 LINK ARM) *** */
98+ REG_PHY_POLL_E751 = 0x01 ; /* E751 = 1 (USB4 LINK ARM) */
11699 U4C_BD65 (0xE313 ); /* bd65(E313): set bit7 */
117100 U4C_BCFE (0xE413 ); /* bcfe(E413): set bit1 */
118101 }
@@ -124,12 +107,12 @@ static void bank0_8a89(uint8_t mode) {
124107 U4C_BCFE (0xCC3E ); /* bcfe(CC3E) */
125108 REG_LINK_CTRL_E717 &= 0xFE ;
126109 }
127- u4c_e7ae_bounded (); /* 8d6e(0xff): print + e7ae */
128- P1_WR (0x011F , 0x01 ); /* r3_write_dispatch(val=1,0x011f,R3=2): page1[0x011F]=1 */
110+ u4c_e7ae_bounded (); /* 8d6e(0xff) */
111+ P1_WR (0x011F , 0x01 ); /* page1[0x011F]=1 */
129112 if (aa0 & 0x02 ) u4c_bcf2 (); /* bcf2(): CC3A/CC38 bit1 set */
130113 } else if (PR (0x0A9D ) == 0x01 ) {
131114 if (aa0 & 0x02 ) u4c_bd41 (); /* bd41(): CC3B &= 0xFD */
132- u4c_e7ae_bounded (); /* 8d6e(4): print + e7ae */
115+ u4c_e7ae_bounded (); /* 8d6e(4) */
133116 REG_POWER_DOMAIN = 0x01 ;
134117 if (aa0 & 0x02 ) { U4C_BD3A (0xCC3B ); U4C_BD5E (0xCC37 ); } /* bd3a(CC3B); bd5e(CC37) */
135118 } else { /* mode 0 */
@@ -140,15 +123,15 @@ static void bank0_8a89(uint8_t mode) {
140123 phy_cc10_cmd_wait (0 , 0x27 , 2 ); /* phy_cmd_cc10_and_wait(0,0x27,2) */
141124 PR (0x07ED ) = 0x01 ; /* one-shot suppress (a17c path) */
142125
143- /* the d0d3/cc3f conditional BEFORE the wait loop */
126+ /* d0d3/cc3f conditional before the wait loop */
144127 if (u4c_bd6c () != 0 && (aa0 & 0x08 )) { /* bd6c()!=0 && 0x0AA0.3 */
145128 U4C_BCFE (0xCC3F ); /* bcfe(CC3F) */
146- U4C_BD5E (0xCC3F ); /* bd5e()? stock bd5e on a staged reg */
129+ U4C_BD5E (0xCC3F ); /* bd5e(CC3F) */
147130 u4c_bd2a (0xCC3F ); /* bd2a(): clear bits5,6 */
148131 U4C_BD65 (0xCC3D ); /* bd65(CC3D) */
149132 }
150133
151- /* *** the DYNAMIC link-up service loop (pumps PD-RX while bringing up lanes) *** */
134+ /* dynamic link-up service loop: pump PD-RX while bringing up lanes */
152135 { uint32_t guard = 0 ;
153136 while (u4c_bd6c () != 0 && ++ guard < 200000UL ) {
154137 if (REG_INT_PCIE_NVME & 0x40 ) pd_rx_isr (); /* C80A.6 -> af5e() == pd_rx_isr() */
@@ -170,7 +153,7 @@ static void bank0_8a89(uint8_t mode) {
170153 REG_CMD_CFG_E413 &= 0xFD ;
171154 }
172155
173- /* *** E764 bit4 = LINK-MODE-ENABLE *** (the keystone write) */
156+ /* E764 bit4 = LINK-MODE-ENABLE */
174157 REG_PHY_TIMER_CTRL_E764 = (REG_PHY_TIMER_CTRL_E764 & 0xEF ) | 0x10 ;
175158 /* re-latch link rate into 0x0A9F/0x0A9E (final) */
176159 PR (0x0A9F ) |= (REG_LINK_WIDTH_E710 & 0xE0 ); /* bVar5 | bd49() */
@@ -185,8 +168,7 @@ static void bank0_8a89(uint8_t mode) {
185168 U4C_BCEB (0xCA81 ); /* bceb() */
186169 u4c_bd14 (); /* bd14 */
187170 }
188- /* e0d9(0) -- PHY descriptor seed (bank1, e0d9_stub). Reproduced as no-op arm (the SB e0d9
189- * seed already ran in sb_block_init); the load-bearing effect here is uVar2 selection. */
171+ /* e0d9(0) PHY descriptor seed (already ran in sb_block_init); only uVar2 selection matters here */
190172 uVar2 = (PR (0x09FA ) >> 1 & 1 ) ? 2 : 1 ;
191173 } else if (PR (0x0A9D ) == 0x01 ) {
192174 if (aa0 & 0x02 ) {
@@ -210,7 +192,7 @@ static void bank0_8a89(uint8_t mode) {
210192 if (PR (0x07ED ) == 0 ) {
211193 uint8_t cv = PR (0x0A9D );
212194 if (cv == 0 ) {
213- REG_CPU_CTRL_CC3E = (REG_CPU_CTRL_CC3E & 0xFD ) | 0x02 ; /* bce7(bd33()|2 -> store) : CC3E bit1 + E717.0 */
195+ REG_CPU_CTRL_CC3E = (REG_CPU_CTRL_CC3E & 0xFD ) | 0x02 ; /* bce7: CC3E bit1 + E717.0 */
214196 REG_LINK_CTRL_E717 = (REG_LINK_CTRL_E717 & 0xFE ) | 0x01 ;
215197 REG_CPU_CTRL_CC36 &= 0xDF ;
216198 }
@@ -220,15 +202,12 @@ static void bank0_8a89(uint8_t mode) {
220202 U4C_BD23 (0xCC36 );
221203 }
222204 REG_POWER_MISC_CTRL &= 0xDF ;
223- usb4_connect_u4 (); /* *** drive SB/tunnel connect *** */
205+ usb4_connect_u4 (); /* drive SB/tunnel connect */
224206 }
225207 PR (0x07ED ) = 0x00 ;
226208
227- /* final: if link complete -> CC33 ack + cd10 (downstream PCIe handoff). Stock cd10 brings up
228- * the downstream PCIe link to the GPU then SPINS FOREVER (it is the terminal hand-off state).
229- * handmade can't spin inside the connect path, so we DEFER the downstream bring-up to the
230- * super-loop (sb_tunnel_up_pending -> pcie_power_on) and return -- the link-mode is already
231- * armed (E764.4/E751) which is the load-bearing effect the host needs. */
209+ /* final: if link complete -> CC33 ack + cd10 (downstream PCIe handoff). Stock cd10 spins
210+ * forever; we defer the downstream bring-up to the super-loop (sb_tunnel_up_pending) and return. */
232211 if (PR (0x0AE3 ) == 0 && (REG_CPU_EXEC_STATUS_2 >> 2 & 1 )) {
233212 REG_CPU_EXEC_STATUS_2 = 0x04 ;
234213 sb_tunnel_up_pending = 1 ; /* cd10: defer downstream PCIe bring-up */
@@ -237,25 +216,19 @@ static void bank0_8a89(uint8_t mode) {
237216 }
238217}
239218
240- /* ====================================================================================
241- * FUN_CODE_c9a8 @0xC9A8 — the host-link-event connect dispatcher (e94d=c9a8(0), e952=c9a8(1)).
242- * Gate: (0x09FA.2) && (0x0AF1.0) && (0x07E8 || 0x07EB). When open:
243- * if (0x0B41) e3b7(3); pcie_downstream_link_bringup(0x0AEF); e96c(); then if gated -> 545c();
244- * C6A8 &= ~1; bank0_8a89(arg). tail: 0x07E8=0; 0x0B2F=1.
245- * pcie_downstream_link_bringup(0x0AEF)/3578 is the heavy banked eGPU LTSSM/PERST path; handmade
246- * runs the equivalent (pcie_power_on) from the super-loop via sb_tunnel_up_pending, so we set that
247- * pending flag instead of running the long banked poll inside the ISR.
248- * ==================================================================================== */
219+ /* FUN_CODE_c9a8 @0xC9A8 — host-link-event connect dispatcher (e94d=c9a8(0), e952=c9a8(1)).
220+ * Gate: (0x09FA.2) && (0x0AF1.0) && (0x07E8 || 0x07EB) -> C6A8 &= ~1; bank0_8a89(arg).
221+ * The heavy banked eGPU LTSSM/PERST bring-up (3578) is deferred to the super-loop via
222+ * sb_tunnel_up_pending instead of running the long banked poll inside the ISR. */
249223static void bank0_c9a8 (uint8_t arg ) {
250- PR (0x0A7D ) = arg ; /* xdata_00a7d = param ( mode for 8a89) */
224+ PR (0x0A7D ) = arg ; /* mode for 8a89 */
251225 if (PR (0x09FA ) & 0x04 ) { /* 0x09FA.2 */
252- /* if (0x0B41) bank0_e3b7(3) -- B480 PERST clear; deferred via pcie path below. */
253- /* pcie_downstream_link_bringup(0x0AEF) + e96c(): defer the downstream PCIe bring-up. */
226+ /* defer downstream PCIe bring-up (e3b7(3) B480 PERST clear + 3578) */
254227 sb_tunnel_up_pending = 1 ;
255228 if ((PR (0x0AF1 ) & 0x01 ) &&
256229 (PR (0x07E8 ) != 0 || PR (0x07EB ) != 0 )) { /* the c9a8 connect gate */
257- REG_PHY_CFG_C6A8 &= 0xFE ; /* C6A8 &= ~1 (545c side then this) */
258- bank0_8a89 (PR (0x0A7D )); /* *** run the lane-MODE engine *** */
230+ REG_PHY_CFG_C6A8 &= 0xFE ; /* C6A8 &= ~1 */
231+ bank0_8a89 (PR (0x0A7D )); /* run the lane-MODE engine */
259232 }
260233 PR (0x07E8 ) = 0x00 ;
261234 PR (0x0B2F ) = 0x01 ;
0 commit comments