Skip to content

Commit 465a2e8

Browse files
committed
handmade: trim dev-narrative comments in usb4_connect.h (binary-identical)
Collapse the scratchpad banner + why-it-failed asides on the bank0_8a89/cd10/c9a8 blocks to terse purpose lines; keep all stock-address annotations. md5 6ca2d282 unchanged -- comment-only, zero regression.
1 parent 6e10c87 commit 465a2e8

1 file changed

Lines changed: 42 additions & 69 deletions

File tree

handmade/src/usb4_connect.h

Lines changed: 42 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,25 @@
11
#ifndef USB4_CONNECT_H
22
#define USB4_CONNECT_H
33
/*
4-
* bank0_8a89 — the USB4 PHY / lane-MODE bring-up engine (THE missing dynamic engine).
5-
* Faithful transcription of the ASM2464PD stock firmware (fw_tinygrad.bin) @0x8A89.
4+
* bank0_8a89 @0x8A89 — USB4 PHY / lane-MODE bring-up engine. Faithful transcription
5+
* of the ASM2464PD stock firmware (fw_tinygrad.bin). Arms E764.4 (LINK-MODE-ENABLE)
6+
* and E751 (USB4 link arm), latches the E710/CA06 link-rate into 0x0A9F/0x0A9E, writes
7+
* the per-mode E7E3 descriptor via dd42(), and runs a dynamic service loop that pumps
8+
* PD-RX while bringing up the lanes (waiting on CC33.2 / 0x07ED).
69
*
7-
* This is the ONLY function in the whole USB4 path that:
8-
* - arms E764 bit4 = LINK-MODE-ENABLE (E764 = (E764 & 0xEF) | 0x10) [the keystone bit]
9-
* - arms E751 = 1 (USB4 link arm, inside if(0x0AA0&1))
10-
* - latches the E710/CA06 link-rate into 0x0A9F / 0x0A9E
11-
* - writes the per-mode E7E3 descriptor via dd42()
12-
* - runs the dynamic in-loop service `while(bd6c()){ if(C80A.6) pd_rx_isr(); ... }` that pumps
13-
* PD-RX WHILE bringing up the lanes, and waits on link-completion (CC33.2 / 0x07ED).
10+
* Driven per host link-event from the INT0 demux (0e5b -> e94d/e952 -> c9a8 -> 8a89).
11+
* Include AFTER usb4.h and BEFORE sb_router.h.
1412
*
15-
* handmade reproduced a byte-faithful STATIC SB block (sb.h) but NEVER ran this engine, so the
16-
* link stayed mode0 and the TB4 host never saw a trained USB4 link -> C80A.5 never fired. It is
17-
* meant to be RE-DRIVEN per host link-event from the INT0 demux (0e5b -> e94d/e952 -> c9a8 ->
18-
* 8a89). usb4_connect_u4 (the inner a3f5 RMW + sb_assert) is called ONLY from inside this wrapper
19-
* (8a89 tail @0x8d49) and from the faithful Enter_USB data-msg path (a17f, vdm.h) -- never
20-
* standalone from a VDM Accept.
21-
*
22-
* Included AFTER usb4.h (usb4_connect_u4 fwd-decl, SB_* / P1_* from sb.h, PR(), uart_*, the boot_phy
23-
* helpers e7ae/d0d3/phy_cc10_cmd_wait, and pd_rx_isr from pd.h) and BEFORE sb_router.h.
24-
*
25-
* Helper map (all byte-exact, decompiled from stock):
13+
* Stock helper map (byte-exact):
2614
* bd49() = E710 & 0xE0 bd57() = CA06 & 0x1F bd50() = E716 & 0xFC
27-
* bd33() = CC3E & 0xFD bceb(a): a |= 1 bcfe(a): a = (&0xFD)|2 (set bit1)
15+
* bd33() = CC3E & 0xFD bceb(a): a |= 1 bcfe(a): a = (&0xFD)|2
2816
* bd23(a): a = (&0xDF)|0x20 bd3a(a): a = (&0xBF)|0x40 bd65(a): a = (&0x7F)|0x80
2917
* bd2a(a): a &= 0xDF; a &= 0xBF bd5e(a): a = (&0xFB)|0x04
3018
* bce7(v,a): *a = v; E717 = (E717&0xFE)|1
31-
* bcf2(): CC3A=(&0xFD)|2; CC38=(&0xFD)|2 bd41(): CC3B &= 0xFD bd14(): CC3A&=0xFD; CC38&=0xFD
32-
* 8d6e(): uart_puts; e7ae() (the e7ae C006/C00E PHY-lock busy wait, BOUNDED here)
33-
* bd6c(): dcd4(0x0A9D); return 0x0A9D dcd4(m): CA00=(&0xC0)|7; CA0A=2; (CA0D/CA0E poll, no SE)
34-
* af5e() == pd_rx_isr() dd42() == boot_phy_dd42() cd10() == bank0_cd10 (handoff, never returns)
19+
* bcf2(): CC3A=(&0xFD)|2; CC38=(&0xFD)|2 bd41(): CC3B &= 0xFD bd14(): CC3A&=0xFD; CC38&=0xFD
20+
* 8d6e(): uart_puts; e7ae() PHY-lock busy wait (bounded here)
21+
* bd6c(): dcd4(0x0A9D); return 0x0A9D dcd4(m): CA00=(&0xC0)|7; CA0A=2; CA0D/CA0E poll
22+
* af5e() == pd_rx_isr() dd42() == boot_phy_dd42() cd10() == bank0_cd10 (handoff)
3523
*/
3624

3725
/* tiny single-reg RMW helpers from bank0_8a89's helper block (verbatim) */
@@ -46,30 +34,25 @@ static void u4c_bcf2(void) { REG_TIMER_ENABLE_B = (REG_TIMER_ENABLE_B & 0xFD) |
4634
static void u4c_bd41(void) { REG_TIMER_CTRL_CC3B &= 0xFD; }
4735
static void u4c_bd14(void) { REG_TIMER_ENABLE_B &= 0xFD; REG_TIMER_ENABLE_A &= 0xFD; }
4836

49-
/* bd6c: pump the link controller (dcd4) then return the mode byte 0x0A9D. Loop continues while
50-
* 0x0A9D != 0. dcd4 = CA00=(&0xC0)|7; CA0A=2 (+ a CA0D/CA0E status poll that has no side effect in
51-
* the decompile -> reproduced as the two writes). */
37+
/* bd6c: pump the link controller (dcd4 = CA00=(&0xC0)|7; CA0A=2) then return mode byte 0x0A9D. */
5238
static uint8_t u4c_bd6c(void) {
5339
PR(0xCA00) = (PR(0xCA00) & 0xC0) | 0x07; /* dcd4 */
5440
PR(0xCA0A) = 0x02;
5541
return PR(0x0A9D);
5642
}
5743

58-
/* 8d6e: print + e7ae PHY-lock wait. e7ae waits C006[4:0]==0x10 then C00E[2:0]==0 (BOUNDED so a
59-
* never-locking PHY can't hang the ISR -- handmade bounds every stock spin). */
44+
/* e7ae: PHY-lock wait — C006[4:0]==0x10 then C00E[2:0]==0 (bounded so it can't hang the ISR). */
6045
static void u4c_e7ae_bounded(void) {
6146
uint16_t g = 0;
6247
while (((PR(0xC006) & 0x1F) != 0x10) && ++g < 0x0800);
6348
g = 0;
6449
while (((PR(0xC00E) & 0x07) != 0x00) && ++g < 0x0800);
6550
}
6651

67-
/* indicates the engine ran (instrumentation, read from the super-loop)
68-
* IRAM-HEADROOM FIX: relocated to XDATA scratch (0x8800..); seeded in main(). */
52+
/* indicates the engine ran (instrumentation, read from the super-loop). XDATA scratch, seeded in main(). */
6953
static volatile uint8_t __xdata __at(0x0B51) bank0_8a89_entered;
7054

71-
/* ==================================================================================== */
72-
/* bank0_8a89 @0x8A89 — verbatim. param = USB4 link mode (0=?,1=USB3.2-tunnel,2=USB4). */
55+
/* bank0_8a89 @0x8A89. param = USB4 link mode (0=?,1=USB3.2-tunnel,2=USB4). */
7356
static void bank0_8a89(uint8_t mode) {
7457
uint8_t aa0; /* 0x0AA0 config byte */
7558
uint8_t a9d; /* 0x0A9D mode (mutated by the dispatch) */
@@ -98,9 +81,9 @@ static void bank0_8a89(uint8_t mode) {
9881
u4c_e7ae_bounded(); /* e7ae PHY-lock wait */
9982
U4C_BCEB(0xCA81); /* bceb(CA81): set bit0 */
10083

101-
/* latch link-rate into 0x0A9F / 0x0A9E (pre-arm). Net (per decompile):
102-
* 0x0A9F = (E710 & 0xE0) | 0x1F [bd49()=E710&0xE0, then |0x1F]
103-
* 0x0A9E = (CA06 & 0x1F) | 0x80 [bd57()=CA06&0x1F, then |0x80] */
84+
/* latch link-rate into 0x0A9F / 0x0A9E (pre-arm):
85+
* 0x0A9F = (E710 & 0xE0) | 0x1F [bd49()]
86+
* 0x0A9E = (CA06 & 0x1F) | 0x80 [bd57()] */
10487
PR(0x0A9F) = REG_LINK_WIDTH_E710 & 0x1F;
10588
PR(0x0A9F) = (REG_LINK_WIDTH_E710 & 0xE0) | 0x1F;
10689
PR(0x0A9E) = REG_CPU_MODE_NEXT >> 5;
@@ -110,9 +93,9 @@ static void bank0_8a89(uint8_t mode) {
11093
if (aa0 & 0x01) { /* 0x0AA0.0 -> arm E751 (USB4 link arm) */
11194
U4C_BD23(0xE40B); /* bd23(E40B): set bit5 */
11295
U4C_BD23(0xC698); /* bd23(C698): set bit5 */
113-
u4c_bd14(); /* bd3a()? -> stock bd3a()/bd14: CC3A/CC38 &=0xFD */
96+
u4c_bd14(); /* bd14: CC3A/CC38 &=0xFD */
11497
U4C_BCEB(0xCAC4); /* bceb(CAC4) */
115-
REG_PHY_POLL_E751 = 0x01; /* *** E751 = 1 (USB4 LINK ARM) *** */
98+
REG_PHY_POLL_E751 = 0x01; /* E751 = 1 (USB4 LINK ARM) */
11699
U4C_BD65(0xE313); /* bd65(E313): set bit7 */
117100
U4C_BCFE(0xE413); /* bcfe(E413): set bit1 */
118101
}
@@ -124,12 +107,12 @@ static void bank0_8a89(uint8_t mode) {
124107
U4C_BCFE(0xCC3E); /* bcfe(CC3E) */
125108
REG_LINK_CTRL_E717 &= 0xFE;
126109
}
127-
u4c_e7ae_bounded(); /* 8d6e(0xff): print + e7ae */
128-
P1_WR(0x011F, 0x01); /* r3_write_dispatch(val=1,0x011f,R3=2): page1[0x011F]=1 */
110+
u4c_e7ae_bounded(); /* 8d6e(0xff) */
111+
P1_WR(0x011F, 0x01); /* page1[0x011F]=1 */
129112
if (aa0 & 0x02) u4c_bcf2(); /* bcf2(): CC3A/CC38 bit1 set */
130113
} else if (PR(0x0A9D) == 0x01) {
131114
if (aa0 & 0x02) u4c_bd41(); /* bd41(): CC3B &= 0xFD */
132-
u4c_e7ae_bounded(); /* 8d6e(4): print + e7ae */
115+
u4c_e7ae_bounded(); /* 8d6e(4) */
133116
REG_POWER_DOMAIN = 0x01;
134117
if (aa0 & 0x02) { U4C_BD3A(0xCC3B); U4C_BD5E(0xCC37); } /* bd3a(CC3B); bd5e(CC37) */
135118
} else { /* mode 0 */
@@ -140,15 +123,15 @@ static void bank0_8a89(uint8_t mode) {
140123
phy_cc10_cmd_wait(0, 0x27, 2); /* phy_cmd_cc10_and_wait(0,0x27,2) */
141124
PR(0x07ED) = 0x01; /* one-shot suppress (a17c path) */
142125

143-
/* the d0d3/cc3f conditional BEFORE the wait loop */
126+
/* d0d3/cc3f conditional before the wait loop */
144127
if (u4c_bd6c() != 0 && (aa0 & 0x08)) { /* bd6c()!=0 && 0x0AA0.3 */
145128
U4C_BCFE(0xCC3F); /* bcfe(CC3F) */
146-
U4C_BD5E(0xCC3F); /* bd5e()? stock bd5e on a staged reg */
129+
U4C_BD5E(0xCC3F); /* bd5e(CC3F) */
147130
u4c_bd2a(0xCC3F); /* bd2a(): clear bits5,6 */
148131
U4C_BD65(0xCC3D); /* bd65(CC3D) */
149132
}
150133

151-
/* *** the DYNAMIC link-up service loop (pumps PD-RX while bringing up lanes) *** */
134+
/* dynamic link-up service loop: pump PD-RX while bringing up lanes */
152135
{ uint32_t guard = 0;
153136
while (u4c_bd6c() != 0 && ++guard < 200000UL) {
154137
if (REG_INT_PCIE_NVME & 0x40) pd_rx_isr(); /* C80A.6 -> af5e() == pd_rx_isr() */
@@ -170,7 +153,7 @@ static void bank0_8a89(uint8_t mode) {
170153
REG_CMD_CFG_E413 &= 0xFD;
171154
}
172155

173-
/* *** E764 bit4 = LINK-MODE-ENABLE *** (the keystone write) */
156+
/* E764 bit4 = LINK-MODE-ENABLE */
174157
REG_PHY_TIMER_CTRL_E764 = (REG_PHY_TIMER_CTRL_E764 & 0xEF) | 0x10;
175158
/* re-latch link rate into 0x0A9F/0x0A9E (final) */
176159
PR(0x0A9F) |= (REG_LINK_WIDTH_E710 & 0xE0); /* bVar5 | bd49() */
@@ -185,8 +168,7 @@ static void bank0_8a89(uint8_t mode) {
185168
U4C_BCEB(0xCA81); /* bceb() */
186169
u4c_bd14(); /* bd14 */
187170
}
188-
/* e0d9(0) -- PHY descriptor seed (bank1, e0d9_stub). Reproduced as no-op arm (the SB e0d9
189-
* seed already ran in sb_block_init); the load-bearing effect here is uVar2 selection. */
171+
/* e0d9(0) PHY descriptor seed (already ran in sb_block_init); only uVar2 selection matters here */
190172
uVar2 = (PR(0x09FA) >> 1 & 1) ? 2 : 1;
191173
} else if (PR(0x0A9D) == 0x01) {
192174
if (aa0 & 0x02) {
@@ -210,7 +192,7 @@ static void bank0_8a89(uint8_t mode) {
210192
if (PR(0x07ED) == 0) {
211193
uint8_t cv = PR(0x0A9D);
212194
if (cv == 0) {
213-
REG_CPU_CTRL_CC3E = (REG_CPU_CTRL_CC3E & 0xFD) | 0x02; /* bce7(bd33()|2 -> store): CC3E bit1 + E717.0 */
195+
REG_CPU_CTRL_CC3E = (REG_CPU_CTRL_CC3E & 0xFD) | 0x02; /* bce7: CC3E bit1 + E717.0 */
214196
REG_LINK_CTRL_E717 = (REG_LINK_CTRL_E717 & 0xFE) | 0x01;
215197
REG_CPU_CTRL_CC36 &= 0xDF;
216198
}
@@ -220,15 +202,12 @@ static void bank0_8a89(uint8_t mode) {
220202
U4C_BD23(0xCC36);
221203
}
222204
REG_POWER_MISC_CTRL &= 0xDF;
223-
usb4_connect_u4(); /* *** drive SB/tunnel connect *** */
205+
usb4_connect_u4(); /* drive SB/tunnel connect */
224206
}
225207
PR(0x07ED) = 0x00;
226208

227-
/* final: if link complete -> CC33 ack + cd10 (downstream PCIe handoff). Stock cd10 brings up
228-
* the downstream PCIe link to the GPU then SPINS FOREVER (it is the terminal hand-off state).
229-
* handmade can't spin inside the connect path, so we DEFER the downstream bring-up to the
230-
* super-loop (sb_tunnel_up_pending -> pcie_power_on) and return -- the link-mode is already
231-
* armed (E764.4/E751) which is the load-bearing effect the host needs. */
209+
/* final: if link complete -> CC33 ack + cd10 (downstream PCIe handoff). Stock cd10 spins
210+
* forever; we defer the downstream bring-up to the super-loop (sb_tunnel_up_pending) and return. */
232211
if (PR(0x0AE3) == 0 && (REG_CPU_EXEC_STATUS_2 >> 2 & 1)) {
233212
REG_CPU_EXEC_STATUS_2 = 0x04;
234213
sb_tunnel_up_pending = 1; /* cd10: defer downstream PCIe bring-up */
@@ -237,25 +216,19 @@ static void bank0_8a89(uint8_t mode) {
237216
}
238217
}
239218

240-
/* ====================================================================================
241-
* FUN_CODE_c9a8 @0xC9A8 — the host-link-event connect dispatcher (e94d=c9a8(0), e952=c9a8(1)).
242-
* Gate: (0x09FA.2) && (0x0AF1.0) && (0x07E8 || 0x07EB). When open:
243-
* if (0x0B41) e3b7(3); pcie_downstream_link_bringup(0x0AEF); e96c(); then if gated -> 545c();
244-
* C6A8 &= ~1; bank0_8a89(arg). tail: 0x07E8=0; 0x0B2F=1.
245-
* pcie_downstream_link_bringup(0x0AEF)/3578 is the heavy banked eGPU LTSSM/PERST path; handmade
246-
* runs the equivalent (pcie_power_on) from the super-loop via sb_tunnel_up_pending, so we set that
247-
* pending flag instead of running the long banked poll inside the ISR.
248-
* ==================================================================================== */
219+
/* FUN_CODE_c9a8 @0xC9A8 — host-link-event connect dispatcher (e94d=c9a8(0), e952=c9a8(1)).
220+
* Gate: (0x09FA.2) && (0x0AF1.0) && (0x07E8 || 0x07EB) -> C6A8 &= ~1; bank0_8a89(arg).
221+
* The heavy banked eGPU LTSSM/PERST bring-up (3578) is deferred to the super-loop via
222+
* sb_tunnel_up_pending instead of running the long banked poll inside the ISR. */
249223
static void bank0_c9a8(uint8_t arg) {
250-
PR(0x0A7D) = arg; /* xdata_00a7d = param (mode for 8a89) */
224+
PR(0x0A7D) = arg; /* mode for 8a89 */
251225
if (PR(0x09FA) & 0x04) { /* 0x09FA.2 */
252-
/* if (0x0B41) bank0_e3b7(3) -- B480 PERST clear; deferred via pcie path below. */
253-
/* pcie_downstream_link_bringup(0x0AEF) + e96c(): defer the downstream PCIe bring-up. */
226+
/* defer downstream PCIe bring-up (e3b7(3) B480 PERST clear + 3578) */
254227
sb_tunnel_up_pending = 1;
255228
if ((PR(0x0AF1) & 0x01) &&
256229
(PR(0x07E8) != 0 || PR(0x07EB) != 0)) { /* the c9a8 connect gate */
257-
REG_PHY_CFG_C6A8 &= 0xFE; /* C6A8 &= ~1 (545c side then this) */
258-
bank0_8a89(PR(0x0A7D)); /* *** run the lane-MODE engine *** */
230+
REG_PHY_CFG_C6A8 &= 0xFE; /* C6A8 &= ~1 */
231+
bank0_8a89(PR(0x0A7D)); /* run the lane-MODE engine */
259232
}
260233
PR(0x07E8) = 0x00;
261234
PR(0x0B2F) = 0x01;

0 commit comments

Comments
 (0)