Skip to content

Commit 573f815

Browse files
bfirsh and claude committed
Split fast/slow memory access paths for ~11% speedup
Add loadDirect() for addressing mode reads (operand fetches from PRG space, zero-page pointer/dummy reads) that skip PPU/APU catch-up checks. Restructure load() to check addr < 0x2000 first so the common RAM path avoids catch-up branches entirely. Optimize push()/pull() to access stack RAM directly instead of dispatching through the mapper. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ce87b65 commit 573f815

File tree

1 file changed

+63
-33
lines changed

1 file changed

+63
-33
lines changed

src/cpu.js

Lines changed: 63 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -169,12 +169,12 @@ class CPU {
169169
case 0: {
170170
// Zero Page mode. Use the address given after the opcode,
171171
// but without high byte.
172-
addr = this.load(opaddr + 2);
172+
addr = this.loadDirect(opaddr + 2);
173173
break;
174174
}
175175
case 1: {
176176
// Relative mode.
177-
addr = this.load(opaddr + 2);
177+
addr = this.loadDirect(opaddr + 2);
178178
if (addr < 0x80) {
179179
addr += this.REG_PC;
180180
} else {
@@ -189,7 +189,7 @@ class CPU {
189189
// Note: opaddr is REG_PC which is one less than the actual instruction
190190
// address (opcode is at opaddr+1), so the dummy read targets opaddr+2.
191191
// See https://www.nesdev.org/wiki/CPU_addressing_modes
192-
this.load(opaddr + 2);
192+
this.loadDirect(opaddr + 2);
193193
break;
194194
}
195195
case 3: {
@@ -203,7 +203,7 @@ class CPU {
203203
// Like implied mode, the 6502 performs a dummy read of the byte at PC
204204
// during its second cycle (opaddr+2, see case 2 comment).
205205
// See https://www.nesdev.org/wiki/CPU_addressing_modes
206-
this.load(opaddr + 2);
206+
this.loadDirect(opaddr + 2);
207207
addr = this.REG_ACC;
208208
break;
209209
}
@@ -218,15 +218,15 @@ class CPU {
218218
// The 6502 reads from the unindexed zero-page address while adding X.
219219
// This "dummy read" is a real bus cycle that can trigger I/O side effects.
220220
// See https://www.nesdev.org/wiki/CPU_addressing_modes
221-
let zpBase6 = this.load(opaddr + 2);
222-
this.load(zpBase6); // dummy read from unindexed zero-page address
221+
let zpBase6 = this.loadDirect(opaddr + 2);
222+
this.loadDirect(zpBase6); // dummy read from unindexed zero-page address
223223
addr = (zpBase6 + this.REG_X) & 0xff;
224224
break;
225225
}
226226
case 7: {
227227
// Zero Page Indexed mode, Y as index. Same dummy read behavior as case 6.
228-
let zpBase7 = this.load(opaddr + 2);
229-
this.load(zpBase7); // dummy read from unindexed zero-page address
228+
let zpBase7 = this.loadDirect(opaddr + 2);
229+
this.loadDirect(zpBase7); // dummy read from unindexed zero-page address
230230
addr = (zpBase7 + this.REG_Y) & 0xff;
231231
break;
232232
}
@@ -262,17 +262,21 @@ class CPU {
262262
// Pre-indexed Indirect mode, (d,X). Read pointer from zero page,
263263
// add X, then read the 16-bit effective address. Wraps within zero page.
264264
// Dummy read from the unindexed pointer address while adding X.
265-
let zpPtr10 = this.load(opaddr + 2);
266-
this.load(zpPtr10); // dummy read: 6502 reads from ptr before adding X
265+
let zpPtr10 = this.loadDirect(opaddr + 2);
266+
this.loadDirect(zpPtr10); // dummy read: 6502 reads from ptr before adding X
267267
let zpAddr10 = (zpPtr10 + this.REG_X) & 0xff;
268-
addr = this.load(zpAddr10) | (this.load((zpAddr10 + 1) & 0xff) << 8);
268+
addr =
269+
this.loadDirect(zpAddr10) |
270+
(this.loadDirect((zpAddr10 + 1) & 0xff) << 8);
269271
break;
270272
}
271273
case 11: {
272274
// Post-indexed Indirect mode, (d),Y. Read 16-bit base address from
273275
// zero page, then add Y. Page-crossing dummy read as in case 8.
274-
let zpAddr = this.load(opaddr + 2);
275-
addr = this.load(zpAddr) | (this.load((zpAddr + 1) & 0xff) << 8);
276+
let zpAddr = this.loadDirect(opaddr + 2);
277+
addr =
278+
this.loadDirect(zpAddr) |
279+
(this.loadDirect((zpAddr + 1) & 0xff) << 8);
276280
baseHigh = (addr >> 8) & 0xff;
277281
if ((addr & 0xff00) !== ((addr + this.REG_Y) & 0xff00)) {
278282
this.load((addr & 0xff00) | ((addr + this.REG_Y) & 0xff));
@@ -702,7 +706,7 @@ class CPU {
702706
// last cycle (after the pushes), updating the data bus. This matters
703707
// for open bus behavior when JSR targets unmapped addresses.
704708
// See https://www.nesdev.org/wiki/Open_bus_behavior
705-
this.dataBus = this.load(opaddr + 3);
709+
this.loadDirect(opaddr + 3);
706710
this.REG_PC = addr - 1;
707711
break;
708712
}
@@ -1555,27 +1559,51 @@ class CPU {
15551559
}
15561560

15571561
// Each load() call represents one CPU bus read cycle.
1562+
// Structured with the most common paths first: RAM reads ($0000-$1FFF)
1563+
// and cartridge/PRG reads ($4000+) skip the PPU/APU catch-up checks
1564+
// entirely. Only PPU register reads ($2000-$3FFF) and APU status reads ($4015) trigger catch-up.
15581565
load(addr) {
1559-
// Catch up PPU before reading PPU registers so the read sees
1560-
// up-to-date VBlank/sprite-0 flags. See _ppuCatchUp().
1561-
if (addr >= 0x2000 && addr < 0x4000) {
1566+
if (addr < 0x2000) {
1567+
// RAM (zero page, stack, general): most common path
1568+
this.dataBus = this.mem[addr & 0x7ff];
1569+
} else if (addr >= 0x4000) {
1570+
// Cartridge ROM/RAM, APU, expansion ($4000+)
1571+
if (addr === 0x4015) {
1572+
// Catch up APU frame counter before reading $4015 so the read sees
1573+
// up-to-date length counter status and IRQ flags.
1574+
this._apuCatchUp();
1575+
// $4015 reads are internal to the 2A03 — the APU status value does
1576+
// not drive the external data bus. Return the status directly without
1577+
// updating dataBus, so open bus reads after $4015 still see the
1578+
// previous bus value. See https://www.nesdev.org/wiki/Open_bus_behavior
1579+
let apuStatus = this.loadFromCartridge(addr);
1580+
this.instrBusCycles++;
1581+
return apuStatus;
1582+
}
1583+
this.dataBus = this.loadFromCartridge(addr);
1584+
} else {
1585+
// PPU registers ($2000-$3FFF): catch up PPU so the read sees
1586+
// up-to-date VBlank/sprite-0 flags. See _ppuCatchUp().
15621587
this._ppuCatchUp();
1588+
this.dataBus = this.loadFromCartridge(addr);
15631589
}
1564-
// Catch up APU frame counter before reading $4015 so the read sees
1565-
// up-to-date length counter status and IRQ flags.
1566-
if (addr === 0x4015) {
1567-
this._apuCatchUp();
1568-
}
1590+
this.instrBusCycles++;
1591+
return this.dataBus;
1592+
}
1593+
1594+
// Fast load for addresses guaranteed to be outside the PPU register range
1595+
// ($2000-$3FFF) and APU status register ($4015). Skips the catch-up checks
1596+
// that load() performs, but still updates dataBus (open bus behavior) and
1597+
// instrBusCycles (PPU catch-up accounting for later PPU register accesses).
1598+
//
1599+
// Safe for:
1600+
// - Zero-page reads ($00-$FF): always internal RAM
1601+
// - Program-space operand reads (opaddr+2/+3): always PRG ROM ($8000+)
1602+
//
1603+
// NOT safe for arbitrary effective addresses that could be PPU/APU I/O.
1604+
loadDirect(addr) {
15691605
if (addr < 0x2000) {
15701606
this.dataBus = this.mem[addr & 0x7ff];
1571-
} else if (addr === 0x4015) {
1572-
// $4015 reads are internal to the 2A03 — the APU status value does
1573-
// not drive the external data bus. Return the status directly without
1574-
// updating dataBus, so open bus reads after $4015 still see the
1575-
// previous bus value. See https://www.nesdev.org/wiki/Open_bus_behavior
1576-
let apuStatus = this.loadFromCartridge(addr);
1577-
this.instrBusCycles++;
1578-
return apuStatus;
15791607
} else {
15801608
this.dataBus = this.loadFromCartridge(addr);
15811609
}
@@ -1629,17 +1657,19 @@ class CPU {
16291657

16301658
push(value) {
16311659
this.dataBus = value;
1632-
this.nes.mmap.write(this.REG_SP | 0x100, value);
1660+
// Stack is always $0100-$01FF (internal RAM), so write directly to mem[]
1661+
// instead of going through the mapper.
1662+
this.mem[this.REG_SP | 0x100] = value;
16331663
this.REG_SP--;
1634-
// this.REG_SP = 0x0100 | (this.REG_SP & 0xff);
16351664
this.REG_SP = this.REG_SP & 0xff;
16361665
this.instrBusCycles++;
16371666
}
16381667

16391668
pull() {
16401669
this.REG_SP++;
16411670
this.REG_SP = this.REG_SP & 0xff;
1642-
this.dataBus = this.nes.mmap.load(0x100 | this.REG_SP);
1671+
// Stack is always $0100-$01FF (internal RAM), so read directly from mem[].
1672+
this.dataBus = this.mem[0x100 | this.REG_SP];
16431673
this.instrBusCycles++;
16441674
return this.dataBus;
16451675
}

0 commit comments

Comments
 (0)