Skip to content

Commit ba4504a

Browse files
committed
n64: inline simple dual mode operations
* Pass JITContext down to leaf emit functions.
* Emit inline implementations of basic 64-bit operations.
* Use block compile-time information to elide kernel-mode checks of the now-inlined operations.
1 parent dacacac commit ba4504a

File tree

2 files changed

+76
-65
lines changed

2 files changed

+76
-65
lines changed

Diff for: ares/n64/cpu/cpu.hpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -922,9 +922,11 @@ struct CPU : Thread {
922922
auto block(u64 vaddr, u32 address, const Context& ctx) -> Block*;
923923

924924
auto emit(u64 vaddr, u32 address, Context::JIT ctx) -> Block*;
925+
auto emitOverflowCheck(reg temp) -> sljit_jump*;
925926
auto emitZeroClear(u32 n) -> void;
926-
auto emitEXECUTE(u32 instruction) -> bool;
927-
auto emitSPECIAL(u32 instruction) -> bool;
927+
auto checkDualAllowed(const Context::JIT& ctx) -> bool;
928+
auto emitEXECUTE(u32 instruction, Context::JIT ctx) -> bool;
929+
auto emitSPECIAL(u32 instruction, Context::JIT ctx) -> bool;
928930
auto emitREGIMM(u32 instruction) -> bool;
929931
auto emitSCC(u32 instruction) -> bool;
930932
auto emitFPU(u32 instruction) -> bool;

Diff for: ares/n64/cpu/recompiler.cpp

+72-63
Original file line numberDiff line numberDiff line change
@@ -127,12 +127,31 @@ auto CPU::Recompiler::emitZeroClear(u32 n) -> void {
127127
if(n == 0) mov64(mem(IpuReg(r[0])), imm(0));
128128
}
129129

130-
auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool {
130+
auto CPU::Recompiler::emitOverflowCheck(reg temp) -> sljit_jump* {
131+
// If overflow flag set: throw an exception, skip the instruction via the 'end' label.
132+
mov32_f(temp, flag_o);
133+
auto didntOverflow = cmp32_jump(temp, imm(0), flag_eq);
134+
call(&CPU::Exception::arithmeticOverflow, &cpu.exception);
135+
auto end = jump();
136+
setLabel(didntOverflow);
137+
return end;
138+
}
139+
140+
auto CPU::Recompiler::checkDualAllowed(const Context::JIT& ctx) -> bool {
141+
if (ctx.mode != Context::Mode::Kernel && !ctx.is64bit) {
142+
call(&CPU::Exception::reservedInstruction, &self.exception);
143+
return false;
144+
}
145+
146+
return true;
147+
}
148+
149+
auto CPU::Recompiler::emitEXECUTE(u32 instruction, Context::JIT ctx) -> bool {
131150
switch(instruction >> 26) {
132151

133152
//SPECIAL
134153
case 0x00: {
135-
return emitSPECIAL(instruction);
154+
return emitSPECIAL(instruction, ctx);
136155
}
137156

138157
//REGIMM
@@ -308,21 +327,19 @@ auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool {
308327

309328
//DADDI Rt,Rs,i16
310329
case 0x18: {
311-
lea(reg(1), Rt);
312-
lea(reg(2), Rs);
313-
mov32(reg(3), imm(i16));
314-
call(&CPU::DADDI);
315-
emitZeroClear(Rtn);
330+
if (!checkDualAllowed(ctx)) return 1;
331+
add64(reg(0), mem(Rs), imm(i16), set_o);
332+
auto skip = emitOverflowCheck(reg(2));
333+
if(Rtn > 0) mov64(mem(Rt), reg(0));
334+
setLabel(skip);
316335
return 0;
317336
}
318337

319338
//DADDIU Rt,Rs,i16
320339
case 0x19: {
321-
lea(reg(1), Rt);
322-
lea(reg(2), Rs);
323-
mov32(reg(3), imm(i16));
324-
call(&CPU::DADDIU);
325-
emitZeroClear(Rtn);
340+
if (!checkDualAllowed(ctx)) return 1;
341+
add64(reg(0), mem(Rs), imm(i16), set_o);
342+
if(Rtn > 0) mov64(mem(Rt), reg(0));
326343
return 0;
327344
}
328345

@@ -640,7 +657,7 @@ auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool {
640657
return 0;
641658
}
642659

643-
auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {
660+
auto CPU::Recompiler::emitSPECIAL(u32 instruction, Context::JIT ctx) -> bool {
644661
switch(instruction & 0x3f) {
645662

646663
//SLL Rd,Rt,Sa
@@ -784,11 +801,10 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {
784801

785802
//DSLLV Rd,Rt,Rs
786803
case 0x14: {
787-
lea(reg(1), Rd);
788-
lea(reg(2), Rt);
789-
lea(reg(3), Rs);
790-
call(&CPU::DSLLV);
791-
emitZeroClear(Rdn);
804+
if (!checkDualAllowed(ctx)) return 1;
805+
if (Rdn == 0) return 0;
806+
and64(reg(0), mem(Rs32), imm(63));
807+
shl64(mem(Rd), mem(Rt), reg(0));
792808
return 0;
793809
}
794810

@@ -800,21 +816,19 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {
800816

801817
//DSRLV Rd,Rt,Rs
802818
case 0x16: {
803-
lea(reg(1), Rd);
804-
lea(reg(2), Rt);
805-
lea(reg(3), Rs);
806-
call(&CPU::DSRLV);
807-
emitZeroClear(Rdn);
819+
if (!checkDualAllowed(ctx)) return 1;
820+
if (Rdn == 0) return 0;
821+
and64(reg(0), mem(Rs32), imm(63));
822+
lshr64(mem(Rd), mem(Rt), reg(0));
808823
return 0;
809824
}
810825

811826
//DSRAV Rd,Rt,Rs
812827
case 0x17: {
813-
lea(reg(1), Rd);
814-
lea(reg(2), Rt);
815-
lea(reg(3), Rs);
816-
call(&CPU::DSRAV);
817-
emitZeroClear(Rdn);
828+
if (!checkDualAllowed(ctx)) return 1;
829+
if (Rdn == 0) return 0;
830+
and64(reg(0), mem(Rs32), imm(63));
831+
ashr64(mem(Rd), mem(Rt), reg(0));
818832
return 0;
819833
}
820834

@@ -974,41 +988,42 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {
974988

975989
//DADD Rd,Rs,Rt
976990
case 0x2c: {
977-
lea(reg(1), Rd);
978-
lea(reg(2), Rs);
979-
lea(reg(3), Rt);
980-
call(&CPU::DADD);
981-
emitZeroClear(Rdn);
991+
if (!checkDualAllowed(ctx)) return 1;
992+
add64(reg(0), mem(Rs), mem(Rt), set_o);
993+
auto skip = emitOverflowCheck(reg(2));
994+
if(Rdn > 0) mov64(mem(Rd), reg(0));
995+
setLabel(skip);
982996
return 0;
983997
}
984998

985999
//DADDU Rd,Rs,Rt
9861000
case 0x2d: {
987-
lea(reg(1), Rd);
988-
lea(reg(2), Rs);
989-
lea(reg(3), Rt);
990-
call(&CPU::DADDU);
991-
emitZeroClear(Rdn);
1001+
if (!checkDualAllowed(ctx)) {
1002+
return 1;
1003+
}
1004+
1005+
if(Rdn == 0) return 0;
1006+
1007+
add64(reg(0), mem(Rs), mem(Rt));
1008+
mov64(mem(Rd), reg(0));
9921009
return 0;
9931010
}
9941011

9951012
//DSUB Rd,Rs,Rt
9961013
case 0x2e: {
997-
lea(reg(1), Rd);
998-
lea(reg(2), Rs);
999-
lea(reg(3), Rt);
1000-
call(&CPU::DSUB);
1001-
emitZeroClear(Rdn);
1014+
if (!checkDualAllowed(ctx)) return 1;
1015+
sub64(reg(0), mem(Rs), mem(Rt), set_o);
1016+
auto skip = emitOverflowCheck(reg(2));
1017+
if(Rdn > 0) mov64(mem(Rd), reg(0));
1018+
setLabel(skip);
10021019
return 0;
10031020
}
10041021

10051022
//DSUBU Rd,Rs,Rt
10061023
case 0x2f: {
1007-
lea(reg(1), Rd);
1008-
lea(reg(2), Rs);
1009-
lea(reg(3), Rt);
1010-
call(&CPU::DSUBU);
1011-
emitZeroClear(Rdn);
1024+
if (!checkDualAllowed(ctx)) return 1;
1025+
sub64(reg(0), mem(Rs), mem(Rt), set_o);
1026+
if(Rdn > 0) mov64(mem(Rd), reg(0));
10121027
return 0;
10131028
}
10141029

@@ -1074,11 +1089,9 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {
10741089

10751090
//DSLL Rd,Rt,Sa
10761091
case 0x38: {
1077-
lea(reg(1), Rd);
1078-
lea(reg(2), Rt);
1079-
mov32(reg(3), imm(Sa));
1080-
call(&CPU::DSLL);
1081-
emitZeroClear(Rdn);
1092+
if (!checkDualAllowed(ctx)) return 1;
1093+
if (Rdn == 0) return 0;
1094+
shl64(mem(Rd), mem(Rt), imm(Sa));
10821095
return 0;
10831096
}
10841097

@@ -1100,21 +1113,17 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {
11001113

11011114
//DSRA Rd,Rt,Sa
11021115
case 0x3b: {
1103-
lea(reg(1), Rd);
1104-
lea(reg(2), Rt);
1105-
mov32(reg(3), imm(Sa));
1106-
call(&CPU::DSRA);
1107-
emitZeroClear(Rdn);
1116+
if (!checkDualAllowed(ctx)) return 1;
1117+
if (Rdn == 0) return 0;
1118+
ashr64(mem(Rd), mem(Rt), imm(Sa));
11081119
return 0;
11091120
}
11101121

11111122
//DSLL32 Rd,Rt,Sa
11121123
case 0x3c: {
1113-
lea(reg(1), Rd);
1114-
lea(reg(2), Rt);
1115-
mov32(reg(3), imm(Sa+32));
1116-
call(&CPU::DSLL);
1117-
emitZeroClear(Rdn);
1124+
if (!checkDualAllowed(ctx)) return 1;
1125+
if (Rdn == 0) return 0;
1126+
shl64(mem(Rd), mem(Rt), imm(Sa+32));
11181127
return 0;
11191128
}
11201129

0 commit comments

Comments
 (0)