Skip to content

Commit 382c633

Browse files
committed
n64: inline simple dual mode operations
* Pass JITContext down to leaf emit functions.
* Emit inline implementations of basic 64-bit operations.
* Use block compile-time information to elide kernel mode checks of the now inlined operations.
1 parent 616b3b6 commit 382c633

File tree

2 files changed

+77
-66
lines changed

2 files changed

+77
-66
lines changed

Diff for: ares/n64/cpu/cpu.hpp

+4-2
Original file line number | Diff line number | Diff line change
@@ -917,9 +917,11 @@ struct CPU : Thread {
917917
auto block(u64 vaddr, u32 address, JITContext ctx) -> Block*;
918918

919919
auto emit(u64 vaddr, u32 address, JITContext ctx) -> Block*;
920+
auto emitOverflowCheck(reg temp) -> sljit_jump*;
920921
auto emitZeroClear(u32 n) -> void;
921-
auto emitEXECUTE(u32 instruction) -> bool;
922-
auto emitSPECIAL(u32 instruction) -> bool;
922+
auto checkDualAllowed(const JITContext& ctx) -> bool;
923+
auto emitEXECUTE(u32 instruction, JITContext ctx) -> bool;
924+
auto emitSPECIAL(u32 instruction, JITContext ctx) -> bool;
923925
auto emitREGIMM(u32 instruction) -> bool;
924926
auto emitSCC(u32 instruction) -> bool;
925927
auto emitFPU(u32 instruction) -> bool;

Diff for: ares/n64/cpu/recompiler.cpp

+73-64
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@ auto CPU::Recompiler::emit(u64 vaddr, u32 address, JITContext ctx) -> Block* {
7777
mov32(reg(2), imm(instruction));
7878
call(&CPU::instructionPrologue);
7979
}
80-
bool branched = emitEXECUTE(instruction);
80+
bool branched = emitEXECUTE(instruction, ctx);
8181
if(unlikely(instruction == branchToSelf || instruction == jumpToSelf)) {
8282
//accelerate idle loops
8383
mov32(reg(1), imm(64 * 2));
@@ -134,12 +134,31 @@ auto CPU::Recompiler::emitZeroClear(u32 n) -> void {
134134
if(n == 0) mov64(mem(IpuReg(r[0])), imm(0));
135135
}
136136

137-
auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool {
137+
// Emits the overflow-handling tail for an arithmetic operation that was emitted
// with set_o immediately before this call (see the DADD/DADDI/DSUB call sites).
//
// temp: scratch register that receives the overflow flag value for the compare.
// Returns the jump emitted right after the arithmeticOverflow call; the caller
// binds it with setLabel() after the instruction's result write-back, so the
// write-back is not executed on the overflow path.
auto CPU::Recompiler::emitOverflowCheck(reg temp) -> sljit_jump* {
138+
// If overflow flag set: throw an exception, skip the instruction via the 'end' label.
139+
// NOTE(review): mov32_f presumably materializes flag_o into temp as 0/1 - confirm against the emitter helpers.
mov32_f(temp, flag_o);
140+
// Jump taken when temp == 0, i.e. when no overflow was recorded.
auto didntOverflow = cmp32_jump(temp, imm(0), flag_eq);
141+
call(&CPU::Exception::arithmeticOverflow, &cpu.exception);
142+
// Jump emitted after the exception call; its target is left for the caller to set.
auto end = jump();
143+
// The no-overflow path resumes here, before whatever the caller emits next.
setLabel(didntOverflow);
144+
return end;
145+
}
146+
147+
// Decides at block-compile time, from the block's JITContext, whether a 64-bit
// operation may be emitted inline.
//
// Returns true when the operation is allowed. Otherwise it emits a call to
// Exception::reservedInstruction and returns false; the visible call sites then
// return 1 without emitting the operation itself.
auto CPU::Recompiler::checkDualAllowed(const JITContext& ctx) -> bool {
148+
// Rejected only when the context is neither kernel mode nor flagged is64bit.
if (ctx.mode != Context::Mode::Kernel && !ctx.is64bit) {
149+
call(&CPU::Exception::reservedInstruction, &self.exception);
150+
return false;
151+
}
152+
153+
return true;
154+
}
155+
156+
auto CPU::Recompiler::emitEXECUTE(u32 instruction, JITContext ctx) -> bool {
138157
switch(instruction >> 26) {
139158

140159
//SPECIAL
141160
case 0x00: {
142-
return emitSPECIAL(instruction);
161+
return emitSPECIAL(instruction, ctx);
143162
}
144163

145164
//REGIMM
@@ -315,21 +334,19 @@ auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool {
315334

316335
//DADDI Rt,Rs,i16
317336
case 0x18: {
318-
lea(reg(1), Rt);
319-
lea(reg(2), Rs);
320-
mov32(reg(3), imm(i16));
321-
call(&CPU::DADDI);
322-
emitZeroClear(Rtn);
337+
if (!checkDualAllowed(ctx)) return 1;
338+
add64(reg(0), mem(Rs), imm(i16), set_o);
339+
auto skip = emitOverflowCheck(reg(2));
340+
if(Rtn > 0) mov64(mem(Rt), reg(0));
341+
setLabel(skip);
323342
return 0;
324343
}
325344

326345
//DADDIU Rt,Rs,i16
327346
case 0x19: {
328-
lea(reg(1), Rt);
329-
lea(reg(2), Rs);
330-
mov32(reg(3), imm(i16));
331-
call(&CPU::DADDIU);
332-
emitZeroClear(Rtn);
347+
if (!checkDualAllowed(ctx)) return 1;
348+
add64(reg(0), mem(Rs), imm(i16), set_o);
349+
if(Rtn > 0) mov64(mem(Rt), reg(0));
333350
return 0;
334351
}
335352

@@ -647,7 +664,7 @@ auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool {
647664
return 0;
648665
}
649666

650-
auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {
667+
auto CPU::Recompiler::emitSPECIAL(u32 instruction, JITContext ctx) -> bool {
651668
switch(instruction & 0x3f) {
652669

653670
//SLL Rd,Rt,Sa
@@ -791,11 +808,10 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {
791808

792809
//DSLLV Rd,Rt,Rs
793810
case 0x14: {
794-
lea(reg(1), Rd);
795-
lea(reg(2), Rt);
796-
lea(reg(3), Rs);
797-
call(&CPU::DSLLV);
798-
emitZeroClear(Rdn);
811+
if (!checkDualAllowed(ctx)) return 1;
812+
if (Rdn == 0) return 0;
813+
and64(reg(0), mem(Rs32), imm(63));
814+
shl64(mem(Rd), mem(Rt), reg(0));
799815
return 0;
800816
}
801817

@@ -807,21 +823,19 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {
807823

808824
//DSRLV Rd,Rt,Rs
809825
case 0x16: {
810-
lea(reg(1), Rd);
811-
lea(reg(2), Rt);
812-
lea(reg(3), Rs);
813-
call(&CPU::DSRLV);
814-
emitZeroClear(Rdn);
826+
if (!checkDualAllowed(ctx)) return 1;
827+
if (Rdn == 0) return 0;
828+
and64(reg(0), mem(Rs32), imm(63));
829+
lshr64(mem(Rd), mem(Rt), reg(0));
815830
return 0;
816831
}
817832

818833
//DSRAV Rd,Rt,Rs
819834
case 0x17: {
820-
lea(reg(1), Rd);
821-
lea(reg(2), Rt);
822-
lea(reg(3), Rs);
823-
call(&CPU::DSRAV);
824-
emitZeroClear(Rdn);
835+
if (!checkDualAllowed(ctx)) return 1;
836+
if (Rdn == 0) return 0;
837+
and64(reg(0), mem(Rs32), imm(63));
838+
ashr64(mem(Rd), mem(Rt), reg(0));
825839
return 0;
826840
}
827841

@@ -981,41 +995,42 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {
981995

982996
//DADD Rd,Rs,Rt
983997
case 0x2c: {
984-
lea(reg(1), Rd);
985-
lea(reg(2), Rs);
986-
lea(reg(3), Rt);
987-
call(&CPU::DADD);
988-
emitZeroClear(Rdn);
998+
if (!checkDualAllowed(ctx)) return 1;
999+
add64(reg(0), mem(Rs), mem(Rt), set_o);
1000+
auto skip = emitOverflowCheck(reg(2));
1001+
if(Rdn > 0) mov64(mem(Rd), reg(0));
1002+
setLabel(skip);
9891003
return 0;
9901004
}
9911005

9921006
//DADDU Rd,Rs,Rt
9931007
case 0x2d: {
994-
lea(reg(1), Rd);
995-
lea(reg(2), Rs);
996-
lea(reg(3), Rt);
997-
call(&CPU::DADDU);
998-
emitZeroClear(Rdn);
1008+
if (!checkDualAllowed(ctx)) {
1009+
return 1;
1010+
}
1011+
1012+
if(Rdn == 0) return 0;
1013+
1014+
add64(reg(0), mem(Rs), mem(Rt));
1015+
mov64(mem(Rd), reg(0));
9991016
return 0;
10001017
}
10011018

10021019
//DSUB Rd,Rs,Rt
10031020
case 0x2e: {
1004-
lea(reg(1), Rd);
1005-
lea(reg(2), Rs);
1006-
lea(reg(3), Rt);
1007-
call(&CPU::DSUB);
1008-
emitZeroClear(Rdn);
1021+
if (!checkDualAllowed(ctx)) return 1;
1022+
sub64(reg(0), mem(Rs), mem(Rt), set_o);
1023+
auto skip = emitOverflowCheck(reg(2));
1024+
if(Rdn > 0) mov64(mem(Rd), reg(0));
1025+
setLabel(skip);
10091026
return 0;
10101027
}
10111028

10121029
//DSUBU Rd,Rs,Rt
10131030
case 0x2f: {
1014-
lea(reg(1), Rd);
1015-
lea(reg(2), Rs);
1016-
lea(reg(3), Rt);
1017-
call(&CPU::DSUBU);
1018-
emitZeroClear(Rdn);
1031+
if (!checkDualAllowed(ctx)) return 1;
1032+
sub64(reg(0), mem(Rs), mem(Rt), set_o);
1033+
if(Rdn > 0) mov64(mem(Rd), reg(0));
10191034
return 0;
10201035
}
10211036

@@ -1081,11 +1096,9 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {
10811096

10821097
//DSLL Rd,Rt,Sa
10831098
case 0x38: {
1084-
lea(reg(1), Rd);
1085-
lea(reg(2), Rt);
1086-
mov32(reg(3), imm(Sa));
1087-
call(&CPU::DSLL);
1088-
emitZeroClear(Rdn);
1099+
if (!checkDualAllowed(ctx)) return 1;
1100+
if (Rdn == 0) return 0;
1101+
shl64(mem(Rd), mem(Rt), imm(Sa));
10891102
return 0;
10901103
}
10911104

@@ -1107,21 +1120,17 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {
11071120

11081121
//DSRA Rd,Rt,Sa
11091122
case 0x3b: {
1110-
lea(reg(1), Rd);
1111-
lea(reg(2), Rt);
1112-
mov32(reg(3), imm(Sa));
1113-
call(&CPU::DSRA);
1114-
emitZeroClear(Rdn);
1123+
if (!checkDualAllowed(ctx)) return 1;
1124+
if (Rdn == 0) return 0;
1125+
ashr64(mem(Rd), mem(Rt), imm(Sa));
11151126
return 0;
11161127
}
11171128

11181129
//DSLL32 Rd,Rt,Sa
11191130
case 0x3c: {
1120-
lea(reg(1), Rd);
1121-
lea(reg(2), Rt);
1122-
mov32(reg(3), imm(Sa+32));
1123-
call(&CPU::DSLL);
1124-
emitZeroClear(Rdn);
1131+
if (!checkDualAllowed(ctx)) return 1;
1132+
if (Rdn == 0) return 0;
1133+
shl64(mem(Rd), mem(Rt), imm(Sa+32));
11251134
return 0;
11261135
}
11271136

0 commit comments

Comments
 (0)