Skip to content

Commit 72cc8c3

Browse files
committed
Register dumping for debugging.
1 parent 29f5c39 commit 72cc8c3

File tree

6 files changed

+115
-4
lines changed

6 files changed

+115
-4
lines changed

pcsx2/GS/Renderers/SW/GSDrawScanline.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
// Lack of a better home
1818
constexpr GSScanlineConstantData256B g_const_256b;
1919
constexpr GSScanlineConstantData128B g_const_128b;
20+
// Debug-only register dump buffer (16 MiB, 16-byte aligned).
alignas(16) u8 g_reg_dump_data[16 * 1024 * 1024];
21+
// Debug-only counter paired with g_reg_dump_data; starts at zero.
// NOTE(review): presumably the byte offset of the next free slot in the buffer - confirm against the code that increments it.
alignas(16) u64 g_reg_dump_counter = 0;
2022
#endif
2123

2224
MULTI_ISA_UNSHARED_IMPL;

pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3459,6 +3459,25 @@ void GSDrawScanlineCodeGenerator::ReadTexelImpl(const Xmm& dst, const Xmm& addr,
34593459
void GSDrawScanlineCodeGenerator::RoundUV(const XYm& u, const XYm& v, const XYm& tmp1,
34603460
const XYm& tmp2, const XYm& tmp3, const XYm& tmp4, const XYm& tmp5, const XYm& tmp6)
34613461
{
3462+
// PUSH_REG(reg): debugging aid that emits code to append `reg` to the register dump buffer.
// Emitted sequence:
//   - rdx <- m_reg_dump_counter (global data), rax <- value read through rdx (current offset).
//   - If rax <= 16 MiB - 16, store `reg` with movaps at [m_reg_dump_data + rax], add 16 to rax
//     and write it back through the counter pointer; otherwise jump over the store.
//   - rax and rdx are both set to 0 afterwards, so the generated code must not rely on their
//     previous contents across a PUSH_REG.
// The comparison uses jle, i.e. it treats the counter as a signed value.
// NOTE(review): each entry advances the counter by 16 bytes; confirm this is still correct when
// XYm is a 256-bit register type.
// NOTE(review): the counter update is a plain load/add/store - confirm whether generated code can
// run on several threads at once.
#define PUSH_REG(reg) \
3463+
do { \
3464+
mov(rdx, _rip_global(m_reg_dump_counter)); \
3465+
mov(rax, ptr[rdx]); \
3466+
Label end_label; \
3467+
cmp(rax, 16 * 1024 * 1024 - 16); \
3468+
jle("@f"); \
3469+
jmp(end_label); \
3470+
L("@@"); \
3471+
mov(rdx, _rip_global(m_reg_dump_data)); \
3472+
movaps(ptr[rax + rdx], reg); \
3473+
mov(rdx, _rip_global(m_reg_dump_counter)); \
3474+
add(rax, 16); \
3475+
mov(qword[rdx], rax); \
3476+
L(end_label); \
3477+
mov(rax, 0); \
3478+
mov(rdx, 0); \
3479+
} while (0)
3480+
34623481
for (int i = 0; i < 2; i++)
34633482
{
34643483
// i == 0: U rounding.
@@ -3471,17 +3490,25 @@ void GSDrawScanlineCodeGenerator::RoundUV(const XYm& u, const XYm& v, const XYm&
34713490
broadcastss(tmp1, _rip_local(temp.round.left));
34723491
paddd(tmp1, ptr[_m_const + offsetof(GSScanlineConstantDataT, m_offsets)]);
34733492

3493+
PUSH_REG(tmp1);
3494+
34743495
// const VectorI at_left = VectorI(local.temp.round.prim_left) == curr_x;
34753496

34763497
broadcastss(tmp2, _rip_local(temp.round.prim_left));
34773498
pcmpeqd(tmp1, tmp2);
3499+
3500+
PUSH_REG(tmp2);
3501+
3502+
PUSH_REG(tmp1);
34783503
}
34793504

34803505
// const VectorI round_setting_u = VectorI(local.temp.round.flags_u);
34813506
// const VectorI round_setting_v = VectorI(local.temp.round.flags_v);
34823507

34833508
broadcastss(tmp2, i == 0 ? _rip_local(temp.round.flags_u) : _rip_local(temp.round.flags_v));
34843509

3510+
PUSH_REG(tmp2);
3511+
34853512
// const VectorI round_down_const = VectorI::load<true>(g_const.m_round_down);
34863513
// const VectorI round_down_u = (round_setting_u == round_down_const) & ~at_left;
34873514
// const VectorI round_down_v = (round_setting_v == round_down_const);
@@ -3494,6 +3521,7 @@ void GSDrawScanlineCodeGenerator::RoundUV(const XYm& u, const XYm& v, const XYm&
34943521
movaps(tmp3, tmp1);
34953522
pandn(tmp3, tmp4);
34963523
}
3524+
PUSH_REG(tmp3);
34973525

34983526
// const VectorI round_up_const = VectorI::load<true>(g_const.m_round_up);
34993527
// const VectorI round_up_u = (round_setting_u == round_up_const) |
@@ -3509,16 +3537,22 @@ void GSDrawScanlineCodeGenerator::RoundUV(const XYm& u, const XYm& v, const XYm&
35093537
pand(tmp5, tmp1);
35103538
por(tmp4, tmp5);
35113539
}
3540+
PUSH_REG(tmp4);
35123541

35133542
// const VectorI quarter_texel = VectorI::load<true>(g_const.m_quarter_texel);
35143543
// const VectorI half_texel_mask = VectorI::load<true>(g_const.m_half_texel_mask);
35153544
// VectorI ui = (u + quarter_texel) & half_texel_mask;
35163545
// VectorI vi = (v + quarter_texel) & half_texel_mask;
35173546

35183547
const XYm& uv = i == 0 ? u : v;
3548+
3549+
PUSH_REG(uv);
3550+
35193551
movaps(tmp1, ptr[_m_const + offsetof(GSScanlineConstantDataT, m_quarter_texel)]);
35203552
paddd(tmp1, uv);
35213553
pand(tmp1, ptr[_m_const + offsetof(GSScanlineConstantDataT, m_half_texel_mask)]);
3554+
3555+
PUSH_REG(tmp1);
35223556

35233557
// const VectorI threshold = VectorI::load<true>(g_const.m_round_threshold);
35243558
// VectorI close_u = (u - ui).abs32() <= threshold;
@@ -3532,6 +3566,8 @@ void GSDrawScanlineCodeGenerator::RoundUV(const XYm& u, const XYm& v, const XYm&
35323566
pcmpeqd(tmp6, tmp6);
35333567
pxor(tmp2, tmp6);
35343568

3569+
PUSH_REG(tmp2);
3570+
35353571
// u = u.blend8(ui - threshold, close_u & round_down_u);
35363572
// u = u.blend8(ui + threshold, close_u & round_up_u);
35373573
// v = v.blend8(vi - threshold, close_v & round_down_v);
@@ -3545,10 +3581,18 @@ void GSDrawScanlineCodeGenerator::RoundUV(const XYm& u, const XYm& v, const XYm&
35453581
paddd(tmp6, tmp5);
35463582
pand(tmp2, tmp3);
35473583
pand(tmp6, tmp4);
3584+
3585+
PUSH_REG(tmp3);
3586+
PUSH_REG(tmp4);
3587+
PUSH_REG(tmp2);
3588+
PUSH_REG(tmp6);
3589+
35483590
por(tmp3, tmp4);
35493591
pandn(tmp3, uv);
35503592
por(tmp3, tmp2);
35513593
por(tmp3, tmp6);
35523594
movaps(uv, tmp3);
3595+
3596+
PUSH_REG(uv);
35533597
}
35543598
}

pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.arm64.cpp

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2513,6 +2513,26 @@ void GSDrawScanlineCodeGenerator::RoundUV(
25132513
const VRegister& tmp3, const VRegister& tmp4,
25142514
const VRegister& tmp5, const VRegister& tmp6)
25152515
{
2516+
// PUSH_REG(reg): debugging aid that emits code to append `reg` to the register dump buffer.
// Emitted sequence:
//   - _xscratch2 <- m_reg_dump_counter (global data), _xscratch <- value loaded through it
//     (current offset).
//   - _xscratch2 <- 16 MiB - 16; if _xscratch is greater (signed `gt`), branch past the store.
//   - Otherwise store `reg` at m_reg_dump_data + _xscratch, add 16 to _xscratch and write it back
//     through the counter pointer.
//   - Both scratch registers are set to 0 afterwards, so the generated code must not rely on their
//     previous contents across a PUSH_REG.
// NOTE(review): the amount written by Str depends on the register view passed as `reg`; the
// counter always advances by 16 bytes - confirm callers pass a 128-bit view.
// NOTE(review): the counter update is a plain load/add/store - confirm whether generated code can
// run on several threads at once.
#define PUSH_REG(reg) \
2517+
do { \
2518+
armAsm->Ldr(_xscratch2, _global(m_reg_dump_counter)); \
2519+
armAsm->Ldr(_xscratch, MemOperand(_xscratch2)); \
2520+
Label end_label; \
2521+
armAsm->Mov(_xscratch2, 16 * 1024 * 1024); \
2522+
armAsm->Sub(_xscratch2, _xscratch2, 16); \
2523+
armAsm->Cmp(_xscratch, _xscratch2); \
2524+
armAsm->B(gt, &end_label); \
2525+
armAsm->Ldr(_xscratch2, _global(m_reg_dump_data)); \
2526+
armAsm->Add(_xscratch2, _xscratch2, _xscratch); \
2527+
armAsm->Str(reg, MemOperand(_xscratch2)); \
2528+
armAsm->Ldr(_xscratch2, _global(m_reg_dump_counter)); \
2529+
armAsm->Add(_xscratch, _xscratch, 16); \
2530+
armAsm->Str(_xscratch, MemOperand(_xscratch2)); \
2531+
armAsm->Bind(&end_label); \
2532+
armAsm->Mov(_xscratch, 0); \
2533+
armAsm->Mov(_xscratch2, 0); \
2534+
} while (0)
2535+
25162536
for (int i = 0; i < 2; i++)
25172537
{
25182538
// i == 0: U rounding.
@@ -2526,17 +2546,24 @@ void GSDrawScanlineCodeGenerator::RoundUV(
25262546
armAsm->Ldr(tmp2.V4S(), _global(const_offsets));
25272547
armAsm->Add(tmp1.V4S(), tmp1.V4S(), tmp2.V4S());
25282548

2549+
PUSH_REG(tmp1);
2550+
25292551
// const VectorI at_left = VectorI(local.temp.round.prim_left) == curr_x;
25302552

25312553
armAsm->Ld1r(tmp2.V4S(), _local(temp.round.prim_left));
25322554
armAsm->Cmeq(tmp1.V4S(), tmp1.V4S(), tmp2.V4S());
2555+
2556+
PUSH_REG(tmp2);
2557+
PUSH_REG(tmp1);
25332558
}
25342559

25352560
// const VectorI round_setting_u = VectorI(local.temp.round.flags_u);
25362561
// const VectorI round_setting_v = VectorI(local.temp.round.flags_v);
25372562

25382563
armAsm->Ld1r(tmp2.V4S(), i == 0 ? _local(temp.round.flags_u) : _local(temp.round.flags_v));
25392564

2565+
PUSH_REG(tmp2);
2566+
25402567
// const VectorI round_down_const = VectorI::load<true>(g_const.m_round_down);
25412568
// const VectorI round_down_u = (round_setting_u == round_down_const) & ~at_left;
25422569
// const VectorI round_down_v = (round_setting_v == round_down_const);
@@ -2546,6 +2573,8 @@ void GSDrawScanlineCodeGenerator::RoundUV(
25462573
if (i == 0)
25472574
armAsm->Bic(tmp3.V4S(), tmp3.V4S(), tmp1.V4S());
25482575

2576+
PUSH_REG(tmp3);
2577+
25492578
// const VectorI round_up_const = VectorI::load<true>(g_const.m_round_up);
25502579
// const VectorI round_up_u = (round_setting_u == round_up_const) |
25512580
// ((round_setting_u == round_down_const) & at_left);
@@ -2561,17 +2590,24 @@ void GSDrawScanlineCodeGenerator::RoundUV(
25612590
armAsm->Orr(tmp4.V4S(), tmp4.V4S(), tmp5.V4S());
25622591
}
25632592

2593+
PUSH_REG(tmp4);
2594+
25642595
// const VectorI quarter_texel = VectorI::load<true>(g_const.m_quarter_texel);
25652596
// const VectorI half_texel_mask = VectorI::load<true>(g_const.m_half_texel_mask);
25662597
// VectorI ui = (u + quarter_texel) & half_texel_mask;
25672598
// VectorI vi = (v + quarter_texel) & half_texel_mask;
25682599

25692600
const VRegister& uv = i == 0 ? u : v;
2601+
2602+
PUSH_REG(uv);
2603+
25702604
armAsm->Ld1r(tmp1.V4S(), _global(const_quarter_texel));
25712605
armAsm->Ld1r(tmp5.V4S(), _global(const_half_texel_mask));
25722606
armAsm->Add(tmp1.V4S(), tmp1.V4S(), uv.V4S());
25732607
armAsm->And(tmp1.V4S(), tmp1.V4S(), tmp5.V4S());
25742608

2609+
PUSH_REG(tmp1);
2610+
25752611
// const VectorI threshold = VectorI::load<true>(g_const.m_round_threshold);
25762612
// VectorI close_u = (u - ui).abs32() <= threshold;
25772613
// VectorI close_v = (v - vi).abs32() <= threshold;
@@ -2582,6 +2618,8 @@ void GSDrawScanlineCodeGenerator::RoundUV(
25822618
armAsm->Cmgt(tmp2.V4S(), tmp2.V4S(), tmp5.V4S());
25832619
armAsm->Mvn(tmp2.V4S(), tmp2.V4S());
25842620

2621+
PUSH_REG(tmp2);
2622+
25852623
// u = u.blend8(ui - threshold, close_u & round_down_u);
25862624
// u = u.blend8(ui + threshold, close_u & round_up_u);
25872625
// v = v.blend8(vi - threshold, close_v & round_down_v);
@@ -2593,10 +2631,18 @@ void GSDrawScanlineCodeGenerator::RoundUV(
25932631
armAsm->Add(tmp6.V4S(), tmp1.V4S(), tmp5.V4S());
25942632
armAsm->And(tmp2.V4S(), tmp2.V4S(), tmp3.V4S());
25952633
armAsm->And(tmp6.V4S(), tmp6.V4S(), tmp4.V4S());
2596-
// armAsm->Bic(uv.V4S(), uv.V4S(), tmp3.V4S()); // FIXME: Uncomment after testing!
2597-
// armAsm->Bic(uv.V4S(), uv.V4S(), tmp4.V4S()); // FIXME: Uncomment after testing!
2598-
// armAsm->Orr(uv.V4S(), uv.V4S(), tmp2.V4S()); // FIXME: Uncomment after testing!
2599-
// armAsm->Orr(uv.V4S(), uv.V4S(), tmp6.V4S()); // FIXME: Uncomment after testing!
2634+
2635+
PUSH_REG(tmp3);
2636+
PUSH_REG(tmp4);
2637+
PUSH_REG(tmp2);
2638+
PUSH_REG(tmp6);
2639+
2640+
armAsm->Bic(uv.V4S(), uv.V4S(), tmp3.V4S());
2641+
armAsm->Bic(uv.V4S(), uv.V4S(), tmp4.V4S());
2642+
armAsm->Orr(uv.V4S(), uv.V4S(), tmp2.V4S());
2643+
armAsm->Orr(uv.V4S(), uv.V4S(), tmp6.V4S());
2644+
2645+
PUSH_REG(uv);
26002646
}
26012647
}
26022648

pcsx2/GS/Renderers/SW/GSRasterizer.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1385,6 +1385,16 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const u16* index, const GSVer
13851385

13861386
int count = m_edge.count;
13871387

1388+
static bool dumped = false;
1389+
if (g_reg_dump_counter >= 16 * 1024 * 1024 && !dumped)
1390+
{
1391+
Console.Warning("DUMPING REG DATA TO 'reg_dump'");
1392+
FILE* f = fopen("reg_dump", "wb");
1393+
fwrite(g_reg_dump_data, 16, 1024 * 1024, f);
1394+
fclose(f);
1395+
dumped = true;
1396+
}
1397+
13881398
if (count > 0)
13891399
{
13901400
m_setup_prim(vertex, index, dscan, m_local);

pcsx2/GS/Renderers/SW/GSRendererSW.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,6 +1044,9 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data, bool round_uv)
10441044
{
10451045
GSScanlineGlobalData& gd = data->global;
10461046

1047+
gd.m_reg_dump_counter = &g_reg_dump_counter;
1048+
gd.m_reg_dump_data = g_reg_dump_data;
1049+
10471050
const GSDrawingEnvironment& env = *m_draw_env;
10481051
const GSDrawingContext* context = m_context;
10491052
const GS_PRIM_CLASS primclass = m_vt.m_primclass;

pcsx2/GS/Renderers/SW/GSScanlineEnvironment.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,9 @@ struct alignas(32) GSScanlineGlobalData // per batch variables, this is like a p
159159

160160
#endif
161161

162+
u8* m_reg_dump_data;
163+
u64* m_reg_dump_counter;
164+
162165
#ifdef ARCH_ARM64
163166
// Mini version of constant data for ARM64, we don't need all of it
164167
alignas(16) u32 const_test_128b[8][4] = {
@@ -371,3 +374,6 @@ struct alignas(64) GSScanlineConstantData128B
371374

372375
extern const GSScanlineConstantData256B g_const_256b;
373376
extern const GSScanlineConstantData128B g_const_128b;
377+
378+
// Debug-only register dump state; the definitions live outside this header.
extern u64 g_reg_dump_counter;
379+
// Size is not visible through this declaration (incomplete array type), so sizeof() cannot be used on it here.
extern u8 g_reg_dump_data[];

0 commit comments

Comments
 (0)