Skip to content

Commit 75d8052

Browse files
committed
[DXBC] discard pixels from kill with ROV instead of returning
Keep the current lane active as it may be needed for derivatives.
1 parent 88c645d commit 75d8052

File tree

2 files changed

+25
-45
lines changed

2 files changed

+25
-45
lines changed

src/xenia/gpu/dxbc_shader_translator.h

+2
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
917917
.SelectFromSwizzled(word_index & 1);
918918
}
919919

920+
void KillPixel(bool condition, const dxbc::Src& condition_src);
921+
920922
void ProcessVectorAluOperation(const ParsedAluInstruction& instr,
921923
uint32_t& result_swizzle,
922924
bool& predicate_written);

src/xenia/gpu/dxbc_shader_translator_alu.cc

+23-45
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,20 @@ namespace xe {
1919
namespace gpu {
2020
using namespace ucode;
2121

22+
void DxbcShaderTranslator::KillPixel(bool condition,
23+
const dxbc::Src& condition_src) {
24+
// Discard the pixel, but continue execution if other lanes in the quad need
25+
// this lane for derivatives. The driver may also perform early exiting
26+
// internally if all lanes are discarded if deemed beneficial.
27+
a_.OpDiscard(condition, condition_src);
28+
if (edram_rov_used_) {
29+
// Even though discarding disables all subsequent UAV/ROV writes, also skip
30+
// as much of the Render Backend emulation logic as possible by setting the
31+
// coverage and the mask of the written render targets to zero.
32+
a_.OpMov(dxbc::Dest::R(system_temp_rov_params_, 0b0001), dxbc::Src::LU(0));
33+
}
34+
}
35+
2236
void DxbcShaderTranslator::ProcessVectorAluOperation(
2337
const ParsedAluInstruction& instr, uint32_t& result_swizzle,
2438
bool& predicate_written) {
@@ -492,11 +506,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
492506
a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001),
493507
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
494508
dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY));
495-
if (edram_rov_used_) {
496-
a_.OpRetC(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
497-
} else {
498-
a_.OpDiscard(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
499-
}
509+
KillPixel(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
500510
if (used_result_components) {
501511
a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001),
502512
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
@@ -512,11 +522,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
512522
a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001),
513523
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
514524
dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY));
515-
if (edram_rov_used_) {
516-
a_.OpRetC(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
517-
} else {
518-
a_.OpDiscard(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
519-
}
525+
KillPixel(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
520526
if (used_result_components) {
521527
a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001),
522528
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
@@ -532,11 +538,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
532538
a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001),
533539
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
534540
dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY));
535-
if (edram_rov_used_) {
536-
a_.OpRetC(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
537-
} else {
538-
a_.OpDiscard(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
539-
}
541+
KillPixel(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
540542
if (used_result_components) {
541543
a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001),
542544
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
@@ -552,11 +554,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
552554
a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001),
553555
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
554556
dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY));
555-
if (edram_rov_used_) {
556-
a_.OpRetC(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
557-
} else {
558-
a_.OpDiscard(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
559-
}
557+
KillPixel(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
560558
if (used_result_components) {
561559
a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001),
562560
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX),
@@ -952,47 +950,27 @@ void DxbcShaderTranslator::ProcessScalarAluOperation(
952950

953951
case AluScalarOpcode::kKillsEq:
954952
a_.OpEq(ps_dest, operand_0_a, dxbc::Src::LF(0.0f));
955-
if (edram_rov_used_) {
956-
a_.OpRetC(true, ps_src);
957-
} else {
958-
a_.OpDiscard(true, ps_src);
959-
}
953+
KillPixel(true, ps_src);
960954
a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f));
961955
break;
962956
case AluScalarOpcode::kKillsGt:
963957
a_.OpLT(ps_dest, dxbc::Src::LF(0.0f), operand_0_a);
964-
if (edram_rov_used_) {
965-
a_.OpRetC(true, ps_src);
966-
} else {
967-
a_.OpDiscard(true, ps_src);
968-
}
958+
KillPixel(true, ps_src);
969959
a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f));
970960
break;
971961
case AluScalarOpcode::kKillsGe:
972962
a_.OpGE(ps_dest, operand_0_a, dxbc::Src::LF(0.0f));
973-
if (edram_rov_used_) {
974-
a_.OpRetC(true, ps_src);
975-
} else {
976-
a_.OpDiscard(true, ps_src);
977-
}
963+
KillPixel(true, ps_src);
978964
a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f));
979965
break;
980966
case AluScalarOpcode::kKillsNe:
981967
a_.OpNE(ps_dest, operand_0_a, dxbc::Src::LF(0.0f));
982-
if (edram_rov_used_) {
983-
a_.OpRetC(true, ps_src);
984-
} else {
985-
a_.OpDiscard(true, ps_src);
986-
}
968+
KillPixel(true, ps_src);
987969
a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f));
988970
break;
989971
case AluScalarOpcode::kKillsOne:
990972
a_.OpEq(ps_dest, operand_0_a, dxbc::Src::LF(1.0f));
991-
if (edram_rov_used_) {
992-
a_.OpRetC(true, ps_src);
993-
} else {
994-
a_.OpDiscard(true, ps_src);
995-
}
973+
KillPixel(true, ps_src);
996974
a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f));
997975
break;
998976

0 commit comments

Comments
 (0)