@@ -19,6 +19,20 @@ namespace xe {
19
19
namespace gpu {
20
20
using namespace ucode ;
21
21
22
+ void DxbcShaderTranslator::KillPixel (bool condition,
23
+ const dxbc::Src& condition_src) {
24
+ // Discard the pixel, but continue execution if other lanes in the quad need
25
+ // this lane for derivatives. The driver may also perform early exiting
26
+ // internally if all lanes are discarded if deemed beneficial.
27
+ a_.OpDiscard (condition, condition_src);
28
+ if (edram_rov_used_) {
29
+ // Even though discarding disables all subsequent UAV/ROV writes, also skip
30
+ // as much of the Render Backend emulation logic as possible by setting the
31
+ // coverage and the mask of the written render targets to zero.
32
+ a_.OpMov (dxbc::Dest::R (system_temp_rov_params_, 0b0001 ), dxbc::Src::LU (0 ));
33
+ }
34
+ }
35
+
22
36
void DxbcShaderTranslator::ProcessVectorAluOperation (
23
37
const ParsedAluInstruction& instr, uint32_t & result_swizzle,
24
38
bool & predicate_written) {
@@ -492,11 +506,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
492
506
a_.OpOr (dxbc::Dest::R (system_temp_result_, 0b0001 ),
493
507
dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
494
508
dxbc::Src::R (system_temp_result_, dxbc::Src::kYYYY ));
495
- if (edram_rov_used_) {
496
- a_.OpRetC (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
497
- } else {
498
- a_.OpDiscard (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
499
- }
509
+ KillPixel (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
500
510
if (used_result_components) {
501
511
a_.OpAnd (dxbc::Dest::R (system_temp_result_, 0b0001 ),
502
512
dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
@@ -512,11 +522,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
512
522
a_.OpOr (dxbc::Dest::R (system_temp_result_, 0b0001 ),
513
523
dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
514
524
dxbc::Src::R (system_temp_result_, dxbc::Src::kYYYY ));
515
- if (edram_rov_used_) {
516
- a_.OpRetC (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
517
- } else {
518
- a_.OpDiscard (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
519
- }
525
+ KillPixel (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
520
526
if (used_result_components) {
521
527
a_.OpAnd (dxbc::Dest::R (system_temp_result_, 0b0001 ),
522
528
dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
@@ -532,11 +538,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
532
538
a_.OpOr (dxbc::Dest::R (system_temp_result_, 0b0001 ),
533
539
dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
534
540
dxbc::Src::R (system_temp_result_, dxbc::Src::kYYYY ));
535
- if (edram_rov_used_) {
536
- a_.OpRetC (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
537
- } else {
538
- a_.OpDiscard (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
539
- }
541
+ KillPixel (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
540
542
if (used_result_components) {
541
543
a_.OpAnd (dxbc::Dest::R (system_temp_result_, 0b0001 ),
542
544
dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
@@ -552,11 +554,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
552
554
a_.OpOr (dxbc::Dest::R (system_temp_result_, 0b0001 ),
553
555
dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
554
556
dxbc::Src::R (system_temp_result_, dxbc::Src::kYYYY ));
555
- if (edram_rov_used_) {
556
- a_.OpRetC (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
557
- } else {
558
- a_.OpDiscard (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
559
- }
557
+ KillPixel (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
560
558
if (used_result_components) {
561
559
a_.OpAnd (dxbc::Dest::R (system_temp_result_, 0b0001 ),
562
560
dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
@@ -952,47 +950,27 @@ void DxbcShaderTranslator::ProcessScalarAluOperation(
952
950
953
951
case AluScalarOpcode::kKillsEq :
954
952
a_.OpEq (ps_dest, operand_0_a, dxbc::Src::LF (0 .0f ));
955
- if (edram_rov_used_) {
956
- a_.OpRetC (true , ps_src);
957
- } else {
958
- a_.OpDiscard (true , ps_src);
959
- }
953
+ KillPixel (true , ps_src);
960
954
a_.OpAnd (ps_dest, ps_src, dxbc::Src::LF (1 .0f ));
961
955
break ;
962
956
case AluScalarOpcode::kKillsGt :
963
957
a_.OpLT (ps_dest, dxbc::Src::LF (0 .0f ), operand_0_a);
964
- if (edram_rov_used_) {
965
- a_.OpRetC (true , ps_src);
966
- } else {
967
- a_.OpDiscard (true , ps_src);
968
- }
958
+ KillPixel (true , ps_src);
969
959
a_.OpAnd (ps_dest, ps_src, dxbc::Src::LF (1 .0f ));
970
960
break ;
971
961
case AluScalarOpcode::kKillsGe :
972
962
a_.OpGE (ps_dest, operand_0_a, dxbc::Src::LF (0 .0f ));
973
- if (edram_rov_used_) {
974
- a_.OpRetC (true , ps_src);
975
- } else {
976
- a_.OpDiscard (true , ps_src);
977
- }
963
+ KillPixel (true , ps_src);
978
964
a_.OpAnd (ps_dest, ps_src, dxbc::Src::LF (1 .0f ));
979
965
break ;
980
966
case AluScalarOpcode::kKillsNe :
981
967
a_.OpNE (ps_dest, operand_0_a, dxbc::Src::LF (0 .0f ));
982
- if (edram_rov_used_) {
983
- a_.OpRetC (true , ps_src);
984
- } else {
985
- a_.OpDiscard (true , ps_src);
986
- }
968
+ KillPixel (true , ps_src);
987
969
a_.OpAnd (ps_dest, ps_src, dxbc::Src::LF (1 .0f ));
988
970
break ;
989
971
case AluScalarOpcode::kKillsOne :
990
972
a_.OpEq (ps_dest, operand_0_a, dxbc::Src::LF (1 .0f ));
991
- if (edram_rov_used_) {
992
- a_.OpRetC (true , ps_src);
993
- } else {
994
- a_.OpDiscard (true , ps_src);
995
- }
973
+ KillPixel (true , ps_src);
996
974
a_.OpAnd (ps_dest, ps_src, dxbc::Src::LF (1 .0f ));
997
975
break ;
998
976
0 commit comments