Skip to content

Commit b4e17d4

Browse files
authored
[MachineLICM] Use RegisterClassInfo::getRegPressureSetLimit (#119826)
`RegisterClassInfo::getRegPressureSetLimit` is a wrapper around `TargetRegisterInfo::getRegPressureSetLimit` with some logic to adjust the limit by removing reserved registers. It seems that we shouldn't use `TargetRegisterInfo::getRegPressureSetLimit` directly, as the comment "This limit must be adjusted dynamically for reserved registers" says. Separated out from llvm/llvm-project#118787.
1 parent e3e26dc commit b4e17d4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+32480
-32047
lines changed

Diff for: llvm/lib/CodeGen/MachineLICM.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ namespace {
124124
const TargetRegisterInfo *TRI = nullptr;
125125
const MachineFrameInfo *MFI = nullptr;
126126
MachineRegisterInfo *MRI = nullptr;
127+
RegisterClassInfo RegClassInfo;
127128
TargetSchedModel SchedModel;
128129
bool PreRegAlloc = false;
129130
bool HasProfileData = false;
@@ -392,6 +393,7 @@ bool MachineLICMImpl::run(MachineFunction &MF) {
392393
MFI = &MF.getFrameInfo();
393394
MRI = &MF.getRegInfo();
394395
SchedModel.init(&ST);
396+
RegClassInfo.runOnMachineFunction(MF);
395397

396398
HasProfileData = MF.getFunction().hasProfileData();
397399

@@ -408,7 +410,7 @@ bool MachineLICMImpl::run(MachineFunction &MF) {
408410
std::fill(RegPressure.begin(), RegPressure.end(), 0);
409411
RegLimit.resize(NumRPS);
410412
for (unsigned i = 0, e = NumRPS; i != e; ++i)
411-
RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
413+
RegLimit[i] = RegClassInfo.getRegPressureSetLimit(i);
412414
}
413415

414416
if (HoistConstLoads)

Diff for: llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll

+345-325
Large diffs are not rendered by default.

Diff for: llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll

+345-325
Large diffs are not rendered by default.

Diff for: llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll

+20-24
Original file line numberDiff line numberDiff line change
@@ -557,11 +557,11 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
557557
; GFX908-NEXT: s_mul_hi_u32 s9, s0, s7
558558
; GFX908-NEXT: s_mul_i32 s0, s0, s7
559559
; GFX908-NEXT: s_add_i32 s1, s9, s1
560-
; GFX908-NEXT: s_lshl_b64 s[14:15], s[0:1], 5
560+
; GFX908-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
561561
; GFX908-NEXT: s_branch .LBB3_2
562562
; GFX908-NEXT: .LBB3_1: ; %Flow20
563563
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
564-
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[0:1]
564+
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[14:15]
565565
; GFX908-NEXT: s_cbranch_vccz .LBB3_12
566566
; GFX908-NEXT: .LBB3_2: ; %bb9
567567
; GFX908-NEXT: ; =>This Loop Header: Depth=1
@@ -571,17 +571,15 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
571571
; GFX908-NEXT: ; %bb.3: ; %bb14
572572
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
573573
; GFX908-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
574-
; GFX908-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], -1
575574
; GFX908-NEXT: s_mov_b32 s7, s6
576-
; GFX908-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1]
577575
; GFX908-NEXT: v_mov_b32_e32 v4, s6
578-
; GFX908-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v6
579576
; GFX908-NEXT: v_mov_b32_e32 v6, s6
580577
; GFX908-NEXT: v_mov_b32_e32 v9, s7
581578
; GFX908-NEXT: v_mov_b32_e32 v5, s7
582579
; GFX908-NEXT: v_mov_b32_e32 v7, s7
583580
; GFX908-NEXT: v_mov_b32_e32 v8, s6
584-
; GFX908-NEXT: v_cmp_lt_i64_e64 s[16:17], s[4:5], 0
581+
; GFX908-NEXT: v_cmp_lt_i64_e64 s[14:15], s[4:5], 0
582+
; GFX908-NEXT: v_cmp_gt_i64_e64 s[16:17], s[4:5], -1
585583
; GFX908-NEXT: v_mov_b32_e32 v11, v5
586584
; GFX908-NEXT: s_mov_b64 s[18:19], s[10:11]
587585
; GFX908-NEXT: v_mov_b32_e32 v10, v4
@@ -601,9 +599,9 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
601599
; GFX908-NEXT: ; in Loop: Header=BB3_5 Depth=2
602600
; GFX908-NEXT: v_add_co_u32_sdwa v2, vcc, v2, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
603601
; GFX908-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
604-
; GFX908-NEXT: s_add_u32 s18, s18, s14
602+
; GFX908-NEXT: s_add_u32 s18, s18, s0
605603
; GFX908-NEXT: v_cmp_lt_i64_e64 s[22:23], -1, v[2:3]
606-
; GFX908-NEXT: s_addc_u32 s19, s19, s15
604+
; GFX908-NEXT: s_addc_u32 s19, s19, s1
607605
; GFX908-NEXT: s_mov_b64 s[20:21], 0
608606
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[22:23]
609607
; GFX908-NEXT: s_cbranch_vccz .LBB3_9
@@ -622,7 +620,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
622620
; GFX908-NEXT: s_waitcnt vmcnt(0)
623621
; GFX908-NEXT: ds_read_b64 v[12:13], v19
624622
; GFX908-NEXT: ds_read_b64 v[14:15], v0
625-
; GFX908-NEXT: s_and_b64 vcc, exec, s[0:1]
623+
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[16:17]
626624
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
627625
; GFX908-NEXT: s_cbranch_vccnz .LBB3_7
628626
; GFX908-NEXT: ; %bb.6: ; %bb51
@@ -650,7 +648,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
650648
; GFX908-NEXT: s_mov_b64 s[20:21], -1
651649
; GFX908-NEXT: s_branch .LBB3_4
652650
; GFX908-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
653-
; GFX908-NEXT: s_mov_b64 s[20:21], s[16:17]
651+
; GFX908-NEXT: s_mov_b64 s[20:21], s[14:15]
654652
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[20:21]
655653
; GFX908-NEXT: s_cbranch_vccz .LBB3_4
656654
; GFX908-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
@@ -661,7 +659,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
661659
; GFX908-NEXT: s_xor_b64 s[16:17], s[20:21], -1
662660
; GFX908-NEXT: .LBB3_10: ; %Flow19
663661
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
664-
; GFX908-NEXT: s_mov_b64 s[0:1], -1
662+
; GFX908-NEXT: s_mov_b64 s[14:15], -1
665663
; GFX908-NEXT: s_and_b64 vcc, exec, s[16:17]
666664
; GFX908-NEXT: s_cbranch_vccz .LBB3_1
667665
; GFX908-NEXT: ; %bb.11: ; %bb12
@@ -670,7 +668,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
670668
; GFX908-NEXT: s_addc_u32 s5, s5, 0
671669
; GFX908-NEXT: s_add_u32 s10, s10, s12
672670
; GFX908-NEXT: s_addc_u32 s11, s11, s13
673-
; GFX908-NEXT: s_mov_b64 s[0:1], 0
671+
; GFX908-NEXT: s_mov_b64 s[14:15], 0
674672
; GFX908-NEXT: s_branch .LBB3_1
675673
; GFX908-NEXT: .LBB3_12: ; %DummyReturnBlock
676674
; GFX908-NEXT: s_endpgm
@@ -720,11 +718,11 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
720718
; GFX90A-NEXT: s_mul_hi_u32 s9, s0, s7
721719
; GFX90A-NEXT: s_mul_i32 s0, s0, s7
722720
; GFX90A-NEXT: s_add_i32 s1, s9, s1
723-
; GFX90A-NEXT: s_lshl_b64 s[14:15], s[0:1], 5
721+
; GFX90A-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
724722
; GFX90A-NEXT: s_branch .LBB3_2
725723
; GFX90A-NEXT: .LBB3_1: ; %Flow20
726724
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
727-
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[0:1]
725+
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[14:15]
728726
; GFX90A-NEXT: s_cbranch_vccz .LBB3_12
729727
; GFX90A-NEXT: .LBB3_2: ; %bb9
730728
; GFX90A-NEXT: ; =>This Loop Header: Depth=1
@@ -734,14 +732,12 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
734732
; GFX90A-NEXT: ; %bb.3: ; %bb14
735733
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
736734
; GFX90A-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
737-
; GFX90A-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], -1
738735
; GFX90A-NEXT: s_mov_b32 s7, s6
739-
; GFX90A-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[0:1]
740736
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[6:7], s[6:7] op_sel:[0,1]
741-
; GFX90A-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v8
742737
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], s[6:7], s[6:7] op_sel:[0,1]
743738
; GFX90A-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
744-
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[16:17], s[4:5], 0
739+
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[14:15], s[4:5], 0
740+
; GFX90A-NEXT: v_cmp_gt_i64_e64 s[16:17], s[4:5], -1
745741
; GFX90A-NEXT: s_mov_b64 s[18:19], s[10:11]
746742
; GFX90A-NEXT: v_pk_mov_b32 v[12:13], v[6:7], v[6:7] op_sel:[0,1]
747743
; GFX90A-NEXT: s_waitcnt vmcnt(0)
@@ -760,8 +756,8 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
760756
; GFX90A-NEXT: ; in Loop: Header=BB3_5 Depth=2
761757
; GFX90A-NEXT: v_add_co_u32_sdwa v4, vcc, v4, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
762758
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
763-
; GFX90A-NEXT: s_add_u32 s18, s18, s14
764-
; GFX90A-NEXT: s_addc_u32 s19, s19, s15
759+
; GFX90A-NEXT: s_add_u32 s18, s18, s0
760+
; GFX90A-NEXT: s_addc_u32 s19, s19, s1
765761
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[22:23], -1, v[4:5]
766762
; GFX90A-NEXT: s_mov_b64 s[20:21], 0
767763
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[22:23]
@@ -781,7 +777,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
781777
; GFX90A-NEXT: s_waitcnt vmcnt(0)
782778
; GFX90A-NEXT: ds_read_b64 v[14:15], v19
783779
; GFX90A-NEXT: ds_read_b64 v[16:17], v0
784-
; GFX90A-NEXT: s_and_b64 vcc, exec, s[0:1]
780+
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[16:17]
785781
; GFX90A-NEXT: ; kill: killed $sgpr20 killed $sgpr21
786782
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
787783
; GFX90A-NEXT: s_cbranch_vccnz .LBB3_7
@@ -802,7 +798,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
802798
; GFX90A-NEXT: s_mov_b64 s[20:21], -1
803799
; GFX90A-NEXT: s_branch .LBB3_4
804800
; GFX90A-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
805-
; GFX90A-NEXT: s_mov_b64 s[20:21], s[16:17]
801+
; GFX90A-NEXT: s_mov_b64 s[20:21], s[14:15]
806802
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[20:21]
807803
; GFX90A-NEXT: s_cbranch_vccz .LBB3_4
808804
; GFX90A-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
@@ -813,7 +809,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
813809
; GFX90A-NEXT: s_xor_b64 s[16:17], s[20:21], -1
814810
; GFX90A-NEXT: .LBB3_10: ; %Flow19
815811
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
816-
; GFX90A-NEXT: s_mov_b64 s[0:1], -1
812+
; GFX90A-NEXT: s_mov_b64 s[14:15], -1
817813
; GFX90A-NEXT: s_and_b64 vcc, exec, s[16:17]
818814
; GFX90A-NEXT: s_cbranch_vccz .LBB3_1
819815
; GFX90A-NEXT: ; %bb.11: ; %bb12
@@ -822,7 +818,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
822818
; GFX90A-NEXT: s_addc_u32 s5, s5, 0
823819
; GFX90A-NEXT: s_add_u32 s10, s10, s12
824820
; GFX90A-NEXT: s_addc_u32 s11, s11, s13
825-
; GFX90A-NEXT: s_mov_b64 s[0:1], 0
821+
; GFX90A-NEXT: s_mov_b64 s[14:15], 0
826822
; GFX90A-NEXT: s_branch .LBB3_1
827823
; GFX90A-NEXT: .LBB3_12: ; %DummyReturnBlock
828824
; GFX90A-NEXT: s_endpgm

0 commit comments

Comments
 (0)