Skip to content

[AMDGPU][NFC] Mark GEPs in flat offset folding tests as inbounds #131994

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic
Choose a base branch
from

Conversation

ritter-x2a
Copy link
Member

This is in preparation for a patch that will only fold offsets into flat
instructions if their addition is inbounds.

For SWDEV-516125.

Copy link
Member Author

ritter-x2a commented Mar 19, 2025

@ritter-x2a ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from 4d8dda5 to efdf7eb Compare March 21, 2025 07:40
@ritter-x2a ritter-x2a force-pushed the users/ritter-x2a/03-19-_amdgpu_nfc_mark_geps_in_flat_offset_folding_tests_as_inbounds branch from a57b595 to dab4dde Compare March 21, 2025 07:40
@llvmbot
Copy link
Member

llvmbot commented Mar 21, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Fabian Ritter (ritter-x2a)

Changes

This is in preparation for a patch that will only fold offsets into flat
instructions if their addition is inbounds.

For SWDEV-516125.


Patch is 410.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131994.diff

20 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll (+42-42)
  • (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll (+34-34)
  • (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll (+28-28)
  • (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll (+28-28)
  • (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll (+29-29)
  • (modified) llvm/test/CodeGen/AMDGPU/flat_atomics.ll (+143-143)
  • (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll (+100-100)
  • (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll (+13-13)
  • (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll (+123-123)
  • (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll (+100-100)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll (+34-34)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll (+34-34)
  • (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll (+60-60)
  • (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll (+60-60)
  • (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll (+60-60)
  • (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll (+59-59)
  • (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll (+56-56)
  • (modified) llvm/test/CodeGen/AMDGPU/offset-split-flat.ll (+54-54)
  • (modified) llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll (+1-1)
diff --git a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
index e74fd21365c9d..90ef9a7a45863 100644
--- a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
@@ -25,7 +25,7 @@ define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32(ptr %addr, i32 %in) {
 ; GFX12-GISEL-NEXT:    flat_atomic_cond_sub_u32 v0, v[0:1], v2 offset:-16 th:TH_ATOMIC_RETURN
 ; GFX12-GISEL-NEXT:    s_endpgm
 entry:
-  %gep = getelementptr i32, ptr %addr, i32 -4
+  %gep = getelementptr inbounds i32, ptr %addr, i32 -4
   %unused = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr %gep, i32 %in)
   ret void
 }
@@ -49,7 +49,7 @@ define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32_forced(ptr %addr, i32
 ; GFX12-GISEL-NEXT:    flat_atomic_cond_sub_u32 v[0:1], v2 offset:-16
 ; GFX12-GISEL-NEXT:    s_endpgm
 entry:
-  %gep = getelementptr i32, ptr %addr, i32 -4
+  %gep = getelementptr inbounds i32, ptr %addr, i32 -4
   %unused = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr %gep, i32 %in)
   ret void
 }
@@ -83,7 +83,7 @@ define amdgpu_kernel void @flat_atomic_cond_sub_rtn_u32(ptr %addr, i32 %in, ptr
 ; GFX12-GISEL-NEXT:    flat_store_b32 v[0:1], v2
 ; GFX12-GISEL-NEXT:    s_endpgm
 entry:
-  %gep = getelementptr i32, ptr %addr, i32 4
+  %gep = getelementptr inbounds i32, ptr %addr, i32 4
   %val = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr %gep, i32 %in)
   store i32 %val, ptr %use
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
index 3305cac0d7ea6..9b57bc2f74df0 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
@@ -12,8 +12,8 @@
 define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
 ; OPT-GFX7-LABEL: @test_sinkable_flat_small_offset_i32(
 ; OPT-GFX7-NEXT:  entry:
-; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
-; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
 ; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX7:       if:
@@ -28,8 +28,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
 ;
 ; OPT-GFX8-LABEL: @test_sinkable_flat_small_offset_i32(
 ; OPT-GFX8-NEXT:  entry:
-; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
-; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
 ; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX8:       if:
@@ -44,11 +44,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
 ;
 ; OPT-GFX9-LABEL: @test_sinkable_flat_small_offset_i32(
 ; OPT-GFX9-NEXT:  entry:
-; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
 ; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX9:       if:
-; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
+; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 28
 ; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
 ; OPT-GFX9-NEXT:    br label [[ENDIF]]
 ; OPT-GFX9:       endif:
@@ -58,11 +58,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
 ;
 ; OPT-GFX10-LABEL: @test_sinkable_flat_small_offset_i32(
 ; OPT-GFX10-NEXT:  entry:
-; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
 ; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX10:       if:
-; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
+; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 28
 ; OPT-GFX10-NEXT:    [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
 ; OPT-GFX10-NEXT:    br label [[ENDIF]]
 ; OPT-GFX10:       endif:
@@ -146,8 +146,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i64 999999
-  %in.gep = getelementptr i32, ptr %in, i64 7
+  %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+  %in.gep = getelementptr inbounds i32, ptr %in, i64 7
   %cmp0 = icmp eq i32 %cond, 0
   br i1 %cmp0, label %endif, label %if
 
@@ -167,12 +167,12 @@ done:
 define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, i32 %cond) {
 ; OPT-GFX7-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
 ; OPT-GFX7-NEXT:  entry:
-; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
 ; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX7:       if:
 ; OPT-GFX7-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
-; OPT-GFX7-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
+; OPT-GFX7-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
 ; OPT-GFX7-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
 ; OPT-GFX7-NEXT:    br label [[ENDIF]]
 ; OPT-GFX7:       endif:
@@ -182,8 +182,8 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ;
 ; OPT-GFX8-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
 ; OPT-GFX8-NEXT:  entry:
-; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
-; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
 ; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX8:       if:
@@ -197,12 +197,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ;
 ; OPT-GFX9-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
 ; OPT-GFX9-NEXT:  entry:
-; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
 ; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX9:       if:
 ; OPT-GFX9-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
-; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
+; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
 ; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
 ; OPT-GFX9-NEXT:    br label [[ENDIF]]
 ; OPT-GFX9:       endif:
@@ -212,12 +212,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ;
 ; OPT-GFX10-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
 ; OPT-GFX10-NEXT:  entry:
-; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
 ; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX10:       if:
 ; OPT-GFX10-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
-; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
+; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
 ; OPT-GFX10-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
 ; OPT-GFX10-NEXT:    br label [[ENDIF]]
 ; OPT-GFX10:       endif:
@@ -303,8 +303,8 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i64 999999
-  %in.gep = getelementptr i32, ptr %in, i64 7
+  %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+  %in.gep = getelementptr inbounds i32, ptr %in, i64 7
   %cast = addrspacecast ptr %in.gep to ptr addrspace(1)
   %cmp0 = icmp eq i32 %cond, 0
   br i1 %cmp0, label %endif, label %if
@@ -325,12 +325,12 @@ done:
 define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in, i32 %cond) {
 ; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
 ; OPT-NEXT:  entry:
-; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
 ; OPT-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT:       if:
 ; OPT-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(4)
-; OPT-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i64 28
+; OPT-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 28
 ; OPT-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(4) [[SUNKADDR]], align 4
 ; OPT-NEXT:    br label [[ENDIF]]
 ; OPT:       endif:
@@ -416,8 +416,8 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i64 999999
-  %in.gep = getelementptr i32, ptr %in, i64 7
+  %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+  %in.gep = getelementptr inbounds i32, ptr %in, i64 7
   %cast = addrspacecast ptr %in.gep to ptr addrspace(4)
   %cmp0 = icmp eq i32 %cond, 0
   br i1 %cmp0, label %endif, label %if
@@ -438,8 +438,8 @@ done:
 define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ; OPT-GFX7-LABEL: @test_sink_flat_small_max_flat_offset(
 ; OPT-GFX7-NEXT:  entry:
-; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
-; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
 ; OPT-GFX7-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
 ; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -456,8 +456,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ;
 ; OPT-GFX8-LABEL: @test_sink_flat_small_max_flat_offset(
 ; OPT-GFX8-NEXT:  entry:
-; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
-; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
 ; OPT-GFX8-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
 ; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -474,12 +474,12 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ;
 ; OPT-GFX9-LABEL: @test_sink_flat_small_max_flat_offset(
 ; OPT-GFX9-NEXT:  entry:
-; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
 ; OPT-GFX9-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
 ; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX9:       if:
-; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
 ; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i8, ptr [[SUNKADDR]], align 1
 ; OPT-GFX9-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
 ; OPT-GFX9-NEXT:    br label [[ENDIF]]
@@ -490,8 +490,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ;
 ; OPT-GFX10-LABEL: @test_sink_flat_small_max_flat_offset(
 ; OPT-GFX10-NEXT:  entry:
-; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
-; OPT-GFX10-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX10-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
 ; OPT-GFX10-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
 ; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -588,8 +588,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i32 1024
-  %in.gep = getelementptr i8, ptr %in, i64 4095
+  %out.gep = getelementptr inbounds i32, ptr %out, i32 1024
+  %in.gep = getelementptr inbounds i8, ptr %in, i64 4095
   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
   %cmp0 = icmp eq i32 %tid, 0
   br i1 %cmp0, label %endif, label %if
@@ -611,8 +611,8 @@ done:
 define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
 ; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
 ; OPT-NEXT:  entry:
-; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 99999
-; OPT-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4096
+; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 99999
+; OPT-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4096
 ; OPT-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
 ; OPT-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -711,8 +711,8 @@ define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i64 99999
-  %in.gep = getelementptr i8, ptr %in, i64 4096
+  %out.gep = getelementptr inbounds i32, ptr %out, i64 99999
+  %in.gep = getelementptr inbounds i8, ptr %in, i64 4096
   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
   %cmp0 = icmp eq i32 %tid, 0
   br i1 %cmp0, label %endif, label %if
@@ -734,8 +734,8 @@ done:
 define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
 ; OPT-LABEL: @test_sinkable_flat_reg_offset(
 ; OPT-NEXT:  entry:
-; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
-; OPT-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[REG:%.*]]
+; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
+; OPT-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 [[REG:%.*]]
 ; OPT-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3]]
 ; OPT-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -834,8 +834,8 @@ define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i32 1024
-  %in.gep = getelementptr i8, ptr %in, i64 %reg
+  %out.gep = getelementptr inbounds i32, ptr %out, i32 1024
+  %in.gep = getelementptr inbounds i8, ptr %in, i64 %reg
   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
   %cmp0 = icmp eq i32 %tid, 0
   br i1 %cmp0, label %endif, label %if
diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
index c713c48c92457..4a6b1843de3b6 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
@@ -369,7 +369,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grai
 ; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr float, ptr %ptr, i64 511
+  %gep = getelementptr inbounds float, ptr %ptr, i64 511
   %result = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
   ret float %result
 }
@@ -563,7 +563,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grai
 ; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr float, ptr %ptr, i64 -512
+  %gep = getelementptr inbounds float, ptr %ptr, i64 -512
   %result = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
   ret float %result
 }
@@ -986,7 +986,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra
 ; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr float, ptr %ptr, i64 511
+  %gep = getelementptr inbounds float, ptr %ptr, i64 511
   %unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
   ret void
 }
@@ -1208,7 +1208,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra
 ; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr float, ptr %ptr, i64 -512
+  %gep = getelementptr inbounds float, ptr %ptr, i64 -512
   %unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
   ret void
 }
@@ -1397,7 +1397,7 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_gra
 ; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr float, ptr %ptr, i64 511
+  %gep = getelementptr inbounds float, ptr %ptr, i64 511
   %result = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
   ret float %result
 }
@@ -1617,7 +1617,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr
 ; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr float, ptr %ptr, i64 511
+  %gep = getelementptr inbounds float, ptr %ptr, i64 511
   %unused = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ign...
[truncated]

@ritter-x2a ritter-x2a marked this pull request as ready for review March 21, 2025 07:47
@ritter-x2a ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from efdf7eb to bd22087 Compare March 21, 2025 13:35
@ritter-x2a ritter-x2a force-pushed the users/ritter-x2a/03-19-_amdgpu_nfc_mark_geps_in_flat_offset_folding_tests_as_inbounds branch from dab4dde to 173036a Compare March 21, 2025 13:35
@ritter-x2a ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from bd22087 to 4248162 Compare March 27, 2025 09:02
@ritter-x2a ritter-x2a force-pushed the users/ritter-x2a/03-19-_amdgpu_nfc_mark_geps_in_flat_offset_folding_tests_as_inbounds branch 2 times, most recently from 5e4da87 to de34b4c Compare March 31, 2025 11:37
@ritter-x2a ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from 4248162 to ee21919 Compare March 31, 2025 11:37
@ritter-x2a ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from ee21919 to 75e41ae Compare April 10, 2025 12:27
@ritter-x2a ritter-x2a force-pushed the users/ritter-x2a/03-19-_amdgpu_nfc_mark_geps_in_flat_offset_folding_tests_as_inbounds branch from de34b4c to 39089ea Compare April 10, 2025 12:27
@ritter-x2a ritter-x2a force-pushed the users/ritter-x2a/03-19-_amdgpu_nfc_mark_geps_in_flat_offset_folding_tests_as_inbounds branch from 39089ea to 0614081 Compare April 10, 2025 14:08
@ritter-x2a ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from 75e41ae to 4b88628 Compare April 10, 2025 14:08
This is in preparation for a patch that will only fold offsets into flat
instructions if their addition is inbounds.

For SWDEV-516125.
@ritter-x2a ritter-x2a force-pushed the users/ritter-x2a/03-19-_amdgpu_nfc_mark_geps_in_flat_offset_folding_tests_as_inbounds branch from 0614081 to ab60c5a Compare April 14, 2025 09:39
@ritter-x2a ritter-x2a force-pushed the users/ritter-x2a/03-17-_sdag_introduce_inbounds_flag_for_pointer_arithmetic branch from 4b88628 to 139ed05 Compare April 14, 2025 09:39
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants