diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 4342e7a369c13..2601a3b60e658 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -899,6 +899,55 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI, MI, MI.getDebugLoc())) { I = std::next(I); MI.eraseFromParent(); + } else { + // At this point, if we still have a VGPR → SGPR copy, it is completely + // illegal. We assume that it was intentionally introduced and should be + // replaced with the READFIRSTLANE to ensure correctness. + const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); + const TargetRegisterClass *DRC = TRI->getPhysRegBaseClass(DstReg); + ArrayRef SrcIndices = TRI->getRegSplitParts(SRC, 4); + ArrayRef DstIndices = TRI->getRegSplitParts(DRC, 4); + assert(SrcIndices.size() == DstIndices.size() && + "Register tuple should match"); + MachineInstr *FirstMI = nullptr, *LastMI = nullptr; + // If SrcReg is a virtual register, look through its defining COPY and + // fold that copy's source instead. This must not be done when the copy + // source is an AGPR, since a direct AGPR-to-SGPR copy is not allowed + // (see the isAGPR bail-out below). + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + // Source register of the defining COPY. NOTE(review): DefMI is + // dereferenced here before the !DefMI null check below — confirm DefMI + // cannot be null at this point, or move this read after the check. + Register DefSrcReg = DefMI->getOperand(1).getReg(); + // If SrcReg is not a register tuple, just rewrite this COPY's opcode. 
+ if (SrcIndices.size() == 1) { + MI.setDesc(TII->get(AMDGPU::V_READFIRSTLANE_B32)); + MI.addOperand(*MI.getParent()->getParent(), + MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); + return true; + } + + if (!DefMI || !DefMI->isCopy() || TRI->isAGPR(*MRI, DefSrcReg)) + return true; + + for (unsigned Idx = 0; Idx < SrcIndices.size(); ++Idx) { + int16_t SubIdx = SrcIndices[Idx]; + Register DefSrcSubReg = TRI->getSubReg(DefSrcReg, SubIdx); + Register DstSubReg = TRI->getSubReg(DstReg, SubIdx); + assert(DstSubReg && DefSrcSubReg && "Failed to find subregs!"); + LastMI = BuildMI(*MI.getParent(), I, MI.getDebugLoc(), + TII->get(AMDGPU::V_READFIRSTLANE_B32), DstSubReg) + .addReg(DefSrcSubReg) + .addReg(DefSrcReg, RegState::Implicit); + if (!FirstMI) + FirstMI = LastMI; + } + assert(FirstMI && LastMI); + + FirstMI->addOperand( + MachineOperand::CreateReg(DstReg, true /*IsDef*/, true /*IsImp*/)); + + LastMI->addRegisterKilled(DefSrcReg, TRI); + I = std::next(I); + MI.eraseFromParent(); + DefMI->eraseFromParent(); } return true; } diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll index 34f4476f7fd6a..f7b44c0284886 100644 --- a/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll +++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll @@ -1,22 +1,202 @@ -; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -enable-var-scope %s - -; CHECK: illegal VGPR to SGPR copy +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GCN declare hidden void @external_void_func_a15i32_inreg([15 x i32] inreg) #0 declare hidden void @external_void_func_a16i32_inreg([16 x i32] inreg) #0 declare hidden void 
@external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg, i32 inreg) #0 define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #0 { + ; GCN-LABEL: name: test_call_external_void_func_a15i32_inreg + ; GCN: bb.0 (%ir-block.0): + ; GCN-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $vgpr0, $vgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr29 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr28 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr27 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr26 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr25 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr24 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr23 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr22 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr21 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr20 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr19 + ; GCN-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr18 + ; GCN-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr17 + ; GCN-NEXT: [[COPY15:%[0-9]+]]:sgpr_32 = COPY $sgpr16 + ; GCN-NEXT: [[COPY16:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN-NEXT: [[COPY17:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY18:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GCN-NEXT: 
[[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @external_void_func_a15i32_inreg, target-flags(amdgpu-rel32-hi) @external_void_func_a15i32_inreg, implicit-def dead $scc + ; GCN-NEXT: [[COPY24:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY23]] + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY22]] + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY21]] + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY20]] + ; GCN-NEXT: $sgpr12 = COPY [[COPY19]] + ; GCN-NEXT: $sgpr13 = COPY [[COPY18]] + ; GCN-NEXT: $sgpr14 = COPY [[COPY17]] + ; GCN-NEXT: $sgpr15 = COPY [[COPY16]] + ; GCN-NEXT: $vgpr31 = COPY [[COPY]] + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY24]] + ; GCN-NEXT: $sgpr0 = COPY [[COPY15]] + ; GCN-NEXT: $sgpr1 = COPY [[COPY14]] + ; GCN-NEXT: $sgpr2 = COPY [[COPY13]] + ; GCN-NEXT: $sgpr3 = COPY [[COPY12]] + ; GCN-NEXT: $sgpr16 = COPY [[COPY11]] + ; GCN-NEXT: $sgpr17 = COPY [[COPY10]] + ; GCN-NEXT: $sgpr18 = COPY [[COPY9]] + ; GCN-NEXT: $sgpr19 = COPY [[COPY8]] + ; GCN-NEXT: $sgpr20 = COPY [[COPY7]] + ; GCN-NEXT: $sgpr21 = COPY [[COPY6]] + ; GCN-NEXT: $sgpr22 = COPY [[COPY5]] + ; GCN-NEXT: $sgpr23 = COPY [[COPY4]] + ; GCN-NEXT: $sgpr24 = COPY [[COPY3]] + ; GCN-NEXT: $sgpr25 = COPY [[COPY2]] + ; GCN-NEXT: $sgpr26 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @external_void_func_a15i32_inreg, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26 + ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def 
dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GCN-NEXT: SI_RETURN call void @external_void_func_a15i32_inreg([15 x i32] inreg %arg0) ret void } define void @test_call_external_void_func_a16i32_inreg([16 x i32] inreg %arg0) #0 { + ; GCN-LABEL: name: test_call_external_void_func_a16i32_inreg + ; GCN: bb.0 (%ir-block.0): + ; GCN-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $vgpr0, $vgpr1, $vgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr29 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr28 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr27 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr26 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr25 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr24 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr23 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr22 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr21 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr20 + ; GCN-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr19 + ; GCN-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr18 + ; GCN-NEXT: [[COPY15:%[0-9]+]]:sgpr_32 = COPY $sgpr17 + ; GCN-NEXT: [[COPY16:%[0-9]+]]:sgpr_32 = COPY $sgpr16 + ; GCN-NEXT: [[COPY17:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN-NEXT: [[COPY18:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: 
[[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @external_void_func_a16i32_inreg, target-flags(amdgpu-rel32-hi) @external_void_func_a16i32_inreg, implicit-def dead $scc + ; GCN-NEXT: [[COPY25:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY24]] + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY23]] + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY22]] + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY21]] + ; GCN-NEXT: $sgpr12 = COPY [[COPY20]] + ; GCN-NEXT: $sgpr13 = COPY [[COPY19]] + ; GCN-NEXT: $sgpr14 = COPY [[COPY18]] + ; GCN-NEXT: $sgpr15 = COPY [[COPY17]] + ; GCN-NEXT: $vgpr31 = COPY [[COPY]] + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY25]] + ; GCN-NEXT: $sgpr0 = COPY [[COPY16]] + ; GCN-NEXT: $sgpr1 = COPY [[COPY15]] + ; GCN-NEXT: $sgpr2 = COPY [[COPY14]] + ; GCN-NEXT: $sgpr3 = COPY [[COPY13]] + ; GCN-NEXT: $sgpr16 = COPY [[COPY12]] + ; GCN-NEXT: $sgpr17 = COPY [[COPY11]] + ; GCN-NEXT: $sgpr18 = COPY [[COPY10]] + ; GCN-NEXT: $sgpr19 = COPY [[COPY9]] + ; GCN-NEXT: $sgpr20 = COPY [[COPY8]] + ; GCN-NEXT: $sgpr21 = COPY [[COPY7]] + ; GCN-NEXT: $sgpr22 = COPY [[COPY6]] + ; GCN-NEXT: $sgpr23 = COPY [[COPY5]] + ; GCN-NEXT: $sgpr24 = COPY [[COPY4]] + ; GCN-NEXT: $sgpr25 = COPY [[COPY3]] + ; GCN-NEXT: $sgpr26 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GCN-NEXT: $sgpr27 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @external_void_func_a16i32_inreg, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr16, 
implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27 + ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GCN-NEXT: SI_RETURN call void @external_void_func_a16i32_inreg([16 x i32] inreg %arg0) ret void } define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) #0 { + ; GCN-LABEL: name: test_call_external_void_func_a15i32_inreg_i32_inreg + ; GCN: bb.0 (%ir-block.0): + ; GCN-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $vgpr0, $vgpr1, $vgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr29 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr28 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr27 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr26 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr25 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr24 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr23 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr22 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr21 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr20 + ; GCN-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr19 + ; GCN-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr18 + ; GCN-NEXT: [[COPY15:%[0-9]+]]:sgpr_32 = COPY $sgpr17 + ; GCN-NEXT: [[COPY16:%[0-9]+]]:sgpr_32 = COPY $sgpr16 + ; GCN-NEXT: [[COPY17:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN-NEXT: [[COPY18:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:sgpr_32 = COPY 
$sgpr13 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @external_void_func_a15i32_inreg_i32_inreg, target-flags(amdgpu-rel32-hi) @external_void_func_a15i32_inreg_i32_inreg, implicit-def dead $scc + ; GCN-NEXT: [[COPY25:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY24]] + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY23]] + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY22]] + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY21]] + ; GCN-NEXT: $sgpr12 = COPY [[COPY20]] + ; GCN-NEXT: $sgpr13 = COPY [[COPY19]] + ; GCN-NEXT: $sgpr14 = COPY [[COPY18]] + ; GCN-NEXT: $sgpr15 = COPY [[COPY17]] + ; GCN-NEXT: $vgpr31 = COPY [[COPY]] + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY25]] + ; GCN-NEXT: $sgpr0 = COPY [[COPY16]] + ; GCN-NEXT: $sgpr1 = COPY [[COPY15]] + ; GCN-NEXT: $sgpr2 = COPY [[COPY14]] + ; GCN-NEXT: $sgpr3 = COPY [[COPY13]] + ; GCN-NEXT: $sgpr16 = COPY [[COPY12]] + ; GCN-NEXT: $sgpr17 = COPY [[COPY11]] + ; GCN-NEXT: $sgpr18 = COPY [[COPY10]] + ; GCN-NEXT: $sgpr19 = COPY [[COPY9]] + ; GCN-NEXT: $sgpr20 = COPY [[COPY8]] + ; GCN-NEXT: $sgpr21 = COPY [[COPY7]] + ; GCN-NEXT: $sgpr22 = COPY [[COPY6]] + ; GCN-NEXT: $sgpr23 = COPY [[COPY5]] + ; GCN-NEXT: $sgpr24 = COPY [[COPY4]] + ; GCN-NEXT: $sgpr25 = COPY [[COPY3]] + ; GCN-NEXT: $sgpr26 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GCN-NEXT: $sgpr27 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @external_void_func_a15i32_inreg_i32_inreg, csr_amdgpu, implicit 
$sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27 + ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GCN-NEXT: SI_RETURN call void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll b/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll new file mode 100644 index 0000000000000..e69cea873f2a2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GFX11 + +define amdgpu_ps i32 @s_copysign_f32_bf16(float inreg %mag, bfloat inreg %sign.bf16) { + ; GFX11-LABEL: name: s_copysign_f32_bf16 + ; GFX11: bb.0 (%ir-block.0): + ; GFX11-NEXT: liveins: $sgpr0, $sgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX11-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec + ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX11-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 killed [[S_MOV_B32_]], [[COPY1]], killed [[V_LSHLREV_B32_e64_]], implicit $exec + ; GFX11-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[V_BFI_B32_e64_]], implicit $exec + ; GFX11-NEXT: SI_RETURN_TO_EPILOG $sgpr0 + %sign = fpext 
bfloat %sign.bf16 to float + %op = call float @llvm.copysign.f32(float %mag, float %sign) + %cast = bitcast float %op to i32 + ret i32 %cast +} diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll index 36714b386e7e5..60b772496cc34 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll @@ -45,8 +45,8 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(ptr %ptr, double %da ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s64) on %ir.ptr) ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX942-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr1 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data) ret double %ret @@ -118,8 +118,8 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %da ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX942-NEXT: $sgpr0 = V_READFIRSTLANE_B32 
[[COPY6]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr1 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -145,8 +145,8 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw__noprivate(ptr %ptr, ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX942-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr1 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll index 597f90c0f4e84..c1c77d8ed65ec 100644 --- a/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll @@ -1,65 +1,105 @@ -; RUN: not llc -mtriple=amdgcn -verify-machineinstrs=0 < %s 2>&1 | FileCheck -check-prefix=ERR %s -; RUN: not llc -mtriple=amdgcn -verify-machineinstrs=0 < %s 2>&1 | FileCheck -check-prefix=GCN %s - -; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_i32 void (): illegal VGPR to SGPR copy -; GCN: ; illegal copy v1 to s9 +; NOTE: Assertions have been autogenerated by 
utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=amdgcn -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_i32() #0 { + ; CHECK-LABEL: name: illegal_vgpr_to_sgpr_copy_i32 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: $sgpr9 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $sgpr9 + ; CHECK-NEXT: S_ENDPGM 0 %vgpr = call i32 asm sideeffect "; def $0", "=${v1}"() call void asm sideeffect "; use $0", "${s9}"(i32 %vgpr) ret void } -; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_v2i32 void (): illegal VGPR to SGPR copy -; GCN: ; illegal copy v[0:1] to s[10:11] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v2i32() #0 { + ; CHECK-LABEL: name: illegal_vgpr_to_sgpr_copy_v2i32 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr0_vgpr1 + ; CHECK-NEXT: $sgpr10 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec, implicit $vgpr0_vgpr1, implicit-def $sgpr10_sgpr11 + ; CHECK-NEXT: $sgpr11 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $sgpr10_sgpr11 + ; CHECK-NEXT: S_ENDPGM 0 %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${v[0:1]}"() call void asm sideeffect "; use $0", "${s[10:11]}"(<2 x i32> %vgpr) ret void } -; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_v4i32 void (): illegal VGPR to SGPR copy -; GCN: ; illegal copy v[0:3] to s[8:11] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v4i32() #0 { + ; CHECK-LABEL: name: illegal_vgpr_to_sgpr_copy_v4i32 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: INLINEASM &"; def 
$0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: $sgpr8 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 + ; CHECK-NEXT: $sgpr9 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: $sgpr10 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: $sgpr11 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $sgpr8_sgpr9_sgpr10_sgpr11 + ; CHECK-NEXT: S_ENDPGM 0 %vgpr = call <4 x i32> asm sideeffect "; def $0", "=${v[0:3]}"() call void asm sideeffect "; use $0", "${s[8:11]}"(<4 x i32> %vgpr) ret void } -; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_v8i32 void (): illegal VGPR to SGPR copy -; GCN: ; illegal copy v[0:7] to s[8:15] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v8i32() #0 { + ; CHECK-LABEL: name: illegal_vgpr_to_sgpr_copy_v8i32 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: $sgpr8 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK-NEXT: $sgpr9 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: $sgpr10 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: $sgpr11 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: $sgpr12 = V_READFIRSTLANE_B32 $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; 
CHECK-NEXT: $sgpr13 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: $sgpr14 = V_READFIRSTLANE_B32 $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: $sgpr15 = V_READFIRSTLANE_B32 $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK-NEXT: S_ENDPGM 0 %vgpr = call <8 x i32> asm sideeffect "; def $0", "=${v[0:7]}"() call void asm sideeffect "; use $0", "${s[8:15]}"(<8 x i32> %vgpr) ret void } -; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_v16i32 void (): illegal VGPR to SGPR copy -; GCN: ; illegal copy v[0:15] to s[16:31] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v16i32() #0 { + ; CHECK-LABEL: name: illegal_vgpr_to_sgpr_copy_v16i32 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr16 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK-NEXT: $sgpr17 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr18 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr19 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr20 = V_READFIRSTLANE_B32 $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr21 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr22 = V_READFIRSTLANE_B32 $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr23 = V_READFIRSTLANE_B32 $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr24 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr25 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr26 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr27 = V_READFIRSTLANE_B32 $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr28 = V_READFIRSTLANE_B32 $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr29 = V_READFIRSTLANE_B32 $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr30 = V_READFIRSTLANE_B32 $vgpr14, implicit $exec, 
implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: $sgpr31 = V_READFIRSTLANE_B32 $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK-NEXT: S_ENDPGM 0 %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${v[0:15]}"() call void asm sideeffect "; use $0", "${s[16:31]}"(<16 x i32> %vgpr) ret void } -; ERR: error: :0:0: in function illegal_agpr_to_sgpr_copy_i32 void (): illegal VGPR to SGPR copy -; GCN: v_accvgpr_read_b32 [[COPY1:v[0-9]+]], a1 -; GCN: ; illegal copy [[COPY1]] to s9 define amdgpu_kernel void @illegal_agpr_to_sgpr_copy_i32() #1 { + ; CHECK-LABEL: name: illegal_agpr_to_sgpr_copy_i32 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $agpr1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $agpr1 + ; CHECK-NEXT: $sgpr9 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $sgpr9 + ; CHECK-NEXT: S_ENDPGM 0 %agpr = call i32 asm sideeffect "; def $0", "=${a1}"() call void asm sideeffect "; use $0", "${s9}"(i32 %agpr) ret void } -; ERR: error: :0:0: in function illegal_agpr_to_sgpr_copy_v2i32 void (): illegal VGPR to SGPR copy -; GCN-DAG: v_accvgpr_read_b32 v[[COPY1L:[0-9]+]], a0 -; GCN-DAG: v_accvgpr_read_b32 v[[COPY1H:[0-9]+]], a1 -; GCN: ; illegal copy v[[[COPY1L]]:[[COPY1H]]] to s[10:11] -define amdgpu_kernel void @illegal_agpr_to_sgpr_copy_v2i32() #1 { - %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${a[0:1]}"() - call void asm sideeffect "; use $0", "${s[10:11]}"(<2 x i32> %vgpr) - ret void -} - attributes #0 = { nounwind } 
attributes #1 = { nounwind "target-cpu"="gfx908" } diff --git a/llvm/test/CodeGen/AMDGPU/issue130443.ll b/llvm/test/CodeGen/AMDGPU/issue130443.ll new file mode 100644 index 0000000000000..19357986272cf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/issue130443.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GFX9 + +declare hidden void @external_void_func_a15i32_inreg([16 x i32] inreg) + +define void @test_call_external_void_func_a15i32_inreg([16 x i32] inreg %arg0) { + ; GFX9-LABEL: name: test_call_external_void_func_a15i32_inreg + ; GFX9: bb.0 (%ir-block.0): + ; GFX9-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $vgpr0, $vgpr1, $vgpr31 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr29 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr28 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr27 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr26 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr25 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr24 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr23 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr22 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr21 + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr20 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr19 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr18 + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:sgpr_32 = COPY $sgpr17 + ; GFX9-NEXT: 
[[COPY16:%[0-9]+]]:sgpr_32 = COPY $sgpr16 + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX9-NEXT: [[COPY19:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX9-NEXT: [[COPY20:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX9-NEXT: [[COPY21:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GFX9-NEXT: [[COPY22:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX9-NEXT: [[COPY23:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GFX9-NEXT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX9-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GFX9-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @external_void_func_a15i32_inreg, target-flags(amdgpu-rel32-hi) @external_void_func_a15i32_inreg, implicit-def dead $scc + ; GFX9-NEXT: [[COPY25:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX9-NEXT: $sgpr4_sgpr5 = COPY [[COPY24]] + ; GFX9-NEXT: $sgpr6_sgpr7 = COPY [[COPY23]] + ; GFX9-NEXT: $sgpr8_sgpr9 = COPY [[COPY22]] + ; GFX9-NEXT: $sgpr10_sgpr11 = COPY [[COPY21]] + ; GFX9-NEXT: $sgpr12 = COPY [[COPY20]] + ; GFX9-NEXT: $sgpr13 = COPY [[COPY19]] + ; GFX9-NEXT: $sgpr14 = COPY [[COPY18]] + ; GFX9-NEXT: $sgpr15 = COPY [[COPY17]] + ; GFX9-NEXT: $vgpr31 = COPY [[COPY]] + ; GFX9-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY25]] + ; GFX9-NEXT: $sgpr0 = COPY [[COPY16]] + ; GFX9-NEXT: $sgpr1 = COPY [[COPY15]] + ; GFX9-NEXT: $sgpr2 = COPY [[COPY14]] + ; GFX9-NEXT: $sgpr3 = COPY [[COPY13]] + ; GFX9-NEXT: $sgpr16 = COPY [[COPY12]] + ; GFX9-NEXT: $sgpr17 = COPY [[COPY11]] + ; GFX9-NEXT: $sgpr18 = COPY [[COPY10]] + ; GFX9-NEXT: $sgpr19 = COPY [[COPY9]] + ; GFX9-NEXT: $sgpr20 = COPY [[COPY8]] + ; GFX9-NEXT: $sgpr21 = COPY [[COPY7]] + ; GFX9-NEXT: $sgpr22 = COPY [[COPY6]] + ; GFX9-NEXT: $sgpr23 = COPY [[COPY5]] + ; GFX9-NEXT: $sgpr24 = COPY [[COPY4]] + ; GFX9-NEXT: $sgpr25 = COPY [[COPY3]] + ; GFX9-NEXT: $sgpr26 = V_READFIRSTLANE_B32 [[COPY2]], implicit 
$exec + ; GFX9-NEXT: $sgpr27 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GFX9-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @external_void_func_a15i32_inreg, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27 + ; GFX9-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GFX9-NEXT: SI_RETURN + call void @external_void_func_a15i32_inreg([16 x i32] inreg %arg0) + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll index f0b3d334af67d..ad13390e3285b 100644 --- a/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll +++ b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll @@ -1,14 +1,73 @@ -; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs=0 -O0 2> %t.err < %s | FileCheck %s -; RUN: FileCheck -check-prefix=ERR %s < %t.err +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -O0 -stop-after=finalize-isel | FileCheck %s -check-prefixes=GCN ; FIXME: This error will be fixed by supporting arbitrary divergent ; dynamic allocas by performing a wave umax of the size. 
-; ERR: error: :0:0: in function move_to_valu_assert_srd_is_physreg_swdev503538 i32 (ptr addrspace(1)): illegal VGPR to SGPR copy - -; CHECK: ; illegal copy v0 to s32 define i32 @move_to_valu_assert_srd_is_physreg_swdev503538(ptr addrspace(1) %ptr) { + ; GCN-LABEL: name: move_to_valu_assert_srd_is_physreg_swdev503538 + ; GCN: bb.0.entry: + ; GCN-NEXT: successors: %bb.4(0x80000000) + ; GCN-NEXT: liveins: $vgpr0, $vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $sgpr32 + ; GCN-NEXT: $sgpr32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GCN-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.4: + ; GCN-NEXT: successors: %bb.5(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY2]], implicit $exec + ; GCN-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.5: + ; GCN-NEXT: successors: %bb.4(0x40000000), %bb.6(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: 
[[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (load (s32) from %ir.alloca, align 8, addrspace 5) + ; GCN-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GCN-NEXT: SI_WATERFALL_LOOP %bb.4, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.6: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1.loadstoreloop: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.6, %3, %bb.1 + ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY3]], [[PHI]], 0, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: BUFFER_STORE_BYTE_OFFEN killed [[V_MOV_B32_e32_]], killed [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.1, addrspace 5) + ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GCN-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PHI]], killed [[S_MOV_B32_1]], implicit-def dead $scc + ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN-NEXT: S_CMP_LT_U32 [[S_ADD_I32_]], killed [[S_MOV_B32_2]], implicit-def $scc + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2.Flow: + ; GCN-NEXT: successors: %bb.3(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3.split: + ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %idx = load i32, ptr addrspace(1) %ptr, align 4 %zero = extractelement <4 x i32> zeroinitializer, i32 %idx diff --git 
a/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll b/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll index 242b5e9aeaf42..96d590108fb71 100644 --- a/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll +++ b/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll @@ -1,41 +1,42 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs=0 2> %t.err < %s | FileCheck %s -; RUN: FileCheck -check-prefix=ERR %s < %t.err +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GCN + ; FIXME: These tests cannot be tail called, and should be executed in a waterfall loop. declare hidden void @void_func_i32_inreg(i32 inreg) -; ERR: error: :0:0: in function tail_call_i32_inreg_divergent void (i32): illegal VGPR to SGPR copy -; ERR: error: :0:0: in function indirect_tail_call_i32_inreg_divergent void (i32): illegal VGPR to SGPR copy - define void @tail_call_i32_inreg_divergent(i32 %vgpr) { -; CHECK-LABEL: tail_call_i32_inreg_divergent: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s16, s33 -; CHECK-NEXT: s_mov_b32 s33, s32 -; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[18:19] -; CHECK-NEXT: v_writelane_b32 v40, s16, 2 -; CHECK-NEXT: s_addk_i32 s32, 0x400 -; CHECK-NEXT: v_writelane_b32 v40, s30, 0 -; CHECK-NEXT: v_writelane_b32 v40, s31, 1 -; CHECK-NEXT: s_getpc_b64 s[16:17] -; CHECK-NEXT: s_add_u32 s16, s16, void_func_i32_inreg@rel32@lo+4 -; CHECK-NEXT: s_addc_u32 s17, s17, void_func_i32_inreg@rel32@hi+12 -; CHECK-NEXT: ; illegal copy v0 to s0 -; CHECK-NEXT: s_swappc_b64 s[30:31], 
s[16:17] -; CHECK-NEXT: v_readlane_b32 s31, v40, 1 -; CHECK-NEXT: v_readlane_b32 s30, v40, 0 -; CHECK-NEXT: s_mov_b32 s32, s33 -; CHECK-NEXT: v_readlane_b32 s4, v40, 2 -; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 -; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[6:7] -; CHECK-NEXT: s_mov_b32 s33, s4 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: s_setpc_b64 s[30:31] + ; GCN-LABEL: name: tail_call_i32_inreg_divergent + ; GCN: bb.0 (%ir-block.0): + ; GCN-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @void_func_i32_inreg, target-flags(amdgpu-rel32-hi) @void_func_i32_inreg, implicit-def dead $scc + ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]] + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]] + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY7]] + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY6]] + ; GCN-NEXT: $sgpr12 = COPY [[COPY5]] + ; GCN-NEXT: $sgpr13 = COPY [[COPY4]] + ; GCN-NEXT: $sgpr14 = COPY [[COPY3]] + ; GCN-NEXT: $sgpr15 = COPY [[COPY2]] + ; GCN-NEXT: $vgpr31 = COPY [[COPY]] + ; GCN-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY10]] + ; GCN-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @void_func_i32_inreg, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0 + ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GCN-NEXT: SI_RETURN tail call void @void_func_i32_inreg(i32 inreg %vgpr) ret void } @@ -43,35 +44,38 @@ define void @tail_call_i32_inreg_divergent(i32 %vgpr) { @constant = external hidden addrspace(4) constant ptr define void @indirect_tail_call_i32_inreg_divergent(i32 %vgpr) { -; CHECK-LABEL: indirect_tail_call_i32_inreg_divergent: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s16, s33 -; CHECK-NEXT: s_mov_b32 s33, s32 -; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[18:19] -; CHECK-NEXT: s_addk_i32 s32, 0x400 -; CHECK-NEXT: v_writelane_b32 v40, s16, 2 -; CHECK-NEXT: s_getpc_b64 s[16:17] -; CHECK-NEXT: s_add_u32 s16, s16, constant@rel32@lo+4 -; CHECK-NEXT: s_addc_u32 s17, s17, constant@rel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; CHECK-NEXT: v_writelane_b32 v40, s30, 0 -; CHECK-NEXT: v_writelane_b32 v40, s31, 1 -; CHECK-NEXT: ; illegal copy v0 to s0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_readlane_b32 s31, v40, 1 -; CHECK-NEXT: v_readlane_b32 s30, v40, 0 -; CHECK-NEXT: s_mov_b32 s32, s33 -; CHECK-NEXT: v_readlane_b32 s4, v40, 2 -; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 -; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, 
s[6:7] -; CHECK-NEXT: s_mov_b32 s33, s4 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: s_setpc_b64 s[30:31] + ; GCN-LABEL: name: indirect_tail_call_i32_inreg_divergent + ; GCN: bb.0 (%ir-block.0): + ; GCN-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @constant, target-flags(amdgpu-rel32-hi) @constant, implicit-def dead $scc + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from @constant, addrspace 4) + ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]] + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]] + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY7]] + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY6]] + ; GCN-NEXT: $sgpr12 = COPY [[COPY5]] + ; GCN-NEXT: $sgpr13 = COPY [[COPY4]] + ; GCN-NEXT: $sgpr14 = COPY [[COPY3]] + ; GCN-NEXT: $sgpr15 = COPY [[COPY2]] + ; GCN-NEXT: $vgpr31 = COPY [[COPY]] + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY10]] + ; GCN-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec + ; GCN-NEXT: $sgpr30_sgpr31 = 
SI_CALL killed [[S_LOAD_DWORDX2_IMM]], 0, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0 + ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GCN-NEXT: SI_RETURN %fptr = load ptr, ptr addrspace(4) @constant, align 8 tail call void %fptr(i32 inreg %vgpr) ret void diff --git a/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll b/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll index de3b1d5bf78b3..4e03c6070314f 100644 --- a/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll @@ -1,6 +1,5 @@ -; XFAIL: * -; REQUIRES: asserts -; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GCN ; write_register doesn't prevent us from illegally trying to write a ; vgpr value into a scalar register, but I don't think there's much we @@ -11,6 +10,14 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0 declare void @llvm.amdgcn.wave.barrier() #2 define amdgpu_kernel void @write_vgpr_into_sgpr() { + ; GCN-LABEL: name: write_vgpr_into_sgpr + ; GCN: bb.0 (%ir-block.0): + ; GCN-NEXT: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: $exec_lo = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec + ; GCN-NEXT: WAVE_BARRIER + ; GCN-NEXT: S_ENDPGM 0 %tid = call i32 @llvm.amdgcn.workitem.id.x() call void @llvm.write_register.i32(metadata !0, i32 %tid) call void @llvm.amdgcn.wave.barrier() #2