Skip to content

Commit dd92878

Browse files
androm3daSergei Larin
authored andcommitted
Automerge: [Hexagon] Handle subreg copies between DoubleRegs and IntRegs (#181360)
ISel can generate truncating COPYs from DoubleRegs to IntRegs when a 64-bit result (e.g., C2_mask) is used in a 32-bit context. Several passes crashed on this pattern: BitTracker asserted WD >= WS for COPY instructions. Handle the WD < WS case by extracting the low WD bits from the source. HexagonInstrInfo::copyPhysReg had no case for IntRegs <- DoubleRegs or DoubleRegs <- IntRegs. Add both directions, respecting the subreg index on the operand (isub_lo/isub_hi) when present. HexagonTfrCleanup asserted that source and destination register sizes match. Replace with proper subreg resolution on both operands and a hasNoVRegs() guard since the pass runs post-RA. HexagonGenMux asserted no subregs on physical register operands. Preserve subreg information when building mux instructions and resolve subregs when fixing kill flags. Co-authored-by: Sergei Larin <slarin@codeaurora.org> --------- Co-authored-by: Sergei Larin <slarin@codeaurora.org>
2 parents d368af6 + 689ecf8 commit dd92878

File tree

7 files changed

+195
-18
lines changed

7 files changed

+195
-18
lines changed

llvm/lib/Target/Hexagon/BitTracker.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -736,16 +736,24 @@ bool BT::MachineEvaluator::evaluate(const MachineInstr &MI,
736736
case TargetOpcode::COPY: {
737737
// COPY can transfer a smaller register into a wider one.
738738
// If that is the case, fill the remaining high bits with 0.
739+
// COPY can also transfer a wider register into a narrower one,
740+
// in which case the high bits are simply truncated.
739741
RegisterRef RD = MI.getOperand(0);
740742
RegisterRef RS = MI.getOperand(1);
741743
assert(RD.Sub == 0);
742744
uint16_t WD = getRegBitWidth(RD);
743745
uint16_t WS = getRegBitWidth(RS);
744-
assert(WD >= WS);
745746
RegisterCell Src = getCell(RS, Inputs);
746747
RegisterCell Res(WD);
747-
Res.insert(Src, BitMask(0, WS-1));
748-
Res.fill(WS, WD, BitValue::Zero);
748+
if (WD <= WS) {
749+
// Truncating copy: extract low WD bits from source.
750+
RegisterCell Trunc = Src.extract(BitMask(0, WD - 1));
751+
Res.insert(Trunc, BitMask(0, WD - 1));
752+
} else {
753+
// Widening copy: insert all source bits, zero-fill high bits.
754+
Res.insert(Src, BitMask(0, WS - 1));
755+
Res.fill(WS, WD, BitValue::Zero);
756+
}
749757
putCell(RD, Res, Outputs);
750758
break;
751759
}

llvm/lib/Target/Hexagon/HexagonGenMux.cpp

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -324,18 +324,41 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
324324

325325
MachineBasicBlock &B = *MX.At->getParent();
326326
const DebugLoc &DL = B.findDebugLoc(MX.At);
327-
auto NewMux = BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR)
328-
.addReg(MX.PredR)
329-
.add(*MX.SrcT)
330-
.add(*MX.SrcF);
331-
NewMux->clearKillInfo();
327+
MachineInstrBuilder MIB = BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR);
328+
MIB.addReg(MX.PredR);
329+
auto AddSrc = [&](MachineOperand *SrcOp) {
330+
if (!SrcOp->isReg()) {
331+
MIB.add(*SrcOp);
332+
return;
333+
}
334+
Register Reg = SrcOp->getReg();
335+
unsigned Sub = SrcOp->getSubReg();
336+
RegState RS = {};
337+
if (SrcOp->isKill())
338+
RS |= RegState::Kill;
339+
if (SrcOp->isUndef())
340+
RS |= RegState::Undef;
341+
if (SrcOp->isDebug())
342+
RS |= RegState::Debug;
343+
if (SrcOp->isInternalRead())
344+
RS |= RegState::InternalRead;
345+
if (Sub) {
346+
// Preserve subregister information instead of resolving to physical
347+
// reg.
348+
MIB.addReg(Reg, RS, Sub);
349+
} else {
350+
MIB.addReg(Reg, RS);
351+
}
352+
};
353+
AddSrc(MX.SrcT);
354+
AddSrc(MX.SrcF);
355+
MIB.getInstr()->clearKillInfo();
332356
B.remove(MX.Def1);
333357
B.remove(MX.Def2);
334358
Changed = true;
335359
}
336360

337361
// Fix up kill flags.
338-
339362
LiveRegUnits LPR(*HRI);
340363
LPR.addLiveOuts(B);
341364
for (MachineInstr &I : llvm::reverse(B)) {
@@ -348,7 +371,12 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
348371
for (MachineOperand &Op : I.operands()) {
349372
if (!Op.isReg() || !Op.isUse())
350373
continue;
351-
assert(Op.getSubReg() == 0 && "Should have physical registers only");
374+
if (Op.getSubReg() != 0) {
375+
Register R = HRI->getSubReg(Op.getReg(), Op.getSubReg());
376+
bool Live = !LPR.available(R);
377+
Op.setIsKill(!Live);
378+
continue;
379+
}
352380
bool Live = !LPR.available(Op.getReg());
353381
Op.setIsKill(!Live);
354382
}

llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,33 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
882882
.addReg(SrcReg).addReg(SrcReg, KillFlag);
883883
return;
884884
}
885+
if (Hexagon::IntRegsRegClass.contains(DestReg) &&
886+
Hexagon::DoubleRegsRegClass.contains(SrcReg)) {
887+
// Truncating copy: extract the specified or low sub-register.
888+
unsigned SrcSub = Hexagon::isub_lo;
889+
if (I->isCopy()) {
890+
unsigned OpSub = I->getOperand(1).getSubReg();
891+
if (OpSub == Hexagon::isub_lo || OpSub == Hexagon::isub_hi)
892+
SrcSub = OpSub;
893+
}
894+
Register SrcHalf = HRI.getSubReg(SrcReg, SrcSub);
895+
BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), DestReg)
896+
.addReg(SrcHalf, KillFlag);
897+
return;
898+
}
899+
if (Hexagon::DoubleRegsRegClass.contains(DestReg) &&
900+
Hexagon::IntRegsRegClass.contains(SrcReg)) {
901+
// Inserting copy: write into the specified or low sub-register half.
902+
unsigned DstSub = Hexagon::isub_lo;
903+
if (I->isCopy()) {
904+
unsigned OpSub = I->getOperand(0).getSubReg();
905+
if (OpSub == Hexagon::isub_lo || OpSub == Hexagon::isub_hi)
906+
DstSub = OpSub;
907+
}
908+
Register DstHalf = HRI.getSubReg(DestReg, DstSub);
909+
BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), DstHalf).addReg(SrcReg, KillFlag);
910+
return;
911+
}
885912
if (Hexagon::CtrRegsRegClass.contains(DestReg) &&
886913
Hexagon::IntRegsRegClass.contains(SrcReg)) {
887914
BuildMI(MBB, I, DL, get(Hexagon::A2_tfrrcr), DestReg)

llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -181,23 +181,59 @@ bool HexagonTfrCleanup::rewriteIfImm(MachineInstr *MI, ImmediateMap &IMap,
181181
return false;
182182
}
183183

184-
unsigned DstR = MI->getOperand(0).getReg();
185-
unsigned SrcR = MI->getOperand(1).getReg();
186-
bool Tmp, Is32;
187-
if (!isIntReg(DstR, Is32) || !isIntReg(SrcR, Tmp))
184+
Register DstBase = MI->getOperand(0).getReg();
185+
Register SrcBase = MI->getOperand(1).getReg();
186+
unsigned DstSub = MI->getOperand(0).getSubReg();
187+
unsigned SrcSub = MI->getOperand(1).getSubReg();
188+
189+
if (!DstBase.isPhysical() || !SrcBase.isPhysical())
188190
return false;
189-
assert(Tmp == Is32 && "Register size mismatch");
191+
192+
// Determine effective registers and widths, accounting for subregs.
193+
bool Is32Dst = false, Is32Src = false;
194+
unsigned DstR = DstBase;
195+
unsigned SrcR = SrcBase;
196+
197+
if (DstSub) {
198+
if (DstSub != Hexagon::isub_lo && DstSub != Hexagon::isub_hi)
199+
return false;
200+
Is32Dst = true;
201+
DstR = TRI->getSubReg(DstBase, DstSub);
202+
} else {
203+
if (!isIntReg(DstBase, Is32Dst))
204+
return false;
205+
}
206+
207+
if (SrcSub) {
208+
if (SrcSub != Hexagon::isub_lo && SrcSub != Hexagon::isub_hi)
209+
return false;
210+
if (!Hexagon::DoubleRegsRegClass.contains(SrcBase))
211+
return false;
212+
Is32Src = true;
213+
SrcR = TRI->getSubReg(SrcBase, SrcSub);
214+
} else {
215+
if (!isIntReg(SrcBase, Is32Src))
216+
return false;
217+
}
218+
219+
// After resolving subregs, check that effective register sizes match.
220+
bool Is32DstResolved = Hexagon::IntRegsRegClass.contains(DstR);
221+
bool Is32SrcResolved = Hexagon::IntRegsRegClass.contains(SrcR);
222+
if (Is32DstResolved != Is32SrcResolved)
223+
return false;
224+
190225
uint64_t Val;
191226
bool Found = getReg(SrcR, Val, IMap);
192227
if (!Found)
193228
return false;
194229

195230
MachineBasicBlock &B = *MI->getParent();
231+
MachineFunction *F = B.getParent();
196232
DebugLoc DL = MI->getDebugLoc();
197-
int64_t SVal = Is32 ? int32_t(Val) : Val;
198-
auto &HST = B.getParent()->getSubtarget<HexagonSubtarget>();
233+
int64_t SVal = Is32Dst ? int32_t(Val) : Val;
234+
auto &HST = F->getSubtarget<HexagonSubtarget>();
199235
MachineInstr *NewMI;
200-
if (Is32)
236+
if (Is32Dst)
201237
NewMI = BuildMI(B, MI, DL, HII->get(A2_tfrsi), DstR).addImm(SVal);
202238
else if (isInt<8>(SVal))
203239
NewMI = BuildMI(B, MI, DL, HII->get(A2_tfrpi), DstR).addImm(SVal);
@@ -259,6 +295,10 @@ bool HexagonTfrCleanup::eraseIfRedundant(MachineInstr *MI,
259295
}
260296

261297
bool HexagonTfrCleanup::runOnMachineFunction(MachineFunction &MF) {
298+
// This pass is intended to run post-RA. If virtual registers are present,
299+
// skip safely to avoid touching non-physical registers.
300+
if (!MF.getProperties().hasNoVRegs())
301+
return false;
262302
bool Changed = false;
263303
// Map: 32-bit register -> immediate value.
264304
// 64-bit registers are stored through their subregisters.
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# RUN: llc -mtriple=hexagon -verify-machineinstrs %s -o - | FileCheck %s
2+
# RUN: llc -mtriple=hexagon -stop-after=tfr-cleanup -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=MIR
3+
4+
# Ensure copyPhysReg handles int<-dbl subregister and dbl subregister<-int,
5+
# and does not crash on scalar copies.
6+
7+
# CHECK-LABEL: phys_copy:
8+
# CHECK: combine(
9+
10+
# MIR-LABEL: name: phys_copy
11+
# MIR: $r5 = {{A2_tfr|COPY}} $r4
12+
# MIR: $r6 = COPY {{.*}}$r4
13+
# MIR: $r1 = COPY {{.*}}$r6
14+
15+
---
16+
name: phys_copy
17+
tracksRegLiveness: true
18+
body: |
19+
bb.0:
20+
liveins: $r4, $r0
21+
; Int<-Int
22+
$r5 = COPY $r4
23+
; Use $r5 so it isn't DCE'd
24+
$r0 = COPY $r5
25+
; Dbl subreg<-Int (use virtual double)
26+
%0:doubleregs = REG_SEQUENCE $r4, %subreg.isub_lo, $r4, %subreg.isub_hi
27+
; Int<-Dbl subreg
28+
$r6 = COPY %0.isub_lo
29+
; Use $r6 to keep it live
30+
$r1 = COPY $r6
31+
PS_jmpret $r31, implicit-def dead $pc, implicit $r0, implicit $r1
32+
...
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
; RUN: llc -stop-after=tfr-cleanup -verify-machineinstrs -mtriple=hexagon %s -o - | FileCheck %s
2+
3+
; Create a copy from a 64-bit argument (double regs) to 32-bit intregs via subreg.
4+
; The tfr-cleanup pass should not assert on size mismatch and should leave the
5+
; copy intact when sizes differ.
6+
7+
; CHECK: name: test
8+
; CHECK: liveins: $d0, $r2
9+
; CHECK: renamable $r0 = A2_add killed renamable $r0, renamable $r1
10+
; CHECK: S2_storeri_io
11+
12+
define dso_local void @test(i64 %x, ptr nocapture %out) local_unnamed_addr {
13+
entry:
14+
%lo = trunc i64 %x to i32
15+
%hi.shift = lshr i64 %x, 32
16+
%hi = trunc i64 %hi.shift to i32
17+
%sum = add i32 %lo, %hi
18+
store i32 %sum, ptr %out, align 4
19+
ret void
20+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
; RUN: llc -mtriple=hexagon -mcpu=hexagonv73 -mattr=+hvxv73,+hvx-length128b \
2+
; RUN: < %s | FileCheck %s
3+
;
4+
; Check that a truncating copy from DoubleRegs to IntRegs (generated by
5+
; C2_mask + truncation) does not crash in BitTracker, HexagonTfrCleanup,
6+
; or ExpandPostRAPseudo.
7+
8+
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
9+
target triple = "hexagon-unknown-linux-musl"
10+
11+
; CHECK-LABEL: endgame:
12+
; CHECK: mask(p0)
13+
; CHECK: popcount
14+
; CHECK: jumpr r31
15+
define i16 @endgame(<8 x i32> %0) {
16+
entry:
17+
%1 = icmp eq <8 x i32> %0, zeroinitializer
18+
%rdx.op150 = shufflevector <8 x i1> zeroinitializer, <8 x i1> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19+
%2 = bitcast <16 x i1> %rdx.op150 to i16
20+
%3 = call i16 @llvm.ctpop.i16(i16 %2)
21+
ret i16 %3
22+
}

0 commit comments

Comments
 (0)