-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[ARM] Stop gluing ALU nodes to branches / selects #116970
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-arm Author: Sergei Barannikov (s-barannikov) Changes Following #116547 and #116676, this PR changes the type of results and operands of some nodes to accept / return a normal type instead of Glue. Unfortunately, changing the result type of one node requires changing the operand types of all potential consumer nodes, which in turn requires changing the result types of all other possible producer nodes. So this is a bulk change. Patch is 1.12 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116970.diff 82 Files Affected:
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 73ee8cf81adcd60..3306573236266e0 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -4123,17 +4123,15 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- SDValue N3 = N->getOperand(3);
- SDValue InGlue = N->getOperand(4);
+ SDValue Flags = N->getOperand(3);
assert(N1.getOpcode() == ISD::BasicBlock);
assert(N2.getOpcode() == ISD::Constant);
- assert(N3.getOpcode() == ISD::Register);
unsigned CC = (unsigned)N2->getAsZExtVal();
- if (InGlue.getOpcode() == ARMISD::CMPZ) {
- if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
- SDValue Int = InGlue.getOperand(0);
+ if (Flags.getOpcode() == ARMISD::CMPZ) {
+ if (Flags.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+ SDValue Int = Flags.getOperand(0);
uint64_t ID = Int->getConstantOperandVal(1);
// Handle low-overhead loops.
@@ -4155,15 +4153,15 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
ReplaceUses(N, LoopEnd);
CurDAG->RemoveDeadNode(N);
- CurDAG->RemoveDeadNode(InGlue.getNode());
+ CurDAG->RemoveDeadNode(Flags.getNode());
CurDAG->RemoveDeadNode(Int.getNode());
return;
}
}
bool SwitchEQNEToPLMI;
- SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
- InGlue = N->getOperand(4);
+ SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
+ Flags = N->getOperand(3);
if (SwitchEQNEToPLMI) {
switch ((ARMCC::CondCodes)CC) {
@@ -4178,26 +4176,22 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
}
+ SDValue InChain =
+ CurDAG->getCopyToReg(Chain, dl, ARM::CPSR, Flags, SDValue());
+ SDValue InGlue = InChain.getValue(1);
+
SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
- SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue };
- SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
- MVT::Glue, Ops);
- Chain = SDValue(ResNode, 0);
- if (N->getNumValues() == 2) {
- InGlue = SDValue(ResNode, 1);
- ReplaceUses(SDValue(N, 1), InGlue);
- }
- ReplaceUses(SDValue(N, 0),
- SDValue(Chain.getNode(), Chain.getResNo()));
- CurDAG->RemoveDeadNode(N);
+ SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(ARM::CPSR, MVT::i32),
+ InChain, InGlue};
+ CurDAG->SelectNodeTo(N, Opc, MVT::Other, Ops);
return;
}
case ARMISD::CMPZ: {
// select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
// This allows us to avoid materializing the expensive negative constant.
- // The CMPZ #0 is useless and will be peepholed away but we need to keep it
- // for its glue output.
+ // The CMPZ #0 is useless and will be peepholed away but we need to keep
+ // it for its flags output.
SDValue X = N->getOperand(0);
auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
@@ -4224,7 +4218,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
if (Add) {
SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
- CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
+ CurDAG->MorphNodeTo(N, ARMISD::CMPZ, N->getVTList(), Ops2);
}
}
// Other cases are autogenerated.
@@ -4232,11 +4226,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::CMOV: {
- SDValue InGlue = N->getOperand(4);
+ SDValue Flags = N->getOperand(3);
- if (InGlue.getOpcode() == ARMISD::CMPZ) {
+ if (Flags.getOpcode() == ARMISD::CMPZ) {
bool SwitchEQNEToPLMI;
- SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
+ SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
if (SwitchEQNEToPLMI) {
SDValue ARMcc = N->getOperand(2);
@@ -4253,10 +4247,9 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
- N->getOperand(3), N->getOperand(4)};
+ N->getOperand(3)};
CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
}
-
}
// Other cases are autogenerated.
break;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 6b290135c5bcbac..1b83902da006ef9 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -4924,14 +4924,11 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
LHS.getConstantOperandVal(1) < 31) {
unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
- SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
- DAG.getVTList(MVT::i32, MVT::i32),
- LHS.getOperand(0),
- DAG.getConstant(ShiftAmt, dl, MVT::i32));
- SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
- Shift.getValue(1), SDValue());
+ SDValue Shift =
+ DAG.getNode(ARMISD::LSLS, dl, DAG.getVTList(MVT::i32, FlagsVT),
+ LHS.getOperand(0), DAG.getConstant(ShiftAmt, dl, MVT::i32));
ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
- return Chain.getValue(1);
+ return Shift.getValue(1);
}
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
@@ -4963,7 +4960,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
break;
}
ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
+ return DAG.getNode(CompareType, dl, FlagsVT, LHS, RHS);
}
/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
@@ -4978,24 +4975,7 @@ SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
else
Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,
FlagsVT, LHS);
- return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Flags);
-}
-
-/// duplicateCmp - Glue values can have only one use, so this function
-/// duplicates a comparison node.
-SDValue
-ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
- unsigned Opc = Cmp.getOpcode();
- SDLoc DL(Cmp);
- if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
- return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
-
- assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
- SDValue Flags = Cmp.getOperand(0);
- assert((Flags.getOpcode() == ARMISD::CMPFP ||
- Flags.getOpcode() == ARMISD::CMPFPw0) &&
- "unexpected operand of FMSTAT");
- return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Flags);
+ return DAG.getNode(ARMISD::FMSTAT, dl, FlagsVT, Flags);
}
// This function returns three things: the arithmetic computation itself
@@ -5023,7 +5003,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
case ISD::SADDO:
ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
break;
case ISD::UADDO:
ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
@@ -5032,17 +5012,17 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ARMISD::ADDC, dl,
DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
.getValue(0);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
break;
case ISD::SSUBO:
ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
break;
case ISD::USUBO:
ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
break;
case ISD::UMULO:
// We generate a UMUL_LOHI and then check if the high word is 0.
@@ -5050,7 +5030,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::UMUL_LOHI, dl,
DAG.getVTList(Op.getValueType(), Op.getValueType()),
LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
DAG.getConstant(0, dl, MVT::i32));
Value = Value.getValue(0); // We only want the low 32 bits for the result.
break;
@@ -5061,7 +5041,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::SMUL_LOHI, dl,
DAG.getVTList(Op.getValueType(), Op.getValueType()),
LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
DAG.getNode(ISD::SRA, dl, Op.getValueType(),
Value.getValue(0),
DAG.getConstant(31, dl, MVT::i32)));
@@ -5081,15 +5061,14 @@ ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
SDValue Value, OverflowCmp;
SDValue ARMcc;
std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDLoc dl(Op);
// We use 0 and 1 as false and true values.
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
EVT VT = Op.getValueType();
- SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
- ARMcc, CCR, OverflowCmp);
+ SDValue Overflow =
+ DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
@@ -5226,11 +5205,9 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Value, OverflowCmp;
SDValue ARMcc;
std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
EVT VT = Op.getValueType();
- return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
- OverflowCmp, DAG);
+ return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG);
}
// Convert:
@@ -5258,14 +5235,9 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
False = SelectTrue;
}
- if (True.getNode() && False.getNode()) {
- EVT VT = Op.getValueType();
- SDValue ARMcc = Cond.getOperand(2);
- SDValue CCR = Cond.getOperand(3);
- SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
- assert(True.getValueType() == VT);
- return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
- }
+ if (True.getNode() && False.getNode())
+ return getCMOV(dl, Op.getValueType(), True, False, Cond.getOperand(2),
+ Cond.getOperand(3), DAG);
}
}
@@ -5330,8 +5302,8 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
}
SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
- SDValue TrueVal, SDValue ARMcc, SDValue CCR,
- SDValue Cmp, SelectionDAG &DAG) const {
+ SDValue TrueVal, SDValue ARMcc, SDValue Cmp,
+ SelectionDAG &DAG) const {
if (!Subtarget->hasFP64() && VT == MVT::f64) {
FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
@@ -5343,15 +5315,14 @@ SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
SDValue FalseLow = FalseVal.getValue(0);
SDValue FalseHigh = FalseVal.getValue(1);
- SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
- ARMcc, CCR, Cmp);
+ SDValue Low =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, ARMcc, Cmp);
SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
- ARMcc, CCR, duplicateCmp(Cmp, DAG));
+ ARMcc, Cmp);
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
} else {
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
- Cmp);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Cmp);
}
}
@@ -5625,12 +5596,11 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
SDValue ARMcc;
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
// Choose GE over PL, which vsel does now support
if (ARMcc->getAsZExtVal() == ARMCC::PL)
ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
- return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
+ return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
}
ARMCC::CondCodes CondCode, CondCode2;
@@ -5660,13 +5630,10 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
+ SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
- // FIXME: Needs another CMP because flag can have but one use.
- SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
- Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
+ Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, Cmp, DAG);
}
return Result;
}
@@ -5767,9 +5734,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
bitcastf32Toi32(RHS, DAG), Mask);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
- Chain, Dest, ARMcc, CCR, Cmp);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
+ Cmp);
}
SDValue LHS1, LHS2;
@@ -5780,9 +5746,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
- return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
+ return DAG.getNode(ARMISD::BCC_i64, dl, MVT::Other, Ops);
}
return SDValue();
@@ -5816,9 +5781,8 @@ SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
(ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
CondCode = ARMCC::getOppositeCondition(CondCode);
ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
OverflowCmp);
}
@@ -5870,18 +5834,15 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
CondCode = ARMCC::getOppositeCondition(CondCode);
ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
}
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
OverflowCmp);
}
if (LHS.getValueType() == MVT::i32) {
SDValue ARMcc;
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
- Chain, Dest, ARMcc, CCR, Cmp);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp);
}
if (getTargetMachine().Options.UnsafeFPMath &&
@@ -5896,14 +5857,12 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
- SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
+ SDValue Ops[] = {Chain, Dest, ARMcc, Cmp};
+ SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
if (CondCode2 != ARMCC::AL) {
ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
- SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
- Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
+ SDValue Ops[] = {Res, Dest, ARMcc, Cmp};
+ Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
}
return Res;
}
@@ -6408,7 +6367,6 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue ARMcc;
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
@@ -6423,8 +6381,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
- SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
- ARMcc, CCR, CmpLo);
+ SDValue Lo =
+ DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CmpLo);
SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue HiBigShift = Opc == ISD::SRA
@@ -6433,8 +6391,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
: DAG.getConstant(0, dl, VT);
SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
- SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
- ARMcc, CCR, CmpHi);
+ SDValue Hi =
+ DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
@@ -6452,7 +6410,6 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue ARMcc;
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
@@ -6466,14 +6423,14 @@ SDValue ARMTargetLowering::LowerShiftLeftPart...
[truncated]
|
Following llvm#116547 and llvm#116676, this PR changes the type of results and operands of some nodes to accept / return a normal type instead of Glue. Unfortunately, changing the result type of one node requires changing the operand types of all potential consumer nodes, which in turn requires changing the result types of all other possible producer nodes. So this is a bulk change.
508e70f
to
2487cff
Compare
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; | ||
|
||
} // hasSideEffects | ||
|
||
def : ARMPat<(ARMcmov i32:$false, i32:$Rm, imm:$cc, CPSR), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some patterns have to be defined out-of-line because the number of instruction operands no longer matches the number of SDNode operands. On an instruction, `pred` is considered a single operand, while there are two operands on SDNode that should match it (`cc` and `flags`).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That seems to me like the kind of thing it's worth mentioning in the commit message, so that it'll be available to anyone in the future reading this patch.
(LLVM has moved its git hosting once already, so I think it's better not to assume the Github PR comments will always be available.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added comments to three places. Please let me know if you think it wasn't necessary and I should've just updated the description.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fine by me – as long as it's in the git history somewhere, people will be able to work it out. And you may be right that the comment has ongoing value outside the context of the change.
✅ With the latest revision this PR passed the C/C++ code formatter. |
; THUMB2-NEXT: movne.w lr, #65536 | ||
; THUMB2-NEXT: it ne | ||
; THUMB2-NEXT: movne.w r12, #1 | ||
; THUMB2-NEXT: it ne | ||
; THUMB2-NEXT: movne.w lr, #65536 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not optimized to a single IT block because the optimization pass does not hoist `mov.w lr, #1` appearing between conditional moves. It only hoists register copies.
@@ -105,77 +105,177 @@ entry: | |||
@bb = hidden local_unnamed_addr global i64 0, align 8 | |||
|
|||
define dso_local i64 @cc() local_unnamed_addr #1 { | |||
; CHECK-LABEL: cc: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Apparently, check lines were only present for V6M.
; CHECK-MVE-NEXT: lsls r0, r0, #31 | ||
; CHECK-MVE-NEXT: lsls r1, r1, #31 | ||
; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 | ||
; CHECK-MVE-NEXT: cmp r0, #0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should've been optimized further, let me investigate this small regression.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The reason is that the nodes are scheduled a little differently, which `ARMBaseInstrInfo::optimizeCompareInstr` is unable to handle. I think this can be fixed, but should be done separately. Given the overall improvement this PR provides, I suppose it's okay to do this later?
@@ -1965,32 +1965,34 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t | |||
; ARM-ENABLE-NEXT: @ %bb.1: @ %bb3 | |||
; ARM-ENABLE-NEXT: push {r4, r7, lr} | |||
; ARM-ENABLE-NEXT: add r7, sp, #4 | |||
; ARM-ENABLE-NEXT: sub r4, sp, #16 | |||
; ARM-ENABLE-NEXT: sub r4, sp, #24 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any idea what's up with this increased size of stack frame? I can see that it goes with a newly introduced spill and reload of d10 to the `[r4, #16]` slot, but why has this change created the need for an extra spill at all? Has pre-RA scheduling somehow increased pressure?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't investigate this regression as the test appears to be testing something different. I'll take a look.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that the reason is that DAG scheduler placed VADD before the call to `pow` and this increased register pressure over the limit. (Unfortunately, machine scheduler can't reorder instructions around calls.)
; CHECK-NEXT: cmp r2, #42 | ||
; CHECK-NEXT: orrne r0, r0, #16 | ||
; CHECK-NEXT: and r0, r0, #4 | ||
; CHECK-NEXT: bic r1, r1, #255 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks a bit odd too – it seemed obviously better the previous way, and there's no obvious reason why your change should have caused it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll check this out, too. Thanks for noticing, I've missed this one.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; | ||
|
||
} // hasSideEffects | ||
|
||
def : ARMPat<(ARMcmov i32:$false, i32:$Rm, imm:$cc, CPSR), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That seems to me like the kind of thing it's worth mentioning in the commit message, so that it'll be available to anyone in the future reading this patch.
(LLVM has moved its git hosting once already, so I think it's better not to assume the Github PR comments will always be available.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a big patch, but it's my absolute favourite kind: reduces the total amount of code and makes things better! Overall the diff stats are negative in `llvm/lib` and in `llvm/test`.
I didn't read absolutely all of the code diffs through, but I did spot-check a few things and came across a couple more tiny regressions, which I've highlighted just in case you happen to know what caused them. But I agree with you that this seems overall a good thing, and knock-on effects on scheduling can be dealt with later. (Also, this patch is big enough by itself and doesn't need extra things folded into it!)
Good point, I'll put the comment in code. |
02cd2f2
to
cffe4df
Compare
cffe4df
to
b2cfc51
Compare
d0412e9
to
486d07f
Compare
I've run into another crash in Wine compiled for armv7 with clang after this has landed; I've finished bisecting the code now, will look into pinpointing which individual object file the faulty generated code is within. |
I kind of expected that. |
I've pinpointed it to one single object file (all the others can be compiled with this change without breakage), but I'll try to pinpoint the individual function(s) that contain the issue as well. Compiling with The issue does have some knock-on effects on other projects in my testing setup, so I'll revert it now and continue pinpointing it tomorrow, and I'll hand over the narrowed down case to you then. |
I haven't yet managed to pinpoint exactly which function the breakage happens in (I didn't have quite as much time to spend on it today as I had hoped), but the faulty translation unit can be reproduced with https://martin.st/temp/virtual-preproc.c:
(Not sure if all those |
Thank you, |
I managed to pinpoint the faulting change; instead of trying to split the source C file (which turned out to be quite annoying here), I diffed the output assembly and selectively applied parts of the differences to the output assembly; the faulty change is the one in @@ -11514,18 +11515,18 @@
.LBB59_6: @ %sw.bb5.i
movs r6, #29
movs r5, #13
- ands r4, lr, #16777216
- it eq
- moveq r6, #23
+ lsls.w r4, lr, #7
+ it pl
+ movpl r6, #23
it eq
moveq r5, #7
b .LBB59_11
.LBB59_7: @ %sw.bb1.i
movs r6, #25
movs r5, #9
- ands r4, lr, #16777216
- it eq
- moveq r6, #19
+ lsls.w r4, lr, #7
+ it pl
+ movpl r6, #19
it eq
moveq r5, #3
b .LBB59_11 |
Now I see the fault with the code change. @@ -11514,18 +11515,18 @@
.LBB59_6: @ %sw.bb5.i
movs r6, #29
movs r5, #13
- ands r4, lr, #16777216
- it eq
- moveq r6, #23
+ lsls.w r4, lr, #7
+ it pl
+ movpl r6, #23
it eq
moveq r5, #7
b .LBB59_11 The updated But after the flag setting instruction and conditional mov, we have another Edit: Also, if I edit the following |
The guilty function is `ARMDAGToDAGISel::SelectCMPZ`. After this patch, CMPZ can have multiple uses, but only one of them is updated. This can be worked around by checking that CMPZ has exactly one use, but ideally this optimization should happen earlier, in PerformTargetDAGCombine. I'll add a workaround now and will try to move this optimization to ARMTargetLowering in a later patch. |
Re-landing #116970 after fixing miscompilation error. The original change made it possible for CMPZ to have multiple uses; `ARMDAGToDAGISel::SelectCMPZ` was not prepared for this. Pull Request: #118887 Original commit message: Following #116547 and #116676, this PR changes the type of results and operands of some nodes to accept / return a normal type instead of Glue. Unfortunately, changing the result type of one node requires changing the operand types of all potential consumer nodes, which in turn requires changing the result types of all other possible producer nodes. So this is a bulk change.
Following #116547 and #116676, this PR changes the type of results and operands of some nodes to accept / return a normal type instead of Glue.
Unfortunately, changing the result type of one node requires changing the operand types of all potential consumer nodes, which in turn requires changing the result types of all other possible producer nodes. So this is a bulk change.