Skip to content

SelectionDAG: Support nofpclass #108350

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions llvm/include/llvm/CodeGen/SelectionDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -2136,6 +2136,14 @@ class SelectionDAG {
/// positive or negative zero.
bool isKnownNeverZeroFloat(SDValue Op) const;

/// Test whether the given floating point SDValue is known to never be
/// positive zero.
bool isKnownNeverPosZeroFloat(SDValue Op) const;

/// Test whether the given floating point SDValue is known to never be
/// negative zero.
bool isKnownNeverNegZeroFloat(SDValue Op) const;

/// Test whether the given SDValue is known to contain non-zero value(s).
bool isKnownNeverZero(SDValue Op, unsigned Depth = 0) const;

Expand Down
33 changes: 29 additions & 4 deletions llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ struct SDNodeFlags {
bool Exact : 1;
bool Disjoint : 1;
bool NonNeg : 1;
// Deprecated: use NoQNaNs && NoSNaNs instead.
bool NoNaNs : 1;
bool NoInfs : 1;
bool NoSignedZeros : 1;
Expand All @@ -400,19 +401,27 @@ struct SDNodeFlags {
// Instructions with attached 'unpredictable' metadata on IR level.
bool Unpredictable : 1;

bool NoQNaNs : 1;
bool NoSNaNs : 1;
bool NoPosZeros : 1;
bool NoNegZeros : 1;

public:
/// Default constructor turns off all optimization flags.
SDNodeFlags()
: NoUnsignedWrap(false), NoSignedWrap(false), Exact(false),
Disjoint(false), NonNeg(false), NoNaNs(false), NoInfs(false),
NoSignedZeros(false), AllowReciprocal(false), AllowContract(false),
ApproximateFuncs(false), AllowReassociation(false), NoFPExcept(false),
Unpredictable(false) {}
Unpredictable(false), NoQNaNs(false), NoSNaNs(false), NoPosZeros(false),
NoNegZeros(false) {}

/// Propagate the fast-math-flags from an IR FPMathOperator.
void copyFMF(const FPMathOperator &FPMO) {
setNoNaNs(FPMO.hasNoNaNs());
setNoSNaNs(FPMO.hasNoNaNs());
setNoQNaNs(FPMO.hasNoNaNs());
setNoInfs(FPMO.hasNoInfs());
setNoNegZeros(FPMO.hasNoSignedZeros());
setNoSignedZeros(FPMO.hasNoSignedZeros());
setAllowReciprocal(FPMO.hasAllowReciprocal());
setAllowContract(FPMO.hasAllowContract());
Expand All @@ -426,8 +435,20 @@ struct SDNodeFlags {
void setExact(bool b) { Exact = b; }
void setDisjoint(bool b) { Disjoint = b; }
void setNonNeg(bool b) { NonNeg = b; }
void setNoNaNs(bool b) { NoNaNs = b; }
/// Set the combined no-NaN flag together with both per-kind flags (quiet and
/// signaling) to \p b in one step.
///
/// Deprecated: the message names the per-kind setters that replace this one.
[[deprecated("Use setNoSNaNs() and setNoQNaNs()")]] void setNoNaNs(bool b) {
  // Keep all three bits in lock-step so hasNoNaNs() stays consistent.
  NoNaNs = NoQNaNs = NoSNaNs = b;
}
/// Record whether quiet NaNs are excluded and refresh the combined NoNaNs
/// bit, which holds only when both the quiet and signaling flags are set.
void setNoQNaNs(bool b) {
  NoQNaNs = b;
  NoNaNs = b && NoSNaNs;
}
/// Record whether signaling NaNs are excluded and refresh the combined NoNaNs
/// bit, which holds only when both the quiet and signaling flags are set.
void setNoSNaNs(bool b) {
  NoSNaNs = b;
  NoNaNs = b && NoQNaNs;
}
void setNoInfs(bool b) { NoInfs = b; }
void setNoPosZeros(bool b) { NoPosZeros = b; }
void setNoNegZeros(bool b) { NoNegZeros = b; }
void setNoSignedZeros(bool b) { NoSignedZeros = b; }
void setAllowReciprocal(bool b) { AllowReciprocal = b; }
void setAllowContract(bool b) { AllowContract = b; }
Expand All @@ -442,8 +463,12 @@ struct SDNodeFlags {
bool hasExact() const { return Exact; }
bool hasDisjoint() const { return Disjoint; }
bool hasNonNeg() const { return NonNeg; }
bool hasNoNaNs() const { return NoNaNs; }
bool hasNoNaNs() const { return (NoSNaNs && NoQNaNs); }
bool hasNoSNaNs() const { return NoSNaNs; }
bool hasNoQNaNs() const { return NoQNaNs; }
bool hasNoInfs() const { return NoInfs; }
bool hasNoPosZeros() const { return NoPosZeros; }
bool hasNoNegZeros() const { return NoNegZeros; }
bool hasNoSignedZeros() const { return NoSignedZeros; }
bool hasAllowReciprocal() const { return AllowReciprocal; }
bool hasAllowContract() const { return AllowContract; }
Expand Down
49 changes: 46 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5435,7 +5435,12 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {

bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
// If we're told that NaNs won't happen, assume they won't.
if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
if (getTarget().Options.NoNaNsFPMath)
return true;
SDNodeFlags OpFlags = Op->getFlags();
if (SNaN && OpFlags.hasNoSNaNs())
return true;
if (OpFlags.hasNoSNaNs() && OpFlags.hasNoQNaNs())
return true;

if (Depth >= MaxRecursionDepth)
Expand Down Expand Up @@ -5569,11 +5574,39 @@ bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
assert(Op.getValueType().isFloatingPoint() &&
"Floating point type expected");

SDNodeFlags OpFlags = Op->getFlags();
if (OpFlags.hasNoPosZeros() && OpFlags.hasNoNegZeros())
return true;

// If the value is a constant, we can obviously see if it is a zero or not.
return ISD::matchUnaryFpPredicate(
Op, [](ConstantFPSDNode *C) { return !C->isZero(); });
}

/// Return true when the floating point value \p Op can be shown to never be
/// positive zero, either from its node flags or from a constant match.
bool SelectionDAG::isKnownNeverPosZeroFloat(SDValue Op) const {
  assert(Op.getValueType().isFloatingPoint() && "Floating point type expected");

  // The node's own flags may already exclude +0.0.
  if (Op->getFlags().hasNoPosZeros())
    return true;

  // Otherwise defer to the constant matcher with a predicate that rejects
  // only a +0.0 constant: anything negative (including -0.0) or non-zero is
  // accepted.
  auto NotPosZero = [](ConstantFPSDNode *C) {
    return C->isNegative() || !C->isZero();
  };
  return ISD::matchUnaryFpPredicate(Op, NotPosZero);
}

/// Return true when the floating point value \p Op can be shown to never be
/// negative zero, either from its node flags or from a constant match.
bool SelectionDAG::isKnownNeverNegZeroFloat(SDValue Op) const {
  assert(Op.getValueType().isFloatingPoint() && "Floating point type expected");

  // The node's own flags may already exclude -0.0.
  if (Op->getFlags().hasNoNegZeros())
    return true;

  // Otherwise defer to the constant matcher with a predicate that rejects
  // only a -0.0 constant: anything non-negative (including +0.0) or non-zero
  // is accepted.
  auto NotNegZero = [](ConstantFPSDNode *C) {
    return !C->isNegative() || !C->isZero();
  };
  return ISD::matchUnaryFpPredicate(Op, NotNegZero);
}

bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
if (Depth >= MaxRecursionDepth)
return false; // Limit search depth.
Expand Down Expand Up @@ -7490,6 +7523,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N2.getOpcode() != ISD::DELETED_NODE &&
N3.getOpcode() != ISD::DELETED_NODE &&
"Operand is DELETED_NODE!");
SDNodeFlags NewFlags = Flags;
// Perform various simplifications.
switch (Opcode) {
case ISD::FMA:
Expand Down Expand Up @@ -7535,6 +7569,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert((!VT.isVector() || VT.getVectorElementCount() ==
N1.getValueType().getVectorElementCount()) &&
"SETCC vector element counts must match!");
if (N1->getFlags().hasNoNaNs() && N2->getFlags().hasNoNaNs()) {
NewFlags.setNoQNaNs(true);
NewFlags.setNoSNaNs(true);
}
// Use FoldSetCC to simplify SETCC's.
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
return V;
Expand All @@ -7548,6 +7586,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
case ISD::SELECT:
case ISD::VSELECT:
if ((N1->getFlags().hasNoNaNs() && N2->getFlags().hasNoNaNs()) ||
N3->getFlags().hasNoNaNs()) {
NewFlags.setNoQNaNs(true);
NewFlags.setNoSNaNs(true);
}
if (SDValue V = simplifySelect(N1, N2, N3))
return V;
break;
Expand Down Expand Up @@ -7654,12 +7697,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
E->intersectFlagsWith(Flags);
E->intersectFlagsWith(NewFlags);
return SDValue(E, 0);
}

N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
N->setFlags(Flags);
N->setFlags(NewFlags);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
Expand Down
37 changes: 35 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3708,8 +3708,24 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
bool Negate = false;

SDNodeFlags Flags;
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
SelectInst *NewI = dyn_cast<SelectInst>(cast<SelectInst>(I).clone());
if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
Flags.copyFMF(*FPOp);
if (Cond->getFlags().hasNoNaNs() ||
(LHSVal->getFlags().hasNoNaNs() && RHSVal->getFlags().hasNoNaNs())) {
FastMathFlags FMF = FPOp->getFastMathFlags();
FMF.setNoNaNs(true);
NewI->setFastMathFlags(FMF);
CmpInst *CmpCond = dyn_cast<CmpInst>(NewI->getCondition());
if (isa<FPMathOperator>(CmpCond)) {
FastMathFlags CondFMF = CmpCond->getFastMathFlags();
CondFMF.setNoNaNs(true);
CmpCond->setFastMathFlags(CondFMF);
}
Flags.setNoQNaNs(true);
Flags.setNoSNaNs(true);
}
}

Flags.setUnpredictable(
cast<SelectInst>(I).getMetadata(LLVMContext::MD_unpredictable));
Expand All @@ -3735,7 +3751,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
// so we can't lower to FMINIMUM/FMAXIMUM because those nodes specify that
// -0.0 is less than +0.0.
const Value *LHS, *RHS;
auto SPR = matchSelectPattern(&I, LHS, RHS);
auto SPR = matchSelectPattern(NewI, LHS, RHS);
ISD::NodeType Opc = ISD::DELETED_NODE;
switch (SPR.Flavor) {
case SPF_UMAX: Opc = ISD::UMAX; break;
Expand Down Expand Up @@ -3798,6 +3814,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
BaseOps.clear();
}
}
NewI->deleteValue();

if (IsUnaryAbs) {
for (unsigned i = 0; i != NumValues; ++i) {
Expand Down Expand Up @@ -11775,6 +11792,22 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
AssertOp = ISD::AssertSext;
else if (Arg.hasAttribute(Attribute::ZExt))
AssertOp = ISD::AssertZext;
if (Arg.hasAttribute(Attribute::NoFPClass)) {
SDNodeFlags InValFlags = InVals[i]->getFlags();
bool NoSNaN = ((Arg.getNoFPClass() & llvm::fcSNan) == llvm::fcSNan);
bool NoQNaN = ((Arg.getNoFPClass() & llvm::fcQNan) == llvm::fcQNan);
InValFlags.setNoSNaNs(NoSNaN);
InValFlags.setNoQNaNs(NoQNaN);
bool NoPosZeros =
((Arg.getNoFPClass() & llvm::fcPosZero) == llvm::fcPosZero);
bool NoNegZeros =
((Arg.getNoFPClass() & llvm::fcNegZero) == llvm::fcNegZero);
InValFlags.setNoPosZeros(NoPosZeros);
InValFlags.setNoNegZeros(NoNegZeros);
InValFlags.setNoInfs((Arg.getNoFPClass() & llvm::fcInf) ==
llvm::fcInf);
InVals[i]->setFlags(InValFlags);
}

ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
PartVT, VT, nullptr, NewRoot,
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8422,6 +8422,10 @@ TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
"Wrong opcode");

EVT VT = Node->getValueType(0);
if (VT.isVector() && isOperationLegal(Opcode, VT.getScalarType()))
return SDValue();

if (Node->getFlags().hasNoNaNs()) {
ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
SDValue Op1 = Node->getOperand(0);
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/AMDGPU/known-never-snan.ll
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,6 @@ define float @v_select_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GCN-NEXT: s_setpc_b64 s[30:31]
%b.nnan.add = fadd nnan float %b, 1.0
Expand Down
28 changes: 7 additions & 21 deletions llvm/test/CodeGen/AMDGPU/reduction.ll
Original file line number Diff line number Diff line change
Expand Up @@ -498,18 +498,11 @@ entry:
; XVI-NEXT: s_setpc_b64

; GFX9: s_waitcnt
; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1
; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0
; GFX9-NEXT: v_pk_max_f16 [[MAX:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}}
; GFX9-NEXT: v_pk_max_f16 [[MAX:v[0-9]+]], v0, v1{{$}}
; GFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_max_f16_e32 [[CANON0:v[0-9]+]], v0, v0
; VI-DAG: v_max_f16_e32 [[CANON2:v[0-9]+]], v1, v1

; VI-DAG: v_max_f16_e32 [[MAX0:v[0-9]+]], [[CANON1]], [[CANON3]]
; VI-DAG: v_max_f16_e32 [[MAX1:v[0-9]+]], [[CANON0]], [[CANON2]]
; VI-DAG: v_max_f16_sdwa [[MAX0:v[0-9]+]], v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_max_f16_e32 [[MAX1:v[0-9]+]], v0, v1
; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]]
define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) {
entry:
Expand Down Expand Up @@ -537,19 +530,12 @@ entry:
; XVI-NEXT: s_setpc_b64

; GFX9: s_waitcnt
; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1
; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0
; GFX9-NEXT: v_pk_min_f16 [[MIN:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}}
; GFX9-NEXT: v_pk_min_f16 [[MIN:v[0-9]+]], v0, v1{{$}}
; GFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_max_f16_e32 [[CANON0:v[0-9]+]], v0, v0
; VI-DAG: v_max_f16_e32 [[CANON2:v[0-9]+]], v1, v1

; VI-DAG: v_min_f16_e32 [[MAX0:v[0-9]+]], [[CANON1]], [[CANON3]]
; VI-DAG: v_min_f16_e32 [[MAX1:v[0-9]+]], [[CANON0]], [[CANON2]]
; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]]
; VI-DAG: v_min_f16_sdwa [[MIN0:v[0-9]+]], v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_min_f16_e32 [[MIN1:v[0-9]+]], v0, v1
; VI: v_min_f16_e32 v0, [[MIN1]], [[MIN0]]
define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) {
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
Expand Down
Loading
Loading