summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
diff options
context:
space:
mode:
author Sanjay Patel <spatel@rotateright.com> 2018-05-15 14:16:24 +0000
committer Sanjay Patel <spatel@rotateright.com> 2018-05-15 14:16:24 +0000
commit 8652c53d291f26691e359c115d58574ddf742a0b (patch)
tree f2726f60023f82d65065de28025da482c5b5f545 /llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
parent 891ebcdbaad113b81511af0cf232908c484d3cc6 (diff)
download bcm5719-llvm-8652c53d291f26691e359c115d58574ddf742a0b.tar.gz
bcm5719-llvm-8652c53d291f26691e359c115d58574ddf742a0b.zip
[DAG] propagate FMF for all FPMathOperators
This is a simple hack based on what's proposed in D37686, but we can extend it if needed in follow-ups. It gets us most of the FMF functionality that we want without adding any state bits to the flags. It also intentionally leaves out non-FMF flags (nsw, etc.) to minimize the patch.

It should provide a superset of the functionality from D46563 - the extra tests show propagation and codegen diffs for fcmp, vecreduce, and FP libcalls.

The PPC log2() test shows the limits of this most basic approach - we only applied 'afn' to the last node created for the call. AFAIK, there aren't any libcall optimizations based on the flags currently, so that shouldn't make any difference.

Differential Revision: https://reviews.llvm.org/D46854

llvm-svn: 332358
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp34
1 files changed, 22 insertions, 12 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index c79a885ece5..277f48df783 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1069,6 +1069,22 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
visit(I.getOpcode(), I);
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) {
+ // Propagate the fast-math-flags of this IR instruction to the DAG node that
+ // maps to this instruction.
+ // TODO: We could handle all flags (nsw, etc) here.
+ // TODO: If an IR instruction maps to >1 node, only the final node will have
+ // flags set.
+ if (SDNode *Node = getNodeForIRValue(&I)) {
+ SDNodeFlags IncomingFlags;
+ IncomingFlags.copyFMF(*FPMO);
+ if (!Node->getFlags().isDefined())
+ Node->setFlags(IncomingFlags);
+ else
+ Node->intersectFlagsWith(IncomingFlags);
+ }
+ }
+
if (!isa<TerminatorInst>(&I) && !HasTailCall &&
!isStatepoint(&I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
@@ -2753,9 +2769,6 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
Flags.setVectorReduction(true);
LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
}
- if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
- Flags.copyFMF(*FPOp);
- }
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
@@ -2851,13 +2864,12 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
predicate = FCmpInst::Predicate(FC->getPredicate());
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- ISD::CondCode Condition = getFCmpCondCode(predicate);
- // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
- // FIXME: We should propagate the fast-math-flags to the DAG node itself for
- // further optimization, but currently FMF is only applicable to binary nodes.
- if (TM.Options.NoNaNsFPMath)
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+ auto *FPMO = dyn_cast<FPMathOperator>(&I);
+ if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
+
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
@@ -8082,8 +8094,6 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
FastMathFlags FMF;
if (isa<FPMathOperator>(I))
FMF = I.getFastMathFlags();
- SDNodeFlags SDFlags;
- SDFlags.setNoNaNs(FMF.noNaNs());
switch (Intrinsic) {
case Intrinsic::experimental_vector_reduce_fadd:
@@ -8126,10 +8136,10 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
break;
case Intrinsic::experimental_vector_reduce_fmax:
- Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
+ Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1);
break;
case Intrinsic::experimental_vector_reduce_fmin:
- Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
+ Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1);
break;
default:
llvm_unreachable("Unhandled vector reduce intrinsic");
OpenPOWER on IntegriCloud