summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
diff options
context:
space:
mode:
author: Sanjay Patel <spatel@rotateright.com> 2018-02-01 16:57:18 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2018-02-01 16:57:18 +0000
commit: 657e5d8d41e7aae1cabfcb6e3200a9d4981a456e (patch)
tree: fd37bde2c22c124863fe42f5a53b42432a2ce57f /llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
parent: 396fc876946d73bd047ab54e21d5acc1a6d3023c (diff)
download: bcm5719-llvm-657e5d8d41e7aae1cabfcb6e3200a9d4981a456e.tar.gz
bcm5719-llvm-657e5d8d41e7aae1cabfcb6e3200a9d4981a456e.zip
[DAGCombiner] filter out denorm inputs when calculating sqrt estimate (PR34994)
As shown in the example in PR34994:
https://bugs.llvm.org/show_bug.cgi?id=34994
...we can return a very wrong answer (inf instead of 0.0) for square root when using a reciprocal square root estimate instruction.

Here, I've conditionalized the filtering out of denorms based on the function having "denormal-fp-math"="ieee" in its attributes. The other options for this attribute are 'preserve-sign' and 'positive-zero'.

So we don't generate this extra code by default with just '-ffast-math' (because then there's no denormal attribute string at all), but it works if you specify '-ffast-math -fdenormal-fp-math=ieee' from clang.

As noted in the review, there may be other problems in clang that affect the results depending on platform (Linux x86 at least), but this should allow creating the desired codegen.

Differential Revision: https://reviews.llvm.org/D42323

llvm-svn: 323981
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 35
1 files changed, 25 insertions, 10 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0eb8d35cafa..f03deb1c66a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17454,19 +17454,34 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
: buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
if (!Reciprocal) {
- // Unfortunately, Est is now NaN if the input was exactly 0.0.
- // Select out this case and force the answer to 0.0.
+ // The estimate is now completely wrong if the input was exactly 0.0 or
+ // possibly a denormal. Force the answer to 0.0 for those cases.
EVT VT = Op.getValueType();
SDLoc DL(Op);
-
- SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
EVT CCVT = getSetCCResultType(VT);
- SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
- AddToWorklist(ZeroCmp.getNode());
-
- Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
- ZeroCmp, FPZero, Est);
- AddToWorklist(Est.getNode());
+ ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
+ const Function &F = DAG.getMachineFunction().getFunction();
+ Attribute Denorms = F.getFnAttribute("denormal-fp-math");
+ if (Denorms.getValueAsString().equals("ieee")) {
+ // fabs(X) < SmallestNormal ? 0.0 : Est
+ const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+ APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
+ SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
+ SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
+ Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
+ AddToWorklist(Fabs.getNode());
+ AddToWorklist(IsDenorm.getNode());
+ AddToWorklist(Est.getNode());
+ } else {
+ // X == 0.0 ? 0.0 : Est
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+ Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
+ AddToWorklist(IsZero.getNode());
+ AddToWorklist(Est.getNode());
+ }
}
}
return Est;
OpenPOWER on IntegriCloud