diff options
author | Sanjay Patel <spatel@rotateright.com> | 2018-02-01 16:57:18 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2018-02-01 16:57:18 +0000 |
commit | 657e5d8d41e7aae1cabfcb6e3200a9d4981a456e (patch) | |
tree | fd37bde2c22c124863fe42f5a53b42432a2ce57f /llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | |
parent | 396fc876946d73bd047ab54e21d5acc1a6d3023c (diff) | |
download | bcm5719-llvm-657e5d8d41e7aae1cabfcb6e3200a9d4981a456e.tar.gz bcm5719-llvm-657e5d8d41e7aae1cabfcb6e3200a9d4981a456e.zip |
[DAGCombiner] filter out denorm inputs when calculating sqrt estimate (PR34994)
As shown in the example in PR34994:
https://bugs.llvm.org/show_bug.cgi?id=34994
...we can return a very wrong answer (inf instead of 0.0) for square root when
using a reciprocal square root estimate instruction.
Here, I've made the filtering out of denormals conditional on the function
having "denormal-fp-math"="ieee" in its attributes. The other options for this
attribute are 'preserve-sign' and 'positive-zero'.
So we don't generate this extra code by default with just '-ffast-math' (because
then there's no denormal attribute string at all), but the denormal check is
generated if you specify '-ffast-math -fdenormal-fp-math=ieee' from clang.
As noted in the review, there may be other problems in clang that affect the
results depending on platform (Linux x86 at least), but this should allow
creating the desired codegen.
Differential Revision: https://reviews.llvm.org/D42323
llvm-svn: 323981
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 35 |
1 files changed, 25 insertions, 10 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0eb8d35cafa..f03deb1c66a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17454,19 +17454,34 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); if (!Reciprocal) { - // Unfortunately, Est is now NaN if the input was exactly 0.0. - // Select out this case and force the answer to 0.0. + // The estimate is now completely wrong if the input was exactly 0.0 or + // possibly a denormal. Force the answer to 0.0 for those cases. EVT VT = Op.getValueType(); SDLoc DL(Op); - - SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); EVT CCVT = getSetCCResultType(VT); - SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); - AddToWorklist(ZeroCmp.getNode()); - - Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, - ZeroCmp, FPZero, Est); - AddToWorklist(Est.getNode()); + ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; + const Function &F = DAG.getMachineFunction().getFunction(); + Attribute Denorms = F.getFnAttribute("denormal-fp-math"); + if (Denorms.getValueAsString().equals("ieee")) { + // fabs(X) < SmallestNormal ? 0.0 : Est + const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); + APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); + SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT); + SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); + SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); + SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); + Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); + AddToWorklist(Fabs.getNode()); + AddToWorklist(IsDenorm.getNode()); + AddToWorklist(Est.getNode()); + } else { + // X == 0.0 ? 
0.0 : Est + SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); + SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); + Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); + AddToWorklist(IsZero.getNode()); + AddToWorklist(Est.getNode()); + } } } return Est; |