diff options
author | Sanjay Patel <spatel@rotateright.com> | 2014-10-09 21:26:35 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2014-10-09 21:26:35 +0000 |
commit | 3d497cd778261818eb76b5ad97f7f751cfebfcbb (patch) | |
tree | bfe0a173e16fda4bc5c4271a572710d6e44f6c38 | |
parent | 6d28da10e5d1ff6567871dd5929ec84b762fe208 (diff) | |
download | bcm5719-llvm-3d497cd778261818eb76b5ad97f7f751cfebfcbb.tar.gz bcm5719-llvm-3d497cd778261818eb76b5ad97f7f751cfebfcbb.zip |
Improve sqrt estimate algorithm (fast-math)
This patch changes the fast-math implementation for calculating sqrt(x) from:
y = 1 / (1 / sqrt(x))
to:
y = x * (1 / sqrt(x))
This has 2 benefits: the code is smaller and faster, and there is one fewer
estimate instruction that may lose precision.
The only target that will be affected (until http://reviews.llvm.org/D5658 is approved)
is PPC. The difference in codegen for PPC is 2 fewer flops for a single-precision sqrtf
or vector sqrtf and 4 fewer flops for a double-precision sqrt.
We also eliminate a constant load and extra register usage.
Differential Revision: http://reviews.llvm.org/D5682
llvm-svn: 219445
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/recipest.ll | 11 |
2 files changed, 18 insertions, 26 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 73f661d7c04..76442fbda93 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7088,26 +7088,25 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue DAGCombiner::visitFSQRT(SDNode *N) { if (DAG.getTarget().Options.UnsafeFPMath) { - // Compute this as 1/(1/sqrt(X)): the reciprocal of the reciprocal sqrt. + // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { AddToWorklist(RV.getNode()); - RV = BuildReciprocalEstimate(RV); - if (RV.getNode()) { - // Unfortunately, RV is now NaN if the input was exactly 0. - // Select out this case and force the answer to 0. - EVT VT = RV.getValueType(); - - SDValue Zero = DAG.getConstantFP(0.0, VT); - SDValue ZeroCmp = - DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT), - N->getOperand(0), Zero, ISD::SETEQ); - AddToWorklist(ZeroCmp.getNode()); - AddToWorklist(RV.getNode()); + EVT VT = RV.getValueType(); + RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV); + AddToWorklist(RV.getNode()); - RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, - SDLoc(N), VT, ZeroCmp, Zero, RV); - return RV; - } + // Unfortunately, RV is now NaN if the input was exactly 0. + // Select out this case and force the answer to 0. + SDValue Zero = DAG.getConstantFP(0.0, VT); + SDValue ZeroCmp = + DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT), + N->getOperand(0), Zero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + AddToWorklist(RV.getNode()); + + RV = DAG.getNode(VT.isVector() ? 
ISD::VSELECT : ISD::SELECT, + SDLoc(N), VT, ZeroCmp, Zero, RV); + return RV; } } return SDValue(); diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll index de74c043ece..2f6a3eca488 100644 --- a/llvm/test/CodeGen/PowerPC/recipest.ll +++ b/llvm/test/CodeGen/PowerPC/recipest.ll @@ -197,11 +197,7 @@ define double @foo3(double %a) nounwind { ; CHECK-NEXT: fmul ; CHECK-NEXT: fmadd ; CHECK-NEXT: fmul -; CHECK-NEXT: fre -; CHECK-NEXT: fnmsub -; CHECK-NEXT: fmadd -; CHECK-NEXT: fnmsub -; CHECK-NEXT: fmadd +; CHECK-NEXT: fmul ; CHECK: blr ; CHECK-SAFE: @foo3 @@ -220,9 +216,7 @@ define float @goo3(float %a) nounwind { ; CHECK: fmuls ; CHECK-NEXT: fmadds ; CHECK-NEXT: fmuls -; CHECK-NEXT: fres -; CHECK-NEXT: fnmsubs -; CHECK-NEXT: fmadds +; CHECK-NEXT: fmuls ; CHECK: blr ; CHECK-SAFE: @goo3 @@ -236,7 +230,6 @@ define <4 x float> @hoo3(<4 x float> %a) nounwind { ; CHECK: @hoo3 ; CHECK: vrsqrtefp -; CHECK-DAG: vrefp ; CHECK-DAG: vcmpeqfp ; CHECK-SAFE: @hoo3 |