summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2014-10-09 21:26:35 +0000
committerSanjay Patel <spatel@rotateright.com>2014-10-09 21:26:35 +0000
commit3d497cd778261818eb76b5ad97f7f751cfebfcbb (patch)
treebfe0a173e16fda4bc5c4271a572710d6e44f6c38 /llvm/lib/CodeGen
parent6d28da10e5d1ff6567871dd5929ec84b762fe208 (diff)
downloadbcm5719-llvm-3d497cd778261818eb76b5ad97f7f751cfebfcbb.tar.gz
bcm5719-llvm-3d497cd778261818eb76b5ad97f7f751cfebfcbb.zip
Improve sqrt estimate algorithm (fast-math)
This patch changes the fast-math implementation for calculating sqrt(x) from:
    y = 1 / (1 / sqrt(x))
to:
    y = x * (1 / sqrt(x))

This has 2 benefits: less code / faster code, and one fewer estimate instruction that may lose precision.

The only target that will be affected (until http://reviews.llvm.org/D5658 is approved) is PPC. The difference in codegen for PPC is 2 fewer flops for a single-precision sqrtf or vector sqrtf and 4 fewer flops for a double-precision sqrt. We also eliminate a constant load and extra register usage.

Differential Revision: http://reviews.llvm.org/D5682

llvm-svn: 219445
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33
1 files changed, 16 insertions, 17 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 73f661d7c04..76442fbda93 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7088,26 +7088,25 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
if (DAG.getTarget().Options.UnsafeFPMath) {
- // Compute this as 1/(1/sqrt(X)): the reciprocal of the reciprocal sqrt.
+ // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
AddToWorklist(RV.getNode());
- RV = BuildReciprocalEstimate(RV);
- if (RV.getNode()) {
- // Unfortunately, RV is now NaN if the input was exactly 0.
- // Select out this case and force the answer to 0.
- EVT VT = RV.getValueType();
-
- SDValue Zero = DAG.getConstantFP(0.0, VT);
- SDValue ZeroCmp =
- DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
- N->getOperand(0), Zero, ISD::SETEQ);
- AddToWorklist(ZeroCmp.getNode());
- AddToWorklist(RV.getNode());
+ EVT VT = RV.getValueType();
+ RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
+ AddToWorklist(RV.getNode());
- RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
- SDLoc(N), VT, ZeroCmp, Zero, RV);
- return RV;
- }
+ // Unfortunately, RV is now NaN if the input was exactly 0.
+ // Select out this case and force the answer to 0.
+ SDValue Zero = DAG.getConstantFP(0.0, VT);
+ SDValue ZeroCmp =
+ DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
+ N->getOperand(0), Zero, ISD::SETEQ);
+ AddToWorklist(ZeroCmp.getNode());
+ AddToWorklist(RV.getNode());
+
+ RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
+ SDLoc(N), VT, ZeroCmp, Zero, RV);
+ return RV;
}
}
return SDValue();
OpenPOWER on IntegriCloud