diff options
author | Sanjay Patel <spatel@rotateright.com> | 2014-10-24 17:02:16 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2014-10-24 17:02:16 +0000 |
commit | 957efc23bb87d341a1b478d87a48bb888c2d4068 (patch) | |
tree | 48ae584987b7970cb90899c03590938f4d622799 /llvm/lib/Target/PowerPC/PPCISelLowering.cpp | |
parent | 5e3a421bfcb891fc7821daa501e30c113fb1bf16 (diff) | |
download | bcm5719-llvm-957efc23bb87d341a1b478d87a48bb888c2d4068.tar.gz bcm5719-llvm-957efc23bb87d341a1b478d87a48bb888c2d4068.zip |
Use rsqrt (X86) to speed up reciprocal square root calcs
This is a first step for generating SSE rsqrt instructions for
reciprocal square root calcs when fast-math is allowed.
For now, be conservative and only enable this for AMD btver2
where performance improves significantly - for example, 29%
on llvm/projects/test-suite/SingleSource/Benchmarks/BenchmarkGame/n-body.c
(if we convert the data type to single-precision float).
This patch adds a two constant version of the Newton-Raphson
refinement algorithm to DAGCombiner that can be selected by any target
via a parameter returned by getRsqrtEstimate()..
See PR20900 for more details:
http://llvm.org/bugs/show_bug.cgi?id=20900
Differential Revision: http://reviews.llvm.org/D5658
llvm-svn: 220570
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 |
1 files changed, 3 insertions, 1 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ef357f4b7f2..72d3a59192b 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7466,7 +7466,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const { + unsigned &RefinementSteps, + bool &UseOneConstNR) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || @@ -7479,6 +7480,7 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; if (VT.getScalarType() == MVT::f64) ++RefinementSteps; + UseOneConstNR = true; return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } return SDValue(); |