diff options
author | Sanjay Patel <spatel@rotateright.com> | 2014-09-30 20:28:48 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2014-09-30 20:28:48 +0000 |
commit | 8fde95cb2b547a98b55e13f005ca00b899c095fb (patch) | |
tree | ab61764af3435ad1b99aac48b5508cfb7b3bdbd9 /llvm/lib/Target/PowerPC/PPCISelLowering.cpp | |
parent | 916cca728fbc5b8458b30a59e06d72a11f691f65 (diff) | |
download | bcm5719-llvm-8fde95cb2b547a98b55e13f005ca00b899c095fb.tar.gz bcm5719-llvm-8fde95cb2b547a98b55e13f005ca00b899c095fb.zip |
Split the estimate() interface into separate functions for each type. NFC.
It was hacky to use an opcode as a switch because it won't always match
(rsqrte != sqrte), and it looks like we'll need to add more special casing
per arch than I had hoped for. Eg, x86 will prefer a different NR estimate
implementation. ARM will want to use its 'step' instructions. There also
don't appear to have been any new estimate instructions in any arch in a long,
long time. Altivec vloge and vexpte may have been the first and last in
that field...
llvm-svn: 218698
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 48 |
1 files changed, 29 insertions, 19 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 5750e2fbb65..e0396fdfe46 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7458,25 +7458,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Target Optimization Hooks //===----------------------------------------------------------------------===// -SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const { +SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, + DAGCombinerInfo &DCI, + unsigned &RefinementSteps) const { EVT VT = Operand.getValueType(); - SDValue RV; - if (Opcode == ISD::FSQRT) { - if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || - (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || - (VT == MVT::v4f32 && Subtarget.hasAltivec()) || - (VT == MVT::v2f64 && Subtarget.hasVSX())) - RV = DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); - } else if (Opcode == ISD::FDIV) { - if ((VT == MVT::f32 && Subtarget.hasFRES()) || - (VT == MVT::f64 && Subtarget.hasFRE()) || - (VT == MVT::v4f32 && Subtarget.hasAltivec()) || - (VT == MVT::v2f64 && Subtarget.hasVSX())) - RV = DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); - } - if (RV.getNode()) { + if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || + (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || + (VT == MVT::v4f32 && Subtarget.hasAltivec()) || + (VT == MVT::v2f64 && Subtarget.hasVSX())) { // Convergence is quadratic, so we essentially double the number of digits // correct after every iteration. For both FRE and FRSQRTE, the minimum // architected relative accuracy is 2^-5. When hasRecipPrec(), this is @@ -7484,8 +7473,29 @@ SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand, RefinementSteps = Subtarget.hasRecipPrec() ? 
1 : 3; if (VT.getScalarType() == MVT::f64) ++RefinementSteps; + return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } - return RV; + return SDValue(); +} + +SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, + DAGCombinerInfo &DCI, + unsigned &RefinementSteps) const { + EVT VT = Operand.getValueType(); + if ((VT == MVT::f32 && Subtarget.hasFRES()) || + (VT == MVT::f64 && Subtarget.hasFRE()) || + (VT == MVT::v4f32 && Subtarget.hasAltivec()) || + (VT == MVT::v2f64 && Subtarget.hasVSX())) { + // Convergence is quadratic, so we essentially double the number of digits + // correct after every iteration. For both FRE and FRSQRTE, the minimum + // architected relative accuracy is 2^-5. When hasRecipPrec(), this is + // 2^-14. IEEE float has 23 digits and double has 52 digits. + RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; + if (VT.getScalarType() == MVT::f64) + ++RefinementSteps; + return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); + } + return SDValue(); } static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, |