diff options
| author | Eric Christopher <echristo@gmail.com> | 2016-06-07 20:27:12 +0000 |
|---|---|---|
| committer | Eric Christopher <echristo@gmail.com> | 2016-06-07 20:27:12 +0000 |
| commit | 538d09d0dd691b8e434a5f6c80d87f75e54e1a63 (patch) | |
| tree | 6250196798a9661bd4344ddba155e07f5351773e /llvm/lib | |
| parent | d7c717c4351ec5c822960e038ebc8d9d2bf63f1c (diff) | |
| download | bcm5719-llvm-538d09d0dd691b8e434a5f6c80d87f75e54e1a63.tar.gz bcm5719-llvm-538d09d0dd691b8e434a5f6c80d87f75e54e1a63.zip | |
Revert "Differential Revision: http://reviews.llvm.org/D20557"
Author: Wei Ding <wei.ding2@amd.com>
Date: Tue Jun 7 19:04:44 2016 +0000
Differential Revision: http://reviews.llvm.org/D20557
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272044
91177308-0d34-0410-b5e6-96231b3b80d8
The change above is being reverted because it was breaking the bots.
This reverts commit r272044.
llvm-svn: 272056
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 72 |
1 file changed, 17 insertions, 55 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ea01d9a2ed6..1fe896f4601 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -36,12 +36,6 @@ using namespace llvm; -// -amdgpu-fast-fdiv - Command line option to enable faster 2.5 ulp fdiv. -static cl::opt<bool> EnableAMDGPUFastFDIV( - "amdgpu-fast-fdiv", - cl::desc("Enable faster 2.5 ulp fdiv"), - cl::init(false)); - static unsigned findFirstFreeSGPR(CCState &CCInfo) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) { @@ -1947,11 +1941,8 @@ SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const { } SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { - const SDNodeFlags *Flags = Op->getFlags(); - if (Flags->hasAllowReciprocal()) { - if (SDValue FastLowered = LowerFastFDIV(Op, DAG)) - return FastLowered; - } + if (SDValue FastLowered = LowerFastFDIV(Op, DAG)) + return FastLowered; // This uses v_rcp_f32 which does not handle denormals. Let this hit a // selection error for now rather than do something incorrect. 
@@ -1962,61 +1953,32 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); - // faster 2.5 ulp fdiv when using -amdgpu-fast-fdiv flag - if (EnableAMDGPUFastFDIV) { - SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS); + SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS); - const APFloat K0Val(BitsToFloat(0x6f800000)); - const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32); + const APFloat K0Val(BitsToFloat(0x6f800000)); + const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32); - const APFloat K1Val(BitsToFloat(0x2f800000)); - const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32); + const APFloat K1Val(BitsToFloat(0x2f800000)); + const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32); - const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32); + const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32); - EVT SetCCVT = - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32); - SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT); + SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT); - SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One); + SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One); - // TODO: Should this propagate fast-math-flags? + // TODO: Should this propagate fast-math-flags? - r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3); + r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3); - SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1); + SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1); - SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0); + SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0); - return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul); - } - - // Generates more precise fpdiv32. 
- const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32); - - SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1); - - SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, RHS, RHS, LHS); - SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, LHS, RHS, LHS); - - SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, DenominatorScaled); - - SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32, DenominatorScaled); - - SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, ApproxRcp, One); - SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f32, Fma0, ApproxRcp, ApproxRcp); - - SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, NumeratorScaled, Fma1); - - SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Mul, NumeratorScaled); - SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul); - SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3, NumeratorScaled); - - SDValue Scale = NumeratorScaled.getValue(1); - SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32, Fma4, Fma1, Fma3, Scale); - - return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS); + return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul); } SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const { |

