diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-08-02 22:25:04 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-08-02 22:25:04 +0000 |
| commit | 979902b3ff9009b5909ae245baf43d15f470e22b (patch) | |
| tree | 3f3e3c755714fbf029214143203498971b78aad7 /llvm/lib | |
| parent | 47509f618507faab74d5983d442937738607aa29 (diff) | |
| download | bcm5719-llvm-979902b3ff9009b5909ae245baf43d15f470e22b.tar.gz bcm5719-llvm-979902b3ff9009b5909ae245baf43d15f470e22b.zip | |
AMDGPU: fdiv -1, x -> rcp -x
llvm-svn: 277535
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 41 |
1 file changed, 25 insertions, 16 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 6f56920cbf8..25ba21edb72 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2464,22 +2464,31 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, bool Unsafe = DAG.getTarget().Options.UnsafeFPMath; if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) { - if ((Unsafe || (VT == MVT::f32 && !Subtarget->hasFP32Denormals())) && - CLHS->isExactlyValue(1.0)) { - // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to - // the CI documentation has a worst case error of 1 ulp. - // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to - // use it as long as we aren't trying to use denormals. - - // 1.0 / sqrt(x) -> rsq(x) - // - // XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP - // error seems really high at 2^29 ULP. - if (RHS.getOpcode() == ISD::FSQRT) - return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0)); - - // 1.0 / x -> rcp(x) - return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS); + if ((Unsafe || (VT == MVT::f32 && !Subtarget->hasFP32Denormals()))) { + + if (CLHS->isExactlyValue(1.0)) { + // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to + // the CI documentation has a worst case error of 1 ulp. + // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to + // use it as long as we aren't trying to use denormals. + + // 1.0 / sqrt(x) -> rsq(x) + // + // XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP + // error seems really high at 2^29 ULP. + if (RHS.getOpcode() == ISD::FSQRT) + return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0)); + + // 1.0 / x -> rcp(x) + return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS); + } + + // Same as for 1.0, but expand the sign out of the constant. 
+ if (CLHS->isExactlyValue(-1.0)) { + // -1.0 / x -> rcp (fneg x) + SDValue FNegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); + return DAG.getNode(AMDGPUISD::RCP, SL, VT, FNegRHS); + } } } |

