diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.cpp | 37 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SIInstructions.td | 3 |
3 files changed, 31 insertions, 10 deletions
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp index b107ed4238a..b3429b9748c 100644 --- a/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/llvm/lib/Target/R600/SIISelLowering.cpp @@ -936,16 +936,27 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Res); } -static SDValue performUnsafeFDIV(SDValue Op, SelectionDAG &DAG) { +// Catch division cases where we can use shortcuts with rcp and rsq +// instructions. +SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); EVT VT = Op.getValueType(); + bool Unsafe = DAG.getTarget().Options.UnsafeFPMath; if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) { - if (CLHS->isExactlyValue(1.0)) { + if ((Unsafe || (VT == MVT::f32 && !Subtarget->hasFP32Denormals())) && + CLHS->isExactlyValue(1.0)) { + // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to + // the CI documentation has a worst case error of 1 ulp. + // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to + // use it as long as we aren't trying to use denormals. // 1.0 / sqrt(x) -> rsq(x) + // + // XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP + // error seems really high at 2^29 ULP. if (RHS.getOpcode() == ISD::FSQRT) return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0)); @@ -954,15 +965,25 @@ static SDValue performUnsafeFDIV(SDValue Op, SelectionDAG &DAG) { } } - // Turn into multiply by the reciprocal - // x / y -> x * (1.0 / y) - SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS); - return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip); + if (Unsafe) { + // Turn into multiply by the reciprocal. + // x / y -> x * (1.0 / y) + SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS); + return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip); + } + + return SDValue(); } SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { - if (DAG.getTarget().Options.UnsafeFPMath) - return performUnsafeFDIV(Op, DAG); + SDValue FastLowered = LowerFastFDIV(Op, DAG); + if (FastLowered.getNode()) + return FastLowered; + + // This uses v_rcp_f32 which does not handle denormals. Let this hit a + // selection error for now rather than do something incorrect. + if (Subtarget->hasFP32Denormals()) + return SDValue(); SDLoc SL(Op); SDValue LHS = Op.getOperand(0); diff --git a/llvm/lib/Target/R600/SIISelLowering.h b/llvm/lib/Target/R600/SIISelLowering.h index aa09d2c9f8e..9e9a0b05f53 100644 --- a/llvm/lib/Target/R600/SIISelLowering.h +++ b/llvm/lib/Target/R600/SIISelLowering.h @@ -27,6 +27,7 @@ class SITargetLowering : public AMDGPUTargetLowering { SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index 1d2dd2f63a6..1a9be6c6157 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -1800,10 +1800,9 @@ def : Pat < // VOP1 Patterns //===----------------------------------------------------------------------===// +let Predicates = [UnsafeFPMath] in { def : RcpPat<V_RCP_F64_e32, f64>; defm : RsqPat<V_RSQ_F64_e32, f64>; - -let Predicates = [UnsafeFPMath] in { defm : RsqPat<V_RSQ_F32_e32, f32>; } |