diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-07-15 20:18:31 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-07-15 20:18:31 +0000 |
| commit | e9fa3b8e6bb9a8d6d6674058c18b264cce83b026 (patch) | |
| tree | 484738461c1e4b03b182af1987892e309eb56a67 /llvm/lib/Target | |
| parent | 1d077749ea874e9bf71ce96f5c3426e7d549118b (diff) | |
| download | bcm5719-llvm-e9fa3b8e6bb9a8d6d6674058c18b264cce83b026.tar.gz bcm5719-llvm-e9fa3b8e6bb9a8d6d6674058c18b264cce83b026.zip | |
R600/SI: Implement less wrong f32 fdiv
Assuming the device does not report support for single precision denormals
or accurate sqrt/div, this passes the OpenCL conformance test.
llvm-svn: 213089
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.cpp | 76 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIInstructions.td | 11 |
3 files changed, 83 insertions, 7 deletions
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp index a7db2a9a3d9..56e760cf517 100644 --- a/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/llvm/lib/Target/R600/SIISelLowering.cpp @@ -221,6 +221,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::FNEG, MVT::f64, Expand); setOperationAction(ISD::FABS, MVT::f64, Expand); + setOperationAction(ISD::FDIV, MVT::f32, Custom); + setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); @@ -633,6 +635,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { } case ISD::SELECT: return LowerSELECT(Op, DAG); + case ISD::FDIV: return LowerFDIV(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); case ISD::INTRINSIC_WO_CHAIN: { @@ -930,6 +933,79 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Res); } +static SDValue performUnsafeFDIV(SDValue Op, SelectionDAG &DAG) { + SDLoc SL(Op); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + EVT VT = Op.getValueType(); + + if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) { + if (CLHS->isExactlyValue(1.0)) { + + // 1.0 / sqrt(x) -> rsq(x) + if (RHS.getOpcode() == ISD::FSQRT) + return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0)); + + // 1.0 / x -> rcp(x) + return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS); + } + } + + // Turn into multiply by the reciprocal + // x / y -> x * (1.0 / y) + SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS); + return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip); +} + +SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { + if (DAG.getTarget().Options.UnsafeFPMath) + return performUnsafeFDIV(Op, DAG); + + SDLoc SL(Op); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + SDValue r1 = DAG.getNode(ISD::FABS, SL, 
MVT::f32, RHS); + + const APFloat K0Val(BitsToFloat(0x6f800000)); + const SDValue K0 = DAG.getConstantFP(K0Val, MVT::f32); + + const APFloat K1Val(BitsToFloat(0x2f800000)); + const SDValue K1 = DAG.getConstantFP(K1Val, MVT::f32); + + const SDValue One = DAG.getTargetConstantFP(1.0, MVT::f32); + + EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32); + + SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT); + + SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One); + + r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3); + + SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1); + + SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0); + + return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul); +} + +SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const { + return SDValue(); +} + +SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + + if (VT == MVT::f32) + return LowerFDIV32(Op, DAG); + + if (VT == MVT::f64) + return LowerFDIV64(Op, DAG); + + llvm_unreachable("Unexpected type for fdiv"); +} + SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); StoreSDNode *Store = cast<StoreSDNode>(Op); diff --git a/llvm/lib/Target/R600/SIISelLowering.h b/llvm/lib/Target/R600/SIISelLowering.h index e25323ae87d..aa09d2c9f8e 100644 --- a/llvm/lib/Target/R600/SIISelLowering.h +++ b/llvm/lib/Target/R600/SIISelLowering.h @@ -27,6 +27,9 @@ class SITargetLowering : public AMDGPUTargetLowering { SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; diff --git 
a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index e72203321a5..1d2dd2f63a6 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -1800,11 +1800,13 @@ def : Pat < // VOP1 Patterns //===----------------------------------------------------------------------===// -def : RcpPat<V_RCP_F32_e32, f32>; def : RcpPat<V_RCP_F64_e32, f64>; -defm : RsqPat<V_RSQ_F32_e32, f32>; defm : RsqPat<V_RSQ_F64_e32, f64>; +let Predicates = [UnsafeFPMath] in { +defm : RsqPat<V_RSQ_F32_e32, f32>; +} + //===----------------------------------------------------------------------===// // VOP2 Patterns //===----------------------------------------------------------------------===// @@ -2337,11 +2339,6 @@ def : Pat < >; def : Pat< - (fdiv f32:$src0, f32:$src1), - (V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1)) ->; - -def : Pat< (fdiv f64:$src0, f64:$src1), (V_MUL_F64 $src0, (V_RCP_F64_e32 $src1), (i64 0)) >; |

