diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-22 03:05:41 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-22 03:05:41 +0000 |
| commit | 4052a576c05a9d11b7f7ac354db901275101003b (patch) | |
| tree | e08f863ad685f9e13aa5d88520e15e85c20ab48a /llvm/lib | |
| parent | ce84130f8562c8c990362502f03d04187a0be581 (diff) | |
| download | bcm5719-llvm-4052a576c05a9d11b7f7ac354db901275101003b.tar.gz bcm5719-llvm-4052a576c05a9d11b7f7ac354db901275101003b.zip | |
AMDGPU: Custom lower f16 fdiv
llvm-svn: 290301
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 22 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 |
2 files changed, 22 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index b9302582fa8..5411ccf7400 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -299,7 +299,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); setOperationAction(ISD::FMINNUM, MVT::f16, Legal); - setOperationAction(ISD::FDIV, MVT::f16, Promote); + setOperationAction(ISD::FDIV, MVT::f16, Custom); // F16 - VOP3 Actions. setOperationAction(ISD::FMA, MVT::f16, Legal); @@ -3008,6 +3008,23 @@ static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL, GlueChain.getValue(2)); } +SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Src0 = Op.getOperand(0); + SDValue Src1 = Op.getOperand(1); + + SDValue CvtSrc0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0); + SDValue CvtSrc1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1); + + SDValue RcpSrc1 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, CvtSrc1); + SDValue Quot = DAG.getNode(ISD::FMUL, SL, MVT::f32, CvtSrc0, RcpSrc1); + + SDValue FPRoundFlag = DAG.getTargetConstant(0, SL, MVT::i32); + SDValue BestQuot = DAG.getNode(ISD::FP_ROUND, SL, MVT::f16, Quot, FPRoundFlag); + + return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f16, BestQuot, Src1, Src0); +} + // Faster 2.5 ULP division that does not support denormals. SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); @@ -3201,6 +3218,9 @@ SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const { if (VT == MVT::f64) return LowerFDIV64(Op, DAG); + if (VT == MVT::f16) + return LowerFDIV16(Op, DAG); + llvm_unreachable("Unexpected type for fdiv"); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index cb6d5364793..b4d87d9406f 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -37,6 +37,7 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFDIV16(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const; |

