summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author Matt Arsenault <Matthew.Arsenault@amd.com> 2016-12-22 03:05:41 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com> 2016-12-22 03:05:41 +0000
commit 4052a576c05a9d11b7f7ac354db901275101003b (patch)
tree e08f863ad685f9e13aa5d88520e15e85c20ab48a /llvm/lib
parent ce84130f8562c8c990362502f03d04187a0be581 (diff)
download bcm5719-llvm-4052a576c05a9d11b7f7ac354db901275101003b.tar.gz
download bcm5719-llvm-4052a576c05a9d11b7f7ac354db901275101003b.zip
AMDGPU: Custom lower f16 fdiv
llvm-svn: 290301
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp 22
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.h 1
2 files changed, 22 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b9302582fa8..5411ccf7400 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -299,7 +299,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
- setOperationAction(ISD::FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::f16, Custom);
// F16 - VOP3 Actions.
setOperationAction(ISD::FMA, MVT::f16, Legal);
@@ -3008,6 +3008,23 @@ static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
GlueChain.getValue(2));
}
+/// Custom lowering for an f16 FDIV node.
+///
+/// Both operands are extended to f32, the quotient is formed there as
+/// LHS * RCP(RHS), rounded back to f16, and finally passed through DIV_FIXUP
+/// together with the original (unextended) f16 operands.
+///
+/// \param Op  The FDIV node being lowered; operand 0 is the left-hand side
+///            and operand 1 the right-hand side of the division.
+/// \param DAG The SelectionDAG in which the replacement nodes are created.
+/// \returns The f16 DIV_FIXUP node that replaces \p Op.
+SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ // Widen both f16 inputs to f32 (LHS first, then RHS).
+ auto ExtendToF32 = [&](SDValue V) {
+   return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, V);
+ };
+ SDValue WideLHS = ExtendToF32(LHS);
+ SDValue WideRHS = ExtendToF32(RHS);
+
+ // f32 quotient, computed as a multiply by the RCP node of the divisor.
+ SDValue Recip = DAG.getNode(AMDGPUISD::RCP, DL, MVT::f32, WideRHS);
+ SDValue WideQuot = DAG.getNode(ISD::FMUL, DL, MVT::f32, WideLHS, Recip);
+
+ // Narrow back to f16. FP_ROUND's second operand is the target constant 0.
+ // NOTE(review): presumably 0 means the round may change the value --
+ // confirm against the ISD::FP_ROUND documentation.
+ SDValue NarrowQuot =
+     DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, WideQuot,
+                 DAG.getTargetConstant(0, DL, MVT::i32));
+
+ // DIV_FIXUP is given the rounded quotient, then the divisor, then the
+ // dividend -- note the operands are swapped relative to the FDIV node.
+ return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, MVT::f16, NarrowQuot, RHS, LHS);
+}
+
// Faster 2.5 ULP division that does not support denormals.
SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
@@ -3201,6 +3218,9 @@ SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::f64)
return LowerFDIV64(Op, DAG);
+ if (VT == MVT::f16)
+ return LowerFDIV16(Op, DAG);
+
llvm_unreachable("Unexpected type for fdiv");
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index cb6d5364793..b4d87d9406f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -37,6 +37,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFDIV16(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
OpenPOWER on IntegriCloud