summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author David Stuttard <david.stuttard@amd.com> 2019-07-29 08:15:10 +0000
committer David Stuttard <david.stuttard@amd.com> 2019-07-29 08:15:10 +0000
commit 20235ef3e75191f43de330d67ffb74306c8997e0 (patch)
tree 1542c13e7b01aab0ec9ae86363341e2726b79e6f /llvm/lib
parent 85380601033396dfa2f514733d61c0d31c54cfca (diff)
download bcm5719-llvm-20235ef3e75191f43de330d67ffb74306c8997e0.tar.gz
bcm5719-llvm-20235ef3e75191f43de330d67ffb74306c8997e0.zip
[AMDGPU] Enable v4f16 and above for v_pk_fma instructions
Summary: If isel is presented with <2 x half> vectors, it will correctly select v_pk_fma style instructions. If isel is presented with wider vectors, e.g. <4 x half>, it will scalarize them, unlike for other instruction types (such as fadd, fmul, etc.). Added extra support to enable this. Updated one of the tests to include a test for this (as well as extending the test to GFX9). Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65325 Change-Id: I50a4577a3f8223fb53992af3b7d26121f65b71ee llvm-svn: 367206
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp 27
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.h 1
2 files changed, 28 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9a323c649ab..2a69b2ebb60 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -653,6 +653,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FADD, MVT::v4f16, Custom);
setOperationAction(ISD::FMUL, MVT::v4f16, Custom);
+ setOperationAction(ISD::FMA, MVT::v4f16, Custom);
setOperationAction(ISD::FMAXNUM, MVT::v2f16, Custom);
setOperationAction(ISD::FMINNUM, MVT::v2f16, Custom);
@@ -3971,6 +3972,30 @@ SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
}
+/// Lower a three-operand v4i16/v4f16 operation (e.g. ISD::FMA) by splitting
+/// each of its three operands into a low and a high half, emitting the same
+/// opcode on each pair of halves with the original node's flags, and
+/// concatenating the two partial results back into a value of type VT.
+///
+/// \param Op  The ternary vector node to split; its value type must be
+///            v4i16 or v4f16.
+/// \param DAG The selection DAG used to create the replacement nodes.
+/// \returns A CONCAT_VECTORS node of the original type built from the two
+///          half-width operations.
+SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  unsigned Opc = Op.getOpcode();
+  EVT VT = Op.getValueType();
+  assert((VT == MVT::v4i16 || VT == MVT::v4f16) &&
+         "splitTernaryVectorOp only handles v4i16 and v4f16");
+
+  SDValue Lo0, Hi0;
+  std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0);
+  SDValue Lo1, Hi1;
+  std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1);
+  SDValue Lo2, Hi2;
+  std::tie(Lo2, Hi2) = DAG.SplitVectorOperand(Op.getNode(), 2);
+
+  SDLoc SL(Op);
+
+  // Both halves carry over the flags of the node being split.
+  SDValue OpLo = DAG.getNode(Opc, SL, Lo0.getValueType(), Lo0, Lo1, Lo2,
+                             Op->getFlags());
+  SDValue OpHi = DAG.getNode(Opc, SL, Hi0.getValueType(), Hi0, Hi1, Hi2,
+                             Op->getFlags());
+
+  // Reuse SL instead of constructing a second SDLoc from the same node.
+  return DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, OpLo, OpHi);
+}
+
+
SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
@@ -4023,6 +4048,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FMINNUM:
case ISD::FMAXNUM:
return lowerFMINNUM_FMAXNUM(Op, DAG);
+ case ISD::FMA:
+ return splitTernaryVectorOp(Op, DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 27c6445d60a..410460226a2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -331,6 +331,7 @@ public:
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue splitTernaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
OpenPOWER on IntegriCloud