summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 62
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.h | 1
2 files changed, 63 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 5e6325aa93e..a0e4c8a587f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -630,6 +630,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+
setOperationAction(ISD::SHL, MVT::v4i16, Custom);
setOperationAction(ISD::SRA, MVT::v4i16, Custom);
setOperationAction(ISD::SRL, MVT::v4i16, Custom);
@@ -3957,6 +3960,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return lowerVECTOR_SHUFFLE(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG);
case ISD::FP_ROUND:
@@ -4740,6 +4745,63 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getAnyExtOrTrunc(Elt, SL, ResultVT);
}
+static bool elementPairIsContiguous(ArrayRef<int> Mask, int Elt) { // True when mask entries Elt and Elt + 1 select two adjacent source elements beginning at an even index.
+ assert(Elt % 2 == 0); // Elt must be the first (even) lane of a two-lane result pair.
+ return Mask[Elt + 1] == Mask[Elt] + 1 && (Mask[Elt] % 2 == 0); // Second lane must directly follow the first, and the first must be even; a first entry of -1 is rejected because -1 % 2 is -1 in C++.
+}
+
+SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const { // Lowers a shuffle by producing one PackVT value per pair of result lanes and concatenating those values.
+ SDLoc SL(Op);
+ EVT ResultVT = Op.getValueType();
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); // Used below to read the shuffle mask and the two source operands.
+
+ EVT PackVT = ResultVT.isInteger() ? MVT::v2i16 : MVT::v2f16; // Two-element 16-bit vector type matching the result's integer/float kind.
+ EVT EltVT = PackVT.getVectorElementType();
+ int SrcNumElts = Op.getOperand(0).getValueType().getVectorNumElements(); // Mask indices >= SrcNumElts are treated as referring to the second operand.
+
+ // vector_shuffle <0,1,6,7> lhs, rhs
+ // -> concat_vectors (extract_subvector lhs, 0), (extract_subvector rhs, 2)
+ //
+ // vector_shuffle <6,7,2,3> lhs, rhs
+ // -> concat_vectors (extract_subvector rhs, 2), (extract_subvector lhs, 2)
+ //
+ // vector_shuffle <6,7,0,1> lhs, rhs
+ // -> concat_vectors (extract_subvector rhs, 2), (extract_subvector lhs, 0)
+
+ // Avoid scalarizing when both halves are reading from consecutive elements.
+ SmallVector<SDValue, 4> Pieces;
+ for (int I = 0, N = ResultVT.getVectorNumElements(); I != N; I += 2) { // Walk the result two lanes at a time.
+ if (elementPairIsContiguous(SVN->getMask(), I)) { // The pair maps to an even-aligned adjacent source pair, so take it as a single subvector.
+ const int Idx = SVN->getMaskElt(I);
+ int VecIdx = Idx < SrcNumElts ? 0 : 1; // 0 selects the first operand, 1 the second.
+ int EltIdx = Idx < SrcNumElts ? Idx : Idx - SrcNumElts; // Index rebased to the selected operand.
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL,
+ PackVT, SVN->getOperand(VecIdx),
+ DAG.getConstant(EltIdx, SL, MVT::i32));
+ Pieces.push_back(SubVec);
+ } else { // Otherwise extract each lane separately and rebuild the pair.
+ const int Idx0 = SVN->getMaskElt(I);
+ const int Idx1 = SVN->getMaskElt(I + 1);
+ int VecIdx0 = Idx0 < SrcNumElts ? 0 : 1;
+ int VecIdx1 = Idx1 < SrcNumElts ? 0 : 1;
+ int EltIdx0 = Idx0 < SrcNumElts ? Idx0 : Idx0 - SrcNumElts; // NOTE(review): a negative mask index is passed through unchanged as the extract index -- confirm such lanes are handled as intended.
+ int EltIdx1 = Idx1 < SrcNumElts ? Idx1 : Idx1 - SrcNumElts;
+
+ SDValue Vec0 = SVN->getOperand(VecIdx0);
+ SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Vec0, DAG.getConstant(EltIdx0, SL, MVT::i32));
+
+ SDValue Vec1 = SVN->getOperand(VecIdx1);
+ SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Vec1, DAG.getConstant(EltIdx1, SL, MVT::i32));
+ Pieces.push_back(DAG.getBuildVector(PackVT, SL, { Elt0, Elt1 }));
+ }
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, SL, ResultVT, Pieces); // Join the per-pair pieces into the full-width result.
+}
+
SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index b3762dc3483..909ee8f7987 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -123,6 +123,7 @@ private:
SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; // Custom lowering for ISD::VECTOR_SHUFFLE nodes.
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
OpenPOWER on IntegriCloud