Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp  57
1 file changed, 57 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 36d60dee0c2..d4d29caefea 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26350,6 +26350,54 @@ static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,
   return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
 }
+// We are looking for a shuffle where both sources are concatenated with undef
+// and have a width that is half of the output's width. AVX2 has VPERMD/Q, so
+// if we can express this as a single-source shuffle, that's preferable.
+static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
+                                           const X86Subtarget &Subtarget) {
+  if (!Subtarget.hasAVX2() || !isa<ShuffleVectorSDNode>(N))
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+
+  // We only care about shuffles of 128/256-bit vectors of 32/64-bit values.
+  if (!VT.is128BitVector() && !VT.is256BitVector())
+    return SDValue();
+
+  if (VT.getVectorElementType() != MVT::i32 &&
+      VT.getVectorElementType() != MVT::i64 &&
+      VT.getVectorElementType() != MVT::f32 &&
+      VT.getVectorElementType() != MVT::f64)
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Check that both sources are concats with undef.
+  if (N0.getOpcode() != ISD::CONCAT_VECTORS ||
+      N1.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
+      N1.getNumOperands() != 2 || !N0.getOperand(1).isUndef() ||
+      !N1.getOperand(1).isUndef())
+    return SDValue();
+
+  // Construct the new shuffle mask. Elements from the first source retain their
+  // index, but elements from the second source no longer need to skip an undef.
+  SmallVector<int, 8> Mask;
+  int NumElts = VT.getVectorNumElements();
+  for (int i = 0; i < NumElts; ++i) {
+    int Elt = cast<ShuffleVectorSDNode>(N)->getMaskElt(i);
+    if (Elt < NumElts)
+      Mask.push_back(Elt);
+    else
+      Mask.push_back(Elt - NumElts / 2);
+  }
+
+  SDLoc DL(N);
+  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0),
+                               N1.getOperand(0));
+  return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask);
+}
+
 static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
                               TargetLowering::DAGCombinerInfo &DCI,
                               const X86Subtarget &Subtarget) {
@@ -26432,6 +26480,15 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
   if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true))
     return LD;
+  // For AVX2, we sometimes want to combine
+  // (vector_shuffle <mask> (concat_vectors t1, undef)
+  //                        (concat_vectors t2, undef))
+  // Into:
+  // (vector_shuffle <mask> (concat_vectors t1, t2), undef)
+  // Since the latter can be efficiently lowered with VPERMD/VPERMQ
+  if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, DAG, Subtarget))
+    return ShufConcat;
+
   if (isTargetShuffle(N->getOpcode())) {
     SDValue Op(N, 0);
     if (SDValue Shuffle = combineTargetShuffle(Op, DAG, DCI, Subtarget))
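
Note on the mask arithmetic in combineShuffleOfConcatUndef: indices below
NumElts select from the first operand and keep their position; indices in
[NumElts, 2*NumElts) selected from (concat_vectors t2, undef), and since t2
now occupies the upper half of (concat_vectors t1, t2), they shift down by
NumElts / 2. The standalone C++ sketch below models just this remapping;
remapMask is a hypothetical helper for illustration, not part of the patch.

#include <cstdio>
#include <vector>

// Mirrors the loop in combineShuffleOfConcatUndef. Undef mask entries (-1)
// compare below NumElts and therefore pass through unchanged, as in the
// real code.
static std::vector<int> remapMask(const std::vector<int> &OldMask,
                                  int NumElts) {
  std::vector<int> NewMask;
  NewMask.reserve(OldMask.size());
  for (int Elt : OldMask)
    NewMask.push_back(Elt < NumElts ? Elt : Elt - NumElts / 2);
  return NewMask;
}

int main() {
  // A v4i32 interleave of t1 = <a, b> and t2 = <c, d>:
  //   shuffle (concat t1, undef), (concat t2, undef), <0, 4, 1, 5>
  // becomes
  //   shuffle (concat t1, t2), undef, <0, 2, 1, 3>
  for (int Elt : remapMask({0, 4, 1, 5}, /*NumElts=*/4))
    std::printf("%d ", Elt); // prints: 0 2 1 3
  std::printf("\n");
  return 0;
}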
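
Why the single-source form is preferable: with AVX2, VPERMD can permute one
8 x i32 source across lanes with an arbitrary index vector, so the rewritten
shuffle lowers to a single instruction instead of a two-source shuffle
sequence. The sketch below is only an illustration of that lowering via the
corresponding intrinsic (it assumes an AVX2 target, e.g. compiled with
-mavx2); it is not code from this patch.

#include <cstdio>
#include <immintrin.h>

int main() {
  // The equivalent of (concat_vectors t1, t2), with t1 = <0,1,2,3> and
  // t2 = <4,5,6,7>.
  __m256i Src = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
  // The two-source interleave mask <0, 8, 1, 9, 2, 10, 3, 11> remaps to the
  // single-source mask <0, 4, 1, 5, 2, 6, 3, 7> (subtract NumElts / 2 = 4
  // from the second-source indices).
  __m256i Idx = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7);
  __m256i Res = _mm256_permutevar8x32_epi32(Src, Idx); // one VPERMD
  alignas(32) int Out[8];
  _mm256_store_si256(reinterpret_cast<__m256i *>(Out), Res);
  for (int V : Out)
    std::printf("%d ", V); // prints: 0 4 1 5 2 6 3 7
  std::printf("\n");
  return 0;
}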