diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 57 |
1 file changed, 57 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 36d60dee0c2..d4d29caefea 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26350,6 +26350,54 @@ static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget, return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS); } +// We are looking for a shuffle where both sources are concatenated with undef +// and have a width that is half of the output's width. AVX2 has VPERMD/Q, so +// if we can express this as a single-source shuffle, that's preferable. +static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + if (!Subtarget.hasAVX2() || !isa<ShuffleVectorSDNode>(N)) + return SDValue(); + + EVT VT = N->getValueType(0); + + // We only care about shuffles of 128/256-bit vectors of 32/64-bit values. + if (!VT.is128BitVector() && !VT.is256BitVector()) + return SDValue(); + + if (VT.getVectorElementType() != MVT::i32 && + VT.getVectorElementType() != MVT::i64 && + VT.getVectorElementType() != MVT::f32 && + VT.getVectorElementType() != MVT::f64) + return SDValue(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Check that both sources are concats with undef. + if (N0.getOpcode() != ISD::CONCAT_VECTORS || + N1.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 || + N1.getNumOperands() != 2 || !N0.getOperand(1).isUndef() || + !N1.getOperand(1).isUndef()) + return SDValue(); + + // Construct the new shuffle mask. Elements from the first source retain their + // index, but elements from the second source no longer need to skip an undef. 
+ SmallVector<int, 8> Mask; + int NumElts = VT.getVectorNumElements(); + for (int i = 0; i < NumElts; ++i) { + int Elt = cast<ShuffleVectorSDNode>(N)->getMaskElt(i); + if (Elt < NumElts) + Mask.push_back(Elt); + else + Mask.push_back(Elt - NumElts / 2); + } + + SDLoc DL(N); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0), + N1.getOperand(0)); + return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask); +} + static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -26432,6 +26480,15 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true)) return LD; + // For AVX2, we sometimes want to combine + // (vector_shuffle <mask> (concat_vectors t1, undef) + // (concat_vectors t2, undef)) + // Into: + // (vector_shuffle <mask> (concat_vectors t1, t2), undef) + // Since the latter can be efficiently lowered with VPERMD/VPERMQ + if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, DAG, Subtarget)) + return ShufConcat; + if (isTargetShuffle(N->getOpcode())) { SDValue Op(N, 0); if (SDValue Shuffle = combineTargetShuffle(Op, DAG, DCI, Subtarget)) |