diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 36 |
1 files changed, 33 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4f4f8c9b47b..3351f1321c8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -12606,8 +12606,7 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT, {0, 1, 2, 3, 0, 1, 2, 3}); if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 2, 3, 8, 9, 10, 11})) { - MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), - VT.getVectorNumElements() / 2); + MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4); SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, DAG.getIntPtrConstant(0, DL)); SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, @@ -12616,8 +12615,39 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT, return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); } - // Try to lower to to vshuf64x2/vshuf32x4. assert(WidenedMask.size() == 4); + + // See if this is an insertion of the lower 128-bits of V2 into V1. + bool IsInsert = true; + int V2Index = -1; + for (int i = 0; i < 4; ++i) { + assert(WidenedMask[i] >= -1); + if (WidenedMask[i] < 0) + continue; + + // Make sure all V1 subvectors are in place. + if (WidenedMask[i] < 4) { + if (WidenedMask[i] != i) { + IsInsert = false; + break; + } + } else { + // Make sure we only have a single V2 index and its the lowest 128-bits. + if (V2Index >= 0 || WidenedMask[i] != 4) { + IsInsert = false; + break; + } + V2Index = i; + } + } + if (IsInsert && V2Index >= 0) { + MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2); + SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2, + DAG.getIntPtrConstant(0, DL)); + return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL); + } + + // Try to lower to to vshuf64x2/vshuf32x4. SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)}; unsigned PermMask = 0; // Insure elements came from the same Op. |

