diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 33 |
1 files changed, 31 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2ffd917337c..cf22461a92f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -15048,20 +15048,49 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, if (Zeroable.isAllOnesValue()) return getZeroVector(VT, Subtarget, DAG, DL); + bool V2IsZero = !V2IsUndef && ISD::isBuildVectorAllZeros(V2.getNode()); + + // Create an alternative mask with info about zeroable elements. + // Here we do not set undef elements as zeroable. + SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end()); + if (V2IsZero) { + assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!"); + for (int i = 0; i != NumElements; ++i) + if (Mask[i] != SM_SentinelUndef && Zeroable[i]) + ZeroableMask[i] = SM_SentinelZero; + } + // Try to collapse shuffles into using a vector type with fewer elements but // wider element types. We cap this to not form integers or floating point // elements wider than 64 bits, but it might be interesting to form i128 // integers to handle flipping the low and high halves of AVX 256-bit vectors. SmallVector<int, 16> WidenedMask; if (VT.getScalarSizeInBits() < 64 && !Is1BitVector && - canWidenShuffleElements(Mask, WidenedMask)) { + canWidenShuffleElements(ZeroableMask, WidenedMask)) { MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2) : MVT::getIntegerVT(VT.getScalarSizeInBits() * 2); - MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2); + int NewNumElts = NumElements / 2; + MVT NewVT = MVT::getVectorVT(NewEltVT, NewNumElts); // Make sure that the new vector type is legal. For example, v2f64 isn't // legal on SSE1. if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) { + if (V2IsZero) { + // Modify the new Mask to take all zeros from the all-zero vector. + // Choose indices that are blend-friendly. + bool UsedZeroVector = false; + assert(find(WidenedMask, SM_SentinelZero) != WidenedMask.end() && + "V2's non-undef elements are used?!"); + for (int i = 0; i != NewNumElts; ++i) + if (WidenedMask[i] == SM_SentinelZero) { + WidenedMask[i] = i + NewNumElts; + UsedZeroVector = true; + } + // Ensure all elements of V2 are zero - isBuildVectorAllZeros permits + // some elements to be undef. + if (UsedZeroVector) + V2 = getZeroVector(NewVT, Subtarget, DAG, DL); + } V1 = DAG.getBitcast(NewVT, V1); V2 = DAG.getBitcast(NewVT, V2); return DAG.getBitcast( |