summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp33
1 files changed, 31 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2ffd917337c..cf22461a92f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -15048,20 +15048,49 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
if (Zeroable.isAllOnesValue())
return getZeroVector(VT, Subtarget, DAG, DL);
+ bool V2IsZero = !V2IsUndef && ISD::isBuildVectorAllZeros(V2.getNode());
+
+ // Create an alternative mask with info about zeroable elements.
+ // Here we do not set undef elements as zeroable.
+ SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
+ if (V2IsZero) {
+ assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
+ for (int i = 0; i != NumElements; ++i)
+ if (Mask[i] != SM_SentinelUndef && Zeroable[i])
+ ZeroableMask[i] = SM_SentinelZero;
+ }
+
// Try to collapse shuffles into using a vector type with fewer elements but
// wider element types. We cap this to not form integers or floating point
// elements wider than 64 bits, but it might be interesting to form i128
// integers to handle flipping the low and high halves of AVX 256-bit vectors.
SmallVector<int, 16> WidenedMask;
if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
- canWidenShuffleElements(Mask, WidenedMask)) {
+ canWidenShuffleElements(ZeroableMask, WidenedMask)) {
MVT NewEltVT = VT.isFloatingPoint()
? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)
: MVT::getIntegerVT(VT.getScalarSizeInBits() * 2);
- MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
+ int NewNumElts = NumElements / 2;
+ MVT NewVT = MVT::getVectorVT(NewEltVT, NewNumElts);
// Make sure that the new vector type is legal. For example, v2f64 isn't
// legal on SSE1.
if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
+ if (V2IsZero) {
+ // Modify the new Mask to take all zeros from the all-zero vector.
+ // Choose indices that are blend-friendly.
+ bool UsedZeroVector = false;
+ assert(find(WidenedMask, SM_SentinelZero) != WidenedMask.end() &&
+ "V2's non-undef elements are used?!");
+ for (int i = 0; i != NewNumElts; ++i)
+ if (WidenedMask[i] == SM_SentinelZero) {
+ WidenedMask[i] = i + NewNumElts;
+ UsedZeroVector = true;
+ }
+ // Ensure all elements of V2 are zero - isBuildVectorAllZeros permits
+ // some elements to be undef.
+ if (UsedZeroVector)
+ V2 = getZeroVector(NewVT, Subtarget, DAG, DL);
+ }
V1 = DAG.getBitcast(NewVT, V1);
V2 = DAG.getBitcast(NewVT, V2);
return DAG.getBitcast(
OpenPOWER on IntegriCloud