summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author Craig Topper <craig.topper@intel.com> 2019-11-01 11:28:32 -0700
committer Craig Topper <craig.topper@intel.com> 2019-11-01 13:06:03 -0700
commit eeeb18cd075ddf7a44c8571f9e17e4b1fcbc8aa4 (patch)
tree 883be51979c37fdaa548ecd312e1f84402360b1c /llvm/lib
parent deaf121b657323fde17dd862a13b05e8b7ee6954 (diff)
download bcm5719-llvm-eeeb18cd075ddf7a44c8571f9e17e4b1fcbc8aa4.tar.gz
bcm5719-llvm-eeeb18cd075ddf7a44c8571f9e17e4b1fcbc8aa4.zip
[X86] Change the behavior of canWidenShuffleElements used by lowerV2X128Shuffle to match the behavior in lowerVectorShuffle with regard to zeroable elements.
Previously, the canWidenShuffleElements overload used by lowerV2X128Shuffle marked every non-undef zeroable element as SM_SentinelZero, which could prevent the widening check from recognizing that the mask could be widened. Now elements are only marked as zero if V2 is an all-zeros vector. This matches what lowerVectorShuffle already does when widening elements. Fixes PR43866.
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 33
1 file changed, 14 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c7a45f65e98..2862b7aa3b5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5324,15 +5324,18 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask,
static bool canWidenShuffleElements(ArrayRef<int> Mask,
const APInt &Zeroable,
+ bool V2IsZero,
SmallVectorImpl<int> &WidenedMask) {
- SmallVector<int, 32> TargetMask(Mask.begin(), Mask.end());
- for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
- if (TargetMask[i] == SM_SentinelUndef)
- continue;
- if (Zeroable[i])
- TargetMask[i] = SM_SentinelZero;
+ // Create an alternative mask with info about zeroable elements.
+ // Here we do not set undef elements as zeroable.
+ SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
+ if (V2IsZero) {
+ assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
+ for (int i = 0, Size = Mask.size(); i != Size; ++i)
+ if (Mask[i] != SM_SentinelUndef && Zeroable[i])
+ ZeroableMask[i] = SM_SentinelZero;
}
- return canWidenShuffleElements(TargetMask, WidenedMask);
+ return canWidenShuffleElements(ZeroableMask, WidenedMask);
}
static bool canWidenShuffleElements(ArrayRef<int> Mask) {
@@ -14817,8 +14820,10 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
if (Subtarget.hasAVX2() && V2.isUndef())
return SDValue();
+ bool V2IsZero = !V2.isUndef() && ISD::isBuildVectorAllZeros(V2.getNode());
+
SmallVector<int, 4> WidenedMask;
- if (!canWidenShuffleElements(Mask, Zeroable, WidenedMask))
+ if (!canWidenShuffleElements(Mask, Zeroable, V2IsZero, WidenedMask))
return SDValue();
bool IsLowZero = (Zeroable & 0x3) == 0x3;
@@ -17095,23 +17100,13 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
bool V2IsZero = !V2IsUndef && ISD::isBuildVectorAllZeros(V2.getNode());
- // Create an alternative mask with info about zeroable elements.
- // Here we do not set undef elements as zeroable.
- SmallVector<int, 64> ZeroableMask(OrigMask.begin(), OrigMask.end());
- if (V2IsZero) {
- assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
- for (int i = 0; i != NumElements; ++i)
- if (OrigMask[i] != SM_SentinelUndef && Zeroable[i])
- ZeroableMask[i] = SM_SentinelZero;
- }
-
// Try to collapse shuffles into using a vector type with fewer elements but
// wider element types. We cap this to not form integers or floating point
// elements wider than 64 bits, but it might be interesting to form i128
// integers to handle flipping the low and high halves of AVX 256-bit vectors.
SmallVector<int, 16> WidenedMask;
if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
- canWidenShuffleElements(ZeroableMask, WidenedMask)) {
+ canWidenShuffleElements(OrigMask, Zeroable, V2IsZero, WidenedMask)) {
// Shuffle mask widening should not interfere with a broadcast opportunity
// by obfuscating the operands with bitcasts.
// TODO: Avoid lowering directly from this top-level function: make this
OpenPOWER on IntegriCloud