author    Craig Topper <craig.topper@intel.com>  2017-09-15 18:11:13 +0000
committer Craig Topper <craig.topper@intel.com>  2017-09-15 18:11:13 +0000
commit    7a183e27601d5241978c940f2381fe745d7bdbfb
tree      7904a104e4c55c493e671dc0d9322faa8b6b33bc  llvm/lib/Target/X86/X86ISelLowering.cpp
parent    e05e2f8b3459d580c68451c184497fe31693bd05
[X86] Prefer VPERMQ over VPERM2F128 for any unary shuffle, not just the ones that can be done with an insertf128
The early out for AVX2 in lowerV2X128VectorShuffle is positioned in a weird spot below some shuffle mask equivalency checks. But I think we want to allow VPERMQ for any unary shuffle.

Differential Revision: https://reviews.llvm.org/D37893

llvm-svn: 313373
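For illustration, here is the kind of unary shuffle this change affects. A 128-bit half swap is not expressible as an insertf128, so the old early out did not fire for it and AVX2 targets still got vperm2f128; moving the early out above the equivalency checks lets it reach the generic v4i64 path instead. This is a hypothetical sketch, not a test case from the patch; the function name and expected instruction are assumptions:

; Hypothetical example: a unary shuffle swapping the two 128-bit halves
; of a <4 x i64>. With this change, AVX2 targets should lower it to a
; single vpermq (e.g. vpermq $0x4e, %ymm0, %ymm0) instead of vperm2f128.
define <4 x i64> @swap_halves(<4 x i64> %v) {
  %s = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  ret <4 x i64> %s
}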
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ff00d9e5e00..4df7621bee2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12151,6 +12151,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
                                         const APInt &Zeroable,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
+  // With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding.
+  if (Subtarget.hasAVX2() && V2.isUndef())
+    return SDValue();
+
   SmallVector<int, 4> WidenedMask;
   if (!canWidenShuffleElements(Mask, WidenedMask))
     return SDValue();
@@ -12174,9 +12178,6 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
   // subvector.
   bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1});
   if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) {
-    // With AVX2, use VPERMQ/VPERMPD to allow memory folding.
-    if (Subtarget.hasAVX2() && V2.isUndef())
-      return SDValue();
     // With AVX1, use vperm2f128 (below) to allow load folding. Otherwise,
     // this will likely become vinsertf128 which can't fold a 256-bit memop.
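The memory-folding rationale in the moved comment can be seen when the shuffled value comes from memory. Another hypothetical sketch (typed-pointer IR of this era, not from the patch's tests):

; vpermq accepts a ymm/m256 source, so the 256-bit load below can fold
; into something like vpermq $0x4e, (%rdi), %ymm0. A unary vperm2f128
; needs the value in a register for its first source, and vinsertf128
; cannot fold a 256-bit memop at all.
define <4 x i64> @swap_halves_mem(<4 x i64>* %p) {
  %v = load <4 x i64>, <4 x i64>* %p
  %s = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  ret <4 x i64> %s
}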