| author | Craig Topper <craig.topper@intel.com> | 2017-09-15 18:11:13 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-09-15 18:11:13 +0000 |
| commit | 7a183e27601d5241978c940f2381fe745d7bdbfb (patch) | |
| tree | 7904a104e4c55c493e671dc0d9322faa8b6b33bc /llvm/lib/Target/X86/X86ISelLowering.cpp | |
| parent | e05e2f8b3459d580c68451c184497fe31693bd05 (diff) | |
[X86] Prefer VPERMQ over VPERM2F128 for any unary shuffle, not just the ones that can be done with an insertf128
The early out for AVX2 in lowerV2X128VectorShuffle was positioned below some shuffle mask equivalency checks, so it only fired for unary shuffles that matched an insertf128 pattern.
But we want to allow VPERMQ for any unary shuffle, so move the early out to the top of the function.
Differential Revision: https://reviews.llvm.org/D37893
llvm-svn: 313373
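
For illustration only (this example is not part of the commit; the function name and pointer setup are hypothetical), the kind of shuffle this change affects can be written with AVX2 intrinsics. A unary swap of the two 128-bit halves of a 256-bit vector maps to VPERMQ, which has a single source operand and can therefore fold a load of that source; the VPERM2F128/VPERM2I128 form takes two source operands, so the same unary shuffle would need the value materialized in a register.

#include <immintrin.h>

// Hypothetical standalone sketch: swap the two 128-bit halves of a
// 256-bit vector (64-bit element mask <2, 3, 0, 1>). With -mavx2 this
// intrinsic maps to VPERMQ, and a memory source can be folded into the
// instruction (e.g. vpermq $0x4e, (%rdi), %ymm0).
__m256i swap_halves(const __m256i *p) {
  // 0x4E encodes the element selectors {2, 3, 0, 1}.
  return _mm256_permute4x64_epi64(*p, 0x4E);
}

The same shuffle written with the two-source intrinsic, _mm256_permute2x128_si256(v, v, 0x01), has identical semantics, but because both operands are the same value the load cannot simply be folded away; that is the memory-folding advantage the early out preserves.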
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 7
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ff00d9e5e00..4df7621bee2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12151,6 +12151,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
                                         const APInt &Zeroable,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
+  // With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding.
+  if (Subtarget.hasAVX2() && V2.isUndef())
+    return SDValue();
+
   SmallVector<int, 4> WidenedMask;
   if (!canWidenShuffleElements(Mask, WidenedMask))
     return SDValue();
@@ -12174,9 +12178,6 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
   // subvector.
   bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1});
   if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) {
-    // With AVX2, use VPERMQ/VPERMPD to allow memory folding.
-    if (Subtarget.hasAVX2() && V2.isUndef())
-      return SDValue();
     // With AVX1, use vperm2f128 (below) to allow load folding. Otherwise,
     // this will likely become vinsertf128 which can't fold a 256-bit memop.