summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2019-02-18 16:46:12 +0000
committerSanjay Patel <spatel@rotateright.com>2019-02-18 16:46:12 +0000
commitfff628274d462c099c17cbb20fa09beb1b8105f4 (patch)
treedefa7765ce3a2d12867ebe7b318ad6f7f5951d13 /llvm/lib/Target/X86/X86ISelLowering.cpp
parent9d800a135a74a085c8fb7647ba6ef8b9daac202a (diff)
downloadbcm5719-llvm-fff628274d462c099c17cbb20fa09beb1b8105f4.tar.gz
bcm5719-llvm-fff628274d462c099c17cbb20fa09beb1b8105f4.zip
[x86] split more v8f32/v8i32 shuffles in lowering
Similar to D57867 - this is a small patch with lots of test diffs. With half-vector-width narrowing potential, using an extract + 128-bit vshufps is a win because it replaces a 256-bit shuffle with a 128-bit shuffle. This seems like it should be a win even for targets with 'fast-variable-shuffle', but we are intentionally deferring that to an independent change to make sure that is true. Differential Revision: https://reviews.llvm.org/D58181 llvm-svn: 354279
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp9
1 files changed, 5 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e033c211943..df0009b3fd7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14605,10 +14605,11 @@ static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1,
if (NumUpperHalves == 1) {
// AVX2 has efficient 32/64-bit element cross-lane shuffles.
if (Subtarget.hasAVX2()) {
- // extract128 + vunpckhps, is better than vblend + vpermps.
- // TODO: Refine to account for unary shuffle, splat, and other masks?
- if (EltWidth == 32 && NumLowerHalves &&
- HalfVT.is128BitVector() && !is128BitUnpackShuffleMask(HalfMask))
+ // extract128 + vunpckhps/vshufps, is better than vblend + vpermps.
+ if (EltWidth == 32 && NumLowerHalves && HalfVT.is128BitVector() &&
+ !is128BitUnpackShuffleMask(HalfMask) &&
+ (!isSingleSHUFPSMask(HalfMask) ||
+ Subtarget.hasFastVariableShuffle()))
return SDValue();
// If this is a unary shuffle (assume that the 2nd operand is
// canonicalized to undef), then we can use vpermpd. Otherwise, we
OpenPOWER on IntegriCloud