summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-07-02 14:16:25 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-07-02 14:16:25 +0000
commit8971b2904ed494684e4029ea86a013ccb02c44ec (patch)
tree7754bae56211399522d5444ebb6cb9357d5424ba /llvm/lib/Target/X86/X86ISelLowering.cpp
parent4cb5613c386ebd7b70bc78925a3badf164159b8f (diff)
downloadbcm5719-llvm-8971b2904ed494684e4029ea86a013ccb02c44ec.tar.gz
bcm5719-llvm-8971b2904ed494684e4029ea86a013ccb02c44ec.zip
[X86][SSE] Attempt to combine 64-bit and 32-bit shuffles to unary shuffles before bit shifts
We are combining shuffles to bit shifts before unary permutes, which means we can't fold loads plus the destination register is destructive llvm-svn: 306978
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp53
1 files changed, 21 insertions, 32 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c72f195ab65..45407114541 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27156,6 +27156,26 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
}
+ // Handle PSHUFD/VPERMILPI vXi32/vXf32 repeated patterns.
+ // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we
+ // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
+ if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) &&
+ !ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) {
+ SmallVector<int, 4> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
+ // Narrow the repeated mask to create 32-bit element permutes.
+ SmallVector<int, 4> WordMask = RepeatedMask;
+ if (MaskScalarSizeInBits == 64)
+ scaleShuffleMask(2, RepeatedMask, WordMask);
+
+ Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
+ ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
+ ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
+ PermuteImm = getV4X86ShuffleImm(WordMask);
+ return true;
+ }
+ }
+
// Handle PSHUFLW/PSHUFHW vXi16 repeated patterns.
if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
SmallVector<int, 4> RepeatedMask;
@@ -27201,38 +27221,7 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
}
- // Ensure we don't contain any zero elements.
- if (ContainsZeros)
- return false;
-
- assert(llvm::all_of(Mask, [&](int M) {
- return SM_SentinelUndef <= M && M < (int)NumMaskElts;
- }) && "Expected unary shuffle");
-
- // We only support permutation of 32/64 bit elements after this.
- if (MaskScalarSizeInBits != 32 && MaskScalarSizeInBits != 64)
- return false;
-
- // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we
- // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
- if ((AllowFloatDomain && !AllowIntDomain) && !Subtarget.hasAVX())
- return false;
-
- // We need a repeating shuffle mask for VPERMILPS/PSHUFD.
- SmallVector<int, 4> RepeatedMask;
- if (!is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask))
- return false;
-
- // Narrow the repeated mask for 32-bit element permutes.
- SmallVector<int, 4> WordMask = RepeatedMask;
- if (MaskScalarSizeInBits == 64)
- scaleShuffleMask(2, RepeatedMask, WordMask);
-
- Shuffle = (AllowFloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD);
- ShuffleVT = (AllowFloatDomain ? MVT::f32 : MVT::i32);
- ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
- PermuteImm = getV4X86ShuffleImm(WordMask);
- return true;
+ return false;
}
// Attempt to match a combined unary shuffle mask against supported binary
OpenPOWER on IntegriCloud