summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2019-02-15 11:39:21 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2019-02-15 11:39:21 +0000
commit6ce08672fb4d3945db7a410704292392cf4042b2 (patch)
tree82e033f7fcfbc76c04cb10b5ca432e3896b5ee0e
parent0446b40b6369c68ecbe328016d92e55e546d4d60 (diff)
downloadbcm5719-llvm-6ce08672fb4d3945db7a410704292392cf4042b2.tar.gz
bcm5719-llvm-6ce08672fb4d3945db7a410704292392cf4042b2.zip
[X86][AVX] lowerShuffleAsLanePermuteAndPermute - fully populate the lane shuffle mask (PR40730)
As detailed on PR40730, we are not correctly filling in the lane shuffle mask (D53148/rL344446) - we fill in for the correct src lane but don't add it to the correct mask element, so any reference to the correct element is likely to see an UNDEF mask index. This allows constant folding to propagate UNDEFs prior to the lane mask being (correctly) lowered to vperm2f128. This patch fixes the issue by fully populating the lane shuffle mask - this is more than is necessary (if we only filled in the required mask elements we might be able to match other shuffle instructions - broadcasts etc.), but its the most cautious approach as this needs to be cherrypicked into the 8.0.0 release branch. Differential Revision: https://reviews.llvm.org/D58237 llvm-svn: 354117
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp13
-rw-r--r--llvm/test/CodeGen/X86/pr40730.ll2
2 files changed, 12 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0ecead8b583..2fa50635a6a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14081,7 +14081,6 @@ static SDValue lowerShuffleAsLanePermuteAndPermute(
int NumEltsPerLane = NumElts / NumLanes;
SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
- SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
for (int i = 0; i != NumElts; ++i) {
@@ -14096,10 +14095,20 @@ static SDValue lowerShuffleAsLanePermuteAndPermute(
return SDValue();
SrcLaneMask[DstLane] = SrcLane;
- LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane);
PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
}
+ // Make sure we set all elements of the lane mask, to avoid undef propagation.
+ SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
+ for (int DstLane = 0; DstLane != NumLanes; ++DstLane) {
+ int SrcLane = SrcLaneMask[DstLane];
+ if (0 <= SrcLane)
+ for (int j = 0; j != NumEltsPerLane; ++j) {
+ LaneMask[(DstLane * NumEltsPerLane) + j] =
+ (SrcLane * NumEltsPerLane) + j;
+ }
+ }
+
// If we're only shuffling a single lowest lane and the rest are identity
// then don't bother.
// TODO - isShuffleMaskInputInPlace could be extended to something like this.
diff --git a/llvm/test/CodeGen/X86/pr40730.ll b/llvm/test/CodeGen/X86/pr40730.ll
index 679812c16f8..12b372dea33 100644
--- a/llvm/test/CodeGen/X86/pr40730.ll
+++ b/llvm/test/CodeGen/X86/pr40730.ll
@@ -19,7 +19,7 @@ define <8 x i32> @shuffle_v8i32_0dcd3f14(<8 x i32> %a, <8 x i32> %b) {
; CHECK: .LCPI1_0:
; CHECK-NEXT: .quad 60129542157
; CHECK-NEXT: .quad 60129542157
-; CHECK-NEXT: .zero 8
+; CHECK-NEXT: .quad 68719476736
; CHECK-NEXT: .quad 60129542157
define <8 x i32> @shuffle_v8i32_0dcd3f14_constant(<8 x i32> %a0) {
OpenPOWER on IntegriCloud