summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp13
-rw-r--r--llvm/test/CodeGen/X86/pr40730.ll2
2 files changed, 12 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0ecead8b583..2fa50635a6a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14081,7 +14081,6 @@ static SDValue lowerShuffleAsLanePermuteAndPermute(
int NumEltsPerLane = NumElts / NumLanes;
SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
- SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
for (int i = 0; i != NumElts; ++i) {
@@ -14096,10 +14095,20 @@ static SDValue lowerShuffleAsLanePermuteAndPermute(
return SDValue();
SrcLaneMask[DstLane] = SrcLane;
- LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane);
PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
}
+ // Make sure we set all elements of the lane mask, to avoid undef propagation.
+ SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
+ for (int DstLane = 0; DstLane != NumLanes; ++DstLane) {
+ int SrcLane = SrcLaneMask[DstLane];
+ if (0 <= SrcLane)
+ for (int j = 0; j != NumEltsPerLane; ++j) {
+ LaneMask[(DstLane * NumEltsPerLane) + j] =
+ (SrcLane * NumEltsPerLane) + j;
+ }
+ }
+
// If we're only shuffling a single lowest lane and the rest are identity
// then don't bother.
// TODO - isShuffleMaskInputInPlace could be extended to something like this.
diff --git a/llvm/test/CodeGen/X86/pr40730.ll b/llvm/test/CodeGen/X86/pr40730.ll
index 679812c16f8..12b372dea33 100644
--- a/llvm/test/CodeGen/X86/pr40730.ll
+++ b/llvm/test/CodeGen/X86/pr40730.ll
@@ -19,7 +19,7 @@ define <8 x i32> @shuffle_v8i32_0dcd3f14(<8 x i32> %a, <8 x i32> %b) {
; CHECK: .LCPI1_0:
; CHECK-NEXT: .quad 60129542157
; CHECK-NEXT: .quad 60129542157
-; CHECK-NEXT: .zero 8
+; CHECK-NEXT: .quad 68719476736
; CHECK-NEXT: .quad 60129542157
define <8 x i32> @shuffle_v8i32_0dcd3f14_constant(<8 x i32> %a0) {
OpenPOWER on IntegriCloud