diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 13 |
| -rw-r--r-- | llvm/test/CodeGen/X86/pr40730.ll | 2 |
2 files changed, 12 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0ecead8b583..2fa50635a6a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -14081,7 +14081,6 @@ static SDValue lowerShuffleAsLanePermuteAndPermute( int NumEltsPerLane = NumElts / NumLanes; SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef); - SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef); SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef); for (int i = 0; i != NumElts; ++i) { @@ -14096,10 +14095,20 @@ static SDValue lowerShuffleAsLanePermuteAndPermute( return SDValue(); SrcLaneMask[DstLane] = SrcLane; - LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane); PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane); } + // Make sure we set all elements of the lane mask, to avoid undef propagation. + SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef); + for (int DstLane = 0; DstLane != NumLanes; ++DstLane) { + int SrcLane = SrcLaneMask[DstLane]; + if (0 <= SrcLane) + for (int j = 0; j != NumEltsPerLane; ++j) { + LaneMask[(DstLane * NumEltsPerLane) + j] = + (SrcLane * NumEltsPerLane) + j; + } + } + // If we're only shuffling a single lowest lane and the rest are identity // then don't bother. // TODO - isShuffleMaskInputInPlace could be extended to something like this. diff --git a/llvm/test/CodeGen/X86/pr40730.ll b/llvm/test/CodeGen/X86/pr40730.ll index 679812c16f8..12b372dea33 100644 --- a/llvm/test/CodeGen/X86/pr40730.ll +++ b/llvm/test/CodeGen/X86/pr40730.ll @@ -19,7 +19,7 @@ define <8 x i32> @shuffle_v8i32_0dcd3f14(<8 x i32> %a, <8 x i32> %b) { ; CHECK: .LCPI1_0: ; CHECK-NEXT: .quad 60129542157 ; CHECK-NEXT: .quad 60129542157 -; CHECK-NEXT: .zero 8 +; CHECK-NEXT: .quad 68719476736 ; CHECK-NEXT: .quad 60129542157 define <8 x i32> @shuffle_v8i32_0dcd3f14_constant(<8 x i32> %a0) { |

