summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 7
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-128-unpck.ll 12
2 files changed, 18 insertions, 1 deletion
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 49d30c7cbae..b7446002922 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9850,11 +9850,16 @@ static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) {
MVT EltVT = MVT::getIntegerVT(128 / Mask.size());
MVT VT = MVT::getVectorVT(EltVT, Mask.size());
+ // We can't assume a canonical shuffle mask, so try the commuted version too.
+ SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end());
+ ShuffleVectorSDNode::commuteMask(CommutedMask);
+
// Match any of unary/binary or low/high.
for (unsigned i = 0; i != 4; ++i) {
SmallVector<int, 16> UnpackMask;
createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2);
- if (isTargetShuffleEquivalent(Mask, UnpackMask))
+ if (isTargetShuffleEquivalent(Mask, UnpackMask) ||
+ isTargetShuffleEquivalent(CommutedMask, UnpackMask))
return true;
}
return false;
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-unpck.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-unpck.ll
index 47d9c41e019..58e4c10232f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-unpck.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-unpck.ll
@@ -221,3 +221,15 @@ define <16 x i8> @unpckl_unary_extracted_v32i8(<32 x i8> %x) {
ret <16 x i8> %r
}
+; This would loop infinitely because we did not recognize the unpack shuffle mask in commuted form.
+
+define <8 x i32> @extract_unpckl_v8i32(<8 x i32> %a) {
+; ALL-LABEL: extract_unpckl_v8i32:
+; ALL: # %bb.0:
+; ALL-NEXT: vextractf128 $1, %ymm0, %xmm1
+; ALL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 4, i32 undef, i32 5, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x i32> %shuffle
+}
+
OpenPOWER on IntegriCloud