[DAGCombiner] Improve the folding of target-independent shuffles to Undef.

When combining a pair of shuffle nodes, check if the combined shuffle mask is trivially Undef. If so, immediately fold that pair of shuffles to Undef. The lack of checks for undef masks was the root cause of a poor-codegen bug in the DAG combiner. Example: %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 1, i32 6> %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 6> %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 3> Before this patch, on x86 (with -mcpu=corei7) we failed to fold the entire sequence to an Undef value and therefore we generated: shufps $-123, %xmm1, %xmm0 pshufd $-46, %xmm0, %xmm0 With this patch, the entire shuffle sequence is folded to Undef and no shuffles are generated in the output assembly. Added new test cases to test 'combine-vec-shuffle-5.ll'. llvm-svn: 215797
author: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2014-08-16 00:29:44 +0000
committer: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2014-08-16 00:29:44 +0000
commit: b23bad11e792210b1c8d494941dbd85ca05bd943 (patch)
tree: 88798491c649101abbedceb80ee6783855bf49fb
parent: ac3997eb52022555af90d9be49e7e7fa4afa2672 (diff)
download: bcm5719-llvm-b23bad11e792210b1c8d494941dbd85ca05bd943.tar.gz
bcm5719-llvm-b23bad11e792210b1c8d494941dbd85ca05bd943.zip
2 files changed, 207 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c1ff817fc35..e9a38b1b437 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10787,6 +10787,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
         Idx = OtherSV->getMaskElt(Idx);
       Mask.push_back(Idx);
     }
+
+    // Check if all indices in Mask are Undef. If so, propagate Undef.
+    bool isUndefMask = true;
+    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
+      isUndefMask &= Mask[i] < 0;
+
+    if (isUndefMask)
+      return DAG.getUNDEF(VT);
     
     bool CommuteOperands = false;
     if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
@@ -10932,6 +10940,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
       Mask.push_back(Idx);
     }
 
+    // Check if all indices in Mask are Undef. If so, propagate Undef.
+    bool isUndefMask = true;
+    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
+      isUndefMask &= Mask[i] < 0;
+
+    if (isUndefMask)
+      return DAG.getUNDEF(VT);
+
     // Avoid introducing shuffles with illegal mask.
     if (TLI.isShuffleMaskLegal(Mask, VT)) {
       if (IsSV1Undef)
diff --git a/llvm/test/CodeGen/X86/combine-vec-shuffle-5.ll b/llvm/test/CodeGen/X86/combine-vec-shuffle-5.ll
index 16c45efe4be..82b020e8cb4 100644
--- a/llvm/test/CodeGen/X86/combine-vec-shuffle-5.ll
+++ b/llvm/test/CodeGen/X86/combine-vec-shuffle-5.ll
@@ -255,3 +255,194 @@ define <4 x i8> @test4c(<4 x i8>* %a, <4 x i8>* %b) {
 ; CHECK: blendps $13
 ; CHECK: ret
 
+
+; Verify that the dag combiner correctly folds the following shuffle sequences (pairs and triples) to Undef.
+
+define <4 x i32> @test1b(<4 x i32> %A) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test1b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test2b(<4 x i32> %A) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 1, i32 6>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 6, i32 7>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test2b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test3b(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test3b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test4b(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 4, i32 1, i32 1, i32 6>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 0, i32 3, i32 3, i32 0>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test4b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test5b(<4 x i32> %A) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  ret <4 x i32> %3
+}
+; CHECK-LABEL: test5b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test6b(<4 x i32> %A) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 1, i32 6>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 6, i32 7>
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i32> %3
+}
+; CHECK-LABEL: test6b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test7b(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 6>
+  ret <4 x i32> %3
+}
+; CHECK-LABEL: test7b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test8b(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 1, i32 6>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 6>
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 3>
+  ret <4 x i32> %3
+}
+; CHECK-LABEL: test8b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test9b(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 1, i32 undef, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 4, i32 2, i32 1>
+  %3 = shufflevector <4 x i32> %2, <4 x i32> %A, <4 x i32> <i32 2, i32 1, i32 1, i32 2>
+  ret <4 x i32> %3
+}
+; CHECK-LABEL: test9b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test10b(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 undef, i32 undef, i32 1, i32 6>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %A, <4 x i32> <i32 0, i32 6, i32 1, i32 0>
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 2>
+  ret <4 x i32> %3
+}
+; CHECK-LABEL: test10b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test11b(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 6>
+  ret <4 x i32> %3
+}
+; CHECK-LABEL: test11b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test12b(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 0, i32 3, i32 3, i32 0>
+  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 5, i32 1, i32 4>
+  ret <4 x i32> %3
+}
+; CHECK-LABEL: test12b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <8 x i32> @test13b(<8 x i32> %A, <8 x i32> %B) {
+  %1 = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 0, i32 undef, i32 1, i32 undef>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> %B, <8 x i32> <i32 1, i32 3, i32 1, i32 3, i32 1, i32 3, i32 1, i32 3>
+  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 0, i32 9, i32 1, i32 10, i32 0, i32 9, i32 1, i32 10>
+  ret <8 x i32> %3
+}
+; CHECK-LABEL: test13b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <8 x i32> @test14b(<8 x i32> %A, <8 x i32> %B) {
+  %1 = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 undef, i32 1, i32 1, i32 undef, i32 undef, i32 1, i32 1, i32 undef>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> %B, <8 x i32> <i32 0, i32 3, i32 3, i32 0, i32 0, i32 3, i32 3, i32 0>
+  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 1, i32 9, i32 1, i32 8, i32 1, i32 9, i32 1, i32 8>
+  ret <8 x i32> %3
+}
+; CHECK-LABEL: test14b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <8 x i32> @test15b(<8 x i32> %A, <8 x i32> %B) {
+  %1 = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 0, i32 1, i32 undef, i32 11, i32 0, i32 1, i32 undef, i32 11>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 8, i32 9, i32 2, i32 11, i32 8, i32 9, i32 2, i32 11>
+  %3 = shufflevector <8 x i32> %2, <8 x i32> %A, <8 x i32> <i32 2, i32 2, i32 undef, i32 2, i32 2, i32 2, i32 undef, i32 2>
+  ret <8 x i32> %3
+}
+; CHECK-LABEL: test15b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <8 x i32> @test16b(<8 x i32> %A, <8 x i32> %B) {
+  %1 = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 undef, i32 undef, i32 1, i32 10, i32 undef, i32 undef, i32 1, i32 10>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> %A, <8 x i32> <i32 0, i32 10, i32 2, i32 11, i32 0, i32 10, i32 2, i32 11>
+  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 4, i32 9, i32 undef, i32 0, i32 4, i32 9, i32 undef, i32 0>
+  ret <8 x i32> %3
+}
+; CHECK-LABEL: test16b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
author	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>	2014-08-16 00:29:44 +0000
committer	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>	2014-08-16 00:29:44 +0000
commit	b23bad11e792210b1c8d494941dbd85ca05bd943 (patch)
tree	88798491c649101abbedceb80ee6783855bf49fb
parent	ac3997eb52022555af90d9be49e7e7fa4afa2672 (diff)
download	bcm5719-llvm-b23bad11e792210b1c8d494941dbd85ca05bd943.tar.gz bcm5719-llvm-b23bad11e792210b1c8d494941dbd85ca05bd943.zip