author | Michael Kuperstein <michael.m.kuperstein@intel.com> | 2014-12-23 08:59:45 +0000 |
---|---|---|
committer | Michael Kuperstein <michael.m.kuperstein@intel.com> | 2014-12-23 08:59:45 +0000 |
commit | f4536ea6e8743c34600d88d4329cd0e859b6b19a | |
tree | 8022a5c95232d0b14ad17aa4b983ee36c93574c7 /llvm/test/CodeGen | |
parent | 04b16b51ec2903f1be8a4f9297a24e73b0bbda52 | |
[DAGCombine] Improve DAGCombiner handling of BUILD_VECTOR nodes whose elements come from two sources
This partially fixes PR21943.
For AVX, we go from:
vmovq (%rsi), %xmm0
vmovq (%rdi), %xmm1
vpermilps $-27, %xmm1, %xmm2 ## xmm2 = xmm1[1,1,2,3]
vinsertps $16, %xmm2, %xmm1, %xmm1 ## xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
vinsertps $32, %xmm0, %xmm1, %xmm1 ## xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
vpermilps $-27, %xmm0, %xmm0 ## xmm0 = xmm0[1,1,2,3]
vinsertps $48, %xmm0, %xmm1, %xmm0 ## xmm0 = xmm1[0,1,2],xmm0[0]
To the expected:
vmovq (%rdi), %xmm0
vmovhpd (%rsi), %xmm0, %xmm0
retq
Fixing this for AVX2 is still open.
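For reference, the improvement can be reproduced outside the test suite with a small standalone IR file; the file name, function name, and exact llc flags below are illustrative only and not part of this patch:

; repro.ll - build an <8 x float> whose low 128 bits come from two <2 x float> loads
define <8 x float> @repro(<2 x float>* %a, <2 x float>* %b) {
  ; load the two 64-bit halves
  %lo = load <2 x float>* %a, align 8
  %hi = load <2 x float>* %b, align 8
  ; concatenate them into elements 0-3; the upper four lanes are undef
  %v = shufflevector <2 x float> %lo, <2 x float> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %v
}

Feeding this through llc (e.g. llc repro.ll -o - -mattr=+avx) should now show the vmovq/vmovhpd pair above rather than the vpermilps/vinsertps chain.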
Differential Revision: http://reviews.llvm.org/D6749
llvm-svn: 224759
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining.ll | 20 |
1 file changed, 20 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index e7bae3415bf..897a69a5496 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -1584,6 +1584,26 @@ define <4 x i32> @combine_test21(<8 x i32> %a, <4 x i32>* %ptr) {
   ret <4 x i32> %2
 }
 
+define <8 x float> @combine_test22(<2 x float>* %a, <2 x float>* %b) {
+; SSE-LABEL: combine_test22:
+; SSE: # BB#0:
+; SSE-NEXT: movq (%rdi), %xmm0
+; SSE-NEXT: movhpd (%rsi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: combine_test22:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovq (%rdi), %xmm0
+; AVX1-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; Current AVX2 lowering of this is still awful, not adding a test case.
+  %1 = load <2 x float>* %a, align 8
+  %2 = load <2 x float>* %b, align 8
+  %3 = shufflevector <2 x float> %1, <2 x float> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x float> %3
+}
+
 ; Check some negative cases.
 ; FIXME: Do any of these really make sense? Are they redundant with the above tests?
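To re-run the updated regression test in a built LLVM tree, llvm-lit can be pointed at the file directly (the build directory name here is just an example):

./build/bin/llvm-lit -v llvm/test/CodeGen/X86/vector-shuffle-combining.ll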