summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2014-10-01 11:14:02 +0000
committerChandler Carruth <chandlerc@gmail.com>2014-10-01 11:14:02 +0000
commit6c02c031b86205f0bdb935b50665ea6c369c350c (patch)
tree110299b1838784d5395f9830e1ef6f9955f6dc8e /llvm/test
parent048486109bfa55744e0c968a5d1a0f01fee565ad (diff)
downloadbcm5719-llvm-6c02c031b86205f0bdb935b50665ea6c369c350c.tar.gz
bcm5719-llvm-6c02c031b86205f0bdb935b50665ea6c369c350c.zip
[x86] Fix a few more tiny patterns with the new vector shuffle lowering
that keep cropping up in the regression test suite. This also addresses one of the issues raised on the mailing list with failing to form 'movsd' in as many cases as we realistically should. There will be corresponding patches forthcoming for v4f32 at least. This was a lot of fuss for a relatively small gain, but all the fuss was on my end trying different ways of holding the pieces of the x86 fragment patterns *just right*. Now that it works, the code is reasonably simple. In the new test cases I'm adding here, v2i64 sticks out as just plain horrible. I've not come up with any great ideas here other than that it would be nice to recognize when we're *going* to take a domain crossing hit and cross earlier to get the decent instructions. At least with AVX it is slightly less silly.... llvm-svn: 218756
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll190
1 files changed, 190 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
index 67723dd07b2..15e98822246 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -714,6 +714,196 @@ define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
ret <2 x double> %shuffle
}
+define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
+; SSE2-LABEL: insert_reg_lo_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd %rdi, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: insert_reg_lo_v2i64:
+; SSE3: # BB#0:
+; SSE3-NEXT: movd %rdi, %xmm1
+; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: insert_reg_lo_v2i64:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %rdi, %xmm1
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_reg_lo_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd %rdi, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_reg_lo_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovq %rdi, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_reg_lo_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq %rdi, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT: retq
+ %v = insertelement <2 x i64> undef, i64 %a, i32 0
+ %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
+; SSE2-LABEL: insert_mem_lo_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movq (%rdi), %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: insert_mem_lo_v2i64:
+; SSE3: # BB#0:
+; SSE3-NEXT: movq (%rdi), %xmm1
+; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: insert_mem_lo_v2i64:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movq (%rdi), %xmm1
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_mem_lo_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movq (%rdi), %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_mem_lo_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovq (%rdi), %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_mem_lo_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq (%rdi), %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT: retq
+ %a = load i64* %ptr
+ %v = insertelement <2 x i64> undef, i64 %a, i32 0
+ %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
+; SSE-LABEL: insert_reg_hi_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movd %rdi, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_reg_hi_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovq %rdi, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+ %v = insertelement <2 x i64> undef, i64 %a, i32 0
+ %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
+; SSE-LABEL: insert_mem_hi_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movq (%rdi), %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_mem_hi_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovq (%rdi), %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+ %a = load i64* %ptr
+ %v = insertelement <2 x i64> undef, i64 %a, i32 0
+ %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
+ ret <2 x i64> %shuffle
+}
+
+define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
+; SSE-LABEL: insert_reg_lo_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: movsd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_reg_lo_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
+; SSE-LABEL: insert_mem_lo_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: movlpd (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_mem_lo_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %a = load double* %ptr
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
+; SSE-LABEL: insert_reg_hi_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_reg_hi_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: retq
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
+ ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
+; SSE-LABEL: insert_mem_hi_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: movhpd (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_mem_hi_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %a = load double* %ptr
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
+ ret <2 x double> %shuffle
+}
+
define <2 x double> @insert_dup_reg_v2f64(double %a) {
; FIXME: We should match movddup for SSE3 and higher here.
;
OpenPOWER on IntegriCloud