diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-04-01 19:42:23 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-04-01 19:42:23 +0000 |
commit | f739d8a2ed898934978e68ccabdfdb91be2db8c1 (patch) | |
tree | 2cf365bfcce6c730dbb166da22bcbcbbae4cd0a2 /llvm | |
parent | e6a46463726aa489a996fba33ed1622f2395bffb (diff) | |
download | bcm5719-llvm-f739d8a2ed898934978e68ccabdfdb91be2db8c1.tar.gz bcm5719-llvm-f739d8a2ed898934978e68ccabdfdb91be2db8c1.zip |
[X86][SSE] Regenerated the vec_insert tests.
llvm-svn: 265179
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/test/CodeGen/X86/vec_ins_extract-1.ll | 87 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec_ins_extract.ll | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec_insert-3.ll | 23 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec_insert-4.ll | 43 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec_insert-5.ll | 165 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec_insert-7.ll | 44 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec_insert-8.ll | 57 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec_insert-9.ll | 22 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec_insert-mmx.ll | 89 |
9 files changed, 410 insertions, 121 deletions
diff --git a/llvm/test/CodeGen/X86/vec_ins_extract-1.ll b/llvm/test/CodeGen/X86/vec_ins_extract-1.ll index 565be7a6cc7..8019e11ad4c 100644 --- a/llvm/test/CodeGen/X86/vec_ins_extract-1.ll +++ b/llvm/test/CodeGen/X86/vec_ins_extract-1.ll @@ -1,24 +1,109 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah | grep "(%esp,%eax,4)" | count 4 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64 ; Inserts and extracts with variable indices must be lowered ; to memory accesses. define i32 @t0(i32 inreg %t7, <4 x i32> inreg %t8) nounwind { +; X32-LABEL: t0: +; X32: # BB#0: +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: andl $-16, %esp +; X32-NEXT: subl $32, %esp +; X32-NEXT: movaps %xmm0, (%esp) +; X32-NEXT: movl $76, (%esp,%eax,4) +; X32-NEXT: movl (%esp), %eax +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl +; +; X64-LABEL: t0: +; X64: # BB#0: +; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movslq %edi, %rax +; X64-NEXT: movl $76, -24(%rsp,%rax,4) +; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: retq %t13 = insertelement <4 x i32> %t8, i32 76, i32 %t7 %t9 = extractelement <4 x i32> %t13, i32 0 ret i32 %t9 } + define i32 @t1(i32 inreg %t7, <4 x i32> inreg %t8) nounwind { +; X32-LABEL: t1: +; X32: # BB#0: +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: andl $-16, %esp +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl $76, %ecx +; X32-NEXT: pinsrd $0, %ecx, %xmm0 +; X32-NEXT: movdqa %xmm0, (%esp) +; X32-NEXT: movl (%esp,%eax,4), %eax +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl +; +; X64-LABEL: t1: +; X64: # BB#0: +; X64-NEXT: movl $76, %eax +; X64-NEXT: pinsrd $0, %eax, %xmm0 +; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movslq %edi, %rax +; X64-NEXT: movl -24(%rsp,%rax,4), %eax +; X64-NEXT: retq %t13 = insertelement <4 x i32> %t8, i32 76, i32 0 %t9 = extractelement <4 x i32> %t13, i32 %t7 ret i32 %t9 } + define <4 x i32> @t2(i32 inreg %t7, <4 x i32> inreg %t8) nounwind { +; X32-LABEL: t2: +; X32: # BB#0: +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: andl $-16, %esp +; X32-NEXT: subl $32, %esp +; X32-NEXT: movdqa %xmm0, (%esp) +; X32-NEXT: pinsrd $0, (%esp,%eax,4), %xmm0 +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl +; +; X64-LABEL: t2: +; X64: # BB#0: +; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movslq %edi, %rax +; X64-NEXT: pinsrd $0, -24(%rsp,%rax,4), %xmm0 +; X64-NEXT: retq %t9 = extractelement <4 x i32> %t8, i32 %t7 %t13 = insertelement <4 x i32> %t8, i32 %t9, i32 0 ret <4 x i32> %t13 } + define <4 x i32> @t3(i32 inreg %t7, <4 x i32> inreg %t8) nounwind { +; X32-LABEL: t3: +; X32: # BB#0: +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: andl $-16, %esp +; X32-NEXT: subl $32, %esp +; X32-NEXT: movaps %xmm0, (%esp) +; X32-NEXT: movd %xmm0, (%esp,%eax,4) +; X32-NEXT: movaps (%esp), %xmm0 +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl +; +; X64-LABEL: t3: +; X64: # BB#0: +; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movslq %edi, %rax +; X64-NEXT: movd %xmm0, -24(%rsp,%rax,4) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq %t9 = extractelement <4 x i32> %t8, i32 0 %t13 = insertelement <4 x i32> %t8, i32 %t9, i32 %t7 ret <4 x i32> %t13 diff --git a/llvm/test/CodeGen/X86/vec_ins_extract.ll b/llvm/test/CodeGen/X86/vec_ins_extract.ll index e92f46dbabb..90dcbdfa43b 100644 --- a/llvm/test/CodeGen/X86/vec_ins_extract.ll +++ b/llvm/test/CodeGen/X86/vec_ins_extract.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: opt < %s -scalarrepl -instcombine | \ ; RUN: llc -march=x86 -mcpu=yonah | not grep sub.*esp diff --git a/llvm/test/CodeGen/X86/vec_insert-3.ll b/llvm/test/CodeGen/X86/vec_insert-3.ll index 75244ae0b71..57a265a0ce3 100644 --- a/llvm/test/CodeGen/X86/vec_insert-3.ll +++ b/llvm/test/CodeGen/X86/vec_insert-3.ll @@ -1,10 +1,23 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse4.1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X64 define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) nounwind { -; CHECK-LABEL: t1: -; CHECK: punpcklqdq -; CHECK-NEXT: retq - +; X32-LABEL: t1: +; X32: # BB#0: +; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] +; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] +; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] +; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] +; X32-NEXT: retl +; +; X64-LABEL: t1: +; X64: # BB#0: +; X64-NEXT: movd %rdi, %xmm1 +; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: retq %tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1 ret <2 x i64> %tmp1 } diff --git a/llvm/test/CodeGen/X86/vec_insert-4.ll b/llvm/test/CodeGen/X86/vec_insert-4.ll index 2c31e56b4af..c847ac98300 100644 --- a/llvm/test/CodeGen/X86/vec_insert-4.ll +++ b/llvm/test/CodeGen/X86/vec_insert-4.ll @@ -1,11 +1,40 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah | grep 1084227584 | count 1 - -; ModuleID = '<stdin>' -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i686-apple-darwin9.2.2" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-apple-darwin9.2.2 -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin9.2.2 -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X64 define <8 x float> @f(<8 x float> %a, i32 %b) nounwind { +; X32-LABEL: f: +; X32: ## BB#0: ## %entry +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: andl $-32, %esp +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movaps %xmm0, (%esp) +; X32-NEXT: movl $1084227584, (%esp,%eax,4) ## imm = 0x40A00000 +; X32-NEXT: movaps (%esp), %xmm0 +; X32-NEXT: movaps {{[0-9]+}}(%esp), %xmm1 +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl +; +; X64-LABEL: f: +; X64: ## BB#0: ## %entry +; X64-NEXT: pushq %rbp +; X64-NEXT: movq %rsp, %rbp +; X64-NEXT: andq $-32, %rsp +; X64-NEXT: subq $64, %rsp +; X64-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) +; X64-NEXT: movaps %xmm0, (%rsp) +; X64-NEXT: movslq %edi, %rax +; X64-NEXT: movl $1084227584, (%rsp,%rax,4) ## imm = 0x40A00000 +; X64-NEXT: movaps (%rsp), %xmm0 +; X64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 +; X64-NEXT: movq %rbp, %rsp +; X64-NEXT: popq %rbp +; X64-NEXT: retq entry: - %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b ; <<4 x float>> [#uses=1] - ret <8 x float> %vecins + %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b + ret <8 x float> %vecins } diff --git a/llvm/test/CodeGen/X86/vec_insert-5.ll b/llvm/test/CodeGen/X86/vec_insert-5.ll index 14b57e76dc8..1d6c785bafe 100644 --- a/llvm/test/CodeGen/X86/vec_insert-5.ll +++ b/llvm/test/CodeGen/X86/vec_insert-5.ll @@ -1,17 +1,28 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -march=x86 -mattr=+sse2,+ssse3 | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefix=X64 + ; There are no MMX operations in @t1 define void @t1(i32 %a, x86_mmx* %P) nounwind { -; CHECK-LABEL: t1: -; CHECK: # BB#0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: shll $12, %ecx -; CHECK-NEXT: movd %ecx, %xmm0 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1] -; CHECK-NEXT: movq %xmm0, (%eax) -; CHECK-NEXT: retl +; X32-LABEL: t1: +; X32: # BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: shll $12, %ecx +; X32-NEXT: movd %ecx, %xmm0 +; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1] +; X32-NEXT: movq %xmm0, (%eax) +; X32-NEXT: retl +; +; X64-LABEL: t1: +; X64: # BB#0: +; X64-NEXT: shll $12, %edi +; X64-NEXT: movd %rdi, %xmm0 +; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X64-NEXT: movq %xmm0, (%rsi) +; X64-NEXT: retq %tmp12 = shl i32 %a, 12 %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1 %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0 @@ -21,87 +32,135 @@ define void @t1(i32 %a, x86_mmx* %P) nounwind { } define <4 x float> @t2(<4 x float>* %P) nounwind { -; CHECK-LABEL: t2: -; CHECK: # BB#0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movaps (%eax), %xmm1 -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] -; CHECK-NEXT: retl +; X32-LABEL: t2: +; X32: # BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movaps (%eax), %xmm1 +; X32-NEXT: xorps %xmm0, %xmm0 +; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] +; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] +; X32-NEXT: retl +; +; X64-LABEL: t2: +; X64: # BB#0: +; X64-NEXT: movaps (%rdi), %xmm1 +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] +; X64-NEXT: retq %tmp1 = load <4 x float>, <4 x float>* %P %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 > ret <4 x float> %tmp2 } define <4 x float> @t3(<4 x float>* %P) nounwind { -; CHECK-LABEL: t3: -; CHECK: # BB#0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movapd (%eax), %xmm0 -; CHECK-NEXT: xorpd %xmm1, %xmm1 -; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] -; CHECK-NEXT: retl +; X32-LABEL: t3: +; X32: # BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movapd (%eax), %xmm0 +; X32-NEXT: xorpd %xmm1, %xmm1 +; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; X32-NEXT: retl +; +; X64-LABEL: t3: +; X64: # BB#0: +; X64-NEXT: movapd (%rdi), %xmm0 +; X64-NEXT: xorpd %xmm1, %xmm1 +; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; X64-NEXT: retq %tmp1 = load <4 x float>, <4 x float>* %P %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 > ret <4 x float> %tmp2 } define <4 x float> @t4(<4 x float>* %P) nounwind { -; CHECK-LABEL: t4: -; CHECK: # BB#0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movaps (%eax), %xmm0 -; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0] -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3] -; CHECK-NEXT: retl +; X32-LABEL: t4: +; X32: # BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movaps (%eax), %xmm0 +; X32-NEXT: xorps %xmm1, %xmm1 +; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0] +; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3] +; X32-NEXT: retl +; +; X64-LABEL: t4: +; X64: # BB#0: +; X64-NEXT: movaps (%rdi), %xmm0 +; X64-NEXT: xorps %xmm1, %xmm1 +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0] +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3] +; X64-NEXT: retq %tmp1 = load <4 x float>, <4 x float>* %P %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 > ret <4 x float> %tmp2 } define <16 x i8> @t5(<16 x i8> %x) nounwind { -; CHECK-LABEL: t5: -; CHECK: # BB#0: -; CHECK-NEXT: psrlw $8, %xmm0 -; CHECK-NEXT: retl +; X32-LABEL: t5: +; X32: # BB#0: +; X32-NEXT: psrlw $8, %xmm0 +; X32-NEXT: retl +; +; X64-LABEL: t5: +; X64: # BB#0: +; X64-NEXT: psrlw $8, %xmm0 +; X64-NEXT: retq %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17> ret <16 x i8> %s } define <16 x i8> @t6(<16 x i8> %x) nounwind { -; CHECK-LABEL: t6: -; CHECK: # BB#0: -; CHECK-NEXT: psrlw $8, %xmm0 -; CHECK-NEXT: retl +; X32-LABEL: t6: +; X32: # BB#0: +; X32-NEXT: psrlw $8, %xmm0 +; X32-NEXT: retl +; +; X64-LABEL: t6: +; X64: # BB#0: +; X64-NEXT: psrlw $8, %xmm0 +; X64-NEXT: retq %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ret <16 x i8> %s } define <16 x i8> @t7(<16 x i8> %x) nounwind { -; CHECK-LABEL: t7: -; CHECK: # BB#0: -; CHECK-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; CHECK-NEXT: retl +; X32-LABEL: t7: +; X32: # BB#0: +; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] +; X32-NEXT: retl +; +; X64-LABEL: t7: +; X64: # BB#0: +; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] +; X64-NEXT: retq %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2> ret <16 x i8> %s } define <16 x i8> @t8(<16 x i8> %x) nounwind { -; CHECK-LABEL: t8: -; CHECK: # BB#0: -; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero -; CHECK-NEXT: retl +; X32-LABEL: t8: +; X32: # BB#0: +; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero +; X32-NEXT: retl +; +; X64-LABEL: t8: +; X64: # BB#0: +; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero +; X64-NEXT: retq %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17> ret <16 x i8> %s } define <16 x i8> @t9(<16 x i8> %x) nounwind { -; CHECK-LABEL: t9: -; CHECK: # BB#0: -; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero -; CHECK-NEXT: retl +; X32-LABEL: t9: +; X32: # BB#0: +; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero +; X32-NEXT: retl +; +; X64-LABEL: t9: +; X64: # BB#0: +; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero +; X64-NEXT: retq %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 undef, i32 undef> ret <16 x i8> %s } diff --git a/llvm/test/CodeGen/X86/vec_insert-7.ll b/llvm/test/CodeGen/X86/vec_insert-7.ll index 27187183d43..02db6e6d875 100644 --- a/llvm/test/CodeGen/X86/vec_insert-7.ll +++ b/llvm/test/CodeGen/X86/vec_insert-7.ll @@ -1,26 +1,38 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -march=x86 -mattr=+mmx,+sse4.2 -mtriple=i686-apple-darwin9 | FileCheck %s +; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=+mmx,+sse4.2 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=+mmx,+sse4.2 | FileCheck %s --check-prefix=X64 ; MMX insertelement is not available; these are promoted to XMM. ; (Without SSE they are split to two ints, and the code is much better.) define x86_mmx @mmx_movzl(x86_mmx %x) nounwind { -; CHECK-LABEL: mmx_movzl: -; CHECK: ## BB#0: -; CHECK-NEXT: subl $20, %esp -; CHECK-NEXT: movq %mm0, {{[0-9]+}}(%esp) -; CHECK-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: pinsrd $0, %eax, %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7] -; CHECK-NEXT: movq %xmm1, (%esp) -; CHECK-NEXT: movq (%esp), %mm0 -; CHECK-NEXT: addl $20, %esp -; CHECK-NEXT: retl +; X32-LABEL: mmx_movzl: +; X32: ## BB#0: +; X32-NEXT: subl $20, %esp +; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp) +; X32-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero +; X32-NEXT: movl $32, %eax +; X32-NEXT: pinsrd $0, %eax, %xmm0 +; X32-NEXT: pxor %xmm1, %xmm1 +; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7] +; X32-NEXT: movq %xmm1, (%esp) +; X32-NEXT: movq (%esp), %mm0 +; X32-NEXT: addl $20, %esp +; X32-NEXT: retl +; +; X64-LABEL: mmx_movzl: +; X64: ## BB#0: +; X64-NEXT: movdq2q %xmm0, %mm0 +; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero +; X64-NEXT: movl $32, %eax +; X64-NEXT: pinsrq $0, %rax, %xmm1 +; X64-NEXT: pxor %xmm0, %xmm0 +; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7] +; X64-NEXT: retq %tmp = bitcast x86_mmx %x to <2 x i32> - %tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0 ; <<2 x i32>> [#uses=1] - %tmp8 = insertelement <2 x i32> %tmp3, i32 0, i32 1 ; <<2 x i32>> [#uses=1] + %tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0 + %tmp8 = insertelement <2 x i32> %tmp3, i32 0, i32 1 %tmp9 = bitcast <2 x i32> %tmp8 to x86_mmx ret x86_mmx %tmp9 } diff --git a/llvm/test/CodeGen/X86/vec_insert-8.ll b/llvm/test/CodeGen/X86/vec_insert-8.ll index 917832c40ad..d612e7eb10d 100644 --- a/llvm/test/CodeGen/X86/vec_insert-8.ll +++ b/llvm/test/CodeGen/X86/vec_insert-8.ll @@ -1,15 +1,58 @@ -; RUN: llc < %s -march=x86 -mattr=+sse4.1 -o %t +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64 ; tests variable insert and extract of a 4 x i32 -define <4 x i32> @var_insert(<4 x i32> %x, i32 %val, i32 %idx) nounwind { +define <4 x i32> @var_insert(<4 x i32> %x, i32 %val, i32 %idx) nounwind { +; X32-LABEL: var_insert: +; X32: # BB#0: # %entry +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: andl $-16, %esp +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: movl 12(%ebp), %ecx +; X32-NEXT: movaps %xmm0, (%esp) +; X32-NEXT: movl %eax, (%esp,%ecx,4) +; X32-NEXT: movaps (%esp), %xmm0 +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl +; +; X64-LABEL: var_insert: +; X64: # BB#0: # %entry +; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movslq %esi, %rax +; X64-NEXT: movl %edi, -24(%rsp,%rax,4) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: - %tmp3 = insertelement <4 x i32> %x, i32 %val, i32 %idx ; <<4 x i32>> [#uses=1] - ret <4 x i32> %tmp3 + %tmp3 = insertelement <4 x i32> %x, i32 %val, i32 %idx + ret <4 x i32> %tmp3 } -define i32 @var_extract(<4 x i32> %x, i32 %idx) nounwind { +define i32 @var_extract(<4 x i32> %x, i32 %idx) nounwind { +; X32-LABEL: var_extract: +; X32: # BB#0: # %entry +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: andl $-16, %esp +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: movaps %xmm0, (%esp) +; X32-NEXT: movl (%esp,%eax,4), %eax +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl +; +; X64-LABEL: var_extract: +; X64: # BB#0: # %entry +; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movslq %edi, %rax +; X64-NEXT: movl -24(%rsp,%rax,4), %eax +; X64-NEXT: retq entry: - %tmp3 = extractelement <4 x i32> %x, i32 %idx ; <<i32>> [#uses=1] - ret i32 %tmp3 + %tmp3 = extractelement <4 x i32> %x, i32 %idx + ret i32 %tmp3 } diff --git a/llvm/test/CodeGen/X86/vec_insert-9.ll b/llvm/test/CodeGen/X86/vec_insert-9.ll index 5f2e676ef1a..ec4a0288e10 100644 --- a/llvm/test/CodeGen/X86/vec_insert-9.ll +++ b/llvm/test/CodeGen/X86/vec_insert-9.ll @@ -1,9 +1,21 @@ -; RUN: llc < %s -march=x86 -mattr=+sse4.1 > %t -; RUN: grep pinsrd %t | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64 define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind { +; X32-LABEL: var_insert2: +; X32: # BB#0: # %entry +; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm0 +; X32-NEXT: retl +; +; X64-LABEL: var_insert2: +; X64: # BB#0: # %entry +; X64-NEXT: movd %edi, %xmm0 +; X64-NEXT: pinsrd $3, %esi, %xmm0 +; X64-NEXT: retq entry: - %tmp3 = insertelement <4 x i32> undef, i32 %val, i32 0 ; <<4 x i32>> [#uses=1] - %tmp4 = insertelement <4 x i32> %tmp3, i32 %idx, i32 3 ; <<4 x i32>> [#uses=1] - ret <4 x i32> %tmp4 + %tmp3 = insertelement <4 x i32> undef, i32 %val, i32 0 + %tmp4 = insertelement <4 x i32> %tmp3, i32 %idx, i32 3 + ret <4 x i32> %tmp4 } diff --git a/llvm/test/CodeGen/X86/vec_insert-mmx.ll b/llvm/test/CodeGen/X86/vec_insert-mmx.ll index cbd420885ac..64c014784f4 100644 --- a/llvm/test/CodeGen/X86/vec_insert-mmx.ll +++ b/llvm/test/CodeGen/X86/vec_insert-mmx.ll @@ -1,37 +1,55 @@ -; RUN: llc < %s -mtriple=i686-darwin -mattr=+mmx,+sse2 | FileCheck %s -check-prefix=X86-32 -; RUN: llc < %s -mtriple=x86_64-darwin -mattr=+mmx,+sse4.1 | FileCheck %s -check-prefix=X86-64 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-darwin -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-darwin -mattr=+mmx,+sse4.1 | FileCheck %s --check-prefix=X64 ; This is not an MMX operation; promoted to XMM. define x86_mmx @t0(i32 %A) nounwind { -; X86-32-LABEL: t0: -; X86-32: ## BB#0: -; X86-32: movd {{[0-9]+}}(%esp), %xmm0 -; X86-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1] -; X86-32-NEXT: movq %xmm0, (%esp) -; X86-32-NEXT: movq (%esp), %mm0 -; X86-32-NEXT: addl $12, %esp -; X86-32-NEXT: retl +; X32-LABEL: t0: +; X32: ## BB#0: +; X32-NEXT: subl $12, %esp +; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1] +; X32-NEXT: movq %xmm0, (%esp) +; X32-NEXT: movq (%esp), %mm0 +; X32-NEXT: addl $12, %esp +; X32-NEXT: retl +; +; X64-LABEL: t0: +; X64: ## BB#0: +; X64-NEXT: movd %rdi, %xmm0 +; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X64-NEXT: retq %tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1 %tmp4 = bitcast <2 x i32> %tmp3 to x86_mmx ret x86_mmx %tmp4 } define <8 x i8> @t1(i8 zeroext %x) nounwind { -; X86-32-LABEL: t1: -; X86-32: ## BB#0: -; X86-32-NOT: movl -; X86-32-NEXT: movd {{[0-9]+}}(%esp), %xmm0 -; X86-32-NEXT: retl +; X32-LABEL: t1: +; X32: ## BB#0: +; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-NEXT: retl +; +; X64-LABEL: t1: +; X64: ## BB#0: +; X64-NEXT: movd %edi, %xmm0 +; X64-NEXT: retq %r = insertelement <8 x i8> undef, i8 %x, i32 0 ret <8 x i8> %r } ; PR2574 define <2 x float> @t2(<2 x float> %a0) { -; X86-32-LABEL: t2: -; X86-32: ## BB#0: -; X86-32-NEXT: xorps %xmm0, %xmm0 -; X86-32-NEXT: retl +; X32-LABEL: t2: +; X32: ## BB#0: +; X32-NEXT: xorps %xmm0, %xmm0 +; X32-NEXT: retl +; +; X64-LABEL: t2: +; X64: ## BB#0: +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: retq %v1 = insertelement <2 x float> %a0, float 0.000000e+00, i32 0 %v2 = insertelement <2 x float> %v1, float 0.000000e+00, i32 1 ret <2 x float> %v2 @@ -42,14 +60,31 @@ define <2 x float> @t2(<2 x float> %a0) { ; PR2562 define void @t3() { -; X86-64-LABEL: t3: -; X86-64: ## BB#0: -; X86-64: pmovzxwd (%rcx) -; X86-64-NEXT: movzwl -; X86-64-NEXT: pinsrd $0 -; X86-64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; X86-64-NEXT: movq %xmm0 -; X86-64-NEXT: retq +; X32-LABEL: t3: +; X32: ## BB#0: +; X32-NEXT: movl L_g0$non_lazy_ptr, %eax +; X32-NEXT: movl L_g1$non_lazy_ptr, %ecx +; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; X32-NEXT: movzwl (%eax), %eax +; X32-NEXT: movd %eax, %xmm1 +; X32-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X32-NEXT: movq %xmm0, (%ecx) +; X32-NEXT: retl +; +; X64-LABEL: t3: +; X64: ## BB#0: +; X64-NEXT: movq _g0@{{.*}}(%rip), %rax +; X64-NEXT: movq _g1@{{.*}}(%rip), %rcx +; X64-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X64-NEXT: movzwl (%rax), %eax +; X64-NEXT: pinsrd $0, %eax, %xmm0 +; X64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; X64-NEXT: movq %xmm0, (%rcx) +; X64-NEXT: retq load i16, i16* @g0 load <4 x i16>, <4 x i16>* @g1 insertelement <4 x i16> %2, i16 %1, i32 0 |