diff options
Diffstat (limited to 'llvm/test/CodeGen/X86')
| -rw-r--r-- | llvm/test/CodeGen/X86/load-local-v3i1.ll | 70 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/widen_arith-3.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/widen_cast-2.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/widen_cast-3.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/widen_load-2.ll | 31 |
5 files changed, 84 insertions, 29 deletions
diff --git a/llvm/test/CodeGen/X86/load-local-v3i1.ll b/llvm/test/CodeGen/X86/load-local-v3i1.ll new file mode 100644 index 00000000000..88b87c273e8 --- /dev/null +++ b/llvm/test/CodeGen/X86/load-local-v3i1.ll @@ -0,0 +1,70 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s + +; widen a v3i1 to v4i1 to do a vector load/store. We would previously +; reconstruct the said v3i1 from the first element of the vector by filling all +; the lanes of the vector with that first element, which was obviously wrong. +; This was done in the type-legalizing of the DAG, when legalizing the load. + +; Function Attrs: argmemonly nounwind readonly +declare <3 x i32> @llvm.masked.load.v3i32.p1v3i32(<3 x i32> addrspace(1)*, i32, <3 x i1>, <3 x i32>) + +; Function Attrs: argmemonly nounwind +declare void @llvm.masked.store.v3i32.p1v3i32(<3 x i32>, <3 x i32> addrspace(1)*, i32, <3 x i1>) + +define <3 x i32> @masked_load_v3(i32 addrspace(1)*, <3 x i1>) { +entry: + %2 = bitcast i32 addrspace(1)* %0 to <3 x i32> addrspace(1)* + %3 = call <3 x i32> @llvm.masked.load.v3i32.p1v3i32(<3 x i32> addrspace(1)* %2, i32 4, <3 x i1> %1, <3 x i32> undef) + ret <3 x i32> %3 +} + +define void @masked_store4_v3(<3 x i32>, i32 addrspace(1)*, <3 x i1>) { +entry: + %3 = bitcast i32 addrspace(1)* %1 to <3 x i32> addrspace(1)* + call void @llvm.masked.store.v3i32.p1v3i32(<3 x i32> %0, <3 x i32> addrspace(1)* %3, i32 4, <3 x i1> %2) + ret void +} + +define void @local_load_v3i1(i32 addrspace(1)* %out, i32 addrspace(1)* %in, <3 x i1>* %predicate_ptr) nounwind { +; CHECK-LABEL: local_load_v3i1: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movq %rdi, %r14 +; CHECK-NEXT: movzbl (%rdx), %ebp +; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: shrl %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: movl %ebp, %ecx +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: movd %ecx, %xmm0 +; CHECK-NEXT: pinsrd $1, %eax, %xmm0 +; CHECK-NEXT: shrl $2, %ebp +; CHECK-NEXT: andl $1, %ebp +; CHECK-NEXT: pinsrd $2, %ebp, %xmm0 +; CHECK-NEXT: movd %xmm0, %ebx +; CHECK-NEXT: pextrd $1, %xmm0, %r15d +; CHECK-NEXT: movq %rsi, %rdi +; CHECK-NEXT: movl %ebx, %esi +; CHECK-NEXT: movl %r15d, %edx +; CHECK-NEXT: movl %ebp, %ecx +; CHECK-NEXT: callq masked_load_v3 +; CHECK-NEXT: movq %r14, %rdi +; CHECK-NEXT: movl %ebx, %esi +; CHECK-NEXT: movl %r15d, %edx +; CHECK-NEXT: movl %ebp, %ecx +; CHECK-NEXT: callq masked_store4_v3 +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq + %predicate = load <3 x i1>, <3 x i1>* %predicate_ptr + %load1 = call <3 x i32> @masked_load_v3(i32 addrspace(1)* %in, <3 x i1> %predicate) + call void @masked_store4_v3(<3 x i32> %load1, i32 addrspace(1)* %out, <3 x i1> %predicate) + ret void +} diff --git a/llvm/test/CodeGen/X86/widen_arith-3.ll b/llvm/test/CodeGen/X86/widen_arith-3.ll index aa656de2342..3e455f7f14c 100644 --- a/llvm/test/CodeGen/X86/widen_arith-3.ll +++ b/llvm/test/CodeGen/X86/widen_arith-3.ll @@ -12,7 +12,7 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind { ; CHECK-NEXT: pushl %ebp ; CHECK-NEXT: movl %esp, %ebp ; CHECK-NEXT: andl $-8, %esp -; CHECK-NEXT: subl $40, %esp +; CHECK-NEXT: subl $32, %esp ; CHECK-NEXT: movl {{\.LCPI.*}}, %eax ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 @@ -26,9 +26,7 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind { ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl 12(%ebp), %edx ; CHECK-NEXT: movl 8(%ebp), %ecx -; CHECK-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero -; CHECK-NEXT: pinsrd $2, 4(%edx,%eax,8), %xmm2 +; CHECK-NEXT: pmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; CHECK-NEXT: psubd %xmm0, %xmm2 ; CHECK-NEXT: pextrw $4, %xmm2, 4(%ecx,%eax,8) ; CHECK-NEXT: pshufb %xmm1, %xmm2 diff --git a/llvm/test/CodeGen/X86/widen_cast-2.ll b/llvm/test/CodeGen/X86/widen_cast-2.ll index e7780912cd9..0bbcd391d22 100644 --- a/llvm/test/CodeGen/X86/widen_cast-2.ll +++ b/llvm/test/CodeGen/X86/widen_cast-2.ll @@ -21,9 +21,8 @@ define void @convert(<7 x i32>* %dst, <14 x i16>* %src) nounwind { ; CHECK-NEXT: movdqa 16(%edx,%eax), %xmm2 ; CHECK-NEXT: psubw %xmm0, %xmm1 ; CHECK-NEXT: psubw %xmm0, %xmm2 -; CHECK-NEXT: movd %xmm2, 16(%ecx,%eax) -; CHECK-NEXT: pextrd $1, %xmm2, 20(%ecx,%eax) ; CHECK-NEXT: pextrd $2, %xmm2, 24(%ecx,%eax) +; CHECK-NEXT: movq %xmm2, 16(%ecx,%eax) ; CHECK-NEXT: movdqa %xmm1, (%ecx,%eax) ; CHECK-NEXT: incl (%esp) ; CHECK-NEXT: cmpl $3, (%esp) diff --git a/llvm/test/CodeGen/X86/widen_cast-3.ll b/llvm/test/CodeGen/X86/widen_cast-3.ll index 18a04c48a59..a4d37823dfc 100644 --- a/llvm/test/CodeGen/X86/widen_cast-3.ll +++ b/llvm/test/CodeGen/X86/widen_cast-3.ll @@ -11,8 +11,7 @@ define void @convert(<12 x i8>* %dst.addr, <3 x i32> %src) nounwind { ; X86-NEXT: pcmpeqd %xmm1, %xmm1 ; X86-NEXT: psubd %xmm1, %xmm0 ; X86-NEXT: pextrd $2, %xmm0, 8(%eax) -; X86-NEXT: pextrd $1, %xmm0, 4(%eax) -; X86-NEXT: movd %xmm0, (%eax) +; X86-NEXT: movq %xmm0, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: convert: diff --git a/llvm/test/CodeGen/X86/widen_load-2.ll b/llvm/test/CodeGen/X86/widen_load-2.ll index 23b68b26980..ea8f4ff0528 100644 --- a/llvm/test/CodeGen/X86/widen_load-2.ll +++ b/llvm/test/CodeGen/X86/widen_load-2.ll @@ -15,8 +15,7 @@ define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) { ; X86-NEXT: movdqa (%edx), %xmm0 ; X86-NEXT: paddd (%ecx), %xmm0 ; X86-NEXT: pextrd $2, %xmm0, 8(%eax) -; X86-NEXT: pextrd $1, %xmm0, 4(%eax) -; X86-NEXT: movd %xmm0, (%eax) +; X86-NEXT: movq %xmm0, (%eax) ; X86-NEXT: retl $4 ; ; X64-LABEL: add3i32: @@ -40,16 +39,13 @@ define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-NEXT: pinsrd $1, 4(%edx), %xmm0 +; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X86-NEXT: pinsrd $2, 8(%edx), %xmm0 -; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-NEXT: pinsrd $1, 4(%ecx), %xmm1 +; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; X86-NEXT: pinsrd $2, 8(%ecx), %xmm1 ; X86-NEXT: paddd %xmm0, %xmm1 -; X86-NEXT: pextrd $1, %xmm1, 4(%eax) +; X86-NEXT: movq %xmm1, (%eax) ; X86-NEXT: pextrd $2, %xmm1, 8(%eax) -; X86-NEXT: movd %xmm1, (%eax) ; X86-NEXT: retl $4 ; ; X64-LABEL: add3i32_2: @@ -81,9 +77,8 @@ define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) { ; X86-NEXT: movdqa 16(%edx), %xmm1 ; X86-NEXT: paddd (%ecx), %xmm0 ; X86-NEXT: paddd 16(%ecx), %xmm1 -; X86-NEXT: movd %xmm1, 16(%eax) -; X86-NEXT: pextrd $1, %xmm1, 20(%eax) ; X86-NEXT: pextrd $2, %xmm1, 24(%eax) +; X86-NEXT: movq %xmm1, 16(%eax) ; X86-NEXT: movdqa %xmm0, (%eax) ; X86-NEXT: retl $4 ; @@ -151,16 +146,12 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $8, %esp ; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: movl 16(%ebp), %ecx ; X86-NEXT: movl 12(%ebp), %edx -; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; X86-NEXT: pinsrd $2, 4(%edx), %xmm0 -; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero -; X86-NEXT: pinsrd $2, 4(%ecx), %xmm1 +; X86-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X86-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; X86-NEXT: paddd %xmm0, %xmm1 ; X86-NEXT: pextrw $4, %xmm1, 4(%eax) ; X86-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] @@ -225,8 +216,7 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* ; X86-NEXT: movdqa 16(%edx), %xmm1 ; X86-NEXT: paddw (%ecx), %xmm0 ; X86-NEXT: paddw 16(%ecx), %xmm1 -; X86-NEXT: movd %xmm1, 16(%eax) -; X86-NEXT: pextrd $1, %xmm1, 20(%eax) +; X86-NEXT: movq %xmm1, 16(%eax) ; X86-NEXT: movdqa %xmm0, (%eax) ; X86-NEXT: retl $4 ; @@ -331,11 +321,10 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp ; X86-NEXT: movdqa 16(%edx), %xmm1 ; X86-NEXT: paddb (%ecx), %xmm0 ; X86-NEXT: paddb 16(%ecx), %xmm1 -; X86-NEXT: movd %xmm1, 16(%eax) -; X86-NEXT: pextrd $1, %xmm1, 20(%eax) ; X86-NEXT: pextrd $2, %xmm1, 24(%eax) ; X86-NEXT: pextrw $6, %xmm1, 28(%eax) ; X86-NEXT: pextrb $14, %xmm1, 30(%eax) +; X86-NEXT: movq %xmm1, 16(%eax) ; X86-NEXT: movdqa %xmm0, (%eax) ; X86-NEXT: retl $4 ; |

