summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--llvm/test/CodeGen/X86/load-local-v3i1.ll70
-rw-r--r--llvm/test/CodeGen/X86/widen_arith-3.ll6
-rw-r--r--llvm/test/CodeGen/X86/widen_cast-2.ll3
-rw-r--r--llvm/test/CodeGen/X86/widen_cast-3.ll3
-rw-r--r--llvm/test/CodeGen/X86/widen_load-2.ll31
5 files changed, 84 insertions, 29 deletions
diff --git a/llvm/test/CodeGen/X86/load-local-v3i1.ll b/llvm/test/CodeGen/X86/load-local-v3i1.ll
new file mode 100644
index 00000000000..88b87c273e8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/load-local-v3i1.ll
@@ -0,0 +1,70 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s
+
+; Widen a v3i1 to v4i1 to do a vector load/store. We would previously
+; reconstruct the said v3i1 from the first element of the vector by filling all
+; the lanes of the vector with that first element, which was obviously wrong.
+; This was done during type legalization of the DAG, when legalizing the load.
+
+; Function Attrs: argmemonly nounwind readonly
+declare <3 x i32> @llvm.masked.load.v3i32.p1v3i32(<3 x i32> addrspace(1)*, i32, <3 x i1>, <3 x i32>)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.masked.store.v3i32.p1v3i32(<3 x i32>, <3 x i32> addrspace(1)*, i32, <3 x i1>)
+
+define <3 x i32> @masked_load_v3(i32 addrspace(1)*, <3 x i1>) {
+entry:
+ %2 = bitcast i32 addrspace(1)* %0 to <3 x i32> addrspace(1)*
+ %3 = call <3 x i32> @llvm.masked.load.v3i32.p1v3i32(<3 x i32> addrspace(1)* %2, i32 4, <3 x i1> %1, <3 x i32> undef)
+ ret <3 x i32> %3
+}
+
+define void @masked_store4_v3(<3 x i32>, i32 addrspace(1)*, <3 x i1>) {
+entry:
+ %3 = bitcast i32 addrspace(1)* %1 to <3 x i32> addrspace(1)*
+ call void @llvm.masked.store.v3i32.p1v3i32(<3 x i32> %0, <3 x i32> addrspace(1)* %3, i32 4, <3 x i1> %2)
+ ret void
+}
+
+define void @local_load_v3i1(i32 addrspace(1)* %out, i32 addrspace(1)* %in, <3 x i1>* %predicate_ptr) nounwind {
+; CHECK-LABEL: local_load_v3i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: movq %rdi, %r14
+; CHECK-NEXT: movzbl (%rdx), %ebp
+; CHECK-NEXT: movl %ebp, %eax
+; CHECK-NEXT: shrl %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: movl %ebp, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: movd %ecx, %xmm0
+; CHECK-NEXT: pinsrd $1, %eax, %xmm0
+; CHECK-NEXT: shrl $2, %ebp
+; CHECK-NEXT: andl $1, %ebp
+; CHECK-NEXT: pinsrd $2, %ebp, %xmm0
+; CHECK-NEXT: movd %xmm0, %ebx
+; CHECK-NEXT: pextrd $1, %xmm0, %r15d
+; CHECK-NEXT: movq %rsi, %rdi
+; CHECK-NEXT: movl %ebx, %esi
+; CHECK-NEXT: movl %r15d, %edx
+; CHECK-NEXT: movl %ebp, %ecx
+; CHECK-NEXT: callq masked_load_v3
+; CHECK-NEXT: movq %r14, %rdi
+; CHECK-NEXT: movl %ebx, %esi
+; CHECK-NEXT: movl %r15d, %edx
+; CHECK-NEXT: movl %ebp, %ecx
+; CHECK-NEXT: callq masked_store4_v3
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+ %predicate = load <3 x i1>, <3 x i1>* %predicate_ptr
+ %load1 = call <3 x i32> @masked_load_v3(i32 addrspace(1)* %in, <3 x i1> %predicate)
+ call void @masked_store4_v3(<3 x i32> %load1, i32 addrspace(1)* %out, <3 x i1> %predicate)
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/widen_arith-3.ll b/llvm/test/CodeGen/X86/widen_arith-3.ll
index aa656de2342..3e455f7f14c 100644
--- a/llvm/test/CodeGen/X86/widen_arith-3.ll
+++ b/llvm/test/CodeGen/X86/widen_arith-3.ll
@@ -12,7 +12,7 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind {
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: andl $-8, %esp
-; CHECK-NEXT: subl $40, %esp
+; CHECK-NEXT: subl $32, %esp
; CHECK-NEXT: movl {{\.LCPI.*}}, %eax
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
@@ -26,9 +26,7 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl 12(%ebp), %edx
; CHECK-NEXT: movl 8(%ebp), %ecx
-; CHECK-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
-; CHECK-NEXT: pinsrd $2, 4(%edx,%eax,8), %xmm2
+; CHECK-NEXT: pmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; CHECK-NEXT: psubd %xmm0, %xmm2
; CHECK-NEXT: pextrw $4, %xmm2, 4(%ecx,%eax,8)
; CHECK-NEXT: pshufb %xmm1, %xmm2
diff --git a/llvm/test/CodeGen/X86/widen_cast-2.ll b/llvm/test/CodeGen/X86/widen_cast-2.ll
index e7780912cd9..0bbcd391d22 100644
--- a/llvm/test/CodeGen/X86/widen_cast-2.ll
+++ b/llvm/test/CodeGen/X86/widen_cast-2.ll
@@ -21,9 +21,8 @@ define void @convert(<7 x i32>* %dst, <14 x i16>* %src) nounwind {
; CHECK-NEXT: movdqa 16(%edx,%eax), %xmm2
; CHECK-NEXT: psubw %xmm0, %xmm1
; CHECK-NEXT: psubw %xmm0, %xmm2
-; CHECK-NEXT: movd %xmm2, 16(%ecx,%eax)
-; CHECK-NEXT: pextrd $1, %xmm2, 20(%ecx,%eax)
; CHECK-NEXT: pextrd $2, %xmm2, 24(%ecx,%eax)
+; CHECK-NEXT: movq %xmm2, 16(%ecx,%eax)
; CHECK-NEXT: movdqa %xmm1, (%ecx,%eax)
; CHECK-NEXT: incl (%esp)
; CHECK-NEXT: cmpl $3, (%esp)
diff --git a/llvm/test/CodeGen/X86/widen_cast-3.ll b/llvm/test/CodeGen/X86/widen_cast-3.ll
index 18a04c48a59..a4d37823dfc 100644
--- a/llvm/test/CodeGen/X86/widen_cast-3.ll
+++ b/llvm/test/CodeGen/X86/widen_cast-3.ll
@@ -11,8 +11,7 @@ define void @convert(<12 x i8>* %dst.addr, <3 x i32> %src) nounwind {
; X86-NEXT: pcmpeqd %xmm1, %xmm1
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: pextrd $2, %xmm0, 8(%eax)
-; X86-NEXT: pextrd $1, %xmm0, 4(%eax)
-; X86-NEXT: movd %xmm0, (%eax)
+; X86-NEXT: movq %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: convert:
diff --git a/llvm/test/CodeGen/X86/widen_load-2.ll b/llvm/test/CodeGen/X86/widen_load-2.ll
index 23b68b26980..ea8f4ff0528 100644
--- a/llvm/test/CodeGen/X86/widen_load-2.ll
+++ b/llvm/test/CodeGen/X86/widen_load-2.ll
@@ -15,8 +15,7 @@ define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; X86-NEXT: movdqa (%edx), %xmm0
; X86-NEXT: paddd (%ecx), %xmm0
; X86-NEXT: pextrd $2, %xmm0, 8(%eax)
-; X86-NEXT: pextrd $1, %xmm0, 4(%eax)
-; X86-NEXT: movd %xmm0, (%eax)
+; X86-NEXT: movq %xmm0, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add3i32:
@@ -40,16 +39,13 @@ define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: pinsrd $1, 4(%edx), %xmm0
+; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: pinsrd $2, 8(%edx), %xmm0
-; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-NEXT: pinsrd $1, 4(%ecx), %xmm1
+; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: pinsrd $2, 8(%ecx), %xmm1
; X86-NEXT: paddd %xmm0, %xmm1
-; X86-NEXT: pextrd $1, %xmm1, 4(%eax)
+; X86-NEXT: movq %xmm1, (%eax)
; X86-NEXT: pextrd $2, %xmm1, 8(%eax)
-; X86-NEXT: movd %xmm1, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add3i32_2:
@@ -81,9 +77,8 @@ define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
; X86-NEXT: movdqa 16(%edx), %xmm1
; X86-NEXT: paddd (%ecx), %xmm0
; X86-NEXT: paddd 16(%ecx), %xmm1
-; X86-NEXT: movd %xmm1, 16(%eax)
-; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
; X86-NEXT: pextrd $2, %xmm1, 24(%eax)
+; X86-NEXT: movq %xmm1, 16(%eax)
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl $4
;
@@ -151,16 +146,12 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 16(%ebp), %ecx
; X86-NEXT: movl 12(%ebp), %edx
-; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X86-NEXT: pinsrd $2, 4(%edx), %xmm0
-; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; X86-NEXT: pinsrd $2, 4(%ecx), %xmm1
+; X86-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X86-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-NEXT: paddd %xmm0, %xmm1
; X86-NEXT: pextrw $4, %xmm1, 4(%eax)
; X86-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
@@ -225,8 +216,7 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12*
; X86-NEXT: movdqa 16(%edx), %xmm1
; X86-NEXT: paddw (%ecx), %xmm0
; X86-NEXT: paddw 16(%ecx), %xmm1
-; X86-NEXT: movd %xmm1, 16(%eax)
-; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
+; X86-NEXT: movq %xmm1, 16(%eax)
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl $4
;
@@ -331,11 +321,10 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
; X86-NEXT: movdqa 16(%edx), %xmm1
; X86-NEXT: paddb (%ecx), %xmm0
; X86-NEXT: paddb 16(%ecx), %xmm1
-; X86-NEXT: movd %xmm1, 16(%eax)
-; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
; X86-NEXT: pextrd $2, %xmm1, 24(%eax)
; X86-NEXT: pextrw $6, %xmm1, 28(%eax)
; X86-NEXT: pextrb $14, %xmm1, 30(%eax)
+; X86-NEXT: movq %xmm1, 16(%eax)
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl $4
;
OpenPOWER on IntegriCloud