Diffstat (limited to 'llvm/test/CodeGen/X86/widen_load-2.ll')
-rw-r--r--  llvm/test/CodeGen/X86/widen_load-2.ll  91
1 file changed, 48 insertions(+), 43 deletions(-)
diff --git a/llvm/test/CodeGen/X86/widen_load-2.ll b/llvm/test/CodeGen/X86/widen_load-2.ll
index a816dd0d9b9..e4ee28a8f88 100644
--- a/llvm/test/CodeGen/X86/widen_load-2.ll
+++ b/llvm/test/CodeGen/X86/widen_load-2.ll
@@ -15,8 +15,7 @@ define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; X86-NEXT: movdqa (%edx), %xmm0
; X86-NEXT: paddd (%ecx), %xmm0
; X86-NEXT: pextrd $2, %xmm0, 8(%eax)
-; X86-NEXT: pextrd $1, %xmm0, 4(%eax)
-; X86-NEXT: movd %xmm0, (%eax)
+; X86-NEXT: movq %xmm0, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add3i32:
@@ -40,16 +39,13 @@ define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: pinsrd $1, 4(%edx), %xmm0
+; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: pinsrd $2, 8(%edx), %xmm0
-; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-NEXT: pinsrd $1, 4(%ecx), %xmm1
+; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: pinsrd $2, 8(%ecx), %xmm1
; X86-NEXT: paddd %xmm0, %xmm1
-; X86-NEXT: pextrd $1, %xmm1, 4(%eax)
+; X86-NEXT: movq %xmm1, (%eax)
; X86-NEXT: pextrd $2, %xmm1, 8(%eax)
-; X86-NEXT: movd %xmm1, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add3i32_2:
@@ -81,9 +77,8 @@ define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
; X86-NEXT: movdqa 16(%edx), %xmm1
; X86-NEXT: paddd (%ecx), %xmm0
; X86-NEXT: paddd 16(%ecx), %xmm1
-; X86-NEXT: movd %xmm1, 16(%eax)
-; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
; X86-NEXT: pextrd $2, %xmm1, 24(%eax)
+; X86-NEXT: movq %xmm1, 16(%eax)
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl $4
;
@@ -148,25 +143,31 @@ define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
; X86-LABEL: add3i16:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: pinsrw $2, 4(%edx), %xmm0
-; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-NEXT: pinsrw $2, 4(%ecx), %xmm1
-; X86-NEXT: paddw %xmm0, %xmm1
-; X86-NEXT: pextrw $2, %xmm1, 4(%eax)
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 16(%ebp), %ecx
+; X86-NEXT: movl 12(%ebp), %edx
+; X86-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X86-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X86-NEXT: paddd %xmm0, %xmm1
+; X86-NEXT: pextrw $4, %xmm1, 4(%eax)
+; X86-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; X86-NEXT: movd %xmm1, (%eax)
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: add3i16:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: paddw %xmm0, %xmm1
-; X64-NEXT: pextrw $2, %xmm1, 4(%rdi)
+; X64-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X64-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X64-NEXT: paddd %xmm0, %xmm1
+; X64-NEXT: pextrw $4, %xmm1, 4(%rdi)
+; X64-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; X64-NEXT: movd %xmm1, (%rdi)
; X64-NEXT: retq
%a = load %i16vec3, %i16vec3* %ap, align 16
@@ -215,8 +216,7 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12*
; X86-NEXT: movdqa 16(%edx), %xmm1
; X86-NEXT: paddw (%ecx), %xmm0
; X86-NEXT: paddw 16(%ecx), %xmm1
-; X86-NEXT: movd %xmm1, 16(%eax)
-; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
+; X86-NEXT: movq %xmm1, 16(%eax)
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl $4
;
@@ -280,23 +280,27 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18*
define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
; X86-LABEL: add3i8:
; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-NEXT: paddb %xmm0, %xmm1
-; X86-NEXT: pextrb $2, %xmm1, 2(%eax)
+; X86-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X86-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X86-NEXT: paddd %xmm0, %xmm1
+; X86-NEXT: pextrb $8, %xmm1, 2(%eax)
+; X86-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; X86-NEXT: pextrw $0, %xmm1, (%eax)
+; X86-NEXT: addl $12, %esp
; X86-NEXT: retl $4
;
; X64-LABEL: add3i8:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT: paddb %xmm0, %xmm1
-; X64-NEXT: pextrb $2, %xmm1, 2(%rdi)
+; X64-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X64-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X64-NEXT: paddd %xmm0, %xmm1
+; X64-NEXT: pextrb $8, %xmm1, 2(%rdi)
+; X64-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; X64-NEXT: pextrw $0, %xmm1, (%rdi)
; X64-NEXT: retq
%a = load %i8vec3, %i8vec3* %ap, align 16
@@ -317,11 +321,10 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
; X86-NEXT: movdqa 16(%edx), %xmm1
; X86-NEXT: paddb (%ecx), %xmm0
; X86-NEXT: paddb 16(%ecx), %xmm1
-; X86-NEXT: movd %xmm1, 16(%eax)
-; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
; X86-NEXT: pextrd $2, %xmm1, 24(%eax)
; X86-NEXT: pextrw $6, %xmm1, 28(%eax)
; X86-NEXT: pextrb $14, %xmm1, 30(%eax)
+; X86-NEXT: movq %xmm1, 16(%eax)
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl $4
;
@@ -350,6 +353,7 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pack* %rot) nounwind {
; X86-LABEL: rot:
; X86: # %bb.0: # %entry
+; X86-NEXT: subl $16, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -357,11 +361,12 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
; X86-NEXT: movw $-24930, (%edx) # imm = 0x9E9E
; X86-NEXT: movb $1, 2(%ecx)
; X86-NEXT: movw $257, (%ecx) # imm = 0x101
-; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: psrlw $1, %xmm0
-; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
-; X86-NEXT: pextrb $2, %xmm0, 2(%eax)
+; X86-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X86-NEXT: psrld $1, %xmm0
+; X86-NEXT: pextrb $8, %xmm0, 2(%eax)
+; X86-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; X86-NEXT: pextrw $0, %xmm0, (%eax)
+; X86-NEXT: addl $16, %esp
; X86-NEXT: retl $4
;
; X64-LABEL: rot:
@@ -371,10 +376,10 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
; X64-NEXT: movw $-24930, (%rsi) # imm = 0x9E9E
; X64-NEXT: movb $1, 2(%rdx)
; X64-NEXT: movw $257, (%rdx) # imm = 0x101
-; X64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: psrlw $1, %xmm0
-; X64-NEXT: pand {{.*}}(%rip), %xmm0
-; X64-NEXT: pextrb $2, %xmm0, 2(%rdi)
+; X64-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X64-NEXT: psrld $1, %xmm0
+; X64-NEXT: pextrb $8, %xmm0, 2(%rdi)
+; X64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; X64-NEXT: pextrw $0, %xmm0, (%rdi)
; X64-NEXT: retq
entry:
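
Note on the pattern being tested: most hunks above replace a movd + pextrd pair with a single movq covering the low 8 bytes of the stored 3-element vector, while the i8/i16 cases additionally widen through pmovzx and repack with pshufb before storing. As a sketch of the IR these CHECK lines are generated from, modeled on the load lines visible in the hunks (the vec3 type definitions are not shown in this diff, so <3 x i32> is an assumption):

    %i32vec3 = type <3 x i32>            ; assumed definition, not visible in the hunks above

    define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
      ; the 12-byte loads are widened to full 16-byte xmm loads
      %a = load %i32vec3, %i32vec3* %ap, align 16
      %b = load %i32vec3, %i32vec3* %bp, align 16
      ; a single paddd on the widened vector
      %x = add %i32vec3 %a, %b
      ; the 12-byte store is what changed: now movq (8 bytes) + pextrd (4 bytes)
      store %i32vec3 %x, %i32vec3* %ret
      ret void
    }

The other functions in the file (add3i16, add3i8, rot, ...) follow the same load/op/store shape with narrower element types, which is why their stores pick up the pmovzx/pshufb sequences shown in the later hunks.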