diff options
-rw-r--r-- | llvm/test/CodeGen/X86/gather-addresses.ll | 230 |
1 files changed, 156 insertions, 74 deletions
diff --git a/llvm/test/CodeGen/X86/gather-addresses.ll b/llvm/test/CodeGen/X86/gather-addresses.ll index 6431847064f..8fd584d28e5 100644 --- a/llvm/test/CodeGen/X86/gather-addresses.ll +++ b/llvm/test/CodeGen/X86/gather-addresses.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN -; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN +; RUN: llc -mtriple=x86_64-linux -mattr=+sse2 < %s | FileCheck %s --check-prefixes=LIN,LIN-SSE2 +; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefixes=LIN,LIN-SSE4 +; RUN: llc -mtriple=x86_64-win32 -mattr=+sse2 < %s | FileCheck %s --check-prefixes=WIN,WIN-SSE2 +; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefixes=WIN,WIN-SSE4 ; RUN: llc -mtriple=i686-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN32 ; rdar://7398554 @@ -9,37 +11,71 @@ ; element out of the index vector. define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { -; LIN-LABEL: foo: -; LIN: # %bb.0: -; LIN-NEXT: movdqa (%rsi), %xmm0 -; LIN-NEXT: pand (%rdx), %xmm0 -; LIN-NEXT: pextrq $1, %xmm0, %rax -; LIN-NEXT: movq %xmm0, %rcx -; LIN-NEXT: movslq %ecx, %rdx -; LIN-NEXT: sarq $32, %rcx -; LIN-NEXT: movslq %eax, %rsi -; LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; LIN-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; LIN-NEXT: sarq $32, %rax -; LIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; LIN-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; LIN-NEXT: retq +; LIN-SSE2-LABEL: foo: +; LIN-SSE2: # %bb.0: +; LIN-SSE2-NEXT: movdqa (%rsi), %xmm0 +; LIN-SSE2-NEXT: pand (%rdx), %xmm0 +; LIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; LIN-SSE2-NEXT: movq %xmm1, %rax +; LIN-SSE2-NEXT: movq %xmm0, %rcx +; LIN-SSE2-NEXT: movslq %ecx, %rdx +; LIN-SSE2-NEXT: sarq $32, %rcx +; LIN-SSE2-NEXT: movslq %eax, %rsi +; LIN-SSE2-NEXT: sarq $32, %rax +; LIN-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; LIN-SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; LIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; LIN-SSE2-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; LIN-SSE2-NEXT: retq ; -; WIN-LABEL: foo: -; WIN: # %bb.0: -; WIN-NEXT: movdqa (%rdx), %xmm0 -; WIN-NEXT: pand (%r8), %xmm0 -; WIN-NEXT: pextrq $1, %xmm0, %rax -; WIN-NEXT: movq %xmm0, %rdx -; WIN-NEXT: movslq %edx, %r8 -; WIN-NEXT: sarq $32, %rdx -; WIN-NEXT: movslq %eax, %r9 -; WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; WIN-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; WIN-NEXT: sarq $32, %rax -; WIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; WIN-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; WIN-NEXT: retq +; LIN-SSE4-LABEL: foo: +; LIN-SSE4: # %bb.0: +; LIN-SSE4-NEXT: movdqa (%rsi), %xmm0 +; LIN-SSE4-NEXT: pand (%rdx), %xmm0 +; LIN-SSE4-NEXT: pextrq $1, %xmm0, %rax +; LIN-SSE4-NEXT: movq %xmm0, %rcx +; LIN-SSE4-NEXT: movslq %ecx, %rdx +; LIN-SSE4-NEXT: sarq $32, %rcx +; LIN-SSE4-NEXT: movslq %eax, %rsi +; LIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; LIN-SSE4-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; LIN-SSE4-NEXT: sarq $32, %rax +; LIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; LIN-SSE4-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; LIN-SSE4-NEXT: retq +; +; WIN-SSE2-LABEL: foo: +; WIN-SSE2: # %bb.0: +; WIN-SSE2-NEXT: movdqa (%rdx), %xmm0 +; WIN-SSE2-NEXT: pand (%r8), %xmm0 +; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; WIN-SSE2-NEXT: movq %xmm1, %rax +; WIN-SSE2-NEXT: movq %xmm0, %rdx +; WIN-SSE2-NEXT: movslq %edx, %r8 +; WIN-SSE2-NEXT: sarq $32, %rdx +; WIN-SSE2-NEXT: movslq %eax, %r9 +; WIN-SSE2-NEXT: sarq $32, %rax +; WIN-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; WIN-SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; WIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; WIN-SSE2-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; WIN-SSE2-NEXT: retq +; +; WIN-SSE4-LABEL: foo: +; WIN-SSE4: # %bb.0: +; WIN-SSE4-NEXT: movdqa (%rdx), %xmm0 +; WIN-SSE4-NEXT: pand (%r8), %xmm0 +; WIN-SSE4-NEXT: pextrq $1, %xmm0, %rax +; WIN-SSE4-NEXT: movq %xmm0, %rdx +; WIN-SSE4-NEXT: movslq %edx, %r8 +; WIN-SSE4-NEXT: sarq $32, %rdx +; WIN-SSE4-NEXT: movslq %eax, %r9 +; WIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; WIN-SSE4-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; WIN-SSE4-NEXT: sarq $32, %rax +; WIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; WIN-SSE4-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; WIN-SSE4-NEXT: retq ; ; LIN32-LABEL: foo: ; LIN32: # %bb.0: @@ -87,49 +123,95 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { ; cache works for x86-32. Note that in this case it will not be used for index ; calculation, since indexes are 32-bit, not 64. define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind { -; LIN-LABEL: old: -; LIN: # %bb.0: -; LIN-NEXT: movdqa (%rsi), %xmm0 -; LIN-NEXT: pand (%rdx), %xmm0 -; LIN-NEXT: pextrq $1, %xmm0, %rax -; LIN-NEXT: movq %rax, %rdx -; LIN-NEXT: shrq $32, %rdx -; LIN-NEXT: movq %xmm0, %rsi -; LIN-NEXT: movq %rsi, %rdi -; LIN-NEXT: shrq $32, %rdi -; LIN-NEXT: andl %ecx, %esi -; LIN-NEXT: andl %ecx, %eax -; LIN-NEXT: andq %rcx, %rdi -; LIN-NEXT: andq %rcx, %rdx -; LIN-NEXT: movq %rdi, %xmm1 -; LIN-NEXT: movq %rsi, %xmm0 -; LIN-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; LIN-NEXT: movq %rdx, %xmm2 -; LIN-NEXT: movq %rax, %xmm1 -; LIN-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; LIN-NEXT: retq +; LIN-SSE2-LABEL: old: +; LIN-SSE2: # %bb.0: +; LIN-SSE2-NEXT: movdqa (%rsi), %xmm0 +; LIN-SSE2-NEXT: pand (%rdx), %xmm0 +; LIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; LIN-SSE2-NEXT: movq %xmm1, %rax +; LIN-SSE2-NEXT: movq %rax, %rdx +; LIN-SSE2-NEXT: shrq $32, %rdx +; LIN-SSE2-NEXT: movq %xmm0, %rsi +; LIN-SSE2-NEXT: movq %rsi, %rdi +; LIN-SSE2-NEXT: shrq $32, %rdi +; LIN-SSE2-NEXT: andl %ecx, %esi +; LIN-SSE2-NEXT: andl %ecx, %eax +; LIN-SSE2-NEXT: andq %rcx, %rdi +; LIN-SSE2-NEXT: andq %rcx, %rdx +; LIN-SSE2-NEXT: movq %rdi, %xmm1 +; LIN-SSE2-NEXT: movq %rsi, %xmm0 +; LIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; LIN-SSE2-NEXT: movq %rdx, %xmm2 +; LIN-SSE2-NEXT: movq %rax, %xmm1 +; LIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; LIN-SSE2-NEXT: retq +; +; LIN-SSE4-LABEL: old: +; LIN-SSE4: # %bb.0: +; LIN-SSE4-NEXT: movdqa (%rsi), %xmm0 +; LIN-SSE4-NEXT: pand (%rdx), %xmm0 +; LIN-SSE4-NEXT: pextrq $1, %xmm0, %rax +; LIN-SSE4-NEXT: movq %rax, %rdx +; LIN-SSE4-NEXT: shrq $32, %rdx +; LIN-SSE4-NEXT: movq %xmm0, %rsi +; LIN-SSE4-NEXT: movq %rsi, %rdi +; LIN-SSE4-NEXT: shrq $32, %rdi +; LIN-SSE4-NEXT: andl %ecx, %esi +; LIN-SSE4-NEXT: andl %ecx, %eax +; LIN-SSE4-NEXT: andq %rcx, %rdi +; LIN-SSE4-NEXT: andq %rcx, %rdx +; LIN-SSE4-NEXT: movq %rdi, %xmm1 +; LIN-SSE4-NEXT: movq %rsi, %xmm0 +; LIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; LIN-SSE4-NEXT: movq %rdx, %xmm2 +; LIN-SSE4-NEXT: movq %rax, %xmm1 +; LIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; LIN-SSE4-NEXT: retq +; +; WIN-SSE2-LABEL: old: +; WIN-SSE2: # %bb.0: +; WIN-SSE2-NEXT: movdqa (%rdx), %xmm0 +; WIN-SSE2-NEXT: pand (%r8), %xmm0 +; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; WIN-SSE2-NEXT: movq %xmm1, %r8 +; WIN-SSE2-NEXT: movq %r8, %rcx +; WIN-SSE2-NEXT: shrq $32, %rcx +; WIN-SSE2-NEXT: movq %xmm0, %rax +; WIN-SSE2-NEXT: movq %rax, %rdx +; WIN-SSE2-NEXT: shrq $32, %rdx +; WIN-SSE2-NEXT: andl %r9d, %eax +; WIN-SSE2-NEXT: andl %r9d, %r8d +; WIN-SSE2-NEXT: andq %r9, %rdx +; WIN-SSE2-NEXT: andq %r9, %rcx +; WIN-SSE2-NEXT: movq %rdx, %xmm1 +; WIN-SSE2-NEXT: movq %rax, %xmm0 +; WIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; WIN-SSE2-NEXT: movq %rcx, %xmm2 +; WIN-SSE2-NEXT: movq %r8, %xmm1 +; WIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; WIN-SSE2-NEXT: retq ; -; WIN-LABEL: old: -; WIN: # %bb.0: -; WIN-NEXT: movdqa (%rdx), %xmm0 -; WIN-NEXT: pand (%r8), %xmm0 -; WIN-NEXT: pextrq $1, %xmm0, %r8 -; WIN-NEXT: movq %r8, %rcx -; WIN-NEXT: shrq $32, %rcx -; WIN-NEXT: movq %xmm0, %rax -; WIN-NEXT: movq %rax, %rdx -; WIN-NEXT: shrq $32, %rdx -; WIN-NEXT: andl %r9d, %eax -; WIN-NEXT: andl %r9d, %r8d -; WIN-NEXT: andq %r9, %rdx -; WIN-NEXT: andq %r9, %rcx -; WIN-NEXT: movq %rdx, %xmm1 -; WIN-NEXT: movq %rax, %xmm0 -; WIN-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; WIN-NEXT: movq %rcx, %xmm2 -; WIN-NEXT: movq %r8, %xmm1 -; WIN-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; WIN-NEXT: retq +; WIN-SSE4-LABEL: old: +; WIN-SSE4: # %bb.0: +; WIN-SSE4-NEXT: movdqa (%rdx), %xmm0 +; WIN-SSE4-NEXT: pand (%r8), %xmm0 +; WIN-SSE4-NEXT: pextrq $1, %xmm0, %r8 +; WIN-SSE4-NEXT: movq %r8, %rcx +; WIN-SSE4-NEXT: shrq $32, %rcx +; WIN-SSE4-NEXT: movq %xmm0, %rax +; WIN-SSE4-NEXT: movq %rax, %rdx +; WIN-SSE4-NEXT: shrq $32, %rdx +; WIN-SSE4-NEXT: andl %r9d, %eax +; WIN-SSE4-NEXT: andl %r9d, %r8d +; WIN-SSE4-NEXT: andq %r9, %rdx +; WIN-SSE4-NEXT: andq %r9, %rcx +; WIN-SSE4-NEXT: movq %rdx, %xmm1 +; WIN-SSE4-NEXT: movq %rax, %xmm0 +; WIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; WIN-SSE4-NEXT: movq %rcx, %xmm2 +; WIN-SSE4-NEXT: movq %r8, %xmm1 +; WIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; WIN-SSE4-NEXT: retq ; ; LIN32-LABEL: old: ; LIN32: # %bb.0: |