author     Reid Kleckner <reid@kleckner.net>    2014-12-22 23:58:37 +0000
committer  Reid Kleckner <reid@kleckner.net>    2014-12-22 23:58:37 +0000
commit     ce0093344fa1f9a10831038bfd47703b699db5f4 (patch)
tree       8a8a03dc74553dfc2302f41c6a839dde66def234 /llvm/test
parent     ea37c1173e0a58a18a95c37535f3ac0abacccc03 (diff)
Make musttail more robust for vector types on x86
Previously I tried to plug musttail into the existing vararg lowering code. That turned out to be a mistake, because non-vararg calls use significantly different register lowering, even on x86. For example, AVX vectors are usually passed in registers to normal functions and memory to vararg functions. Now musttail uses a completely separate lowering.

Hopefully this can be used as the basis for non-x86 perfect forwarding.

Reviewers: majnemer

Differential Revision: http://reviews.llvm.org/D6156

llvm-svn: 224745
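To make the forwarding pattern concrete, the sketch below (illustrative only, not part of this commit) shows the kind of thunk the new lowering has to get right: a vectorcall vararg thunk that musttail-forwards all of its arguments to a target whose real signature takes AVX vectors. The names @takes_vectors and @forward_to_vectors are hypothetical; the syntax mirrors the typed-pointer IR used by the tests below.

; Illustrative sketch only. Callers pass the <8 x float> arguments in YMM
; registers, but the thunk itself is declared variadic, so the musttail
; lowering must preserve every potential vector argument register across
; the forward.
declare x86_vectorcallcc i32 @takes_vectors(<8 x float>, <8 x float>)

define x86_vectorcallcc i32 @forward_to_vectors(...) {
  %r = musttail call x86_vectorcallcc i32 (...)* bitcast (i32 (<8 x float>, <8 x float>)* @takes_vectors to i32 (...)*) (...)
  ret i32 %r
}

Under the previous approach, which reused the vararg lowering, such vector arguments risked being handled the way vararg ABIs handle them (in memory); the separate lowering keeps them in registers, which the new test exercises with the xmm/ymm/zmm spill-and-fill checks.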
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/X86/musttail-fastcall.ll  109
-rw-r--r--  llvm/test/CodeGen/X86/musttail-varargs.ll     7
2 files changed, 116 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/musttail-fastcall.ll b/llvm/test/CodeGen/X86/musttail-fastcall.ll
new file mode 100644
index 00000000000..c7e5ffcfa87
--- /dev/null
+++ b/llvm/test/CodeGen/X86/musttail-fastcall.ll
@@ -0,0 +1,109 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
+; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
+
+; While we don't support varargs with fastcall, we do support forwarding.
+
+@asdf = internal constant [4 x i8] c"asdf"
+
+declare void @puts(i8*)
+
+define i32 @call_fast_thunk() {
+ %r = call x86_fastcallcc i32 (...)* @fast_thunk(i32 inreg 1, i32 inreg 2, i32 3)
+ ret i32 %r
+}
+
+define x86_fastcallcc i32 @fast_thunk(...) {
+ call void @puts(i8* getelementptr ([4 x i8]* @asdf, i32 0, i32 0))
+ %r = musttail call x86_fastcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @fast_target to i32 (...)*) (...)
+ ret i32 %r
+}
+
+; Check that we spill and fill around the call to puts.
+
+; CHECK-LABEL: @fast_thunk@0:
+; CHECK-DAG: movl %ecx, {{.*}}
+; CHECK-DAG: movl %edx, {{.*}}
+; CHECK: calll _puts
+; CHECK-DAG: movl {{.*}}, %ecx
+; CHECK-DAG: movl {{.*}}, %edx
+; CHECK: jmp @fast_target@12
+
+define x86_fastcallcc i32 @fast_target(i32 inreg %a, i32 inreg %b, i32 %c) {
+ %a0 = add i32 %a, %b
+ %a1 = add i32 %a0, %c
+ ret i32 %a1
+}
+
+; Repeat the test for vectorcall, which has XMM registers.
+
+define i32 @call_vector_thunk() {
+ %r = call x86_vectorcallcc i32 (...)* @vector_thunk(i32 inreg 1, i32 inreg 2, i32 3)
+ ret i32 %r
+}
+
+define x86_vectorcallcc i32 @vector_thunk(...) {
+ call void @puts(i8* getelementptr ([4 x i8]* @asdf, i32 0, i32 0))
+ %r = musttail call x86_vectorcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @vector_target to i32 (...)*) (...)
+ ret i32 %r
+}
+
+; Check that we spill and fill SSE registers around the call to puts.
+
+; CHECK-LABEL: vector_thunk@@0:
+; CHECK-DAG: movl %ecx, {{.*}}
+; CHECK-DAG: movl %edx, {{.*}}
+
+; SSE2-DAG: movups %xmm0, {{.*}}
+; SSE2-DAG: movups %xmm1, {{.*}}
+; SSE2-DAG: movups %xmm2, {{.*}}
+; SSE2-DAG: movups %xmm3, {{.*}}
+; SSE2-DAG: movups %xmm4, {{.*}}
+; SSE2-DAG: movups %xmm5, {{.*}}
+
+; AVX-DAG: vmovups %ymm0, {{.*}}
+; AVX-DAG: vmovups %ymm1, {{.*}}
+; AVX-DAG: vmovups %ymm2, {{.*}}
+; AVX-DAG: vmovups %ymm3, {{.*}}
+; AVX-DAG: vmovups %ymm4, {{.*}}
+; AVX-DAG: vmovups %ymm5, {{.*}}
+
+; AVX512-DAG: vmovups %zmm0, {{.*}}
+; AVX512-DAG: vmovups %zmm1, {{.*}}
+; AVX512-DAG: vmovups %zmm2, {{.*}}
+; AVX512-DAG: vmovups %zmm3, {{.*}}
+; AVX512-DAG: vmovups %zmm4, {{.*}}
+; AVX512-DAG: vmovups %zmm5, {{.*}}
+
+; CHECK: calll _puts
+
+; SSE2-DAG: movups {{.*}}, %xmm0
+; SSE2-DAG: movups {{.*}}, %xmm1
+; SSE2-DAG: movups {{.*}}, %xmm2
+; SSE2-DAG: movups {{.*}}, %xmm3
+; SSE2-DAG: movups {{.*}}, %xmm4
+; SSE2-DAG: movups {{.*}}, %xmm5
+
+; AVX-DAG: vmovups {{.*}}, %ymm0
+; AVX-DAG: vmovups {{.*}}, %ymm1
+; AVX-DAG: vmovups {{.*}}, %ymm2
+; AVX-DAG: vmovups {{.*}}, %ymm3
+; AVX-DAG: vmovups {{.*}}, %ymm4
+; AVX-DAG: vmovups {{.*}}, %ymm5
+
+; AVX512-DAG: vmovups {{.*}}, %zmm0
+; AVX512-DAG: vmovups {{.*}}, %zmm1
+; AVX512-DAG: vmovups {{.*}}, %zmm2
+; AVX512-DAG: vmovups {{.*}}, %zmm3
+; AVX512-DAG: vmovups {{.*}}, %zmm4
+; AVX512-DAG: vmovups {{.*}}, %zmm5
+
+; CHECK-DAG: movl {{.*}}, %ecx
+; CHECK-DAG: movl {{.*}}, %edx
+; CHECK: jmp vector_target@@12
+
+define x86_vectorcallcc i32 @vector_target(i32 inreg %a, i32 inreg %b, i32 %c) {
+ %a0 = add i32 %a, %b
+ %a1 = add i32 %a0, %c
+ ret i32 %a1
+}
diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll
index 1e99c141c03..b6ca184e71f 100644
--- a/llvm/test/CodeGen/X86/musttail-varargs.ll
+++ b/llvm/test/CodeGen/X86/musttail-varargs.ll
@@ -5,9 +5,16 @@
; pack. Doing a normal call will clobber all argument registers, and we will
; spill around it. A simple adjustment should not require any XMM spills.
+declare void @llvm.va_start(i8*) nounwind
+
declare void(i8*, ...)* @get_f(i8* %this)
define void @f_thunk(i8* %this, ...) {
+ ; Use va_start so that we exercise the combination.
+ %ap = alloca [4 x i8*], align 16
+ %ap_i8 = bitcast [4 x i8*]* %ap to i8*
+ call void @llvm.va_start(i8* %ap_i8)
+
%fptr = call void(i8*, ...)*(i8*)* @get_f(i8* %this)
musttail call void (i8*, ...)* %fptr(i8* %this, ...)
ret void