diff options
author | Reid Kleckner <reid@kleckner.net> | 2014-12-22 23:58:37 +0000 |
---|---|---|
committer | Reid Kleckner <reid@kleckner.net> | 2014-12-22 23:58:37 +0000 |
commit | ce0093344fa1f9a10831038bfd47703b699db5f4 (patch) | |
tree | 8a8a03dc74553dfc2302f41c6a839dde66def234 /llvm/test/CodeGen | |
parent | ea37c1173e0a58a18a95c37535f3ac0abacccc03 (diff) | |
download | bcm5719-llvm-ce0093344fa1f9a10831038bfd47703b699db5f4.tar.gz bcm5719-llvm-ce0093344fa1f9a10831038bfd47703b699db5f4.zip |
Make musttail more robust for vector types on x86
Previously I tried to plug musttail into the existing vararg lowering
code. That turned out to be a mistake, because non-vararg calls use
significantly different register lowering, even on x86. For example, AVX
vectors are usually passed in registers to normal functions and in memory
to vararg functions. Now musttail uses a completely separate lowering.
Hopefully this can be used as the basis for non-x86 perfect forwarding.
Reviewers: majnemer
Differential Revision: http://reviews.llvm.org/D6156
llvm-svn: 224745
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/X86/musttail-fastcall.ll | 109 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/musttail-varargs.ll | 7 |
2 files changed, 116 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/musttail-fastcall.ll b/llvm/test/CodeGen/X86/musttail-fastcall.ll new file mode 100644 index 00000000000..c7e5ffcfa87 --- /dev/null +++ b/llvm/test/CodeGen/X86/musttail-fastcall.ll @@ -0,0 +1,109 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2 +; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX +; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 + +; While we don't support varargs with fastcall, we do support forwarding. + +@asdf = internal constant [4 x i8] c"asdf" + +declare void @puts(i8*) + +define i32 @call_fast_thunk() { + %r = call x86_fastcallcc i32 (...)* @fast_thunk(i32 inreg 1, i32 inreg 2, i32 3) + ret i32 %r +} + +define x86_fastcallcc i32 @fast_thunk(...) { + call void @puts(i8* getelementptr ([4 x i8]* @asdf, i32 0, i32 0)) + %r = musttail call x86_fastcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @fast_target to i32 (...)*) (...) + ret i32 %r +} + +; Check that we spill and fill around the call to puts. + +; CHECK-LABEL: @fast_thunk@0: +; CHECK-DAG: movl %ecx, {{.*}} +; CHECK-DAG: movl %edx, {{.*}} +; CHECK: calll _puts +; CHECK-DAG: movl {{.*}}, %ecx +; CHECK-DAG: movl {{.*}}, %edx +; CHECK: jmp @fast_target@12 + +define x86_fastcallcc i32 @fast_target(i32 inreg %a, i32 inreg %b, i32 %c) { + %a0 = add i32 %a, %b + %a1 = add i32 %a0, %c + ret i32 %a1 +} + +; Repeat the test for vectorcall, which has XMM registers. + +define i32 @call_vector_thunk() { + %r = call x86_vectorcallcc i32 (...)* @vector_thunk(i32 inreg 1, i32 inreg 2, i32 3) + ret i32 %r +} + +define x86_vectorcallcc i32 @vector_thunk(...) { + call void @puts(i8* getelementptr ([4 x i8]* @asdf, i32 0, i32 0)) + %r = musttail call x86_vectorcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @vector_target to i32 (...)*) (...) 
+ ret i32 %r +} + +; Check that we spill and fill SSE registers around the call to puts. + +; CHECK-LABEL: vector_thunk@@0: +; CHECK-DAG: movl %ecx, {{.*}} +; CHECK-DAG: movl %edx, {{.*}} + +; SSE2-DAG: movups %xmm0, {{.*}} +; SSE2-DAG: movups %xmm1, {{.*}} +; SSE2-DAG: movups %xmm2, {{.*}} +; SSE2-DAG: movups %xmm3, {{.*}} +; SSE2-DAG: movups %xmm4, {{.*}} +; SSE2-DAG: movups %xmm5, {{.*}} + +; AVX-DAG: vmovups %ymm0, {{.*}} +; AVX-DAG: vmovups %ymm1, {{.*}} +; AVX-DAG: vmovups %ymm2, {{.*}} +; AVX-DAG: vmovups %ymm3, {{.*}} +; AVX-DAG: vmovups %ymm4, {{.*}} +; AVX-DAG: vmovups %ymm5, {{.*}} + +; AVX512-DAG: vmovups %zmm0, {{.*}} +; AVX512-DAG: vmovups %zmm1, {{.*}} +; AVX512-DAG: vmovups %zmm2, {{.*}} +; AVX512-DAG: vmovups %zmm3, {{.*}} +; AVX512-DAG: vmovups %zmm4, {{.*}} +; AVX512-DAG: vmovups %zmm5, {{.*}} + +; CHECK: calll _puts + +; SSE2-DAG: movups {{.*}}, %xmm0 +; SSE2-DAG: movups {{.*}}, %xmm1 +; SSE2-DAG: movups {{.*}}, %xmm2 +; SSE2-DAG: movups {{.*}}, %xmm3 +; SSE2-DAG: movups {{.*}}, %xmm4 +; SSE2-DAG: movups {{.*}}, %xmm5 + +; AVX-DAG: vmovups {{.*}}, %ymm0 +; AVX-DAG: vmovups {{.*}}, %ymm1 +; AVX-DAG: vmovups {{.*}}, %ymm2 +; AVX-DAG: vmovups {{.*}}, %ymm3 +; AVX-DAG: vmovups {{.*}}, %ymm4 +; AVX-DAG: vmovups {{.*}}, %ymm5 + +; AVX512-DAG: vmovups {{.*}}, %zmm0 +; AVX512-DAG: vmovups {{.*}}, %zmm1 +; AVX512-DAG: vmovups {{.*}}, %zmm2 +; AVX512-DAG: vmovups {{.*}}, %zmm3 +; AVX512-DAG: vmovups {{.*}}, %zmm4 +; AVX512-DAG: vmovups {{.*}}, %zmm5 + +; CHECK-DAG: movl {{.*}}, %ecx +; CHECK-DAG: movl {{.*}}, %edx +; CHECK: jmp vector_target@@12 + +define x86_vectorcallcc i32 @vector_target(i32 inreg %a, i32 inreg %b, i32 %c) { + %a0 = add i32 %a, %b + %a1 = add i32 %a0, %c + ret i32 %a1 +} diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll index 1e99c141c03..b6ca184e71f 100644 --- a/llvm/test/CodeGen/X86/musttail-varargs.ll +++ b/llvm/test/CodeGen/X86/musttail-varargs.ll @@ -5,9 +5,16 @@ ; pack. 
Doing a normal call will clobber all argument registers, and we will ; spill around it. A simple adjustment should not require any XMM spills. +declare void @llvm.va_start(i8*) nounwind + declare void(i8*, ...)* @get_f(i8* %this) define void @f_thunk(i8* %this, ...) { + ; Use va_start so that we exercise the combination. + %ap = alloca [4 x i8*], align 16 + %ap_i8 = bitcast [4 x i8*]* %ap to i8* + call void @llvm.va_start(i8* %ap_i8) + %fptr = call void(i8*, ...)*(i8*)* @get_f(i8* %this) musttail call void (i8*, ...)* %fptr(i8* %this, ...) ret void |