diff options
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/musttail-indirect.ll | 124 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/musttail-thiscall.ll | 31 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/musttail.ll | 75 |
3 files changed, 226 insertions, 4 deletions
diff --git a/llvm/test/CodeGen/X86/musttail-indirect.ll b/llvm/test/CodeGen/X86/musttail-indirect.ll new file mode 100644 index 00000000000..9d21b5ea5d5 --- /dev/null +++ b/llvm/test/CodeGen/X86/musttail-indirect.ll @@ -0,0 +1,124 @@ +; RUN: llc < %s -mtriple=i686-win32 | FileCheck %s +; RUN: llc < %s -mtriple=i686-win32 -O0 | FileCheck %s + +; IR simplified from the following C++ snippet compiled for i686-windows-msvc: + +; struct A { A(); ~A(); int a; }; +; +; struct B { +; virtual int f(int); +; virtual int g(A, int, A); +; virtual void h(A, int, A); +; virtual A i(A, int, A); +; virtual A j(int); +; }; +; +; int (B::*mp_f)(int) = &B::f; +; int (B::*mp_g)(A, int, A) = &B::g; +; void (B::*mp_h)(A, int, A) = &B::h; +; A (B::*mp_i)(A, int, A) = &B::i; +; A (B::*mp_j)(int) = &B::j; + +; Each member pointer creates a thunk. The ones with inalloca are required to +; be tail calls by the ABI, even at O0. + +%struct.B = type { i32 (...)** } +%struct.A = type { i32 } + +; CHECK-LABEL: f_thunk: +; CHECK: jmpl +; CHECK-NOT: ret +define x86_thiscallcc i32 @f_thunk(%struct.B* %this, i32) { +entry: + %1 = bitcast %struct.B* %this to i32 (%struct.B*, i32)*** + %vtable = load i32 (%struct.B*, i32)*** %1 + %2 = load i32 (%struct.B*, i32)** %vtable + %3 = musttail call x86_thiscallcc i32 %2(%struct.B* %this, i32 %0) + ret i32 %3 +} + +; Inalloca thunks shouldn't require any stores to the stack. 
+; CHECK-LABEL: g_thunk: +; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}} +; CHECK: jmpl +; CHECK-NOT: ret +define x86_thiscallcc i32 @g_thunk(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca) { +entry: + %1 = bitcast %struct.B* %this to i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** + %vtable = load i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** %1 + %vfn = getelementptr inbounds i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vtable, i32 1 + %2 = load i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vfn + %3 = musttail call x86_thiscallcc i32 %2(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca %0) + ret i32 %3 +} + +; CHECK-LABEL: h_thunk: +; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}} +; CHECK: jmpl +; CHECK-NOT: ret +define x86_thiscallcc void @h_thunk(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca) { +entry: + %1 = bitcast %struct.B* %this to void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** + %vtable = load void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** %1 + %vfn = getelementptr inbounds void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vtable, i32 2 + %2 = load void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vfn + musttail call x86_thiscallcc void %2(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca %0) + ret void +} + +; CHECK-LABEL: i_thunk: +; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}} +; CHECK: jmpl +; CHECK-NOT: ret +define x86_thiscallcc %struct.A* @i_thunk(%struct.B* %this, <{ %struct.A*, %struct.A, i32, %struct.A }>* inalloca) { +entry: + %1 = bitcast %struct.B* %this to %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)*** + %vtable = load %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)*** %1 + %vfn = getelementptr inbounds %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)** %vtable, i32 3 + %2 = load %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)** %vfn + %3 = musttail call 
x86_thiscallcc %struct.A* %2(%struct.B* %this, <{ %struct.A*, %struct.A, i32, %struct.A }>* inalloca %0) + ret %struct.A* %3 +} + +; CHECK-LABEL: j_thunk: +; CHECK: jmpl +; CHECK-NOT: ret +define x86_thiscallcc void @j_thunk(%struct.A* noalias sret %agg.result, %struct.B* %this, i32) { +entry: + %1 = bitcast %struct.B* %this to void (%struct.A*, %struct.B*, i32)*** + %vtable = load void (%struct.A*, %struct.B*, i32)*** %1 + %vfn = getelementptr inbounds void (%struct.A*, %struct.B*, i32)** %vtable, i32 4 + %2 = load void (%struct.A*, %struct.B*, i32)** %vfn + musttail call x86_thiscallcc void %2(%struct.A* sret %agg.result, %struct.B* %this, i32 %0) + ret void +} + +; CHECK-LABEL: _stdcall_thunk@8: +; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}} +; CHECK: jmpl +; CHECK-NOT: ret +define x86_stdcallcc i32 @stdcall_thunk(<{ %struct.B*, %struct.A }>* inalloca) { +entry: + %this_ptr = getelementptr inbounds <{ %struct.B*, %struct.A }>* %0, i32 0, i32 0 + %this = load %struct.B** %this_ptr + %1 = bitcast %struct.B* %this to i32 (<{ %struct.B*, %struct.A }>*)*** + %vtable = load i32 (<{ %struct.B*, %struct.A }>*)*** %1 + %vfn = getelementptr inbounds i32 (<{ %struct.B*, %struct.A }>*)** %vtable, i32 1 + %2 = load i32 (<{ %struct.B*, %struct.A }>*)** %vfn + %3 = musttail call x86_stdcallcc i32 %2(<{ %struct.B*, %struct.A }>* inalloca %0) + ret i32 %3 +} + +; CHECK-LABEL: @fastcall_thunk@8: +; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}} +; CHECK: jmpl +; CHECK-NOT: ret +define x86_fastcallcc i32 @fastcall_thunk(%struct.B* inreg %this, <{ %struct.A }>* inalloca) { +entry: + %1 = bitcast %struct.B* %this to i32 (%struct.B*, <{ %struct.A }>*)*** + %vtable = load i32 (%struct.B*, <{ %struct.A }>*)*** %1 + %vfn = getelementptr inbounds i32 (%struct.B*, <{ %struct.A }>*)** %vtable, i32 1 + %2 = load i32 (%struct.B*, <{ %struct.A }>*)** %vfn + %3 = musttail call x86_fastcallcc i32 %2(%struct.B* inreg %this, <{ %struct.A }>* inalloca %0) + ret i32 %3 +} diff --git 
a/llvm/test/CodeGen/X86/musttail-thiscall.ll b/llvm/test/CodeGen/X86/musttail-thiscall.ll new file mode 100644 index 00000000000..8ea12482e50 --- /dev/null +++ b/llvm/test/CodeGen/X86/musttail-thiscall.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=x86 < %s | FileCheck %s +; RUN: llc -march=x86 -O0 < %s | FileCheck %s + +; CHECK-LABEL: t1: +; CHECK: jmp {{_?}}t1_callee +define x86_thiscallcc void @t1(i8* %this) { + %adj = getelementptr i8* %this, i32 4 + musttail call x86_thiscallcc void @t1_callee(i8* %adj) + ret void +} +declare x86_thiscallcc void @t1_callee(i8* %this) + +; CHECK-LABEL: t2: +; CHECK: jmp {{_?}}t2_callee +define x86_thiscallcc i32 @t2(i8* %this, i32 %a) { + %adj = getelementptr i8* %this, i32 4 + %rv = musttail call x86_thiscallcc i32 @t2_callee(i8* %adj, i32 %a) + ret i32 %rv +} +declare x86_thiscallcc i32 @t2_callee(i8* %this, i32 %a) + +; CHECK-LABEL: t3: +; CHECK: jmp {{_?}}t3_callee +define x86_thiscallcc i8* @t3(i8* %this, <{ i8*, i32 }>* inalloca %args) { + %adj = getelementptr i8* %this, i32 4 + %a_ptr = getelementptr <{ i8*, i32 }>* %args, i32 0, i32 1 + store i32 0, i32* %a_ptr + %rv = musttail call x86_thiscallcc i8* @t3_callee(i8* %adj, <{ i8*, i32 }>* inalloca %args) + ret i8* %rv +} +declare x86_thiscallcc i8* @t3_callee(i8* %this, <{ i8*, i32 }>* inalloca %args); diff --git a/llvm/test/CodeGen/X86/musttail.ll b/llvm/test/CodeGen/X86/musttail.ll index 75b217f9947..ca5d3119cf1 100644 --- a/llvm/test/CodeGen/X86/musttail.ll +++ b/llvm/test/CodeGen/X86/musttail.ll @@ -1,8 +1,6 @@ ; RUN: llc -march=x86 < %s | FileCheck %s - -; FIXME: Eliminate this tail call at -O0, since musttail is a correctness -; requirement. 
-; RUN: not llc -march=x86 -O0 < %s +; RUN: llc -march=x86 -O0 < %s | FileCheck %s +; RUN: llc -march=x86 -disable-tail-calls < %s | FileCheck %s declare void @t1_callee(i8*) define void @t1(i32* %a) { @@ -21,3 +19,72 @@ define i32* @t2() { %w = bitcast i8* %v to i32* ret i32* %w } + +; Complex frame layout: stack realignment with dynamic alloca. +define void @t3(i32 %n) alignstack(32) nounwind { +entry: +; CHECK-LABEL: t3: +; CHECK: pushl %ebp +; CHECK: pushl %esi +; CHECK: andl $-32, %esp +; CHECK: movl %esp, %esi +; CHECK: popl %esi +; CHECK: popl %ebp +; CHECK-NEXT: jmp {{_?}}t3_callee + %a = alloca i8, i32 %n + call void @capture(i8* %a) + musttail call void @t3_callee(i32 %n) nounwind + ret void +} + +declare void @capture(i8*) +declare void @t3_callee(i32) + +; Test that we actually copy in and out stack arguments that aren't forwarded +; without modification. +define i32 @t4({}* %fn, i32 %n, i32 %r) { +; CHECK-LABEL: t4: +; CHECK: incl %[[r:.*]] +; CHECK: decl %[[n:.*]] +; CHECK: movl %[[r]], {{[0-9]+}}(%esp) +; CHECK: movl %[[n]], {{[0-9]+}}(%esp) +; CHECK: jmpl *%{{.*}} + +entry: + %r1 = add i32 %r, 1 + %n1 = sub i32 %n, 1 + %fn_cast = bitcast {}* %fn to i32 ({}*, i32, i32)* + %r2 = musttail call i32 %fn_cast({}* %fn, i32 %n1, i32 %r1) + ret i32 %r2 +} + +; Combine the complex stack frame with the parameter modification. +define i32 @t5({}* %fn, i32 %n, i32 %r) alignstack(32) { +; CHECK-LABEL: t5: +; CHECK: pushl %ebp +; CHECK: movl %esp, %ebp +; CHECK: pushl %esi +; Align the stack. +; CHECK: andl $-32, %esp +; CHECK: movl %esp, %esi +; Modify the args. +; CHECK: incl %[[r:.*]] +; CHECK: decl %[[n:.*]] +; Store them through ebp, since that's the only stable arg pointer. +; CHECK: movl %[[r]], {{[0-9]+}}(%ebp) +; CHECK: movl %[[n]], {{[0-9]+}}(%ebp) +; Epilogue. 
+; CHECK: leal {{[-0-9]+}}(%ebp), %esp +; CHECK: popl %esi +; CHECK: popl %ebp +; CHECK: jmpl *%{{.*}} + +entry: + %a = alloca i8, i32 %n + call void @capture(i8* %a) + %r1 = add i32 %r, 1 + %n1 = sub i32 %n, 1 + %fn_cast = bitcast {}* %fn to i32 ({}*, i32, i32)* + %r2 = musttail call i32 %fn_cast({}* %fn, i32 %n1, i32 %r1) + ret i32 %r2 +} |

