-rw-r--r--  llvm/docs/BitCodeFormat.rst                         1
-rw-r--r--  llvm/docs/CodeGenerator.rst                         4
-rw-r--r--  llvm/docs/LangRef.rst                               17
-rw-r--r--  llvm/include/llvm/IR/CallingConv.h                  5
-rw-r--r--  llvm/lib/AsmParser/LLLexer.cpp                      1
-rw-r--r--  llvm/lib/AsmParser/LLParser.cpp                     2
-rw-r--r--  llvm/lib/AsmParser/LLToken.h                        1
-rw-r--r--  llvm/lib/CodeGen/Analysis.cpp                       3
-rw-r--r--  llvm/lib/IR/AsmWriter.cpp                           1
-rw-r--r--  llvm/lib/Target/X86/X86CallingConv.td               2
-rw-r--r--  llvm/lib/Target/X86/X86FastISel.cpp                 10
-rw-r--r--  llvm/lib/Target/X86/X86FrameLowering.cpp            3
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp             19
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.h                  1
-rw-r--r--  llvm/test/CodeGen/X86/musttail-tailcc.ll            114
-rw-r--r--  llvm/test/CodeGen/X86/tailcall-tailcc.ll            155
-rw-r--r--  llvm/test/CodeGen/X86/tailcc-calleesave.ll          19
-rw-r--r--  llvm/test/CodeGen/X86/tailcc-disable-tail-calls.ll  40
-rw-r--r--  llvm/test/CodeGen/X86/tailcc-fastcc.ll              49
-rw-r--r--  llvm/test/CodeGen/X86/tailcc-fastisel.ll            18
-rw-r--r--  llvm/test/CodeGen/X86/tailcc-largecode.ll           71
-rw-r--r--  llvm/test/CodeGen/X86/tailcc-stackalign.ll          23
-rw-r--r--  llvm/test/CodeGen/X86/tailcc-structret.ll           7
-rw-r--r--  llvm/test/CodeGen/X86/tailccbyval.ll                21
-rw-r--r--  llvm/test/CodeGen/X86/tailccbyval64.ll              42
-rw-r--r--  llvm/test/CodeGen/X86/tailccfp.ll                   6
-rw-r--r--  llvm/test/CodeGen/X86/tailccfp2.ll                  27
-rw-r--r--  llvm/test/CodeGen/X86/tailccpic1.ll                 16
-rw-r--r--  llvm/test/CodeGen/X86/tailccpic2.ll                 15
-rw-r--r--  llvm/test/CodeGen/X86/tailccstack64.ll              28
-rw-r--r--  llvm/utils/vim/syntax/llvm.vim                      1
31 files changed, 703 insertions(+), 19 deletions(-)
diff --git a/llvm/docs/BitCodeFormat.rst b/llvm/docs/BitCodeFormat.rst
index 4e653ae55d5..dce84620fd7 100644
--- a/llvm/docs/BitCodeFormat.rst
+++ b/llvm/docs/BitCodeFormat.rst
@@ -794,6 +794,7 @@ function. The operand fields are:
* ``preserve_allcc``: code 15
* ``swiftcc`` : code 16
* ``cxx_fast_tlscc``: code 17
+ * ``tailcc`` : code 18
* ``x86_stdcallcc``: code 64
* ``x86_fastcallcc``: code 65
* ``arm_apcscc``: code 66
diff --git a/llvm/docs/CodeGenerator.rst b/llvm/docs/CodeGenerator.rst
index 343b9879972..75330a5df3b 100644
--- a/llvm/docs/CodeGenerator.rst
+++ b/llvm/docs/CodeGenerator.rst
@@ -2068,12 +2068,12 @@ supported on x86/x86-64, PowerPC, and WebAssembly. It is performed on x86/x86-64
and PowerPC if:
* Caller and callee have the calling convention ``fastcc``, ``cc 10`` (GHC
- calling convention) or ``cc 11`` (HiPE calling convention).
+ calling convention), ``cc 11`` (HiPE calling convention), or ``tailcc``.
* The call is a tail call - in tail position (ret immediately follows call and
ret uses value of call or is void).
-* Option ``-tailcallopt`` is enabled.
+* Option ``-tailcallopt`` is enabled or the calling convention is ``tailcc``.
* Platform-specific constraints are met.
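
As a hedged illustration of these conditions (the function names below are invented for the example and are not part of this patch): both caller and callee use tailcc and the call sits in tail position, so after this change the tail call is guaranteed even without -tailcallopt.

; Illustrative sketch only -- not from this commit.
declare tailcc i32 @callee(i32)

define tailcc i32 @caller(i32 %x) {
entry:
  ; ret immediately follows the call and uses its value: tail position.
  %r = tail call tailcc i32 @callee(i32 %x)
  ret i32 %r
}

Running plain llc over this should emit a jmp to callee rather than a call, which is what the new tests at the end of this patch check for.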
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index d9a38907c92..e797b1f9a15 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -299,7 +299,7 @@ added in the future:
allows the target to use whatever tricks it wants to produce fast
code for the target, without having to conform to an externally
specified ABI (Application Binary Interface). `Tail calls can only
- be optimized when this, the GHC or the HiPE convention is
+ be optimized when this, the tailcc, the GHC or the HiPE convention is
used. <CodeGenerator.html#id80>`_ This calling convention does not
support varargs and requires the prototype of all callees to exactly
match the prototype of the function definition.
@@ -436,6 +436,14 @@ added in the future:
- On X86-64 RCX and R8 are available for additional integer returns, and
XMM2 and XMM3 are available for additional FP/vector returns.
- On iOS platforms, we use AAPCS-VFP calling convention.
+"``tailcc``" - Tail callable calling convention
+ This calling convention ensures that calls in tail position will always be
+ tail call optimized. This calling convention is equivalent to fastcc,
+ except for an additional guarantee that tail calls will be produced
+ whenever possible. `Tail calls can only be optimized when this, the fastcc,
+ the GHC or the HiPE convention is used. <CodeGenerator.html#id80>`_ This
+ calling convention does not support varargs and requires the prototype of
+ all callees to exactly match the prototype of the function definition.
"``cc <n>``" - Numbered convention
Any calling convention may be specified by number, allowing
target-specific calling conventions to be used. Target specific
@@ -10232,11 +10240,12 @@ This instruction requires several arguments:
Tail call optimization for calls marked ``tail`` is guaranteed to occur if
the following conditions are met:
- - Caller and callee both have the calling convention ``fastcc``.
+ - Caller and callee both have the calling convention ``fastcc`` or ``tailcc``.
- The call is in tail position (ret immediately follows call and ret
uses value of call or is void).
- - Option ``-tailcallopt`` is enabled, or
- ``llvm::GuaranteedTailCallOpt`` is ``true``.
+ - Option ``-tailcallopt`` is enabled,
+ ``llvm::GuaranteedTailCallOpt`` is ``true``, or the calling convention
+ is ``tailcc``.
- `Platform-specific constraints are
met. <CodeGenerator.html#tailcallopt>`_
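
For contrast with the guarantee list above, a minimal sketch of the fastcc case (illustrative names, assuming the documented semantics): the same call shape is only guaranteed to become a tail call when -tailcallopt is passed or llvm::GuaranteedTailCallOpt is set, whereas a tailcc version needs no flag.

; Illustrative sketch only -- guaranteed TCO for fastcc still requires
; llc -tailcallopt; with tailcc the guarantee holds by convention.
declare fastcc i32 @fast_callee(i32)

define fastcc i32 @fast_caller(i32 %x) {
entry:
  %r = tail call fastcc i32 @fast_callee(i32 %x)
  ret i32 %r
}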
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
index 6f4989268fa..c1c979c2e2a 100644
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -75,6 +75,11 @@ namespace CallingConv {
// CXX_FAST_TLS - Calling convention for access functions.
CXX_FAST_TLS = 17,
+ /// Tail - This calling convention attempts to make calls as fast as
+ /// possible while guaranteeing that tail call optimization can always
+ /// be performed.
+ Tail = 18,
+
// Target - This is the start of the target-specific calling conventions,
// e.g. fastcall and thiscall on X86.
FirstTargetCC = 64,
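
Because Tail is assigned ID 18, the convention can also be spelled numerically in textual IR. A round-trip sketch (hypothetical function name, assuming the parser and printer changes later in this patch):

; Illustrative sketch only: cc 18 is the numeric spelling of the new
; convention and should be printed back as tailcc by the updated AsmWriter.
define cc 18 void @numeric_tailcc() {
  ret void
}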
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 72d2357c293..5292b0e6274 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -622,6 +622,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(amdgpu_ps);
KEYWORD(amdgpu_cs);
KEYWORD(amdgpu_kernel);
+ KEYWORD(tailcc);
KEYWORD(cc);
KEYWORD(c);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 5ea0b7d39c1..9bb3ca145c2 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1955,6 +1955,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'amdgpu_ps'
/// ::= 'amdgpu_cs'
/// ::= 'amdgpu_kernel'
+/// ::= 'tailcc'
/// ::= 'cc' UINT
///
bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
@@ -2000,6 +2001,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break;
case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break;
case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break;
+ case lltok::kw_tailcc: CC = CallingConv::Tail; break;
case lltok::kw_cc: {
Lex.Lex();
return ParseUInt32(CC);
diff --git a/llvm/lib/AsmParser/LLToken.h b/llvm/lib/AsmParser/LLToken.h
index 0e9ba4db474..f49feb2dc14 100644
--- a/llvm/lib/AsmParser/LLToken.h
+++ b/llvm/lib/AsmParser/LLToken.h
@@ -168,6 +168,7 @@ enum Kind {
kw_amdgpu_ps,
kw_amdgpu_cs,
kw_amdgpu_kernel,
+ kw_tailcc,
// Attributes:
kw_attributes,
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index 3ef90d32daf..6c059665fca 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -523,7 +523,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
// longjmp on x86), it can end up causing miscompilation that has not
// been fully understood.
if (!Ret &&
- (!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term)))
+ ((!TM.Options.GuaranteedTailCallOpt &&
+ CS.getCallingConv() != CallingConv::Tail) || !isa<UnreachableInst>(Term)))
return false;
// If I will have a chain, make sure no other instruction that will have a
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 3f140ba01d8..91f22dbb17a 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -352,6 +352,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::PreserveAll: Out << "preserve_allcc"; break;
case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break;
case CallingConv::GHC: Out << "ghccc"; break;
+ case CallingConv::Tail: Out << "tailcc"; break;
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 1c3034a5116..4c49d68bec9 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -433,6 +433,7 @@ defm X86_SysV64_RegCall :
def RetCC_X86_32 : CallingConv<[
// If FastCC, use RetCC_X86_32_Fast.
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+ CCIfCC<"CallingConv::Tail", CCDelegateTo<RetCC_X86_32_Fast>>,
// If HiPE, use RetCC_X86_32_HiPE.
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
@@ -1000,6 +1001,7 @@ def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win32_VectorCall>>,
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
+ CCIfCC<"CallingConv::Tail", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_32_RegCall>>,
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 97abd084bf5..e5e089d07d5 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -1160,6 +1160,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
CallingConv::ID CC = F.getCallingConv();
if (CC != CallingConv::C &&
CC != CallingConv::Fast &&
+ CC != CallingConv::Tail &&
CC != CallingConv::X86_FastCall &&
CC != CallingConv::X86_StdCall &&
CC != CallingConv::X86_ThisCall &&
@@ -1173,7 +1174,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
+ if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
+ CC == CallingConv::Tail)
return false;
// Let SDISel handle vararg functions.
@@ -3157,7 +3159,7 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
if (Subtarget->getTargetTriple().isOSMSVCRT())
return 0;
if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
- CC == CallingConv::HiPE)
+ CC == CallingConv::HiPE || CC == CallingConv::Tail)
return 0;
if (CS)
@@ -3208,6 +3210,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
default: return false;
case CallingConv::C:
case CallingConv::Fast:
+ case CallingConv::Tail:
case CallingConv::WebKit_JS:
case CallingConv::Swift:
case CallingConv::X86_FastCall:
@@ -3224,7 +3227,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
+ if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
+ CC == CallingConv::Tail)
return false;
// Don't know how to handle Win64 varargs yet. Nothing special needed for
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index af3a33ffd4e..fabc3e581ff 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -2269,7 +2269,8 @@ GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Pr
bool IsNested = HasNestArgument(&MF);
if (CallingConvention == CallingConv::X86_FastCall ||
- CallingConvention == CallingConv::Fast) {
+ CallingConvention == CallingConv::Fast ||
+ CallingConvention == CallingConv::Tail) {
if (IsNested)
report_fatal_error("Segmented stacks does not support fastcall with "
"nested function.");
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3806b0e2330..052300d6f72 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2963,7 +2963,7 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
static bool canGuaranteeTCO(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
- CC == CallingConv::HHVM);
+ CC == CallingConv::HHVM || CC == CallingConv::Tail);
}
/// Return true if we might ever do TCO for calls with this calling convention.
@@ -2989,7 +2989,7 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
/// Return true if the function is being made into a tailcall target by
/// changing its ABI.
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
- return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
+ return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail;
}
bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
@@ -3615,6 +3615,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
bool IsSibcall = false;
+ bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
+ CallConv == CallingConv::Tail;
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
@@ -3635,8 +3637,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Attr.getValueAsString() == "true")
isTailCall = false;
- if (Subtarget.isPICStyleGOT() &&
- !MF.getTarget().Options.GuaranteedTailCallOpt) {
+ if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) {
// If we are using a GOT, disable tail calls to external symbols with
// default visibility. Tail calling such a symbol requires using a GOT
// relocation, which forces early binding of the symbol. This breaks code
@@ -3663,7 +3664,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
- if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
+ if (!IsGuaranteeTCO && isTailCall)
IsSibcall = true;
if (isTailCall)
@@ -3695,8 +3696,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
- else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
- canGuaranteeTCO(CallConv))
+ else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
@@ -4321,6 +4321,8 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
bool CCMatch = CallerCC == CalleeCC;
bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
+ bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
+ CalleeCC == CallingConv::Tail;
// Win64 functions have extra shadow space for argument homing. Don't do the
// sibcall if the caller and callee have mismatched expectations for this
@@ -4328,7 +4330,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
if (IsCalleeWin64 != IsCallerWin64)
return false;
- if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
+ if (IsGuaranteeTCO) {
if (canGuaranteeTCO(CalleeCC) && CCMatch)
return true;
return false;
@@ -24421,6 +24423,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
case CallingConv::Fast:
+ case CallingConv::Tail:
// Pass 'nest' parameter in EAX.
// Must be kept in sync with X86CallingConv.td
NestReg = X86::EAX;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 4d7495641d9..b5b1c19c455 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -815,6 +815,7 @@ public:
// On Win64, all these conventions just use the default convention.
case CallingConv::C:
case CallingConv::Fast:
+ case CallingConv::Tail:
case CallingConv::Swift:
case CallingConv::X86_FastCall:
case CallingConv::X86_StdCall:
diff --git a/llvm/test/CodeGen/X86/musttail-tailcc.ll b/llvm/test/CodeGen/X86/musttail-tailcc.ll
new file mode 100644
index 00000000000..6057045a77d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/musttail-tailcc.ll
@@ -0,0 +1,114 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s -check-prefix=X32
+
+; tailcc will turn all of these musttail calls into tail calls.
+
+declare tailcc i32 @tailcallee(i32 %a1, i32 %a2)
+
+define tailcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind {
+; X64-LABEL: tailcaller:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: popq %rax
+; X64-NEXT: jmp tailcallee # TAILCALL
+;
+; X32-LABEL: tailcaller:
+; X32: # %bb.0: # %entry
+; X32-NEXT: jmp tailcallee # TAILCALL
+entry:
+ %tmp11 = musttail call tailcc i32 @tailcallee(i32 %in1, i32 %in2)
+ ret i32 %tmp11
+}
+
+declare tailcc i8* @alias_callee()
+
+define tailcc noalias i8* @noalias_caller() nounwind {
+; X64-LABEL: noalias_caller:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: popq %rax
+; X64-NEXT: jmp alias_callee # TAILCALL
+;
+; X32-LABEL: noalias_caller:
+; X32: # %bb.0:
+; X32-NEXT: jmp alias_callee # TAILCALL
+ %p = musttail call tailcc i8* @alias_callee()
+ ret i8* %p
+}
+
+declare tailcc noalias i8* @noalias_callee()
+
+define tailcc i8* @alias_caller() nounwind {
+; X64-LABEL: alias_caller:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: popq %rax
+; X64-NEXT: jmp noalias_callee # TAILCALL
+;
+; X32-LABEL: alias_caller:
+; X32: # %bb.0:
+; X32-NEXT: jmp noalias_callee # TAILCALL
+ %p = musttail call tailcc noalias i8* @noalias_callee()
+ ret i8* %p
+}
+
+define tailcc void @void_test(i32, i32, i32, i32) {
+; X64-LABEL: void_test:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: popq %rax
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: jmp void_test # TAILCALL
+;
+; X32-LABEL: void_test:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: .cfi_offset %esi, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X32-NEXT: addl $8, %esp
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: jmp void_test # TAILCALL
+ entry:
+ musttail call tailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3)
+ ret void
+}
+
+define tailcc i1 @i1test(i32, i32, i32, i32) {
+; X64-LABEL: i1test:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: popq %rax
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: jmp i1test # TAILCALL
+;
+; X32-LABEL: i1test:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: .cfi_offset %esi, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X32-NEXT: addl $8, %esp
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: jmp i1test # TAILCALL
+ entry:
+ %4 = musttail call tailcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
+ ret i1 %4
+}
diff --git a/llvm/test/CodeGen/X86/tailcall-tailcc.ll b/llvm/test/CodeGen/X86/tailcall-tailcc.ll
new file mode 100644
index 00000000000..5a427034a72
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailcall-tailcc.ll
@@ -0,0 +1,155 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s -check-prefix=X32
+
+; With the tailcc convention, CodeGen guarantees a tail call optimization
+; for all of these, even without -tailcallopt.
+
+declare tailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4)
+
+define tailcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind {
+; X64-LABEL: tailcaller:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: popq %rax
+; X64-NEXT: jmp tailcallee # TAILCALL
+;
+; X32-LABEL: tailcaller:
+; X32: # %bb.0: # %entry
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X32-NEXT: addl $8, %esp
+; X32-NEXT: jmp tailcallee # TAILCALL
+entry:
+ %tmp11 = tail call tailcc i32 @tailcallee(i32 %in1, i32 %in2, i32 %in1, i32 %in2)
+ ret i32 %tmp11
+}
+
+declare tailcc i8* @alias_callee()
+
+define tailcc noalias i8* @noalias_caller() nounwind {
+; X64-LABEL: noalias_caller:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: popq %rax
+; X64-NEXT: jmp alias_callee # TAILCALL
+;
+; X32-LABEL: noalias_caller:
+; X32: # %bb.0:
+; X32-NEXT: jmp alias_callee # TAILCALL
+ %p = tail call tailcc i8* @alias_callee()
+ ret i8* %p
+}
+
+declare tailcc noalias i8* @noalias_callee()
+
+define tailcc i8* @alias_caller() nounwind {
+; X64-LABEL: alias_caller:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: popq %rax
+; X64-NEXT: jmp noalias_callee # TAILCALL
+;
+; X32-LABEL: alias_caller:
+; X32: # %bb.0:
+; X32-NEXT: jmp noalias_callee # TAILCALL
+ %p = tail call tailcc noalias i8* @noalias_callee()
+ ret i8* %p
+}
+
+declare tailcc i32 @i32_callee()
+
+define tailcc i32 @ret_undef() nounwind {
+; X64-LABEL: ret_undef:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: popq %rax
+; X64-NEXT: jmp i32_callee # TAILCALL
+;
+; X32-LABEL: ret_undef:
+; X32: # %bb.0:
+; X32-NEXT: jmp i32_callee # TAILCALL
+ %p = tail call tailcc i32 @i32_callee()
+ ret i32 undef
+}
+
+declare tailcc void @does_not_return()
+
+define tailcc i32 @noret() nounwind {
+; X64-LABEL: noret:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: popq %rax
+; X64-NEXT: jmp does_not_return # TAILCALL
+;
+; X32-LABEL: noret:
+; X32: # %bb.0:
+; X32-NEXT: jmp does_not_return # TAILCALL
+ tail call tailcc void @does_not_return()
+ unreachable
+}
+
+define tailcc void @void_test(i32, i32, i32, i32) {
+; X64-LABEL: void_test:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: popq %rax
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: jmp void_test # TAILCALL
+;
+; X32-LABEL: void_test:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: .cfi_offset %esi, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X32-NEXT: addl $8, %esp
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: jmp void_test # TAILCALL
+ entry:
+ tail call tailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3)
+ ret void
+}
+
+define tailcc i1 @i1test(i32, i32, i32, i32) {
+; X64-LABEL: i1test:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: popq %rax
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: jmp i1test # TAILCALL
+;
+; X32-LABEL: i1test:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: .cfi_offset %esi, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X32-NEXT: addl $8, %esp
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: jmp i1test # TAILCALL
+ entry:
+ %4 = tail call tailcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
+ ret i1 %4
+}
diff --git a/llvm/test/CodeGen/X86/tailcc-calleesave.ll b/llvm/test/CodeGen/X86/tailcc-calleesave.ll
new file mode 100644
index 00000000000..09685fb17cb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailcc-calleesave.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mcpu=core < %s | FileCheck %s
+
+target triple = "i686-apple-darwin"
+
+declare tailcc void @foo(i32, i32, i32, i32, i32, i32)
+declare i32* @bar(i32*)
+
+define tailcc void @hoge(i32 %b) nounwind {
+; Do not overwrite pushed callee-save registers
+; CHECK: pushl
+; CHECK: subl $[[SIZE:[0-9]+]], %esp
+; CHECK-NOT: [[SIZE]](%esp)
+ %a = alloca i32
+ store i32 0, i32* %a
+ %d = tail call i32* @bar(i32* %a) nounwind
+ store i32 %b, i32* %d
+ tail call tailcc void @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) nounwind
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/tailcc-disable-tail-calls.ll b/llvm/test/CodeGen/X86/tailcc-disable-tail-calls.ll
new file mode 100644
index 00000000000..3199b8c34b7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailcc-disable-tail-calls.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=NO-OPTION
+; RUN: llc < %s -mtriple=x86_64-- -disable-tail-calls | FileCheck %s --check-prefix=DISABLE-TRUE
+; RUN: llc < %s -mtriple=x86_64-- -disable-tail-calls=false | FileCheck %s --check-prefix=DISABLE-FALSE
+
+; Check that command line option "-disable-tail-calls" overrides function
+; attribute "disable-tail-calls".
+
+; NO-OPTION-LABEL: {{\_?}}func_attr
+; NO-OPTION: callq {{\_?}}callee
+
+; DISABLE-FALSE-LABEL: {{\_?}}func_attr
+; DISABLE-FALSE: jmp {{\_?}}callee
+
+; DISABLE-TRUE-LABEL: {{\_?}}func_attr
+; DISABLE-TRUE: callq {{\_?}}callee
+
+define tailcc i32 @func_attr(i32 %a) #0 {
+entry:
+ %call = tail call tailcc i32 @callee(i32 %a)
+ ret i32 %call
+}
+
+; NO-OPTION-LABEL: {{\_?}}func_noattr
+; NO-OPTION: jmp {{\_?}}callee
+
+; DISABLE-FALSE-LABEL: {{\_?}}func_noattr
+; DISABLE-FALSE: jmp {{\_?}}callee
+
+; DISABLE-TRUE-LABEL: {{\_?}}func_noattr
+; DISABLE-TRUE: callq {{\_?}}callee
+
+define tailcc i32 @func_noattr(i32 %a) {
+entry:
+ %call = tail call tailcc i32 @callee(i32 %a)
+ ret i32 %call
+}
+
+declare tailcc i32 @callee(i32)
+
+attributes #0 = { "disable-tail-calls"="true" }
diff --git a/llvm/test/CodeGen/X86/tailcc-fastcc.ll b/llvm/test/CodeGen/X86/tailcc-fastcc.ll
new file mode 100644
index 00000000000..03369855de4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailcc-fastcc.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -tailcallopt < %s -mtriple=x86_64-unknown-unknown | FileCheck %s -check-prefix=X64
+; RUN: llc -tailcallopt < %s -mtriple=i686-unknown-unknown | FileCheck %s -check-prefix=X32
+
+; llc -tailcallopt should not enable tail calls from fastcc to tailcc or vice versa
+
+declare tailcc i32 @tailcallee1(i32 %a1, i32 %a2, i32 %a3, i32 %a4)
+
+define fastcc i32 @tailcaller1(i32 %in1, i32 %in2) nounwind {
+; X64-LABEL: tailcaller1:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: callq tailcallee1
+; X64-NEXT: retq $8
+;
+; X32-LABEL: tailcaller1:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %edx
+; X32-NEXT: pushl %ecx
+; X32-NEXT: calll tailcallee1
+; X32-NEXT: retl
+entry:
+ %tmp11 = tail call tailcc i32 @tailcallee1(i32 %in1, i32 %in2, i32 %in1, i32 %in2)
+ ret i32 %tmp11
+}
+
+declare fastcc i32 @tailcallee2(i32 %a1, i32 %a2, i32 %a3, i32 %a4)
+
+define tailcc i32 @tailcaller2(i32 %in1, i32 %in2) nounwind {
+; X64-LABEL: tailcaller2:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: callq tailcallee2
+; X64-NEXT: retq $8
+;
+; X32-LABEL: tailcaller2:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %edx
+; X32-NEXT: pushl %ecx
+; X32-NEXT: calll tailcallee2
+; X32-NEXT: retl
+entry:
+ %tmp11 = tail call fastcc i32 @tailcallee2(i32 %in1, i32 %in2, i32 %in1, i32 %in2)
+ ret i32 %tmp11
+}
diff --git a/llvm/test/CodeGen/X86/tailcc-fastisel.ll b/llvm/test/CodeGen/X86/tailcc-fastisel.ll
new file mode 100644
index 00000000000..e6d75faf4cd
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailcc-fastisel.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -fast-isel -fast-isel-abort=1 | FileCheck %s
+
+%0 = type { i64, i32, i8* }
+
+define tailcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 %arg1) nounwind {
+fail: ; preds = %entry
+ %tmp20 = tail call tailcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 undef) ; <i8*> [#uses=1]
+; CHECK: jmp "_visit_array_aux<`Reference>" ## TAILCALL
+ ret i8* %tmp20
+}
+
+define i32 @foo() nounwind {
+entry:
+ %0 = tail call i32 (...) @bar() nounwind ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+declare i32 @bar(...) nounwind
diff --git a/llvm/test/CodeGen/X86/tailcc-largecode.ll b/llvm/test/CodeGen/X86/tailcc-largecode.ll
new file mode 100644
index 00000000000..a3b5c300745
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailcc-largecode.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -code-model=large -enable-misched=false | FileCheck %s
+
+declare tailcc i32 @callee(i32 %arg)
+define tailcc i32 @directcall(i32 %arg) {
+entry:
+; This is the large code model, so &callee may not fit into the jmp
+; instruction. Instead, stick it into a register.
+; CHECK: movabsq $callee, [[REGISTER:%r[a-z0-9]+]]
+; CHECK: jmpq *[[REGISTER]] # TAILCALL
+ %res = tail call tailcc i32 @callee(i32 %arg)
+ ret i32 %res
+}
+
+; Check that the register used for an indirect tail call doesn't
+; clobber any of the arguments.
+define tailcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target) {
+; Adjust the stack to enter the function. (The amount of the
+; adjustment may change in the future, in which case the location of
+; the stack argument and the return adjustment will change too.)
+; CHECK: pushq
+; Put the call target into a register (%rax here, per the CHECK below) that
+; won't be clobbered while restoring callee-saved registers and won't be
+; used for passing arguments.
+; CHECK: movq %rdi, %rax
+; Pass the stack argument.
+; CHECK: movl $7, 16(%rsp)
+; Pass the register arguments, in the right registers.
+; CHECK: movl $1, %edi
+; CHECK: movl $2, %esi
+; CHECK: movl $3, %edx
+; CHECK: movl $4, %ecx
+; CHECK: movl $5, %r8d
+; CHECK: movl $6, %r9d
+; Adjust the stack to "return".
+; CHECK: popq
+; And tail-call to the target.
+; CHECK: jmpq *%rax # TAILCALL
+ %res = tail call tailcc i32 %target(i32 1, i32 2, i32 3, i32 4, i32 5,
+ i32 6, i32 7)
+ ret i32 %res
+}
+
+; Check that the register used for a direct tail call doesn't clobber
+; any of the arguments.
+declare tailcc i32 @manyargs_callee(i32,i32,i32,i32,i32,i32,i32)
+define tailcc i32 @direct_manyargs() {
+; Adjust the stack to enter the function. (The amount of the
+; adjustment may change in the future, in which case the location of
+; the stack argument and the return adjustment will change too.)
+; CHECK: pushq
+; Pass the stack argument.
+; CHECK: movl $7, 16(%rsp)
+; This is the large code model, so &manyargs_callee may not fit into
+; the jmp instruction. Put it into a register which won't be clobbered
+; while restoring callee-saved registers and won't be used for passing
+; arguments.
+; CHECK: movabsq $manyargs_callee, %rax
+; Pass the register arguments, in the right registers.
+; CHECK: movl $1, %edi
+; CHECK: movl $2, %esi
+; CHECK: movl $3, %edx
+; CHECK: movl $4, %ecx
+; CHECK: movl $5, %r8d
+; CHECK: movl $6, %r9d
+; Adjust the stack to "return".
+; CHECK: popq
+; And tail-call to the target.
+; CHECK: jmpq *%rax # TAILCALL
+ %res = tail call tailcc i32 @manyargs_callee(i32 1, i32 2, i32 3, i32 4,
+ i32 5, i32 6, i32 7)
+ ret i32 %res
+}
diff --git a/llvm/test/CodeGen/X86/tailcc-stackalign.ll b/llvm/test/CodeGen/X86/tailcc-stackalign.ll
new file mode 100644
index 00000000000..36333a9a213
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailcc-stackalign.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=i686-unknown-linux -no-x86-call-frame-opt | FileCheck %s
+; Linux has 8-byte stack alignment, so the parameters give a stack size of 20;
+; ensure that a normal tailcc call has a matching stack size.
+
+
+define tailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+ ret i32 %a3
+}
+
+define tailcc i32 @tailcaller(i32 %in1, i32 %in2, i32 %in3, i32 %in4) {
+ %tmp11 = tail call tailcc i32 @tailcallee(i32 %in1, i32 %in2,
+ i32 %in1, i32 %in2)
+ ret i32 %tmp11
+}
+
+define i32 @main(i32 %argc, i8** %argv) {
+ %tmp1 = call tailcc i32 @tailcaller( i32 1, i32 2, i32 3, i32 4 )
+ ; expect match subl [stacksize] here
+ ret i32 0
+}
+
+; CHECK: calll tailcaller
+; CHECK-NEXT: subl $12
diff --git a/llvm/test/CodeGen/X86/tailcc-structret.ll b/llvm/test/CodeGen/X86/tailcc-structret.ll
new file mode 100644
index 00000000000..2d83d4a3c9f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailcc-structret.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -mtriple=i686-unknown-linux | FileCheck %s
+define tailcc { { i8*, i8* }*, i8*} @init({ { i8*, i8* }*, i8*}, i32) {
+entry:
+ %2 = tail call tailcc { { i8*, i8* }*, i8* } @init({ { i8*, i8*}*, i8*} %0, i32 %1)
+ ret { { i8*, i8* }*, i8*} %2
+; CHECK: jmp init
+}
diff --git a/llvm/test/CodeGen/X86/tailccbyval.ll b/llvm/test/CodeGen/X86/tailccbyval.ll
new file mode 100644
index 00000000000..dbde868e511
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailccbyval.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=i686-unknown-linux | FileCheck %s
+%struct.s = type {i32, i32, i32, i32, i32, i32, i32, i32,
+ i32, i32, i32, i32, i32, i32, i32, i32,
+ i32, i32, i32, i32, i32, i32, i32, i32 }
+
+define tailcc i32 @tailcallee(%struct.s* byval %a) nounwind {
+entry:
+ %tmp2 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 0
+ %tmp3 = load i32, i32* %tmp2
+ ret i32 %tmp3
+; CHECK: tailcallee
+; CHECK: movl 4(%esp), %eax
+}
+
+define tailcc i32 @tailcaller(%struct.s* byval %a) nounwind {
+entry:
+ %tmp4 = tail call tailcc i32 @tailcallee(%struct.s* byval %a )
+ ret i32 %tmp4
+; CHECK: tailcaller
+; CHECK: jmp tailcallee
+}
diff --git a/llvm/test/CodeGen/X86/tailccbyval64.ll b/llvm/test/CodeGen/X86/tailccbyval64.ll
new file mode 100644
index 00000000000..47d20ea972a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailccbyval64.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
+
+; FIXME: Win64 does not support byval.
+
+; Expect the entry point.
+; CHECK-LABEL: tailcaller:
+
+; Expect 2 rep;movs because of tail call byval lowering.
+; CHECK: rep;
+; CHECK: rep;
+
+; A sequence of copyto/copyfrom virtual registers is used to deal with byval
+; lowering appearing after moving arguments to registers. The following two
+; checks verify that the register allocator changes those sequences to direct
+; moves to argument register where it can (for registers that are not used in
+; byval lowering - not rsi, not rdi, not rcx).
+; Expect argument 4 to be moved directly to register edx.
+; CHECK: movl $7, %edx
+
+; Expect argument 6 to be moved directly to register r8.
+; CHECK: movl $17, %r8d
+
+; Expect not call but jmp to @tailcallee.
+; CHECK: jmp tailcallee
+
+; Expect the trailer.
+; CHECK: .size tailcaller
+
+%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
+ i64, i64, i64, i64, i64, i64, i64, i64,
+ i64, i64, i64, i64, i64, i64, i64, i64 }
+
+declare tailcc i64 @tailcallee(%struct.s* byval %a, i64 %val, i64 %val2, i64 %val3, i64 %val4, i64 %val5)
+
+
+define tailcc i64 @tailcaller(i64 %b, %struct.s* byval %a) {
+entry:
+ %tmp2 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 1
+ %tmp3 = load i64, i64* %tmp2, align 8
+ %tmp4 = tail call tailcc i64 @tailcallee(%struct.s* byval %a , i64 %tmp3, i64 %b, i64 7, i64 13, i64 17)
+ ret i64 %tmp4
+}
diff --git a/llvm/test/CodeGen/X86/tailccfp.ll b/llvm/test/CodeGen/X86/tailccfp.ll
new file mode 100644
index 00000000000..32814e93f45
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailccfp.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s
+define tailcc i32 @bar(i32 %X, i32(double, i32) *%FP) {
+ %Y = tail call tailcc i32 %FP(double 0.0, i32 %X)
+ ret i32 %Y
+; CHECK: jmpl
+}
diff --git a/llvm/test/CodeGen/X86/tailccfp2.ll b/llvm/test/CodeGen/X86/tailccfp2.ll
new file mode 100644
index 00000000000..f8b29b386ad
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailccfp2.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s
+
+declare i32 @putchar(i32)
+
+define tailcc i32 @checktail(i32 %x, i32* %f, i32 %g) nounwind {
+; CHECK-LABEL: checktail:
+ %tmp1 = icmp sgt i32 %x, 0
+ br i1 %tmp1, label %if-then, label %if-else
+
+if-then:
+ %fun_ptr = bitcast i32* %f to i32(i32, i32*, i32)*
+ %arg1 = add i32 %x, -1
+ call i32 @putchar(i32 90)
+; CHECK: jmpl *%e{{.*}}
+ %res = tail call tailcc i32 %fun_ptr( i32 %arg1, i32 * %f, i32 %g)
+ ret i32 %res
+
+if-else:
+ ret i32 %x
+}
+
+
+define i32 @main() nounwind {
+ %f = bitcast i32 (i32, i32*, i32)* @checktail to i32*
+ %res = tail call tailcc i32 @checktail( i32 10, i32* %f,i32 10)
+ ret i32 %res
+}
diff --git a/llvm/test/CodeGen/X86/tailccpic1.ll b/llvm/test/CodeGen/X86/tailccpic1.ll
new file mode 100644
index 00000000000..de8f2219bc2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailccpic1.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s
+
+; This test uses guaranteed TCO so these will be tail calls, despite the early
+; binding issues.
+
+define protected tailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+entry:
+ ret i32 %a3
+}
+
+define tailcc i32 @tailcaller(i32 %in1, i32 %in2) {
+entry:
+ %tmp11 = tail call tailcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
+ ret i32 %tmp11
+; CHECK: jmp tailcallee
+}
diff --git a/llvm/test/CodeGen/X86/tailccpic2.ll b/llvm/test/CodeGen/X86/tailccpic2.ll
new file mode 100644
index 00000000000..314cd8f2fd6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailccpic2.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s
+
+define tailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+entry:
+ ret i32 %a3
+}
+
+define tailcc i32 @tailcaller(i32 %in1, i32 %in2) {
+entry:
+ %tmp11 = tail call tailcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
+ ret i32 %tmp11
+; CHECK: movl tailcallee@GOT
+; CHECK: jmpl
+}
+
diff --git a/llvm/test/CodeGen/X86/tailccstack64.ll b/llvm/test/CodeGen/X86/tailccstack64.ll
new file mode 100644
index 00000000000..bd0f4a73950
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailccstack64.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
+
+; FIXME: Redundant unused stack allocation could be eliminated.
+; CHECK: subq ${{24|72|80}}, %rsp
+
+; Check that lowered arguments on the stack do not overwrite each other.
+; Move param %in1 from the stack into a temporary register (R1).
+; CHECK: movl [[A1:32|144]](%rsp), [[R1:%e..]]
+; Move param %in2 from the stack into a temporary register (R2).
+; CHECK: movl [[A2:40|152]](%rsp), [[R2:%[a-z0-9]+]]
+; Add %p1 to %in1 in R1.
+; CHECK: addl {{%edi|%ecx}}, [[R1]]
+; Move param %in2 to stack.
+; CHECK-DAG: movl [[R2]], [[A1]](%rsp)
+; Move result of addition to stack.
+; CHECK-DAG: movl [[R1]], [[A2]](%rsp)
+; Eventually, do a TAILCALL
+; CHECK: TAILCALL
+
+declare tailcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %a, i32 %b) nounwind
+
+define tailcc i32 @tailcaller(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in1, i32 %in2) nounwind {
+entry:
+ %tmp = add i32 %in1, %p1
+ %retval = tail call tailcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in2,i32 %tmp)
+ ret i32 %retval
+}
diff --git a/llvm/utils/vim/syntax/llvm.vim b/llvm/utils/vim/syntax/llvm.vim
index 14987cb2348..487a37b4b86 100644
--- a/llvm/utils/vim/syntax/llvm.vim
+++ b/llvm/utils/vim/syntax/llvm.vim
@@ -82,6 +82,7 @@ syn keyword llvmKeyword
\ externally_initialized
\ extern_weak
\ fastcc
+ \ tailcc
\ filter
\ from
\ gc