-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h                   |   7
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CallLowering.cpp                          |   4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64CallLowering.cpp                       | 176
-rw-r--r--  llvm/lib/Target/AArch64/AArch64CallLowering.h                         |   4
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll     | 134
-rw-r--r--  llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll |   3
-rw-r--r--  llvm/test/CodeGen/AArch64/dllimport.ll                                |   2
7 files changed, 321 insertions(+), 9 deletions(-)
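
Taken together, the changes below thread tail-call information from the IR through the generic GlobalISel call lowering and teach AArch64 to emit sibling calls (TCRETURNdi/TCRETURNri) for the simplest cases. Condensed into a single predicate (illustrative only, not code from the patch), a call becomes a sibling call when:

  // Illustrative condensation of the gating implemented by this patch:
  //  * the IR call is marked `tail` and sits in tail-call position
  //    (CallLowering::lowerCall folds these into Info.IsTailCall), and
  //  * the AArch64-specific checks pass: a tail-callable calling convention,
  //    no varargs, no byval/inreg arguments in the caller, no extern_weak
  //    callee on MachO/ELF, matching calling conventions, and (for now) no
  //    outgoing arguments.
  static bool lowersAsSiblingCall(bool IsTailCallIR, bool InTailCallPosition,
                                  bool AArch64Eligible) {
    return IsTailCallIR && InTailCallPosition && AArch64Eligible;
  }
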
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index e996cf1c833..cfdf3f5bf90 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -90,6 +90,13 @@ public:
/// True if the call must be tail call optimized.
bool IsMustTailCall = false;
+
+ /// True if the call passes all target-independent checks for tail call
+ /// optimization.
+ bool IsTailCall = false;
+
+ /// True if the call is to a vararg function.
+ bool IsVarArg = false;
};
/// Argument handling is mostly uniform between the four places that
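
The two new CallLoweringInfo flags are only populated by the generic lowerCall (next hunk); targets read them in their own lowerCall overrides. A minimal sketch of how a target might gate its tail-call path on them (hypothetical helper, not part of the patch):

  #include "llvm/CodeGen/GlobalISel/CallLowering.h"

  using namespace llvm;

  // Hypothetical helper: IsTailCall already folds in the target-independent
  // isInTailCallPosition() check performed in CallLowering::lowerCall, and
  // IsVarArg lets targets that cannot yet tail call variadic callees (as
  // AArch64 does below) bail out early. Musttail calls are still rejected.
  static bool wantsTailCallLowering(const CallLowering::CallLoweringInfo &Info) {
    return Info.IsTailCall && !Info.IsMustTailCall && !Info.IsVarArg;
  }
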
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index d433155160b..1c8e4541817 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -64,7 +64,9 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
Info.CallConv = CS.getCallingConv();
Info.SwiftErrorVReg = SwiftErrorVReg;
Info.IsMustTailCall = CS.isMustTailCall();
-
+ Info.IsTailCall = CS.isTailCall() &&
+ isInTailCallPosition(CS, MIRBuilder.getMF().getTarget());
+ Info.IsVarArg = CS.getFunctionType()->isVarArg();
return lowerCall(MIRBuilder, Info);
}
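
isInTailCallPosition (declared in llvm/CodeGen/Analysis.h) is what makes Info.IsTailCall stricter than the IR-level `tail` marker: the call must also be the last observable thing the caller does. A rough, simplified restatement (not the actual implementation, which tolerates some intervening instructions and also checks attribute and return-value compatibility):

  #include "llvm/IR/Instructions.h"

  using namespace llvm;

  // Rough approximation only: require the call to be immediately followed by
  // a return that either returns nothing or returns the call's own result.
  static bool roughlyInTailCallPosition(const CallInst &CI) {
    const auto *Ret = dyn_cast_or_null<ReturnInst>(CI.getNextNode());
    if (!Ret)
      return false;
    const Value *RV = Ret->getReturnValue();
    return !RV || RV == &CI;
  }
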
diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
index 9ace33b3985..a8a1389f08c 100644
--- a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -233,6 +233,17 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val,
ArrayRef<Register> VRegs,
Register SwiftErrorVReg) const {
+
+ // Check if a tail call was lowered in this block. If so, we already handled
+ // the terminator.
+ MachineFunction &MF = MIRBuilder.getMF();
+ if (MF.getFrameInfo().hasTailCall()) {
+ MachineBasicBlock &MBB = MIRBuilder.getMBB();
+ auto FirstTerm = MBB.getFirstTerminator();
+ if (FirstTerm != MBB.end() && FirstTerm->isCall())
+ return true;
+ }
+
auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
"Return value without a vreg");
@@ -403,6 +414,129 @@ bool AArch64CallLowering::lowerFormalArguments(
return true;
}
+/// Return true if the calling convention is one that we can guarantee TCO for.
+static bool canGuaranteeTCO(CallingConv::ID CC) {
+ return CC == CallingConv::Fast;
+}
+
+/// Return true if we might ever do TCO for calls with this calling convention.
+static bool mayTailCallThisCC(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::PreserveMost:
+ case CallingConv::Swift:
+ return true;
+ default:
+ return canGuaranteeTCO(CC);
+ }
+}
+
+bool AArch64CallLowering::isEligibleForTailCallOptimization(
+ MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const {
+ CallingConv::ID CalleeCC = Info.CallConv;
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &CallerF = MF.getFunction();
+ CallingConv::ID CallerCC = CallerF.getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
+
+ if (!mayTailCallThisCC(CalleeCC)) {
+ LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
+ return false;
+ }
+
+ if (Info.IsVarArg) {
+ LLVM_DEBUG(dbgs() << "... Tail calling varargs not supported yet.\n");
+ return false;
+ }
+
+ // Byval parameters hand the function a pointer directly into the stack area
+ // we want to reuse during a tail call. Working around this *is* possible (see
+ // X86).
+ //
+ // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
+ // it?
+ //
+ // On Windows, "inreg" attributes signify non-aggregate indirect returns.
+ // In this case, it is necessary to save/restore X0 in the callee. Tail
+ // call opt interferes with this. So we disable tail call opt when the
+ // caller has an argument with "inreg" attribute.
+ //
+ // FIXME: Check whether the callee also has an "inreg" argument.
+ if (any_of(CallerF.args(), [](const Argument &A) {
+ return A.hasByValAttr() || A.hasInRegAttr();
+ })) {
+ LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval or "
+ "inreg arguments.\n");
+ return false;
+ }
+
+ // Externally-defined functions with weak linkage should not be
+ // tail-called on AArch64 when the OS does not support dynamic
+ // pre-emption of symbols, as the AAELF spec requires normal calls
+ // to undefined weak functions to be replaced with a NOP or jump to the
+ // next instruction. The behaviour of branch instructions in this
+ // situation (as used for tail calls) is implementation-defined, so we
+ // cannot rely on the linker replacing the tail call with a return.
+ if (Info.Callee.isGlobal()) {
+ const GlobalValue *GV = Info.Callee.getGlobal();
+ const Triple &TT = MF.getTarget().getTargetTriple();
+ if (GV->hasExternalWeakLinkage() &&
+ (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
+ TT.isOSBinFormatMachO())) {
+ LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
+ "with weak linkage for this OS.\n");
+ return false;
+ }
+ }
+
+ // If we have -tailcallopt and matching CCs, at this point, we could return
+ // true. However, we don't have full tail call support yet. So, continue
+ // checking. We want to emit a sibling call.
+
+ // I want anyone implementing a new calling convention to think long and hard
+ // about this assert.
+ assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
+ "Unexpected variadic calling convention");
+
+ // For now, only support the case where the calling conventions match.
+ if (!CCMatch) {
+ LLVM_DEBUG(
+ dbgs()
+ << "... Cannot tail call with mismatched calling conventions yet.\n");
+ return false;
+ }
+
+ // For now, only handle callees that take no arguments.
+ if (!Info.OrigArgs.empty()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "... Cannot tail call callees with outgoing arguments yet.\n");
+ return false;
+ }
+
+ LLVM_DEBUG(
+ dbgs() << "... Call is eligible for tail call optimization.\n");
+ return true;
+}
+
+static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect,
+ bool IsTailCall) {
+ if (!IsTailCall)
+ return IsIndirect ? AArch64::BLR : AArch64::BL;
+
+ if (!IsIndirect)
+ return AArch64::TCRETURNdi;
+
+ // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
+ // x16 or x17.
+ if (CallerF.hasFnAttribute("branch-target-enforcement"))
+ return AArch64::TCRETURNriBTI;
+
+ return AArch64::TCRETURNri;
+}
+
bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const {
MachineFunction &MF = MIRBuilder.getMF();
@@ -411,6 +545,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
auto &DL = F.getParent()->getDataLayout();
if (Info.IsMustTailCall) {
+ // TODO: Until we lower all tail calls, we should fall back on this.
LLVM_DEBUG(dbgs() << "Cannot lower musttail calls yet.\n");
return false;
}
@@ -423,6 +558,11 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SplitArgs.back().Flags[0].setZExt();
}
+ bool IsSibCall =
+ Info.IsTailCall && isEligibleForTailCallOptimization(MIRBuilder, Info);
+ if (IsSibCall)
+ MF.getFrameInfo().setHasTailCall();
+
// Find out which ABI gets to decide where things go.
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
CCAssignFn *AssignFnFixed =
@@ -430,14 +570,33 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CCAssignFn *AssignFnVarArg =
TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true);
- auto CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
+ // If we have a sibling call, then we don't have to adjust the stack.
+ // Otherwise, we need to adjust it.
+ MachineInstrBuilder CallSeqStart;
+ if (!IsSibCall)
+ CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
- auto MIB = MIRBuilder.buildInstrNoInsert(Info.Callee.isReg() ? AArch64::BLR
- : AArch64::BL);
+ unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), IsSibCall);
+
+ // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
+ // register class. Until we can do that, we should fall back here.
+ if (Opc == AArch64::TCRETURNriBTI) {
+ LLVM_DEBUG(
+ dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
+ return false;
+ }
+
+ auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
MIB.add(Info.Callee);
+ // Add the byte offset for the tail call. We only have sibling calls, so this
+ // is always 0.
+ // TODO: Handle tail calls where we will have a different value here.
+ if (IsSibCall)
+ MIB.addImm(0);
+
// Tell the call which registers are clobbered.
auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
@@ -486,10 +645,13 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
}
- CallSeqStart.addImm(Handler.StackSize).addImm(0);
- MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
- .addImm(Handler.StackSize)
- .addImm(0);
+ if (!IsSibCall) {
+ // If we aren't sibcalling, we need to move the stack.
+ CallSeqStart.addImm(Handler.StackSize).addImm(0);
+ MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
+ .addImm(Handler.StackSize)
+ .addImm(0);
+ }
return true;
}
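
One of the FIXMEs above notes that only the caller's arguments are scanned for the "inreg" attribute. Since the outgoing arguments in CallLoweringInfo carry the same flags that setArgFlags derives from IR attributes, the callee side could be checked the same way. A sketch of that check (hypothetical helper, not part of the patch; assumes Flags[0] holds the flags of the unsplit argument):

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/CodeGen/GlobalISel/CallLowering.h"

  using namespace llvm;

  // Scan the outgoing (callee-bound) arguments for the inreg flag, mirroring
  // the existing scan over the caller's formal arguments.
  static bool calleeHasInRegArg(const CallLowering::CallLoweringInfo &Info) {
    return any_of(Info.OrigArgs, [](const CallLowering::ArgInfo &A) {
      return !A.Flags.empty() && A.Flags[0].isInReg();
    });
  }
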
diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.h b/llvm/lib/Target/AArch64/AArch64CallLowering.h
index 5da72286a5d..0bf250b85a3 100644
--- a/llvm/lib/Target/AArch64/AArch64CallLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64CallLowering.h
@@ -43,6 +43,10 @@ public:
bool lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const override;
+ /// Returns true if the call can be lowered as a tail call.
+ bool isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const;
+
bool supportSwiftError() const override { return true; }
private:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
new file mode 100644
index 00000000000..688c0971089
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-apple-darwin -global-isel -o - 2>&1 | FileCheck %s --check-prefixes=DARWIN,COMMON
+; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-windows -global-isel -o - 2>&1 | FileCheck %s --check-prefixes=WINDOWS,COMMON
+
+declare void @simple_fn()
+define void @tail_call() {
+ ; COMMON-LABEL: name: tail_call
+ ; COMMON: bb.1 (%ir-block.0):
+ ; COMMON: TCRETURNdi @simple_fn, 0, csr_aarch64_aapcs, implicit $sp
+ tail call void @simple_fn()
+ ret void
+}
+
+; We should get a TCRETURNri here.
+; FIXME: We don't need the COPY.
+define void @indirect_tail_call(void()* %func) {
+ ; COMMON-LABEL: name: indirect_tail_call
+ ; COMMON: bb.1 (%ir-block.0):
+ ; COMMON: liveins: $x0
+ ; COMMON: [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
+ ; COMMON: TCRETURNri [[COPY]](p0), 0, csr_aarch64_aapcs, implicit $sp
+ tail call void %func()
+ ret void
+}
+
+declare void @outgoing_args_fn(i32)
+; Right now, callees with outgoing arguments should not be tail called.
+; TODO: Support this.
+define void @test_outgoing_args(i32 %a) {
+ ; COMMON-LABEL: name: test_outgoing_args
+ ; COMMON: bb.1 (%ir-block.0):
+ ; COMMON: liveins: $w0
+ ; COMMON: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; COMMON: $w0 = COPY [[COPY]](s32)
+ ; COMMON: BL @outgoing_args_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0
+ ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; COMMON: RET_ReallyLR
+ tail call void @outgoing_args_fn(i32 %a)
+ ret void
+}
+
+; Right now, this should not be tail called.
+; TODO: Support this.
+declare void @varargs(i32, double, i64, ...)
+define void @test_varargs() {
+ ; COMMON-LABEL: name: test_varargs
+ ; COMMON: bb.1 (%ir-block.0):
+ ; COMMON: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+ ; COMMON: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; COMMON: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+ ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; COMMON: $w0 = COPY [[C]](s32)
+ ; COMMON: $d0 = COPY [[C1]](s64)
+ ; COMMON: $x1 = COPY [[C2]](s64)
+ ; COMMON: BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
+ ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; COMMON: RET_ReallyLR
+ tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12)
+ ret void
+}
+
+; Unsupported calling convention for tail calls. Make sure we never tail call
+; it.
+declare ghccc void @bad_call_conv_fn()
+define void @test_bad_call_conv() {
+ ; COMMON-LABEL: name: test_bad_call_conv
+ ; COMMON: bb.1 (%ir-block.0):
+ ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; COMMON: BL @bad_call_conv_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+ ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; COMMON: RET_ReallyLR
+ tail call ghccc void @bad_call_conv_fn()
+ ret void
+}
+
+; Shouldn't tail call when the caller has byval arguments.
+define void @test_byval(i8* byval %ptr) {
+ ; COMMON-LABEL: name: test_byval
+ ; COMMON: bb.1 (%ir-block.0):
+ ; COMMON: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; COMMON: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 1)
+ ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; COMMON: BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+ ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; COMMON: RET_ReallyLR
+ tail call void @simple_fn()
+ ret void
+}
+
+; Shouldn't tail call when the caller has inreg arguments.
+define void @test_inreg(i8* inreg %ptr) {
+ ; COMMON-LABEL: name: test_inreg
+ ; COMMON: bb.1 (%ir-block.0):
+ ; COMMON: liveins: $x0
+ ; COMMON: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; COMMON: BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+ ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; COMMON: RET_ReallyLR
+ tail call void @simple_fn()
+ ret void
+}
+
+; Shouldn't tail call when the OS doesn't support it. Windows supports this,
+; so we should be able to tail call there.
+declare extern_weak void @extern_weak_fn()
+define void @test_extern_weak() {
+ ; DARWIN-LABEL: name: test_extern_weak
+ ; DARWIN: bb.1 (%ir-block.0):
+ ; DARWIN: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; DARWIN: BL @extern_weak_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+ ; DARWIN: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; DARWIN: RET_ReallyLR
+ ; WINDOWS-LABEL: name: test_extern_weak
+ ; WINDOWS: bb.1 (%ir-block.0):
+ ; WINDOWS: TCRETURNdi @extern_weak_fn, 0, csr_aarch64_aapcs, implicit $sp
+ tail call void @extern_weak_fn()
+ ret void
+}
+
+; Right now, mismatched calling conventions should not be tail called.
+; TODO: Support this.
+declare fastcc void @fast_fn()
+define void @test_mismatched_caller() {
+ ; COMMON-LABEL: name: test_mismatched_caller
+ ; COMMON: bb.1 (%ir-block.0):
+ ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; COMMON: BL @fast_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+ ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; COMMON: RET_ReallyLR
+ tail call fastcc void @fast_fn()
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll b/llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll
index d7e3748d22a..3fb9e320f89 100644
--- a/llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll
+++ b/llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll
@@ -1,4 +1,7 @@
; RUN: llc -mtriple aarch64--none-eabi -mattr=+bti < %s | FileCheck %s
+; RUN: llc -mtriple aarch64--none-eabi -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mattr=+bti %s -verify-machineinstrs -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,FALLBACK
+
+; FALLBACK: remark: <unknown>:0:0: unable to translate instruction: call: ' tail call void %p()' (in function: bti_enabled)
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-arm-none-eabi"
diff --git a/llvm/test/CodeGen/AArch64/dllimport.ll b/llvm/test/CodeGen/AArch64/dllimport.ll
index 281c847a39a..cd440c36b21 100644
--- a/llvm/test/CodeGen/AArch64/dllimport.ll
+++ b/llvm/test/CodeGen/AArch64/dllimport.ll
@@ -59,4 +59,4 @@ define i32 @call_internal() {
; CHECK-LABEL: call_internal
; DAG-ISEL: b internal
; FAST-ISEL: b internal
-; GLOBAL-ISEL: bl internal
+; GLOBAL-ISEL: b internal