summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthias Braun <matze@braunis.de>2016-04-04 18:56:13 +0000
committerMatthias Braun <matze@braunis.de>2016-04-04 18:56:13 +0000
commit870c34f0cfe0678dedcec33770fe4304b60c2e0c (patch)
tree46113c7f1c7c847dcceddbb570c39e0262fd6f7b
parenteb3219a9c23444361172f9840f2338cc5de0152d (diff)
downloadbcm5719-llvm-870c34f0cfe0678dedcec33770fe4304b60c2e0c.tar.gz
bcm5719-llvm-870c34f0cfe0678dedcec33770fe4304b60c2e0c.zip
ARM, AArch64, X86: Check preserved registers for tail calls.
We can only perform a tail call to a callee that preserves all the registers that the caller needs to preserve. This situation happens with calling conventions like preserve_mostcc or cxx_fast_tls. It was explicitly handled for fast_tls and was failing for preserve_most. This patch generalizes the check to any calling convention. Related to rdar://24207743 Differential Revision: http://reviews.llvm.org/D18680 llvm-svn: 265329
-rw-r--r--llvm/include/llvm/Target/TargetRegisterInfo.h4
-rw-r--r--llvm/lib/CodeGen/TargetRegisterInfo.cpp9
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp14
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp15
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp14
-rw-r--r--llvm/test/CodeGen/AArch64/tailcall-ccmismatch.ll24
-rw-r--r--llvm/test/CodeGen/ARM/cxx-tlscc.ll22
7 files changed, 80 insertions, 22 deletions
diff --git a/llvm/include/llvm/Target/TargetRegisterInfo.h b/llvm/include/llvm/Target/TargetRegisterInfo.h
index 90d1dd5d24c..ff5147e6f1b 100644
--- a/llvm/include/llvm/Target/TargetRegisterInfo.h
+++ b/llvm/include/llvm/Target/TargetRegisterInfo.h
@@ -460,6 +460,10 @@ public:
llvm_unreachable("target does not provide no preserved mask");
}
+ /// Return true if all bits that are set in mask \p mask0 are also set in
+ /// \p mask1.
+ bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const;
+
/// Return all the call-preserved register masks defined for this target.
virtual ArrayRef<const uint32_t *> getRegMasks() const = 0;
virtual ArrayRef<const char *> getRegMaskNames() const = 0;
diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index 0a7042ac3db..fc88629b5c8 100644
--- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -388,6 +388,15 @@ bool TargetRegisterInfo::needsStackRealignment(
return false;
}
+bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
+ const uint32_t *mask1) const {
+ unsigned N = (getNumRegs()+31) / 32;
+ for (unsigned I = 0; I < N; ++I)
+ if ((mask0[I] & mask1[I]) != mask0[I])
+ return false;
+ return true;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void
TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 318f7c90220..6d8f3eebff3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2816,13 +2816,6 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
- // Disable tailcall for CXX_FAST_TLS when callee and caller have different
- // calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
- if (!CCMatch &&
- (CallerCC == CallingConv::CXX_FAST_TLS ||
- CalleeCC == CallingConv::CXX_FAST_TLS))
- return false;
-
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call. Working around this *is* possible (see
// X86) but less efficient and uglier in LowerCall.
@@ -2882,6 +2875,13 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
CCAssignFnForCall(CalleeCC, isVarArg),
CCAssignFnForCall(CallerCC, isVarArg)))
return false;
+ // The callee has to preserve all registers the caller needs to preserve.
+ if (!CCMatch) {
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
+ TRI->getCallPreservedMask(MF, CalleeCC)))
+ return false;
+ }
// Nothing more to check if the callee is taking no arguments
if (Outs.empty())
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 908f40db587..f950adb9159 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2101,14 +2101,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF->getCallingConv();
- bool CCMatch = CallerCC == CalleeCC;
-
- // Disable tailcall for CXX_FAST_TLS when callee and caller have different
- // calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
- if (!CCMatch &&
- (CallerCC == CallingConv::CXX_FAST_TLS ||
- CalleeCC == CallingConv::CXX_FAST_TLS))
- return false;
assert(Subtarget->supportsTailCall());
@@ -2152,6 +2144,13 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CCAssignFnForNode(CalleeCC, true, isVarArg),
CCAssignFnForNode(CallerCC, true, isVarArg)))
return false;
+ // The callee has to preserve all registers the caller needs to preserve.
+ if (CalleeCC != CallerCC) {
+ const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
+ TRI->getCallPreservedMask(MF, CalleeCC)))
+ return false;
+ }
// If Caller's vararg or byval argument has been split between registers and
// stack, do not perform tail call, since part of the argument is in caller's
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8c17a47674d..9ba8f1e8e01 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3818,13 +3818,6 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
if (IsCalleeWin64 != IsCallerWin64)
return false;
- // Disable tailcall for CXX_FAST_TLS when callee and caller have different
- // calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
- if (!CCMatch &&
- (CallerCC == CallingConv::CXX_FAST_TLS ||
- CalleeCC == CallingConv::CXX_FAST_TLS))
- return false;
-
if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
if (canGuaranteeTCO(CalleeCC) && CCMatch)
return true;
@@ -3888,6 +3881,13 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
RetCC_X86, RetCC_X86))
return false;
+ // The callee has to preserve all registers the caller needs to preserve.
+ if (!CCMatch) {
+ const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
+ TRI->getCallPreservedMask(MF, CalleeCC)))
+ return false;
+ }
unsigned StackArgsSize = 0;
diff --git a/llvm/test/CodeGen/AArch64/tailcall-ccmismatch.ll b/llvm/test/CodeGen/AArch64/tailcall-ccmismatch.ll
new file mode 100644
index 00000000000..ab96e609dd4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/tailcall-ccmismatch.ll
@@ -0,0 +1,24 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple="aarch64--"
+
+declare void @somefunc()
+define preserve_mostcc void @test_ccmismatch_notail() {
+; Ensure that no tail call is used here, as the called function somefunc does
+; not preserve enough registers for preserve_mostcc.
+; CHECK-LABEL: test_ccmismatch_notail:
+; CHECK-NOT: b somefunc
+; CHECK: bl somefunc
+ tail call void @somefunc()
+ ret void
+}
+
+declare preserve_mostcc void @some_preserve_most_func()
+define void @test_ccmismatch_tail() {
+; We can perform a tail call here, because some_preserve_most_func preserves
+; all registers necessary for test_ccmismatch_tail.
+; CHECK-LABEL: test_ccmismatch_tail:
+; CHECK-NOT: bl some_preserve_most_func
+; CHECK: b some_preserve_most_func
+ tail call preserve_mostcc void @some_preserve_most_func()
+ ret void
+}
diff --git a/llvm/test/CodeGen/ARM/cxx-tlscc.ll b/llvm/test/CodeGen/ARM/cxx-tlscc.ll
index 48cce4f01be..d49c61ab093 100644
--- a/llvm/test/CodeGen/ARM/cxx-tlscc.ll
+++ b/llvm/test/CodeGen/ARM/cxx-tlscc.ll
@@ -126,5 +126,27 @@ entry:
ret void
}
+declare void @somefunc()
+define cxx_fast_tlscc void @test_ccmismatch_notail() {
+; A tail call is not possible here because somefunc does not preserve enough
+; registers.
+; CHECK-LABEL: test_ccmismatch_notail:
+; CHECK-NOT: b _somefunc
+; CHECK: bl _somefunc
+ tail call void @somefunc()
+ ret void
+}
+
+declare cxx_fast_tlscc void @some_fast_tls_func()
+define void @test_ccmismatch_tail() {
+; We can perform a tail call here because some_fast_tls_func preserves all
+; necessary registers (and more).
+; CHECK-LABEL: test_ccmismatch_tail:
+; CHECK-NOT: bl _some_fast_tls_func
+; CHECK: b _some_fast_tls_func
+ tail call cxx_fast_tlscc void @some_fast_tls_func()
+ ret void
+}
+
attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
attributes #1 = { nounwind }
OpenPOWER on IntegriCloud