diff options
| -rw-r--r-- | llvm/include/llvm/Target/TargetRegisterInfo.h | 4 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/TargetRegisterInfo.cpp | 9 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 14 | ||||
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 15 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 14 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/tailcall-ccmismatch.ll | 24 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/cxx-tlscc.ll | 22 | 
7 files changed, 80 insertions, 22 deletions
diff --git a/llvm/include/llvm/Target/TargetRegisterInfo.h b/llvm/include/llvm/Target/TargetRegisterInfo.h index 90d1dd5d24c..ff5147e6f1b 100644 --- a/llvm/include/llvm/Target/TargetRegisterInfo.h +++ b/llvm/include/llvm/Target/TargetRegisterInfo.h @@ -460,6 +460,10 @@ public:      llvm_unreachable("target does not provide no preserved mask");    } +  /// Return true if all bits that are set in mask \p mask0 are also set in +  /// \p mask1. +  bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const; +    /// Return all the call-preserved register masks defined for this target.    virtual ArrayRef<const uint32_t *> getRegMasks() const = 0;    virtual ArrayRef<const char *> getRegMaskNames() const = 0; diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index 0a7042ac3db..fc88629b5c8 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -388,6 +388,15 @@ bool TargetRegisterInfo::needsStackRealignment(    return false;  } +bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0, +                                            const uint32_t *mask1) const { +  unsigned N = (getNumRegs()+31) / 32; +  for (unsigned I = 0; I < N; ++I) +    if ((mask0[I] & mask1[I]) != mask0[I]) +      return false; +  return true; +} +  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)  void  TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 318f7c90220..6d8f3eebff3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2816,13 +2816,6 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(    CallingConv::ID CallerCC = CallerF->getCallingConv();    bool CCMatch = CallerCC == CalleeCC; -  // Disable tailcall for CXX_FAST_TLS when callee and caller have different -  // calling conventions, given that CXX_FAST_TLS has a bigger CSR set. -  if (!CCMatch && -      (CallerCC == CallingConv::CXX_FAST_TLS || -       CalleeCC == CallingConv::CXX_FAST_TLS)) -    return false; -    // Byval parameters hand the function a pointer directly into the stack area    // we want to reuse during a tail call. Working around this *is* possible (see    // X86) but less efficient and uglier in LowerCall. @@ -2882,6 +2875,13 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(                                    CCAssignFnForCall(CalleeCC, isVarArg),                                    CCAssignFnForCall(CallerCC, isVarArg)))      return false; +  // The callee has to preserve all registers the caller needs to preserve. +  if (!CCMatch) { +    const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); +    if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC), +                                 TRI->getCallPreservedMask(MF, CalleeCC))) +      return false; +  }    // Nothing more to check if the callee is taking no arguments    if (Outs.empty()) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 908f40db587..f950adb9159 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2101,14 +2101,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,    MachineFunction &MF = DAG.getMachineFunction();    const Function *CallerF = MF.getFunction();    CallingConv::ID CallerCC = CallerF->getCallingConv(); -  bool CCMatch = CallerCC == CalleeCC; - -  // Disable tailcall for CXX_FAST_TLS when callee and caller have different -  // calling conventions, given that CXX_FAST_TLS has a bigger CSR set. -  if (!CCMatch && -      (CallerCC == CallingConv::CXX_FAST_TLS || -       CalleeCC == CallingConv::CXX_FAST_TLS)) -    return false;    assert(Subtarget->supportsTailCall()); @@ -2152,6 +2144,13 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,                                    CCAssignFnForNode(CalleeCC, true, isVarArg),                                    CCAssignFnForNode(CallerCC, true, isVarArg)))      return false; +  // The callee has to preserve all registers the caller needs to preserve. +  if (CalleeCC != CallerCC) { +    const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); +    if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC), +                                 TRI->getCallPreservedMask(MF, CalleeCC))) +      return false; +  }    // If Caller's vararg or byval argument has been split between registers and    // stack, do not perform tail call, since part of the argument is in caller's diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8c17a47674d..9ba8f1e8e01 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3818,13 +3818,6 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(    if (IsCalleeWin64 != IsCallerWin64)      return false; -  // Disable tailcall for CXX_FAST_TLS when callee and caller have different -  // calling conventions, given that CXX_FAST_TLS has a bigger CSR set. -  if (!CCMatch && -      (CallerCC == CallingConv::CXX_FAST_TLS || -       CalleeCC == CallingConv::CXX_FAST_TLS)) -    return false; -    if (DAG.getTarget().Options.GuaranteedTailCallOpt) {      if (canGuaranteeTCO(CalleeCC) && CCMatch)        return true; @@ -3888,6 +3881,13 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(    if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,                                    RetCC_X86, RetCC_X86))      return false; +  // The callee has to preserve all registers the caller needs to preserve. +  if (!CCMatch) { +    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); +    if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC), +                                 TRI->getCallPreservedMask(MF, CalleeCC))) +      return false; +  }    unsigned StackArgsSize = 0; diff --git a/llvm/test/CodeGen/AArch64/tailcall-ccmismatch.ll b/llvm/test/CodeGen/AArch64/tailcall-ccmismatch.ll new file mode 100644 index 00000000000..ab96e609dd4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/tailcall-ccmismatch.ll @@ -0,0 +1,24 @@ +; RUN: llc -o - %s | FileCheck %s +target triple="aarch64--" + +declare void @somefunc() +define preserve_mostcc void @test_ccmismatch_notail() { +; Ensure that no tail call is used here, as the called function somefunc does +; not preserve enough registers for preserve_mostcc. +; CHECK-LABEL: test_ccmismatch_notail: +; CHECK-NOT: b somefunc +; CHECK: bl somefunc +  tail call void @somefunc() +  ret void +} + +declare preserve_mostcc void @some_preserve_most_func() +define void @test_ccmismatch_tail() { +; We can perform a tail call here, because some_preserve_most_func preserves +; all registers necessary for test_ccmismatch_tail. +; CHECK-LABEL: test_ccmismatch_tail: +; CHECK-NOT: bl some_preserve_most_func +; CHECK: b some_preserve_most_func +  tail call preserve_mostcc void @some_preserve_most_func() +  ret void +} diff --git a/llvm/test/CodeGen/ARM/cxx-tlscc.ll b/llvm/test/CodeGen/ARM/cxx-tlscc.ll index 48cce4f01be..d49c61ab093 100644 --- a/llvm/test/CodeGen/ARM/cxx-tlscc.ll +++ b/llvm/test/CodeGen/ARM/cxx-tlscc.ll @@ -126,5 +126,27 @@ entry:    ret void  } +declare void @somefunc() +define cxx_fast_tlscc void @test_ccmismatch_notail() { +; A tail call is not possible here because somefunc does not preserve enough +; registers. +; CHECK-LABEL: test_ccmismatch_notail: +; CHECK-NOT: b _somefunc +; CHECK: bl _somefunc +  tail call void @somefunc() +  ret void +} + +declare cxx_fast_tlscc void @some_fast_tls_func() +define void @test_ccmismatch_tail() { +; We can perform a tail call here because some_fast_tls_func preserves all +; necessary registers (and more). +; CHECK-LABEL: test_ccmismatch_tail: +; CHECK-NOT: bl _some_fast_tls_func +; CHECK: b _some_fast_tls_func +  tail call cxx_fast_tlscc void @some_fast_tls_func() +  ret void +} +  attributes #0 = { nounwind "no-frame-pointer-elim"="true" }  attributes #1 = { nounwind }  | 

