summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AArch64/AArch64CallingConvention.td8
-rw-r--r--llvm/lib/Target/AArch64/AArch64FastISel.cpp3
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp59
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.h8
-rw-r--r--llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h13
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp14
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.h2
-rw-r--r--llvm/test/CodeGen/AArch64/cxx-tlscc.ll83
8 files changed, 145 insertions, 45 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 66d92100e63..388d64ec4e9 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -288,6 +288,14 @@ def CSR_AArch64_CXX_TLS_Darwin
(sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
(sequence "D%u", 0, 31))>;
+// CSRs that are handled by prologue, epilogue.
+def CSR_AArch64_CXX_TLS_Darwin_PE
+ : CalleeSavedRegs<(add LR, FP)>;
+
+// CSRs that are handled explicitly via copies.
+def CSR_AArch64_CXX_TLS_Darwin_ViaCopy
+ : CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>;
+
// The ELF stub used for TLS-descriptor access saves every feasible
// register. Only X0 and LR are clobbered.
def CSR_AArch64_TLS_ELF
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index cae2d527629..0ac4b39b035 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3646,6 +3646,9 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
if (F.isVarArg())
return false;
+ if (TLI.supportSplitCSR(FuncInfo.MF))
+ return false;
+
// Build a list of return value registers.
SmallVector<unsigned, 4> RetRegs;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4fe8565a06c..9f5beff1210 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3271,6 +3271,19 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *I =
+ TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+ if (I) {
+ for (; *I; ++I) {
+ if (AArch64::GPR64RegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i64));
+ else if (AArch64::FPR64RegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+ }
+ }
RetOps[0] = Chain; // Update chain.
@@ -10003,3 +10016,49 @@ Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) cons
IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset),
Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
}
+
+void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+ // Update IsSplitCSR in AArch64unctionInfo.
+ AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
+ AFI->setIsSplitCSR(true);
+}
+
+void AArch64TargetLowering::insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+ if (!IStart)
+ return;
+
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+ for (const MCPhysReg *I = IStart; *I; ++I) {
+ const TargetRegisterClass *RC = nullptr;
+ if (AArch64::GPR64RegClass.contains(*I))
+ RC = &AArch64::GPR64RegClass;
+ else if (AArch64::FPR64RegClass.contains(*I))
+ RC = &AArch64::FPR64RegClass;
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ // Create copy from CSR to a virtual register.
+ // FIXME: this currently does not emit CFI pseudo-instructions, it works
+ // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+ // nounwind. If we want to generalize this later, we may need to emit
+ // CFI pseudo-instructions.
+ assert(Entry->getParent()->getFunction()->hasFnAttribute(
+ Attribute::NoUnwind) &&
+ "Function should be nounwind in insertCopiesSplitCSR!");
+ Entry->addLiveIn(*I);
+ BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVR)
+ .addReg(*I);
+
+ for (auto *Exit : Exits)
+ BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ *I)
+ .addReg(NewVR);
+ }
+}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 51a3fee4791..e99616c9406 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -385,6 +385,14 @@ public:
bool isCheapToSpeculateCtlz() const override {
return true;
}
+ bool supportSplitCSR(MachineFunction *MF) const override {
+ return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+ }
+ void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+ void insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
private:
bool isExtFreeImpl(const Instruction *Ext) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index cc0ccc2771f..318f8395350 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -72,16 +72,22 @@ class AArch64FunctionInfo : public MachineFunctionInfo {
/// registers.
unsigned VarArgsFPRSize;
+ /// True if this function has a subset of CSRs that is handled explicitly via
+ /// copies.
+ bool IsSplitCSR;
+
public:
AArch64FunctionInfo()
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
- VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {}
+ VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
+ IsSplitCSR(false) {}
explicit AArch64FunctionInfo(MachineFunction &MF)
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
- VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {
+ VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
+ IsSplitCSR(false) {
(void)MF;
}
@@ -96,6 +102,9 @@ public:
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
+ bool isSplitCSR() const { return IsSplitCSR; }
+ void setIsSplitCSR(bool s) { IsSplitCSR = s; }
+
void setLocalStackSize(unsigned Size) { LocalStackSize = Size; }
unsigned getLocalStackSize() const { return LocalStackSize; }
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 763b2337de1..32b4888f2f6 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -15,6 +15,7 @@
#include "AArch64RegisterInfo.h"
#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
@@ -47,11 +48,22 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_SaveList;
if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS)
- return CSR_AArch64_CXX_TLS_Darwin_SaveList;
+ return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
+ CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
+ CSR_AArch64_CXX_TLS_Darwin_SaveList;
else
return CSR_AArch64_AAPCS_SaveList;
}
+const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
+ const MachineFunction *MF) const {
+ assert(MF && "Invalid MachineFunction pointer.");
+ if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getInfo<AArch64FunctionInfo>()->isSplitCSR())
+ return CSR_AArch64_CXX_TLS_Darwin_ViaCopy_SaveList;
+ return nullptr;
+}
+
const uint32_t *
AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index c01bfa5ea70..f33f788fd43 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -35,6 +35,8 @@ public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const MCPhysReg *
+ getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
diff --git a/llvm/test/CodeGen/AArch64/cxx-tlscc.ll b/llvm/test/CodeGen/AArch64/cxx-tlscc.ll
index 39f6c0fbec9..a9ae00c8d27 100644
--- a/llvm/test/CodeGen/AArch64/cxx-tlscc.ll
+++ b/llvm/test/CodeGen/AArch64/cxx-tlscc.ll
@@ -13,7 +13,7 @@ declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)
declare %struct.S* @_ZN1SD1Ev(%struct.S* returned)
declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
-define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() {
+define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
%.b.i = load i1, i1* @__tls_guard, align 1
br i1 %.b.i, label %__tls_init.exit, label %init.i
@@ -28,50 +28,49 @@ __tls_init.exit:
}
; CHECK-LABEL: _ZTW2sg
-; CHECK-DAG: stp d31, d30
-; CHECK-DAG: stp d29, d28
-; CHECK-DAG: stp d27, d26
-; CHECK-DAG: stp d25, d24
-; CHECK-DAG: stp d23, d22
-; CHECK-DAG: stp d21, d20
-; CHECK-DAG: stp d19, d18
-; CHECK-DAG: stp d17, d16
-; CHECK-DAG: stp d7, d6
-; CHECK-DAG: stp d5, d4
-; CHECK-DAG: stp d3, d2
-; CHECK-DAG: stp d1, d0
-; CHECK-DAG: stp x20, x19
-; CHECK-DAG: stp x14, x13
-; CHECK-DAG: stp x12, x11
-; CHECK-DAG: stp x10, x9
-; CHECK-DAG: stp x8, x7
-; CHECK-DAG: stp x6, x5
-; CHECK-DAG: stp x4, x3
-; CHECK-DAG: stp x2, x1
-; CHECK-DAG: stp x29, x30
+; CHECK-NOT: stp d31, d30
+; CHECK-NOT: stp d29, d28
+; CHECK-NOT: stp d27, d26
+; CHECK-NOT: stp d25, d24
+; CHECK-NOT: stp d23, d22
+; CHECK-NOT: stp d21, d20
+; CHECK-NOT: stp d19, d18
+; CHECK-NOT: stp d17, d16
+; CHECK-NOT: stp d7, d6
+; CHECK-NOT: stp d5, d4
+; CHECK-NOT: stp d3, d2
+; CHECK-NOT: stp d1, d0
+; CHECK-NOT: stp x20, x19
+; CHECK-NOT: stp x14, x13
+; CHECK-NOT: stp x12, x11
+; CHECK-NOT: stp x10, x9
+; CHECK-NOT: stp x8, x7
+; CHECK-NOT: stp x6, x5
+; CHECK-NOT: stp x4, x3
+; CHECK-NOT: stp x2, x1
; CHECK: blr
; CHECK: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]]
; CHECK: blr
; CHECK: tlv_atexit
; CHECK: [[BB_end]]:
; CHECK: blr
-; CHECK-DAG: ldp x2, x1
-; CHECK-DAG: ldp x4, x3
-; CHECK-DAG: ldp x6, x5
-; CHECK-DAG: ldp x8, x7
-; CHECK-DAG: ldp x10, x9
-; CHECK-DAG: ldp x12, x11
-; CHECK-DAG: ldp x14, x13
-; CHECK-DAG: ldp x20, x19
-; CHECK-DAG: ldp d1, d0
-; CHECK-DAG: ldp d3, d2
-; CHECK-DAG: ldp d5, d4
-; CHECK-DAG: ldp d7, d6
-; CHECK-DAG: ldp d17, d16
-; CHECK-DAG: ldp d19, d18
-; CHECK-DAG: ldp d21, d20
-; CHECK-DAG: ldp d23, d22
-; CHECK-DAG: ldp d25, d24
-; CHECK-DAG: ldp d27, d26
-; CHECK-DAG: ldp d29, d28
-; CHECK-DAG: ldp d31, d30
+; CHECK-NOT: ldp x2, x1
+; CHECK-NOT: ldp x4, x3
+; CHECK-NOT: ldp x6, x5
+; CHECK-NOT: ldp x8, x7
+; CHECK-NOT: ldp x10, x9
+; CHECK-NOT: ldp x12, x11
+; CHECK-NOT: ldp x14, x13
+; CHECK-NOT: ldp x20, x19
+; CHECK-NOT: ldp d1, d0
+; CHECK-NOT: ldp d3, d2
+; CHECK-NOT: ldp d5, d4
+; CHECK-NOT: ldp d7, d6
+; CHECK-NOT: ldp d17, d16
+; CHECK-NOT: ldp d19, d18
+; CHECK-NOT: ldp d21, d20
+; CHECK-NOT: ldp d23, d22
+; CHECK-NOT: ldp d25, d24
+; CHECK-NOT: ldp d27, d26
+; CHECK-NOT: ldp d29, d28
+; CHECK-NOT: ldp d31, d30
OpenPOWER on IntegriCloud