summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChuang-Yu Cheng <cycheng@multicorewareinc.com>2016-04-08 12:04:32 +0000
committerChuang-Yu Cheng <cycheng@multicorewareinc.com>2016-04-08 12:04:32 +0000
commit98c1894755c87fc1c42b2152fdb4e064a8b89f1e (patch)
tree6aa029c2c2e35cb5bfc90a889a26235af8f608c1
parent957d849e03cbc3cd638b6e16fa7ec4937b07dbdc (diff)
downloadbcm5719-llvm-98c1894755c87fc1c42b2152fdb4e064a8b89f1e.tar.gz
bcm5719-llvm-98c1894755c87fc1c42b2152fdb4e064a8b89f1e.zip
CXX_FAST_TLS calling convention: performance improvement for PPC64
This is the same change on PPC64 as r255821 on AArch64. I have even borrowed his commit message. The access function has a short entry and a short exit, the initialization block is only run the first time. To improve the performance, we want to have a short frame at the entry and exit. We explicitly handle most of the CSRs via copies. Only the CSRs that are not handled via copies will be in CSR_SaveList. Frame lowering and prologue/epilogue insertion will generate a short frame in the entry and exit according to CSR_SaveList. The majority of the CSRs will be handled by register allcoator. Register allocator will try to spill and reload them in the initialization block. We add CSRsViaCopy, it will be explicitly handled during lowering. 1> we first set FunctionLoweringInfo->SplitCSR if conditions are met (the target supports it for the given machine function and the function has only return exits). We also call TLI->initializeSplitCSR to perform initialization. 2> we call TLI->insertCopiesSplitCSR to insert copies from CSRsViaCopy to virtual registers at beginning of the entry block and copies from virtual registers to CSRsViaCopy at beginning of the exit blocks. 3> we also need to make sure the explicit copies will not be eliminated. Author: Tom Jablin (tjablin) Reviewers: hfinkel kbarton cycheng http://reviews.llvm.org/D17533 llvm-svn: 265781
-rw-r--r--llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp7
-rw-r--r--llvm/lib/Target/PowerPC/PPCCallingConv.td11
-rw-r--r--llvm/lib/Target/PowerPC/PPCFastISel.cpp3
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp73
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h12
-rw-r--r--llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h10
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp28
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.h1
-rw-r--r--llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll43
9 files changed, 185 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 679ade185e1..0a0e0795c66 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -63,12 +63,15 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
+ // Resize before the early returns. Some backends expect that
+ // SavedRegs.size() == TRI.getNumRegs() after this call even if there are no
+ // saved registers.
+ SavedRegs.resize(TRI.getNumRegs());
+
// Early exit if there are no callee saved registers.
if (!CSRegs || CSRegs[0] == 0)
return;
- SavedRegs.resize(TRI.getNumRegs());
-
// In Naked functions we aren't going to save any registers.
if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
return;
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 5bc9124f808..2fd3ab67297 100644
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -243,12 +243,23 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
+// CSRs that are handled by prologue, epilogue.
+def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>;
+
+def CSR_SVR464_ViaCopy : CalleeSavedRegs<(add CSR_SVR464)>;
+
def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;
+def CSR_SVR464_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_Altivec)>;
+
def CSR_SVR464_R2 : CalleeSavedRegs<(add CSR_SVR464, X2)>;
+def CSR_SVR464_R2_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2)>;
+
def CSR_SVR464_R2_Altivec : CalleeSavedRegs<(add CSR_SVR464_Altivec, X2)>;
+def CSR_SVR464_R2_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2_Altivec)>;
+
def CSR_NoRegs : CalleeSavedRegs<(add)>;
def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10),
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 0f4dee3ded4..7ff7e48795d 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1597,6 +1597,9 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
+ if (TLI.supportSplitCSR(FuncInfo.MF))
+ return false;
+
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 0aedb419201..0f71b8d048b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6029,6 +6029,25 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const MCPhysReg *I =
+ TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+ if (I) {
+ for (; *I; ++I) {
+
+ if (PPC::G8RCRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i64));
+ else if (PPC::F8RCRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+ else if (PPC::CRRCRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i1));
+ else if (PPC::VRRCRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::Other));
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+ }
+ }
+
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
@@ -11922,3 +11941,57 @@ PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) const {
return PPC::createFastISel(FuncInfo, LibInfo);
}
+
+void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+ if (Subtarget.isDarwinABI()) return;
+ if (!Subtarget.isPPC64()) return;
+
+ // Update IsSplitCSR in PPCFunctionInfo
+ PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
+ PFI->setIsSplitCSR(true);
+}
+
+void PPCTargetLowering::insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+ const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+ if (!IStart)
+ return;
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+ MachineBasicBlock::iterator MBBI = Entry->begin();
+ for (const MCPhysReg *I = IStart; *I; ++I) {
+ const TargetRegisterClass *RC = nullptr;
+ if (PPC::G8RCRegClass.contains(*I))
+ RC = &PPC::G8RCRegClass;
+ else if (PPC::F8RCRegClass.contains(*I))
+ RC = &PPC::F8RCRegClass;
+ else if (PPC::CRRCRegClass.contains(*I))
+ RC = &PPC::CRRCRegClass;
+ else if (PPC::VRRCRegClass.contains(*I))
+ RC = &PPC::VRRCRegClass;
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ // Create copy from CSR to a virtual register.
+ // FIXME: this currently does not emit CFI pseudo-instructions, it works
+ // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+ // nounwind. If we want to generalize this later, we may need to emit
+ // CFI pseudo-instructions.
+ assert(Entry->getParent()->getFunction()->hasFnAttribute(
+ Attribute::NoUnwind) &&
+ "Function should be nounwind in insertCopiesSplitCSR!");
+ Entry->addLiveIn(*I);
+ BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
+ .addReg(*I);
+
+ // Insert the copy-back instructions right before the terminator
+ for (auto *Exit : Exits)
+ BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::COPY), *I)
+ .addReg(NewVR);
+ }
+}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 2c4409d7da2..5351b43c683 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -442,6 +442,18 @@ namespace llvm {
return true;
}
+ bool supportSplitCSR(MachineFunction *MF) const override {
+ return
+ MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+ }
+
+ void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+
+ void insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
/// getSetCCResultType - Return the ISD::SETCC ValueType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 10a8ce068d4..4c29aa06f04 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -104,6 +104,10 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// Whether this uses the PIC Base register or not.
bool UsesPICBase;
+ /// True if this function has a subset of CSRs that is handled explicitly via
+ /// copies
+ bool IsSplitCSR;
+
public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
@@ -125,7 +129,8 @@ public:
VarArgsNumFPR(0),
CRSpillFrameIndex(0),
MF(MF),
- UsesPICBase(0) {}
+ UsesPICBase(0),
+ IsSplitCSR(false) {}
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -196,6 +201,9 @@ public:
void setUsesPICBase(bool uses) { UsesPICBase = uses; }
bool usesPICBase() const { return UsesPICBase; }
+ bool isSplitCSR() const { return IsSplitCSR; }
+ void setIsSplitCSR(bool s) { IsSplitCSR = s; }
+
MCSymbol *getPICOffsetSymbol() const;
MCSymbol *getGlobalEPSymbol() const;
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 934bdf62241..5fef2924afe 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -116,6 +116,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
: (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList
: CSR_Darwin32_SaveList);
+ if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
+ return CSR_SRV464_TLS_PE_SaveList;
+
// On PPC64, we might need to save r2 (but only if it is not reserved).
bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
@@ -128,6 +131,31 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
: CSR_SVR432_SaveList);
}
+const MCPhysReg *
+PPCRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
+ assert(MF && "Invalid MachineFunction pointer.");
+ const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
+ if (Subtarget.isDarwinABI())
+ return nullptr;
+ if (!TM.isPPC64())
+ return nullptr;
+ if (MF->getFunction()->getCallingConv() != CallingConv::CXX_FAST_TLS)
+ return nullptr;
+ if (!MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
+ return nullptr;
+
+ // On PPC64, we might need to save r2 (but only if it is not reserved).
+ bool SaveR2 = !getReservedRegs(*MF).test(PPC::X2);
+ if (Subtarget.hasAltivec())
+ return SaveR2
+ ? CSR_SVR464_R2_Altivec_ViaCopy_SaveList
+ : CSR_SVR464_Altivec_ViaCopy_SaveList;
+ else
+ return SaveR2
+ ? CSR_SVR464_R2_ViaCopy_SaveList
+ : CSR_SVR464_ViaCopy_SaveList;
+}
+
const uint32_t *
PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index b15fde83c9f..459502eeb2e 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -75,6 +75,7 @@ public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const override;
diff --git a/llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll b/llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll
new file mode 100644
index 00000000000..2baff849884
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s --enable-shrink-wrap=false -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s
+%struct.S = type { i8 }
+
+@sg = internal thread_local global %struct.S zeroinitializer, align 1
+@__dso_handle = external global i8
+@__tls_guard = internal thread_local unnamed_addr global i1 false
+@sum1 = internal thread_local global i32 0, align 4
+
+declare void @_ZN1SC1Ev(%struct.S*)
+declare void @_ZN1SD1Ev(%struct.S*)
+declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
+
+; CHECK-LABEL: _ZTW2sg
+define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
+ %.b.i = load i1, i1* @__tls_guard, align 1
+; CHECK: bc 12, 1, [[BB_end:.?LBB0_[0-9]+]]
+ br i1 %.b.i, label %__tls_init.exit, label %init.i
+
+init.i:
+; CHECK: Folded Spill
+ store i1 true, i1* @__tls_guard, align 1
+ tail call void @_ZN1SC1Ev(%struct.S* nonnull @sg) #2
+; CHECK: bl _ZN1SC1Ev
+ %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (void (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) #2
+; CHECK: Folded Reload
+; CHECK: _tlv_atexit
+ br label %__tls_init.exit
+
+; CHECK: [[BB_end]]:
+__tls_init.exit:
+ ret %struct.S* @sg
+}
+
+; CHECK-LABEL: _ZTW4sum1
+define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
+ ret i32* @sum1
+}
+
+define cxx_fast_tlscc i32* @_ZTW4sum2() #0 {
+ ret i32* @sum1
+}
+
+attributes #0 = { nounwind "no-frame-pointer-elim"="true" } \ No newline at end of file
OpenPOWER on IntegriCloud