diff options
| author | Chuang-Yu Cheng <cycheng@multicorewareinc.com> | 2016-04-08 12:04:32 +0000 |
|---|---|---|
| committer | Chuang-Yu Cheng <cycheng@multicorewareinc.com> | 2016-04-08 12:04:32 +0000 |
| commit | 98c1894755c87fc1c42b2152fdb4e064a8b89f1e (patch) | |
| tree | 6aa029c2c2e35cb5bfc90a889a26235af8f608c1 /llvm/lib/Target/PowerPC | |
| parent | 957d849e03cbc3cd638b6e16fa7ec4937b07dbdc (diff) | |
| download | bcm5719-llvm-98c1894755c87fc1c42b2152fdb4e064a8b89f1e.tar.gz bcm5719-llvm-98c1894755c87fc1c42b2152fdb4e064a8b89f1e.zip | |
CXX_FAST_TLS calling convention: performance improvement for PPC64
This is the same change on PPC64 as r255821 on AArch64. I have even borrowed
the commit message from that revision.
The access function has a short entry and a short exit; the initialization
block is only run the first time. To improve the performance, we want to
have a short frame at the entry and exit.
We explicitly handle most of the CSRs via copies. Only the CSRs that are not
handled via copies will be in CSR_SaveList.
Frame lowering and prologue/epilogue insertion will generate a short frame
in the entry and exit according to CSR_SaveList. The majority of the CSRs will
be handled by the register allocator. The register allocator will try to spill and
reload them in the initialization block.
We add CSRsViaCopy, which will be explicitly handled during lowering.
1> we first set FunctionLoweringInfo->SplitCSR if conditions are met (the target
supports it for the given machine function and the function has only return
exits). We also call TLI->initializeSplitCSR to perform initialization.
2> we call TLI->insertCopiesSplitCSR to insert copies from CSRsViaCopy to
virtual registers at the beginning of the entry block and copies from virtual
registers to CSRsViaCopy right before the terminator of each exit block.
3> we also need to make sure the explicit copies will not be eliminated.
Author: Tom Jablin (tjablin)
Reviewers: hfinkel kbarton cycheng
http://reviews.llvm.org/D17533
llvm-svn: 265781
Diffstat (limited to 'llvm/lib/Target/PowerPC')
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCCallingConv.td | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCFastISel.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 73 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 28 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 1 |
7 files changed, 137 insertions, 1 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td index 5bc9124f808..2fd3ab67297 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -243,12 +243,23 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, F27, F28, F29, F30, F31, CR2, CR3, CR4 )>; +// CSRs that are handled by prologue, epilogue. +def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>; + +def CSR_SVR464_ViaCopy : CalleeSavedRegs<(add CSR_SVR464)>; + def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>; +def CSR_SVR464_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_Altivec)>; + def CSR_SVR464_R2 : CalleeSavedRegs<(add CSR_SVR464, X2)>; +def CSR_SVR464_R2_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2)>; + def CSR_SVR464_R2_Altivec : CalleeSavedRegs<(add CSR_SVR464_Altivec, X2)>; +def CSR_SVR464_R2_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2_Altivec)>; + def CSR_NoRegs : CalleeSavedRegs<(add)>; def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10), diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp index 0f4dee3ded4..7ff7e48795d 100644 --- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -1597,6 +1597,9 @@ bool PPCFastISel::SelectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; + if (TLI.supportSplitCSR(FuncInfo.MF)) + return false; + const ReturnInst *Ret = cast<ReturnInst>(I); const Function &F = *I->getParent()->getParent(); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 0aedb419201..0f71b8d048b 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6029,6 +6029,25 @@ PPCTargetLowering::LowerReturn(SDValue Chain, RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } + const PPCRegisterInfo *TRI = 
Subtarget.getRegisterInfo(); + const MCPhysReg *I = + TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); + if (I) { + for (; *I; ++I) { + + if (PPC::G8RCRegClass.contains(*I)) + RetOps.push_back(DAG.getRegister(*I, MVT::i64)); + else if (PPC::F8RCRegClass.contains(*I)) + RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); + else if (PPC::CRRCRegClass.contains(*I)) + RetOps.push_back(DAG.getRegister(*I, MVT::i1)); + else if (PPC::VRRCRegClass.contains(*I)) + RetOps.push_back(DAG.getRegister(*I, MVT::Other)); + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + } + } + RetOps[0] = Chain; // Update chain. // Add the flag if we have it. @@ -11922,3 +11941,57 @@ PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const { return PPC::createFastISel(FuncInfo, LibInfo); } + +void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { + if (Subtarget.isDarwinABI()) return; + if (!Subtarget.isPPC64()) return; + + // Update IsSplitCSR in PPCFunctionInfo + PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>(); + PFI->setIsSplitCSR(true); +} + +void PPCTargetLowering::insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const { + const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); + if (!IStart) + return; + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); + MachineBasicBlock::iterator MBBI = Entry->begin(); + for (const MCPhysReg *I = IStart; *I; ++I) { + const TargetRegisterClass *RC = nullptr; + if (PPC::G8RCRegClass.contains(*I)) + RC = &PPC::G8RCRegClass; + else if (PPC::F8RCRegClass.contains(*I)) + RC = &PPC::F8RCRegClass; + else if (PPC::CRRCRegClass.contains(*I)) + RC = &PPC::CRRCRegClass; + else if (PPC::VRRCRegClass.contains(*I)) + RC = 
&PPC::VRRCRegClass; + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + + unsigned NewVR = MRI->createVirtualRegister(RC); + // Create copy from CSR to a virtual register. + // FIXME: this currently does not emit CFI pseudo-instructions, it works + // fine for CXX_FAST_TLS since the C++-style TLS access functions should be + // nounwind. If we want to generalize this later, we may need to emit + // CFI pseudo-instructions. + assert(Entry->getParent()->getFunction()->hasFnAttribute( + Attribute::NoUnwind) && + "Function should be nounwind in insertCopiesSplitCSR!"); + Entry->addLiveIn(*I); + BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) + .addReg(*I); + + // Insert the copy-back instructions right before the terminator + for (auto *Exit : Exits) + BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), *I) + .addReg(NewVR); + } +} diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 2c4409d7da2..5351b43c683 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -442,6 +442,18 @@ namespace llvm { return true; } + bool supportSplitCSR(MachineFunction *MF) const override { + return + MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); + } + + void initializeSplitCSR(MachineBasicBlock *Entry) const override; + + void insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; + /// getSetCCResultType - Return the ISD::SETCC ValueType EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 10a8ce068d4..4c29aa06f04 100644 --- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ 
b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -104,6 +104,10 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// Whether this uses the PIC Base register or not. bool UsesPICBase; + /// True if this function has a subset of CSRs that is handled explicitly via + /// copies + bool IsSplitCSR; + public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), @@ -125,7 +129,8 @@ public: VarArgsNumFPR(0), CRSpillFrameIndex(0), MF(MF), - UsesPICBase(0) {} + UsesPICBase(0), + IsSplitCSR(false) {} int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } @@ -196,6 +201,9 @@ public: void setUsesPICBase(bool uses) { UsesPICBase = uses; } bool usesPICBase() const { return UsesPICBase; } + bool isSplitCSR() const { return IsSplitCSR; } + void setIsSplitCSR(bool s) { IsSplitCSR = s; } + MCSymbol *getPICOffsetSymbol() const; MCSymbol *getGlobalEPSymbol() const; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 934bdf62241..5fef2924afe 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -116,6 +116,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList : CSR_Darwin32_SaveList); + if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR()) + return CSR_SRV464_TLS_PE_SaveList; + // On PPC64, we might need to save r2 (but only if it is not reserved). 
bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2); @@ -128,6 +131,31 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { : CSR_SVR432_SaveList); } +const MCPhysReg * +PPCRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const { + assert(MF && "Invalid MachineFunction pointer."); + const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>(); + if (Subtarget.isDarwinABI()) + return nullptr; + if (!TM.isPPC64()) + return nullptr; + if (MF->getFunction()->getCallingConv() != CallingConv::CXX_FAST_TLS) + return nullptr; + if (!MF->getInfo<PPCFunctionInfo>()->isSplitCSR()) + return nullptr; + + // On PPC64, we might need to save r2 (but only if it is not reserved). + bool SaveR2 = !getReservedRegs(*MF).test(PPC::X2); + if (Subtarget.hasAltivec()) + return SaveR2 + ? CSR_SVR464_R2_Altivec_ViaCopy_SaveList + : CSR_SVR464_Altivec_ViaCopy_SaveList; + else + return SaveR2 + ? CSR_SVR464_R2_ViaCopy_SaveList + : CSR_SVR464_ViaCopy_SaveList; +} + const uint32_t * PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index b15fde83c9f..459502eeb2e 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -75,6 +75,7 @@ public: /// Code Generation virtual methods... const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const override; const uint32_t *getNoPreservedMask() const override; |

