summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
diff options
context:
space:
mode:
authorChuang-Yu Cheng <cycheng@multicorewareinc.com>2016-04-08 12:04:32 +0000
committerChuang-Yu Cheng <cycheng@multicorewareinc.com>2016-04-08 12:04:32 +0000
commit98c1894755c87fc1c42b2152fdb4e064a8b89f1e (patch)
tree6aa029c2c2e35cb5bfc90a889a26235af8f608c1 /llvm/lib/Target/PowerPC/PPCISelLowering.cpp
parent957d849e03cbc3cd638b6e16fa7ec4937b07dbdc (diff)
downloadbcm5719-llvm-98c1894755c87fc1c42b2152fdb4e064a8b89f1e.tar.gz
bcm5719-llvm-98c1894755c87fc1c42b2152fdb4e064a8b89f1e.zip
CXX_FAST_TLS calling convention: performance improvement for PPC64
This is the same change on PPC64 as r255821 on AArch64. I have even borrowed his commit message. The access function has a short entry and a short exit, the initialization block is only run the first time. To improve the performance, we want to have a short frame at the entry and exit. We explicitly handle most of the CSRs via copies. Only the CSRs that are not handled via copies will be in CSR_SaveList. Frame lowering and prologue/epilogue insertion will generate a short frame in the entry and exit according to CSR_SaveList. The majority of the CSRs will be handled by the register allocator. The register allocator will try to spill and reload them in the initialization block. We add CSRsViaCopy, which will be explicitly handled during lowering. 1> we first set FunctionLoweringInfo->SplitCSR if conditions are met (the target supports it for the given machine function and the function has only return exits). We also call TLI->initializeSplitCSR to perform initialization. 2> we call TLI->insertCopiesSplitCSR to insert copies from CSRsViaCopy to virtual registers at beginning of the entry block and copies from virtual registers to CSRsViaCopy at beginning of the exit blocks. 3> we also need to make sure the explicit copies will not be eliminated. Author: Tom Jablin (tjablin) Reviewers: hfinkel kbarton cycheng http://reviews.llvm.org/D17533 llvm-svn: 265781
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp73
1 files changed, 73 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 0aedb419201..0f71b8d048b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6029,6 +6029,25 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const MCPhysReg *I =
+ TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+ if (I) {
+ for (; *I; ++I) {
+
+ if (PPC::G8RCRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i64));
+ else if (PPC::F8RCRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+ else if (PPC::CRRCRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i1));
+ else if (PPC::VRRCRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::Other));
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+ }
+ }
+
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
@@ -11922,3 +11941,57 @@ PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) const {
return PPC::createFastISel(FuncInfo, LibInfo);
}
+
+void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { // Flags the function that contains Entry as using split CSR handling.
+ if (Subtarget.isDarwinABI()) return; // Darwin ABI: return without setting the flag.
+ if (!Subtarget.isPPC64()) return; // Subtarget is not PPC64: return without setting the flag.
+
+ // Update IsSplitCSR in PPCFunctionInfo
+ PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>(); // Per-function PPC info of Entry's parent function.
+ PFI->setIsSplitCSR(true);
+}
+
+void PPCTargetLowering::insertCopiesSplitCSR( // For each register from getCalleeSavedRegsViaCopy: COPY it to a new virtual register in Entry and COPY it back in every block of Exits.
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+ const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+ if (!IStart) // A null list means there is nothing to copy for this function.
+ return;
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+ MachineBasicBlock::iterator MBBI = Entry->begin(); // Captured once, before any insertion; used as the insertion point for every entry-side COPY.
+ for (const MCPhysReg *I = IStart; *I; ++I) { // The walk stops at the first zero entry of the list.
+ const TargetRegisterClass *RC = nullptr;
+ if (PPC::G8RCRegClass.contains(*I)) // Pick the virtual register's class from whichever of the four classes contains *I.
+ RC = &PPC::G8RCRegClass;
+ else if (PPC::F8RCRegClass.contains(*I))
+ RC = &PPC::F8RCRegClass;
+ else if (PPC::CRRCRegClass.contains(*I))
+ RC = &PPC::CRRCRegClass;
+ else if (PPC::VRRCRegClass.contains(*I))
+ RC = &PPC::VRRCRegClass;
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!"); // A register outside the four classes above is treated as unreachable.
+
+ unsigned NewVR = MRI->createVirtualRegister(RC); // Fresh virtual register that carries the value of *I across the function body.
+ // Create copy from CSR to a virtual register.
+ // FIXME: this currently does not emit CFI pseudo-instructions, it works
+ // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+ // nounwind. If we want to generalize this later, we may need to emit
+ // CFI pseudo-instructions.
+ assert(Entry->getParent()->getFunction()->hasFnAttribute(
+ Attribute::NoUnwind) &&
+ "Function should be nounwind in insertCopiesSplitCSR!"); // NOTE(review): the condition does not depend on *I, yet it is checked on every iteration.
+ Entry->addLiveIn(*I); // Record the physical register as live-in to Entry, which now reads it.
+ BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) // Entry side: NewVR = COPY *I, built with an empty DebugLoc.
+ .addReg(*I);
+
+ // Insert the copy-back instructions right before the terminator
+ for (auto *Exit : Exits) // Exit side: *I = COPY NewVR at getFirstTerminator() of each exit block.
+ BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::COPY), *I)
+ .addReg(NewVR);
+ }
+}
OpenPOWER on IntegriCloud