summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/PowerPC
diff options
context:
space:
mode:
authorChuang-Yu Cheng <cycheng@multicorewareinc.com>2016-04-08 12:04:32 +0000
committerChuang-Yu Cheng <cycheng@multicorewareinc.com>2016-04-08 12:04:32 +0000
commit98c1894755c87fc1c42b2152fdb4e064a8b89f1e (patch)
tree6aa029c2c2e35cb5bfc90a889a26235af8f608c1 /llvm/lib/Target/PowerPC
parent957d849e03cbc3cd638b6e16fa7ec4937b07dbdc (diff)
downloadbcm5719-llvm-98c1894755c87fc1c42b2152fdb4e064a8b89f1e.tar.gz
bcm5719-llvm-98c1894755c87fc1c42b2152fdb4e064a8b89f1e.zip
CXX_FAST_TLS calling convention: performance improvement for PPC64
This is the same change on PPC64 as r255821 on AArch64. I have even borrowed his commit message. The access function has a short entry and a short exit, the initialization block is only run the first time. To improve the performance, we want to have a short frame at the entry and exit. We explicitly handle most of the CSRs via copies. Only the CSRs that are not handled via copies will be in CSR_SaveList. Frame lowering and prologue/epilogue insertion will generate a short frame in the entry and exit according to CSR_SaveList. The majority of the CSRs will be handled by register allcoator. Register allocator will try to spill and reload them in the initialization block. We add CSRsViaCopy, it will be explicitly handled during lowering. 1> we first set FunctionLoweringInfo->SplitCSR if conditions are met (the target supports it for the given machine function and the function has only return exits). We also call TLI->initializeSplitCSR to perform initialization. 2> we call TLI->insertCopiesSplitCSR to insert copies from CSRsViaCopy to virtual registers at beginning of the entry block and copies from virtual registers to CSRsViaCopy at beginning of the exit blocks. 3> we also need to make sure the explicit copies will not be eliminated. Author: Tom Jablin (tjablin) Reviewers: hfinkel kbarton cycheng http://reviews.llvm.org/D17533 llvm-svn: 265781
Diffstat (limited to 'llvm/lib/Target/PowerPC')
-rw-r--r--llvm/lib/Target/PowerPC/PPCCallingConv.td11
-rw-r--r--llvm/lib/Target/PowerPC/PPCFastISel.cpp3
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp73
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h12
-rw-r--r--llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h10
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp28
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.h1
7 files changed, 137 insertions, 1 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 5bc9124f808..2fd3ab67297 100644
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -243,12 +243,23 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
+// CSRs that are handled by prologue, epilogue.
+def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>;
+
+def CSR_SVR464_ViaCopy : CalleeSavedRegs<(add CSR_SVR464)>;
+
def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;
+def CSR_SVR464_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_Altivec)>;
+
def CSR_SVR464_R2 : CalleeSavedRegs<(add CSR_SVR464, X2)>;
+def CSR_SVR464_R2_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2)>;
+
def CSR_SVR464_R2_Altivec : CalleeSavedRegs<(add CSR_SVR464_Altivec, X2)>;
+def CSR_SVR464_R2_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2_Altivec)>;
+
def CSR_NoRegs : CalleeSavedRegs<(add)>;
def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10),
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 0f4dee3ded4..7ff7e48795d 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1597,6 +1597,9 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
+ if (TLI.supportSplitCSR(FuncInfo.MF))
+ return false;
+
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 0aedb419201..0f71b8d048b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6029,6 +6029,25 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const MCPhysReg *I =
+ TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+ if (I) {
+ for (; *I; ++I) {
+
+ if (PPC::G8RCRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i64));
+ else if (PPC::F8RCRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+ else if (PPC::CRRCRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i1));
+ else if (PPC::VRRCRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::Other));
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+ }
+ }
+
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
@@ -11922,3 +11941,57 @@ PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) const {
return PPC::createFastISel(FuncInfo, LibInfo);
}
+
+void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+ if (Subtarget.isDarwinABI()) return;
+ if (!Subtarget.isPPC64()) return;
+
+ // Update IsSplitCSR in PPCFunctionInfo
+ PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
+ PFI->setIsSplitCSR(true);
+}
+
+void PPCTargetLowering::insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+ const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+ if (!IStart)
+ return;
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+ MachineBasicBlock::iterator MBBI = Entry->begin();
+ for (const MCPhysReg *I = IStart; *I; ++I) {
+ const TargetRegisterClass *RC = nullptr;
+ if (PPC::G8RCRegClass.contains(*I))
+ RC = &PPC::G8RCRegClass;
+ else if (PPC::F8RCRegClass.contains(*I))
+ RC = &PPC::F8RCRegClass;
+ else if (PPC::CRRCRegClass.contains(*I))
+ RC = &PPC::CRRCRegClass;
+ else if (PPC::VRRCRegClass.contains(*I))
+ RC = &PPC::VRRCRegClass;
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ // Create copy from CSR to a virtual register.
+ // FIXME: this currently does not emit CFI pseudo-instructions, it works
+ // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+ // nounwind. If we want to generalize this later, we may need to emit
+ // CFI pseudo-instructions.
+ assert(Entry->getParent()->getFunction()->hasFnAttribute(
+ Attribute::NoUnwind) &&
+ "Function should be nounwind in insertCopiesSplitCSR!");
+ Entry->addLiveIn(*I);
+ BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
+ .addReg(*I);
+
+ // Insert the copy-back instructions right before the terminator
+ for (auto *Exit : Exits)
+ BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::COPY), *I)
+ .addReg(NewVR);
+ }
+}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 2c4409d7da2..5351b43c683 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -442,6 +442,18 @@ namespace llvm {
return true;
}
+ bool supportSplitCSR(MachineFunction *MF) const override {
+ return
+ MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+ }
+
+ void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+
+ void insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
/// getSetCCResultType - Return the ISD::SETCC ValueType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 10a8ce068d4..4c29aa06f04 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -104,6 +104,10 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// Whether this uses the PIC Base register or not.
bool UsesPICBase;
+ /// True if this function has a subset of CSRs that is handled explicitly via
+ /// copies
+ bool IsSplitCSR;
+
public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
@@ -125,7 +129,8 @@ public:
VarArgsNumFPR(0),
CRSpillFrameIndex(0),
MF(MF),
- UsesPICBase(0) {}
+ UsesPICBase(0),
+ IsSplitCSR(false) {}
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -196,6 +201,9 @@ public:
void setUsesPICBase(bool uses) { UsesPICBase = uses; }
bool usesPICBase() const { return UsesPICBase; }
+ bool isSplitCSR() const { return IsSplitCSR; }
+ void setIsSplitCSR(bool s) { IsSplitCSR = s; }
+
MCSymbol *getPICOffsetSymbol() const;
MCSymbol *getGlobalEPSymbol() const;
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 934bdf62241..5fef2924afe 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -116,6 +116,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
: (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList
: CSR_Darwin32_SaveList);
+ if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
+ return CSR_SRV464_TLS_PE_SaveList;
+
// On PPC64, we might need to save r2 (but only if it is not reserved).
bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
@@ -128,6 +131,31 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
: CSR_SVR432_SaveList);
}
+const MCPhysReg *
+PPCRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
+ assert(MF && "Invalid MachineFunction pointer.");
+ const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
+ if (Subtarget.isDarwinABI())
+ return nullptr;
+ if (!TM.isPPC64())
+ return nullptr;
+ if (MF->getFunction()->getCallingConv() != CallingConv::CXX_FAST_TLS)
+ return nullptr;
+ if (!MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
+ return nullptr;
+
+ // On PPC64, we might need to save r2 (but only if it is not reserved).
+ bool SaveR2 = !getReservedRegs(*MF).test(PPC::X2);
+ if (Subtarget.hasAltivec())
+ return SaveR2
+ ? CSR_SVR464_R2_Altivec_ViaCopy_SaveList
+ : CSR_SVR464_Altivec_ViaCopy_SaveList;
+ else
+ return SaveR2
+ ? CSR_SVR464_R2_ViaCopy_SaveList
+ : CSR_SVR464_ViaCopy_SaveList;
+}
+
const uint32_t *
PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index b15fde83c9f..459502eeb2e 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -75,6 +75,7 @@ public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const override;
OpenPOWER on IntegriCloud