diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 219 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 4 |
3 files changed, 166 insertions, 61 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index a55c1be1a09..83840aa7fbb 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -724,6 +724,10 @@ bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { return TTIImpl->shouldExpandReduction(II); } +unsigned TargetTransformInfo::getGISelRematGlobalCost() const { + return TTIImpl->getGISelRematGlobalCost(); +} + int TargetTransformInfo::getInstructionLatency(const Instruction *I) const { return TTIImpl->getInstructionLatency(I); } diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index e61cddf114a..9b99ec12b82 100644 --- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/Localizer.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -20,17 +21,55 @@ using namespace llvm; char Localizer::ID = 0; -INITIALIZE_PASS(Localizer, DEBUG_TYPE, - "Move/duplicate certain instructions close to their use", false, - false) +INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE, + "Move/duplicate certain instructions close to their use", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(Localizer, DEBUG_TYPE, + "Move/duplicate certain instructions close to their use", + false, false) Localizer::Localizer() : MachineFunctionPass(ID) { initializeLocalizerPass(*PassRegistry::getPassRegistry()); } -void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); } +void Localizer::init(MachineFunction &MF) { + MRI = &MF.getRegInfo(); + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(MF.getFunction()); +} bool 
Localizer::shouldLocalize(const MachineInstr &MI) { + // Assuming a spill and reload of a value has a cost of 1 instruction each, + // this helper function computes the maximum number of uses we should consider + // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We + // break even in terms of code size when the original MI has 2 users vs + // choosing to potentially spill. Any more than 2 users and we have a net code + // size increase. This doesn't take into account register pressure though. + auto maxUses = [](unsigned RematCost) { + // A cost of 1 means remats are basically free. + if (RematCost == 1) + return UINT_MAX; + if (RematCost == 2) + return 2U; + + // Remat is too expensive, only sink if there's one user. + if (RematCost > 2) + return 1U; + llvm_unreachable("Unexpected remat cost"); + }; + + // Helper to walk through uses and terminate if we've reached a limit. Saves + // us spending time traversing uses if all we want to know is if it's >= min. + auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) { + unsigned NumUses = 0; + auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end(); + for (; UI != UE && NumUses < MaxUses; ++UI) { + NumUses++; + } + // If we haven't reached the end yet then there are more than MaxUses users. + return UI == UE; + }; + switch (MI.getOpcode()) { default: return false; @@ -40,10 +79,20 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) { case TargetOpcode::G_FCONSTANT: case TargetOpcode::G_FRAME_INDEX: return true; + case TargetOpcode::G_GLOBAL_VALUE: { + unsigned RematCost = TTI->getGISelRematGlobalCost(); + unsigned Reg = MI.getOperand(0).getReg(); + unsigned MaxUses = maxUses(RematCost); + if (MaxUses == UINT_MAX) + return true; // Remats are "free" so always localize. 
+ bool B = isUsesAtMost(Reg, MaxUses); + return B; + } } } void Localizer::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfoWrapperPass>(); getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -57,6 +106,106 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, return InsertMBB == Def.getParent(); } +bool Localizer::localizeInterBlock( + MachineFunction &MF, SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs) { + bool Changed = false; + DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef; + + // Since the IRTranslator only emits constants into the entry block, and the + // rest of the GISel pipeline generally emits constants close to their users, + // we only localize instructions in the entry block here. This might change if + // we start doing CSE across blocks. + auto &MBB = MF.front(); + for (MachineInstr &MI : MBB) { + if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI)) + continue; + LLVM_DEBUG(dbgs() << "Should localize: " << MI); + assert(MI.getDesc().getNumDefs() == 1 && + "More than one definition not supported yet"); + unsigned Reg = MI.getOperand(0).getReg(); + // Check if all the users of MI are local. + // We are going to invalidate the list of use operands, so we + // can't use a range iterator. + for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end(); + MOIt != MOItEnd;) { + MachineOperand &MOUse = *MOIt++; + // Check if the use is already local. + MachineBasicBlock *InsertMBB; + LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent(); + dbgs() << "Checking use: " << MIUse + << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n'); + if (isLocalUse(MOUse, MI, InsertMBB)) + continue; + LLVM_DEBUG(dbgs() << "Fixing non-local use\n"); + Changed = true; + auto MBBAndReg = std::make_pair(InsertMBB, Reg); + auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg); + if (NewVRegIt == MBBWithLocalDef.end()) { + // Create the localized instruction. 
+ MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI); + LocalizedInstrs.insert(LocalizedMI); + MachineInstr &UseMI = *MOUse.getParent(); + if (MRI->hasOneUse(Reg) && !UseMI.isPHI()) + InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI); + else + InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()), + LocalizedMI); + + // Set a new register for the definition. + unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg)); + MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg)); + LocalizedMI->getOperand(0).setReg(NewReg); + NewVRegIt = + MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first; + LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI); + } + LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second) + << '\n'); + // Update the user reg. + MOUse.setReg(NewVRegIt->second); + } + } + return Changed; +} + +bool Localizer::localizeIntraBlock( + SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs) { + bool Changed = false; + + // For each already-localized instruction which has multiple users, we + // scan the block top down from the current position until we hit one of them. + + // FIXME: Consider doing inst duplication if live ranges are very long due to + // many users, but this case may be better served by regalloc improvements. + + for (MachineInstr *MI : LocalizedInstrs) { + unsigned Reg = MI->getOperand(0).getReg(); + MachineBasicBlock &MBB = *MI->getParent(); + // If the instruction has a single use, we would have already moved it right + // before its user in localizeInterBlock(). + if (MRI->hasOneUse(Reg)) + continue; + + // All of the user MIs of this reg. 
+ SmallPtrSet<MachineInstr *, 32> Users; + for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) + Users.insert(&UseMI); + + MachineBasicBlock::iterator II(MI); + ++II; + while (II != MBB.end() && !Users.count(&*II)) + ++II; + + LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II + << "\n"); + assert(II != MBB.end() && "Didn't find the user in the MBB"); + MI->removeFromParent(); + MBB.insert(II, MI); + Changed = true; + } + return Changed; +} + bool Localizer::runOnMachineFunction(MachineFunction &MF) { // If the ISel pipeline failed, do not bother running that pass. if (MF.getProperties().hasProperty( @@ -67,62 +216,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { init(MF); - bool Changed = false; - // Keep track of the instructions we localized. - // We won't need to process them if we see them later in the CFG. - SmallPtrSet<MachineInstr *, 16> LocalizedInstrs; - DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef; - // TODO: Do bottom up traversal. - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI)) - continue; - LLVM_DEBUG(dbgs() << "Should localize: " << MI); - assert(MI.getDesc().getNumDefs() == 1 && - "More than one definition not supported yet"); - unsigned Reg = MI.getOperand(0).getReg(); - // Check if all the users of MI are local. - // We are going to invalidation the list of use operands, so we - // can't use range iterator. - for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end(); - MOIt != MOItEnd;) { - MachineOperand &MOUse = *MOIt++; - // Check if the use is already local. 
- MachineBasicBlock *InsertMBB; - LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent(); - dbgs() << "Checking use: " << MIUse - << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n'); - if (isLocalUse(MOUse, MI, InsertMBB)) - continue; - LLVM_DEBUG(dbgs() << "Fixing non-local use\n"); - Changed = true; - auto MBBAndReg = std::make_pair(InsertMBB, Reg); - auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg); - if (NewVRegIt == MBBWithLocalDef.end()) { - // Create the localized instruction. - MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI); - LocalizedInstrs.insert(LocalizedMI); - // Don't try to be smart for the insertion point. - // There is no guarantee that the first seen use is the first - // use in the block. - InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()), - LocalizedMI); + // Keep track of the instructions we localized. We'll do a second pass of + // intra-block localization to further reduce live ranges. + SmallPtrSet<MachineInstr *, 32> LocalizedInstrs; - // Set a new register for the definition. - unsigned NewReg = - MRI->createGenericVirtualRegister(MRI->getType(Reg)); - MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg)); - LocalizedMI->getOperand(0).setReg(NewReg); - NewVRegIt = - MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first; - LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI); - } - LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second) - << '\n'); - // Update the user reg. 
- MOUse.setReg(NewVRegIt->second); - } - } - } - return Changed; + bool Changed = localizeInterBlock(MF, LocalizedInstrs); + return Changed |= localizeIntraBlock(LocalizedInstrs); } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 67c3707ec5c..10c15a139b4 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -165,6 +165,10 @@ public: return false; } + unsigned getGISelRematGlobalCost() const { + return 2; + } + bool useReductionIntrinsic(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const; |