diff options
author | Amara Emerson <aemerson@apple.com> | 2019-06-17 23:20:29 +0000 |
---|---|---|
committer | Amara Emerson <aemerson@apple.com> | 2019-06-17 23:20:29 +0000 |
commit | 146882242fb5a69b1a4114dbb9f280c9da89f6cb (patch) | |
tree | ee2f5070a9179b4c2258423c8c9e554b2b0dd595 /llvm/lib/CodeGen/GlobalISel/Localizer.cpp | |
parent | f9bff2a55e745d40992b60c0c8c1d3be3bbcf0e4 (diff) | |
download | bcm5719-llvm-146882242fb5a69b1a4114dbb9f280c9da89f6cb.tar.gz bcm5719-llvm-146882242fb5a69b1a4114dbb9f280c9da89f6cb.zip |
[GlobalISel][Localizer] Rewrite localizer to run in 2 phases, inter & intra block.
Inter-block localization is the same as what currently happens, except now it
only runs on the entry block because that's where the problematic constants with
long live ranges come from.
The second phase is a new intra-block localization phase which attempts to
re-sink the already localized instructions further right before one of the
multiple uses.
One additional change is to also localize G_GLOBAL_VALUE as they're constants
too. However, on some targets like arm64 it takes multiple instructions to
materialize the value, so some additional heuristics with a TTI hook have been
introduced attempt to prevent code size regressions when localizing these.
Overall, these changes improve CTMark code size on arm64 by 1.2%.
Full code size results:
Program baseline new diff
------------------------------------------------------------------------------
test-suite...-typeset/consumer-typeset.test 1249984 1217216 -2.6%
test-suite...:: CTMark/ClamAV/clamscan.test 1264928 1232152 -2.6%
test-suite :: CTMark/SPASS/SPASS.test 1394092 1361316 -2.4%
test-suite...Mark/mafft/pairlocalalign.test 731320 714928 -2.2%
test-suite :: CTMark/lencod/lencod.test 1340592 1324200 -1.2%
test-suite :: CTMark/kimwitu++/kc.test 3853512 3820420 -0.9%
test-suite :: CTMark/Bullet/bullet.test 3406036 3389652 -0.5%
test-suite...ark/tramp3d-v4/tramp3d-v4.test 8017000 8016992 -0.0%
test-suite...TMark/7zip/7zip-benchmark.test 2856588 2856588 0.0%
test-suite...:: CTMark/sqlite3/sqlite3.test 765704 765704 0.0%
Geomean difference -1.2%
Differential Revision: https://reviews.llvm.org/D63303
llvm-svn: 363632
Diffstat (limited to 'llvm/lib/CodeGen/GlobalISel/Localizer.cpp')
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 219 |
1 files changed, 158 insertions, 61 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index e61cddf114a..9b99ec12b82 100644 --- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/Localizer.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -20,17 +21,55 @@ using namespace llvm; char Localizer::ID = 0; -INITIALIZE_PASS(Localizer, DEBUG_TYPE, - "Move/duplicate certain instructions close to their use", false, - false) +INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE, + "Move/duplicate certain instructions close to their use", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(Localizer, DEBUG_TYPE, + "Move/duplicate certain instructions close to their use", + false, false) Localizer::Localizer() : MachineFunctionPass(ID) { initializeLocalizerPass(*PassRegistry::getPassRegistry()); } -void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); } +void Localizer::init(MachineFunction &MF) { + MRI = &MF.getRegInfo(); + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(MF.getFunction()); +} bool Localizer::shouldLocalize(const MachineInstr &MI) { + // Assuming a spill and reload of a value has a cost of 1 instruction each, + // this helper function computes the maximum number of uses we should consider + // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We + // break even in terms of code size when the original MI has 2 users vs + // choosing to potentially spill. Any more than 2 users we we have a net code + // size increase. This doesn't take into account register pressure though. + auto maxUses = [](unsigned RematCost) { + // A cost of 1 means remats are basically free. + if (RematCost == 1) + return UINT_MAX; + if (RematCost == 2) + return 2U; + + // Remat is too expensive, only sink if there's one user. + if (RematCost > 2) + return 1U; + llvm_unreachable("Unexpected remat cost"); + }; + + // Helper to walk through uses and terminate if we've reached a limit. Saves + // us spending time traversing uses if all we want to know is if it's >= min. + auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) { + unsigned NumUses = 0; + auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end(); + for (; UI != UE && NumUses < MaxUses; ++UI) { + NumUses++; + } + // If we haven't reached the end yet then there are more than MaxUses users. + return UI == UE; + }; + switch (MI.getOpcode()) { default: return false; @@ -40,10 +79,20 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) { case TargetOpcode::G_FCONSTANT: case TargetOpcode::G_FRAME_INDEX: return true; + case TargetOpcode::G_GLOBAL_VALUE: { + unsigned RematCost = TTI->getGISelRematGlobalCost(); + unsigned Reg = MI.getOperand(0).getReg(); + unsigned MaxUses = maxUses(RematCost); + if (MaxUses == UINT_MAX) + return true; // Remats are "free" so always localize. + bool B = isUsesAtMost(Reg, MaxUses); + return B; + } } } void Localizer::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfoWrapperPass>(); getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -57,6 +106,106 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, return InsertMBB == Def.getParent(); } +bool Localizer::localizeInterBlock( + MachineFunction &MF, SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs) { + bool Changed = false; + DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef; + + // Since the IRTranslator only emits constants into the entry block, and the + // rest of the GISel pipeline generally emits constants close to their users, + // we only localize instructions in the entry block here. This might change if + // we start doing CSE across blocks. + auto &MBB = MF.front(); + for (MachineInstr &MI : MBB) { + if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI)) + continue; + LLVM_DEBUG(dbgs() << "Should localize: " << MI); + assert(MI.getDesc().getNumDefs() == 1 && + "More than one definition not supported yet"); + unsigned Reg = MI.getOperand(0).getReg(); + // Check if all the users of MI are local. + // We are going to invalidation the list of use operands, so we + // can't use range iterator. + for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end(); + MOIt != MOItEnd;) { + MachineOperand &MOUse = *MOIt++; + // Check if the use is already local. + MachineBasicBlock *InsertMBB; + LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent(); + dbgs() << "Checking use: " << MIUse + << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n'); + if (isLocalUse(MOUse, MI, InsertMBB)) + continue; + LLVM_DEBUG(dbgs() << "Fixing non-local use\n"); + Changed = true; + auto MBBAndReg = std::make_pair(InsertMBB, Reg); + auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg); + if (NewVRegIt == MBBWithLocalDef.end()) { + // Create the localized instruction. + MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI); + LocalizedInstrs.insert(LocalizedMI); + MachineInstr &UseMI = *MOUse.getParent(); + if (MRI->hasOneUse(Reg) && !UseMI.isPHI()) + InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI); + else + InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()), + LocalizedMI); + + // Set a new register for the definition. + unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg)); + MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg)); + LocalizedMI->getOperand(0).setReg(NewReg); + NewVRegIt = + MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first; + LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI); + } + LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second) + << '\n'); + // Update the user reg. + MOUse.setReg(NewVRegIt->second); + } + } + return Changed; +} + +bool Localizer::localizeIntraBlock( + SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs) { + bool Changed = false; + + // For each already-localized instruction which has multiple users, then we + // scan the block top down from the current position until we hit one of them. + + // FIXME: Consider doing inst duplication if live ranges are very long due to + // many users, but this case may be better served by regalloc improvements. + + for (MachineInstr *MI : LocalizedInstrs) { + unsigned Reg = MI->getOperand(0).getReg(); + MachineBasicBlock &MBB = *MI->getParent(); + // If the instruction has a single use, we would have already moved it right + // before its user in localizeInterBlock(). + if (MRI->hasOneUse(Reg)) + continue; + + // All of the user MIs of this reg. + SmallPtrSet<MachineInstr *, 32> Users; + for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) + Users.insert(&UseMI); + + MachineBasicBlock::iterator II(MI); + ++II; + while (II != MBB.end() && !Users.count(&*II)) + ++II; + + LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II + << "\n"); + assert(II != MBB.end() && "Didn't find the user in the MBB"); + MI->removeFromParent(); + MBB.insert(II, MI); + Changed = true; + } + return Changed; +} + bool Localizer::runOnMachineFunction(MachineFunction &MF) { // If the ISel pipeline failed, do not bother running that pass. if (MF.getProperties().hasProperty( @@ -67,62 +216,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { init(MF); - bool Changed = false; - // Keep track of the instructions we localized. - // We won't need to process them if we see them later in the CFG. - SmallPtrSet<MachineInstr *, 16> LocalizedInstrs; - DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef; - // TODO: Do bottom up traversal. - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI)) - continue; - LLVM_DEBUG(dbgs() << "Should localize: " << MI); - assert(MI.getDesc().getNumDefs() == 1 && - "More than one definition not supported yet"); - unsigned Reg = MI.getOperand(0).getReg(); - // Check if all the users of MI are local. - // We are going to invalidation the list of use operands, so we - // can't use range iterator. - for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end(); - MOIt != MOItEnd;) { - MachineOperand &MOUse = *MOIt++; - // Check if the use is already local. - MachineBasicBlock *InsertMBB; - LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent(); - dbgs() << "Checking use: " << MIUse - << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n'); - if (isLocalUse(MOUse, MI, InsertMBB)) - continue; - LLVM_DEBUG(dbgs() << "Fixing non-local use\n"); - Changed = true; - auto MBBAndReg = std::make_pair(InsertMBB, Reg); - auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg); - if (NewVRegIt == MBBWithLocalDef.end()) { - // Create the localized instruction. - MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI); - LocalizedInstrs.insert(LocalizedMI); - // Don't try to be smart for the insertion point. - // There is no guarantee that the first seen use is the first - // use in the block. - InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()), - LocalizedMI); + // Keep track of the instructions we localized. We'll do a second pass of + // intra-block localization to further reduce live ranges. + SmallPtrSet<MachineInstr *, 32> LocalizedInstrs; - // Set a new register for the definition. - unsigned NewReg = - MRI->createGenericVirtualRegister(MRI->getType(Reg)); - MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg)); - LocalizedMI->getOperand(0).setReg(NewReg); - NewVRegIt = - MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first; - LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI); - } - LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second) - << '\n'); - // Update the user reg. - MOUse.setReg(NewVRegIt->second); - } - } - } - return Changed; + bool Changed = localizeInterBlock(MF, LocalizedInstrs); + return Changed |= localizeIntraBlock(LocalizedInstrs); } |