diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 330 |
1 files changed, 322 insertions, 8 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index a0c4a25bb5b..d60dc43d19b 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -32,10 +32,12 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/DebugCounter.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <cassert> #include <cstdint> +#include <functional> #include <iterator> #include <limits> @@ -51,6 +53,9 @@ STATISTIC(NumUnscaledPairCreated, STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted"); STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted"); +DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming", + "Controls which pairs are considered for renaming"); + // The LdStLimit limits how far we search for load/store pairs. static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden); @@ -76,6 +81,11 @@ using LdStPairFlags = struct LdStPairFlags { // to be extended, 0 means I, and 1 means the returned iterator. int SExtIdx = -1; + // If not none, RenameReg can be used to rename the result register of the + // first store in a pair. Currently this only works when merging stores + // forward. + Optional<MCPhysReg> RenameReg = None; + LdStPairFlags() = default; void setMergeForward(bool V = true) { MergeForward = V; } @@ -83,6 +93,10 @@ using LdStPairFlags = struct LdStPairFlags { void setSExtIdx(int V) { SExtIdx = V; } int getSExtIdx() const { return SExtIdx; } + + void setRenameReg(MCPhysReg R) { RenameReg = R; } + void clearRenameReg() { RenameReg = None; } + Optional<MCPhysReg> getRenameReg() const { return RenameReg; } }; struct AArch64LoadStoreOpt : public MachineFunctionPass { @@ -99,6 +113,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { // Track which register units have been modified and used. LiveRegUnits ModifiedRegUnits, UsedRegUnits; + LiveRegUnits DefinedInBB; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AAResultsWrapperPass>(); @@ -599,8 +614,8 @@ static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, } } -static const MachineOperand &getLdStRegOp(const MachineInstr &MI, - unsigned PairedRegOp = 0) { +static MachineOperand &getLdStRegOp(MachineInstr &MI, + unsigned PairedRegOp = 0) { assert(PairedRegOp < 2 && "Unexpected register operand idx."); unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0; return MI.getOperand(Idx); @@ -783,6 +798,44 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I, return NextI; } +// Apply Fn to all instructions between MI and the beginning of the block, until +// a def for DefReg is reached. Returns true, iff Fn returns true for all +// visited instructions. Stop after visiting Limit iterations. +static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, + const TargetRegisterInfo *TRI, unsigned Limit, + std::function<bool(MachineInstr &, bool)> &Fn) { + auto MBB = MI.getParent(); + for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(), + E = MBB->rend(); + I != E; I++) { + if (!Limit) + return false; + --Limit; + + bool isDef = any_of(I->operands(), [DefReg, TRI](MachineOperand &MOP) { + return MOP.isReg() && MOP.isDef() && + TRI->regsOverlap(MOP.getReg(), DefReg); + }); + if (!Fn(*I, isDef)) + return false; + if (isDef) + break; + } + return true; +} + +static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, + const TargetRegisterInfo *TRI) { + + for (const MachineOperand &MOP : phys_regs_and_masks(MI)) + if (MOP.isReg() && MOP.isKill()) + Units.removeReg(MOP.getReg()); + + for (const MachineOperand &MOP : phys_regs_and_masks(MI)) + if (MOP.isReg() && !MOP.isKill()) + Units.addReg(MOP.getReg()); +} + MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, @@ -803,6 +856,70 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, int OffsetStride = IsUnscaled ? getMemScale(*I) : 1; bool MergeForward = Flags.getMergeForward(); + + Optional<MCPhysReg> RenameReg = Flags.getRenameReg(); + if (MergeForward && RenameReg) { + MCRegister RegToRename = getLdStRegOp(*I).getReg(); + DefinedInBB.addReg(*RenameReg); + + // Return the sub/super register for RenameReg, matching the size of + // OriginalReg. + auto GetMatchingSubReg = [this, + RenameReg](MCPhysReg OriginalReg) -> MCPhysReg { + for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg)) + if (TRI->getMinimalPhysRegClass(OriginalReg) == + TRI->getMinimalPhysRegClass(SubOrSuper)) + return SubOrSuper; + llvm_unreachable("Should have found matching sub or super register!"); + }; + + std::function<bool(MachineInstr &, bool)> UpdateMIs = + [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) { + if (IsDef) { + bool SeenDef = false; + for (auto &MOP : MI.operands()) { + // Rename the first explicit definition and all implicit + // definitions matching RegToRename. + if (MOP.isReg() && + (!SeenDef || (MOP.isDef() && MOP.isImplicit())) && + TRI->regsOverlap(MOP.getReg(), RegToRename)) { + assert((MOP.isImplicit() || + (MOP.isRenamable() && !MOP.isEarlyClobber())) && + "Need renamable operands"); + MOP.setReg(GetMatchingSubReg(MOP.getReg())); + SeenDef = true; + } + } + } else { + for (auto &MOP : MI.operands()) { + if (MOP.isReg() && TRI->regsOverlap(MOP.getReg(), RegToRename)) { + assert(MOP.isImplicit() || + (MOP.isRenamable() && !MOP.isEarlyClobber()) && + "Need renamable operands"); + MOP.setReg(GetMatchingSubReg(MOP.getReg())); + } + } + } + LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n"); + return true; + }; + forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs); + + // Make sure the register used for renaming is not used between the paired + // instructions. That would trash the content before the new paired + // instruction. + for (auto &MI : + iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>( + std::next(I), std::next(Paired))) + assert(all_of(MI.operands(), + [this, &RenameReg](const MachineOperand &MOP) { + return !MOP.isReg() || + !TRI->regsOverlap(MOP.getReg(), *RenameReg); + }) && + "Rename register used between paired instruction, trashing the " + "content"); + } + // Insert our new paired instruction after whichever of the paired // instructions MergeForward indicates. MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; @@ -931,6 +1048,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, } LLVM_DEBUG(dbgs() << "\n"); + if (MergeForward) + for (const MachineOperand &MOP : phys_regs_and_masks(*I)) + if (MOP.isReg() && MOP.isKill()) + DefinedInBB.addReg(MOP.getReg()); + // Erase the old instructions. I->eraseFromParent(); Paired->eraseFromParent(); @@ -1207,6 +1329,144 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair? } +static bool +canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, + SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses, + const TargetRegisterInfo *TRI) { + if (!FirstMI.mayStore()) + return false; + + // Check if we can find an unused register which we can use to rename + // the register used by the first load/store. + auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg()); + MachineFunction &MF = *FirstMI.getParent()->getParent(); + if (!RegClass || !MF.getRegInfo().tracksLiveness()) + return false; + + auto RegToRename = getLdStRegOp(FirstMI).getReg(); + // For now, we only rename if the store operand gets killed at the store. + if (!getLdStRegOp(FirstMI).isKill() && + !any_of(FirstMI.operands(), + [TRI, RegToRename](const MachineOperand &MOP) { + return MOP.isReg() && MOP.isImplicit() && MOP.isKill() && + TRI->regsOverlap(RegToRename, MOP.getReg()); + })) { + LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI << "\n"); + return false; + } + auto canRenameMOP = [](const MachineOperand &MOP) { + return MOP.isImplicit() || + (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied()); + }; + + bool FoundDef = false; + + // For each instruction between FirstMI and the previous def for RegToRename, + // we + // * check if we can rename RegToRename in this instruction + // * collect the registers used and required register classes for RegToRename. + std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI, + bool IsDef) { + LLVM_DEBUG(dbgs() << "Checking " << MI << "\n"); + // Currently we do not try to rename across frame-setup instructions. + if (MI.getFlag(MachineInstr::FrameSetup)) { + LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions currently (" + << MI << ")\n"); + return false; + } + + UsedInBetween.accumulate(MI); + + // For a definition, check that we can rename the definition and exit the + // loop. + FoundDef = IsDef; + + // For defs, check if we can rename the first def of RegToRename. + if (FoundDef) { + for (auto &MOP : MI.operands()) { + if (!MOP.isReg() || !MOP.isDef() || + !TRI->regsOverlap(MOP.getReg(), RegToRename)) + continue; + if (!canRenameMOP(MOP)) { + LLVM_DEBUG(dbgs() + << " Cannot rename " << MOP << " in " << MI << "\n"); + return false; + } + RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg())); + } + return true; + } else { + for (auto &MOP : MI.operands()) { + if (!MOP.isReg() || !TRI->regsOverlap(MOP.getReg(), RegToRename)) + continue; + + if (!canRenameMOP(MOP)) { + LLVM_DEBUG(dbgs() + << " Cannot rename " << MOP << " in " << MI << "\n"); + return false; + } + RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg())); + } + } + return true; + }; + + if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs)) + return false; + + if (!FoundDef) { + LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n"); + return false; + } + return true; +} + +// Check if we can find a physical register for renaming. This register must: +// * not be defined up to FirstMI (checking DefinedInBB) +// * not used between the MI and the defining instruction of the register to +// rename (checked using UsedInBetween). +// * is available in all used register classes (checked using RequiredClasses). +static Optional<MCPhysReg> tryToFindRegisterToRename( + MachineInstr &FirstMI, MachineInstr &MI, LiveRegUnits &DefinedInBB, + LiveRegUnits &UsedInBetween, + SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses, + const TargetRegisterInfo *TRI) { + auto &MF = *FirstMI.getParent()->getParent(); + + // Checks if any sub- or super-register of PR is callee saved. + auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) { + return any_of(TRI->sub_and_superregs_inclusive(PR), + [&MF, TRI](MCPhysReg SubOrSuper) { + return TRI->isCalleeSavedPhysReg(SubOrSuper, MF); + }); + }; + + // Check if PR or one of its sub- or super-registers can be used for all + // required register classes. + auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) { + return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) { + return any_of(TRI->sub_and_superregs_inclusive(PR), + [C, TRI](MCPhysReg SubOrSuper) { + return C == TRI->getMinimalPhysRegClass(SubOrSuper); + }); + }); + }; + + auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg()); + for (const MCPhysReg &PR : *RegClass) { + if (DefinedInBB.available(PR) && UsedInBetween.available(PR) && + !AnySubOrSuperRegCalleePreserved(PR) && CanBeUsedForAllClasses(PR)) { + DefinedInBB.addReg(PR); + LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI) + << "\n"); + return {PR}; + } + } + LLVM_DEBUG(dbgs() << "No rename register found from " + << TRI->getRegClassName(RegClass) << "\n"); + return None; +} + /// Scan the instructions looking for a load/store that can be combined with the /// current instruction into a wider equivalent or a load/store pair. MachineBasicBlock::iterator @@ -1215,6 +1475,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, bool FindNarrowMerge) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; + MachineBasicBlock::iterator MBBIWithRenameReg; MachineInstr &FirstMI = *I; ++MBBI; @@ -1226,6 +1487,13 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1; bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI); + Optional<bool> MaybeCanRename = None; + SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses; + LiveRegUnits UsedInBetween; + UsedInBetween.init(*TRI); + + Flags.clearRenameReg(); + // Track which register units have been modified and used between the first // insn (inclusive) and the second insn. ModifiedRegUnits.clear(); @@ -1237,6 +1505,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { MachineInstr &MI = *MBBI; + UsedInBetween.accumulate(MI); + // Don't count transient instructions towards the search limit since there // may be different numbers of them if e.g. debug information is present. if (!MI.isTransient()) @@ -1329,7 +1599,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, !(MI.mayLoad() && !UsedRegUnits.available(getLdStRegOp(MI).getReg())) && !mayAlias(MI, MemInsns, AA)) { + Flags.setMergeForward(false); + Flags.clearRenameReg(); return MBBI; } @@ -1337,18 +1609,41 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // between the two instructions and none of the instructions between the // first and the second alias with the first, we can combine the first // into the second. - if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg()) && - !(MayLoad && + if (!(MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) && !mayAlias(FirstMI, MemInsns, AA)) { - Flags.setMergeForward(true); - return MBBI; + + if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) { + Flags.setMergeForward(true); + Flags.clearRenameReg(); + return MBBI; + } + + if (DebugCounter::shouldExecute(RegRenamingCounter)) { + if (!MaybeCanRename) + MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween, + RequiredClasses, TRI)}; + + if (*MaybeCanRename) { + Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename( + FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses, + TRI); + if (MaybeRenameReg) { + Flags.setRenameReg(*MaybeRenameReg); + Flags.setMergeForward(true); + MBBIWithRenameReg = MBBI; + } + } + } } // Unable to combine these instructions due to interference in between. // Keep looking. } } + if (Flags.getRenameReg()) + return MBBIWithRenameReg; + // If the instruction wasn't a matching load or store. Stop searching if we // encounter a call instruction that might modify memory. if (MI.isCall()) @@ -1680,7 +1975,13 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { ++NumUnscaledPairCreated; // Keeping the iterator straight is a pain, so we let the merge routine tell // us what the next instruction is after it's done mucking about. + auto Prev = std::prev(MBBI); MBBI = mergePairedInsns(MBBI, Paired, Flags); + // Collect liveness info for instructions between Prev and the new position + // MBBI. + for (auto I = std::next(Prev); I != MBBI; I++) + updateDefinedRegisters(*I, DefinedInBB, TRI); + return true; } return false; @@ -1742,6 +2043,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt) { + bool Modified = false; // Four tranformations to do here: // 1) Find loads that directly read from stores and promote them by @@ -1786,8 +2088,17 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, // ldr x1, [x2, #8] // ; becomes // ldp x0, x1, [x2] + + if (MBB.getParent()->getRegInfo().tracksLiveness()) { + DefinedInBB.clear(); + DefinedInBB.addLiveIns(MBB); + } + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) { + // Track currently live registers up to this point, to help with + // searching for a rename register on demand. + updateDefinedRegisters(*MBBI, DefinedInBB, TRI); if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI)) Modified = true; else @@ -1825,11 +2136,14 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { // or store. ModifiedRegUnits.init(*TRI); UsedRegUnits.init(*TRI); + DefinedInBB.init(*TRI); bool Modified = false; bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign(); - for (auto &MBB : Fn) - Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt); + for (auto &MBB : Fn) { + auto M = optimizeBlock(MBB, enableNarrowZeroStOpt); + Modified |= M; + } return Modified; } |