diff options
author | Sam Parker <sam.parker@arm.com> | 2019-11-26 10:25:04 +0000 |
---|---|---|
committer | Sam Parker <sam.parker@arm.com> | 2019-11-26 10:27:46 +0000 |
commit | 28166816b05aebb3154e5f8a28b3ef447cce8471 (patch) | |
tree | 687b1f187926d122f6aa34f43172e12ed6f55b7c /llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | |
parent | 3ec193fb527e697faac4ef8f30934dd7bce849a7 (diff) | |
download | bcm5719-llvm-28166816b05aebb3154e5f8a28b3ef447cce8471.tar.gz bcm5719-llvm-28166816b05aebb3154e5f8a28b3ef447cce8471.zip |
[ARM][ReachingDefs] Remove dead code in loloops.
Add some more helper functions to ReachingDefs to query the uses of
a given MachineInstr and also to query whether two MachineInstrs use
the same def of a register.
For Arm, while tail-predicating, these helpers are used in the
low-overhead loops to remove the dead code that calculates the number
of loop iterations.
Differential Revision: https://reviews.llvm.org/D70240
Diffstat (limited to 'llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp')
-rw-r--r-- | llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 189 |
1 files changed, 140 insertions, 49 deletions
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 7487a43b7aa..756d0fdb557 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -110,12 +110,41 @@ namespace { // Check the branch targets are within range and we satisfy our // restrictions. - void CheckLegality(ARMBasicBlockUtils *BBUtils, ReachingDefAnalysis *RDA); + void CheckLegality(ARMBasicBlockUtils *BBUtils, ReachingDefAnalysis *RDA, + MachineLoopInfo *MLI); bool FoundAllComponents() const { return Start && Dec && End; } + // Return the loop iteration count, or the number of elements if we're tail + // predicating. + MachineOperand &getCount() { + return IsTailPredicationLegal() ? + VCTP->getOperand(1) : Start->getOperand(0); + } + + unsigned getStartOpcode() const { + bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart; + if (!IsTailPredicationLegal()) + return IsDo ? ARM::t2DLS : ARM::t2WLS; + + switch (VCTP->getOpcode()) { + default: + llvm_unreachable("unhandled vctp opcode"); + break; + case ARM::MVE_VCTP8: + return IsDo ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8; + case ARM::MVE_VCTP16: + return IsDo ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16; + case ARM::MVE_VCTP32: + return IsDo ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32; + case ARM::MVE_VCTP64: + return IsDo ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64; + } + return 0; + } + void dump() const { if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start; if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec; @@ -130,6 +159,7 @@ namespace { class ARMLowOverheadLoops : public MachineFunctionPass { MachineFunction *MF = nullptr; + MachineLoopInfo *MLI = nullptr; ReachingDefAnalysis *RDA = nullptr; const ARMBaseInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; @@ -236,7 +266,8 @@ MachineInstr *LowOverheadLoop::IsSafeToDefineLR(ReachingDefAnalysis *RDA) { } void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils, - ReachingDefAnalysis *RDA) { + ReachingDefAnalysis *RDA, + MachineLoopInfo *MLI) { if (Revert) return; @@ -273,14 +304,70 @@ void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils, if (!InsertPt) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); Revert = true; + return; } else LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt); - LLVM_DEBUG(if (IsTailPredicationLegal()) { - dbgs() << "ARM Loops: Will use tail predication to convert:\n"; + // For tail predication, we need to provide the number of elements, instead + // of the iteration count, to the loop start instruction. The number of + // elements is provided to the vctp instruction, so we need to check that + // we can use this register at InsertPt. + if (!IsTailPredicationLegal()) + return; + + Register NumElements = VCTP->getOperand(1).getReg(); + + // If the register is defined within loop, then we can't perform TP. + // TODO: Check whether this is just a mov of a register that would be + // available. + if (RDA->getReachingDef(VCTP, NumElements) >= 0) { + CannotTailPredicate = true; + return; + } + + // We can't perform TP if the register does not hold the same value at + // InsertPt as the liveout value. + MachineBasicBlock *InsertBB = InsertPt->getParent(); + if (!RDA->hasSameReachingDef(InsertPt, &InsertBB->back(), + NumElements)) { + CannotTailPredicate = true; + return; + } + + // Especially in the case of while loops, InsertBB may not be the + // preheader, so we need to check that the register isn't redefined + // before entering the loop. + auto CannotProvideElements = [&RDA](MachineBasicBlock *MBB, + Register NumElements) { + // NumElements is redefined in this block. + if (RDA->getReachingDef(&MBB->back(), NumElements) >= 0) + return true; + + // Don't continue searching up through multiple predecessors. + if (MBB->pred_size() > 1) + return true; + + return false; + }; + + // First, find the block that looks like the preheader. + MachineBasicBlock *MBB = MLI->findLoopPreheader(ML, true); + if (!MBB) { + CannotTailPredicate = true; + return; + } + + // Then search backwards for a def, until we get to InsertBB. + while (MBB != InsertBB) { + CannotTailPredicate = CannotProvideElements(MBB, NumElements); + if (CannotTailPredicate) + return; + MBB = *MBB->pred_begin(); + } + + LLVM_DEBUG(dbgs() << "ARM Loops: Will use tail predication to convert:\n"; for (auto *MI : VPTUsers) - dbgs() << " - " << *MI; - }); + dbgs() << " - " << *MI;); } bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { @@ -291,7 +378,7 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { MF = &mf; LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n"); - auto &MLI = getAnalysis<MachineLoopInfo>(); + MLI = &getAnalysis<MachineLoopInfo>(); RDA = &getAnalysis<ReachingDefAnalysis>(); MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness); MRI = &MF->getRegInfo(); @@ -301,7 +388,7 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { BBUtils->adjustBBOffsetsAfter(&MF->front()); bool Changed = false; - for (auto ML : MLI) { + for (auto ML : *MLI) { if (!ML->getParentLoop()) Changed |= ProcessLoop(ML); } @@ -317,7 +404,14 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { for (auto I = ML->begin(), E = ML->end(); I != E; ++I) Changed |= ProcessLoop(*I); - LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML); + LLVM_DEBUG(dbgs() << "ARM Loops: Processing loop containing:\n"; + if (auto *Preheader = ML->getLoopPreheader()) + dbgs() << " - " << Preheader->getName() << "\n"; + else if (auto *Preheader = MLI->findLoopPreheader(ML)) + dbgs() << " - " << Preheader->getName() << "\n"; + for (auto *MBB : ML->getBlocks()) + dbgs() << " - " << MBB->getName() << "\n"; + ); // Search the given block for a loop start instruction. If one isn't found, // and there's only one predecessor block, search that one too. @@ -333,28 +427,15 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { }; LowOverheadLoop LoLoop(ML); - // Search the preheader for the start intrinsic, or look through the - // predecessors of the header to find exactly one set.iterations intrinsic. + // Search the preheader for the start intrinsic. // FIXME: I don't see why we shouldn't be supporting multiple predecessors // with potentially multiple set.loop.iterations, so we need to enable this. if (auto *Preheader = ML->getLoopPreheader()) LoLoop.Start = SearchForStart(Preheader); - else { - LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n" - << " - Performing manual predecessor search.\n"); - MachineBasicBlock *Pred = nullptr; - for (auto *MBB : ML->getHeader()->predecessors()) { - if (!ML->contains(MBB)) { - if (Pred) { - LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n"); - LoLoop.Start = nullptr; - break; - } - Pred = MBB; - LoLoop.Start = SearchForStart(MBB); - } - } - } + else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) + LoLoop.Start = SearchForStart(Preheader); + else + return false; // Find the low-overhead loop components and decide whether or not to fall // back to a normal loop. Also look for a vctp instructions and decide @@ -412,7 +493,7 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { if (!LoLoop.FoundAllComponents()) return false; - LoLoop.CheckLegality(BBUtils.get(), RDA); + LoLoop.CheckLegality(BBUtils.get(), RDA, MLI); Expand(LoLoop); return true; } @@ -504,35 +585,45 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) { MachineInstr *Start = LoLoop.Start; MachineBasicBlock *MBB = InsertPt->getParent(); bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart; - unsigned Opc = 0; - - if (!LoLoop.IsTailPredicationLegal()) - Opc = IsDo ? ARM::t2DLS : ARM::t2WLS; - else { - switch (LoLoop.VCTP->getOpcode()) { - case ARM::MVE_VCTP8: - Opc = IsDo ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8; - break; - case ARM::MVE_VCTP16: - Opc = IsDo ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16; - break; - case ARM::MVE_VCTP32: - Opc = IsDo ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32; - break; - case ARM::MVE_VCTP64: - Opc = IsDo ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64; - break; - } - } + unsigned Opc = LoLoop.getStartOpcode(); + MachineOperand &Count = LoLoop.getCount(); MachineInstrBuilder MIB = BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc)); MIB.addDef(ARM::LR); - MIB.add(Start->getOperand(0)); + MIB.add(Count); if (!IsDo) MIB.add(Start->getOperand(1)); + // When using tail-predication, try to delete the dead code that was used to + // calculate the number of loop iterations. + if (LoLoop.IsTailPredicationLegal()) { + SmallVector<MachineInstr*, 4> Killed; + SmallVector<MachineInstr*, 4> Dead; + if (auto *Def = RDA->getReachingMIDef(Start, + Start->getOperand(0).getReg())) { + Killed.push_back(Def); + + while (!Killed.empty()) { + MachineInstr *Def = Killed.back(); + Killed.pop_back(); + Dead.push_back(Def); + for (auto &MO : Def->operands()) { + if (!MO.isReg() || !MO.isKill()) + continue; + + MachineInstr *Kill = RDA->getReachingMIDef(Def, MO.getReg()); + if (Kill && RDA->getNumUses(Kill, MO.getReg()) == 1) + Killed.push_back(Kill); + } + } + for (auto *MI : Dead) + MI->eraseFromParent(); + } + } + + // If we're inserting at a mov lr, then remove it as it's redundant. if (InsertPt != Start) InsertPt->eraseFromParent(); Start->eraseFromParent(); |