summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
diff options
context:
space:
mode:
authorSam Parker <sam.parker@arm.com>2019-11-26 10:25:04 +0000
committerSam Parker <sam.parker@arm.com>2019-11-26 10:27:46 +0000
commit28166816b05aebb3154e5f8a28b3ef447cce8471 (patch)
tree687b1f187926d122f6aa34f43172e12ed6f55b7c /llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
parent3ec193fb527e697faac4ef8f30934dd7bce849a7 (diff)
downloadbcm5719-llvm-28166816b05aebb3154e5f8a28b3ef447cce8471.tar.gz
bcm5719-llvm-28166816b05aebb3154e5f8a28b3ef447cce8471.zip
[ARM][ReachingDefs] Remove dead code in loloops.
Add some more helper functions to ReachingDefs to query the uses of a given MachineInstr and also to query whether two MachineInstrs use the same def of a register. For Arm, while tail-predicating, these helpers are used in the low-overhead loops to remove the dead code that calculates the number of loop iterations. Differential Revision: https://reviews.llvm.org/D70240
Diffstat (limited to 'llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp')
-rw-r--r--llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp189
1 files changed, 140 insertions, 49 deletions
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 7487a43b7aa..756d0fdb557 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -110,12 +110,41 @@ namespace {
// Check the branch targets are within range and we satisfy our
// restrictions.
- void CheckLegality(ARMBasicBlockUtils *BBUtils, ReachingDefAnalysis *RDA);
+ void CheckLegality(ARMBasicBlockUtils *BBUtils, ReachingDefAnalysis *RDA,
+ MachineLoopInfo *MLI);
bool FoundAllComponents() const {
return Start && Dec && End;
}
+ // Return the loop iteration count, or the number of elements if we're tail
+ // predicating.
+ MachineOperand &getCount() {
+ return IsTailPredicationLegal() ?
+ VCTP->getOperand(1) : Start->getOperand(0);
+ }
+
+ unsigned getStartOpcode() const {
+ bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart;
+ if (!IsTailPredicationLegal())
+ return IsDo ? ARM::t2DLS : ARM::t2WLS;
+
+ switch (VCTP->getOpcode()) {
+ default:
+ llvm_unreachable("unhandled vctp opcode");
+ break;
+ case ARM::MVE_VCTP8:
+ return IsDo ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
+ case ARM::MVE_VCTP16:
+ return IsDo ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
+ case ARM::MVE_VCTP32:
+ return IsDo ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
+ case ARM::MVE_VCTP64:
+ return IsDo ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
+ }
+ return 0;
+ }
+
void dump() const {
if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
@@ -130,6 +159,7 @@ namespace {
class ARMLowOverheadLoops : public MachineFunctionPass {
MachineFunction *MF = nullptr;
+ MachineLoopInfo *MLI = nullptr;
ReachingDefAnalysis *RDA = nullptr;
const ARMBaseInstrInfo *TII = nullptr;
MachineRegisterInfo *MRI = nullptr;
@@ -236,7 +266,8 @@ MachineInstr *LowOverheadLoop::IsSafeToDefineLR(ReachingDefAnalysis *RDA) {
}
void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils,
- ReachingDefAnalysis *RDA) {
+ ReachingDefAnalysis *RDA,
+ MachineLoopInfo *MLI) {
if (Revert)
return;
@@ -273,14 +304,70 @@ void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils,
if (!InsertPt) {
LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
Revert = true;
+ return;
} else
LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt);
- LLVM_DEBUG(if (IsTailPredicationLegal()) {
- dbgs() << "ARM Loops: Will use tail predication to convert:\n";
+ // For tail predication, we need to provide the number of elements, instead
+ // of the iteration count, to the loop start instruction. The number of
+ // elements is provided to the vctp instruction, so we need to check that
+ // we can use this register at InsertPt.
+ if (!IsTailPredicationLegal())
+ return;
+
+ Register NumElements = VCTP->getOperand(1).getReg();
+
+ // If the register is defined within loop, then we can't perform TP.
+ // TODO: Check whether this is just a mov of a register that would be
+ // available.
+ if (RDA->getReachingDef(VCTP, NumElements) >= 0) {
+ CannotTailPredicate = true;
+ return;
+ }
+
+ // We can't perform TP if the register does not hold the same value at
+ // InsertPt as the liveout value.
+ MachineBasicBlock *InsertBB = InsertPt->getParent();
+ if (!RDA->hasSameReachingDef(InsertPt, &InsertBB->back(),
+ NumElements)) {
+ CannotTailPredicate = true;
+ return;
+ }
+
+ // Especially in the case of while loops, InsertBB may not be the
+ // preheader, so we need to check that the register isn't redefined
+ // before entering the loop.
+ auto CannotProvideElements = [&RDA](MachineBasicBlock *MBB,
+ Register NumElements) {
+ // NumElements is redefined in this block.
+ if (RDA->getReachingDef(&MBB->back(), NumElements) >= 0)
+ return true;
+
+ // Don't continue searching up through multiple predecessors.
+ if (MBB->pred_size() > 1)
+ return true;
+
+ return false;
+ };
+
+ // First, find the block that looks like the preheader.
+ MachineBasicBlock *MBB = MLI->findLoopPreheader(ML, true);
+ if (!MBB) {
+ CannotTailPredicate = true;
+ return;
+ }
+
+ // Then search backwards for a def, until we get to InsertBB.
+ while (MBB != InsertBB) {
+ CannotTailPredicate = CannotProvideElements(MBB, NumElements);
+ if (CannotTailPredicate)
+ return;
+ MBB = *MBB->pred_begin();
+ }
+
+ LLVM_DEBUG(dbgs() << "ARM Loops: Will use tail predication to convert:\n";
for (auto *MI : VPTUsers)
- dbgs() << " - " << *MI;
- });
+ dbgs() << " - " << *MI;);
}
bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
@@ -291,7 +378,7 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n");
- auto &MLI = getAnalysis<MachineLoopInfo>();
+ MLI = &getAnalysis<MachineLoopInfo>();
RDA = &getAnalysis<ReachingDefAnalysis>();
MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
MRI = &MF->getRegInfo();
@@ -301,7 +388,7 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
BBUtils->adjustBBOffsetsAfter(&MF->front());
bool Changed = false;
- for (auto ML : MLI) {
+ for (auto ML : *MLI) {
if (!ML->getParentLoop())
Changed |= ProcessLoop(ML);
}
@@ -317,7 +404,14 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
Changed |= ProcessLoop(*I);
- LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Processing loop containing:\n";
+ if (auto *Preheader = ML->getLoopPreheader())
+ dbgs() << " - " << Preheader->getName() << "\n";
+ else if (auto *Preheader = MLI->findLoopPreheader(ML))
+ dbgs() << " - " << Preheader->getName() << "\n";
+ for (auto *MBB : ML->getBlocks())
+ dbgs() << " - " << MBB->getName() << "\n";
+ );
// Search the given block for a loop start instruction. If one isn't found,
// and there's only one predecessor block, search that one too.
@@ -333,28 +427,15 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
};
LowOverheadLoop LoLoop(ML);
- // Search the preheader for the start intrinsic, or look through the
- // predecessors of the header to find exactly one set.iterations intrinsic.
+ // Search the preheader for the start intrinsic.
// FIXME: I don't see why we shouldn't be supporting multiple predecessors
// with potentially multiple set.loop.iterations, so we need to enable this.
if (auto *Preheader = ML->getLoopPreheader())
LoLoop.Start = SearchForStart(Preheader);
- else {
- LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
- << " - Performing manual predecessor search.\n");
- MachineBasicBlock *Pred = nullptr;
- for (auto *MBB : ML->getHeader()->predecessors()) {
- if (!ML->contains(MBB)) {
- if (Pred) {
- LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
- LoLoop.Start = nullptr;
- break;
- }
- Pred = MBB;
- LoLoop.Start = SearchForStart(MBB);
- }
- }
- }
+ else if (auto *Preheader = MLI->findLoopPreheader(ML, true))
+ LoLoop.Start = SearchForStart(Preheader);
+ else
+ return false;
// Find the low-overhead loop components and decide whether or not to fall
// back to a normal loop. Also look for a vctp instructions and decide
@@ -412,7 +493,7 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
if (!LoLoop.FoundAllComponents())
return false;
- LoLoop.CheckLegality(BBUtils.get(), RDA);
+ LoLoop.CheckLegality(BBUtils.get(), RDA, MLI);
Expand(LoLoop);
return true;
}
@@ -504,35 +585,45 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
MachineInstr *Start = LoLoop.Start;
MachineBasicBlock *MBB = InsertPt->getParent();
bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart;
- unsigned Opc = 0;
-
- if (!LoLoop.IsTailPredicationLegal())
- Opc = IsDo ? ARM::t2DLS : ARM::t2WLS;
- else {
- switch (LoLoop.VCTP->getOpcode()) {
- case ARM::MVE_VCTP8:
- Opc = IsDo ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
- break;
- case ARM::MVE_VCTP16:
- Opc = IsDo ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
- break;
- case ARM::MVE_VCTP32:
- Opc = IsDo ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
- break;
- case ARM::MVE_VCTP64:
- Opc = IsDo ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
- break;
- }
- }
+ unsigned Opc = LoLoop.getStartOpcode();
+ MachineOperand &Count = LoLoop.getCount();
MachineInstrBuilder MIB =
BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
MIB.addDef(ARM::LR);
- MIB.add(Start->getOperand(0));
+ MIB.add(Count);
if (!IsDo)
MIB.add(Start->getOperand(1));
+ // When using tail-predication, try to delete the dead code that was used to
+ // calculate the number of loop iterations.
+ if (LoLoop.IsTailPredicationLegal()) {
+ SmallVector<MachineInstr*, 4> Killed;
+ SmallVector<MachineInstr*, 4> Dead;
+ if (auto *Def = RDA->getReachingMIDef(Start,
+ Start->getOperand(0).getReg())) {
+ Killed.push_back(Def);
+
+ while (!Killed.empty()) {
+ MachineInstr *Def = Killed.back();
+ Killed.pop_back();
+ Dead.push_back(Def);
+ for (auto &MO : Def->operands()) {
+ if (!MO.isReg() || !MO.isKill())
+ continue;
+
+ MachineInstr *Kill = RDA->getReachingMIDef(Def, MO.getReg());
+ if (Kill && RDA->getNumUses(Kill, MO.getReg()) == 1)
+ Killed.push_back(Kill);
+ }
+ }
+ for (auto *MI : Dead)
+ MI->eraseFromParent();
+ }
+ }
+
+ // If we're inserting at a mov lr, then remove it as it's redundant.
if (InsertPt != Start)
InsertPt->eraseFromParent();
Start->eraseFromParent();
OpenPOWER on IntegriCloud