summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/MachineCombiner.cpp
diff options
context:
space:
mode:
authorHans Wennborg <hans@hanshq.net>2017-09-13 23:23:09 +0000
committerHans Wennborg <hans@hanshq.net>2017-09-13 23:23:09 +0000
commit06e2a384c26dfc400470da13abdb29612aa5ef52 (patch)
treedee68c6c19970a7f13f43a1e05d7b08a090049c8 /llvm/lib/CodeGen/MachineCombiner.cpp
parentd866b47989064330489c34aa1ac0716876155cfd (diff)
downloadbcm5719-llvm-06e2a384c26dfc400470da13abdb29612aa5ef52.tar.gz
bcm5719-llvm-06e2a384c26dfc400470da13abdb29612aa5ef52.zip
Revert r312719 "[MachineCombiner] Update instruction depths incrementally for large BBs."
This caused PR34596. > [MachineCombiner] Update instruction depths incrementally for large BBs. > > Summary: > For large basic blocks with lots of combinable instructions, the > MachineTraceMetrics computations in MachineCombiner can dominate the compile > time, as computing the trace information is quadratic in the number of > instructions in a BB and it's relevant successors/predecessors. > > In most cases, knowing the instruction depth should be enough to make > combination decisions. As we already iterate over all instructions in a basic > block, the instruction depth can be computed incrementally. This reduces the > cost of machine-combine drastically in cases where lots of instructions > are combined. The major drawback is that AFAIK, computing the critical path > length cannot be done incrementally. Therefore we only compute > instruction depths incrementally, for basic blocks with more > instructions than inc_threshold. The -machine-combiner-inc-threshold > option can be used to set the threshold and allows for easier > experimenting and checking if using incremental updates for all basic > blocks has any impact on the performance. > > Reviewers: sanjoy, Gerolf, MatzeB, efriedma, fhahn > > Reviewed By: fhahn > > Subscribers: kiranchandramohan, javed.absar, efriedma, llvm-commits > > Differential Revision: https://reviews.llvm.org/D36619 llvm-svn: 313213
Diffstat (limited to 'llvm/lib/CodeGen/MachineCombiner.cpp')
-rw-r--r--llvm/lib/CodeGen/MachineCombiner.cpp105
1 files changed, 23 insertions, 82 deletions
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 8586cf9679f..e6f80dbb863 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -22,7 +22,6 @@
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -35,11 +34,6 @@ using namespace llvm;
STATISTIC(NumInstCombined, "Number of machineinst combined");
-static cl::opt<unsigned>
-inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
- cl::desc("Incremental depth computation will be used for basic "
- "blocks with more instructions."), cl::init(500));
-
namespace {
class MachineCombiner : public MachineFunctionPass {
const TargetInstrInfo *TII;
@@ -79,7 +73,7 @@ private:
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineCombinerPattern Pattern, bool SlackIsAccurate);
+ MachineCombinerPattern Pattern);
bool preservesResourceLen(MachineBasicBlock *MBB,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -253,8 +247,7 @@ bool MachineCombiner::improvesCriticalPathLen(
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineCombinerPattern Pattern,
- bool SlackIsAccurate) {
+ MachineCombinerPattern Pattern) {
assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
"Missing machine model\n");
// NewRoot is the last instruction in the \p InsInstrs vector.
@@ -265,7 +258,7 @@ bool MachineCombiner::improvesCriticalPathLen(
unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
- DEBUG(dbgs() << "DEPENDENCE DATA FOR " << *Root << "\n";
+ DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
dbgs() << " NewRootDepth: " << NewRootDepth << "\n";
dbgs() << " RootDepth: " << RootDepth << "\n");
@@ -288,18 +281,17 @@ bool MachineCombiner::improvesCriticalPathLen(
RootLatency += TSchedModel.computeInstrLatency(I);
unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
- unsigned NewCycleCount = NewRootDepth + NewRootLatency;
- unsigned OldCycleCount = RootDepth + RootLatency +
- (SlackIsAccurate ? RootSlack : 0);
+
DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
dbgs() << " RootLatency: " << RootLatency << "\n";
- dbgs() << " RootSlack: " << RootSlack << " SlackIsAccurate="
- << SlackIsAccurate << "\n";
+ dbgs() << " RootSlack: " << RootSlack << "\n";
dbgs() << " NewRootDepth + NewRootLatency = "
- << NewCycleCount << "\n";
+ << NewRootDepth + NewRootLatency << "\n";
dbgs() << " RootDepth + RootLatency + RootSlack = "
- << OldCycleCount << "\n";
- );
+ << RootDepth + RootLatency + RootSlack << "\n";);
+
+ unsigned NewCycleCount = NewRootDepth + NewRootLatency;
+ unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
return NewCycleCount <= OldCycleCount;
}
@@ -362,44 +354,17 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
return false;
}
-/// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
-/// depths if requested.
-///
-/// \param MBB basic block to insert instructions in
-/// \param MI current machine instruction
-/// \param InsInstrs new instructions to insert in \p MBB
-/// \param DelInstrs instruction to delete from \p MBB
-/// \param MinInstr is a pointer to the machine trace information
-/// \param RegUnits set of live registers, needed to compute instruction depths
-/// \param IncrementalUpdate if true, compute instruction depths incrementally,
-/// otherwise invalidate the trace
static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
SmallVector<MachineInstr *, 16> InsInstrs,
SmallVector<MachineInstr *, 16> DelInstrs,
- MachineTraceMetrics::Ensemble *MinInstr,
- SparseSet<LiveRegUnit> &RegUnits,
- bool IncrementalUpdate) {
+ MachineTraceMetrics *Traces) {
for (auto *InstrPtr : InsInstrs)
MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
-
- for (auto *InstrPtr : DelInstrs) {
+ for (auto *InstrPtr : DelInstrs)
InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
- // Erase all LiveRegs defined by the removed instruction
- for (auto I = RegUnits.begin(); I != RegUnits.end(); ) {
- if (I->MI == InstrPtr)
- I = RegUnits.erase(I);
- else
- I++;
- }
- }
-
- if (IncrementalUpdate)
- for (auto *InstrPtr : InsInstrs)
- MinInstr->updateDepth(MBB, *InstrPtr, RegUnits);
- else
- MinInstr->invalidate(MBB);
-
- NumInstCombined++;
+ ++NumInstCombined;
+ Traces->invalidate(MBB);
+ Traces->verifyAnalysis();
}
/// Substitute a slow code sequence with a faster one by
@@ -413,16 +378,9 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
bool Changed = false;
DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
- bool IncrementalUpdate = false;
auto BlockIter = MBB->begin();
- auto LastUpdate = BlockIter;
// Check if the block is in a loop.
const MachineLoop *ML = MLI->getLoopFor(MBB);
- if (!MinInstr)
- MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
-
- SparseSet<LiveRegUnit> RegUnits;
- RegUnits.setUniverse(TRI->getNumRegUnits());
while (BlockIter != MBB->end()) {
auto &MI = *BlockIter++;
@@ -461,6 +419,9 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
SmallVector<MachineInstr *, 16> InsInstrs;
SmallVector<MachineInstr *, 16> DelInstrs;
DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+ Traces->verifyAnalysis();
TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
InstrIdxForVirtReg);
unsigned NewInstCount = InsInstrs.size();
@@ -475,41 +436,23 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (ML && TII->isThroughputPattern(P))
SubstituteAlways = true;
- if (IncrementalUpdate) {
- // Update depths since the last incremental update.
- MinInstr->updateDepths(LastUpdate, std::next(BlockIter), RegUnits);
- LastUpdate = BlockIter;
- }
-
// Substitute when we optimize for codesize and the new sequence has
// fewer instructions OR
// the new sequence neither lengthens the critical path nor increases
// resource pressure.
if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
- RegUnits, IncrementalUpdate);
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
// Eagerly stop after the first pattern fires.
Changed = true;
break;
} else {
- // For big basic blocks, we only compute the full trace the first time
- // we hit this. We do not invalidate the trace, but instead update the
- // instruction depths incrementally.
- // NOTE: Only the instruction depths up to MI are accurate. All other
- // trace information is not updated.
+ // Calculating the trace metrics may be expensive,
+ // so only do this when necessary.
MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
- Traces->verifyAnalysis();
if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
- InstrIdxForVirtReg, P,
- !IncrementalUpdate) &&
+ InstrIdxForVirtReg, P) &&
preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
- if (MBB->size() > inc_threshold)
- // Use incremental depth updates for basic blocks above treshold
- IncrementalUpdate = true;
-
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
- RegUnits, IncrementalUpdate);
-
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
// Eagerly stop after the first pattern fires.
Changed = true;
break;
@@ -524,8 +467,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
}
}
- if (Changed && IncrementalUpdate)
- Traces->invalidate(MBB);
return Changed;
}
OpenPOWER on IntegriCloud