diff options
author | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2013-04-02 17:49:51 +0000 |
---|---|---|
committer | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2013-04-02 17:49:51 +0000 |
commit | 3ca14772d0c3d50e5b8b32fd405ebc0b571d1ab2 (patch) | |
tree | c3d70bb8b0c2d3ae4d760de3256d7f24a9d2861d /llvm | |
parent | 2040f9800832659a9804f223b7c241d621ee42bc (diff) | |
download | bcm5719-llvm-3ca14772d0c3d50e5b8b32fd405ebc0b571d1ab2.tar.gz bcm5719-llvm-3ca14772d0c3d50e5b8b32fd405ebc0b571d1ab2.zip |
Count processor resources individually in MachineTraceMetrics.
The new instruction scheduling models provide information about the
number of cycles consumed on each processor resource. This makes it
possible to estimate ILP more accurately than simply counting
instructions / issue width.
The functions getResourceDepth() and getResourceLength() now identify
the limiting processor resource, and return a cycle count based on that.
This gives more precise resource information, particularly in traces
that use one resource a lot more than others.
llvm-svn: 178553
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/include/llvm/CodeGen/MachineTraceMetrics.h | 25 | ||||
-rw-r--r-- | llvm/lib/CodeGen/MachineTraceMetrics.cpp | 153 |
2 files changed, 169 insertions, 9 deletions
diff --git a/llvm/include/llvm/CodeGen/MachineTraceMetrics.h b/llvm/include/llvm/CodeGen/MachineTraceMetrics.h index eaaa70a67d8..2775a048582 100644 --- a/llvm/include/llvm/CodeGen/MachineTraceMetrics.h +++ b/llvm/include/llvm/CodeGen/MachineTraceMetrics.h @@ -107,6 +107,13 @@ public: /// Get the fixed resource information about MBB. Compute it on demand. const FixedBlockInfo *getResources(const MachineBasicBlock*); + /// Get the scaled number of cycles used per processor resource in MBB. + /// This is an array with SchedModel.getNumProcResourceKinds() entries. + /// The getResources() function above must have been called first. + /// + /// These numbers have already been scaled by SchedModel.getResourceFactor(). + ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const; + /// A virtual register or regunit required by a basic block or its trace /// successors. struct LiveInReg { @@ -284,6 +291,8 @@ public: class Ensemble { SmallVector<TraceBlockInfo, 4> BlockInfo; DenseMap<const MachineInstr*, InstrCycles> Cycles; + SmallVector<unsigned, 0> ProcResourceDepths; + SmallVector<unsigned, 0> ProcResourceHeights; friend class Trace; void computeTrace(const MachineBasicBlock*); @@ -303,6 +312,8 @@ public: const MachineLoop *getLoopFor(const MachineBasicBlock*) const; const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const; const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const; + ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const; + ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const; public: virtual ~Ensemble(); @@ -343,8 +354,22 @@ private: // One entry per basic block, indexed by block number. SmallVector<FixedBlockInfo, 4> BlockInfo; + // Cycles consumed on each processor resource per block. + // The number of processor resource kinds is constant for a given subtarget, + // but it is not known at compile time. The number of cycles consumed by + // block B on processor resource R is at ProcResourceCycles[B*Kinds + R] + // where Kinds = SchedModel.getNumProcResourceKinds(). + SmallVector<unsigned, 0> ProcResourceCycles; + // One ensemble per strategy. Ensemble* Ensembles[TS_NumStrategies]; + + // Convert scaled resource usage to a cycle count that can be compared with + // latencies. + unsigned getCycles(unsigned Scaled) { + unsigned Factor = SchedModel.getLatencyFactor(); + return (Scaled + Factor - 1) / Factor; + } }; inline raw_ostream &operator<<(raw_ostream &OS, diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 5bf0176eadc..c154b5c9c10 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -57,6 +58,8 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { MF->getTarget().getSubtarget<TargetSubtargetInfo>(); SchedModel.init(*ST.getSchedModel(), &ST, TII); BlockInfo.resize(MF->getNumBlockIDs()); + ProcResourceCycles.resize(MF->getNumBlockIDs() * + SchedModel.getNumProcResourceKinds()); return false; } @@ -85,9 +88,13 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { return FBI; // Compute resource usage in the block. - // FIXME: Compute per-functional unit counts. FBI->HasCalls = false; unsigned InstrCount = 0; + + // Add up per-processor resource cycles as well. + unsigned PRKinds = SchedModel.getNumProcResourceKinds(); + SmallVector<unsigned, 32> PRCycles(PRKinds); + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { const MachineInstr *MI = I; @@ -96,11 +103,39 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { ++InstrCount; if (MI->isCall()) FBI->HasCalls = true; + + // Count processor resources used. + const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI); + if (!SC->isValid()) + continue; + + for (TargetSchedModel::ProcResIter + PI = SchedModel.getWriteProcResBegin(SC), + PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { + assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind"); + PRCycles[PI->ProcResourceIdx] += PI->Cycles; + } } FBI->InstrCount = InstrCount; + + // Scale the resource cycles so they are comparable. + unsigned PROffset = MBB->getNumber() * PRKinds; + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceCycles[PROffset + K] = + PRCycles[K] * SchedModel.getResourceFactor(K); + return FBI; } +ArrayRef<unsigned> +MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const { + assert(BlockInfo[MBBNum].hasResources() && + "getResources() must be called before getProcResourceCycles()"); + unsigned PRKinds = SchedModel.getNumProcResourceKinds(); + return ArrayRef<unsigned>(&ProcResourceCycles[MBBNum * PRKinds], PRKinds); +} + + //===----------------------------------------------------------------------===// // Ensemble utility functions //===----------------------------------------------------------------------===// @@ -108,6 +143,9 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct) : MTM(*ct) { BlockInfo.resize(MTM.BlockInfo.size()); + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds); + ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds); } // Virtual destructor serves as an anchor. @@ -123,21 +161,32 @@ MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const { void MachineTraceMetrics::Ensemble:: computeDepthResources(const MachineBasicBlock *MBB) { TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + unsigned PROffset = MBB->getNumber() * PRKinds; // Compute resources from trace above. The top block is simple. if (!TBI->Pred) { TBI->InstrDepth = 0; TBI->Head = MBB->getNumber(); + std::fill(ProcResourceDepths.begin() + PROffset, + ProcResourceDepths.begin() + PROffset + PRKinds, 0); return; } // Compute from the block above. A post-order traversal ensures the // predecessor is always computed first. - TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()]; + unsigned PredNum = TBI->Pred->getNumber(); + TraceBlockInfo *PredTBI = &BlockInfo[PredNum]; assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet"); const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred); TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount; TBI->Head = PredTBI->Head; + + // Compute per-resource depths. + ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum); + ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum); + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K]; } // Update resource-related information in the TraceBlockInfo for MBB. @@ -145,22 +194,33 @@ computeDepthResources(const MachineBasicBlock *MBB) { void MachineTraceMetrics::Ensemble:: computeHeightResources(const MachineBasicBlock *MBB) { TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + unsigned PROffset = MBB->getNumber() * PRKinds; // Compute resources for the current block. TBI->InstrHeight = MTM.getResources(MBB)->InstrCount; + ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber()); // The trace tail is done. if (!TBI->Succ) { TBI->Tail = MBB->getNumber(); + std::copy(PRCycles.begin(), PRCycles.end(), + ProcResourceHeights.begin() + PROffset); return; } // Compute from the block below. A post-order traversal ensures the // predecessor is always computed first. - TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()]; + unsigned SuccNum = TBI->Succ->getNumber(); + TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum]; assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet"); TBI->InstrHeight += SuccTBI->InstrHeight; TBI->Tail = SuccTBI->Tail; + + // Compute per-resource heights. + ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum); + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K]; } // Check if depth resources for MBB are valid and return the TBI. @@ -181,6 +241,31 @@ getHeightResources(const MachineBasicBlock *MBB) const { return TBI->hasValidHeight() ? TBI : 0; } +/// Get an array of processor resource depths for MBB. Indexed by processor +/// resource kind, this array contains the scaled processor resources consumed +/// by all blocks preceding MBB in its trace. It does not include instructions +/// in MBB. +/// +/// Compare TraceBlockInfo::InstrDepth. +ArrayRef<unsigned> +MachineTraceMetrics::Ensemble:: +getProcResourceDepths(unsigned MBBNum) const { + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + return ArrayRef<unsigned>(&ProcResourceDepths[MBBNum * PRKinds], PRKinds); +} + +/// Get an array of processor resource heights for MBB. Indexed by processor +/// resource kind, this array contains the scaled processor resources consumed +/// by this block and all blocks following it in its trace. +/// +/// Compare TraceBlockInfo::InstrHeight. +ArrayRef<unsigned> +MachineTraceMetrics::Ensemble:: +getProcResourceHeights(unsigned MBBNum) const { + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + return ArrayRef<unsigned>(&ProcResourceHeights[MBBNum * PRKinds], PRKinds); +} + //===----------------------------------------------------------------------===// // Trace Selection Strategies //===----------------------------------------------------------------------===// @@ -713,11 +798,24 @@ computeInstrDepths(const MachineBasicBlock *MBB) { SmallVector<DataDep, 8> Deps; while (!Stack.empty()) { MBB = Stack.pop_back_val(); - DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n"); + DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n"); TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; TBI.HasValidInstrDepths = true; TBI.CriticalPath = 0; + // Print out resource depths here as well. + DEBUG({ + dbgs() << format("%7u Instructions\n", TBI.InstrDepth); + ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber()); + for (unsigned K = 0; K != PRDepths.size(); ++K) + if (PRDepths[K]) { + unsigned Factor = MTM.SchedModel.getResourceFactor(K); + dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K])) + << MTM.SchedModel.getProcResource(K)->Name << " (" + << PRDepths[K]/Factor << " ops x" << Factor << ")\n"; + } + }); + // Also compute the critical path length through MBB when possible. if (TBI.HasValidInstrHeights) TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); @@ -928,6 +1026,18 @@ computeInstrHeights(const MachineBasicBlock *MBB) { TBI.HasValidInstrHeights = true; TBI.CriticalPath = 0; + DEBUG({ + dbgs() << format("%7u Instructions\n", TBI.InstrHeight); + ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber()); + for (unsigned K = 0; K != PRHeights.size(); ++K) + if (PRHeights[K]) { + unsigned Factor = MTM.SchedModel.getResourceFactor(K); + dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K])) + << MTM.SchedModel.getProcResource(K)->Name << " (" + << PRHeights[K]/Factor << " ops x" << Factor << ")\n"; + } + }); + // Get dependencies from PHIs in the trace successor. const MachineBasicBlock *Succ = TBI.Succ; // If MBB is the last block in the trace, and it has a back-edge to the @@ -1058,27 +1168,52 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { } unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { - // For now, we compute the resource depth from instruction count / issue - // width. Eventually, we should compute resource depth per functional unit - // and return the max. + // Find the limiting processor resource. + // Numbers have been pre-scaled to be comparable. + unsigned PRMax = 0; + ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum()); + if (Bottom) { + ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum()); + for (unsigned K = 0; K != PRDepths.size(); ++K) + PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]); + } else { + for (unsigned K = 0; K != PRDepths.size(); ++K) + PRMax = std::max(PRMax, PRDepths[K]); + } + // Convert to cycle count. + PRMax = TE.MTM.getCycles(PRMax); + unsigned Instrs = TBI.InstrDepth; if (Bottom) Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) Instrs /= IW; // Assume issue width 1 without a schedule model. - return Instrs; + return std::max(Instrs, PRMax); } unsigned MachineTraceMetrics::Trace:: getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const { + // Add up resources above and below the center block. + ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum()); + ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum()); + unsigned PRMax = 0; + for (unsigned K = 0; K != PRDepths.size(); ++K) { + unsigned PRCycles = PRDepths[K] + PRHeights[K]; + for (unsigned I = 0; I != Extrablocks.size(); ++I) + PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; + PRMax = std::max(PRMax, PRCycles); + } + // Convert to cycle count. + PRMax = TE.MTM.getCycles(PRMax); + unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) Instrs /= IW; // Assume issue width 1 without a schedule model. - return Instrs; + return std::max(Instrs, PRMax); } void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { |