diff options
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- | llvm/lib/CodeGen/CMakeLists.txt | 1 | ||||
-rw-r--r-- | llvm/lib/CodeGen/CodeGen.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/CodeGen/MachineCombiner.cpp | 434 | ||||
-rw-r--r-- | llvm/lib/CodeGen/MachineTraceMetrics.cpp | 59 | ||||
-rw-r--r-- | llvm/lib/CodeGen/TargetSchedule.cpp | 22 |
5 files changed, 503 insertions, 14 deletions
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index b71b30cc1cc..2a247c12e64 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -49,6 +49,7 @@ add_llvm_library(LLVMCodeGen MachineBranchProbabilityInfo.cpp MachineCSE.cpp MachineCodeEmitter.cpp + MachineCombiner.cpp MachineCopyPropagation.cpp MachineDominators.cpp MachineDominanceFrontier.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index b3beac3932b..12b0411065a 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -41,6 +41,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); initializeMachineCopyPropagationPass(Registry); + initializeMachineCombinerPass(Registry); initializeMachineCSEPass(Registry); initializeMachineDominatorTreePass(Registry); initializeMachinePostDominatorTreePass(Registry); diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp new file mode 100644 index 00000000000..591c4caf66e --- /dev/null +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -0,0 +1,434 @@ +//===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The machine combiner pass uses machine trace metrics to ensure the combined +// instructions does not lengthen the critical path or the resource depth. +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "machine-combiner" + +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +STATISTIC(NumInstCombined, "Number of machineinst combined"); + +namespace { +class MachineCombiner : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const MCSchedModel *SchedModel; + MachineRegisterInfo *MRI; + MachineTraceMetrics *Traces; + MachineTraceMetrics::Ensemble *MinInstr; + + TargetSchedModel TSchedModel; + + /// OptSize - True if optimizing for code size. + bool OptSize; + +public: + static char ID; + MachineCombiner() : MachineFunctionPass(ID) { + initializeMachineCombinerPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + const char *getPassName() const override { return "Machine InstCombiner"; } + +private: + bool doSubstitute(unsigned NewSize, unsigned OldSize); + bool combineInstructions(MachineBasicBlock *); + MachineInstr *getOperandDef(const MachineOperand &MO); + unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, + MachineTraceMetrics::Trace BlockTrace); + unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot, + MachineTraceMetrics::Trace BlockTrace); + bool + preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg); + bool preservesResourceLen(MachineBasicBlock *MBB, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs); + void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs, + SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC); +}; +} + +char MachineCombiner::ID = 0; +char &llvm::MachineCombinerID = MachineCombiner::ID; + +INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner", + "Machine InstCombiner", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) +INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", + false, false) + +void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineTraceMetrics>(); + AU.addPreserved<MachineTraceMetrics>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) { + MachineInstr *DefInstr = nullptr; + // We need a virtual register definition. + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) + DefInstr = MRI->getUniqueVRegDef(MO.getReg()); + // PHI's have no depth etc. + if (DefInstr && DefInstr->isPHI()) + DefInstr = nullptr; + return DefInstr; +} + +/// getDepth - Computes depth of instructions in vector \InsInstr. +/// +/// \param InsInstrs is a vector of machine instructions +/// \param InstrIdxForVirtReg is a dense map of virtual register to index +/// of defining machine instruction in \p InsInstrs +/// \param BlockTrace is a trace of machine instructions +/// +/// \returns Depth of last instruction in \InsInstrs ("NewRoot") +unsigned +MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, + MachineTraceMetrics::Trace BlockTrace) { + + SmallVector<unsigned, 16> InstrDepth; + assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + + // Foreach instruction in in the new sequence compute the depth based on the + // operands. Use the trace information when possible. For new operands which + // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth + for (auto *InstrPtr : InsInstrs) { // for each Use + unsigned IDepth = 0; + DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";); + for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = InstrPtr->getOperand(i); + // Check for virtual register operand. + if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + continue; + if (!MO.isUse()) + continue; + unsigned DepthOp = 0; + unsigned LatencyOp = 0; + DenseMap<unsigned, unsigned>::iterator II = + InstrIdxForVirtReg.find(MO.getReg()); + if (II != InstrIdxForVirtReg.end()) { + // Operand is new virtual register not in trace + assert(II->second >= 0 && II->second < InstrDepth.size() && + "Bad Index"); + MachineInstr *DefInstr = InsInstrs[II->second]; + assert(DefInstr && + "There must be a definition for a new virtual register"); + DepthOp = InstrDepth[II->second]; + LatencyOp = TSchedModel.computeOperandLatency( + DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), + InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + } else { + MachineInstr *DefInstr = getOperandDef(MO); + if (DefInstr) { + DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth; + LatencyOp = TSchedModel.computeOperandLatency( + DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), + InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + } + } + IDepth = std::max(IDepth, DepthOp + LatencyOp); + } + InstrDepth.push_back(IDepth); + } + unsigned NewRootIdx = InsInstrs.size() - 1; + return InstrDepth[NewRootIdx]; +} + +/// getLatency - Computes instruction latency as max of latency of defined +/// operands +/// +/// \param Root is a machine instruction that could be replaced by NewRoot. +/// It is used to compute a more accurate latency information for NewRoot in +/// case there is a dependent instruction in the same trace (\p BlockTrace) +/// \param NewRoot is the instruction for which the latency is computed +/// \param BlockTrace is a trace of machine instructions +/// +/// \returns Latency of \p NewRoot +unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, + MachineTraceMetrics::Trace BlockTrace) { + + assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + + // Check each definition in NewRoot and compute the latency + unsigned NewRootLatency = 0; + + for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = NewRoot->getOperand(i); + // Check for virtual register operand. + if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + continue; + if (!MO.isDef()) + continue; + // Get the first instruction that uses MO + MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg()); + RI++; + MachineInstr *UseMO = RI->getParent(); + unsigned LatencyOp = 0; + if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) { + LatencyOp = TSchedModel.computeOperandLatency( + NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO, + UseMO->findRegisterUseOperandIdx(MO.getReg())); + } else { + LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode()); + } + NewRootLatency = std::max(NewRootLatency, LatencyOp); + } + return NewRootLatency; +} + +/// preservesCriticalPathlen - True when the new instruction sequence does not +/// lengthen the critical path. The DAGCombine code sequence ends in MI +/// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A +/// necessary condition for the new sequence to replace the old sequence is that +/// is cannot lengthen the critical path. This is decided by the formula +/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)). +/// The slack is the number of cycles Root can be delayed before the critical +/// patch becomes longer. +bool MachineCombiner::preservesCriticalPathLen( + MachineBasicBlock *MBB, MachineInstr *Root, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) { + + assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + // NewRoot is the last instruction in the \p InsInstrs vector + // Get depth and latency of NewRoot + unsigned NewRootIdx = InsInstrs.size() - 1; + MachineInstr *NewRoot = InsInstrs[NewRootIdx]; + unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); + unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); + + // Get depth, latency and slack of Root + unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth; + unsigned RootLatency = TSchedModel.computeInstrLatency(Root); + unsigned RootSlack = BlockTrace.getInstrSlack(Root); + + DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; + dbgs() << " NewRootDepth: " << NewRootDepth + << " NewRootLatency: " << NewRootLatency << "\n"; + dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency + << " RootSlack: " << RootSlack << "\n"; + dbgs() << " NewRootDepth + NewRootLatency " + << NewRootDepth + NewRootLatency << "\n"; + dbgs() << " RootDepth + RootLatency + RootSlack " + << RootDepth + RootLatency + RootSlack << "\n";); + + /// True when the new sequence does not lenghten the critical path. + return ((NewRootDepth + NewRootLatency) <= + (RootDepth + RootLatency + RootSlack)); +} + +/// helper routine to convert instructions into SC +void MachineCombiner::instr2instrSC( + SmallVectorImpl<MachineInstr *> &Instrs, + SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) { + for (auto *InstrPtr : Instrs) { + unsigned Opc = InstrPtr->getOpcode(); + unsigned Idx = TII->get(Opc).getSchedClass(); + const MCSchedClassDesc *SC = SchedModel->getSchedClassDesc(Idx); + InstrsSC.push_back(SC); + } +} +/// preservesResourceLen - True when the new instructions do not increase +/// resource length +bool MachineCombiner::preservesResourceLen( + MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs) { + + // Compute current resource length + + ArrayRef<const MachineBasicBlock *> MBBarr(MBB); + unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr); + + // Deal with SC rather than Instructions. + SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC; + SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC; + + instr2instrSC(InsInstrs, InsInstrsSC); + instr2instrSC(DelInstrs, DelInstrsSC); + + ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC); + ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC); + + // Compute new resource length + unsigned ResLenAfterCombine = + BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr); + + DEBUG(dbgs() << "RESOURCE DATA: \n"; + dbgs() << " resource len before: " << ResLenBeforeCombine + << " after: " << ResLenAfterCombine << "\n";); + + return ResLenAfterCombine <= ResLenBeforeCombine; +} + +/// \returns true when new instruction sequence should be generated +/// independent if it lenghtens critical path or not +bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { + if (OptSize && (NewSize < OldSize)) + return true; + if (!TSchedModel.hasInstrSchedModel()) + return true; + return false; +} + +/// combineInstructions - substitute a slow code sequence with a faster one by +/// evaluating instruction combining pattern. +/// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction +/// combining based on machine trace metrics. Only combine a sequence of +/// instructions when this neither lengthens the critical path nor increases +/// resource pressure. When optimizing for codesize always combine when the new +/// sequence is shorter. +bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { + bool Changed = false; + DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); + + auto BlockIter = MBB->begin(); + + while (BlockIter != MBB->end()) { + auto &MI = *BlockIter++; + + DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";); + SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Pattern; + // The motivating example is: + // + // MUL Other MUL_op1 MUL_op2 Other + // \ / \ | / + // ADD/SUB => MADD/MSUB + // (=Root) (=NewRoot) + + // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is + // usually beneficial for code size it unfortunately can hurt performance + // when the ADD is on the critical path, but the MUL is not. With the + // substitution the MUL becomes part of the critical path (in form of the + // MADD) and can lengthen it on architectures where the MADD latency is + // longer than the ADD latency. + // + // For each instruction we check if it can be the root of a combiner + // pattern. Then for each pattern the new code sequence in form of MI is + // generated and evaluated. When the efficiency criteria (don't lengthen + // critical path, don't use more resources) is met the new sequence gets + // hooked up into the basic block before the old sequence is removed. + // + // The algorithm does not try to evaluate all patterns and pick the best. + // This is only an artificial restriction though. In practice there is + // mostly one pattern and hasPattern() can order patterns based on an + // internal cost heuristic. + + if (TII->hasPattern(MI, Pattern)) { + for (auto P : Pattern) { + SmallVector<MachineInstr *, 16> InsInstrs; + SmallVector<MachineInstr *, 16> DelInstrs; + DenseMap<unsigned, unsigned> InstrIdxForVirtReg; + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + Traces->verifyAnalysis(); + TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, + InstrIdxForVirtReg); + // Found pattern, but did not generate alternative sequence. + // This can happen e.g. when an immediate could not be materialized + // in a single instruction. + if (!InsInstrs.size()) + continue; + // Substitute when we optimize for codesize and the new sequence has + // fewer instructions OR + // the new sequence neither lenghten the critical path nor increases + // resource pressure. + if (doSubstitute(InsInstrs.size(), DelInstrs.size()) || + (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, + InstrIdxForVirtReg) && + preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { + for (auto *InstrPtr : InsInstrs) + MBB->insert((MachineBasicBlock::iterator) & MI, + (MachineInstr *)InstrPtr); + for (auto *InstrPtr : DelInstrs) + InstrPtr->eraseFromParent(); + + Changed = true; + ++NumInstCombined; + + Traces->invalidate(MBB); + Traces->verifyAnalysis(); + // Eagerly stop after the first pattern fired + break; + } else { + // Cleanup instructions of the alternative code sequence. There is no + // use for them. + for (auto *InstrPtr : InsInstrs) { + MachineFunction *MF = MBB->getParent(); + MF->DeleteMachineInstr((MachineInstr *)InstrPtr); + } + } + InstrIdxForVirtReg.clear(); + } + } + } + + return Changed; +} + +bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + const TargetSubtargetInfo &STI = + MF.getTarget().getSubtarget<TargetSubtargetInfo>(); + SchedModel = STI.getSchedModel(); + TSchedModel.init(*SchedModel, &STI, TII); + MRI = &MF.getRegInfo(); + Traces = &getAnalysis<MachineTraceMetrics>(); + MinInstr = 0; + + OptSize = MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + + DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); + if (!TII->useMachineCombiner()) { + DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n"); + return false; + } + + bool Changed = false; + + // Try to combine instructions. + for (auto &MBB : MF) + Changed |= combineInstructions(&MBB); + + return Changed; +} diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 1bbf0ade6a5..93528e05d3a 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -1169,6 +1169,7 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { return DepCycle; } +/// When bottom is set include instructions in current block in estimate. unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { // Find the limiting processor resource. // Numbers have been pre-scaled to be comparable. @@ -1185,7 +1186,9 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { // Convert to cycle count. PRMax = TE.MTM.getCycles(PRMax); + /// All instructions before current block unsigned Instrs = TBI.InstrDepth; + // plus instructions in current block if (Bottom) Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) @@ -1194,44 +1197,72 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { return std::max(Instrs, PRMax); } - -unsigned MachineTraceMetrics::Trace:: -getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks, - ArrayRef<const MCSchedClassDesc*> ExtraInstrs) const { +unsigned MachineTraceMetrics::Trace::getResourceLength( + ArrayRef<const MachineBasicBlock *> Extrablocks, + ArrayRef<const MCSchedClassDesc *> ExtraInstrs, + ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const { // Add up resources above and below the center block. ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum()); ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum()); unsigned PRMax = 0; - for (unsigned K = 0; K != PRDepths.size(); ++K) { - unsigned PRCycles = PRDepths[K] + PRHeights[K]; - for (unsigned I = 0; I != Extrablocks.size(); ++I) - PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; - for (unsigned I = 0; I != ExtraInstrs.size(); ++I) { - const MCSchedClassDesc* SC = ExtraInstrs[I]; + + // Capture computing cycles from extra instructions + auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs, + unsigned ResourceIdx) + ->unsigned { + unsigned Cycles = 0; + for (unsigned I = 0; I != Instrs.size(); ++I) { + const MCSchedClassDesc *SC = Instrs[I]; if (!SC->isValid()) continue; for (TargetSchedModel::ProcResIter - PI = TE.MTM.SchedModel.getWriteProcResBegin(SC), - PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { - if (PI->ProcResourceIdx != K) + PI = TE.MTM.SchedModel.getWriteProcResBegin(SC), + PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); + PI != PE; ++PI) { + if (PI->ProcResourceIdx != ResourceIdx) continue; - PRCycles += (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(K)); + Cycles += + (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx)); } } + return Cycles; + }; + + for (unsigned K = 0; K != PRDepths.size(); ++K) { + unsigned PRCycles = PRDepths[K] + PRHeights[K]; + for (unsigned I = 0; I != Extrablocks.size(); ++I) + PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; + PRCycles += extraCycles(ExtraInstrs, K); + PRCycles -= extraCycles(RemoveInstrs, K); PRMax = std::max(PRMax, PRCycles); } // Convert to cycle count. PRMax = TE.MTM.getCycles(PRMax); + // Instrs: #instructions in current trace outside current block. unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; + // Add instruction count from the extra blocks. for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; + Instrs += ExtraInstrs.size(); + Instrs -= RemoveInstrs.size(); if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) Instrs /= IW; // Assume issue width 1 without a schedule model. return std::max(Instrs, PRMax); } +bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI, + const MachineInstr *UseMI) const { + if (DefMI->getParent() == UseMI->getParent()) + return true; + + const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()]; + const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()]; + + return DepTBI.isUsefulDominator(TBI); +} + void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { OS << getName() << " ensemble:\n"; for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { diff --git a/llvm/lib/CodeGen/TargetSchedule.cpp b/llvm/lib/CodeGen/TargetSchedule.cpp index b0f2ca68884..f42946f35ef 100644 --- a/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/llvm/lib/CodeGen/TargetSchedule.cpp @@ -225,6 +225,28 @@ unsigned TargetSchedModel::computeOperandLatency( return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI); } +unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const { + assert(hasInstrSchedModel() && "Only call this function with a SchedModel"); + + unsigned SCIdx = TII->get(Opcode).getSchedClass(); + const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx); + unsigned Latency = 0; + + if (SCDesc->isValid() && !SCDesc->isVariant()) { + for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; + DefIdx != DefEnd; ++DefIdx) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(SCDesc, DefIdx); + Latency = std::max(Latency, capLatency(WLEntry->Cycles)); + } + return Latency; + } + + assert(Latency && "No MI sched latency"); + return 0; +} + unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI, bool UseDefaultDefLatency) const { |