diff options
author | Clement Courbet <courbet@google.com> | 2018-12-17 08:08:31 +0000 |
---|---|---|
committer | Clement Courbet <courbet@google.com> | 2018-12-17 08:08:31 +0000 |
commit | cc5e6a72de590bff4f1e68b7271d9f82bc66b995 (patch) | |
tree | 38daf2528d9f190e3ba1743d0cb592fa782698b4 /llvm/tools/llvm-mca/lib | |
parent | 792d4f130d6fcfebc14f0e4e20d3111a24852da2 (diff) | |
download | bcm5719-llvm-cc5e6a72de590bff4f1e68b7271d9f82bc66b995.tar.gz bcm5719-llvm-cc5e6a72de590bff4f1e68b7271d9f82bc66b995.zip |
[llvm-mca] Move llvm-mca library to llvm/lib/MCA.
Summary: See PR38731.
Reviewers: andreadb
Subscribers: mgorny, javed.absar, tschuett, gbedwell, andreadb, RKSimon, llvm-commits
Differential Revision: https://reviews.llvm.org/D55557
llvm-svn: 349332
Diffstat (limited to 'llvm/tools/llvm-mca/lib')
20 files changed, 0 insertions, 3203 deletions
diff --git a/llvm/tools/llvm-mca/lib/CMakeLists.txt b/llvm/tools/llvm-mca/lib/CMakeLists.txt deleted file mode 100644 index 21b6e34cc7e..00000000000 --- a/llvm/tools/llvm-mca/lib/CMakeLists.txt +++ /dev/null @@ -1,32 +0,0 @@ -include_directories(${LLVM_MCA_SOURCE_DIR}/include) - -add_library(LLVMMCA - STATIC - Context.cpp - HWEventListener.cpp - HardwareUnits/HardwareUnit.cpp - HardwareUnits/LSUnit.cpp - HardwareUnits/RegisterFile.cpp - HardwareUnits/ResourceManager.cpp - HardwareUnits/RetireControlUnit.cpp - HardwareUnits/Scheduler.cpp - InstrBuilder.cpp - Instruction.cpp - Pipeline.cpp - Stages/DispatchStage.cpp - Stages/EntryStage.cpp - Stages/ExecuteStage.cpp - Stages/InstructionTables.cpp - Stages/RetireStage.cpp - Stages/Stage.cpp - Support.cpp - ) - -llvm_update_compile_flags(LLVMMCA) -llvm_map_components_to_libnames(libs - MC - Support - ) - -target_link_libraries(LLVMMCA ${libs}) -set_target_properties(LLVMMCA PROPERTIES FOLDER "Libraries") diff --git a/llvm/tools/llvm-mca/lib/Context.cpp b/llvm/tools/llvm-mca/lib/Context.cpp deleted file mode 100644 index 17b992aac9c..00000000000 --- a/llvm/tools/llvm-mca/lib/Context.cpp +++ /dev/null @@ -1,65 +0,0 @@ -//===---------------------------- Context.cpp -------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a class for holding ownership of various simulated -/// hardware units. A Context also provides a utility routine for constructing -/// a default out-of-order pipeline with fetch, dispatch, execute, and retire -/// stages. 
-/// -//===----------------------------------------------------------------------===// - -#include "Context.h" -#include "HardwareUnits/RegisterFile.h" -#include "HardwareUnits/RetireControlUnit.h" -#include "HardwareUnits/Scheduler.h" -#include "Stages/DispatchStage.h" -#include "Stages/EntryStage.h" -#include "Stages/ExecuteStage.h" -#include "Stages/RetireStage.h" - -namespace llvm { -namespace mca { - -std::unique_ptr<Pipeline> -Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, - SourceMgr &SrcMgr) { - const MCSchedModel &SM = STI.getSchedModel(); - - // Create the hardware units defining the backend. - auto RCU = llvm::make_unique<RetireControlUnit>(SM); - auto PRF = llvm::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize); - auto LSU = llvm::make_unique<LSUnit>(SM, Opts.LoadQueueSize, - Opts.StoreQueueSize, Opts.AssumeNoAlias); - auto HWS = llvm::make_unique<Scheduler>(SM, *LSU); - - // Create the pipeline stages. - auto Fetch = llvm::make_unique<EntryStage>(SrcMgr); - auto Dispatch = llvm::make_unique<DispatchStage>(STI, MRI, Opts.DispatchWidth, - *RCU, *PRF); - auto Execute = llvm::make_unique<ExecuteStage>(*HWS); - auto Retire = llvm::make_unique<RetireStage>(*RCU, *PRF); - - // Pass the ownership of all the hardware units to this Context. - addHardwareUnit(std::move(RCU)); - addHardwareUnit(std::move(PRF)); - addHardwareUnit(std::move(LSU)); - addHardwareUnit(std::move(HWS)); - - // Build the pipeline. 
- auto StagePipeline = llvm::make_unique<Pipeline>(); - StagePipeline->appendStage(std::move(Fetch)); - StagePipeline->appendStage(std::move(Dispatch)); - StagePipeline->appendStage(std::move(Execute)); - StagePipeline->appendStage(std::move(Retire)); - return StagePipeline; -} - -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/HWEventListener.cpp b/llvm/tools/llvm-mca/lib/HWEventListener.cpp deleted file mode 100644 index 3930e2555a9..00000000000 --- a/llvm/tools/llvm-mca/lib/HWEventListener.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===----------------------- HWEventListener.cpp ----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a vtable anchor for class HWEventListener. -/// -//===----------------------------------------------------------------------===// - -#include "HWEventListener.h" - -namespace llvm { -namespace mca { - -// Anchor the vtable here. -void HWEventListener::anchor() {} -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/HardwareUnits/HardwareUnit.cpp b/llvm/tools/llvm-mca/lib/HardwareUnits/HardwareUnit.cpp deleted file mode 100644 index 4e46ffacbd4..00000000000 --- a/llvm/tools/llvm-mca/lib/HardwareUnits/HardwareUnit.cpp +++ /dev/null @@ -1,25 +0,0 @@ -//===------------------------- HardwareUnit.cpp -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the anchor for the base class that describes -/// simulated hardware units. 
-/// -//===----------------------------------------------------------------------===// - -#include "HardwareUnits/HardwareUnit.h" - -namespace llvm { -namespace mca { - -// Pin the vtable with this method. -HardwareUnit::~HardwareUnit() = default; - -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/HardwareUnits/LSUnit.cpp b/llvm/tools/llvm-mca/lib/HardwareUnits/LSUnit.cpp deleted file mode 100644 index ed8269167fe..00000000000 --- a/llvm/tools/llvm-mca/lib/HardwareUnits/LSUnit.cpp +++ /dev/null @@ -1,190 +0,0 @@ -//===----------------------- LSUnit.cpp --------------------------*- C++-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// A Load-Store Unit for the llvm-mca tool. -/// -//===----------------------------------------------------------------------===// - -#include "HardwareUnits/LSUnit.h" -#include "Instruction.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace llvm { -namespace mca { - -LSUnit::LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, - bool AssumeNoAlias) - : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) { - if (SM.hasExtraProcessorInfo()) { - const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); - if (!LQ_Size && EPI.LoadQueueID) { - const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID); - LQ_Size = LdQDesc.BufferSize; - } - - if (!SQ_Size && EPI.StoreQueueID) { - const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID); - SQ_Size = StQDesc.BufferSize; - } - } -} - -#ifndef NDEBUG -void LSUnit::dump() const { - dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n'; - dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n'; - dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() 
<< '\n'; - dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n'; -} -#endif - -void LSUnit::assignLQSlot(unsigned Index) { - assert(!isLQFull()); - assert(LoadQueue.count(Index) == 0); - - LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot <Idx=" << Index - << ",slot=" << LoadQueue.size() << ">\n"); - LoadQueue.insert(Index); -} - -void LSUnit::assignSQSlot(unsigned Index) { - assert(!isSQFull()); - assert(StoreQueue.count(Index) == 0); - - LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot <Idx=" << Index - << ",slot=" << StoreQueue.size() << ">\n"); - StoreQueue.insert(Index); -} - -void LSUnit::dispatch(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - unsigned IsMemBarrier = Desc.HasSideEffects; - assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!"); - - const unsigned Index = IR.getSourceIndex(); - if (Desc.MayLoad) { - if (IsMemBarrier) - LoadBarriers.insert(Index); - assignLQSlot(Index); - } - - if (Desc.MayStore) { - if (IsMemBarrier) - StoreBarriers.insert(Index); - assignSQSlot(Index); - } -} - -LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - if (Desc.MayLoad && isLQFull()) - return LSUnit::LSU_LQUEUE_FULL; - if (Desc.MayStore && isSQFull()) - return LSUnit::LSU_SQUEUE_FULL; - return LSUnit::LSU_AVAILABLE; -} - -bool LSUnit::isReady(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - const unsigned Index = IR.getSourceIndex(); - bool IsALoad = Desc.MayLoad; - bool IsAStore = Desc.MayStore; - assert((IsALoad || IsAStore) && "Not a memory operation!"); - assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!"); - assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!"); - - if (IsALoad && !LoadBarriers.empty()) { - unsigned LoadBarrierIndex = *LoadBarriers.begin(); - // A younger load cannot pass a older load barrier. 
- if (Index > LoadBarrierIndex) - return false; - // A load barrier cannot pass a older load. - if (Index == LoadBarrierIndex && Index != *LoadQueue.begin()) - return false; - } - - if (IsAStore && !StoreBarriers.empty()) { - unsigned StoreBarrierIndex = *StoreBarriers.begin(); - // A younger store cannot pass a older store barrier. - if (Index > StoreBarrierIndex) - return false; - // A store barrier cannot pass a older store. - if (Index == StoreBarrierIndex && Index != *StoreQueue.begin()) - return false; - } - - // A load may not pass a previous store unless flag 'NoAlias' is set. - // A load may pass a previous load. - if (NoAlias && IsALoad) - return true; - - if (StoreQueue.size()) { - // A load may not pass a previous store. - // A store may not pass a previous store. - if (Index > *StoreQueue.begin()) - return false; - } - - // Okay, we are older than the oldest store in the queue. - // If there are no pending loads, then we can say for sure that this - // instruction is ready. - if (isLQEmpty()) - return true; - - // Check if there are no older loads. - if (Index <= *LoadQueue.begin()) - return true; - - // There is at least one younger load. - // - // A store may not pass a previous load. - // A load may pass a previous load. 
- return !IsAStore; -} - -void LSUnit::onInstructionExecuted(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - const unsigned Index = IR.getSourceIndex(); - bool IsALoad = Desc.MayLoad; - bool IsAStore = Desc.MayStore; - - if (IsALoad) { - if (LoadQueue.erase(Index)) { - LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the load queue.\n"); - } - if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) { - LLVM_DEBUG( - dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the set of load barriers.\n"); - LoadBarriers.erase(Index); - } - } - - if (IsAStore) { - if (StoreQueue.erase(Index)) { - LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the store queue.\n"); - } - - if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) { - LLVM_DEBUG( - dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the set of store barriers.\n"); - StoreBarriers.erase(Index); - } - } -} - -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp b/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp deleted file mode 100644 index f96e4cab4b9..00000000000 --- a/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp +++ /dev/null @@ -1,491 +0,0 @@ -//===--------------------- RegisterFile.cpp ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a register mapping file class. This class is responsible -/// for managing hardware register files and the tracking of data dependencies -/// between registers. 
-/// -//===----------------------------------------------------------------------===// - -#include "HardwareUnits/RegisterFile.h" -#include "Instruction.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace llvm { -namespace mca { - -RegisterFile::RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri, - unsigned NumRegs) - : MRI(mri), - RegisterMappings(mri.getNumRegs(), {WriteRef(), RegisterRenamingInfo()}), - ZeroRegisters(mri.getNumRegs(), false) { - initialize(SM, NumRegs); -} - -void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) { - // Create a default register file that "sees" all the machine registers - // declared by the target. The number of physical registers in the default - // register file is set equal to `NumRegs`. A value of zero for `NumRegs` - // means: this register file has an unbounded number of physical registers. - RegisterFiles.emplace_back(NumRegs); - if (!SM.hasExtraProcessorInfo()) - return; - - // For each user defined register file, allocate a RegisterMappingTracker - // object. The size of every register file, as well as the mapping between - // register files and register classes is specified via tablegen. - const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo(); - - // Skip invalid register file at index 0. - for (unsigned I = 1, E = Info.NumRegisterFiles; I < E; ++I) { - const MCRegisterFileDesc &RF = Info.RegisterFiles[I]; - assert(RF.NumPhysRegs && "Invalid PRF with zero physical registers!"); - - // The cost of a register definition is equivalent to the number of - // physical registers that are allocated at register renaming stage. 
- unsigned Length = RF.NumRegisterCostEntries; - const MCRegisterCostEntry *FirstElt = - &Info.RegisterCostTable[RF.RegisterCostEntryIdx]; - addRegisterFile(RF, ArrayRef<MCRegisterCostEntry>(FirstElt, Length)); - } -} - -void RegisterFile::cycleStart() { - for (RegisterMappingTracker &RMT : RegisterFiles) - RMT.NumMoveEliminated = 0; -} - -void RegisterFile::addRegisterFile(const MCRegisterFileDesc &RF, - ArrayRef<MCRegisterCostEntry> Entries) { - // A default register file is always allocated at index #0. That register file - // is mainly used to count the total number of mappings created by all - // register files at runtime. Users can limit the number of available physical - // registers in register file #0 through the command line flag - // `-register-file-size`. - unsigned RegisterFileIndex = RegisterFiles.size(); - RegisterFiles.emplace_back(RF.NumPhysRegs, RF.MaxMovesEliminatedPerCycle, - RF.AllowZeroMoveEliminationOnly); - - // Special case where there is no register class identifier in the set. - // An empty set of register classes means: this register file contains all - // the physical registers specified by the target. - // We optimistically assume that a register can be renamed at the cost of a - // single physical register. The constructor of RegisterFile ensures that - // a RegisterMapping exists for each logical register defined by the Target. - if (Entries.empty()) - return; - - // Now update the cost of individual registers. - for (const MCRegisterCostEntry &RCE : Entries) { - const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID); - for (const MCPhysReg Reg : RC) { - RegisterRenamingInfo &Entry = RegisterMappings[Reg].second; - IndexPlusCostPairTy &IPC = Entry.IndexPlusCost; - if (IPC.first && IPC.first != RegisterFileIndex) { - // The only register file that is allowed to overlap is the default - // register file at index #0. The analysis is inaccurate if register - // files overlap. 
- errs() << "warning: register " << MRI.getName(Reg) - << " defined in multiple register files."; - } - IPC = std::make_pair(RegisterFileIndex, RCE.Cost); - Entry.RenameAs = Reg; - Entry.AllowMoveElimination = RCE.AllowMoveElimination; - - // Assume the same cost for each sub-register. - for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) { - RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second; - if (!OtherEntry.IndexPlusCost.first && - (!OtherEntry.RenameAs || - MRI.isSuperRegister(*I, OtherEntry.RenameAs))) { - OtherEntry.IndexPlusCost = IPC; - OtherEntry.RenameAs = Reg; - } - } - } - } -} - -void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry, - MutableArrayRef<unsigned> UsedPhysRegs) { - unsigned RegisterFileIndex = Entry.IndexPlusCost.first; - unsigned Cost = Entry.IndexPlusCost.second; - if (RegisterFileIndex) { - RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; - RMT.NumUsedPhysRegs += Cost; - UsedPhysRegs[RegisterFileIndex] += Cost; - } - - // Now update the default register mapping tracker. - RegisterFiles[0].NumUsedPhysRegs += Cost; - UsedPhysRegs[0] += Cost; -} - -void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry, - MutableArrayRef<unsigned> FreedPhysRegs) { - unsigned RegisterFileIndex = Entry.IndexPlusCost.first; - unsigned Cost = Entry.IndexPlusCost.second; - if (RegisterFileIndex) { - RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; - RMT.NumUsedPhysRegs -= Cost; - FreedPhysRegs[RegisterFileIndex] += Cost; - } - - // Now update the default register mapping tracker. 
- RegisterFiles[0].NumUsedPhysRegs -= Cost; - FreedPhysRegs[0] += Cost; -} - -void RegisterFile::addRegisterWrite(WriteRef Write, - MutableArrayRef<unsigned> UsedPhysRegs) { - WriteState &WS = *Write.getWriteState(); - unsigned RegID = WS.getRegisterID(); - assert(RegID && "Adding an invalid register definition?"); - - LLVM_DEBUG({ - dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex() - << ", " << MRI.getName(RegID) << "]\n"; - }); - - // If RenameAs is equal to RegID, then RegID is subject to register renaming - // and false dependencies on RegID are all eliminated. - - // If RenameAs references the invalid register, then we optimistically assume - // that it can be renamed. In the absence of tablegen descriptors for register - // files, RenameAs is always set to the invalid register ID. In all other - // cases, RenameAs must be either equal to RegID, or it must reference a - // super-register of RegID. - - // If RenameAs is a super-register of RegID, then a write to RegID has always - // a false dependency on RenameAs. The only exception is for when the write - // implicitly clears the upper portion of the underlying register. - // If a write clears its super-registers, then it is renamed as `RenameAs`. - bool IsWriteZero = WS.isWriteZero(); - bool IsEliminated = WS.isEliminated(); - bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated; - const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; - WS.setPRF(RRI.IndexPlusCost.first); - - if (RRI.RenameAs && RRI.RenameAs != RegID) { - RegID = RRI.RenameAs; - WriteRef &OtherWrite = RegisterMappings[RegID].first; - - if (!WS.clearsSuperRegisters()) { - // The processor keeps the definition of `RegID` together with register - // `RenameAs`. Since this partial write is not renamed, no physical - // register is allocated. 
- ShouldAllocatePhysRegs = false; - - WriteState *OtherWS = OtherWrite.getWriteState(); - if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) { - // This partial write has a false dependency on RenameAs. - assert(!IsEliminated && "Unexpected partial update!"); - OtherWS->addUser(&WS); - } - } - } - - // Update zero registers. - unsigned ZeroRegisterID = - WS.clearsSuperRegisters() ? RegID : WS.getRegisterID(); - if (IsWriteZero) { - ZeroRegisters.setBit(ZeroRegisterID); - for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I) - ZeroRegisters.setBit(*I); - } else { - ZeroRegisters.clearBit(ZeroRegisterID); - for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I) - ZeroRegisters.clearBit(*I); - } - - // If this is move has been eliminated, then the call to tryEliminateMove - // should have already updated all the register mappings. - if (!IsEliminated) { - // Update the mapping for register RegID including its sub-registers. - RegisterMappings[RegID].first = Write; - RegisterMappings[RegID].second.AliasRegID = 0U; - for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { - RegisterMappings[*I].first = Write; - RegisterMappings[*I].second.AliasRegID = 0U; - } - - // No physical registers are allocated for instructions that are optimized - // in hardware. For example, zero-latency data-dependency breaking - // instructions don't consume physical registers. - if (ShouldAllocatePhysRegs) - allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs); - } - - if (!WS.clearsSuperRegisters()) - return; - - for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) { - if (!IsEliminated) { - RegisterMappings[*I].first = Write; - RegisterMappings[*I].second.AliasRegID = 0U; - } - - if (IsWriteZero) - ZeroRegisters.setBit(*I); - else - ZeroRegisters.clearBit(*I); - } -} - -void RegisterFile::removeRegisterWrite( - const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs) { - // Early exit if this write was eliminated. 
A write eliminated at register - // renaming stage generates an alias, and it is not added to the PRF. - if (WS.isEliminated()) - return; - - unsigned RegID = WS.getRegisterID(); - - assert(RegID != 0 && "Invalidating an already invalid register?"); - assert(WS.getCyclesLeft() != UNKNOWN_CYCLES && - "Invalidating a write of unknown cycles!"); - assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); - - bool ShouldFreePhysRegs = !WS.isWriteZero(); - unsigned RenameAs = RegisterMappings[RegID].second.RenameAs; - if (RenameAs && RenameAs != RegID) { - RegID = RenameAs; - - if (!WS.clearsSuperRegisters()) { - // Keep the definition of `RegID` together with register `RenameAs`. - ShouldFreePhysRegs = false; - } - } - - if (ShouldFreePhysRegs) - freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs); - - WriteRef &WR = RegisterMappings[RegID].first; - if (WR.getWriteState() == &WS) - WR.invalidate(); - - for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { - WriteRef &OtherWR = RegisterMappings[*I].first; - if (OtherWR.getWriteState() == &WS) - OtherWR.invalidate(); - } - - if (!WS.clearsSuperRegisters()) - return; - - for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) { - WriteRef &OtherWR = RegisterMappings[*I].first; - if (OtherWR.getWriteState() == &WS) - OtherWR.invalidate(); - } -} - -bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) { - const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()]; - const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()]; - - // From and To must be owned by the same PRF. - const RegisterRenamingInfo &RRIFrom = RMFrom.second; - const RegisterRenamingInfo &RRITo = RMTo.second; - unsigned RegisterFileIndex = RRIFrom.IndexPlusCost.first; - if (RegisterFileIndex != RRITo.IndexPlusCost.first) - return false; - - // We only allow move elimination for writes that update a full physical - // register. 
On X86, move elimination is possible with 32-bit general purpose - // registers because writes to those registers are not partial writes. If a - // register move is a partial write, then we conservatively assume that move - // elimination fails, since it would either trigger a partial update, or the - // issue of a merge opcode. - // - // Note that this constraint may be lifted in future. For example, we could - // make this model more flexible, and let users customize the set of registers - // (i.e. register classes) that allow move elimination. - // - // For now, we assume that there is a strong correlation between registers - // that allow move elimination, and how those same registers are renamed in - // hardware. - if (RRITo.RenameAs && RRITo.RenameAs != WS.getRegisterID()) { - // Early exit if the PRF doesn't support move elimination for this register. - if (!RegisterMappings[RRITo.RenameAs].second.AllowMoveElimination) - return false; - if (!WS.clearsSuperRegisters()) - return false; - } - - RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; - if (RMT.MaxMoveEliminatedPerCycle && - RMT.NumMoveEliminated == RMT.MaxMoveEliminatedPerCycle) - return false; - - bool IsZeroMove = ZeroRegisters[RS.getRegisterID()]; - if (RMT.AllowZeroMoveEliminationOnly && !IsZeroMove) - return false; - - MCPhysReg FromReg = RS.getRegisterID(); - MCPhysReg ToReg = WS.getRegisterID(); - - // Construct an alias. 
- MCPhysReg AliasReg = FromReg; - if (RRIFrom.RenameAs) - AliasReg = RRIFrom.RenameAs; - - const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasReg].second; - if (RMAlias.AliasRegID) - AliasReg = RMAlias.AliasRegID; - - if (AliasReg != ToReg) { - RegisterMappings[ToReg].second.AliasRegID = AliasReg; - for (MCSubRegIterator I(ToReg, &MRI); I.isValid(); ++I) - RegisterMappings[*I].second.AliasRegID = AliasReg; - } - - RMT.NumMoveEliminated++; - if (IsZeroMove) { - WS.setWriteZero(); - RS.setReadZero(); - } - WS.setEliminated(); - - return true; -} - -void RegisterFile::collectWrites(const ReadState &RS, - SmallVectorImpl<WriteRef> &Writes) const { - unsigned RegID = RS.getRegisterID(); - assert(RegID && RegID < RegisterMappings.size()); - LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " - << MRI.getName(RegID) << '\n'); - - // Check if this is an alias. - const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; - if (RRI.AliasRegID) - RegID = RRI.AliasRegID; - - const WriteRef &WR = RegisterMappings[RegID].first; - if (WR.isValid()) - Writes.push_back(WR); - - // Handle potential partial register updates. - for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { - const WriteRef &WR = RegisterMappings[*I].first; - if (WR.isValid()) - Writes.push_back(WR); - } - - // Remove duplicate entries and resize the input vector. 
- if (Writes.size() > 1) { - sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) { - return Lhs.getWriteState() < Rhs.getWriteState(); - }); - auto It = std::unique(Writes.begin(), Writes.end()); - Writes.resize(std::distance(Writes.begin(), It)); - } - - LLVM_DEBUG({ - for (const WriteRef &WR : Writes) { - const WriteState &WS = *WR.getWriteState(); - dbgs() << "[PRF] Found a dependent use of Register " - << MRI.getName(WS.getRegisterID()) << " (defined by instruction #" - << WR.getSourceIndex() << ")\n"; - } - }); -} - -void RegisterFile::addRegisterRead(ReadState &RS, - SmallVectorImpl<WriteRef> &Defs) const { - unsigned RegID = RS.getRegisterID(); - const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; - RS.setPRF(RRI.IndexPlusCost.first); - if (RS.isIndependentFromDef()) - return; - - if (ZeroRegisters[RS.getRegisterID()]) - RS.setReadZero(); - collectWrites(RS, Defs); - RS.setDependentWrites(Defs.size()); -} - -unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const { - SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles()); - - // Find how many new mappings must be created for each register file. - for (const unsigned RegID : Regs) { - const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; - const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost; - if (Entry.first) - NumPhysRegs[Entry.first] += Entry.second; - NumPhysRegs[0] += Entry.second; - } - - unsigned Response = 0; - for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { - unsigned NumRegs = NumPhysRegs[I]; - if (!NumRegs) - continue; - - const RegisterMappingTracker &RMT = RegisterFiles[I]; - if (!RMT.NumPhysRegs) { - // The register file has an unbounded number of microarchitectural - // registers. - continue; - } - - if (RMT.NumPhysRegs < NumRegs) { - // The current register file is too small. This may occur if the number of - // microarchitectural registers in register file #0 was changed by the - // users via flag -reg-file-size. 
Alternatively, the scheduling model - // specified a too small number of registers for this register file. - LLVM_DEBUG(dbgs() << "Not enough registers in the register file.\n"); - - // FIXME: Normalize the instruction register count to match the - // NumPhysRegs value. This is a highly unusual case, and is not expected - // to occur. This normalization is hiding an inconsistency in either the - // scheduling model or in the value that the user might have specified - // for NumPhysRegs. - NumRegs = RMT.NumPhysRegs; - } - - if (RMT.NumPhysRegs < (RMT.NumUsedPhysRegs + NumRegs)) - Response |= (1U << I); - } - - return Response; -} - -#ifndef NDEBUG -void RegisterFile::dump() const { - for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) { - const RegisterMapping &RM = RegisterMappings[I]; - const RegisterRenamingInfo &RRI = RM.second; - if (ZeroRegisters[I]) { - dbgs() << MRI.getName(I) << ", " << I - << ", PRF=" << RRI.IndexPlusCost.first - << ", Cost=" << RRI.IndexPlusCost.second - << ", RenameAs=" << RRI.RenameAs << ", IsZero=" << ZeroRegisters[I] - << ","; - RM.first.dump(); - dbgs() << '\n'; - } - } - - for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { - dbgs() << "Register File #" << I; - const RegisterMappingTracker &RMT = RegisterFiles[I]; - dbgs() << "\n TotalMappings: " << RMT.NumPhysRegs - << "\n NumUsedMappings: " << RMT.NumUsedPhysRegs << '\n'; - } -} -#endif - -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/HardwareUnits/ResourceManager.cpp b/llvm/tools/llvm-mca/lib/HardwareUnits/ResourceManager.cpp deleted file mode 100644 index f12238ab801..00000000000 --- a/llvm/tools/llvm-mca/lib/HardwareUnits/ResourceManager.cpp +++ /dev/null @@ -1,326 +0,0 @@ -//===--------------------- ResourceManager.cpp ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// The classes here represent processor resource units and their management -/// strategy. These classes are managed by the Scheduler. -/// -//===----------------------------------------------------------------------===// - -#include "HardwareUnits/ResourceManager.h" -#include "Support.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { -namespace mca { - -#define DEBUG_TYPE "llvm-mca" -ResourceStrategy::~ResourceStrategy() = default; - -uint64_t DefaultResourceStrategy::select(uint64_t ReadyMask) { - // This method assumes that ReadyMask cannot be zero. - uint64_t CandidateMask = ReadyMask & NextInSequenceMask; - if (CandidateMask) { - CandidateMask = PowerOf2Floor(CandidateMask); - NextInSequenceMask &= (CandidateMask | (CandidateMask - 1)); - return CandidateMask; - } - - NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence; - RemovedFromNextInSequence = 0; - CandidateMask = ReadyMask & NextInSequenceMask; - - if (CandidateMask) { - CandidateMask = PowerOf2Floor(CandidateMask); - NextInSequenceMask &= (CandidateMask | (CandidateMask - 1)); - return CandidateMask; - } - - NextInSequenceMask = ResourceUnitMask; - CandidateMask = PowerOf2Floor(ReadyMask & NextInSequenceMask); - NextInSequenceMask &= (CandidateMask | (CandidateMask - 1)); - return CandidateMask; -} - -void DefaultResourceStrategy::used(uint64_t Mask) { - if (Mask > NextInSequenceMask) { - RemovedFromNextInSequence |= Mask; - return; - } - - NextInSequenceMask &= (~Mask); - if (NextInSequenceMask) - return; - - NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence; - RemovedFromNextInSequence = 0; -} - -ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index, - uint64_t Mask) - : ProcResourceDescIndex(Index), ResourceMask(Mask), - BufferSize(Desc.BufferSize), IsAGroup(countPopulation(ResourceMask)>1) { - if 
(IsAGroup) - ResourceSizeMask = ResourceMask ^ PowerOf2Floor(ResourceMask); - else - ResourceSizeMask = (1ULL << Desc.NumUnits) - 1; - ReadyMask = ResourceSizeMask; - AvailableSlots = BufferSize == -1 ? 0U : static_cast<unsigned>(BufferSize); - Unavailable = false; -} - -bool ResourceState::isReady(unsigned NumUnits) const { - return (!isReserved() || isADispatchHazard()) && - countPopulation(ReadyMask) >= NumUnits; -} - -ResourceStateEvent ResourceState::isBufferAvailable() const { - if (isADispatchHazard() && isReserved()) - return RS_RESERVED; - if (!isBuffered() || AvailableSlots) - return RS_BUFFER_AVAILABLE; - return RS_BUFFER_UNAVAILABLE; -} - -#ifndef NDEBUG -void ResourceState::dump() const { - dbgs() << "MASK: " << ResourceMask << ", SIZE_MASK: " << ResourceSizeMask - << ", RDYMASK: " << ReadyMask << ", BufferSize=" << BufferSize - << ", AvailableSlots=" << AvailableSlots - << ", Reserved=" << Unavailable << '\n'; -} -#endif - -static unsigned getResourceStateIndex(uint64_t Mask) { - return std::numeric_limits<uint64_t>::digits - countLeadingZeros(Mask); -} - -static std::unique_ptr<ResourceStrategy> -getStrategyFor(const ResourceState &RS) { - if (RS.isAResourceGroup() || RS.getNumUnits() > 1) - return llvm::make_unique<DefaultResourceStrategy>(RS.getReadyMask()); - return std::unique_ptr<ResourceStrategy>(nullptr); -} - -ResourceManager::ResourceManager(const MCSchedModel &SM) { - computeProcResourceMasks(SM, ProcResID2Mask); - Resources.resize(SM.getNumProcResourceKinds()); - Strategies.resize(SM.getNumProcResourceKinds()); - - for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { - uint64_t Mask = ProcResID2Mask[I]; - unsigned Index = getResourceStateIndex(Mask); - Resources[Index] = - llvm::make_unique<ResourceState>(*SM.getProcResource(I), I, Mask); - Strategies[Index] = getStrategyFor(*Resources[Index]); - } -} - -void ResourceManager::setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S, - uint64_t ResourceMask) { - unsigned 
Index = getResourceStateIndex(ResourceMask); - assert(Index < Resources.size() && "Invalid processor resource index!"); - assert(S && "Unexpected null strategy in input!"); - Strategies[Index] = std::move(S); -} - -unsigned ResourceManager::resolveResourceMask(uint64_t Mask) const { - return Resources[getResourceStateIndex(Mask)]->getProcResourceID(); -} - -unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const { - return Resources[getResourceStateIndex(ResourceID)]->getNumUnits(); -} - -// Returns the actual resource consumed by this Use. -// First, is the primary resource ID. -// Second, is the specific sub-resource ID. -ResourceRef ResourceManager::selectPipe(uint64_t ResourceID) { - unsigned Index = getResourceStateIndex(ResourceID); - ResourceState &RS = *Resources[Index]; - assert(RS.isReady() && "No available units to select!"); - - // Special case where RS is not a group, and it only declares a single - // resource unit. - if (!RS.isAResourceGroup() && RS.getNumUnits() == 1) - return std::make_pair(ResourceID, RS.getReadyMask()); - - uint64_t SubResourceID = Strategies[Index]->select(RS.getReadyMask()); - if (RS.isAResourceGroup()) - return selectPipe(SubResourceID); - return std::make_pair(ResourceID, SubResourceID); -} - -void ResourceManager::use(const ResourceRef &RR) { - // Mark the sub-resource referenced by RR as used. - unsigned RSID = getResourceStateIndex(RR.first); - ResourceState &RS = *Resources[RSID]; - RS.markSubResourceAsUsed(RR.second); - // Remember to update the resource strategy for non-group resources with - // multiple units. - if (RS.getNumUnits() > 1) - Strategies[RSID]->used(RR.second); - - // If there are still available units in RR.first, - // then we are done. - if (RS.isReady()) - return; - - // Notify to other resources that RR.first is no longer available. 
- for (std::unique_ptr<ResourceState> &Res : Resources) { - ResourceState &Current = *Res; - if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) - continue; - - if (Current.containsResource(RR.first)) { - unsigned Index = getResourceStateIndex(Current.getResourceMask()); - Current.markSubResourceAsUsed(RR.first); - Strategies[Index]->used(RR.first); - } - } -} - -void ResourceManager::release(const ResourceRef &RR) { - ResourceState &RS = *Resources[getResourceStateIndex(RR.first)]; - bool WasFullyUsed = !RS.isReady(); - RS.releaseSubResource(RR.second); - if (!WasFullyUsed) - return; - - for (std::unique_ptr<ResourceState> &Res : Resources) { - ResourceState &Current = *Res; - if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) - continue; - - if (Current.containsResource(RR.first)) - Current.releaseSubResource(RR.first); - } -} - -ResourceStateEvent -ResourceManager::canBeDispatched(ArrayRef<uint64_t> Buffers) const { - ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE; - for (uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; - Result = RS.isBufferAvailable(); - if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE) - break; - } - return Result; -} - -void ResourceManager::reserveBuffers(ArrayRef<uint64_t> Buffers) { - for (const uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; - assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE); - RS.reserveBuffer(); - - if (RS.isADispatchHazard()) { - assert(!RS.isReserved()); - RS.setReserved(); - } - } -} - -void ResourceManager::releaseBuffers(ArrayRef<uint64_t> Buffers) { - for (const uint64_t R : Buffers) - Resources[getResourceStateIndex(R)]->releaseBuffer(); -} - -bool ResourceManager::canBeIssued(const InstrDesc &Desc) const { - return all_of( - Desc.Resources, [&](const std::pair<uint64_t, const ResourceUsage> &E) { - unsigned NumUnits = 
E.second.isReserved() ? 0U : E.second.NumUnits; - unsigned Index = getResourceStateIndex(E.first); - return Resources[Index]->isReady(NumUnits); - }); -} - -// Returns true if all resources are in-order, and there is at least one -// resource which is a dispatch hazard (BufferSize = 0). -bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) const { - if (!canBeIssued(Desc)) - return false; - bool AllInOrderResources = all_of(Desc.Buffers, [&](uint64_t BufferMask) { - unsigned Index = getResourceStateIndex(BufferMask); - const ResourceState &Resource = *Resources[Index]; - return Resource.isInOrder() || Resource.isADispatchHazard(); - }); - if (!AllInOrderResources) - return false; - - return any_of(Desc.Buffers, [&](uint64_t BufferMask) { - return Resources[getResourceStateIndex(BufferMask)]->isADispatchHazard(); - }); -} - -void ResourceManager::issueInstruction( - const InstrDesc &Desc, - SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes) { - for (const std::pair<uint64_t, ResourceUsage> &R : Desc.Resources) { - const CycleSegment &CS = R.second.CS; - if (!CS.size()) { - releaseResource(R.first); - continue; - } - - assert(CS.begin() == 0 && "Invalid {Start, End} cycles!"); - if (!R.second.isReserved()) { - ResourceRef Pipe = selectPipe(R.first); - use(Pipe); - BusyResources[Pipe] += CS.size(); - // Replace the resource mask with a valid processor resource index. - const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)]; - Pipe.first = RS.getProcResourceID(); - Pipes.emplace_back(std::pair<ResourceRef, ResourceCycles>( - Pipe, ResourceCycles(CS.size()))); - } else { - assert((countPopulation(R.first) > 1) && "Expected a group!"); - // Mark this group as reserved. 
- assert(R.second.isReserved()); - reserveResource(R.first); - BusyResources[ResourceRef(R.first, R.first)] += CS.size(); - } - } -} - -void ResourceManager::cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed) { - for (std::pair<ResourceRef, unsigned> &BR : BusyResources) { - if (BR.second) - BR.second--; - if (!BR.second) { - // Release this resource. - const ResourceRef &RR = BR.first; - - if (countPopulation(RR.first) == 1) - release(RR); - - releaseResource(RR.first); - ResourcesFreed.push_back(RR); - } - } - - for (const ResourceRef &RF : ResourcesFreed) - BusyResources.erase(RF); -} - -void ResourceManager::reserveResource(uint64_t ResourceID) { - ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; - assert(!Resource.isReserved()); - Resource.setReserved(); -} - -void ResourceManager::releaseResource(uint64_t ResourceID) { - ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; - Resource.clearReserved(); -} - -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/HardwareUnits/RetireControlUnit.cpp b/llvm/tools/llvm-mca/lib/HardwareUnits/RetireControlUnit.cpp deleted file mode 100644 index bd7b411af11..00000000000 --- a/llvm/tools/llvm-mca/lib/HardwareUnits/RetireControlUnit.cpp +++ /dev/null @@ -1,88 +0,0 @@ -//===---------------------- RetireControlUnit.cpp ---------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file simulates the hardware responsible for retiring instructions. 
-/// -//===----------------------------------------------------------------------===// - -#include "HardwareUnits/RetireControlUnit.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace llvm { -namespace mca { - -RetireControlUnit::RetireControlUnit(const MCSchedModel &SM) - : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), - AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { - // Check if the scheduling model provides extra information about the machine - // processor. If so, then use that information to set the reorder buffer size - // and the maximum number of instructions retired per cycle. - if (SM.hasExtraProcessorInfo()) { - const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); - if (EPI.ReorderBufferSize) - AvailableSlots = EPI.ReorderBufferSize; - MaxRetirePerCycle = EPI.MaxRetirePerCycle; - } - - assert(AvailableSlots && "Invalid reorder buffer size!"); - Queue.resize(AvailableSlots); -} - -// Reserves a number of slots, and returns a new token. -unsigned RetireControlUnit::reserveSlot(const InstRef &IR, - unsigned NumMicroOps) { - assert(isAvailable(NumMicroOps) && "Reorder Buffer unavailable!"); - unsigned NormalizedQuantity = - std::min(NumMicroOps, static_cast<unsigned>(Queue.size())); - // Zero latency instructions may have zero uOps. Artificially bump this - // value to 1. Although zero latency instructions don't consume scheduler - // resources, they still consume one slot in the retire queue. 
- NormalizedQuantity = std::max(NormalizedQuantity, 1U); - unsigned TokenID = NextAvailableSlotIdx; - Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false}; - NextAvailableSlotIdx += NormalizedQuantity; - NextAvailableSlotIdx %= Queue.size(); - AvailableSlots -= NormalizedQuantity; - return TokenID; -} - -const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const { - return Queue[CurrentInstructionSlotIdx]; -} - -void RetireControlUnit::consumeCurrentToken() { - RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx]; - assert(Current.NumSlots && "Reserved zero slots?"); - assert(Current.IR && "Invalid RUToken in the RCU queue."); - Current.IR.getInstruction()->retire(); - - // Update the slot index to be the next item in the circular queue. - CurrentInstructionSlotIdx += Current.NumSlots; - CurrentInstructionSlotIdx %= Queue.size(); - AvailableSlots += Current.NumSlots; -} - -void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { - assert(Queue.size() > TokenID); - assert(Queue[TokenID].Executed == false && Queue[TokenID].IR); - Queue[TokenID].Executed = true; -} - -#ifndef NDEBUG -void RetireControlUnit::dump() const { - dbgs() << "Retire Unit: { Total Slots=" << Queue.size() - << ", Available Slots=" << AvailableSlots << " }\n"; -} -#endif - -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/HardwareUnits/Scheduler.cpp b/llvm/tools/llvm-mca/lib/HardwareUnits/Scheduler.cpp deleted file mode 100644 index f0ac59e5bc2..00000000000 --- a/llvm/tools/llvm-mca/lib/HardwareUnits/Scheduler.cpp +++ /dev/null @@ -1,245 +0,0 @@ -//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// A scheduler for processor resource units and processor resource groups. -// -//===----------------------------------------------------------------------===// - -#include "HardwareUnits/Scheduler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { -namespace mca { - -#define DEBUG_TYPE "llvm-mca" - -void Scheduler::initializeStrategy(std::unique_ptr<SchedulerStrategy> S) { - // Ensure we have a valid (non-null) strategy object. - Strategy = S ? std::move(S) : llvm::make_unique<DefaultSchedulerStrategy>(); -} - -// Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy. -SchedulerStrategy::~SchedulerStrategy() = default; -DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default; - -#ifndef NDEBUG -void Scheduler::dump() const { - dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n'; - dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n'; - dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n'; - Resources->dump(); -} -#endif - -Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - - switch (Resources->canBeDispatched(Desc.Buffers)) { - case ResourceStateEvent::RS_BUFFER_UNAVAILABLE: - return Scheduler::SC_BUFFERS_FULL; - case ResourceStateEvent::RS_RESERVED: - return Scheduler::SC_DISPATCH_GROUP_STALL; - case ResourceStateEvent::RS_BUFFER_AVAILABLE: - break; - } - - // Give lower priority to LSUnit stall events. 
- switch (LSU.isAvailable(IR)) { - case LSUnit::LSU_LQUEUE_FULL: - return Scheduler::SC_LOAD_QUEUE_FULL; - case LSUnit::LSU_SQUEUE_FULL: - return Scheduler::SC_STORE_QUEUE_FULL; - case LSUnit::LSU_AVAILABLE: - return Scheduler::SC_AVAILABLE; - } - - llvm_unreachable("Don't know how to process this LSU state result!"); -} - -void Scheduler::issueInstructionImpl( - InstRef &IR, - SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources) { - Instruction *IS = IR.getInstruction(); - const InstrDesc &D = IS->getDesc(); - - // Issue the instruction and collect all the consumed resources - // into a vector. That vector is then used to notify the listener. - Resources->issueInstruction(D, UsedResources); - - // Notify the instruction that it started executing. - // This updates the internal state of each write. - IS->execute(); - - if (IS->isExecuting()) - IssuedSet.emplace_back(IR); - else if (IS->isExecuted()) - LSU.onInstructionExecuted(IR); -} - -// Release the buffered resources and issue the instruction. -void Scheduler::issueInstruction( - InstRef &IR, - SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources, - SmallVectorImpl<InstRef> &ReadyInstructions) { - const Instruction &Inst = *IR.getInstruction(); - bool HasDependentUsers = Inst.hasDependentUsers(); - - Resources->releaseBuffers(Inst.getDesc().Buffers); - issueInstructionImpl(IR, UsedResources); - // Instructions that have been issued during this cycle might have unblocked - // other dependent instructions. Dependent instructions may be issued during - // this same cycle if operands have ReadAdvance entries. Promote those - // instructions to the ReadySet and notify the caller that those are ready. - if (HasDependentUsers) - promoteToReadySet(ReadyInstructions); -} - -void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) { - // Scan the set of waiting instructions and promote them to the - // ready queue if operands are all ready. 
- unsigned RemovedElements = 0; - for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) { - InstRef &IR = *I; - if (!IR) - break; - - // Check if this instruction is now ready. In case, force - // a transition in state using method 'update()'. - Instruction &IS = *IR.getInstruction(); - if (!IS.isReady()) - IS.update(); - - // Check if there are still unsolved data dependencies. - if (!isReady(IR)) { - ++I; - continue; - } - - Ready.emplace_back(IR); - ReadySet.emplace_back(IR); - - IR.invalidate(); - ++RemovedElements; - std::iter_swap(I, E - RemovedElements); - } - - WaitSet.resize(WaitSet.size() - RemovedElements); -} - -InstRef Scheduler::select() { - unsigned QueueIndex = ReadySet.size(); - for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) { - const InstRef &IR = ReadySet[I]; - if (QueueIndex == ReadySet.size() || - Strategy->compare(IR, ReadySet[QueueIndex])) { - const InstrDesc &D = IR.getInstruction()->getDesc(); - if (Resources->canBeIssued(D)) - QueueIndex = I; - } - } - - if (QueueIndex == ReadySet.size()) - return InstRef(); - - // We found an instruction to issue. - InstRef IR = ReadySet[QueueIndex]; - std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]); - ReadySet.pop_back(); - return IR; -} - -void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) { - unsigned RemovedElements = 0; - for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) { - InstRef &IR = *I; - if (!IR) - break; - Instruction &IS = *IR.getInstruction(); - if (!IS.isExecuted()) { - LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR - << " is still executing.\n"); - ++I; - continue; - } - - // Instruction IR has completed execution. 
- LSU.onInstructionExecuted(IR); - Executed.emplace_back(IR); - ++RemovedElements; - IR.invalidate(); - std::iter_swap(I, E - RemovedElements); - } - - IssuedSet.resize(IssuedSet.size() - RemovedElements); -} - -void Scheduler::cycleEvent(SmallVectorImpl<ResourceRef> &Freed, - SmallVectorImpl<InstRef> &Executed, - SmallVectorImpl<InstRef> &Ready) { - // Release consumed resources. - Resources->cycleEvent(Freed); - - // Propagate the cycle event to the 'Issued' and 'Wait' sets. - for (InstRef &IR : IssuedSet) - IR.getInstruction()->cycleEvent(); - - updateIssuedSet(Executed); - - for (InstRef &IR : WaitSet) - IR.getInstruction()->cycleEvent(); - - promoteToReadySet(Ready); -} - -bool Scheduler::mustIssueImmediately(const InstRef &IR) const { - // Instructions that use an in-order dispatch/issue processor resource must be - // issued immediately to the pipeline(s). Any other in-order buffered - // resources (i.e. BufferSize=1) is consumed. - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - return Desc.isZeroLatency() || Resources->mustIssueImmediately(Desc); -} - -void Scheduler::dispatch(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - Resources->reserveBuffers(Desc.Buffers); - - // If necessary, reserve queue entries in the load-store unit (LSU). - bool IsMemOp = Desc.MayLoad || Desc.MayStore; - if (IsMemOp) - LSU.dispatch(IR); - - if (!isReady(IR)) { - LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n"); - WaitSet.push_back(IR); - return; - } - - // Don't add a zero-latency instruction to the Ready queue. - // A zero-latency instruction doesn't consume any scheduler resources. That is - // because it doesn't need to be executed, and it is often removed at register - // renaming stage. For example, register-register moves are often optimized at - // register renaming stage by simply updating register aliases. 
On some - // targets, zero-idiom instructions (for example: a xor that clears the value - // of a register) are treated specially, and are often eliminated at register - // renaming stage. - if (!mustIssueImmediately(IR)) { - LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n"); - ReadySet.push_back(IR); - } -} - -bool Scheduler::isReady(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - bool IsMemOp = Desc.MayLoad || Desc.MayStore; - return IR.getInstruction()->isReady() && (!IsMemOp || LSU.isReady(IR)); -} - -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/InstrBuilder.cpp b/llvm/tools/llvm-mca/lib/InstrBuilder.cpp deleted file mode 100644 index f3960826b5c..00000000000 --- a/llvm/tools/llvm-mca/lib/InstrBuilder.cpp +++ /dev/null @@ -1,675 +0,0 @@ -//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements the InstrBuilder interface. 
-/// -//===----------------------------------------------------------------------===// - -#include "InstrBuilder.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/WithColor.h" -#include "llvm/Support/raw_ostream.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace llvm { -namespace mca { - -InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti, - const llvm::MCInstrInfo &mcii, - const llvm::MCRegisterInfo &mri, - const llvm::MCInstrAnalysis &mcia) - : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true), - FirstReturnInst(true) { - computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); -} - -static void initializeUsedResources(InstrDesc &ID, - const MCSchedClassDesc &SCDesc, - const MCSubtargetInfo &STI, - ArrayRef<uint64_t> ProcResourceMasks) { - const MCSchedModel &SM = STI.getSchedModel(); - - // Populate resources consumed. - using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>; - std::vector<ResourcePlusCycles> Worklist; - - // Track cycles contributed by resources that are in a "Super" relationship. - // This is required if we want to correctly match the behavior of method - // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set - // of "consumed" processor resources and resource cycles, the logic in - // ExpandProcResource() doesn't update the number of resource cycles - // contributed by a "Super" resource to a group. - // We need to take this into account when we find that a processor resource is - // part of a group, and it is also used as the "Super" of other resources. - // This map stores the number of cycles contributed by sub-resources that are - // part of a "Super" resource. The key value is the "Super" resource mask ID. 
- DenseMap<uint64_t, unsigned> SuperResources; - - unsigned NumProcResources = SM.getNumProcResourceKinds(); - APInt Buffers(NumProcResources, 0); - - for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) { - const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I; - const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx); - uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx]; - if (PR.BufferSize != -1) - Buffers.setBit(PRE->ProcResourceIdx); - CycleSegment RCy(0, PRE->Cycles, false); - Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy))); - if (PR.SuperIdx) { - uint64_t Super = ProcResourceMasks[PR.SuperIdx]; - SuperResources[Super] += PRE->Cycles; - } - } - - // Sort elements by mask popcount, so that we prioritize resource units over - // resource groups, and smaller groups over larger groups. - sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) { - unsigned popcntA = countPopulation(A.first); - unsigned popcntB = countPopulation(B.first); - if (popcntA < popcntB) - return true; - if (popcntA > popcntB) - return false; - return A.first < B.first; - }); - - uint64_t UsedResourceUnits = 0; - - // Remove cycles contributed by smaller resources. - for (unsigned I = 0, E = Worklist.size(); I < E; ++I) { - ResourcePlusCycles &A = Worklist[I]; - if (!A.second.size()) { - A.second.NumUnits = 0; - A.second.setReserved(); - ID.Resources.emplace_back(A); - continue; - } - - ID.Resources.emplace_back(A); - uint64_t NormalizedMask = A.first; - if (countPopulation(A.first) == 1) { - UsedResourceUnits |= A.first; - } else { - // Remove the leading 1 from the resource group mask. 
- NormalizedMask ^= PowerOf2Floor(NormalizedMask); - } - - for (unsigned J = I + 1; J < E; ++J) { - ResourcePlusCycles &B = Worklist[J]; - if ((NormalizedMask & B.first) == NormalizedMask) { - B.second.CS.subtract(A.second.size() - SuperResources[A.first]); - if (countPopulation(B.first) > 1) - B.second.NumUnits++; - } - } - } - - // A SchedWrite may specify a number of cycles in which a resource group - // is reserved. For example (on target x86; cpu Haswell): - // - // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> { - // let ResourceCycles = [2, 2, 3]; - // } - // - // This means: - // Resource units HWPort0 and HWPort1 are both used for 2cy. - // Resource group HWPort01 is the union of HWPort0 and HWPort1. - // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01 - // will not be usable for 2 entire cycles from instruction issue. - // - // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency - // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an - // extra delay on top of the 2 cycles latency. - // During those extra cycles, HWPort01 is not usable by other instructions. - for (ResourcePlusCycles &RPC : ID.Resources) { - if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) { - // Remove the leading 1 from the resource group mask. - uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first); - if ((Mask & UsedResourceUnits) == Mask) - RPC.second.setReserved(); - } - } - - // Identify extra buffers that are consumed through super resources. - for (const std::pair<uint64_t, unsigned> &SR : SuperResources) { - for (unsigned I = 1, E = NumProcResources; I < E; ++I) { - const MCProcResourceDesc &PR = *SM.getProcResource(I); - if (PR.BufferSize == -1) - continue; - - uint64_t Mask = ProcResourceMasks[I]; - if (Mask != SR.first && ((Mask & SR.first) == SR.first)) - Buffers.setBit(I); - } - } - - // Now set the buffers. 
- if (unsigned NumBuffers = Buffers.countPopulation()) { - ID.Buffers.resize(NumBuffers); - for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) { - if (Buffers[I]) { - --NumBuffers; - ID.Buffers[NumBuffers] = ProcResourceMasks[I]; - } - } - } - - LLVM_DEBUG({ - for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources) - dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n'; - for (const uint64_t R : ID.Buffers) - dbgs() << "\t\tBuffer Mask=" << R << '\n'; - }); -} - -static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, - const MCSchedClassDesc &SCDesc, - const MCSubtargetInfo &STI) { - if (MCDesc.isCall()) { - // We cannot estimate how long this call will take. - // Artificially set an arbitrarily high latency (100cy). - ID.MaxLatency = 100U; - return; - } - - int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); - // If latency is unknown, then conservatively assume a MaxLatency of 100cy. - ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency); -} - -static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) { - // Count register definitions, and skip non register operands in the process. - unsigned I, E; - unsigned NumExplicitDefs = MCDesc.getNumDefs(); - for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) { - const MCOperand &Op = MCI.getOperand(I); - if (Op.isReg()) - --NumExplicitDefs; - } - - if (NumExplicitDefs) { - return make_error<InstructionError<MCInst>>( - "Expected more register operand definitions.", MCI); - } - - if (MCDesc.hasOptionalDef()) { - // Always assume that the optional definition is the last operand. - const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1); - if (I == MCI.getNumOperands() || !Op.isReg()) { - std::string Message = - "expected a register operand for an optional definition. 
Instruction " - "has not been correctly analyzed."; - return make_error<InstructionError<MCInst>>(Message, MCI); - } - } - - return ErrorSuccess(); -} - -void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI, - unsigned SchedClassID) { - const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); - const MCSchedModel &SM = STI.getSchedModel(); - const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); - - // Assumptions made by this algorithm: - // 1. The number of explicit and implicit register definitions in a MCInst - // matches the number of explicit and implicit definitions according to - // the opcode descriptor (MCInstrDesc). - // 2. Uses start at index #(MCDesc.getNumDefs()). - // 3. There can only be a single optional register definition, an it is - // always the last operand of the sequence (excluding extra operands - // contributed by variadic opcodes). - // - // These assumptions work quite well for most out-of-order in-tree targets - // like x86. This is mainly because the vast majority of instructions is - // expanded to MCInst using a straightforward lowering logic that preserves - // the ordering of the operands. - // - // About assumption 1. - // The algorithm allows non-register operands between register operand - // definitions. This helps to handle some special ARM instructions with - // implicit operand increment (-mtriple=armv7): - // - // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed - // @ <MCOperand Reg:59> - // @ <MCOperand Imm:0> (!!) - // @ <MCOperand Reg:67> - // @ <MCOperand Imm:0> - // @ <MCOperand Imm:14> - // @ <MCOperand Reg:0>> - // - // MCDesc reports: - // 6 explicit operands. - // 1 optional definition - // 2 explicit definitions (!!) - // - // The presence of an 'Imm' operand between the two register definitions - // breaks the assumption that "register definitions are always at the - // beginning of the operand sequence". - // - // To workaround this issue, this algorithm ignores (i.e. 
skips) any - // non-register operands between register definitions. The optional - // definition is still at index #(NumOperands-1). - // - // According to assumption 2. register reads start at #(NumExplicitDefs-1). - // That means, register R1 from the example is both read and written. - unsigned NumExplicitDefs = MCDesc.getNumDefs(); - unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs(); - unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries; - unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs; - if (MCDesc.hasOptionalDef()) - TotalDefs++; - - unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); - ID.Writes.resize(TotalDefs + NumVariadicOps); - // Iterate over the operands list, and skip non-register operands. - // The first NumExplictDefs register operands are expected to be register - // definitions. - unsigned CurrentDef = 0; - unsigned i = 0; - for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) { - const MCOperand &Op = MCI.getOperand(i); - if (!Op.isReg()) - continue; - - WriteDescriptor &Write = ID.Writes[CurrentDef]; - Write.OpIndex = i; - if (CurrentDef < NumWriteLatencyEntries) { - const MCWriteLatencyEntry &WLE = - *STI.getWriteLatencyEntry(&SCDesc, CurrentDef); - // Conservatively default to MaxLatency. - Write.Latency = - WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles); - Write.SClassOrWriteResourceID = WLE.WriteResourceID; - } else { - // Assign a default latency for this write. 
- Write.Latency = ID.MaxLatency; - Write.SClassOrWriteResourceID = 0; - } - Write.IsOptionalDef = false; - LLVM_DEBUG({ - dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex - << ", Latency=" << Write.Latency - << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; - }); - CurrentDef++; - } - - assert(CurrentDef == NumExplicitDefs && - "Expected more register operand definitions."); - for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) { - unsigned Index = NumExplicitDefs + CurrentDef; - WriteDescriptor &Write = ID.Writes[Index]; - Write.OpIndex = ~CurrentDef; - Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef]; - if (Index < NumWriteLatencyEntries) { - const MCWriteLatencyEntry &WLE = - *STI.getWriteLatencyEntry(&SCDesc, Index); - // Conservatively default to MaxLatency. - Write.Latency = - WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles); - Write.SClassOrWriteResourceID = WLE.WriteResourceID; - } else { - // Assign a default latency for this write. - Write.Latency = ID.MaxLatency; - Write.SClassOrWriteResourceID = 0; - } - - Write.IsOptionalDef = false; - assert(Write.RegisterID != 0 && "Expected a valid phys register!"); - LLVM_DEBUG({ - dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex - << ", PhysReg=" << MRI.getName(Write.RegisterID) - << ", Latency=" << Write.Latency - << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; - }); - } - - if (MCDesc.hasOptionalDef()) { - WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs]; - Write.OpIndex = MCDesc.getNumOperands() - 1; - // Assign a default latency for this write. 
- Write.Latency = ID.MaxLatency; - Write.SClassOrWriteResourceID = 0; - Write.IsOptionalDef = true; - LLVM_DEBUG({ - dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex - << ", Latency=" << Write.Latency - << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; - }); - } - - if (!NumVariadicOps) - return; - - // FIXME: if an instruction opcode is flagged 'mayStore', and it has no - // "unmodeledSideEffects', then this logic optimistically assumes that any - // extra register operands in the variadic sequence is not a register - // definition. - // - // Otherwise, we conservatively assume that any register operand from the - // variadic sequence is both a register read and a register write. - bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() && - !MCDesc.hasUnmodeledSideEffects(); - CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef(); - for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); - I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) { - const MCOperand &Op = MCI.getOperand(OpIndex); - if (!Op.isReg()) - continue; - - WriteDescriptor &Write = ID.Writes[CurrentDef]; - Write.OpIndex = OpIndex; - // Assign a default latency for this write. - Write.Latency = ID.MaxLatency; - Write.SClassOrWriteResourceID = 0; - Write.IsOptionalDef = false; - ++CurrentDef; - LLVM_DEBUG({ - dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex - << ", Latency=" << Write.Latency - << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; - }); - } - - ID.Writes.resize(CurrentDef); -} - -void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, - unsigned SchedClassID) { - const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); - unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs(); - unsigned NumImplicitUses = MCDesc.getNumImplicitUses(); - // Remove the optional definition. 
- if (MCDesc.hasOptionalDef()) - --NumExplicitUses; - unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); - unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps; - ID.Reads.resize(TotalUses); - unsigned CurrentUse = 0; - for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses; - ++I, ++OpIndex) { - const MCOperand &Op = MCI.getOperand(OpIndex); - if (!Op.isReg()) - continue; - - ReadDescriptor &Read = ID.Reads[CurrentUse]; - Read.OpIndex = OpIndex; - Read.UseIndex = I; - Read.SchedClassID = SchedClassID; - ++CurrentUse; - LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex - << ", UseIndex=" << Read.UseIndex << '\n'); - } - - // For the purpose of ReadAdvance, implicit uses come directly after explicit - // uses. The "UseIndex" must be updated according to that implicit layout. - for (unsigned I = 0; I < NumImplicitUses; ++I) { - ReadDescriptor &Read = ID.Reads[CurrentUse + I]; - Read.OpIndex = ~I; - Read.UseIndex = NumExplicitUses + I; - Read.RegisterID = MCDesc.getImplicitUses()[I]; - Read.SchedClassID = SchedClassID; - LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex - << ", UseIndex=" << Read.UseIndex << ", RegisterID=" - << MRI.getName(Read.RegisterID) << '\n'); - } - - CurrentUse += NumImplicitUses; - - // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no - // "unmodeledSideEffects", then this logic optimistically assumes that any - // extra register operands in the variadic sequence are not register - // definition. 
- - bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() && - !MCDesc.hasUnmodeledSideEffects(); - for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); - I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) { - const MCOperand &Op = MCI.getOperand(OpIndex); - if (!Op.isReg()) - continue; - - ReadDescriptor &Read = ID.Reads[CurrentUse]; - Read.OpIndex = OpIndex; - Read.UseIndex = NumExplicitUses + NumImplicitUses + I; - Read.SchedClassID = SchedClassID; - ++CurrentUse; - LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex - << ", UseIndex=" << Read.UseIndex << '\n'); - } - - ID.Reads.resize(CurrentUse); -} - -Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID, - const MCInst &MCI) const { - if (ID.NumMicroOps != 0) - return ErrorSuccess(); - - bool UsesMemory = ID.MayLoad || ID.MayStore; - bool UsesBuffers = !ID.Buffers.empty(); - bool UsesResources = !ID.Resources.empty(); - if (!UsesMemory && !UsesBuffers && !UsesResources) - return ErrorSuccess(); - - StringRef Message; - if (UsesMemory) { - Message = "found an inconsistent instruction that decodes " - "into zero opcodes and that consumes load/store " - "unit resources."; - } else { - Message = "found an inconsistent instruction that decodes " - "to zero opcodes and that consumes scheduler " - "resources."; - } - - return make_error<InstructionError<MCInst>>(Message, MCI); -} - -Expected<const InstrDesc &> -InstrBuilder::createInstrDescImpl(const MCInst &MCI) { - assert(STI.getSchedModel().hasInstrSchedModel() && - "Itineraries are not yet supported!"); - - // Obtain the instruction descriptor from the opcode. - unsigned short Opcode = MCI.getOpcode(); - const MCInstrDesc &MCDesc = MCII.get(Opcode); - const MCSchedModel &SM = STI.getSchedModel(); - - // Then obtain the scheduling class information from the instruction. - unsigned SchedClassID = MCDesc.getSchedClass(); - bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant(); - - // Try to solve variant scheduling classes. 
- if (IsVariant) { - unsigned CPUID = SM.getProcessorID(); - while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) - SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID); - - if (!SchedClassID) { - return make_error<InstructionError<MCInst>>( - "unable to resolve scheduling class for write variant.", MCI); - } - } - - // Check if this instruction is supported. Otherwise, report an error. - const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); - if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) { - return make_error<InstructionError<MCInst>>( - "found an unsupported instruction in the input assembly sequence.", - MCI); - } - - // Create a new empty descriptor. - std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>(); - ID->NumMicroOps = SCDesc.NumMicroOps; - - if (MCDesc.isCall() && FirstCallInst) { - // We don't correctly model calls. - WithColor::warning() << "found a call in the input assembly sequence.\n"; - WithColor::note() << "call instructions are not correctly modeled. " - << "Assume a latency of 100cy.\n"; - FirstCallInst = false; - } - - if (MCDesc.isReturn() && FirstReturnInst) { - WithColor::warning() << "found a return instruction in the input" - << " assembly sequence.\n"; - WithColor::note() << "program counter updates are ignored.\n"; - FirstReturnInst = false; - } - - ID->MayLoad = MCDesc.mayLoad(); - ID->MayStore = MCDesc.mayStore(); - ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects(); - - initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); - computeMaxLatency(*ID, MCDesc, SCDesc, STI); - - if (Error Err = verifyOperands(MCDesc, MCI)) - return std::move(Err); - - populateWrites(*ID, MCI, SchedClassID); - populateReads(*ID, MCI, SchedClassID); - - LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); - LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); - - // Sanity check on the instruction descriptor. 
- if (Error Err = verifyInstrDesc(*ID, MCI)) - return std::move(Err); - - // Now add the new descriptor. - SchedClassID = MCDesc.getSchedClass(); - bool IsVariadic = MCDesc.isVariadic(); - if (!IsVariadic && !IsVariant) { - Descriptors[MCI.getOpcode()] = std::move(ID); - return *Descriptors[MCI.getOpcode()]; - } - - VariantDescriptors[&MCI] = std::move(ID); - return *VariantDescriptors[&MCI]; -} - -Expected<const InstrDesc &> -InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) { - if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end()) - return *Descriptors[MCI.getOpcode()]; - - if (VariantDescriptors.find(&MCI) != VariantDescriptors.end()) - return *VariantDescriptors[&MCI]; - - return createInstrDescImpl(MCI); -} - -Expected<std::unique_ptr<Instruction>> -InstrBuilder::createInstruction(const MCInst &MCI) { - Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI); - if (!DescOrErr) - return DescOrErr.takeError(); - const InstrDesc &D = *DescOrErr; - std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D); - - // Check if this is a dependency breaking instruction. - APInt Mask; - - unsigned ProcID = STI.getSchedModel().getProcessorID(); - bool IsZeroIdiom = MCIA.isZeroIdiom(MCI, Mask, ProcID); - bool IsDepBreaking = - IsZeroIdiom || MCIA.isDependencyBreaking(MCI, Mask, ProcID); - if (MCIA.isOptimizableRegisterMove(MCI, ProcID)) - NewIS->setOptimizableMove(); - - // Initialize Reads first. - for (const ReadDescriptor &RD : D.Reads) { - int RegID = -1; - if (!RD.isImplicitRead()) { - // explicit read. - const MCOperand &Op = MCI.getOperand(RD.OpIndex); - // Skip non-register operands. - if (!Op.isReg()) - continue; - RegID = Op.getReg(); - } else { - // Implicit read. - RegID = RD.RegisterID; - } - - // Skip invalid register operands. - if (!RegID) - continue; - - // Okay, this is a register operand. Create a ReadState for it. 
- assert(RegID > 0 && "Invalid register ID found!"); - NewIS->getUses().emplace_back(RD, RegID); - ReadState &RS = NewIS->getUses().back(); - - if (IsDepBreaking) { - // A mask of all zeroes means: explicit input operands are not - // independent. - if (Mask.isNullValue()) { - if (!RD.isImplicitRead()) - RS.setIndependentFromDef(); - } else { - // Check if this register operand is independent according to `Mask`. - // Note that Mask may not have enough bits to describe all explicit and - // implicit input operands. If this register operand doesn't have a - // corresponding bit in Mask, then conservatively assume that it is - // dependent. - if (Mask.getBitWidth() > RD.UseIndex) { - // Okay. This map describe register use `RD.UseIndex`. - if (Mask[RD.UseIndex]) - RS.setIndependentFromDef(); - } - } - } - } - - // Early exit if there are no writes. - if (D.Writes.empty()) - return std::move(NewIS); - - // Track register writes that implicitly clear the upper portion of the - // underlying super-registers using an APInt. - APInt WriteMask(D.Writes.size(), 0); - - // Now query the MCInstrAnalysis object to obtain information about which - // register writes implicitly clear the upper portion of a super-register. - MCIA.clearsSuperRegisters(MRI, MCI, WriteMask); - - // Initialize writes. - unsigned WriteIndex = 0; - for (const WriteDescriptor &WD : D.Writes) { - unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID - : MCI.getOperand(WD.OpIndex).getReg(); - // Check if this is a optional definition that references NoReg. 
- if (WD.IsOptionalDef && !RegID) { - ++WriteIndex; - continue; - } - - assert(RegID && "Expected a valid register ID!"); - NewIS->getDefs().emplace_back(WD, RegID, - /* ClearsSuperRegs */ WriteMask[WriteIndex], - /* WritesZero */ IsZeroIdiom); - ++WriteIndex; - } - - return std::move(NewIS); -} -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/Instruction.cpp b/llvm/tools/llvm-mca/lib/Instruction.cpp deleted file mode 100644 index 47ba2f8043a..00000000000 --- a/llvm/tools/llvm-mca/lib/Instruction.cpp +++ /dev/null @@ -1,205 +0,0 @@ -//===--------------------- Instruction.cpp ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines abstractions used by the Pipeline to model register reads, -// register writes and instructions. -// -//===----------------------------------------------------------------------===// - -#include "Instruction.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { -namespace mca { - -void ReadState::writeStartEvent(unsigned Cycles) { - assert(DependentWrites); - assert(CyclesLeft == UNKNOWN_CYCLES); - - // This read may be dependent on more than one write. This typically occurs - // when a definition is the result of multiple writes where at least one - // write does a partial register update. - // The HW is forced to do some extra bookkeeping to track of all the - // dependent writes, and implement a merging scheme for the partial writes. 
- --DependentWrites; - TotalCycles = std::max(TotalCycles, Cycles); - - if (!DependentWrites) { - CyclesLeft = TotalCycles; - IsReady = !CyclesLeft; - } -} - -void WriteState::onInstructionIssued() { - assert(CyclesLeft == UNKNOWN_CYCLES); - // Update the number of cycles left based on the WriteDescriptor info. - CyclesLeft = getLatency(); - - // Now that the time left before write-back is known, notify - // all the users. - for (const std::pair<ReadState *, int> &User : Users) { - ReadState *RS = User.first; - unsigned ReadCycles = std::max(0, CyclesLeft - User.second); - RS->writeStartEvent(ReadCycles); - } - - // Notify any writes that are in a false dependency with this write. - if (PartialWrite) - PartialWrite->writeStartEvent(CyclesLeft); -} - -void WriteState::addUser(ReadState *User, int ReadAdvance) { - // If CyclesLeft is different than -1, then we don't need to - // update the list of users. We can just notify the user with - // the actual number of cycles left (which may be zero). - if (CyclesLeft != UNKNOWN_CYCLES) { - unsigned ReadCycles = std::max(0, CyclesLeft - ReadAdvance); - User->writeStartEvent(ReadCycles); - return; - } - - if (llvm::find_if(Users, [&User](const std::pair<ReadState *, int> &Use) { - return Use.first == User; - }) == Users.end()) { - Users.emplace_back(User, ReadAdvance); - } -} - -void WriteState::addUser(WriteState *User) { - if (CyclesLeft != UNKNOWN_CYCLES) { - User->writeStartEvent(std::max(0, CyclesLeft)); - return; - } - - assert(!PartialWrite && "PartialWrite already set!"); - PartialWrite = User; - User->setDependentWrite(this); -} - -void WriteState::cycleEvent() { - // Note: CyclesLeft can be a negative number. It is an error to - // make it an unsigned quantity because users of this write may - // specify a negative ReadAdvance. 
- if (CyclesLeft != UNKNOWN_CYCLES) - CyclesLeft--; - - if (DependentWriteCyclesLeft) - DependentWriteCyclesLeft--; -} - -void ReadState::cycleEvent() { - // Update the total number of cycles. - if (DependentWrites && TotalCycles) { - --TotalCycles; - return; - } - - // Bail out immediately if we don't know how many cycles are left. - if (CyclesLeft == UNKNOWN_CYCLES) - return; - - if (CyclesLeft) { - --CyclesLeft; - IsReady = !CyclesLeft; - } -} - -#ifndef NDEBUG -void WriteState::dump() const { - dbgs() << "{ OpIdx=" << WD->OpIndex << ", Lat=" << getLatency() << ", RegID " - << getRegisterID() << ", Cycles Left=" << getCyclesLeft() << " }"; -} - -void WriteRef::dump() const { - dbgs() << "IID=" << getSourceIndex() << ' '; - if (isValid()) - getWriteState()->dump(); - else - dbgs() << "(null)"; -} -#endif - -void Instruction::dispatch(unsigned RCUToken) { - assert(Stage == IS_INVALID); - Stage = IS_AVAILABLE; - RCUTokenID = RCUToken; - - // Check if input operands are already available. - update(); -} - -void Instruction::execute() { - assert(Stage == IS_READY); - Stage = IS_EXECUTING; - - // Set the cycles left before the write-back stage. - CyclesLeft = getLatency(); - - for (WriteState &WS : getDefs()) - WS.onInstructionIssued(); - - // Transition to the "executed" stage if this is a zero-latency instruction. - if (!CyclesLeft) - Stage = IS_EXECUTED; -} - -void Instruction::forceExecuted() { - assert(Stage == IS_READY && "Invalid internal state!"); - CyclesLeft = 0; - Stage = IS_EXECUTED; -} - -void Instruction::update() { - assert(isDispatched() && "Unexpected instruction stage found!"); - - if (!all_of(getUses(), [](const ReadState &Use) { return Use.isReady(); })) - return; - - // A partial register write cannot complete before a dependent write. 
- auto IsDefReady = [&](const WriteState &Def) { - if (!Def.getDependentWrite()) { - unsigned CyclesLeft = Def.getDependentWriteCyclesLeft(); - return !CyclesLeft || CyclesLeft < getLatency(); - } - return false; - }; - - if (all_of(getDefs(), IsDefReady)) - Stage = IS_READY; -} - -void Instruction::cycleEvent() { - if (isReady()) - return; - - if (isDispatched()) { - for (ReadState &Use : getUses()) - Use.cycleEvent(); - - for (WriteState &Def : getDefs()) - Def.cycleEvent(); - - update(); - return; - } - - assert(isExecuting() && "Instruction not in-flight?"); - assert(CyclesLeft && "Instruction already executed?"); - for (WriteState &Def : getDefs()) - Def.cycleEvent(); - CyclesLeft--; - if (!CyclesLeft) - Stage = IS_EXECUTED; -} - -const unsigned WriteRef::INVALID_IID = std::numeric_limits<unsigned>::max(); - -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/LLVMBuild.txt b/llvm/tools/llvm-mca/lib/LLVMBuild.txt deleted file mode 100644 index 75f3a9e2229..00000000000 --- a/llvm/tools/llvm-mca/lib/LLVMBuild.txt +++ /dev/null @@ -1,22 +0,0 @@ -;===- ./tools/llvm-mca/lib/LLVMBuild.txt -----------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = MCA -parent = Libraries -required_libraries = MC Support diff --git a/llvm/tools/llvm-mca/lib/Pipeline.cpp b/llvm/tools/llvm-mca/lib/Pipeline.cpp deleted file mode 100644 index 0357124bef5..00000000000 --- a/llvm/tools/llvm-mca/lib/Pipeline.cpp +++ /dev/null @@ -1,97 +0,0 @@ -//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements an ordered container of stages that simulate the -/// pipeline of a hardware backend. -/// -//===----------------------------------------------------------------------===// - -#include "Pipeline.h" -#include "HWEventListener.h" -#include "llvm/Support/Debug.h" - -namespace llvm { -namespace mca { - -#define DEBUG_TYPE "llvm-mca" - -void Pipeline::addEventListener(HWEventListener *Listener) { - if (Listener) - Listeners.insert(Listener); - for (auto &S : Stages) - S->addListener(Listener); -} - -bool Pipeline::hasWorkToProcess() { - return any_of(Stages, [](const std::unique_ptr<Stage> &S) { - return S->hasWorkToComplete(); - }); -} - -Expected<unsigned> Pipeline::run() { - assert(!Stages.empty() && "Unexpected empty pipeline found!"); - - do { - notifyCycleBegin(); - if (Error Err = runCycle()) - return std::move(Err); - notifyCycleEnd(); - ++Cycles; - } while (hasWorkToProcess()); - - return Cycles; -} - -Error Pipeline::runCycle() { - Error Err = ErrorSuccess(); - // Update stages before we start processing new instructions. 
- for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { - const std::unique_ptr<Stage> &S = *I; - Err = S->cycleStart(); - } - - // Now fetch and execute new instructions. - InstRef IR; - Stage &FirstStage = *Stages[0]; - while (!Err && FirstStage.isAvailable(IR)) - Err = FirstStage.execute(IR); - - // Update stages in preparation for a new cycle. - for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { - const std::unique_ptr<Stage> &S = *I; - Err = S->cycleEnd(); - } - - return Err; -} - -void Pipeline::appendStage(std::unique_ptr<Stage> S) { - assert(S && "Invalid null stage in input!"); - if (!Stages.empty()) { - Stage *Last = Stages.back().get(); - Last->setNextInSequence(S.get()); - } - - Stages.push_back(std::move(S)); -} - -void Pipeline::notifyCycleBegin() { - LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n'); - for (HWEventListener *Listener : Listeners) - Listener->onCycleBegin(); -} - -void Pipeline::notifyCycleEnd() { - LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n\n"); - for (HWEventListener *Listener : Listeners) - Listener->onCycleEnd(); -} -} // namespace mca. -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp b/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp deleted file mode 100644 index 838dbad22e3..00000000000 --- a/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp +++ /dev/null @@ -1,185 +0,0 @@ -//===--------------------- DispatchStage.cpp --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file models the dispatch component of an instruction pipeline. 
-/// -/// The DispatchStage is responsible for updating instruction dependencies -/// and communicating to the simulated instruction scheduler that an instruction -/// is ready to be scheduled for execution. -/// -//===----------------------------------------------------------------------===// - -#include "Stages/DispatchStage.h" -#include "HWEventListener.h" -#include "HardwareUnits/Scheduler.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace llvm { -namespace mca { - -void DispatchStage::notifyInstructionDispatched(const InstRef &IR, - ArrayRef<unsigned> UsedRegs, - unsigned UOps) const { - LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n'); - notifyEvent<HWInstructionEvent>( - HWInstructionDispatchedEvent(IR, UsedRegs, UOps)); -} - -bool DispatchStage::checkPRF(const InstRef &IR) const { - SmallVector<unsigned, 4> RegDefs; - for (const WriteState &RegDef : IR.getInstruction()->getDefs()) - RegDefs.emplace_back(RegDef.getRegisterID()); - - const unsigned RegisterMask = PRF.isAvailable(RegDefs); - // A mask with all zeroes means: register files are available. - if (RegisterMask) { - notifyEvent<HWStallEvent>( - HWStallEvent(HWStallEvent::RegisterFileStall, IR)); - return false; - } - - return true; -} - -bool DispatchStage::checkRCU(const InstRef &IR) const { - const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps; - if (RCU.isAvailable(NumMicroOps)) - return true; - notifyEvent<HWStallEvent>( - HWStallEvent(HWStallEvent::RetireControlUnitStall, IR)); - return false; -} - -bool DispatchStage::canDispatch(const InstRef &IR) const { - return checkRCU(IR) && checkPRF(IR) && checkNextStage(IR); -} - -void DispatchStage::updateRAWDependencies(ReadState &RS, - const MCSubtargetInfo &STI) { - SmallVector<WriteRef, 4> DependentWrites; - - // Collect all the dependent writes, and update RS internal state. 
- PRF.addRegisterRead(RS, DependentWrites); - - // We know that this read depends on all the writes in DependentWrites. - // For each write, check if we have ReadAdvance information, and use it - // to figure out in how many cycles this read becomes available. - const ReadDescriptor &RD = RS.getDescriptor(); - const MCSchedModel &SM = STI.getSchedModel(); - const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID); - for (WriteRef &WR : DependentWrites) { - WriteState &WS = *WR.getWriteState(); - unsigned WriteResID = WS.getWriteResourceID(); - int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID); - WS.addUser(&RS, ReadAdvance); - } -} - -Error DispatchStage::dispatch(InstRef IR) { - assert(!CarryOver && "Cannot dispatch another instruction!"); - Instruction &IS = *IR.getInstruction(); - const InstrDesc &Desc = IS.getDesc(); - const unsigned NumMicroOps = Desc.NumMicroOps; - if (NumMicroOps > DispatchWidth) { - assert(AvailableEntries == DispatchWidth); - AvailableEntries = 0; - CarryOver = NumMicroOps - DispatchWidth; - CarriedOver = IR; - } else { - assert(AvailableEntries >= NumMicroOps); - AvailableEntries -= NumMicroOps; - } - - // Check if this is an optimizable reg-reg move. - bool IsEliminated = false; - if (IS.isOptimizableMove()) { - assert(IS.getDefs().size() == 1 && "Expected a single input!"); - assert(IS.getUses().size() == 1 && "Expected a single output!"); - IsEliminated = PRF.tryEliminateMove(IS.getDefs()[0], IS.getUses()[0]); - } - - // A dependency-breaking instruction doesn't have to wait on the register - // input operands, and it is often optimized at register renaming stage. - // Update RAW dependencies if this instruction is not a dependency-breaking - // instruction. A dependency-breaking instruction is a zero-latency - // instruction that doesn't consume hardware resources. - // An example of dependency-breaking instruction on X86 is a zero-idiom XOR. 
- // - // We also don't update data dependencies for instructions that have been - // eliminated at register renaming stage. - if (!IsEliminated) { - for (ReadState &RS : IS.getUses()) - updateRAWDependencies(RS, STI); - } - - // By default, a dependency-breaking zero-idiom is expected to be optimized - // at register renaming stage. That means, no physical register is allocated - // to the instruction. - SmallVector<unsigned, 4> RegisterFiles(PRF.getNumRegisterFiles()); - for (WriteState &WS : IS.getDefs()) - PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles); - - // Reserve slots in the RCU, and notify the instruction that it has been - // dispatched to the schedulers for execution. - IS.dispatch(RCU.reserveSlot(IR, NumMicroOps)); - - // Notify listeners of the "instruction dispatched" event, - // and move IR to the next stage. - notifyInstructionDispatched(IR, RegisterFiles, - std::min(DispatchWidth, NumMicroOps)); - return moveToTheNextStage(IR); -} - -Error DispatchStage::cycleStart() { - PRF.cycleStart(); - - if (!CarryOver) { - AvailableEntries = DispatchWidth; - return ErrorSuccess(); - } - - AvailableEntries = CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver; - unsigned DispatchedOpcodes = DispatchWidth - AvailableEntries; - CarryOver -= DispatchedOpcodes; - assert(CarriedOver && "Invalid dispatched instruction"); - - SmallVector<unsigned, 8> RegisterFiles(PRF.getNumRegisterFiles(), 0U); - notifyInstructionDispatched(CarriedOver, RegisterFiles, DispatchedOpcodes); - if (!CarryOver) - CarriedOver = InstRef(); - return ErrorSuccess(); -} - -bool DispatchStage::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth); - if (Required > AvailableEntries) - return false; - // The dispatch logic doesn't internally buffer instructions. 
It only accepts - // instructions that can be successfully moved to the next stage during this - // same cycle. - return canDispatch(IR); -} - -Error DispatchStage::execute(InstRef &IR) { - assert(canDispatch(IR) && "Cannot dispatch another instruction!"); - return dispatch(IR); -} - -#ifndef NDEBUG -void DispatchStage::dump() const { - PRF.dump(); - RCU.dump(); -} -#endif -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/Stages/EntryStage.cpp b/llvm/tools/llvm-mca/lib/Stages/EntryStage.cpp deleted file mode 100644 index f552132cac6..00000000000 --- a/llvm/tools/llvm-mca/lib/Stages/EntryStage.cpp +++ /dev/null @@ -1,76 +0,0 @@ -//===---------------------- EntryStage.cpp ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the Fetch stage of an instruction pipeline. Its sole -/// purpose in life is to produce instructions for the rest of the pipeline. 
-/// -//===----------------------------------------------------------------------===// - -#include "Stages/EntryStage.h" -#include "Instruction.h" - -namespace llvm { -namespace mca { - -bool EntryStage::hasWorkToComplete() const { return CurrentInstruction; } - -bool EntryStage::isAvailable(const InstRef & /* unused */) const { - if (CurrentInstruction) - return checkNextStage(CurrentInstruction); - return false; -} - -void EntryStage::getNextInstruction() { - assert(!CurrentInstruction && "There is already an instruction to process!"); - if (!SM.hasNext()) - return; - SourceRef SR = SM.peekNext(); - std::unique_ptr<Instruction> Inst = llvm::make_unique<Instruction>(SR.second); - CurrentInstruction = InstRef(SR.first, Inst.get()); - Instructions.emplace_back(std::move(Inst)); - SM.updateNext(); -} - -llvm::Error EntryStage::execute(InstRef & /*unused */) { - assert(CurrentInstruction && "There is no instruction to process!"); - if (llvm::Error Val = moveToTheNextStage(CurrentInstruction)) - return Val; - - // Move the program counter. - CurrentInstruction.invalidate(); - getNextInstruction(); - return llvm::ErrorSuccess(); -} - -llvm::Error EntryStage::cycleStart() { - if (!CurrentInstruction) - getNextInstruction(); - return llvm::ErrorSuccess(); -} - -llvm::Error EntryStage::cycleEnd() { - // Find the first instruction which hasn't been retired. - auto Range = make_range(&Instructions[NumRetired], Instructions.end()); - auto It = find_if(Range, [](const std::unique_ptr<Instruction> &I) { - return !I->isRetired(); - }); - - NumRetired = std::distance(Instructions.begin(), It); - // Erase instructions up to the first that hasn't been retired. 
- if ((NumRetired * 2) >= Instructions.size()) { - Instructions.erase(Instructions.begin(), It); - NumRetired = 0; - } - - return llvm::ErrorSuccess(); -} - -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/Stages/ExecuteStage.cpp b/llvm/tools/llvm-mca/lib/Stages/ExecuteStage.cpp deleted file mode 100644 index 298f08a2887..00000000000 --- a/llvm/tools/llvm-mca/lib/Stages/ExecuteStage.cpp +++ /dev/null @@ -1,219 +0,0 @@ -//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the execution stage of an instruction pipeline. -/// -/// The ExecuteStage is responsible for managing the hardware scheduler -/// and issuing notifications that an instruction has been executed. 
-/// -//===----------------------------------------------------------------------===// - -#include "Stages/ExecuteStage.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace llvm { -namespace mca { - -HWStallEvent::GenericEventType toHWStallEventType(Scheduler::Status Status) { - switch (Status) { - case Scheduler::SC_LOAD_QUEUE_FULL: - return HWStallEvent::LoadQueueFull; - case Scheduler::SC_STORE_QUEUE_FULL: - return HWStallEvent::StoreQueueFull; - case Scheduler::SC_BUFFERS_FULL: - return HWStallEvent::SchedulerQueueFull; - case Scheduler::SC_DISPATCH_GROUP_STALL: - return HWStallEvent::DispatchGroupStall; - case Scheduler::SC_AVAILABLE: - return HWStallEvent::Invalid; - } - - llvm_unreachable("Don't know how to process this StallKind!"); -} - -bool ExecuteStage::isAvailable(const InstRef &IR) const { - if (Scheduler::Status S = HWS.isAvailable(IR)) { - HWStallEvent::GenericEventType ET = toHWStallEventType(S); - notifyEvent<HWStallEvent>(HWStallEvent(ET, IR)); - return false; - } - - return true; -} - -Error ExecuteStage::issueInstruction(InstRef &IR) { - SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> Used; - SmallVector<InstRef, 4> Ready; - HWS.issueInstruction(IR, Used, Ready); - - notifyReservedOrReleasedBuffers(IR, /* Reserved */ false); - notifyInstructionIssued(IR, Used); - if (IR.getInstruction()->isExecuted()) { - notifyInstructionExecuted(IR); - // FIXME: add a buffer of executed instructions. - if (Error S = moveToTheNextStage(IR)) - return S; - } - - for (const InstRef &I : Ready) - notifyInstructionReady(I); - return ErrorSuccess(); -} - -Error ExecuteStage::issueReadyInstructions() { - InstRef IR = HWS.select(); - while (IR) { - if (Error Err = issueInstruction(IR)) - return Err; - - // Select the next instruction to issue. 
- IR = HWS.select(); - } - - return ErrorSuccess(); -} - -Error ExecuteStage::cycleStart() { - SmallVector<ResourceRef, 8> Freed; - SmallVector<InstRef, 4> Executed; - SmallVector<InstRef, 4> Ready; - - HWS.cycleEvent(Freed, Executed, Ready); - - for (const ResourceRef &RR : Freed) - notifyResourceAvailable(RR); - - for (InstRef &IR : Executed) { - notifyInstructionExecuted(IR); - // FIXME: add a buffer of executed instructions. - if (Error S = moveToTheNextStage(IR)) - return S; - } - - for (const InstRef &IR : Ready) - notifyInstructionReady(IR); - - return issueReadyInstructions(); -} - -#ifndef NDEBUG -static void verifyInstructionEliminated(const InstRef &IR) { - const Instruction &Inst = *IR.getInstruction(); - assert(Inst.isEliminated() && "Instruction was not eliminated!"); - assert(Inst.isReady() && "Instruction in an inconsistent state!"); - - // Ensure that instructions eliminated at register renaming stage are in a - // consistent state. - const InstrDesc &Desc = Inst.getDesc(); - assert(!Desc.MayLoad && !Desc.MayStore && "Cannot eliminate a memory op!"); -} -#endif - -Error ExecuteStage::handleInstructionEliminated(InstRef &IR) { -#ifndef NDEBUG - verifyInstructionEliminated(IR); -#endif - notifyInstructionReady(IR); - notifyInstructionIssued(IR, {}); - IR.getInstruction()->forceExecuted(); - notifyInstructionExecuted(IR); - return moveToTheNextStage(IR); -} - -// Schedule the instruction for execution on the hardware. -Error ExecuteStage::execute(InstRef &IR) { - assert(isAvailable(IR) && "Scheduler is not available!"); - -#ifndef NDEBUG - // Ensure that the HWS has not stored this instruction in its queues. - HWS.sanityCheck(IR); -#endif - - if (IR.getInstruction()->isEliminated()) - return handleInstructionEliminated(IR); - - // Reserve a slot in each buffered resource. Also, mark units with - // BufferSize=0 as reserved. 
Resources with a buffer size of zero will only - // be released after MCIS is issued, and all the ResourceCycles for those - // units have been consumed. - HWS.dispatch(IR); - notifyReservedOrReleasedBuffers(IR, /* Reserved */ true); - if (!HWS.isReady(IR)) - return ErrorSuccess(); - - // If we did not return early, then the scheduler is ready for execution. - notifyInstructionReady(IR); - - // If we cannot issue immediately, the HWS will add IR to its ready queue for - // execution later, so we must return early here. - if (!HWS.mustIssueImmediately(IR)) - return ErrorSuccess(); - - // Issue IR to the underlying pipelines. - return issueInstruction(IR); -} - -void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) const { - LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n'); - notifyEvent<HWInstructionEvent>( - HWInstructionEvent(HWInstructionEvent::Executed, IR)); -} - -void ExecuteStage::notifyInstructionReady(const InstRef &IR) const { - LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n'); - notifyEvent<HWInstructionEvent>( - HWInstructionEvent(HWInstructionEvent::Ready, IR)); -} - -void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) const { - LLVM_DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.' - << RR.second << "]\n"); - for (HWEventListener *Listener : getListeners()) - Listener->onResourceAvailable(RR); -} - -void ExecuteStage::notifyInstructionIssued( - const InstRef &IR, - ArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const { - LLVM_DEBUG({ - dbgs() << "[E] Instruction Issued: #" << IR << '\n'; - for (const std::pair<ResourceRef, ResourceCycles> &Resource : Used) { - dbgs() << "[E] Resource Used: [" << Resource.first.first << '.' 
- << Resource.first.second << "], "; - dbgs() << "cycles: " << Resource.second << '\n'; - } - }); - notifyEvent<HWInstructionEvent>(HWInstructionIssuedEvent(IR, Used)); -} - -void ExecuteStage::notifyReservedOrReleasedBuffers(const InstRef &IR, - bool Reserved) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - if (Desc.Buffers.empty()) - return; - - SmallVector<unsigned, 4> BufferIDs(Desc.Buffers.begin(), Desc.Buffers.end()); - std::transform(Desc.Buffers.begin(), Desc.Buffers.end(), BufferIDs.begin(), - [&](uint64_t Op) { return HWS.getResourceID(Op); }); - if (Reserved) { - for (HWEventListener *Listener : getListeners()) - Listener->onReservedBuffers(IR, BufferIDs); - return; - } - - for (HWEventListener *Listener : getListeners()) - Listener->onReleasedBuffers(IR, BufferIDs); -} - -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/Stages/InstructionTables.cpp b/llvm/tools/llvm-mca/lib/Stages/InstructionTables.cpp deleted file mode 100644 index 33c30e7f95c..00000000000 --- a/llvm/tools/llvm-mca/lib/Stages/InstructionTables.cpp +++ /dev/null @@ -1,69 +0,0 @@ -//===--------------------- InstructionTables.cpp ----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements the method InstructionTables::execute(). -/// Method execute() prints a theoretical resource pressure distribution based -/// on the information available in the scheduling model, and without running -/// the pipeline. 
-/// -//===----------------------------------------------------------------------===// - -#include "Stages/InstructionTables.h" - -namespace llvm { -namespace mca { - -Error InstructionTables::execute(InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - UsedResources.clear(); - - // Identify the resources consumed by this instruction. - for (const std::pair<uint64_t, ResourceUsage> Resource : Desc.Resources) { - // Skip zero-cycle resources (i.e., unused resources). - if (!Resource.second.size()) - continue; - unsigned Cycles = Resource.second.size(); - unsigned Index = std::distance( - Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first)); - const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index); - unsigned NumUnits = ProcResource.NumUnits; - if (!ProcResource.SubUnitsIdxBegin) { - // The number of cycles consumed by each unit. - for (unsigned I = 0, E = NumUnits; I < E; ++I) { - ResourceRef ResourceUnit = std::make_pair(Index, 1U << I); - UsedResources.emplace_back( - std::make_pair(ResourceUnit, ResourceCycles(Cycles, NumUnits))); - } - continue; - } - - // This is a group. Obtain the set of resources contained in this - // group. Some of these resources may implement multiple units. - // Uniformly distribute Cycles across all of the units. - for (unsigned I1 = 0; I1 < NumUnits; ++I1) { - unsigned SubUnitIdx = ProcResource.SubUnitsIdxBegin[I1]; - const MCProcResourceDesc &SubUnit = *SM.getProcResource(SubUnitIdx); - // Compute the number of cycles consumed by each resource unit. - for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) { - ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2); - UsedResources.emplace_back(std::make_pair( - ResourceUnit, ResourceCycles(Cycles, NumUnits * SubUnit.NumUnits))); - } - } - } - - // Send a fake instruction issued event to all the views. 
- HWInstructionIssuedEvent Event(IR, UsedResources); - notifyEvent<HWInstructionIssuedEvent>(Event); - return ErrorSuccess(); -} - -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/Stages/RetireStage.cpp b/llvm/tools/llvm-mca/lib/Stages/RetireStage.cpp deleted file mode 100644 index 47eed5f2c9c..00000000000 --- a/llvm/tools/llvm-mca/lib/Stages/RetireStage.cpp +++ /dev/null @@ -1,62 +0,0 @@ -//===---------------------- RetireStage.cpp ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the retire stage of an instruction pipeline. -/// The RetireStage represents the process logic that interacts with the -/// simulated RetireControlUnit hardware. -/// -//===----------------------------------------------------------------------===// - -#include "Stages/RetireStage.h" -#include "HWEventListener.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace llvm { -namespace mca { - -llvm::Error RetireStage::cycleStart() { - if (RCU.isEmpty()) - return llvm::ErrorSuccess(); - - const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle(); - unsigned NumRetired = 0; - while (!RCU.isEmpty()) { - if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) - break; - const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken(); - if (!Current.Executed) - break; - RCU.consumeCurrentToken(); - notifyInstructionRetired(Current.IR); - NumRetired++; - } - - return llvm::ErrorSuccess(); -} - -llvm::Error RetireStage::execute(InstRef &IR) { - RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID()); - return llvm::ErrorSuccess(); -} - -void RetireStage::notifyInstructionRetired(const InstRef &IR) const { - LLVM_DEBUG(llvm::dbgs() << "[E] Instruction 
Retired: #" << IR << '\n'); - llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles()); - const Instruction &Inst = *IR.getInstruction(); - - for (const WriteState &WS : Inst.getDefs()) - PRF.removeRegisterWrite(WS, FreedRegs); - notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs)); -} - -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/Stages/Stage.cpp b/llvm/tools/llvm-mca/lib/Stages/Stage.cpp deleted file mode 100644 index c3cfe47d24e..00000000000 --- a/llvm/tools/llvm-mca/lib/Stages/Stage.cpp +++ /dev/null @@ -1,29 +0,0 @@ -//===---------------------- Stage.cpp ---------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a stage. -/// A chain of stages compose an instruction pipeline. -/// -//===----------------------------------------------------------------------===// - -#include "Stages/Stage.h" - -namespace llvm { -namespace mca { - -// Pin the vtable here in the implementation file. -Stage::~Stage() = default; - -void Stage::addListener(HWEventListener *Listener) { - Listeners.insert(Listener); -} - -} // namespace mca -} // namespace llvm diff --git a/llvm/tools/llvm-mca/lib/Support.cpp b/llvm/tools/llvm-mca/lib/Support.cpp deleted file mode 100644 index a6ff26dafb5..00000000000 --- a/llvm/tools/llvm-mca/lib/Support.cpp +++ /dev/null @@ -1,79 +0,0 @@ -//===--------------------- Support.cpp --------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements a few helper functions used by various pipeline
/// components.
///
//===----------------------------------------------------------------------===//

#include "Support.h"
#include "llvm/MC/MCSchedule.h"

namespace llvm {
namespace mca {

// Assign a unique bitmask to every processor resource declared by the
// scheduling model, indexed by processor resource ID. Resource units each get
// a dedicated bit; resource groups get their own bit OR-ed with the masks of
// all the units they contain. The two-pass order matters: group masks are
// built from unit masks assigned in the first pass.
void computeProcResourceMasks(const MCSchedModel &SM,
                              SmallVectorImpl<uint64_t> &Masks) {
  unsigned ProcResourceID = 0;

  // Create a unique bitmask for every processor resource unit.
  // Skip resource at index 0, since it always references 'InvalidUnit'.
  Masks.resize(SM.getNumProcResourceKinds());
  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
    // A non-null SubUnitsIdxBegin identifies a group; handled below.
    if (Desc.SubUnitsIdxBegin)
      continue;
    Masks[I] = 1ULL << ProcResourceID;
    ProcResourceID++;
  }

  // Create a unique bitmask for every processor resource group.
  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
    if (!Desc.SubUnitsIdxBegin)
      continue;
    Masks[I] = 1ULL << ProcResourceID;
    // Fold in the mask of every unit contained in this group (the units were
    // all assigned in the first pass above).
    for (unsigned U = 0; U < Desc.NumUnits; ++U) {
      uint64_t OtherMask = Masks[Desc.SubUnitsIdxBegin[U]];
      Masks[I] |= OtherMask;
    }
    ProcResourceID++;
  }
}

// Compute the reciprocal throughput of a code block: the maximum of the
// dispatch bound (NumMicroOps / DispatchWidth) and the pressure exerted on
// every consumed processor resource. ProcResourceUsage is indexed by
// processor resource ID and holds the resource cycles consumed per resource.
double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
                               unsigned NumMicroOps,
                               ArrayRef<unsigned> ProcResourceUsage) {
  // The block throughput is bounded from above by the hardware dispatch
  // throughput. That is because the DispatchWidth is an upper bound on the
  // number of opcodes that can be part of a single dispatch group.
  double Max = static_cast<double>(NumMicroOps) / DispatchWidth;

  // The block throughput is also limited by the amount of hardware parallelism.
  // The number of available resource units affects the resource pressure
  // distribution, as well as how many blocks can be executed every cycle.
  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
    unsigned ResourceCycles = ProcResourceUsage[I];
    // Skip resources this block does not consume.
    if (!ResourceCycles)
      continue;

    const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
    double Throughput = static_cast<double>(ResourceCycles) / MCDesc.NumUnits;
    Max = std::max(Max, Throughput);
  }

  // The block reciprocal throughput is computed as the MAX of:
  // - (NumMicroOps / DispatchWidth)
  // - (NumUnits / ResourceCycles) for every consumed processor resource.
  return Max;
}

} // namespace mca
} // namespace llvm