diff options
Diffstat (limited to 'llvm/tools/llvm-mca')
| -rw-r--r-- | llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp | 94 | ||||
| -rw-r--r-- | llvm/tools/llvm-mca/Views/RegisterFileStatistics.h | 19 | ||||
| -rw-r--r-- | llvm/tools/llvm-mca/include/HardwareUnits/RegisterFile.h | 14 | ||||
| -rw-r--r-- | llvm/tools/llvm-mca/include/Instruction.h | 21 | ||||
| -rw-r--r-- | llvm/tools/llvm-mca/include/Stages/DispatchStage.h | 4 | ||||
| -rw-r--r-- | llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp | 44 | ||||
| -rw-r--r-- | llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp | 14 |
7 files changed, 163 insertions, 47 deletions
diff --git a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp index bd638d9795a..06202bc4142 100644 --- a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp +++ b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp @@ -21,10 +21,12 @@ namespace mca { RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti) : STI(sti) { const MCSchedModel &SM = STI.getSchedModel(); - RegisterFileUsage Empty = {0, 0, 0}; + RegisterFileUsage RFUEmpty = {0, 0, 0}; + MoveEliminationInfo MEIEmpty = {0, 0, 0, 0, 0}; if (!SM.hasExtraProcessorInfo()) { // Assume a single register file. - RegisterFiles.emplace_back(Empty); + PRFUsage.emplace_back(RFUEmpty); + MoveElimInfo.emplace_back(MEIEmpty); return; } @@ -35,8 +37,42 @@ RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti) // be skipped. If there are no user defined register files, then reserve a // single entry for the default register file at index #0. unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U); - RegisterFiles.resize(NumRegFiles); - std::fill(RegisterFiles.begin(), RegisterFiles.end(), Empty); + + PRFUsage.resize(NumRegFiles); + std::fill(PRFUsage.begin(), PRFUsage.end(), RFUEmpty); + + MoveElimInfo.resize(NumRegFiles); + std::fill(MoveElimInfo.begin(), MoveElimInfo.end(), MEIEmpty); +} + +void RegisterFileStatistics::updateRegisterFileUsage( + ArrayRef<unsigned> UsedPhysRegs) { + for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) { + RegisterFileUsage &RFU = PRFUsage[I]; + unsigned NumUsedPhysRegs = UsedPhysRegs[I]; + RFU.CurrentlyUsedMappings += NumUsedPhysRegs; + RFU.TotalMappings += NumUsedPhysRegs; + RFU.MaxUsedMappings = + std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings); + } +} + +void RegisterFileStatistics::updateMoveElimInfo(const Instruction &Inst) { + if (!Inst.isOptimizableMove()) + return; + + assert(Inst.getDefs().size() == 1 && "Expected a single definition!"); + assert(Inst.getUses().size() == 1 && "Expected a single register use!"); + const WriteState &WS = Inst.getDefs()[0]; + const ReadState &RS = Inst.getUses()[0]; + + MoveEliminationInfo &Info = + MoveElimInfo[Inst.getDefs()[0].getRegisterFileID()]; + Info.TotalMoveEliminationCandidates++; + if (WS.isEliminated()) + Info.CurrentMovesEliminated++; + if (WS.isWriteZero() && RS.isReadZero()) + Info.TotalMovesThatPropagateZero++; } void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) { @@ -45,21 +81,24 @@ void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) { break; case HWInstructionEvent::Retired: { const auto &RE = static_cast<const HWInstructionRetiredEvent &>(Event); - for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I) - RegisterFiles[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I]; + for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) + PRFUsage[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I]; break; } case HWInstructionEvent::Dispatched: { const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event); - for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I) { - RegisterFileUsage &RFU = RegisterFiles[I]; - unsigned NumUsedPhysRegs = DE.UsedPhysRegs[I]; - RFU.CurrentlyUsedMappings += NumUsedPhysRegs; - RFU.TotalMappings += NumUsedPhysRegs; - RFU.MaxUsedMappings = - std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings); - } + updateRegisterFileUsage(DE.UsedPhysRegs); + updateMoveElimInfo(*DE.IR.getInstruction()); + } } +} + +void RegisterFileStatistics::onCycleEnd() { + for (MoveEliminationInfo &MEI : MoveElimInfo) { + unsigned &CurrentMax = MEI.MaxMovesEliminatedPerCycle; + CurrentMax = std::max(CurrentMax, MEI.CurrentMovesEliminated); + MEI.TotalMovesEliminated += MEI.CurrentMovesEliminated; + MEI.CurrentMovesEliminated = 0; } } @@ -68,14 +107,14 @@ void RegisterFileStatistics::printView(raw_ostream &OS) const { raw_string_ostream TempStream(Buffer); TempStream << "\n\nRegister File statistics:"; - const RegisterFileUsage &GlobalUsage = RegisterFiles[0]; + const RegisterFileUsage &GlobalUsage = PRFUsage[0]; TempStream << "\nTotal number of mappings created: " << GlobalUsage.TotalMappings; TempStream << "\nMax number of mappings used: " << GlobalUsage.MaxUsedMappings << '\n'; - for (unsigned I = 1, E = RegisterFiles.size(); I < E; ++I) { - const RegisterFileUsage &RFU = RegisterFiles[I]; + for (unsigned I = 1, E = PRFUsage.size(); I < E; ++I) { + const RegisterFileUsage &RFU = PRFUsage[I]; // Obtain the register file descriptor from the scheduling model. assert(STI.getSchedModel().hasExtraProcessorInfo() && "Unable to find register file info!"); @@ -98,6 +137,27 @@ void RegisterFileStatistics::printView(raw_ostream &OS) const { << RFU.TotalMappings; TempStream << "\n Max number of mappings used: " << RFU.MaxUsedMappings << '\n'; + const MoveEliminationInfo &MEI = MoveElimInfo[I]; + + if (MEI.TotalMoveEliminationCandidates) { + TempStream << " Number of optimizable moves: " + << MEI.TotalMoveEliminationCandidates; + double EliminatedMovProportion = (double)MEI.TotalMovesEliminated / + MEI.TotalMoveEliminationCandidates * + 100.0; + double ZeroMovProportion = (double)MEI.TotalMovesThatPropagateZero / + MEI.TotalMoveEliminationCandidates * 100.0; + TempStream << "\n Number of moves eliminated: " + << MEI.TotalMovesEliminated << " " + << format("(%.1f%%)", + floor((EliminatedMovProportion * 10) + 0.5) / 10); + TempStream << "\n Number of zero moves: " + << MEI.TotalMovesThatPropagateZero << " " + << format("(%.1f%%)", + floor((ZeroMovProportion * 10) + 0.5) / 10); + TempStream << "\n Max moves eliminated per cycle: " + << MEI.MaxMovesEliminatedPerCycle << '\n'; + } } TempStream.flush(); diff --git a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h index 86858d8bba8..a2c52a668da 100644 --- a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h +++ b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h @@ -21,6 +21,10 @@ /// Number of physical registers: 72 /// Total number of mappings created: 0 /// Max number of mappings used: 0 +/// Number of optimizable moves: 200 +/// Number of moves eliminated: 200 (100.0%) +/// Number of zero moves: 200 (100.0%) +/// Max moves eliminated per cycle: 2 /// /// * Register File #2 -- IntegerPRF: /// Number of physical registers: 64 @@ -49,12 +53,25 @@ class RegisterFileStatistics : public View { unsigned CurrentlyUsedMappings; }; + struct MoveEliminationInfo { + unsigned TotalMoveEliminationCandidates; + unsigned TotalMovesEliminated; + unsigned TotalMovesThatPropagateZero; + unsigned MaxMovesEliminatedPerCycle; + unsigned CurrentMovesEliminated; + }; + // There is one entry for each register file implemented by the processor. - llvm::SmallVector<RegisterFileUsage, 4> RegisterFiles; + llvm::SmallVector<RegisterFileUsage, 4> PRFUsage; + llvm::SmallVector<MoveEliminationInfo, 4> MoveElimInfo; + + void updateRegisterFileUsage(ArrayRef<unsigned> UsedPhysRegs); + void updateMoveElimInfo(const Instruction &Inst); public: RegisterFileStatistics(const llvm::MCSubtargetInfo &sti); + void onCycleEnd() override; void onEvent(const HWInstructionEvent &Event) override; void printView(llvm::raw_ostream &OS) const override; }; diff --git a/llvm/tools/llvm-mca/include/HardwareUnits/RegisterFile.h b/llvm/tools/llvm-mca/include/HardwareUnits/RegisterFile.h index 1cca8b5294d..d9949bf4f6a 100644 --- a/llvm/tools/llvm-mca/include/HardwareUnits/RegisterFile.h +++ b/llvm/tools/llvm-mca/include/HardwareUnits/RegisterFile.h @@ -173,6 +173,11 @@ class RegisterFile : public HardwareUnit { void freePhysRegs(const RegisterRenamingInfo &Entry, MutableArrayRef<unsigned> FreedPhysRegs); + // Collects writes that are in a RAW dependency with RS. + // This method is called from `addRegisterRead()`. + void collectWrites(const ReadState &RS, + SmallVectorImpl<WriteRef> &Writes) const; + // Create an instance of RegisterMappingTracker for every register file // specified by the processor model. // If no register file is specified, then this method creates a default @@ -189,6 +194,10 @@ public: // No physical regiser is allocated if this write is from a zero-idiom. void addRegisterWrite(WriteRef Write, MutableArrayRef<unsigned> UsedPhysRegs); + // Collect writes that are in a data dependency with RS, and update RS + // internal state. + void addRegisterRead(ReadState &RS, SmallVectorImpl<WriteRef> &Writes) const; + // Removes write \param WS from the register mappings. // Physical registers may be released to reflect this update. // No registers are released if this write is from a zero-idiom. @@ -200,7 +209,7 @@ public: // If RS is a read from a zero register, and WS is eliminated, then // `WS.WritesZero` is also set, so that method addRegisterWrite() would not // reserve a physical register for it. - bool tryEliminateMove(WriteState &WS, const ReadState &RS); + bool tryEliminateMove(WriteState &WS, ReadState &RS); // Checks if there are enough physical registers in the register files. // Returns a "response mask" where each bit represents the response from a @@ -212,7 +221,8 @@ public: // Current implementation can simulate up to 32 register files (including the // special register file at index #0). unsigned isAvailable(ArrayRef<unsigned> Regs) const; - void collectWrites(SmallVectorImpl<WriteRef> &Writes, unsigned RegID) const; + + // Returns the number of PRFs implemented by this processor. unsigned getNumRegisterFiles() const { return RegisterFiles.size(); } // Notify each PRF that a new cycle just started. diff --git a/llvm/tools/llvm-mca/include/Instruction.h b/llvm/tools/llvm-mca/include/Instruction.h index f83be1ff4bb..7407283bca2 100644 --- a/llvm/tools/llvm-mca/include/Instruction.h +++ b/llvm/tools/llvm-mca/include/Instruction.h @@ -101,6 +101,9 @@ class WriteState { // field RegisterID from WD. unsigned RegisterID; + // Physical register file that serves register RegisterID. + unsigned PRFID; + // True if this write implicitly clears the upper portion of RegisterID's // super-registers. bool ClearsSuperRegs; @@ -135,7 +138,7 @@ public: WriteState(const WriteDescriptor &Desc, unsigned RegID, bool clearsSuperRegs = false, bool writesZero = false) : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), - ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero), + PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero), IsEliminated(false), DependentWrite(nullptr), NumWriteUsers(0U) {} WriteState(const WriteState &Other) = default; @@ -144,6 +147,7 @@ public: int getCyclesLeft() const { return CyclesLeft; } unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; } unsigned getRegisterID() const { return RegisterID; } + unsigned getRegisterFileID() const { return PRFID; } unsigned getLatency() const { return WD->Latency; } void addUser(ReadState *Use, int ReadAdvance); @@ -168,6 +172,8 @@ public: IsEliminated = true; } + void setPRF(unsigned PRF) { PRFID = PRF; } + // On every cycle, update CyclesLeft and notify dependent users. void cycleEvent(); void onInstructionIssued(); @@ -185,6 +191,8 @@ class ReadState { const ReadDescriptor *RD; // Physical register identified associated to this read. unsigned RegisterID; + // Physical register file that serves register RegisterID. + unsigned PRFID; // Number of writes that contribute to the definition of RegisterID. // In the absence of partial register updates, the number of DependentWrites // cannot be more than one. @@ -201,18 +209,21 @@ class ReadState { // This field is set to true only if there are no dependent writes, and // there are no `CyclesLeft' to wait. bool IsReady; + // True if this is a read from a known zero register. + bool IsZero; // True if this register read is from a dependency-breaking instruction. bool IndependentFromDef; public: ReadState(const ReadDescriptor &Desc, unsigned RegID) - : RD(&Desc), RegisterID(RegID), DependentWrites(0), + : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0), CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true), - IndependentFromDef(false) {} + IsZero(false), IndependentFromDef(false) {} const ReadDescriptor &getDescriptor() const { return *RD; } unsigned getSchedClass() const { return RD->SchedClassID; } unsigned getRegisterID() const { return RegisterID; } + unsigned getRegisterFileID() const { return PRFID; } bool isReady() const { return IsReady; } bool isImplicitRead() const { return RD->isImplicitRead(); } @@ -226,6 +237,10 @@ public: DependentWrites = Writes; IsReady = !Writes; } + + bool isReadZero() const { return IsZero; } + void setReadZero() { IsZero = true; } + void setPRF(unsigned ID) { PRFID = ID; } }; /// A sequence of cycles. diff --git a/llvm/tools/llvm-mca/include/Stages/DispatchStage.h b/llvm/tools/llvm-mca/include/Stages/DispatchStage.h index 3595f3122cc..29cace1022e 100644 --- a/llvm/tools/llvm-mca/include/Stages/DispatchStage.h +++ b/llvm/tools/llvm-mca/include/Stages/DispatchStage.h @@ -68,10 +68,6 @@ class DispatchStage final : public Stage { ArrayRef<unsigned> UsedPhysRegs, unsigned uOps) const; - void collectWrites(SmallVectorImpl<WriteRef> &Vec, unsigned RegID) const { - return PRF.collectWrites(Vec, RegID); - } - public: DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI, unsigned MaxDispatchWidth, RetireControlUnit &R, diff --git a/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp b/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp index 71aec49ce77..6bc63a0db50 100644 --- a/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp +++ b/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp @@ -173,6 +173,7 @@ void RegisterFile::addRegisterWrite(WriteRef Write, bool IsEliminated = WS.isEliminated(); bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated; const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + WS.setPRF(RRI.IndexPlusCost.first); if (RRI.RenameAs && RRI.RenameAs != RegID) { RegID = RRI.RenameAs; @@ -217,9 +218,9 @@ void RegisterFile::addRegisterWrite(WriteRef Write, RegisterMappings[*I].second.AliasRegID = 0U; } - // No physical registers are allocated for instructions that are optimized in - // hardware. For example, zero-latency data-dependency breaking instructions - // don't consume physical registers. + // No physical registers are allocated for instructions that are optimized + // in hardware. For example, zero-latency data-dependency breaking + // instructions don't consume physical registers. if (ShouldAllocatePhysRegs) allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs); } @@ -288,7 +289,7 @@ void RegisterFile::removeRegisterWrite( } } -bool RegisterFile::tryEliminateMove(WriteState &WS, const ReadState &RS) { +bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) { const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()]; const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()]; @@ -349,15 +350,18 @@ bool RegisterFile::tryEliminateMove(WriteState &WS, const ReadState &RS) { } RMT.NumMoveEliminated++; - if (IsZeroMove) + if (IsZeroMove) { WS.setWriteZero(); + RS.setReadZero(); + } WS.setEliminated(); return true; } -void RegisterFile::collectWrites(SmallVectorImpl<WriteRef> &Writes, - unsigned RegID) const { +void RegisterFile::collectWrites(const ReadState &RS, + SmallVectorImpl<WriteRef> &Writes) const { + unsigned RegID = RS.getRegisterID(); assert(RegID && RegID < RegisterMappings.size()); LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " << MRI.getName(RegID) << '\n'); @@ -379,11 +383,13 @@ void RegisterFile::collectWrites(SmallVectorImpl<WriteRef> &Writes, } // Remove duplicate entries and resize the input vector. - sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) { - return Lhs.getWriteState() < Rhs.getWriteState(); - }); - auto It = std::unique(Writes.begin(), Writes.end()); - Writes.resize(std::distance(Writes.begin(), It)); + if (Writes.size() > 1) { + sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) { + return Lhs.getWriteState() < Rhs.getWriteState(); + }); + auto It = std::unique(Writes.begin(), Writes.end()); + Writes.resize(std::distance(Writes.begin(), It)); + } LLVM_DEBUG({ for (const WriteRef &WR : Writes) { @@ -395,6 +401,20 @@ void RegisterFile::collectWrites(SmallVectorImpl<WriteRef> &Writes, }); } +void RegisterFile::addRegisterRead(ReadState &RS, + SmallVectorImpl<WriteRef> &Defs) const { + unsigned RegID = RS.getRegisterID(); + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + RS.setPRF(RRI.IndexPlusCost.first); + if (RS.isIndependentFromDef()) + return; + + if (ZeroRegisters[RS.getRegisterID()]) + RS.setReadZero(); + collectWrites(RS, Defs); + RS.setDependentWrites(Defs.size()); +} + unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const { SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles()); diff --git a/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp b/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp index 104446e711e..838dbad22e3 100644 --- a/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp +++ b/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp @@ -67,8 +67,9 @@ void DispatchStage::updateRAWDependencies(ReadState &RS, const MCSubtargetInfo &STI) { SmallVector<WriteRef, 4> DependentWrites; - collectWrites(DependentWrites, RS.getRegisterID()); - RS.setDependentWrites(DependentWrites.size()); + // Collect all the dependent writes, and update RS internal state. + PRF.addRegisterRead(RS, DependentWrites); + // We know that this read depends on all the writes in DependentWrites. // For each write, check if we have ReadAdvance information, and use it // to figure out in how many cycles this read becomes available. @@ -116,10 +117,8 @@ Error DispatchStage::dispatch(InstRef IR) { // We also don't update data dependencies for instructions that have been // eliminated at register renaming stage. if (!IsEliminated) { - for (ReadState &RS : IS.getUses()) { - if (!RS.isIndependentFromDef()) - updateRAWDependencies(RS, STI); - } + for (ReadState &RS : IS.getUses()) + updateRAWDependencies(RS, STI); } // By default, a dependency-breaking zero-idiom is expected to be optimized @@ -127,8 +126,7 @@ Error DispatchStage::dispatch(InstRef IR) { // to the instruction. SmallVector<unsigned, 4> RegisterFiles(PRF.getNumRegisterFiles()); for (WriteState &WS : IS.getDefs()) - PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), - RegisterFiles); + PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles); // Reserve slots in the RCU, and notify the instruction that it has been // dispatched to the schedulers for execution. |

