diff options
author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2018-07-15 11:01:38 +0000 |
---|---|---|
committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2018-07-15 11:01:38 +0000 |
commit | ff630c2cdc7f805091e8d8cce40692bb9441fd3a (patch) | |
tree | c5a9574b8fa2836d9ad01248130a3b9f25bbfd76 /llvm/tools/llvm-mca/RegisterFile.cpp | |
parent | 0603bae41cf228ba6489d9aa320835abcce509e5 (diff) | |
download | bcm5719-llvm-ff630c2cdc7f805091e8d8cce40692bb9441fd3a.tar.gz bcm5719-llvm-ff630c2cdc7f805091e8d8cce40692bb9441fd3a.zip |
[llvm-mca][BtVer2] teach how to identify false dependencies on partially written
registers.
The goal of this patch is to improve the throughput analysis in llvm-mca for the
case where instructions perform partial register writes.
On x86, partial register writes are quite difficult to model, mainly because
different processors tend to implement different register merging schemes in
hardware.
When the code contains partial register writes, the IPC (instructions per
cycle) estimated by llvm-mca tends to diverge quite significantly from the
observed IPC (using perf).
Modern AMD processors (at least, from Bulldozer onwards) don't rename partial
registers. Quoting Agner Fog's microarchitecture.pdf:
" The processor always keeps the different parts of an integer register together.
For example, AL and AH are not treated as independent by the out-of-order
execution mechanism. An instruction that writes to part of a register will
therefore have a false dependence on any previous write to the same register or
any part of it."
This patch is a first important step towards improving the analysis of partial
register updates. It changes the semantics of RegisterFile descriptors in
tablegen, and teaches llvm-mca how to identify false dependences in the presence
of partial register writes (for more details: see the new code comments in
include/Target/TargetSchedule.h - class RegisterFile).
This patch doesn't address the case where a write to a part of a register is
followed by a read from the whole register. On Intel chips, high8 registers
(AH/BH/CH/DH) can be stored in separate physical registers. However, a later
(dirty) read of the full register (example: AX/EAX) triggers a merge uOp, which
adds extra latency (and potentially affects the pipe usage).
This is a very interesting article on the subject with a very informative answer
from Peter Cordes:
https://stackoverflow.com/questions/45660139/how-exactly-do-partial-registers-on-haswell-skylake-perform-writing-al-seems-to
In the future, the definition of RegisterFile can be extended with extra information
that may be used to identify delays caused by merge opcodes triggered by a dirty
read of a partial write.
Differential Revision: https://reviews.llvm.org/D49196
llvm-svn: 337123
Diffstat (limited to 'llvm/tools/llvm-mca/RegisterFile.cpp')
-rw-r--r-- | llvm/tools/llvm-mca/RegisterFile.cpp | 135 |
1 files changed, 96 insertions, 39 deletions
diff --git a/llvm/tools/llvm-mca/RegisterFile.cpp b/llvm/tools/llvm-mca/RegisterFile.cpp index 63fe0d2d72e..44de105b899 100644 --- a/llvm/tools/llvm-mca/RegisterFile.cpp +++ b/llvm/tools/llvm-mca/RegisterFile.cpp @@ -26,7 +26,8 @@ namespace mca { RegisterFile::RegisterFile(const llvm::MCSchedModel &SM, const llvm::MCRegisterInfo &mri, unsigned NumRegs) - : MRI(mri), RegisterMappings(mri.getNumRegs(), {WriteRef(), {0, 0}}) { + : MRI(mri), RegisterMappings(mri.getNumRegs(), + {WriteRef(), {IndexPlusCostPairTy(0, 1), 0}}) { initialize(SM, NumRegs); } @@ -71,34 +72,46 @@ void RegisterFile::addRegisterFile(ArrayRef<MCRegisterCostEntry> Entries, // Special case where there is no register class identifier in the set. // An empty set of register classes means: this register file contains all // the physical registers specified by the target. - if (Entries.empty()) { - for (std::pair<WriteRef, IndexPlusCostPairTy> &Mapping : RegisterMappings) - Mapping.second = std::make_pair(RegisterFileIndex, 1U); + // We optimistically assume that a register can be renamed at the cost of a + // single physical register. The constructor of RegisterFile ensures that + // a RegisterMapping exists for each logical register defined by the Target. + if (Entries.empty()) return; - } // Now update the cost of individual registers. for (const MCRegisterCostEntry &RCE : Entries) { const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID); for (const MCPhysReg Reg : RC) { - IndexPlusCostPairTy &Entry = RegisterMappings[Reg].second; - if (Entry.first) { + RegisterRenamingInfo &Entry = RegisterMappings[Reg].second; + IndexPlusCostPairTy &IPC = Entry.IndexPlusCost; + if (IPC.first && IPC.first != RegisterFileIndex) { // The only register file that is allowed to overlap is the default // register file at index #0. The analysis is inaccurate if register // files overlap. 
errs() << "warning: register " << MRI.getName(Reg) << " defined in multiple register files."; } - Entry.first = RegisterFileIndex; - Entry.second = RCE.Cost; + IPC = std::make_pair(RegisterFileIndex, RCE.Cost); + Entry.RenameAs = Reg; + + // Assume the same cost for each sub-register. + for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) { + RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second; + if (!OtherEntry.IndexPlusCost.first && + (!OtherEntry.RenameAs || + MRI.isSuperRegister(*I, OtherEntry.RenameAs))) { + OtherEntry.IndexPlusCost = IPC; + OtherEntry.RenameAs = Reg; + } + } } } } -void RegisterFile::allocatePhysRegs(IndexPlusCostPairTy Entry, +void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry, MutableArrayRef<unsigned> UsedPhysRegs) { - unsigned RegisterFileIndex = Entry.first; - unsigned Cost = Entry.second; + unsigned RegisterFileIndex = Entry.IndexPlusCost.first; + unsigned Cost = Entry.IndexPlusCost.second; if (RegisterFileIndex) { RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; RMT.NumUsedPhysRegs += Cost; @@ -110,10 +123,10 @@ void RegisterFile::allocatePhysRegs(IndexPlusCostPairTy Entry, UsedPhysRegs[0] += Cost; } -void RegisterFile::freePhysRegs(IndexPlusCostPairTy Entry, +void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry, MutableArrayRef<unsigned> FreedPhysRegs) { - unsigned RegisterFileIndex = Entry.first; - unsigned Cost = Entry.second; + unsigned RegisterFileIndex = Entry.IndexPlusCost.first; + unsigned Cost = Entry.IndexPlusCost.second; if (RegisterFileIndex) { RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; RMT.NumUsedPhysRegs -= Cost; @@ -128,12 +141,48 @@ void RegisterFile::freePhysRegs(IndexPlusCostPairTy Entry, void RegisterFile::addRegisterWrite(WriteRef Write, MutableArrayRef<unsigned> UsedPhysRegs, bool ShouldAllocatePhysRegs) { - const WriteState &WS = *Write.getWriteState(); + WriteState &WS = *Write.getWriteState(); unsigned RegID = WS.getRegisterID(); 
assert(RegID && "Adding an invalid register definition?"); - RegisterMapping &Mapping = RegisterMappings[RegID]; - Mapping.first = Write; + LLVM_DEBUG({ + dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex() + << ", " << MRI.getName(RegID) << "]\n"; + }); + + // If RenameAs is equal to RegID, then RegID is subject to register renaming + // and false dependencies on RegID are all eliminated. + + // If RenameAs references the invalid register, then we optimistically assume + // that it can be renamed. In the absence of tablegen descriptors for register + // files, RenameAs is always set to the invalid register ID. In all other + // cases, RenameAs must be either equal to RegID, or it must reference a + // super-register of RegID. + + // If RenameAs is a super-register of RegID, then a write to RegID has always + // a false dependency on RenameAs. The only exception is for when the write + // implicitly clears the upper portion of the underlying register. + // If a write clears its super-registers, then it is renamed as `RenameAs`. + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + if (RRI.RenameAs && RRI.RenameAs != RegID) { + RegID = RRI.RenameAs; + const WriteRef &OtherWrite = RegisterMappings[RegID].first; + + if (!WS.clearsSuperRegisters()) { + // The processor keeps the definition of `RegID` together with register + // `RenameAs`. Since this partial write is not renamed, no physical + // register is allocated. + ShouldAllocatePhysRegs = false; + + if (OtherWrite.getSourceIndex() != Write.getSourceIndex()) { + // This partial write has a false dependency on RenameAs. + WS.setDependentWrite(OtherWrite.getWriteState()); + } + } + } + + // Update the mapping for register RegID including its sub-registers. + RegisterMappings[RegID].first = Write; for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) RegisterMappings[*I].first = Write; @@ -141,9 +190,8 @@ void RegisterFile::addRegisterWrite(WriteRef Write, // hardware. 
For example, zero-latency data-dependency breaking instructions // don't consume physical registers. if (ShouldAllocatePhysRegs) - allocatePhysRegs(Mapping.second, UsedPhysRegs); + allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs); - // If this is a partial update, then we are done. if (!WS.clearsSuperRegisters()) return; @@ -155,42 +203,50 @@ void RegisterFile::removeRegisterWrite(const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs, bool ShouldFreePhysRegs) { unsigned RegID = WS.getRegisterID(); - bool ShouldInvalidateSuperRegs = WS.clearsSuperRegisters(); assert(RegID != 0 && "Invalidating an already invalid register?"); - assert(WS.getCyclesLeft() != -512 && + assert(WS.getCyclesLeft() != UNKNOWN_CYCLES && "Invalidating a write of unknown cycles!"); assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); - RegisterMapping &Mapping = RegisterMappings[RegID]; - WriteRef &WR = Mapping.first; - if (!WR.isValid()) - return; + + unsigned RenameAs = RegisterMappings[RegID].second.RenameAs; + if (RenameAs && RenameAs != RegID) { + RegID = RenameAs; + + if (!WS.clearsSuperRegisters()) { + // Keep the definition of `RegID` together with register `RenameAs`. 
+ ShouldFreePhysRegs = false; + } + } if (ShouldFreePhysRegs) - freePhysRegs(Mapping.second, FreedPhysRegs); + freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs); + WriteRef &WR = RegisterMappings[RegID].first; if (WR.getWriteState() == &WS) WR.invalidate(); for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { - WR = RegisterMappings[*I].first; - if (WR.getWriteState() == &WS) - WR.invalidate(); + WriteRef &OtherWR = RegisterMappings[*I].first; + if (OtherWR.getWriteState() == &WS) + OtherWR.invalidate(); } - if (!ShouldInvalidateSuperRegs) + if (!WS.clearsSuperRegisters()) return; for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) { - WR = RegisterMappings[*I].first; - if (WR.getWriteState() == &WS) - WR.invalidate(); + WriteRef &OtherWR = RegisterMappings[*I].first; + if (OtherWR.getWriteState() == &WS) + OtherWR.invalidate(); } } void RegisterFile::collectWrites(SmallVectorImpl<WriteRef> &Writes, unsigned RegID) const { assert(RegID && RegID < RegisterMappings.size()); + LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " + << MRI.getName(RegID) << '\n'); const WriteRef &WR = RegisterMappings[RegID].first; if (WR.isValid()) Writes.push_back(WR); @@ -225,7 +281,8 @@ unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const { // Find how many new mappings must be created for each register file. 
for (const unsigned RegID : Regs) { - const IndexPlusCostPairTy &Entry = RegisterMappings[RegID].second; + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost; if (Entry.first) NumPhysRegs[Entry.first] += Entry.second; NumPhysRegs[0] += Entry.second; @@ -266,10 +323,10 @@ void RegisterFile::dump() const { const RegisterMapping &RM = RegisterMappings[I]; if (!RM.first.getWriteState()) continue; - const std::pair<unsigned, unsigned> &IndexPlusCost = RM.second; - dbgs() << MRI.getName(I) << ", " << I << ", PRF=" << IndexPlusCost.first - << ", Cost=" << IndexPlusCost.second - << ", "; + const RegisterRenamingInfo &RRI = RM.second; + dbgs() << MRI.getName(I) << ", " << I << ", PRF=" << RRI.IndexPlusCost.first + << ", Cost=" << RRI.IndexPlusCost.second + << ", RenameAs=" << RRI.RenameAs << ", "; RM.first.dump(); dbgs() << '\n'; } |