summaryrefslogtreecommitdiffstats
path: root/llvm/tools/llvm-mca/RegisterFile.cpp
diff options
context:
space:
mode:
authorAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2018-07-15 11:01:38 +0000
committerAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2018-07-15 11:01:38 +0000
commitff630c2cdc7f805091e8d8cce40692bb9441fd3a (patch)
treec5a9574b8fa2836d9ad01248130a3b9f25bbfd76 /llvm/tools/llvm-mca/RegisterFile.cpp
parent0603bae41cf228ba6489d9aa320835abcce509e5 (diff)
downloadbcm5719-llvm-ff630c2cdc7f805091e8d8cce40692bb9441fd3a.tar.gz
bcm5719-llvm-ff630c2cdc7f805091e8d8cce40692bb9441fd3a.zip
[llvm-mca][BtVer2] teach how to identify false dependencies on partially written
registers. The goal of this patch is to improve the throughput analysis in llvm-mca for the case where instructions perform partial register writes. On x86, partial register writes are quite difficult to model, mainly because different processors tend to implement different register merging schemes in hardware. When the code contains partial register writes, the IPC (instructions per cycles) estimated by llvm-mca tends to diverge quite significantly from the observed IPC (using perf). Modern AMD processors (at least, from Bulldozer onwards) don't rename partial registers. Quoting Agner Fog's microarchitecture.pdf: " The processor always keeps the different parts of an integer register together. For example, AL and AH are not treated as independent by the out-of-order execution mechanism. An instruction that writes to part of a register will therefore have a false dependence on any previous write to the same register or any part of it." This patch is a first important step towards improving the analysis of partial register updates. It changes the semantic of RegisterFile descriptors in tablegen, and teaches llvm-mca how to identify false dependences in the presence of partial register writes (for more details: see the new code comments in include/Target/TargetSchedule.h - class RegisterFile). This patch doesn't address the case where a write to a part of a register is followed by a read from the whole register. On Intel chips, high8 registers (AH/BH/CH/DH)) can be stored in separate physical registers. However, a later (dirty) read of the full register (example: AX/EAX) triggers a merge uOp, which adds extra latency (and potentially affects the pipe usage). This is a very interesting article on the subject with a very informative answer from Peter Cordes: https://stackoverflow.com/questions/45660139/how-exactly-do-partial-registers-on-haswell-skylake-perform-writing-al-seems-to In future, the definition of RegisterFile can be extended with extra information that may be used to identify delays caused by merge opcodes triggered by a dirty read of a partial write. Differential Revision: https://reviews.llvm.org/D49196 llvm-svn: 337123
Diffstat (limited to 'llvm/tools/llvm-mca/RegisterFile.cpp')
-rw-r--r--llvm/tools/llvm-mca/RegisterFile.cpp135
1 files changed, 96 insertions, 39 deletions
diff --git a/llvm/tools/llvm-mca/RegisterFile.cpp b/llvm/tools/llvm-mca/RegisterFile.cpp
index 63fe0d2d72e..44de105b899 100644
--- a/llvm/tools/llvm-mca/RegisterFile.cpp
+++ b/llvm/tools/llvm-mca/RegisterFile.cpp
@@ -26,7 +26,8 @@ namespace mca {
RegisterFile::RegisterFile(const llvm::MCSchedModel &SM,
const llvm::MCRegisterInfo &mri, unsigned NumRegs)
- : MRI(mri), RegisterMappings(mri.getNumRegs(), {WriteRef(), {0, 0}}) {
+ : MRI(mri), RegisterMappings(mri.getNumRegs(),
+ {WriteRef(), {IndexPlusCostPairTy(0, 1), 0}}) {
initialize(SM, NumRegs);
}
@@ -71,34 +72,46 @@ void RegisterFile::addRegisterFile(ArrayRef<MCRegisterCostEntry> Entries,
// Special case where there is no register class identifier in the set.
// An empty set of register classes means: this register file contains all
// the physical registers specified by the target.
- if (Entries.empty()) {
- for (std::pair<WriteRef, IndexPlusCostPairTy> &Mapping : RegisterMappings)
- Mapping.second = std::make_pair(RegisterFileIndex, 1U);
+ // We optimistically assume that a register can be renamed at the cost of a
+ // single physical register. The constructor of RegisterFile ensures that
+ // a RegisterMapping exists for each logical register defined by the Target.
+ if (Entries.empty())
return;
- }
// Now update the cost of individual registers.
for (const MCRegisterCostEntry &RCE : Entries) {
const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID);
for (const MCPhysReg Reg : RC) {
- IndexPlusCostPairTy &Entry = RegisterMappings[Reg].second;
- if (Entry.first) {
+ RegisterRenamingInfo &Entry = RegisterMappings[Reg].second;
+ IndexPlusCostPairTy &IPC = Entry.IndexPlusCost;
+ if (IPC.first && IPC.first != RegisterFileIndex) {
// The only register file that is allowed to overlap is the default
// register file at index #0. The analysis is inaccurate if register
// files overlap.
errs() << "warning: register " << MRI.getName(Reg)
<< " defined in multiple register files.";
}
- Entry.first = RegisterFileIndex;
- Entry.second = RCE.Cost;
+ IPC = std::make_pair(RegisterFileIndex, RCE.Cost);
+ Entry.RenameAs = Reg;
+
+ // Assume the same cost for each sub-register.
+ for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) {
+ RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second;
+ if (!OtherEntry.IndexPlusCost.first &&
+ (!OtherEntry.RenameAs ||
+ MRI.isSuperRegister(*I, OtherEntry.RenameAs))) {
+ OtherEntry.IndexPlusCost = IPC;
+ OtherEntry.RenameAs = Reg;
+ }
+ }
}
}
}
-void RegisterFile::allocatePhysRegs(IndexPlusCostPairTy Entry,
+void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry,
MutableArrayRef<unsigned> UsedPhysRegs) {
- unsigned RegisterFileIndex = Entry.first;
- unsigned Cost = Entry.second;
+ unsigned RegisterFileIndex = Entry.IndexPlusCost.first;
+ unsigned Cost = Entry.IndexPlusCost.second;
if (RegisterFileIndex) {
RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
RMT.NumUsedPhysRegs += Cost;
@@ -110,10 +123,10 @@ void RegisterFile::allocatePhysRegs(IndexPlusCostPairTy Entry,
UsedPhysRegs[0] += Cost;
}
-void RegisterFile::freePhysRegs(IndexPlusCostPairTy Entry,
+void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry,
MutableArrayRef<unsigned> FreedPhysRegs) {
- unsigned RegisterFileIndex = Entry.first;
- unsigned Cost = Entry.second;
+ unsigned RegisterFileIndex = Entry.IndexPlusCost.first;
+ unsigned Cost = Entry.IndexPlusCost.second;
if (RegisterFileIndex) {
RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
RMT.NumUsedPhysRegs -= Cost;
@@ -128,12 +141,48 @@ void RegisterFile::freePhysRegs(IndexPlusCostPairTy Entry,
void RegisterFile::addRegisterWrite(WriteRef Write,
MutableArrayRef<unsigned> UsedPhysRegs,
bool ShouldAllocatePhysRegs) {
- const WriteState &WS = *Write.getWriteState();
+ WriteState &WS = *Write.getWriteState();
unsigned RegID = WS.getRegisterID();
assert(RegID && "Adding an invalid register definition?");
- RegisterMapping &Mapping = RegisterMappings[RegID];
- Mapping.first = Write;
+ LLVM_DEBUG({
+ dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex()
+ << ", " << MRI.getName(RegID) << "]\n";
+ });
+
+ // If RenameAs is equal to RegID, then RegID is subject to register renaming
+ // and false dependencies on RegID are all eliminated.
+
+ // If RenameAs references the invalid register, then we optimistically assume
+ // that it can be renamed. In the absence of tablegen descriptors for register
+ // files, RenameAs is always set to the invalid register ID. In all other
+ // cases, RenameAs must be either equal to RegID, or it must reference a
+ // super-register of RegID.
+
+ // If RenameAs is a super-register of RegID, then a write to RegID has always
+ // a false dependency on RenameAs. The only exception is for when the write
+ // implicitly clears the upper portion of the underlying register.
+ // If a write clears its super-registers, then it is renamed as `RenameAs`.
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ if (RRI.RenameAs && RRI.RenameAs != RegID) {
+ RegID = RRI.RenameAs;
+ const WriteRef &OtherWrite = RegisterMappings[RegID].first;
+
+ if (!WS.clearsSuperRegisters()) {
+ // The processor keeps the definition of `RegID` together with register
+ // `RenameAs`. Since this partial write is not renamed, no physical
+ // register is allocated.
+ ShouldAllocatePhysRegs = false;
+
+ if (OtherWrite.getSourceIndex() != Write.getSourceIndex()) {
+ // This partial write has a false dependency on RenameAs.
+ WS.setDependentWrite(OtherWrite.getWriteState());
+ }
+ }
+ }
+
+ // Update the mapping for register RegID including its sub-registers.
+ RegisterMappings[RegID].first = Write;
for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I)
RegisterMappings[*I].first = Write;
@@ -141,9 +190,8 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
// hardware. For example, zero-latency data-dependency breaking instructions
// don't consume physical registers.
if (ShouldAllocatePhysRegs)
- allocatePhysRegs(Mapping.second, UsedPhysRegs);
+ allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
- // If this is a partial update, then we are done.
if (!WS.clearsSuperRegisters())
return;
@@ -155,42 +203,50 @@ void RegisterFile::removeRegisterWrite(const WriteState &WS,
MutableArrayRef<unsigned> FreedPhysRegs,
bool ShouldFreePhysRegs) {
unsigned RegID = WS.getRegisterID();
- bool ShouldInvalidateSuperRegs = WS.clearsSuperRegisters();
assert(RegID != 0 && "Invalidating an already invalid register?");
- assert(WS.getCyclesLeft() != -512 &&
+ assert(WS.getCyclesLeft() != UNKNOWN_CYCLES &&
"Invalidating a write of unknown cycles!");
assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!");
- RegisterMapping &Mapping = RegisterMappings[RegID];
- WriteRef &WR = Mapping.first;
- if (!WR.isValid())
- return;
+
+ unsigned RenameAs = RegisterMappings[RegID].second.RenameAs;
+ if (RenameAs && RenameAs != RegID) {
+ RegID = RenameAs;
+
+ if (!WS.clearsSuperRegisters()) {
+ // Keep the definition of `RegID` together with register `RenameAs`.
+ ShouldFreePhysRegs = false;
+ }
+ }
if (ShouldFreePhysRegs)
- freePhysRegs(Mapping.second, FreedPhysRegs);
+ freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs);
+ WriteRef &WR = RegisterMappings[RegID].first;
if (WR.getWriteState() == &WS)
WR.invalidate();
for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
- WR = RegisterMappings[*I].first;
- if (WR.getWriteState() == &WS)
- WR.invalidate();
+ WriteRef &OtherWR = RegisterMappings[*I].first;
+ if (OtherWR.getWriteState() == &WS)
+ OtherWR.invalidate();
}
- if (!ShouldInvalidateSuperRegs)
+ if (!WS.clearsSuperRegisters())
return;
for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
- WR = RegisterMappings[*I].first;
- if (WR.getWriteState() == &WS)
- WR.invalidate();
+ WriteRef &OtherWR = RegisterMappings[*I].first;
+ if (OtherWR.getWriteState() == &WS)
+ OtherWR.invalidate();
}
}
void RegisterFile::collectWrites(SmallVectorImpl<WriteRef> &Writes,
unsigned RegID) const {
assert(RegID && RegID < RegisterMappings.size());
+ LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register "
+ << MRI.getName(RegID) << '\n');
const WriteRef &WR = RegisterMappings[RegID].first;
if (WR.isValid())
Writes.push_back(WR);
@@ -225,7 +281,8 @@ unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
// Find how many new mappings must be created for each register file.
for (const unsigned RegID : Regs) {
- const IndexPlusCostPairTy &Entry = RegisterMappings[RegID].second;
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost;
if (Entry.first)
NumPhysRegs[Entry.first] += Entry.second;
NumPhysRegs[0] += Entry.second;
@@ -266,10 +323,10 @@ void RegisterFile::dump() const {
const RegisterMapping &RM = RegisterMappings[I];
if (!RM.first.getWriteState())
continue;
- const std::pair<unsigned, unsigned> &IndexPlusCost = RM.second;
- dbgs() << MRI.getName(I) << ", " << I << ", PRF=" << IndexPlusCost.first
- << ", Cost=" << IndexPlusCost.second
- << ", ";
+ const RegisterRenamingInfo &RRI = RM.second;
+ dbgs() << MRI.getName(I) << ", " << I << ", PRF=" << RRI.IndexPlusCost.first
+ << ", Cost=" << RRI.IndexPlusCost.second
+ << ", RenameAs=" << RRI.RenameAs << ", ";
RM.first.dump();
dbgs() << '\n';
}
OpenPOWER on IntegriCloud