summaryrefslogtreecommitdiffstats
path: root/llvm/tools/llvm-mca
diff options
context:
space:
mode:
authorAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2018-05-14 15:08:22 +0000
committerAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2018-05-14 15:08:22 +0000
commit8ea3a34e390fd6b5a5754ab28a455cfac04e324b (patch)
treed35e7275c46aed0e0a699c5c407abe3dceef0b13 /llvm/tools/llvm-mca
parent2cb7cf8e87aa873e2a40fba2e93737556a44b477 (diff)
downloadbcm5719-llvm-8ea3a34e390fd6b5a5754ab28a455cfac04e324b.tar.gz
bcm5719-llvm-8ea3a34e390fd6b5a5754ab28a455cfac04e324b.zip
[llvm-mca] Improved support for dependency-breaking instructions.
The tool assumes that a zero-latency instruction that doesn't consume hardware resources is an optimizable dependency-breaking instruction. That means it doesn't have to wait on its register input operands, and it doesn't consume any physical register. The PRF knows how to optimize it at the register renaming stage. llvm-svn: 332249
Diffstat (limited to 'llvm/tools/llvm-mca')
-rw-r--r--llvm/tools/llvm-mca/Dispatch.cpp49
-rw-r--r--llvm/tools/llvm-mca/Dispatch.h34
-rw-r--r--llvm/tools/llvm-mca/Instruction.h3
-rw-r--r--llvm/tools/llvm-mca/Scheduler.cpp3
4 files changed, 54 insertions, 35 deletions
diff --git a/llvm/tools/llvm-mca/Dispatch.cpp b/llvm/tools/llvm-mca/Dispatch.cpp
index 2c0227d33a9..1a3fb7e9476 100644
--- a/llvm/tools/llvm-mca/Dispatch.cpp
+++ b/llvm/tools/llvm-mca/Dispatch.cpp
@@ -91,8 +91,8 @@ void RegisterFile::addRegisterFile(ArrayRef<MCRegisterCostEntry> Entries,
}
}
-void RegisterFile::createNewMappings(IndexPlusCostPairTy Entry,
- MutableArrayRef<unsigned> UsedPhysRegs) {
+void RegisterFile::allocatePhysRegs(IndexPlusCostPairTy Entry,
+ MutableArrayRef<unsigned> UsedPhysRegs) {
unsigned RegisterFileIndex = Entry.first;
unsigned Cost = Entry.second;
if (RegisterFileIndex) {
@@ -106,8 +106,8 @@ void RegisterFile::createNewMappings(IndexPlusCostPairTy Entry,
UsedPhysRegs[0] += Cost;
}
-void RegisterFile::removeMappings(IndexPlusCostPairTy Entry,
- MutableArrayRef<unsigned> FreedPhysRegs) {
+void RegisterFile::freePhysRegs(IndexPlusCostPairTy Entry,
+ MutableArrayRef<unsigned> FreedPhysRegs) {
unsigned RegisterFileIndex = Entry.first;
unsigned Cost = Entry.second;
if (RegisterFileIndex) {
@@ -121,8 +121,9 @@ void RegisterFile::removeMappings(IndexPlusCostPairTy Entry,
FreedPhysRegs[0] += Cost;
}
-void RegisterFile::addRegisterMapping(WriteState &WS,
- MutableArrayRef<unsigned> UsedPhysRegs) {
+void RegisterFile::addRegisterWrite(WriteState &WS,
+ MutableArrayRef<unsigned> UsedPhysRegs,
+ bool ShouldAllocatePhysRegs) {
unsigned RegID = WS.getRegisterID();
assert(RegID && "Adding an invalid register definition?");
@@ -131,7 +132,11 @@ void RegisterFile::addRegisterMapping(WriteState &WS,
for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I)
RegisterMappings[*I].first = &WS;
- createNewMappings(Mapping.second, UsedPhysRegs);
+ // No physical registers are allocated for instructions that are optimized in
+ // hardware. For example, zero-latency data-dependency breaking instructions
+ // don't consume physical registers.
+ if (ShouldAllocatePhysRegs)
+ allocatePhysRegs(Mapping.second, UsedPhysRegs);
// If this is a partial update, then we are done.
if (!WS.fullyUpdatesSuperRegs())
@@ -141,8 +146,9 @@ void RegisterFile::addRegisterMapping(WriteState &WS,
RegisterMappings[*I].first = &WS;
}
-void RegisterFile::invalidateRegisterMapping(
- const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs) {
+void RegisterFile::removeRegisterWrite(
+ const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs,
+ bool ShouldFreePhysRegs) {
unsigned RegID = WS.getRegisterID();
bool ShouldInvalidateSuperRegs = WS.fullyUpdatesSuperRegs();
@@ -154,7 +160,8 @@ void RegisterFile::invalidateRegisterMapping(
if (!Mapping.first)
return;
- removeMappings(Mapping.second, FreedPhysRegs);
+ if (ShouldFreePhysRegs)
+ freePhysRegs(Mapping.second, FreedPhysRegs);
if (Mapping.first == &WS)
Mapping.first = nullptr;
@@ -261,8 +268,10 @@ void DispatchUnit::notifyInstructionDispatched(const InstRef &IR,
void DispatchUnit::notifyInstructionRetired(const InstRef &IR) {
LLVM_DEBUG(dbgs() << "[E] Instruction Retired: " << IR << '\n');
SmallVector<unsigned, 4> FreedRegs(RAT->getNumRegisterFiles());
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+
for (const std::unique_ptr<WriteState> &WS : IR.getInstruction()->getDefs())
- RAT->invalidateRegisterMapping(*WS.get(), FreedRegs);
+ RAT->removeRegisterWrite(*WS.get(), FreedRegs, !Desc.isZeroLatency());
Owner->notifyInstructionEvent(HWInstructionRetiredEvent(IR, FreedRegs));
Owner->eraseInstruction(IR);
}
@@ -339,18 +348,22 @@ void DispatchUnit::dispatch(InstRef IR, const MCSubtargetInfo &STI) {
AvailableEntries -= NumMicroOps;
}
- // Update RAW dependencies if this instruction is not a zero-latency
- // instruction. The assumption is that a zero-latency instruction doesn't
- // require to be issued to the scheduler for execution. More importantly, it
- // doesn't have to wait on the register input operands.
- if (Desc.MaxLatency || !Desc.Resources.empty())
+ // A dependency-breaking instruction doesn't have to wait on the register
+ // input operands, and it is often optimized at register renaming stage.
+ // Update RAW dependencies if this instruction is not a dependency-breaking
+ // instruction. A dependency-breaking instruction is a zero-latency
+ // instruction that doesn't consume hardware resources.
+ // An example of a dependency-breaking instruction on X86 is a zero-idiom XOR.
+ if (!Desc.isZeroLatency())
for (std::unique_ptr<ReadState> &RS : IS.getUses())
updateRAWDependencies(*RS, STI);
- // Allocate new mappings.
+ // By default, a dependency-breaking zero-latency instruction is expected to
+ // be optimized at register renaming stage. That means, no physical register
+ // is allocated to the instruction.
SmallVector<unsigned, 4> RegisterFiles(RAT->getNumRegisterFiles());
for (std::unique_ptr<WriteState> &WS : IS.getDefs())
- RAT->addRegisterMapping(*WS, RegisterFiles);
+ RAT->addRegisterWrite(*WS, RegisterFiles, !Desc.isZeroLatency());
// Reserve slots in the RCU, and notify the instruction that it has been
// dispatched to the schedulers for execution.
diff --git a/llvm/tools/llvm-mca/Dispatch.h b/llvm/tools/llvm-mca/Dispatch.h
index b6df6ad2fdb..74b98f1b7ab 100644
--- a/llvm/tools/llvm-mca/Dispatch.h
+++ b/llvm/tools/llvm-mca/Dispatch.h
@@ -104,14 +104,14 @@ class RegisterFile {
// Allocates register mappings in register file specified by the
// IndexPlusCostPairTy object. This method is called from addRegisterWrite.
- void createNewMappings(IndexPlusCostPairTy IPC,
- llvm::MutableArrayRef<unsigned> UsedPhysRegs);
+ void allocatePhysRegs(IndexPlusCostPairTy IPC,
+ llvm::MutableArrayRef<unsigned> UsedPhysRegs);
// Removes a previously allocated mapping from the register file referenced
// by the IndexPlusCostPairTy object. This method is called from
// removeRegisterWrite.
- void removeMappings(IndexPlusCostPairTy IPC,
- llvm::MutableArrayRef<unsigned> FreedPhysRegs);
+ void freePhysRegs(IndexPlusCostPairTy IPC,
+ llvm::MutableArrayRef<unsigned> FreedPhysRegs);
// Create an instance of RegisterMappingTracker for every register file
// specified by the processor model.
@@ -126,17 +126,21 @@ public:
initialize(SM, NumRegs);
}
- // Creates a new register mapping for RegID.
- // This reserves a microarchitectural register in every register file that
- // contains RegID.
- void addRegisterMapping(WriteState &WS,
- llvm::MutableArrayRef<unsigned> UsedPhysRegs);
-
- // Invalidates register mappings associated to the input WriteState object.
- // This releases previously allocated mappings for the physical register
- // associated to the WriteState.
- void invalidateRegisterMapping(const WriteState &WS,
- llvm::MutableArrayRef<unsigned> FreedPhysRegs);
+ // This method updates the data dependency graph by inserting a new register
+ // definition. This method is also responsible for updating the number of used
+ // physical registers in the register file(s). The number of physical
+ // registers is updated only if flag ShouldAllocatePhysRegs is set.
+ void addRegisterWrite(WriteState &WS,
+ llvm::MutableArrayRef<unsigned> UsedPhysRegs,
+ bool ShouldAllocatePhysRegs = true);
+
+ // Updates the data dependency graph by removing a write. It also updates the
+ // internal state of the register file(s) by freeing physical registers.
+ // The number of physical registers is updated only if flag ShouldFreePhysRegs
+ // is set.
+ void removeRegisterWrite(const WriteState &WS,
+ llvm::MutableArrayRef<unsigned> FreedPhysRegs,
+ bool ShouldFreePhysRegs = true);
// Checks if there are enough microarchitectural registers in the register
// files. Returns a "response mask" where each bit is the response from a
diff --git a/llvm/tools/llvm-mca/Instruction.h b/llvm/tools/llvm-mca/Instruction.h
index e90f515d772..f68951c4118 100644
--- a/llvm/tools/llvm-mca/Instruction.h
+++ b/llvm/tools/llvm-mca/Instruction.h
@@ -269,6 +269,9 @@ struct InstrDesc {
bool MayLoad;
bool MayStore;
bool HasSideEffects;
+
+ // A zero latency instruction doesn't consume any scheduler resources.
+ bool isZeroLatency() const { return !MaxLatency && Resources.empty(); }
};
/// An instruction dispatched to the out-of-order backend.
diff --git a/llvm/tools/llvm-mca/Scheduler.cpp b/llvm/tools/llvm-mca/Scheduler.cpp
index 4abdc97bb0e..3ddcf758363 100644
--- a/llvm/tools/llvm-mca/Scheduler.cpp
+++ b/llvm/tools/llvm-mca/Scheduler.cpp
@@ -260,13 +260,12 @@ void Scheduler::scheduleInstruction(InstRef &IR) {
// targets, zero-idiom instructions (for example: a xor that clears the value
// of a register) are treated specially, and are often eliminated at register
// renaming stage.
- bool IsZeroLatency = !Desc.MaxLatency && Desc.Resources.empty();
// Instructions that use an in-order dispatch/issue processor resource must be
// issued immediately to the pipeline(s). Any other in-order buffered
// resource (i.e. BufferSize=1) is consumed.
- if (!IsZeroLatency && !Resources->mustIssueImmediately(Desc)) {
+ if (!Desc.isZeroLatency() && !Resources->mustIssueImmediately(Desc)) {
LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding " << IR
<< " to the Ready Queue\n");
ReadyQueue[IR.getSourceIndex()] = IR.getInstruction();
OpenPOWER on IntegriCloud