summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2018-11-22 12:48:57 +0000
committerAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2018-11-22 12:48:57 +0000
commit1cb8a3c690ab86925ab27b394b66f08a3e69fa15 (patch)
tree7162042c5a372b8735cba1a5bb8b04dd3f985064
parentb565e6093bc349f935a3944c0c8667e9bfa7927c (diff)
downloadbcm5719-llvm-1cb8a3c690ab86925ab27b394b66f08a3e69fa15.tar.gz
bcm5719-llvm-1cb8a3c690ab86925ab27b394b66f08a3e69fa15.zip
[llvm-mca] Fix an invalid memory read introduced by r346487.
This patch fixes an invalid memory read introduced by r346487. Before this patch, partial register write had to query the latency of the dependent full register write by calling a method on the full write descriptor. However, if the full write is from an already retired instruction, chances are that the EntryStage already reclaimed its memory. In some parial register write tests, valgrind was reporting an invalid memory read. This change fixes the invalid memory access problem. Writes are now responsible for tracking dependent partial register writes, and notify them in the event of instruction issued. That means, partial register writes no longer need to query their associated full write to check when they are ready to execute. Added test X86/BtVer2/partial-reg-update-7.s llvm-svn: 347459
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s104
-rw-r--r--llvm/tools/llvm-mca/include/Instruction.h28
-rw-r--r--llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp6
-rw-r--r--llvm/tools/llvm-mca/lib/Instruction.cpp31
4 files changed, 153 insertions, 16 deletions
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s
new file mode 100644
index 00000000000..720a1ed1440
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s
@@ -0,0 +1,104 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=5 < %s | FileCheck %s
+
+sete %r9b
+movzbl %al, %eax
+shll $2, %eax
+imull %ecx, %eax
+cmpl $1025, %eax
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.19
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 sete %r9b
+# CHECK-NEXT: 1 1 0.50 movzbl %al, %eax
+# CHECK-NEXT: 1 1 0.50 shll $2, %eax
+# CHECK-NEXT: 2 3 1.00 imull %ecx, %eax
+# CHECK-NEXT: 1 1 0.50 cmpl $1025, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - JALU0
+# CHECK-NEXT: [1] - JALU1
+# CHECK-NEXT: [2] - JDiv
+# CHECK-NEXT: [3] - JFPA
+# CHECK-NEXT: [4] - JFPM
+# CHECK-NEXT: [5] - JFPU0
+# CHECK-NEXT: [6] - JFPU1
+# CHECK-NEXT: [7] - JLAGU
+# CHECK-NEXT: [8] - JMul
+# CHECK-NEXT: [9] - JSAGU
+# CHECK-NEXT: [10] - JSTC
+# CHECK-NEXT: [11] - JVALU0
+# CHECK-NEXT: [12] - JVALU1
+# CHECK-NEXT: [13] - JVIMUL
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: 2.00 3.00 - - - - - - 1.00 - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: 0.99 0.01 - - - - - - - - - - - - sete %r9b
+# CHECK-NEXT: 0.01 0.99 - - - - - - - - - - - - movzbl %al, %eax
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - shll $2, %eax
+# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %ecx, %eax
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - cmpl $1025, %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeER . . . . . . sete %r9b
+# CHECK-NEXT: [0,1] DeER . . . . . . movzbl %al, %eax
+# CHECK-NEXT: [0,2] .DeER. . . . . . shll $2, %eax
+# CHECK-NEXT: [0,3] . DeeeER . . . . . imull %ecx, %eax
+# CHECK-NEXT: [0,4] . D==eER . . . . . cmpl $1025, %eax
+# CHECK-NEXT: [1,0] . D===eER. . . . . sete %r9b
+# CHECK-NEXT: [1,1] . D=eE-R. . . . . movzbl %al, %eax
+# CHECK-NEXT: [1,2] . D==eE-R . . . . shll $2, %eax
+# CHECK-NEXT: [1,3] . D==eeeER . . . . imull %ecx, %eax
+# CHECK-NEXT: [1,4] . .D====eER . . . . cmpl $1025, %eax
+# CHECK-NEXT: [2,0] . .D=====eER. . . . sete %r9b
+# CHECK-NEXT: [2,1] . . D===eE-R. . . . movzbl %al, %eax
+# CHECK-NEXT: [2,2] . . D====eE-R . . . shll $2, %eax
+# CHECK-NEXT: [2,3] . . D====eeeER . . . imull %ecx, %eax
+# CHECK-NEXT: [2,4] . . D======eER . . . cmpl $1025, %eax
+# CHECK-NEXT: [3,0] . . D=======eER. . . sete %r9b
+# CHECK-NEXT: [3,1] . . D=====eE-R. . . movzbl %al, %eax
+# CHECK-NEXT: [3,2] . . D======eE-R . . shll $2, %eax
+# CHECK-NEXT: [3,3] . . .D======eeeER . . imull %ecx, %eax
+# CHECK-NEXT: [3,4] . . . D========eER . . cmpl $1025, %eax
+# CHECK-NEXT: [4,0] . . . D=========eER. . sete %r9b
+# CHECK-NEXT: [4,1] . . . D=======eE-R. . movzbl %al, %eax
+# CHECK-NEXT: [4,2] . . . D========eE-R . shll $2, %eax
+# CHECK-NEXT: [4,3] . . . D========eeeER. imull %ecx, %eax
+# CHECK-NEXT: [4,4] . . . D==========eER cmpl $1025, %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 5 5.8 0.2 0.0 sete %r9b
+# CHECK-NEXT: 1. 5 4.2 0.2 0.8 movzbl %al, %eax
+# CHECK-NEXT: 2. 5 5.0 0.0 0.8 shll $2, %eax
+# CHECK-NEXT: 3. 5 5.0 0.0 0.0 imull %ecx, %eax
+# CHECK-NEXT: 4. 5 7.0 0.0 0.0 cmpl $1025, %eax
diff --git a/llvm/tools/llvm-mca/include/Instruction.h b/llvm/tools/llvm-mca/include/Instruction.h
index 7407283bca2..2e676088408 100644
--- a/llvm/tools/llvm-mca/include/Instruction.h
+++ b/llvm/tools/llvm-mca/include/Instruction.h
@@ -123,8 +123,10 @@ class WriteState {
// that we don't break the WAW, and the two writes can be merged together.
const WriteState *DependentWrite;
- // Number of writes that are in a WAW dependency with this write.
- unsigned NumWriteUsers;
+ // A partial write that is in a false dependency with this write.
+ WriteState *PartialWrite;
+
+ unsigned DependentWriteCyclesLeft;
// A list of dependent reads. Users is a set of dependent
// reads. A dependent read is added to the set only if CyclesLeft
@@ -139,7 +141,8 @@ public:
bool clearsSuperRegs = false, bool writesZero = false)
: WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
- IsEliminated(false), DependentWrite(nullptr), NumWriteUsers(0U) {}
+ IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr),
+ DependentWriteCyclesLeft(0) {}
WriteState(const WriteState &Other) = default;
WriteState &operator=(const WriteState &Other) = default;
@@ -151,8 +154,17 @@ public:
unsigned getLatency() const { return WD->Latency; }
void addUser(ReadState *Use, int ReadAdvance);
+ void addUser(WriteState *Use);
+
+ unsigned getDependentWriteCyclesLeft() const { return DependentWriteCyclesLeft; }
+
+ unsigned getNumUsers() const {
+ unsigned NumUsers = Users.size();
+ if (PartialWrite)
+ ++NumUsers;
+ return NumUsers;
+ }
- unsigned getNumUsers() const { return Users.size() + NumWriteUsers; }
bool clearsSuperRegisters() const { return ClearsSuperRegs; }
bool isWriteZero() const { return WritesZero; }
bool isEliminated() const { return IsEliminated; }
@@ -161,10 +173,12 @@ public:
}
const WriteState *getDependentWrite() const { return DependentWrite; }
- void setDependentWrite(WriteState *Other) {
- DependentWrite = Other;
- ++Other->NumWriteUsers;
+ void setDependentWrite(WriteState *Other) { DependentWrite = Other; }
+ void writeStartEvent(unsigned Cycles) {
+ DependentWriteCyclesLeft = Cycles;
+ DependentWrite = nullptr;
}
+
void setWriteZero() { WritesZero = true; }
void setEliminated() {
assert(Users.empty() && "Write is in an inconsistent state.");
diff --git a/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp b/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp
index 6bc63a0db50..f96e4cab4b9 100644
--- a/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp
+++ b/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp
@@ -185,11 +185,11 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
// register is allocated.
ShouldAllocatePhysRegs = false;
- if (OtherWrite.getWriteState() &&
- (OtherWrite.getSourceIndex() != Write.getSourceIndex())) {
+ WriteState *OtherWS = OtherWrite.getWriteState();
+ if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) {
// This partial write has a false dependency on RenameAs.
assert(!IsEliminated && "Unexpected partial update!");
- WS.setDependentWrite(OtherWrite.getWriteState());
+ OtherWS->addUser(&WS);
}
}
}
diff --git a/llvm/tools/llvm-mca/lib/Instruction.cpp b/llvm/tools/llvm-mca/lib/Instruction.cpp
index 832a6199f00..5c46ee995fe 100644
--- a/llvm/tools/llvm-mca/lib/Instruction.cpp
+++ b/llvm/tools/llvm-mca/lib/Instruction.cpp
@@ -49,6 +49,10 @@ void WriteState::onInstructionIssued() {
unsigned ReadCycles = std::max(0, CyclesLeft - User.second);
RS->writeStartEvent(ReadCycles);
}
+
+ // Notify any writes that are in a false dependency with this write.
+ if (PartialWrite)
+ PartialWrite->writeStartEvent(CyclesLeft);
}
void WriteState::addUser(ReadState *User, int ReadAdvance) {
@@ -65,12 +69,26 @@ void WriteState::addUser(ReadState *User, int ReadAdvance) {
Users.insert(NewPair);
}
+void WriteState::addUser(WriteState *User) {
+ if (CyclesLeft != UNKNOWN_CYCLES) {
+ User->writeStartEvent(std::max(0, CyclesLeft));
+ return;
+ }
+
+ assert(!PartialWrite && "PartialWrite already set!");
+ PartialWrite = User;
+ User->setDependentWrite(this);
+}
+
void WriteState::cycleEvent() {
// Note: CyclesLeft can be a negative number. It is an error to
// make it an unsigned quantity because users of this write may
// specify a negative ReadAdvance.
if (CyclesLeft != UNKNOWN_CYCLES)
CyclesLeft--;
+
+ if (DependentWriteCyclesLeft)
+ DependentWriteCyclesLeft--;
}
void ReadState::cycleEvent() {
@@ -143,13 +161,11 @@ void Instruction::update() {
// A partial register write cannot complete before a dependent write.
auto IsDefReady = [&](const WriteState &Def) {
- if (const WriteState *Write = Def.getDependentWrite()) {
- int WriteLatency = Write->getCyclesLeft();
- if (WriteLatency == UNKNOWN_CYCLES)
- return false;
- return static_cast<unsigned>(WriteLatency) < getLatency();
+ if (!Def.getDependentWrite()) {
+ unsigned CyclesLeft = Def.getDependentWriteCyclesLeft();
+ return !CyclesLeft || CyclesLeft < getLatency();
}
- return true;
+ return false;
};
if (all_of(getDefs(), IsDefReady))
@@ -164,6 +180,9 @@ void Instruction::cycleEvent() {
for (ReadState &Use : getUses())
Use.cycleEvent();
+ for (WriteState &Def : getDefs())
+ Def.cycleEvent();
+
update();
return;
}
OpenPOWER on IntegriCloud