diff options
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s | 104 | ||||
-rw-r--r-- | llvm/tools/llvm-mca/include/Instruction.h | 28 | ||||
-rw-r--r-- | llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp | 6 | ||||
-rw-r--r-- | llvm/tools/llvm-mca/lib/Instruction.cpp | 31 |
4 files changed, 153 insertions, 16 deletions
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s new file mode 100644 index 00000000000..720a1ed1440 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s @@ -0,0 +1,104 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=5 < %s | FileCheck %s + +sete %r9b +movzbl %al, %eax +shll $2, %eax +imull %ecx, %eax +cmpl $1025, %eax + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total uOps: 600 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.19 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.50 sete %r9b +# CHECK-NEXT: 1 1 0.50 movzbl %al, %eax +# CHECK-NEXT: 1 1 0.50 shll $2, %eax +# CHECK-NEXT: 2 3 1.00 imull %ecx, %eax +# CHECK-NEXT: 1 1 0.50 cmpl $1025, %eax + +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: 2.00 3.00 - - - - - - 1.00 - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: 0.99 0.01 - - - - - - - - - - - - sete %r9b +# CHECK-NEXT: 0.01 0.99 - - - - - - - - - - - - movzbl %al, %eax +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - shll $2, %eax +# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %ecx, %eax +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - cmpl $1025, %eax + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012345678 + +# CHECK: [0,0] DeER . . . . . . sete %r9b +# CHECK-NEXT: [0,1] DeER . . . . . . movzbl %al, %eax +# CHECK-NEXT: [0,2] .DeER. . . . . . shll $2, %eax +# CHECK-NEXT: [0,3] . DeeeER . . . . . imull %ecx, %eax +# CHECK-NEXT: [0,4] . D==eER . . . . . cmpl $1025, %eax +# CHECK-NEXT: [1,0] . D===eER. . . . . sete %r9b +# CHECK-NEXT: [1,1] . D=eE-R. . . . . movzbl %al, %eax +# CHECK-NEXT: [1,2] . D==eE-R . . . . shll $2, %eax +# CHECK-NEXT: [1,3] . D==eeeER . . . . imull %ecx, %eax +# CHECK-NEXT: [1,4] . .D====eER . . . . cmpl $1025, %eax +# CHECK-NEXT: [2,0] . .D=====eER. . . . sete %r9b +# CHECK-NEXT: [2,1] . . D===eE-R. . . . movzbl %al, %eax +# CHECK-NEXT: [2,2] . . D====eE-R . . . shll $2, %eax +# CHECK-NEXT: [2,3] . . D====eeeER . . . imull %ecx, %eax +# CHECK-NEXT: [2,4] . . D======eER . . . cmpl $1025, %eax +# CHECK-NEXT: [3,0] . . D=======eER. . . sete %r9b +# CHECK-NEXT: [3,1] . . D=====eE-R. . . movzbl %al, %eax +# CHECK-NEXT: [3,2] . . D======eE-R . . shll $2, %eax +# CHECK-NEXT: [3,3] . . .D======eeeER . . imull %ecx, %eax +# CHECK-NEXT: [3,4] . . . D========eER . . cmpl $1025, %eax +# CHECK-NEXT: [4,0] . . . D=========eER. . sete %r9b +# CHECK-NEXT: [4,1] . . . D=======eE-R. . movzbl %al, %eax +# CHECK-NEXT: [4,2] . . . D========eE-R . shll $2, %eax +# CHECK-NEXT: [4,3] . . . D========eeeER. imull %ecx, %eax +# CHECK-NEXT: [4,4] . . . D==========eER cmpl $1025, %eax + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 5 5.8 0.2 0.0 sete %r9b +# CHECK-NEXT: 1. 5 4.2 0.2 0.8 movzbl %al, %eax +# CHECK-NEXT: 2. 5 5.0 0.0 0.8 shll $2, %eax +# CHECK-NEXT: 3. 5 5.0 0.0 0.0 imull %ecx, %eax +# CHECK-NEXT: 4. 5 7.0 0.0 0.0 cmpl $1025, %eax diff --git a/llvm/tools/llvm-mca/include/Instruction.h b/llvm/tools/llvm-mca/include/Instruction.h index 7407283bca2..2e676088408 100644 --- a/llvm/tools/llvm-mca/include/Instruction.h +++ b/llvm/tools/llvm-mca/include/Instruction.h @@ -123,8 +123,10 @@ class WriteState { // that we don't break the WAW, and the two writes can be merged together. const WriteState *DependentWrite; - // Number of writes that are in a WAW dependency with this write. - unsigned NumWriteUsers; + // A partial write that is in a false dependency with this write. + WriteState *PartialWrite; + + unsigned DependentWriteCyclesLeft; // A list of dependent reads. Users is a set of dependent // reads. A dependent read is added to the set only if CyclesLeft @@ -139,7 +141,8 @@ public: bool clearsSuperRegs = false, bool writesZero = false) : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero), - IsEliminated(false), DependentWrite(nullptr), NumWriteUsers(0U) {} + IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr), + DependentWriteCyclesLeft(0) {} WriteState(const WriteState &Other) = default; WriteState &operator=(const WriteState &Other) = default; @@ -151,8 +154,17 @@ public: unsigned getLatency() const { return WD->Latency; } void addUser(ReadState *Use, int ReadAdvance); + void addUser(WriteState *Use); + + unsigned getDependentWriteCyclesLeft() const { return DependentWriteCyclesLeft; } + + unsigned getNumUsers() const { + unsigned NumUsers = Users.size(); + if (PartialWrite) + ++NumUsers; + return NumUsers; + } - unsigned getNumUsers() const { return Users.size() + NumWriteUsers; } bool clearsSuperRegisters() const { return ClearsSuperRegs; } bool isWriteZero() const { return WritesZero; } bool isEliminated() const { return IsEliminated; } @@ -161,10 +173,12 @@ public: } const WriteState *getDependentWrite() const { return DependentWrite; } - void setDependentWrite(WriteState *Other) { - DependentWrite = Other; - ++Other->NumWriteUsers; + void setDependentWrite(WriteState *Other) { DependentWrite = Other; } + void writeStartEvent(unsigned Cycles) { + DependentWriteCyclesLeft = Cycles; + DependentWrite = nullptr; } + void setWriteZero() { WritesZero = true; } void setEliminated() { assert(Users.empty() && "Write is in an inconsistent state."); diff --git a/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp b/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp index 6bc63a0db50..f96e4cab4b9 100644 --- a/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp +++ b/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp @@ -185,11 +185,11 @@ void RegisterFile::addRegisterWrite(WriteRef Write, // register is allocated. ShouldAllocatePhysRegs = false; - if (OtherWrite.getWriteState() && - (OtherWrite.getSourceIndex() != Write.getSourceIndex())) { + WriteState *OtherWS = OtherWrite.getWriteState(); + if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) { // This partial write has a false dependency on RenameAs. assert(!IsEliminated && "Unexpected partial update!"); - WS.setDependentWrite(OtherWrite.getWriteState()); + OtherWS->addUser(&WS); } } } diff --git a/llvm/tools/llvm-mca/lib/Instruction.cpp b/llvm/tools/llvm-mca/lib/Instruction.cpp index 832a6199f00..5c46ee995fe 100644 --- a/llvm/tools/llvm-mca/lib/Instruction.cpp +++ b/llvm/tools/llvm-mca/lib/Instruction.cpp @@ -49,6 +49,10 @@ void WriteState::onInstructionIssued() { unsigned ReadCycles = std::max(0, CyclesLeft - User.second); RS->writeStartEvent(ReadCycles); } + + // Notify any writes that are in a false dependency with this write. + if (PartialWrite) + PartialWrite->writeStartEvent(CyclesLeft); } void WriteState::addUser(ReadState *User, int ReadAdvance) { @@ -65,12 +69,26 @@ void WriteState::addUser(ReadState *User, int ReadAdvance) { Users.insert(NewPair); } +void WriteState::addUser(WriteState *User) { + if (CyclesLeft != UNKNOWN_CYCLES) { + User->writeStartEvent(std::max(0, CyclesLeft)); + return; + } + + assert(!PartialWrite && "PartialWrite already set!"); + PartialWrite = User; + User->setDependentWrite(this); +} + void WriteState::cycleEvent() { // Note: CyclesLeft can be a negative number. It is an error to // make it an unsigned quantity because users of this write may // specify a negative ReadAdvance. if (CyclesLeft != UNKNOWN_CYCLES) CyclesLeft--; + + if (DependentWriteCyclesLeft) + DependentWriteCyclesLeft--; } void ReadState::cycleEvent() { @@ -143,13 +161,11 @@ void Instruction::update() { // A partial register write cannot complete before a dependent write. auto IsDefReady = [&](const WriteState &Def) { - if (const WriteState *Write = Def.getDependentWrite()) { - int WriteLatency = Write->getCyclesLeft(); - if (WriteLatency == UNKNOWN_CYCLES) - return false; - return static_cast<unsigned>(WriteLatency) < getLatency(); + if (!Def.getDependentWrite()) { + unsigned CyclesLeft = Def.getDependentWriteCyclesLeft(); + return !CyclesLeft || CyclesLeft < getLatency(); } - return true; + return false; }; if (all_of(getDefs(), IsDefReady)) @@ -164,6 +180,9 @@ void Instruction::cycleEvent() { for (ReadState &Use : getUses()) Use.cycleEvent(); + for (WriteState &Def : getDefs()) + Def.cycleEvent(); + update(); return; } |