diff options
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s | 104 | ||||
| -rw-r--r-- | llvm/tools/llvm-mca/include/Instruction.h | 28 | ||||
| -rw-r--r-- | llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp | 6 | ||||
| -rw-r--r-- | llvm/tools/llvm-mca/lib/Instruction.cpp | 31 | 
4 files changed, 153 insertions, 16 deletions
| diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s new file mode 100644 index 00000000000..720a1ed1440 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s @@ -0,0 +1,104 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=5 < %s | FileCheck %s + +sete %r9b +movzbl %al, %eax +shll $2, %eax +imull %ecx, %eax +cmpl $1025, %eax + +# CHECK:      Iterations:        100 +# CHECK-NEXT: Instructions:      500 +# CHECK-NEXT: Total Cycles:      504 +# CHECK-NEXT: Total uOps:        600 + +# CHECK:      Dispatch Width:    2 +# CHECK-NEXT: uOps Per Cycle:    1.19 +# CHECK-NEXT: IPC:               0.99 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK:      Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions: +# CHECK-NEXT:  1      1     0.50                        sete	%r9b +# CHECK-NEXT:  1      1     0.50                        movzbl	%al, %eax +# CHECK-NEXT:  1      1     0.50                        shll	$2, %eax +# CHECK-NEXT:  2      3     1.00                        imull	%ecx, %eax +# CHECK-NEXT:  1      1     0.50                        cmpl	$1025, %eax + +# CHECK:      Resources: +# CHECK-NEXT: [0]   - JALU0 +# CHECK-NEXT: [1]   - JALU1 +# CHECK-NEXT: [2]   - JDiv +# CHECK-NEXT: [3]   - JFPA +# CHECK-NEXT: [4]   - JFPM +# CHECK-NEXT: [5]   - JFPU0 +# CHECK-NEXT: [6]   - JFPU1 +# CHECK-NEXT: [7]   - JLAGU +# CHECK-NEXT: [8]   - JMul +# CHECK-NEXT: [9]   - JSAGU +# CHECK-NEXT: [10]  - JSTC +# CHECK-NEXT: [11]  - JVALU0 +# CHECK-NEXT: [12]  - JVALU1 +# CHECK-NEXT: [13]  - JVIMUL + +# CHECK:      Resource pressure per iteration: +# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13] +# CHECK-NEXT: 2.00   3.00    -      -      -      -      -      -     1.00    -      -      -      -      - + +# CHECK:      Resource pressure by instruction: +# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions: +# CHECK-NEXT: 0.99   0.01    -      -      -      -      -      -      -      -      -      -      -      -     sete	%r9b +# CHECK-NEXT: 0.01   0.99    -      -      -      -      -      -      -      -      -      -      -      -     movzbl	%al, %eax +# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -     shll	$2, %eax +# CHECK-NEXT:  -     1.00    -      -      -      -      -      -     1.00    -      -      -      -      -     imull	%ecx, %eax +# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     cmpl	$1025, %eax + +# CHECK:      Timeline view: +# CHECK-NEXT:                     0123456789 +# CHECK-NEXT: Index     0123456789          012345678 + +# CHECK:      [0,0]     DeER .    .    .    .    .  .   sete	%r9b +# CHECK-NEXT: [0,1]     DeER .    .    .    .    .  .   movzbl	%al, %eax +# CHECK-NEXT: [0,2]     .DeER.    .    .    .    .  .   shll	$2, %eax +# CHECK-NEXT: [0,3]     . DeeeER  .    .    .    .  .   imull	%ecx, %eax +# CHECK-NEXT: [0,4]     .  D==eER .    .    .    .  .   cmpl	$1025, %eax +# CHECK-NEXT: [1,0]     .  D===eER.    .    .    .  .   sete	%r9b +# CHECK-NEXT: [1,1]     .   D=eE-R.    .    .    .  .   movzbl	%al, %eax +# CHECK-NEXT: [1,2]     .   D==eE-R    .    .    .  .   shll	$2, %eax +# CHECK-NEXT: [1,3]     .    D==eeeER  .    .    .  .   imull	%ecx, %eax +# CHECK-NEXT: [1,4]     .    .D====eER .    .    .  .   cmpl	$1025, %eax +# CHECK-NEXT: [2,0]     .    .D=====eER.    .    .  .   sete	%r9b +# CHECK-NEXT: [2,1]     .    . D===eE-R.    .    .  .   movzbl	%al, %eax +# CHECK-NEXT: [2,2]     .    . D====eE-R    .    .  .   shll	$2, %eax +# CHECK-NEXT: [2,3]     .    .  D====eeeER  .    .  .   imull	%ecx, %eax +# CHECK-NEXT: [2,4]     .    .   D======eER .    .  .   cmpl	$1025, %eax +# CHECK-NEXT: [3,0]     .    .   D=======eER.    .  .   sete	%r9b +# CHECK-NEXT: [3,1]     .    .    D=====eE-R.    .  .   movzbl	%al, %eax +# CHECK-NEXT: [3,2]     .    .    D======eE-R    .  .   shll	$2, %eax +# CHECK-NEXT: [3,3]     .    .    .D======eeeER  .  .   imull	%ecx, %eax +# CHECK-NEXT: [3,4]     .    .    . D========eER .  .   cmpl	$1025, %eax +# CHECK-NEXT: [4,0]     .    .    . D=========eER.  .   sete	%r9b +# CHECK-NEXT: [4,1]     .    .    .  D=======eE-R.  .   movzbl	%al, %eax +# CHECK-NEXT: [4,2]     .    .    .  D========eE-R  .   shll	$2, %eax +# CHECK-NEXT: [4,3]     .    .    .   D========eeeER.   imull	%ecx, %eax +# CHECK-NEXT: [4,4]     .    .    .    D==========eER   cmpl	$1025, %eax + +# CHECK:      Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK:            [0]    [1]    [2]    [3] +# CHECK-NEXT: 0.     5     5.8    0.2    0.0       sete	%r9b +# CHECK-NEXT: 1.     5     4.2    0.2    0.8       movzbl	%al, %eax +# CHECK-NEXT: 2.     5     5.0    0.0    0.8       shll	$2, %eax +# CHECK-NEXT: 3.     5     5.0    0.0    0.0       imull	%ecx, %eax +# CHECK-NEXT: 4.     5     7.0    0.0    0.0       cmpl	$1025, %eax diff --git a/llvm/tools/llvm-mca/include/Instruction.h b/llvm/tools/llvm-mca/include/Instruction.h index 7407283bca2..2e676088408 100644 --- a/llvm/tools/llvm-mca/include/Instruction.h +++ b/llvm/tools/llvm-mca/include/Instruction.h @@ -123,8 +123,10 @@ class WriteState {    // that we don't break the WAW, and the two writes can be merged together.    const WriteState *DependentWrite; -  // Number of writes that are in a WAW dependency with this write. -  unsigned NumWriteUsers; +  // A partial write that is in a false dependency with this write. +  WriteState *PartialWrite; + +  unsigned DependentWriteCyclesLeft;    // A list of dependent reads. Users is a set of dependent    // reads. A dependent read is added to the set only if CyclesLeft @@ -139,7 +141,8 @@ public:               bool clearsSuperRegs = false, bool writesZero = false)        : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),          PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero), -        IsEliminated(false), DependentWrite(nullptr), NumWriteUsers(0U) {} +        IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr), +        DependentWriteCyclesLeft(0) {}    WriteState(const WriteState &Other) = default;    WriteState &operator=(const WriteState &Other) = default; @@ -151,8 +154,17 @@ public:    unsigned getLatency() const { return WD->Latency; }    void addUser(ReadState *Use, int ReadAdvance); +  void addUser(WriteState *Use); + +  unsigned getDependentWriteCyclesLeft() const { return DependentWriteCyclesLeft; } + +  unsigned getNumUsers() const { +    unsigned NumUsers = Users.size(); +    if (PartialWrite) +      ++NumUsers; +    return NumUsers; +  } -  unsigned getNumUsers() const { return Users.size() + NumWriteUsers; }    bool clearsSuperRegisters() const { return ClearsSuperRegs; }    bool isWriteZero() const { return WritesZero; }    bool isEliminated() const { return IsEliminated; } @@ -161,10 +173,12 @@ public:    }    const WriteState *getDependentWrite() const { return DependentWrite; } -  void setDependentWrite(WriteState *Other) { -    DependentWrite = Other; -    ++Other->NumWriteUsers; +  void setDependentWrite(WriteState *Other) { DependentWrite = Other; } +  void writeStartEvent(unsigned Cycles) { +    DependentWriteCyclesLeft = Cycles; +    DependentWrite = nullptr;    } +    void setWriteZero() { WritesZero = true; }    void setEliminated() {      assert(Users.empty() && "Write is in an inconsistent state."); diff --git a/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp b/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp index 6bc63a0db50..f96e4cab4b9 100644 --- a/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp +++ b/llvm/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp @@ -185,11 +185,11 @@ void RegisterFile::addRegisterWrite(WriteRef Write,        // register is allocated.        ShouldAllocatePhysRegs = false; -      if (OtherWrite.getWriteState() && -          (OtherWrite.getSourceIndex() != Write.getSourceIndex())) { +      WriteState *OtherWS = OtherWrite.getWriteState(); +      if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) {          // This partial write has a false dependency on RenameAs.          assert(!IsEliminated && "Unexpected partial update!"); -        WS.setDependentWrite(OtherWrite.getWriteState()); +        OtherWS->addUser(&WS);        }      }    } diff --git a/llvm/tools/llvm-mca/lib/Instruction.cpp b/llvm/tools/llvm-mca/lib/Instruction.cpp index 832a6199f00..5c46ee995fe 100644 --- a/llvm/tools/llvm-mca/lib/Instruction.cpp +++ b/llvm/tools/llvm-mca/lib/Instruction.cpp @@ -49,6 +49,10 @@ void WriteState::onInstructionIssued() {      unsigned ReadCycles = std::max(0, CyclesLeft - User.second);      RS->writeStartEvent(ReadCycles);    } + +  // Notify any writes that are in a false dependency with this write. +  if (PartialWrite) +    PartialWrite->writeStartEvent(CyclesLeft);  }  void WriteState::addUser(ReadState *User, int ReadAdvance) { @@ -65,12 +69,26 @@ void WriteState::addUser(ReadState *User, int ReadAdvance) {    Users.insert(NewPair);  } +void WriteState::addUser(WriteState *User) { +  if (CyclesLeft != UNKNOWN_CYCLES) { +    User->writeStartEvent(std::max(0, CyclesLeft)); +    return; +  } + +  assert(!PartialWrite && "PartialWrite already set!"); +  PartialWrite = User; +  User->setDependentWrite(this); +} +  void WriteState::cycleEvent() {    // Note: CyclesLeft can be a negative number. It is an error to    // make it an unsigned quantity because users of this write may    // specify a negative ReadAdvance.    if (CyclesLeft != UNKNOWN_CYCLES)      CyclesLeft--; + +  if (DependentWriteCyclesLeft) +    DependentWriteCyclesLeft--;  }  void ReadState::cycleEvent() { @@ -143,13 +161,11 @@ void Instruction::update() {    // A partial register write cannot complete before a dependent write.    auto IsDefReady = [&](const WriteState &Def) { -    if (const WriteState *Write = Def.getDependentWrite()) { -      int WriteLatency = Write->getCyclesLeft(); -      if (WriteLatency == UNKNOWN_CYCLES) -        return false; -      return static_cast<unsigned>(WriteLatency) < getLatency(); +    if (!Def.getDependentWrite()) { +      unsigned CyclesLeft = Def.getDependentWriteCyclesLeft(); +      return !CyclesLeft || CyclesLeft < getLatency();      } -    return true; +    return false;    };    if (all_of(getDefs(), IsDefReady)) @@ -164,6 +180,9 @@ void Instruction::cycleEvent() {      for (ReadState &Use : getUses())        Use.cycleEvent(); +    for (WriteState &Def : getDefs()) +      Def.cycleEvent(); +      update();      return;    } | 

