diff options
23 files changed, 219 insertions, 114 deletions
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s index a15dc1027c7..7955ff743cc 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s @@ -5,11 +5,12 @@ vmulps %xmm0, %xmm1, %xmm2 vhaddps %xmm2, %xmm2, %xmm3 vhaddps %xmm3, %xmm3, %xmm4 -# CHECK: Iterations: 300 -# CHECK-NEXT: Instructions: 900 -# CHECK-NEXT: Total Cycles: 610 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 1.48 +# CHECK: Iterations: 300 +# CHECK-NEXT: Instructions: 900 +# CHECK-NEXT: Total Cycles: 610 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 1.48 +# CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s index a5b15d1bd9b..4c343d62a48 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s @@ -4,11 +4,12 @@ vshufps $0, %xmm0, %xmm1, %xmm1 vhaddps (%rdi), %xmm1, %xmm2 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 11 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.18 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 11 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.18 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s index 2f9a7972587..9ac266b9c29 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s @@ -4,11 +4,12 @@ vshufps $0, %xmm0, %xmm1, %xmm1 vhaddps (%rdi), %ymm1, %ymm2 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 12 -# CHECK-NEXT: 
Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.17 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 12 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s b/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s index 77fc5faec4e..e9fd32f00c4 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s @@ -10,11 +10,12 @@ vhaddps %xmm3, %xmm3, %xmm4 # DISABLED-NOT: Instruction Info: -# ENABLED: Iterations: 100 -# ENABLED-NEXT: Instructions: 300 -# ENABLED-NEXT: Total Cycles: 209 -# ENABLED-NEXT: Dispatch Width: 2 -# ENABLED-NEXT: IPC: 1.44 +# ENABLED: Iterations: 100 +# ENABLED-NEXT: Instructions: 300 +# ENABLED-NEXT: Total Cycles: 209 +# ENABLED-NEXT: Dispatch Width: 2 +# ENABLED-NEXT: IPC: 1.44 +# ENABLED-NEXT: Block RThroughput: 2.0 # ENABLED: Instruction Info: # ENABLED-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s b/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s index 95d69064d71..9f42c47384d 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s @@ -10,11 +10,12 @@ vmovaps %xmm0, 32(%rdi) vmovaps 48(%rsi), %xmm0 vmovaps %xmm0, 48(%rdi) -# CHECK: Iterations: 100 -# CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 2403 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.33 +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 800 +# CHECK-NEXT: Total Cycles: 2403 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s b/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s index 
708add22a69..f86325dbb8e 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s @@ -10,11 +10,12 @@ vmovaps %xmm0, 32(%rdi) vmovaps 48(%rsi), %xmm0 vmovaps %xmm0, 48(%rdi) -# CHECK: Iterations: 100 -# CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 408 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 1.96 +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 800 +# CHECK-NEXT: Total Cycles: 408 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 1.96 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s index 6042ce35a18..8165b994acd 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s @@ -17,11 +17,12 @@ vsqrtps %xmm0, %xmm2 vaddps %ymm0, %ymm1, %ymm2 vsqrtps %ymm0, %ymm2 -# CHECK: Iterations: 100 -# CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 6306 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.13 +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 800 +# CHECK-NEXT: Total Cycles: 6306 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.13 +# CHECK-NEXT: Block RThroughput: 63.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s b/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s index c1f9ea246d3..f186f138fa8 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s @@ -18,11 +18,12 @@ vaddps %xmm0, %xmm1, %xmm2 vaddps %xmm0, %xmm1, %xmm2 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 16 -# CHECK-NEXT: Total Cycles: 31 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.52 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 16 +# CHECK-NEXT: Total Cycles: 31 +# CHECK-NEXT: Dispatch Width: 2 +# 
CHECK-NEXT: IPC: 0.52 +# CHECK-NEXT: Block RThroughput: 21.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s index 7e48e7a81dd..ee69d2c2140 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s @@ -8,11 +8,12 @@ vaddps %xmm0, %xmm0, %xmm1 vmulps (%rdi), %xmm1, %xmm2 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 10 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.20 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.20 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s index af64ca71db2..a27e74a2e7b 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s @@ -7,11 +7,12 @@ # The second integer multiply can start at cycle 2 because the implicit reads # can start after the load operand is evaluated. 
-# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 10 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.20 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.20 +# CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s index b3a8f6f98f2..773f91b6fc0 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s @@ -5,11 +5,12 @@ add (%rsp), %rsi add %rdx, %r8 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 3 -# CHECK-NEXT: Total Cycles: 7 -# CHECK-NEXT: Dispatch Width: 3 -# CHECK-NEXT: IPC: 0.43 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 3 +# CHECK-NEXT: Total Cycles: 7 +# CHECK-NEXT: Dispatch Width: 3 +# CHECK-NEXT: IPC: 0.43 +# CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s index fed5fd0e477..6324f4bcfec 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s @@ -4,11 +4,12 @@ vaddps %xmm0, %xmm0, %xmm0 vmulps %xmm0, %xmm0, %xmm0 -# CHECK: Iterations: 5 -# CHECK-NEXT: Instructions: 10 -# CHECK-NEXT: Total Cycles: 28 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.36 +# CHECK: Iterations: 5 +# CHECK-NEXT: Instructions: 10 +# CHECK-NEXT: Total Cycles: 28 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.36 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s index 
50c21dc32b9..40d37fb30cc 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s @@ -4,11 +4,12 @@ vaddps %xmm0, %xmm0, %xmm0 vmulps %xmm0, %xmm0, %xmm0 -# CHECK: Iterations: 5 -# CHECK-NEXT: Instructions: 10 -# CHECK-NEXT: Total Cycles: 28 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.36 +# CHECK: Iterations: 5 +# CHECK-NEXT: Instructions: 10 +# CHECK-NEXT: Total Cycles: 28 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.36 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 13 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s index 7ba27ecb5c5..59829415005 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s @@ -3,11 +3,12 @@ idiv %eax -# CHECK: Iterations: 2 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 55 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.04 +# CHECK: Iterations: 2 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 55 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.04 +# CHECK-NEXT: Block RThroughput: 25.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s index 122e24392c8..369a82ef6ef 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s @@ -3,11 +3,12 @@ idiv %eax -# CHECK: Iterations: 22 -# CHECK-NEXT: Instructions: 22 -# CHECK-NEXT: Total Cycles: 553 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.04 +# CHECK: Iterations: 22 +# CHECK-NEXT: Instructions: 22 +# CHECK-NEXT: Total Cycles: 553 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.04 +# CHECK-NEXT: Block RThroughput: 25.0 # CHECK: 
Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s index fc14e46677d..6489f237864 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s @@ -35,11 +35,12 @@ vaddps %ymm3, %ymm0, %ymm5 vaddps %ymm3, %ymm0, %ymm6 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 33 -# CHECK-NEXT: Total Cycles: 70 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.47 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 33 +# CHECK-NEXT: Total Cycles: 70 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.47 +# CHECK-NEXT: Block RThroughput: 64.0 # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s index b501934aa34..caaf1ccb145 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s @@ -4,11 +4,12 @@ vmulps (%rsi), %xmm0, %xmm0 add %rsi, %rsi -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 10 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.20 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.20 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s b/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s index 4b3f549024e..05328080deb 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s @@ -3,11 +3,12 @@ add %edi, %eax -# CHECK: Iterations: 100 -# CHECK-NEXT: Instructions: 100 -# CHECK-NEXT: Total Cycles: 103 -# CHECK-NEXT: Dispatch Width: 2 -# 
CHECK-NEXT: IPC: 0.97 +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 100 +# CHECK-NEXT: Total Cycles: 103 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.97 +# CHECK-NEXT: Block RThroughput: 0.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s index 7cbc0a8c7ff..acdcdebe261 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s @@ -4,11 +4,12 @@ vaddps %xmm0, %xmm0, %xmm1 vandps (%rdi), %xmm1, %xmm2 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 9 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.22 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 9 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.22 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s index 2b091d8788a..60bf59fed2b 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s @@ -4,11 +4,12 @@ vaddps %ymm0, %ymm0, %ymm1 vandps (%rdi), %ymm1, %ymm2 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 10 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.20 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.20 +# CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/tools/llvm-mca/SummaryView.cpp b/llvm/tools/llvm-mca/SummaryView.cpp index 511727bc750..9b6e1d9b183 100644 --- a/llvm/tools/llvm-mca/SummaryView.cpp +++ 
b/llvm/tools/llvm-mca/SummaryView.cpp @@ -14,6 +14,8 @@ //===----------------------------------------------------------------------===// #include "SummaryView.h" +#include "Support.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Format.h" namespace mca { @@ -22,19 +24,83 @@ namespace mca { using namespace llvm; +void SummaryView::onInstructionEvent(const HWInstructionEvent &Event) { + // We are only interested in the "instruction dispatched" events generated by + // the dispatch stage for instructions that are part of iteration #0. + if (Event.Type != HWInstructionEvent::Dispatched) + return; + + if (Event.IR.getSourceIndex() >= Source.size()) + return; + + // Update the cumulative number of resource cycles based on the processor + // resource usage information available from the instruction descriptor. We need to + // compute the cumulative number of resource cycles for every processor + // resource which is consumed by an instruction of the block. + const Instruction &Inst = *Event.IR.getInstruction(); + const InstrDesc &Desc = Inst.getDesc(); + NumMicroOps += Desc.NumMicroOps; + for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) { + if (!RU.second.size()) + continue; + + assert(RU.second.NumUnits && "Expected more than one unit used!"); + if (ProcResourceUsage.find(RU.first) == ProcResourceUsage.end()) { + ProcResourceUsage[RU.first] = RU.second.size(); + continue; + } + + ProcResourceUsage[RU.first] += RU.second.size(); + } +} + +double SummaryView::getBlockRThroughput() const { + assert(NumMicroOps && "Expected at least one micro opcode!"); + + SmallVector<uint64_t, 8> Masks(SM.getNumProcResourceKinds()); + computeProcResourceMasks(SM, Masks); + + // The block throughput is bounded from above by the hardware dispatch + // throughput. That is because the DispatchWidth is an upper bound on the + // number of opcodes that can be part of a single dispatch group. 
+ double Max = static_cast<double>(NumMicroOps) / DispatchWidth; + + // The block throughput is also limited by the amount of hardware parallelism. + // The number of available resource units affects the resource pressure + // distribution, as well as how many blocks can be executed every cycle. + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + uint64_t Mask = Masks[I]; + const auto It = ProcResourceUsage.find_as(Mask); + if (It != ProcResourceUsage.end()) { + const MCProcResourceDesc &MCDesc = *SM.getProcResource(I); + unsigned NumUnits = MCDesc.NumUnits; + double Throughput = static_cast<double>(It->second) / NumUnits; + Max = std::max(Max, Throughput); + } + } + + // The block reciprocal throughput is computed as the MAX of: + // - (#uOps / DispatchWidth) + // - (resource cycles / #units) for every consumed processor resource. + return Max; +} + void SummaryView::printView(raw_ostream &OS) const { unsigned Iterations = Source.getNumIterations(); unsigned Instructions = Source.size(); unsigned TotalInstructions = Instructions * Iterations; double IPC = (double)TotalInstructions / TotalCycles; + double BlockRThroughput = getBlockRThroughput(); std::string Buffer; raw_string_ostream TempStream(Buffer); - TempStream << "Iterations: " << Iterations; - TempStream << "\nInstructions: " << TotalInstructions; - TempStream << "\nTotal Cycles: " << TotalCycles; - TempStream << "\nDispatch Width: " << DispatchWidth; - TempStream << "\nIPC: " << format("%.2f", IPC) << '\n'; + TempStream << "Iterations: " << Iterations; + TempStream << "\nInstructions: " << TotalInstructions; + TempStream << "\nTotal Cycles: " << TotalCycles; + TempStream << "\nDispatch Width: " << DispatchWidth; + TempStream << "\nIPC: " << format("%.2f", IPC); + TempStream << "\nBlock RThroughput: " << format("%.1f", BlockRThroughput) + << '\n'; TempStream.flush(); OS << Buffer; } diff --git a/llvm/tools/llvm-mca/SummaryView.h b/llvm/tools/llvm-mca/SummaryView.h index
0484057fb10..fe8a5e20f9d 100644 --- a/llvm/tools/llvm-mca/SummaryView.h +++ b/llvm/tools/llvm-mca/SummaryView.h @@ -14,12 +14,12 @@ /// performance throughput. Below is an example of summary view: /// /// -/// Iterations: 300 -/// Instructions: 900 -/// Total Cycles: 610 -/// Dispatch Width: 2 -/// IPC: 1.48 -/// +/// Iterations: 300 +/// Instructions: 900 +/// Total Cycles: 610 +/// Dispatch Width: 2 +/// IPC: 1.48 +/// Block RThroughput: 2.0 /// /// The summary view collects a few performance numbers. The two main /// performance indicators are 'Total Cycles' and IPC (Instructions Per Cycle). @@ -31,22 +31,41 @@ #include "SourceMgr.h" #include "View.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCSchedule.h" #include "llvm/Support/raw_ostream.h" namespace mca { /// A view that collects and prints a few performance numbers. class SummaryView : public View { + const llvm::MCSchedModel &SM; const SourceMgr &Source; const unsigned DispatchWidth; unsigned TotalCycles; + // The total number of micro opcodes contributed by a block of instructions. + unsigned NumMicroOps; + // For each processor resource, this map stores the cumulative number of + // resource cycles consumed by a block of instructions. The resource mask ID + // is used as the key value to access elements of this map. + llvm::DenseMap<uint64_t, unsigned> ProcResourceUsage; + + // Compute the reciprocal throughput for the analyzed code block. + // The reciprocal block throughput is computed as the MAX between: + // - NumMicroOps / DispatchWidth + // - Total Resource Cycles / #Units (for every resource consumed). 
+ double getBlockRThroughput() const; public: - SummaryView(const SourceMgr &S, unsigned Width) - : Source(S), DispatchWidth(Width), TotalCycles(0) {} + SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S, + unsigned Width) + : SM(Model), Source(S), DispatchWidth(Width), TotalCycles(0), + NumMicroOps(0) {} void onCycleEnd() override { ++TotalCycles; } + void onInstructionEvent(const HWInstructionEvent &Event) override; + void printView(llvm::raw_ostream &OS) const override; }; } // namespace mca diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index 1e93a7fdcdb..925584b0193 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -495,7 +495,7 @@ int main(int argc, char **argv) { LoadQueueSize, StoreQueueSize, AssumeNoAlias); mca::BackendPrinter Printer(B); - Printer.addView(llvm::make_unique<mca::SummaryView>(S, Width)); + Printer.addView(llvm::make_unique<mca::SummaryView>(SM, S, Width)); if (PrintInstructionInfoView) Printer.addView( llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, S, *IP)); |