diff options
| author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2018-05-23 15:59:27 +0000 |
|---|---|---|
| committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2018-05-23 15:59:27 +0000 |
| commit | 3fc20c9c7f013cd7ad274a8c4d7f99fde98c65a7 (patch) | |
| tree | 41ae64000d1d49ec080e8ecbf4dd14e74ac8071e /llvm | |
| parent | feb3146d4b3e7306be6611532f1219859bbef54e (diff) | |
| download | bcm5719-llvm-3fc20c9c7f013cd7ad274a8c4d7f99fde98c65a7.tar.gz bcm5719-llvm-3fc20c9c7f013cd7ad274a8c4d7f99fde98c65a7.zip | |
[llvm-mca] Print the "Block RThroughput" in the SummaryView.
This patch implements the "block reciprocal throughput" computation in the
SummaryView.
The block reciprocal throughput is computed as the MAX of:
- NumMicroOps / DispatchWidth
- Resource Cycles / #Units (for every resource consumed).
The block throughput is bounded from above by the hardware dispatch throughput.
That is because the DispatchWidth is an upper bound on how many micro opcodes can
be part of a single dispatch group.
The block throughput is also limited by the amount of hardware parallelism. The
number of available resource units affects how the resource pressure is
distributed, and also how many blocks can be delivered every cycle.
llvm-svn: 333095
Diffstat (limited to 'llvm')
23 files changed, 219 insertions, 114 deletions
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s index a15dc1027c7..7955ff743cc 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s @@ -5,11 +5,12 @@ vmulps %xmm0, %xmm1, %xmm2 vhaddps %xmm2, %xmm2, %xmm3 vhaddps %xmm3, %xmm3, %xmm4 -# CHECK: Iterations: 300 -# CHECK-NEXT: Instructions: 900 -# CHECK-NEXT: Total Cycles: 610 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 1.48 +# CHECK: Iterations: 300 +# CHECK-NEXT: Instructions: 900 +# CHECK-NEXT: Total Cycles: 610 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 1.48 +# CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s index a5b15d1bd9b..4c343d62a48 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s @@ -4,11 +4,12 @@ vshufps $0, %xmm0, %xmm1, %xmm1 vhaddps (%rdi), %xmm1, %xmm2 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 11 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.18 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 11 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.18 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s index 2f9a7972587..9ac266b9c29 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s @@ -4,11 +4,12 @@ vshufps $0, %xmm0, %xmm1, %xmm1 vhaddps (%rdi), %ymm1, %ymm2 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 12 -# CHECK-NEXT: 
Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.17 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 12 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s b/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s index 77fc5faec4e..e9fd32f00c4 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s @@ -10,11 +10,12 @@ vhaddps %xmm3, %xmm3, %xmm4 # DISABLED-NOT: Instruction Info: -# ENABLED: Iterations: 100 -# ENABLED-NEXT: Instructions: 300 -# ENABLED-NEXT: Total Cycles: 209 -# ENABLED-NEXT: Dispatch Width: 2 -# ENABLED-NEXT: IPC: 1.44 +# ENABLED: Iterations: 100 +# ENABLED-NEXT: Instructions: 300 +# ENABLED-NEXT: Total Cycles: 209 +# ENABLED-NEXT: Dispatch Width: 2 +# ENABLED-NEXT: IPC: 1.44 +# ENABLED-NEXT: Block RThroughput: 2.0 # ENABLED: Instruction Info: # ENABLED-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s b/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s index 95d69064d71..9f42c47384d 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s @@ -10,11 +10,12 @@ vmovaps %xmm0, 32(%rdi) vmovaps 48(%rsi), %xmm0 vmovaps %xmm0, 48(%rdi) -# CHECK: Iterations: 100 -# CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 2403 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.33 +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 800 +# CHECK-NEXT: Total Cycles: 2403 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s b/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s index 
708add22a69..f86325dbb8e 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s @@ -10,11 +10,12 @@ vmovaps %xmm0, 32(%rdi) vmovaps 48(%rsi), %xmm0 vmovaps %xmm0, 48(%rdi) -# CHECK: Iterations: 100 -# CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 408 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 1.96 +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 800 +# CHECK-NEXT: Total Cycles: 408 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 1.96 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s index 6042ce35a18..8165b994acd 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s @@ -17,11 +17,12 @@ vsqrtps %xmm0, %xmm2 vaddps %ymm0, %ymm1, %ymm2 vsqrtps %ymm0, %ymm2 -# CHECK: Iterations: 100 -# CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 6306 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.13 +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 800 +# CHECK-NEXT: Total Cycles: 6306 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.13 +# CHECK-NEXT: Block RThroughput: 63.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s b/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s index c1f9ea246d3..f186f138fa8 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s @@ -18,11 +18,12 @@ vaddps %xmm0, %xmm1, %xmm2 vaddps %xmm0, %xmm1, %xmm2 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 16 -# CHECK-NEXT: Total Cycles: 31 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.52 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 16 +# CHECK-NEXT: Total Cycles: 31 +# CHECK-NEXT: Dispatch Width: 2 +# 
CHECK-NEXT: IPC: 0.52 +# CHECK-NEXT: Block RThroughput: 21.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s index 7e48e7a81dd..ee69d2c2140 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s @@ -8,11 +8,12 @@ vaddps %xmm0, %xmm0, %xmm1 vmulps (%rdi), %xmm1, %xmm2 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 10 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.20 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.20 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s index af64ca71db2..a27e74a2e7b 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s @@ -7,11 +7,12 @@ # The second integer multiply can start at cycle 2 because the implicit reads # can start after the load operand is evaluated. 
-# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 10 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.20 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.20 +# CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s index b3a8f6f98f2..773f91b6fc0 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s @@ -5,11 +5,12 @@ add (%rsp), %rsi add %rdx, %r8 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 3 -# CHECK-NEXT: Total Cycles: 7 -# CHECK-NEXT: Dispatch Width: 3 -# CHECK-NEXT: IPC: 0.43 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 3 +# CHECK-NEXT: Total Cycles: 7 +# CHECK-NEXT: Dispatch Width: 3 +# CHECK-NEXT: IPC: 0.43 +# CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s index fed5fd0e477..6324f4bcfec 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s @@ -4,11 +4,12 @@ vaddps %xmm0, %xmm0, %xmm0 vmulps %xmm0, %xmm0, %xmm0 -# CHECK: Iterations: 5 -# CHECK-NEXT: Instructions: 10 -# CHECK-NEXT: Total Cycles: 28 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.36 +# CHECK: Iterations: 5 +# CHECK-NEXT: Instructions: 10 +# CHECK-NEXT: Total Cycles: 28 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.36 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s index 
50c21dc32b9..40d37fb30cc 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s @@ -4,11 +4,12 @@ vaddps %xmm0, %xmm0, %xmm0 vmulps %xmm0, %xmm0, %xmm0 -# CHECK: Iterations: 5 -# CHECK-NEXT: Instructions: 10 -# CHECK-NEXT: Total Cycles: 28 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.36 +# CHECK: Iterations: 5 +# CHECK-NEXT: Instructions: 10 +# CHECK-NEXT: Total Cycles: 28 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.36 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 13 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s index 7ba27ecb5c5..59829415005 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s @@ -3,11 +3,12 @@ idiv %eax -# CHECK: Iterations: 2 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 55 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.04 +# CHECK: Iterations: 2 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 55 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.04 +# CHECK-NEXT: Block RThroughput: 25.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s index 122e24392c8..369a82ef6ef 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s @@ -3,11 +3,12 @@ idiv %eax -# CHECK: Iterations: 22 -# CHECK-NEXT: Instructions: 22 -# CHECK-NEXT: Total Cycles: 553 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.04 +# CHECK: Iterations: 22 +# CHECK-NEXT: Instructions: 22 +# CHECK-NEXT: Total Cycles: 553 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.04 +# CHECK-NEXT: Block RThroughput: 25.0 # CHECK: 
Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s index fc14e46677d..6489f237864 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s @@ -35,11 +35,12 @@ vaddps %ymm3, %ymm0, %ymm5 vaddps %ymm3, %ymm0, %ymm6 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 33 -# CHECK-NEXT: Total Cycles: 70 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.47 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 33 +# CHECK-NEXT: Total Cycles: 70 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.47 +# CHECK-NEXT: Block RThroughput: 64.0 # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s index b501934aa34..caaf1ccb145 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s @@ -4,11 +4,12 @@ vmulps (%rsi), %xmm0, %xmm0 add %rsi, %rsi -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 10 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.20 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.20 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s b/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s index 4b3f549024e..05328080deb 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s @@ -3,11 +3,12 @@ add %edi, %eax -# CHECK: Iterations: 100 -# CHECK-NEXT: Instructions: 100 -# CHECK-NEXT: Total Cycles: 103 -# CHECK-NEXT: Dispatch Width: 2 -# 
CHECK-NEXT: IPC: 0.97 +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 100 +# CHECK-NEXT: Total Cycles: 103 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.97 +# CHECK-NEXT: Block RThroughput: 0.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s index 7cbc0a8c7ff..acdcdebe261 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s @@ -4,11 +4,12 @@ vaddps %xmm0, %xmm0, %xmm1 vandps (%rdi), %xmm1, %xmm2 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 9 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.22 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 9 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.22 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s index 2b091d8788a..60bf59fed2b 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s @@ -4,11 +4,12 @@ vaddps %ymm0, %ymm0, %ymm1 vandps (%rdi), %ymm1, %ymm2 -# CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 10 -# CHECK-NEXT: Dispatch Width: 2 -# CHECK-NEXT: IPC: 0.20 +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.20 +# CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps diff --git a/llvm/tools/llvm-mca/SummaryView.cpp b/llvm/tools/llvm-mca/SummaryView.cpp index 511727bc750..9b6e1d9b183 100644 --- a/llvm/tools/llvm-mca/SummaryView.cpp +++ 
b/llvm/tools/llvm-mca/SummaryView.cpp @@ -14,6 +14,8 @@ //===----------------------------------------------------------------------===// #include "SummaryView.h" +#include "Support.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Format.h" namespace mca { @@ -22,19 +24,83 @@ namespace mca { using namespace llvm; +void SummaryView::onInstructionEvent(const HWInstructionEvent &Event) { + // We are only interested in the "instruction dispatched" events generated by + // the dispatch stage for instructions that are part of iteration #0. + if (Event.Type != HWInstructionEvent::Dispatched) + return; + + if (Event.IR.getSourceIndex() >= Source.size()) + return; + + // Update the cumulative number of resource cycles based on the processor + // resource usage information available from the instruction descriptor. We need to + // compute the cumulative number of resource cycles for every processor + // resource which is consumed by an instruction of the block. + const Instruction &Inst = *Event.IR.getInstruction(); + const InstrDesc &Desc = Inst.getDesc(); + NumMicroOps += Desc.NumMicroOps; + for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) { + if (!RU.second.size()) + continue; + + assert(RU.second.NumUnits && "Expected more than one unit used!"); + if (ProcResourceUsage.find(RU.first) == ProcResourceUsage.end()) { + ProcResourceUsage[RU.first] = RU.second.size(); + continue; + } + + ProcResourceUsage[RU.first] += RU.second.size(); + } +} + +double SummaryView::getBlockRThroughput() const { + assert(NumMicroOps && "Expected at least one micro opcode!"); + + SmallVector<uint64_t, 8> Masks(SM.getNumProcResourceKinds()); + computeProcResourceMasks(SM, Masks); + + // The block throughput is bounded from above by the hardware dispatch + // throughput. That is because the DispatchWidth is an upper bound on the + // number of opcodes that can be part of a single dispatch group. 
+ double Max = static_cast<double>(NumMicroOps) / DispatchWidth; + + // The block throughput is also limited by the amount of hardware parallelism. + // The number of available resource units affects the resource pressure + // distribution, as well as how many blocks can be executed every cycle. + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + uint64_t Mask = Masks[I]; + const auto It = ProcResourceUsage.find_as(Mask); + if (It != ProcResourceUsage.end()) { + const MCProcResourceDesc &MCDesc = *SM.getProcResource(I); + unsigned NumUnits = MCDesc.NumUnits; + double Throughput = static_cast<double>(It->second) / NumUnits; + Max = std::max(Max, Throughput); + } + } + + // The block reciprocal throughput is computed as the MAX of: + // - (#uOps / DispatchWidth) + // - (resource cycles / #units) for every consumed processor resource. + return Max; +} + void SummaryView::printView(raw_ostream &OS) const { unsigned Iterations = Source.getNumIterations(); unsigned Instructions = Source.size(); unsigned TotalInstructions = Instructions * Iterations; double IPC = (double)TotalInstructions / TotalCycles; + double BlockRThroughput = getBlockRThroughput(); std::string Buffer; raw_string_ostream TempStream(Buffer); - TempStream << "Iterations: " << Iterations; - TempStream << "\nInstructions: " << TotalInstructions; - TempStream << "\nTotal Cycles: " << TotalCycles; - TempStream << "\nDispatch Width: " << DispatchWidth; - TempStream << "\nIPC: " << format("%.2f", IPC) << '\n'; + TempStream << "Iterations: " << Iterations; + TempStream << "\nInstructions: " << TotalInstructions; + TempStream << "\nTotal Cycles: " << TotalCycles; + TempStream << "\nDispatch Width: " << DispatchWidth; + TempStream << "\nIPC: " << format("%.2f", IPC); + TempStream << "\nBlock RThroughput: " << format("%.1f", BlockRThroughput) + << '\n'; TempStream.flush(); OS << Buffer; } diff --git a/llvm/tools/llvm-mca/SummaryView.h b/llvm/tools/llvm-mca/SummaryView.h index 
0484057fb10..fe8a5e20f9d 100644 --- a/llvm/tools/llvm-mca/SummaryView.h +++ b/llvm/tools/llvm-mca/SummaryView.h @@ -14,12 +14,12 @@ /// performance throughput. Below is an example of summary view: /// /// -/// Iterations: 300 -/// Instructions: 900 -/// Total Cycles: 610 -/// Dispatch Width: 2 -/// IPC: 1.48 -/// +/// Iterations: 300 +/// Instructions: 900 +/// Total Cycles: 610 +/// Dispatch Width: 2 +/// IPC: 1.48 +/// Block RThroughput: 2.0 /// /// The summary view collects a few performance numbers. The two main /// performance indicators are 'Total Cycles' and IPC (Instructions Per Cycle). @@ -31,22 +31,41 @@ #include "SourceMgr.h" #include "View.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCSchedule.h" #include "llvm/Support/raw_ostream.h" namespace mca { /// A view that collects and prints a few performance numbers. class SummaryView : public View { + const llvm::MCSchedModel &SM; const SourceMgr &Source; const unsigned DispatchWidth; unsigned TotalCycles; + // The total number of micro opcodes contributed by a block of instructions. + unsigned NumMicroOps; + // For each processor resource, this map stores the cumulative number of + // resource cycles consumed by a block of instructions. The resource mask ID + // is used as the key value to access elements of this map. + llvm::DenseMap<uint64_t, unsigned> ProcResourceUsage; + + // Compute the reciprocal throughput for the analyzed code block. + // The reciprocal block throughput is computed as the MAX between: + // - NumMicroOps / DispatchWidth + // - Total Resource Cycles / #Units (for every resource consumed). 
+ double getBlockRThroughput() const; public: - SummaryView(const SourceMgr &S, unsigned Width) - : Source(S), DispatchWidth(Width), TotalCycles(0) {} + SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S, + unsigned Width) + : SM(Model), Source(S), DispatchWidth(Width), TotalCycles(0), + NumMicroOps(0) {} void onCycleEnd() override { ++TotalCycles; } + void onInstructionEvent(const HWInstructionEvent &Event) override; + void printView(llvm::raw_ostream &OS) const override; }; } // namespace mca diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index 1e93a7fdcdb..925584b0193 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -495,7 +495,7 @@ int main(int argc, char **argv) { LoadQueueSize, StoreQueueSize, AssumeNoAlias); mca::BackendPrinter Printer(B); - Printer.addView(llvm::make_unique<mca::SummaryView>(S, Width)); + Printer.addView(llvm::make_unique<mca::SummaryView>(SM, S, Width)); if (PrintInstructionInfoView) Printer.addView( llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, S, *IP)); |

