diff options
78 files changed, 554 insertions, 199 deletions
diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst index 5dcd97fb113..43e64c329c9 100644 --- a/llvm/docs/CommandGuide/llvm-mca.rst +++ b/llvm/docs/CommandGuide/llvm-mca.rst @@ -238,7 +238,10 @@ the following command using the example located at Iterations: 300 Instructions: 900 Total Cycles: 610 + Total uOps: 900 + Dispatch Width: 2 + uOps Per Cycle: 1.48 IPC: 1.48 Block RThroughput: 2.0 @@ -285,35 +288,45 @@ the following command using the example located at - - - 1.00 - 1.00 - - - - - - - - vhaddps %xmm3, %xmm3, %xmm4 According to this report, the dot-product kernel has been executed 300 times, -for a total of 900 dynamically executed instructions. +for a total of 900 simulated instructions. The total number of simulated micro +opcodes (uOps) is also 900. The report is structured in three main sections. The first section collects a few performance numbers; the goal of this section is to give a very quick -overview of the performance throughput. In this example, the two important -performance indicators are **IPC** and **Block RThroughput** (Block Reciprocal +overview of the performance throughput. Important performance indicators are +**IPC**, **uOps Per Cycle**, and **Block RThroughput** (Block Reciprocal Throughput). IPC is computed dividing the total number of simulated instructions by the total -number of cycles. A delta between Dispatch Width and IPC is an indicator of a -performance issue. In the absence of loop-carried data dependencies, the +number of cycles. In the absence of loop-carried data dependencies, the observed IPC tends to a theoretical maximum which can be computed by dividing the number of instructions of a single iteration by the *Block RThroughput*. -IPC is bounded from above by the dispatch width. That is because the dispatch -width limits the maximum size of a dispatch group. IPC is also limited by the -amount of hardware parallelism. 
The availability of hardware resources affects -the resource pressure distribution, and it limits the number of instructions -that can be executed in parallel every cycle. A delta between Dispatch -Width and the theoretical maximum IPC is an indicator of a performance -bottleneck caused by the lack of hardware resources. In general, the lower the -Block RThroughput, the better. - -In this example, ``Instructions per iteration/Block RThroughput`` is 1.50. Since -there are no loop-carried dependencies, the observed IPC is expected to approach -1.50 when the number of iterations tends to infinity. The delta between the -Dispatch Width (2.00), and the theoretical maximum IPC (1.50) is an indicator of -a performance bottleneck caused by the lack of hardware resources, and the -*Resource pressure view* can help to identify the problematic resource usage. +Field 'uOps Per Cycle' is computed dividing the total number of simulated micro +opcodes by the total number of cycles. A delta between Dispatch Width and this +field is an indicator of a performance issue. In the absence of loop-carried +data dependencies, the observed 'uOps Per Cycle' should tend to a theoretical +maximum throughput which can be computed by dividing the number of uOps of a +single iteration by the *Block RThroughput*. + +Field *uOps Per Cycle* is bounded from above by the dispatch width. That is +because the dispatch width limits the maximum size of a dispatch group. Both IPC +and 'uOps Per Cycle' are limited by the amount of hardware parallelism. The +availability of hardware resources affects the resource pressure distribution, +and it limits the number of instructions that can be executed in parallel every +cycle. A delta between Dispatch Width and the theoretical maximum uOps per +Cycle (computed by dividing the number of uOps of a single iteration by the +*Block RThroughput*) is an indicator of a performance bottleneck caused by the +lack of hardware resources. 
+In general, the lower the Block RThroughput, the better. + +In this example, ``uOps per iteration/Block RThroughput`` is 1.50. Since there +are no loop-carried dependencies, the observed *uOps Per Cycle* is expected to +approach 1.50 when the number of iterations tends to infinity. The delta between +the Dispatch Width (2.00), and the theoretical maximum uOp throughput (1.50) is +an indicator of a performance bottleneck caused by the lack of hardware +resources, and the *Resource pressure view* can help to identify the problematic +resource usage. The second section of the report shows the latency and reciprocal throughput of every instruction in the sequence. That section also reports diff --git a/llvm/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s b/llvm/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s index 48c475e3fba..3a0187fd132 100644 --- a/llvm/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s +++ b/llvm/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s @@ -6,7 +6,10 @@ # CHECK: Iterations: 600 # CHECK-NEXT: Instructions: 600 # CHECK-NEXT: Total Cycles: 603 -# CHECK-NEXT: Dispatch Width: 3 +# CHECK-NEXT: Total uOps: 600 + +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 1.00 # CHECK-NEXT: IPC: 1.00 # CHECK-NEXT: Block RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s index e6d2f85026b..f24a20b5632 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s @@ -8,12 +8,17 @@ # ALL-NEXT: Instructions: 300 # M1-NEXT: Total Cycles: 76 -# M1-NEXT: Dispatch Width: 4 +# M3-NEXT: Total Cycles: 51 + +# ALL-NEXT: Total uOps: 300 + +# M1: Dispatch Width: 4 +# M1-NEXT: uOps Per Cycle: 3.95 # M1-NEXT: IPC: 3.95 # M1-NEXT: Block RThroughput: 0.3 -# M3-NEXT: Total Cycles: 51 -# M3-NEXT: Dispatch Width: 6 +# M3: Dispatch Width: 6 +# M3-NEXT: uOps Per Cycle: 5.88 # M3-NEXT: IPC: 
5.88 # M3-NEXT: Block RThroughput: 0.2 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/pr38575.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/pr38575.s index ec29879b356..9eb3f961dd6 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/pr38575.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/pr38575.s @@ -14,7 +14,10 @@ ror x1, x2, x3 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 100 # CHECK-NEXT: Total Cycles: 28 -# CHECK-NEXT: Dispatch Width: 6 +# CHECK-NEXT: Total uOps: 100 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 3.57 # CHECK-NEXT: IPC: 3.57 # CHECK-NEXT: Block RThroughput: 0.3 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s index 19665085d1b..e0024612dec 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s @@ -7,13 +7,16 @@ # ALL: Iterations: 1 # ALL-NEXT: Instructions: 1 # ALL-NEXT: Total Cycles: 2 +# ALL-NEXT: Total uOps: 1 -# M1-NEXT: Dispatch Width: 4 -# M3-NEXT: Dispatch Width: 6 - -# ALL-NEXT: IPC: 0.50 - +# M1: Dispatch Width: 4 +# M1-NEXT: uOps Per Cycle: 0.50 +# M1-NEXT: IPC: 0.50 # M1-NEXT: Block RThroughput: 0.3 + +# M3: Dispatch Width: 6 +# M3-NEXT: uOps Per Cycle: 0.50 +# M3-NEXT: IPC: 0.50 # M3-NEXT: Block RThroughput: 0.2 # ALL: Schedulers - number of cycles where we saw N instructions issued: diff --git a/llvm/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s b/llvm/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s index 603cd52d043..1d7fad19092 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s +++ b/llvm/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s @@ -6,7 +6,10 @@ # CHECK: Iterations: 2 # CHECK-NEXT: Instructions: 2 # CHECK-NEXT: Total Cycles: 4 -# CHECK-NEXT: Dispatch Width: 8 +# CHECK-NEXT: Total uOps: 4 + +# CHECK: Dispatch Width: 8 +# CHECK-NEXT: uOps Per Cycle: 1.00 # 
CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s b/llvm/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s index d33f95d5c7b..2bdfbca468c 100644 --- a/llvm/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s +++ b/llvm/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s @@ -6,7 +6,10 @@ vadd.f32 s0, s2, s2 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 100 # CHECK-NEXT: Total Cycles: 105 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 100 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.95 # CHECK-NEXT: IPC: 0.95 # CHECK-NEXT: Block RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s b/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s index 50ec8462bb9..59b20c755ae 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s @@ -8,7 +8,10 @@ add %eax, %edx # CHECK: Iterations: 1000 # CHECK-NEXT: Instructions: 3000 # CHECK-NEXT: Total Cycles: 1506 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 3000 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.99 # CHECK-NEXT: IPC: 1.99 # CHECK-NEXT: Block RThroughput: 1.5 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s index 60d22ea3a1b..4328056930d 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s @@ -16,7 +16,10 @@ bsf %rax, %rcx # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 # CHECK-NEXT: Total Cycles: 704 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 1200 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.70 # CHECK-NEXT: IPC: 0.57 # CHECK-NEXT: Block RThroughput: 6.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s 
index 6d8d8a9d0a1..649c8f982d5 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s @@ -34,7 +34,10 @@ vandps %xmm4, %xmm1, %xmm0 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1800 # CHECK-NEXT: Total Cycles: 3811 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 3400 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.89 # CHECK-NEXT: IPC: 0.47 # CHECK-NEXT: Block RThroughput: 38.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s index 001fb8ed661..7fd97d32d0f 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s @@ -12,7 +12,10 @@ cmovae %ebx, %eax # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 3000 # CHECK-NEXT: Total Cycles: 1504 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 3000 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.99 # CHECK-NEXT: IPC: 1.99 # CHECK-NEXT: Block RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s index 04007f24e42..aed7d75ffdc 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s @@ -15,7 +15,10 @@ vpcmpeqq %xmm3, %xmm3, %xmm0 # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 6000 # CHECK-NEXT: Total Cycles: 3003 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 6000 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 2.00 # CHECK-NEXT: IPC: 2.00 # CHECK-NEXT: Block RThroughput: 2.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s index 9ab8d039ccd..ef6faa58eba 100644 --- 
a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s @@ -16,7 +16,10 @@ vpcmpgtq %xmm3, %xmm3, %xmm0 # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 6000 # CHECK-NEXT: Total Cycles: 3001 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 6000 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 2.00 # CHECK-NEXT: IPC: 2.00 # CHECK-NEXT: Block RThroughput: 2.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s index 34cabda553f..b2bd7169c51 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s @@ -13,7 +13,10 @@ sbb %eax, %eax # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 3000 # CHECK-NEXT: Total Cycles: 3003 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 3000 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.00 # CHECK-NEXT: IPC: 1.00 # CHECK-NEXT: Block RThroughput: 2.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s index da94624fd55..e121941298a 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s @@ -14,7 +14,10 @@ sbb %eax, %eax # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 # CHECK-NEXT: Total Cycles: 3007 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 6000 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 2.00 # CHECK-NEXT: IPC: 1.50 # CHECK-NEXT: Block RThroughput: 2.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s index 8fa166374c7..ca69339467d 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s +++ 
b/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s @@ -8,7 +8,10 @@ vpaddd %xmm0, %xmm0, %xmm3 # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 1500 # CHECK-NEXT: Total Cycles: 1504 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.00 # CHECK-NEXT: IPC: 1.00 # CHECK-NEXT: Block RThroughput: 1.5 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s index 9dee1e57c21..643e456450c 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s @@ -8,7 +8,10 @@ vhaddps %xmm3, %xmm3, %xmm4 # CHECK: Iterations: 300 # CHECK-NEXT: Instructions: 900 # CHECK-NEXT: Total Cycles: 610 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 900 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.48 # CHECK-NEXT: IPC: 1.48 # CHECK-NEXT: Block RThroughput: 2.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s index 42d1f15df47..87862a6e5a3 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s @@ -7,7 +7,10 @@ vhaddps (%rdi), %xmm1, %xmm2 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 # CHECK-NEXT: Total Cycles: 11 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 2 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.18 # CHECK-NEXT: IPC: 0.18 # CHECK-NEXT: Block RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s index a538164afee..80d5109d07e 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s @@ -7,7 +7,10 @@ vhaddps (%rdi), %ymm1, %ymm2 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 
# CHECK-NEXT: Total Cycles: 12 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 3 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.25 # CHECK-NEXT: IPC: 0.17 # CHECK-NEXT: Block RThroughput: 2.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s b/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s index 769e19cb97a..fa19b55c1d1 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s @@ -14,7 +14,11 @@ vhaddps %xmm3, %xmm3, %xmm4 # ENABLED: Iterations: 100 # ENABLED-NEXT: Instructions: 300 # ENABLED-NEXT: Total Cycles: 209 -# ENABLED-NEXT: Dispatch Width: 2 +# ENABLED-NEXT: Total uOps: 300 + + +# ENABLED: Dispatch Width: 2 +# ENABLED-NEXT: uOps Per Cycle: 1.44 # ENABLED-NEXT: IPC: 1.44 # ENABLED-NEXT: Block RThroughput: 2.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s b/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s index 4733357e87e..2eee80e917e 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s @@ -13,7 +13,10 @@ vmovaps %xmm0, 48(%rdi) # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 # CHECK-NEXT: Total Cycles: 2403 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 800 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.33 # CHECK-NEXT: IPC: 0.33 # CHECK-NEXT: Block RThroughput: 4.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s b/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s index 5d394c6691c..8cdba9acc6c 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s @@ -13,7 +13,10 @@ vmovaps %xmm0, 48(%rdi) # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 # CHECK-NEXT: Total Cycles: 408 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 800 + +# CHECK: Dispatch Width: 2 +# 
CHECK-NEXT: uOps Per Cycle: 1.96 # CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 4.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s b/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s index 3c20002d9c8..c45e86ab1eb 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s @@ -30,7 +30,10 @@ vpcmpeqw %xmm3, %xmm3, %xmm5 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1500 # CHECK-NEXT: Total Cycles: 753 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.99 # CHECK-NEXT: IPC: 1.99 # CHECK-NEXT: Block RThroughput: 7.5 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s index fb6eb234801..657eb9737d9 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s @@ -8,7 +8,10 @@ add %ecx, %ebx # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 3 # CHECK-NEXT: Total Cycles: 11 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 4 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.27 # CHECK-NEXT: Block RThroughput: 4.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s index 0a9ffca64a5..f5ad0be5610 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s @@ -13,7 +13,10 @@ xor %bx, %dx # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 # CHECK-NEXT: Total Cycles: 4503 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 4500 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.00 # CHECK-NEXT: IPC: 1.00 # CHECK-NEXT: Block RThroughput: 1.5 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s 
b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s index cda173e5266..ad660a2189b 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s @@ -13,7 +13,10 @@ add %cx, %bx # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 # CHECK-NEXT: Total Cycles: 7503 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 6000 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.80 # CHECK-NEXT: IPC: 0.60 # CHECK-NEXT: Block RThroughput: 2.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-5.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-5.s index 7669621d7af..2d1397c101f 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-5.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-5.s @@ -8,7 +8,10 @@ lzcnt %ax, %bx ## partial register stall. # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 1500 # CHECK-NEXT: Total Cycles: 1503 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.00 # CHECK-NEXT: IPC: 1.00 # CHECK-NEXT: Block RThroughput: 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s index 939af6d06c8..9843a7cf398 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s @@ -14,7 +14,10 @@ lzcnt 2(%rsp), %cx # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 # CHECK-NEXT: Total Cycles: 7504 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 6000 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.80 # CHECK-NEXT: IPC: 0.60 # CHECK-NEXT: Block RThroughput: 2.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s index 4833077ba8b..6bc7e75f38c 100644 --- 
a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s @@ -8,7 +8,10 @@ add %ecx, %ebx # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 3 # CHECK-NEXT: Total Cycles: 8 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 4 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.50 # CHECK-NEXT: IPC: 0.38 # CHECK-NEXT: Block RThroughput: 2.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s index 54b8d132bd3..6bf375be02b 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s @@ -20,7 +20,10 @@ vsqrtps %ymm0, %ymm2 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 # CHECK-NEXT: Total Cycles: 6306 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 1200 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.19 # CHECK-NEXT: IPC: 0.13 # CHECK-NEXT: Block RThroughput: 63.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s index 5ac79011f18..fa501b58ee4 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s @@ -7,7 +7,10 @@ stmxcsr (%rsp) # CHECK: Iterations: 2 # CHECK-NEXT: Instructions: 4 # CHECK-NEXT: Total Cycles: 205 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 4 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.02 # CHECK-NEXT: IPC: 0.02 # CHECK-NEXT: Block RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s b/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s index f83961e0799..380f8ccecb9 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s @@ -13,7 +13,10 @@ add %ebx, %eax # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 # CHECK-NEXT: Total Cycles: 503 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 800 + +# CHECK: 
Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.59 # CHECK-NEXT: IPC: 1.59 # CHECK-NEXT: Block RThroughput: 4.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s b/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s index af39e59ae40..4a1f8706d96 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s @@ -21,7 +21,10 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 16 # CHECK-NEXT: Total Cycles: 31 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 16 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.52 # CHECK-NEXT: IPC: 0.52 # CHECK-NEXT: Block RThroughput: 21.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s index 7bd9b8951f9..3896967c585 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s @@ -11,7 +11,10 @@ vmulps (%rdi), %xmm1, %xmm2 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 # CHECK-NEXT: Total Cycles: 10 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 2 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.20 # CHECK-NEXT: IPC: 0.20 # CHECK-NEXT: Block RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s index 7cbc24baf3f..99ef892d269 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s @@ -10,7 +10,10 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 # CHECK-NEXT: Total Cycles: 10 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 4 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.40 # CHECK-NEXT: IPC: 0.20 # CHECK-NEXT: Block RThroughput: 2.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s index 
722170ab7af..ef8d50aab1e 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s @@ -8,7 +8,10 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 3 # CHECK-NEXT: Total Cycles: 7 -# CHECK-NEXT: Dispatch Width: 3 +# CHECK-NEXT: Total uOps: 3 + +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 0.43 # CHECK-NEXT: IPC: 0.43 # CHECK-NEXT: Block RThroughput: 1.5 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s index dc3e9470d8f..0f95d5ceab9 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s @@ -7,7 +7,10 @@ vmulps %xmm0, %xmm0, %xmm0 # CHECK: Iterations: 5 # CHECK-NEXT: Instructions: 10 # CHECK-NEXT: Total Cycles: 28 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 10 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.36 # CHECK-NEXT: Block RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s index 722d4763e06..b68ed9ce62b 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s @@ -7,7 +7,10 @@ vmulps %xmm0, %xmm0, %xmm0 # CHECK: Iterations: 5 # CHECK-NEXT: Instructions: 10 # CHECK-NEXT: Total Cycles: 28 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 10 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.36 # CHECK-NEXT: Block RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s index ff48db94855..12aeed7acb2 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s @@ -6,7 +6,10 @@ idiv %eax # CHECK: Iterations: 2 # CHECK-NEXT: 
Instructions: 2 # CHECK-NEXT: Total Cycles: 55 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 4 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.07 # CHECK-NEXT: IPC: 0.04 # CHECK-NEXT: Block RThroughput: 25.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s index c9057f53066..d67d5e456c3 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s @@ -6,7 +6,10 @@ idiv %eax # CHECK: Iterations: 22 # CHECK-NEXT: Instructions: 22 # CHECK-NEXT: Total Cycles: 553 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 44 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.08 # CHECK-NEXT: IPC: 0.04 # CHECK-NEXT: Block RThroughput: 25.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s index d0b1bf11116..3d09bc788bd 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s @@ -38,7 +38,10 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 33 # CHECK-NEXT: Total Cycles: 69 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 66 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.96 # CHECK-NEXT: IPC: 0.48 # CHECK-NEXT: Block RThroughput: 64.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s index 32b73f37f78..58c4b4476f5 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s @@ -7,7 +7,10 @@ add %rsi, %rsi # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 # CHECK-NEXT: Total Cycles: 10 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 2 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.20 # CHECK-NEXT: IPC: 0.20 # CHECK-NEXT: Block 
RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s b/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s index d34db61bcc8..d1285441de5 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s @@ -6,7 +6,10 @@ add %edi, %eax # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 100 # CHECK-NEXT: Total Cycles: 103 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 100 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.97 # CHECK-NEXT: IPC: 0.97 # CHECK-NEXT: Block RThroughput: 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s index 33b2a4d87e8..57f07e1e8a8 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s @@ -7,7 +7,10 @@ vandps (%rdi), %xmm1, %xmm2 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 # CHECK-NEXT: Total Cycles: 9 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 2 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.22 # CHECK-NEXT: IPC: 0.22 # CHECK-NEXT: Block RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s index 12e170d4019..5650a8ba15c 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s @@ -7,7 +7,10 @@ vandps (%rdi), %ymm1, %ymm2 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 # CHECK-NEXT: Total Cycles: 10 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 4 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.40 # CHECK-NEXT: IPC: 0.20 # CHECK-NEXT: Block RThroughput: 2.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s index 
8a11db9e4fb..110c3c23532 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s @@ -71,7 +71,10 @@ vpxor %xmm3, %xmm3, %xmm5 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 55 # CHECK-NEXT: Total Cycles: 29 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 55 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.90 # CHECK-NEXT: IPC: 1.90 # CHECK-NEXT: Block RThroughput: 27.5 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s index 1f543a566c8..75dfe1f1753 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s @@ -11,7 +11,10 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 600 # CHECK-NEXT: Total Cycles: 318 -# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: Total uOps: 600 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 1.89 # CHECK-NEXT: IPC: 1.89 # CHECK-NEXT: Block RThroughput: 3.0 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s index be18513fe95..ce578c3ae7b 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s @@ -11,7 +11,10 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 600 # CHECK-NEXT: Total Cycles: 318 -# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: Total uOps: 600 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 1.89 # CHECK-NEXT: IPC: 1.89 # CHECK-NEXT: Block RThroughput: 3.0 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s index 3fe5eb50282..d0cf359379d 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s +++ 
b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s @@ -11,7 +11,10 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 600 # CHECK-NEXT: Total Cycles: 318 -# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: Total uOps: 600 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 1.89 # CHECK-NEXT: IPC: 1.89 # CHECK-NEXT: Block RThroughput: 3.0 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s index a3783fa279f..d7d99861cfb 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s @@ -11,7 +11,10 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 600 # CHECK-NEXT: Total Cycles: 318 -# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: Total uOps: 600 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 1.89 # CHECK-NEXT: IPC: 1.89 # CHECK-NEXT: Block RThroughput: 3.0 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s index d8718c527f1..ba59a86a048 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s @@ -11,7 +11,10 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 600 # CHECK-NEXT: Total Cycles: 316 -# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: Total uOps: 600 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 1.90 # CHECK-NEXT: IPC: 1.90 # CHECK-NEXT: Block RThroughput: 3.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s index dcb1bbd4bd3..019f84fe542 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s @@ -8,7 +8,10 @@ add %ecx, %ebx # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 3 # CHECK-NEXT: Total Cycles: 9 -# 
CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: Total uOps: 4 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 0.44 # CHECK-NEXT: IPC: 0.33 # CHECK-NEXT: Block RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s index 3122d034652..abe923b5906 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s @@ -11,7 +11,10 @@ xor %bx, %dx # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 # CHECK-NEXT: Total Cycles: 4503 -# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: Total uOps: 4500 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 1.00 # CHECK-NEXT: IPC: 1.00 # CHECK-NEXT: Block RThroughput: 0.8 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s index 8a7a1fdfdd6..8202a6057c8 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s @@ -11,7 +11,10 @@ add %cx, %bx # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 # CHECK-NEXT: Total Cycles: 7503 -# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: Total uOps: 4500 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 0.60 # CHECK-NEXT: IPC: 0.60 # CHECK-NEXT: Block RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s index 596b3309a1e..cae250315db 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s @@ -6,7 +6,10 @@ lzcnt %ax, %bx ## partial register stall. 
# CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 1500 # CHECK-NEXT: Total Cycles: 1504 -# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 1.00 # CHECK-NEXT: IPC: 1.00 # CHECK-NEXT: Block RThroughput: 0.3 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s index 0f4494165ad..69637932c31 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s @@ -13,7 +13,10 @@ lzcnt 2(%rsp), %cx # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 # CHECK-NEXT: Total Cycles: 10503 -# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: Total uOps: 7500 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 0.71 # CHECK-NEXT: IPC: 0.43 # CHECK-NEXT: Block RThroughput: 1.3 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s index 5be7b423241..a7e3860e145 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s @@ -12,7 +12,10 @@ addq %rcx, %rdx # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 3 # CHECK-NEXT: Total Cycles: 9 -# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: Total uOps: 4 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 0.44 # CHECK-NEXT: IPC: 0.33 # CHECK-NEXT: Block RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s index 9a7b43fca57..f7a85399e5e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s @@ -8,7 +8,10 @@ add %ecx, %ebx # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 3 # CHECK-NEXT: Total Cycles: 8 -# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: Total uOps: 3 + +# CHECK: 
Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 0.38 # CHECK-NEXT: IPC: 0.38 # CHECK-NEXT: Block RThroughput: 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s index 8c2e6614129..da4699f4615 100644 --- a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s +++ b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s @@ -12,30 +12,45 @@ bextrl %esi, (%rdi), %eax # ALL-NEXT: Instructions: 2 # BDWELL-NEXT: Total Cycles: 10 -# BDWELL-NEXT: Dispatch Width: 4 -# BDWELL-NEXT: IPC: 0.20 -# BDWELL-NEXT: Block RThroughput: 1.0 +# BDWELL-NEXT: Total uOps: 4 # BTVER2-NEXT: Total Cycles: 7 -# BTVER2-NEXT: Dispatch Width: 2 -# BTVER2-NEXT: IPC: 0.29 -# BTVER2-NEXT: Block RThroughput: 1.0 +# BTVER2-NEXT: Total uOps: 2 # HASWELL-NEXT: Total Cycles: 10 -# HASWELL-NEXT: Dispatch Width: 4 -# HASWELL-NEXT: IPC: 0.20 -# HASWELL-NEXT: Block RThroughput: 1.0 +# HASWELL-NEXT: Total uOps: 4 # SKYLAKE-NEXT: Total Cycles: 10 -# SKYLAKE-NEXT: Dispatch Width: 6 -# SKYLAKE-NEXT: IPC: 0.20 -# SKYLAKE-NEXT: Block RThroughput: 0.7 +# SKYLAKE-NEXT: Total uOps: 4 # ZNVER1-NEXT: Total Cycles: 8 -# ZNVER1-NEXT: Dispatch Width: 4 +# ZNVER1-NEXT: Total uOps: 3 + +# BTVER2: Dispatch Width: 2 +# BTVER2-NEXT: uOps Per Cycle: 0.29 +# BTVER2-NEXT: IPC: 0.29 +# BTVER2-NEXT: Block RThroughput: 1.0 + +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.38 # ZNVER1-NEXT: IPC: 0.25 # ZNVER1-NEXT: Block RThroughput: 0.8 +# BDWELL: Dispatch Width: 4 +# BDWELL-NEXT: uOps Per Cycle: 0.40 +# BDWELL-NEXT: IPC: 0.20 +# BDWELL-NEXT: Block RThroughput: 1.0 + +# HASWELL: Dispatch Width: 4 +# HASWELL-NEXT: uOps Per Cycle: 0.40 +# HASWELL-NEXT: IPC: 0.20 +# HASWELL-NEXT: Block RThroughput: 1.0 + +# SKYLAKE: Dispatch Width: 6 +# SKYLAKE-NEXT: uOps Per Cycle: 0.40 +# SKYLAKE-NEXT: IPC: 0.20 +# SKYLAKE-NEXT: Block RThroughput: 0.7 + # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps # ALL-NEXT: [2]: Latency diff --git 
a/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s index 86ed81f5c02..a9c7c8b8564 100644 --- a/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s +++ b/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s @@ -11,25 +11,32 @@ bzhil %esi, (%rdi), %eax # ALL-NEXT: Instructions: 2 # BDWELL-NEXT: Total Cycles: 9 -# BDWELL-NEXT: Dispatch Width: 4 +# HASWELL-NEXT: Total Cycles: 9 +# SKYLAKE-NEXT: Total Cycles: 9 +# ZNVER1-NEXT: Total Cycles: 8 + +# ALL-NEXT: Total uOps: 3 + +# BDWELL: Dispatch Width: 4 +# BDWELL-NEXT: uOps Per Cycle: 0.33 # BDWELL-NEXT: IPC: 0.22 # BDWELL-NEXT: Block RThroughput: 0.8 -# HASWELL-NEXT: Total Cycles: 9 -# HASWELL-NEXT: Dispatch Width: 4 +# HASWELL: Dispatch Width: 4 +# HASWELL-NEXT: uOps Per Cycle: 0.33 # HASWELL-NEXT: IPC: 0.22 # HASWELL-NEXT: Block RThroughput: 0.8 -# SKYLAKE-NEXT: Total Cycles: 9 -# SKYLAKE-NEXT: Dispatch Width: 6 -# SKYLAKE-NEXT: IPC: 0.22 -# SKYLAKE-NEXT: Block RThroughput: 0.5 - -# ZNVER1-NEXT: Total Cycles: 8 -# ZNVER1-NEXT: Dispatch Width: 4 +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.38 # ZNVER1-NEXT: IPC: 0.25 # ZNVER1-NEXT: Block RThroughput: 0.8 +# SKYLAKE: Dispatch Width: 6 +# SKYLAKE-NEXT: uOps Per Cycle: 0.33 +# SKYLAKE-NEXT: IPC: 0.22 +# SKYLAKE-NEXT: Block RThroughput: 0.5 + # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps # ALL-NEXT: [2]: Latency diff --git a/llvm/test/tools/llvm-mca/X86/cpus.s b/llvm/test/tools/llvm-mca/X86/cpus.s index e0822d8c2bc..6a4bcff687c 100644 --- a/llvm/test/tools/llvm-mca/X86/cpus.s +++ b/llvm/test/tools/llvm-mca/X86/cpus.s @@ -15,27 +15,54 @@ add %edi, %eax # ALL: Iterations: 100 # ALL-NEXT: Instructions: 100 # ALL-NEXT: Total Cycles: 103 +# ALL-NEXT: Total uOps: 100 -# BROADWELL-NEXT: Dispatch Width: 4 -# BTVER2-NEXT: Dispatch Width: 2 -# HASWELL-NEXT: Dispatch Width: 4 -# IVYBRIDGE-NEXT: Dispatch Width: 4 -# KNL-NEXT: Dispatch Width: 4 -# SANDYBRIDGE-NEXT: Dispatch Width: 4 -# SKX-NEXT: Dispatch Width: 6 -# 
SKX-AVX512-NEXT: Dispatch Width: 6 -# SLM-NEXT: Dispatch Width: 2 -# ZNVER1-NEXT: Dispatch Width: 4 +# BTVER2: Dispatch Width: 2 +# BTVER2-NEXT: uOps Per Cycle: 0.97 +# BTVER2-NEXT: IPC: 0.97 +# BTVER2-NEXT: Block RThroughput: 0.5 -# ALL-NEXT: IPC: 0.97 +# SLM: Dispatch Width: 2 +# SLM-NEXT: uOps Per Cycle: 0.97 +# SLM-NEXT: IPC: 0.97 +# SLM-NEXT: Block RThroughput: 0.5 +# BROADWELL: Dispatch Width: 4 +# BROADWELL-NEXT: uOps Per Cycle: 0.97 +# BROADWELL-NEXT: IPC: 0.97 # BROADWELL-NEXT: Block RThroughput: 0.3 -# BTVER2-NEXT: Block RThroughput: 0.5 + +# HASWELL: Dispatch Width: 4 +# HASWELL-NEXT: uOps Per Cycle: 0.97 +# HASWELL-NEXT: IPC: 0.97 # HASWELL-NEXT: Block RThroughput: 0.3 + +# IVYBRIDGE: Dispatch Width: 4 +# IVYBRIDGE-NEXT: uOps Per Cycle: 0.97 +# IVYBRIDGE-NEXT: IPC: 0.97 # IVYBRIDGE-NEXT: Block RThroughput: 0.3 + +# KNL: Dispatch Width: 4 +# KNL-NEXT: uOps Per Cycle: 0.97 +# KNL-NEXT: IPC: 0.97 # KNL-NEXT: Block RThroughput: 0.3 + +# SANDYBRIDGE: Dispatch Width: 4 +# SANDYBRIDGE-NEXT: uOps Per Cycle: 0.97 +# SANDYBRIDGE-NEXT: IPC: 0.97 # SANDYBRIDGE-NEXT: Block RThroughput: 0.3 + +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.97 +# ZNVER1-NEXT: IPC: 0.97 +# ZNVER1-NEXT: Block RThroughput: 0.3 + +# SKX: Dispatch Width: 6 +# SKX-NEXT: uOps Per Cycle: 0.97 +# SKX-NEXT: IPC: 0.97 # SKX-NEXT: Block RThroughput: 0.3 + +# SKX-AVX512: Dispatch Width: 6 +# SKX-AVX512-NEXT: uOps Per Cycle: 0.97 +# SKX-AVX512-NEXT: IPC: 0.97 # SKX-AVX512-NEXT: Block RThroughput: 0.3 -# SLM-NEXT: Block RThroughput: 0.5 -# ZNVER1-NEXT: Block RThroughput: 0.3 diff --git a/llvm/test/tools/llvm-mca/X86/default-iterations.s b/llvm/test/tools/llvm-mca/X86/default-iterations.s index 7b89266eaf3..ee1b6169c65 100644 --- a/llvm/test/tools/llvm-mca/X86/default-iterations.s +++ b/llvm/test/tools/llvm-mca/X86/default-iterations.s @@ -8,16 +8,22 @@ add %eax, %eax # CUSTOM: Iterations: 1 # CUSTOM-NEXT: Instructions: 1 # CUSTOM-NEXT: Total Cycles: 4 -# CUSTOM-NEXT: Dispatch Width: 2 
-# CUSTOM-NEXT: IPC: 0.25 -# CUSTOM-NEXT: Block RThroughput: 0.5 +# CUSTOM-NEXT: Total uOps: 1 # DEFAULT: Iterations: 100 # DEFAULT-NEXT: Instructions: 100 # DEFAULT-NEXT: Total Cycles: 103 -# DEFAULT-NEXT: Dispatch Width: 2 +# DEFAULT-NEXT: Total uOps: 100 + +# ALL: Dispatch Width: 2 + +# CUSTOM-NEXT: uOps Per Cycle: 0.25 +# CUSTOM-NEXT: IPC: 0.25 + +# DEFAULT-NEXT: uOps Per Cycle: 0.97 # DEFAULT-NEXT: IPC: 0.97 -# DEFAULT-NEXT: Block RThroughput: 0.5 + +# ALL-NEXT: Block RThroughput: 0.5 # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps diff --git a/llvm/test/tools/llvm-mca/X86/dispatch_width.s b/llvm/test/tools/llvm-mca/X86/dispatch_width.s index b6d5792c18c..baaad160f13 100644 --- a/llvm/test/tools/llvm-mca/X86/dispatch_width.s +++ b/llvm/test/tools/llvm-mca/X86/dispatch_width.s @@ -8,11 +8,14 @@ add %eax, %eax # ALL: Iterations: 100 # ALL-NEXT: Instructions: 100 # ALL-NEXT: Total Cycles: 103 +# ALL-NEXT: Total uOps: 100 -# CUSTOM-NEXT: Dispatch Width: 1 -# DEFAULT-NEXT: Dispatch Width: 2 - -# ALL-NEXT: IPC: 0.97 - +# CUSTOM: Dispatch Width: 1 +# CUSTOM-NEXT: uOps Per Cycle: 0.97 +# CUSTOM-NEXT: IPC: 0.97 # CUSTOM-NEXT: Block RThroughput: 1.0 + +# DEFAULT: Dispatch Width: 2 +# DEFAULT-NEXT: uOps Per Cycle: 0.97 +# DEFAULT-NEXT: IPC: 0.97 # DEFAULT-NEXT: Block RThroughput: 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s index 3df2c388d91..679e8a3b598 100644 --- a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s @@ -14,22 +14,36 @@ vfmadd213ps (%rdi), %xmm1, %xmm2 # ALL-NEXT: Instructions: 2 # BDWELL-NEXT: Total Cycles: 13 -# BDWELL-NEXT: Dispatch Width: 4 -# BDWELL-NEXT: IPC: 0.15 +# BDWELL-NEXT: Total uOps: 3 # HASWELL-NEXT: Total Cycles: 14 -# HASWELL-NEXT: Dispatch Width: 4 -# HASWELL-NEXT: IPC: 0.14 +# HASWELL-NEXT: Total uOps: 3 # SKYLAKE-NEXT: Total Cycles: 13 -# SKYLAKE-NEXT: Dispatch Width: 6 -# SKYLAKE-NEXT: IPC: 0.15 +# 
SKYLAKE-NEXT: Total uOps: 3 # ZNVER1-NEXT: Total Cycles: 15 -# ZNVER1-NEXT: Dispatch Width: 4 +# ZNVER1-NEXT: Total uOps: 2 + +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.13 # ZNVER1-NEXT: IPC: 0.13 +# ZNVER1-NEXT: Block RThroughput: 1.0 -# ALL-NEXT: Block RThroughput: 1.0 +# HASWELL: Dispatch Width: 4 +# HASWELL-NEXT: uOps Per Cycle: 0.21 +# HASWELL-NEXT: IPC: 0.14 +# HASWELL-NEXT: Block RThroughput: 1.0 + +# BDWELL: Dispatch Width: 4 +# BDWELL-NEXT: uOps Per Cycle: 0.23 +# BDWELL-NEXT: IPC: 0.15 +# BDWELL-NEXT: Block RThroughput: 1.0 + +# SKYLAKE: Dispatch Width: 6 +# SKYLAKE-NEXT: uOps Per Cycle: 0.23 +# SKYLAKE-NEXT: IPC: 0.15 +# SKYLAKE-NEXT: Block RThroughput: 1.0 # ALL: Timeline view: diff --git a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s index 3ff6906082f..698aba487df 100644 --- a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s @@ -14,22 +14,36 @@ vfmadd213ps (%rdi), %xmm1, %xmm2 # ALL-NEXT: Instructions: 2 # BDWELL-NEXT: Total Cycles: 13 -# BDWELL-NEXT: Dispatch Width: 4 -# BDWELL-NEXT: IPC: 0.15 +# BDWELL-NEXT: Total uOps: 3 # HASWELL-NEXT: Total Cycles: 14 -# HASWELL-NEXT: Dispatch Width: 4 -# HASWELL-NEXT: IPC: 0.14 +# HASWELL-NEXT: Total uOps: 3 # SKYLAKE-NEXT: Total Cycles: 13 -# SKYLAKE-NEXT: Dispatch Width: 6 -# SKYLAKE-NEXT: IPC: 0.15 +# SKYLAKE-NEXT: Total uOps: 3 # ZNVER1-NEXT: Total Cycles: 15 -# ZNVER1-NEXT: Dispatch Width: 4 +# ZNVER1-NEXT: Total uOps: 2 + +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.13 # ZNVER1-NEXT: IPC: 0.13 +# ZNVER1-NEXT: Block RThroughput: 1.0 -# ALL-NEXT: Block RThroughput: 1.0 +# HASWELL: Dispatch Width: 4 +# HASWELL-NEXT: uOps Per Cycle: 0.21 +# HASWELL-NEXT: IPC: 0.14 +# HASWELL-NEXT: Block RThroughput: 1.0 + +# BDWELL: Dispatch Width: 4 +# BDWELL-NEXT: uOps Per Cycle: 0.23 +# BDWELL-NEXT: IPC: 0.15 +# BDWELL-NEXT: Block RThroughput: 1.0 + +# SKYLAKE: 
Dispatch Width: 6 +# SKYLAKE-NEXT: uOps Per Cycle: 0.23 +# SKYLAKE-NEXT: IPC: 0.15 +# SKYLAKE-NEXT: Block RThroughput: 1.0 # ALL: Timeline view: diff --git a/llvm/test/tools/llvm-mca/X86/intel-syntax.s b/llvm/test/tools/llvm-mca/X86/intel-syntax.s index 7cf2c13f897..1aaa3902866 100644 --- a/llvm/test/tools/llvm-mca/X86/intel-syntax.s +++ b/llvm/test/tools/llvm-mca/X86/intel-syntax.s @@ -12,7 +12,10 @@ # ALL: Iterations: 100 # ALL-NEXT: Instructions: 400 # ALL-NEXT: Total Cycles: 305 -# ALL-NEXT: Dispatch Width: 2 +# ALL-NEXT: Total uOps: 500 + +# ALL: Dispatch Width: 2 +# ALL-NEXT: uOps Per Cycle: 1.64 # ALL-NEXT: IPC: 1.31 # ALL-NEXT: Block RThroughput: 2.5 diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-2.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-2.s index 108067e391f..21d1030bd62 100644 --- a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-2.s +++ b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-2.s @@ -10,7 +10,10 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 1 # CHECK-NEXT: Total Cycles: 4 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 1 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.25 # CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-3.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-3.s index ab70a6175e2..efbb157f9da 100644 --- a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-3.s +++ b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-3.s @@ -10,7 +10,10 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 1 # CHECK-NEXT: Total Cycles: 4 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 1 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.25 # CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-4.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-4.s index 09eadea4b3c..f7a58ff3853 100644 --- a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-4.s +++ 
b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-4.s @@ -14,7 +14,10 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 1 # CHECK-NEXT: Total Cycles: 4 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 1 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.25 # CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-5.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-5.s index 21ef65c48cd..f562dbe38a1 100644 --- a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-5.s +++ b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-5.s @@ -18,7 +18,10 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 1 # CHECK-NEXT: Total Cycles: 4 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 1 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.25 # CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 0.5 @@ -38,7 +41,10 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 1 # CHECK-NEXT: Total Cycles: 4 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 1 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.25 # CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 0.5 @@ -58,7 +64,10 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 1 # CHECK-NEXT: Total Cycles: 4 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 1 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.25 # CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s b/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s index 48a5e03d61b..5763caaf985 100644 --- a/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s +++ b/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s @@ -9,7 +9,10 @@ add %eax, %eax # ALL: Iterations: 100 # ALL-NEXT: Instructions: 100 # ALL-NEXT: Total Cycles: 103 -# ALL-NEXT: Dispatch Width: 2 +# ALL-NEXT: Total uOps: 100 + +# ALL: Dispatch Width: 2 +# ALL-NEXT: uOps Per Cycle: 0.97 # ALL-NEXT: IPC: 
0.97 # ALL-NEXT: Block RThroughput: 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s b/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s index d5cc3c884fc..3e8c8bece2a 100644 --- a/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s +++ b/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s @@ -10,7 +10,10 @@ add %eax, %eax # ALL: Iterations: 100 # ALL-NEXT: Instructions: 100 # ALL-NEXT: Total Cycles: 103 -# ALL-NEXT: Dispatch Width: 2 +# ALL-NEXT: Total uOps: 100 + +# ALL: Dispatch Width: 2 +# ALL-NEXT: uOps Per Cycle: 0.97 # ALL-NEXT: IPC: 0.97 # ALL-NEXT: Block RThroughput: 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/option-all-views-1.s b/llvm/test/tools/llvm-mca/X86/option-all-views-1.s index aa9561e0649..8950014f6b9 100644 --- a/llvm/test/tools/llvm-mca/X86/option-all-views-1.s +++ b/llvm/test/tools/llvm-mca/X86/option-all-views-1.s @@ -11,7 +11,10 @@ add %eax, %eax # DEFAULTREPORT: Iterations: 100 # DEFAULTREPORT-NEXT: Instructions: 100 # DEFAULTREPORT-NEXT: Total Cycles: 103 -# DEFAULTREPORT-NEXT: Dispatch Width: 2 +# DEFAULTREPORT-NEXT: Total uOps: 100 + +# DEFAULTREPORT: Dispatch Width: 2 +# DEFAULTREPORT-NEXT: uOps Per Cycle: 0.97 # DEFAULTREPORT-NEXT: IPC: 0.97 # DEFAULTREPORT-NEXT: Block RThroughput: 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/option-all-views-2.s b/llvm/test/tools/llvm-mca/X86/option-all-views-2.s index 076c30a8e17..30c194777e1 100644 --- a/llvm/test/tools/llvm-mca/X86/option-all-views-2.s +++ b/llvm/test/tools/llvm-mca/X86/option-all-views-2.s @@ -10,7 +10,10 @@ add %eax, %eax # ALL: Iterations: 100 # ALL-NEXT: Instructions: 100 # ALL-NEXT: Total Cycles: 103 -# ALL-NEXT: Dispatch Width: 2 +# ALL-NEXT: Total uOps: 100 + +# ALL: Dispatch Width: 2 +# ALL-NEXT: uOps Per Cycle: 0.97 # ALL-NEXT: IPC: 0.97 # ALL-NEXT: Block RThroughput: 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/option-no-stats-1.s b/llvm/test/tools/llvm-mca/X86/option-no-stats-1.s index 0f94f552152..8b9f229ce83 100644 --- 
a/llvm/test/tools/llvm-mca/X86/option-no-stats-1.s +++ b/llvm/test/tools/llvm-mca/X86/option-no-stats-1.s @@ -8,7 +8,10 @@ add %edi, %eax # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 100 # CHECK-NEXT: Total Cycles: 103 -# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: Total uOps: 100 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.97 # CHECK-NEXT: IPC: 0.97 # CHECK-NEXT: Block RThroughput: 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s index 10fe5142e23..a4d9d1b42e7 100644 --- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s @@ -20,40 +20,61 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # ALL-NEXT: Instructions: 2 # BDWELL-NEXT: Total Cycles: 10 -# BDWELL-NEXT: Dispatch Width: 4 -# BDWELL-NEXT: IPC: 0.20 -# BDWELL-NEXT: Block RThroughput: 2.0 +# BDWELL-NEXT: Total uOps: 4 # BTVER2-NEXT: Total Cycles: 11 -# BTVER2-NEXT: Dispatch Width: 2 -# BTVER2-NEXT: IPC: 0.18 -# BTVER2-NEXT: Block RThroughput: 2.0 +# BTVER2-NEXT: Total uOps: 4 # HASWELL-NEXT: Total Cycles: 11 -# HASWELL-NEXT: Dispatch Width: 4 -# HASWELL-NEXT: IPC: 0.18 -# HASWELL-NEXT: Block RThroughput: 2.0 +# HASWELL-NEXT: Total uOps: 4 # IVY-NEXT: Total Cycles: 11 -# IVY-NEXT: Dispatch Width: 4 -# IVY-NEXT: IPC: 0.18 -# IVY-NEXT: Block RThroughput: 1.0 +# IVY-NEXT: Total uOps: 4 # SANDY-NEXT: Total Cycles: 11 -# SANDY-NEXT: Dispatch Width: 4 -# SANDY-NEXT: IPC: 0.18 -# SANDY-NEXT: Block RThroughput: 1.0 +# SANDY-NEXT: Total uOps: 4 # SKYLAKE-NEXT: Total Cycles: 11 -# SKYLAKE-NEXT: Dispatch Width: 6 -# SKYLAKE-NEXT: IPC: 0.18 -# SKYLAKE-NEXT: Block RThroughput: 0.7 +# SKYLAKE-NEXT: Total uOps: 4 # ZNVER1-NEXT: Total Cycles: 11 -# ZNVER1-NEXT: Dispatch Width: 4 +# ZNVER1-NEXT: Total uOps: 2 + +# BTVER2: Dispatch Width: 2 +# BTVER2-NEXT: uOps Per Cycle: 0.36 +# BTVER2-NEXT: IPC: 0.18 +# BTVER2-NEXT: Block RThroughput: 
2.0 + +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.18 # ZNVER1-NEXT: IPC: 0.18 # ZNVER1-NEXT: Block RThroughput: 1.0 +# IVY: Dispatch Width: 4 +# IVY-NEXT: uOps Per Cycle: 0.36 +# IVY-NEXT: IPC: 0.18 +# IVY-NEXT: Block RThroughput: 1.0 + +# SANDY: Dispatch Width: 4 +# SANDY-NEXT: uOps Per Cycle: 0.36 +# SANDY-NEXT: IPC: 0.18 +# SANDY-NEXT: Block RThroughput: 1.0 + +# HASWELL: Dispatch Width: 4 +# HASWELL-NEXT: uOps Per Cycle: 0.36 +# HASWELL-NEXT: IPC: 0.18 +# HASWELL-NEXT: Block RThroughput: 2.0 + +# BDWELL: Dispatch Width: 4 +# BDWELL-NEXT: uOps Per Cycle: 0.40 +# BDWELL-NEXT: IPC: 0.20 +# BDWELL-NEXT: Block RThroughput: 2.0 + +# SKYLAKE: Dispatch Width: 6 +# SKYLAKE-NEXT: uOps Per Cycle: 0.36 +# SKYLAKE-NEXT: IPC: 0.18 +# SKYLAKE-NEXT: Block RThroughput: 0.7 + # BTVER2: Timeline view: # BTVER2-NEXT: 0 # BTVER2-NEXT: Index 0123456789 diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s index 8b145e3a723..dd4dd773958 100644 --- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s @@ -20,40 +20,61 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # ALL-NEXT: Instructions: 2 # BDWELL-NEXT: Total Cycles: 10 -# BDWELL-NEXT: Dispatch Width: 4 -# BDWELL-NEXT: IPC: 0.20 -# BDWELL-NEXT: Block RThroughput: 2.0 +# BDWELL-NEXT: Total uOps: 4 # BTVER2-NEXT: Total Cycles: 11 -# BTVER2-NEXT: Dispatch Width: 2 -# BTVER2-NEXT: IPC: 0.18 -# BTVER2-NEXT: Block RThroughput: 2.0 +# BTVER2-NEXT: Total uOps: 4 # HASWELL-NEXT: Total Cycles: 11 -# HASWELL-NEXT: Dispatch Width: 4 -# HASWELL-NEXT: IPC: 0.18 -# HASWELL-NEXT: Block RThroughput: 2.0 +# HASWELL-NEXT: Total uOps: 4 # IVY-NEXT: Total Cycles: 11 -# IVY-NEXT: Dispatch Width: 4 -# IVY-NEXT: IPC: 0.18 -# IVY-NEXT: Block RThroughput: 1.0 +# IVY-NEXT: Total uOps: 4 # SANDY-NEXT: Total Cycles: 11 -# SANDY-NEXT: Dispatch Width: 4 -# SANDY-NEXT: IPC: 0.18 -# 
SANDY-NEXT: Block RThroughput: 1.0 +# SANDY-NEXT: Total uOps: 4 # SKYLAKE-NEXT: Total Cycles: 11 -# SKYLAKE-NEXT: Dispatch Width: 6 -# SKYLAKE-NEXT: IPC: 0.18 -# SKYLAKE-NEXT: Block RThroughput: 0.7 +# SKYLAKE-NEXT: Total uOps: 4 # ZNVER1-NEXT: Total Cycles: 11 -# ZNVER1-NEXT: Dispatch Width: 4 +# ZNVER1-NEXT: Total uOps: 2 + +# BTVER2: Dispatch Width: 2 +# BTVER2-NEXT: uOps Per Cycle: 0.36 +# BTVER2-NEXT: IPC: 0.18 +# BTVER2-NEXT: Block RThroughput: 2.0 + +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.18 # ZNVER1-NEXT: IPC: 0.18 # ZNVER1-NEXT: Block RThroughput: 1.0 +# IVY: Dispatch Width: 4 +# IVY-NEXT: uOps Per Cycle: 0.36 +# IVY-NEXT: IPC: 0.18 +# IVY-NEXT: Block RThroughput: 1.0 + +# SANDY: Dispatch Width: 4 +# SANDY-NEXT: uOps Per Cycle: 0.36 +# SANDY-NEXT: IPC: 0.18 +# SANDY-NEXT: Block RThroughput: 1.0 + +# HASWELL: Dispatch Width: 4 +# HASWELL-NEXT: uOps Per Cycle: 0.36 +# HASWELL-NEXT: IPC: 0.18 +# HASWELL-NEXT: Block RThroughput: 2.0 + +# BDWELL: Dispatch Width: 4 +# BDWELL-NEXT: uOps Per Cycle: 0.40 +# BDWELL-NEXT: IPC: 0.20 +# BDWELL-NEXT: Block RThroughput: 2.0 + +# SKYLAKE: Dispatch Width: 6 +# SKYLAKE-NEXT: uOps Per Cycle: 0.36 +# SKYLAKE-NEXT: IPC: 0.18 +# SKYLAKE-NEXT: Block RThroughput: 0.7 + # BTVER2: Timeline view: # BTVER2-NEXT: 0 # BTVER2-NEXT: Index 0123456789 diff --git a/llvm/tools/llvm-mca/Views/SummaryView.cpp b/llvm/tools/llvm-mca/Views/SummaryView.cpp index 4a147bb6bca..026742ad294 100644 --- a/llvm/tools/llvm-mca/Views/SummaryView.cpp +++ b/llvm/tools/llvm-mca/Views/SummaryView.cpp @@ -63,7 +63,9 @@ void SummaryView::printView(raw_ostream &OS) const { unsigned Iterations = Source.getNumIterations(); unsigned Instructions = Source.size(); unsigned TotalInstructions = Instructions * Iterations; + unsigned TotalUOps = NumMicroOps * Iterations; double IPC = (double)TotalInstructions / TotalCycles; + double UOpsPerCycle = (double)TotalUOps / TotalCycles; double BlockRThroughput = computeBlockRThroughput( SM, DispatchWidth, 
NumMicroOps, ProcResourceUsage); @@ -72,10 +74,12 @@ void SummaryView::printView(raw_ostream &OS) const { TempStream << "Iterations: " << Iterations; TempStream << "\nInstructions: " << TotalInstructions; TempStream << "\nTotal Cycles: " << TotalCycles; + TempStream << "\nTotal uOps: " << TotalUOps << '\n'; TempStream << "\nDispatch Width: " << DispatchWidth; - TempStream << "\nIPC: " << format("%.2f", IPC); - - // Round to the block reciprocal throughput to the nearest tenth. + TempStream << "\nuOps Per Cycle: " + << format("%.2f", floor((UOpsPerCycle * 100) + 0.5) / 100); + TempStream << "\nIPC: " + << format("%.2f", floor((IPC * 100) + 0.5) / 100); TempStream << "\nBlock RThroughput: " << format("%.1f", floor((BlockRThroughput * 10) + 0.5) / 10) << '\n'; |