summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2018-08-29 17:56:39 +0000
committerAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2018-08-29 17:56:39 +0000
commita2eee4745083e805a7c869d250d5c787b827b75b (patch)
treedb1fe3376f305f3cd8b6164a6428a6495118b161
parent5221e17fd62b465e94631bf522d6789dd13d5854 (diff)
downloadbcm5719-llvm-a2eee4745083e805a7c869d250d5c787b827b75b.tar.gz
bcm5719-llvm-a2eee4745083e805a7c869d250d5c787b827b75b.zip
[llvm-mca] Add fields "Total uOps" and "uOps Per Cycle" to the report generated by the SummaryView.
This patch adds two new fields to the perf report generated by the SummaryView. Fields are now logically organized into two small groups; only the second group contains throughput indicators. Example: ``` Iterations: 100 Instructions: 300 Total Cycles: 414 Total uOps: 700 Dispatch Width: 4 uOps Per Cycle: 1.69 IPC: 0.72 Block RThroughput: 4.0 ``` This patch also updates the docs for llvm-mca. Due to the nature of this change, several tests in the tools/llvm-mca directory were affected, and had to be updated using script `update_mca_test_checks.py`. llvm-svn: 340946
-rw-r--r--llvm/docs/CommandGuide/llvm-mca.rst53
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s5
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s11
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/pr38575.s5
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s13
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s5
-rw-r--r--llvm/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s6
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-5.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/rank.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s41
-rw-r--r--llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s27
-rw-r--r--llvm/test/tools/llvm-mca/X86/cpus.s55
-rw-r--r--llvm/test/tools/llvm-mca/X86/default-iterations.s16
-rw-r--r--llvm/test/tools/llvm-mca/X86/dispatch_width.s13
-rw-r--r--llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s30
-rw-r--r--llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s30
-rw-r--r--llvm/test/tools/llvm-mca/X86/intel-syntax.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/llvm-mca-markers-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/llvm-mca-markers-3.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/llvm-mca-markers-4.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/llvm-mca-markers-5.s15
-rw-r--r--llvm/test/tools/llvm-mca/X86/option-all-stats-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/option-all-stats-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/option-all-views-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/option-all-views-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/option-no-stats-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s59
-rw-r--r--llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s59
-rw-r--r--llvm/tools/llvm-mca/Views/SummaryView.cpp10
78 files changed, 554 insertions, 199 deletions
diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst
index 5dcd97fb113..43e64c329c9 100644
--- a/llvm/docs/CommandGuide/llvm-mca.rst
+++ b/llvm/docs/CommandGuide/llvm-mca.rst
@@ -238,7 +238,10 @@ the following command using the example located at
Iterations: 300
Instructions: 900
Total Cycles: 610
+ Total uOps: 900
+
Dispatch Width: 2
+ uOps Per Cycle: 1.48
IPC: 1.48
Block RThroughput: 2.0
@@ -285,35 +288,45 @@ the following command using the example located at
- - - 1.00 - 1.00 - - - - - - - - vhaddps %xmm3, %xmm3, %xmm4
According to this report, the dot-product kernel has been executed 300 times,
-for a total of 900 dynamically executed instructions.
+for a total of 900 simulated instructions. The total number of simulated micro
+opcodes (uOps) is also 900.
The report is structured in three main sections. The first section collects a
few performance numbers; the goal of this section is to give a very quick
-overview of the performance throughput. In this example, the two important
-performance indicators are **IPC** and **Block RThroughput** (Block Reciprocal
+overview of the performance throughput. Important performance indicators are
+**IPC**, **uOps Per Cycle**, and **Block RThroughput** (Block Reciprocal
Throughput).
IPC is computed by dividing the total number of simulated instructions by the total
-number of cycles. A delta between Dispatch Width and IPC is an indicator of a
-performance issue. In the absence of loop-carried data dependencies, the
+number of cycles. In the absence of loop-carried data dependencies, the
observed IPC tends to a theoretical maximum which can be computed by dividing
the number of instructions of a single iteration by the *Block RThroughput*.
-IPC is bounded from above by the dispatch width. That is because the dispatch
-width limits the maximum size of a dispatch group. IPC is also limited by the
-amount of hardware parallelism. The availability of hardware resources affects
-the resource pressure distribution, and it limits the number of instructions
-that can be executed in parallel every cycle. A delta between Dispatch
-Width and the theoretical maximum IPC is an indicator of a performance
-bottleneck caused by the lack of hardware resources. In general, the lower the
-Block RThroughput, the better.
-
-In this example, ``Instructions per iteration/Block RThroughput`` is 1.50. Since
-there are no loop-carried dependencies, the observed IPC is expected to approach
-1.50 when the number of iterations tends to infinity. The delta between the
-Dispatch Width (2.00), and the theoretical maximum IPC (1.50) is an indicator of
-a performance bottleneck caused by the lack of hardware resources, and the
-*Resource pressure view* can help to identify the problematic resource usage.
+Field 'uOps Per Cycle' is computed by dividing the total number of simulated micro
+opcodes by the total number of cycles. A delta between Dispatch Width and this
+field is an indicator of a performance issue. In the absence of loop-carried
+data dependencies, the observed 'uOps Per Cycle' should tend to a theoretical
+maximum throughput which can be computed by dividing the number of uOps of a
+single iteration by the *Block RThroughput*.
+
+Field *uOps Per Cycle* is bounded from above by the dispatch width. That is
+because the dispatch width limits the maximum size of a dispatch group. Both IPC
+and 'uOps Per Cycle' are limited by the amount of hardware parallelism. The
+availability of hardware resources affects the resource pressure distribution,
+and it limits the number of instructions that can be executed in parallel every
+cycle. A delta between Dispatch Width and the theoretical maximum uOps per
+Cycle (computed by dividing the number of uOps of a single iteration by the
+*Block RThroughput*) is an indicator of a performance bottleneck caused by the
+lack of hardware resources.
+In general, the lower the Block RThroughput, the better.
+
+In this example, ``uOps per iteration/Block RThroughput`` is 1.50. Since there
+are no loop-carried dependencies, the observed *uOps Per Cycle* is expected to
+approach 1.50 when the number of iterations tends to infinity. The delta between
+the Dispatch Width (2.00), and the theoretical maximum uOp throughput (1.50) is
+an indicator of a performance bottleneck caused by the lack of hardware
+resources, and the *Resource pressure view* can help to identify the problematic
+resource usage.
The second section of the report shows the latency and reciprocal
throughput of every instruction in the sequence. That section also reports
diff --git a/llvm/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s b/llvm/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s
index 48c475e3fba..3a0187fd132 100644
--- a/llvm/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s
+++ b/llvm/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s
@@ -6,7 +6,10 @@
# CHECK: Iterations: 600
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 603
-# CHECK-NEXT: Dispatch Width: 3
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s
index e6d2f85026b..f24a20b5632 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s
@@ -8,12 +8,17 @@
# ALL-NEXT: Instructions: 300
# M1-NEXT: Total Cycles: 76
-# M1-NEXT: Dispatch Width: 4
+# M3-NEXT: Total Cycles: 51
+
+# ALL-NEXT: Total uOps: 300
+
+# M1: Dispatch Width: 4
+# M1-NEXT: uOps Per Cycle: 3.95
# M1-NEXT: IPC: 3.95
# M1-NEXT: Block RThroughput: 0.3
-# M3-NEXT: Total Cycles: 51
-# M3-NEXT: Dispatch Width: 6
+# M3: Dispatch Width: 6
+# M3-NEXT: uOps Per Cycle: 5.88
# M3-NEXT: IPC: 5.88
# M3-NEXT: Block RThroughput: 0.2
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/pr38575.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/pr38575.s
index ec29879b356..9eb3f961dd6 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Exynos/pr38575.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/pr38575.s
@@ -14,7 +14,10 @@ ror x1, x2, x3
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 100
# CHECK-NEXT: Total Cycles: 28
-# CHECK-NEXT: Dispatch Width: 6
+# CHECK-NEXT: Total uOps: 100
+
+# CHECK: Dispatch Width: 6
+# CHECK-NEXT: uOps Per Cycle: 3.57
# CHECK-NEXT: IPC: 3.57
# CHECK-NEXT: Block RThroughput: 0.3
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s
index 19665085d1b..e0024612dec 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s
@@ -7,13 +7,16 @@
# ALL: Iterations: 1
# ALL-NEXT: Instructions: 1
# ALL-NEXT: Total Cycles: 2
+# ALL-NEXT: Total uOps: 1
-# M1-NEXT: Dispatch Width: 4
-# M3-NEXT: Dispatch Width: 6
-
-# ALL-NEXT: IPC: 0.50
-
+# M1: Dispatch Width: 4
+# M1-NEXT: uOps Per Cycle: 0.50
+# M1-NEXT: IPC: 0.50
# M1-NEXT: Block RThroughput: 0.3
+
+# M3: Dispatch Width: 6
+# M3-NEXT: uOps Per Cycle: 0.50
+# M3-NEXT: IPC: 0.50
# M3-NEXT: Block RThroughput: 0.2
# ALL: Schedulers - number of cycles where we saw N instructions issued:
diff --git a/llvm/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s b/llvm/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s
index 603cd52d043..1d7fad19092 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s
@@ -6,7 +6,10 @@
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 4
-# CHECK-NEXT: Dispatch Width: 8
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s b/llvm/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s
index d33f95d5c7b..2bdfbca468c 100644
--- a/llvm/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s
+++ b/llvm/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s
@@ -6,7 +6,10 @@ vadd.f32 s0, s2, s2
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 100
# CHECK-NEXT: Total Cycles: 105
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 100
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.95
# CHECK-NEXT: IPC: 0.95
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s b/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s
index 50ec8462bb9..59b20c755ae 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s
@@ -8,7 +8,10 @@ add %eax, %edx
# CHECK: Iterations: 1000
# CHECK-NEXT: Instructions: 3000
# CHECK-NEXT: Total Cycles: 1506
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.99
# CHECK-NEXT: IPC: 1.99
# CHECK-NEXT: Block RThroughput: 1.5
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s
index 60d22ea3a1b..4328056930d 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s
@@ -16,7 +16,10 @@ bsf %rax, %rcx
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 704
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1200
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.70
# CHECK-NEXT: IPC: 0.57
# CHECK-NEXT: Block RThroughput: 6.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s
index 6d8d8a9d0a1..649c8f982d5 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s
@@ -34,7 +34,10 @@ vandps %xmm4, %xmm1, %xmm0
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1800
# CHECK-NEXT: Total Cycles: 3811
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 3400
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.89
# CHECK-NEXT: IPC: 0.47
# CHECK-NEXT: Block RThroughput: 38.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s
index 001fb8ed661..7fd97d32d0f 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s
@@ -12,7 +12,10 @@ cmovae %ebx, %eax
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 3000
# CHECK-NEXT: Total Cycles: 1504
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.99
# CHECK-NEXT: IPC: 1.99
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s
index 04007f24e42..aed7d75ffdc 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s
@@ -15,7 +15,10 @@ vpcmpeqq %xmm3, %xmm3, %xmm0
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 6000
# CHECK-NEXT: Total Cycles: 3003
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 2.00
# CHECK-NEXT: IPC: 2.00
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s
index 9ab8d039ccd..ef6faa58eba 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s
@@ -16,7 +16,10 @@ vpcmpgtq %xmm3, %xmm3, %xmm0
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 6000
# CHECK-NEXT: Total Cycles: 3001
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 2.00
# CHECK-NEXT: IPC: 2.00
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s
index 34cabda553f..b2bd7169c51 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s
@@ -13,7 +13,10 @@ sbb %eax, %eax
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 3000
# CHECK-NEXT: Total Cycles: 3003
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s
index da94624fd55..e121941298a 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s
@@ -14,7 +14,10 @@ sbb %eax, %eax
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 3007
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 2.00
# CHECK-NEXT: IPC: 1.50
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s
index 8fa166374c7..ca69339467d 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s
@@ -8,7 +8,10 @@ vpaddd %xmm0, %xmm0, %xmm3
# CHECK: Iterations: 500
# CHECK-NEXT: Instructions: 1500
# CHECK-NEXT: Total Cycles: 1504
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 1.5
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s
index 9dee1e57c21..643e456450c 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s
@@ -8,7 +8,10 @@ vhaddps %xmm3, %xmm3, %xmm4
# CHECK: Iterations: 300
# CHECK-NEXT: Instructions: 900
# CHECK-NEXT: Total Cycles: 610
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 900
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.48
# CHECK-NEXT: IPC: 1.48
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s
index 42d1f15df47..87862a6e5a3 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s
@@ -7,7 +7,10 @@ vhaddps (%rdi), %xmm1, %xmm2
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 11
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 2
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.18
# CHECK-NEXT: IPC: 0.18
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s
index a538164afee..80d5109d07e 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s
@@ -7,7 +7,10 @@ vhaddps (%rdi), %ymm1, %ymm2
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 12
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.17
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s b/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s
index 769e19cb97a..fa19b55c1d1 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s
@@ -14,7 +14,11 @@ vhaddps %xmm3, %xmm3, %xmm4
# ENABLED: Iterations: 100
# ENABLED-NEXT: Instructions: 300
# ENABLED-NEXT: Total Cycles: 209
-# ENABLED-NEXT: Dispatch Width: 2
+# ENABLED-NEXT: Total uOps: 300
+
+
+# ENABLED: Dispatch Width: 2
+# ENABLED-NEXT: uOps Per Cycle: 1.44
# ENABLED-NEXT: IPC: 1.44
# ENABLED-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s b/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
index 4733357e87e..2eee80e917e 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
@@ -13,7 +13,10 @@ vmovaps %xmm0, 48(%rdi)
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
# CHECK-NEXT: Total Cycles: 2403
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.33
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 4.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s b/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
index 5d394c6691c..8cdba9acc6c 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
@@ -13,7 +13,10 @@ vmovaps %xmm0, 48(%rdi)
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
# CHECK-NEXT: Total Cycles: 408
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.96
# CHECK-NEXT: IPC: 1.96
# CHECK-NEXT: Block RThroughput: 4.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s b/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s
index 3c20002d9c8..c45e86ab1eb 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s
@@ -30,7 +30,10 @@ vpcmpeqw %xmm3, %xmm3, %xmm5
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1500
# CHECK-NEXT: Total Cycles: 753
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.99
# CHECK-NEXT: IPC: 1.99
# CHECK-NEXT: Block RThroughput: 7.5
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s
index fb6eb234801..657eb9737d9 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s
@@ -8,7 +8,10 @@ add %ecx, %ebx
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 11
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.27
# CHECK-NEXT: Block RThroughput: 4.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s
index 0a9ffca64a5..f5ad0be5610 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s
@@ -13,7 +13,10 @@ xor %bx, %dx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 4503
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 1.5
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s
index cda173e5266..ad660a2189b 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s
@@ -13,7 +13,10 @@ add %cx, %bx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 7503
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.80
# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-5.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-5.s
index 7669621d7af..2d1397c101f 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-5.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-5.s
@@ -8,7 +8,10 @@ lzcnt %ax, %bx ## partial register stall.
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 1500
# CHECK-NEXT: Total Cycles: 1503
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s
index 939af6d06c8..9843a7cf398 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s
@@ -14,7 +14,10 @@ lzcnt 2(%rsp), %cx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 7504
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.80
# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s
index 4833077ba8b..6bc7e75f38c 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s
@@ -8,7 +8,10 @@ add %ecx, %ebx
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 8
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.38
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
index 54b8d132bd3..6bf375be02b 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
@@ -20,7 +20,10 @@ vsqrtps %ymm0, %ymm2
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
# CHECK-NEXT: Total Cycles: 6306
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1200
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.19
# CHECK-NEXT: IPC: 0.13
# CHECK-NEXT: Block RThroughput: 63.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s
index 5ac79011f18..fa501b58ee4 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s
@@ -7,7 +7,10 @@ stmxcsr (%rsp)
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 4
# CHECK-NEXT: Total Cycles: 205
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.02
# CHECK-NEXT: IPC: 0.02
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s b/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s
index f83961e0799..380f8ccecb9 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s
@@ -13,7 +13,10 @@ add %ebx, %eax
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
# CHECK-NEXT: Total Cycles: 503
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.59
# CHECK-NEXT: IPC: 1.59
# CHECK-NEXT: Block RThroughput: 4.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s b/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s
index af39e59ae40..4a1f8706d96 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s
@@ -21,7 +21,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 16
# CHECK-NEXT: Total Cycles: 31
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 16
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.52
# CHECK-NEXT: IPC: 0.52
# CHECK-NEXT: Block RThroughput: 21.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s
index 7bd9b8951f9..3896967c585 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s
@@ -11,7 +11,10 @@ vmulps (%rdi), %xmm1, %xmm2
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 2
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.20
# CHECK-NEXT: IPC: 0.20
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s
index 7cbc24baf3f..99ef892d269 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s
@@ -10,7 +10,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.40
# CHECK-NEXT: IPC: 0.20
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s
index 722170ab7af..ef8d50aab1e 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s
@@ -8,7 +8,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 7
-# CHECK-NEXT: Dispatch Width: 3
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.43
# CHECK-NEXT: IPC: 0.43
# CHECK-NEXT: Block RThroughput: 1.5
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s
index dc3e9470d8f..0f95d5ceab9 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s
@@ -7,7 +7,10 @@ vmulps %xmm0, %xmm0, %xmm0
# CHECK: Iterations: 5
# CHECK-NEXT: Instructions: 10
# CHECK-NEXT: Total Cycles: 28
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 10
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.36
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s
index 722d4763e06..b68ed9ce62b 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s
@@ -7,7 +7,10 @@ vmulps %xmm0, %xmm0, %xmm0
# CHECK: Iterations: 5
# CHECK-NEXT: Instructions: 10
# CHECK-NEXT: Total Cycles: 28
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 10
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.36
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s
index ff48db94855..12aeed7acb2 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s
@@ -6,7 +6,10 @@ idiv %eax
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 55
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.07
# CHECK-NEXT: IPC: 0.04
# CHECK-NEXT: Block RThroughput: 25.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s
index c9057f53066..d67d5e456c3 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s
@@ -6,7 +6,10 @@ idiv %eax
# CHECK: Iterations: 22
# CHECK-NEXT: Instructions: 22
# CHECK-NEXT: Total Cycles: 553
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 44
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.08
# CHECK-NEXT: IPC: 0.04
# CHECK-NEXT: Block RThroughput: 25.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s
index d0b1bf11116..3d09bc788bd 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s
@@ -38,7 +38,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 33
# CHECK-NEXT: Total Cycles: 69
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 66
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.96
# CHECK-NEXT: IPC: 0.48
# CHECK-NEXT: Block RThroughput: 64.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s
index 32b73f37f78..58c4b4476f5 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s
@@ -7,7 +7,10 @@ add %rsi, %rsi
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 2
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.20
# CHECK-NEXT: IPC: 0.20
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s b/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s
index d34db61bcc8..d1285441de5 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s
@@ -6,7 +6,10 @@ add %edi, %eax
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 100
# CHECK-NEXT: Total Cycles: 103
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 100
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.97
# CHECK-NEXT: IPC: 0.97
# CHECK-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s
index 33b2a4d87e8..57f07e1e8a8 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s
@@ -7,7 +7,10 @@ vandps (%rdi), %xmm1, %xmm2
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 9
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 2
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.22
# CHECK-NEXT: IPC: 0.22
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s
index 12e170d4019..5650a8ba15c 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s
@@ -7,7 +7,10 @@ vandps (%rdi), %ymm1, %ymm2
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.40
# CHECK-NEXT: IPC: 0.20
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s
index 8a11db9e4fb..110c3c23532 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s
@@ -71,7 +71,10 @@ vpxor %xmm3, %xmm3, %xmm5
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 55
# CHECK-NEXT: Total Cycles: 29
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 55
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.90
# CHECK-NEXT: IPC: 1.90
# CHECK-NEXT: Block RThroughput: 27.5
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s
index 1f543a566c8..75dfe1f1753 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s
@@ -11,7 +11,10 @@
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 318
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.89
# CHECK-NEXT: IPC: 1.89
# CHECK-NEXT: Block RThroughput: 3.0
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s
index be18513fe95..ce578c3ae7b 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s
@@ -11,7 +11,10 @@
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 318
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.89
# CHECK-NEXT: IPC: 1.89
# CHECK-NEXT: Block RThroughput: 3.0
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s
index 3fe5eb50282..d0cf359379d 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s
@@ -11,7 +11,10 @@
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 318
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.89
# CHECK-NEXT: IPC: 1.89
# CHECK-NEXT: Block RThroughput: 3.0
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s
index a3783fa279f..d7d99861cfb 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s
@@ -11,7 +11,10 @@
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 318
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.89
# CHECK-NEXT: IPC: 1.89
# CHECK-NEXT: Block RThroughput: 3.0
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s
index d8718c527f1..ba59a86a048 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s
@@ -11,7 +11,10 @@
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 316
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.90
# CHECK-NEXT: IPC: 1.90
# CHECK-NEXT: Block RThroughput: 3.0
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s
index dcb1bbd4bd3..019f84fe542 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s
@@ -8,7 +8,10 @@ add %ecx, %ebx
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 9
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.44
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s
index 3122d034652..abe923b5906 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s
@@ -11,7 +11,10 @@ xor %bx, %dx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 4503
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 0.8
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s
index 8a7a1fdfdd6..8202a6057c8 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s
@@ -11,7 +11,10 @@ add %cx, %bx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 7503
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.60
# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s
index 596b3309a1e..cae250315db 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s
@@ -6,7 +6,10 @@ lzcnt %ax, %bx ## partial register stall.
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 1500
# CHECK-NEXT: Total Cycles: 1504
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 0.3
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s
index 0f4494165ad..69637932c31 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s
@@ -13,7 +13,10 @@ lzcnt 2(%rsp), %cx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 10503
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 7500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.71
# CHECK-NEXT: IPC: 0.43
# CHECK-NEXT: Block RThroughput: 1.3
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s
index 5be7b423241..a7e3860e145 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s
@@ -12,7 +12,10 @@ addq %rcx, %rdx
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 9
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.44
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s
index 9a7b43fca57..f7a85399e5e 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s
@@ -8,7 +8,10 @@ add %ecx, %ebx
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 8
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.38
# CHECK-NEXT: IPC: 0.38
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s
index 8c2e6614129..da4699f4615 100644
--- a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s
+++ b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s
@@ -12,30 +12,45 @@ bextrl %esi, (%rdi), %eax
# ALL-NEXT: Instructions: 2
# BDWELL-NEXT: Total Cycles: 10
-# BDWELL-NEXT: Dispatch Width: 4
-# BDWELL-NEXT: IPC: 0.20
-# BDWELL-NEXT: Block RThroughput: 1.0
+# BDWELL-NEXT: Total uOps: 4
# BTVER2-NEXT: Total Cycles: 7
-# BTVER2-NEXT: Dispatch Width: 2
-# BTVER2-NEXT: IPC: 0.29
-# BTVER2-NEXT: Block RThroughput: 1.0
+# BTVER2-NEXT: Total uOps: 2
# HASWELL-NEXT: Total Cycles: 10
-# HASWELL-NEXT: Dispatch Width: 4
-# HASWELL-NEXT: IPC: 0.20
-# HASWELL-NEXT: Block RThroughput: 1.0
+# HASWELL-NEXT: Total uOps: 4
# SKYLAKE-NEXT: Total Cycles: 10
-# SKYLAKE-NEXT: Dispatch Width: 6
-# SKYLAKE-NEXT: IPC: 0.20
-# SKYLAKE-NEXT: Block RThroughput: 0.7
+# SKYLAKE-NEXT: Total uOps: 4
# ZNVER1-NEXT: Total Cycles: 8
-# ZNVER1-NEXT: Dispatch Width: 4
+# ZNVER1-NEXT: Total uOps: 3
+
+# BTVER2: Dispatch Width: 2
+# BTVER2-NEXT: uOps Per Cycle: 0.29
+# BTVER2-NEXT: IPC: 0.29
+# BTVER2-NEXT: Block RThroughput: 1.0
+
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.38
# ZNVER1-NEXT: IPC: 0.25
# ZNVER1-NEXT: Block RThroughput: 0.8
+# BDWELL: Dispatch Width: 4
+# BDWELL-NEXT: uOps Per Cycle: 0.40
+# BDWELL-NEXT: IPC: 0.20
+# BDWELL-NEXT: Block RThroughput: 1.0
+
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.40
+# HASWELL-NEXT: IPC: 0.20
+# HASWELL-NEXT: Block RThroughput: 1.0
+
+# SKYLAKE: Dispatch Width: 6
+# SKYLAKE-NEXT: uOps Per Cycle: 0.40
+# SKYLAKE-NEXT: IPC: 0.20
+# SKYLAKE-NEXT: Block RThroughput: 0.7
+
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
diff --git a/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s
index 86ed81f5c02..a9c7c8b8564 100644
--- a/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s
+++ b/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s
@@ -11,25 +11,32 @@ bzhil %esi, (%rdi), %eax
# ALL-NEXT: Instructions: 2
# BDWELL-NEXT: Total Cycles: 9
-# BDWELL-NEXT: Dispatch Width: 4
+# HASWELL-NEXT: Total Cycles: 9
+# SKYLAKE-NEXT: Total Cycles: 9
+# ZNVER1-NEXT: Total Cycles: 8
+
+# ALL-NEXT: Total uOps: 3
+
+# BDWELL: Dispatch Width: 4
+# BDWELL-NEXT: uOps Per Cycle: 0.33
# BDWELL-NEXT: IPC: 0.22
# BDWELL-NEXT: Block RThroughput: 0.8
-# HASWELL-NEXT: Total Cycles: 9
-# HASWELL-NEXT: Dispatch Width: 4
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.33
# HASWELL-NEXT: IPC: 0.22
# HASWELL-NEXT: Block RThroughput: 0.8
-# SKYLAKE-NEXT: Total Cycles: 9
-# SKYLAKE-NEXT: Dispatch Width: 6
-# SKYLAKE-NEXT: IPC: 0.22
-# SKYLAKE-NEXT: Block RThroughput: 0.5
-
-# ZNVER1-NEXT: Total Cycles: 8
-# ZNVER1-NEXT: Dispatch Width: 4
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.38
# ZNVER1-NEXT: IPC: 0.25
# ZNVER1-NEXT: Block RThroughput: 0.8
+# SKYLAKE: Dispatch Width: 6
+# SKYLAKE-NEXT: uOps Per Cycle: 0.33
+# SKYLAKE-NEXT: IPC: 0.22
+# SKYLAKE-NEXT: Block RThroughput: 0.5
+
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
diff --git a/llvm/test/tools/llvm-mca/X86/cpus.s b/llvm/test/tools/llvm-mca/X86/cpus.s
index e0822d8c2bc..6a4bcff687c 100644
--- a/llvm/test/tools/llvm-mca/X86/cpus.s
+++ b/llvm/test/tools/llvm-mca/X86/cpus.s
@@ -15,27 +15,54 @@ add %edi, %eax
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 100
# ALL-NEXT: Total Cycles: 103
+# ALL-NEXT: Total uOps: 100
-# BROADWELL-NEXT: Dispatch Width: 4
-# BTVER2-NEXT: Dispatch Width: 2
-# HASWELL-NEXT: Dispatch Width: 4
-# IVYBRIDGE-NEXT: Dispatch Width: 4
-# KNL-NEXT: Dispatch Width: 4
-# SANDYBRIDGE-NEXT: Dispatch Width: 4
-# SKX-NEXT: Dispatch Width: 6
-# SKX-AVX512-NEXT: Dispatch Width: 6
-# SLM-NEXT: Dispatch Width: 2
-# ZNVER1-NEXT: Dispatch Width: 4
+# BTVER2: Dispatch Width: 2
+# BTVER2-NEXT: uOps Per Cycle: 0.97
+# BTVER2-NEXT: IPC: 0.97
+# BTVER2-NEXT: Block RThroughput: 0.5
-# ALL-NEXT: IPC: 0.97
+# SLM: Dispatch Width: 2
+# SLM-NEXT: uOps Per Cycle: 0.97
+# SLM-NEXT: IPC: 0.97
+# SLM-NEXT: Block RThroughput: 0.5
+# BROADWELL: Dispatch Width: 4
+# BROADWELL-NEXT: uOps Per Cycle: 0.97
+# BROADWELL-NEXT: IPC: 0.97
# BROADWELL-NEXT: Block RThroughput: 0.3
-# BTVER2-NEXT: Block RThroughput: 0.5
+
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.97
+# HASWELL-NEXT: IPC: 0.97
# HASWELL-NEXT: Block RThroughput: 0.3
+
+# IVYBRIDGE: Dispatch Width: 4
+# IVYBRIDGE-NEXT: uOps Per Cycle: 0.97
+# IVYBRIDGE-NEXT: IPC: 0.97
# IVYBRIDGE-NEXT: Block RThroughput: 0.3
+
+# KNL: Dispatch Width: 4
+# KNL-NEXT: uOps Per Cycle: 0.97
+# KNL-NEXT: IPC: 0.97
# KNL-NEXT: Block RThroughput: 0.3
+
+# SANDYBRIDGE: Dispatch Width: 4
+# SANDYBRIDGE-NEXT: uOps Per Cycle: 0.97
+# SANDYBRIDGE-NEXT: IPC: 0.97
# SANDYBRIDGE-NEXT: Block RThroughput: 0.3
+
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.97
+# ZNVER1-NEXT: IPC: 0.97
+# ZNVER1-NEXT: Block RThroughput: 0.3
+
+# SKX: Dispatch Width: 6
+# SKX-NEXT: uOps Per Cycle: 0.97
+# SKX-NEXT: IPC: 0.97
# SKX-NEXT: Block RThroughput: 0.3
+
+# SKX-AVX512: Dispatch Width: 6
+# SKX-AVX512-NEXT: uOps Per Cycle: 0.97
+# SKX-AVX512-NEXT: IPC: 0.97
# SKX-AVX512-NEXT: Block RThroughput: 0.3
-# SLM-NEXT: Block RThroughput: 0.5
-# ZNVER1-NEXT: Block RThroughput: 0.3
diff --git a/llvm/test/tools/llvm-mca/X86/default-iterations.s b/llvm/test/tools/llvm-mca/X86/default-iterations.s
index 7b89266eaf3..ee1b6169c65 100644
--- a/llvm/test/tools/llvm-mca/X86/default-iterations.s
+++ b/llvm/test/tools/llvm-mca/X86/default-iterations.s
@@ -8,16 +8,22 @@ add %eax, %eax
# CUSTOM: Iterations: 1
# CUSTOM-NEXT: Instructions: 1
# CUSTOM-NEXT: Total Cycles: 4
-# CUSTOM-NEXT: Dispatch Width: 2
-# CUSTOM-NEXT: IPC: 0.25
-# CUSTOM-NEXT: Block RThroughput: 0.5
+# CUSTOM-NEXT: Total uOps: 1
# DEFAULT: Iterations: 100
# DEFAULT-NEXT: Instructions: 100
# DEFAULT-NEXT: Total Cycles: 103
-# DEFAULT-NEXT: Dispatch Width: 2
+# DEFAULT-NEXT: Total uOps: 100
+
+# ALL: Dispatch Width: 2
+
+# CUSTOM-NEXT: uOps Per Cycle: 0.25
+# CUSTOM-NEXT: IPC: 0.25
+
+# DEFAULT-NEXT: uOps Per Cycle: 0.97
# DEFAULT-NEXT: IPC: 0.97
-# DEFAULT-NEXT: Block RThroughput: 0.5
+
+# ALL-NEXT: Block RThroughput: 0.5
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/dispatch_width.s b/llvm/test/tools/llvm-mca/X86/dispatch_width.s
index b6d5792c18c..baaad160f13 100644
--- a/llvm/test/tools/llvm-mca/X86/dispatch_width.s
+++ b/llvm/test/tools/llvm-mca/X86/dispatch_width.s
@@ -8,11 +8,14 @@ add %eax, %eax
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 100
# ALL-NEXT: Total Cycles: 103
+# ALL-NEXT: Total uOps: 100
-# CUSTOM-NEXT: Dispatch Width: 1
-# DEFAULT-NEXT: Dispatch Width: 2
-
-# ALL-NEXT: IPC: 0.97
-
+# CUSTOM: Dispatch Width: 1
+# CUSTOM-NEXT: uOps Per Cycle: 0.97
+# CUSTOM-NEXT: IPC: 0.97
# CUSTOM-NEXT: Block RThroughput: 1.0
+
+# DEFAULT: Dispatch Width: 2
+# DEFAULT-NEXT: uOps Per Cycle: 0.97
+# DEFAULT-NEXT: IPC: 0.97
# DEFAULT-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s
index 3df2c388d91..679e8a3b598 100644
--- a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s
+++ b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s
@@ -14,22 +14,36 @@ vfmadd213ps (%rdi), %xmm1, %xmm2
# ALL-NEXT: Instructions: 2
# BDWELL-NEXT: Total Cycles: 13
-# BDWELL-NEXT: Dispatch Width: 4
-# BDWELL-NEXT: IPC: 0.15
+# BDWELL-NEXT: Total uOps: 3
# HASWELL-NEXT: Total Cycles: 14
-# HASWELL-NEXT: Dispatch Width: 4
-# HASWELL-NEXT: IPC: 0.14
+# HASWELL-NEXT: Total uOps: 3
# SKYLAKE-NEXT: Total Cycles: 13
-# SKYLAKE-NEXT: Dispatch Width: 6
-# SKYLAKE-NEXT: IPC: 0.15
+# SKYLAKE-NEXT: Total uOps: 3
# ZNVER1-NEXT: Total Cycles: 15
-# ZNVER1-NEXT: Dispatch Width: 4
+# ZNVER1-NEXT: Total uOps: 2
+
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.13
# ZNVER1-NEXT: IPC: 0.13
+# ZNVER1-NEXT: Block RThroughput: 1.0
-# ALL-NEXT: Block RThroughput: 1.0
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.21
+# HASWELL-NEXT: IPC: 0.14
+# HASWELL-NEXT: Block RThroughput: 1.0
+
+# BDWELL: Dispatch Width: 4
+# BDWELL-NEXT: uOps Per Cycle: 0.23
+# BDWELL-NEXT: IPC: 0.15
+# BDWELL-NEXT: Block RThroughput: 1.0
+
+# SKYLAKE: Dispatch Width: 6
+# SKYLAKE-NEXT: uOps Per Cycle: 0.23
+# SKYLAKE-NEXT: IPC: 0.15
+# SKYLAKE-NEXT: Block RThroughput: 1.0
# ALL: Timeline view:
diff --git a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s
index 3ff6906082f..698aba487df 100644
--- a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s
+++ b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s
@@ -14,22 +14,36 @@ vfmadd213ps (%rdi), %xmm1, %xmm2
# ALL-NEXT: Instructions: 2
# BDWELL-NEXT: Total Cycles: 13
-# BDWELL-NEXT: Dispatch Width: 4
-# BDWELL-NEXT: IPC: 0.15
+# BDWELL-NEXT: Total uOps: 3
# HASWELL-NEXT: Total Cycles: 14
-# HASWELL-NEXT: Dispatch Width: 4
-# HASWELL-NEXT: IPC: 0.14
+# HASWELL-NEXT: Total uOps: 3
# SKYLAKE-NEXT: Total Cycles: 13
-# SKYLAKE-NEXT: Dispatch Width: 6
-# SKYLAKE-NEXT: IPC: 0.15
+# SKYLAKE-NEXT: Total uOps: 3
# ZNVER1-NEXT: Total Cycles: 15
-# ZNVER1-NEXT: Dispatch Width: 4
+# ZNVER1-NEXT: Total uOps: 2
+
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.13
# ZNVER1-NEXT: IPC: 0.13
+# ZNVER1-NEXT: Block RThroughput: 1.0
-# ALL-NEXT: Block RThroughput: 1.0
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.21
+# HASWELL-NEXT: IPC: 0.14
+# HASWELL-NEXT: Block RThroughput: 1.0
+
+# BDWELL: Dispatch Width: 4
+# BDWELL-NEXT: uOps Per Cycle: 0.23
+# BDWELL-NEXT: IPC: 0.15
+# BDWELL-NEXT: Block RThroughput: 1.0
+
+# SKYLAKE: Dispatch Width: 6
+# SKYLAKE-NEXT: uOps Per Cycle: 0.23
+# SKYLAKE-NEXT: IPC: 0.15
+# SKYLAKE-NEXT: Block RThroughput: 1.0
# ALL: Timeline view:
diff --git a/llvm/test/tools/llvm-mca/X86/intel-syntax.s b/llvm/test/tools/llvm-mca/X86/intel-syntax.s
index 7cf2c13f897..1aaa3902866 100644
--- a/llvm/test/tools/llvm-mca/X86/intel-syntax.s
+++ b/llvm/test/tools/llvm-mca/X86/intel-syntax.s
@@ -12,7 +12,10 @@
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 400
# ALL-NEXT: Total Cycles: 305
-# ALL-NEXT: Dispatch Width: 2
+# ALL-NEXT: Total uOps: 500
+
+# ALL: Dispatch Width: 2
+# ALL-NEXT: uOps Per Cycle: 1.64
# ALL-NEXT: IPC: 1.31
# ALL-NEXT: Block RThroughput: 2.5
diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-2.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-2.s
index 108067e391f..21d1030bd62 100644
--- a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-2.s
+++ b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-2.s
@@ -10,7 +10,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 1
# CHECK-NEXT: Total Cycles: 4
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-3.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-3.s
index ab70a6175e2..efbb157f9da 100644
--- a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-3.s
+++ b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-3.s
@@ -10,7 +10,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 1
# CHECK-NEXT: Total Cycles: 4
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-4.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-4.s
index 09eadea4b3c..f7a58ff3853 100644
--- a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-4.s
+++ b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-4.s
@@ -14,7 +14,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 1
# CHECK-NEXT: Total Cycles: 4
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-5.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-5.s
index 21ef65c48cd..f562dbe38a1 100644
--- a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-5.s
+++ b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-5.s
@@ -18,7 +18,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 1
# CHECK-NEXT: Total Cycles: 4
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 0.5
@@ -38,7 +41,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 1
# CHECK-NEXT: Total Cycles: 4
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 0.5
@@ -58,7 +64,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 1
# CHECK-NEXT: Total Cycles: 4
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s b/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s
index 48a5e03d61b..5763caaf985 100644
--- a/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s
+++ b/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s
@@ -9,7 +9,10 @@ add %eax, %eax
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 100
# ALL-NEXT: Total Cycles: 103
-# ALL-NEXT: Dispatch Width: 2
+# ALL-NEXT: Total uOps: 100
+
+# ALL: Dispatch Width: 2
+# ALL-NEXT: uOps Per Cycle: 0.97
# ALL-NEXT: IPC: 0.97
# ALL-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s b/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s
index d5cc3c884fc..3e8c8bece2a 100644
--- a/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s
+++ b/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s
@@ -10,7 +10,10 @@ add %eax, %eax
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 100
# ALL-NEXT: Total Cycles: 103
-# ALL-NEXT: Dispatch Width: 2
+# ALL-NEXT: Total uOps: 100
+
+# ALL: Dispatch Width: 2
+# ALL-NEXT: uOps Per Cycle: 0.97
# ALL-NEXT: IPC: 0.97
# ALL-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/option-all-views-1.s b/llvm/test/tools/llvm-mca/X86/option-all-views-1.s
index aa9561e0649..8950014f6b9 100644
--- a/llvm/test/tools/llvm-mca/X86/option-all-views-1.s
+++ b/llvm/test/tools/llvm-mca/X86/option-all-views-1.s
@@ -11,7 +11,10 @@ add %eax, %eax
# DEFAULTREPORT: Iterations: 100
# DEFAULTREPORT-NEXT: Instructions: 100
# DEFAULTREPORT-NEXT: Total Cycles: 103
-# DEFAULTREPORT-NEXT: Dispatch Width: 2
+# DEFAULTREPORT-NEXT: Total uOps: 100
+
+# DEFAULTREPORT: Dispatch Width: 2
+# DEFAULTREPORT-NEXT: uOps Per Cycle: 0.97
# DEFAULTREPORT-NEXT: IPC: 0.97
# DEFAULTREPORT-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/option-all-views-2.s b/llvm/test/tools/llvm-mca/X86/option-all-views-2.s
index 076c30a8e17..30c194777e1 100644
--- a/llvm/test/tools/llvm-mca/X86/option-all-views-2.s
+++ b/llvm/test/tools/llvm-mca/X86/option-all-views-2.s
@@ -10,7 +10,10 @@ add %eax, %eax
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 100
# ALL-NEXT: Total Cycles: 103
-# ALL-NEXT: Dispatch Width: 2
+# ALL-NEXT: Total uOps: 100
+
+# ALL: Dispatch Width: 2
+# ALL-NEXT: uOps Per Cycle: 0.97
# ALL-NEXT: IPC: 0.97
# ALL-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/option-no-stats-1.s b/llvm/test/tools/llvm-mca/X86/option-no-stats-1.s
index 0f94f552152..8b9f229ce83 100644
--- a/llvm/test/tools/llvm-mca/X86/option-no-stats-1.s
+++ b/llvm/test/tools/llvm-mca/X86/option-no-stats-1.s
@@ -8,7 +8,10 @@ add %edi, %eax
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 100
# CHECK-NEXT: Total Cycles: 103
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 100
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.97
# CHECK-NEXT: IPC: 0.97
# CHECK-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s
index 10fe5142e23..a4d9d1b42e7 100644
--- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s
+++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s
@@ -20,40 +20,61 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# ALL-NEXT: Instructions: 2
# BDWELL-NEXT: Total Cycles: 10
-# BDWELL-NEXT: Dispatch Width: 4
-# BDWELL-NEXT: IPC: 0.20
-# BDWELL-NEXT: Block RThroughput: 2.0
+# BDWELL-NEXT: Total uOps: 4
# BTVER2-NEXT: Total Cycles: 11
-# BTVER2-NEXT: Dispatch Width: 2
-# BTVER2-NEXT: IPC: 0.18
-# BTVER2-NEXT: Block RThroughput: 2.0
+# BTVER2-NEXT: Total uOps: 4
# HASWELL-NEXT: Total Cycles: 11
-# HASWELL-NEXT: Dispatch Width: 4
-# HASWELL-NEXT: IPC: 0.18
-# HASWELL-NEXT: Block RThroughput: 2.0
+# HASWELL-NEXT: Total uOps: 4
# IVY-NEXT: Total Cycles: 11
-# IVY-NEXT: Dispatch Width: 4
-# IVY-NEXT: IPC: 0.18
-# IVY-NEXT: Block RThroughput: 1.0
+# IVY-NEXT: Total uOps: 4
# SANDY-NEXT: Total Cycles: 11
-# SANDY-NEXT: Dispatch Width: 4
-# SANDY-NEXT: IPC: 0.18
-# SANDY-NEXT: Block RThroughput: 1.0
+# SANDY-NEXT: Total uOps: 4
# SKYLAKE-NEXT: Total Cycles: 11
-# SKYLAKE-NEXT: Dispatch Width: 6
-# SKYLAKE-NEXT: IPC: 0.18
-# SKYLAKE-NEXT: Block RThroughput: 0.7
+# SKYLAKE-NEXT: Total uOps: 4
# ZNVER1-NEXT: Total Cycles: 11
-# ZNVER1-NEXT: Dispatch Width: 4
+# ZNVER1-NEXT: Total uOps: 2
+
+# BTVER2: Dispatch Width: 2
+# BTVER2-NEXT: uOps Per Cycle: 0.36
+# BTVER2-NEXT: IPC: 0.18
+# BTVER2-NEXT: Block RThroughput: 2.0
+
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.18
# ZNVER1-NEXT: IPC: 0.18
# ZNVER1-NEXT: Block RThroughput: 1.0
+# IVY: Dispatch Width: 4
+# IVY-NEXT: uOps Per Cycle: 0.36
+# IVY-NEXT: IPC: 0.18
+# IVY-NEXT: Block RThroughput: 1.0
+
+# SANDY: Dispatch Width: 4
+# SANDY-NEXT: uOps Per Cycle: 0.36
+# SANDY-NEXT: IPC: 0.18
+# SANDY-NEXT: Block RThroughput: 1.0
+
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.36
+# HASWELL-NEXT: IPC: 0.18
+# HASWELL-NEXT: Block RThroughput: 2.0
+
+# BDWELL: Dispatch Width: 4
+# BDWELL-NEXT: uOps Per Cycle: 0.40
+# BDWELL-NEXT: IPC: 0.20
+# BDWELL-NEXT: Block RThroughput: 2.0
+
+# SKYLAKE: Dispatch Width: 6
+# SKYLAKE-NEXT: uOps Per Cycle: 0.36
+# SKYLAKE-NEXT: IPC: 0.18
+# SKYLAKE-NEXT: Block RThroughput: 0.7
+
# BTVER2: Timeline view:
# BTVER2-NEXT: 0
# BTVER2-NEXT: Index 0123456789
diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s
index 8b145e3a723..dd4dd773958 100644
--- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s
+++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s
@@ -20,40 +20,61 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# ALL-NEXT: Instructions: 2
# BDWELL-NEXT: Total Cycles: 10
-# BDWELL-NEXT: Dispatch Width: 4
-# BDWELL-NEXT: IPC: 0.20
-# BDWELL-NEXT: Block RThroughput: 2.0
+# BDWELL-NEXT: Total uOps: 4
# BTVER2-NEXT: Total Cycles: 11
-# BTVER2-NEXT: Dispatch Width: 2
-# BTVER2-NEXT: IPC: 0.18
-# BTVER2-NEXT: Block RThroughput: 2.0
+# BTVER2-NEXT: Total uOps: 4
# HASWELL-NEXT: Total Cycles: 11
-# HASWELL-NEXT: Dispatch Width: 4
-# HASWELL-NEXT: IPC: 0.18
-# HASWELL-NEXT: Block RThroughput: 2.0
+# HASWELL-NEXT: Total uOps: 4
# IVY-NEXT: Total Cycles: 11
-# IVY-NEXT: Dispatch Width: 4
-# IVY-NEXT: IPC: 0.18
-# IVY-NEXT: Block RThroughput: 1.0
+# IVY-NEXT: Total uOps: 4
# SANDY-NEXT: Total Cycles: 11
-# SANDY-NEXT: Dispatch Width: 4
-# SANDY-NEXT: IPC: 0.18
-# SANDY-NEXT: Block RThroughput: 1.0
+# SANDY-NEXT: Total uOps: 4
# SKYLAKE-NEXT: Total Cycles: 11
-# SKYLAKE-NEXT: Dispatch Width: 6
-# SKYLAKE-NEXT: IPC: 0.18
-# SKYLAKE-NEXT: Block RThroughput: 0.7
+# SKYLAKE-NEXT: Total uOps: 4
# ZNVER1-NEXT: Total Cycles: 11
-# ZNVER1-NEXT: Dispatch Width: 4
+# ZNVER1-NEXT: Total uOps: 2
+
+# BTVER2: Dispatch Width: 2
+# BTVER2-NEXT: uOps Per Cycle: 0.36
+# BTVER2-NEXT: IPC: 0.18
+# BTVER2-NEXT: Block RThroughput: 2.0
+
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.18
# ZNVER1-NEXT: IPC: 0.18
# ZNVER1-NEXT: Block RThroughput: 1.0
+# IVY: Dispatch Width: 4
+# IVY-NEXT: uOps Per Cycle: 0.36
+# IVY-NEXT: IPC: 0.18
+# IVY-NEXT: Block RThroughput: 1.0
+
+# SANDY: Dispatch Width: 4
+# SANDY-NEXT: uOps Per Cycle: 0.36
+# SANDY-NEXT: IPC: 0.18
+# SANDY-NEXT: Block RThroughput: 1.0
+
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.36
+# HASWELL-NEXT: IPC: 0.18
+# HASWELL-NEXT: Block RThroughput: 2.0
+
+# BDWELL: Dispatch Width: 4
+# BDWELL-NEXT: uOps Per Cycle: 0.40
+# BDWELL-NEXT: IPC: 0.20
+# BDWELL-NEXT: Block RThroughput: 2.0
+
+# SKYLAKE: Dispatch Width: 6
+# SKYLAKE-NEXT: uOps Per Cycle: 0.36
+# SKYLAKE-NEXT: IPC: 0.18
+# SKYLAKE-NEXT: Block RThroughput: 0.7
+
# BTVER2: Timeline view:
# BTVER2-NEXT: 0
# BTVER2-NEXT: Index 0123456789
diff --git a/llvm/tools/llvm-mca/Views/SummaryView.cpp b/llvm/tools/llvm-mca/Views/SummaryView.cpp
index 4a147bb6bca..026742ad294 100644
--- a/llvm/tools/llvm-mca/Views/SummaryView.cpp
+++ b/llvm/tools/llvm-mca/Views/SummaryView.cpp
@@ -63,7 +63,9 @@ void SummaryView::printView(raw_ostream &OS) const {
unsigned Iterations = Source.getNumIterations();
unsigned Instructions = Source.size();
unsigned TotalInstructions = Instructions * Iterations;
+ unsigned TotalUOps = NumMicroOps * Iterations;
double IPC = (double)TotalInstructions / TotalCycles;
+ double UOpsPerCycle = (double)TotalUOps / TotalCycles;
double BlockRThroughput = computeBlockRThroughput(
SM, DispatchWidth, NumMicroOps, ProcResourceUsage);
@@ -72,10 +74,12 @@ void SummaryView::printView(raw_ostream &OS) const {
TempStream << "Iterations: " << Iterations;
TempStream << "\nInstructions: " << TotalInstructions;
TempStream << "\nTotal Cycles: " << TotalCycles;
+ TempStream << "\nTotal uOps: " << TotalUOps << '\n';
TempStream << "\nDispatch Width: " << DispatchWidth;
- TempStream << "\nIPC: " << format("%.2f", IPC);
-
- // Round to the block reciprocal throughput to the nearest tenth.
+ TempStream << "\nuOps Per Cycle: "
+ << format("%.2f", floor((UOpsPerCycle * 100) + 0.5) / 100);
+ TempStream << "\nIPC: "
+ << format("%.2f", floor((IPC * 100) + 0.5) / 100);
TempStream << "\nBlock RThroughput: "
<< format("%.1f", floor((BlockRThroughput * 10) + 0.5) / 10)
<< '\n';
OpenPOWER on IntegriCloud