summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/docs/CommandGuide/llvm-mca.rst53
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s5
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s11
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/pr38575.s5
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s13
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s5
-rw-r--r--llvm/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s6
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-5.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/rank.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s41
-rw-r--r--llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s27
-rw-r--r--llvm/test/tools/llvm-mca/X86/cpus.s55
-rw-r--r--llvm/test/tools/llvm-mca/X86/default-iterations.s16
-rw-r--r--llvm/test/tools/llvm-mca/X86/dispatch_width.s13
-rw-r--r--llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s30
-rw-r--r--llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s30
-rw-r--r--llvm/test/tools/llvm-mca/X86/intel-syntax.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/llvm-mca-markers-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/llvm-mca-markers-3.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/llvm-mca-markers-4.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/llvm-mca-markers-5.s15
-rw-r--r--llvm/test/tools/llvm-mca/X86/option-all-stats-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/option-all-stats-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/option-all-views-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/option-all-views-2.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/option-no-stats-1.s5
-rw-r--r--llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s59
-rw-r--r--llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s59
-rw-r--r--llvm/tools/llvm-mca/Views/SummaryView.cpp10
78 files changed, 554 insertions, 199 deletions
diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst
index 5dcd97fb113..43e64c329c9 100644
--- a/llvm/docs/CommandGuide/llvm-mca.rst
+++ b/llvm/docs/CommandGuide/llvm-mca.rst
@@ -238,7 +238,10 @@ the following command using the example located at
Iterations: 300
Instructions: 900
Total Cycles: 610
+ Total uOps: 900
+
Dispatch Width: 2
+ uOps Per Cycle: 1.48
IPC: 1.48
Block RThroughput: 2.0
@@ -285,35 +288,45 @@ the following command using the example located at
- - - 1.00 - 1.00 - - - - - - - - vhaddps %xmm3, %xmm3, %xmm4
According to this report, the dot-product kernel has been executed 300 times,
-for a total of 900 dynamically executed instructions.
+for a total of 900 simulated instructions. The total number of simulated micro
+opcodes (uOps) is also 900.
The report is structured in three main sections. The first section collects a
few performance numbers; the goal of this section is to give a very quick
-overview of the performance throughput. In this example, the two important
-performance indicators are **IPC** and **Block RThroughput** (Block Reciprocal
+overview of the performance throughput. Important performance indicators are
+**IPC**, **uOps Per Cycle**, and **Block RThroughput** (Block Reciprocal
Throughput).
IPC is computed dividing the total number of simulated instructions by the total
-number of cycles. A delta between Dispatch Width and IPC is an indicator of a
-performance issue. In the absence of loop-carried data dependencies, the
+number of cycles. In the absence of loop-carried data dependencies, the
observed IPC tends to a theoretical maximum which can be computed by dividing
the number of instructions of a single iteration by the *Block RThroughput*.
-IPC is bounded from above by the dispatch width. That is because the dispatch
-width limits the maximum size of a dispatch group. IPC is also limited by the
-amount of hardware parallelism. The availability of hardware resources affects
-the resource pressure distribution, and it limits the number of instructions
-that can be executed in parallel every cycle. A delta between Dispatch
-Width and the theoretical maximum IPC is an indicator of a performance
-bottleneck caused by the lack of hardware resources. In general, the lower the
-Block RThroughput, the better.
-
-In this example, ``Instructions per iteration/Block RThroughput`` is 1.50. Since
-there are no loop-carried dependencies, the observed IPC is expected to approach
-1.50 when the number of iterations tends to infinity. The delta between the
-Dispatch Width (2.00), and the theoretical maximum IPC (1.50) is an indicator of
-a performance bottleneck caused by the lack of hardware resources, and the
-*Resource pressure view* can help to identify the problematic resource usage.
+Field 'uOps Per Cycle' is computed by dividing the total number of simulated micro
+opcodes by the total number of cycles. A delta between Dispatch Width and this
+field is an indicator of a performance issue. In the absence of loop-carried
+data dependencies, the observed 'uOps Per Cycle' should tend to a theoretical
+maximum throughput which can be computed by dividing the number of uOps of a
+single iteration by the *Block RThroughput*.
+
+Field *uOps Per Cycle* is bounded from above by the dispatch width. That is
+because the dispatch width limits the maximum size of a dispatch group. Both IPC
+and 'uOps Per Cycle' are limited by the amount of hardware parallelism. The
+availability of hardware resources affects the resource pressure distribution,
+and it limits the number of instructions that can be executed in parallel every
+cycle. A delta between Dispatch Width and the theoretical maximum uOps per
+Cycle (computed by dividing the number of uOps of a single iteration by the
+*Block RThroughput*) is an indicator of a performance bottleneck caused by the
+lack of hardware resources.
+In general, the lower the Block RThroughput, the better.
+
+In this example, ``uOps per iteration/Block RThroughput`` is 1.50. Since there
+are no loop-carried dependencies, the observed *uOps Per Cycle* is expected to
+approach 1.50 when the number of iterations tends to infinity. The delta between
+the Dispatch Width (2.00), and the theoretical maximum uOp throughput (1.50) is
+an indicator of a performance bottleneck caused by the lack of hardware
+resources, and the *Resource pressure view* can help to identify the problematic
+resource usage.
The second section of the report shows the latency and reciprocal
throughput of every instruction in the sequence. That section also reports
diff --git a/llvm/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s b/llvm/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s
index 48c475e3fba..3a0187fd132 100644
--- a/llvm/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s
+++ b/llvm/test/tools/llvm-mca/AArch64/CortexA57/direct-branch.s
@@ -6,7 +6,10 @@
# CHECK: Iterations: 600
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 603
-# CHECK-NEXT: Dispatch Width: 3
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s
index e6d2f85026b..f24a20b5632 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s
@@ -8,12 +8,17 @@
# ALL-NEXT: Instructions: 300
# M1-NEXT: Total Cycles: 76
-# M1-NEXT: Dispatch Width: 4
+# M3-NEXT: Total Cycles: 51
+
+# ALL-NEXT: Total uOps: 300
+
+# M1: Dispatch Width: 4
+# M1-NEXT: uOps Per Cycle: 3.95
# M1-NEXT: IPC: 3.95
# M1-NEXT: Block RThroughput: 0.3
-# M3-NEXT: Total Cycles: 51
-# M3-NEXT: Dispatch Width: 6
+# M3: Dispatch Width: 6
+# M3-NEXT: uOps Per Cycle: 5.88
# M3-NEXT: IPC: 5.88
# M3-NEXT: Block RThroughput: 0.2
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/pr38575.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/pr38575.s
index ec29879b356..9eb3f961dd6 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Exynos/pr38575.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/pr38575.s
@@ -14,7 +14,10 @@ ror x1, x2, x3
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 100
# CHECK-NEXT: Total Cycles: 28
-# CHECK-NEXT: Dispatch Width: 6
+# CHECK-NEXT: Total uOps: 100
+
+# CHECK: Dispatch Width: 6
+# CHECK-NEXT: uOps Per Cycle: 3.57
# CHECK-NEXT: IPC: 3.57
# CHECK-NEXT: Block RThroughput: 0.3
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s
index 19665085d1b..e0024612dec 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s
@@ -7,13 +7,16 @@
# ALL: Iterations: 1
# ALL-NEXT: Instructions: 1
# ALL-NEXT: Total Cycles: 2
+# ALL-NEXT: Total uOps: 1
-# M1-NEXT: Dispatch Width: 4
-# M3-NEXT: Dispatch Width: 6
-
-# ALL-NEXT: IPC: 0.50
-
+# M1: Dispatch Width: 4
+# M1-NEXT: uOps Per Cycle: 0.50
+# M1-NEXT: IPC: 0.50
# M1-NEXT: Block RThroughput: 0.3
+
+# M3: Dispatch Width: 6
+# M3-NEXT: uOps Per Cycle: 0.50
+# M3-NEXT: IPC: 0.50
# M3-NEXT: Block RThroughput: 0.2
# ALL: Schedulers - number of cycles where we saw N instructions issued:
diff --git a/llvm/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s b/llvm/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s
index 603cd52d043..1d7fad19092 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Falkor/zero-latency-store.s
@@ -6,7 +6,10 @@
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 4
-# CHECK-NEXT: Dispatch Width: 8
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 8
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s b/llvm/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s
index d33f95d5c7b..2bdfbca468c 100644
--- a/llvm/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s
+++ b/llvm/test/tools/llvm-mca/ARM/simple-test-cortex-a9.s
@@ -6,7 +6,10 @@ vadd.f32 s0, s2, s2
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 100
# CHECK-NEXT: Total Cycles: 105
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 100
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.95
# CHECK-NEXT: IPC: 0.95
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s b/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s
index 50ec8462bb9..59b20c755ae 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s
@@ -8,7 +8,10 @@ add %eax, %edx
# CHECK: Iterations: 1000
# CHECK-NEXT: Instructions: 3000
# CHECK-NEXT: Total Cycles: 1506
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.99
# CHECK-NEXT: IPC: 1.99
# CHECK-NEXT: Block RThroughput: 1.5
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s
index 60d22ea3a1b..4328056930d 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s
@@ -16,7 +16,10 @@ bsf %rax, %rcx
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 704
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1200
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.70
# CHECK-NEXT: IPC: 0.57
# CHECK-NEXT: Block RThroughput: 6.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s
index 6d8d8a9d0a1..649c8f982d5 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s
@@ -34,7 +34,10 @@ vandps %xmm4, %xmm1, %xmm0
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1800
# CHECK-NEXT: Total Cycles: 3811
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 3400
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.89
# CHECK-NEXT: IPC: 0.47
# CHECK-NEXT: Block RThroughput: 38.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s
index 001fb8ed661..7fd97d32d0f 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s
@@ -12,7 +12,10 @@ cmovae %ebx, %eax
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 3000
# CHECK-NEXT: Total Cycles: 1504
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.99
# CHECK-NEXT: IPC: 1.99
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s
index 04007f24e42..aed7d75ffdc 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s
@@ -15,7 +15,10 @@ vpcmpeqq %xmm3, %xmm3, %xmm0
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 6000
# CHECK-NEXT: Total Cycles: 3003
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 2.00
# CHECK-NEXT: IPC: 2.00
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s
index 9ab8d039ccd..ef6faa58eba 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s
@@ -16,7 +16,10 @@ vpcmpgtq %xmm3, %xmm3, %xmm0
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 6000
# CHECK-NEXT: Total Cycles: 3001
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 2.00
# CHECK-NEXT: IPC: 2.00
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s
index 34cabda553f..b2bd7169c51 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s
@@ -13,7 +13,10 @@ sbb %eax, %eax
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 3000
# CHECK-NEXT: Total Cycles: 3003
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s
index da94624fd55..e121941298a 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s
@@ -14,7 +14,10 @@ sbb %eax, %eax
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 3007
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 2.00
# CHECK-NEXT: IPC: 1.50
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s
index 8fa166374c7..ca69339467d 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s
@@ -8,7 +8,10 @@ vpaddd %xmm0, %xmm0, %xmm3
# CHECK: Iterations: 500
# CHECK-NEXT: Instructions: 1500
# CHECK-NEXT: Total Cycles: 1504
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 1.5
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s
index 9dee1e57c21..643e456450c 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s
@@ -8,7 +8,10 @@ vhaddps %xmm3, %xmm3, %xmm4
# CHECK: Iterations: 300
# CHECK-NEXT: Instructions: 900
# CHECK-NEXT: Total Cycles: 610
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 900
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.48
# CHECK-NEXT: IPC: 1.48
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s
index 42d1f15df47..87862a6e5a3 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s
@@ -7,7 +7,10 @@ vhaddps (%rdi), %xmm1, %xmm2
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 11
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 2
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.18
# CHECK-NEXT: IPC: 0.18
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s
index a538164afee..80d5109d07e 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s
@@ -7,7 +7,10 @@ vhaddps (%rdi), %ymm1, %ymm2
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 12
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.17
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s b/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s
index 769e19cb97a..fa19b55c1d1 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s
@@ -14,7 +14,11 @@ vhaddps %xmm3, %xmm3, %xmm4
# ENABLED: Iterations: 100
# ENABLED-NEXT: Instructions: 300
# ENABLED-NEXT: Total Cycles: 209
-# ENABLED-NEXT: Dispatch Width: 2
+# ENABLED-NEXT: Total uOps: 300
+
+
+# ENABLED: Dispatch Width: 2
+# ENABLED-NEXT: uOps Per Cycle: 1.44
# ENABLED-NEXT: IPC: 1.44
# ENABLED-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s b/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
index 4733357e87e..2eee80e917e 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
@@ -13,7 +13,10 @@ vmovaps %xmm0, 48(%rdi)
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
# CHECK-NEXT: Total Cycles: 2403
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.33
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 4.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s b/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
index 5d394c6691c..8cdba9acc6c 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
@@ -13,7 +13,10 @@ vmovaps %xmm0, 48(%rdi)
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
# CHECK-NEXT: Total Cycles: 408
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.96
# CHECK-NEXT: IPC: 1.96
# CHECK-NEXT: Block RThroughput: 4.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s b/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s
index 3c20002d9c8..c45e86ab1eb 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s
@@ -30,7 +30,10 @@ vpcmpeqw %xmm3, %xmm3, %xmm5
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1500
# CHECK-NEXT: Total Cycles: 753
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.99
# CHECK-NEXT: IPC: 1.99
# CHECK-NEXT: Block RThroughput: 7.5
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s
index fb6eb234801..657eb9737d9 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s
@@ -8,7 +8,10 @@ add %ecx, %ebx
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 11
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.27
# CHECK-NEXT: Block RThroughput: 4.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s
index 0a9ffca64a5..f5ad0be5610 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s
@@ -13,7 +13,10 @@ xor %bx, %dx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 4503
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 1.5
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s
index cda173e5266..ad660a2189b 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s
@@ -13,7 +13,10 @@ add %cx, %bx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 7503
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.80
# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-5.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-5.s
index 7669621d7af..2d1397c101f 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-5.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-5.s
@@ -8,7 +8,10 @@ lzcnt %ax, %bx ## partial register stall.
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 1500
# CHECK-NEXT: Total Cycles: 1503
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s
index 939af6d06c8..9843a7cf398 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s
@@ -14,7 +14,10 @@ lzcnt 2(%rsp), %cx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 7504
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.80
# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s
index 4833077ba8b..6bc7e75f38c 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s
@@ -8,7 +8,10 @@ add %ecx, %ebx
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 8
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.38
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
index 54b8d132bd3..6bf375be02b 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
@@ -20,7 +20,10 @@ vsqrtps %ymm0, %ymm2
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
# CHECK-NEXT: Total Cycles: 6306
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1200
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.19
# CHECK-NEXT: IPC: 0.13
# CHECK-NEXT: Block RThroughput: 63.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s
index 5ac79011f18..fa501b58ee4 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s
@@ -7,7 +7,10 @@ stmxcsr (%rsp)
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 4
# CHECK-NEXT: Total Cycles: 205
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.02
# CHECK-NEXT: IPC: 0.02
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s b/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s
index f83961e0799..380f8ccecb9 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s
@@ -13,7 +13,10 @@ add %ebx, %eax
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
# CHECK-NEXT: Total Cycles: 503
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.59
# CHECK-NEXT: IPC: 1.59
# CHECK-NEXT: Block RThroughput: 4.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s b/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s
index af39e59ae40..4a1f8706d96 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s
@@ -21,7 +21,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 16
# CHECK-NEXT: Total Cycles: 31
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 16
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.52
# CHECK-NEXT: IPC: 0.52
# CHECK-NEXT: Block RThroughput: 21.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s
index 7bd9b8951f9..3896967c585 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s
@@ -11,7 +11,10 @@ vmulps (%rdi), %xmm1, %xmm2
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 2
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.20
# CHECK-NEXT: IPC: 0.20
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s
index 7cbc24baf3f..99ef892d269 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s
@@ -10,7 +10,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.40
# CHECK-NEXT: IPC: 0.20
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s
index 722170ab7af..ef8d50aab1e 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s
@@ -8,7 +8,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 7
-# CHECK-NEXT: Dispatch Width: 3
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.43
# CHECK-NEXT: IPC: 0.43
# CHECK-NEXT: Block RThroughput: 1.5
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s
index dc3e9470d8f..0f95d5ceab9 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s
@@ -7,7 +7,10 @@ vmulps %xmm0, %xmm0, %xmm0
# CHECK: Iterations: 5
# CHECK-NEXT: Instructions: 10
# CHECK-NEXT: Total Cycles: 28
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 10
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.36
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s
index 722d4763e06..b68ed9ce62b 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s
@@ -7,7 +7,10 @@ vmulps %xmm0, %xmm0, %xmm0
# CHECK: Iterations: 5
# CHECK-NEXT: Instructions: 10
# CHECK-NEXT: Total Cycles: 28
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 10
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.36
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s
index ff48db94855..12aeed7acb2 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s
@@ -6,7 +6,10 @@ idiv %eax
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 55
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.07
# CHECK-NEXT: IPC: 0.04
# CHECK-NEXT: Block RThroughput: 25.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s
index c9057f53066..d67d5e456c3 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s
@@ -6,7 +6,10 @@ idiv %eax
# CHECK: Iterations: 22
# CHECK-NEXT: Instructions: 22
# CHECK-NEXT: Total Cycles: 553
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 44
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.08
# CHECK-NEXT: IPC: 0.04
# CHECK-NEXT: Block RThroughput: 25.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s
index d0b1bf11116..3d09bc788bd 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s
@@ -38,7 +38,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 33
# CHECK-NEXT: Total Cycles: 69
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 66
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.96
# CHECK-NEXT: IPC: 0.48
# CHECK-NEXT: Block RThroughput: 64.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s
index 32b73f37f78..58c4b4476f5 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s
@@ -7,7 +7,10 @@ add %rsi, %rsi
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 2
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.20
# CHECK-NEXT: IPC: 0.20
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s b/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s
index d34db61bcc8..d1285441de5 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s
@@ -6,7 +6,10 @@ add %edi, %eax
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 100
# CHECK-NEXT: Total Cycles: 103
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 100
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.97
# CHECK-NEXT: IPC: 0.97
# CHECK-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s
index 33b2a4d87e8..57f07e1e8a8 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s
@@ -7,7 +7,10 @@ vandps (%rdi), %xmm1, %xmm2
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 9
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 2
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.22
# CHECK-NEXT: IPC: 0.22
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s
index 12e170d4019..5650a8ba15c 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s
@@ -7,7 +7,10 @@ vandps (%rdi), %ymm1, %ymm2
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.40
# CHECK-NEXT: IPC: 0.20
# CHECK-NEXT: Block RThroughput: 2.0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s
index 8a11db9e4fb..110c3c23532 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s
@@ -71,7 +71,10 @@ vpxor %xmm3, %xmm3, %xmm5
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 55
# CHECK-NEXT: Total Cycles: 29
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 55
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.90
# CHECK-NEXT: IPC: 1.90
# CHECK-NEXT: Block RThroughput: 27.5
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s
index 1f543a566c8..75dfe1f1753 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s
@@ -11,7 +11,10 @@
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 318
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.89
# CHECK-NEXT: IPC: 1.89
# CHECK-NEXT: Block RThroughput: 3.0
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s
index be18513fe95..ce578c3ae7b 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s
@@ -11,7 +11,10 @@
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 318
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.89
# CHECK-NEXT: IPC: 1.89
# CHECK-NEXT: Block RThroughput: 3.0
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s
index 3fe5eb50282..d0cf359379d 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s
@@ -11,7 +11,10 @@
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 318
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.89
# CHECK-NEXT: IPC: 1.89
# CHECK-NEXT: Block RThroughput: 3.0
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s
index a3783fa279f..d7d99861cfb 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s
@@ -11,7 +11,10 @@
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 318
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.89
# CHECK-NEXT: IPC: 1.89
# CHECK-NEXT: Block RThroughput: 3.0
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s
index d8718c527f1..ba59a86a048 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s
@@ -11,7 +11,10 @@
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 316
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.90
# CHECK-NEXT: IPC: 1.90
# CHECK-NEXT: Block RThroughput: 3.0
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s
index dcb1bbd4bd3..019f84fe542 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s
@@ -8,7 +8,10 @@ add %ecx, %ebx
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 9
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.44
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s
index 3122d034652..abe923b5906 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s
@@ -11,7 +11,10 @@ xor %bx, %dx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 4503
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 0.8
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s
index 8a7a1fdfdd6..8202a6057c8 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s
@@ -11,7 +11,10 @@ add %cx, %bx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 7503
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.60
# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s
index 596b3309a1e..cae250315db 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s
@@ -6,7 +6,10 @@ lzcnt %ax, %bx ## partial register stall.
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 1500
# CHECK-NEXT: Total Cycles: 1504
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 0.3
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s
index 0f4494165ad..69637932c31 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s
@@ -13,7 +13,10 @@ lzcnt 2(%rsp), %cx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 10503
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 7500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.71
# CHECK-NEXT: IPC: 0.43
# CHECK-NEXT: Block RThroughput: 1.3
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s
index 5be7b423241..a7e3860e145 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s
@@ -12,7 +12,10 @@ addq %rcx, %rdx
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 9
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.44
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s
index 9a7b43fca57..f7a85399e5e 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s
@@ -8,7 +8,10 @@ add %ecx, %ebx
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 8
-# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.38
# CHECK-NEXT: IPC: 0.38
# CHECK-NEXT: Block RThroughput: 1.0
diff --git a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s
index 8c2e6614129..da4699f4615 100644
--- a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s
+++ b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s
@@ -12,30 +12,45 @@ bextrl %esi, (%rdi), %eax
# ALL-NEXT: Instructions: 2
# BDWELL-NEXT: Total Cycles: 10
-# BDWELL-NEXT: Dispatch Width: 4
-# BDWELL-NEXT: IPC: 0.20
-# BDWELL-NEXT: Block RThroughput: 1.0
+# BDWELL-NEXT: Total uOps: 4
# BTVER2-NEXT: Total Cycles: 7
-# BTVER2-NEXT: Dispatch Width: 2
-# BTVER2-NEXT: IPC: 0.29
-# BTVER2-NEXT: Block RThroughput: 1.0
+# BTVER2-NEXT: Total uOps: 2
# HASWELL-NEXT: Total Cycles: 10
-# HASWELL-NEXT: Dispatch Width: 4
-# HASWELL-NEXT: IPC: 0.20
-# HASWELL-NEXT: Block RThroughput: 1.0
+# HASWELL-NEXT: Total uOps: 4
# SKYLAKE-NEXT: Total Cycles: 10
-# SKYLAKE-NEXT: Dispatch Width: 6
-# SKYLAKE-NEXT: IPC: 0.20
-# SKYLAKE-NEXT: Block RThroughput: 0.7
+# SKYLAKE-NEXT: Total uOps: 4
# ZNVER1-NEXT: Total Cycles: 8
-# ZNVER1-NEXT: Dispatch Width: 4
+# ZNVER1-NEXT: Total uOps: 3
+
+# BTVER2: Dispatch Width: 2
+# BTVER2-NEXT: uOps Per Cycle: 0.29
+# BTVER2-NEXT: IPC: 0.29
+# BTVER2-NEXT: Block RThroughput: 1.0
+
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.38
# ZNVER1-NEXT: IPC: 0.25
# ZNVER1-NEXT: Block RThroughput: 0.8
+# BDWELL: Dispatch Width: 4
+# BDWELL-NEXT: uOps Per Cycle: 0.40
+# BDWELL-NEXT: IPC: 0.20
+# BDWELL-NEXT: Block RThroughput: 1.0
+
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.40
+# HASWELL-NEXT: IPC: 0.20
+# HASWELL-NEXT: Block RThroughput: 1.0
+
+# SKYLAKE: Dispatch Width: 6
+# SKYLAKE-NEXT: uOps Per Cycle: 0.40
+# SKYLAKE-NEXT: IPC: 0.20
+# SKYLAKE-NEXT: Block RThroughput: 0.7
+
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
diff --git a/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s
index 86ed81f5c02..a9c7c8b8564 100644
--- a/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s
+++ b/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s
@@ -11,25 +11,32 @@ bzhil %esi, (%rdi), %eax
# ALL-NEXT: Instructions: 2
# BDWELL-NEXT: Total Cycles: 9
-# BDWELL-NEXT: Dispatch Width: 4
+# HASWELL-NEXT: Total Cycles: 9
+# SKYLAKE-NEXT: Total Cycles: 9
+# ZNVER1-NEXT: Total Cycles: 8
+
+# ALL-NEXT: Total uOps: 3
+
+# BDWELL: Dispatch Width: 4
+# BDWELL-NEXT: uOps Per Cycle: 0.33
# BDWELL-NEXT: IPC: 0.22
# BDWELL-NEXT: Block RThroughput: 0.8
-# HASWELL-NEXT: Total Cycles: 9
-# HASWELL-NEXT: Dispatch Width: 4
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.33
# HASWELL-NEXT: IPC: 0.22
# HASWELL-NEXT: Block RThroughput: 0.8
-# SKYLAKE-NEXT: Total Cycles: 9
-# SKYLAKE-NEXT: Dispatch Width: 6
-# SKYLAKE-NEXT: IPC: 0.22
-# SKYLAKE-NEXT: Block RThroughput: 0.5
-
-# ZNVER1-NEXT: Total Cycles: 8
-# ZNVER1-NEXT: Dispatch Width: 4
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.38
# ZNVER1-NEXT: IPC: 0.25
# ZNVER1-NEXT: Block RThroughput: 0.8
+# SKYLAKE: Dispatch Width: 6
+# SKYLAKE-NEXT: uOps Per Cycle: 0.33
+# SKYLAKE-NEXT: IPC: 0.22
+# SKYLAKE-NEXT: Block RThroughput: 0.5
+
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
diff --git a/llvm/test/tools/llvm-mca/X86/cpus.s b/llvm/test/tools/llvm-mca/X86/cpus.s
index e0822d8c2bc..6a4bcff687c 100644
--- a/llvm/test/tools/llvm-mca/X86/cpus.s
+++ b/llvm/test/tools/llvm-mca/X86/cpus.s
@@ -15,27 +15,54 @@ add %edi, %eax
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 100
# ALL-NEXT: Total Cycles: 103
+# ALL-NEXT: Total uOps: 100
-# BROADWELL-NEXT: Dispatch Width: 4
-# BTVER2-NEXT: Dispatch Width: 2
-# HASWELL-NEXT: Dispatch Width: 4
-# IVYBRIDGE-NEXT: Dispatch Width: 4
-# KNL-NEXT: Dispatch Width: 4
-# SANDYBRIDGE-NEXT: Dispatch Width: 4
-# SKX-NEXT: Dispatch Width: 6
-# SKX-AVX512-NEXT: Dispatch Width: 6
-# SLM-NEXT: Dispatch Width: 2
-# ZNVER1-NEXT: Dispatch Width: 4
+# BTVER2: Dispatch Width: 2
+# BTVER2-NEXT: uOps Per Cycle: 0.97
+# BTVER2-NEXT: IPC: 0.97
+# BTVER2-NEXT: Block RThroughput: 0.5
-# ALL-NEXT: IPC: 0.97
+# SLM: Dispatch Width: 2
+# SLM-NEXT: uOps Per Cycle: 0.97
+# SLM-NEXT: IPC: 0.97
+# SLM-NEXT: Block RThroughput: 0.5
+# BROADWELL: Dispatch Width: 4
+# BROADWELL-NEXT: uOps Per Cycle: 0.97
+# BROADWELL-NEXT: IPC: 0.97
# BROADWELL-NEXT: Block RThroughput: 0.3
-# BTVER2-NEXT: Block RThroughput: 0.5
+
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.97
+# HASWELL-NEXT: IPC: 0.97
# HASWELL-NEXT: Block RThroughput: 0.3
+
+# IVYBRIDGE: Dispatch Width: 4
+# IVYBRIDGE-NEXT: uOps Per Cycle: 0.97
+# IVYBRIDGE-NEXT: IPC: 0.97
# IVYBRIDGE-NEXT: Block RThroughput: 0.3
+
+# KNL: Dispatch Width: 4
+# KNL-NEXT: uOps Per Cycle: 0.97
+# KNL-NEXT: IPC: 0.97
# KNL-NEXT: Block RThroughput: 0.3
+
+# SANDYBRIDGE: Dispatch Width: 4
+# SANDYBRIDGE-NEXT: uOps Per Cycle: 0.97
+# SANDYBRIDGE-NEXT: IPC: 0.97
# SANDYBRIDGE-NEXT: Block RThroughput: 0.3
+
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.97
+# ZNVER1-NEXT: IPC: 0.97
+# ZNVER1-NEXT: Block RThroughput: 0.3
+
+# SKX: Dispatch Width: 6
+# SKX-NEXT: uOps Per Cycle: 0.97
+# SKX-NEXT: IPC: 0.97
# SKX-NEXT: Block RThroughput: 0.3
+
+# SKX-AVX512: Dispatch Width: 6
+# SKX-AVX512-NEXT: uOps Per Cycle: 0.97
+# SKX-AVX512-NEXT: IPC: 0.97
# SKX-AVX512-NEXT: Block RThroughput: 0.3
-# SLM-NEXT: Block RThroughput: 0.5
-# ZNVER1-NEXT: Block RThroughput: 0.3
diff --git a/llvm/test/tools/llvm-mca/X86/default-iterations.s b/llvm/test/tools/llvm-mca/X86/default-iterations.s
index 7b89266eaf3..ee1b6169c65 100644
--- a/llvm/test/tools/llvm-mca/X86/default-iterations.s
+++ b/llvm/test/tools/llvm-mca/X86/default-iterations.s
@@ -8,16 +8,22 @@ add %eax, %eax
# CUSTOM: Iterations: 1
# CUSTOM-NEXT: Instructions: 1
# CUSTOM-NEXT: Total Cycles: 4
-# CUSTOM-NEXT: Dispatch Width: 2
-# CUSTOM-NEXT: IPC: 0.25
-# CUSTOM-NEXT: Block RThroughput: 0.5
+# CUSTOM-NEXT: Total uOps: 1
# DEFAULT: Iterations: 100
# DEFAULT-NEXT: Instructions: 100
# DEFAULT-NEXT: Total Cycles: 103
-# DEFAULT-NEXT: Dispatch Width: 2
+# DEFAULT-NEXT: Total uOps: 100
+
+# ALL: Dispatch Width: 2
+
+# CUSTOM-NEXT: uOps Per Cycle: 0.25
+# CUSTOM-NEXT: IPC: 0.25
+
+# DEFAULT-NEXT: uOps Per Cycle: 0.97
# DEFAULT-NEXT: IPC: 0.97
-# DEFAULT-NEXT: Block RThroughput: 0.5
+
+# ALL-NEXT: Block RThroughput: 0.5
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/dispatch_width.s b/llvm/test/tools/llvm-mca/X86/dispatch_width.s
index b6d5792c18c..baaad160f13 100644
--- a/llvm/test/tools/llvm-mca/X86/dispatch_width.s
+++ b/llvm/test/tools/llvm-mca/X86/dispatch_width.s
@@ -8,11 +8,14 @@ add %eax, %eax
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 100
# ALL-NEXT: Total Cycles: 103
+# ALL-NEXT: Total uOps: 100
-# CUSTOM-NEXT: Dispatch Width: 1
-# DEFAULT-NEXT: Dispatch Width: 2
-
-# ALL-NEXT: IPC: 0.97
-
+# CUSTOM: Dispatch Width: 1
+# CUSTOM-NEXT: uOps Per Cycle: 0.97
+# CUSTOM-NEXT: IPC: 0.97
# CUSTOM-NEXT: Block RThroughput: 1.0
+
+# DEFAULT: Dispatch Width: 2
+# DEFAULT-NEXT: uOps Per Cycle: 0.97
+# DEFAULT-NEXT: IPC: 0.97
# DEFAULT-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s
index 3df2c388d91..679e8a3b598 100644
--- a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s
+++ b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s
@@ -14,22 +14,36 @@ vfmadd213ps (%rdi), %xmm1, %xmm2
# ALL-NEXT: Instructions: 2
# BDWELL-NEXT: Total Cycles: 13
-# BDWELL-NEXT: Dispatch Width: 4
-# BDWELL-NEXT: IPC: 0.15
+# BDWELL-NEXT: Total uOps: 3
# HASWELL-NEXT: Total Cycles: 14
-# HASWELL-NEXT: Dispatch Width: 4
-# HASWELL-NEXT: IPC: 0.14
+# HASWELL-NEXT: Total uOps: 3
# SKYLAKE-NEXT: Total Cycles: 13
-# SKYLAKE-NEXT: Dispatch Width: 6
-# SKYLAKE-NEXT: IPC: 0.15
+# SKYLAKE-NEXT: Total uOps: 3
# ZNVER1-NEXT: Total Cycles: 15
-# ZNVER1-NEXT: Dispatch Width: 4
+# ZNVER1-NEXT: Total uOps: 2
+
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.13
# ZNVER1-NEXT: IPC: 0.13
+# ZNVER1-NEXT: Block RThroughput: 1.0
-# ALL-NEXT: Block RThroughput: 1.0
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.21
+# HASWELL-NEXT: IPC: 0.14
+# HASWELL-NEXT: Block RThroughput: 1.0
+
+# BDWELL: Dispatch Width: 4
+# BDWELL-NEXT: uOps Per Cycle: 0.23
+# BDWELL-NEXT: IPC: 0.15
+# BDWELL-NEXT: Block RThroughput: 1.0
+
+# SKYLAKE: Dispatch Width: 6
+# SKYLAKE-NEXT: uOps Per Cycle: 0.23
+# SKYLAKE-NEXT: IPC: 0.15
+# SKYLAKE-NEXT: Block RThroughput: 1.0
# ALL: Timeline view:
diff --git a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s
index 3ff6906082f..698aba487df 100644
--- a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s
+++ b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s
@@ -14,22 +14,36 @@ vfmadd213ps (%rdi), %xmm1, %xmm2
# ALL-NEXT: Instructions: 2
# BDWELL-NEXT: Total Cycles: 13
-# BDWELL-NEXT: Dispatch Width: 4
-# BDWELL-NEXT: IPC: 0.15
+# BDWELL-NEXT: Total uOps: 3
# HASWELL-NEXT: Total Cycles: 14
-# HASWELL-NEXT: Dispatch Width: 4
-# HASWELL-NEXT: IPC: 0.14
+# HASWELL-NEXT: Total uOps: 3
# SKYLAKE-NEXT: Total Cycles: 13
-# SKYLAKE-NEXT: Dispatch Width: 6
-# SKYLAKE-NEXT: IPC: 0.15
+# SKYLAKE-NEXT: Total uOps: 3
# ZNVER1-NEXT: Total Cycles: 15
-# ZNVER1-NEXT: Dispatch Width: 4
+# ZNVER1-NEXT: Total uOps: 2
+
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.13
# ZNVER1-NEXT: IPC: 0.13
+# ZNVER1-NEXT: Block RThroughput: 1.0
-# ALL-NEXT: Block RThroughput: 1.0
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.21
+# HASWELL-NEXT: IPC: 0.14
+# HASWELL-NEXT: Block RThroughput: 1.0
+
+# BDWELL: Dispatch Width: 4
+# BDWELL-NEXT: uOps Per Cycle: 0.23
+# BDWELL-NEXT: IPC: 0.15
+# BDWELL-NEXT: Block RThroughput: 1.0
+
+# SKYLAKE: Dispatch Width: 6
+# SKYLAKE-NEXT: uOps Per Cycle: 0.23
+# SKYLAKE-NEXT: IPC: 0.15
+# SKYLAKE-NEXT: Block RThroughput: 1.0
# ALL: Timeline view:
diff --git a/llvm/test/tools/llvm-mca/X86/intel-syntax.s b/llvm/test/tools/llvm-mca/X86/intel-syntax.s
index 7cf2c13f897..1aaa3902866 100644
--- a/llvm/test/tools/llvm-mca/X86/intel-syntax.s
+++ b/llvm/test/tools/llvm-mca/X86/intel-syntax.s
@@ -12,7 +12,10 @@
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 400
# ALL-NEXT: Total Cycles: 305
-# ALL-NEXT: Dispatch Width: 2
+# ALL-NEXT: Total uOps: 500
+
+# ALL: Dispatch Width: 2
+# ALL-NEXT: uOps Per Cycle: 1.64
# ALL-NEXT: IPC: 1.31
# ALL-NEXT: Block RThroughput: 2.5
diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-2.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-2.s
index 108067e391f..21d1030bd62 100644
--- a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-2.s
+++ b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-2.s
@@ -10,7 +10,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 1
# CHECK-NEXT: Total Cycles: 4
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-3.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-3.s
index ab70a6175e2..efbb157f9da 100644
--- a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-3.s
+++ b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-3.s
@@ -10,7 +10,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 1
# CHECK-NEXT: Total Cycles: 4
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-4.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-4.s
index 09eadea4b3c..f7a58ff3853 100644
--- a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-4.s
+++ b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-4.s
@@ -14,7 +14,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 1
# CHECK-NEXT: Total Cycles: 4
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-5.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-5.s
index 21ef65c48cd..f562dbe38a1 100644
--- a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-5.s
+++ b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-5.s
@@ -18,7 +18,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 1
# CHECK-NEXT: Total Cycles: 4
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 0.5
@@ -38,7 +41,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 1
# CHECK-NEXT: Total Cycles: 4
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 0.5
@@ -58,7 +64,10 @@
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 1
# CHECK-NEXT: Total Cycles: 4
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 1
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s b/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s
index 48a5e03d61b..5763caaf985 100644
--- a/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s
+++ b/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s
@@ -9,7 +9,10 @@ add %eax, %eax
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 100
# ALL-NEXT: Total Cycles: 103
-# ALL-NEXT: Dispatch Width: 2
+# ALL-NEXT: Total uOps: 100
+
+# ALL: Dispatch Width: 2
+# ALL-NEXT: uOps Per Cycle: 0.97
# ALL-NEXT: IPC: 0.97
# ALL-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s b/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s
index d5cc3c884fc..3e8c8bece2a 100644
--- a/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s
+++ b/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s
@@ -10,7 +10,10 @@ add %eax, %eax
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 100
# ALL-NEXT: Total Cycles: 103
-# ALL-NEXT: Dispatch Width: 2
+# ALL-NEXT: Total uOps: 100
+
+# ALL: Dispatch Width: 2
+# ALL-NEXT: uOps Per Cycle: 0.97
# ALL-NEXT: IPC: 0.97
# ALL-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/option-all-views-1.s b/llvm/test/tools/llvm-mca/X86/option-all-views-1.s
index aa9561e0649..8950014f6b9 100644
--- a/llvm/test/tools/llvm-mca/X86/option-all-views-1.s
+++ b/llvm/test/tools/llvm-mca/X86/option-all-views-1.s
@@ -11,7 +11,10 @@ add %eax, %eax
# DEFAULTREPORT: Iterations: 100
# DEFAULTREPORT-NEXT: Instructions: 100
# DEFAULTREPORT-NEXT: Total Cycles: 103
-# DEFAULTREPORT-NEXT: Dispatch Width: 2
+# DEFAULTREPORT-NEXT: Total uOps: 100
+
+# DEFAULTREPORT: Dispatch Width: 2
+# DEFAULTREPORT-NEXT: uOps Per Cycle: 0.97
# DEFAULTREPORT-NEXT: IPC: 0.97
# DEFAULTREPORT-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/option-all-views-2.s b/llvm/test/tools/llvm-mca/X86/option-all-views-2.s
index 076c30a8e17..30c194777e1 100644
--- a/llvm/test/tools/llvm-mca/X86/option-all-views-2.s
+++ b/llvm/test/tools/llvm-mca/X86/option-all-views-2.s
@@ -10,7 +10,10 @@ add %eax, %eax
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 100
# ALL-NEXT: Total Cycles: 103
-# ALL-NEXT: Dispatch Width: 2
+# ALL-NEXT: Total uOps: 100
+
+# ALL: Dispatch Width: 2
+# ALL-NEXT: uOps Per Cycle: 0.97
# ALL-NEXT: IPC: 0.97
# ALL-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/option-no-stats-1.s b/llvm/test/tools/llvm-mca/X86/option-no-stats-1.s
index 0f94f552152..8b9f229ce83 100644
--- a/llvm/test/tools/llvm-mca/X86/option-no-stats-1.s
+++ b/llvm/test/tools/llvm-mca/X86/option-no-stats-1.s
@@ -8,7 +8,10 @@ add %edi, %eax
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 100
# CHECK-NEXT: Total Cycles: 103
-# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: Total uOps: 100
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.97
# CHECK-NEXT: IPC: 0.97
# CHECK-NEXT: Block RThroughput: 0.5
diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s
index 10fe5142e23..a4d9d1b42e7 100644
--- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s
+++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s
@@ -20,40 +20,61 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# ALL-NEXT: Instructions: 2
# BDWELL-NEXT: Total Cycles: 10
-# BDWELL-NEXT: Dispatch Width: 4
-# BDWELL-NEXT: IPC: 0.20
-# BDWELL-NEXT: Block RThroughput: 2.0
+# BDWELL-NEXT: Total uOps: 4
# BTVER2-NEXT: Total Cycles: 11
-# BTVER2-NEXT: Dispatch Width: 2
-# BTVER2-NEXT: IPC: 0.18
-# BTVER2-NEXT: Block RThroughput: 2.0
+# BTVER2-NEXT: Total uOps: 4
# HASWELL-NEXT: Total Cycles: 11
-# HASWELL-NEXT: Dispatch Width: 4
-# HASWELL-NEXT: IPC: 0.18
-# HASWELL-NEXT: Block RThroughput: 2.0
+# HASWELL-NEXT: Total uOps: 4
# IVY-NEXT: Total Cycles: 11
-# IVY-NEXT: Dispatch Width: 4
-# IVY-NEXT: IPC: 0.18
-# IVY-NEXT: Block RThroughput: 1.0
+# IVY-NEXT: Total uOps: 4
# SANDY-NEXT: Total Cycles: 11
-# SANDY-NEXT: Dispatch Width: 4
-# SANDY-NEXT: IPC: 0.18
-# SANDY-NEXT: Block RThroughput: 1.0
+# SANDY-NEXT: Total uOps: 4
# SKYLAKE-NEXT: Total Cycles: 11
-# SKYLAKE-NEXT: Dispatch Width: 6
-# SKYLAKE-NEXT: IPC: 0.18
-# SKYLAKE-NEXT: Block RThroughput: 0.7
+# SKYLAKE-NEXT: Total uOps: 4
# ZNVER1-NEXT: Total Cycles: 11
-# ZNVER1-NEXT: Dispatch Width: 4
+# ZNVER1-NEXT: Total uOps: 2
+
+# BTVER2: Dispatch Width: 2
+# BTVER2-NEXT: uOps Per Cycle: 0.36
+# BTVER2-NEXT: IPC: 0.18
+# BTVER2-NEXT: Block RThroughput: 2.0
+
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.18
# ZNVER1-NEXT: IPC: 0.18
# ZNVER1-NEXT: Block RThroughput: 1.0
+# IVY: Dispatch Width: 4
+# IVY-NEXT: uOps Per Cycle: 0.36
+# IVY-NEXT: IPC: 0.18
+# IVY-NEXT: Block RThroughput: 1.0
+
+# SANDY: Dispatch Width: 4
+# SANDY-NEXT: uOps Per Cycle: 0.36
+# SANDY-NEXT: IPC: 0.18
+# SANDY-NEXT: Block RThroughput: 1.0
+
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.36
+# HASWELL-NEXT: IPC: 0.18
+# HASWELL-NEXT: Block RThroughput: 2.0
+
+# BDWELL: Dispatch Width: 4
+# BDWELL-NEXT: uOps Per Cycle: 0.40
+# BDWELL-NEXT: IPC: 0.20
+# BDWELL-NEXT: Block RThroughput: 2.0
+
+# SKYLAKE: Dispatch Width: 6
+# SKYLAKE-NEXT: uOps Per Cycle: 0.36
+# SKYLAKE-NEXT: IPC: 0.18
+# SKYLAKE-NEXT: Block RThroughput: 0.7
+
# BTVER2: Timeline view:
# BTVER2-NEXT: 0
# BTVER2-NEXT: Index 0123456789
diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s
index 8b145e3a723..dd4dd773958 100644
--- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s
+++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s
@@ -20,40 +20,61 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# ALL-NEXT: Instructions: 2
# BDWELL-NEXT: Total Cycles: 10
-# BDWELL-NEXT: Dispatch Width: 4
-# BDWELL-NEXT: IPC: 0.20
-# BDWELL-NEXT: Block RThroughput: 2.0
+# BDWELL-NEXT: Total uOps: 4
# BTVER2-NEXT: Total Cycles: 11
-# BTVER2-NEXT: Dispatch Width: 2
-# BTVER2-NEXT: IPC: 0.18
-# BTVER2-NEXT: Block RThroughput: 2.0
+# BTVER2-NEXT: Total uOps: 4
# HASWELL-NEXT: Total Cycles: 11
-# HASWELL-NEXT: Dispatch Width: 4
-# HASWELL-NEXT: IPC: 0.18
-# HASWELL-NEXT: Block RThroughput: 2.0
+# HASWELL-NEXT: Total uOps: 4
# IVY-NEXT: Total Cycles: 11
-# IVY-NEXT: Dispatch Width: 4
-# IVY-NEXT: IPC: 0.18
-# IVY-NEXT: Block RThroughput: 1.0
+# IVY-NEXT: Total uOps: 4
# SANDY-NEXT: Total Cycles: 11
-# SANDY-NEXT: Dispatch Width: 4
-# SANDY-NEXT: IPC: 0.18
-# SANDY-NEXT: Block RThroughput: 1.0
+# SANDY-NEXT: Total uOps: 4
# SKYLAKE-NEXT: Total Cycles: 11
-# SKYLAKE-NEXT: Dispatch Width: 6
-# SKYLAKE-NEXT: IPC: 0.18
-# SKYLAKE-NEXT: Block RThroughput: 0.7
+# SKYLAKE-NEXT: Total uOps: 4
# ZNVER1-NEXT: Total Cycles: 11
-# ZNVER1-NEXT: Dispatch Width: 4
+# ZNVER1-NEXT: Total uOps: 2
+
+# BTVER2: Dispatch Width: 2
+# BTVER2-NEXT: uOps Per Cycle: 0.36
+# BTVER2-NEXT: IPC: 0.18
+# BTVER2-NEXT: Block RThroughput: 2.0
+
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.18
# ZNVER1-NEXT: IPC: 0.18
# ZNVER1-NEXT: Block RThroughput: 1.0
+# IVY: Dispatch Width: 4
+# IVY-NEXT: uOps Per Cycle: 0.36
+# IVY-NEXT: IPC: 0.18
+# IVY-NEXT: Block RThroughput: 1.0
+
+# SANDY: Dispatch Width: 4
+# SANDY-NEXT: uOps Per Cycle: 0.36
+# SANDY-NEXT: IPC: 0.18
+# SANDY-NEXT: Block RThroughput: 1.0
+
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.36
+# HASWELL-NEXT: IPC: 0.18
+# HASWELL-NEXT: Block RThroughput: 2.0
+
+# BDWELL: Dispatch Width: 4
+# BDWELL-NEXT: uOps Per Cycle: 0.40
+# BDWELL-NEXT: IPC: 0.20
+# BDWELL-NEXT: Block RThroughput: 2.0
+
+# SKYLAKE: Dispatch Width: 6
+# SKYLAKE-NEXT: uOps Per Cycle: 0.36
+# SKYLAKE-NEXT: IPC: 0.18
+# SKYLAKE-NEXT: Block RThroughput: 0.7
+
# BTVER2: Timeline view:
# BTVER2-NEXT: 0
# BTVER2-NEXT: Index 0123456789
diff --git a/llvm/tools/llvm-mca/Views/SummaryView.cpp b/llvm/tools/llvm-mca/Views/SummaryView.cpp
index 4a147bb6bca..026742ad294 100644
--- a/llvm/tools/llvm-mca/Views/SummaryView.cpp
+++ b/llvm/tools/llvm-mca/Views/SummaryView.cpp
@@ -63,7 +63,9 @@ void SummaryView::printView(raw_ostream &OS) const {
unsigned Iterations = Source.getNumIterations();
unsigned Instructions = Source.size();
unsigned TotalInstructions = Instructions * Iterations;
+ unsigned TotalUOps = NumMicroOps * Iterations;
double IPC = (double)TotalInstructions / TotalCycles;
+ double UOpsPerCycle = (double)TotalUOps / TotalCycles;
double BlockRThroughput = computeBlockRThroughput(
SM, DispatchWidth, NumMicroOps, ProcResourceUsage);
@@ -72,10 +74,12 @@ void SummaryView::printView(raw_ostream &OS) const {
TempStream << "Iterations: " << Iterations;
TempStream << "\nInstructions: " << TotalInstructions;
TempStream << "\nTotal Cycles: " << TotalCycles;
+ TempStream << "\nTotal uOps: " << TotalUOps << '\n';
TempStream << "\nDispatch Width: " << DispatchWidth;
- TempStream << "\nIPC: " << format("%.2f", IPC);
-
- // Round to the block reciprocal throughput to the nearest tenth.
+ TempStream << "\nuOps Per Cycle: "
+ << format("%.2f", floor((UOpsPerCycle * 100) + 0.5) / 100);
+ TempStream << "\nIPC: "
+ << format("%.2f", floor((IPC * 100) + 0.5) / 100);
TempStream << "\nBlock RThroughput: "
<< format("%.1f", floor((BlockRThroughput * 10) + 0.5) / 10)
<< '\n';
OpenPOWER on IntegriCloud