summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s11
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s11
-rw-r--r--llvm/tools/llvm-mca/SummaryView.cpp76
-rw-r--r--llvm/tools/llvm-mca/SummaryView.h35
-rw-r--r--llvm/tools/llvm-mca/llvm-mca.cpp2
23 files changed, 219 insertions, 114 deletions
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s
index a15dc1027c7..7955ff743cc 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s
@@ -5,11 +5,12 @@ vmulps %xmm0, %xmm1, %xmm2
vhaddps %xmm2, %xmm2, %xmm3
vhaddps %xmm3, %xmm3, %xmm4
-# CHECK: Iterations: 300
-# CHECK-NEXT: Instructions: 900
-# CHECK-NEXT: Total Cycles: 610
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 1.48
+# CHECK: Iterations: 300
+# CHECK-NEXT: Instructions: 900
+# CHECK-NEXT: Total Cycles: 610
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 1.48
+# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s
index a5b15d1bd9b..4c343d62a48 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s
@@ -4,11 +4,12 @@
vshufps $0, %xmm0, %xmm1, %xmm1
vhaddps (%rdi), %xmm1, %xmm2
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 11
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.18
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 11
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.18
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s
index 2f9a7972587..9ac266b9c29 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s
@@ -4,11 +4,12 @@
vshufps $0, %xmm0, %xmm1, %xmm1
vhaddps (%rdi), %ymm1, %ymm2
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 12
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.17
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 12
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s b/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s
index 77fc5faec4e..e9fd32f00c4 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s
@@ -10,11 +10,12 @@ vhaddps %xmm3, %xmm3, %xmm4
# DISABLED-NOT: Instruction Info:
-# ENABLED: Iterations: 100
-# ENABLED-NEXT: Instructions: 300
-# ENABLED-NEXT: Total Cycles: 209
-# ENABLED-NEXT: Dispatch Width: 2
-# ENABLED-NEXT: IPC: 1.44
+# ENABLED: Iterations: 100
+# ENABLED-NEXT: Instructions: 300
+# ENABLED-NEXT: Total Cycles: 209
+# ENABLED-NEXT: Dispatch Width: 2
+# ENABLED-NEXT: IPC: 1.44
+# ENABLED-NEXT: Block RThroughput: 2.0
# ENABLED: Instruction Info:
# ENABLED-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s b/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
index 95d69064d71..9f42c47384d 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
@@ -10,11 +10,12 @@ vmovaps %xmm0, 32(%rdi)
vmovaps 48(%rsi), %xmm0
vmovaps %xmm0, 48(%rdi)
-# CHECK: Iterations: 100
-# CHECK-NEXT: Instructions: 800
-# CHECK-NEXT: Total Cycles: 2403
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.33
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 2403
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s b/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
index 708add22a69..f86325dbb8e 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
@@ -10,11 +10,12 @@ vmovaps %xmm0, 32(%rdi)
vmovaps 48(%rsi), %xmm0
vmovaps %xmm0, 48(%rdi)
-# CHECK: Iterations: 100
-# CHECK-NEXT: Instructions: 800
-# CHECK-NEXT: Total Cycles: 408
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 1.96
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 408
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 1.96
+# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
index 6042ce35a18..8165b994acd 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
@@ -17,11 +17,12 @@ vsqrtps %xmm0, %xmm2
vaddps %ymm0, %ymm1, %ymm2
vsqrtps %ymm0, %ymm2
-# CHECK: Iterations: 100
-# CHECK-NEXT: Instructions: 800
-# CHECK-NEXT: Total Cycles: 6306
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.13
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 6306
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.13
+# CHECK-NEXT: Block RThroughput: 63.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s b/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s
index c1f9ea246d3..f186f138fa8 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s
@@ -18,11 +18,12 @@
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 16
-# CHECK-NEXT: Total Cycles: 31
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.52
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 16
+# CHECK-NEXT: Total Cycles: 31
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.52
+# CHECK-NEXT: Block RThroughput: 21.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s
index 7e48e7a81dd..ee69d2c2140 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s
@@ -8,11 +8,12 @@
vaddps %xmm0, %xmm0, %xmm1
vmulps (%rdi), %xmm1, %xmm2
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.20
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s
index af64ca71db2..a27e74a2e7b 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s
@@ -7,11 +7,12 @@
# The second integer multiply can start at cycle 2 because the implicit reads
# can start after the load operand is evaluated.
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.20
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s
index b3a8f6f98f2..773f91b6fc0 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s
@@ -5,11 +5,12 @@
add (%rsp), %rsi
add %rdx, %r8
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 3
-# CHECK-NEXT: Total Cycles: 7
-# CHECK-NEXT: Dispatch Width: 3
-# CHECK-NEXT: IPC: 0.43
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 3
+# CHECK-NEXT: Total Cycles: 7
+# CHECK-NEXT: Dispatch Width: 3
+# CHECK-NEXT: IPC: 0.43
+# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s
index fed5fd0e477..6324f4bcfec 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s
@@ -4,11 +4,12 @@
vaddps %xmm0, %xmm0, %xmm0
vmulps %xmm0, %xmm0, %xmm0
-# CHECK: Iterations: 5
-# CHECK-NEXT: Instructions: 10
-# CHECK-NEXT: Total Cycles: 28
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.36
+# CHECK: Iterations: 5
+# CHECK-NEXT: Instructions: 10
+# CHECK-NEXT: Total Cycles: 28
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s
index 50c21dc32b9..40d37fb30cc 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s
@@ -4,11 +4,12 @@
vaddps %xmm0, %xmm0, %xmm0
vmulps %xmm0, %xmm0, %xmm0
-# CHECK: Iterations: 5
-# CHECK-NEXT: Instructions: 10
-# CHECK-NEXT: Total Cycles: 28
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.36
+# CHECK: Iterations: 5
+# CHECK-NEXT: Instructions: 10
+# CHECK-NEXT: Total Cycles: 28
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 13
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s
index 7ba27ecb5c5..59829415005 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-3.s
@@ -3,11 +3,12 @@
idiv %eax
-# CHECK: Iterations: 2
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 55
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.04
+# CHECK: Iterations: 2
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 55
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.04
+# CHECK-NEXT: Block RThroughput: 25.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s
index 122e24392c8..369a82ef6ef 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-4.s
@@ -3,11 +3,12 @@
idiv %eax
-# CHECK: Iterations: 22
-# CHECK-NEXT: Instructions: 22
-# CHECK-NEXT: Total Cycles: 553
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.04
+# CHECK: Iterations: 22
+# CHECK-NEXT: Instructions: 22
+# CHECK-NEXT: Total Cycles: 553
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.04
+# CHECK-NEXT: Block RThroughput: 25.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s
index fc14e46677d..6489f237864 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s
@@ -35,11 +35,12 @@
vaddps %ymm3, %ymm0, %ymm5
vaddps %ymm3, %ymm0, %ymm6
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 33
-# CHECK-NEXT: Total Cycles: 70
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.47
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 33
+# CHECK-NEXT: Total Cycles: 70
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.47
+# CHECK-NEXT: Block RThroughput: 64.0
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s
index b501934aa34..caaf1ccb145 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s
@@ -4,11 +4,12 @@
vmulps (%rsi), %xmm0, %xmm0
add %rsi, %rsi
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.20
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s b/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s
index 4b3f549024e..05328080deb 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/simple-test.s
@@ -3,11 +3,12 @@
add %edi, %eax
-# CHECK: Iterations: 100
-# CHECK-NEXT: Instructions: 100
-# CHECK-NEXT: Total Cycles: 103
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.97
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 100
+# CHECK-NEXT: Total Cycles: 103
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.97
+# CHECK-NEXT: Block RThroughput: 0.5
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s
index 7cbc0a8c7ff..acdcdebe261 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s
@@ -4,11 +4,12 @@
vaddps %xmm0, %xmm0, %xmm1
vandps (%rdi), %xmm1, %xmm2
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 9
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.22
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 9
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s
index 2b091d8788a..60bf59fed2b 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s
@@ -4,11 +4,12 @@
vaddps %ymm0, %ymm0, %ymm1
vandps (%rdi), %ymm1, %ymm2
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.20
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/tools/llvm-mca/SummaryView.cpp b/llvm/tools/llvm-mca/SummaryView.cpp
index 511727bc750..9b6e1d9b183 100644
--- a/llvm/tools/llvm-mca/SummaryView.cpp
+++ b/llvm/tools/llvm-mca/SummaryView.cpp
@@ -14,6 +14,8 @@
//===----------------------------------------------------------------------===//
#include "SummaryView.h"
+#include "Support.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Format.h"
namespace mca {
@@ -22,19 +24,83 @@ namespace mca {
using namespace llvm;
+void SummaryView::onInstructionEvent(const HWInstructionEvent &Event) {
+ // We are only interested in the "instruction dispatched" events generated by
+ // the dispatch stage for instructions that are part of iteration #0.
+ if (Event.Type != HWInstructionEvent::Dispatched)
+ return;
+
+ if (Event.IR.getSourceIndex() >= Source.size())
+ return;
+
+ // Update the cumulative number of resource cycles based on the processor
+ // resource usage information available from the instruction descriptor. We need to
+ // compute the cumulative number of resource cycles for every processor
+ // resource which is consumed by an instruction of the block.
+ const Instruction &Inst = *Event.IR.getInstruction();
+ const InstrDesc &Desc = Inst.getDesc();
+ NumMicroOps += Desc.NumMicroOps;
+ for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) {
+ if (!RU.second.size())
+ continue;
+
+ assert(RU.second.NumUnits && "Expected more than one unit used!");
+ if (ProcResourceUsage.find(RU.first) == ProcResourceUsage.end()) {
+ ProcResourceUsage[RU.first] = RU.second.size();
+ continue;
+ }
+
+ ProcResourceUsage[RU.first] += RU.second.size();
+ }
+}
+
+double SummaryView::getBlockRThroughput() const {
+ assert(NumMicroOps && "Expected at least one micro opcode!");
+
+ SmallVector<uint64_t, 8> Masks(SM.getNumProcResourceKinds());
+ computeProcResourceMasks(SM, Masks);
+
+ // The block throughput is bounded from above by the hardware dispatch
+ // throughput. That is because the DispatchWidth is an upper bound on the
+ // number of opcodes that can be part of a single dispatch group.
+ double Max = static_cast<double>(NumMicroOps) / DispatchWidth;
+
+ // The block throughput is also limited by the amount of hardware parallelism.
+ // The number of available resource units affects the resource pressure
+ // distribution, as well as how many blocks can be executed every cycle.
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ uint64_t Mask = Masks[I];
+ const auto It = ProcResourceUsage.find_as(Mask);
+ if (It != ProcResourceUsage.end()) {
+ const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
+ unsigned NumUnits = MCDesc.NumUnits;
+ double Throughput = static_cast<double>(It->second) / NumUnits;
+ Max = std::max(Max, Throughput);
+ }
+ }
+
+ // The block reciprocal throughput is computed as the MAX of:
+ // - (#uOps / DispatchWidth)
+ // - (resource cycles / #units) for every consumed processor resource.
+ return Max;
+}
+
void SummaryView::printView(raw_ostream &OS) const {
unsigned Iterations = Source.getNumIterations();
unsigned Instructions = Source.size();
unsigned TotalInstructions = Instructions * Iterations;
double IPC = (double)TotalInstructions / TotalCycles;
+ double BlockRThroughput = getBlockRThroughput();
std::string Buffer;
raw_string_ostream TempStream(Buffer);
- TempStream << "Iterations: " << Iterations;
- TempStream << "\nInstructions: " << TotalInstructions;
- TempStream << "\nTotal Cycles: " << TotalCycles;
- TempStream << "\nDispatch Width: " << DispatchWidth;
- TempStream << "\nIPC: " << format("%.2f", IPC) << '\n';
+ TempStream << "Iterations: " << Iterations;
+ TempStream << "\nInstructions: " << TotalInstructions;
+ TempStream << "\nTotal Cycles: " << TotalCycles;
+ TempStream << "\nDispatch Width: " << DispatchWidth;
+ TempStream << "\nIPC: " << format("%.2f", IPC);
+ TempStream << "\nBlock RThroughput: " << format("%.1f", BlockRThroughput)
+ << '\n';
TempStream.flush();
OS << Buffer;
}
diff --git a/llvm/tools/llvm-mca/SummaryView.h b/llvm/tools/llvm-mca/SummaryView.h
index 0484057fb10..fe8a5e20f9d 100644
--- a/llvm/tools/llvm-mca/SummaryView.h
+++ b/llvm/tools/llvm-mca/SummaryView.h
@@ -14,12 +14,12 @@
/// performance throughput. Below is an example of summary view:
///
///
-/// Iterations: 300
-/// Instructions: 900
-/// Total Cycles: 610
-/// Dispatch Width: 2
-/// IPC: 1.48
-///
+/// Iterations: 300
+/// Instructions: 900
+/// Total Cycles: 610
+/// Dispatch Width: 2
+/// IPC: 1.48
+/// Block RThroughput: 2.0
///
/// The summary view collects a few performance numbers. The two main
/// performance indicators are 'Total Cycles' and IPC (Instructions Per Cycle).
@@ -31,22 +31,41 @@
#include "SourceMgr.h"
#include "View.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/raw_ostream.h"
namespace mca {
/// A view that collects and prints a few performance numbers.
class SummaryView : public View {
+ const llvm::MCSchedModel &SM;
const SourceMgr &Source;
const unsigned DispatchWidth;
unsigned TotalCycles;
+ // The total number of micro opcodes contributed by a block of instructions.
+ unsigned NumMicroOps;
+ // For each processor resource, this map stores the cumulative number of
+ // resource cycles consumed by a block of instructions. The resource mask ID
+ // is used as the key value to access elements of this map.
+ llvm::DenseMap<uint64_t, unsigned> ProcResourceUsage;
+
+ // Compute the reciprocal throughput for the analyzed code block.
+ // The reciprocal block throughput is computed as the MAX between:
+ // - NumMicroOps / DispatchWidth
+ // - Total Resource Cycles / #Units (for every resource consumed).
+ double getBlockRThroughput() const;
public:
- SummaryView(const SourceMgr &S, unsigned Width)
- : Source(S), DispatchWidth(Width), TotalCycles(0) {}
+ SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
+ unsigned Width)
+ : SM(Model), Source(S), DispatchWidth(Width), TotalCycles(0),
+ NumMicroOps(0) {}
void onCycleEnd() override { ++TotalCycles; }
+ void onInstructionEvent(const HWInstructionEvent &Event) override;
+
void printView(llvm::raw_ostream &OS) const override;
};
} // namespace mca
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
index 1e93a7fdcdb..925584b0193 100644
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -495,7 +495,7 @@ int main(int argc, char **argv) {
LoadQueueSize, StoreQueueSize, AssumeNoAlias);
mca::BackendPrinter Printer(B);
- Printer.addView(llvm::make_unique<mca::SummaryView>(S, Width));
+ Printer.addView(llvm::make_unique<mca::SummaryView>(SM, S, Width));
if (PrintInstructionInfoView)
Printer.addView(
llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, S, *IP));
OpenPOWER on IntegriCloud