summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Roman Lebedev <lebedev.ri@gmail.com> 2018-11-08 14:48:56 +0000
committer Roman Lebedev <lebedev.ri@gmail.com> 2018-11-08 14:48:56 +0000
commit 2ad16b9371ede0fae0e7fcebad5e29bd5da14fb4 (patch)
tree 87bb9aee6068ffdbd1baf0148b63bc62798b9a90
parent 4b2957243bca25b584373fdd8594a308df102368 (diff)
download bcm5719-llvm-2ad16b9371ede0fae0e7fcebad5e29bd5da14fb4.tar.gz
bcm5719-llvm-2ad16b9371ede0fae0e7fcebad5e29bd5da14fb4.zip
[NFC][BdVer2] Tests for load and store throughput (PR39465)
During review it was noted that while it appears that the Piledriver can do two [consecutive] loads per cycle, it can only do one store per cycle. It was suggested that the sched model models that incorrectly, but it was opted to fix this afterwards.

These tests show that two consecutive loads per cycle are modelled correctly, and that the limit of one store per cycle is not modelled incorrectly either. Unless I'm missing the point.

https://bugs.llvm.org/show_bug.cgi?id=39465

llvm-svn: 346404
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s 604
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s 605
2 files changed, 1209 insertions, 0 deletions
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s
new file mode 100644
index 00000000000..d8083d49874
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s
@@ -0,0 +1,604 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN
+movb (%rax), %spl
+movb (%rcx), %bpl
+movb (%rdx), %sil
+movb (%rbx), %dil
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movw (%rax), %sp
+movw (%rcx), %bp
+movw (%rdx), %si
+movw (%rbx), %di
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movl (%rax), %esp
+movl (%rcx), %ebp
+movl (%rdx), %esi
+movl (%rbx), %edi
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movq (%rax), %rsp
+movq (%rcx), %rbp
+movq (%rdx), %rsi
+movq (%rbx), %rdi
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movd (%rax), %mm0
+movd (%rcx), %mm1
+movd (%rdx), %mm2
+movd (%rbx), %mm3
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movaps (%rax), %xmm0
+movaps (%rcx), %xmm1
+movaps (%rdx), %xmm2
+movaps (%rbx), %xmm3
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+vmovaps (%rax), %ymm0
+vmovaps (%rcx), %ymm1
+vmovaps (%rdx), %ymm2
+vmovaps (%rbx), %ymm3
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 207
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.93
+# CHECK-NEXT: IPC: 1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * movb (%rax), %spl
+# CHECK-NEXT: 1 5 0.50 * movb (%rcx), %bpl
+# CHECK-NEXT: 1 5 0.50 * movb (%rdx), %sil
+# CHECK-NEXT: 1 5 0.50 * movb (%rbx), %dil
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movb (%rax), %spl
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movb (%rcx), %bpl
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movb (%rdx), %sil
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movb (%rbx), %dil
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeeER. movb (%rax), %spl
+# CHECK-NEXT: [0,1] DeeeeeER. movb (%rcx), %bpl
+# CHECK-NEXT: [0,2] D=eeeeeER movb (%rdx), %sil
+# CHECK-NEXT: [0,3] D=eeeeeER movb (%rbx), %dil
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb (%rax), %spl
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movb (%rcx), %bpl
+# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movb (%rdx), %sil
+# CHECK-NEXT: 3. 1 2.0 2.0 0.0 movb (%rbx), %dil
+
+# CHECK: [1] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 207
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.93
+# CHECK-NEXT: IPC: 1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * movw (%rax), %sp
+# CHECK-NEXT: 1 5 0.50 * movw (%rcx), %bp
+# CHECK-NEXT: 1 5 0.50 * movw (%rdx), %si
+# CHECK-NEXT: 1 5 0.50 * movw (%rbx), %di
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movw (%rax), %sp
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movw (%rcx), %bp
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movw (%rdx), %si
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movw (%rbx), %di
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeeER. movw (%rax), %sp
+# CHECK-NEXT: [0,1] DeeeeeER. movw (%rcx), %bp
+# CHECK-NEXT: [0,2] D=eeeeeER movw (%rdx), %si
+# CHECK-NEXT: [0,3] D=eeeeeER movw (%rbx), %di
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw (%rax), %sp
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movw (%rcx), %bp
+# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movw (%rdx), %si
+# CHECK-NEXT: 3. 1 2.0 2.0 0.0 movw (%rbx), %di
+
+# CHECK: [2] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 207
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.93
+# CHECK-NEXT: IPC: 1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * movl (%rax), %esp
+# CHECK-NEXT: 1 5 0.50 * movl (%rcx), %ebp
+# CHECK-NEXT: 1 5 0.50 * movl (%rdx), %esi
+# CHECK-NEXT: 1 5 0.50 * movl (%rbx), %edi
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movl (%rax), %esp
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movl (%rcx), %ebp
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movl (%rdx), %esi
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movl (%rbx), %edi
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeeER. movl (%rax), %esp
+# CHECK-NEXT: [0,1] DeeeeeER. movl (%rcx), %ebp
+# CHECK-NEXT: [0,2] D=eeeeeER movl (%rdx), %esi
+# CHECK-NEXT: [0,3] D=eeeeeER movl (%rbx), %edi
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl (%rax), %esp
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movl (%rcx), %ebp
+# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movl (%rdx), %esi
+# CHECK-NEXT: 3. 1 2.0 2.0 0.0 movl (%rbx), %edi
+
+# CHECK: [3] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 207
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.93
+# CHECK-NEXT: IPC: 1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * movq (%rax), %rsp
+# CHECK-NEXT: 1 5 0.50 * movq (%rcx), %rbp
+# CHECK-NEXT: 1 5 0.50 * movq (%rdx), %rsi
+# CHECK-NEXT: 1 5 0.50 * movq (%rbx), %rdi
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movq (%rax), %rsp
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movq (%rcx), %rbp
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movq (%rdx), %rsi
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movq (%rbx), %rdi
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeeER. movq (%rax), %rsp
+# CHECK-NEXT: [0,1] DeeeeeER. movq (%rcx), %rbp
+# CHECK-NEXT: [0,2] D=eeeeeER movq (%rdx), %rsi
+# CHECK-NEXT: [0,3] D=eeeeeER movq (%rbx), %rdi
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq (%rax), %rsp
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movq (%rcx), %rbp
+# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movq (%rdx), %rsi
+# CHECK-NEXT: 3. 1 2.0 2.0 0.0 movq (%rbx), %rdi
+
+# CHECK: [4] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 207
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.93
+# CHECK-NEXT: IPC: 1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * movd (%rax), %mm0
+# CHECK-NEXT: 1 5 0.50 * movd (%rcx), %mm1
+# CHECK-NEXT: 1 5 0.50 * movd (%rdx), %mm2
+# CHECK-NEXT: 1 5 0.50 * movd (%rbx), %mm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: 2.00 2.00 - - - - - - - - 2.00 2.00 - - 2.00 2.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - 1.00 - - - 1.00 - - - - movd (%rax), %mm0
+# CHECK-NEXT: 1.00 - - - - - - - - - 1.00 - - - 1.00 - - - - - movd (%rcx), %mm1
+# CHECK-NEXT: 1.00 - - - - - - - - - 1.00 - - - - 1.00 - - - - movd (%rdx), %mm2
+# CHECK-NEXT: - 1.00 - - - - - - - - - 1.00 - - 1.00 - - - - - movd (%rbx), %mm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeeER. movd (%rax), %mm0
+# CHECK-NEXT: [0,1] DeeeeeER. movd (%rcx), %mm1
+# CHECK-NEXT: [0,2] D=eeeeeER movd (%rdx), %mm2
+# CHECK-NEXT: [0,3] D=eeeeeER movd (%rbx), %mm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd (%rax), %mm0
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movd (%rcx), %mm1
+# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movd (%rdx), %mm2
+# CHECK-NEXT: 3. 1 2.0 2.0 0.0 movd (%rbx), %mm3
+
+# CHECK: [5] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 207
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.93
+# CHECK-NEXT: IPC: 1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * movaps (%rax), %xmm0
+# CHECK-NEXT: 1 5 0.50 * movaps (%rcx), %xmm1
+# CHECK-NEXT: 1 5 0.50 * movaps (%rdx), %xmm2
+# CHECK-NEXT: 1 5 0.50 * movaps (%rbx), %xmm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: 2.00 2.00 - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - 1.00 - - - - - 1.00 - - - - movaps (%rax), %xmm0
+# CHECK-NEXT: 1.00 - - - - - - - 1.00 - - - - - 1.00 - - - - - movaps (%rcx), %xmm1
+# CHECK-NEXT: 1.00 - - - - - - - 1.00 - - - - - - 1.00 - - - - movaps (%rdx), %xmm2
+# CHECK-NEXT: - 1.00 - - - - - - - 1.00 - - - - 1.00 - - - - - movaps (%rbx), %xmm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeeER. movaps (%rax), %xmm0
+# CHECK-NEXT: [0,1] DeeeeeER. movaps (%rcx), %xmm1
+# CHECK-NEXT: [0,2] D=eeeeeER movaps (%rdx), %xmm2
+# CHECK-NEXT: [0,3] D=eeeeeER movaps (%rbx), %xmm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps (%rax), %xmm0
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movaps (%rcx), %xmm1
+# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movaps (%rdx), %xmm2
+# CHECK-NEXT: 3. 1 2.0 2.0 0.0 movaps (%rbx), %xmm3
+
+# CHECK: [6] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 207
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 3.86
+# CHECK-NEXT: IPC: 1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 5 0.50 * vmovaps (%rax), %ymm0
+# CHECK-NEXT: 2 5 0.50 * vmovaps (%rcx), %ymm1
+# CHECK-NEXT: 2 5 0.50 * vmovaps (%rdx), %ymm2
+# CHECK-NEXT: 2 5 0.50 * vmovaps (%rbx), %ymm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: 2.00 2.00 - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - 1.00 - - - - - 1.00 - - - - vmovaps (%rax), %ymm0
+# CHECK-NEXT: 1.00 - - - - - - - 1.00 - - - - - 1.00 - - - - - vmovaps (%rcx), %ymm1
+# CHECK-NEXT: 1.00 - - - - - - - 1.00 - - - - - - 1.00 - - - - vmovaps (%rdx), %ymm2
+# CHECK-NEXT: - 1.00 - - - - - - - 1.00 - - - - 1.00 - - - - - vmovaps (%rbx), %ymm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeeER. vmovaps (%rax), %ymm0
+# CHECK-NEXT: [0,1] DeeeeeER. vmovaps (%rcx), %ymm1
+# CHECK-NEXT: [0,2] .DeeeeeER vmovaps (%rdx), %ymm2
+# CHECK-NEXT: [0,3] .DeeeeeER vmovaps (%rbx), %ymm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rax), %ymm0
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 vmovaps (%rcx), %ymm1
+# CHECK-NEXT: 2. 1 1.0 1.0 0.0 vmovaps (%rdx), %ymm2
+# CHECK-NEXT: 3. 1 1.0 1.0 0.0 vmovaps (%rbx), %ymm3
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s
new file mode 100644
index 00000000000..43c2d1f7e64
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s
@@ -0,0 +1,605 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN
+movb %spl, (%rax)
+movb %bpl, (%rcx)
+movb %sil, (%rdx)
+movb %dil, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movw %sp, (%rax)
+movw %bp, (%rcx)
+movw %si, (%rdx)
+movw %di, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movl %esp, (%rax)
+movl %ebp, (%rcx)
+movl %esi, (%rdx)
+movl %edi, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movq %rsp, (%rax)
+movq %rbp, (%rcx)
+movq %rsi, (%rdx)
+movq %rdi, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movd %mm0, (%rax)
+movd %mm1, (%rcx)
+movd %mm2, (%rdx)
+movd %mm3, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movaps %xmm0, (%rax)
+movaps %xmm1, (%rcx)
+movaps %xmm2, (%rdx)
+movaps %xmm3, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+vmovaps %ymm0, (%rax)
+vmovaps %ymm1, (%rcx)
+vmovaps %ymm2, (%rdx)
+vmovaps %ymm3, (%rbx)
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 * movb %spl, (%rax)
+# CHECK-NEXT: 1 1 0.50 * movb %bpl, (%rcx)
+# CHECK-NEXT: 1 1 0.50 * movb %sil, (%rdx)
+# CHECK-NEXT: 1 1 0.50 * movb %dil, (%rbx)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: - 4.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movb %spl, (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movb %bpl, (%rcx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movb %sil, (%rdx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movb %dil, (%rbx)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. movb %spl, (%rax)
+# CHECK-NEXT: [0,1] D=eER.. movb %bpl, (%rcx)
+# CHECK-NEXT: [0,2] D==eER. movb %sil, (%rdx)
+# CHECK-NEXT: [0,3] D===eER movb %dil, (%rbx)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb %spl, (%rax)
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movb %bpl, (%rcx)
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movb %sil, (%rdx)
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movb %dil, (%rbx)
+
+# CHECK: [1] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 * movw %sp, (%rax)
+# CHECK-NEXT: 1 1 0.50 * movw %bp, (%rcx)
+# CHECK-NEXT: 1 1 0.50 * movw %si, (%rdx)
+# CHECK-NEXT: 1 1 0.50 * movw %di, (%rbx)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: - 4.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movw %sp, (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movw %bp, (%rcx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movw %si, (%rdx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movw %di, (%rbx)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. movw %sp, (%rax)
+# CHECK-NEXT: [0,1] D=eER.. movw %bp, (%rcx)
+# CHECK-NEXT: [0,2] D==eER. movw %si, (%rdx)
+# CHECK-NEXT: [0,3] D===eER movw %di, (%rbx)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw %sp, (%rax)
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movw %bp, (%rcx)
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movw %si, (%rdx)
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movw %di, (%rbx)
+
+# CHECK: [2] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 * movl %esp, (%rax)
+# CHECK-NEXT: 1 1 0.50 * movl %ebp, (%rcx)
+# CHECK-NEXT: 1 1 0.50 * movl %esi, (%rdx)
+# CHECK-NEXT: 1 1 0.50 * movl %edi, (%rbx)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: - 4.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movl %esp, (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movl %ebp, (%rcx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movl %esi, (%rdx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movl %edi, (%rbx)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. movl %esp, (%rax)
+# CHECK-NEXT: [0,1] D=eER.. movl %ebp, (%rcx)
+# CHECK-NEXT: [0,2] D==eER. movl %esi, (%rdx)
+# CHECK-NEXT: [0,3] D===eER movl %edi, (%rbx)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl %esp, (%rax)
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movl %ebp, (%rcx)
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movl %esi, (%rdx)
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movl %edi, (%rbx)
+
+# CHECK: [3] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 * movq %rsp, (%rax)
+# CHECK-NEXT: 1 1 0.50 * movq %rbp, (%rcx)
+# CHECK-NEXT: 1 1 0.50 * movq %rsi, (%rdx)
+# CHECK-NEXT: 1 1 0.50 * movq %rdi, (%rbx)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: - 4.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movq %rsp, (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movq %rbp, (%rcx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movq %rsi, (%rdx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movq %rdi, (%rbx)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. movq %rsp, (%rax)
+# CHECK-NEXT: [0,1] D=eER.. movq %rbp, (%rcx)
+# CHECK-NEXT: [0,2] D==eER. movq %rsi, (%rdx)
+# CHECK-NEXT: [0,3] D===eER movq %rdi, (%rbx)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq %rsp, (%rax)
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movq %rbp, (%rcx)
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movq %rsi, (%rdx)
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movq %rdi, (%rbx)
+
+# CHECK: [4] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 803
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 2 1.00 * U movd %mm0, (%rax)
+# CHECK-NEXT: 1 2 1.00 * U movd %mm1, (%rcx)
+# CHECK-NEXT: 1 2 1.00 * U movd %mm2, (%rdx)
+# CHECK-NEXT: 1 2 1.00 * U movd %mm3, (%rbx)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: - 4.00 - - - - - - - - - - - 4.00 - 4.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movd %mm0, (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movd %mm1, (%rcx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movd %mm2, (%rdx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movd %mm3, (%rbx)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . movd %mm0, (%rax)
+# CHECK-NEXT: [0,1] D==eeER . movd %mm1, (%rcx)
+# CHECK-NEXT: [0,2] D====eeER . movd %mm2, (%rdx)
+# CHECK-NEXT: [0,3] D======eeER movd %mm3, (%rbx)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd %mm0, (%rax)
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 movd %mm1, (%rcx)
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 movd %mm2, (%rdx)
+# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movd %mm3, (%rbx)
+
+# CHECK: [5] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 1.00 * movaps %xmm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 * movaps %xmm1, (%rcx)
+# CHECK-NEXT: 1 1 1.00 * movaps %xmm2, (%rdx)
+# CHECK-NEXT: 1 1 1.00 * movaps %xmm3, (%rbx)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: - 4.00 - - - - - - - - - - - 4.00 - 4.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movaps %xmm0, (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movaps %xmm1, (%rcx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movaps %xmm2, (%rdx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movaps %xmm3, (%rbx)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. movaps %xmm0, (%rax)
+# CHECK-NEXT: [0,1] D=eER.. movaps %xmm1, (%rcx)
+# CHECK-NEXT: [0,2] D==eER. movaps %xmm2, (%rdx)
+# CHECK-NEXT: [0,3] D===eER movaps %xmm3, (%rbx)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps %xmm0, (%rax)
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movaps %xmm1, (%rcx)
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movaps %xmm2, (%rdx)
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movaps %xmm3, (%rbx)
+
+# CHECK: [6] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 1600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 3.97
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 4 1 1.00 * vmovaps %ymm0, (%rax)
+# CHECK-NEXT: 4 1 1.00 * vmovaps %ymm1, (%rcx)
+# CHECK-NEXT: 4 1 1.00 * vmovaps %ymm2, (%rdx)
+# CHECK-NEXT: 4 1 1.00 * vmovaps %ymm3, (%rbx)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: - 4.00 - - - - - - - - - - - 4.00 - 4.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %ymm0, (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %ymm1, (%rcx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %ymm2, (%rdx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %ymm3, (%rbx)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. vmovaps %ymm0, (%rax)
+# CHECK-NEXT: [0,1] .DeER.. vmovaps %ymm1, (%rcx)
+# CHECK-NEXT: [0,2] . DeER. vmovaps %ymm2, (%rdx)
+# CHECK-NEXT: [0,3] . DeER vmovaps %ymm3, (%rbx)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps %ymm0, (%rax)
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vmovaps %ymm1, (%rcx)
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 vmovaps %ymm2, (%rdx)
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 vmovaps %ymm3, (%rbx)
OpenPOWER on IntegriCloud