summaryrefslogtreecommitdiffstats
path: root/llvm/test/tools/llvm-mca
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/tools/llvm-mca')
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/aes.s57
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld1.s189
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld2.s118
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld3.s118
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld4.s118
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s169
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s97
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s97
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s97
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/crc.s58
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s8
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/divide-multiply.s67
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/double-recp.s66
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/double-rsqrt.s78
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/double-sqrt.s79
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/extended-register.s16
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/float-divide-multiply.s94
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/float-integer.s114
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/float-load.s153
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/float-recp.s62
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/float-rsqrt.s72
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/float-sqrt.s73
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s142
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/load.s66
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s6
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s22
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/store.s82
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s51
28 files changed, 2335 insertions, 34 deletions
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/aes.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/aes.s
new file mode 100644
index 00000000000..e981b2aa4bd
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/aes.s
@@ -0,0 +1,57 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+aese v0.16b, v1.16b
+aesmc v0.16b, v0.16b
+
+aesd v0.16b, v1.16b
+aesimc v0.16b, v0.16b
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 400
+
+# M3-NEXT: Total Cycles: 203
+# M4-NEXT: Total Cycles: 203
+# M5-NEXT: Total Cycles: 403
+
+# ALL-NEXT: Total uOps: 400
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 1.97
+# M3-NEXT: IPC: 1.97
+
+# M4-NEXT: uOps Per Cycle: 1.97
+# M4-NEXT: IPC: 1.97
+
+# M5-NEXT: uOps Per Cycle: 0.99
+# M5-NEXT: IPC: 0.99
+
+# ALL-NEXT: Block RThroughput: 2.0
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 1 1 0.50 aese v0.16b, v1.16b
+# M3-NEXT: 1 1 0.50 aesmc v0.16b, v0.16b
+# M3-NEXT: 1 1 0.50 aesd v0.16b, v1.16b
+# M3-NEXT: 1 1 0.50 aesimc v0.16b, v0.16b
+
+# M4-NEXT: 1 1 0.50 aese v0.16b, v1.16b
+# M4-NEXT: 1 1 0.50 aesmc v0.16b, v0.16b
+# M4-NEXT: 1 1 0.50 aesd v0.16b, v1.16b
+# M4-NEXT: 1 1 0.50 aesimc v0.16b, v0.16b
+
+# M5-NEXT: 1 2 0.50 aese v0.16b, v1.16b
+# M5-NEXT: 1 2 0.50 aesmc v0.16b, v0.16b
+# M5-NEXT: 1 2 0.50 aesd v0.16b, v1.16b
+# M5-NEXT: 1 2 0.50 aesimc v0.16b, v0.16b
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld1.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld1.s
new file mode 100644
index 00000000000..f23b1f71c53
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld1.s
@@ -0,0 +1,189 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+ld1 {v0.s}[0], [sp]
+ld1r {v0.2s}, [sp]
+ld1 {v0.2s}, [sp]
+ld1 {v0.2s, v1.2s}, [sp]
+ld1 {v0.2s, v1.2s, v2.2s}, [sp]
+ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
+
+ld1 {v0.d}[0], [sp]
+ld1r {v0.2d}, [sp]
+ld1 {v0.2d}, [sp]
+ld1 {v0.2d, v1.2d}, [sp]
+ld1 {v0.2d, v1.2d, v2.2d}, [sp]
+ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
+
+ld1 {v0.s}[0], [sp], #4
+ld1r {v0.2s}, [sp], #4
+ld1 {v0.2s}, [sp], #8
+ld1 {v0.2s, v1.2s}, [sp], #16
+ld1 {v0.2s, v1.2s, v2.2s}, [sp], #24
+ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32
+
+ld1 {v0.d}[0], [sp], #8
+ld1r {v0.2d}, [sp], #8
+ld1 {v0.2d}, [sp], #16
+ld1 {v0.2d, v1.2d}, [sp], #32
+ld1 {v0.2d, v1.2d, v2.2d}, [sp], #48
+ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64
+
+ld1 {v0.s}[0], [sp], x0
+ld1r {v0.2s}, [sp], x0
+ld1 {v0.2s}, [sp], x0
+ld1 {v0.2s, v1.2s}, [sp], x0
+ld1 {v0.2s, v1.2s, v2.2s}, [sp], x0
+ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
+
+ld1 {v0.d}[0], [sp], x0
+ld1r {v0.2d}, [sp], x0
+ld1 {v0.2d}, [sp], x0
+ld1 {v0.2d, v1.2d}, [sp], x0
+ld1 {v0.2d, v1.2d, v2.2d}, [sp], x0
+ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 3600
+
+# M3-NEXT: Total Cycles: 14903
+# M4-NEXT: Total Cycles: 14703
+# M5-NEXT: Total Cycles: 17203
+
+# ALL-NEXT: Total uOps: 10200
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.68
+# M3-NEXT: IPC: 0.24
+
+# M4-NEXT: uOps Per Cycle: 0.69
+# M4-NEXT: IPC: 0.24
+
+# M5-NEXT: uOps Per Cycle: 0.59
+# M5-NEXT: IPC: 0.21
+
+# ALL-NEXT: Block RThroughput: 39.0
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 2 7 1.00 * ld1 { v0.s }[0], [sp]
+# M3-NEXT: 1 5 0.50 * ld1r { v0.2s }, [sp]
+# M3-NEXT: 1 5 0.50 * ld1 { v0.2s }, [sp]
+# M3-NEXT: 2 5 1.00 * ld1 { v0.2s, v1.2s }, [sp]
+# M3-NEXT: 3 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp]
+# M3-NEXT: 4 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M3-NEXT: 2 6 1.00 * ld1 { v0.d }[0], [sp]
+# M3-NEXT: 1 5 0.50 * ld1r { v0.2d }, [sp]
+# M3-NEXT: 1 5 0.50 * ld1 { v0.2d }, [sp]
+# M3-NEXT: 2 5 1.00 * ld1 { v0.2d, v1.2d }, [sp]
+# M3-NEXT: 3 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp]
+# M3-NEXT: 4 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M3-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], #4
+# M3-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], #4
+# M3-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], #8
+# M3-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16
+# M3-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24
+# M3-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
+# M3-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], #8
+# M3-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], #8
+# M3-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], #16
+# M3-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32
+# M3-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48
+# M3-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
+# M3-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], x0
+# M3-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], x0
+# M3-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], x0
+# M3-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0
+# M3-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M3-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M3-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], x0
+# M3-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], x0
+# M3-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], x0
+# M3-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0
+# M3-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0
+# M3-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
+
+# M4-NEXT: 2 6 1.00 * ld1 { v0.s }[0], [sp]
+# M4-NEXT: 1 5 0.50 * ld1r { v0.2s }, [sp]
+# M4-NEXT: 1 5 0.50 * ld1 { v0.2s }, [sp]
+# M4-NEXT: 2 5 1.00 * ld1 { v0.2s, v1.2s }, [sp]
+# M4-NEXT: 3 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp]
+# M4-NEXT: 4 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M4-NEXT: 2 6 1.00 * ld1 { v0.d }[0], [sp]
+# M4-NEXT: 1 5 0.50 * ld1r { v0.2d }, [sp]
+# M4-NEXT: 1 5 0.50 * ld1 { v0.2d }, [sp]
+# M4-NEXT: 2 5 1.00 * ld1 { v0.2d, v1.2d }, [sp]
+# M4-NEXT: 3 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp]
+# M4-NEXT: 4 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M4-NEXT: 3 6 1.00 * ld1 { v0.s }[0], [sp], #4
+# M4-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], #4
+# M4-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], #8
+# M4-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16
+# M4-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24
+# M4-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
+# M4-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], #8
+# M4-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], #8
+# M4-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], #16
+# M4-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32
+# M4-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48
+# M4-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
+# M4-NEXT: 3 6 1.00 * ld1 { v0.s }[0], [sp], x0
+# M4-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], x0
+# M4-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], x0
+# M4-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0
+# M4-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M4-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M4-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], x0
+# M4-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], x0
+# M4-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], x0
+# M4-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0
+# M4-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0
+# M4-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
+
+# M5-NEXT: 2 7 1.00 * ld1 { v0.s }[0], [sp]
+# M5-NEXT: 1 6 0.50 * ld1r { v0.2s }, [sp]
+# M5-NEXT: 1 6 0.50 * ld1 { v0.2s }, [sp]
+# M5-NEXT: 2 6 1.00 * ld1 { v0.2s, v1.2s }, [sp]
+# M5-NEXT: 3 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp]
+# M5-NEXT: 4 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M5-NEXT: 2 7 1.00 * ld1 { v0.d }[0], [sp]
+# M5-NEXT: 1 6 0.50 * ld1r { v0.2d }, [sp]
+# M5-NEXT: 1 6 0.50 * ld1 { v0.2d }, [sp]
+# M5-NEXT: 2 6 1.00 * ld1 { v0.2d, v1.2d }, [sp]
+# M5-NEXT: 3 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp]
+# M5-NEXT: 4 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M5-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], #4
+# M5-NEXT: 2 6 0.50 * ld1r { v0.2s }, [sp], #4
+# M5-NEXT: 2 6 0.50 * ld1 { v0.2s }, [sp], #8
+# M5-NEXT: 3 6 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16
+# M5-NEXT: 4 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24
+# M5-NEXT: 5 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
+# M5-NEXT: 3 7 1.00 * ld1 { v0.d }[0], [sp], #8
+# M5-NEXT: 2 6 0.50 * ld1r { v0.2d }, [sp], #8
+# M5-NEXT: 2 6 0.50 * ld1 { v0.2d }, [sp], #16
+# M5-NEXT: 3 6 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32
+# M5-NEXT: 4 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48
+# M5-NEXT: 5 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
+# M5-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], x0
+# M5-NEXT: 2 6 0.50 * ld1r { v0.2s }, [sp], x0
+# M5-NEXT: 2 6 0.50 * ld1 { v0.2s }, [sp], x0
+# M5-NEXT: 3 6 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0
+# M5-NEXT: 4 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M5-NEXT: 5 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M5-NEXT: 3 7 1.00 * ld1 { v0.d }[0], [sp], x0
+# M5-NEXT: 2 6 0.50 * ld1r { v0.2d }, [sp], x0
+# M5-NEXT: 2 6 0.50 * ld1 { v0.2d }, [sp], x0
+# M5-NEXT: 3 6 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0
+# M5-NEXT: 4 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0
+# M5-NEXT: 5 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld2.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld2.s
new file mode 100644
index 00000000000..2ca640dbea7
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld2.s
@@ -0,0 +1,118 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+ld2 {v0.s, v1.s}[0], [sp]
+ld2r {v0.2s, v1.2s}, [sp]
+ld2 {v0.2s, v1.2s}, [sp]
+
+ld2 {v0.d, v1.d}[0], [sp]
+ld2r {v0.2d, v1.2d}, [sp]
+ld2 {v0.2d, v1.2d}, [sp]
+
+ld2 {v0.s, v1.s}[0], [sp], #8
+ld2r {v0.2s, v1.2s}, [sp], #8
+ld2 {v0.2s, v1.2s}, [sp], #16
+
+ld2 {v0.d, v1.d}[0], [sp], #16
+ld2r {v0.2d, v1.2d}, [sp], #16
+ld2 {v0.2d, v1.2d}, [sp], #32
+
+ld2 {v0.s, v1.s}[0], [sp], x0
+ld2r {v0.2s, v1.2s}, [sp], x0
+ld2 {v0.2s, v1.2s}, [sp], x0
+
+ld2 {v0.d, v1.d}[0], [sp], x0
+ld2r {v0.2d, v1.2d}, [sp], x0
+ld2 {v0.2d, v1.2d}, [sp], x0
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 1800
+
+# M3-NEXT: Total Cycles: 10003
+# M4-NEXT: Total Cycles: 9803
+# M5-NEXT: Total Cycles: 11103
+
+# ALL-NEXT: Total uOps: 5400
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.54
+# M3-NEXT: IPC: 0.18
+# M3-NEXT: Block RThroughput: 42.0
+
+# M4-NEXT: uOps Per Cycle: 0.55
+# M4-NEXT: IPC: 0.18
+# M4-NEXT: Block RThroughput: 30.0
+
+# M5-NEXT: uOps Per Cycle: 0.49
+# M5-NEXT: IPC: 0.16
+# M5-NEXT: Block RThroughput: 45.0
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 3 7 1.00 * ld2 { v0.s, v1.s }[0], [sp]
+# M3-NEXT: 2 5 1.00 * ld2r { v0.2s, v1.2s }, [sp]
+# M3-NEXT: 2 10 5.00 * ld2 { v0.2s, v1.2s }, [sp]
+# M3-NEXT: 3 6 1.00 * ld2 { v0.d, v1.d }[0], [sp]
+# M3-NEXT: 2 5 1.00 * ld2r { v0.2d, v1.2d }, [sp]
+# M3-NEXT: 2 10 5.00 * ld2 { v0.2d, v1.2d }, [sp]
+# M3-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8
+# M3-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8
+# M3-NEXT: 3 10 5.00 * ld2 { v0.2s, v1.2s }, [sp], #16
+# M3-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16
+# M3-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16
+# M3-NEXT: 3 10 5.00 * ld2 { v0.2d, v1.2d }, [sp], #32
+# M3-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0
+# M3-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0
+# M3-NEXT: 3 10 5.00 * ld2 { v0.2s, v1.2s }, [sp], x0
+# M3-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0
+# M3-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0
+# M3-NEXT: 3 10 5.00 * ld2 { v0.2d, v1.2d }, [sp], x0
+
+# M4-NEXT: 3 6 1.00 * ld2 { v0.s, v1.s }[0], [sp]
+# M4-NEXT: 2 5 1.00 * ld2r { v0.2s, v1.2s }, [sp]
+# M4-NEXT: 2 10 3.00 * ld2 { v0.2s, v1.2s }, [sp]
+# M4-NEXT: 3 6 1.00 * ld2 { v0.d, v1.d }[0], [sp]
+# M4-NEXT: 2 5 1.00 * ld2r { v0.2d, v1.2d }, [sp]
+# M4-NEXT: 2 10 3.00 * ld2 { v0.2d, v1.2d }, [sp]
+# M4-NEXT: 4 6 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8
+# M4-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8
+# M4-NEXT: 3 10 3.00 * ld2 { v0.2s, v1.2s }, [sp], #16
+# M4-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16
+# M4-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16
+# M4-NEXT: 3 10 3.00 * ld2 { v0.2d, v1.2d }, [sp], #32
+# M4-NEXT: 4 6 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0
+# M4-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0
+# M4-NEXT: 3 10 3.00 * ld2 { v0.2s, v1.2s }, [sp], x0
+# M4-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0
+# M4-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0
+# M4-NEXT: 3 10 3.00 * ld2 { v0.2d, v1.2d }, [sp], x0
+
+# M5-NEXT: 3 7 1.00 * ld2 { v0.s, v1.s }[0], [sp]
+# M5-NEXT: 2 6 1.00 * ld2r { v0.2s, v1.2s }, [sp]
+# M5-NEXT: 2 11 5.50 * ld2 { v0.2s, v1.2s }, [sp]
+# M5-NEXT: 3 7 1.00 * ld2 { v0.d, v1.d }[0], [sp]
+# M5-NEXT: 2 6 1.00 * ld2r { v0.2d, v1.2d }, [sp]
+# M5-NEXT: 2 11 5.50 * ld2 { v0.2d, v1.2d }, [sp]
+# M5-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8
+# M5-NEXT: 3 6 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8
+# M5-NEXT: 3 11 5.50 * ld2 { v0.2s, v1.2s }, [sp], #16
+# M5-NEXT: 4 7 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16
+# M5-NEXT: 3 6 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16
+# M5-NEXT: 3 11 5.50 * ld2 { v0.2d, v1.2d }, [sp], #32
+# M5-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0
+# M5-NEXT: 3 6 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0
+# M5-NEXT: 3 11 5.50 * ld2 { v0.2s, v1.2s }, [sp], x0
+# M5-NEXT: 4 7 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0
+# M5-NEXT: 3 6 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0
+# M5-NEXT: 3 11 5.50 * ld2 { v0.2d, v1.2d }, [sp], x0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld3.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld3.s
new file mode 100644
index 00000000000..a6a89434754
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld3.s
@@ -0,0 +1,118 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+ld3 {v0.s, v1.s, v2.s}[0], [sp]
+ld3r {v0.2s, v1.2s, v2.2s}, [sp]
+ld3 {v0.2s, v1.2s, v2.2s}, [sp]
+
+ld3 {v0.d, v1.d, v2.d}[0], [sp]
+ld3r {v0.2d, v1.2d, v2.2d}, [sp]
+ld3 {v0.2d, v1.2d, v2.2d}, [sp]
+
+ld3 {v0.s, v1.s, v2.s}[0], [sp], #12
+ld3r {v0.2s, v1.2s, v2.2s}, [sp], #12
+ld3 {v0.2s, v1.2s, v2.2s}, [sp], #24
+
+ld3 {v0.d, v1.d, v2.d}[0], [sp], #24
+ld3r {v0.2d, v1.2d, v2.2d}, [sp], #24
+ld3 {v0.2d, v1.2d, v2.2d}, [sp], #48
+
+ld3 {v0.s, v1.s, v2.s}[0], [sp], x0
+ld3r {v0.2s, v1.2s, v2.2s}, [sp], x0
+ld3 {v0.2s, v1.2s, v2.2s}, [sp], x0
+
+ld3 {v0.d, v1.d, v2.d}[0], [sp], x0
+ld3r {v0.2d, v1.2d, v2.2d}, [sp], x0
+ld3 {v0.2d, v1.2d, v2.2d}, [sp], x0
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 1800
+
+# M3-NEXT: Total Cycles: 12501
+# M4-NEXT: Total Cycles: 11804
+# M5-NEXT: Total Cycles: 12903
+
+# ALL-NEXT: Total uOps: 7500
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.60
+# M3-NEXT: IPC: 0.14
+# M3-NEXT: Block RThroughput: 84.0
+
+# M4-NEXT: uOps Per Cycle: 0.64
+# M4-NEXT: IPC: 0.15
+# M4-NEXT: Block RThroughput: 54.0
+
+# M5-NEXT: uOps Per Cycle: 0.58
+# M5-NEXT: IPC: 0.14
+# M5-NEXT: Block RThroughput: 22.5
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 4 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp]
+# M3-NEXT: 3 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp]
+# M3-NEXT: 3 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp]
+# M3-NEXT: 5 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp]
+# M3-NEXT: 3 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp]
+# M3-NEXT: 3 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp]
+# M3-NEXT: 5 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12
+# M3-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12
+# M3-NEXT: 4 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24
+# M3-NEXT: 6 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24
+# M3-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24
+# M3-NEXT: 4 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48
+# M3-NEXT: 5 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0
+# M3-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M3-NEXT: 4 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M3-NEXT: 6 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0
+# M3-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0
+# M3-NEXT: 4 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0
+
+# M4-NEXT: 4 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp]
+# M4-NEXT: 3 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp]
+# M4-NEXT: 3 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp]
+# M4-NEXT: 5 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp]
+# M4-NEXT: 3 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp]
+# M4-NEXT: 3 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp]
+# M4-NEXT: 5 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12
+# M4-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12
+# M4-NEXT: 4 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24
+# M4-NEXT: 6 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24
+# M4-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24
+# M4-NEXT: 4 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48
+# M4-NEXT: 5 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0
+# M4-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M4-NEXT: 4 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M4-NEXT: 6 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0
+# M4-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0
+# M4-NEXT: 4 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0
+
+# M5-NEXT: 4 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp]
+# M5-NEXT: 3 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp]
+# M5-NEXT: 3 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp]
+# M5-NEXT: 5 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp]
+# M5-NEXT: 3 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp]
+# M5-NEXT: 3 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp]
+# M5-NEXT: 5 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12
+# M5-NEXT: 4 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12
+# M5-NEXT: 4 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24
+# M5-NEXT: 6 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24
+# M5-NEXT: 4 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24
+# M5-NEXT: 4 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48
+# M5-NEXT: 5 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0
+# M5-NEXT: 4 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M5-NEXT: 4 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M5-NEXT: 6 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0
+# M5-NEXT: 4 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0
+# M5-NEXT: 4 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld4.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld4.s
new file mode 100644
index 00000000000..c5f2c9b7ec4
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld4.s
@@ -0,0 +1,118 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp]
+ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
+ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
+
+ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp]
+ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
+ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
+
+ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], #16
+ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #16
+ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32
+
+ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], #32
+ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #32
+ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64
+
+ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], x0
+ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
+ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
+
+ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], x0
+ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
+ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 1800
+
+# M3-NEXT: Total Cycles: 15598
+# M4-NEXT: Total Cycles: 13004
+# M5-NEXT: Total Cycles: 14304
+
+# ALL-NEXT: Total uOps: 9300
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.60
+# M3-NEXT: IPC: 0.12
+# M3-NEXT: Block RThroughput: 108.0
+
+# M4-NEXT: uOps Per Cycle: 0.72
+# M4-NEXT: IPC: 0.14
+# M4-NEXT: Block RThroughput: 61.5
+
+# M5-NEXT: uOps Per Cycle: 0.65
+# M5-NEXT: IPC: 0.13
+# M5-NEXT: Block RThroughput: 40.5
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 5 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
+# M3-NEXT: 4 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M3-NEXT: 4 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M3-NEXT: 6 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
+# M3-NEXT: 4 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M3-NEXT: 4 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M3-NEXT: 6 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
+# M3-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
+# M3-NEXT: 5 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
+# M3-NEXT: 7 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
+# M3-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32
+# M3-NEXT: 5 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
+# M3-NEXT: 6 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
+# M3-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M3-NEXT: 5 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M3-NEXT: 7 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
+# M3-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
+# M3-NEXT: 5 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
+
+# M4-NEXT: 5 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
+# M4-NEXT: 4 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M4-NEXT: 4 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M4-NEXT: 6 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
+# M4-NEXT: 4 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M4-NEXT: 4 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M4-NEXT: 6 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
+# M4-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
+# M4-NEXT: 5 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
+# M4-NEXT: 7 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
+# M4-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32
+# M4-NEXT: 5 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
+# M4-NEXT: 6 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
+# M4-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M4-NEXT: 5 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M4-NEXT: 7 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
+# M4-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
+# M4-NEXT: 5 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
+
+# M5-NEXT: 5 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
+# M5-NEXT: 4 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M5-NEXT: 4 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M5-NEXT: 6 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
+# M5-NEXT: 4 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M5-NEXT: 4 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M5-NEXT: 6 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
+# M5-NEXT: 5 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
+# M5-NEXT: 5 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
+# M5-NEXT: 7 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
+# M5-NEXT: 5 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32
+# M5-NEXT: 5 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
+# M5-NEXT: 6 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
+# M5-NEXT: 5 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M5-NEXT: 5 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M5-NEXT: 7 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
+# M5-NEXT: 5 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
+# M5-NEXT: 5 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s
new file mode 100644
index 00000000000..81e5fe84ad0
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s
@@ -0,0 +1,169 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+st1 {v0.s}[0], [sp]
+st1 {v0.2s}, [sp]
+st1 {v0.2s, v1.2s}, [sp]
+st1 {v0.2s, v1.2s, v2.2s}, [sp]
+st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
+
+st1 {v0.d}[0], [sp]
+st1 {v0.2d}, [sp]
+st1 {v0.2d, v1.2d}, [sp]
+st1 {v0.2d, v1.2d, v2.2d}, [sp]
+st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
+
+st1 {v0.s}[0], [sp], #4
+st1 {v0.2s}, [sp], #8
+st1 {v0.2s, v1.2s}, [sp], #16
+st1 {v0.2s, v1.2s, v2.2s}, [sp], #24
+st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32
+
+st1 {v0.d}[0], [sp], #8
+st1 {v0.2d}, [sp], #16
+st1 {v0.2d, v1.2d}, [sp], #32
+st1 {v0.2d, v1.2d, v2.2d}, [sp], #48
+st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64
+
+st1 {v0.s}[0], [sp], x0
+st1 {v0.2s}, [sp], x0
+st1 {v0.2s, v1.2s}, [sp], x0
+st1 {v0.2s, v1.2s, v2.2s}, [sp], x0
+st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
+
+st1 {v0.d}[0], [sp], x0
+st1 {v0.2d}, [sp], x0
+st1 {v0.2d, v1.2d}, [sp], x0
+st1 {v0.2d, v1.2d, v2.2d}, [sp], x0
+st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 3000
+
+# M3-NEXT: Total Cycles: 10203
+# M3-NEXT: Total uOps: 8400
+
+# M4-NEXT: Total Cycles: 6603
+# M4-NEXT: Total uOps: 8600
+
+# M5-NEXT: Total Cycles: 6603
+# M5-NEXT: Total uOps: 8600
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.82
+# M3-NEXT: IPC: 0.29
+# M3-NEXT: Block RThroughput: 72.0
+
+# M4-NEXT: uOps Per Cycle: 1.30
+# M4-NEXT: IPC: 0.45
+# M4-NEXT: Block RThroughput: 33.0
+
+# M5-NEXT: uOps Per Cycle: 1.30
+# M5-NEXT: IPC: 0.45
+# M5-NEXT: Block RThroughput: 33.0
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp]
+# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp]
+# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp]
+# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp]
+# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp]
+# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp]
+# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp]
+# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp]
+# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp], #4
+# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp], #8
+# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp], #16
+# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24
+# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
+# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp], #8
+# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp], #16
+# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp], #32
+# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48
+# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
+# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp], x0
+# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp], x0
+# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp], x0
+# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp], x0
+# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp], x0
+# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp], x0
+# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0
+# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
+
+# M4-NEXT: 1 1 0.50 * st1 { v0.s }[0], [sp]
+# M4-NEXT: 1 1 0.50 * st1 { v0.2s }, [sp]
+# M4-NEXT: 2 2 1.00 * st1 { v0.2s, v1.2s }, [sp]
+# M4-NEXT: 3 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp]
+# M4-NEXT: 4 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M4-NEXT: 1 1 0.50 * st1 { v0.d }[0], [sp]
+# M4-NEXT: 1 1 0.50 * st1 { v0.2d }, [sp]
+# M4-NEXT: 2 2 1.00 * st1 { v0.2d, v1.2d }, [sp]
+# M4-NEXT: 3 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp]
+# M4-NEXT: 4 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M4-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], #4
+# M4-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], #8
+# M4-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], #16
+# M4-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24
+# M4-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
+# M4-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], #8
+# M4-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], #16
+# M4-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], #32
+# M4-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48
+# M4-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
+# M4-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], x0
+# M4-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], x0
+# M4-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], x0
+# M4-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M4-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M4-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], x0
+# M4-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], x0
+# M4-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], x0
+# M4-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0
+# M4-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
+
+# M5-NEXT: 1 1 0.50 * st1 { v0.s }[0], [sp]
+# M5-NEXT: 1 1 0.50 * st1 { v0.2s }, [sp]
+# M5-NEXT: 2 2 1.00 * st1 { v0.2s, v1.2s }, [sp]
+# M5-NEXT: 3 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp]
+# M5-NEXT: 4 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M5-NEXT: 1 1 0.50 * st1 { v0.d }[0], [sp]
+# M5-NEXT: 1 1 0.50 * st1 { v0.2d }, [sp]
+# M5-NEXT: 2 2 1.00 * st1 { v0.2d, v1.2d }, [sp]
+# M5-NEXT: 3 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp]
+# M5-NEXT: 4 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M5-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], #4
+# M5-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], #8
+# M5-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], #16
+# M5-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24
+# M5-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
+# M5-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], #8
+# M5-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], #16
+# M5-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], #32
+# M5-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48
+# M5-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
+# M5-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], x0
+# M5-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], x0
+# M5-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], x0
+# M5-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M5-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M5-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], x0
+# M5-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], x0
+# M5-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], x0
+# M5-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0
+# M5-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s
new file mode 100644
index 00000000000..9506241fef2
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s
@@ -0,0 +1,97 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+st2 {v0.s, v1.s}[0], [sp]
+st2 {v0.2s, v1.2s}, [sp]
+
+st2 {v0.d, v1.d}[0], [sp]
+st2 {v0.2d, v1.2d}, [sp]
+
+st2 {v0.s, v1.s}[0], [sp], #8
+st2 {v0.2s, v1.2s}, [sp], #16
+
+st2 {v0.d, v1.d}[0], [sp], #16
+st2 {v0.2d, v1.2d}, [sp], #32
+
+st2 {v0.s, v1.s}[0], [sp], x0
+st2 {v0.2s, v1.2s}, [sp], x0
+
+st2 {v0.d, v1.d}[0], [sp], x0
+st2 {v0.2d, v1.2d}, [sp], x0
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 1200
+
+# M3-NEXT: Total Cycles: 8703
+# M3-NEXT: Total uOps: 5400
+
+# M4-NEXT: Total Cycles: 2403
+# M4-NEXT: Total uOps: 2300
+
+# M5-NEXT: Total Cycles: 2403
+# M5-NEXT: Total uOps: 2000
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.62
+# M3-NEXT: IPC: 0.14
+# M3-NEXT: Block RThroughput: 40.5
+
+# M4-NEXT: uOps Per Cycle: 0.96
+# M4-NEXT: IPC: 0.50
+# M4-NEXT: Block RThroughput: 7.5
+
+# M5-NEXT: uOps Per Cycle: 0.83
+# M5-NEXT: IPC: 0.50
+# M5-NEXT: Block RThroughput: 7.5
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp]
+# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp]
+# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp]
+# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp]
+# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp], #8
+# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp], #16
+# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp], #16
+# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp], #32
+# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp], x0
+# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp], x0
+# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp], x0
+# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp], x0
+
+# M4-NEXT: 1 2 0.50 * st2 { v0.s, v1.s }[0], [sp]
+# M4-NEXT: 1 2 0.50 * st2 { v0.2s, v1.2s }, [sp]
+# M4-NEXT: 1 2 0.50 * st2 { v0.d, v1.d }[0], [sp]
+# M4-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp]
+# M4-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], #8
+# M4-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], #16
+# M4-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], #16
+# M4-NEXT: 3 2 1.00 * st2 { v0.2d, v1.2d }, [sp], #32
+# M4-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], x0
+# M4-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], x0
+# M4-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], x0
+# M4-NEXT: 3 2 1.00 * st2 { v0.2d, v1.2d }, [sp], x0
+
+# M5-NEXT: 1 2 0.50 * st2 { v0.s, v1.s }[0], [sp]
+# M5-NEXT: 1 2 0.50 * st2 { v0.2s, v1.2s }, [sp]
+# M5-NEXT: 1 2 0.50 * st2 { v0.d, v1.d }[0], [sp]
+# M5-NEXT: 1 2 1.00 * st2 { v0.2d, v1.2d }, [sp]
+# M5-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], #8
+# M5-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], #16
+# M5-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], #16
+# M5-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp], #32
+# M5-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], x0
+# M5-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], x0
+# M5-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], x0
+# M5-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp], x0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s
new file mode 100644
index 00000000000..4de5213d526
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s
@@ -0,0 +1,97 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+st3 {v0.s, v1.s, v2.s}[0], [sp]
+st3 {v0.2s, v1.2s, v2.2s}, [sp]
+
+st3 {v0.d, v1.d, v2.d}[0], [sp]
+st3 {v0.2d, v1.2d, v2.2d}, [sp]
+
+st3 {v0.s, v1.s, v2.s}[0], [sp], #12
+st3 {v0.2s, v1.2s, v2.2s}, [sp], #24
+
+st3 {v0.d, v1.d, v2.d}[0], [sp], #24
+st3 {v0.2d, v1.2d, v2.2d}, [sp], #48
+
+st3 {v0.s, v1.s, v2.s}[0], [sp], x0
+st3 {v0.2s, v1.2s, v2.2s}, [sp], x0
+
+st3 {v0.d, v1.d, v2.d}[0], [sp], x0
+st3 {v0.2d, v1.2d, v2.2d}, [sp], x0
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 1200
+
+# M3-NEXT: Total Cycles: 18003
+# M3-NEXT: Total uOps: 8400
+
+# M4-NEXT: Total Cycles: 3903
+# M4-NEXT: Total uOps: 5000
+
+# M5-NEXT: Total Cycles: 3603
+# M5-NEXT: Total uOps: 4400
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.47
+# M3-NEXT: IPC: 0.07
+# M3-NEXT: Block RThroughput: 72.0
+
+# M4-NEXT: uOps Per Cycle: 1.28
+# M4-NEXT: IPC: 0.31
+# M4-NEXT: Block RThroughput: 21.0
+
+# M5-NEXT: uOps Per Cycle: 1.22
+# M5-NEXT: IPC: 0.33
+# M5-NEXT: Block RThroughput: 10.5
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp]
+# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp]
+# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp]
+# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp]
+# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12
+# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24
+# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24
+# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48
+# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0
+# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0
+# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0
+
+# M4-NEXT: 2 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp]
+# M4-NEXT: 4 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp]
+# M4-NEXT: 2 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp]
+# M4-NEXT: 6 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp]
+# M4-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12
+# M4-NEXT: 5 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24
+# M4-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24
+# M4-NEXT: 7 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48
+# M4-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0
+# M4-NEXT: 5 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M4-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0
+# M4-NEXT: 7 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0
+
+# M5-NEXT: 2 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp]
+# M5-NEXT: 3 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp]
+# M5-NEXT: 2 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp]
+# M5-NEXT: 5 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp]
+# M5-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12
+# M5-NEXT: 4 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24
+# M5-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24
+# M5-NEXT: 6 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48
+# M5-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0
+# M5-NEXT: 4 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0
+# M5-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0
+# M5-NEXT: 6 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s
new file mode 100644
index 00000000000..7dfe59f78c4
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s
@@ -0,0 +1,97 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp]
+st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
+
+st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp]
+st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
+
+st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], #16
+st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32
+
+st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], #32
+st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64
+
+st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], x0
+st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
+
+st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], x0
+st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 1200
+
+# M3-NEXT: Total Cycles: 18603
+# M3-NEXT: Total uOps: 9000
+
+# M4-NEXT: Total Cycles: 4803
+# M4-NEXT: Total uOps: 4700
+
+# M5-NEXT: Total Cycles: 4803
+# M5-NEXT: Total uOps: 4700
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.48
+# M3-NEXT: IPC: 0.06
+# M3-NEXT: Block RThroughput: 76.5
+
+# M4-NEXT: uOps Per Cycle: 0.98
+# M4-NEXT: IPC: 0.25
+# M4-NEXT: Block RThroughput: 24.0
+
+# M5-NEXT: uOps Per Cycle: 0.98
+# M5-NEXT: IPC: 0.25
+# M5-NEXT: Block RThroughput: 24.0
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
+# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
+# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
+# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
+# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
+# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
+# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
+# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
+# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
+
+# M4-NEXT: 2 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
+# M4-NEXT: 4 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M4-NEXT: 2 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
+# M4-NEXT: 5 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M4-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
+# M4-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
+# M4-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
+# M4-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
+# M4-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
+# M4-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M4-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
+# M4-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
+
+# M5-NEXT: 2 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
+# M5-NEXT: 4 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# M5-NEXT: 2 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
+# M5-NEXT: 5 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# M5-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
+# M5-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
+# M5-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
+# M5-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
+# M5-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
+# M5-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
+# M5-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
+# M5-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/crc.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/crc.s
new file mode 100644
index 00000000000..27aa0075c57
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/crc.s
@@ -0,0 +1,58 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+crc32w w0, w1, w2
+crc32w w0, w0, w3
+
+crc32cx w0, w1, x2
+crc32cx w0, w0, x3
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 400
+
+# M3-NEXT: Total Cycles: 204
+# M4-NEXT: Total Cycles: 404
+# M5-NEXT: Total Cycles: 204
+
+# ALL-NEXT: Total uOps: 400
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 1.96
+# M3-NEXT: IPC: 1.96
+# M3-NEXT: Block RThroughput: 2.0
+
+# M4-NEXT: uOps Per Cycle: 0.99
+# M4-NEXT: IPC: 0.99
+# M4-NEXT: Block RThroughput: 4.0
+
+# M5-NEXT: uOps Per Cycle: 1.96
+# M5-NEXT: IPC: 1.96
+# M5-NEXT: Block RThroughput: 2.0
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 1 2 0.50 crc32w w0, w1, w2
+# M3-NEXT: 1 2 0.50 crc32w w0, w0, w3
+# M3-NEXT: 1 2 0.50 crc32cx w0, w1, x2
+# M3-NEXT: 1 2 0.50 crc32cx w0, w0, x3
+
+# M4-NEXT: 1 2 1.00 crc32w w0, w1, w2
+# M4-NEXT: 1 2 1.00 crc32w w0, w0, w3
+# M4-NEXT: 1 2 1.00 crc32cx w0, w1, x2
+# M4-NEXT: 1 2 1.00 crc32cx w0, w0, x3
+
+# M5-NEXT: 1 2 0.50 crc32w w0, w1, w2
+# M5-NEXT: 1 2 0.50 crc32w w0, w0, w3
+# M5-NEXT: 1 2 0.50 crc32cx w0, w1, x2
+# M5-NEXT: 1 2 0.50 crc32cx w0, w0, x3
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s
index 0819170c68b..79f810c95f0 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s
@@ -1,6 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
b main
@@ -9,6 +10,7 @@
# M3-NEXT: Total Cycles: 18
# M4-NEXT: Total Cycles: 18
+# M5-NEXT: Total Cycles: 18
# ALL-NEXT: Total uOps: 100
@@ -22,6 +24,11 @@
# M4-NEXT: IPC: 5.56
# M4-NEXT: Block RThroughput: 0.2
+# M5: Dispatch Width: 6
+# M5-NEXT: uOps Per Cycle: 5.56
+# M5-NEXT: IPC: 5.56
+# M5-NEXT: Block RThroughput: 0.2
+
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
@@ -34,3 +41,4 @@
# M3-NEXT: 1 0 0.17 b main
# M4-NEXT: 1 0 0.17 b main
+# M5-NEXT: 1 0 0.17 b main
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/divide-multiply.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/divide-multiply.s
new file mode 100644
index 00000000000..c74d1923c4a
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/divide-multiply.s
@@ -0,0 +1,67 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
+
+sdiv w0, w1, w2
+udiv x1, x2, x3
+
+mul w2, w3, w4
+msub x3, x4, x5, x6
+
+smull x4, w5, w6
+umulh x5, x6, x7
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 600
+
+# EM3-NEXT: Total Cycles: 3305
+# EM4-NEXT: Total Cycles: 3303
+# EM5-NEXT: Total Cycles: 2603
+
+# ALL-NEXT: Total uOps: 600
+
+# ALL: Dispatch Width: 6
+
+# EM3-NEXT: uOps Per Cycle: 0.18
+# EM3-NEXT: IPC: 0.18
+# EM3-NEXT: Block RThroughput: 33.0
+
+# EM4-NEXT: uOps Per Cycle: 0.18
+# EM4-NEXT: IPC: 0.18
+# EM4-NEXT: Block RThroughput: 33.0
+
+# EM5-NEXT: uOps Per Cycle: 0.23
+# EM5-NEXT: IPC: 0.23
+# EM5-NEXT: Block RThroughput: 26.0
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# EM3-NEXT: 1 12 12.00 sdiv w0, w1, w2
+# EM3-NEXT: 1 21 21.00 udiv x1, x2, x3
+# EM3-NEXT: 1 3 0.50 mul w2, w3, w4
+# EM3-NEXT: 1 4 1.00 msub x3, x4, x5, x6
+# EM3-NEXT: 1 3 0.50 smull x4, w5, w6
+# EM3-NEXT: 1 4 1.00 umulh x5, x6, x7
+
+# EM4-NEXT: 1 12 12.00 sdiv w0, w1, w2
+# EM4-NEXT: 1 21 21.00 udiv x1, x2, x3
+# EM4-NEXT: 1 3 0.50 mul w2, w3, w4
+# EM4-NEXT: 1 4 1.00 msub x3, x4, x5, x6
+# EM4-NEXT: 1 3 0.50 smull x4, w5, w6
+# EM4-NEXT: 1 4 1.00 umulh x5, x6, x7
+
+# EM5-NEXT: 1 10 10.00 sdiv w0, w1, w2
+# EM5-NEXT: 1 16 16.00 udiv x1, x2, x3
+# EM5-NEXT: 1 2 0.50 mul w2, w3, w4
+# EM5-NEXT: 1 3 1.00 msub x3, x4, x5, x6
+# EM5-NEXT: 1 2 0.50 smull x4, w5, w6
+# EM5-NEXT: 1 3 1.00 umulh x5, x6, x7
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/double-recp.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-recp.s
new file mode 100644
index 00000000000..872f6ab7948
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-recp.s
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+fmov d31, #1.00000000
+fdiv d30, d31, d30
+
+# Newton series for 1 / x.
+frecpe d1, d0
+frecps d2, d0, d1
+fmul d1, d1, d2
+frecps d2, d0, d1
+fmul d1, d1, d2
+frecps d0, d0, d1
+fmul d0, d1, d0
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 900
+
+# M3-NEXT: Total Cycles: 2503
+# M4-NEXT: Total Cycles: 2403
+# M5-NEXT: Total Cycles: 2403
+
+# ALL-NEXT: Total uOps: 900
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.36
+# M3-NEXT: IPC: 0.36
+# M3-NEXT: Block RThroughput: 3.3
+
+# M4-NEXT: uOps Per Cycle: 0.37
+# M4-NEXT: IPC: 0.37
+# M4-NEXT: Block RThroughput: 2.3
+
+# M5-NEXT: uOps Per Cycle: 0.37
+# M5-NEXT: IPC: 0.37
+# M5-NEXT: Block RThroughput: 2.3
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+# ALL-NEXT: 1 1 0.33 fmov d31, #1.00000000
+
+# M3-NEXT: 1 12 3.25 fdiv d30, d31, d30
+# M3-NEXT: 1 4 0.50 frecpe d1, d0
+
+# M4-NEXT: 1 12 2.25 fdiv d30, d31, d30
+# M4-NEXT: 1 3 0.50 frecpe d1, d0
+
+# M5-NEXT: 1 12 2.25 fdiv d30, d31, d30
+# M5-NEXT: 1 3 0.50 frecpe d1, d0
+
+# ALL-NEXT: 1 4 0.33 frecps d2, d0, d1
+# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
+# ALL-NEXT: 1 4 0.33 frecps d2, d0, d1
+# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
+# ALL-NEXT: 1 4 0.33 frecps d0, d0, d1
+# ALL-NEXT: 1 3 0.33 fmul d0, d1, d0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/double-rsqrt.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-rsqrt.s
new file mode 100644
index 00000000000..98fa404bb94
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-rsqrt.s
@@ -0,0 +1,78 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+fsqrt d30, d30
+fmov d31, #1.00000000
+fdiv d30, d31, d30
+
+# Newton series for 1 / sqrt().
+frsqrte d1, d0
+fmul d2, d1, d1
+frsqrts d2, d0, d2
+fmul d1, d1, d2
+fmul d2, d1, d1
+frsqrts d2, d0, d2
+fmul d1, d1, d2
+fmul d2, d1, d1
+frsqrts d0, d0, d2
+fmul d0, d1, d0
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 1300
+
+# M3-NEXT: Total Cycles: 3703
+# M4-NEXT: Total Cycles: 3303
+# M5-NEXT: Total Cycles: 3303
+
+# ALL-NEXT: Total uOps: 1300
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.35
+# M3-NEXT: IPC: 0.35
+# M3-NEXT: Block RThroughput: 26.0
+
+# M4-NEXT: uOps Per Cycle: 0.39
+# M4-NEXT: IPC: 0.39
+# M4-NEXT: Block RThroughput: 3.0
+
+# M5-NEXT: uOps Per Cycle: 0.39
+# M5-NEXT: IPC: 0.39
+# M5-NEXT: Block RThroughput: 3.0
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 1 25 26.00 fsqrt d30, d30
+# M4-NEXT: 1 12 2.25 fsqrt d30, d30
+# M5-NEXT: 1 12 2.25 fsqrt d30, d30
+
+# ALL-NEXT: 1 1 0.33 fmov d31, #1.00000000
+
+# M3-NEXT: 1 12 3.25 fdiv d30, d31, d30
+# M3-NEXT: 1 4 0.50 frsqrte d1, d0
+
+# M4-NEXT: 1 12 2.25 fdiv d30, d31, d30
+# M4-NEXT: 1 3 0.50 frsqrte d1, d0
+
+# M5-NEXT: 1 12 2.25 fdiv d30, d31, d30
+# M5-NEXT: 1 3 0.50 frsqrte d1, d0
+
+# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
+# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
+# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
+# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
+# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
+# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
+# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
+# ALL-NEXT: 1 4 0.33 frsqrts d0, d0, d2
+# ALL-NEXT: 1 3 0.33 fmul d0, d1, d0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/double-sqrt.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-sqrt.s
new file mode 100644
index 00000000000..b9aceff3913
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-sqrt.s
@@ -0,0 +1,79 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+fsqrt d31, d31
+
+# Newton series for sqrt().
+frsqrte d1, d0
+fmul d2, d1, d1
+frsqrts d2, d0, d2
+fmul d1, d1, d2
+fmul d2, d1, d1
+frsqrts d2, d0, d2
+fmul d1, d1, d2
+fmul d2, d1, d1
+frsqrts d2, d0, d2
+fmul d2, d2, d0
+fmul d1, d1, d2
+fcmp d0, #0.0
+fcsel d0, d0, d1, eq
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 1400
+
+# M3-NEXT: Total Cycles: 4203
+# M4-NEXT: Total Cycles: 4103
+# M5-NEXT: Total Cycles: 3803
+
+# ALL-NEXT: Total uOps: 1500
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.36
+# M3-NEXT: IPC: 0.33
+# M3-NEXT: Block RThroughput: 27.0
+
+# M4-NEXT: uOps Per Cycle: 0.37
+# M4-NEXT: IPC: 0.34
+# M4-NEXT: Block RThroughput: 3.3
+
+# M5-NEXT: uOps Per Cycle: 0.39
+# M5-NEXT: IPC: 0.37
+# M5-NEXT: Block RThroughput: 3.3
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 1 25 26.00 fsqrt d31, d31
+# M3-NEXT: 1 4 0.50 frsqrte d1, d0
+
+# M4-NEXT: 1 12 2.25 fsqrt d31, d31
+# M4-NEXT: 1 3 0.50 frsqrte d1, d0
+
+# M5-NEXT: 1 12 2.25 fsqrt d31, d31
+# M5-NEXT: 1 3 0.50 frsqrte d1, d0
+
+# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
+# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
+# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
+# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
+# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
+# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
+# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
+# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
+# ALL-NEXT: 1 3 0.33 fmul d2, d2, d0
+# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
+# ALL-NEXT: 1 2 1.00 fcmp d0, #0.0
+
+# M3-NEXT: 2 5 1.00 fcsel d0, d0, d1, eq
+# M4-NEXT: 2 5 1.00 fcsel d0, d0, d1, eq
+# M5-NEXT: 2 2 1.00 fcsel d0, d0, d1, eq
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/extended-register.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/extended-register.s
index aa14531577a..03522cd96e7 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Exynos/extended-register.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/extended-register.s
@@ -1,6 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
sub w0, w1, w2, sxtb #0
add x3, x4, w5, sxth #1
@@ -16,6 +17,7 @@
# EM3-NEXT: Total Cycles: 304
# EM4-NEXT: Total Cycles: 304
+# EM5-NEXT: Total Cycles: 254
# ALL-NEXT: Total uOps: 800
@@ -29,6 +31,11 @@
# EM4-NEXT: IPC: 2.63
# EM4-NEXT: Block RThroughput: 3.0
+# EM5: Dispatch Width: 6
+# EM5-NEXT: uOps Per Cycle: 3.15
+# EM5-NEXT: IPC: 3.15
+# EM5-NEXT: Block RThroughput: 2.5
+
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
@@ -56,3 +63,12 @@
# EM4-NEXT: 1 2 0.50 add x15, x16, w17, uxth #1
# EM4-NEXT: 1 2 0.50 subs x18, x19, w20, sxtw #2
# EM4-NEXT: 1 2 0.50 adds x21, x22, x23, sxtx #3
+
+# EM5-NEXT: 1 1 0.17 sub w0, w1, w2, sxtb
+# EM5-NEXT: 1 2 0.50 add x3, x4, w5, sxth #1
+# EM5-NEXT: 1 1 0.25 subs x6, x7, w8, uxtw #2
+# EM5-NEXT: 1 1 0.25 adds x9, x10, x11, uxtx #3
+# EM5-NEXT: 1 1 0.17 sub w12, w13, w14, uxtb
+# EM5-NEXT: 1 2 0.50 add x15, x16, w17, uxth #1
+# EM5-NEXT: 1 2 0.50 subs x18, x19, w20, sxtw #2
+# EM5-NEXT: 1 2 0.50 adds x21, x22, x23, sxtx #3
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-divide-multiply.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-divide-multiply.s
new file mode 100644
index 00000000000..a24d8a27960
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-divide-multiply.s
@@ -0,0 +1,94 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
+
+fdiv h0, h1, h2
+fdiv s1, s2, s3
+fdiv d2, d3, d4
+
+fmul h3, h4, h5
+fmul s4, s5, s6
+fmul d5, d6, d7
+
+fmadd h6, h7, h8, h9
+fmadd s7, s8, s9, s10
+fmadd d8, d9, d10, d11
+
+fsqrt h9, h10
+fsqrt s10, s11
+fsqrt d11, d12
+
+# ALL: Iterations: 100
+
+# EM3-NEXT: Instructions: 800
+# EM3-NEXT: Total Cycles: 4503
+# EM3-NEXT: Total uOps: 800
+
+# EM4-NEXT: Instructions: 1200
+# EM4-NEXT: Total Cycles: 575
+# EM4-NEXT: Total uOps: 1200
+
+# EM5-NEXT: Instructions: 1200
+# EM5-NEXT: Total Cycles: 433
+# EM5-NEXT: Total uOps: 1200
+
+# ALL: Dispatch Width: 6
+
+# EM3-NEXT: uOps Per Cycle: 0.18
+# EM3-NEXT: IPC: 0.18
+# EM3-NEXT: Block RThroughput: 45.0
+
+# EM4-NEXT: uOps Per Cycle: 2.09
+# EM4-NEXT: IPC: 2.09
+# EM4-NEXT: Block RThroughput: 4.0
+
+# EM5-NEXT: uOps Per Cycle: 2.77
+# EM5-NEXT: IPC: 2.77
+# EM5-NEXT: Block RThroughput: 4.0
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# EM3: [1] [2] [3] [4] [5] [6] Instructions:
+# EM3-NEXT: 1 7 2.00 fdiv s1, s2, s3
+# EM3-NEXT: 1 12 3.25 fdiv d2, d3, d4
+# EM3-NEXT: 1 3 0.33 fmul s4, s5, s6
+# EM3-NEXT: 1 3 0.33 fmul d5, d6, d7
+# EM3-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10
+# EM3-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11
+# EM3-NEXT: 1 18 19.00 fsqrt s10, s11
+# EM3-NEXT: 1 25 26.00 fsqrt d11, d12
+
+# EM4: [1] [2] [3] [4] [5] [6] Instructions:
+# EM4-NEXT: 1 7 3.00 fdiv h0, h1, h2
+# EM4-NEXT: 1 7 1.50 fdiv s1, s2, s3
+# EM4-NEXT: 1 12 2.25 fdiv d2, d3, d4
+# EM4-NEXT: 1 3 0.50 fmul h3, h4, h5
+# EM4-NEXT: 1 3 0.33 fmul s4, s5, s6
+# EM4-NEXT: 1 3 0.33 fmul d5, d6, d7
+# EM4-NEXT: 1 4 0.50 fmadd h6, h7, h8, h9
+# EM4-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10
+# EM4-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11
+# EM4-NEXT: 1 7 3.00 fsqrt h9, h10
+# EM4-NEXT: 1 8 1.75 fsqrt s10, s11
+# EM4-NEXT: 1 12 2.25 fsqrt d11, d12
+
+# EM5: [1] [2] [3] [4] [5] [6] Instructions:
+# EM5-NEXT: 1 5 0.50 fdiv h0, h1, h2
+# EM5-NEXT: 1 7 1.00 fdiv s1, s2, s3
+# EM5-NEXT: 1 12 2.25 fdiv d2, d3, d4
+# EM5-NEXT: 1 3 0.33 fmul h3, h4, h5
+# EM5-NEXT: 1 3 0.33 fmul s4, s5, s6
+# EM5-NEXT: 1 3 0.33 fmul d5, d6, d7
+# EM5-NEXT: 1 4 0.33 fmadd h6, h7, h8, h9
+# EM5-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10
+# EM5-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11
+# EM5-NEXT: 1 5 0.50 fsqrt h9, h10
+# EM5-NEXT: 1 8 1.25 fsqrt s10, s11
+# EM5-NEXT: 1 12 2.25 fsqrt d11, d12
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-integer.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-integer.s
new file mode 100644
index 00000000000..65aed321dd7
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-integer.s
@@ -0,0 +1,114 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
+
+scvtf h0, w0
+scvtf s1, w1
+scvtf d2, x2
+
+fcvtzs w3, h3
+fcvtzs w4, s4
+fcvtzs x5, d5
+
+fmov h6, #2.0
+fmov s7, #4.0
+fmov d8, #8.0
+
+fmov h9, w9
+fmov s10, w10
+fmov d11, x11
+fmov v12.d[1], x12
+
+fmov w13, h13
+fmov w14, s14
+fmov x15, d15
+fmov x16, v16.d[1]
+
+# ALL: Iterations: 100
+
+# EM3-NEXT: Instructions: 1200
+# EM3-NEXT: Total Cycles: 405
+# EM3-NEXT: Total uOps: 1400
+
+# EM4-NEXT: Instructions: 1700
+# EM4-NEXT: Total Cycles: 1108
+# EM4-NEXT: Total uOps: 1900
+
+# EM5-NEXT: Instructions: 1700
+# EM5-NEXT: Total Cycles: 1407
+# EM5-NEXT: Total uOps: 1900
+
+# ALL: Dispatch Width: 6
+
+# EM3-NEXT: uOps Per Cycle: 3.46
+# EM3-NEXT: IPC: 2.96
+# EM3-NEXT: Block RThroughput: 4.0
+
+# EM4-NEXT: uOps Per Cycle: 1.71
+# EM4-NEXT: IPC: 1.53
+# EM4-NEXT: Block RThroughput: 11.0
+
+# EM5-NEXT: uOps Per Cycle: 1.35
+# EM5-NEXT: IPC: 1.21
+# EM5-NEXT: Block RThroughput: 14.0
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# EM3: [1] [2] [3] [4] [5] [6] Instructions:
+# EM3-NEXT: 1 4 1.00 scvtf s1, w1
+# EM3-NEXT: 1 4 1.00 scvtf d2, x2
+# EM3-NEXT: 1 3 1.00 fcvtzs w4, s4
+# EM3-NEXT: 1 3 1.00 fcvtzs x5, d5
+# EM3-NEXT: 1 1 0.33 fmov s7, #4.00000000
+# EM3-NEXT: 1 1 0.33 fmov d8, #8.00000000
+# EM3-NEXT: 1 1 0.33 fmov s10, w10
+# EM3-NEXT: 1 1 0.33 fmov d11, x11
+# EM3-NEXT: 2 5 1.00 fmov v12.d[1], x12
+# EM3-NEXT: 1 1 0.33 fmov w14, s14
+# EM3-NEXT: 1 1 0.33 fmov x15, d15
+# EM3-NEXT: 2 5 1.00 fmov x16, v16.d[1]
+
+# EM4: [1] [2] [3] [4] [5] [6] Instructions:
+# EM4-NEXT: 1 6 1.00 scvtf h0, w0
+# EM4-NEXT: 1 6 1.00 scvtf s1, w1
+# EM4-NEXT: 1 6 1.00 scvtf d2, x2
+# EM4-NEXT: 1 4 1.00 fcvtzs w3, h3
+# EM4-NEXT: 1 4 1.00 fcvtzs w4, s4
+# EM4-NEXT: 1 4 1.00 fcvtzs x5, d5
+# EM4-NEXT: 1 1 0.33 fmov h6, #2.00000000
+# EM4-NEXT: 1 1 0.33 fmov s7, #4.00000000
+# EM4-NEXT: 1 1 0.33 fmov d8, #8.00000000
+# EM4-NEXT: 1 3 1.00 fmov h9, w9
+# EM4-NEXT: 1 3 1.00 fmov s10, w10
+# EM4-NEXT: 1 3 1.00 fmov d11, x11
+# EM4-NEXT: 2 2 1.00 fmov v12.d[1], x12
+# EM4-NEXT: 1 4 1.00 fmov w13, h13
+# EM4-NEXT: 1 4 1.00 fmov w14, s14
+# EM4-NEXT: 1 4 1.00 fmov x15, d15
+# EM4-NEXT: 2 5 1.00 fmov x16, v16.d[1]
+
+# EM5: [1] [2] [3] [4] [5] [6] Instructions:
+# EM5-NEXT: 1 6 1.00 scvtf h0, w0
+# EM5-NEXT: 1 6 1.00 scvtf s1, w1
+# EM5-NEXT: 1 6 1.00 scvtf d2, x2
+# EM5-NEXT: 1 4 1.00 fcvtzs w3, h3
+# EM5-NEXT: 1 4 1.00 fcvtzs w4, s4
+# EM5-NEXT: 1 4 1.00 fcvtzs x5, d5
+# EM5-NEXT: 1 1 0.33 fmov h6, #2.00000000
+# EM5-NEXT: 1 1 0.33 fmov s7, #4.00000000
+# EM5-NEXT: 1 1 0.33 fmov d8, #8.00000000
+# EM5-NEXT: 1 4 1.00 fmov h9, w9
+# EM5-NEXT: 1 4 1.00 fmov s10, w10
+# EM5-NEXT: 1 4 1.00 fmov d11, x11
+# EM5-NEXT: 2 6 1.00 fmov v12.d[1], x12
+# EM5-NEXT: 1 3 1.00 fmov w13, h13
+# EM5-NEXT: 1 3 1.00 fmov w14, s14
+# EM5-NEXT: 1 3 1.00 fmov x15, d15
+# EM5-NEXT: 2 5 1.00 fmov x16, v16.d[1]
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-load.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-load.s
new file mode 100644
index 00000000000..18dcf5ebe87
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-load.s
@@ -0,0 +1,153 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+ldr s0, 1f
+ldr q0, 1f
+
+ldur d0, [sp, #2]
+ldur q0, [sp, #16]
+
+ldr b0, [sp], #1
+ldr q0, [sp], #16
+
+ldr h0, [sp, #2]!
+ldr q0, [sp, #16]!
+
+ldr s0, [sp, #4]
+ldr q0, [sp, #16]
+
+ldr d0, [sp, x0, lsl #3]
+ldr q0, [sp, x0, lsl #4]
+
+ldr b0, [sp, x0]
+ldr q0, [sp, x0]
+
+ldr h0, [sp, w0, sxtw #1]
+ldr q0, [sp, w0, uxtw #4]
+
+ldr s0, [sp, w0, sxtw]
+ldr q0, [sp, w0, uxtw]
+
+ldp d0, d1, [sp], #16
+ldp q0, q1, [sp], #32
+
+ldp s0, s1, [sp, #8]!
+ldp q0, q1, [sp, #32]!
+
+ldp d0, d1, [sp, #16]
+ldp q0, q1, [sp, #32]
+
+1:
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 2400
+
+# M3-NEXT: Total Cycles: 4708
+# M3-NEXT: Total uOps: 3200
+
+# M4-NEXT: Total Cycles: 4708
+# M4-NEXT: Total uOps: 3200
+
+# M5-NEXT: Total Cycles: 5509
+# M5-NEXT: Total uOps: 3300
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.68
+# M3-NEXT: IPC: 0.51
+# M3-NEXT: Block RThroughput: 13.5
+
+# M4-NEXT: uOps Per Cycle: 0.68
+# M4-NEXT: IPC: 0.51
+# M4-NEXT: Block RThroughput: 13.0
+
+# M5-NEXT: uOps Per Cycle: 0.60
+# M5-NEXT: IPC: 0.44
+# M5-NEXT: Block RThroughput: 13.5
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 1 5 0.50 * ldr s0, {{\.?}}Ltmp0
+# M3-NEXT: 1 5 0.50 * ldr q0, {{\.?}}Ltmp0
+# M3-NEXT: 1 5 0.50 * ldur d0, [sp, #2]
+# M3-NEXT: 1 5 0.50 * ldur q0, [sp, #16]
+# M3-NEXT: 1 5 0.50 * ldr b0, [sp], #1
+# M3-NEXT: 1 5 0.50 * ldr q0, [sp], #16
+# M3-NEXT: 1 5 0.50 * ldr h0, [sp, #2]!
+# M3-NEXT: 1 5 0.50 * ldr q0, [sp, #16]!
+# M3-NEXT: 1 5 0.50 * ldr s0, [sp, #4]
+# M3-NEXT: 1 5 0.50 * ldr q0, [sp, #16]
+# M3-NEXT: 1 5 0.50 * ldr d0, [sp, x0, lsl #3]
+# M3-NEXT: 2 6 0.50 * ldr q0, [sp, x0, lsl #4]
+# M3-NEXT: 1 5 0.50 * ldr b0, [sp, x0]
+# M3-NEXT: 1 5 0.50 * ldr q0, [sp, x0]
+# M3-NEXT: 2 6 0.50 * ldr h0, [sp, w0, sxtw #1]
+# M3-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw #4]
+# M3-NEXT: 2 6 0.50 * ldr s0, [sp, w0, sxtw]
+# M3-NEXT: 1 5 0.50 * ldr q0, [sp, w0, uxtw]
+# M3-NEXT: 2 5 0.50 * ldp d0, d1, [sp], #16
+# M3-NEXT: 2 5 1.00 * ldp q0, q1, [sp], #32
+# M3-NEXT: 2 5 0.50 * ldp s0, s1, [sp, #8]!
+# M3-NEXT: 2 5 1.00 * ldp q0, q1, [sp, #32]!
+# M3-NEXT: 1 5 0.50 * ldp d0, d1, [sp, #16]
+# M3-NEXT: 1 5 1.00 * ldp q0, q1, [sp, #32]
+
+# M4-NEXT: 1 5 0.50 * ldr s0, {{\.?}}Ltmp0
+# M4-NEXT: 1 5 0.50 * ldr q0, {{\.?}}Ltmp0
+# M4-NEXT: 1 5 0.50 * ldur d0, [sp, #2]
+# M4-NEXT: 1 5 0.50 * ldur q0, [sp, #16]
+# M4-NEXT: 1 5 0.50 * ldr b0, [sp], #1
+# M4-NEXT: 1 5 0.50 * ldr q0, [sp], #16
+# M4-NEXT: 1 5 0.50 * ldr h0, [sp, #2]!
+# M4-NEXT: 1 5 0.50 * ldr q0, [sp, #16]!
+# M4-NEXT: 1 5 0.50 * ldr s0, [sp, #4]
+# M4-NEXT: 1 5 0.50 * ldr q0, [sp, #16]
+# M4-NEXT: 1 5 0.50 * ldr d0, [sp, x0, lsl #3]
+# M4-NEXT: 2 6 0.50 * ldr q0, [sp, x0, lsl #4]
+# M4-NEXT: 1 5 0.50 * ldr b0, [sp, x0]
+# M4-NEXT: 1 5 0.50 * ldr q0, [sp, x0]
+# M4-NEXT: 2 6 0.50 * ldr h0, [sp, w0, sxtw #1]
+# M4-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw #4]
+# M4-NEXT: 2 6 0.50 * ldr s0, [sp, w0, sxtw]
+# M4-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw]
+# M4-NEXT: 1 5 0.50 * ldp d0, d1, [sp], #16
+# M4-NEXT: 2 5 0.50 * ldp q0, q1, [sp], #32
+# M4-NEXT: 2 5 0.50 * ldp s0, s1, [sp, #8]!
+# M4-NEXT: 2 5 1.00 * ldp q0, q1, [sp, #32]!
+# M4-NEXT: 1 5 0.50 * ldp d0, d1, [sp, #16]
+# M4-NEXT: 1 5 1.00 * ldp q0, q1, [sp, #32]
+
+# M5-NEXT: 1 6 0.50 * ldr s0, {{\.?}}Ltmp0
+# M5-NEXT: 1 6 0.50 * ldr q0, {{\.?}}Ltmp0
+# M5-NEXT: 1 6 0.50 * ldur d0, [sp, #2]
+# M5-NEXT: 1 6 0.50 * ldur q0, [sp, #16]
+# M5-NEXT: 1 6 0.50 * ldr b0, [sp], #1
+# M5-NEXT: 1 6 0.50 * ldr q0, [sp], #16
+# M5-NEXT: 1 6 0.50 * ldr h0, [sp, #2]!
+# M5-NEXT: 1 6 0.50 * ldr q0, [sp, #16]!
+# M5-NEXT: 1 6 0.50 * ldr s0, [sp, #4]
+# M5-NEXT: 1 6 0.50 * ldr q0, [sp, #16]
+# M5-NEXT: 1 6 0.50 * ldr d0, [sp, x0, lsl #3]
+# M5-NEXT: 2 7 0.50 * ldr q0, [sp, x0, lsl #4]
+# M5-NEXT: 1 6 0.50 * ldr b0, [sp, x0]
+# M5-NEXT: 1 6 0.50 * ldr q0, [sp, x0]
+# M5-NEXT: 2 7 0.50 * ldr h0, [sp, w0, sxtw #1]
+# M5-NEXT: 2 7 0.50 * ldr q0, [sp, w0, uxtw #4]
+# M5-NEXT: 2 7 0.50 * ldr s0, [sp, w0, sxtw]
+# M5-NEXT: 2 7 0.50 * ldr q0, [sp, w0, uxtw]
+# M5-NEXT: 2 6 0.50 * ldp d0, d1, [sp], #16
+# M5-NEXT: 2 6 1.00 * ldp q0, q1, [sp], #32
+# M5-NEXT: 2 6 0.50 * ldp s0, s1, [sp, #8]!
+# M5-NEXT: 2 6 1.00 * ldp q0, q1, [sp, #32]!
+# M5-NEXT: 1 6 0.50 * ldp d0, d1, [sp, #16]
+# M5-NEXT: 1 6 1.00 * ldp q0, q1, [sp, #32]
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-recp.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-recp.s
new file mode 100644
index 00000000000..05245ad631f
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-recp.s
@@ -0,0 +1,62 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+fmov s31, #1.00000000
+fdiv s30, s31, s30
+
+# Newton series for 1 / x.
+frecpe s1, s0
+frecps s2, s0, s1
+fmul s1, s1, s2
+frecps s0, s0, s1
+fmul s0, s1, s0
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 700
+
+# M3-NEXT: Total Cycles: 1803
+# M4-NEXT: Total Cycles: 1703
+# M5-NEXT: Total Cycles: 1703
+
+# ALL-NEXT: Total uOps: 700
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.39
+# M3-NEXT: IPC: 0.39
+# M3-NEXT: Block RThroughput: 2.0
+
+# M4-NEXT: uOps Per Cycle: 0.41
+# M4-NEXT: IPC: 0.41
+# M4-NEXT: Block RThroughput: 1.5
+
+# M5-NEXT: uOps Per Cycle: 0.41
+# M5-NEXT: IPC: 0.41
+# M5-NEXT: Block RThroughput: 1.3
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+# ALL-NEXT: 1 1 0.33 fmov s31, #1.00000000
+
+# M3-NEXT: 1 7 2.00 fdiv s30, s31, s30
+# M3-NEXT: 1 4 0.50 frecpe s1, s0
+
+# M4-NEXT: 1 7 1.50 fdiv s30, s31, s30
+# M4-NEXT: 1 3 0.50 frecpe s1, s0
+
+# M5-NEXT: 1 7 1.00 fdiv s30, s31, s30
+# M5-NEXT: 1 3 0.50 frecpe s1, s0
+
+# ALL-NEXT: 1 4 0.33 frecps s2, s0, s1
+# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2
+# ALL-NEXT: 1 4 0.33 frecps s0, s0, s1
+# ALL-NEXT: 1 3 0.33 fmul s0, s1, s0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-rsqrt.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-rsqrt.s
new file mode 100644
index 00000000000..fd82cc35329
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-rsqrt.s
@@ -0,0 +1,72 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+fsqrt s30, s30
+fmov s31, #1.00000000
+fdiv s30, s31, s30
+
+# Newton series for 1 / sqrtf().
+frsqrte s1, s0
+fmul s2, s1, s1
+frsqrts s2, s0, s2
+fmul s1, s1, s2
+fmul s2, s1, s1
+frsqrts s0, s0, s2
+fmul s0, s1, s0
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 1000
+
+# M3-NEXT: Total Cycles: 2503
+# M4-NEXT: Total Cycles: 2303
+# M5-NEXT: Total Cycles: 2303
+
+# ALL-NEXT: Total uOps: 1000
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.40
+# M3-NEXT: IPC: 0.40
+# M3-NEXT: Block RThroughput: 19.0
+
+# M4-NEXT: uOps Per Cycle: 0.43
+# M4-NEXT: IPC: 0.43
+# M4-NEXT: Block RThroughput: 2.0
+
+# M5-NEXT: uOps Per Cycle: 0.43
+# M5-NEXT: IPC: 0.43
+# M5-NEXT: Block RThroughput: 2.0
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 1 18 19.00 fsqrt s30, s30
+# M4-NEXT: 1 8 1.75 fsqrt s30, s30
+# M5-NEXT: 1 8 1.25 fsqrt s30, s30
+
+# ALL-NEXT: 1 1 0.33 fmov s31, #1.00000000
+
+# M3-NEXT: 1 7 2.00 fdiv s30, s31, s30
+# M3-NEXT: 1 4 0.50 frsqrte s1, s0
+
+# M4-NEXT: 1 7 1.50 fdiv s30, s31, s30
+# M4-NEXT: 1 3 0.50 frsqrte s1, s0
+
+# M5-NEXT: 1 7 1.00 fdiv s30, s31, s30
+# M5-NEXT: 1 3 0.50 frsqrte s1, s0
+
+# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1
+# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2
+# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2
+# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1
+# ALL-NEXT: 1 4 0.33 frsqrts s0, s0, s2
+# ALL-NEXT: 1 3 0.33 fmul s0, s1, s0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-sqrt.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-sqrt.s
new file mode 100644
index 00000000000..423fae20366
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-sqrt.s
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+fsqrt s31, s31
+
+# Newton series for sqrtf().
+frsqrte s1, s0
+fmul s2, s1, s1
+frsqrts s2, s0, s2
+fmul s1, s1, s2
+fmul s2, s1, s1
+frsqrts s2, s0, s2
+fmul s2, s2, s0
+fmul s1, s1, s2
+fcmp s0, #0.0
+fcsel s0, s0, s1, eq
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 1100
+
+# M3-NEXT: Total Cycles: 3203
+# M4-NEXT: Total Cycles: 3103
+# M5-NEXT: Total Cycles: 2803
+
+# ALL-NEXT: Total uOps: 1200
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.37
+# M3-NEXT: IPC: 0.34
+# M3-NEXT: Block RThroughput: 20.0
+
+# M4-NEXT: uOps Per Cycle: 0.39
+# M4-NEXT: IPC: 0.35
+# M4-NEXT: Block RThroughput: 2.3
+
+# M5-NEXT: uOps Per Cycle: 0.43
+# M5-NEXT: IPC: 0.39
+# M5-NEXT: Block RThroughput: 2.3
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 1 18 19.00 fsqrt s31, s31
+# M3-NEXT: 1 4 0.50 frsqrte s1, s0
+
+# M4-NEXT: 1 8 1.75 fsqrt s31, s31
+# M4-NEXT: 1 3 0.50 frsqrte s1, s0
+
+# M5-NEXT: 1 8 1.25 fsqrt s31, s31
+# M5-NEXT: 1 3 0.50 frsqrte s1, s0
+
+# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1
+# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2
+# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2
+# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1
+# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2
+# ALL-NEXT: 1 3 0.33 fmul s2, s2, s0
+# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2
+# ALL-NEXT: 1 2 1.00 fcmp s0, #0.0
+
+# M3-NEXT: 2 5 1.00 fcsel s0, s0, s1, eq
+# M4-NEXT: 2 5 1.00 fcsel s0, s0, s1, eq
+# M5-NEXT: 2 2 1.00 fcsel s0, s0, s1, eq
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s
new file mode 100644
index 00000000000..55d1d60252b
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s
@@ -0,0 +1,142 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+stur d0, [sp, #2]
+stur q0, [sp, #16]
+
+str b0, [sp], #1
+str q0, [sp], #16
+
+str h0, [sp, #2]!
+str q0, [sp, #16]!
+
+str s0, [sp, #4]
+str q0, [sp, #16]
+
+str d0, [sp, x0, lsl #3]
+str q0, [sp, x0, lsl #4]
+
+str b0, [sp, x0]
+str q0, [sp, x0]
+
+str h0, [sp, w0, sxtw #1]
+str q0, [sp, w0, uxtw #4]
+
+str s0, [sp, w0, sxtw]
+str q0, [sp, w0, uxtw]
+
+stp d0, d1, [sp], #16
+stp q0, q1, [sp], #32
+
+stp s0, s1, [sp, #8]!
+stp q0, q1, [sp, #32]!
+
+stp d0, d1, [sp, #16]
+stp q0, q1, [sp, #32]
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 2200
+
+# M3-NEXT: Total Cycles: 3203
+# M3-NEXT: Total uOps: 2900
+
+# M4-NEXT: Total Cycles: 3203
+# M4-NEXT: Total uOps: 3000
+
+# M5-NEXT: Total Cycles: 2803
+# M5-NEXT: Total uOps: 2500
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.91
+# M3-NEXT: IPC: 0.69
+# M3-NEXT: Block RThroughput: 22.0
+
+# M4-NEXT: uOps Per Cycle: 0.94
+# M4-NEXT: IPC: 0.69
+# M4-NEXT: Block RThroughput: 12.5
+
+# M5-NEXT: uOps Per Cycle: 0.89
+# M5-NEXT: IPC: 0.78
+# M5-NEXT: Block RThroughput: 11.0
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 1 1 1.00 * stur d0, [sp, #2]
+# M3-NEXT: 1 1 1.00 * stur q0, [sp, #16]
+# M3-NEXT: 1 1 1.00 * str b0, [sp], #1
+# M3-NEXT: 1 1 1.00 * str q0, [sp], #16
+# M3-NEXT: 1 1 1.00 * str h0, [sp, #2]!
+# M3-NEXT: 1 1 1.00 * str q0, [sp, #16]!
+# M3-NEXT: 1 1 1.00 * str s0, [sp, #4]
+# M3-NEXT: 1 1 1.00 * str q0, [sp, #16]
+# M3-NEXT: 1 1 1.00 * str d0, [sp, x0, lsl #3]
+# M3-NEXT: 2 3 1.00 * str q0, [sp, x0, lsl #4]
+# M3-NEXT: 1 1 1.00 * str b0, [sp, x0]
+# M3-NEXT: 1 1 1.00 * str q0, [sp, x0]
+# M3-NEXT: 2 3 1.00 * str h0, [sp, w0, sxtw #1]
+# M3-NEXT: 2 3 1.00 * str q0, [sp, w0, uxtw #4]
+# M3-NEXT: 2 3 1.00 * str s0, [sp, w0, sxtw]
+# M3-NEXT: 2 3 1.00 * str q0, [sp, w0, uxtw]
+# M3-NEXT: 1 1 1.00 * stp d0, d1, [sp], #16
+# M3-NEXT: 2 1 1.00 * stp q0, q1, [sp], #32
+# M3-NEXT: 1 1 1.00 * stp s0, s1, [sp, #8]!
+# M3-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32]!
+# M3-NEXT: 1 1 1.00 * stp d0, d1, [sp, #16]
+# M3-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32]
+
+# M4-NEXT: 1 1 0.50 * stur d0, [sp, #2]
+# M4-NEXT: 1 1 0.50 * stur q0, [sp, #16]
+# M4-NEXT: 1 1 0.50 * str b0, [sp], #1
+# M4-NEXT: 1 1 0.50 * str q0, [sp], #16
+# M4-NEXT: 1 1 0.50 * str h0, [sp, #2]!
+# M4-NEXT: 1 1 0.50 * str q0, [sp, #16]!
+# M4-NEXT: 1 1 0.50 * str s0, [sp, #4]
+# M4-NEXT: 1 1 0.50 * str q0, [sp, #16]
+# M4-NEXT: 1 1 0.50 * str d0, [sp, x0, lsl #3]
+# M4-NEXT: 2 3 0.50 * str q0, [sp, x0, lsl #4]
+# M4-NEXT: 1 1 0.50 * str b0, [sp, x0]
+# M4-NEXT: 1 1 0.50 * str q0, [sp, x0]
+# M4-NEXT: 2 3 0.50 * str h0, [sp, w0, sxtw #1]
+# M4-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw #4]
+# M4-NEXT: 2 3 0.50 * str s0, [sp, w0, sxtw]
+# M4-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw]
+# M4-NEXT: 1 1 0.50 * stp d0, d1, [sp], #16
+# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp], #32
+# M4-NEXT: 1 1 0.50 * stp s0, s1, [sp, #8]!
+# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32]!
+# M4-NEXT: 1 1 0.50 * stp d0, d1, [sp, #16]
+# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32]
+
+# M5-NEXT: 1 1 0.50 * stur d0, [sp, #2]
+# M5-NEXT: 1 1 0.50 * stur q0, [sp, #16]
+# M5-NEXT: 1 1 0.50 * str b0, [sp], #1
+# M5-NEXT: 1 1 0.50 * str q0, [sp], #16
+# M5-NEXT: 1 1 0.50 * str h0, [sp, #2]!
+# M5-NEXT: 1 1 0.50 * str q0, [sp, #16]!
+# M5-NEXT: 1 1 0.50 * str s0, [sp, #4]
+# M5-NEXT: 1 1 0.50 * str q0, [sp, #16]
+# M5-NEXT: 1 1 0.50 * str d0, [sp, x0, lsl #3]
+# M5-NEXT: 2 3 0.50 * str q0, [sp, x0, lsl #4]
+# M5-NEXT: 1 1 0.50 * str b0, [sp, x0]
+# M5-NEXT: 1 1 0.50 * str q0, [sp, x0]
+# M5-NEXT: 1 1 0.50 * str h0, [sp, w0, sxtw #1]
+# M5-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw #4]
+# M5-NEXT: 1 1 0.50 * str s0, [sp, w0, sxtw]
+# M5-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw]
+# M5-NEXT: 1 1 0.50 * stp d0, d1, [sp], #16
+# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp], #32
+# M5-NEXT: 1 1 0.50 * stp s0, s1, [sp, #8]!
+# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32]!
+# M5-NEXT: 1 1 0.50 * stp d0, d1, [sp, #16]
+# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32]
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/load.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/load.s
new file mode 100644
index 00000000000..04f30d353ae
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/load.s
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+ldr w0, 1f
+ldur x0, [sp, #8]
+ldrb w0, [sp], #1
+ldrsh w0, [sp, #2]!
+ldr x0, [sp, #8]
+ldrb w0, [sp, x31]
+ldrsh w0, [sp, x31, lsl #1]
+ldr w0, [sp, w31, sxtw]
+ldr x0, [sp, w31, uxtw #3]
+ldnp w0, w1, [sp, #8]
+ldp x0, x1, [sp], #16
+ldpsw x0, x1, [sp, #8]!
+
+1:
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 1200
+# ALL-NEXT: Total Cycles: 1904
+
+# M3-NEXT: Total uOps: 1600
+# M4-NEXT: Total uOps: 1400
+# M5-NEXT: Total uOps: 1400
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 0.84
+# M4-NEXT: uOps Per Cycle: 0.74
+# M5-NEXT: uOps Per Cycle: 0.74
+
+# ALL-NEXT: IPC: 0.63
+# ALL-NEXT: Block RThroughput: 6.0
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+# ALL-NEXT: 1 4 0.50 * ldr w0, {{\.?}}Ltmp0
+# ALL-NEXT: 1 4 0.50 * ldur x0, [sp, #8]
+# ALL-NEXT: 1 4 0.50 * ldrb w0, [sp], #1
+# ALL-NEXT: 1 4 0.50 * ldrsh w0, [sp, #2]!
+# ALL-NEXT: 1 4 0.50 * ldr x0, [sp, #8]
+# ALL-NEXT: 1 4 0.50 * ldrb w0, [sp, xzr]
+# ALL-NEXT: 1 5 0.50 * ldrsh w0, [sp, xzr, lsl #1]
+
+# M3-NEXT: 2 5 0.50 * ldr w0, [sp, wzr, sxtw]
+# M3-NEXT: 2 5 0.50 * ldr x0, [sp, wzr, uxtw #3]
+
+# M4-NEXT: 1 5 0.50 * ldr w0, [sp, wzr, sxtw]
+# M4-NEXT: 1 5 0.50 * ldr x0, [sp, wzr, uxtw #3]
+
+# M5-NEXT: 1 5 0.50 * ldr w0, [sp, wzr, sxtw]
+# M5-NEXT: 1 5 0.50 * ldr x0, [sp, wzr, uxtw #3]
+
+# ALL-NEXT: 1 4 0.50 * ldnp w0, w1, [sp, #8]
+# ALL-NEXT: 2 4 0.50 * ldp x0, x1, [sp], #16
+# ALL-NEXT: 2 4 0.50 * ldpsw x0, x1, [sp, #8]!
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s
index 9e8c07149ca..b3bbec5f362 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s
@@ -1,6 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M5
b main
@@ -19,6 +20,11 @@
# M4-NEXT: IPC: 0.50
# M4-NEXT: Block RThroughput: 0.2
+# M5: Dispatch Width: 6
+# M5-NEXT: uOps Per Cycle: 0.50
+# M5-NEXT: IPC: 0.50
+# M5-NEXT: Block RThroughput: 0.2
+
# ALL: Schedulers - number of cycles where we saw N micro opcodes issued:
# ALL-NEXT: [# issued], [# cycles]
# ALL-NEXT: 0, 1 (50.0%)
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s
index 6a1c81b5fb4..8d885f431d7 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s
@@ -1,6 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
adds w0, w1, w2, lsl #0
sub x3, x4, x5, lsr #1
@@ -9,13 +10,14 @@
adds w12, w13, w14, lsl #4
sub x15, x16, x17, lsr #6
ands x18, x19, x20, lsl #8
- orr w21, w22, w23, asr #10
+ eor w21, w22, w23, asr #10
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 800
# EM3-NEXT: Total Cycles: 354
# EM4-NEXT: Total Cycles: 329
+# EM5-NEXT: Total Cycles: 220
# ALL-NEXT: Total uOps: 800
@@ -29,6 +31,11 @@
# EM4-NEXT: IPC: 2.43
# EM4-NEXT: Block RThroughput: 3.3
+# EM5: Dispatch Width: 6
+# EM5-NEXT: uOps Per Cycle: 3.64
+# EM5-NEXT: IPC: 3.64
+# EM5-NEXT: Block RThroughput: 1.5
+
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
@@ -46,7 +53,7 @@
# EM3-NEXT: 1 2 0.50 adds w12, w13, w14, lsl #4
# EM3-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6
# EM3-NEXT: 1 2 0.50 ands x18, x19, x20, lsl #8
-# EM3-NEXT: 1 2 0.50 orr w21, w22, w23, asr #10
+# EM3-NEXT: 1 2 0.50 eor w21, w22, w23, asr #10
# EM4-NEXT: 1 1 0.25 adds w0, w1, w2
# EM4-NEXT: 1 2 0.50 sub x3, x4, x5, lsr #1
@@ -55,4 +62,13 @@
# EM4-NEXT: 1 2 0.50 adds w12, w13, w14, lsl #4
# EM4-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6
# EM4-NEXT: 1 1 0.25 ands x18, x19, x20, lsl #8
-# EM4-NEXT: 1 2 0.50 orr w21, w22, w23, asr #10
+# EM4-NEXT: 1 2 0.50 eor w21, w22, w23, asr #10
+
+# EM5-NEXT: 1 1 0.17 adds w0, w1, w2
+# EM5-NEXT: 1 2 0.50 sub x3, x4, x5, lsr #1
+# EM5-NEXT: 1 1 0.25 ands x6, x7, x8, lsl #2
+# EM5-NEXT: 1 2 0.33 orr w9, w10, w11, asr #3
+# EM5-NEXT: 1 2 0.33 adds w12, w13, w14, lsl #4
+# EM5-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6
+# EM5-NEXT: 1 1 0.25 ands x18, x19, x20, lsl #8
+# EM5-NEXT: 1 2 0.33 eor w21, w22, w23, asr #10
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/store.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/store.s
new file mode 100644
index 00000000000..b86cdac50e6
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/store.s
@@ -0,0 +1,82 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
+
+stur x0, [sp, #8]
+strb w0, [sp], #1
+strh w0, [sp, #2]!
+str x0, [sp, #8]
+strb w0, [sp, x31]
+strh w0, [sp, x31, lsl #1]
+str w0, [sp, w31, sxtw]
+str x0, [sp, w31, uxtw #3]
+stnp w0, w1, [sp, #8]
+stp x0, x1, [sp], #16
+stp w0, w1, [sp, #8]!
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 1100
+# ALL-NEXT: Total Cycles: 1303
+
+# M3-NEXT: Total uOps: 1300
+# M4-NEXT: Total uOps: 1100
+# M5-NEXT: Total uOps: 1100
+
+# ALL: Dispatch Width: 6
+
+# M3-NEXT: uOps Per Cycle: 1.00
+# M4-NEXT: uOps Per Cycle: 0.84
+# M5-NEXT: uOps Per Cycle: 0.84
+
+# ALL-NEXT: IPC: 0.84
+
+# M3-NEXT: Block RThroughput: 11.0
+# M4-NEXT: Block RThroughput: 5.5
+# M5-NEXT: Block RThroughput: 5.5
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 1 1 1.00 * stur x0, [sp, #8]
+# M3-NEXT: 1 1 1.00 * strb w0, [sp], #1
+# M3-NEXT: 1 1 1.00 * strh w0, [sp, #2]!
+# M3-NEXT: 1 1 1.00 * str x0, [sp, #8]
+# M3-NEXT: 1 1 1.00 * strb w0, [sp, xzr]
+# M3-NEXT: 1 1 1.00 * strh w0, [sp, xzr, lsl #1]
+# M3-NEXT: 2 2 1.00 * str w0, [sp, wzr, sxtw]
+# M3-NEXT: 2 2 1.00 * str x0, [sp, wzr, uxtw #3]
+# M3-NEXT: 1 1 1.00 * stnp w0, w1, [sp, #8]
+# M3-NEXT: 1 1 1.00 * stp x0, x1, [sp], #16
+# M3-NEXT: 1 1 1.00 * stp w0, w1, [sp, #8]!
+
+# M4-NEXT: 1 1 0.50 * stur x0, [sp, #8]
+# M4-NEXT: 1 1 0.50 * strb w0, [sp], #1
+# M4-NEXT: 1 1 0.50 * strh w0, [sp, #2]!
+# M4-NEXT: 1 1 0.50 * str x0, [sp, #8]
+# M4-NEXT: 1 1 0.50 * strb w0, [sp, xzr]
+# M4-NEXT: 1 1 0.50 * strh w0, [sp, xzr, lsl #1]
+# M4-NEXT: 1 2 0.50 * str w0, [sp, wzr, sxtw]
+# M4-NEXT: 1 2 0.50 * str x0, [sp, wzr, uxtw #3]
+# M4-NEXT: 1 1 0.50 * stnp w0, w1, [sp, #8]
+# M4-NEXT: 1 1 0.50 * stp x0, x1, [sp], #16
+# M4-NEXT: 1 1 0.50 * stp w0, w1, [sp, #8]!
+
+# M5-NEXT: 1 1 0.50 * stur x0, [sp, #8]
+# M5-NEXT: 1 1 0.50 * strb w0, [sp], #1
+# M5-NEXT: 1 1 0.50 * strh w0, [sp, #2]!
+# M5-NEXT: 1 1 0.50 * str x0, [sp, #8]
+# M5-NEXT: 1 1 0.50 * strb w0, [sp, xzr]
+# M5-NEXT: 1 1 0.50 * strh w0, [sp, xzr, lsl #1]
+# M5-NEXT: 1 2 0.50 * str w0, [sp, wzr, sxtw]
+# M5-NEXT: 1 2 0.50 * str x0, [sp, wzr, uxtw #3]
+# M5-NEXT: 1 1 0.50 * stnp w0, w1, [sp, #8]
+# M5-NEXT: 1 1 0.50 * stp x0, x1, [sp], #16
+# M5-NEXT: 1 1 0.50 * stp w0, w1, [sp, #8]!
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s
index a42291108d0..3fecb1eebd1 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s
@@ -1,6 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
mov x0, x1
mov sp, x0
@@ -22,21 +23,13 @@
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1000
-
-# M3-NEXT: Total Cycles: 172
-# M4-NEXT: Total Cycles: 172
-
+# ALL-NEXT: Total Cycles: 172
# ALL-NEXT: Total uOps: 1000
-# M3: Dispatch Width: 6
-# M3-NEXT: uOps Per Cycle: 5.81
-# M3-NEXT: IPC: 5.81
-# M3-NEXT: Block RThroughput: 1.7
-
-# M4: Dispatch Width: 6
-# M4-NEXT: uOps Per Cycle: 5.81
-# M4-NEXT: IPC: 5.81
-# M4-NEXT: Block RThroughput: 1.7
+# ALL: Dispatch Width: 6
+# ALL-NEXT: uOps Per Cycle: 5.81
+# ALL-NEXT: IPC: 5.81
+# ALL-NEXT: Block RThroughput: 1.7
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
@@ -47,25 +40,21 @@
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+# ALL-NEXT: 1 0 0.17 mov x0, x1
+# ALL-NEXT: 1 0 0.17 mov sp, x0
+# ALL-NEXT: 1 0 0.17 mov w0, #12816
-# M3-NEXT: 1 0 0.17 mov x0, x1
-# M3-NEXT: 1 0 0.17 mov sp, x0
-# M3-NEXT: 1 0 0.17 mov w0, #12816
# M3-NEXT: 1 1 0.25 add w0, w1, #0
-# M3-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0
-# M3-NEXT: 1 4 0.50 * ldr x0, [x0]
-# M3-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0
-# M3-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0
-# M3-NEXT: 1 1 0.33 fmov s0, s1
-# M3-NEXT: 1 0 0.17 movi d0, #0000000000000000
-
-# M4-NEXT: 1 0 0.17 mov x0, x1
-# M4-NEXT: 1 0 0.17 mov sp, x0
-# M4-NEXT: 1 0 0.17 mov w0, #12816
# M4-NEXT: 1 1 0.25 add w0, w1, #0
-# M4-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0
-# M4-NEXT: 1 4 0.50 * ldr x0, [x0]
-# M4-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0
-# M4-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0
+# M5-NEXT: 1 1 0.17 add w0, w1, #0
+
+# ALL-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0
+# ALL-NEXT: 1 4 0.50 * ldr x0, [x0]
+# ALL-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0
+# ALL-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0
+
+# M3-NEXT: 1 1 0.33 fmov s0, s1
# M4-NEXT: 1 1 0.33 fmov s0, s1
-# M4-NEXT: 1 0 0.17 movi d0, #0000000000000000
+# M5-NEXT: 1 2 0.33 fmov s0, s1
+
+# ALL-NEXT: 1 0 0.17 movi d0, #0000000000000000
OpenPOWER on IntegriCloud