diff options
Diffstat (limited to 'llvm/test/tools/llvm-mca')
28 files changed, 2335 insertions, 34 deletions
diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/aes.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/aes.s new file mode 100644 index 00000000000..e981b2aa4bd --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/aes.s @@ -0,0 +1,57 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +aese v0.16b, v1.16b +aesmc v0.16b, v0.16b + +aesd v0.16b, v1.16b +aesimc v0.16b, v0.16b + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 400 + +# M3-NEXT: Total Cycles: 203 +# M4-NEXT: Total Cycles: 203 +# M5-NEXT: Total Cycles: 403 + +# ALL-NEXT: Total uOps: 400 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 1.97 +# M3-NEXT: IPC: 1.97 + +# M4-NEXT: uOps Per Cycle: 1.97 +# M4-NEXT: IPC: 1.97 + +# M5-NEXT: uOps Per Cycle: 0.99 +# M5-NEXT: IPC: 0.99 + +# ALL-NEXT: Block RThroughput: 2.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 1 1 0.50 aese v0.16b, v1.16b +# M3-NEXT: 1 1 0.50 aesmc v0.16b, v0.16b +# M3-NEXT: 1 1 0.50 aesd v0.16b, v1.16b +# M3-NEXT: 1 1 0.50 aesimc v0.16b, v0.16b + +# M4-NEXT: 1 1 0.50 aese v0.16b, v1.16b +# M4-NEXT: 1 1 0.50 aesmc v0.16b, v0.16b +# M4-NEXT: 1 1 0.50 aesd v0.16b, v1.16b +# M4-NEXT: 1 1 0.50 aesimc v0.16b, v0.16b + +# M5-NEXT: 1 2 0.50 aese v0.16b, v1.16b +# M5-NEXT: 1 2 0.50 aesmc v0.16b, v0.16b +# M5-NEXT: 1 2 0.50 aesd v0.16b, v1.16b +# M5-NEXT: 1 2 0.50 aesimc v0.16b, v0.16b diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld1.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld1.s new file mode 100644 index 00000000000..f23b1f71c53 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld1.s @@ -0,0 +1,189 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +ld1 {v0.s}[0], [sp] +ld1r {v0.2s}, [sp] +ld1 {v0.2s}, [sp] +ld1 {v0.2s, v1.2s}, [sp] +ld1 {v0.2s, v1.2s, v2.2s}, [sp] +ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp] + +ld1 {v0.d}[0], [sp] +ld1r {v0.2d}, [sp] +ld1 {v0.2d}, [sp] +ld1 {v0.2d, v1.2d}, [sp] +ld1 {v0.2d, v1.2d, v2.2d}, [sp] +ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp] + +ld1 {v0.s}[0], [sp], #4 +ld1r {v0.2s}, [sp], #4 +ld1 {v0.2s}, [sp], #8 +ld1 {v0.2s, v1.2s}, [sp], #16 +ld1 {v0.2s, v1.2s, v2.2s}, [sp], #24 +ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32 + +ld1 {v0.d}[0], [sp], #8 +ld1r {v0.2d}, [sp], #8 +ld1 {v0.2d}, [sp], #16 +ld1 {v0.2d, v1.2d}, [sp], #32 +ld1 {v0.2d, v1.2d, v2.2d}, [sp], #48 +ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64 + +ld1 {v0.s}[0], [sp], x0 +ld1r {v0.2s}, [sp], x0 +ld1 {v0.2s}, [sp], x0 +ld1 {v0.2s, v1.2s}, [sp], x0 +ld1 {v0.2s, v1.2s, v2.2s}, [sp], x0 +ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0 + +ld1 {v0.d}[0], [sp], x0 +ld1r {v0.2d}, [sp], x0 +ld1 {v0.2d}, [sp], x0 +ld1 {v0.2d, v1.2d}, [sp], x0 +ld1 {v0.2d, v1.2d, v2.2d}, [sp], x0 +ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0 + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 3600 + +# M3-NEXT: Total Cycles: 14903 +# M4-NEXT: Total Cycles: 14703 +# M5-NEXT: Total Cycles: 17203 + +# ALL-NEXT: Total uOps: 10200 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.68 +# M3-NEXT: IPC: 0.24 + +# M4-NEXT: uOps Per Cycle: 0.69 +# M4-NEXT: IPC: 0.24 + +# M5-NEXT: uOps Per Cycle: 0.59 +# M5-NEXT: IPC: 0.21 + +# ALL-NEXT: Block RThroughput: 39.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 2 7 1.00 * ld1 { v0.s }[0], [sp] +# M3-NEXT: 1 5 0.50 * ld1r { v0.2s }, [sp] +# M3-NEXT: 1 5 0.50 * ld1 { v0.2s }, [sp] +# M3-NEXT: 2 5 1.00 * ld1 { v0.2s, v1.2s }, [sp] +# M3-NEXT: 3 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp] +# M3-NEXT: 4 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M3-NEXT: 2 6 1.00 * ld1 { v0.d }[0], [sp] +# M3-NEXT: 1 5 0.50 * ld1r { v0.2d }, [sp] +# M3-NEXT: 1 5 0.50 * ld1 { v0.2d }, [sp] +# M3-NEXT: 2 5 1.00 * ld1 { v0.2d, v1.2d }, [sp] +# M3-NEXT: 3 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp] +# M3-NEXT: 4 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M3-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], #4 +# M3-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], #4 +# M3-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], #8 +# M3-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16 +# M3-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24 +# M3-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 +# M3-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], #8 +# M3-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], #8 +# M3-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], #16 +# M3-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32 +# M3-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48 +# M3-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 +# M3-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], x0 +# M3-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], x0 +# M3-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], x0 +# M3-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0 +# M3-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M3-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M3-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], x0 +# M3-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], x0 +# M3-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], x0 +# M3-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0 +# M3-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0 +# M3-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 + +# M4-NEXT: 2 6 1.00 * ld1 { v0.s }[0], [sp] +# M4-NEXT: 1 5 0.50 * ld1r { v0.2s }, [sp] +# M4-NEXT: 1 5 0.50 * ld1 { v0.2s }, [sp] +# M4-NEXT: 2 5 1.00 * ld1 { v0.2s, v1.2s }, [sp] +# M4-NEXT: 3 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp] +# M4-NEXT: 4 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M4-NEXT: 2 6 1.00 * ld1 { v0.d }[0], [sp] +# M4-NEXT: 1 5 0.50 * ld1r { v0.2d }, [sp] +# M4-NEXT: 1 5 0.50 * ld1 { v0.2d }, [sp] +# M4-NEXT: 2 5 1.00 * ld1 { v0.2d, v1.2d }, [sp] +# M4-NEXT: 3 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp] +# M4-NEXT: 4 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M4-NEXT: 3 6 1.00 * ld1 { v0.s }[0], [sp], #4 +# M4-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], #4 +# M4-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], #8 +# M4-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16 +# M4-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24 +# M4-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 +# M4-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], #8 +# M4-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], #8 +# M4-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], #16 +# M4-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32 +# M4-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48 +# M4-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 +# M4-NEXT: 3 6 1.00 * ld1 { v0.s }[0], [sp], x0 +# M4-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], x0 +# M4-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], x0 +# M4-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0 +# M4-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M4-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M4-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], x0 +# M4-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], x0 +# M4-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], x0 +# M4-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0 +# M4-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0 +# M4-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 + +# M5-NEXT: 2 7 1.00 * ld1 { v0.s }[0], [sp] +# M5-NEXT: 1 6 0.50 * ld1r { v0.2s }, [sp] +# M5-NEXT: 1 6 0.50 * ld1 { v0.2s }, [sp] +# M5-NEXT: 2 6 1.00 * ld1 { v0.2s, v1.2s }, [sp] +# M5-NEXT: 3 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp] +# M5-NEXT: 4 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M5-NEXT: 2 7 1.00 * ld1 { v0.d }[0], [sp] +# M5-NEXT: 1 6 0.50 * ld1r { v0.2d }, [sp] +# M5-NEXT: 1 6 0.50 * ld1 { v0.2d }, [sp] +# M5-NEXT: 2 6 1.00 * ld1 { v0.2d, v1.2d }, [sp] +# M5-NEXT: 3 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp] +# M5-NEXT: 4 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M5-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], #4 +# M5-NEXT: 2 6 0.50 * ld1r { v0.2s }, [sp], #4 +# M5-NEXT: 2 6 0.50 * ld1 { v0.2s }, [sp], #8 +# M5-NEXT: 3 6 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16 +# M5-NEXT: 4 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24 +# M5-NEXT: 5 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 +# M5-NEXT: 3 7 1.00 * ld1 { v0.d }[0], [sp], #8 +# M5-NEXT: 2 6 0.50 * ld1r { v0.2d }, [sp], #8 +# M5-NEXT: 2 6 0.50 * ld1 { v0.2d }, [sp], #16 +# M5-NEXT: 3 6 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32 +# M5-NEXT: 4 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48 +# M5-NEXT: 5 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 +# M5-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], x0 +# M5-NEXT: 2 6 0.50 * ld1r { v0.2s }, [sp], x0 +# M5-NEXT: 2 6 0.50 * ld1 { v0.2s }, [sp], x0 +# M5-NEXT: 3 6 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0 +# M5-NEXT: 4 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M5-NEXT: 5 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M5-NEXT: 3 7 1.00 * ld1 { v0.d }[0], [sp], x0 +# M5-NEXT: 2 6 0.50 * ld1r { v0.2d }, [sp], x0 +# M5-NEXT: 2 6 0.50 * ld1 { v0.2d }, [sp], x0 +# M5-NEXT: 3 6 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0 +# M5-NEXT: 4 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0 +# M5-NEXT: 5 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld2.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld2.s new file mode 100644 index 00000000000..2ca640dbea7 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld2.s @@ -0,0 +1,118 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +ld2 {v0.s, v1.s}[0], [sp] +ld2r {v0.2s, v1.2s}, [sp] +ld2 {v0.2s, v1.2s}, [sp] + +ld2 {v0.d, v1.d}[0], [sp] +ld2r {v0.2d, v1.2d}, [sp] +ld2 {v0.2d, v1.2d}, [sp] + +ld2 {v0.s, v1.s}[0], [sp], #8 +ld2r {v0.2s, v1.2s}, [sp], #8 +ld2 {v0.2s, v1.2s}, [sp], #16 + +ld2 {v0.d, v1.d}[0], [sp], #16 +ld2r {v0.2d, v1.2d}, [sp], #16 +ld2 {v0.2d, v1.2d}, [sp], #32 + +ld2 {v0.s, v1.s}[0], [sp], x0 +ld2r {v0.2s, v1.2s}, [sp], x0 +ld2 {v0.2s, v1.2s}, [sp], x0 + +ld2 {v0.d, v1.d}[0], [sp], x0 +ld2r {v0.2d, v1.2d}, [sp], x0 +ld2 {v0.2d, v1.2d}, [sp], x0 + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1800 + +# M3-NEXT: Total Cycles: 10003 +# M4-NEXT: Total Cycles: 9803 +# M5-NEXT: Total Cycles: 11103 + +# ALL-NEXT: Total uOps: 5400 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.54 +# M3-NEXT: IPC: 0.18 +# M3-NEXT: Block RThroughput: 42.0 + +# M4-NEXT: uOps Per Cycle: 0.55 +# M4-NEXT: IPC: 0.18 +# M4-NEXT: Block RThroughput: 30.0 + +# M5-NEXT: uOps Per Cycle: 0.49 +# M5-NEXT: IPC: 0.16 +# M5-NEXT: Block RThroughput: 45.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 3 7 1.00 * ld2 { v0.s, v1.s }[0], [sp] +# M3-NEXT: 2 5 1.00 * ld2r { v0.2s, v1.2s }, [sp] +# M3-NEXT: 2 10 5.00 * ld2 { v0.2s, v1.2s }, [sp] +# M3-NEXT: 3 6 1.00 * ld2 { v0.d, v1.d }[0], [sp] +# M3-NEXT: 2 5 1.00 * ld2r { v0.2d, v1.2d }, [sp] +# M3-NEXT: 2 10 5.00 * ld2 { v0.2d, v1.2d }, [sp] +# M3-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8 +# M3-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8 +# M3-NEXT: 3 10 5.00 * ld2 { v0.2s, v1.2s }, [sp], #16 +# M3-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16 +# M3-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16 +# M3-NEXT: 3 10 5.00 * ld2 { v0.2d, v1.2d }, [sp], #32 +# M3-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0 +# M3-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0 +# M3-NEXT: 3 10 5.00 * ld2 { v0.2s, v1.2s }, [sp], x0 +# M3-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0 +# M3-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0 +# M3-NEXT: 3 10 5.00 * ld2 { v0.2d, v1.2d }, [sp], x0 + +# M4-NEXT: 3 6 1.00 * ld2 { v0.s, v1.s }[0], [sp] +# M4-NEXT: 2 5 1.00 * ld2r { v0.2s, v1.2s }, [sp] +# M4-NEXT: 2 10 3.00 * ld2 { v0.2s, v1.2s }, [sp] +# M4-NEXT: 3 6 1.00 * ld2 { v0.d, v1.d }[0], [sp] +# M4-NEXT: 2 5 1.00 * ld2r { v0.2d, v1.2d }, [sp] +# M4-NEXT: 2 10 3.00 * ld2 { v0.2d, v1.2d }, [sp] +# M4-NEXT: 4 6 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8 +# M4-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8 +# M4-NEXT: 3 10 3.00 * ld2 { v0.2s, v1.2s }, [sp], #16 +# M4-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16 +# M4-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16 +# M4-NEXT: 3 10 3.00 * ld2 { v0.2d, v1.2d }, [sp], #32 +# M4-NEXT: 4 6 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0 +# M4-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0 +# M4-NEXT: 3 10 3.00 * ld2 { v0.2s, v1.2s }, [sp], x0 +# M4-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0 +# M4-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0 +# M4-NEXT: 3 10 3.00 * ld2 { v0.2d, v1.2d }, [sp], x0 + +# M5-NEXT: 3 7 1.00 * ld2 { v0.s, v1.s }[0], [sp] +# M5-NEXT: 2 6 1.00 * ld2r { v0.2s, v1.2s }, [sp] +# M5-NEXT: 2 11 5.50 * ld2 { v0.2s, v1.2s }, [sp] +# M5-NEXT: 3 7 1.00 * ld2 { v0.d, v1.d }[0], [sp] +# M5-NEXT: 2 6 1.00 * ld2r { v0.2d, v1.2d }, [sp] +# M5-NEXT: 2 11 5.50 * ld2 { v0.2d, v1.2d }, [sp] +# M5-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8 +# M5-NEXT: 3 6 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8 +# M5-NEXT: 3 11 5.50 * ld2 { v0.2s, v1.2s }, [sp], #16 +# M5-NEXT: 4 7 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16 +# M5-NEXT: 3 6 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16 +# M5-NEXT: 3 11 5.50 * ld2 { v0.2d, v1.2d }, [sp], #32 +# M5-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0 +# M5-NEXT: 3 6 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0 +# M5-NEXT: 3 11 5.50 * ld2 { v0.2s, v1.2s }, [sp], x0 +# M5-NEXT: 4 7 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0 +# M5-NEXT: 3 6 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0 +# M5-NEXT: 3 11 5.50 * ld2 { v0.2d, v1.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld3.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld3.s new file mode 100644 index 00000000000..a6a89434754 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld3.s @@ -0,0 +1,118 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +ld3 {v0.s, v1.s, v2.s}[0], [sp] +ld3r {v0.2s, v1.2s, v2.2s}, [sp] +ld3 {v0.2s, v1.2s, v2.2s}, [sp] + +ld3 {v0.d, v1.d, v2.d}[0], [sp] +ld3r {v0.2d, v1.2d, v2.2d}, [sp] +ld3 {v0.2d, v1.2d, v2.2d}, [sp] + +ld3 {v0.s, v1.s, v2.s}[0], [sp], #12 +ld3r {v0.2s, v1.2s, v2.2s}, [sp], #12 +ld3 {v0.2s, v1.2s, v2.2s}, [sp], #24 + +ld3 {v0.d, v1.d, v2.d}[0], [sp], #24 +ld3r {v0.2d, v1.2d, v2.2d}, [sp], #24 +ld3 {v0.2d, v1.2d, v2.2d}, [sp], #48 + +ld3 {v0.s, v1.s, v2.s}[0], [sp], x0 +ld3r {v0.2s, v1.2s, v2.2s}, [sp], x0 +ld3 {v0.2s, v1.2s, v2.2s}, [sp], x0 + +ld3 {v0.d, v1.d, v2.d}[0], [sp], x0 +ld3r {v0.2d, v1.2d, v2.2d}, [sp], x0 +ld3 {v0.2d, v1.2d, v2.2d}, [sp], x0 + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1800 + +# M3-NEXT: Total Cycles: 12501 +# M4-NEXT: Total Cycles: 11804 +# M5-NEXT: Total Cycles: 12903 + +# ALL-NEXT: Total uOps: 7500 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.60 +# M3-NEXT: IPC: 0.14 +# M3-NEXT: Block RThroughput: 84.0 + +# M4-NEXT: uOps Per Cycle: 0.64 +# M4-NEXT: IPC: 0.15 +# M4-NEXT: Block RThroughput: 54.0 + +# M5-NEXT: uOps Per Cycle: 0.58 +# M5-NEXT: IPC: 0.14 +# M5-NEXT: Block RThroughput: 22.5 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 4 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp] +# M3-NEXT: 3 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp] +# M3-NEXT: 3 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp] +# M3-NEXT: 5 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp] +# M3-NEXT: 3 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp] +# M3-NEXT: 3 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp] +# M3-NEXT: 5 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12 +# M3-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12 +# M3-NEXT: 4 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24 +# M3-NEXT: 6 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24 +# M3-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24 +# M3-NEXT: 4 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48 +# M3-NEXT: 5 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0 +# M3-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M3-NEXT: 4 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M3-NEXT: 6 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0 +# M3-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0 +# M3-NEXT: 4 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0 + +# M4-NEXT: 4 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp] +# M4-NEXT: 3 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp] +# M4-NEXT: 3 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp] +# M4-NEXT: 5 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp] +# M4-NEXT: 3 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp] +# M4-NEXT: 3 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp] +# M4-NEXT: 5 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12 +# M4-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12 +# M4-NEXT: 4 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24 +# M4-NEXT: 6 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24 +# M4-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24 +# M4-NEXT: 4 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48 +# M4-NEXT: 5 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0 +# M4-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M4-NEXT: 4 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M4-NEXT: 6 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0 +# M4-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0 +# M4-NEXT: 4 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0 + +# M5-NEXT: 4 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp] +# M5-NEXT: 3 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp] +# M5-NEXT: 3 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp] +# M5-NEXT: 5 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp] +# M5-NEXT: 3 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp] +# M5-NEXT: 3 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp] +# M5-NEXT: 5 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12 +# M5-NEXT: 4 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12 +# M5-NEXT: 4 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24 +# M5-NEXT: 6 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24 +# M5-NEXT: 4 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24 +# M5-NEXT: 4 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48 +# M5-NEXT: 5 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0 +# M5-NEXT: 4 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M5-NEXT: 4 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M5-NEXT: 6 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0 +# M5-NEXT: 4 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0 +# M5-NEXT: 4 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld4.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld4.s new file mode 100644 index 00000000000..c5f2c9b7ec4 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld4.s @@ -0,0 +1,118 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp] +ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp] +ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp] + +ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp] +ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp] +ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp] + +ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], #16 +ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #16 +ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32 + +ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], #32 +ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #32 +ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64 + +ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], x0 +ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0 +ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0 + +ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], x0 +ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0 +ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0 + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1800 + +# M3-NEXT: Total Cycles: 15598 +# M4-NEXT: Total Cycles: 13004 +# M5-NEXT: Total Cycles: 14304 + +# ALL-NEXT: Total uOps: 9300 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.60 +# M3-NEXT: IPC: 0.12 +# M3-NEXT: Block RThroughput: 108.0 + +# M4-NEXT: uOps Per Cycle: 0.72 +# M4-NEXT: IPC: 0.14 +# M4-NEXT: Block RThroughput: 61.5 + +# M5-NEXT: uOps Per Cycle: 0.65 +# M5-NEXT: IPC: 0.13 +# M5-NEXT: Block RThroughput: 40.5 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 5 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp] +# M3-NEXT: 4 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M3-NEXT: 4 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M3-NEXT: 6 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp] +# M3-NEXT: 4 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M3-NEXT: 4 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M3-NEXT: 6 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16 +# M3-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16 +# M3-NEXT: 5 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 +# M3-NEXT: 7 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32 +# M3-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32 +# M3-NEXT: 5 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 +# M3-NEXT: 6 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0 +# M3-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M3-NEXT: 5 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M3-NEXT: 7 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0 +# M3-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 +# M3-NEXT: 5 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 + +# M4-NEXT: 5 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp] +# M4-NEXT: 4 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M4-NEXT: 4 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M4-NEXT: 6 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp] +# M4-NEXT: 4 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M4-NEXT: 4 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M4-NEXT: 6 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16 +# M4-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16 +# M4-NEXT: 5 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 +# M4-NEXT: 7 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32 +# M4-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32 +# M4-NEXT: 5 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 +# M4-NEXT: 6 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0 +# M4-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M4-NEXT: 5 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M4-NEXT: 7 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0 +# M4-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 +# M4-NEXT: 5 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 + +# M5-NEXT: 5 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp] +# M5-NEXT: 4 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M5-NEXT: 4 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M5-NEXT: 6 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp] +# M5-NEXT: 4 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M5-NEXT: 4 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M5-NEXT: 6 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16 +# M5-NEXT: 5 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16 +# M5-NEXT: 5 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 +# M5-NEXT: 7 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32 +# M5-NEXT: 5 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32 +# M5-NEXT: 5 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 +# M5-NEXT: 6 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0 +# M5-NEXT: 5 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M5-NEXT: 5 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M5-NEXT: 7 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0 +# M5-NEXT: 5 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 +# M5-NEXT: 5 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s new file mode 100644 index 00000000000..81e5fe84ad0 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s @@ -0,0 +1,169 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +st1 {v0.s}[0], [sp] +st1 {v0.2s}, [sp] +st1 {v0.2s, v1.2s}, [sp] +st1 {v0.2s, v1.2s, v2.2s}, [sp] +st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp] + +st1 {v0.d}[0], [sp] +st1 {v0.2d}, [sp] +st1 {v0.2d, v1.2d}, [sp] +st1 {v0.2d, v1.2d, v2.2d}, [sp] +st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp] + +st1 {v0.s}[0], [sp], #4 +st1 {v0.2s}, [sp], #8 +st1 {v0.2s, v1.2s}, [sp], #16 +st1 {v0.2s, v1.2s, v2.2s}, [sp], #24 +st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32 + +st1 {v0.d}[0], [sp], #8 +st1 {v0.2d}, [sp], #16 +st1 {v0.2d, v1.2d}, [sp], #32 +st1 {v0.2d, v1.2d, v2.2d}, [sp], #48 +st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64 + +st1 {v0.s}[0], [sp], x0 +st1 {v0.2s}, [sp], x0 +st1 {v0.2s, v1.2s}, [sp], x0 +st1 {v0.2s, v1.2s, v2.2s}, [sp], x0 +st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0 + +st1 {v0.d}[0], [sp], x0 +st1 {v0.2d}, [sp], x0 +st1 {v0.2d, v1.2d}, [sp], x0 +st1 {v0.2d, v1.2d, v2.2d}, [sp], x0 +st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0 + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 3000 + +# M3-NEXT: Total Cycles: 10203 +# M3-NEXT: Total uOps: 8400 + +# M4-NEXT: Total Cycles: 6603 +# M4-NEXT: Total uOps: 8600 + +# M5-NEXT: Total Cycles: 6603 +# M5-NEXT: Total uOps: 8600 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.82 +# M3-NEXT: IPC: 0.29 +# M3-NEXT: Block RThroughput: 72.0 + +# M4-NEXT: uOps Per Cycle: 1.30 +# M4-NEXT: IPC: 0.45 +# M4-NEXT: Block RThroughput: 33.0 + +# M5-NEXT: uOps Per Cycle: 1.30 +# M5-NEXT: IPC: 0.45 +# M5-NEXT: Block RThroughput: 33.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp] +# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp] +# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp] +# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp] +# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp] +# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp] +# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp] +# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp] +# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp], #4 +# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp], #8 +# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp], #16 +# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24 +# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 +# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp], #8 +# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp], #16 +# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp], #32 +# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48 +# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 +# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp], x0 +# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp], x0 +# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp], x0 +# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp], x0 +# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp], x0 +# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp], x0 +# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0 +# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 + +# M4-NEXT: 1 1 0.50 * st1 { v0.s }[0], [sp] +# M4-NEXT: 1 1 0.50 * st1 { v0.2s }, [sp] +# M4-NEXT: 2 2 1.00 * st1 { v0.2s, v1.2s }, [sp] +# M4-NEXT: 3 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp] +# M4-NEXT: 4 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M4-NEXT: 1 1 0.50 * st1 { v0.d }[0], [sp] +# M4-NEXT: 1 1 0.50 * st1 { v0.2d }, [sp] +# M4-NEXT: 2 2 1.00 * st1 { v0.2d, v1.2d }, [sp] +# M4-NEXT: 3 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp] +# M4-NEXT: 4 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M4-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], #4 +# M4-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], #8 +# M4-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], #16 +# M4-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24 +# M4-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 +# M4-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], #8 +# M4-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], #16 +# M4-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], #32 +# M4-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48 +# M4-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 +# M4-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], x0 +# M4-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], x0 +# M4-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], x0 +# M4-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M4-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M4-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], x0 +# M4-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], x0 +# M4-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], x0 +# M4-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0 +# M4-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 + +# M5-NEXT: 1 1 0.50 * st1 { v0.s }[0], [sp] +# M5-NEXT: 1 1 0.50 * st1 { v0.2s }, [sp] +# M5-NEXT: 2 2 1.00 * st1 { v0.2s, v1.2s }, [sp] +# M5-NEXT: 3 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp] +# M5-NEXT: 4 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M5-NEXT: 1 1 0.50 * st1 { v0.d }[0], [sp] +# M5-NEXT: 1 1 0.50 * st1 { v0.2d }, [sp] +# M5-NEXT: 2 2 1.00 * st1 { v0.2d, v1.2d }, [sp] +# M5-NEXT: 3 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp] +# M5-NEXT: 4 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M5-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], #4 +# M5-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], #8 +# M5-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], #16 +# M5-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24 +# M5-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 +# M5-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], #8 +# M5-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], #16 +# M5-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], #32 +# M5-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48 +# M5-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 +# M5-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], x0 +# M5-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], x0 +# M5-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], x0 +# M5-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M5-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M5-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], x0 +# M5-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], x0 +# M5-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], x0 +# M5-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0 +# M5-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s new file mode 100644 index 00000000000..9506241fef2 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s @@ -0,0 +1,97 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +st2 {v0.s, v1.s}[0], [sp] +st2 {v0.2s, v1.2s}, [sp] + +st2 {v0.d, v1.d}[0], [sp] +st2 {v0.2d, v1.2d}, [sp] + +st2 {v0.s, v1.s}[0], [sp], #8 +st2 {v0.2s, v1.2s}, [sp], #16 + +st2 {v0.d, v1.d}[0], [sp], #16 +st2 {v0.2d, v1.2d}, [sp], #32 + +st2 {v0.s, v1.s}[0], [sp], x0 +st2 {v0.2s, v1.2s}, [sp], x0 + +st2 {v0.d, v1.d}[0], [sp], x0 +st2 {v0.2d, v1.2d}, [sp], x0 + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1200 + +# M3-NEXT: Total Cycles: 8703 +# M3-NEXT: Total uOps: 5400 + +# M4-NEXT: Total Cycles: 2403 +# M4-NEXT: Total uOps: 2300 + +# M5-NEXT: Total Cycles: 2403 +# M5-NEXT: Total uOps: 2000 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.62 +# M3-NEXT: IPC: 0.14 +# M3-NEXT: Block RThroughput: 40.5 + +# M4-NEXT: uOps Per Cycle: 0.96 +# M4-NEXT: IPC: 0.50 +# M4-NEXT: Block RThroughput: 7.5 + +# M5-NEXT: uOps Per Cycle: 0.83 +# M5-NEXT: IPC: 0.50 +# M5-NEXT: Block RThroughput: 7.5 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp] +# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp] +# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp] +# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp] +# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp], #8 +# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp], #16 +# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp], #16 +# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp], #32 +# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp], x0 +# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp], x0 +# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp], x0 +# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp], x0 + +# M4-NEXT: 1 2 0.50 * st2 { v0.s, v1.s }[0], [sp] +# M4-NEXT: 1 2 0.50 * st2 { v0.2s, v1.2s }, [sp] +# M4-NEXT: 1 2 0.50 * st2 { v0.d, v1.d }[0], [sp] +# M4-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp] +# M4-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], #8 +# M4-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], #16 +# M4-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], #16 +# M4-NEXT: 3 2 1.00 * st2 { v0.2d, v1.2d }, [sp], #32 +# M4-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], x0 +# M4-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], x0 +# M4-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], x0 +# M4-NEXT: 3 2 1.00 * st2 { v0.2d, v1.2d }, [sp], x0 + +# M5-NEXT: 1 2 0.50 * st2 { v0.s, v1.s }[0], [sp] +# M5-NEXT: 1 2 0.50 * st2 { v0.2s, v1.2s }, [sp] +# M5-NEXT: 1 2 0.50 * st2 { v0.d, v1.d }[0], [sp] +# M5-NEXT: 1 2 1.00 * st2 { v0.2d, v1.2d }, [sp] +# M5-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], #8 +# M5-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], #16 +# M5-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], #16 +# M5-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp], #32 +# M5-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], x0 +# M5-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], x0 +# M5-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], x0 +# M5-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s new file mode 100644 index 00000000000..4de5213d526 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s @@ -0,0 +1,97 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +st3 {v0.s, v1.s, v2.s}[0], [sp] +st3 {v0.2s, v1.2s, v2.2s}, [sp] + +st3 {v0.d, v1.d, v2.d}[0], [sp] +st3 {v0.2d, v1.2d, v2.2d}, [sp] + +st3 {v0.s, v1.s, v2.s}[0], [sp], #12 +st3 {v0.2s, v1.2s, v2.2s}, [sp], #24 + +st3 {v0.d, v1.d, v2.d}[0], [sp], #24 +st3 {v0.2d, v1.2d, v2.2d}, [sp], #48 + +st3 {v0.s, v1.s, v2.s}[0], [sp], x0 +st3 {v0.2s, v1.2s, v2.2s}, [sp], x0 + +st3 {v0.d, v1.d, v2.d}[0], [sp], x0 +st3 {v0.2d, v1.2d, v2.2d}, [sp], x0 + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1200 + +# M3-NEXT: Total Cycles: 18003 +# M3-NEXT: Total uOps: 8400 + +# M4-NEXT: Total Cycles: 3903 +# M4-NEXT: Total uOps: 5000 + +# M5-NEXT: Total Cycles: 3603 +# M5-NEXT: Total uOps: 4400 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.47 +# M3-NEXT: IPC: 0.07 +# M3-NEXT: Block RThroughput: 72.0 + +# M4-NEXT: uOps Per Cycle: 1.28 +# M4-NEXT: IPC: 0.31 +# M4-NEXT: Block RThroughput: 21.0 + +# M5-NEXT: uOps Per Cycle: 1.22 +# M5-NEXT: IPC: 0.33 +# M5-NEXT: Block RThroughput: 10.5 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp] +# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp] +# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp] +# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp] +# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12 +# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24 +# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24 +# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48 +# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0 +# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0 +# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0 + +# M4-NEXT: 2 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp] +# M4-NEXT: 4 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp] +# M4-NEXT: 2 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp] +# M4-NEXT: 6 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp] +# M4-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12 +# M4-NEXT: 5 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24 +# M4-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24 +# M4-NEXT: 7 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48 +# M4-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0 +# M4-NEXT: 5 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M4-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0 +# M4-NEXT: 7 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0 + +# M5-NEXT: 2 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp] +# M5-NEXT: 3 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp] +# M5-NEXT: 2 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp] +# M5-NEXT: 5 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp] +# M5-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12 +# M5-NEXT: 4 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24 +# M5-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24 +# M5-NEXT: 6 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48 +# M5-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0 +# M5-NEXT: 4 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0 +# M5-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0 +# M5-NEXT: 6 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s new file mode 100644 index 00000000000..7dfe59f78c4 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s @@ -0,0 +1,97 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp] +st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp] + +st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp] +st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp] + +st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], #16 +st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32 + +st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], #32 +st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64 + +st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], x0 +st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0 + +st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], x0 +st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0 + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1200 + +# M3-NEXT: Total Cycles: 18603 +# M3-NEXT: Total uOps: 9000 + +# M4-NEXT: Total Cycles: 4803 +# M4-NEXT: Total uOps: 4700 + +# M5-NEXT: Total Cycles: 4803 +# M5-NEXT: Total uOps: 4700 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.48 +# M3-NEXT: IPC: 0.06 +# M3-NEXT: Block RThroughput: 76.5 + +# M4-NEXT: uOps Per Cycle: 0.98 +# M4-NEXT: IPC: 0.25 +# M4-NEXT: Block RThroughput: 24.0 + +# M5-NEXT: uOps Per Cycle: 0.98 +# M5-NEXT: IPC: 0.25 +# M5-NEXT: Block RThroughput: 24.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp] +# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp] +# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16 +# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 +# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32 +# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 +# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0 +# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0 +# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 + +# M4-NEXT: 2 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp] +# M4-NEXT: 4 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M4-NEXT: 2 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp] +# M4-NEXT: 5 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M4-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16 +# M4-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 +# M4-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32 +# M4-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 +# M4-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0 +# M4-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M4-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0 +# M4-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 + +# M5-NEXT: 2 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp] +# M5-NEXT: 4 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# M5-NEXT: 2 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp] +# M5-NEXT: 5 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] +# M5-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16 +# M5-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 +# M5-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32 +# M5-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 +# M5-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0 +# M5-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 +# M5-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0 +# M5-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/crc.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/crc.s new file mode 100644 index 00000000000..27aa0075c57 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/crc.s @@ -0,0 +1,58 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +crc32w w0, w1, w2 +crc32w w0, w0, w3 + +crc32cx w0, w1, x2 +crc32cx w0, w0, x3 + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 400 + +# M3-NEXT: Total Cycles: 204 +# M4-NEXT: Total Cycles: 404 +# M5-NEXT: Total Cycles: 204 + +# ALL-NEXT: Total uOps: 400 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 1.96 +# M3-NEXT: IPC: 1.96 +# M3-NEXT: Block RThroughput: 2.0 + +# M4-NEXT: uOps Per Cycle: 0.99 +# M4-NEXT: IPC: 0.99 +# M4-NEXT: Block RThroughput: 4.0 + +# M5-NEXT: uOps Per Cycle: 1.96 +# M5-NEXT: IPC: 1.96 +# M5-NEXT: Block RThroughput: 2.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 1 2 0.50 crc32w w0, w1, w2 +# M3-NEXT: 1 2 0.50 crc32w w0, w0, w3 +# M3-NEXT: 1 2 0.50 crc32cx w0, w1, x2 +# M3-NEXT: 1 2 0.50 crc32cx w0, w0, x3 + +# M4-NEXT: 1 2 1.00 crc32w w0, w1, w2 +# M4-NEXT: 1 2 1.00 crc32w w0, w0, w3 +# M4-NEXT: 1 2 1.00 crc32cx w0, w1, x2 +# M4-NEXT: 1 2 1.00 crc32cx w0, w0, x3 + +# M5-NEXT: 1 2 0.50 crc32w w0, w1, w2 +# M5-NEXT: 1 2 0.50 crc32w w0, w0, w3 +# M5-NEXT: 1 2 0.50 crc32cx w0, w1, x2 +# M5-NEXT: 1 2 0.50 crc32cx w0, w0, x3 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s index 0819170c68b..79f810c95f0 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 # RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 b main @@ -9,6 +10,7 @@ # M3-NEXT: Total Cycles: 18 # M4-NEXT: Total Cycles: 18 +# M5-NEXT: Total Cycles: 18 # ALL-NEXT: Total uOps: 100 @@ -22,6 +24,11 @@ # M4-NEXT: IPC: 5.56 # M4-NEXT: Block RThroughput: 0.2 +# M5: Dispatch Width: 6 +# M5-NEXT: uOps Per Cycle: 5.56 +# M5-NEXT: IPC: 5.56 +# M5-NEXT: Block RThroughput: 0.2 + # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps # ALL-NEXT: [2]: Latency @@ -34,3 +41,4 @@ # M3-NEXT: 1 0 0.17 b main # M4-NEXT: 1 0 0.17 b main +# M5-NEXT: 1 0 0.17 b main diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/divide-multiply.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/divide-multiply.s new file mode 100644 index 00000000000..c74d1923c4a --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/divide-multiply.s @@ -0,0 +1,67 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5 + +sdiv w0, w1, w2 +udiv x1, x2, x3 + +mul w2, w3, w4 +msub x3, x4, x5, x6 + +smull x4, w5, w6 +umulh x5, x6, x7 + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 600 + +# EM3-NEXT: Total Cycles: 3305 +# EM4-NEXT: Total Cycles: 3303 +# EM5-NEXT: Total Cycles: 2603 + +# ALL-NEXT: Total uOps: 600 + +# ALL: Dispatch Width: 6 + +# EM3-NEXT: uOps Per Cycle: 0.18 +# EM3-NEXT: IPC: 0.18 +# EM3-NEXT: Block RThroughput: 33.0 + +# EM4-NEXT: uOps Per Cycle: 0.18 +# EM4-NEXT: IPC: 0.18 +# EM4-NEXT: Block RThroughput: 33.0 + +# EM5-NEXT: uOps Per Cycle: 0.23 +# EM5-NEXT: IPC: 0.23 +# EM5-NEXT: Block RThroughput: 26.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# EM3-NEXT: 1 12 12.00 sdiv w0, w1, w2 +# EM3-NEXT: 1 21 21.00 udiv x1, x2, x3 +# EM3-NEXT: 1 3 0.50 mul w2, w3, w4 +# EM3-NEXT: 1 4 1.00 msub x3, x4, x5, x6 +# EM3-NEXT: 1 3 0.50 smull x4, w5, w6 +# EM3-NEXT: 1 4 1.00 umulh x5, x6, x7 + +# EM4-NEXT: 1 12 12.00 sdiv w0, w1, w2 +# EM4-NEXT: 1 21 21.00 udiv x1, x2, x3 +# EM4-NEXT: 1 3 0.50 mul w2, w3, w4 +# EM4-NEXT: 1 4 1.00 msub x3, x4, x5, x6 +# EM4-NEXT: 1 3 0.50 smull x4, w5, w6 +# EM4-NEXT: 1 4 1.00 umulh x5, x6, x7 + +# EM5-NEXT: 1 10 10.00 sdiv w0, w1, w2 +# EM5-NEXT: 1 16 16.00 udiv x1, x2, x3 +# EM5-NEXT: 1 2 0.50 mul w2, w3, w4 +# EM5-NEXT: 1 3 1.00 msub x3, x4, x5, x6 +# EM5-NEXT: 1 2 0.50 smull x4, w5, w6 +# EM5-NEXT: 1 3 1.00 umulh x5, x6, x7 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/double-recp.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-recp.s new file mode 100644 index 00000000000..872f6ab7948 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-recp.s @@ -0,0 +1,66 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +fmov d31, #1.00000000 +fdiv d30, d31, d30 + +# Newton series for 1 / x. +frecpe d1, d0 +frecps d2, d0, d1 +fmul d1, d1, d2 +frecps d2, d0, d1 +fmul d1, d1, d2 +frecps d0, d0, d1 +fmul d0, d1, d0 + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 900 + +# M3-NEXT: Total Cycles: 2503 +# M4-NEXT: Total Cycles: 2403 +# M5-NEXT: Total Cycles: 2403 + +# ALL-NEXT: Total uOps: 900 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.36 +# M3-NEXT: IPC: 0.36 +# M3-NEXT: Block RThroughput: 3.3 + +# M4-NEXT: uOps Per Cycle: 0.37 +# M4-NEXT: IPC: 0.37 +# M4-NEXT: Block RThroughput: 2.3 + +# M5-NEXT: uOps Per Cycle: 0.37 +# M5-NEXT: IPC: 0.37 +# M5-NEXT: Block RThroughput: 2.3 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 1 0.33 fmov d31, #1.00000000 + +# M3-NEXT: 1 12 3.25 fdiv d30, d31, d30 +# M3-NEXT: 1 4 0.50 frecpe d1, d0 + +# M4-NEXT: 1 12 2.25 fdiv d30, d31, d30 +# M4-NEXT: 1 3 0.50 frecpe d1, d0 + +# M5-NEXT: 1 12 2.25 fdiv d30, d31, d30 +# M5-NEXT: 1 3 0.50 frecpe d1, d0 + +# ALL-NEXT: 1 4 0.33 frecps d2, d0, d1 +# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2 +# ALL-NEXT: 1 4 0.33 frecps d2, d0, d1 +# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2 +# ALL-NEXT: 1 4 0.33 frecps d0, d0, d1 +# ALL-NEXT: 1 3 0.33 fmul d0, d1, d0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/double-rsqrt.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-rsqrt.s new file mode 100644 index 00000000000..98fa404bb94 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-rsqrt.s @@ -0,0 +1,78 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +fsqrt d30, d30 +fmov d31, #1.00000000 +fdiv d30, d31, d30 + +# Newton series for 1 / sqrt(). +frsqrte d1, d0 +fmul d2, d1, d1 +frsqrts d2, d0, d2 +fmul d1, d1, d2 +fmul d2, d1, d1 +frsqrts d2, d0, d2 +fmul d1, d1, d2 +fmul d2, d1, d1 +frsqrts d0, d0, d2 +fmul d0, d1, d0 + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1300 + +# M3-NEXT: Total Cycles: 3703 +# M4-NEXT: Total Cycles: 3303 +# M5-NEXT: Total Cycles: 3303 + +# ALL-NEXT: Total uOps: 1300 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.35 +# M3-NEXT: IPC: 0.35 +# M3-NEXT: Block RThroughput: 26.0 + +# M4-NEXT: uOps Per Cycle: 0.39 +# M4-NEXT: IPC: 0.39 +# M4-NEXT: Block RThroughput: 3.0 + +# M5-NEXT: uOps Per Cycle: 0.39 +# M5-NEXT: IPC: 0.39 +# M5-NEXT: Block RThroughput: 3.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 1 25 26.00 fsqrt d30, d30 +# M4-NEXT: 1 12 2.25 fsqrt d30, d30 +# M5-NEXT: 1 12 2.25 fsqrt d30, d30 + +# ALL-NEXT: 1 1 0.33 fmov d31, #1.00000000 + +# M3-NEXT: 1 12 3.25 fdiv d30, d31, d30 +# M3-NEXT: 1 4 0.50 frsqrte d1, d0 + +# M4-NEXT: 1 12 2.25 fdiv d30, d31, d30 +# M4-NEXT: 1 3 0.50 frsqrte d1, d0 + +# M5-NEXT: 1 12 2.25 fdiv d30, d31, d30 +# M5-NEXT: 1 3 0.50 frsqrte d1, d0 + +# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1 +# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2 +# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2 +# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1 +# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2 +# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2 +# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1 +# ALL-NEXT: 1 4 0.33 frsqrts d0, d0, d2 +# ALL-NEXT: 1 3 0.33 fmul d0, d1, d0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/double-sqrt.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-sqrt.s new file mode 100644 index 00000000000..b9aceff3913 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-sqrt.s @@ -0,0 +1,79 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +fsqrt d31, d31 + +# Newton series for sqrt(). +frsqrte d1, d0 +fmul d2, d1, d1 +frsqrts d2, d0, d2 +fmul d1, d1, d2 +fmul d2, d1, d1 +frsqrts d2, d0, d2 +fmul d1, d1, d2 +fmul d2, d1, d1 +frsqrts d2, d0, d2 +fmul d2, d2, d0 +fmul d1, d1, d2 +fcmp d0, #0.0 +fcsel d0, d0, d1, eq + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1400 + +# M3-NEXT: Total Cycles: 4203 +# M4-NEXT: Total Cycles: 4103 +# M5-NEXT: Total Cycles: 3803 + +# ALL-NEXT: Total uOps: 1500 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.36 +# M3-NEXT: IPC: 0.33 +# M3-NEXT: Block RThroughput: 27.0 + +# M4-NEXT: uOps Per Cycle: 0.37 +# M4-NEXT: IPC: 0.34 +# M4-NEXT: Block RThroughput: 3.3 + +# M5-NEXT: uOps Per Cycle: 0.39 +# M5-NEXT: IPC: 0.37 +# M5-NEXT: Block RThroughput: 3.3 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 1 25 26.00 fsqrt d31, d31 +# M3-NEXT: 1 4 0.50 frsqrte d1, d0 + +# M4-NEXT: 1 12 2.25 fsqrt d31, d31 +# M4-NEXT: 1 3 0.50 frsqrte d1, d0 + +# M5-NEXT: 1 12 2.25 fsqrt d31, d31 +# M5-NEXT: 1 3 0.50 frsqrte d1, d0 + +# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1 +# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2 +# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2 +# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1 +# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2 +# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2 +# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1 +# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2 +# ALL-NEXT: 1 3 0.33 fmul d2, d2, d0 +# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2 +# ALL-NEXT: 1 2 1.00 fcmp d0, #0.0 + +# M3-NEXT: 2 5 1.00 fcsel d0, d0, d1, eq +# M4-NEXT: 2 5 1.00 fcsel d0, d0, d1, eq +# M5-NEXT: 2 2 1.00 fcsel d0, d0, d1, eq diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/extended-register.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/extended-register.s index aa14531577a..03522cd96e7 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/extended-register.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/extended-register.s @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5 sub w0, w1, w2, sxtb #0 add x3, x4, w5, sxth #1 @@ -16,6 +17,7 @@ # EM3-NEXT: Total Cycles: 304 # EM4-NEXT: Total Cycles: 304 +# EM5-NEXT: Total Cycles: 254 # ALL-NEXT: Total uOps: 800 @@ -29,6 +31,11 @@ # EM4-NEXT: IPC: 2.63 # EM4-NEXT: Block RThroughput: 3.0 +# EM5: Dispatch Width: 6 +# EM5-NEXT: uOps Per Cycle: 3.15 +# EM5-NEXT: IPC: 3.15 +# EM5-NEXT: Block RThroughput: 2.5 + # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps # ALL-NEXT: [2]: Latency @@ -56,3 +63,12 @@ # EM4-NEXT: 1 2 0.50 add x15, x16, w17, uxth #1 # EM4-NEXT: 1 2 0.50 subs x18, x19, w20, sxtw #2 # EM4-NEXT: 1 2 0.50 adds x21, x22, x23, sxtx #3 + +# EM5-NEXT: 1 1 0.17 sub w0, w1, w2, sxtb +# EM5-NEXT: 1 2 0.50 add x3, x4, w5, sxth #1 +# EM5-NEXT: 1 1 0.25 subs x6, x7, w8, uxtw #2 +# EM5-NEXT: 1 1 0.25 adds x9, x10, x11, uxtx #3 +# EM5-NEXT: 1 1 0.17 sub w12, w13, w14, uxtb +# EM5-NEXT: 1 2 0.50 add x15, x16, w17, uxth #1 +# EM5-NEXT: 1 2 0.50 subs x18, x19, w20, sxtw #2 +# EM5-NEXT: 1 2 0.50 adds x21, x22, x23, sxtx #3 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-divide-multiply.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-divide-multiply.s new file mode 100644 index 00000000000..a24d8a27960 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-divide-multiply.s @@ -0,0 +1,94 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5 + +fdiv h0, h1, h2 +fdiv s1, s2, s3 +fdiv d2, d3, d4 + +fmul h3, h4, h5 +fmul s4, s5, s6 +fmul d5, d6, d7 + +fmadd h6, h7, h8, h9 +fmadd s7, s8, s9, s10 +fmadd d8, d9, d10, d11 + +fsqrt h9, h10 +fsqrt s10, s11 +fsqrt d11, d12 + +# ALL: Iterations: 100 + +# EM3-NEXT: Instructions: 800 +# EM3-NEXT: Total Cycles: 4503 +# EM3-NEXT: Total uOps: 800 + +# EM4-NEXT: Instructions: 1200 +# EM4-NEXT: Total Cycles: 575 +# EM4-NEXT: Total uOps: 1200 + +# EM5-NEXT: Instructions: 1200 +# EM5-NEXT: Total Cycles: 433 +# EM5-NEXT: Total uOps: 1200 + +# ALL: Dispatch Width: 6 + +# EM3-NEXT: uOps Per Cycle: 0.18 +# EM3-NEXT: IPC: 0.18 +# EM3-NEXT: Block RThroughput: 45.0 + +# EM4-NEXT: uOps Per Cycle: 2.09 +# EM4-NEXT: IPC: 2.09 +# EM4-NEXT: Block RThroughput: 4.0 + +# EM5-NEXT: uOps Per Cycle: 2.77 +# EM5-NEXT: IPC: 2.77 +# EM5-NEXT: Block RThroughput: 4.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# EM3: [1] [2] [3] [4] [5] [6] Instructions: +# EM3-NEXT: 1 7 2.00 fdiv s1, s2, s3 +# EM3-NEXT: 1 12 3.25 fdiv d2, d3, d4 +# EM3-NEXT: 1 3 0.33 fmul s4, s5, s6 +# EM3-NEXT: 1 3 0.33 fmul d5, d6, d7 +# EM3-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10 +# EM3-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11 +# EM3-NEXT: 1 18 19.00 fsqrt s10, s11 +# EM3-NEXT: 1 25 26.00 fsqrt d11, d12 + +# EM4: [1] [2] [3] [4] [5] [6] Instructions: +# EM4-NEXT: 1 7 3.00 fdiv h0, h1, h2 +# EM4-NEXT: 1 7 1.50 fdiv s1, s2, s3 +# EM4-NEXT: 1 12 2.25 fdiv d2, d3, d4 +# EM4-NEXT: 1 3 0.50 fmul h3, h4, h5 +# EM4-NEXT: 1 3 0.33 fmul s4, s5, s6 +# EM4-NEXT: 1 3 0.33 fmul d5, d6, d7 +# EM4-NEXT: 1 4 0.50 fmadd h6, h7, h8, h9 +# EM4-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10 +# EM4-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11 +# EM4-NEXT: 1 7 3.00 fsqrt h9, h10 +# EM4-NEXT: 1 8 1.75 fsqrt s10, s11 +# EM4-NEXT: 1 12 2.25 fsqrt d11, d12 + +# EM5: [1] [2] [3] [4] [5] [6] Instructions: +# EM5-NEXT: 1 5 0.50 fdiv h0, h1, h2 +# EM5-NEXT: 1 7 1.00 fdiv s1, s2, s3 +# EM5-NEXT: 1 12 2.25 fdiv d2, d3, d4 +# EM5-NEXT: 1 3 0.33 fmul h3, h4, h5 +# EM5-NEXT: 1 3 0.33 fmul s4, s5, s6 +# EM5-NEXT: 1 3 0.33 fmul d5, d6, d7 +# EM5-NEXT: 1 4 0.33 fmadd h6, h7, h8, h9 +# EM5-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10 +# EM5-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11 +# EM5-NEXT: 1 5 0.50 fsqrt h9, h10 +# EM5-NEXT: 1 8 1.25 fsqrt s10, s11 +# EM5-NEXT: 1 12 2.25 fsqrt d11, d12 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-integer.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-integer.s new file mode 100644 index 00000000000..65aed321dd7 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-integer.s @@ -0,0 +1,114 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5 + +scvtf h0, w0 +scvtf s1, w1 +scvtf d2, x2 + +fcvtzs w3, h3 +fcvtzs w4, s4 +fcvtzs x5, d5 + +fmov h6, #2.0 +fmov s7, #4.0 +fmov d8, #8.0 + +fmov h9, w9 +fmov s10, w10 +fmov d11, x11 +fmov v12.d[1], x12 + +fmov w13, h13 +fmov w14, s14 +fmov x15, d15 +fmov x16, v16.d[1] + +# ALL: Iterations: 100 + +# EM3-NEXT: Instructions: 1200 +# EM3-NEXT: Total Cycles: 405 +# EM3-NEXT: Total uOps: 1400 + +# EM4-NEXT: Instructions: 1700 +# EM4-NEXT: Total Cycles: 1108 +# EM4-NEXT: Total uOps: 1900 + +# EM5-NEXT: Instructions: 1700 +# EM5-NEXT: Total Cycles: 1407 +# EM5-NEXT: Total uOps: 1900 + +# ALL: Dispatch Width: 6 + +# EM3-NEXT: uOps Per Cycle: 3.46 +# EM3-NEXT: IPC: 2.96 +# EM3-NEXT: Block RThroughput: 4.0 + +# EM4-NEXT: uOps Per Cycle: 1.71 +# EM4-NEXT: IPC: 1.53 +# EM4-NEXT: Block RThroughput: 11.0 + +# EM5-NEXT: uOps Per Cycle: 1.35 +# EM5-NEXT: IPC: 1.21 +# EM5-NEXT: Block RThroughput: 14.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# EM3: [1] [2] [3] [4] [5] [6] Instructions: +# EM3-NEXT: 1 4 1.00 scvtf s1, w1 +# EM3-NEXT: 1 4 1.00 scvtf d2, x2 +# EM3-NEXT: 1 3 1.00 fcvtzs w4, s4 +# EM3-NEXT: 1 3 1.00 fcvtzs x5, d5 +# EM3-NEXT: 1 1 0.33 fmov s7, #4.00000000 +# EM3-NEXT: 1 1 0.33 fmov d8, #8.00000000 +# EM3-NEXT: 1 1 0.33 fmov s10, w10 +# EM3-NEXT: 1 1 0.33 fmov d11, x11 +# EM3-NEXT: 2 5 1.00 fmov v12.d[1], x12 +# EM3-NEXT: 1 1 0.33 fmov w14, s14 +# EM3-NEXT: 1 1 0.33 fmov x15, d15 +# EM3-NEXT: 2 5 1.00 fmov x16, v16.d[1] + +# EM4: [1] [2] [3] [4] [5] [6] Instructions: +# EM4-NEXT: 1 6 1.00 scvtf h0, w0 +# EM4-NEXT: 1 6 1.00 scvtf s1, w1 +# EM4-NEXT: 1 6 1.00 scvtf d2, x2 +# EM4-NEXT: 1 4 1.00 fcvtzs w3, h3 +# EM4-NEXT: 1 4 1.00 fcvtzs w4, s4 +# EM4-NEXT: 1 4 1.00 fcvtzs x5, d5 +# EM4-NEXT: 1 1 0.33 fmov h6, #2.00000000 +# EM4-NEXT: 1 1 0.33 fmov s7, #4.00000000 +# EM4-NEXT: 1 1 0.33 fmov d8, #8.00000000 +# EM4-NEXT: 1 3 1.00 fmov h9, w9 +# EM4-NEXT: 1 3 1.00 fmov s10, w10 +# EM4-NEXT: 1 3 1.00 fmov d11, x11 +# EM4-NEXT: 2 2 1.00 fmov v12.d[1], x12 +# EM4-NEXT: 1 4 1.00 fmov w13, h13 +# EM4-NEXT: 1 4 1.00 fmov w14, s14 +# EM4-NEXT: 1 4 1.00 fmov x15, d15 +# EM4-NEXT: 2 5 1.00 fmov x16, v16.d[1] + +# EM5: [1] [2] [3] [4] [5] [6] Instructions: +# EM5-NEXT: 1 6 1.00 scvtf h0, w0 +# EM5-NEXT: 1 6 1.00 scvtf s1, w1 +# EM5-NEXT: 1 6 1.00 scvtf d2, x2 +# EM5-NEXT: 1 4 1.00 fcvtzs w3, h3 +# EM5-NEXT: 1 4 1.00 fcvtzs w4, s4 +# EM5-NEXT: 1 4 1.00 fcvtzs x5, d5 +# EM5-NEXT: 1 1 0.33 fmov h6, #2.00000000 +# EM5-NEXT: 1 1 0.33 fmov s7, #4.00000000 +# EM5-NEXT: 1 1 0.33 fmov d8, #8.00000000 +# EM5-NEXT: 1 4 1.00 fmov h9, w9 +# EM5-NEXT: 1 4 1.00 fmov s10, w10 +# EM5-NEXT: 1 4 1.00 fmov d11, x11 +# EM5-NEXT: 2 6 1.00 fmov v12.d[1], x12 +# EM5-NEXT: 1 3 1.00 fmov w13, h13 +# EM5-NEXT: 1 3 1.00 fmov w14, s14 +# EM5-NEXT: 1 3 1.00 fmov x15, d15 +# EM5-NEXT: 2 5 1.00 fmov x16, v16.d[1] diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-load.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-load.s new file mode 100644 index 00000000000..18dcf5ebe87 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-load.s @@ -0,0 +1,153 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +ldr s0, 1f +ldr q0, 1f + +ldur d0, [sp, #2] +ldur q0, [sp, #16] + +ldr b0, [sp], #1 +ldr q0, [sp], #16 + +ldr h0, [sp, #2]! +ldr q0, [sp, #16]! + +ldr s0, [sp, #4] +ldr q0, [sp, #16] + +ldr d0, [sp, x0, lsl #3] +ldr q0, [sp, x0, lsl #4] + +ldr b0, [sp, x0] +ldr q0, [sp, x0] + +ldr h0, [sp, w0, sxtw #1] +ldr q0, [sp, w0, uxtw #4] + +ldr s0, [sp, w0, sxtw] +ldr q0, [sp, w0, uxtw] + +ldp d0, d1, [sp], #16 +ldp q0, q1, [sp], #32 + +ldp s0, s1, [sp, #8]! +ldp q0, q1, [sp, #32]! + +ldp d0, d1, [sp, #16] +ldp q0, q1, [sp, #32] + +1: + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 2400 + +# M3-NEXT: Total Cycles: 4708 +# M3-NEXT: Total uOps: 3200 + +# M4-NEXT: Total Cycles: 4708 +# M4-NEXT: Total uOps: 3200 + +# M5-NEXT: Total Cycles: 5509 +# M5-NEXT: Total uOps: 3300 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.68 +# M3-NEXT: IPC: 0.51 +# M3-NEXT: Block RThroughput: 13.5 + +# M4-NEXT: uOps Per Cycle: 0.68 +# M4-NEXT: IPC: 0.51 +# M4-NEXT: Block RThroughput: 13.0 + +# M5-NEXT: uOps Per Cycle: 0.60 +# M5-NEXT: IPC: 0.44 +# M5-NEXT: Block RThroughput: 13.5 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 1 5 0.50 * ldr s0, {{\.?}}Ltmp0 +# M3-NEXT: 1 5 0.50 * ldr q0, {{\.?}}Ltmp0 +# M3-NEXT: 1 5 0.50 * ldur d0, [sp, #2] +# M3-NEXT: 1 5 0.50 * ldur q0, [sp, #16] +# M3-NEXT: 1 5 0.50 * ldr b0, [sp], #1 +# M3-NEXT: 1 5 0.50 * ldr q0, [sp], #16 +# M3-NEXT: 1 5 0.50 * ldr h0, [sp, #2]! +# M3-NEXT: 1 5 0.50 * ldr q0, [sp, #16]! +# M3-NEXT: 1 5 0.50 * ldr s0, [sp, #4] +# M3-NEXT: 1 5 0.50 * ldr q0, [sp, #16] +# M3-NEXT: 1 5 0.50 * ldr d0, [sp, x0, lsl #3] +# M3-NEXT: 2 6 0.50 * ldr q0, [sp, x0, lsl #4] +# M3-NEXT: 1 5 0.50 * ldr b0, [sp, x0] +# M3-NEXT: 1 5 0.50 * ldr q0, [sp, x0] +# M3-NEXT: 2 6 0.50 * ldr h0, [sp, w0, sxtw #1] +# M3-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw #4] +# M3-NEXT: 2 6 0.50 * ldr s0, [sp, w0, sxtw] +# M3-NEXT: 1 5 0.50 * ldr q0, [sp, w0, uxtw] +# M3-NEXT: 2 5 0.50 * ldp d0, d1, [sp], #16 +# M3-NEXT: 2 5 1.00 * ldp q0, q1, [sp], #32 +# M3-NEXT: 2 5 0.50 * ldp s0, s1, [sp, #8]! +# M3-NEXT: 2 5 1.00 * ldp q0, q1, [sp, #32]! +# M3-NEXT: 1 5 0.50 * ldp d0, d1, [sp, #16] +# M3-NEXT: 1 5 1.00 * ldp q0, q1, [sp, #32] + +# M4-NEXT: 1 5 0.50 * ldr s0, {{\.?}}Ltmp0 +# M4-NEXT: 1 5 0.50 * ldr q0, {{\.?}}Ltmp0 +# M4-NEXT: 1 5 0.50 * ldur d0, [sp, #2] +# M4-NEXT: 1 5 0.50 * ldur q0, [sp, #16] +# M4-NEXT: 1 5 0.50 * ldr b0, [sp], #1 +# M4-NEXT: 1 5 0.50 * ldr q0, [sp], #16 +# M4-NEXT: 1 5 0.50 * ldr h0, [sp, #2]! +# M4-NEXT: 1 5 0.50 * ldr q0, [sp, #16]! +# M4-NEXT: 1 5 0.50 * ldr s0, [sp, #4] +# M4-NEXT: 1 5 0.50 * ldr q0, [sp, #16] +# M4-NEXT: 1 5 0.50 * ldr d0, [sp, x0, lsl #3] +# M4-NEXT: 2 6 0.50 * ldr q0, [sp, x0, lsl #4] +# M4-NEXT: 1 5 0.50 * ldr b0, [sp, x0] +# M4-NEXT: 1 5 0.50 * ldr q0, [sp, x0] +# M4-NEXT: 2 6 0.50 * ldr h0, [sp, w0, sxtw #1] +# M4-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw #4] +# M4-NEXT: 2 6 0.50 * ldr s0, [sp, w0, sxtw] +# M4-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw] +# M4-NEXT: 1 5 0.50 * ldp d0, d1, [sp], #16 +# M4-NEXT: 2 5 0.50 * ldp q0, q1, [sp], #32 +# M4-NEXT: 2 5 0.50 * ldp s0, s1, [sp, #8]! +# M4-NEXT: 2 5 1.00 * ldp q0, q1, [sp, #32]! +# M4-NEXT: 1 5 0.50 * ldp d0, d1, [sp, #16] +# M4-NEXT: 1 5 1.00 * ldp q0, q1, [sp, #32] + +# M5-NEXT: 1 6 0.50 * ldr s0, {{\.?}}Ltmp0 +# M5-NEXT: 1 6 0.50 * ldr q0, {{\.?}}Ltmp0 +# M5-NEXT: 1 6 0.50 * ldur d0, [sp, #2] +# M5-NEXT: 1 6 0.50 * ldur q0, [sp, #16] +# M5-NEXT: 1 6 0.50 * ldr b0, [sp], #1 +# M5-NEXT: 1 6 0.50 * ldr q0, [sp], #16 +# M5-NEXT: 1 6 0.50 * ldr h0, [sp, #2]! +# M5-NEXT: 1 6 0.50 * ldr q0, [sp, #16]! +# M5-NEXT: 1 6 0.50 * ldr s0, [sp, #4] +# M5-NEXT: 1 6 0.50 * ldr q0, [sp, #16] +# M5-NEXT: 1 6 0.50 * ldr d0, [sp, x0, lsl #3] +# M5-NEXT: 2 7 0.50 * ldr q0, [sp, x0, lsl #4] +# M5-NEXT: 1 6 0.50 * ldr b0, [sp, x0] +# M5-NEXT: 1 6 0.50 * ldr q0, [sp, x0] +# M5-NEXT: 2 7 0.50 * ldr h0, [sp, w0, sxtw #1] +# M5-NEXT: 2 7 0.50 * ldr q0, [sp, w0, uxtw #4] +# M5-NEXT: 2 7 0.50 * ldr s0, [sp, w0, sxtw] +# M5-NEXT: 2 7 0.50 * ldr q0, [sp, w0, uxtw] +# M5-NEXT: 2 6 0.50 * ldp d0, d1, [sp], #16 +# M5-NEXT: 2 6 1.00 * ldp q0, q1, [sp], #32 +# M5-NEXT: 2 6 0.50 * ldp s0, s1, [sp, #8]! +# M5-NEXT: 2 6 1.00 * ldp q0, q1, [sp, #32]! +# M5-NEXT: 1 6 0.50 * ldp d0, d1, [sp, #16] +# M5-NEXT: 1 6 1.00 * ldp q0, q1, [sp, #32] diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-recp.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-recp.s new file mode 100644 index 00000000000..05245ad631f --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-recp.s @@ -0,0 +1,62 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +fmov s31, #1.00000000 +fdiv s30, s31, s30 + +# Newton series for 1 / x. +frecpe s1, s0 +frecps s2, s0, s1 +fmul s1, s1, s2 +frecps s0, s0, s1 +fmul s0, s1, s0 + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 700 + +# M3-NEXT: Total Cycles: 1803 +# M4-NEXT: Total Cycles: 1703 +# M5-NEXT: Total Cycles: 1703 + +# ALL-NEXT: Total uOps: 700 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.39 +# M3-NEXT: IPC: 0.39 +# M3-NEXT: Block RThroughput: 2.0 + +# M4-NEXT: uOps Per Cycle: 0.41 +# M4-NEXT: IPC: 0.41 +# M4-NEXT: Block RThroughput: 1.5 + +# M5-NEXT: uOps Per Cycle: 0.41 +# M5-NEXT: IPC: 0.41 +# M5-NEXT: Block RThroughput: 1.3 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 1 0.33 fmov s31, #1.00000000 + +# M3-NEXT: 1 7 2.00 fdiv s30, s31, s30 +# M3-NEXT: 1 4 0.50 frecpe s1, s0 + +# M4-NEXT: 1 7 1.50 fdiv s30, s31, s30 +# M4-NEXT: 1 3 0.50 frecpe s1, s0 + +# M5-NEXT: 1 7 1.00 fdiv s30, s31, s30 +# M5-NEXT: 1 3 0.50 frecpe s1, s0 + +# ALL-NEXT: 1 4 0.33 frecps s2, s0, s1 +# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2 +# ALL-NEXT: 1 4 0.33 frecps s0, s0, s1 +# ALL-NEXT: 1 3 0.33 fmul s0, s1, s0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-rsqrt.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-rsqrt.s new file mode 100644 index 00000000000..fd82cc35329 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-rsqrt.s @@ -0,0 +1,72 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +fsqrt s30, s30 +fmov s31, #1.00000000 +fdiv s30, s31, s30 + +# Newton series for 1 / sqrtf(). +frsqrte s1, s0 +fmul s2, s1, s1 +frsqrts s2, s0, s2 +fmul s1, s1, s2 +fmul s2, s1, s1 +frsqrts s0, s0, s2 +fmul s0, s1, s0 + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1000 + +# M3-NEXT: Total Cycles: 2503 +# M4-NEXT: Total Cycles: 2303 +# M5-NEXT: Total Cycles: 2303 + +# ALL-NEXT: Total uOps: 1000 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.40 +# M3-NEXT: IPC: 0.40 +# M3-NEXT: Block RThroughput: 19.0 + +# M4-NEXT: uOps Per Cycle: 0.43 +# M4-NEXT: IPC: 0.43 +# M4-NEXT: Block RThroughput: 2.0 + +# M5-NEXT: uOps Per Cycle: 0.43 +# M5-NEXT: IPC: 0.43 +# M5-NEXT: Block RThroughput: 2.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 1 18 19.00 fsqrt s30, s30 +# M4-NEXT: 1 8 1.75 fsqrt s30, s30 +# M5-NEXT: 1 8 1.25 fsqrt s30, s30 + +# ALL-NEXT: 1 1 0.33 fmov s31, #1.00000000 + +# M3-NEXT: 1 7 2.00 fdiv s30, s31, s30 +# M3-NEXT: 1 4 0.50 frsqrte s1, s0 + +# M4-NEXT: 1 7 1.50 fdiv s30, s31, s30 +# M4-NEXT: 1 3 0.50 frsqrte s1, s0 + +# M5-NEXT: 1 7 1.00 fdiv s30, s31, s30 +# M5-NEXT: 1 3 0.50 frsqrte s1, s0 + +# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1 +# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2 +# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2 +# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1 +# ALL-NEXT: 1 4 0.33 frsqrts s0, s0, s2 +# ALL-NEXT: 1 3 0.33 fmul s0, s1, s0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-sqrt.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-sqrt.s new file mode 100644 index 00000000000..423fae20366 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-sqrt.s @@ -0,0 +1,73 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +fsqrt s31, s31 + +# Newton series for sqrtf(). +frsqrte s1, s0 +fmul s2, s1, s1 +frsqrts s2, s0, s2 +fmul s1, s1, s2 +fmul s2, s1, s1 +frsqrts s2, s0, s2 +fmul s2, s2, s0 +fmul s1, s1, s2 +fcmp s0, #0.0 +fcsel s0, s0, s1, eq + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1100 + +# M3-NEXT: Total Cycles: 3203 +# M4-NEXT: Total Cycles: 3103 +# M5-NEXT: Total Cycles: 2803 + +# ALL-NEXT: Total uOps: 1200 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.37 +# M3-NEXT: IPC: 0.34 +# M3-NEXT: Block RThroughput: 20.0 + +# M4-NEXT: uOps Per Cycle: 0.39 +# M4-NEXT: IPC: 0.35 +# M4-NEXT: Block RThroughput: 2.3 + +# M5-NEXT: uOps Per Cycle: 0.43 +# M5-NEXT: IPC: 0.39 +# M5-NEXT: Block RThroughput: 2.3 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 1 18 19.00 fsqrt s31, s31 +# M3-NEXT: 1 4 0.50 frsqrte s1, s0 + +# M4-NEXT: 1 8 1.75 fsqrt s31, s31 +# M4-NEXT: 1 3 0.50 frsqrte s1, s0 + +# M5-NEXT: 1 8 1.25 fsqrt s31, s31 +# M5-NEXT: 1 3 0.50 frsqrte s1, s0 + +# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1 +# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2 +# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2 +# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1 +# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2 +# ALL-NEXT: 1 3 0.33 fmul s2, s2, s0 +# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2 +# ALL-NEXT: 1 2 1.00 fcmp s0, #0.0 + +# M3-NEXT: 2 5 1.00 fcsel s0, s0, s1, eq +# M4-NEXT: 2 5 1.00 fcsel s0, s0, s1, eq +# M5-NEXT: 2 2 1.00 fcsel s0, s0, s1, eq diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s new file mode 100644 index 00000000000..55d1d60252b --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s @@ -0,0 +1,142 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +stur d0, [sp, #2] +stur q0, [sp, #16] + +str b0, [sp], #1 +str q0, [sp], #16 + +str h0, [sp, #2]! +str q0, [sp, #16]! + +str s0, [sp, #4] +str q0, [sp, #16] + +str d0, [sp, x0, lsl #3] +str q0, [sp, x0, lsl #4] + +str b0, [sp, x0] +str q0, [sp, x0] + +str h0, [sp, w0, sxtw #1] +str q0, [sp, w0, uxtw #4] + +str s0, [sp, w0, sxtw] +str q0, [sp, w0, uxtw] + +stp d0, d1, [sp], #16 +stp q0, q1, [sp], #32 + +stp s0, s1, [sp, #8]! +stp q0, q1, [sp, #32]! + +stp d0, d1, [sp, #16] +stp q0, q1, [sp, #32] + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 2200 + +# M3-NEXT: Total Cycles: 3203 +# M3-NEXT: Total uOps: 2900 + +# M4-NEXT: Total Cycles: 3203 +# M4-NEXT: Total uOps: 3000 + +# M5-NEXT: Total Cycles: 2803 +# M5-NEXT: Total uOps: 2500 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.91 +# M3-NEXT: IPC: 0.69 +# M3-NEXT: Block RThroughput: 22.0 + +# M4-NEXT: uOps Per Cycle: 0.94 +# M4-NEXT: IPC: 0.69 +# M4-NEXT: Block RThroughput: 12.5 + +# M5-NEXT: uOps Per Cycle: 0.89 +# M5-NEXT: IPC: 0.78 +# M5-NEXT: Block RThroughput: 11.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 1 1 1.00 * stur d0, [sp, #2] +# M3-NEXT: 1 1 1.00 * stur q0, [sp, #16] +# M3-NEXT: 1 1 1.00 * str b0, [sp], #1 +# M3-NEXT: 1 1 1.00 * str q0, [sp], #16 +# M3-NEXT: 1 1 1.00 * str h0, [sp, #2]! +# M3-NEXT: 1 1 1.00 * str q0, [sp, #16]! +# M3-NEXT: 1 1 1.00 * str s0, [sp, #4] +# M3-NEXT: 1 1 1.00 * str q0, [sp, #16] +# M3-NEXT: 1 1 1.00 * str d0, [sp, x0, lsl #3] +# M3-NEXT: 2 3 1.00 * str q0, [sp, x0, lsl #4] +# M3-NEXT: 1 1 1.00 * str b0, [sp, x0] +# M3-NEXT: 1 1 1.00 * str q0, [sp, x0] +# M3-NEXT: 2 3 1.00 * str h0, [sp, w0, sxtw #1] +# M3-NEXT: 2 3 1.00 * str q0, [sp, w0, uxtw #4] +# M3-NEXT: 2 3 1.00 * str s0, [sp, w0, sxtw] +# M3-NEXT: 2 3 1.00 * str q0, [sp, w0, uxtw] +# M3-NEXT: 1 1 1.00 * stp d0, d1, [sp], #16 +# M3-NEXT: 2 1 1.00 * stp q0, q1, [sp], #32 +# M3-NEXT: 1 1 1.00 * stp s0, s1, [sp, #8]! +# M3-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32]! +# M3-NEXT: 1 1 1.00 * stp d0, d1, [sp, #16] +# M3-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32] + +# M4-NEXT: 1 1 0.50 * stur d0, [sp, #2] +# M4-NEXT: 1 1 0.50 * stur q0, [sp, #16] +# M4-NEXT: 1 1 0.50 * str b0, [sp], #1 +# M4-NEXT: 1 1 0.50 * str q0, [sp], #16 +# M4-NEXT: 1 1 0.50 * str h0, [sp, #2]! +# M4-NEXT: 1 1 0.50 * str q0, [sp, #16]! +# M4-NEXT: 1 1 0.50 * str s0, [sp, #4] +# M4-NEXT: 1 1 0.50 * str q0, [sp, #16] +# M4-NEXT: 1 1 0.50 * str d0, [sp, x0, lsl #3] +# M4-NEXT: 2 3 0.50 * str q0, [sp, x0, lsl #4] +# M4-NEXT: 1 1 0.50 * str b0, [sp, x0] +# M4-NEXT: 1 1 0.50 * str q0, [sp, x0] +# M4-NEXT: 2 3 0.50 * str h0, [sp, w0, sxtw #1] +# M4-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw #4] +# M4-NEXT: 2 3 0.50 * str s0, [sp, w0, sxtw] +# M4-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw] +# M4-NEXT: 1 1 0.50 * stp d0, d1, [sp], #16 +# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp], #32 +# M4-NEXT: 1 1 0.50 * stp s0, s1, [sp, #8]! +# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32]! +# M4-NEXT: 1 1 0.50 * stp d0, d1, [sp, #16] +# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32] + +# M5-NEXT: 1 1 0.50 * stur d0, [sp, #2] +# M5-NEXT: 1 1 0.50 * stur q0, [sp, #16] +# M5-NEXT: 1 1 0.50 * str b0, [sp], #1 +# M5-NEXT: 1 1 0.50 * str q0, [sp], #16 +# M5-NEXT: 1 1 0.50 * str h0, [sp, #2]! +# M5-NEXT: 1 1 0.50 * str q0, [sp, #16]! +# M5-NEXT: 1 1 0.50 * str s0, [sp, #4] +# M5-NEXT: 1 1 0.50 * str q0, [sp, #16] +# M5-NEXT: 1 1 0.50 * str d0, [sp, x0, lsl #3] +# M5-NEXT: 2 3 0.50 * str q0, [sp, x0, lsl #4] +# M5-NEXT: 1 1 0.50 * str b0, [sp, x0] +# M5-NEXT: 1 1 0.50 * str q0, [sp, x0] +# M5-NEXT: 1 1 0.50 * str h0, [sp, w0, sxtw #1] +# M5-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw #4] +# M5-NEXT: 1 1 0.50 * str s0, [sp, w0, sxtw] +# M5-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw] +# M5-NEXT: 1 1 0.50 * stp d0, d1, [sp], #16 +# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp], #32 +# M5-NEXT: 1 1 0.50 * stp s0, s1, [sp, #8]! +# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32]! +# M5-NEXT: 1 1 0.50 * stp d0, d1, [sp, #16] +# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32] diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/load.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/load.s new file mode 100644 index 00000000000..04f30d353ae --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/load.s @@ -0,0 +1,66 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +ldr w0, 1f +ldur x0, [sp, #8] +ldrb w0, [sp], #1 +ldrsh w0, [sp, #2]! +ldr x0, [sp, #8] +ldrb w0, [sp, x31] +ldrsh w0, [sp, x31, lsl #1] +ldr w0, [sp, w31, sxtw] +ldr x0, [sp, w31, uxtw #3] +ldnp w0, w1, [sp, #8] +ldp x0, x1, [sp], #16 +ldpsw x0, x1, [sp, #8]! + +1: + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1200 +# ALL-NEXT: Total Cycles: 1904 + +# M3-NEXT: Total uOps: 1600 +# M4-NEXT: Total uOps: 1400 +# M5-NEXT: Total uOps: 1400 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 0.84 +# M4-NEXT: uOps Per Cycle: 0.74 +# M5-NEXT: uOps Per Cycle: 0.74 + +# ALL-NEXT: IPC: 0.63 +# ALL-NEXT: Block RThroughput: 6.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 4 0.50 * ldr w0, {{\.?}}Ltmp0 +# ALL-NEXT: 1 4 0.50 * ldur x0, [sp, #8] +# ALL-NEXT: 1 4 0.50 * ldrb w0, [sp], #1 +# ALL-NEXT: 1 4 0.50 * ldrsh w0, [sp, #2]! +# ALL-NEXT: 1 4 0.50 * ldr x0, [sp, #8] +# ALL-NEXT: 1 4 0.50 * ldrb w0, [sp, xzr] +# ALL-NEXT: 1 5 0.50 * ldrsh w0, [sp, xzr, lsl #1] + +# M3-NEXT: 2 5 0.50 * ldr w0, [sp, wzr, sxtw] +# M3-NEXT: 2 5 0.50 * ldr x0, [sp, wzr, uxtw #3] + +# M4-NEXT: 1 5 0.50 * ldr w0, [sp, wzr, sxtw] +# M4-NEXT: 1 5 0.50 * ldr x0, [sp, wzr, uxtw #3] + +# M5-NEXT: 1 5 0.50 * ldr w0, [sp, wzr, sxtw] +# M5-NEXT: 1 5 0.50 * ldr x0, [sp, wzr, uxtw #3] + +# ALL-NEXT: 1 4 0.50 * ldnp w0, w1, [sp, #8] +# ALL-NEXT: 2 4 0.50 * ldp x0, x1, [sp], #16 +# ALL-NEXT: 2 4 0.50 * ldpsw x0, x1, [sp, #8]! diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s index 9e8c07149ca..b3bbec5f362 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M3 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M5 b main @@ -19,6 +20,11 @@ # M4-NEXT: IPC: 0.50 # M4-NEXT: Block RThroughput: 0.2 +# M5: Dispatch Width: 6 +# M5-NEXT: uOps Per Cycle: 0.50 +# M5-NEXT: IPC: 0.50 +# M5-NEXT: Block RThroughput: 0.2 + # ALL: Schedulers - number of cycles where we saw N micro opcodes issued: # ALL-NEXT: [# issued], [# cycles] # ALL-NEXT: 0, 1 (50.0%) diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s index 6a1c81b5fb4..8d885f431d7 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5 adds w0, w1, w2, lsl #0 sub x3, x4, x5, lsr #1 @@ -9,13 +10,14 @@ adds w12, w13, w14, lsl #4 sub x15, x16, x17, lsr #6 ands x18, x19, x20, lsl #8 - orr w21, w22, w23, asr #10 + eor w21, w22, w23, asr #10 # ALL: Iterations: 100 # ALL-NEXT: Instructions: 800 # EM3-NEXT: Total Cycles: 354 # EM4-NEXT: Total Cycles: 329 +# EM5-NEXT: Total Cycles: 220 # ALL-NEXT: Total uOps: 800 @@ -29,6 +31,11 @@ # EM4-NEXT: IPC: 2.43 # EM4-NEXT: Block RThroughput: 3.3 +# EM5: Dispatch Width: 6 +# EM5-NEXT: uOps Per Cycle: 3.64 +# EM5-NEXT: IPC: 3.64 +# EM5-NEXT: Block RThroughput: 1.5 + # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps # ALL-NEXT: [2]: Latency @@ -46,7 +53,7 @@ # EM3-NEXT: 1 2 0.50 adds w12, w13, w14, lsl #4 # EM3-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6 # EM3-NEXT: 1 2 0.50 ands x18, x19, x20, lsl #8 -# EM3-NEXT: 1 2 0.50 orr w21, w22, w23, asr #10 +# EM3-NEXT: 1 2 0.50 eor w21, w22, w23, asr #10 # EM4-NEXT: 1 1 0.25 adds w0, w1, w2 # EM4-NEXT: 1 2 0.50 sub x3, x4, x5, lsr #1 @@ -55,4 +62,13 @@ # EM4-NEXT: 1 2 0.50 adds w12, w13, w14, lsl #4 # EM4-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6 # EM4-NEXT: 1 1 0.25 ands x18, x19, x20, lsl #8 -# EM4-NEXT: 1 2 0.50 orr w21, w22, w23, asr #10 +# EM4-NEXT: 1 2 0.50 eor w21, w22, w23, asr #10 + +# EM5-NEXT: 1 1 0.17 adds w0, w1, w2 +# EM5-NEXT: 1 2 0.50 sub x3, x4, x5, lsr #1 +# EM5-NEXT: 1 1 0.25 ands x6, x7, x8, lsl #2 +# EM5-NEXT: 1 2 0.33 orr w9, w10, w11, asr #3 +# EM5-NEXT: 1 2 0.33 adds w12, w13, w14, lsl #4 +# EM5-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6 +# EM5-NEXT: 1 1 0.25 ands x18, x19, x20, lsl #8 +# EM5-NEXT: 1 2 0.33 eor w21, w22, w23, asr #10 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/store.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/store.s new file mode 100644 index 00000000000..b86cdac50e6 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/store.s @@ -0,0 +1,82 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 + +stur x0, [sp, #8] +strb w0, [sp], #1 +strh w0, [sp, #2]! +str x0, [sp, #8] +strb w0, [sp, x31] +strh w0, [sp, x31, lsl #1] +str w0, [sp, w31, sxtw] +str x0, [sp, w31, uxtw #3] +stnp w0, w1, [sp, #8] +stp x0, x1, [sp], #16 +stp w0, w1, [sp, #8]! + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1100 +# ALL-NEXT: Total Cycles: 1303 + +# M3-NEXT: Total uOps: 1300 +# M4-NEXT: Total uOps: 1100 +# M5-NEXT: Total uOps: 1100 + +# ALL: Dispatch Width: 6 + +# M3-NEXT: uOps Per Cycle: 1.00 +# M4-NEXT: uOps Per Cycle: 0.84 +# M5-NEXT: uOps Per Cycle: 0.84 + +# ALL-NEXT: IPC: 0.84 + +# M3-NEXT: Block RThroughput: 11.0 +# M4-NEXT: Block RThroughput: 5.5 +# M5-NEXT: Block RThroughput: 5.5 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 1 1 1.00 * stur x0, [sp, #8] +# M3-NEXT: 1 1 1.00 * strb w0, [sp], #1 +# M3-NEXT: 1 1 1.00 * strh w0, [sp, #2]! +# M3-NEXT: 1 1 1.00 * str x0, [sp, #8] +# M3-NEXT: 1 1 1.00 * strb w0, [sp, xzr] +# M3-NEXT: 1 1 1.00 * strh w0, [sp, xzr, lsl #1] +# M3-NEXT: 2 2 1.00 * str w0, [sp, wzr, sxtw] +# M3-NEXT: 2 2 1.00 * str x0, [sp, wzr, uxtw #3] +# M3-NEXT: 1 1 1.00 * stnp w0, w1, [sp, #8] +# M3-NEXT: 1 1 1.00 * stp x0, x1, [sp], #16 +# M3-NEXT: 1 1 1.00 * stp w0, w1, [sp, #8]! + +# M4-NEXT: 1 1 0.50 * stur x0, [sp, #8] +# M4-NEXT: 1 1 0.50 * strb w0, [sp], #1 +# M4-NEXT: 1 1 0.50 * strh w0, [sp, #2]! +# M4-NEXT: 1 1 0.50 * str x0, [sp, #8] +# M4-NEXT: 1 1 0.50 * strb w0, [sp, xzr] +# M4-NEXT: 1 1 0.50 * strh w0, [sp, xzr, lsl #1] +# M4-NEXT: 1 2 0.50 * str w0, [sp, wzr, sxtw] +# M4-NEXT: 1 2 0.50 * str x0, [sp, wzr, uxtw #3] +# M4-NEXT: 1 1 0.50 * stnp w0, w1, [sp, #8] +# M4-NEXT: 1 1 0.50 * stp x0, x1, [sp], #16 +# M4-NEXT: 1 1 0.50 * stp w0, w1, [sp, #8]! + +# M5-NEXT: 1 1 0.50 * stur x0, [sp, #8] +# M5-NEXT: 1 1 0.50 * strb w0, [sp], #1 +# M5-NEXT: 1 1 0.50 * strh w0, [sp, #2]! +# M5-NEXT: 1 1 0.50 * str x0, [sp, #8] +# M5-NEXT: 1 1 0.50 * strb w0, [sp, xzr] +# M5-NEXT: 1 1 0.50 * strh w0, [sp, xzr, lsl #1] +# M5-NEXT: 1 2 0.50 * str w0, [sp, wzr, sxtw] +# M5-NEXT: 1 2 0.50 * str x0, [sp, wzr, uxtw #3] +# M5-NEXT: 1 1 0.50 * stnp w0, w1, [sp, #8] +# M5-NEXT: 1 1 0.50 * stp x0, x1, [sp], #16 +# M5-NEXT: 1 1 0.50 * stp w0, w1, [sp, #8]! diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s index a42291108d0..3fecb1eebd1 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 mov x0, x1 mov sp, x0 @@ -22,21 +23,13 @@ # ALL: Iterations: 100 # ALL-NEXT: Instructions: 1000 - -# M3-NEXT: Total Cycles: 172 -# M4-NEXT: Total Cycles: 172 - +# ALL-NEXT: Total Cycles: 172 # ALL-NEXT: Total uOps: 1000 -# M3: Dispatch Width: 6 -# M3-NEXT: uOps Per Cycle: 5.81 -# M3-NEXT: IPC: 5.81 -# M3-NEXT: Block RThroughput: 1.7 - -# M4: Dispatch Width: 6 -# M4-NEXT: uOps Per Cycle: 5.81 -# M4-NEXT: IPC: 5.81 -# M4-NEXT: Block RThroughput: 1.7 +# ALL: Dispatch Width: 6 +# ALL-NEXT: uOps Per Cycle: 5.81 +# ALL-NEXT: IPC: 5.81 +# ALL-NEXT: Block RThroughput: 1.7 # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps @@ -47,25 +40,21 @@ # ALL-NEXT: [6]: HasSideEffects (U) # ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 0 0.17 mov x0, x1 +# ALL-NEXT: 1 0 0.17 mov sp, x0 +# ALL-NEXT: 1 0 0.17 mov w0, #12816 -# M3-NEXT: 1 0 0.17 mov x0, x1 -# M3-NEXT: 1 0 0.17 mov sp, x0 -# M3-NEXT: 1 0 0.17 mov w0, #12816 # M3-NEXT: 1 1 0.25 add w0, w1, #0 -# M3-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0 -# M3-NEXT: 1 4 0.50 * ldr x0, [x0] -# M3-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0 -# M3-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0 -# M3-NEXT: 1 1 0.33 fmov s0, s1 -# M3-NEXT: 1 0 0.17 movi d0, #0000000000000000 - -# M4-NEXT: 1 0 0.17 mov x0, x1 -# M4-NEXT: 1 0 0.17 mov sp, x0 -# M4-NEXT: 1 0 0.17 mov w0, #12816 # M4-NEXT: 1 1 0.25 add w0, w1, #0 -# M4-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0 -# M4-NEXT: 1 4 0.50 * ldr x0, [x0] -# M4-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0 -# M4-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0 +# M5-NEXT: 1 1 0.17 add w0, w1, #0 + +# ALL-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0 +# ALL-NEXT: 1 4 0.50 * ldr x0, [x0] +# ALL-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0 +# ALL-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0 + +# M3-NEXT: 1 1 0.33 fmov s0, s1 # M4-NEXT: 1 1 0.33 fmov s0, s1 -# M4-NEXT: 1 0 0.17 movi d0, #0000000000000000 +# M5-NEXT: 1 2 0.33 fmov s0, s1 + +# ALL-NEXT: 1 0 0.17 movi d0, #0000000000000000 |

