summary refs log tree commit diff stats
path: root/llvm/test/tools/llvm-mca
diff options
context:
space:
mode:
author Roman Lebedev <lebedev.ri@gmail.com> 2019-02-01 11:15:13 +0000
committer Roman Lebedev <lebedev.ri@gmail.com> 2019-02-01 11:15:13 +0000
commit 7857215f8ea6850e78819e5c37a7904700bb10cf (patch)
tree 9f7377e02a6478a75ee697f4595d079deb4b1f9f /llvm/test/tools/llvm-mca
parent 2c15fc56f8f1548b0ab3fdaf32132e683199bfa6 (diff)
download bcm5719-llvm-7857215f8ea6850e78819e5c37a7904700bb10cf.tar.gz
bcm5719-llvm-7857215f8ea6850e78819e5c37a7904700bb10cf.zip
[X86][BdVer2] Transfer delays from the integer to the floating point unit.
Summary: I'm unable to find this number (the integer-to-floating-point-unit transfer delay) in the "AMD SOG for family 15h". llvm-exegesis measures the latencies of these instructions as `2`, which matches the latencies specified in the "AMD SOG for family 15h". However, if we look at Agner, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller and Excavator pipeline", "Data delay between different execution domains", the int->ivec transfer is listed as `8`..`10`cy of additional latency. Also, Agner's "Instruction tables", for Piledriver, lists their latencies as `12`, which is consistent with `2cy` from exegesis / AMD SOG + `10cy` transfer delay. An additional data point comes from the fact that Agner's "Instruction tables", for Jaguar, lists their latencies as `8`; and the "AMD SOG for family 16h" does state the `+6cy` int->ivec delay, which is consistent with an instruction latency of `1` or `2`.
Reviewers: andreadb, RKSimon, craig.topper
Reviewed By: andreadb
Subscribers: gbedwell, courbet, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57300
llvm-svn: 352861
Diffstat (limited to 'llvm/test/tools/llvm-mca')
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s | 16
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s | 8
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s | 38
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s | 16
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s | 4
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s | 4
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s | 6
7 files changed, 46 insertions, 46 deletions
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s
index 82e16f4c2a9..8fb9630e351 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s
@@ -42,8 +42,8 @@ vpinsrq $1, %rax, %xmm0, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 2 0.50 vpinsrb $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: 2 2 0.50 vpinsrb $1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrb $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -100,8 +100,8 @@ vpinsrq $1, %rax, %xmm0, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 2 0.50 vpinsrw $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: 2 2 0.50 vpinsrw $1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrw $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrw $1, %eax, %xmm0, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -158,8 +158,8 @@ vpinsrq $1, %rax, %xmm0, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 2 0.50 vpinsrd $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: 2 2 0.50 vpinsrd $1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrd $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrd $1, %eax, %xmm0, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -216,8 +216,8 @@ vpinsrq $1, %rax, %xmm0, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 2 0.50 vpinsrq $0, %rax, %xmm0, %xmm0
-# CHECK-NEXT: 2 2 0.50 vpinsrq $1, %rax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrq $0, %rax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrq $1, %rax, %xmm0, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s
index 07dea332b00..2b31da317fb 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s
@@ -46,7 +46,7 @@ movq %rcx, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 4 1.00 vcvtsi2ssl %ecx, %xmm0, %xmm0
+# CHECK-NEXT: 2 14 1.00 vcvtsi2ssl %ecx, %xmm0, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -102,7 +102,7 @@ movq %rcx, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 4 1.00 vcvtsi2sdl %ecx, %xmm0, %xmm0
+# CHECK-NEXT: 2 14 1.00 vcvtsi2sdl %ecx, %xmm0, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -158,7 +158,7 @@ movq %rcx, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 4 1.00 cvtsi2ssl %ecx, %xmm0
+# CHECK-NEXT: 2 14 1.00 cvtsi2ssl %ecx, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -214,7 +214,7 @@ movq %rcx, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 4 1.00 cvtsi2sdl %ecx, %xmm0
+# CHECK-NEXT: 2 14 1.00 cvtsi2sdl %ecx, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s
index f1b02032b9e..24ac77d5c93 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s
@@ -7,12 +7,12 @@ vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK: Iterations: 500
# CHECK-NEXT: Instructions: 1500
-# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total Cycles: 2014
# CHECK-NEXT: Total uOps: 2500
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 1.25
-# CHECK-NEXT: IPC: 0.75
+# CHECK-NEXT: uOps Per Cycle: 1.24
+# CHECK-NEXT: IPC: 0.74
# CHECK-NEXT: Block RThroughput: 1.3
# CHECK: Instruction Info:
@@ -25,8 +25,8 @@ vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 addl %eax, %eax
-# CHECK-NEXT: 2 2 0.50 vpinsrb $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: 2 2 0.50 vpinsrb $1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrb $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -64,18 +64,18 @@ vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - - - - - - vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK: Timeline view:
-# CHECK-NEXT: 012345
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeER . . . addl %eax, %eax
-# CHECK-NEXT: [0,1] D=eeER . . vpinsrb $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [0,2] .D==eeER . . vpinsrb $1, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [1,0] .DeE---R . . addl %eax, %eax
-# CHECK-NEXT: [1,1] . D===eeER. . vpinsrb $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [1,2] . D=====eeER . vpinsrb $1, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [2,0] . DeE-----R . addl %eax, %eax
-# CHECK-NEXT: [2,1] . D======eeER . vpinsrb $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [2,2] . D=======eeER vpinsrb $1, %eax, %xmm0, %xmm0
+# CHECK: [0,0] DeER . . . . . addl %eax, %eax
+# CHECK-NEXT: [0,1] D===========eeER . . vpinsrb $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [0,2] .D============eeER . . vpinsrb $1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [1,0] .DeE-------------R . . addl %eax, %eax
+# CHECK-NEXT: [1,1] . D=============eeER. . vpinsrb $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [1,2] . D===============eeER . vpinsrb $1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [2,0] . DeE---------------R . addl %eax, %eax
+# CHECK-NEXT: [2,1] . D================eeER . vpinsrb $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [2,2] . D=================eeER vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -84,6 +84,6 @@ vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 3 1.0 0.7 2.7 addl %eax, %eax
-# CHECK-NEXT: 1. 3 4.3 0.0 0.0 vpinsrb $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: 2. 3 5.7 0.0 0.0 vpinsrb $1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 0. 3 1.0 0.7 9.3 addl %eax, %eax
+# CHECK-NEXT: 1. 3 14.3 0.0 0.0 vpinsrb $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2. 3 15.7 0.0 0.0 vpinsrb $1, %eax, %xmm0, %xmm0
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s
index 91d230dda15..5235eab449c 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s
@@ -1144,12 +1144,12 @@ vzeroupper
# CHECK-NEXT: 2 18 1.00 * vcvtsd2si (%rax), %rcx
# CHECK-NEXT: 1 4 1.00 vcvtsd2ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 9 1.00 * vcvtsd2ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 4 1.00 vcvtsi2sdl %ecx, %xmm0, %xmm2
-# CHECK-NEXT: 2 4 1.00 vcvtsi2sdq %rcx, %xmm0, %xmm2
+# CHECK-NEXT: 2 14 1.00 vcvtsi2sdl %ecx, %xmm0, %xmm2
+# CHECK-NEXT: 2 14 1.00 vcvtsi2sdq %rcx, %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * vcvtsi2sdl (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * vcvtsi2sdq (%rax), %xmm0, %xmm2
-# CHECK-NEXT: 2 4 1.00 vcvtsi2ssl %ecx, %xmm0, %xmm2
-# CHECK-NEXT: 2 4 1.00 vcvtsi2ssq %rcx, %xmm0, %xmm2
+# CHECK-NEXT: 2 14 1.00 vcvtsi2ssl %ecx, %xmm0, %xmm2
+# CHECK-NEXT: 2 14 1.00 vcvtsi2ssq %rcx, %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * vcvtsi2ssl (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * vcvtsi2ssq (%rax), %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2
@@ -1469,13 +1469,13 @@ vzeroupper
# CHECK-NEXT: 4 10 0.50 * vphsubsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 3 5 0.50 vphsubw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4 10 0.50 * vphsubw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 2 0.50 vpinsrb $1, %eax, %xmm1, %xmm2
+# CHECK-NEXT: 2 12 0.50 vpinsrb $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 2 6 0.50 * vpinsrb $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 2 0.50 vpinsrd $1, %eax, %xmm1, %xmm2
+# CHECK-NEXT: 2 12 0.50 vpinsrd $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 2 6 0.50 * vpinsrd $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 2 0.50 vpinsrq $1, %rax, %xmm1, %xmm2
+# CHECK-NEXT: 2 12 0.50 vpinsrq $1, %rax, %xmm1, %xmm2
# CHECK-NEXT: 2 6 0.50 * vpinsrq $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 2 0.50 vpinsrw $1, %eax, %xmm1, %xmm2
+# CHECK-NEXT: 2 12 0.50 vpinsrw $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 2 6 0.50 * vpinsrw $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 1.00 vpmaddubsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 9 1.00 * vpmaddubsw (%rax), %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s
index 7fcb7fb49f4..8916504b18e 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s
@@ -212,7 +212,7 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 9 1.00 * cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 cvtps2pi %xmm0, %mm2
# CHECK-NEXT: 1 9 1.00 * cvtps2pi (%rax), %mm2
-# CHECK-NEXT: 2 4 1.00 cvtsi2ssl %ecx, %xmm2
+# CHECK-NEXT: 2 14 1.00 cvtsi2ssl %ecx, %xmm2
# CHECK-NEXT: 2 13 1.00 cvtsi2ssq %rcx, %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2
@@ -269,7 +269,7 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 pavgw %mm0, %mm2
# CHECK-NEXT: 1 7 0.50 * pavgw (%rax), %mm2
# CHECK-NEXT: 2 13 1.00 pextrw $1, %mm0, %ecx
-# CHECK-NEXT: 2 2 0.50 pinsrw $1, %eax, %mm2
+# CHECK-NEXT: 2 12 0.50 pinsrw $1, %eax, %mm2
# CHECK-NEXT: 2 6 0.50 * pinsrw $1, (%rax), %mm2
# CHECK-NEXT: 1 2 0.50 pmaxsw %mm0, %mm2
# CHECK-NEXT: 1 7 0.50 * pmaxsw (%rax), %mm2
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s
index 792002e4418..093a6be0359 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s
@@ -444,7 +444,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 2 18 1.00 * cvtsd2si (%rax), %rcx
# CHECK-NEXT: 1 4 1.00 cvtsd2ss %xmm0, %xmm2
# CHECK-NEXT: 1 9 1.00 * cvtsd2ss (%rax), %xmm2
-# CHECK-NEXT: 2 4 1.00 cvtsi2sdl %ecx, %xmm2
+# CHECK-NEXT: 2 14 1.00 cvtsi2sdl %ecx, %xmm2
# CHECK-NEXT: 2 13 1.00 cvtsi2sdq %rcx, %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2
@@ -561,7 +561,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 pcmpgtw %xmm0, %xmm2
# CHECK-NEXT: 1 7 0.50 * pcmpgtw (%rax), %xmm2
# CHECK-NEXT: 2 13 1.00 pextrw $1, %xmm0, %ecx
-# CHECK-NEXT: 2 2 0.50 pinsrw $1, %eax, %xmm0
+# CHECK-NEXT: 2 12 0.50 pinsrw $1, %eax, %xmm0
# CHECK-NEXT: 2 6 0.50 * pinsrw $1, (%rax), %xmm0
# CHECK-NEXT: 1 4 1.00 pmaddwd %xmm0, %xmm2
# CHECK-NEXT: 1 9 1.00 * pmaddwd (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s
index 4aa29de694e..1e919a58d1c 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s
@@ -191,11 +191,11 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 2 13 1.00 * pextrw $1, %xmm0, (%rax)
# CHECK-NEXT: 2 4 1.00 phminposuw %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * phminposuw (%rax), %xmm2
-# CHECK-NEXT: 2 2 0.50 pinsrb $1, %eax, %xmm1
+# CHECK-NEXT: 2 12 0.50 pinsrb $1, %eax, %xmm1
# CHECK-NEXT: 2 6 0.50 * pinsrb $1, (%rax), %xmm1
-# CHECK-NEXT: 2 2 0.50 pinsrd $1, %eax, %xmm1
+# CHECK-NEXT: 2 12 0.50 pinsrd $1, %eax, %xmm1
# CHECK-NEXT: 2 6 0.50 * pinsrd $1, (%rax), %xmm1
-# CHECK-NEXT: 2 2 0.50 pinsrq $1, %rax, %xmm1
+# CHECK-NEXT: 2 12 0.50 pinsrq $1, %rax, %xmm1
# CHECK-NEXT: 2 6 0.50 * pinsrq $1, (%rax), %xmm1
# CHECK-NEXT: 1 2 0.50 pmaxsb %xmm0, %xmm2
# CHECK-NEXT: 1 7 0.50 * pmaxsb (%rax), %xmm2
OpenPOWER on IntegriCloud