diff options
| author | Roman Lebedev <lebedev.ri@gmail.com> | 2019-02-01 11:15:13 +0000 |
|---|---|---|
| committer | Roman Lebedev <lebedev.ri@gmail.com> | 2019-02-01 11:15:13 +0000 |
| commit | 7857215f8ea6850e78819e5c37a7904700bb10cf (patch) | |
| tree | 9f7377e02a6478a75ee697f4595d079deb4b1f9f /llvm/test/tools | |
| parent | 2c15fc56f8f1548b0ab3fdaf32132e683199bfa6 (diff) | |
| download | bcm5719-llvm-7857215f8ea6850e78819e5c37a7904700bb10cf.tar.gz bcm5719-llvm-7857215f8ea6850e78819e5c37a7904700bb10cf.zip | |
[X86][BdVer2] Transfer delays from the integer to the floating point unit.
Summary:
I'm unable to find this number (the int->ivec transfer delay) in the "AMD SOG for family 15h".
llvm-exegesis measures the latencies of these instructions as `2`,
which matches the latencies specified in "AMD SOG for family 15h".
However, if we look at Agner, Microarchitecture, "AMD Bulldozer, Piledriver,
Steamroller and Excavator pipeline", "Data delay between different execution
domains", the int->ivec transfer is listed as `8`..`10`cy of additional latency.
Also, Agner's "Instruction tables", for Piledriver, lists their latencies as `12`,
which is consistent with `2cy` from exegesis / AMD SOG + `10cy` transfer delay.
An additional data point comes from the fact that Agner's "Instruction tables",
for Jaguar, lists their latencies as `8`, and "AMD SOG for family 16h" does
state the `+6cy` int->ivec delay, which is consistent with an instruction latency of `1` or `2`.
Reviewers: andreadb, RKSimon, craig.topper
Reviewed By: andreadb
Subscribers: gbedwell, courbet, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57300
llvm-svn: 352861
Diffstat (limited to 'llvm/test/tools')
7 files changed, 46 insertions, 46 deletions
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s index 82e16f4c2a9..8fb9630e351 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s @@ -42,8 +42,8 @@ vpinsrq $1, %rax, %xmm0, %xmm0 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 2 0.50 vpinsrb $0, %eax, %xmm0, %xmm0 -# CHECK-NEXT: 2 2 0.50 vpinsrb $1, %eax, %xmm0, %xmm0 +# CHECK-NEXT: 2 12 0.50 vpinsrb $0, %eax, %xmm0, %xmm0 +# CHECK-NEXT: 2 12 0.50 vpinsrb $1, %eax, %xmm0, %xmm0 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -100,8 +100,8 @@ vpinsrq $1, %rax, %xmm0, %xmm0 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 2 0.50 vpinsrw $0, %eax, %xmm0, %xmm0 -# CHECK-NEXT: 2 2 0.50 vpinsrw $1, %eax, %xmm0, %xmm0 +# CHECK-NEXT: 2 12 0.50 vpinsrw $0, %eax, %xmm0, %xmm0 +# CHECK-NEXT: 2 12 0.50 vpinsrw $1, %eax, %xmm0, %xmm0 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -158,8 +158,8 @@ vpinsrq $1, %rax, %xmm0, %xmm0 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 2 0.50 vpinsrd $0, %eax, %xmm0, %xmm0 -# CHECK-NEXT: 2 2 0.50 vpinsrd $1, %eax, %xmm0, %xmm0 +# CHECK-NEXT: 2 12 0.50 vpinsrd $0, %eax, %xmm0, %xmm0 +# CHECK-NEXT: 2 12 0.50 vpinsrd $1, %eax, %xmm0, %xmm0 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -216,8 +216,8 @@ vpinsrq $1, %rax, %xmm0, %xmm0 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 2 0.50 vpinsrq $0, %rax, %xmm0, %xmm0 -# CHECK-NEXT: 2 2 0.50 vpinsrq $1, %rax, %xmm0, %xmm0 +# CHECK-NEXT: 2 12 0.50 vpinsrq $0, %rax, %xmm0, %xmm0 +# CHECK-NEXT: 2 12 0.50 vpinsrq $1, %rax, %xmm0, %xmm0 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 diff --git 
a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s index 07dea332b00..2b31da317fb 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s @@ -46,7 +46,7 @@ movq %rcx, %xmm0 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 4 1.00 vcvtsi2ssl %ecx, %xmm0, %xmm0 +# CHECK-NEXT: 2 14 1.00 vcvtsi2ssl %ecx, %xmm0, %xmm0 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -102,7 +102,7 @@ movq %rcx, %xmm0 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 4 1.00 vcvtsi2sdl %ecx, %xmm0, %xmm0 +# CHECK-NEXT: 2 14 1.00 vcvtsi2sdl %ecx, %xmm0, %xmm0 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -158,7 +158,7 @@ movq %rcx, %xmm0 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 4 1.00 cvtsi2ssl %ecx, %xmm0 +# CHECK-NEXT: 2 14 1.00 cvtsi2ssl %ecx, %xmm0 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -214,7 +214,7 @@ movq %rcx, %xmm0 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 4 1.00 cvtsi2sdl %ecx, %xmm0 +# CHECK-NEXT: 2 14 1.00 cvtsi2sdl %ecx, %xmm0 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s index f1b02032b9e..24ac77d5c93 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s @@ -7,12 +7,12 @@ vpinsrb $1, %eax, %xmm0, %xmm0 # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 1500 -# CHECK-NEXT: Total Cycles: 2004 +# CHECK-NEXT: Total Cycles: 2014 # CHECK-NEXT: Total uOps: 2500 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.25 -# CHECK-NEXT: IPC: 
0.75 +# CHECK-NEXT: uOps Per Cycle: 1.24 +# CHECK-NEXT: IPC: 0.74 # CHECK-NEXT: Block RThroughput: 1.3 # CHECK: Instruction Info: @@ -25,8 +25,8 @@ vpinsrb $1, %eax, %xmm0, %xmm0 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 0.50 addl %eax, %eax -# CHECK-NEXT: 2 2 0.50 vpinsrb $0, %eax, %xmm0, %xmm0 -# CHECK-NEXT: 2 2 0.50 vpinsrb $1, %eax, %xmm0, %xmm0 +# CHECK-NEXT: 2 12 0.50 vpinsrb $0, %eax, %xmm0, %xmm0 +# CHECK-NEXT: 2 12 0.50 vpinsrb $1, %eax, %xmm0, %xmm0 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -64,18 +64,18 @@ vpinsrb $1, %eax, %xmm0, %xmm0 # CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - - - - - - vpinsrb $1, %eax, %xmm0, %xmm0 # CHECK: Timeline view: -# CHECK-NEXT: 012345 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012345 -# CHECK: [0,0] DeER . . . addl %eax, %eax -# CHECK-NEXT: [0,1] D=eeER . . vpinsrb $0, %eax, %xmm0, %xmm0 -# CHECK-NEXT: [0,2] .D==eeER . . vpinsrb $1, %eax, %xmm0, %xmm0 -# CHECK-NEXT: [1,0] .DeE---R . . addl %eax, %eax -# CHECK-NEXT: [1,1] . D===eeER. . vpinsrb $0, %eax, %xmm0, %xmm0 -# CHECK-NEXT: [1,2] . D=====eeER . vpinsrb $1, %eax, %xmm0, %xmm0 -# CHECK-NEXT: [2,0] . DeE-----R . addl %eax, %eax -# CHECK-NEXT: [2,1] . D======eeER . vpinsrb $0, %eax, %xmm0, %xmm0 -# CHECK-NEXT: [2,2] . D=======eeER vpinsrb $1, %eax, %xmm0, %xmm0 +# CHECK: [0,0] DeER . . . . . addl %eax, %eax +# CHECK-NEXT: [0,1] D===========eeER . . vpinsrb $0, %eax, %xmm0, %xmm0 +# CHECK-NEXT: [0,2] .D============eeER . . vpinsrb $1, %eax, %xmm0, %xmm0 +# CHECK-NEXT: [1,0] .DeE-------------R . . addl %eax, %eax +# CHECK-NEXT: [1,1] . D=============eeER. . vpinsrb $0, %eax, %xmm0, %xmm0 +# CHECK-NEXT: [1,2] . D===============eeER . vpinsrb $1, %eax, %xmm0, %xmm0 +# CHECK-NEXT: [2,0] . DeE---------------R . addl %eax, %eax +# CHECK-NEXT: [2,1] . D================eeER . vpinsrb $0, %eax, %xmm0, %xmm0 +# CHECK-NEXT: [2,2] . 
D=================eeER vpinsrb $1, %eax, %xmm0, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -84,6 +84,6 @@ vpinsrb $1, %eax, %xmm0, %xmm0 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 1.0 0.7 2.7 addl %eax, %eax -# CHECK-NEXT: 1. 3 4.3 0.0 0.0 vpinsrb $0, %eax, %xmm0, %xmm0 -# CHECK-NEXT: 2. 3 5.7 0.0 0.0 vpinsrb $1, %eax, %xmm0, %xmm0 +# CHECK-NEXT: 0. 3 1.0 0.7 9.3 addl %eax, %eax +# CHECK-NEXT: 1. 3 14.3 0.0 0.0 vpinsrb $0, %eax, %xmm0, %xmm0 +# CHECK-NEXT: 2. 3 15.7 0.0 0.0 vpinsrb $1, %eax, %xmm0, %xmm0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s index 91d230dda15..5235eab449c 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s @@ -1144,12 +1144,12 @@ vzeroupper # CHECK-NEXT: 2 18 1.00 * vcvtsd2si (%rax), %rcx # CHECK-NEXT: 1 4 1.00 vcvtsd2ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 9 1.00 * vcvtsd2ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 4 1.00 vcvtsi2sdl %ecx, %xmm0, %xmm2 -# CHECK-NEXT: 2 4 1.00 vcvtsi2sdq %rcx, %xmm0, %xmm2 +# CHECK-NEXT: 2 14 1.00 vcvtsi2sdl %ecx, %xmm0, %xmm2 +# CHECK-NEXT: 2 14 1.00 vcvtsi2sdq %rcx, %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * vcvtsi2sdl (%rax), %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * vcvtsi2sdq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 2 4 1.00 vcvtsi2ssl %ecx, %xmm0, %xmm2 -# CHECK-NEXT: 2 4 1.00 vcvtsi2ssq %rcx, %xmm0, %xmm2 +# CHECK-NEXT: 2 14 1.00 vcvtsi2ssl %ecx, %xmm0, %xmm2 +# CHECK-NEXT: 2 14 1.00 vcvtsi2ssq %rcx, %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * vcvtsi2ssl (%rax), %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * vcvtsi2ssq (%rax), %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 @@ -1469,13 +1469,13 @@ vzeroupper # CHECK-NEXT: 4 10 0.50 * vphsubsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 3 5 0.50 vphsubw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 4 10 0.50 * vphsubw 
(%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 2 0.50 vpinsrb $1, %eax, %xmm1, %xmm2 +# CHECK-NEXT: 2 12 0.50 vpinsrb $1, %eax, %xmm1, %xmm2 # CHECK-NEXT: 2 6 0.50 * vpinsrb $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 2 0.50 vpinsrd $1, %eax, %xmm1, %xmm2 +# CHECK-NEXT: 2 12 0.50 vpinsrd $1, %eax, %xmm1, %xmm2 # CHECK-NEXT: 2 6 0.50 * vpinsrd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 2 0.50 vpinsrq $1, %rax, %xmm1, %xmm2 +# CHECK-NEXT: 2 12 0.50 vpinsrq $1, %rax, %xmm1, %xmm2 # CHECK-NEXT: 2 6 0.50 * vpinsrq $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 2 0.50 vpinsrw $1, %eax, %xmm1, %xmm2 +# CHECK-NEXT: 2 12 0.50 vpinsrw $1, %eax, %xmm1, %xmm2 # CHECK-NEXT: 2 6 0.50 * vpinsrw $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 4 1.00 vpmaddubsw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 9 1.00 * vpmaddubsw (%rax), %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s index 7fcb7fb49f4..8916504b18e 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s @@ -212,7 +212,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 9 1.00 * cvtpi2ps (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 cvtps2pi %xmm0, %mm2 # CHECK-NEXT: 1 9 1.00 * cvtps2pi (%rax), %mm2 -# CHECK-NEXT: 2 4 1.00 cvtsi2ssl %ecx, %xmm2 +# CHECK-NEXT: 2 14 1.00 cvtsi2ssl %ecx, %xmm2 # CHECK-NEXT: 2 13 1.00 cvtsi2ssq %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2 @@ -269,7 +269,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 2 0.50 pavgw %mm0, %mm2 # CHECK-NEXT: 1 7 0.50 * pavgw (%rax), %mm2 # CHECK-NEXT: 2 13 1.00 pextrw $1, %mm0, %ecx -# CHECK-NEXT: 2 2 0.50 pinsrw $1, %eax, %mm2 +# CHECK-NEXT: 2 12 0.50 pinsrw $1, %eax, %mm2 # CHECK-NEXT: 2 6 0.50 * pinsrw $1, (%rax), %mm2 # CHECK-NEXT: 1 2 0.50 pmaxsw %mm0, %mm2 # CHECK-NEXT: 1 7 0.50 * pmaxsw (%rax), %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s 
b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s index 792002e4418..093a6be0359 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s @@ -444,7 +444,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 18 1.00 * cvtsd2si (%rax), %rcx # CHECK-NEXT: 1 4 1.00 cvtsd2ss %xmm0, %xmm2 # CHECK-NEXT: 1 9 1.00 * cvtsd2ss (%rax), %xmm2 -# CHECK-NEXT: 2 4 1.00 cvtsi2sdl %ecx, %xmm2 +# CHECK-NEXT: 2 14 1.00 cvtsi2sdl %ecx, %xmm2 # CHECK-NEXT: 2 13 1.00 cvtsi2sdq %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2 @@ -561,7 +561,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 2 0.50 pcmpgtw %xmm0, %xmm2 # CHECK-NEXT: 1 7 0.50 * pcmpgtw (%rax), %xmm2 # CHECK-NEXT: 2 13 1.00 pextrw $1, %xmm0, %ecx -# CHECK-NEXT: 2 2 0.50 pinsrw $1, %eax, %xmm0 +# CHECK-NEXT: 2 12 0.50 pinsrw $1, %eax, %xmm0 # CHECK-NEXT: 2 6 0.50 * pinsrw $1, (%rax), %xmm0 # CHECK-NEXT: 1 4 1.00 pmaddwd %xmm0, %xmm2 # CHECK-NEXT: 1 9 1.00 * pmaddwd (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s index 4aa29de694e..1e919a58d1c 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s @@ -191,11 +191,11 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 2 13 1.00 * pextrw $1, %xmm0, (%rax) # CHECK-NEXT: 2 4 1.00 phminposuw %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * phminposuw (%rax), %xmm2 -# CHECK-NEXT: 2 2 0.50 pinsrb $1, %eax, %xmm1 +# CHECK-NEXT: 2 12 0.50 pinsrb $1, %eax, %xmm1 # CHECK-NEXT: 2 6 0.50 * pinsrb $1, (%rax), %xmm1 -# CHECK-NEXT: 2 2 0.50 pinsrd $1, %eax, %xmm1 +# CHECK-NEXT: 2 12 0.50 pinsrd $1, %eax, %xmm1 # CHECK-NEXT: 2 6 0.50 * pinsrd $1, (%rax), %xmm1 -# CHECK-NEXT: 2 2 0.50 pinsrq $1, %rax, %xmm1 +# CHECK-NEXT: 2 12 0.50 pinsrq $1, %rax, %xmm1 # CHECK-NEXT: 2 6 0.50 * pinsrq $1, (%rax), %xmm1 # CHECK-NEXT: 1 2 0.50 pmaxsb %xmm0, %xmm2 # 
CHECK-NEXT: 1 7 0.50 * pmaxsb (%rax), %xmm2 |

