summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
author: Roman Lebedev <lebedev.ri@gmail.com> 2019-02-01 11:15:13 +0000
committer: Roman Lebedev <lebedev.ri@gmail.com> 2019-02-01 11:15:13 +0000
commit: 7857215f8ea6850e78819e5c37a7904700bb10cf (patch)
tree: 9f7377e02a6478a75ee697f4595d079deb4b1f9f /llvm
parent: 2c15fc56f8f1548b0ab3fdaf32132e683199bfa6 (diff)
download: bcm5719-llvm-7857215f8ea6850e78819e5c37a7904700bb10cf.tar.gz
          bcm5719-llvm-7857215f8ea6850e78819e5c37a7904700bb10cf.zip
[X86][BdVer2] Transfer delays from the integer to the floating point unit.
Summary:
I'm unable to find this number in the "AMD SOG for family 15h". llvm-exegesis measures the latencies of these instructions as `2`, which matches the latencies specified in "AMD SOG for family 15h".

However, if we look at Agner, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller and Excavator pipeline", "Data delay between different execution domains", the int->ivec transfer is listed as `8`..`10`cy of additional latency. Also, Agner's "Instruction tables", for Piledriver, lists their latencies as `12`, which is consistent with `2cy` from exegesis / AMD SOG + `10cy` transfer delay.

An additional data point comes from the fact that Agner's "Instruction tables", for Jaguar, lists their latencies as `8`; and "AMD SOG for family 16h" does state the `+6cy` int->ivec delay, which is consistent with instr latency of `1` or `2`.

Reviewers: andreadb, RKSimon, craig.topper
Reviewed By: andreadb
Subscribers: gbedwell, courbet, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57300
llvm-svn: 352861
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/X86/X86ScheduleBdVer2.td | 5
-rw-r--r-- llvm/test/CodeGen/X86/mmx-schedule.ll | 4
-rw-r--r-- llvm/test/CodeGen/X86/sse-schedule.ll | 6
-rw-r--r-- llvm/test/CodeGen/X86/sse2-schedule.ll | 10
-rw-r--r-- llvm/test/CodeGen/X86/sse41-schedule.ll | 12
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s | 16
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s | 8
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s | 38
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s | 16
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s | 4
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s | 4
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s | 6
12 files changed, 66 insertions, 63 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
index 90ca79915fa..8e8fc6fd1ff 100644
--- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
@@ -250,7 +250,10 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 5>;
def : ReadAdvance<ReadAfterVecYLd, 5>;
-def : ReadAdvance<ReadInt2Fpu, 0>;
+// Transfer from int domain to ivec domain incurs additional latency of 8..10cy
+// Reference: Agner, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller
+// and Excavator pipeline", "Data delay between different execution domains"
+def : ReadAdvance<ReadInt2Fpu, -10>;
// A folded store needs a cycle on the PdStore for the store data.
def : WriteRes<WriteRMW, [PdStore]>;
diff --git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll
index d423b9a2a90..9aa1de95a55 100644
--- a/llvm/test/CodeGen/X86/mmx-schedule.ll
+++ b/llvm/test/CodeGen/X86/mmx-schedule.ll
@@ -3880,8 +3880,8 @@ define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize {
; BDVER2-LABEL: test_pinsrw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
-; BDVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:0.50]
+; BDVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [12:0.50]
+; BDVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [12:0.50]
; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll
index d10f42a2fe7..f737d24c879 100644
--- a/llvm/test/CodeGen/X86/sse-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse-schedule.ll
@@ -1158,13 +1158,13 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
; BDVER2-SSE-LABEL: test_cvtsi2ss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [14:1.00]
; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvtsi2ss:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [14:1.00]
; BDVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
@@ -1304,7 +1304,7 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
;
; BDVER2-LABEL: test_cvtsi2ssq:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [14:1.00]
; BDVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll
index ec8cb3c8787..ffb650f833e 100644
--- a/llvm/test/CodeGen/X86/sse2-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse2-schedule.ll
@@ -2595,13 +2595,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
; BDVER2-SSE-LABEL: test_cvtsi2sd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [14:1.00]
; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_cvtsi2sd:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [14:1.00]
; BDVER2-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
@@ -2741,7 +2741,7 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
;
; BDVER2-LABEL: test_cvtsi2sdq:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [14:1.00]
; BDVER2-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
@@ -10100,13 +10100,13 @@ define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) {
;
; BDVER2-SSE-LABEL: test_pinsrw:
; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:0.50]
+; BDVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [12:0.50]
; BDVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:0.50]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_pinsrw:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:0.50]
+; BDVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [12:0.50]
; BDVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll
index 4870434a8ae..c5a1c775f38 100644
--- a/llvm/test/CodeGen/X86/sse41-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse41-schedule.ll
@@ -2425,13 +2425,13 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) {
;
; BDVER2-SSE-LABEL: test_pinsrb:
; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:0.50]
+; BDVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [12:0.50]
; BDVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:0.50]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_pinsrb:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:0.50]
+; BDVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [12:0.50]
; BDVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
@@ -2539,13 +2539,13 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
;
; BDVER2-SSE-LABEL: test_pinsrd:
; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:0.50]
+; BDVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [12:0.50]
; BDVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:0.50]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_pinsrd:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:0.50]
+; BDVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [12:0.50]
; BDVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
@@ -2666,14 +2666,14 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
; BDVER2-SSE-LABEL: test_pinsrq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:0.50]
-; BDVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:0.50]
+; BDVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [12:0.50]
; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
;
; BDVER2-LABEL: test_pinsrq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:0.50]
-; BDVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:0.50]
+; BDVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [12:0.50]
; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s
index 82e16f4c2a9..8fb9630e351 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s
@@ -42,8 +42,8 @@ vpinsrq $1, %rax, %xmm0, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 2 0.50 vpinsrb $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: 2 2 0.50 vpinsrb $1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrb $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -100,8 +100,8 @@ vpinsrq $1, %rax, %xmm0, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 2 0.50 vpinsrw $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: 2 2 0.50 vpinsrw $1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrw $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrw $1, %eax, %xmm0, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -158,8 +158,8 @@ vpinsrq $1, %rax, %xmm0, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 2 0.50 vpinsrd $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: 2 2 0.50 vpinsrd $1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrd $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrd $1, %eax, %xmm0, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -216,8 +216,8 @@ vpinsrq $1, %rax, %xmm0, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 2 0.50 vpinsrq $0, %rax, %xmm0, %xmm0
-# CHECK-NEXT: 2 2 0.50 vpinsrq $1, %rax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrq $0, %rax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrq $1, %rax, %xmm0, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s
index 07dea332b00..2b31da317fb 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s
@@ -46,7 +46,7 @@ movq %rcx, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 4 1.00 vcvtsi2ssl %ecx, %xmm0, %xmm0
+# CHECK-NEXT: 2 14 1.00 vcvtsi2ssl %ecx, %xmm0, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -102,7 +102,7 @@ movq %rcx, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 4 1.00 vcvtsi2sdl %ecx, %xmm0, %xmm0
+# CHECK-NEXT: 2 14 1.00 vcvtsi2sdl %ecx, %xmm0, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -158,7 +158,7 @@ movq %rcx, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 4 1.00 cvtsi2ssl %ecx, %xmm0
+# CHECK-NEXT: 2 14 1.00 cvtsi2ssl %ecx, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -214,7 +214,7 @@ movq %rcx, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 4 1.00 cvtsi2sdl %ecx, %xmm0
+# CHECK-NEXT: 2 14 1.00 cvtsi2sdl %ecx, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s
index f1b02032b9e..24ac77d5c93 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s
@@ -7,12 +7,12 @@ vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK: Iterations: 500
# CHECK-NEXT: Instructions: 1500
-# CHECK-NEXT: Total Cycles: 2004
+# CHECK-NEXT: Total Cycles: 2014
# CHECK-NEXT: Total uOps: 2500
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 1.25
-# CHECK-NEXT: IPC: 0.75
+# CHECK-NEXT: uOps Per Cycle: 1.24
+# CHECK-NEXT: IPC: 0.74
# CHECK-NEXT: Block RThroughput: 1.3
# CHECK: Instruction Info:
@@ -25,8 +25,8 @@ vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 addl %eax, %eax
-# CHECK-NEXT: 2 2 0.50 vpinsrb $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: 2 2 0.50 vpinsrb $1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrb $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2 12 0.50 vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -64,18 +64,18 @@ vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - - - - - - vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK: Timeline view:
-# CHECK-NEXT: 012345
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345
-# CHECK: [0,0] DeER . . . addl %eax, %eax
-# CHECK-NEXT: [0,1] D=eeER . . vpinsrb $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [0,2] .D==eeER . . vpinsrb $1, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [1,0] .DeE---R . . addl %eax, %eax
-# CHECK-NEXT: [1,1] . D===eeER. . vpinsrb $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [1,2] . D=====eeER . vpinsrb $1, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [2,0] . DeE-----R . addl %eax, %eax
-# CHECK-NEXT: [2,1] . D======eeER . vpinsrb $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [2,2] . D=======eeER vpinsrb $1, %eax, %xmm0, %xmm0
+# CHECK: [0,0] DeER . . . . . addl %eax, %eax
+# CHECK-NEXT: [0,1] D===========eeER . . vpinsrb $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [0,2] .D============eeER . . vpinsrb $1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [1,0] .DeE-------------R . . addl %eax, %eax
+# CHECK-NEXT: [1,1] . D=============eeER. . vpinsrb $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [1,2] . D===============eeER . vpinsrb $1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [2,0] . DeE---------------R . addl %eax, %eax
+# CHECK-NEXT: [2,1] . D================eeER . vpinsrb $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [2,2] . D=================eeER vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -84,6 +84,6 @@ vpinsrb $1, %eax, %xmm0, %xmm0
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 3 1.0 0.7 2.7 addl %eax, %eax
-# CHECK-NEXT: 1. 3 4.3 0.0 0.0 vpinsrb $0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: 2. 3 5.7 0.0 0.0 vpinsrb $1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 0. 3 1.0 0.7 9.3 addl %eax, %eax
+# CHECK-NEXT: 1. 3 14.3 0.0 0.0 vpinsrb $0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2. 3 15.7 0.0 0.0 vpinsrb $1, %eax, %xmm0, %xmm0
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s
index 91d230dda15..5235eab449c 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s
@@ -1144,12 +1144,12 @@ vzeroupper
# CHECK-NEXT: 2 18 1.00 * vcvtsd2si (%rax), %rcx
# CHECK-NEXT: 1 4 1.00 vcvtsd2ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 9 1.00 * vcvtsd2ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 4 1.00 vcvtsi2sdl %ecx, %xmm0, %xmm2
-# CHECK-NEXT: 2 4 1.00 vcvtsi2sdq %rcx, %xmm0, %xmm2
+# CHECK-NEXT: 2 14 1.00 vcvtsi2sdl %ecx, %xmm0, %xmm2
+# CHECK-NEXT: 2 14 1.00 vcvtsi2sdq %rcx, %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * vcvtsi2sdl (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * vcvtsi2sdq (%rax), %xmm0, %xmm2
-# CHECK-NEXT: 2 4 1.00 vcvtsi2ssl %ecx, %xmm0, %xmm2
-# CHECK-NEXT: 2 4 1.00 vcvtsi2ssq %rcx, %xmm0, %xmm2
+# CHECK-NEXT: 2 14 1.00 vcvtsi2ssl %ecx, %xmm0, %xmm2
+# CHECK-NEXT: 2 14 1.00 vcvtsi2ssq %rcx, %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * vcvtsi2ssl (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * vcvtsi2ssq (%rax), %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2
@@ -1469,13 +1469,13 @@ vzeroupper
# CHECK-NEXT: 4 10 0.50 * vphsubsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 3 5 0.50 vphsubw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4 10 0.50 * vphsubw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 2 0.50 vpinsrb $1, %eax, %xmm1, %xmm2
+# CHECK-NEXT: 2 12 0.50 vpinsrb $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 2 6 0.50 * vpinsrb $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 2 0.50 vpinsrd $1, %eax, %xmm1, %xmm2
+# CHECK-NEXT: 2 12 0.50 vpinsrd $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 2 6 0.50 * vpinsrd $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 2 0.50 vpinsrq $1, %rax, %xmm1, %xmm2
+# CHECK-NEXT: 2 12 0.50 vpinsrq $1, %rax, %xmm1, %xmm2
# CHECK-NEXT: 2 6 0.50 * vpinsrq $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 2 0.50 vpinsrw $1, %eax, %xmm1, %xmm2
+# CHECK-NEXT: 2 12 0.50 vpinsrw $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 2 6 0.50 * vpinsrw $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 1.00 vpmaddubsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 9 1.00 * vpmaddubsw (%rax), %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s
index 7fcb7fb49f4..8916504b18e 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s
@@ -212,7 +212,7 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 9 1.00 * cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 cvtps2pi %xmm0, %mm2
# CHECK-NEXT: 1 9 1.00 * cvtps2pi (%rax), %mm2
-# CHECK-NEXT: 2 4 1.00 cvtsi2ssl %ecx, %xmm2
+# CHECK-NEXT: 2 14 1.00 cvtsi2ssl %ecx, %xmm2
# CHECK-NEXT: 2 13 1.00 cvtsi2ssq %rcx, %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2
@@ -269,7 +269,7 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 pavgw %mm0, %mm2
# CHECK-NEXT: 1 7 0.50 * pavgw (%rax), %mm2
# CHECK-NEXT: 2 13 1.00 pextrw $1, %mm0, %ecx
-# CHECK-NEXT: 2 2 0.50 pinsrw $1, %eax, %mm2
+# CHECK-NEXT: 2 12 0.50 pinsrw $1, %eax, %mm2
# CHECK-NEXT: 2 6 0.50 * pinsrw $1, (%rax), %mm2
# CHECK-NEXT: 1 2 0.50 pmaxsw %mm0, %mm2
# CHECK-NEXT: 1 7 0.50 * pmaxsw (%rax), %mm2
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s
index 792002e4418..093a6be0359 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s
@@ -444,7 +444,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 2 18 1.00 * cvtsd2si (%rax), %rcx
# CHECK-NEXT: 1 4 1.00 cvtsd2ss %xmm0, %xmm2
# CHECK-NEXT: 1 9 1.00 * cvtsd2ss (%rax), %xmm2
-# CHECK-NEXT: 2 4 1.00 cvtsi2sdl %ecx, %xmm2
+# CHECK-NEXT: 2 14 1.00 cvtsi2sdl %ecx, %xmm2
# CHECK-NEXT: 2 13 1.00 cvtsi2sdq %rcx, %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2
@@ -561,7 +561,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 pcmpgtw %xmm0, %xmm2
# CHECK-NEXT: 1 7 0.50 * pcmpgtw (%rax), %xmm2
# CHECK-NEXT: 2 13 1.00 pextrw $1, %xmm0, %ecx
-# CHECK-NEXT: 2 2 0.50 pinsrw $1, %eax, %xmm0
+# CHECK-NEXT: 2 12 0.50 pinsrw $1, %eax, %xmm0
# CHECK-NEXT: 2 6 0.50 * pinsrw $1, (%rax), %xmm0
# CHECK-NEXT: 1 4 1.00 pmaddwd %xmm0, %xmm2
# CHECK-NEXT: 1 9 1.00 * pmaddwd (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s
index 4aa29de694e..1e919a58d1c 100644
--- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s
@@ -191,11 +191,11 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 2 13 1.00 * pextrw $1, %xmm0, (%rax)
# CHECK-NEXT: 2 4 1.00 phminposuw %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * phminposuw (%rax), %xmm2
-# CHECK-NEXT: 2 2 0.50 pinsrb $1, %eax, %xmm1
+# CHECK-NEXT: 2 12 0.50 pinsrb $1, %eax, %xmm1
# CHECK-NEXT: 2 6 0.50 * pinsrb $1, (%rax), %xmm1
-# CHECK-NEXT: 2 2 0.50 pinsrd $1, %eax, %xmm1
+# CHECK-NEXT: 2 12 0.50 pinsrd $1, %eax, %xmm1
# CHECK-NEXT: 2 6 0.50 * pinsrd $1, (%rax), %xmm1
-# CHECK-NEXT: 2 2 0.50 pinsrq $1, %rax, %xmm1
+# CHECK-NEXT: 2 12 0.50 pinsrq $1, %rax, %xmm1
# CHECK-NEXT: 2 6 0.50 * pinsrq $1, (%rax), %xmm1
# CHECK-NEXT: 1 2 0.50 pmaxsb %xmm0, %xmm2
# CHECK-NEXT: 1 7 0.50 * pmaxsb (%rax), %xmm2
OpenPOWER on IntegriCloud