[MC][X86] Correctly model additional operand latency caused by transfer delays from the integer to the floating point unit.

This patch adds a new ReadAdvance definition named ReadInt2Fpu. ReadInt2Fpu allows x86 scheduling models to accurately describe delays caused by data transfers from the integer unit to the floating point unit. ReadInt2Fpu currently defaults to a delay of zero cycles (i.e. no delay) for all x86 models excluding BtVer2. That means, this patch is only a functional change for the Jaguar cpu model only. Tablegen definitions for instructions (V)PINSR* have been updated to account for the new ReadInt2Fpu. That read is mapped to the the GPR input operand. On Jaguar, int-to-fpu transfers are modeled as a +6cy delay. Before this patch, that extra delay was added to the opcode latency. In practice, the insert opcode only executes for 1cy. Most of the actual latency is actually contributed by the so-called operand-latency. According to the AMD SOG for family 16h, (V)PINSR* latency is defined by expression f+1, where f is defined as a forwarding delay from the integer unit to the fpu. When printing instruction latency from MCA (see InstructionInfoView.cpp) and LLC (only when flag -print-schedule is speified), we now need to account for any extra forwarding delays. We do this by checking if scheduling classes declare any negative ReadAdvance entries. Quoting a code comment in TargetSchedule.td: "A negative advance effectively increases latency, which may be used for cross-domain stalls". When computing the instruction latency for the purpose of our scheduling tests, we now add any extra delay to the formula. This avoids regressing existing codegen and mca schedule tests. It comes with the cost of an extra (but very simple) hook in MCSchedModel. Differential Revision: https://reviews.llvm.org/D57056 llvm-svn: 351965
author: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2019-01-23 16:35:07 +0000
committer: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2019-01-23 16:35:07 +0000
commit: d768d355158d067374a2ee02755b6b44d2b71053 (patch)
tree: ed3fc02f01455bd4b92300e140bc831bd274f055 /llvm/test
parent: 21ed868390fee0faca2fb86d52ec41ec65101bd6 (diff)
download: bcm5719-llvm-d768d355158d067374a2ee02755b6b44d2b71053.tar.gz
bcm5719-llvm-d768d355158d067374a2ee02755b6b44d2b71053.zip
4 files changed, 32 insertions, 32 deletions
diff --git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll
index 51dc5e102ff..d423b9a2a90 100644
--- a/llvm/test/CodeGen/X86/mmx-schedule.ll
+++ b/llvm/test/CodeGen/X86/mmx-schedule.ll
@@ -3887,8 +3887,8 @@ define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize {
 ;
 ; BTVER2-LABEL: test_pinsrw:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    pinsrw $0, %edi, %mm0 # sched: [7:0.50]
 ; BTVER2-NEXT:    movswl (%rsi), %eax # sched: [4:1.00]
+; BTVER2-NEXT:    pinsrw $0, %edi, %mm0 # sched: [7:0.50]
 ; BTVER2-NEXT:    pinsrw $1, %eax, %mm0 # sched: [7:0.50]
 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll
index ea606463fc1..4870434a8ae 100644
--- a/llvm/test/CodeGen/X86/sse41-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse41-schedule.ll
@@ -2679,15 +2679,15 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
 ;
 ; BTVER2-SSE-LABEL: test_pinsrq:
 ; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [7:0.50]
 ; BTVER2-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [4:1.00]
+; BTVER2-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [7:0.50]
 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
 ;
 ; BTVER2-LABEL: test_pinsrq:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [7:0.50]
 ; BTVER2-NEXT:    vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [4:1.00]
+; BTVER2-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [7:0.50]
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-1.s
index 9e44702ae79..398a52a8479 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-1.s
@@ -27,12 +27,12 @@ vpinsrq $1, %rax, %xmm0, %xmm0
 
 # CHECK:      Iterations:        500
 # CHECK-NEXT: Instructions:      1000
-# CHECK-NEXT: Total Cycles:      7003
+# CHECK-NEXT: Total Cycles:      1003
 # CHECK-NEXT: Total uOps:        2000
 
 # CHECK:      Dispatch Width:    2
-# CHECK-NEXT: uOps Per Cycle:    0.29
-# CHECK-NEXT: IPC:               0.14
+# CHECK-NEXT: uOps Per Cycle:    1.99
+# CHECK-NEXT: IPC:               1.00
 # CHECK-NEXT: Block RThroughput: 2.0
 
 # CHECK:      Instruction Info:
@@ -76,12 +76,12 @@ vpinsrq $1, %rax, %xmm0, %xmm0
 
 # CHECK:      Iterations:        500
 # CHECK-NEXT: Instructions:      1000
-# CHECK-NEXT: Total Cycles:      7003
+# CHECK-NEXT: Total Cycles:      1003
 # CHECK-NEXT: Total uOps:        2000
 
 # CHECK:      Dispatch Width:    2
-# CHECK-NEXT: uOps Per Cycle:    0.29
-# CHECK-NEXT: IPC:               0.14
+# CHECK-NEXT: uOps Per Cycle:    1.99
+# CHECK-NEXT: IPC:               1.00
 # CHECK-NEXT: Block RThroughput: 2.0
 
 # CHECK:      Instruction Info:
@@ -125,12 +125,12 @@ vpinsrq $1, %rax, %xmm0, %xmm0
 
 # CHECK:      Iterations:        500
 # CHECK-NEXT: Instructions:      1000
-# CHECK-NEXT: Total Cycles:      7003
+# CHECK-NEXT: Total Cycles:      1003
 # CHECK-NEXT: Total uOps:        2000
 
 # CHECK:      Dispatch Width:    2
-# CHECK-NEXT: uOps Per Cycle:    0.29
-# CHECK-NEXT: IPC:               0.14
+# CHECK-NEXT: uOps Per Cycle:    1.99
+# CHECK-NEXT: IPC:               1.00
 # CHECK-NEXT: Block RThroughput: 2.0
 
 # CHECK:      Instruction Info:
@@ -174,12 +174,12 @@ vpinsrq $1, %rax, %xmm0, %xmm0
 
 # CHECK:      Iterations:        500
 # CHECK-NEXT: Instructions:      1000
-# CHECK-NEXT: Total Cycles:      7003
+# CHECK-NEXT: Total Cycles:      1003
 # CHECK-NEXT: Total uOps:        2000
 
 # CHECK:      Dispatch Width:    2
-# CHECK-NEXT: uOps Per Cycle:    0.29
-# CHECK-NEXT: IPC:               0.14
+# CHECK-NEXT: uOps Per Cycle:    1.99
+# CHECK-NEXT: IPC:               1.00
 # CHECK-NEXT: Block RThroughput: 2.0
 
 # CHECK:      Instruction Info:
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-3.s
index 4e130be8597..00c13f9ef59 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-3.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-3.s
@@ -9,12 +9,12 @@ vpinsrb $1, %eax, %xmm0, %xmm0
 
 # CHECK:      Iterations:        500
 # CHECK-NEXT: Instructions:      1500
-# CHECK-NEXT: Total Cycles:      7004
+# CHECK-NEXT: Total Cycles:      1509
 # CHECK-NEXT: Total uOps:        2500
 
 # CHECK:      Dispatch Width:    2
-# CHECK-NEXT: uOps Per Cycle:    0.36
-# CHECK-NEXT: IPC:               0.21
+# CHECK-NEXT: uOps Per Cycle:    1.66
+# CHECK-NEXT: IPC:               0.99
 # CHECK-NEXT: Block RThroughput: 2.5
 
 # CHECK:      Instruction Info:
@@ -57,18 +57,18 @@ vpinsrb $1, %eax, %xmm0, %xmm0
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -     1.00    -      -     vpinsrb	$1, %eax, %xmm0, %xmm0
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456789          0123456789
-# CHECK-NEXT: Index     0123456789          0123456789          012345
+# CHECK-NEXT:                     01234567
+# CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeER .    .    .    .    .    .    .    .    .   addl	%eax, %eax
-# CHECK-NEXT: [0,1]     .DeeeeeeeER    .    .    .    .    .    .    .   vpinsrb	$0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [0,2]     . D======eeeeeeeER  .    .    .    .    .    .   vpinsrb	$1, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [1,0]     .  DeE-----------R  .    .    .    .    .    .   addl	%eax, %eax
-# CHECK-NEXT: [1,1]     .   D===========eeeeeeeER.    .    .    .    .   vpinsrb	$0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [1,2]     .    D=================eeeeeeeER   .    .    .   vpinsrb	$1, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [2,0]     .    .DeE----------------------R   .    .    .   addl	%eax, %eax
-# CHECK-NEXT: [2,1]     .    . D======================eeeeeeeER .    .   vpinsrb	$0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: [2,2]     .    .  D============================eeeeeeeER   vpinsrb	$1, %eax, %xmm0, %xmm0
+# CHECK:      [0,0]     DeER .    .    . .   addl	%eax, %eax
+# CHECK-NEXT: [0,1]     .D======eER    . .   vpinsrb	$0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [0,2]     . D======eER   . .   vpinsrb	$1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [1,0]     .  DeE-----R   . .   addl	%eax, %eax
+# CHECK-NEXT: [1,1]     .   D======eER . .   vpinsrb	$0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [1,2]     .    D======eER. .   vpinsrb	$1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [2,0]     .    .DeE-----R. .   addl	%eax, %eax
+# CHECK-NEXT: [2,1]     .    . D======eER.   vpinsrb	$0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: [2,2]     .    .  D======eER   vpinsrb	$1, %eax, %xmm0, %xmm0
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -77,6 +77,6 @@ vpinsrb $1, %eax, %xmm0, %xmm0
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     3     1.0    1.0    11.0      addl	%eax, %eax
-# CHECK-NEXT: 1.     3     12.0   0.0    0.0       vpinsrb	$0, %eax, %xmm0, %xmm0
-# CHECK-NEXT: 2.     3     18.0   0.0    0.0       vpinsrb	$1, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 0.     3     1.0    1.0    3.3       addl	%eax, %eax
+# CHECK-NEXT: 1.     3     7.0    0.0    0.0       vpinsrb	$0, %eax, %xmm0, %xmm0
+# CHECK-NEXT: 2.     3     7.0    0.0    0.0       vpinsrb	$1, %eax, %xmm0, %xmm0
author	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>	2019-01-23 16:35:07 +0000
committer	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>	2019-01-23 16:35:07 +0000
commit	d768d355158d067374a2ee02755b6b44d2b71053 (patch)
tree	ed3fc02f01455bd4b92300e140bc831bd274f055 /llvm/test
parent	21ed868390fee0faca2fb86d52ec41ec65101bd6 (diff)
download	bcm5719-llvm-d768d355158d067374a2ee02755b6b44d2b71053.tar.gz bcm5719-llvm-d768d355158d067374a2ee02755b6b44d2b71053.zip