diff options
| author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2018-08-31 16:05:48 +0000 |
|---|---|---|
| committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2018-08-31 16:05:48 +0000 |
| commit | a59ec4efa02c6b50ce3dd8248a2f93efedb1aeb3 (patch) | |
| tree | 7ac221f8659c71c6b95f144be78c7adb0bbed636 | |
| parent | 3785e84cf2cdafb7c7adb0aabe896795acaee6e4 (diff) | |
| download | bcm5719-llvm-a59ec4efa02c6b50ce3dd8248a2f93efedb1aeb3.tar.gz bcm5719-llvm-a59ec4efa02c6b50ce3dd8248a2f93efedb1aeb3.zip | |
[X86][BtVer2] Remove wrong ReadAdvance from AVX vbroadcast(ss|sd|f128) instructions.
The presence of a ReadAdvance for input operand #0 is problematic
because it changes the input latency of the register used as the base address
for the folded load.
A broadcast cannot start executing if the load address hasn't been computed yet.
In the llvm-mca example, the VBROADCASTSS is dependent on the address generated
by the LEAQ. That means it cannot start until LEAQ reaches the write-back
stage. If we apply ReadAdvance, then we wrongly assume that the load can start 3
cycles in advance.
Differential Revision: https://reviews.llvm.org/D51534
llvm-svn: 341222
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 6 | ||||
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/BtVer2/vbroadcast-operand-latency.s | 20 |
2 files changed, 13 insertions, 13 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index d57db9886c8..84780a4a20e 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -570,9 +570,9 @@ def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
   let ResourceCycles = [1, 2, 4];
   let NumMicroOps = 2;
 }
-def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
-                                                         VBROADCASTSSYrm,
-                                                         VBROADCASTF128)>;
+def : InstRW<[JWriteVBROADCASTYLd], (instrs VBROADCASTSDYrm,
+                                            VBROADCASTSSYrm,
+                                            VBROADCASTF128)>;
 
 def JWriteJVZEROALL: SchedWriteRes<[]> {
   let Latency = 90;
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vbroadcast-operand-latency.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vbroadcast-operand-latency.s
index e79cd9b10e2..f9fd2c3732a 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/vbroadcast-operand-latency.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vbroadcast-operand-latency.s
@@ -6,7 +6,7 @@ vbroadcastss (%rax), %ymm0
 
 # CHECK: Iterations: 100
 # CHECK-NEXT: Instructions: 200
-# CHECK-NEXT: Total Cycles: 208
+# CHECK-NEXT: Total Cycles: 209
 # CHECK-NEXT: Total uOps: 300
 
 # CHECK: Dispatch Width: 2
@@ -52,15 +52,15 @@ vbroadcastss (%rax), %ymm0
 # CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 1.00 - - - - - - vbroadcastss (%rax), %ymm0
 
 # CHECK: Timeline view:
-# CHECK-NEXT: 0123
+# CHECK-NEXT: 01234
 # CHECK-NEXT: Index 0123456789
 
-# CHECK: [0,0] DeeER. . . leaq 8(%rsp,%rdi,2), %rax
-# CHECK-NEXT: [0,1] .DeeeeeeER. . vbroadcastss (%rax), %ymm0
-# CHECK-NEXT: [1,0] . DeeE---R. . leaq 8(%rsp,%rdi,2), %rax
-# CHECK-NEXT: [1,1] . DeeeeeeER . vbroadcastss (%rax), %ymm0
-# CHECK-NEXT: [2,0] . DeeE---R . leaq 8(%rsp,%rdi,2), %rax
-# CHECK-NEXT: [2,1] . DeeeeeeER vbroadcastss (%rax), %ymm0
+# CHECK: [0,0] DeeER. . . leaq 8(%rsp,%rdi,2), %rax
+# CHECK-NEXT: [0,1] .D=eeeeeeER . vbroadcastss (%rax), %ymm0
+# CHECK-NEXT: [1,0] . DeeE----R . leaq 8(%rsp,%rdi,2), %rax
+# CHECK-NEXT: [1,1] . D=eeeeeeER . vbroadcastss (%rax), %ymm0
+# CHECK-NEXT: [2,0] . DeeE----R . leaq 8(%rsp,%rdi,2), %rax
+# CHECK-NEXT: [2,1] . D=eeeeeeER vbroadcastss (%rax), %ymm0
 
 # CHECK: Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -69,5 +69,5 @@ vbroadcastss (%rax), %ymm0
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 3 1.0 1.0 2.0 leaq 8(%rsp,%rdi,2), %rax
-# CHECK-NEXT: 1. 3 1.0 1.0 0.0 vbroadcastss (%rax), %ymm0
+# CHECK-NEXT: 0. 3 1.0 1.0 2.7 leaq 8(%rsp,%rdi,2), %rax
+# CHECK-NEXT: 1. 3 2.0 0.0 0.0 vbroadcastss (%rax), %ymm0
```

