diff options
| author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2019-01-16 18:18:01 +0000 |
|---|---|---|
| committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2019-01-16 18:18:01 +0000 |
| commit | c5f0f5309e3d849a76d733ae35f58565d1c4eb65 (patch) | |
| tree | 108f6e17bc52adcb798daea01216a0cc87bdc955 /llvm/lib | |
| parent | 5a2bbe267ac0e6ecc6155727b3193394dc786146 (diff) | |
| download | bcm5719-llvm-c5f0f5309e3d849a76d733ae35f58565d1c4eb65.tar.gz bcm5719-llvm-c5f0f5309e3d849a76d733ae35f58565d1c4eb65.zip | |
[X86][BtVer2] Update latency of horizontal operations.
On Jaguar, horizontal adds/subs have local forwarding disabled.
That means we pay a compulsory extra cycle in the write-back stage, and the
value is not available until the end of that stage.
This patch changes the latency of horizontal operations by adding an extra
cycle. With this patch, latency numbers now match what is reported by perf.
I plan to send another patch to also 'fix' the latency of shuffle operations (on
Jaguar, local forwarding is disabled for vector shuffles too).
Differential Revision: https://reviews.llvm.org/D56777
llvm-svn: 351366
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 10 |
1 files changed, 6 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 33a6b01546d..adb69cc4408 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -174,6 +174,8 @@ multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW, } } +// Instructions that have local forwarding disabled have an extra +1cy latency. + // A folded store needs a cycle on the SAGU for the store data, // most RMW instructions don't need an extra uop. defm : X86WriteRes<WriteRMW, [JSAGU], 1, [1], 0>; @@ -575,10 +577,10 @@ defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU01, JVALU, JFPU0, JVIMUL], 3, [1, // Horizontal add/sub instructions. //////////////////////////////////////////////////////////////////////////////// -defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 3>; -defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 3, [2,2], 2>; -defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 1>; -defm : JWriteResFpuPair<WritePHAddX, [JFPU01, JVALU], 1>; +defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 4>; // +1cy latency. +defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 4, [2,2], 2>; // +1cy latency. +defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 2>; // +1cy latency. +defm : JWriteResFpuPair<WritePHAddX, [JFPU01, JVALU], 2>; // +1cy latency. defm : X86WriteResPairUnsupported<WritePHAddY>; //////////////////////////////////////////////////////////////////////////////// |

