diff options
| author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2019-10-14 11:12:18 +0000 |
|---|---|---|
| committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2019-10-14 11:12:18 +0000 |
| commit | b744abb4f6a94926b1ed15b9c3bf56bce9aae28d (patch) | |
| tree | fcde1c0d051986afd527653708b3ddb7bdd35d71 /llvm/lib | |
| parent | 527a35e1555834c9890eefde8d9edd885f9ae57f (diff) | |
| download | bcm5719-llvm-b744abb4f6a94926b1ed15b9c3bf56bce9aae28d.tar.gz bcm5719-llvm-b744abb4f6a94926b1ed15b9c3bf56bce9aae28d.zip | |
[X86][BtVer2] Improved latency and throughput of float/vector loads and stores.
This patch introduces the following changes to the btver2 scheduling model:
- The number of micro opcodes for YMM loads and stores is now 2 (it was
incorrectly set to 1 for both aligned and misaligned loads/stores).
- Increased the number of AGU resource cycles for YMM loads and stores
to 2cy (instead of 1cy).
- Removed JFPU01 and JFPX from the list of resources consumed by pure
float/vector loads (no MMX).
I verified with llvm-exegesis that pure XMM/YMM loads are no-pipe. Those
are dispatched to the FPU but not really issues on JFPU01.
Differential Revision: https://reviews.llvm.org/D68871
llvm-svn: 374765
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 12 |
1 files changed, 6 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 3addf048dba..d0421d94ee0 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -501,14 +501,14 @@ defm : X86WriteRes<WriteFLD0, [JFPU1, JSTC], 3, [1,1], 1>; defm : X86WriteRes<WriteFLD1, [JFPU1, JSTC], 3, [1,1], 1>; defm : X86WriteRes<WriteFLDC, [JFPU1, JSTC], 3, [1,1], 1>; defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>; -defm : X86WriteRes<WriteFLoadX, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>; -defm : X86WriteRes<WriteFLoadY, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>; +defm : X86WriteRes<WriteFLoadX, [JLAGU], 5, [1], 1>; +defm : X86WriteRes<WriteFLoadY, [JLAGU], 5, [2], 2>; defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 2, 2], 1>; defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 4, 4], 2>; defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>; defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>; -defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>; +defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [2, 2, 2], 2>; defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>; defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>; defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>; @@ -657,8 +657,8 @@ defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>; //////////////////////////////////////////////////////////////////////////////// defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>; -defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>; -defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>; +defm : X86WriteRes<WriteVecLoadX, [JLAGU], 5, [1], 1>; +defm : X86WriteRes<WriteVecLoadY, [JLAGU], 5, [2], 2>; defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>; defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>; defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 2, 2], 1>; @@ -666,7 +666,7 @@ defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 4, 4], defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>; defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>; -defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>; +defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [2, 2, 2], 2>; defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>; defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>; defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>; |

