summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2019-10-14 11:12:18 +0000
committerAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2019-10-14 11:12:18 +0000
commitb744abb4f6a94926b1ed15b9c3bf56bce9aae28d (patch)
treefcde1c0d051986afd527653708b3ddb7bdd35d71 /llvm/lib
parent527a35e1555834c9890eefde8d9edd885f9ae57f (diff)
downloadbcm5719-llvm-b744abb4f6a94926b1ed15b9c3bf56bce9aae28d.tar.gz
bcm5719-llvm-b744abb4f6a94926b1ed15b9c3bf56bce9aae28d.zip
[X86][BtVer2] Improved latency and throughput of float/vector loads and stores.
This patch introduces the following changes to the btver2 scheduling model: - The number of micro opcodes for YMM loads and stores is now 2 (it was incorrectly set to 1 for both aligned and misaligned loads/stores). - Increased the number of AGU resource cycles for YMM loads and stores to 2cy (instead of 1cy). - Removed JFPU01 and JFPX from the list of resources consumed by pure float/vector loads (no MMX). I verified with llvm-exegesis that pure XMM/YMM loads are no-pipe. Those are dispatched to the FPU but not really issues on JFPU01. Differential Revision: https://reviews.llvm.org/D68871 llvm-svn: 374765
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBtVer2.td12
1 files changed, 6 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 3addf048dba..d0421d94ee0 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -501,14 +501,14 @@ defm : X86WriteRes<WriteFLD0, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLD1, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLDC, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteFLoadX, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteFLoadY, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFLoadX, [JLAGU], 5, [1], 1>;
+defm : X86WriteRes<WriteFLoadY, [JLAGU], 5, [2], 2>;
defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 2, 2], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 4, 4], 2>;
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [2, 2, 2], 2>;
defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>;
@@ -657,8 +657,8 @@ defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecLoadX, [JLAGU], 5, [1], 1>;
+defm : X86WriteRes<WriteVecLoadY, [JLAGU], 5, [2], 2>;
defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 2, 2], 1>;
@@ -666,7 +666,7 @@ defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 4, 4],
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [2, 2, 2], 2>;
defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
OpenPOWER on IntegriCloud