summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SISchedule.td
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2016-03-30 16:35:13 +0000
committerTom Stellard <thomas.stellard@amd.com>2016-03-30 16:35:13 +0000
commit1d5e6d4bdcc316137e6e8fe75b709f5464190f59 (patch)
tree2750828aa7b2f29de80e49c35c366c162aea0714 /llvm/lib/Target/AMDGPU/SISchedule.td
parent0bc954e3bc474383f87ab9e55ab1aa5ae996f9c0 (diff)
downloadbcm5719-llvm-1d5e6d4bdcc316137e6e8fe75b709f5464190f59.tar.gz
bcm5719-llvm-1d5e6d4bdcc316137e6e8fe75b709f5464190f59.zip
AMDGPU/SI: Improve MachineSchedModel definition
This patch contains a few improvements to the model, including: - Using a single resource with a defined buffers size for each memory unit. - Setting the IssueWidth correctly. - Fixing latency values for memory instructions. shader-db stats: 16429 shaders in 3231 tests Totals: SGPRS: 318232 -> 312328 (-1.86 %) VGPRS: 208996 -> 209346 (0.17 %) Code Size: 7147044 -> 7166440 (0.27 %) bytes LDS: 83 -> 83 (0.00 %) blocks Scratch: 1862656 -> 1459200 (-21.66 %) bytes per wave Max Waves: 49182 -> 49243 (0.12 %) Wait states: 0 -> 0 (0.00 %)A Differential Revision: http://reviews.llvm.org/D18453 llvm-svn: 264877
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SISchedule.td')
-rw-r--r--llvm/lib/Target/AMDGPU/SISchedule.td46
1 files changed, 27 insertions, 19 deletions
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index 40b37c4593b..4a46eb45c25 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -39,24 +39,32 @@ def Write64Bit : SchedWrite;
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
// instructions)
-def SIFullSpeedModel : SchedMachineModel {
- let CompleteModel = 0;
-}
-def SIQuarterSpeedModel : SchedMachineModel {
+class SISchedMachineModel : SchedMachineModel {
let CompleteModel = 0;
+ let IssueWidth = 1;
}
-// BufferSize = 0 means the processors are in-order.
-let BufferSize = 0 in {
+def SIFullSpeedModel : SISchedMachineModel;
+def SIQuarterSpeedModel : SISchedMachineModel;
// XXX: Are the resource counts correct?
-def HWBranch : ProcResource<1>;
-def HWExport : ProcResource<7>; // Taken from S_WAITCNT
-def HWLGKM : ProcResource<31>; // Taken from S_WAITCNT
-def HWSALU : ProcResource<1>;
-def HWVMEM : ProcResource<15>; // Taken from S_WAITCNT
-def HWVALU : ProcResource<1>;
-
+def HWBranch : ProcResource<1> {
+ let BufferSize = 1;
+}
+def HWExport : ProcResource<1> {
+ let BufferSize = 7; // Taken from S_WAITCNT
+}
+def HWLGKM : ProcResource<1> {
+ let BufferSize = 31; // Taken from S_WAITCNT
+}
+def HWSALU : ProcResource<1> {
+ let BufferSize = 1;
+}
+def HWVMEM : ProcResource<1> {
+ let BufferSize = 15; // Taken from S_WAITCNT
+}
+def HWVALU : ProcResource<1> {
+ let BufferSize = 1;
}
class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
@@ -74,12 +82,12 @@ class HWVALUWriteRes<SchedWrite write, int latency> :
// The latency values are 1 / (operations / cycle) / 4.
multiclass SICommonWriteRes {
- def : HWWriteRes<WriteBranch, [HWBranch], 100>; // XXX: Guessed ???
- def : HWWriteRes<WriteExport, [HWExport], 100>; // XXX: Guessed ???
- def : HWWriteRes<WriteLDS, [HWLGKM], 32>; // 2 - 64
- def : HWWriteRes<WriteSALU, [HWSALU], 1>;
- def : HWWriteRes<WriteSMEM, [HWLGKM], 10>; // XXX: Guessed ???
- def : HWWriteRes<WriteVMEM, [HWVMEM], 450>; // 300 - 600
+ def : HWWriteRes<WriteBranch, [HWBranch], 8>;
+ def : HWWriteRes<WriteExport, [HWExport], 4>;
+ def : HWWriteRes<WriteLDS, [HWLGKM], 5>; // Can be between 2 and 64
+ def : HWWriteRes<WriteSALU, [HWSALU], 1>;
+ def : HWWriteRes<WriteSMEM, [HWLGKM], 5>;
+ def : HWWriteRes<WriteVMEM, [HWVMEM], 80>;
def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???
def : HWVALUWriteRes<Write32Bit, 1>;
OpenPOWER on IntegriCloud