diff options
| author | Tom Stellard <thomas.stellard@amd.com> | 2016-03-30 16:35:13 +0000 |
|---|---|---|
| committer | Tom Stellard <thomas.stellard@amd.com> | 2016-03-30 16:35:13 +0000 |
| commit | 1d5e6d4bdcc316137e6e8fe75b709f5464190f59 (patch) | |
| tree | 2750828aa7b2f29de80e49c35c366c162aea0714 /llvm/lib/Target/AMDGPU/SISchedule.td | |
| parent | 0bc954e3bc474383f87ab9e55ab1aa5ae996f9c0 (diff) | |
| download | bcm5719-llvm-1d5e6d4bdcc316137e6e8fe75b709f5464190f59.tar.gz bcm5719-llvm-1d5e6d4bdcc316137e6e8fe75b709f5464190f59.zip | |
AMDGPU/SI: Improve MachineSchedModel definition
This patch contains a few improvements to the model, including:
- Using a single resource with a defined buffer size for each memory unit.
- Setting the IssueWidth correctly.
- Fixing latency values for memory instructions.
shader-db stats:
16429 shaders in 3231 tests
Totals:
SGPRS: 318232 -> 312328 (-1.86 %)
VGPRS: 208996 -> 209346 (0.17 %)
Code Size: 7147044 -> 7166440 (0.27 %) bytes
LDS: 83 -> 83 (0.00 %) blocks
Scratch: 1862656 -> 1459200 (-21.66 %) bytes per wave
Max Waves: 49182 -> 49243 (0.12 %)
Wait states: 0 -> 0 (0.00 %)
Differential Revision: http://reviews.llvm.org/D18453
llvm-svn: 264877
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SISchedule.td')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SISchedule.td | 46 |
1 files changed, 27 insertions, 19 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index 40b37c4593b..4a46eb45c25 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -39,24 +39,32 @@ def Write64Bit : SchedWrite;
 // instructions and have VALU rates, but write to the SALU (i.e. VOPC
 // instructions)
 
-def SIFullSpeedModel : SchedMachineModel {
-  let CompleteModel = 0;
-}
-def SIQuarterSpeedModel : SchedMachineModel {
+class SISchedMachineModel : SchedMachineModel {
   let CompleteModel = 0;
+  let IssueWidth = 1;
 }
 
-// BufferSize = 0 means the processors are in-order.
-let BufferSize = 0 in {
+def SIFullSpeedModel : SISchedMachineModel;
+def SIQuarterSpeedModel : SISchedMachineModel;
 
 // XXX: Are the resource counts correct?
-def HWBranch : ProcResource<1>;
-def HWExport : ProcResource<7>; // Taken from S_WAITCNT
-def HWLGKM : ProcResource<31>; // Taken from S_WAITCNT
-def HWSALU : ProcResource<1>;
-def HWVMEM : ProcResource<15>; // Taken from S_WAITCNT
-def HWVALU : ProcResource<1>;
-
+def HWBranch : ProcResource<1> {
+  let BufferSize = 1;
+}
+def HWExport : ProcResource<1> {
+  let BufferSize = 7; // Taken from S_WAITCNT
+}
+def HWLGKM : ProcResource<1> {
+  let BufferSize = 31; // Taken from S_WAITCNT
+}
+def HWSALU : ProcResource<1> {
+  let BufferSize = 1;
+}
+def HWVMEM : ProcResource<1> {
+  let BufferSize = 15; // Taken from S_WAITCNT
+}
+def HWVALU : ProcResource<1> {
+  let BufferSize = 1;
 }
 
 class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
@@ -74,12 +82,12 @@ class HWVALUWriteRes<SchedWrite write, int latency> :
 // The latency values are 1 / (operations / cycle) / 4.
 multiclass SICommonWriteRes {
 
-  def : HWWriteRes<WriteBranch, [HWBranch], 100>; // XXX: Guessed ???
-  def : HWWriteRes<WriteExport, [HWExport], 100>; // XXX: Guessed ???
-  def : HWWriteRes<WriteLDS, [HWLGKM], 32>; // 2 - 64
-  def : HWWriteRes<WriteSALU, [HWSALU], 1>;
-  def : HWWriteRes<WriteSMEM, [HWLGKM], 10>; // XXX: Guessed ???
-  def : HWWriteRes<WriteVMEM, [HWVMEM], 450>; // 300 - 600
+  def : HWWriteRes<WriteBranch, [HWBranch], 8>;
+  def : HWWriteRes<WriteExport, [HWExport], 4>;
+  def : HWWriteRes<WriteLDS, [HWLGKM], 5>; // Can be between 2 and 64
+  def : HWWriteRes<WriteSALU, [HWSALU], 1>;
+  def : HWWriteRes<WriteSMEM, [HWLGKM], 5>;
+  def : HWWriteRes<WriteVMEM, [HWVMEM], 80>;
   def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???
 
   def : HWVALUWriteRes<Write32Bit, 1>;
```

