summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorMarek Olsak <marek.olsak@amd.com>2016-12-09 19:49:54 +0000
committerMarek Olsak <marek.olsak@amd.com>2016-12-09 19:49:54 +0000
commit0f55fbae6c7f40938b27571a20752a376ae49eb1 (patch)
tree528ef08804bc9a7243957b77561bef09bacbb01b /llvm/lib/Target
parent693e9be9181eb8b3cc91e254a2acb49accafbd9c (diff)
downloadbcm5719-llvm-0f55fbae6c7f40938b27571a20752a376ae49eb1.tar.gz
bcm5719-llvm-0f55fbae6c7f40938b27571a20752a376ae49eb1.zip
AMDGPU/SI: Don't reserve XNACK when it's disabled
Summary: This frees 2 additional scalar registers. These are results from all of my 3 patches combined: Polaris: Spilled SGPRs: 2231 -> 1517 (-32.00 %) Tonga: Spilled SGPRs: 3829 -> 2608 (-31.89 %) Spilled VGPRs: 100 -> 84 (-16.00 %) Tonga even spills SGPRs via VGPRs to scratch. That's a compute shader limited to 64 VGPRs. Reviewers: tstellarAMD Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye Differential Revision: https://reviews.llvm.org/D27151 llvm-svn: 289262
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td7
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp2
2 files changed, 8 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index fe67f59feb6..f057a3a8bd9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -73,6 +73,13 @@ def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
"Support unaligned scratch loads and stores"
>;
+// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
+// XNACK. The current default kernel driver setting is:
+// - graphics ring: XNACK disabled
+// - compute ring: XNACK enabled
+//
+// If XNACK is enabled, the VMEM latency can be worse.
+// If XNACK is disabled, the 2 SGPRs can be used for general purposes.
def FeatureXNACK : SubtargetFeature<"xnack",
"EnableXNACK",
"true",
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 0fdd203b3d0..41633a2b6a0 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1188,7 +1188,7 @@ unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST,
return 4; // FLAT_SCRATCH, VCC (in that order)
}
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (ST.isXNACKEnabled())
return 4; // XNACK, VCC (in that order)
return 2; // VCC.
OpenPOWER on IntegriCloud