summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp 178
1 files changed, 173 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 53417fc6d76..78b451f9847 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -322,12 +323,179 @@ unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
return 1;
}
-unsigned SISubtarget::getMaxNumSGPRs() const {
+// Looks up the minimum SGPR count paired with \p WavesPerEU waves per
+// execution unit. The table differs between VOLCANIC_ISLANDS-and-newer
+// subtargets and older ones; wave counts without an entry map to 97.
+unsigned SISubtarget::getMinNumSGPRs(unsigned WavesPerEU) const {
+  const bool IsVIOrNewer =
+      getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+
+  // 0 and 10 waves map to 0 in both tables.
+  if (WavesPerEU == 0 || WavesPerEU == 10)
+    return 0;
+
+  if (IsVIOrNewer) {
+    if (WavesPerEU == 9)
+      return 0;
+    return WavesPerEU == 8 ? 81 : 97;
+  }
+
+  // Older generations have a distinct entry for each of 5..9 waves.
+  switch (WavesPerEU) {
+  case 9:
+    return 49;
+  case 8:
+    return 57;
+  case 7:
+    return 65;
+  case 6:
+    return 73;
+  case 5:
+    return 81;
+  }
+  return 97;
+}
+
+// Looks up the maximum SGPR count paired with \p WavesPerEU waves per
+// execution unit. \p Addressable is consulted only on VOLCANIC_ISLANDS and
+// newer, and only for wave counts without an explicit entry: there it selects
+// getAddressableNumSGPRs() instead of the constant 112.
+unsigned SISubtarget::getMaxNumSGPRs(unsigned WavesPerEU,
+                                     bool Addressable) const {
+  const bool IsVIOrNewer =
+      getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+
+  if (IsVIOrNewer) {
+    if (WavesPerEU == 0 || WavesPerEU == 9 || WavesPerEU == 10)
+      return 80;
+    if (WavesPerEU == 8)
+      return 96;
+    if (Addressable)
+      return getAddressableNumSGPRs();
+    return 112;
+  }
+
+  // Older generations: one entry per wave count from 5 through 10 (and 0).
+  switch (WavesPerEU) {
+  case 0:
+  case 10:
+    return 48;
+  case 9:
+    return 56;
+  case 8:
+    return 64;
+  case 7:
+    return 72;
+  case 6:
+    return 80;
+  case 5:
+    return 96;
+  }
+  return getAddressableNumSGPRs();
+}
+
+// Returns how many SGPRs are set aside for special registers in \p MF. The
+// count depends on whether the function has a flat scratch init, on the
+// subtarget generation, and on whether XNACK is enabled.
+unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
+  const SIMachineFunctionInfo &FuncInfo =
+      *MF.getInfo<SIMachineFunctionInfo>();
+  const bool HasFlatScratchInit = FuncInfo.hasFlatScratchInit();
+
+  // FLAT_SCRATCH, XNACK, VCC (in that order).
+  if (HasFlatScratchInit &&
+      getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+    return 6;
+
+  // FLAT_SCRATCH, VCC (in that order).
+  if (HasFlatScratchInit && getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
+    return 4;
+
+  // Either XNACK, VCC (in that order), or VCC alone.
+  return isXNACKEnabled() ? 4 : 2;
+}
+
+// Computes the SGPR budget for \p MF. The starting point is the table limit
+// for the function's minimum waves-per-EU value; an "amdgpu-num-sgpr"
+// attribute may replace it if the requested value passes every check below.
+// The reserved SGPR count is then subtracted and the result is capped at the
+// addressable limit.
+unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
+ const Function &F = *MF.getFunction();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+
+ // Compute maximum number of SGPRs function can use using default/requested
+ // minimum number of waves per execution unit.
+ // WavesPerEU.first is used for the upper limits here; WavesPerEU.second is
+ // only used for the lower-bound check on the requested value further down.
+ std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
+ unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
+ unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);
+
+ // Check if maximum number of SGPRs was explicitly requested using
+ // "amdgpu-num-sgpr" attribute.
+ if (F.hasFnAttribute("amdgpu-num-sgpr")) {
+ // MaxNumSGPRs is passed as the third argument; presumably the value
+ // returned when the attribute cannot be parsed - TODO confirm against
+ // AMDGPU::getIntegerAttribute.
+ unsigned Requested = AMDGPU::getIntegerAttribute(
+ F, "amdgpu-num-sgpr", MaxNumSGPRs);
+
+ // From here on, Requested == 0 means "ignore the attribute": each failed
+ // check resets it to 0 and every later check is skipped for a zero value.
+
+ // Make sure requested value does not violate subtarget's specifications.
+ if (Requested && (Requested <= getReservedNumSGPRs(MF)))
+ Requested = 0;
+
+ // If more SGPRs are required to support the input user/system SGPRs,
+ // increase to accommodate them.
+ //
+ // FIXME: This really ends up using the requested number of SGPRs + number
+ // of reserved special registers in total. Theoretically you could re-use
+ // the last input registers for these special registers, but this would
+ // require a lot of complexity to deal with the weird aliasing.
+ unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
+ if (Requested && Requested < InputNumSGPRs)
+ Requested = InputNumSGPRs;
+
+ // Make sure requested value is compatible with values implied by
+ // default/requested minimum/maximum number of waves per execution unit.
+ if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
+ Requested = 0;
+ // The lower-bound check only applies when WavesPerEU.second is non-zero.
+ if (WavesPerEU.second &&
+ Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
+ Requested = 0;
+
+ if (Requested)
+ MaxNumSGPRs = Requested;
+ }
+
+ // When hasSGPRInitBug() is true the fixed count replaces whatever was
+ // computed above, including an accepted attribute value.
if (hasSGPRInitBug())
- return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+
+ // Subtract the reserved SGPRs from the budget, then cap the result at the
+ // limit computed with Addressable = true.
+ return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
+ MaxAddressableNumSGPRs);
+}
+
+// Looks up the minimum VGPR count paired with \p WavesPerEU waves per
+// execution unit. Wave counts outside 0..10 use the same value as 1 wave.
+unsigned SISubtarget::getMinNumVGPRs(unsigned WavesPerEU) const {
+  // Entry I holds the result for WavesPerEU == I.
+  static const unsigned MinVGPRs[] = {
+    /* 0 */ 0,  /* 1 */ 129, /* 2 */ 85, /* 3 */ 65, /* 4 */ 49, /* 5 */ 41,
+    /* 6 */ 37, /* 7 */ 33,  /* 8 */ 29, /* 9 */ 25, /* 10 */ 0
+  };
+  const unsigned NumEntries = sizeof(MinVGPRs) / sizeof(MinVGPRs[0]);
+
+  if (WavesPerEU >= NumEntries)
+    return 129;
+  return MinVGPRs[WavesPerEU];
+}
- if (getGeneration() >= VOLCANIC_ISLANDS)
- return 102;
+// Looks up the maximum VGPR count paired with \p WavesPerEU waves per
+// execution unit. 0 waves shares the entry for 10 waves; 1 wave and any
+// count above 10 yield getTotalNumVGPRs().
+unsigned SISubtarget::getMaxNumVGPRs(unsigned WavesPerEU) const {
+  if (WavesPerEU == 0)
+    return 24;
+
+  // Entry I holds the limit for I + 2 waves, i.e. 2 through 10.
+  static const unsigned Limits[] = {128, 84, 64, 48, 40, 36, 32, 28, 24};
+  const unsigned NumLimits = sizeof(Limits) / sizeof(Limits[0]);
+
+  if (WavesPerEU < 2 || WavesPerEU - 2 >= NumLimits)
+    return getTotalNumVGPRs();
+  return Limits[WavesPerEU - 2];
+}
+
+// Computes the VGPR budget for \p MF. The starting point is the table limit
+// for the function's minimum waves-per-EU value; an "amdgpu-num-vgpr"
+// attribute may replace it if the requested value passes every check below.
+// The reserved VGPR count is subtracted from the result.
+unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
+ const Function &F = *MF.getFunction();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+
+ // Compute maximum number of VGPRs function can use using default/requested
+ // minimum number of waves per execution unit.
+ std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
+ unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
+
+ // Check if maximum number of VGPRs was explicitly requested using
+ // "amdgpu-num-vgpr" attribute.
+ if (F.hasFnAttribute("amdgpu-num-vgpr")) {
+ // MaxNumVGPRs is passed as the third argument; presumably the value
+ // returned when the attribute cannot be parsed - TODO confirm against
+ // AMDGPU::getIntegerAttribute.
+ unsigned Requested = AMDGPU::getIntegerAttribute(
+ F, "amdgpu-num-vgpr", MaxNumVGPRs);
+
+ // From here on, Requested == 0 means "ignore the attribute": each failed
+ // check resets it to 0 and every later check is skipped for a zero value.
+
+ // Make sure requested value does not violate subtarget's specifications.
+ if (Requested && Requested <= getReservedNumVGPRs(MF))
+ Requested = 0;
+
+ // Make sure requested value is compatible with values implied by
+ // default/requested minimum/maximum number of waves per execution unit.
+ if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
+ Requested = 0;
+ // The lower-bound check only applies when WavesPerEU.second is non-zero.
+ if (WavesPerEU.second &&
+ Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
+ Requested = 0;
+
+ if (Requested)
+ MaxNumVGPRs = Requested;
+ }
- return 104;
+ // Subtract the reserved VGPRs from the budget.
+ return MaxNumVGPRs - getReservedNumVGPRs(MF);
}
OpenPOWER on IntegriCloud