diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2015-01-29 16:55:25 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2015-01-29 16:55:25 +0000 |
commit | 83f0bcef7a3e96d022f8d31fd87c8363fd4f9a00 (patch) | |
tree | f29b52cf36d73c5cdc161b00203816b987f329a5 /llvm/lib | |
parent | e75aa4983c8bb64f63c8742d2e3dc32c6966b74b (diff) | |
download | bcm5719-llvm-83f0bcef7a3e96d022f8d31fd87c8363fd4f9a00.tar.gz bcm5719-llvm-83f0bcef7a3e96d022f8d31fd87c8363fd4f9a00.zip |
R600/SI: Define a schedule model and enable the generic machine scheduler
The schedule model is not complete yet, and could be improved.
llvm-svn: 227461
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/R600/AMDGPUSubtarget.cpp | 19 | ||||
-rw-r--r-- | llvm/lib/Target/R600/AMDGPUSubtarget.h | 14 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SIRegisterInfo.cpp | 55 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SIRegisterInfo.h | 12 |
4 files changed, 94 insertions, 6 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp index 39cc383c89e..541dbab709d 100644 --- a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp @@ -20,6 +20,7 @@ #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/MachineScheduler.h" using namespace llvm; @@ -111,3 +112,21 @@ bool AMDGPUSubtarget::isVGPRSpillingEnabled( const SIMachineFunctionInfo *MFI) const { return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling; } + +void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, + MachineInstr *end, + unsigned NumRegionInstrs) const { + if (getGeneration() >= SOUTHERN_ISLANDS) { + + // Track register pressure so the scheduler can try to decrease + // pressure once register usage is above the threshold defined by + // SIRegisterInfo::getRegPressureSetLimit() + Policy.ShouldTrackPressure = true; + + // Enabling both top down and bottom up scheduling seems to give us less + // register spills than just using one of these approaches on its own. 
+ Policy.OnlyTopDown = false; + Policy.OnlyBottomUp = false; + } +} diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.h b/llvm/lib/Target/R600/AMDGPUSubtarget.h index d639f7c1922..389cc8caf27 100644 --- a/llvm/lib/Target/R600/AMDGPUSubtarget.h +++ b/llvm/lib/Target/R600/AMDGPUSubtarget.h @@ -204,9 +204,13 @@ public: unsigned getAmdKernelCodeChipID() const; bool enableMachineScheduler() const override { - return getGeneration() <= NORTHERN_ISLANDS; + return true; } + void overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, MachineInstr *end, + unsigned NumRegionInstrs) const override; + // Helper functions to simplify if statements bool isTargetELF() const { return false; @@ -226,6 +230,14 @@ public: return TargetTriple.getOS() == Triple::AMDHSA; } bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const; + + unsigned getMaxWavesPerCU() const { + if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) + return 10; + + // FIXME: Not sure what this is for other subtargets. + llvm_unreachable("do not know max waves per CU for this subtarget."); + } }; } // End namespace llvm diff --git a/llvm/lib/Target/R600/SIRegisterInfo.cpp b/llvm/lib/Target/R600/SIRegisterInfo.cpp index 380c98b48d7..122e30c6628 100644 --- a/llvm/lib/Target/R600/SIRegisterInfo.cpp +++ b/llvm/lib/Target/R600/SIRegisterInfo.cpp @@ -51,9 +51,32 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { - return RC->getNumRegs(); +unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const { + + // FIXME: We should adjust the max number of waves based on LDS size. 
+ unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU()); + unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU()); + + for (regclass_iterator I = regclass_begin(), E = regclass_end(); + I != E; ++I) { + + unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1); + unsigned Limit; + + if (isSGPRClass(*I)) { + Limit = SGPRLimit / NumSubRegs; + } else { + Limit = VGPRLimit / NumSubRegs; + } + + const int *Sets = getRegClassPressureSets(*I); + assert(Sets); + for (unsigned i = 0; Sets[i] != -1; ++i) { + if (Sets[i] == (int)Idx) + return Limit; + } + } + return 256; } bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const { @@ -455,3 +478,29 @@ unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI, return AMDGPU::NoRegister; } +unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const { + switch(WaveCount) { + case 10: return 24; + case 9: return 28; + case 8: return 32; + case 7: return 36; + case 6: return 40; + case 5: return 48; + case 4: return 64; + case 3: return 84; + case 2: return 128; + default: return 256; + } +} + +unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const { + switch(WaveCount) { + case 10: return 48; + case 9: return 56; + case 8: return 64; + case 7: return 72; + case 6: return 80; + case 5: return 96; + default: return 103; + } +} diff --git a/llvm/lib/Target/R600/SIRegisterInfo.h b/llvm/lib/Target/R600/SIRegisterInfo.h index 8aa02c30a43..d908ffd12d2 100644 --- a/llvm/lib/Target/R600/SIRegisterInfo.h +++ b/llvm/lib/Target/R600/SIRegisterInfo.h @@ -17,6 +17,7 @@ #define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H #include "AMDGPURegisterInfo.h" +#include "llvm/Support/Debug.h" namespace llvm { @@ -26,8 +27,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const override; - unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const override; + unsigned 
getRegPressureSetLimit(unsigned Idx) const override; bool requiresRegisterScavenging(const MachineFunction &Fn) const override; @@ -105,6 +105,14 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { unsigned getPreloadedValue(const MachineFunction &MF, enum PreloadedValue Value) const; + /// \brief Give the maximum number of VGPRs that can be used by \p WaveCount + /// concurrent waves. + unsigned getNumVGPRsAllowed(unsigned WaveCount) const; + + /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount + /// concurrent waves. + unsigned getNumSGPRsAllowed(unsigned WaveCount) const; + unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC) const; |