summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
diff options
context:
space:
mode:
authorTom Stellard <tstellar@redhat.com>2018-06-28 23:47:12 +0000
committerTom Stellard <tstellar@redhat.com>2018-06-28 23:47:12 +0000
commitc5a154db48c3cd9e16b5c74977d506415414daf7 (patch)
tree9f13c6c0c08d47bb47b4058de080226cfec8f739 /llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
parent3702f9128779ffdd0f3b2c9db88c21379aaf5171 (diff)
downloadbcm5719-llvm-c5a154db48c3cd9e16b5c74977d506415414daf7.tar.gz
bcm5719-llvm-c5a154db48c3cd9e16b5c74977d506415414daf7.zip
AMDGPU: Separate R600 and GCN TableGen files
Summary: We now have two sets of generated TableGen files, one for R600 and one for GCN, so each sub-target now has its own tables of instructions, registers, ISel patterns, etc. This should help reduce compile time since each sub-target now only has to consider information that is specific to itself. This will also help prevent the R600 sub-target from slowing down new features for GCN, like disassembler support, GlobalISel, etc. Reviewers: arsenm, nhaehnle, jvesely Reviewed By: arsenm Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D46365 llvm-svn: 335942
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h613
1 files changed, 375 insertions, 238 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 333b99f9ced..9c8b82c2834 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -39,22 +39,181 @@
#define GET_SUBTARGETINFO_HEADER
#include "AMDGPUGenSubtargetInfo.inc"
+#define GET_SUBTARGETINFO_HEADER
+#include "R600GenSubtargetInfo.inc"
namespace llvm {
class StringRef;
-class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
+class AMDGPUCommonSubtarget {
+private:
+ Triple TargetTriple;
+
+protected:
+ const FeatureBitset &SubtargetFeatureBits;
+ bool Has16BitInsts;
+ bool HasMadMixInsts;
+ bool FP32Denormals;
+ bool FPExceptions;
+ bool HasSDWA;
+ bool HasVOP3PInsts;
+ bool HasMulI24;
+ bool HasMulU24;
+ bool HasFminFmaxLegacy;
+ bool EnablePromoteAlloca;
+ int LocalMemorySize;
+ unsigned WavefrontSize;
+
+public:
+ AMDGPUCommonSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
+
+ static const AMDGPUCommonSubtarget &get(const MachineFunction &MF);
+ static const AMDGPUCommonSubtarget &get(const TargetMachine &TM,
+ const Function &F);
+
+ /// \returns Default range flat work group size for a calling convention.
+ std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
+
+ /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
+ /// for function \p F, or minimum/maximum flat work group sizes explicitly
+ /// requested using "amdgpu-flat-work-group-size" attribute attached to
+ /// function \p F.
+ ///
+ /// \returns Subtarget's default values if explicitly requested values cannot
+ /// be converted to integer, or violate subtarget's specifications.
+ std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
+
+ /// \returns Subtarget's default pair of minimum/maximum number of waves per
+ /// execution unit for function \p F, or minimum/maximum number of waves per
+ /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
+ /// attached to function \p F.
+ ///
+ /// \returns Subtarget's default values if explicitly requested values cannot
+ /// be converted to integer, violate subtarget's specifications, or are not
+ /// compatible with minimum/maximum number of waves limited by flat work group
+ /// size, register usage, and/or lds usage.
+ std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
+
+ /// Return the amount of LDS that can be used that will not restrict the
+ /// occupancy lower than WaveCount.
+ unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
+ const Function &) const;
+
+ /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
+ /// the given LDS memory size is the only constraint.
+ unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
+
+ unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
+
+ bool isAmdHsaOS() const {
+ return TargetTriple.getOS() == Triple::AMDHSA;
+ }
+
+ bool isAmdPalOS() const {
+ return TargetTriple.getOS() == Triple::AMDPAL;
+ }
+
+ bool has16BitInsts() const {
+ return Has16BitInsts;
+ }
+
+ bool hasMadMixInsts() const {
+ return HasMadMixInsts;
+ }
+
+ bool hasFP32Denormals() const {
+ return FP32Denormals;
+ }
+
+ bool hasFPExceptions() const {
+ return FPExceptions;
+ }
+
+ bool hasSDWA() const {
+ return HasSDWA;
+ }
+
+ bool hasVOP3PInsts() const {
+ return HasVOP3PInsts;
+ }
+
+ bool hasMulI24() const {
+ return HasMulI24;
+ }
+
+ bool hasMulU24() const {
+ return HasMulU24;
+ }
+
+ bool hasFminFmaxLegacy() const {
+ return HasFminFmaxLegacy;
+ }
+
+ bool isPromoteAllocaEnabled() const {
+ return EnablePromoteAlloca;
+ }
+
+ unsigned getWavefrontSize() const {
+ return WavefrontSize;
+ }
+
+ int getLocalMemorySize() const {
+ return LocalMemorySize;
+ }
+
+ unsigned getAlignmentForImplicitArgPtr() const {
+ return isAmdHsaOS() ? 8 : 4;
+ }
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
+ FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
+ }
+
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
+ FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
+ }
+
+ unsigned getMaxWavesPerEU() const { return 10; }
+
+ /// Creates value range metadata on an workitemid.* inrinsic call or load.
+ bool makeLIDRangeMetadata(Instruction *I) const;
+
+ virtual ~AMDGPUCommonSubtarget() {}
+};
+
+class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo,
+ public AMDGPUCommonSubtarget {
public:
enum Generation {
- R600 = 0,
- R700,
- EVERGREEN,
- NORTHERN_ISLANDS,
- SOUTHERN_ISLANDS,
- SEA_ISLANDS,
- VOLCANIC_ISLANDS,
- GFX9,
+ // Gap for R600 generations, so we can do comparisons between
+ // AMDGPUSubtarget and r600Subtarget.
+ SOUTHERN_ISLANDS = 4,
+ SEA_ISLANDS = 5,
+ VOLCANIC_ISLANDS = 6,
+ GFX9 = 7,
};
enum {
@@ -96,13 +255,20 @@ public:
LLVMTrapHandlerRegValue = 1
};
+private:
+ SIFrameLowering FrameLowering;
+
+ /// GlobalISel related APIs.
+ std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
protected:
// Basic subtarget description.
Triple TargetTriple;
- Generation Gen;
+ unsigned Gen;
unsigned IsaVersion;
- unsigned WavefrontSize;
- int LocalMemorySize;
int LDSBankCount;
unsigned MaxPrivateElementSize;
@@ -111,9 +277,7 @@ protected:
bool HalfRate64Ops;
// Dynamially set bits that enable features.
- bool FP32Denormals;
bool FP64FP16Denormals;
- bool FPExceptions;
bool DX10Clamp;
bool FlatForGlobal;
bool AutoWaitcntBeforeBarrier;
@@ -129,7 +293,6 @@ protected:
// Used as options.
bool EnableHugePrivateBuffer;
bool EnableVGPRSpilling;
- bool EnablePromoteAlloca;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
bool EnableSIScheduler;
@@ -146,17 +309,13 @@ protected:
bool GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
- bool Has16BitInsts;
bool HasIntClamp;
- bool HasVOP3PInsts;
- bool HasMadMixInsts;
bool HasFmaMixInsts;
bool HasMovrel;
bool HasVGPRIndexMode;
bool HasScalarStores;
bool HasScalarAtomics;
bool HasInv2PiInlineImm;
- bool HasSDWA;
bool HasSDWAOmod;
bool HasSDWAScalar;
bool HasSDWASdst;
@@ -181,7 +340,6 @@ protected:
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable;
- InstrItineraryData InstrItins;
SelectionDAGTargetInfo TSInfo;
AMDGPUAS AS;
@@ -193,13 +351,30 @@ public:
AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS);
- const AMDGPUInstrInfo *getInstrInfo() const override = 0;
- const AMDGPUFrameLowering *getFrameLowering() const override = 0;
- const AMDGPUTargetLowering *getTargetLowering() const override = 0;
- const AMDGPURegisterInfo *getRegisterInfo() const override = 0;
+ virtual const SIInstrInfo *getInstrInfo() const override = 0;
- const InstrItineraryData *getInstrItineraryData() const override {
- return &InstrItins;
+ const SIFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+
+ virtual const SITargetLowering *getTargetLowering() const override = 0;
+
+ virtual const SIRegisterInfo *getRegisterInfo() const override = 0;
+
+ const CallLowering *getCallLowering() const override {
+ return CallLoweringInfo.get();
+ }
+
+ const InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
+
+ const RegisterBankInfo *getRegBankInfo() const override {
+ return RegBankInfo.get();
}
// Nothing implemented, just prevent crashes on use.
@@ -209,34 +384,18 @@ public:
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- bool isAmdHsaOS() const {
- return TargetTriple.getOS() == Triple::AMDHSA;
- }
-
bool isMesa3DOS() const {
return TargetTriple.getOS() == Triple::Mesa3D;
}
- bool isAmdPalOS() const {
- return TargetTriple.getOS() == Triple::AMDPAL;
- }
-
Generation getGeneration() const {
- return Gen;
- }
-
- unsigned getWavefrontSize() const {
- return WavefrontSize;
+ return (Generation)Gen;
}
unsigned getWavefrontSizeLog2() const {
return Log2_32(WavefrontSize);
}
- int getLocalMemorySize() const {
- return LocalMemorySize;
- }
-
int getLDSBankCount() const {
return LDSBankCount;
}
@@ -249,18 +408,10 @@ public:
return AS;
}
- bool has16BitInsts() const {
- return Has16BitInsts;
- }
-
bool hasIntClamp() const {
return HasIntClamp;
}
- bool hasVOP3PInsts() const {
- return HasVOP3PInsts;
- }
-
bool hasFP64() const {
return FP64;
}
@@ -269,6 +420,10 @@ public:
return MIMG_R128;
}
+ bool hasHWFP64() const {
+ return FP64;
+ }
+
bool hasFastFMAF32() const {
return FastFMAF32;
}
@@ -278,15 +433,15 @@ public:
}
bool hasAddr64() const {
- return (getGeneration() < VOLCANIC_ISLANDS);
+ return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
}
bool hasBFE() const {
- return (getGeneration() >= EVERGREEN);
+ return true;
}
bool hasBFI() const {
- return (getGeneration() >= EVERGREEN);
+ return true;
}
bool hasBFM() const {
@@ -294,42 +449,23 @@ public:
}
bool hasBCNT(unsigned Size) const {
- if (Size == 32)
- return (getGeneration() >= EVERGREEN);
-
- if (Size == 64)
- return (getGeneration() >= SOUTHERN_ISLANDS);
-
- return false;
- }
-
- bool hasMulU24() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasMulI24() const {
- return (getGeneration() >= SOUTHERN_ISLANDS ||
- hasCaymanISA());
+ return true;
}
bool hasFFBL() const {
- return (getGeneration() >= EVERGREEN);
+ return true;
}
bool hasFFBH() const {
- return (getGeneration() >= EVERGREEN);
+ return true;
}
bool hasMed3_16() const {
- return getGeneration() >= GFX9;
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
}
bool hasMin3Max3_16() const {
- return getGeneration() >= GFX9;
- }
-
- bool hasMadMixInsts() const {
- return HasMadMixInsts;
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
}
bool hasFmaMixInsts() const {
@@ -337,15 +473,7 @@ public:
}
bool hasCARRY() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasBORROW() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasCaymanISA() const {
- return CaymanISA;
+ return true;
}
bool hasFMA() const {
@@ -360,10 +488,6 @@ public:
return EnableHugePrivateBuffer;
}
- bool isPromoteAllocaEnabled() const {
- return EnablePromoteAlloca;
- }
-
bool unsafeDSOffsetFoldingEnabled() const {
return EnableUnsafeDSOffsetFolding;
}
@@ -377,20 +501,10 @@ public:
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
const Function &) const;
- /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
- /// the given LDS memory size is the only constraint.
- unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
-
- unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
-
bool hasFP16Denormals() const {
return FP64FP16Denormals;
}
- bool hasFP32Denormals() const {
- return FP32Denormals;
- }
-
bool hasFP64Denormals() const {
return FP64FP16Denormals;
}
@@ -399,10 +513,6 @@ public:
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
- bool hasFPExceptions() const {
- return FPExceptions;
- }
-
bool enableDX10Clamp() const {
return DX10Clamp;
}
@@ -444,7 +554,7 @@ public:
}
bool hasApertureRegs() const {
- return HasApertureRegs;
+ return HasApertureRegs;
}
bool isTrapHandlerEnabled() const {
@@ -510,14 +620,6 @@ public:
return getGeneration() >= SEA_ISLANDS;
}
- bool hasFminFmaxLegacy() const {
- return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
- }
-
- bool hasSDWA() const {
- return HasSDWA;
- }
-
bool hasSDWAOmod() const {
return HasSDWAOmod;
}
@@ -556,10 +658,6 @@ public:
return isAmdCodeObjectV2(F) ? 0 : 36;
}
- unsigned getAlignmentForImplicitArgPtr() const {
- return isAmdHsaOS() ? 8 : 4;
- }
-
/// \returns Number of bytes of arguments that are passed to a shader or
/// kernel in addition to the explicit ones declared for the function.
unsigned getImplicitArgNumBytes(const Function &F) const {
@@ -588,134 +686,39 @@ public:
return true;
}
- void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
- bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
+ void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
+ bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
/// \returns Number of execution units per compute unit supported by the
/// subtarget.
unsigned getEUsPerCU() const {
- return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
- }
-
- /// \returns Maximum number of work groups per compute unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
- FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerCU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
FlatWorkGroupSize);
}
- /// \returns Minimum number of waves per execution unit supported by the
- /// subtarget.
- unsigned getMinWavesPerEU() const {
- return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
- }
-
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerEU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
- }
-
- /// \returns Maximum number of waves per execution unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
- FlatWorkGroupSize);
- }
-
- /// \returns Minimum flat work group size supported by the subtarget.
- unsigned getMinFlatWorkGroupSize() const {
- return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
- }
-
- /// \returns Maximum flat work group size supported by the subtarget.
- unsigned getMaxFlatWorkGroupSize() const {
- return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
+ return AMDGPU::IsaInfo::getMaxWavesPerEU();
}
/// \returns Number of waves per work group supported by the subtarget and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
- FlatWorkGroupSize);
- }
-
- /// \returns Default range flat work group size for a calling convention.
- std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
-
- /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
- /// for function \p F, or minimum/maximum flat work group sizes explicitly
- /// requested using "amdgpu-flat-work-group-size" attribute attached to
- /// function \p F.
- ///
- /// \returns Subtarget's default values if explicitly requested values cannot
- /// be converted to integer, or violate subtarget's specifications.
- std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
-
- /// \returns Subtarget's default pair of minimum/maximum number of waves per
- /// execution unit for function \p F, or minimum/maximum number of waves per
- /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
- /// attached to function \p F.
- ///
- /// \returns Subtarget's default values if explicitly requested values cannot
- /// be converted to integer, violate subtarget's specifications, or are not
- /// compatible with minimum/maximum number of waves limited by flat work group
- /// size, register usage, and/or lds usage.
- std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
-
- /// Creates value range metadata on an workitemid.* inrinsic call or load.
- bool makeLIDRangeMetadata(Instruction *I) const;
-};
-
-class R600Subtarget final : public AMDGPUSubtarget {
-private:
- R600InstrInfo InstrInfo;
- R600FrameLowering FrameLowering;
- R600TargetLowering TLInfo;
-
-public:
- R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
- const TargetMachine &TM);
-
- const R600InstrInfo *getInstrInfo() const override {
- return &InstrInfo;
- }
-
- const R600FrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
-
- const R600TargetLowering *getTargetLowering() const override {
- return &TLInfo;
- }
-
- const R600RegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
- }
-
- bool hasCFAluBug() const {
- return CFALUBug;
- }
-
- bool hasVertexCache() const {
- return HasVertexCache;
- }
-
- short getTexVTXClauseSize() const {
- return TexVTXClauseSize;
+ return AMDGPU::IsaInfo::getWavesPerWorkGroup(
+ MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
}
};
@@ -766,6 +769,8 @@ public:
const SIRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
+ // static wrappers
+ static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
// XXX - Why is this here if it isn't in the default pass set?
bool enableEarlyIfConversion() const override {
@@ -775,7 +780,7 @@ public:
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
- bool isVGPRSpillingEnabled(const Function& F) const;
+ bool isVGPRSpillingEnabled(const Function &F) const;
unsigned getMaxNumUserSGPRs() const {
return 16;
@@ -860,16 +865,18 @@ public:
unsigned getKernArgSegmentSize(const Function &F,
unsigned ExplictArgBytes) const;
- /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
+ /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
+ /// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
- /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
+ /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
+ /// VGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
/// \returns true if the flat_scratch register should be initialized with the
/// pointer to the wave's scratch memory rather than a size and offset.
bool flatScratchIsPointer() const {
- return getGeneration() >= GFX9;
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
}
/// \returns true if the machine has merged shaders in which s0-s7 are
@@ -880,35 +887,39 @@ public:
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
+ return AMDGPU::IsaInfo::getSGPRAllocGranule(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns SGPR encoding granularity supported by the subtarget.
unsigned getSGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
+ return AMDGPU::IsaInfo::getSGPREncodingGranule(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns Total number of SGPRs supported by the subtarget.
unsigned getTotalNumSGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
+ return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
}
/// \returns Addressable number of SGPRs supported by the subtarget.
unsigned getAddressableNumSGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
+ return AMDGPU::IsaInfo::getAddressableNumSGPRs(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
+ WavesPerEU);
}
/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
- return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
- Addressable);
+ return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
+ WavesPerEU, Addressable);
}
/// \returns Reserved number of SGPRs for given function \p MF.
@@ -926,34 +937,39 @@ public:
/// \returns VGPR allocation granularity supported by the subtarget.
unsigned getVGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
+ return AMDGPU::IsaInfo::getVGPRAllocGranule(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns VGPR encoding granularity supported by the subtarget.
unsigned getVGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
+ return AMDGPU::IsaInfo::getVGPREncodingGranule(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns Total number of VGPRs supported by the subtarget.
unsigned getTotalNumVGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
+ return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
}
/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
+ return AMDGPU::IsaInfo::getAddressableNumVGPRs(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
+ WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
+ return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
+ WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets number of waves per execution
@@ -971,6 +987,127 @@ public:
const override;
};
+
+class R600Subtarget final : public R600GenSubtargetInfo,
+ public AMDGPUCommonSubtarget {
+public:
+ enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 };
+
+private:
+ R600InstrInfo InstrInfo;
+ R600FrameLowering FrameLowering;
+ bool FMA;
+ bool CaymanISA;
+ bool CFALUBug;
+ bool DX10Clamp;
+ bool HasVertexCache;
+ bool R600ALUInst;
+ bool FP64;
+ short TexVTXClauseSize;
+ Generation Gen;
+ R600TargetLowering TLInfo;
+ InstrItineraryData InstrItins;
+ SelectionDAGTargetInfo TSInfo;
+ AMDGPUAS AS;
+
+public:
+ R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetMachine &TM);
+
+ const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+
+ const R600FrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+
+ const R600TargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+
+ const R600RegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
+ // Nothing implemented, just prevent crashes on use.
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ Generation getGeneration() const {
+ return Gen;
+ }
+
+ unsigned getStackAlignment() const {
+ return 4;
+ }
+
+ R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
+ StringRef GPU, StringRef FS);
+
+ bool hasBFE() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasBFI() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasBCNT(unsigned Size) const {
+ if (Size == 32)
+ return (getGeneration() >= EVERGREEN);
+
+ return false;
+ }
+
+ bool hasBORROW() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasCARRY() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasCaymanISA() const {
+ return CaymanISA;
+ }
+
+ bool hasFFBL() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasFFBH() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasFMA() const { return FMA; }
+
+ unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
+ return 36;
+ }
+
+ bool hasCFAluBug() const { return CFALUBug; }
+
+ bool hasVertexCache() const { return HasVertexCache; }
+
+ short getTexVTXClauseSize() const { return TexVTXClauseSize; }
+
+ AMDGPUAS getAMDGPUAS() const { return AS; }
+
+ bool enableMachineScheduler() const override {
+ return true;
+ }
+
+ bool enableSubRegLiveness() const override {
+ return true;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
OpenPOWER on IntegriCloud