diff options
author | Ilya Biryukov <ibiryukov@google.com> | 2018-09-12 07:05:30 +0000 |
---|---|---|
committer | Ilya Biryukov <ibiryukov@google.com> | 2018-09-12 07:05:30 +0000 |
commit | 95066496d08011c70b1dd6208deb17511f3000d0 (patch) | |
tree | 71a56db87cad62cdac1878a77e4b76fee9031528 /llvm/lib | |
parent | 26a37998588f673ef1e3fc4a126f2a0963dad6bb (diff) | |
download | bcm5719-llvm-95066496d08011c70b1dd6208deb17511f3000d0.tar.gz bcm5719-llvm-95066496d08011c70b1dd6208deb17511f3000d0.zip |
Revert "AMDGPU: Move isa version and EF_AMDGPU_MACH_* determination into TargetParser."
This reverts commit r341982.
The change introduced a layering violation. Reverting to unbreak
our integrate.
llvm-svn: 342023
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Support/TargetParser.cpp | 138 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 19 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 125 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 36 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 83 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 226 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 149 |
12 files changed, 401 insertions, 402 deletions
diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp index f73eb290752..1f580378c01 100644 --- a/llvm/lib/Support/TargetParser.cpp +++ b/llvm/lib/Support/TargetParser.cpp @@ -17,13 +17,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/ELF.h" #include <cctype> using namespace llvm; using namespace ARM; using namespace AArch64; -using namespace AMDGPU; namespace { @@ -949,8 +947,6 @@ bool llvm::AArch64::isX18ReservedByDefault(const Triple &TT) { TT.isOSWindows(); } -namespace { - struct GPUInfo { StringLiteral Name; StringLiteral CanonicalName; @@ -958,9 +954,11 @@ struct GPUInfo { unsigned Features; }; -constexpr GPUInfo R600GPUs[26] = { - // Name Canonical Kind Features - // Name +using namespace AMDGPU; +static constexpr GPUInfo R600GPUs[26] = { + // Name Canonical Kind Features + // Name + // {{"r600"}, {"r600"}, GK_R600, FEATURE_NONE }, {{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE }, {{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE }, @@ -991,9 +989,9 @@ constexpr GPUInfo R600GPUs[26] = { // This table should be sorted by the value of GPUKind // Don't bother listing the implicitly true features -constexpr GPUInfo AMDGCNGPUs[32] = { - // Name Canonical Kind Features - // Name +static constexpr GPUInfo AMDGCNGPUs[32] = { + // Name Canonical Kind Features + // Name {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32}, {{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32}, {{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE}, @@ -1028,7 +1026,8 @@ constexpr GPUInfo AMDGCNGPUs[32] = { {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, }; -const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) { +static const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, + ArrayRef<GPUInfo> Table) { GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE }; auto I = std::lower_bound(Table.begin(), Table.end(), Search, @@ -1041,8 +1040,6 @@ const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) { return I; } -} // namespace - StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) { if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs)) return Entry->CanonicalName; @@ -1095,118 +1092,3 @@ void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) { for (const auto C : R600GPUs) Values.push_back(C.Name); } - -StringRef AMDGPU::getArchNameFromElfMach(unsigned ElfMach) { - AMDGPU::GPUKind AK; - - switch (ElfMach) { - case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break; - case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break; - case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break; - case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break; - case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break; - case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break; - case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break; - case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break; - case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break; - case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break; - case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break; - case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break; - case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break; - case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break; - case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break; - case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break; - case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; - } - - StringRef GPUName = getArchNameAMDGCN(AK); - if (GPUName != "") - return GPUName; - return getArchNameR600(AK); -} - -unsigned AMDGPU::getElfMach(StringRef GPU) { - AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); - if (AK == AMDGPU::GPUKind::GK_NONE) - AK = parseArchR600(GPU); - - switch (AK) { - case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600; - case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630; - case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880; - case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670; - case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710; - case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730; - case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770; - case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR; - case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS; - case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER; - case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD; - case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO; - case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS; - case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS; - case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN; - case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS; - case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600; - case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601; - case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700; - case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701; - case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702; - case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703; - case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704; - case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801; - case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802; - case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803; - case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810; - case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900; - case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902; - case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904; - case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906; - case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; - } - - llvm_unreachable("unknown GPU"); -} - -AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { - if (GPU == "generic") - return {7, 0, 0}; - - AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); - if (AK == AMDGPU::GPUKind::GK_NONE) - return {0, 0, 0}; - - switch (AK) { - case GK_GFX600: return {6, 0, 0}; - case GK_GFX601: return {6, 0, 1}; - case GK_GFX700: return {7, 0, 0}; - case GK_GFX701: return {7, 0, 1}; - case GK_GFX702: return {7, 0, 2}; - case GK_GFX703: return {7, 0, 3}; - case GK_GFX704: return {7, 0, 4}; - case GK_GFX801: return {8, 0, 1}; - case GK_GFX802: return {8, 0, 2}; - case GK_GFX803: return {8, 0, 3}; - case GK_GFX810: return {8, 1, 0}; - case GK_GFX900: return {9, 0, 0}; - case GK_GFX902: return {9, 0, 2}; - case GK_GFX904: return {9, 0, 4}; - case GK_GFX906: return {9, 0, 6}; - default: return {0, 0, 0}; - } -} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 80a29a88633..b7e8423c266 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -40,7 +40,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -135,9 +134,9 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1); // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2. - IsaVersion Version = getIsaVersion(getSTI()->getCPU()); + IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(getSTI()->getFeatureBits()); getTargetStreamer()->EmitDirectiveHSACodeObjectISA( - Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU"); + ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); } void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { @@ -241,7 +240,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() { *getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo), CurrentProgramInfo.NumVGPRsForWavesPerEU, CurrentProgramInfo.NumSGPRsForWavesPerEU - - IsaInfo::getNumExtraSGPRs(getSTI(), + IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(), CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed), CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed, @@ -562,7 +561,7 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs( const GCNSubtarget &ST) const { - return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST, + return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), UsesVCC, UsesFlatScratch); } @@ -759,7 +758,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( // 48 SGPRs - vcc, - flat_scr, -xnack int MaxSGPRGuess = - 47 - IsaInfo::getNumExtraSGPRs(getSTI(), true, + 47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true, ST.hasFlatAddressSpace()); MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess); MaxVGPR = std::max(MaxVGPR, 23); @@ -824,7 +823,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be // unified. unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs( - getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed); + STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed); // Check the addressable register limit before we add ExtraSGPRs. if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && @@ -906,9 +905,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, } ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks( - getSTI(), ProgInfo.NumSGPRsForWavesPerEU); + STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU); ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks( - getSTI(), ProgInfo.NumVGPRsForWavesPerEU); + STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU); // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue" @@ -1138,7 +1137,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); - AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI()); + AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits()); Out.compute_pgm_resource_registers = CurrentProgramInfo.ComputePGMRSrc1 | diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 2a29b6f3e55..713dab3c311 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -124,8 +124,10 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, return *this; } -AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : +AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, + const FeatureBitset &FeatureBits) : TargetTriple(TT), + SubtargetFeatureBits(FeatureBits), Has16BitInsts(false), HasMadMixInsts(false), FP32Denormals(false), @@ -142,9 +144,9 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : { } GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, - const GCNTargetMachine &TM) : + const GCNTargetMachine &TM) : AMDGPUGenSubtargetInfo(TT, GPU, FS), - AMDGPUSubtarget(TT), + AMDGPUSubtarget(TT, getFeatureBits()), TargetTriple(TT), Gen(SOUTHERN_ISLANDS), IsaVersion(ISAVersion0_0_0), @@ -446,7 +448,7 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F, R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM) : R600GenSubtargetInfo(TT, GPU, FS), - AMDGPUSubtarget(TT), + AMDGPUSubtarget(TT, getFeatureBits()), InstrInfo(*this), FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), FMA(false), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index d7713ae0be5..a25be48b5d6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -63,6 +63,7 @@ private: Triple TargetTriple; protected: + const FeatureBitset &SubtargetFeatureBits; bool Has16BitInsts; bool HasMadMixInsts; bool FP32Denormals; @@ -78,7 +79,7 @@ protected: unsigned WavefrontSize; public: - AMDGPUSubtarget(const Triple &TT); + AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits); static const AMDGPUSubtarget &get(const MachineFunction &MF); static const AMDGPUSubtarget &get(const TargetMachine &TM, @@ -202,21 +203,33 @@ public: /// \returns Maximum number of work groups per compute unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. - virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0; + unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { + return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits, + FlatWorkGroupSize); + } /// \returns Minimum flat work group size supported by the subtarget. - virtual unsigned getMinFlatWorkGroupSize() const = 0; + unsigned getMinFlatWorkGroupSize() const { + return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits); + } /// \returns Maximum flat work group size supported by the subtarget. - virtual unsigned getMaxFlatWorkGroupSize() const = 0; + unsigned getMaxFlatWorkGroupSize() const { + return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits); + } /// \returns Maximum number of waves per execution unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. - virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0; + unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const { + return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits, + FlatWorkGroupSize); + } /// \returns Minimum number of waves per execution unit supported by the /// subtarget. - virtual unsigned getMinWavesPerEU() const = 0; + unsigned getMinWavesPerEU() const { + return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits); + } unsigned getMaxWavesPerEU() const { return 10; } @@ -695,19 +708,20 @@ public: /// \returns Number of execution units per compute unit supported by the /// subtarget. unsigned getEUsPerCU() const { - return AMDGPU::IsaInfo::getEUsPerCU(this); + return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits()); } /// \returns Maximum number of waves per compute unit supported by the /// subtarget without any kind of limitation. unsigned getMaxWavesPerCU() const { - return AMDGPU::IsaInfo::getMaxWavesPerCU(this); + return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits()); } /// \returns Maximum number of waves per compute unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize); + return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(), + FlatWorkGroupSize); } /// \returns Maximum number of waves per execution unit supported by the @@ -719,7 +733,8 @@ public: /// \returns Number of waves per work group supported by the subtarget and /// limited by given \p FlatWorkGroupSize. unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize); + return AMDGPU::IsaInfo::getWavesPerWorkGroup( + MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize); } // static wrappers @@ -838,34 +853,39 @@ public: /// \returns SGPR allocation granularity supported by the subtarget. unsigned getSGPRAllocGranule() const { - return AMDGPU::IsaInfo::getSGPRAllocGranule(this); + return AMDGPU::IsaInfo::getSGPRAllocGranule( + MCSubtargetInfo::getFeatureBits()); } /// \returns SGPR encoding granularity supported by the subtarget. unsigned getSGPREncodingGranule() const { - return AMDGPU::IsaInfo::getSGPREncodingGranule(this); + return AMDGPU::IsaInfo::getSGPREncodingGranule( + MCSubtargetInfo::getFeatureBits()); } /// \returns Total number of SGPRs supported by the subtarget. unsigned getTotalNumSGPRs() const { - return AMDGPU::IsaInfo::getTotalNumSGPRs(this); + return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits()); } /// \returns Addressable number of SGPRs supported by the subtarget. unsigned getAddressableNumSGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); + return AMDGPU::IsaInfo::getAddressableNumSGPRs( + MCSubtargetInfo::getFeatureBits()); } /// \returns Minimum number of SGPRs that meets the given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMinNumSGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); + return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(), + WavesPerEU); } /// \returns Maximum number of SGPRs that meets the given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { - return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); + return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(), + WavesPerEU, Addressable); } /// \returns Reserved number of SGPRs for given function \p MF. @@ -883,34 +903,39 @@ public: /// \returns VGPR allocation granularity supported by the subtarget. unsigned getVGPRAllocGranule() const { - return AMDGPU::IsaInfo::getVGPRAllocGranule(this); + return AMDGPU::IsaInfo::getVGPRAllocGranule( + MCSubtargetInfo::getFeatureBits()); } /// \returns VGPR encoding granularity supported by the subtarget. unsigned getVGPREncodingGranule() const { - return AMDGPU::IsaInfo::getVGPREncodingGranule(this); + return AMDGPU::IsaInfo::getVGPREncodingGranule( + MCSubtargetInfo::getFeatureBits()); } /// \returns Total number of VGPRs supported by the subtarget. unsigned getTotalNumVGPRs() const { - return AMDGPU::IsaInfo::getTotalNumVGPRs(this); + return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits()); } /// \returns Addressable number of VGPRs supported by the subtarget. unsigned getAddressableNumVGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); + return AMDGPU::IsaInfo::getAddressableNumVGPRs( + MCSubtargetInfo::getFeatureBits()); } /// \returns Minimum number of VGPRs that meets given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMinNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); + return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(), + WavesPerEU); } /// \returns Maximum number of VGPRs that meets given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); + return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(), + WavesPerEU); } /// \returns Maximum number of VGPRs that meets number of waves per execution @@ -926,34 +951,6 @@ public: void getPostRAMutations( std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const override; - - /// \returns Maximum number of work groups per compute unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); - } - - /// \returns Minimum flat work group size supported by the subtarget. - unsigned getMinFlatWorkGroupSize() const override { - return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); - } - - /// \returns Maximum flat work group size supported by the subtarget. - unsigned getMaxFlatWorkGroupSize() const override { - return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); - } - - /// \returns Maximum number of waves per execution unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize); - } - - /// \returns Minimum number of waves per execution unit supported by the - /// subtarget. - unsigned getMinWavesPerEU() const override { - return AMDGPU::IsaInfo::getMinWavesPerEU(this); - } }; class R600Subtarget final : public R600GenSubtargetInfo, @@ -1064,34 +1061,6 @@ public: bool enableSubRegLiveness() const override { return true; } - - /// \returns Maximum number of work groups per compute unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); - } - - /// \returns Minimum flat work group size supported by the subtarget. - unsigned getMinFlatWorkGroupSize() const override { - return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); - } - - /// \returns Maximum flat work group size supported by the subtarget. - unsigned getMaxFlatWorkGroupSize() const override { - return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); - } - - /// \returns Maximum number of waves per execution unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize); - } - - /// \returns Minimum number of waves per execution unit supported by the - /// subtarget. - unsigned getMinWavesPerEU() const override { - return AMDGPU::IsaInfo::getMinWavesPerEU(this); - } }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index bd941930872..fe10f7a0da9 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -49,7 +49,6 @@ #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -918,7 +917,8 @@ public: // Currently there is none suitable machinery in the core llvm-mc for this. // MCSymbol::isRedefinable is intended for another purpose, and // AsmParser::parseDirectiveSet() cannot be specialized for specific target. - AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); MCContext &Ctx = getContext(); if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { MCSymbol *Sym = @@ -1826,7 +1826,7 @@ bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { // Symbols are only defined for GCN targets - if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) + if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6) return true; auto SymbolName = getGprCountSymbolName(RegKind); @@ -2637,18 +2637,18 @@ bool AMDGPUAsmParser::calculateGPRBlocks( unsigned &SGPRBlocks) { // TODO(scott.linder): These calculations are duplicated from // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. - IsaVersion Version = getIsaVersion(getSTI().getCPU()); + IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features); unsigned NumVGPRs = NextFreeVGPR; unsigned NumSGPRs = NextFreeSGPR; - unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI()); + unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features); if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && NumSGPRs > MaxAddressableNumSGPRs) return OutOfRangeError(SGPRRange); NumSGPRs += - IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); + IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed); if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && NumSGPRs > MaxAddressableNumSGPRs) @@ -2657,8 +2657,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks( if (Features.test(FeatureSGPRInitBug)) NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; - VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); - SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); + VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs); + SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs); return false; } @@ -2678,7 +2678,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { StringSet<> Seen; - IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); + IsaInfo::IsaVersion IVersion = + IsaInfo::getIsaVersion(getSTI().getFeatureBits()); SMRange VGPRRange; uint64_t NextFreeVGPR = 0; @@ -2937,7 +2938,8 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { // If this directive has no arguments, then use the ISA version for the // targeted GPU. if (getLexer().is(AsmToken::EndOfStatement)) { - AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); @@ -2999,7 +3001,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { amd_kernel_code_t Header; - AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); + AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits()); while (true) { // Lex EndOfStatement. This is in a while loop, because lexing a comment @@ -3677,12 +3679,12 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { static bool encodeCnt( - const AMDGPU::IsaVersion ISA, + const AMDGPU::IsaInfo::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, - unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), - unsigned (*decode)(const IsaVersion &Version, unsigned)) + unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned), + unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned)) { bool Failed = false; @@ -3713,7 +3715,8 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { if (getParser().parseAbsoluteExpression(CntVal)) return true; - AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); bool Failed = true; bool Sat = CntName.endswith("_sat"); @@ -3748,7 +3751,8 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { OperandMatchResultTy AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { - AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); int64_t Waitcnt = getWaitcntBitMask(ISA); SMLoc S = Parser.getTok().getLoc(); diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index fab0f87dfcb..76b19761ee1 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -1155,7 +1155,8 @@ void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU()); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits()); unsigned SImm16 = MI->getOperand(OpNo).getImm(); unsigned Vmcnt, Expcnt, Lgkmcnt; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 249f2bca7ae..6a41e3f650b 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -27,7 +27,6 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/TargetParser.h" namespace llvm { #include "AMDGPUPTNote.h" @@ -40,6 +39,84 @@ using namespace llvm::AMDGPU; // AMDGPUTargetStreamer //===----------------------------------------------------------------------===// +static const struct { + const char *Name; + unsigned Mach; +} MachTable[] = { + // Radeon HD 2000/3000 Series (R600). + { "r600", ELF::EF_AMDGPU_MACH_R600_R600 }, + { "r630", ELF::EF_AMDGPU_MACH_R600_R630 }, + { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 }, + { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 }, + // Radeon HD 4000 Series (R700). + { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 }, + { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 }, + { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 }, + // Radeon HD 5000 Series (Evergreen). + { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR }, + { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS }, + { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER }, + { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD }, + { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO }, + // Radeon HD 6000 Series (Northern Islands). + { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS }, + { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS }, + { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN }, + { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS }, + // AMDGCN GFX6. + { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 }, + { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 }, + { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, + { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, + { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, + { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, + { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, + // AMDGCN GFX7. + { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 }, + { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 }, + { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 }, + { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 }, + { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 }, + { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 }, + { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 }, + { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 }, + { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 }, + { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 }, + // AMDGCN GFX8. + { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 }, + { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 }, + { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 }, + { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 }, + { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 }, + { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 }, + { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 }, + { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 }, + { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 }, + { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 }, + { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 }, + // AMDGCN GFX9. + { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 }, + { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 }, + { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 }, + { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 }, + // Not specified processor. + { nullptr, ELF::EF_AMDGPU_MACH_NONE } +}; + +unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const { + auto Entry = MachTable; + for (; Entry->Name && GPU != Entry->Name; ++Entry) + ; + return Entry->Mach; +} + +const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) { + auto Entry = MachTable; + for (; Entry->Name && Mach != Entry->Mach; ++Entry) + ; + return Entry->Name; +} + bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) { HSAMD::Metadata HSAMetadata; if (HSAMD::fromString(HSAMetadataString, HSAMetadata)) @@ -128,7 +205,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) { amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor(); - IsaVersion IVersion = getIsaVersion(STI.getCPU()); + IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits()); OS << "\t.amdhsa_kernel " << KernelName << '\n'; @@ -265,7 +342,7 @@ AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer( unsigned EFlags = MCA.getELFHeaderEFlags(); EFlags &= ~ELF::EF_AMDGPU_MACH; - EFlags |= getElfMach(STI.getCPU()); + EFlags |= getMACH(STI.getCPU()); EFlags &= ~ELF::EF_AMDGPU_XNACK; if (AMDGPU::hasXNACK(STI)) diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index ad21e5eec21..472da1b7359 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -31,7 +31,13 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { protected: MCContext &getContext() const { return Streamer.getContext(); } + /// \returns Equivalent EF_AMDGPU_MACH_* value for given \p GPU name. + unsigned getMACH(StringRef GPU) const; + public: + /// \returns Equivalent GPU name for an EF_AMDGPU_MACH_* value. + static const char *getMachName(unsigned Mach); + AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0; diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 819b1b9fcd7..b6dab35ac9a 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -369,7 +369,7 @@ private: const SIRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI = nullptr; const MachineLoopInfo *MLI = nullptr; - AMDGPU::IsaVersion IV; + AMDGPU::IsaInfo::IsaVersion IV; DenseSet<MachineBasicBlock *> BlockVisitedSet; DenseSet<MachineInstr *> TrackedWaitcntSet; @@ -1841,7 +1841,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { TRI = &TII->getRegisterInfo(); MRI = &MF.getRegInfo(); MLI = &getAnalysis<MachineLoopInfo>(); - IV = AMDGPU::getIsaVersion(ST->getCPU()); + IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits()); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); ForceEmitZeroWaitcnts = ForceEmitZeroFlag; diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 6fa52f57601..645fbfc0e84 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -253,7 +253,7 @@ protected: /// Instruction info. const SIInstrInfo *TII = nullptr; - IsaVersion IV; + IsaInfo::IsaVersion IV; SICacheControl(const GCNSubtarget &ST); @@ -605,7 +605,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo( SICacheControl::SICacheControl(const GCNSubtarget &ST) { TII = ST.getInstrInfo(); - IV = getIsaVersion(ST.getCPU()); + IV = IsaInfo::getIsaVersion(ST.getFeatureBits()); } /* static */ diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index b242345c52a..3f361edf4e8 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -137,18 +137,68 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) { namespace IsaInfo { +IsaVersion getIsaVersion(const FeatureBitset &Features) { + // GCN GFX6 (Southern Islands (SI)). + if (Features.test(FeatureISAVersion6_0_0)) + return {6, 0, 0}; + if (Features.test(FeatureISAVersion6_0_1)) + return {6, 0, 1}; + + // GCN GFX7 (Sea Islands (CI)). + if (Features.test(FeatureISAVersion7_0_0)) + return {7, 0, 0}; + if (Features.test(FeatureISAVersion7_0_1)) + return {7, 0, 1}; + if (Features.test(FeatureISAVersion7_0_2)) + return {7, 0, 2}; + if (Features.test(FeatureISAVersion7_0_3)) + return {7, 0, 3}; + if (Features.test(FeatureISAVersion7_0_4)) + return {7, 0, 4}; + if (Features.test(FeatureSeaIslands)) + return {7, 0, 0}; + + // GCN GFX8 (Volcanic Islands (VI)). + if (Features.test(FeatureISAVersion8_0_1)) + return {8, 0, 1}; + if (Features.test(FeatureISAVersion8_0_2)) + return {8, 0, 2}; + if (Features.test(FeatureISAVersion8_0_3)) + return {8, 0, 3}; + if (Features.test(FeatureISAVersion8_1_0)) + return {8, 1, 0}; + if (Features.test(FeatureVolcanicIslands)) + return {8, 0, 0}; + + // GCN GFX9. + if (Features.test(FeatureISAVersion9_0_0)) + return {9, 0, 0}; + if (Features.test(FeatureISAVersion9_0_2)) + return {9, 0, 2}; + if (Features.test(FeatureISAVersion9_0_4)) + return {9, 0, 4}; + if (Features.test(FeatureISAVersion9_0_6)) + return {9, 0, 6}; + if (Features.test(FeatureGFX9)) + return {9, 0, 0}; + + if (Features.test(FeatureSouthernIslands)) + return {0, 0, 0}; + return {7, 0, 0}; +} + void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) { auto TargetTriple = STI->getTargetTriple(); - auto Version = getIsaVersion(STI->getCPU()); + auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits()); Stream << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName() << '-' << TargetTriple.getOSName() << '-' << TargetTriple.getEnvironmentName() << '-' << "gfx" - << Version.Major - << Version.Minor - << Version.Stepping; + << ISAVersion.Major + << ISAVersion.Minor + << ISAVersion.Stepping; if (hasXNACK(*STI)) Stream << "+xnack"; @@ -160,49 +210,49 @@ bool hasCodeObjectV3(const MCSubtargetInfo *STI) { return STI->getFeatureBits().test(FeatureCodeObjectV3); } -unsigned getWavefrontSize(const MCSubtargetInfo *STI) { - if (STI->getFeatureBits().test(FeatureWavefrontSize16)) +unsigned getWavefrontSize(const FeatureBitset &Features) { + if (Features.test(FeatureWavefrontSize16)) return 16; - if (STI->getFeatureBits().test(FeatureWavefrontSize32)) + if (Features.test(FeatureWavefrontSize32)) return 32; return 64; } -unsigned getLocalMemorySize(const MCSubtargetInfo *STI) { - if (STI->getFeatureBits().test(FeatureLocalMemorySize32768)) +unsigned getLocalMemorySize(const FeatureBitset &Features) { + if (Features.test(FeatureLocalMemorySize32768)) return 32768; - if (STI->getFeatureBits().test(FeatureLocalMemorySize65536)) + if (Features.test(FeatureLocalMemorySize65536)) return 65536; return 0; } -unsigned getEUsPerCU(const MCSubtargetInfo *STI) { +unsigned getEUsPerCU(const FeatureBitset &Features) { return 4; } -unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, +unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, unsigned FlatWorkGroupSize) { - if (!STI->getFeatureBits().test(FeatureGCN)) + if (!Features.test(FeatureGCN)) return 8; - unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize); + unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize); if (N == 1) return 40; N = 40 / N; return std::min(N, 16u); } -unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) { - return getMaxWavesPerEU() * getEUsPerCU(STI); +unsigned getMaxWavesPerCU(const FeatureBitset &Features) { + return getMaxWavesPerEU() * getEUsPerCU(Features); } -unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI, +unsigned getMaxWavesPerCU(const FeatureBitset &Features, unsigned FlatWorkGroupSize) { - return getWavesPerWorkGroup(STI, FlatWorkGroupSize); + return getWavesPerWorkGroup(Features, FlatWorkGroupSize); } -unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { +unsigned getMinWavesPerEU(const FeatureBitset &Features) { return 1; } @@ -211,89 +261,89 @@ unsigned getMaxWavesPerEU() { return 10; } -unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI, +unsigned getMaxWavesPerEU(const FeatureBitset &Features, unsigned FlatWorkGroupSize) { - return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize), - getEUsPerCU(STI)) / getEUsPerCU(STI); + return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize), + getEUsPerCU(Features)) / getEUsPerCU(Features); } -unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { +unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) { return 1; } -unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) { +unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) { return 2048; } -unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, +unsigned getWavesPerWorkGroup(const FeatureBitset &Features, unsigned FlatWorkGroupSize) { - return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) / - getWavefrontSize(STI); + return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) / + getWavefrontSize(Features); } -unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) { - IsaVersion Version = getIsaVersion(STI->getCPU()); +unsigned getSGPRAllocGranule(const FeatureBitset &Features) { + IsaVersion Version = getIsaVersion(Features); if (Version.Major >= 8) return 16; return 8; } -unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { +unsigned getSGPREncodingGranule(const FeatureBitset &Features) { return 8; } -unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) { - IsaVersion Version = getIsaVersion(STI->getCPU()); +unsigned getTotalNumSGPRs(const FeatureBitset &Features) { + IsaVersion Version = getIsaVersion(Features); if (Version.Major >= 8) return 800; return 512; } -unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) { - if (STI->getFeatureBits().test(FeatureSGPRInitBug)) +unsigned getAddressableNumSGPRs(const FeatureBitset &Features) { + if (Features.test(FeatureSGPRInitBug)) return FIXED_NUM_SGPRS_FOR_INIT_BUG; - IsaVersion Version = getIsaVersion(STI->getCPU()); + IsaVersion Version = getIsaVersion(Features); if (Version.Major >= 8) return 102; return 104; } -unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { +unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { assert(WavesPerEU != 0); if (WavesPerEU >= getMaxWavesPerEU()) return 0; - unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1); - if (STI->getFeatureBits().test(FeatureTrapHandler)) + unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1); + if (Features.test(FeatureTrapHandler)) MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS); - MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1; - return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI)); + MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1; + return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features)); } -unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, +unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, bool Addressable) { assert(WavesPerEU != 0); - IsaVersion Version = getIsaVersion(STI->getCPU()); - unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI); + IsaVersion Version = getIsaVersion(Features); + unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features); if (Version.Major >= 8 && !Addressable) AddressableNumSGPRs = 112; - unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU; - if (STI->getFeatureBits().test(FeatureTrapHandler)) + unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU; + if (Features.test(FeatureTrapHandler)) MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS); - MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI)); + MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features)); return std::min(MaxNumSGPRs, AddressableNumSGPRs); } -unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, +unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed) { unsigned ExtraSGPRs = 0; if (VCCUsed) ExtraSGPRs = 2; - IsaVersion Version = getIsaVersion(STI->getCPU()); + IsaVersion Version = getIsaVersion(Features); if (Version.Major < 8) { if (FlatScrUsed) ExtraSGPRs = 4; @@ -308,74 +358,74 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, return ExtraSGPRs; } -unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, +unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed) { - return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed, - STI->getFeatureBits().test(AMDGPU::FeatureXNACK)); + return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, + Features[AMDGPU::FeatureXNACK]); } -unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { - NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI)); +unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) { + NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features)); // SGPRBlocks is actual number of SGPR blocks minus 1. - return NumSGPRs / getSGPREncodingGranule(STI) - 1; + return NumSGPRs / getSGPREncodingGranule(Features) - 1; } -unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) { +unsigned getVGPRAllocGranule(const FeatureBitset &Features) { return 4; } -unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) { - return getVGPRAllocGranule(STI); +unsigned getVGPREncodingGranule(const FeatureBitset &Features) { + return getVGPRAllocGranule(Features); } -unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { +unsigned getTotalNumVGPRs(const FeatureBitset &Features) { return 256; } -unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { - return getTotalNumVGPRs(STI); +unsigned getAddressableNumVGPRs(const FeatureBitset &Features) { + return getTotalNumVGPRs(Features); } -unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { +unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { assert(WavesPerEU != 0); if (WavesPerEU >= getMaxWavesPerEU()) return 0; unsigned MinNumVGPRs = - alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1), - getVGPRAllocGranule(STI)) + 1; - return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI)); + alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1), + getVGPRAllocGranule(Features)) + 1; + return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features)); } -unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { +unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { assert(WavesPerEU != 0); - unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU, - getVGPRAllocGranule(STI)); - unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI); + unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU, + getVGPRAllocGranule(Features)); + unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features); return std::min(MaxNumVGPRs, AddressableNumVGPRs); } -unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) { - NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI)); +unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) { + NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features)); // VGPRBlocks is actual number of VGPR blocks minus 1. - return NumVGPRs / getVGPREncodingGranule(STI) - 1; + return NumVGPRs / getVGPREncodingGranule(Features) - 1; } } // end namespace IsaInfo void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, - const MCSubtargetInfo *STI) { - IsaVersion Version = getIsaVersion(STI->getCPU()); + const FeatureBitset &Features) { + IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features); memset(&Header, 0, sizeof(Header)); Header.amd_kernel_code_version_major = 1; Header.amd_kernel_code_version_minor = 2; Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU - Header.amd_machine_version_major = Version.Major; - Header.amd_machine_version_minor = Version.Minor; - Header.amd_machine_version_stepping = Version.Stepping; + Header.amd_machine_version_major = ISA.Major; + Header.amd_machine_version_minor = ISA.Minor; + Header.amd_machine_version_stepping = ISA.Stepping; Header.kernel_code_entry_byte_offset = sizeof(Header); // wavefront_size is specified as a power of 2: 2^6 = 64 threads. Header.wavefront_size = 6; @@ -463,7 +513,7 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F, return Ints; } -unsigned getVmcntBitMask(const IsaVersion &Version) { +unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) { unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1; if (Version.Major < 9) return VmcntLo; @@ -472,15 +522,15 @@ unsigned getVmcntBitMask(const IsaVersion &Version) { return VmcntLo | VmcntHi; } -unsigned getExpcntBitMask(const IsaVersion &Version) { +unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) { return (1 << getExpcntBitWidth()) - 1; } -unsigned getLgkmcntBitMask(const IsaVersion &Version) { +unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) { return (1 << getLgkmcntBitWidth()) - 1; } -unsigned getWaitcntBitMask(const IsaVersion &Version) { +unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) { unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo()); unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth()); @@ -492,7 +542,7 @@ unsigned getWaitcntBitMask(const IsaVersion &Version) { return Waitcnt | VmcntHi; } -unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) { +unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); if (Version.Major < 9) @@ -504,22 +554,22 @@ unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) { return VmcntLo | VmcntHi; } -unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) { +unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); } -unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) { +unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); } -void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, +void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { Vmcnt = decodeVmcnt(Version, Waitcnt); Expcnt = decodeExpcnt(Version, Waitcnt); Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); } -unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, +unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt) { Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); @@ -530,17 +580,17 @@ unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); } -unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, +unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt) { return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); } -unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, +unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt) { return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); } -unsigned encodeWaitcnt(const IsaVersion &Version, +unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { unsigned Waitcnt = getWaitcntBitMask(Version); Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index da004a6a841..97cf29c67c5 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -19,7 +19,6 @@ #include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetParser.h" #include <cstdint> #include <string> #include <utility> @@ -57,6 +56,16 @@ enum { TRAP_NUM_SGPRS = 16 }; +/// Instruction set architecture version. +struct IsaVersion { + unsigned Major; + unsigned Minor; + unsigned Stepping; +}; + +/// \returns Isa version for given subtarget \p Features. +IsaVersion getIsaVersion(const FeatureBitset &Features); + /// Streams isa version string for given subtarget \p STI into \p Stream. void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream); @@ -64,114 +73,114 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream); /// false otherwise. bool hasCodeObjectV3(const MCSubtargetInfo *STI); -/// \returns Wavefront size for given subtarget \p STI. -unsigned getWavefrontSize(const MCSubtargetInfo *STI); +/// \returns Wavefront size for given subtarget \p Features. +unsigned getWavefrontSize(const FeatureBitset &Features); -/// \returns Local memory size in bytes for given subtarget \p STI. -unsigned getLocalMemorySize(const MCSubtargetInfo *STI); +/// \returns Local memory size in bytes for given subtarget \p Features. +unsigned getLocalMemorySize(const FeatureBitset &Features); /// \returns Number of execution units per compute unit for given subtarget \p -/// STI. -unsigned getEUsPerCU(const MCSubtargetInfo *STI); +/// Features. +unsigned getEUsPerCU(const FeatureBitset &Features); /// \returns Maximum number of work groups per compute unit for given subtarget -/// \p STI and limited by given \p FlatWorkGroupSize. -unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, +/// \p Features and limited by given \p FlatWorkGroupSize. +unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, unsigned FlatWorkGroupSize); /// \returns Maximum number of waves per compute unit for given subtarget \p -/// STI without any kind of limitation. -unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI); +/// Features without any kind of limitation. +unsigned getMaxWavesPerCU(const FeatureBitset &Features); /// \returns Maximum number of waves per compute unit for given subtarget \p -/// STI and limited by given \p FlatWorkGroupSize. -unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI, +/// Features and limited by given \p FlatWorkGroupSize. +unsigned getMaxWavesPerCU(const FeatureBitset &Features, unsigned FlatWorkGroupSize); /// \returns Minimum number of waves per execution unit for given subtarget \p -/// STI. -unsigned getMinWavesPerEU(const MCSubtargetInfo *STI); +/// Features. +unsigned getMinWavesPerEU(const FeatureBitset &Features); /// \returns Maximum number of waves per execution unit for given subtarget \p -/// STI without any kind of limitation. +/// Features without any kind of limitation. unsigned getMaxWavesPerEU(); /// \returns Maximum number of waves per execution unit for given subtarget \p -/// STI and limited by given \p FlatWorkGroupSize. -unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI, +/// Features and limited by given \p FlatWorkGroupSize. +unsigned getMaxWavesPerEU(const FeatureBitset &Features, unsigned FlatWorkGroupSize); -/// \returns Minimum flat work group size for given subtarget \p STI. -unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI); +/// \returns Minimum flat work group size for given subtarget \p Features. +unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features); -/// \returns Maximum flat work group size for given subtarget \p STI. -unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI); +/// \returns Maximum flat work group size for given subtarget \p Features. +unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features); -/// \returns Number of waves per work group for given subtarget \p STI and +/// \returns Number of waves per work group for given subtarget \p Features and /// limited by given \p FlatWorkGroupSize. -unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, +unsigned getWavesPerWorkGroup(const FeatureBitset &Features, unsigned FlatWorkGroupSize); -/// \returns SGPR allocation granularity for given subtarget \p STI. -unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI); +/// \returns SGPR allocation granularity for given subtarget \p Features. +unsigned getSGPRAllocGranule(const FeatureBitset &Features); -/// \returns SGPR encoding granularity for given subtarget \p STI. -unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI); +/// \returns SGPR encoding granularity for given subtarget \p Features. +unsigned getSGPREncodingGranule(const FeatureBitset &Features); -/// \returns Total number of SGPRs for given subtarget \p STI. -unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI); +/// \returns Total number of SGPRs for given subtarget \p Features. +unsigned getTotalNumSGPRs(const FeatureBitset &Features); -/// \returns Addressable number of SGPRs for given subtarget \p STI. -unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI); +/// \returns Addressable number of SGPRs for given subtarget \p Features. +unsigned getAddressableNumSGPRs(const FeatureBitset &Features); /// \returns Minimum number of SGPRs that meets the given number of waves per -/// execution unit requirement for given subtarget \p STI. -unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); +/// execution unit requirement for given subtarget \p Features. +unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU); /// \returns Maximum number of SGPRs that meets the given number of waves per -/// execution unit requirement for given subtarget \p STI. -unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, +/// execution unit requirement for given subtarget \p Features. +unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, bool Addressable); /// \returns Number of extra SGPRs implicitly required by given subtarget \p -/// STI when the given special registers are used. -unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, +/// Features when the given special registers are used. +unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed); /// \returns Number of extra SGPRs implicitly required by given subtarget \p -/// STI when the given special registers are used. XNACK is inferred from -/// \p STI. -unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, +/// Features when the given special registers are used. XNACK is inferred from +/// \p Features. +unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed); -/// \returns Number of SGPR blocks needed for given subtarget \p STI when +/// \returns Number of SGPR blocks needed for given subtarget \p Features when /// \p NumSGPRs are used. \p NumSGPRs should already include any special /// register counts. -unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs); +unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs); -/// \returns VGPR allocation granularity for given subtarget \p STI. -unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI); +/// \returns VGPR allocation granularity for given subtarget \p Features. +unsigned getVGPRAllocGranule(const FeatureBitset &Features); -/// \returns VGPR encoding granularity for given subtarget \p STI. -unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI); +/// \returns VGPR encoding granularity for given subtarget \p Features. +unsigned getVGPREncodingGranule(const FeatureBitset &Features); -/// \returns Total number of VGPRs for given subtarget \p STI. -unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI); +/// \returns Total number of VGPRs for given subtarget \p Features. +unsigned getTotalNumVGPRs(const FeatureBitset &Features); -/// \returns Addressable number of VGPRs for given subtarget \p STI. -unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI); +/// \returns Addressable number of VGPRs for given subtarget \p Features. +unsigned getAddressableNumVGPRs(const FeatureBitset &Features); /// \returns Minimum number of VGPRs that meets given number of waves per -/// execution unit requirement for given subtarget \p STI. -unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); +/// execution unit requirement for given subtarget \p Features. +unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU); /// \returns Maximum number of VGPRs that meets given number of waves per -/// execution unit requirement for given subtarget \p STI. -unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); +/// execution unit requirement for given subtarget \p Features. +unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU); -/// \returns Number of VGPR blocks needed for given subtarget \p STI when +/// \returns Number of VGPR blocks needed for given subtarget \p Features when /// \p NumVGPRs are used. -unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs); +unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs); } // end namespace IsaInfo @@ -224,7 +233,7 @@ LLVM_READONLY int getMCOpcode(uint16_t Opcode, unsigned Gen); void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, - const MCSubtargetInfo *STI); + const FeatureBitset &Features); amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(); @@ -259,25 +268,25 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F, bool OnlyFirstRequired = false); /// \returns Vmcnt bit mask for given isa \p Version. -unsigned getVmcntBitMask(const IsaVersion &Version); +unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version); /// \returns Expcnt bit mask for given isa \p Version. -unsigned getExpcntBitMask(const IsaVersion &Version); +unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version); /// \returns Lgkmcnt bit mask for given isa \p Version. -unsigned getLgkmcntBitMask(const IsaVersion &Version); +unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version); /// \returns Waitcnt bit mask for given isa \p Version. -unsigned getWaitcntBitMask(const IsaVersion &Version); +unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version); /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); +unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt); +unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); +unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and @@ -288,19 +297,19 @@ unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); /// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only) /// \p Expcnt = \p Waitcnt[6:4] /// \p Lgkmcnt = \p Waitcnt[11:8] -void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, +void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt); /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. -unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, +unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt); /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. -unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, +unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt); /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. -unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, +unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt); /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa @@ -315,7 +324,7 @@ unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, /// /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given /// isa \p Version. -unsigned encodeWaitcnt(const IsaVersion &Version, +unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt); unsigned getInitialPSInputAddr(const Function &F); |