Diffstat (limited to 'llvm/lib')
39 files changed, 446 insertions, 290 deletions
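The change below replaces the fixed AMDGPUAS::AddressSpaces enum with a per-triple AMDGPUAS struct obtained through AMDGPU::getAMDGPUAS(). As a minimal illustrative sketch only (it is not part of the patch, and the helper name isFlatLoad is hypothetical), a pass written against the new interface would look up the mapping for the module and compare numeric address spaces through the struct members instead of compile-time enumerators:

#include "AMDGPU.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Query the triple-dependent address-space numbering for this module and
// test whether a load reads from flat (generic) memory. FLAT_ADDRESS is no
// longer a fixed enumerator: per the tables in AMDGPUAliasAnalysis.cpp in
// this patch, it is 4 under the private-is-zero mapping and 0 under the
// generic-is-zero mapping.
static bool isFlatLoad(const LoadInst &Load) {
  AMDGPUAS AS = AMDGPU::getAMDGPUAS(*Load.getModule());
  return Load.getPointerAddressSpace() == AS.FLAT_ADDRESS;
}
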
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 99d71f50374..4e2f0af5a20 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -23,6 +23,7 @@ class Pass; class Target; class TargetMachine; class PassRegistry; +class Module; // R600 Passes FunctionPass *createR600VectorRegMerger(TargetMachine &tm); @@ -150,43 +151,53 @@ enum TargetIndex { /// however on the GPU, each address space points to /// a separate piece of memory that is unique from other /// memory locations. -namespace AMDGPUAS { -enum AddressSpaces : unsigned { - PRIVATE_ADDRESS = 0, ///< Address space for private memory. - GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). - CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2) - LOCAL_ADDRESS = 3, ///< Address space for local memory. - FLAT_ADDRESS = 4, ///< Address space for flat memory. - REGION_ADDRESS = 5, ///< Address space for region memory. - PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0) - PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1) +struct AMDGPUAS { + // The following address space values depend on the triple environment. + unsigned PRIVATE_ADDRESS; ///< Address space for private memory. + unsigned CONSTANT_ADDRESS; ///< Address space for constant memory (VTX2) + unsigned FLAT_ADDRESS; ///< Address space for flat memory. + unsigned REGION_ADDRESS; ///< Address space for region memory. + + // The maximum value for flat, generic, local, private, constant and region. + const static unsigned MAX_COMMON_ADDRESS = 5; + + const static unsigned GLOBAL_ADDRESS = 1; ///< Address space for global memory (RAT0, VTX0). + const static unsigned LOCAL_ADDRESS = 3; ///< Address space for local memory. + const static unsigned PARAM_D_ADDRESS = 6; ///< Address space for direct addressible parameter memory (CONST0) + const static unsigned PARAM_I_ADDRESS = 7; ///< Address space for indirect addressible parameter memory (VTX1) // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this // order to be able to dynamically index a constant buffer, for example: // // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx - CONSTANT_BUFFER_0 = 8, - CONSTANT_BUFFER_1 = 9, - CONSTANT_BUFFER_2 = 10, - CONSTANT_BUFFER_3 = 11, - CONSTANT_BUFFER_4 = 12, - CONSTANT_BUFFER_5 = 13, - CONSTANT_BUFFER_6 = 14, - CONSTANT_BUFFER_7 = 15, - CONSTANT_BUFFER_8 = 16, - CONSTANT_BUFFER_9 = 17, - CONSTANT_BUFFER_10 = 18, - CONSTANT_BUFFER_11 = 19, - CONSTANT_BUFFER_12 = 20, - CONSTANT_BUFFER_13 = 21, - CONSTANT_BUFFER_14 = 22, - CONSTANT_BUFFER_15 = 23, + const static unsigned CONSTANT_BUFFER_0 = 8; + const static unsigned CONSTANT_BUFFER_1 = 9; + const static unsigned CONSTANT_BUFFER_2 = 10; + const static unsigned CONSTANT_BUFFER_3 = 11; + const static unsigned CONSTANT_BUFFER_4 = 12; + const static unsigned CONSTANT_BUFFER_5 = 13; + const static unsigned CONSTANT_BUFFER_6 = 14; + const static unsigned CONSTANT_BUFFER_7 = 15; + const static unsigned CONSTANT_BUFFER_8 = 16; + const static unsigned CONSTANT_BUFFER_9 = 17; + const static unsigned CONSTANT_BUFFER_10 = 18; + const static unsigned CONSTANT_BUFFER_11 = 19; + const static unsigned CONSTANT_BUFFER_12 = 20; + const static unsigned CONSTANT_BUFFER_13 = 21; + const static unsigned CONSTANT_BUFFER_14 = 22; + const static unsigned CONSTANT_BUFFER_15 = 23; // Some places use this if the address space can't be determined. 
- UNKNOWN_ADDRESS_SPACE = ~0u + const static unsigned UNKNOWN_ADDRESS_SPACE = ~0u; }; -} // namespace AMDGPUAS +namespace llvm { +namespace AMDGPU { +AMDGPUAS getAMDGPUAS(const Module &M); +AMDGPUAS getAMDGPUAS(const TargetMachine &TM); +AMDGPUAS getAMDGPUAS(Triple T); +} // namespace AMDGPU +} // namespace llvm #endif diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp index 127b2639794..aa5ebae2d9f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp @@ -37,26 +37,60 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } +// Must match the table in getAliasResult. +AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_) : AS(AS_) { + // These arrarys are indexed by address space value + // enum elements 0 ... to 5 + static const AliasResult ASAliasRulesPrivIsZero[6][6] = { + /* Private Global Constant Group Flat Region*/ + /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias}, + /* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias, NoAlias}, + /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias}, + /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias}, + /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}, + /* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias} + }; + static const AliasResult ASAliasRulesGenIsZero[6][6] = { + /* Flat Global Region Group Constant Private */ + /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}, + /* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias}, + /* Region */ {NoAlias , NoAlias , MayAlias, NoAlias, NoAlias , MayAlias}, + /* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias}, + /* Constant */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias}, + /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias} + }; + assert(AS.MAX_COMMON_ADDRESS <= 5); + if (AS.FLAT_ADDRESS == 0) { + assert(AS.GLOBAL_ADDRESS == 1 && + AS.REGION_ADDRESS == 2 && + AS.LOCAL_ADDRESS == 3 && + AS.CONSTANT_ADDRESS == 4 && + AS.PRIVATE_ADDRESS == 5); + ASAliasRules = &ASAliasRulesGenIsZero; + } else { + assert(AS.PRIVATE_ADDRESS == 0 && + AS.GLOBAL_ADDRESS == 1 && + AS.CONSTANT_ADDRESS == 2 && + AS.LOCAL_ADDRESS == 3 && + AS.FLAT_ADDRESS == 4 && + AS.REGION_ADDRESS == 5); + ASAliasRules = &ASAliasRulesPrivIsZero; + } +} + +AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1, + unsigned AS2) const { + if (AS1 > AS.MAX_COMMON_ADDRESS || AS2 > AS.MAX_COMMON_ADDRESS) + report_fatal_error("Pointer address space out of range"); + return (*ASAliasRules)[AS1][AS2]; +} + AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { - // This array is indexed by the AMDGPUAS::AddressSpaces - // enum elements PRIVATE_ADDRESS ... 
to FLAT_ADDRESS - // see "llvm/Transforms/AMDSPIRUtils.h" - static const AliasResult ASAliasRules[5][5] = { - /* Private Global Constant Group Flat */ - /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias}, - /* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias}, - /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias}, - /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}, - /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias} - }; unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace(); unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace(); - if (asA > AMDGPUAS::AddressSpaces::FLAT_ADDRESS || - asB > AMDGPUAS::AddressSpaces::FLAT_ADDRESS) - report_fatal_error("Pointer address space out of range"); - AliasResult Result = ASAliasRules[asA][asB]; + AliasResult Result = ASAliasRules.getAliasResult(asA, asB); if (Result == NoAlias) return Result; if (isa<Argument>(LocA.Ptr) && isa<Argument>(LocB.Ptr)) { @@ -75,8 +109,7 @@ bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) { const Value *Base = GetUnderlyingObject(Loc.Ptr, DL); - if (Base->getType()->getPointerAddressSpace() == - AMDGPUAS::AddressSpaces::CONSTANT_ADDRESS) { + if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS) { return true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h index 943b4a68b25..f73aa47cb93 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h @@ -13,6 +13,7 @@ #ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H #define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H +#include "AMDGPU.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" @@ -25,11 +26,14 @@ class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> { friend AAResultBase<AMDGPUAAResult>; const DataLayout &DL; + AMDGPUAS AS; public: - explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {} + explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(), + DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS) {} AMDGPUAAResult(AMDGPUAAResult &&Arg) - : AAResultBase(std::move(Arg)), DL(Arg.DL){} + : AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS), + ASAliasRules(Arg.ASAliasRules){} /// Handle invalidation events from the new pass manager. /// @@ -42,6 +46,15 @@ public: private: bool Aliases(const MDNode *A, const MDNode *B) const; bool PathAliases(const MDNode *A, const MDNode *B) const; + + class ASAliasRulesTy { + public: + ASAliasRulesTy(AMDGPUAS AS_); + AliasResult getAliasResult(unsigned AS1, unsigned AS2) const; + private: + AMDGPUAS AS; + const AliasResult (*ASAliasRules)[6][6]; + } ASAliasRules; }; /// Analysis pass providing a never-invalidated alias analysis result. 
@@ -53,7 +66,8 @@ public: typedef AMDGPUAAResult Result; AMDGPUAAResult run(Function &F, AnalysisManager<Function> &AM) { - return AMDGPUAAResult(F.getParent()->getDataLayout()); + return AMDGPUAAResult(F.getParent()->getDataLayout(), + Triple(F.getParent()->getTargetTriple())); } }; @@ -72,7 +86,8 @@ public: const AMDGPUAAResult &getResult() const { return *Result; } bool doInitialization(Module &M) override { - Result.reset(new AMDGPUAAResult(M.getDataLayout())); + Result.reset(new AMDGPUAAResult(M.getDataLayout(), + Triple(M.getTargetTriple()))); return false; } bool doFinalization(Module &M) override { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index 09d3ff716e6..3d8db7cd8af 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -28,7 +28,8 @@ namespace { class AMDGPUAnnotateKernelFeatures : public ModulePass { private: const TargetMachine *TM; - static bool hasAddrSpaceCast(const Function &F); + AMDGPUAS AS; + static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS); void addAttrToCallers(Function *Intrin, StringRef AttrName); bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>); @@ -48,10 +49,11 @@ public: ModulePass::getAnalysisUsage(AU); } - static bool visitConstantExpr(const ConstantExpr *CE); + static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS); static bool visitConstantExprsRecursively( const Constant *EntryC, - SmallPtrSet<const Constant *, 8> &ConstantExprVisited); + SmallPtrSet<const Constant *, 8> &ConstantExprVisited, + AMDGPUAS AS); }; } @@ -65,18 +67,20 @@ INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, // The queue ptr is only needed when casting to flat, not from it. -static bool castRequiresQueuePtr(unsigned SrcAS) { - return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; +static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) { + return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS; } -static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) { - return castRequiresQueuePtr(ASC->getSrcAddressSpace()); +static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC, + const AMDGPUAS &AS) { + return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS); } -bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { +bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE, + AMDGPUAS AS) { if (CE->getOpcode() == Instruction::AddrSpaceCast) { unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); - return castRequiresQueuePtr(SrcAS); + return castRequiresQueuePtr(SrcAS, AS); } return false; @@ -84,7 +88,8 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( const Constant *EntryC, - SmallPtrSet<const Constant *, 8> &ConstantExprVisited) { + SmallPtrSet<const Constant *, 8> &ConstantExprVisited, + AMDGPUAS AS) { if (!ConstantExprVisited.insert(EntryC).second) return false; @@ -97,7 +102,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( // Check this constant expression. if (const auto *CE = dyn_cast<ConstantExpr>(C)) { - if (visitConstantExpr(CE)) + if (visitConstantExpr(CE, AS)) return true; } @@ -118,13 +123,14 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( } // Return true if an addrspacecast is used that requires the queue ptr. 
-bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) { +bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F, + AMDGPUAS AS) { SmallPtrSet<const Constant *, 8> ConstantExprVisited; for (const BasicBlock &BB : F) { for (const Instruction &I : BB) { if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { - if (castRequiresQueuePtr(ASC)) + if (castRequiresQueuePtr(ASC, AS)) return true; } @@ -133,7 +139,7 @@ bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) { if (!OpC) continue; - if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) + if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) return true; } } @@ -173,6 +179,7 @@ bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics( bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { Triple TT(M.getTargetTriple()); + AS = AMDGPU::getAMDGPUAS(M); static const StringRef IntrinsicToAttr[][2] = { // .x omitted @@ -216,7 +223,7 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { bool HasApertureRegs = TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs(); - if (!HasApertureRegs && hasAddrSpaceCast(F)) + if (!HasApertureRegs && hasAddrSpaceCast(F, AS)) F.addFnAttr("amdgpu-queue-ptr"); } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index c011be6fa16..91b3649f5c3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -37,6 +37,7 @@ class AMDGPUAnnotateUniformValues : public FunctionPass, LoopInfo *LI; DenseMap<Value*, GetElementPtrInst*> noClobberClones; bool isKernelFunc; + AMDGPUAS AMDGPUASI; public: static char ID; @@ -130,8 +131,8 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { Value *Ptr = I.getPointerOperand(); if (!DA->isUniform(Ptr)) return; - auto isGlobalLoad = [](LoadInst &Load)->bool { - return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; + auto isGlobalLoad = [&](LoadInst &Load)->bool { + return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS; }; // We're tracking up to the Function boundaries // We cannot go beyond because of FunctionPass restrictions @@ -166,6 +167,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { } bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { + AMDGPUASI = AMDGPU::getAMDGPUAS(M); return false; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index accfd698108..4f2a0ca2cd0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -17,6 +17,7 @@ // #include "AMDGPUAsmPrinter.h" +#include "AMDGPUTargetMachine.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "InstPrinter/AMDGPUInstPrinter.h" #include "Utils/AMDGPUBaseInfo.h" @@ -92,7 +93,9 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() { AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) - : AsmPrinter(TM, std::move(Streamer)) {} + : AsmPrinter(TM, std::move(Streamer)) { + AMDGPUASI = static_cast<AMDGPUTargetMachine*>(&TM)->getAMDGPUAS(); + } StringRef AMDGPUAsmPrinter::getPassName() const { return "AMDGPU Assembly Printer"; @@ -174,7 +177,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Group segment variables aren't emitted in HSA. 
- if (AMDGPU::isGroupSegment(GV)) + if (AMDGPU::isGroupSegment(GV, AMDGPUASI)) return; AsmPrinter::EmitGlobalVariable(GV); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index be476e3f14d..13425c8b2a0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -16,6 +16,7 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H #include "AMDKernelCodeT.h" +#include "AMDGPU.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include <cstddef> @@ -150,6 +151,7 @@ public: protected: std::vector<std::string> DisasmLines, HexLines; size_t DisasmLineMaxLen; + AMDGPUAS AMDGPUASI; }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index ce70d150e52..e67ae092fdd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -31,7 +31,7 @@ using namespace llvm; #endif AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI) - : CallLowering(&TLI) { + : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) { } bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, @@ -49,7 +49,7 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = *MF.getFunction(); const DataLayout &DL = F.getParent()->getDataLayout(); - PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS); LLT PtrType = getLLTForType(*PtrTy, DL); unsigned DstReg = MRI.createGenericVirtualRegister(PtrType); unsigned KernArgSegmentPtr = @@ -70,7 +70,7 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder, MachineFunction &MF = MIRBuilder.getMF(); const Function &F = *MF.getFunction(); const DataLayout &DL = F.getParent()->getDataLayout(); - PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); unsigned TypeSize = DL.getTypeStoreSize(ParamTy); unsigned Align = DL.getABITypeAlignment(ParamTy); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h index b5f3fa5617b..09bdf8ffcde 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H +#include "AMDGPU.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" namespace llvm { @@ -22,6 +23,7 @@ namespace llvm { class AMDGPUTargetLowering; class AMDGPUCallLowering: public CallLowering { + AMDGPUAS AMDGPUASI; unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy, unsigned Offset) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index fddf94339a1..ca695c1d53c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -67,10 +67,13 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can // make the right decision when generating code for different targets. 
const AMDGPUSubtarget *Subtarget; + AMDGPUAS AMDGPUASI; public: explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(TM, OptLevel) {} + : SelectionDAGISel(TM, OptLevel){ + AMDGPUASI = AMDGPU::getAMDGPUAS(TM); + } ~AMDGPUDAGToDAGISel() override = default; bool runOnMachineFunction(MachineFunction &MF) override; @@ -269,7 +272,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || - cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) + cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS) return N; const SITargetLowering& Lowering = @@ -586,9 +589,9 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { if (!N->readMem()) return false; if (CbId == -1) - return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; + return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS; - return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId; + return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; } bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { @@ -1536,7 +1539,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { MemSDNode *Mem = cast<MemSDNode>(N); unsigned AS = Mem->getAddressSpace(); - if (AS == AMDGPUAS::FLAT_ADDRESS) { + if (AS == AMDGPUASI.FLAT_ADDRESS) { SelectCode(N); return; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index b57cc00a71f..af3c9ff28df 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -59,6 +59,7 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) { AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { + AMDGPUASI = AMDGPU::getAMDGPUAS(TM); // Lower floating point store/load to integer store/load to reduce the number // of patterns in tablegen. setOperationAction(ISD::LOAD, MVT::f32, Promote); @@ -967,19 +968,16 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = G->getGlobal(); - switch (G->getAddressSpace()) { - case AMDGPUAS::LOCAL_ADDRESS: { + if (G->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) { // XXX: What does the value of G->getOffset() mean? assert(G->getOffset() == 0 && "Do not know what to do with an non-zero offset"); // TODO: We could emit code to handle the initialization somewhere. 
- if (hasDefinedInitializer(GV)) - break; - - unsigned Offset = MFI->allocateLDSGlobal(DL, *GV); - return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); - } + if (!hasDefinedInitializer(GV)) { + unsigned Offset = MFI->allocateLDSGlobal(DL, *GV); + return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); + } } const Function &Fn = *DAG.getMachineFunction().getFunction(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index a41200ceb21..73860383fd4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H +#include "AMDGPU.h" #include "llvm/Target/TargetLowering.h" namespace llvm { @@ -34,6 +35,7 @@ private: protected: const AMDGPUSubtarget *Subtarget; + AMDGPUAS AMDGPUASI; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; @@ -224,6 +226,10 @@ public: /// type of implicit parameter. uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const; + + AMDGPUAS getAMDGPUAS() const { + return AMDGPUASI; + } }; namespace AMDGPUISD { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp index a3abb96fb94..a01f5d37c7c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -30,7 +30,7 @@ using namespace llvm; void AMDGPUInstrInfo::anchor() {} AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) - : AMDGPUGenInstrInfo(-1, -1), ST(ST) {} + : AMDGPUGenInstrInfo(-1, -1), ST(ST), AMDGPUASI(ST.getAMDGPUAS()) {} // FIXME: This behaves strangely. 
If, for example, you have 32 load + stores, // the first 16 loads will be interleaved with the stores, and the next 16 will diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h index bd8e389639f..a122fd612ec 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H +#include "AMDGPU.h" #include "llvm/Target/TargetInstrInfo.h" #include "Utils/AMDGPUBaseInfo.h" @@ -35,6 +36,8 @@ private: const AMDGPUSubtarget &ST; virtual void anchor(); +protected: + AMDGPUAS AMDGPUASI; public: explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index d133851c852..8867ed689a3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -33,7 +33,7 @@ using namespace llvm; AMDGPUInstructionSelector::AMDGPUInstructionSelector( const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI) : InstructionSelector(), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) {} + TRI(*STI.getRegisterInfo()), RBI(RBI), AMDGPUASI(STI.getAMDGPUAS()) {} MachineOperand AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, @@ -291,7 +291,7 @@ bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I, if (!I.hasOneMemOperand()) return false; - if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS) + if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS) return false; if (!isInstrUniform(I)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 783f1408b3d..c87102e55df 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -14,6 +14,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H +#include "AMDGPU.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" @@ -35,7 +36,6 @@ public: const AMDGPURegisterBankInfo &RBI); bool select(MachineInstr &I) const override; - private: struct GEPInfo { const MachineInstr &GEP; @@ -59,6 +59,8 @@ private: const SIInstrInfo &TII; const SIRegisterInfo &TRI; const AMDGPURegisterBankInfo &RBI; +protected: + AMDGPUAS AMDGPUASI; }; } // End llvm namespace. 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index c4ac3180453..b8d681298de 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -204,7 +204,7 @@ def COND_NULL : PatLeaf < //===----------------------------------------------------------------------===// class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{ - return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; + return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS; }]>; class PrivateLoad <SDPatternOperator op> : PrivateMemOp < @@ -222,7 +222,7 @@ def truncstorei16_private : PrivateStore <truncstorei16>; def store_private : PrivateStore <store>; class GlobalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{ - return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; + return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS; }]>; // Global address space loads @@ -242,7 +242,7 @@ def global_store_atomic : GlobalStore<atomic_store>; class ConstantMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{ - return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; + return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS; }]>; // Constant address space loads @@ -253,7 +253,7 @@ class ConstantLoad <SDPatternOperator op> : ConstantMemOp < def constant_load : ConstantLoad<load>; class LocalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{ - return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; // Local address space loads @@ -266,7 +266,7 @@ class LocalStore <SDPatternOperator op> : LocalMemOp < >; class FlatMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{ - return cast<MemSDNode>(N)->getAddressSPace() == AMDGPUAS::FLAT_ADDRESS; + return cast<MemSDNode>(N)->getAddressSPace() == AMDGPUASI.FLAT_ADDRESS; }]>; class FlatLoad <SDPatternOperator op> : FlatMemOp < @@ -348,7 +348,7 @@ def local_store_aligned8bytes : Aligned8Bytes < class local_binary_atomic_op<SDNode atomic_op> : PatFrag<(ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), [{ - return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; @@ -366,7 +366,7 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>; def mskor_global : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ - return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; + return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS; }]>; multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> { @@ -376,7 +376,7 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> { (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{ AtomicSDNode *AN = cast<AtomicSDNode>(N); return AN->getMemoryVT() == MVT::i32 && - AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; def _64_local : PatFrag< @@ -384,7 +384,7 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> { (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{ AtomicSDNode *AN = cast<AtomicSDNode>(N); return AN->getMemoryVT() == MVT::i64 && - AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; } @@ -394,17 +394,17 @@ multiclass global_binary_atomic_op<SDNode 
atomic_op> { def "" : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), - [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>; + [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>; def _noret : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), - [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; + [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; def _ret : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), - [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; + [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; } defm atomic_swap_global : global_binary_atomic_op<atomic_swap>; @@ -422,22 +422,22 @@ defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>; def AMDGPUatomic_cmp_swap_global : PatFrag< (ops node:$ptr, node:$value), (AMDGPUatomic_cmp_swap node:$ptr, node:$value), - [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>; + [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>; def atomic_cmp_swap_global : PatFrag< (ops node:$ptr, node:$cmp, node:$value), (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), - [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>; + [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>; def atomic_cmp_swap_global_noret : PatFrag< (ops node:$ptr, node:$cmp, node:$value), (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), - [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; + [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; def atomic_cmp_swap_global_ret : PatFrag< (ops node:$ptr, node:$cmp, node:$value), (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), - [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; + [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; //===----------------------------------------------------------------------===// // Misc Pattern Fragments diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index e46b7ff554b..96bc53d06cd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -72,6 +72,7 @@ private: Module *Mod = nullptr; const DataLayout *DL = nullptr; MDNode *MaxWorkGroupSizeRange = nullptr; + AMDGPUAS AS; // FIXME: This should be per-kernel. uint32_t LocalMemLimit = 0; @@ -154,6 +155,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F); if (!ST.isPromoteAllocaEnabled()) return false; + AS = AMDGPU::getAMDGPUAS(*F.getParent()); FunctionType *FTy = F.getFunctionType(); @@ -162,7 +164,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { // we cannot use local memory in the pass. 
for (Type *ParamTy : FTy->params()) { PointerType *PtrTy = dyn_cast<PointerType>(ParamTy); - if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) { LocalMemLimit = 0; DEBUG(dbgs() << "Function has local memory argument. Promoting to " "local memory disabled.\n"); @@ -179,7 +181,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { // Check how much local memory is being used by global objects CurrentLocalMemUsage = 0; for (GlobalVariable &GV : Mod->globals()) { - if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) + if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS) continue; for (const User *U : GV.users()) { @@ -317,7 +319,7 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) { Type *I32Ty = Type::getInt32Ty(Mod->getContext()); Value *CastDispatchPtr = Builder.CreateBitCast( - DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS)); + DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS)); // We could do a single 64-bit load here, but it's likely that the basic // 32-bit and extract sequence is already present, and it is probably easier @@ -413,7 +415,7 @@ static bool canVectorizeInst(Instruction *Inst, User *User) { } } -static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { +static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType()); DEBUG(dbgs() << "Alloca candidate for vectorization\n"); @@ -468,7 +470,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { IRBuilder<> Builder(Inst); switch (Inst->getOpcode()) { case Instruction::Load: { - Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS); + Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); Value *Ptr = Inst->getOperand(0); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); @@ -480,7 +482,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { break; } case Instruction::Store: { - Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS); + Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); Value *Ptr = Inst->getOperand(1); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); @@ -673,7 +675,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { DEBUG(dbgs() << "Trying to promote " << I << '\n'); - if (tryPromoteAllocaToVector(&I)) { + if (tryPromoteAllocaToVector(&I, AS)) { DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n"); return; } @@ -734,7 +736,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { Twine(F->getName()) + Twine('.') + I.getName(), nullptr, GlobalVariable::NotThreadLocal, - AMDGPUAS::LOCAL_ADDRESS); + AS.LOCAL_ADDRESS); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); GV->setAlignment(I.getAlignment()); @@ -767,7 +769,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) { Value *Src0 = CI->getOperand(0); Type *EltTy = Src0->getType()->getPointerElementType(); - PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS); + PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS); if (isa<ConstantPointerNull>(CI->getOperand(0))) CI->setOperand(0, ConstantPointerNull::get(NewTy)); @@ -784,7 +786,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { continue; Type *EltTy = V->getType()->getPointerElementType(); - PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS); + PointerType *NewTy = PointerType::get(EltTy, 
AS.LOCAL_ADDRESS); // FIXME: It doesn't really make sense to try to do this for all // instructions. @@ -852,7 +854,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { Type *SrcTy = Src->getType()->getPointerElementType(); Function *ObjectSize = Intrinsic::getDeclaration(Mod, Intrinsic::objectsize, - { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) } + { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) } ); CallInst *NewCall = Builder.CreateCall( diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index a4bb8b9a5e5..92825684d90 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -135,6 +135,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, FeatureDisable(false), InstrItins(getInstrItineraryForCPU(GPU)) { + AS = AMDGPU::getAMDGPUAS(TT); initializeSubtargetDependencies(TT, GPU, FS); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 39289d0f149..c61a2ff818f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -157,6 +157,7 @@ protected: InstrItineraryData InstrItins; SelectionDAGTargetInfo TSInfo; + AMDGPUAS AS; public: AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, @@ -214,6 +215,10 @@ public: return MaxPrivateElementSize; } + AMDGPUAS getAMDGPUAS() const { + return AS; + } + bool has16BitInsts() const { return Has16BitInsts; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 68e78d50ef0..e8954c59479 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -240,6 +240,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU), FS, Options, getEffectiveRelocModel(RM), CM, OptLevel), TLOF(createTLOF(getTargetTriple())) { + AS = AMDGPU::getAMDGPUAS(TT); initAsmInfo(); } @@ -809,3 +810,4 @@ void GCNPassConfig::addPreEmitPass() { TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { return new GCNPassConfig(this, PM); } + diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index e36e940532c..934bf7f31ba 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -35,6 +35,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { protected: std::unique_ptr<TargetLoweringObjectFile> TLOF; AMDGPUIntrinsicInfo IntrinsicInfo; + AMDGPUAS AS; StringRef getGPUName(const Function &F) const; StringRef getFeatureString(const Function &F) const; @@ -57,17 +58,16 @@ public: TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + AMDGPUAS getAMDGPUAS() const { + return AS; + } void adjustPassManager(PassManagerBuilder &) override; /// Get the integer value of a null pointer in the given address space. 
uint64_t getNullPointerValue(unsigned AddrSpace) const { - switch(AddrSpace) { - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS) return -1; - default: - return 0; - } + return 0; } }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp index 1fddc88a705..c96761c0b04 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "AMDGPUTargetMachine.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPU.h" #include "llvm/MC/MCContext.h" @@ -22,7 +23,8 @@ using namespace llvm; MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { - if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO) && + auto AS = static_cast<const AMDGPUTargetMachine*>(&TM)->getAMDGPUAS(); + if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO, AS) && AMDGPU::shouldEmitConstantsToTextSection(TM.getTargetTriple())) return TextSection; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h index de327786dff..ca6210f6929 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H +#include "AMDGPU.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetMachine.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 4a6d12bd883..c5b7086dd48 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -48,7 +48,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, const DataLayout &DL = BB->getModule()->getDataLayout(); for (const Instruction &I : *BB) { const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I); - if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) + if (!GEP || GEP->getAddressSpace() != ST->getAMDGPUAS().PRIVATE_ADDRESS) continue; const Value *Ptr = GEP->getPointerOperand(); @@ -108,25 +108,24 @@ unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) { } unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { - switch (AddrSpace) { - case AMDGPUAS::GLOBAL_ADDRESS: - case AMDGPUAS::CONSTANT_ADDRESS: - case AMDGPUAS::FLAT_ADDRESS: + AMDGPUAS AS = ST->getAMDGPUAS(); + if (AddrSpace == AS.GLOBAL_ADDRESS || + AddrSpace == AS.CONSTANT_ADDRESS || + AddrSpace == AS.FLAT_ADDRESS) return 128; - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + if (AddrSpace == AS.LOCAL_ADDRESS || + AddrSpace == AS.REGION_ADDRESS) return 64; - case AMDGPUAS::PRIVATE_ADDRESS: + if (AddrSpace == AS.PRIVATE_ADDRESS) return 8 * ST->getMaxPrivateElementSize(); - default: - if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && - (AddrSpace == AMDGPUAS::PARAM_D_ADDRESS || - AddrSpace == AMDGPUAS::PARAM_I_ADDRESS || - (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 && - AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15))) - return 128; - llvm_unreachable("unhandled address space"); - } + + if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && + (AddrSpace == AS.PARAM_D_ADDRESS || + 
AddrSpace == AS.PARAM_I_ADDRESS || + (AddrSpace >= AS.CONSTANT_BUFFER_0 && + AddrSpace <= AS.CONSTANT_BUFFER_15))) + return 128; + llvm_unreachable("unhandled address space"); } bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, @@ -135,7 +134,7 @@ bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, // We allow vectorization of flat stores, even though we may need to decompose // them later if they may access private memory. We don't have enough context // here, and legalization can handle it. - if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) { + if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS) { return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) && ChainSizeInBytes <= ST->getMaxPrivateElementSize(); } @@ -362,7 +361,7 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const { // All other loads are not divergent, because if threads issue loads with the // same arguments, they will always get the same result. if (const LoadInst *Load = dyn_cast<LoadInst>(V)) - return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; + return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS; // Atomics are divergent because they are executed sequentially: when an // atomic operation refers to the same address in each thread, then each diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index c64c4bf5f6a..71d6306bc1a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -110,7 +110,7 @@ public: if (IsGraphicsShader) return -1; return ST->hasFlatAddressSpace() ? - AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE; + ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE; } unsigned getVectorSplitCost() { return 0; } diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 45a7fe6d343..29f5eef67ec 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -21,8 +21,8 @@ def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset" class MubufLoad <SDPatternOperator op> : PatFrag < (ops node:$ptr), (op node:$ptr), [{ auto const AS = cast<MemSDNode>(N)->getAddressSpace(); - return AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS; + return AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.CONSTANT_ADDRESS; }]>; def mubuf_load : MubufLoad <load>; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 849fb8ad50f..b0ac0e689a0 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -136,7 +136,7 @@ multiclass FLAT_Atomic_Pseudo< class flat_binary_atomic_op<SDNode atomic_op> : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), - [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}] + [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}] >; def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>; @@ -284,16 +284,16 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr), (ld node:$ptr), [{ auto const AS = cast<MemSDNode>(N)->getAddressSpace(); - return AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS; + return AS == 
AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.CONSTANT_ADDRESS; }]>; class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr), (st node:$val, node:$ptr), [{ auto const AS = cast<MemSDNode>(N)->getAddressSpace(); - return AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::GLOBAL_ADDRESS; + return AS == AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.GLOBAL_ADDRESS; }]>; def atomic_flat_load : flat_ld <atomic_load>; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp index 7b4f25106e7..59571a48a96 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp @@ -264,20 +264,18 @@ AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const { AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer( unsigned AddressSpace) const { - switch (AddressSpace) { - case AMDGPUAS::PRIVATE_ADDRESS: + if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS) return AddressSpaceQualifier::Private; - case AMDGPUAS::GLOBAL_ADDRESS: + if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS) return AddressSpaceQualifier::Global; - case AMDGPUAS::CONSTANT_ADDRESS: + if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS) return AddressSpaceQualifier::Constant; - case AMDGPUAS::LOCAL_ADDRESS: + if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS) return AddressSpaceQualifier::Local; - case AMDGPUAS::FLAT_ADDRESS: + if (AddressSpace == AMDGPUASI.FLAT_ADDRESS) return AddressSpaceQualifier::Generic; - case AMDGPUAS::REGION_ADDRESS: + if (AddressSpace == AMDGPUASI.REGION_ADDRESS) return AddressSpaceQualifier::Region; - } llvm_unreachable("Unknown address space qualifier"); } @@ -304,7 +302,7 @@ ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual, "image3d_t", ValueKind::Image) .Default(isa<PointerType>(Ty) ? (Ty->getPointerAddressSpace() == - AMDGPUAS::LOCAL_ADDRESS ? + AMDGPUASI.LOCAL_ADDRESS ? 
ValueKind::DynamicSharedPointer : ValueKind::GlobalBuffer) : ValueKind::ByValue); @@ -460,7 +458,7 @@ void MetadataStreamer::emitKernelArgs(const Function &Func) { return; auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), - AMDGPUAS::GLOBAL_ADDRESS); + AMDGPUASI.GLOBAL_ADDRESS); emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); } @@ -513,7 +511,7 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, if (auto PtrTy = dyn_cast<PointerType>(Ty)) { auto ElTy = PtrTy->getElementType(); - if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ElTy->isSized()) + if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS && ElTy->isSized()) Arg.mPointeeAlign = DL.getABITypeAlignment(ElTy); } @@ -576,6 +574,7 @@ void MetadataStreamer::emitKernelDebugProps( } void MetadataStreamer::begin(const Module &Mod) { + AMDGPUASI = getAMDGPUAS(Mod); emitVersion(); emitPrintf(Mod); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h index 12d4c5e5dd5..8d4c51763f6 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H +#include "AMDGPU.h" #include "AMDGPUCodeObjectMetadata.h" #include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" @@ -36,6 +37,7 @@ namespace CodeObject { class MetadataStreamer final { private: Metadata CodeObjectMetadata; + AMDGPUAS AMDGPUASI; void dump(StringRef YamlString) const; diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 812555060b2..a8db5cc13b3 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -557,7 +557,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const } case Intrinsic::r600_implicitarg_ptr: { - MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS); + MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS); uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT); return DAG.getConstant(ByteOffset, DL, PtrVT); } @@ -707,12 +707,12 @@ SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op); - if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) + if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS) return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); const DataLayout &DL = DAG.getDataLayout(); const GlobalValue *GV = GSD->getGlobal(); - MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); + MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS); SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT); return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA); @@ -869,7 +869,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, unsigned DwordOffset) const { unsigned ByteOffset = DwordOffset * 4; PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), - AMDGPUAS::CONSTANT_BUFFER_0); + AMDGPUASI.CONSTANT_BUFFER_0); // We shouldn't be using an offset wider than 16-bits for implicit parameters. 
assert(isInt<16>(ByteOffset)); @@ -1107,7 +1107,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, //TODO: Who creates the i8 stores? assert(Store->isTruncatingStore() || Store->getValue().getValueType() == MVT::i8); - assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS); + assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS); SDValue Mask; if (Store->getMemoryVT() == MVT::i8) { @@ -1205,9 +1205,10 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); // Neither LOCAL nor PRIVATE can do vectors at the moment - if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) && + if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) && VT.isVector()) { - if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) { + if ((AS == AMDGPUASI.PRIVATE_ADDRESS) && + StoreNode->isTruncatingStore()) { // Add an extra level of chain to isolate this vector SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain); // TODO: can the chain be replaced without creating a new store? @@ -1230,7 +1231,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr, DAG.getConstant(2, DL, PtrVT)); - if (AS == AMDGPUAS::GLOBAL_ADDRESS) { + if (AS == AMDGPUASI.GLOBAL_ADDRESS) { // It is beneficial to create MSKOR here instead of combiner to avoid // artificial dependencies introduced by RMW if (StoreNode->isTruncatingStore()) { @@ -1283,7 +1284,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { } // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes - if (AS != AMDGPUAS::PRIVATE_ADDRESS) + if (AS != AMDGPUASI.PRIVATE_ADDRESS) return SDValue(); if (MemVT.bitsLT(MVT::i32)) @@ -1302,39 +1303,39 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // return (512 + (kc_bank << 12) static int -ConstantAddressBlock(unsigned AddressSpace) { +ConstantAddressBlock(unsigned AddressSpace, AMDGPUAS AMDGPUASI) { switch (AddressSpace) { - case AMDGPUAS::CONSTANT_BUFFER_0: + case AMDGPUASI.CONSTANT_BUFFER_0: return 512; - case AMDGPUAS::CONSTANT_BUFFER_1: + case AMDGPUASI.CONSTANT_BUFFER_1: return 512 + 4096; - case AMDGPUAS::CONSTANT_BUFFER_2: + case AMDGPUASI.CONSTANT_BUFFER_2: return 512 + 4096 * 2; - case AMDGPUAS::CONSTANT_BUFFER_3: + case AMDGPUASI.CONSTANT_BUFFER_3: return 512 + 4096 * 3; - case AMDGPUAS::CONSTANT_BUFFER_4: + case AMDGPUASI.CONSTANT_BUFFER_4: return 512 + 4096 * 4; - case AMDGPUAS::CONSTANT_BUFFER_5: + case AMDGPUASI.CONSTANT_BUFFER_5: return 512 + 4096 * 5; - case AMDGPUAS::CONSTANT_BUFFER_6: + case AMDGPUASI.CONSTANT_BUFFER_6: return 512 + 4096 * 6; - case AMDGPUAS::CONSTANT_BUFFER_7: + case AMDGPUASI.CONSTANT_BUFFER_7: return 512 + 4096 * 7; - case AMDGPUAS::CONSTANT_BUFFER_8: + case AMDGPUASI.CONSTANT_BUFFER_8: return 512 + 4096 * 8; - case AMDGPUAS::CONSTANT_BUFFER_9: + case AMDGPUASI.CONSTANT_BUFFER_9: return 512 + 4096 * 9; - case AMDGPUAS::CONSTANT_BUFFER_10: + case AMDGPUASI.CONSTANT_BUFFER_10: return 512 + 4096 * 10; - case AMDGPUAS::CONSTANT_BUFFER_11: + case AMDGPUASI.CONSTANT_BUFFER_11: return 512 + 4096 * 11; - case AMDGPUAS::CONSTANT_BUFFER_12: + case AMDGPUASI.CONSTANT_BUFFER_12: return 512 + 4096 * 12; - case AMDGPUAS::CONSTANT_BUFFER_13: + case AMDGPUASI.CONSTANT_BUFFER_13: return 512 + 4096 * 13; - case AMDGPUAS::CONSTANT_BUFFER_14: + case AMDGPUASI.CONSTANT_BUFFER_14: return 512 + 4096 * 14; - 
case AMDGPUAS::CONSTANT_BUFFER_15: + case AMDGPUASI.CONSTANT_BUFFER_15: return 512 + 4096 * 15; default: return -1; @@ -1402,7 +1403,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { EVT MemVT = LoadNode->getMemoryVT(); ISD::LoadExtType ExtType = LoadNode->getExtensionType(); - if (AS == AMDGPUAS::PRIVATE_ADDRESS && + if (AS == AMDGPUASI.PRIVATE_ADDRESS && ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) { return lowerPrivateExtLoad(Op, DAG); } @@ -1412,13 +1413,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = LoadNode->getChain(); SDValue Ptr = LoadNode->getBasePtr(); - if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || - LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && + if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS || + LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) && VT.isVector()) { return scalarizeVectorLoad(LoadNode, DAG); } - int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); + int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace(), + AMDGPUASI); if (ConstantBlock > -1 && ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) || (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) { @@ -1450,7 +1452,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, DL, MVT::i32)), DAG.getConstant(LoadNode->getAddressSpace() - - AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32) + AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32) ); } @@ -1486,7 +1488,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(MergedValues, DL); } - if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { + if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) { return SDValue(); } @@ -1563,7 +1565,7 @@ SDValue R600TargetLowering::LowerFormalArguments( } PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), - AMDGPUAS::CONSTANT_BUFFER_0); + AMDGPUASI.CONSTANT_BUFFER_0); // i64 isn't a legal type, so the register type used ends up as i32, which // isn't expected here. 
It attempts to create this sextload, but it ends up diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index a5d1a0afb9f..bac557ba989 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -316,7 +316,7 @@ class VTX_READ <string name, dag outs, list<dag> pattern> class LoadParamFrag <PatFrag load_type> : PatFrag < (ops node:$ptr), (load_type node:$ptr), [{ return isConstantLoad(cast<LoadSDNode>(N), 0) || - (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }] + (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.PARAM_I_ADDRESS); }] >; def vtx_id3_az_extloadi8 : LoadParamFrag<az_extloadi8>; @@ -326,8 +326,8 @@ def vtx_id3_load : LoadParamFrag<load>; class LoadVtxId1 <PatFrag load> : PatFrag < (ops node:$ptr), (load node:$ptr), [{ const MemSDNode *LD = cast<MemSDNode>(N); - return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || - (LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + return LD->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS || + (LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && !isa<GlobalValue>(GetUnderlyingObject( LD->getMemOperand()->getValue(), CurDAG->getDataLayout()))); }]>; @@ -339,7 +339,7 @@ def vtx_id1_load : LoadVtxId1 <load>; class LoadVtxId2 <PatFrag load> : PatFrag < (ops node:$ptr), (load node:$ptr), [{ const MemSDNode *LD = cast<MemSDNode>(N); - return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + return LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && isa<GlobalValue>(GetUnderlyingObject( LD->getMemOperand()->getValue(), CurDAG->getDataLayout())); }]>; diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 68afcca12b8..abe6af9a6d3 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -202,6 +202,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was // specified. const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + auto AMDGPUASI = ST.getAMDGPUAS(); if (ST.debuggerEmitPrologue()) emitDebuggerPrologue(MF, MBB); @@ -340,7 +341,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, PointerType *PtrTy = PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()), - AMDGPUAS::CONSTANT_ADDRESS); + AMDGPUASI.CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); auto MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad | diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 56734345bdd..783369c8200 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -597,8 +597,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, if (AM.BaseGV) return false; - switch (AS) { - case AMDGPUAS::GLOBAL_ADDRESS: + if (AS == AMDGPUASI.GLOBAL_ADDRESS) { if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { // Assume the we will use FLAT for all global memory accesses // on VI. @@ -613,8 +612,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, } return isLegalMUBUFAddressingMode(AM); - - case AMDGPUAS::CONSTANT_ADDRESS: + } else if (AS == AMDGPUASI.CONSTANT_ADDRESS) { // If the offset isn't a multiple of 4, it probably isn't going to be // correctly aligned. // FIXME: Can we get the real alignment here? 
@@ -652,11 +650,10 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, return false; - case AMDGPUAS::PRIVATE_ADDRESS: + } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { return isLegalMUBUFAddressingMode(AM); - - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + } else if (AS == AMDGPUASI.LOCAL_ADDRESS || + AS == AMDGPUASI.REGION_ADDRESS) { // Basic, single offset DS instructions allow a 16-bit unsigned immediate // field. // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have @@ -671,17 +668,15 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, return true; return false; - - case AMDGPUAS::FLAT_ADDRESS: - case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: + } else if (AS == AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.UNKNOWN_ADDRESS_SPACE) { // For an unknown address space, this usually means that this is for some // reason being used for pure arithmetic, and not based on some addressing // computation. We don't have instructions that compute pointers with any // addressing modes, so treat them as having no offset like flat // instructions. return isLegalFlatAddressingMode(AM); - - default: + } else { llvm_unreachable("unhandled address space"); } } @@ -702,8 +697,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, return false; } - if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || - AddrSpace == AMDGPUAS::REGION_ADDRESS) { + if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS || + AddrSpace == AMDGPUASI.REGION_ADDRESS) { // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte // aligned, 8 byte access in a single operation using ds_read2/write2_b32 // with adjacent offsets. @@ -718,8 +713,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, // will access scratch. If we had access to the IR function, then we // could determine if any private memory was used in the function. if (!Subtarget->hasUnalignedScratchAccess() && - (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS || - AddrSpace == AMDGPUAS::FLAT_ADDRESS)) { + (AddrSpace == AMDGPUASI.PRIVATE_ADDRESS || + AddrSpace == AMDGPUASI.FLAT_ADDRESS)) { return false; } @@ -727,7 +722,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, // If we have an uniform constant load, it still requires using a slow // buffer instruction if unaligned. if (IsFast) { - *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) ? + *IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS) ? (Align % 4 == 0) : true; } @@ -767,15 +762,16 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, return MVT::Other; } -static bool isFlatGlobalAddrSpace(unsigned AS) { - return AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS; +static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) { + return AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.CONSTANT_ADDRESS; } bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { - return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS); + return isFlatGlobalAddrSpace(SrcAS, AMDGPUASI) && + isFlatGlobalAddrSpace(DestAS, AMDGPUASI); } bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const { @@ -789,7 +785,7 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { // Flat -> private/local is a simple truncate. 
// Flat -> global is no-op - if (SrcAS == AMDGPUAS::FLAT_ADDRESS) + if (SrcAS == AMDGPUASI.FLAT_ADDRESS) return true; return isNoopAddrSpaceCast(SrcAS, DestAS); @@ -850,7 +846,7 @@ SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG, unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); - MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); + MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS); SDValue BasePtr = DAG.getCopyFromReg(Chain, SL, MRI.getLiveInVirtReg(InputPtrReg), PtrVT); return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, @@ -863,7 +859,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const ISD::InputArg *Arg) const { const DataLayout &DL = DAG.getDataLayout(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); - PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); unsigned Align = DL.getABITypeAlignment(Ty); @@ -1073,7 +1069,7 @@ SDValue SITargetLowering::LowerFormalArguments( auto *ParamTy = dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex())); if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS && - ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + ParamTy && ParamTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) { // On SI local pointers are just offsets into LDS, so they are always // less than 16-bits. On CI and newer they could potentially be // real pointers, so we can't guarantee their size. @@ -2206,13 +2202,13 @@ void SITargetLowering::createDebuggerPrologueStackObjects( bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const { const Triple &TT = getTargetMachine().getTargetTriple(); - return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + return GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && AMDGPU::shouldEmitConstantsToTextSection(TT); } bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const { - return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || - GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) && + return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS || + GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) && !shouldEmitFixup(GV) && !getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); } @@ -2351,7 +2347,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, SelectionDAG &DAG) const { if (Subtarget->hasApertureRegs()) { // Read from Aperture Registers directly. - unsigned RegNo = (AS == AMDGPUAS::LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE : + unsigned RegNo = (AS == AMDGPUASI.LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE : AMDGPU::SRC_PRIVATE_BASE; return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, RegNo, MVT::i32); } @@ -2367,7 +2363,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, // Offset into amd_queue_t for group_segment_aperture_base_hi / // private_segment_aperture_base_hi. - uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44; + uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 
0x40 : 0x44; SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, QueuePtr, DAG.getConstant(StructOffset, SL, MVT::i64)); @@ -2376,7 +2372,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, // TODO: We should use the value from the IR intrinsic call, but it might not // be available and how do we get it? Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()), - AMDGPUAS::CONSTANT_ADDRESS)); + AMDGPUASI.CONSTANT_ADDRESS)); MachinePointerInfo PtrInfo(V, StructOffset); return DAG.getLoad(MVT::i32, SL, QueuePtr.getValue(1), Ptr, PtrInfo, @@ -2397,9 +2393,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, static_cast<const AMDGPUTargetMachine &>(getTargetMachine()); // flat -> local/private - if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) { + if (ASC->getSrcAddressSpace() == AMDGPUASI.FLAT_ADDRESS) { unsigned DestAS = ASC->getDestAddressSpace(); - if (DestAS == AMDGPUAS::LOCAL_ADDRESS || DestAS == AMDGPUAS::PRIVATE_ADDRESS) { + + if (DestAS == AMDGPUASI.LOCAL_ADDRESS || + DestAS == AMDGPUASI.PRIVATE_ADDRESS) { unsigned NullVal = TM.getNullPointerValue(DestAS); SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32); SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE); @@ -2411,9 +2409,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, } // local/private -> flat - if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) { + if (ASC->getDestAddressSpace() == AMDGPUASI.FLAT_ADDRESS) { unsigned SrcAS = ASC->getSrcAddressSpace(); - if (SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS) { + + if (SrcAS == AMDGPUASI.LOCAL_ADDRESS || + SrcAS == AMDGPUASI.PRIVATE_ADDRESS) { unsigned NullVal = TM.getNullPointerValue(SrcAS); SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32); @@ -2513,8 +2513,8 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, bool SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // We can fold offsets for anything that doesn't require a GOT relocation. - return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || - GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) && + return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS || + GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) && !shouldEmitGOTReloc(GA->getGlobal()); } @@ -2565,8 +2565,8 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, SelectionDAG &DAG) const { GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op); - if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS && - GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) + if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS && + GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS) return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); SDLoc DL(GSD); @@ -2583,7 +2583,7 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, SIInstrInfo::MO_GOTPCREL32); Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext()); - PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS); const DataLayout &DataLayout = DAG.getDataLayout(); unsigned Align = DataLayout.getABITypeAlignment(PtrTy); // FIXME: Use a PseudoSourceValue once those can be assigned an address space. 
@@ -3229,21 +3229,20 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); // If there is a possibilty that flat instruction access scratch memory // then we need to use the same legalization rules we use for private. - if (AS == AMDGPUAS::FLAT_ADDRESS) + if (AS == AMDGPUASI.FLAT_ADDRESS) AS = MFI->hasFlatScratchInit() ? - AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS; + AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS; unsigned NumElements = MemVT.getVectorNumElements(); - switch (AS) { - case AMDGPUAS::CONSTANT_ADDRESS: + if (AS == AMDGPUASI.CONSTANT_ADDRESS) { if (isMemOpUniform(Load)) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they // have the same legalization requirements as global and private // loads. // - LLVM_FALLTHROUGH; - case AMDGPUAS::GLOBAL_ADDRESS: + } + if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) { if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) && isMemOpHasNoClobberedMemOperand(Load)) return SDValue(); @@ -3251,13 +3250,15 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { // have the same legalization requirements as global and private // loads. // - LLVM_FALLTHROUGH; - case AMDGPUAS::FLAT_ADDRESS: + } + if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.FLAT_ADDRESS) { if (NumElements > 4) return SplitVectorLoad(Op, DAG); // v4 loads are supported for private and global memory. return SDValue(); - case AMDGPUAS::PRIVATE_ADDRESS: + } + if (AS == AMDGPUASI.PRIVATE_ADDRESS) { // Depending on the setting of the private_element_size field in the // resource descriptor, we can only make private accesses up to a certain // size. @@ -3276,7 +3277,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { default: llvm_unreachable("unsupported private_element_size"); } - case AMDGPUAS::LOCAL_ADDRESS: + } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { if (NumElements > 2) return SplitVectorLoad(Op, DAG); @@ -3285,9 +3286,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { // If properly aligned, if we split we might be able to use ds_read_b64. return SplitVectorLoad(Op, DAG); - default: - return SDValue(); } + return SDValue(); } SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { @@ -3656,18 +3656,17 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); // If there is a possibilty that flat instruction access scratch memory // then we need to use the same legalization rules we use for private. - if (AS == AMDGPUAS::FLAT_ADDRESS) + if (AS == AMDGPUASI.FLAT_ADDRESS) AS = MFI->hasFlatScratchInit() ? 
- AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS; + AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS; unsigned NumElements = VT.getVectorNumElements(); - switch (AS) { - case AMDGPUAS::GLOBAL_ADDRESS: - case AMDGPUAS::FLAT_ADDRESS: + if (AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.FLAT_ADDRESS) { if (NumElements > 4) return SplitVectorStore(Op, DAG); return SDValue(); - case AMDGPUAS::PRIVATE_ADDRESS: { + } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { switch (Subtarget->getMaxPrivateElementSize()) { case 4: return scalarizeVectorStore(Store, DAG); @@ -3682,8 +3681,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { default: llvm_unreachable("unsupported private_element_size"); } - } - case AMDGPUAS::LOCAL_ADDRESS: { + } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { if (NumElements > 2) return SplitVectorStore(Op, DAG); @@ -3692,8 +3690,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // If properly aligned, if we split we might be able to use ds_write_b64. return SplitVectorStore(Op, DAG); - } - default: + } else { llvm_unreachable("unhandled address space"); } } @@ -3724,7 +3721,7 @@ SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) co unsigned AS = AtomicNode->getAddressSpace(); // No custom lowering required for local address space - if (!isFlatGlobalAddrSpace(AS)) + if (!isFlatGlobalAddrSpace(AS, AMDGPUASI)) return Op; // Non-local address space requires custom lowering for atomic compare @@ -3781,26 +3778,26 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N, /// the immediate offsets of a memory instruction for the given address space. static bool canFoldOffset(unsigned OffsetSize, unsigned AS, const SISubtarget &STI) { - switch (AS) { - case AMDGPUAS::GLOBAL_ADDRESS: + auto AMDGPUASI = STI.getAMDGPUAS(); + if (AS == AMDGPUASI.GLOBAL_ADDRESS) { // MUBUF instructions a 12-bit offset in bytes. return isUInt<12>(OffsetSize); - case AMDGPUAS::CONSTANT_ADDRESS: + } + if (AS == AMDGPUASI.CONSTANT_ADDRESS) { // SMRD instructions have an 8-bit offset in dwords on SI and // a 20-bit offset in bytes on VI. if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) return isUInt<20>(OffsetSize); else return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4); - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + } + if (AS == AMDGPUASI.LOCAL_ADDRESS || + AS == AMDGPUASI.REGION_ADDRESS) { // The single offset versions have a 16-bit offset in bytes. return isUInt<16>(OffsetSize); - case AMDGPUAS::PRIVATE_ADDRESS: - // Indirect register addressing does not use any offsets. - default: - return false; } + // Indirect register addressing does not use any offsets. + return false; } // (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2) @@ -3858,7 +3855,7 @@ SDValue SITargetLowering::performMemSDNodeCombine(MemSDNode *N, // TODO: We could also do this for multiplies. 
unsigned AS = N->getAddressSpace(); - if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) { + if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUASI.PRIVATE_ADDRESS) { SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI); if (NewPtr) { SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end()); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index e2e0895f899..bbd8de2e7d1 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3747,7 +3747,7 @@ unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI, return AMDGPU::NoRegister; assert(!MI.memoperands_empty() && - (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS); + (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS); FrameIndex = Addr->getIndex(); return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); @@ -3854,7 +3854,7 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const { return true; for (const MachineMemOperand *MMO : MI.memoperands()) { - if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS) + if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS) return true; } return false; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 84319153b01..561feb98d59 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -107,7 +107,7 @@ def SIld_local : SDNode <"ISD::LOAD", SDTLoad, >; def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{ - return cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + return cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; def si_load_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{ @@ -144,7 +144,7 @@ def SIst_local : SDNode <"ISD::STORE", SDTStore, def si_st_local : PatFrag < (ops node:$val, node:$ptr), (SIst_local node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + return cast<StoreSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; def si_store_local : PatFrag < diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index 5dfae3f8f3f..5b840a14dbc 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -226,9 +226,9 @@ def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime> def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ auto Ld = cast<LoadSDNode>(N); return Ld->getAlignment() >= 4 && - ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) || - (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && + (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) && static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N))); }]>; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index de0fda4be6f..6b9a81976c6 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instruction.h" #include 
"llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" @@ -354,16 +355,16 @@ MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) { ELF::SHF_AMDGPU_HSA_AGENT); } -bool isGroupSegment(const GlobalValue *GV) { - return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) { + return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS; } -bool isGlobalSegment(const GlobalValue *GV) { - return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; +bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) { + return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS; } -bool isReadOnlySegment(const GlobalValue *GV) { - return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; +bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) { + return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS; } bool shouldEmitConstantsToTextSection(const Triple &TT) { @@ -736,6 +737,60 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset); } - } // end namespace AMDGPU + } // end namespace llvm + +const unsigned AMDGPUAS::MAX_COMMON_ADDRESS; +const unsigned AMDGPUAS::GLOBAL_ADDRESS; +const unsigned AMDGPUAS::LOCAL_ADDRESS; +const unsigned AMDGPUAS::PARAM_D_ADDRESS; +const unsigned AMDGPUAS::PARAM_I_ADDRESS; +const unsigned AMDGPUAS::CONSTANT_BUFFER_0; +const unsigned AMDGPUAS::CONSTANT_BUFFER_1; +const unsigned AMDGPUAS::CONSTANT_BUFFER_2; +const unsigned AMDGPUAS::CONSTANT_BUFFER_3; +const unsigned AMDGPUAS::CONSTANT_BUFFER_4; +const unsigned AMDGPUAS::CONSTANT_BUFFER_5; +const unsigned AMDGPUAS::CONSTANT_BUFFER_6; +const unsigned AMDGPUAS::CONSTANT_BUFFER_7; +const unsigned AMDGPUAS::CONSTANT_BUFFER_8; +const unsigned AMDGPUAS::CONSTANT_BUFFER_9; +const unsigned AMDGPUAS::CONSTANT_BUFFER_10; +const unsigned AMDGPUAS::CONSTANT_BUFFER_11; +const unsigned AMDGPUAS::CONSTANT_BUFFER_12; +const unsigned AMDGPUAS::CONSTANT_BUFFER_13; +const unsigned AMDGPUAS::CONSTANT_BUFFER_14; +const unsigned AMDGPUAS::CONSTANT_BUFFER_15; +const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE; + +namespace llvm { +namespace AMDGPU { + +AMDGPUAS getAMDGPUAS(Triple T) { + auto Env = T.getEnvironmentName(); + AMDGPUAS AS; + if (Env == "amdgiz" || Env == "amdgizcl") { + AS.FLAT_ADDRESS = 0; + AS.CONSTANT_ADDRESS = 4; + AS.PRIVATE_ADDRESS = 5; + AS.REGION_ADDRESS = 2; + } + else { + AS.FLAT_ADDRESS = 4; + AS.CONSTANT_ADDRESS = 2; + AS.PRIVATE_ADDRESS = 0; + AS.REGION_ADDRESS = 5; + } + return AS; +} + +AMDGPUAS getAMDGPUAS(const TargetMachine &M) { + return getAMDGPUAS(M.getTargetTriple()); +} + +AMDGPUAS getAMDGPUAS(const Module &M) { + return getAMDGPUAS(Triple(M.getTargetTriple())); +} +} // namespace AMDGPU +} // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 96171562ebe..0ce90284d67 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H +#include "AMDGPU.h" #include "AMDKernelCodeT.h" #include "SIDefines.h" #include "llvm/ADT/StringRef.h" @@ -160,9 +161,9 @@ MCSection *getHSADataGlobalProgramSection(MCContext &Ctx); MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx); 
-bool isGroupSegment(const GlobalValue *GV); -bool isGlobalSegment(const GlobalValue *GV); -bool isReadOnlySegment(const GlobalValue *GV); +bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS); +bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS); +bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS); /// \returns True if constants should be emitted to .text section for given /// target triple \p TT, false otherwise.
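
Note: the hunks above replace the fixed AMDGPUAS:: enum values with fields looked up per target triple via AMDGPU::getAMDGPUAS() (see the AMDGPUBaseInfo.cpp hunk), and backend code caches the result, e.g. `auto AMDGPUASI = ST.getAMDGPUAS();` in the SIFrameLowering.cpp hunk. The standalone C++ sketch below mirrors that triple-environment mapping for illustration only; the AddrSpaces struct, getAddrSpaces helper, and main driver are hypothetical stand-ins and not part of the patch.

// Standalone sketch (illustrative only) of the triple-dependent address-space
// numbering introduced by this change: the "amdgiz"/"amdgizcl" environments
// use the generic-is-zero layout, everything else keeps the legacy layout.
#include <iostream>
#include <string>

struct AddrSpaces {
  unsigned Flat;       // flat / generic memory
  unsigned Constant;   // constant memory
  unsigned Private;    // private (scratch) memory
  unsigned Region;     // region memory
  unsigned Global = 1; // same in both layouts, per the patch
  unsigned Local = 3;  // same in both layouts, per the patch
};

static AddrSpaces getAddrSpaces(const std::string &Env) {
  AddrSpaces AS;
  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.Flat = 0; AS.Constant = 4; AS.Private = 5; AS.Region = 2;
  } else {
    AS.Flat = 4; AS.Constant = 2; AS.Private = 0; AS.Region = 5;
  }
  return AS;
}

int main() {
  for (const std::string Env : {"", "amdgiz"}) {
    AddrSpaces AS = getAddrSpaces(Env);
    std::cout << "env=\"" << Env << "\""
              << " flat=" << AS.Flat << " constant=" << AS.Constant
              << " private=" << AS.Private << " region=" << AS.Region
              << " global=" << AS.Global << " local=" << AS.Local << '\n';
  }
}

For the R600 constant buffers, which keep their fixed numbering, the ConstantAddressBlock hunk above maps CONSTANT_BUFFER_N to 512 + 4096 * N, i.e. 512 + (N << 12), matching the comment in that hunk.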