89 files changed, 362 insertions, 492 deletions
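The patch below deletes the per-triple address-space lookup (AMDGPU::getAMDGPUAS on a Module, TargetMachine, or Triple) and turns the numbering into compile-time enumerators in a namespace, so every file rewrites member reads such as AMDGPUASI.LOCAL_ADDRESS or AS.PRIVATE_ADDRESS into direct AMDGPUAS::LOCAL_ADDRESS references. A minimal standalone C++ sketch of the resulting pattern (an illustration mirroring the patch, not code from the tree):

    #include <cassert>

    // Static address-space numbering, as introduced in AMDGPU.h below.
    namespace AMDGPUAS {
    enum : unsigned {
      MAX_AMDGPU_ADDRESS = 6,
      FLAT_ADDRESS = 0,
      GLOBAL_ADDRESS = 1,
      REGION_ADDRESS = 2,
      LOCAL_ADDRESS = 3,
      CONSTANT_ADDRESS = 4,
      PRIVATE_ADDRESS = 5,
      CONSTANT_ADDRESS_32BIT = 6,
    };
    }

    // With constants, table bounds can be checked at compile time
    // (cf. the static_assert added in AMDGPUAliasAnalysis.cpp).
    static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 6, "Addr space out of range");

    int main() {
      // Consumers compare against enumerators instead of struct members.
      unsigned AS = AMDGPUAS::LOCAL_ADDRESS;
      assert(AS == AMDGPUAS::LOCAL_ADDRESS || AS != AMDGPUAS::REGION_ADDRESS);
      return 0;
    }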
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 2b49c2ea88e..14835407ec6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -221,19 +221,18 @@ enum TargetIndex {
 /// however on the GPU, each address space points to
 /// a separate piece of memory that is unique from other
 /// memory locations.
-struct AMDGPUAS {
-  // The following address space values depend on the triple environment.
-  unsigned PRIVATE_ADDRESS;  ///< Address space for private memory.
-  unsigned FLAT_ADDRESS;     ///< Address space for flat memory.
-  unsigned REGION_ADDRESS;   ///< Address space for region memory.
-
+namespace AMDGPUAS {
 enum : unsigned {
   // The maximum value for flat, generic, local, private, constant and region.
   MAX_AMDGPU_ADDRESS = 6,
 
+  FLAT_ADDRESS = 0,     ///< Address space for flat memory.
   GLOBAL_ADDRESS = 1,   ///< Address space for global memory (RAT0, VTX0).
+  REGION_ADDRESS = 2,   ///< Address space for region memory.
+
   CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
   LOCAL_ADDRESS = 3,    ///< Address space for local memory.
+  PRIVATE_ADDRESS = 5,  ///< Address space for private memory.
   CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory
@@ -270,12 +269,4 @@ struct AMDGPUAS {
   };
 };
 
-namespace llvm {
-namespace AMDGPU {
-AMDGPUAS getAMDGPUAS(const Module &M);
-AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
-AMDGPUAS getAMDGPUAS(Triple T);
-} // namespace AMDGPU
-} // namespace llvm
-
 #endif
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 974fbcb8719..77e39c20044 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -47,20 +47,10 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
 }
 
 // Must match the table in getAliasResult.
-AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_)
-  : Arch(Arch_), AS(AS_) {
+AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(Triple::ArchType Arch_)
+  : Arch(Arch_) {
   // These arrays are indexed by address space value
   // enum elements 0 ... to 6
-  static const AliasResult ASAliasRulesPrivIsZero[7][7] = {
-  /*                    Private    Global    Constant  Group     Flat      Region    Constant 32-bit */
-  /* Private         */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
-  /* Global          */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias},
-  /* Constant        */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias},
-  /* Group           */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias , NoAlias},
-  /* Flat            */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
-  /* Region          */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
-  /* Constant 32-bit */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias}
-  };
   static const AliasResult ASAliasRulesGenIsZero[7][7] = {
   /*                    Flat       Global    Region    Group     Constant  Private   Constant 32-bit */
   /* Flat            */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
@@ -71,30 +61,15 @@ AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Ar
   /* Private         */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
   /* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias}
   };
+  static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 6, "Addr space out of range");
 
-  if (AS.FLAT_ADDRESS == 0) {
-    assert(AS.GLOBAL_ADDRESS == 1 &&
-           AS.REGION_ADDRESS == 2 &&
-           AS.LOCAL_ADDRESS == 3 &&
-           AS.CONSTANT_ADDRESS == 4 &&
-           AS.PRIVATE_ADDRESS == 5 &&
-           AS.CONSTANT_ADDRESS_32BIT == 6);
-    ASAliasRules = &ASAliasRulesGenIsZero;
-  } else {
-    assert(AS.PRIVATE_ADDRESS == 0 &&
-           AS.GLOBAL_ADDRESS == 1 &&
-           AS.CONSTANT_ADDRESS == 2 &&
-           AS.LOCAL_ADDRESS == 3 &&
-           AS.FLAT_ADDRESS == 4 &&
-           AS.REGION_ADDRESS == 5 &&
-           AS.CONSTANT_ADDRESS_32BIT == 6);
-    ASAliasRules = &ASAliasRulesPrivIsZero;
-  }
+
+  ASAliasRules = &ASAliasRulesGenIsZero;
 }
 
 AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
                                                            unsigned AS2) const {
-  if (AS1 > AS.MAX_AMDGPU_ADDRESS || AS2 > AS.MAX_AMDGPU_ADDRESS) {
+  if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS) {
     if (Arch == Triple::amdgcn)
       report_fatal_error("Pointer address space out of range");
     return AS1 == AS2 ? MayAlias : NoAlias;
@@ -118,9 +93,9 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
 bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
                                             bool OrLocal) {
   const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
-
-  if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS ||
-      Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS_32BIT) {
+  unsigned AS = Base->getType()->getPointerAddressSpace();
+  if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+      AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
     return true;
   }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index 09ad51d5e42..659a9f38454 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -33,13 +33,12 @@ class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
   friend AAResultBase<AMDGPUAAResult>;
 
   const DataLayout &DL;
-  AMDGPUAS AS;
 
 public:
   explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(),
-    DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS, T.getArch()) {}
+    DL(DL), ASAliasRules(T.getArch()) {}
   AMDGPUAAResult(AMDGPUAAResult &&Arg)
-      : AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS),
+      : AAResultBase(std::move(Arg)), DL(Arg.DL),
         ASAliasRules(Arg.ASAliasRules){}
 
   /// Handle invalidation events from the new pass manager.
@@ -56,13 +55,12 @@ private:
 
   class ASAliasRulesTy {
   public:
-    ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_);
+    ASAliasRulesTy(Triple::ArchType Arch_);
     AliasResult getAliasResult(unsigned AS1, unsigned AS2) const;
 
   private:
     Triple::ArchType Arch;
-    AMDGPUAS AS;
     const AliasResult (*ASAliasRules)[7][7];
   } ASAliasRules;
 };
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index d4bbb2c1eb8..fc65430b745 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -86,8 +86,6 @@ void AMDGPUAlwaysInline::recursivelyVisitUsers(
 }
 
 bool AMDGPUAlwaysInline::runOnModule(Module &M) {
-  AMDGPUAS AMDGPUAS = AMDGPU::getAMDGPUAS(M);
-
   std::vector<GlobalAlias*> AliasesToRemove;
 
   SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
@@ -122,7 +120,7 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
   for (GlobalVariable &GV : M.globals()) {
     // TODO: Region address
     unsigned AS = GV.getType()->getAddressSpace();
-    if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS.REGION_ADDRESS)
+    if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)
       continue;
 
     recursivelyVisitUsers(GV, FuncsToAlwaysInline);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 1a70833a447..896ac9c8777 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -46,7 +46,6 @@ namespace {
 class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
 private:
   const TargetMachine *TM = nullptr;
-  AMDGPUAS AS;
 
   bool addFeatureAttributes(Function &F);
 
@@ -67,11 +66,10 @@ public:
     CallGraphSCCPass::getAnalysisUsage(AU);
   }
 
-  static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
+  static bool visitConstantExpr(const ConstantExpr *CE);
   static bool visitConstantExprsRecursively(
     const Constant *EntryC,
-    SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
-    AMDGPUAS AS);
+    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
 };
 
 } // end anonymous namespace
@@ -85,20 +83,18 @@ INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
 
 // The queue ptr is only needed when casting to flat, not from it.
-static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
-  return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
+static bool castRequiresQueuePtr(unsigned SrcAS) {
+  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
 }
 
-static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
-                                 const AMDGPUAS &AS) {
-  return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
+static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
+  return castRequiresQueuePtr(ASC->getSrcAddressSpace());
 }
 
-bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
-                                                     AMDGPUAS AS) {
+bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
     unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
-    return castRequiresQueuePtr(SrcAS, AS);
+    return castRequiresQueuePtr(SrcAS);
   }
 
   return false;
@@ -106,8 +102,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
 
 bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
   const Constant *EntryC,
-  SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
-  AMDGPUAS AS) {
+  SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
 
   if (!ConstantExprVisited.insert(EntryC).second)
     return false;
@@ -120,7 +115,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
 
     // Check this constant expression.
     if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
-      if (visitConstantExpr(CE, AS))
+      if (visitConstantExpr(CE))
         return true;
     }
 
@@ -262,7 +257,7 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
         continue;
 
       if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
-        if (castRequiresQueuePtr(ASC, AS)) {
+        if (castRequiresQueuePtr(ASC)) {
           NeedQueuePtr = true;
           continue;
         }
@@ -273,7 +268,7 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
         if (!OpC)
           continue;
 
-        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
+        if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
           NeedQueuePtr = true;
           break;
         }
@@ -318,7 +313,6 @@ bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
   if (!TPC)
     report_fatal_error("TargetMachine is required");
 
-  AS = AMDGPU::getAMDGPUAS(CG.getModule());
   TM = &TPC->getTM<TargetMachine>();
   return false;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index d3722710e64..4e0cc736bad 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -37,7 +37,6 @@ class AMDGPUAnnotateUniformValues : public FunctionPass,
   LoopInfo *LI;
   DenseMap<Value*, GetElementPtrInst*> noClobberClones;
   bool isKernelFunc;
-  AMDGPUAS AMDGPUASI;
 
 public:
   static char ID;
@@ -133,7 +132,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
   if (!DA->isUniform(Ptr))
     return;
   auto isGlobalLoad = [&](LoadInst &Load)->bool {
-    return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
+    return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
   };
   // We're tracking up to the Function boundaries
   // We cannot go beyond because of FunctionPass restrictions
@@ -168,7 +167,6 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
 }
 
 bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
-  AMDGPUASI = AMDGPU::getAMDGPUAS(M);
   return false;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index e62e5d52ad7..b7e8423c266 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -98,8 +98,7 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
 AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
                                    std::unique_ptr<MCStreamer> Streamer)
   : AsmPrinter(TM, std::move(Streamer)) {
-    AMDGPUASI = static_cast<AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
-  }
+}
 
 StringRef AMDGPUAsmPrinter::getPassName() const {
   return "AMDGPU Assembly Printer";
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 22982d912c7..462b5feca6a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -143,7 +143,6 @@ public:
 protected:
   mutable std::vector<std::string> DisasmLines, HexLines;
   mutable size_t DisasmLineMaxLen;
-  AMDGPUAS AMDGPUASI;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index b6baadef6e4..daef37f9c21 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
 
 AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
-  : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
+  : CallLowering(&TLI) {
 }
 
 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
@@ -51,7 +51,7 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const Function &F = MF.getFunction();
   const DataLayout &DL = F.getParent()->getDataLayout();
-  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
+  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
   LLT PtrType = getLLTForType(*PtrTy, DL);
   unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
   unsigned KernArgSegmentPtr =
@@ -73,7 +73,7 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
   MachineFunction &MF = MIRBuilder.getMF();
   const Function &F = MF.getFunction();
   const DataLayout &DL = F.getParent()->getDataLayout();
-  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
+  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
   MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
   unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
   unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
index 00c91a0933c..ed859716218 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -23,8 +23,6 @@ namespace llvm {
 class AMDGPUTargetLowering;
 
 class AMDGPUCallLowering: public CallLowering {
-  AMDGPUAS AMDGPUASI;
-
   unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
                              uint64_t Offset) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index b8212474c30..6fcb92efb04 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -63,7 +63,6 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
   LegacyDivergenceAnalysis *DA = nullptr;
   Module *Mod = nullptr;
   bool HasUnsafeFPMath = false;
-  AMDGPUAS AMDGPUASI;
 
   /// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
   /// binary operation \p V.
@@ -799,8 +798,8 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
   if (!WidenLoads)
     return false;
 
-  if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
-       I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
+  if ((I.getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+       I.getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
       canWidenScalarExtLoad(I)) {
     IRBuilder<> Builder(&I);
     Builder.SetCurrentDebugLocation(I.getDebugLoc());
@@ -900,7 +899,6 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   DA = &getAnalysis<LegacyDivergenceAnalysis>();
   HasUnsafeFPMath = hasUnsafeFPMath(F);
-  AMDGPUASI = TM.getAMDGPUAS();
 
   bool MadeChange = false;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 01ef346f74e..6657c08e7e6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -76,17 +76,17 @@ AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
 
 AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
     unsigned AddressSpace) const {
-  if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS)
+  if (AddressSpace == AMDGPUAS::PRIVATE_ADDRESS)
     return AddressSpaceQualifier::Private;
-  if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS)
+  if (AddressSpace == AMDGPUAS::GLOBAL_ADDRESS)
     return AddressSpaceQualifier::Global;
-  if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS)
+  if (AddressSpace == AMDGPUAS::CONSTANT_ADDRESS)
     return AddressSpaceQualifier::Constant;
-  if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS)
+  if (AddressSpace == AMDGPUAS::LOCAL_ADDRESS)
     return AddressSpaceQualifier::Local;
-  if (AddressSpace == AMDGPUASI.FLAT_ADDRESS)
+  if (AddressSpace == AMDGPUAS::FLAT_ADDRESS)
     return AddressSpaceQualifier::Generic;
-  if (AddressSpace == AMDGPUASI.REGION_ADDRESS)
+  if (AddressSpace == AMDGPUAS::REGION_ADDRESS)
     return AddressSpaceQualifier::Region;
 
   llvm_unreachable("Unknown address space qualifier");
@@ -114,7 +114,7 @@ ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
       .Case("queue_t", ValueKind::Queue)
       .Default(isa<PointerType>(Ty) ?
                    (Ty->getPointerAddressSpace() ==
-                    AMDGPUASI.LOCAL_ADDRESS ?
+                    AMDGPUAS::LOCAL_ADDRESS ?
                     ValueKind::DynamicSharedPointer :
                     ValueKind::GlobalBuffer) :
                    ValueKind::ByValue);
@@ -355,7 +355,7 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) {
 
   unsigned PointeeAlign = 0;
   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
-    if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
+    if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
       PointeeAlign = Arg.getParamAlignment();
       if (PointeeAlign == 0)
         PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
@@ -422,7 +422,7 @@ void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
 
   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
-                                      AMDGPUASI.GLOBAL_ADDRESS);
+                                      AMDGPUAS::GLOBAL_ADDRESS);
 
   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
   // "none" argument.
@@ -447,7 +447,6 @@ void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) { } void MetadataStreamer::begin(const Module &Mod) { - AMDGPUASI = getAMDGPUAS(Mod); emitVersion(); emitPrintf(Mod); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h index 3424c956d78..a1e08235a5e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -37,7 +37,6 @@ namespace HSAMD { class MetadataStreamer final { private: Metadata HSAMetadata; - AMDGPUAS AMDGPUASI; void dump(StringRef HSAMetadataString) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index f4af9cdc6dd..c95776d0982 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -72,14 +72,12 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can // make the right decision when generating code for different targets. const GCNSubtarget *Subtarget; - AMDGPUAS AMDGPUASI; bool EnableLateStructurizeCFG; public: explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr, CodeGenOpt::Level OptLevel = CodeGenOpt::Default) : SelectionDAGISel(*TM, OptLevel) { - AMDGPUASI = AMDGPU::getAMDGPUAS(*TM); EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG; } ~AMDGPUDAGToDAGISel() override = default; @@ -222,7 +220,6 @@ protected: class R600DAGToDAGISel : public AMDGPUDAGToDAGISel { const R600Subtarget *Subtarget; - AMDGPUAS AMDGPUASI; bool isConstantLoad(const MemSDNode *N, int cbID) const; bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); @@ -230,9 +227,7 @@ class R600DAGToDAGISel : public AMDGPUDAGToDAGISel { SDValue& Offset); public: explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) : - AMDGPUDAGToDAGISel(TM, OptLevel) { - AMDGPUASI = AMDGPU::getAMDGPUAS(*TM); - } + AMDGPUDAGToDAGISel(TM, OptLevel) {} void Select(SDNode *N) override; @@ -348,7 +343,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, } SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { - if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS || + if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS || !Subtarget->ldsRequiresM0Init()) return N; @@ -1725,7 +1720,7 @@ void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) { void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { MemSDNode *Mem = cast<MemSDNode>(N); unsigned AS = Mem->getAddressSpace(); - if (AS == AMDGPUASI.FLAT_ADDRESS) { + if (AS == AMDGPUAS::FLAT_ADDRESS) { SelectCode(N); return; } @@ -2108,10 +2103,10 @@ bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { if (!N->readMem()) return false; if (CbId == -1) - return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS || - N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT; + return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || + N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT; - return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; + return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId; } bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 093a6f02810..08c3b75dfd9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp 
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -146,7 +146,6 @@ unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
 AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
                                            const AMDGPUSubtarget &STI)
     : TargetLowering(TM), Subtarget(&STI) {
-  AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
   // Lower floating point store/load to integer store/load to reduce the number
   // of patterns in tablegen.
   setOperationAction(ISD::LOAD, MVT::f32, Promote);
@@ -725,7 +724,7 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const {
     {
       const LoadSDNode * L = dyn_cast<LoadSDNode>(N);
       if (L->getMemOperand()->getAddrSpace()
-          == AMDGPUASI.CONSTANT_ADDRESS_32BIT)
+          == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
         return true;
       return false;
     }
@@ -1193,8 +1192,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
   GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
   const GlobalValue *GV = G->getGlobal();
 
-  if (G->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
-      G->getAddressSpace() == AMDGPUASI.REGION_ADDRESS) {
+  if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+      G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
     if (!MFI->isEntryFunction()) {
       const Function &Fn = DAG.getMachineFunction().getFunction();
       DiagnosticInfoUnsupported BadLDSDecl(
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index ae029be5565..a231265af53 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -41,8 +41,6 @@ public:
   static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
 
 protected:
-  AMDGPUAS AMDGPUASI;
-
   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
   /// Split a vector store into multiple scalar stores.
@@ -306,10 +304,6 @@ public:
   uint32_t getImplicitParameterOffset(const MachineFunction &MF,
                                       const ImplicitParameter Param) const;
 
-  AMDGPUAS getAMDGPUAS() const {
-    return AMDGPUASI;
-  }
-
   MVT getFenceOperandTy(const DataLayout &DL) const override {
     return MVT::i32;
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
index 7060ff353f4..a5f9a85f50d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -118,8 +118,6 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
   if (!Callee)
     return (unsigned)Thres;
 
-  const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*Caller->getParent());
-
   // If we have a pointer to private array passed into a function
   // it will not be optimized out, leaving scratch usage.
   // Increase the inline threshold to allow inlining in this case.
@@ -128,7 +126,7 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
   for (Value *PtrArg : CS.args()) {
     Type *Ty = PtrArg->getType();
     if (!Ty->isPointerTy() ||
-        Ty->getPointerAddressSpace() != AS.PRIVATE_ADDRESS)
+        Ty->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
       continue;
     PtrArg = GetUnderlyingObject(PtrArg, DL);
     if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 219d430fbb3..ea184fa6eb7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -55,7 +55,6 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
 #define GET_GLOBALISEL_TEMPORARIES_INIT
 #include "AMDGPUGenGlobalISel.inc"
 #undef GET_GLOBALISEL_TEMPORARIES_INIT
-    ,AMDGPUASI(STI.getAMDGPUAS())
 {
 }
 
@@ -506,8 +505,8 @@ bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
   if (!I.hasOneMemOperand())
     return false;
 
-  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
-      (*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT)
+  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
+      (*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
     return false;
 
   if (!isInstrUniform(I))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 68b40b20aca..449431adc56 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -105,9 +105,6 @@ private:
 #define GET_GLOBALISEL_TEMPORARIES_DECL
 #include "AMDGPUGenGlobalISel.inc"
 #undef GET_GLOBALISEL_TEMPORARIES_DECL
-
-protected:
-  AMDGPUAS AMDGPUASI;
 };
 
 } // End llvm namespace.
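The TableGen pattern predicates in the next file get the same mechanical rewrite: each CodePatPred body is C++ that used to read a field off the implicit AMDGPUASI object and now compares against a namespace constant. A standalone sketch of the predicate shape (the MemSDNode here is a stub for illustration only; the real type lives in the SelectionDAG headers):

    // Stub standing in for llvm::MemSDNode, only for this sketch.
    struct MemSDNode {
      unsigned AS;
      unsigned getAddressSpace() const { return AS; }
    };

    namespace AMDGPUAS { enum : unsigned { LOCAL_ADDRESS = 3 }; }

    // Equivalent of the LocalAddress predicate body after the change:
    // a comparison against a compile-time constant, no AMDGPUASI capture.
    static bool isLocalAddress(const MemSDNode *N) {
      return N->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
    }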
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 055f2a8cee9..ab00b1d6326 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -331,37 +331,37 @@ class StoreHi16<SDPatternOperator op> : PatFrag <
 >;
 
 class PrivateAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
 }]>;
 
 class ConstantAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
 }]>;
 
 class LocalAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }]>;
 
 class GlobalAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 }]>;
 
 class GlobalLoadAddress : CodePatPred<[{
   auto AS = cast<MemSDNode>(N)->getAddressSpace();
-  return AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.CONSTANT_ADDRESS;
+  return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS;
 }]>;
 
 class FlatLoadAddress : CodePatPred<[{
   const auto AS = cast<MemSDNode>(N)->getAddressSpace();
-  return AS == AMDGPUASI.FLAT_ADDRESS ||
-         AS == AMDGPUASI.GLOBAL_ADDRESS ||
-         AS == AMDGPUASI.CONSTANT_ADDRESS;
+  return AS == AMDGPUAS::FLAT_ADDRESS ||
+         AS == AMDGPUAS::GLOBAL_ADDRESS ||
+         AS == AMDGPUAS::CONSTANT_ADDRESS;
 }]>;
 
 class FlatStoreAddress : CodePatPred<[{
   const auto AS = cast<MemSDNode>(N)->getAddressSpace();
-  return AS == AMDGPUASI.FLAT_ADDRESS ||
-         AS == AMDGPUASI.GLOBAL_ADDRESS;
+  return AS == AMDGPUAS::FLAT_ADDRESS ||
+         AS == AMDGPUAS::GLOBAL_ADDRESS;
 }]>;
 
 class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
@@ -483,7 +483,7 @@ def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
 class local_binary_atomic_op<SDNode atomic_op> :
   PatFrag<(ops node:$ptr, node:$value),
     (atomic_op node:$ptr, node:$value), [{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }]>;
 
 def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
@@ -500,14 +500,14 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
 
 def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                             (AMDGPUstore_mskor node:$val, node:$ptr), [{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 }]>;
 
 class AtomicCmpSwapLocal <SDNode cmp_swap_node> : PatFrag<
     (ops node:$ptr, node:$cmp, node:$swap),
     (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
   AtomicSDNode *AN = cast<AtomicSDNode>(N);
-  return AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+  return AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }]>;
 
 def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;
@@ -516,17 +516,17 @@ multiclass global_binary_atomic_op<SDNode atomic_op> {
   def "" : PatFrag<
         (ops node:$ptr, node:$value),
         (atomic_op node:$ptr, node:$value),
-        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
+        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
 
   def _noret : PatFrag<
         (ops node:$ptr, node:$value),
         (atomic_op node:$ptr, node:$value),
-        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
 
   def _ret : PatFrag<
         (ops node:$ptr, node:$value),
         (atomic_op node:$ptr, node:$value),
-        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
 }
 
 defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
@@ -553,12 +553,12 @@ def atomic_cmp_swap_global : PatFrag<
 def atomic_cmp_swap_global_noret : PatFrag<
       (ops node:$ptr, node:$cmp, node:$value),
       (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
-      [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+      [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
 
 def atomic_cmp_swap_global_ret : PatFrag<
       (ops node:$ptr, node:$cmp, node:$value),
       (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
-      [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+      [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
 
 //===----------------------------------------------------------------------===//
 // Misc Pattern Fragments
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 87b072c9ea2..c265283d86d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -32,8 +32,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
     return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
   };
 
-  auto AMDGPUAS = ST.getAMDGPUAS();
-
   const LLT S1 = LLT::scalar(1);
   const LLT V2S16 = LLT::vector(2, 16);
 
@@ -44,8 +42,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
   const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
   const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
   const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
-  const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS.FLAT_ADDRESS);
-  const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS.PRIVATE_ADDRESS);
+  const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
+  const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
 
   const LLT AddrSpaces[] = {
     GlobalPtr,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 7a7ed7a4f06..14e88004269 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1333,8 +1333,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
   // for OpenCL 2.0 we have only generic implementation of sincos
   // function.
   AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
-  const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*M);
-  nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AS.FLAT_ADDRESS);
+  nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
   Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
   if (!Fsincos) return false;
 
@@ -1347,7 +1346,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
   // The allocaInst allocates the memory in private address space. This needs
   // to be bitcasted to point to the address space of cos pointer type.
   // In OpenCL 2.0 this is generic, while in 1.2 that is private.
-  if (PTy->getPointerAddressSpace() != AS.PRIVATE_ADDRESS)
+  if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
     P = B.CreateAddrSpaceCast(Alloc, PTy);
   CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index 3cfdccc9fe5..e53a8fe7c07 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -99,8 +99,6 @@ private:
 
   const DataLayout *DL;
 
-  AMDGPUAS AS;
-
   const TargetLowering *TLI;
 
   void visit(const Function &F);
@@ -267,7 +265,6 @@ void AMDGPUPerfHint::runOnFunction(Function &F) {
   const Module &M = *F.getParent();
   DL = &M.getDataLayout();
-  AS = AMDGPU::getAMDGPUAS(M);
 
   visit(F);
   auto Loc = FIM.find(&F);
@@ -306,14 +303,14 @@ bool AMDGPUPerfHint::isGlobalAddr(const Value *V) const {
   if (auto PT = dyn_cast<PointerType>(V->getType())) {
     unsigned As = PT->getAddressSpace();
     // Flat likely points to global too.
-    return As == AS.GLOBAL_ADDRESS || As == AS.FLAT_ADDRESS;
+    return As == AMDGPUAS::GLOBAL_ADDRESS || As == AMDGPUAS::FLAT_ADDRESS;
   }
   return false;
 }
 
 bool AMDGPUPerfHint::isLocalAddr(const Value *V) const {
   if (auto PT = dyn_cast<PointerType>(V->getType()))
-    return PT->getAddressSpace() == AS.LOCAL_ADDRESS;
+    return PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
   return false;
 }
 
@@ -346,7 +343,8 @@ AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
 bool AMDGPUPerfHint::isConstantAddr(const Value *V) const {
   if (auto PT = dyn_cast<PointerType>(V->getType())) {
     unsigned As = PT->getAddressSpace();
-    return As == AS.CONSTANT_ADDRESS || As == AS.CONSTANT_ADDRESS_32BIT;
+    return As == AMDGPUAS::CONSTANT_ADDRESS ||
+           As == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
   }
   return false;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index d341fec6296..fe9e4ca0ca4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -76,7 +76,6 @@ private:
   const TargetMachine *TM;
   Module *Mod = nullptr;
   const DataLayout *DL = nullptr;
-  AMDGPUAS AS;
 
   // FIXME: This should be per-kernel.
   uint32_t LocalMemLimit = 0;
@@ -156,8 +155,6 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
   if (!ST.isPromoteAllocaEnabled())
     return false;
 
-  AS = AMDGPU::getAMDGPUAS(*F.getParent());
-
   bool SufficientLDS = hasSufficientLocalMem(F);
   bool Changed = false;
   BasicBlock &EntryBB = *F.begin();
@@ -238,7 +235,7 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
   Type *I32Ty = Type::getInt32Ty(Mod->getContext());
   Value *CastDispatchPtr = Builder.CreateBitCast(
-      DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS));
+      DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
 
   // We could do a single 64-bit load here, but it's likely that the basic
   // 32-bit and extract sequence is already present, and it is probably easier
@@ -342,7 +339,7 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
   }
 }
 
-static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
+static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
 
   if (DisablePromoteAllocaToVector) {
     LLVM_DEBUG(dbgs() << "  Promotion alloca to vector is disabled\n");
@@ -406,7 +403,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
     IRBuilder<> Builder(Inst);
     switch (Inst->getOpcode()) {
     case Instruction::Load: {
-      Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
+      Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
       Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
 
@@ -418,7 +415,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
       break;
     }
     case Instruction::Store: {
-      Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
+      Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
 
       StoreInst *SI = cast<StoreInst>(Inst);
       Value *Ptr = SI->getPointerOperand();
@@ -610,7 +607,7 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
   // we cannot use local memory in the pass.
   for (Type *ParamTy : FTy->params()) {
     PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
-    if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
+    if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
       LocalMemLimit = 0;
       LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to "
                            "local memory disabled.\n");
@@ -627,7 +624,7 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
   // Check how much local memory is being used by global objects
   CurrentLocalMemUsage = 0;
   for (GlobalVariable &GV : Mod->globals()) {
-    if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
+    if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
       continue;
 
     for (const User *U : GV.users()) {
@@ -706,7 +703,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
 
   LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
 
-  if (tryPromoteAllocaToVector(&I, AS))
+  if (tryPromoteAllocaToVector(&I))
     return true; // Promoted to vector.
   const Function &ContainingFunction = *I.getParent()->getParent();
@@ -775,7 +772,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
       Twine(F->getName()) + Twine('.') + I.getName(),
       nullptr,
       GlobalVariable::NotThreadLocal,
-      AS.LOCAL_ADDRESS);
+      AMDGPUAS::LOCAL_ADDRESS);
   GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
   GV->setAlignment(I.getAlignment());
 
@@ -808,7 +805,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
     if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
       Value *Src0 = CI->getOperand(0);
       Type *EltTy = Src0->getType()->getPointerElementType();
-      PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
+      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
 
       if (isa<ConstantPointerNull>(CI->getOperand(0)))
         CI->setOperand(0, ConstantPointerNull::get(NewTy));
@@ -825,7 +822,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
       continue;
 
     Type *EltTy = V->getType()->getPointerElementType();
-    PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
+    PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
 
     // FIXME: It doesn't really make sense to try to do this for all
    // instructions.
@@ -894,7 +891,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
       Type *SrcTy = Src->getType()->getPointerElementType();
       Function *ObjectSize = Intrinsic::getDeclaration(Mod,
         Intrinsic::objectsize,
-        { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
+        { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
       );
 
       CallInst *NewCall = Builder.CreateCall(
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 2e6840c7915..713dab3c311 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -213,7 +213,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
     TLInfo(TM, *this),
     FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
-  AS = AMDGPU::getAMDGPUAS(TT);
   CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
   Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
   RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
@@ -462,8 +461,7 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
     TexVTXClauseSize(0),
     Gen(R600),
     TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
-    InstrItins(getInstrItineraryForCPU(GPU)),
-    AS (AMDGPU::getAMDGPUAS(TT)) { }
+    InstrItins(getInstrItineraryForCPU(GPU)) { }
 
 void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                       unsigned NumRegionInstrs) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index db9ebd1a778..a25be48b5d6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -378,7 +378,6 @@ protected:
   bool FeatureDisable;
 
   SelectionDAGTargetInfo TSInfo;
-  AMDGPUAS AS;
 
 private:
   SIInstrInfo InstrInfo;
   SITargetLowering TLInfo;
@@ -447,10 +446,6 @@ public:
     return MaxPrivateElementSize;
   }
 
-  AMDGPUAS getAMDGPUAS() const {
-    return AS;
-  }
-
   bool hasIntClamp() const {
     return HasIntClamp;
   }
@@ -975,7 +970,6 @@ private:
   R600TargetLowering TLInfo;
   InstrItineraryData InstrItins;
   SelectionDAGTargetInfo TSInfo;
-  AMDGPUAS AS;
 
 public:
   R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
@@ -1060,8 +1054,6 @@ public:
   short getTexVTXClauseSize() const { return TexVTXClauseSize; }
 
-  AMDGPUAS getAMDGPUAS() const { return AS; }
-
   bool enableMachineScheduler() const override {
     return true;
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 2205819c444..cd1823cc187 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -308,7 +308,6 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                         FS, Options, getEffectiveRelocModel(RM),
                         getEffectiveCodeModel(CM), OptLevel),
       TLOF(createTLOF(getTargetTriple())) {
-  AS = AMDGPU::getAMDGPUAS(TT);
   initAsmInfo();
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 0fe14493fab..62fbe71d190 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -34,7 +34,6 @@ namespace llvm {
 class AMDGPUTargetMachine : public LLVMTargetMachine {
 protected:
   std::unique_ptr<TargetLoweringObjectFile> TLOF;
-  AMDGPUAS AS;
 
   StringRef getGPUName(const Function &F) const;
   StringRef getFeatureString(const Function &F) const;
@@ -55,16 +54,13 @@ public:
   TargetLoweringObjectFile *getObjFileLowering() const override {
     return TLOF.get();
   }
-  AMDGPUAS getAMDGPUAS() const {
-    return AS;
-  }
 
   void adjustPassManager(PassManagerBuilder &) override;
 
+  /// Get the integer value of a null pointer in the given address space.
   uint64_t getNullPointerValue(unsigned AddrSpace) const {
-    if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS)
-      return -1;
-    return 0;
+    return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+            AddrSpace == AMDGPUAS::REGION_ADDRESS) ? -1 : 0;
   }
 };
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index ad0802accc5..11e4ba4b501 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -102,7 +102,6 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
   unsigned ThresholdPrivate = UnrollThresholdPrivate;
   unsigned ThresholdLocal = UnrollThresholdLocal;
   unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
-  const AMDGPUAS &ASST = AMDGPU::getAMDGPUAS(TargetTriple);
   for (const BasicBlock *BB : L->getBlocks()) {
     const DataLayout &DL = BB->getModule()->getDataLayout();
     unsigned LocalGEPsSeen = 0;
@@ -140,9 +139,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
 
       unsigned AS = GEP->getAddressSpace();
       unsigned Threshold = 0;
-      if (AS == ASST.PRIVATE_ADDRESS)
+      if (AS == AMDGPUAS::PRIVATE_ADDRESS)
        Threshold = ThresholdPrivate;
-      else if (AS == ASST.LOCAL_ADDRESS)
+      else if (AS == AMDGPUAS::LOCAL_ADDRESS)
         Threshold = ThresholdLocal;
       else
         continue;
@@ -150,7 +149,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
       if (UP.Threshold >= Threshold)
         continue;
 
-      if (AS == ASST.PRIVATE_ADDRESS) {
+      if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
         const Value *Ptr = GEP->getPointerOperand();
         const AllocaInst *Alloca =
             dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
@@ -160,7 +159,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
         unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
         if (AllocaSize > MaxAlloca)
           continue;
-      } else if (AS == ASST.LOCAL_ADDRESS) {
+      } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
         LocalGEPsSeen++;
         // Inhibit unroll for local memory if we have seen addressing not to
         // a variable, most likely we will be unable to combine it.
@@ -253,19 +252,18 @@ unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
 }
 
 unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
-  AMDGPUAS AS = ST->getAMDGPUAS();
-  if (AddrSpace == AS.GLOBAL_ADDRESS ||
-      AddrSpace == AS.CONSTANT_ADDRESS ||
-      AddrSpace == AS.CONSTANT_ADDRESS_32BIT) {
+  if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
+      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
+      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
     return 512;
   }
 
-  if (AddrSpace == AS.FLAT_ADDRESS ||
-      AddrSpace == AS.LOCAL_ADDRESS ||
-      AddrSpace == AS.REGION_ADDRESS)
+  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS ||
+      AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+      AddrSpace == AMDGPUAS::REGION_ADDRESS)
     return 128;
 
-  if (AddrSpace == AS.PRIVATE_ADDRESS)
+  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
     return 8 * ST->getMaxPrivateElementSize();
 
   llvm_unreachable("unhandled address space");
@@ -277,7 +275,7 @@ bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
   // We allow vectorization of flat stores, even though we may need to decompose
   // them later if they may access private memory. We don't have enough context
   // here, and legalization can handle it.
-  if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS) {
+  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
     return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) &&
       ChainSizeInBytes <= ST->getMaxPrivateElementSize();
   }
@@ -552,9 +550,8 @@ bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
   // All other loads are not divergent, because if threads issue loads with the
   // same arguments, they will always get the same result.
   if (const LoadInst *Load = dyn_cast<LoadInst>(V))
-    return Load->getPointerAddressSpace() ==
-               ST->getAMDGPUAS().PRIVATE_ADDRESS ||
-           Load->getPointerAddressSpace() == ST->getAMDGPUAS().FLAT_ADDRESS;
+    return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
+           Load->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
 
   // Atomics are divergent because they are executed sequentially: when an
   // atomic operation refers to the same address in each thread, then each
@@ -644,20 +641,19 @@ unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
 }
 
 unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
-  AMDGPUAS AS = ST->getAMDGPUAS();
-  if (AddrSpace == AS.GLOBAL_ADDRESS ||
-      AddrSpace == AS.CONSTANT_ADDRESS)
+  if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
+      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
     return 128;
-  if (AddrSpace == AS.LOCAL_ADDRESS ||
-      AddrSpace == AS.REGION_ADDRESS)
+  if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+      AddrSpace == AMDGPUAS::REGION_ADDRESS)
     return 64;
-  if (AddrSpace == AS.PRIVATE_ADDRESS)
+  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
     return 32;
-  if ((AddrSpace == AS.PARAM_D_ADDRESS ||
-      AddrSpace == AS.PARAM_I_ADDRESS ||
-      (AddrSpace >= AS.CONSTANT_BUFFER_0 &&
-      AddrSpace <= AS.CONSTANT_BUFFER_15)))
+  if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
+      AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
+      (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
+      AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
    return 128;
   llvm_unreachable("unhandled address space");
 }
@@ -668,9 +664,7 @@ bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
   // We allow vectorization of flat stores, even though we may need to decompose
   // them later if they may access private memory. We don't have enough context
   // here, and legalization can handle it.
-  if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS)
-    return false;
-  return true;
+  return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
 }
 
 bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 8e63d789e17..397c5c6fa6f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -179,7 +179,7 @@ public:
     if (IsGraphicsShader)
       return -1;
     return ST->hasFlatAddressSpace() ?
-           ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
+           AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
   }
 
   unsigned getVectorSplitCost() { return 0; }
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index d4c54dacfe5..7b2dc3494ab 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -23,8 +23,8 @@ def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset"
 class MubufLoad <SDPatternOperator op> : PatFrag <
   (ops node:$ptr), (op node:$ptr), [{
   auto const AS = cast<MemSDNode>(N)->getAddressSpace();
-  return AS == AMDGPUASI.GLOBAL_ADDRESS ||
-         AS == AMDGPUASI.CONSTANT_ADDRESS;
+  return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+         AS == AMDGPUAS::CONSTANT_ADDRESS;
 }]>;
 
 def mubuf_load : MubufLoad <load>;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 3ef473b7fd9..43130dfcae9 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -326,7 +326,7 @@ multiclass FLAT_Global_Atomic_Pseudo<
 class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
   (ops node:$ptr, node:$value),
   (atomic_op node:$ptr, node:$value),
-  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}]
+  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
 >;
 
 def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index e00dffc4be9..8864aabb063 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -589,7 +589,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
   }
 
   case Intrinsic::r600_implicitarg_ptr: {
-    MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
+    MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
     uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
     return DAG.getConstant(ByteOffset, DL, PtrVT);
   }
@@ -741,12 +741,12 @@ SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                                SDValue Op,
                                                SelectionDAG &DAG) const {
   GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
-  if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
+  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
     return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
 
   const DataLayout &DL = DAG.getDataLayout();
   const GlobalValue *GV = GSD->getGlobal();
-  MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
+  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
 
   SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
   return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
@@ -903,7 +903,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                    unsigned DwordOffset) const {
   unsigned ByteOffset = DwordOffset * 4;
   PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
-                                           AMDGPUASI.PARAM_I_ADDRESS);
+                                           AMDGPUAS::PARAM_I_ADDRESS);
 
   // We shouldn't be using an offset wider than 16-bits for implicit parameters.
   assert(isInt<16>(ByteOffset));
@@ -1141,7 +1141,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
   //TODO: Who creates the i8 stores?
   assert(Store->isTruncatingStore() ||
          Store->getValue().getValueType() == MVT::i8);
-  assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);
+  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
 
   SDValue Mask;
   if (Store->getMemoryVT() == MVT::i8) {
@@ -1175,7 +1175,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
   // Load dword
   // TODO: can we be smarter about machine pointer info?
   MachinePointerInfo PtrInfo(UndefValue::get(
-      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
+      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
   SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
 
   Chain = Dst.getValue(1);
@@ -1241,9 +1241,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
 
   // Neither LOCAL nor PRIVATE can do vectors at the moment
-  if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
+  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
       VT.isVector()) {
-    if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
+    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) &&
          StoreNode->isTruncatingStore()) {
       // Add an extra level of chain to isolate this vector
       SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
@@ -1267,7 +1267,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                   DAG.getConstant(2, DL, PtrVT));
 
-  if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
+  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
     // It is beneficial to create MSKOR here instead of combiner to avoid
     // artificial dependencies introduced by RMW
     if (StoreNode->isTruncatingStore()) {
@@ -1320,7 +1320,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   }
 
   // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
-  if (AS != AMDGPUASI.PRIVATE_ADDRESS)
+  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
     return SDValue();
 
   if (MemVT.bitsLT(MVT::i32))
@@ -1403,7 +1403,7 @@ SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
   // Load dword
   // TODO: can we be smarter about machine pointer info?
   MachinePointerInfo PtrInfo(UndefValue::get(
-      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
+      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
   SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
 
   // Get offset within the register.
@@ -1441,7 +1441,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { EVT MemVT = LoadNode->getMemoryVT(); ISD::LoadExtType ExtType = LoadNode->getExtensionType(); - if (AS == AMDGPUASI.PRIVATE_ADDRESS && + if (AS == AMDGPUAS::PRIVATE_ADDRESS && ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) { return lowerPrivateExtLoad(Op, DAG); } @@ -1451,8 +1451,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = LoadNode->getChain(); SDValue Ptr = LoadNode->getBasePtr(); - if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS || - LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) && + if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || + LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && VT.isVector()) { return scalarizeVectorLoad(LoadNode, DAG); } @@ -1473,7 +1473,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, DL, MVT::i32)), DAG.getConstant(LoadNode->getAddressSpace() - - AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32) + AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32) ); } @@ -1509,7 +1509,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(MergedValues, DL); } - if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) { + if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { return SDValue(); } @@ -1606,7 +1606,7 @@ SDValue R600TargetLowering::LowerFormalArguments( } PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), - AMDGPUASI.PARAM_I_ADDRESS); + AMDGPUAS::PARAM_I_ADDRESS); // i64 isn't a legal type, so the register type used ends up as i32, which // isn't expected here. It attempts to create this sextload, but it ends up @@ -1656,7 +1656,7 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const { // Local and Private addresses do not handle vectors. 
Limit to i32 - if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) { + if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) { return (MemVT.getSizeInBits() <= 32); } return true; diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp index c54ea7ac42b..97228965e68 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -1504,15 +1504,15 @@ unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind( switch (Kind) { case PseudoSourceValue::Stack: case PseudoSourceValue::FixedStack: - return ST.getAMDGPUAS().PRIVATE_ADDRESS; + return AMDGPUAS::PRIVATE_ADDRESS; case PseudoSourceValue::ConstantPool: case PseudoSourceValue::GOT: case PseudoSourceValue::JumpTable: case PseudoSourceValue::GlobalValueCallEntry: case PseudoSourceValue::ExternalSymbolCallEntry: case PseudoSourceValue::TargetCustom: - return ST.getAMDGPUAS().CONSTANT_ADDRESS; + return AMDGPUAS::CONSTANT_ADDRESS; } + llvm_unreachable("Invalid pseudo source kind"); - return ST.getAMDGPUAS().PRIVATE_ADDRESS; } diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index 7bf174f4cd8..10e87375522 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -299,7 +299,7 @@ class VTX_READ <string name, dag outs, list<dag> pattern> class LoadParamFrag <PatFrag load_type> : PatFrag < (ops node:$ptr), (load_type node:$ptr), [{ return isConstantLoad(cast<LoadSDNode>(N), 0) || - (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.PARAM_I_ADDRESS); }] + (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }] >; def vtx_id3_az_extloadi8 : LoadParamFrag<az_extloadi8>; @@ -309,8 +309,8 @@ def vtx_id3_load : LoadParamFrag<load>; class LoadVtxId1 <PatFrag load> : PatFrag < (ops node:$ptr), (load node:$ptr), [{ const MemSDNode *LD = cast<MemSDNode>(N); - return LD->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS || - (LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && + return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || + (LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && !isa<GlobalValue>(GetUnderlyingObject( LD->getMemOperand()->getValue(), CurDAG->getDataLayout()))); }]>; @@ -322,7 +322,7 @@ def vtx_id1_load : LoadVtxId1 <load>; class LoadVtxId2 <PatFrag load> : PatFrag < (ops node:$ptr), (load node:$ptr), [{ const MemSDNode *LD = cast<MemSDNode>(N); - return LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && + return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && isa<GlobalValue>(GetUnderlyingObject( LD->getMemOperand()->getValue(), CurDAG->getDataLayout())); }]>; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 9f4599ff379..f9f24c79bd6 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -950,11 +950,11 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, if (AM.BaseGV) return false; - if (AS == AMDGPUASI.GLOBAL_ADDRESS) + if (AS == AMDGPUAS::GLOBAL_ADDRESS) return isLegalGlobalAddressingMode(AM); - if (AS == AMDGPUASI.CONSTANT_ADDRESS || - AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) { + if (AS == AMDGPUAS::CONSTANT_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) { // If the offset isn't a multiple of 4, it probably isn't going to be // correctly aligned. // FIXME: Can we get the real alignment here? 
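Every mechanical hunk in this patch has the shape visible above: a lookup through the per-triple AMDGPUASI struct becomes a reference to a fixed AMDGPUAS enumerator, so address-space checks compare against compile-time constants instead of per-subtarget state. A standalone before/after sketch, with simplified names standing in for the LLVM ones (the numeric values match the new enum: flat 0, global 1, private 5):

    // Before: each pass carried a table filled in from the triple and
    // compared address spaces against its fields at run time.
    struct AddrSpaces { unsigned Flat, Global, Private; };
    bool isFlatOld(const AddrSpaces &AS, unsigned A) { return A == AS.Flat; }

    // After: the numbering is fixed for every target, so the same check
    // needs no table and folds to a compare against a constant.
    namespace FixedAS { enum : unsigned { FLAT = 0, GLOBAL = 1, PRIVATE = 5 }; }
    bool isFlatNew(unsigned A) { return A == FixedAS::FLAT; }

This is also why helpers such as isFlatGlobalAddrSpace further down can drop their AMDGPUAS parameter entirely.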
@@ -992,10 +992,10 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, return false; - } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { + } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) { return isLegalMUBUFAddressingMode(AM); - } else if (AS == AMDGPUASI.LOCAL_ADDRESS || - AS == AMDGPUASI.REGION_ADDRESS) { + } else if (AS == AMDGPUAS::LOCAL_ADDRESS || + AS == AMDGPUAS::REGION_ADDRESS) { // Basic, single offset DS instructions allow a 16-bit unsigned immediate // field. // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have @@ -1010,8 +1010,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, return true; return false; - } else if (AS == AMDGPUASI.FLAT_ADDRESS || - AS == AMDGPUASI.UNKNOWN_ADDRESS_SPACE) { + } else if (AS == AMDGPUAS::FLAT_ADDRESS || + AS == AMDGPUAS::UNKNOWN_ADDRESS_SPACE) { // For an unknown address space, this usually means that this is for some // reason being used for pure arithmetic, and not based on some addressing // computation. We don't have instructions that compute pointers with any @@ -1025,12 +1025,12 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const { - if (AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) { + if (AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) { return (MemVT.getSizeInBits() <= 4 * 32); - } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { + } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) { unsigned MaxPrivateBits = 8 * getSubtarget()->getMaxPrivateElementSize(); return (MemVT.getSizeInBits() <= MaxPrivateBits); - } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { + } else if (AS == AMDGPUAS::LOCAL_ADDRESS) { return (MemVT.getSizeInBits() <= 2 * 32); } return true; @@ -1052,8 +1052,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, return false; } - if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS || - AddrSpace == AMDGPUASI.REGION_ADDRESS) { + if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || + AddrSpace == AMDGPUAS::REGION_ADDRESS) { // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte // aligned, 8 byte access in a single operation using ds_read2/write2_b32 // with adjacent offsets. @@ -1068,8 +1068,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, // will access scratch. If we had access to the IR function, then we // could determine if any private memory was used in the function. if (!Subtarget->hasUnalignedScratchAccess() && - (AddrSpace == AMDGPUASI.PRIVATE_ADDRESS || - AddrSpace == AMDGPUASI.FLAT_ADDRESS)) { + (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS || + AddrSpace == AMDGPUAS::FLAT_ADDRESS)) { return false; } @@ -1077,8 +1077,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, // If we have a uniform constant load, it still requires using a slow // buffer instruction if unaligned. if (IsFast) { - *IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS || - AddrSpace == AMDGPUASI.CONSTANT_ADDRESS_32BIT) ? + *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS || + AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
(Align % 4 == 0) : true; } @@ -1118,17 +1118,16 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, return MVT::Other; } -static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) { - return AS == AMDGPUASI.GLOBAL_ADDRESS || - AS == AMDGPUASI.FLAT_ADDRESS || - AS == AMDGPUASI.CONSTANT_ADDRESS || - AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT; +static bool isFlatGlobalAddrSpace(unsigned AS) { + return AS == AMDGPUAS::GLOBAL_ADDRESS || + AS == AMDGPUAS::FLAT_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT; } bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { - return isFlatGlobalAddrSpace(SrcAS, AMDGPUASI) && - isFlatGlobalAddrSpace(DestAS, AMDGPUASI); + return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS); } bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const { @@ -1142,7 +1141,7 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { // Flat -> private/local is a simple truncate. // Flat -> global is no-op - if (SrcAS == AMDGPUASI.FLAT_ADDRESS) + if (SrcAS == AMDGPUAS::FLAT_ADDRESS) return true; return isNoopAddrSpaceCast(SrcAS, DestAS); @@ -1209,7 +1208,7 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG, = Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); - MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS); + MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); SDValue BasePtr = DAG.getCopyFromReg(Chain, SL, MRI.getLiveInVirtReg(InputPtrReg->getRegister()), PtrVT); @@ -1249,7 +1248,7 @@ SDValue SITargetLowering::lowerKernargMemParameter( uint64_t Offset, unsigned Align, bool Signed, const ISD::InputArg *Arg) const { Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); - PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); // Try to avoid using an extload by loading earlier than the argument address, @@ -2567,7 +2566,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, /*isVol = */ false, /*AlwaysInline = */ true, /*isTailCall = */ false, DstInfo, MachinePointerInfo(UndefValue::get(Type::getInt8PtrTy( - *DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)))); + *DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)))); MemOpChains.push_back(Cpy); } else { @@ -3911,15 +3910,15 @@ void SITargetLowering::createDebuggerPrologueStackObjects( bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const { const Triple &TT = getTargetMachine().getTargetTriple(); - return (GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS || - GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) && + return (GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || + GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) && AMDGPU::shouldEmitConstantsToTextSection(TT); } bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const { - return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS || - GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS || - GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) && + return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || + GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || + GV->getType()->getAddressSpace() 
== AMDGPUAS::CONSTANT_ADDRESS_32BIT) && !shouldEmitFixup(GV) && !getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); } @@ -4107,10 +4106,10 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, SelectionDAG &DAG) const { // FIXME: Use inline constants (src_{shared, private}_base) instead. if (Subtarget->hasApertureRegs()) { - unsigned Offset = AS == AMDGPUASI.LOCAL_ADDRESS ? + unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ? AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE : AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE; - unsigned WidthM1 = AS == AMDGPUASI.LOCAL_ADDRESS ? + unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ? AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE : AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE; unsigned Encoding = @@ -4135,7 +4134,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, // Offset into amd_queue_t for group_segment_aperture_base_hi / // private_segment_aperture_base_hi. - uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44; + uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44; SDValue Ptr = DAG.getObjectPtrOffset(DL, QueuePtr, StructOffset); @@ -4143,7 +4142,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, // TODO: We should use the value from the IR intrinsic call, but it might not // be available and how do we get it? Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()), - AMDGPUASI.CONSTANT_ADDRESS)); + AMDGPUAS::CONSTANT_ADDRESS)); MachinePointerInfo PtrInfo(V, StructOffset); return DAG.getLoad(MVT::i32, DL, QueuePtr.getValue(1), Ptr, PtrInfo, @@ -4164,11 +4163,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, static_cast<const AMDGPUTargetMachine &>(getTargetMachine()); // flat -> local/private - if (ASC->getSrcAddressSpace() == AMDGPUASI.FLAT_ADDRESS) { + if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) { unsigned DestAS = ASC->getDestAddressSpace(); - if (DestAS == AMDGPUASI.LOCAL_ADDRESS || - DestAS == AMDGPUASI.PRIVATE_ADDRESS) { + if (DestAS == AMDGPUAS::LOCAL_ADDRESS || + DestAS == AMDGPUAS::PRIVATE_ADDRESS) { unsigned NullVal = TM.getNullPointerValue(DestAS); SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32); SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE); @@ -4180,11 +4179,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, } // local/private -> flat - if (ASC->getDestAddressSpace() == AMDGPUASI.FLAT_ADDRESS) { + if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) { unsigned SrcAS = ASC->getSrcAddressSpace(); - if (SrcAS == AMDGPUASI.LOCAL_ADDRESS || - SrcAS == AMDGPUASI.PRIVATE_ADDRESS) { + if (SrcAS == AMDGPUAS::LOCAL_ADDRESS || + SrcAS == AMDGPUAS::PRIVATE_ADDRESS) { unsigned NullVal = TM.getNullPointerValue(SrcAS); SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32); @@ -4381,9 +4380,9 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op, bool SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // We can fold offsets for anything that doesn't require a GOT relocation. 
- return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS || - GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS || - GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) && + return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || + GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || + GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) && !shouldEmitGOTReloc(GA->getGlobal()); } @@ -4435,9 +4434,9 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GSD->getGlobal(); - if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS && - GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT && - GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS && + if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS && + GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS_32BIT && + GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS && // FIXME: It isn't correct to rely on the type of the pointer. This should // be removed when address space 0 is 64-bit. !GV->getType()->getElementType()->isFunctionTy()) @@ -4456,7 +4455,7 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, SIInstrInfo::MO_GOTPCREL32); Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext()); - PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); const DataLayout &DataLayout = DAG.getDataLayout(); unsigned Align = DataLayout.getABITypeAlignment(PtrTy); // FIXME: Use a PseudoSourceValue once those can be assigned an address space. @@ -4699,8 +4698,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op, // Dz/dh, dz/dv and the last odd coord are packed with undef. Also, // in 1D, derivatives dx/dh and dx/dv are packed with undef. if (((i + 1) >= (AddrIdx + NumMIVAddrs)) || - ((NumGradients / 2) % 2 == 1 && - (i == DimIdx + (NumGradients / 2) - 1 || + ((NumGradients / 2) % 2 == 1 && + (i == DimIdx + (NumGradients / 2) - 1 || i == DimIdx + NumGradients - 1))) { AddrHi = DAG.getUNDEF(MVT::f16); } else { @@ -6077,8 +6076,8 @@ SDValue SITargetLowering::widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const // FIXME: Constant loads should all be marked invariant. unsigned AS = Ld->getAddressSpace(); - if (AS != AMDGPUASI.CONSTANT_ADDRESS && - AS != AMDGPUASI.CONSTANT_ADDRESS_32BIT && + if (AS != AMDGPUAS::CONSTANT_ADDRESS && + AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT && (AS != AMDGPUAS::GLOBAL_ADDRESS || !Ld->isInvariant())) return SDValue(); @@ -6189,14 +6188,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); // If there is a possibility that flat instructions access scratch memory // then we need to use the same legalization rules we use for private. - if (AS == AMDGPUASI.FLAT_ADDRESS) AS = MFI->hasFlatScratchInit() ?
- AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS; + AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS; unsigned NumElements = MemVT.getVectorNumElements(); - if (AS == AMDGPUASI.CONSTANT_ADDRESS || - AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) { + if (AS == AMDGPUAS::CONSTANT_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) { if (!Op->isDivergent() && Alignment >= 4) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they @@ -6205,9 +6204,9 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { // } - if (AS == AMDGPUASI.CONSTANT_ADDRESS || - AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT || - AS == AMDGPUASI.GLOBAL_ADDRESS) { + if (AS == AMDGPUAS::CONSTANT_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || + AS == AMDGPUAS::GLOBAL_ADDRESS) { if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() && !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) && Alignment >= 4) @@ -6217,16 +6216,16 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { // loads. // } - if (AS == AMDGPUASI.CONSTANT_ADDRESS || - AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT || - AS == AMDGPUASI.GLOBAL_ADDRESS || - AS == AMDGPUASI.FLAT_ADDRESS) { + if (AS == AMDGPUAS::CONSTANT_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || + AS == AMDGPUAS::GLOBAL_ADDRESS || + AS == AMDGPUAS::FLAT_ADDRESS) { if (NumElements > 4) return SplitVectorLoad(Op, DAG); // v4 loads are supported for private and global memory. return SDValue(); } - if (AS == AMDGPUASI.PRIVATE_ADDRESS) { + if (AS == AMDGPUAS::PRIVATE_ADDRESS) { // Depending on the setting of the private_element_size field in the // resource descriptor, we can only make private accesses up to a certain // size. @@ -6245,7 +6244,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { default: llvm_unreachable("unsupported private_element_size"); } - } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { + } else if (AS == AMDGPUAS::LOCAL_ADDRESS) { // Use ds_read_b128 if possible. if (Subtarget->useDS128() && Load->getAlignment() >= 16 && MemVT.getStoreSize() == 16) @@ -6622,17 +6621,17 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); // If there is a possibilty that flat instruction access scratch memory // then we need to use the same legalization rules we use for private. - if (AS == AMDGPUASI.FLAT_ADDRESS) + if (AS == AMDGPUAS::FLAT_ADDRESS) AS = MFI->hasFlatScratchInit() ? - AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS; + AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS; unsigned NumElements = VT.getVectorNumElements(); - if (AS == AMDGPUASI.GLOBAL_ADDRESS || - AS == AMDGPUASI.FLAT_ADDRESS) { + if (AS == AMDGPUAS::GLOBAL_ADDRESS || + AS == AMDGPUAS::FLAT_ADDRESS) { if (NumElements > 4) return SplitVectorStore(Op, DAG); return SDValue(); - } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { + } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) { switch (Subtarget->getMaxPrivateElementSize()) { case 4: return scalarizeVectorStore(Store, DAG); @@ -6647,7 +6646,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { default: llvm_unreachable("unsupported private_element_size"); } - } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { + } else if (AS == AMDGPUAS::LOCAL_ADDRESS) { // Use ds_write_b128 if possible. 
if (Subtarget->useDS128() && Store->getAlignment() >= 16 && VT.getStoreSize() == 16) @@ -6687,7 +6686,7 @@ SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) co unsigned AS = AtomicNode->getAddressSpace(); // No custom lowering required for local address space - if (!isFlatGlobalAddrSpace(AS, AMDGPUASI)) + if (!isFlatGlobalAddrSpace(AS)) return Op; // Non-local address space requires custom lowering for atomic compare @@ -9205,8 +9204,8 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N, break; case ISD::LOAD: { const LoadSDNode *L = dyn_cast<LoadSDNode>(N); - if (L->getMemOperand()->getAddrSpace() == - Subtarget->getAMDGPUAS().PRIVATE_ADDRESS) + // FIXME: Also needs to handle flat. + if (L->getMemOperand()->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS) return true; } break; case ISD::CALLSEQ_END: diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index d456e3d9b94..b6dab35ac9a 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -370,7 +370,6 @@ private: const MachineRegisterInfo *MRI = nullptr; const MachineLoopInfo *MLI = nullptr; AMDGPU::IsaInfo::IsaVersion IV; - AMDGPUAS AMDGPUASI; DenseSet<MachineBasicBlock *> BlockVisitedSet; DenseSet<MachineInstr *> TrackedWaitcntSet; @@ -1051,7 +1050,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore( // instruction. for (const MachineMemOperand *Memop : MI.memoperands()) { unsigned AS = Memop->getAddrSpace(); - if (AS != AMDGPUASI.LOCAL_ADDRESS) + if (AS != AMDGPUAS::LOCAL_ADDRESS) continue; unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS; // VM_CNT is only relevant to vgpr or LDS. @@ -1086,7 +1085,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore( // FIXME: Should not be relying on memoperands. 
for (const MachineMemOperand *Memop : MI.memoperands()) { unsigned AS = Memop->getAddrSpace(); - if (AS != AMDGPUASI.LOCAL_ADDRESS) + if (AS != AMDGPUAS::LOCAL_ADDRESS) continue; unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS; EmitWaitcnt |= ScoreBrackets->updateByWait( @@ -1305,7 +1304,7 @@ bool SIInsertWaitcnts::mayAccessLDSThroughFlat(const MachineInstr &MI) const { for (const MachineMemOperand *Memop : MI.memoperands()) { unsigned AS = Memop->getAddrSpace(); - if (AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) + if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) return true; } @@ -1844,7 +1843,6 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { MLI = &getAnalysis<MachineLoopInfo>(); IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits()); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - AMDGPUASI = ST->getAMDGPUAS(); ForceEmitZeroWaitcnts = ForceEmitZeroFlag; for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 59dfb8a7a23..81f631ced87 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1937,16 +1937,16 @@ unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind( switch(Kind) { case PseudoSourceValue::Stack: case PseudoSourceValue::FixedStack: - return ST.getAMDGPUAS().PRIVATE_ADDRESS; + return AMDGPUAS::PRIVATE_ADDRESS; case PseudoSourceValue::ConstantPool: case PseudoSourceValue::GOT: case PseudoSourceValue::JumpTable: case PseudoSourceValue::GlobalValueCallEntry: case PseudoSourceValue::ExternalSymbolCallEntry: case PseudoSourceValue::TargetCustom: - return ST.getAMDGPUAS().CONSTANT_ADDRESS; + return AMDGPUAS::CONSTANT_ADDRESS; } - return ST.getAMDGPUAS().FLAT_ADDRESS; + return AMDGPUAS::FLAT_ADDRESS; } static void removeModOperands(MachineInstr &MI) { @@ -4605,7 +4605,7 @@ void SIInstrInfo::splitScalarBuffer(SetVectorType &Worklist, unsigned Count = 0; const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); - + switch(Opcode) { default: return; @@ -4619,7 +4619,7 @@ void SIInstrInfo::splitScalarBuffer(SetVectorType &Worklist, // FIXME: Should also attempt to build VAddr and Offset like the non-split // case (see call site for this function) - + // Create a vector of result registers SmallVector<unsigned, 8> ResultRegs; for (unsigned i = 0; i < Count ; ++i) { @@ -4913,7 +4913,7 @@ unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI, return AMDGPU::NoRegister; assert(!MI.memoperands_empty() && - (*MI.memoperands_begin())->getAddrSpace() == ST.getAMDGPUAS().PRIVATE_ADDRESS); + (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS); FrameIndex = Addr->getIndex(); return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); @@ -5030,7 +5030,7 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const { return true; for (const MachineMemOperand *MMO : MI.memoperands()) { - if (MMO->getAddrSpace() == ST.getAMDGPUAS().FLAT_ADDRESS) + if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS) return true; } return false; diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 938cdaf1ef8..645fbfc0e84 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -202,8 +202,6 @@ public: class SIMemOpAccess final { private: - - AMDGPUAS 
SIAddrSpaceInfo; AMDGPUMachineModuleInfo *MMI = nullptr; /// Reports unsupported message \p Msg for \p MI to LLVM context. @@ -453,22 +451,21 @@ SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID, } SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const { - if (AS == SIAddrSpaceInfo.FLAT_ADDRESS) + if (AS == AMDGPUAS::FLAT_ADDRESS) return SIAtomicAddrSpace::FLAT; - if (AS == SIAddrSpaceInfo.GLOBAL_ADDRESS) + if (AS == AMDGPUAS::GLOBAL_ADDRESS) return SIAtomicAddrSpace::GLOBAL; - if (AS == SIAddrSpaceInfo.LOCAL_ADDRESS) + if (AS == AMDGPUAS::LOCAL_ADDRESS) return SIAtomicAddrSpace::LDS; - if (AS == SIAddrSpaceInfo.PRIVATE_ADDRESS) + if (AS == AMDGPUAS::PRIVATE_ADDRESS) return SIAtomicAddrSpace::SCRATCH; - if (AS == SIAddrSpaceInfo.REGION_ADDRESS) + if (AS == AMDGPUAS::REGION_ADDRESS) return SIAtomicAddrSpace::GDS; return SIAtomicAddrSpace::OTHER; } SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) { - SIAddrSpaceInfo = getAMDGPUAS(MF.getTarget()); MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>(); } diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index 2d68908df87..4e8ecfb2c1c 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -382,8 +382,8 @@ defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">; def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ auto Ld = cast<LoadSDNode>(N); return Ld->getAlignment() >= 4 && - ((((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) || (Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT)) && !N->isDivergent()) || - (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && + ((((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) || (Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)) && !N->isDivergent()) || + (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && !Ld->isVolatile() && !N->isDivergent() && static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N))); }]>; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index b5025c0535e..3f361edf4e8 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -978,29 +978,6 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, return true; } -} // end namespace AMDGPU - -} // end namespace llvm - -namespace llvm { -namespace AMDGPU { - -AMDGPUAS getAMDGPUAS(Triple T) { - AMDGPUAS AS; - AS.FLAT_ADDRESS = 0; - AS.PRIVATE_ADDRESS = 5; - AS.REGION_ADDRESS = 2; - return AS; -} - -AMDGPUAS getAMDGPUAS(const TargetMachine &M) { - return getAMDGPUAS(M.getTargetTriple()); -} - -AMDGPUAS getAMDGPUAS(const Module &M) { - return getAMDGPUAS(Triple(M.getTargetTriple())); -} - namespace { struct SourceOfDivergence { diff --git a/llvm/test/CodeGen/AMDGPU/alloca.ll b/llvm/test/CodeGen/AMDGPU/alloca.ll index fd42e925f3a..58d8d2f4080 100644 --- a/llvm/test/CodeGen/AMDGPU/alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/alloca.ll @@ -1,6 +1,6 @@ ; RUN: llvm-as -data-layout=A5 < %s | llvm-dis | FileCheck %s -; RUN: llc -mtriple amdgcn-amd-amdhsa-amdgiz < %s -; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple amdgcn-amd-amdhsa-amdgiz +; RUN: llc -mtriple amdgcn-amd-amdhsa < %s +; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple amdgcn-amd-amdhsa ; RUN: opt -data-layout=A5 -S < %s ; RUN: llvm-as 
-data-layout=A5 < %s | opt -S diff --git a/llvm/test/CodeGen/AMDGPU/call-return-types.ll b/llvm/test/CodeGen/AMDGPU/call-return-types.ll index 0e69a268ac8..313ce63b0d2 100644 --- a/llvm/test/CodeGen/AMDGPU/call-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-return-types.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare void @external_void_func_void() #0 diff --git a/llvm/test/CodeGen/AMDGPU/combine_vloads.ll b/llvm/test/CodeGen/AMDGPU/combine_vloads.ll index 005cfe6f955..1545d5219b1 100644 --- a/llvm/test/CodeGen/AMDGPU/combine_vloads.ll +++ b/llvm/test/CodeGen/AMDGPU/combine_vloads.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG %s +; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG %s ; ; kernel void combine_vloads(global char8 addrspace(5)* src, global char8 addrspace(5)* result) { diff --git a/llvm/test/CodeGen/AMDGPU/debug-value.ll b/llvm/test/CodeGen/AMDGPU/debug-value.ll index 75b21140bb6..0eb639e06e6 100644 --- a/llvm/test/CodeGen/AMDGPU/debug-value.ll +++ b/llvm/test/CodeGen/AMDGPU/debug-value.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck %s %struct.wombat = type { [4 x i32], [4 x i32], [4 x i32] } diff --git a/llvm/test/CodeGen/AMDGPU/debug-value2.ll b/llvm/test/CodeGen/AMDGPU/debug-value2.ll index b2ec6ed32c9..5200315ed74 100644 --- a/llvm/test/CodeGen/AMDGPU/debug-value2.ll +++ b/llvm/test/CodeGen/AMDGPU/debug-value2.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck %s %struct.ShapeData = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32, i64, <4 x float>, i32, i8, i8, i16, i32, i32 } diff --git a/llvm/test/CodeGen/AMDGPU/debugger-emit-prologue.ll b/llvm/test/CodeGen/AMDGPU/debugger-emit-prologue.ll index e0d17c06700..46d81e57065 100644 --- a/llvm/test/CodeGen/AMDGPU/debugger-emit-prologue.ll +++ b/llvm/test/CodeGen/AMDGPU/debugger-emit-prologue.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=+amdgpu-debugger-emit-prologue -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -O0 -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s --check-prefix=NOATTR +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-emit-prologue -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s --check-prefix=NOATTR target datalayout = "A5" ; CHECK: 
debug_wavefront_private_segment_offset_sgpr = [[SOFF:[0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/debugger-insert-nops.ll b/llvm/test/CodeGen/AMDGPU/debugger-insert-nops.ll index ee045c31caa..f0947e49873 100644 --- a/llvm/test/CodeGen/AMDGPU/debugger-insert-nops.ll +++ b/llvm/test/CodeGen/AMDGPU/debugger-insert-nops.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK -; RUN: llc -O0 -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECKNOP +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECKNOP target datalayout = "A5" ; This test expects that we have one instance for each line in some order with "s_nop 0" instances after each. diff --git a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll index d0efd07497d..19a3823a3ed 100644 --- a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll @@ -1,6 +1,6 @@ -; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+promote-alloca -verify-machineinstrs < %s 2>&1 | FileCheck %s -; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s 2>&1 | FileCheck %s -; RUN: not llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -mattr=+promote-alloca -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: not llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s 2>&1 | FileCheck %s target datalayout = "A5" ; CHECK: in function test_dynamic_stackalloc{{.*}}: unsupported dynamic alloca diff --git a/llvm/test/CodeGen/AMDGPU/env-amdgiz.ll b/llvm/test/CodeGen/AMDGPU/env-amdgiz.ll index 70e4fb30d3a..dd8bb1a8aba 100644 --- a/llvm/test/CodeGen/AMDGPU/env-amdgiz.ll +++ b/llvm/test/CodeGen/AMDGPU/env-amdgiz.ll @@ -1,8 +1,8 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa-amdgiz -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s ; Just check the target feature and data layout is accepted without error. target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" -target triple = "amdgcn-amd-amdhsa-amdgiz" +target triple = "amdgcn-amd-amdhsa" define void @foo() { entry: diff --git a/llvm/test/CodeGen/AMDGPU/env-amdgizcl.ll b/llvm/test/CodeGen/AMDGPU/env-amdgizcl.ll index feb213562c8..dd8bb1a8aba 100644 --- a/llvm/test/CodeGen/AMDGPU/env-amdgizcl.ll +++ b/llvm/test/CodeGen/AMDGPU/env-amdgizcl.ll @@ -1,8 +1,8 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa-amdgizcl -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s ; Just check the target feature and data layout is accepted without error. 
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" -target triple = "amdgcn-amd-amdhsa-amdgizcl" +target triple = "amdgcn-amd-amdhsa" define void @foo() { entry: diff --git a/llvm/test/CodeGen/AMDGPU/extload-align.ll b/llvm/test/CodeGen/AMDGPU/extload-align.ll index aa80ee4f64f..79749a0b4c4 100644 --- a/llvm/test/CodeGen/AMDGPU/extload-align.ll +++ b/llvm/test/CodeGen/AMDGPU/extload-align.ll @@ -1,4 +1,4 @@ -; RUN: llc -debug-only=machine-scheduler -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs %s -o - 2>&1| FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC -check-prefix=DEBUG %s +; RUN: llc -debug-only=machine-scheduler -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs %s -o - 2>&1| FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC -check-prefix=DEBUG %s target datalayout = "A5" ; REQUIRES: asserts diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll index 2bd69b87d5c..090a1e26745 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}extract_vector_elt_v2f16: ; GCN: s_load_dword [[VEC:s[0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll index 2f13f63fa88..203c813f4fb 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}extract_vector_elt_v3f64_2: ; GCN: buffer_load_dwordx4 diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll index d5337506312..8aec6ef41ca 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,GFX89 %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s +; RUN: llc 
-march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,GFX89 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s ; GCN-LABEL: {{^}}extract_vector_elt_v2i16: ; GCN: s_load_dword [[VEC:s[0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll index 99019e2a83e..96c16d32015 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; How the replacement of i64 stores with v2i32 stores resulted in ; breaking other users of the bitcast if they already existed diff --git a/llvm/test/CodeGen/AMDGPU/fence-amdgiz.ll b/llvm/test/CodeGen/AMDGPU/fence-amdgiz.ll index 0dd2a9241b2..b1338c5efea 100644 --- a/llvm/test/CodeGen/AMDGPU/fence-amdgiz.ll +++ b/llvm/test/CodeGen/AMDGPU/fence-amdgiz.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -mcpu=kaveri < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck %s target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" diff --git a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll index dad01c2a83c..8f5a06d01fa 100644 --- a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s -; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s +; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=GCN %s declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-amdgiz.ll b/llvm/test/CodeGen/AMDGPU/frame-index-amdgiz.ll index 8863b94ab78..c63c0049eb0 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index-amdgiz.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-index-amdgiz.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn---amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s ; ; The original OpenCL kernel: ; kernel void f(global int *a, int i, int j) { @@ -6,7 +6,7 @@ ; x[i] = 7; ; a[0] = x[j]; ; } -; clang -cc1 -triple amdgcn---amdgizcl -emit-llvm -o - +; clang 
-cc1 -triple amdgcn--cl -emit-llvm -o - target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" diff --git a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll index 20208b188d7..8d30b243614 100644 --- a/llvm/test/CodeGen/AMDGPU/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s ; GCN-LABEL: {{^}}i1_func_void: ; GCN: buffer_load_ubyte v0, off diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll index 6eb86ae5dae..fab086e6cb1 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s target datalayout = "A5" declare void @llvm.dbg.declare(metadata, metadata, metadata) diff --git a/llvm/test/CodeGen/AMDGPU/invalid-alloca.ll b/llvm/test/CodeGen/AMDGPU/invalid-alloca.ll index 043ab46716b..fb84117b047 100644 --- a/llvm/test/CodeGen/AMDGPU/invalid-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/invalid-alloca.ll @@ -1,6 +1,6 @@ ; RUN: not llvm-as -data-layout=A5 < %s 2>&1 | FileCheck -check-prefixes=COMMON,AS %s -; RUN: not llc -mtriple amdgcn-amd-amdhsa-amdgiz < %s 2>&1 | FileCheck 
-check-prefixes=COMMON,LLC %s -; RUN: llvm-as < %s | not llc -mtriple amdgcn-amd-amdhsa-amdgiz 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s +; RUN: not llc -mtriple amdgcn-amd-amdhsa < %s 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s +; RUN: llvm-as < %s | not llc -mtriple amdgcn-amd-amdhsa 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s ; RUN: not opt -data-layout=A5 -S < %s 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s ; RUN: llvm-as < %s | not opt -data-layout=A5 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll index 61a2aca3891..8ea45def252 100644 --- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}constant_load_i1: ; GCN: buffer_load_ubyte diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i1.ll b/llvm/test/CodeGen/AMDGPU/load-global-i1.ll index b35922bac23..3575a707bd5 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i1.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}global_load_i1: ; GCN: buffer_load_ubyte diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll index 0320debc828..e8e7fc3752e 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s -; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefixes=EG,FUNC %s +; RUN: llc -march=amdgcn 
-mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
+; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefixes=EG,FUNC %s

 ; FUNC-LABEL: {{^}}local_load_i1:
 ; SICIVI: s_mov_b32 m0
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i8.ll b/llvm/test/CodeGen/AMDGPU/load-local-i8.ll
index 6c72608d3d9..02477eff677 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i8.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,FUNC %s
-; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,SICIVI,FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,FUNC %s
+; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s

 ; Testing for ds_read/write_b128
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s
diff --git a/llvm/test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll b/llvm/test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll
index e158677e098..9f9a4e8783f 100644
--- a/llvm/test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

 target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-store-infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-store-infinite-loop.ll
index f97785beab6..c082766e283 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-store-infinite-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-store-infinite-loop.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

 ; Effectively, check that the compile finishes; in the case
 ; of an infinite loop, llc toggles between merging 2 ST4s
diff --git a/llvm/test/CodeGen/AMDGPU/nullptr.ll b/llvm/test/CodeGen/AMDGPU/nullptr.ll
index 1013349e781..4eaf9836bb9 100644
--- a/llvm/test/CodeGen/AMDGPU/nullptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/nullptr.ll
@@ -1,5 +1,5 @@
-;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck -check-prefixes=CHECK,GCN %s
-;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -verify-machineinstrs | FileCheck -check-prefixes=CHECK,R600 %s
+;RUN: llc < %s -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs | FileCheck -check-prefixes=CHECK,GCN %s
+;RUN: llc < %s -march=r600 -mtriple=r600-- -verify-machineinstrs | FileCheck -check-prefixes=CHECK,R600 %s

 %struct.S = type { i32 addrspace(5)*, i32 addrspace(1)*, i32 addrspace(4)*, i32 addrspace(3)*, i32*, i32 addrspace(2)*}
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
index 3a8b45fd912..a3f988c5acb 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
@@ -1,4 +1,4 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 target datalayout = "A5"

 declare {}* @llvm.invariant.start.p5i8(i64, i8 addrspace(5)* nocapture) #0
diff --git a/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll b/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll
index b2d6139ca3d..273e256fffe 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=r600---amdgiz -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
+; RUN: opt -mtriple=r600-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s

 ; CHECK: NoAlias: i8 addrspace(5)* %p, i8 addrspace(7)* %p1
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index 7fdeb229ae1..182096305f4 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s

 --- |
   %struct.widget.0 = type { float, i32, i32 }
diff --git a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
index 238bcc54928..e4405900840 100644
--- a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
+++ b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx803 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx803 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s

 ; This used to fail due to a v_add_i32 instruction with an illegal immediate
 ; operand that was created during Local Stack Slot Allocation. Test case derived
diff --git a/llvm/test/CodeGen/AMDGPU/setcc.ll b/llvm/test/CodeGen/AMDGPU/setcc.ll
index 4004a6a494c..69f772cb832 100644
--- a/llvm/test/CodeGen/AMDGPU/setcc.ll
+++ b/llvm/test/CodeGen/AMDGPU/setcc.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=R600 -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=R600 -check-prefix=FUNC %s

 declare i32 @llvm.r600.read.tidig.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll b/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
index 70e11d204ad..40d4aa89763 100644
--- a/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
@@ -1,7 +1,7 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s

 ; FIXME: i16 promotion pass ruins the scalar cases when legal.
 ; FIXME: r600 fails verifier
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index cc1b0f00b16..8997e6bc741 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -1,6 +1,6 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
-; XUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; XUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s

 declare i32 @llvm.r600.read.tidig.x() #0
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir
index 7513e8f02d4..eff6e563951 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -run-pass=si-lower-control-flow -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=si-lower-control-flow -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN %s

 # Check that assert is not triggered
 # GCN-LABEL: name: si-lower-control-flow{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 1c0076c9cd9..fb1f5ef5b8a 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI,MESA %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=hawaii -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI,MESA %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,MESA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI,MESA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI,MESA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,MESA %s
 target datalayout = "A5"

 ; FIXME: Why is this commuted only sometimes?
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
index 47e7d4c9cc2..24117e2c836 100644
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -1,6 +1,6 @@
-; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
-; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
-; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s
+; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
+; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
+; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s

 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
 ; GCN-POSTLINK: tail call fast float @_Z3sinf(
diff --git a/llvm/test/CodeGen/AMDGPU/sra.ll b/llvm/test/CodeGen/AMDGPU/sra.ll
index 72d2f737ae4..a4ac760270c 100644
--- a/llvm/test/CodeGen/AMDGPU/sra.ll
+++ b/llvm/test/CodeGen/AMDGPU/sra.ll
@@ -1,6 +1,6 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s

 declare i32 @llvm.r600.read.tidig.x() #0
diff --git a/llvm/test/CodeGen/AMDGPU/store-global.ll b/llvm/test/CodeGen/AMDGPU/store-global.ll
index 8f8df884502..5040ab3bc29 100644
--- a/llvm/test/CodeGen/AMDGPU/store-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-global.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s

 ; FUNC-LABEL: {{^}}store_i1:
 ; EG: MEM_RAT MSKOR
diff --git a/llvm/test/CodeGen/AMDGPU/store-local.ll b/llvm/test/CodeGen/AMDGPU/store-local.ll
index 96d5e06a9e9..f0e76ac5951 100644
--- a/llvm/test/CodeGen/AMDGPU/store-local.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-local.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
-; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
-; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman < %s | FileCheck -check-prefixes=CM,FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
+; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
+; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cayman < %s | FileCheck -check-prefixes=CM,FUNC %s

 ; FUNC-LABEL: {{^}}store_local_i1:
 ; SICIVI: s_mov_b32 m0
diff --git a/llvm/test/CodeGen/AMDGPU/trunc-vector-store-assertion-failure.ll b/llvm/test/CodeGen/AMDGPU/trunc-vector-store-assertion-failure.ll
index d280be5eba4..3911909e345 100644
--- a/llvm/test/CodeGen/AMDGPU/trunc-vector-store-assertion-failure.ll
+++ b/llvm/test/CodeGen/AMDGPU/trunc-vector-store-assertion-failure.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mtriple=r600-- -mcpu=redwood | FileCheck %s

 ; This tests for a bug in the SelectionDAG where custom lowered truncated
 ; vector stores at the end of a basic block were not being added to the
diff --git a/llvm/test/CodeGen/AMDGPU/unknown-processor.ll b/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
index 6dfcff77d81..1a0369be98c 100644
--- a/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
+++ b/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s
-; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=R600 %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s
+; RUN: llc -march=r600 -mtriple=r600-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=R600 %s
 target datalayout = "A5"

 ; Should not crash when the processor is not recognized and the
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll
index 434ba0327e7..2b6e15b79a4 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll
@@ -1,5 +1,5 @@
-; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -tailcallopt < %s 2>&1 | FileCheck -check-prefix=GCN %s
-; RUN: not llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -tailcallopt < %s 2>&1 | FileCheck -check-prefix=R600 %s
+; RUN: not llc -march=amdgcn -mtriple=amdgcn-- -tailcallopt < %s 2>&1 | FileCheck -check-prefix=GCN %s
+; RUN: not llc -march=r600 -mtriple=r600-- -mcpu=cypress -tailcallopt < %s 2>&1 | FileCheck -check-prefix=R600 %s

 declare i32 @external_function(i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/vector-alloca.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca.ll
index fbdfe48cd38..3b6df750ff0 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-alloca.ll
@@ -1,9 +1,9 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
-; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
 target datalayout = "A5"

 ; OPT-LABEL: @vector_read(
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
index a0242ec958b..9cdc333cbc0 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s
-; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa-amdgiz -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s
+; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s

 ; This ends up using all 256 registers and requires register
 ; scavenging which will fail to find an unused register.
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
index 7cfde82de6c..c743d6a48ae 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s

 ; This ends up using all 255 registers and requires register
 ; scavenging which will fail to find an unused register.
diff --git a/llvm/test/tools/llvm-objdump/AMDGPU/source-lines.ll b/llvm/test/tools/llvm-objdump/AMDGPU/source-lines.ll
index 464606d6975..748f04754e4 100644
--- a/llvm/test/tools/llvm-objdump/AMDGPU/source-lines.ll
+++ b/llvm/test/tools/llvm-objdump/AMDGPU/source-lines.ll
@@ -1,7 +1,7 @@
 ; RUN: sed -e "s,SRC_COMPDIR,%/p/Inputs,g" %s > %t.ll
-; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx802 -filetype=obj -O0 -o %t.o %t.ll
-; RUN: llvm-objdump -triple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx802 -disassemble -line-numbers %t.o | FileCheck --check-prefix=LINE %t.ll
-; RUN: llvm-objdump -triple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx802 -disassemble -source %t.o | FileCheck --check-prefix=SOURCE %t.ll
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -O0 -o %t.o %t.ll
+; RUN: llvm-objdump -triple=amdgcn-amd-amdhsa -mcpu=gfx802 -disassemble -line-numbers %t.o | FileCheck --check-prefix=LINE %t.ll
+; RUN: llvm-objdump -triple=amdgcn-amd-amdhsa -mcpu=gfx802 -disassemble -source %t.o | FileCheck --check-prefix=SOURCE %t.ll

 ; Prologue.
 ; LINE: source_lines_test:
@@ -38,7 +38,7 @@
 ; ModuleID = 'source-lines.cl'
 source_filename = "source-lines.cl"
 target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
-target triple = "amdgcn-amd-amdhsa-amdgiz"
+target triple = "amdgcn-amd-amdhsa"

 ; Function Attrs: noinline nounwind
 define amdgpu_kernel void @source_lines_test(i32 addrspace(1)* %Out) #0 !dbg !7 !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !15 {