diff options
Diffstat (limited to 'polly/lib/CodeGen/PPCGCodeGeneration.cpp')
-rw-r--r-- | polly/lib/CodeGen/PPCGCodeGeneration.cpp | 169 |
1 files changed, 158 insertions, 11 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 8935aa172f3..ec488488179 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -545,6 +545,11 @@ private: /// @param The kernel to generate the intrinsic functions for. void insertKernelIntrinsics(ppcg_kernel *Kernel); + /// Insert function calls to retrieve the SPIR group/local ids. + /// + /// @param The kernel to generate the function calls for. + void insertKernelCallsSPIR(ppcg_kernel *Kernel); + /// Setup the creation of functions referenced by the GPU kernel. /// /// 1. Create new function declarations in GPUModule which are the same as @@ -1254,10 +1259,24 @@ void GPUNodeBuilder::createScopStmt(isl_ast_expr *Expr, void GPUNodeBuilder::createKernelSync() { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + const char *SpirName = "__gen_ocl_barrier_global"; Function *Sync; switch (Arch) { + case GPUArch::SPIR64: + case GPUArch::SPIR32: + Sync = M->getFunction(SpirName); + + // If Sync is not available, declare it. + if (!Sync) { + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + std::vector<Type *> Args; + FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); + Sync = Function::Create(Ty, Linkage, SpirName, M); + Sync->setCallingConv(CallingConv::SPIR_FUNC); + } + break; case GPUArch::NVPTX64: Sync = Intrinsic::getDeclaration(M, Intrinsic::nvvm_barrier0); break; @@ -1668,7 +1687,8 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) { finalizeKernelArguments(Kernel); Function *F = Builder.GetInsertBlock()->getParent(); - addCUDAAnnotations(F->getParent(), BlockDimX, BlockDimY, BlockDimZ); + if (Arch == GPUArch::NVPTX64) + addCUDAAnnotations(F->getParent(), BlockDimX, BlockDimY, BlockDimZ); clearDominators(F); clearScalarEvolution(F); clearLoops(F); @@ -1725,12 +1745,35 @@ static std::string computeNVPTXDataLayout(bool is64Bit) { return Ret; } +/// Compute the DataLayout string for a SPIR kernel. +/// +/// @param is64Bit Are we looking for a 64 bit architecture? +static std::string computeSPIRDataLayout(bool is64Bit) { + std::string Ret = ""; + + if (!is64Bit) { + Ret += "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:" + "64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:" + "32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:" + "256:256-v256:256:256-v512:512:512-v1024:1024:1024"; + } else { + Ret += "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:" + "64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:" + "32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:" + "256:256-v256:256:256-v512:512:512-v1024:1024:1024"; + } + + return Ret; +} + Function * GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel, SetVector<Value *> &SubtreeValues) { std::vector<Type *> Args; std::string Identifier = getKernelFuncName(Kernel->id); + std::vector<Metadata *> MemoryType; + for (long i = 0; i < Prog->n_array; i++) { if (!ppcg_kernel_requires_array_argument(Kernel, i)) continue; @@ -1739,16 +1782,23 @@ GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel, isl_id *Id = isl_space_get_tuple_id(Prog->array[i].space, isl_dim_set); const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(Id); Args.push_back(SAI->getElementType()); + MemoryType.push_back( + ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), 0))); } else { static const int UseGlobalMemory = 1; Args.push_back(Builder.getInt8PtrTy(UseGlobalMemory)); + MemoryType.push_back( + ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), 1))); } } int NumHostIters = isl_space_dim(Kernel->space, isl_dim_set); - for (long i = 0; i < NumHostIters; i++) + for (long i = 0; i < NumHostIters; i++) { Args.push_back(Builder.getInt64Ty()); + MemoryType.push_back( + ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), 0))); + } int NumVars = isl_space_dim(Kernel->space, isl_dim_param); @@ -1757,19 +1807,49 @@ GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel, Value *Val = IDToValue[Id]; isl_id_free(Id); Args.push_back(Val->getType()); + MemoryType.push_back( + ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), 0))); } - for (auto *V : SubtreeValues) + for (auto *V : SubtreeValues) { Args.push_back(V->getType()); + MemoryType.push_back( + ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), 0))); + } auto *FT = FunctionType::get(Builder.getVoidTy(), Args, false); auto *FN = Function::Create(FT, Function::ExternalLinkage, Identifier, GPUModule.get()); + std::vector<Metadata *> EmptyStrings; + + for (unsigned int i = 0; i < MemoryType.size(); i++) { + EmptyStrings.push_back(MDString::get(FN->getContext(), "")); + } + + if (Arch == GPUArch::SPIR32 || Arch == GPUArch::SPIR64) { + FN->setMetadata("kernel_arg_addr_space", + MDNode::get(FN->getContext(), MemoryType)); + FN->setMetadata("kernel_arg_name", + MDNode::get(FN->getContext(), EmptyStrings)); + FN->setMetadata("kernel_arg_access_qual", + MDNode::get(FN->getContext(), EmptyStrings)); + FN->setMetadata("kernel_arg_type", + MDNode::get(FN->getContext(), EmptyStrings)); + FN->setMetadata("kernel_arg_type_qual", + MDNode::get(FN->getContext(), EmptyStrings)); + FN->setMetadata("kernel_arg_base_type", + MDNode::get(FN->getContext(), EmptyStrings)); + } + switch (Arch) { case GPUArch::NVPTX64: FN->setCallingConv(CallingConv::PTX_Kernel); break; + case GPUArch::SPIR32: + case GPUArch::SPIR64: + FN->setCallingConv(CallingConv::SPIR_KERNEL); + break; } auto Arg = FN->arg_begin(); @@ -1835,6 +1915,9 @@ void GPUNodeBuilder::insertKernelIntrinsics(ppcg_kernel *Kernel) { Intrinsic::ID IntrinsicsTID[3]; switch (Arch) { + case GPUArch::SPIR64: + case GPUArch::SPIR32: + llvm_unreachable("Cannot generate NVVM intrinsics for SPIR"); case GPUArch::NVPTX64: IntrinsicsBID[0] = Intrinsic::nvvm_read_ptx_sreg_ctaid_x; IntrinsicsBID[1] = Intrinsic::nvvm_read_ptx_sreg_ctaid_y; @@ -1866,6 +1949,41 @@ void GPUNodeBuilder::insertKernelIntrinsics(ppcg_kernel *Kernel) { } } +void GPUNodeBuilder::insertKernelCallsSPIR(ppcg_kernel *Kernel) { + const char *GroupName[3] = {"__gen_ocl_get_group_id0", + "__gen_ocl_get_group_id1", + "__gen_ocl_get_group_id2"}; + + const char *LocalName[3] = {"__gen_ocl_get_local_id0", + "__gen_ocl_get_local_id1", + "__gen_ocl_get_local_id2"}; + + auto createFunc = [this](const char *Name, __isl_take isl_id *Id) mutable { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *FN = M->getFunction(Name); + + // If FN is not available, declare it. + if (!FN) { + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + std::vector<Type *> Args; + FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Args, false); + FN = Function::Create(Ty, Linkage, Name, M); + FN->setCallingConv(CallingConv::SPIR_FUNC); + } + + Value *Val = Builder.CreateCall(FN, {}); + Val = Builder.CreateIntCast(Val, Builder.getInt64Ty(), false, Name); + IDToValue[Id] = Val; + KernelIDs.insert(std::unique_ptr<isl_id, IslIdDeleter>(Id)); + }; + + for (int i = 0; i < Kernel->n_grid; ++i) + createFunc(GroupName[i], isl_id_list_get_id(Kernel->block_ids, i)); + + for (int i = 0; i < Kernel->n_block; ++i) + createFunc(LocalName[i], isl_id_list_get_id(Kernel->thread_ids, i)); +} + void GPUNodeBuilder::prepareKernelArguments(ppcg_kernel *Kernel, Function *FN) { auto Arg = FN->arg_begin(); for (long i = 0; i < Kernel->n_array; i++) { @@ -2004,6 +2122,14 @@ void GPUNodeBuilder::createKernelFunction( GPUModule->setTargetTriple(Triple::normalize("nvptx64-nvidia-nvcl")); GPUModule->setDataLayout(computeNVPTXDataLayout(true /* is64Bit */)); break; + case GPUArch::SPIR32: + GPUModule->setTargetTriple(Triple::normalize("spir-unknown-unknown")); + GPUModule->setDataLayout(computeSPIRDataLayout(false /* is64Bit */)); + break; + case GPUArch::SPIR64: + GPUModule->setTargetTriple(Triple::normalize("spir64-unknown-unknown")); + GPUModule->setDataLayout(computeSPIRDataLayout(true /* is64Bit */)); + break; } Function *FN = createKernelFunctionDecl(Kernel, SubtreeValues); @@ -2021,7 +2147,16 @@ void GPUNodeBuilder::createKernelFunction( prepareKernelArguments(Kernel, FN); createKernelVariables(Kernel, FN); - insertKernelIntrinsics(Kernel); + + switch (Arch) { + case GPUArch::NVPTX64: + insertKernelIntrinsics(Kernel); + break; + case GPUArch::SPIR32: + case GPUArch::SPIR64: + insertKernelCallsSPIR(Kernel); + break; + } } std::string GPUNodeBuilder::createKernelASM() { @@ -2038,6 +2173,13 @@ std::string GPUNodeBuilder::createKernelASM() { break; } break; + case GPUArch::SPIR64: + case GPUArch::SPIR32: + std::string SPIRAssembly; + raw_string_ostream IROstream(SPIRAssembly); + IROstream << *GPUModule; + IROstream.flush(); + return SPIRAssembly; } std::string ErrMsg; @@ -2057,6 +2199,9 @@ std::string GPUNodeBuilder::createKernelASM() { case GPUArch::NVPTX64: subtarget = CudaVersion; break; + case GPUArch::SPIR32: + case GPUArch::SPIR64: + llvm_unreachable("No subtarget for SPIR architecture"); } std::unique_ptr<TargetMachine> TargetM(GPUTarget->createTargetMachine( @@ -2097,13 +2242,15 @@ std::string GPUNodeBuilder::finalizeKernelFunction() { if (DumpKernelIR) outs() << *GPUModule << "\n"; - // Optimize module. - llvm::legacy::PassManager OptPasses; - PassManagerBuilder PassBuilder; - PassBuilder.OptLevel = 3; - PassBuilder.SizeLevel = 0; - PassBuilder.populateModulePassManager(OptPasses); - OptPasses.run(*GPUModule); + if (Arch != GPUArch::SPIR32 && Arch != GPUArch::SPIR64) { + // Optimize module. + llvm::legacy::PassManager OptPasses; + PassManagerBuilder PassBuilder; + PassBuilder.OptLevel = 3; + PassBuilder.SizeLevel = 0; + PassBuilder.populateModulePassManager(OptPasses); + OptPasses.run(*GPUModule); + } std::string Assembly = createKernelASM(); |