summaryrefslogtreecommitdiffstats
path: root/polly/lib/CodeGen/PPCGCodeGeneration.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'polly/lib/CodeGen/PPCGCodeGeneration.cpp')
-rw-r--r--  polly/lib/CodeGen/PPCGCodeGeneration.cpp | 169
1 files changed, 158 insertions, 11 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 8935aa172f3..ec488488179 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -545,6 +545,11 @@ private:
/// @param Kernel The kernel to generate the intrinsic functions for.
void insertKernelIntrinsics(ppcg_kernel *Kernel);
+ /// Insert function calls to retrieve the SPIR group/local ids.
+ ///
+ /// @param Kernel The kernel to generate the function calls for.
+ void insertKernelCallsSPIR(ppcg_kernel *Kernel);
+
/// Setup the creation of functions referenced by the GPU kernel.
///
/// 1. Create new function declarations in GPUModule which are the same as
@@ -1254,10 +1259,24 @@ void GPUNodeBuilder::createScopStmt(isl_ast_expr *Expr,
void GPUNodeBuilder::createKernelSync() {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ const char *SpirName = "__gen_ocl_barrier_global";
Function *Sync;
switch (Arch) {
+ case GPUArch::SPIR64:
+ case GPUArch::SPIR32:
+ Sync = M->getFunction(SpirName);
+
+ // If Sync is not available, declare it.
+ if (!Sync) {
+ GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
+ std::vector<Type *> Args;
+ FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
+ Sync = Function::Create(Ty, Linkage, SpirName, M);
+ Sync->setCallingConv(CallingConv::SPIR_FUNC);
+ }
+ break;
case GPUArch::NVPTX64:
Sync = Intrinsic::getDeclaration(M, Intrinsic::nvvm_barrier0);
break;
@@ -1668,7 +1687,8 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) {
finalizeKernelArguments(Kernel);
Function *F = Builder.GetInsertBlock()->getParent();
- addCUDAAnnotations(F->getParent(), BlockDimX, BlockDimY, BlockDimZ);
+ if (Arch == GPUArch::NVPTX64)
+ addCUDAAnnotations(F->getParent(), BlockDimX, BlockDimY, BlockDimZ);
clearDominators(F);
clearScalarEvolution(F);
clearLoops(F);
@@ -1725,12 +1745,35 @@ static std::string computeNVPTXDataLayout(bool is64Bit) {
return Ret;
}
+/// Compute the DataLayout string for a SPIR kernel.
+///
+/// @param is64Bit Are we looking for a 64 bit architecture?
+static std::string computeSPIRDataLayout(bool is64Bit) {
+ std::string Ret = "";
+
+ if (!is64Bit) {
+ Ret += "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:"
+ "64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:"
+ "32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:"
+ "256:256-v256:256:256-v512:512:512-v1024:1024:1024";
+ } else {
+ Ret += "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:"
+ "64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:"
+ "32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:"
+ "256:256-v256:256:256-v512:512:512-v1024:1024:1024";
+ }
+
+ return Ret;
+}
+
Function *
GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel,
SetVector<Value *> &SubtreeValues) {
std::vector<Type *> Args;
std::string Identifier = getKernelFuncName(Kernel->id);
+ std::vector<Metadata *> MemoryType;
+
for (long i = 0; i < Prog->n_array; i++) {
if (!ppcg_kernel_requires_array_argument(Kernel, i))
continue;
@@ -1739,16 +1782,23 @@ GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel,
isl_id *Id = isl_space_get_tuple_id(Prog->array[i].space, isl_dim_set);
const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(Id);
Args.push_back(SAI->getElementType());
+ MemoryType.push_back(
+ ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), 0)));
} else {
static const int UseGlobalMemory = 1;
Args.push_back(Builder.getInt8PtrTy(UseGlobalMemory));
+ MemoryType.push_back(
+ ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), 1)));
}
}
int NumHostIters = isl_space_dim(Kernel->space, isl_dim_set);
- for (long i = 0; i < NumHostIters; i++)
+ for (long i = 0; i < NumHostIters; i++) {
Args.push_back(Builder.getInt64Ty());
+ MemoryType.push_back(
+ ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), 0)));
+ }
int NumVars = isl_space_dim(Kernel->space, isl_dim_param);
@@ -1757,19 +1807,49 @@ GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel,
Value *Val = IDToValue[Id];
isl_id_free(Id);
Args.push_back(Val->getType());
+ MemoryType.push_back(
+ ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), 0)));
}
- for (auto *V : SubtreeValues)
+ for (auto *V : SubtreeValues) {
Args.push_back(V->getType());
+ MemoryType.push_back(
+ ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), 0)));
+ }
auto *FT = FunctionType::get(Builder.getVoidTy(), Args, false);
auto *FN = Function::Create(FT, Function::ExternalLinkage, Identifier,
GPUModule.get());
+ std::vector<Metadata *> EmptyStrings;
+
+ for (unsigned int i = 0; i < MemoryType.size(); i++) {
+ EmptyStrings.push_back(MDString::get(FN->getContext(), ""));
+ }
+
+ if (Arch == GPUArch::SPIR32 || Arch == GPUArch::SPIR64) {
+ FN->setMetadata("kernel_arg_addr_space",
+ MDNode::get(FN->getContext(), MemoryType));
+ FN->setMetadata("kernel_arg_name",
+ MDNode::get(FN->getContext(), EmptyStrings));
+ FN->setMetadata("kernel_arg_access_qual",
+ MDNode::get(FN->getContext(), EmptyStrings));
+ FN->setMetadata("kernel_arg_type",
+ MDNode::get(FN->getContext(), EmptyStrings));
+ FN->setMetadata("kernel_arg_type_qual",
+ MDNode::get(FN->getContext(), EmptyStrings));
+ FN->setMetadata("kernel_arg_base_type",
+ MDNode::get(FN->getContext(), EmptyStrings));
+ }
+
switch (Arch) {
case GPUArch::NVPTX64:
FN->setCallingConv(CallingConv::PTX_Kernel);
break;
+ case GPUArch::SPIR32:
+ case GPUArch::SPIR64:
+ FN->setCallingConv(CallingConv::SPIR_KERNEL);
+ break;
}
auto Arg = FN->arg_begin();
@@ -1835,6 +1915,9 @@ void GPUNodeBuilder::insertKernelIntrinsics(ppcg_kernel *Kernel) {
Intrinsic::ID IntrinsicsTID[3];
switch (Arch) {
+ case GPUArch::SPIR64:
+ case GPUArch::SPIR32:
+ llvm_unreachable("Cannot generate NVVM intrinsics for SPIR");
case GPUArch::NVPTX64:
IntrinsicsBID[0] = Intrinsic::nvvm_read_ptx_sreg_ctaid_x;
IntrinsicsBID[1] = Intrinsic::nvvm_read_ptx_sreg_ctaid_y;
@@ -1866,6 +1949,41 @@ void GPUNodeBuilder::insertKernelIntrinsics(ppcg_kernel *Kernel) {
}
}
+void GPUNodeBuilder::insertKernelCallsSPIR(ppcg_kernel *Kernel) {
+ const char *GroupName[3] = {"__gen_ocl_get_group_id0",
+ "__gen_ocl_get_group_id1",
+ "__gen_ocl_get_group_id2"};
+
+ const char *LocalName[3] = {"__gen_ocl_get_local_id0",
+ "__gen_ocl_get_local_id1",
+ "__gen_ocl_get_local_id2"};
+
+ auto createFunc = [this](const char *Name, __isl_take isl_id *Id) mutable {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Function *FN = M->getFunction(Name);
+
+ // If FN is not available, declare it.
+ if (!FN) {
+ GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
+ std::vector<Type *> Args;
+ FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Args, false);
+ FN = Function::Create(Ty, Linkage, Name, M);
+ FN->setCallingConv(CallingConv::SPIR_FUNC);
+ }
+
+ Value *Val = Builder.CreateCall(FN, {});
+ Val = Builder.CreateIntCast(Val, Builder.getInt64Ty(), false, Name);
+ IDToValue[Id] = Val;
+ KernelIDs.insert(std::unique_ptr<isl_id, IslIdDeleter>(Id));
+ };
+
+ for (int i = 0; i < Kernel->n_grid; ++i)
+ createFunc(GroupName[i], isl_id_list_get_id(Kernel->block_ids, i));
+
+ for (int i = 0; i < Kernel->n_block; ++i)
+ createFunc(LocalName[i], isl_id_list_get_id(Kernel->thread_ids, i));
+}
+
void GPUNodeBuilder::prepareKernelArguments(ppcg_kernel *Kernel, Function *FN) {
auto Arg = FN->arg_begin();
for (long i = 0; i < Kernel->n_array; i++) {
@@ -2004,6 +2122,14 @@ void GPUNodeBuilder::createKernelFunction(
GPUModule->setTargetTriple(Triple::normalize("nvptx64-nvidia-nvcl"));
GPUModule->setDataLayout(computeNVPTXDataLayout(true /* is64Bit */));
break;
+ case GPUArch::SPIR32:
+ GPUModule->setTargetTriple(Triple::normalize("spir-unknown-unknown"));
+ GPUModule->setDataLayout(computeSPIRDataLayout(false /* is64Bit */));
+ break;
+ case GPUArch::SPIR64:
+ GPUModule->setTargetTriple(Triple::normalize("spir64-unknown-unknown"));
+ GPUModule->setDataLayout(computeSPIRDataLayout(true /* is64Bit */));
+ break;
}
Function *FN = createKernelFunctionDecl(Kernel, SubtreeValues);
@@ -2021,7 +2147,16 @@ void GPUNodeBuilder::createKernelFunction(
prepareKernelArguments(Kernel, FN);
createKernelVariables(Kernel, FN);
- insertKernelIntrinsics(Kernel);
+
+ switch (Arch) {
+ case GPUArch::NVPTX64:
+ insertKernelIntrinsics(Kernel);
+ break;
+ case GPUArch::SPIR32:
+ case GPUArch::SPIR64:
+ insertKernelCallsSPIR(Kernel);
+ break;
+ }
}
std::string GPUNodeBuilder::createKernelASM() {
@@ -2038,6 +2173,13 @@ std::string GPUNodeBuilder::createKernelASM() {
break;
}
break;
+ case GPUArch::SPIR64:
+ case GPUArch::SPIR32:
+ std::string SPIRAssembly;
+ raw_string_ostream IROstream(SPIRAssembly);
+ IROstream << *GPUModule;
+ IROstream.flush();
+ return SPIRAssembly;
}
std::string ErrMsg;
@@ -2057,6 +2199,9 @@ std::string GPUNodeBuilder::createKernelASM() {
case GPUArch::NVPTX64:
subtarget = CudaVersion;
break;
+ case GPUArch::SPIR32:
+ case GPUArch::SPIR64:
+ llvm_unreachable("No subtarget for SPIR architecture");
}
std::unique_ptr<TargetMachine> TargetM(GPUTarget->createTargetMachine(
@@ -2097,13 +2242,15 @@ std::string GPUNodeBuilder::finalizeKernelFunction() {
if (DumpKernelIR)
outs() << *GPUModule << "\n";
- // Optimize module.
- llvm::legacy::PassManager OptPasses;
- PassManagerBuilder PassBuilder;
- PassBuilder.OptLevel = 3;
- PassBuilder.SizeLevel = 0;
- PassBuilder.populateModulePassManager(OptPasses);
- OptPasses.run(*GPUModule);
+ if (Arch != GPUArch::SPIR32 && Arch != GPUArch::SPIR64) {
+ // Optimize module.
+ llvm::legacy::PassManager OptPasses;
+ PassManagerBuilder PassBuilder;
+ PassBuilder.OptLevel = 3;
+ PassBuilder.SizeLevel = 0;
+ PassBuilder.populateModulePassManager(OptPasses);
+ OptPasses.run(*GPUModule);
+ }
std::string Assembly = createKernelASM();
OpenPOWER on IntegriCloud