summaryrefslogtreecommitdiffstats
path: root/polly/lib/CodeGen/PPCGCodeGeneration.cpp
diff options
context:
space:
mode:
authorTobias Grosser <tobias@grosser.es>2016-07-27 13:20:16 +0000
committerTobias Grosser <tobias@grosser.es>2016-07-27 13:20:16 +0000
commit79a947c2336c9cf12a588844d39422c9aa0a5658 (patch)
tree0cb6a29681fa7ac56c6258803047171cdbaa02c4 /polly/lib/CodeGen/PPCGCodeGeneration.cpp
parent499375ceaaa1f7071dd6a9d90f6a3e6fd237d70e (diff)
downloadbcm5719-llvm-79a947c2336c9cf12a588844d39422c9aa0a5658.tar.gz
bcm5719-llvm-79a947c2336c9cf12a588844d39422c9aa0a5658.zip
GPGPU: Add basic support for kernel launches
llvm-svn: 276863
Diffstat (limited to 'polly/lib/CodeGen/PPCGCodeGeneration.cpp')
-rw-r--r--polly/lib/CodeGen/PPCGCodeGeneration.cpp171
1 files changed, 171 insertions, 0 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index a5d55ea7ac5..d0ae82aec3c 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -204,6 +204,29 @@ private:
/// @returns A set of values referenced by the kernel.
SetVector<Value *> getReferencesInKernel(ppcg_kernel *Kernel);
+ /// Compute the sizes of the execution grid for a given kernel.
+ ///
+ /// @param Kernel The kernel to compute grid sizes for.
+ ///
+ /// @returns A tuple with the grid sizes for the X and Y dimensions.
+ std::tuple<Value *, Value *> getGridSizes(ppcg_kernel *Kernel);
+
+ /// Compute the sizes of the thread blocks for a given kernel.
+ ///
+ /// @param Kernel The kernel to compute thread block sizes for.
+ ///
+ /// @returns A tuple with block sizes for X, Y, and Z; unused dimensions are 1.
+ std::tuple<Value *, Value *, Value *> getBlockSizes(ppcg_kernel *Kernel);
+
+ /// Create kernel launch parameters.
+ ///
+ /// @param Kernel The kernel to create parameters for.
+ /// @param F The kernel function that has been created.
+ ///
+ /// @returns An i8 pointer to a stack allocated array with pointers to the
+ /// parameter values that are passed to the kernel.
+ Value *createLaunchParameters(ppcg_kernel *Kernel, Function *F);
+
/// Create GPU kernel.
///
/// Code generate the kernel described by @p KernelStmt.
@@ -296,6 +319,13 @@ private:
/// @returns A pointer to the newly initialized context.
Value *createCallInitContext();
+ /// Create a call to get the device pointer for a kernel allocation.
+ ///
+ /// @param Allocation The Polly GPU allocation.
+ ///
+ /// @returns The device pointer (i8*) corresponding to this allocation.
+ Value *createCallGetDevicePtr(Value *Allocation);
+
/// Create a call to free the GPU context.
///
/// @param Context A pointer to an initialized GPU context.
@@ -339,6 +369,21 @@ private:
///
/// @param GPUKernel The kernel to free.
void createCallFreeKernel(Value *GPUKernel);
+
+ /// Create a call to launch a GPU kernel.
+ ///
+ /// @param GPUKernel The kernel to launch.
+ /// @param GridDimX The size of the first grid dimension.
+ /// @param GridDimY The size of the second grid dimension.
+ /// @param BlockDimX The size of the first block dimension.
+ /// @param BlockDimY The size of the second block dimension.
+ /// @param BlockDimZ The size of the third block dimension.
+ /// @param Parameters A pointer to an array of pointers, one pointing to
+ /// the parameter value of each kernel argument.
+ void createCallLaunchKernel(Value *GPUKernel, Value *GridDimX,
+ Value *GridDimY, Value *BlockDimX,
+ Value *BlockDimY, Value *BlockDimZ,
+ Value *Parameters);
};
void GPUNodeBuilder::initializeAfterRTH() {
@@ -393,6 +438,50 @@ Value *GPUNodeBuilder::createCallGetKernel(Value *Buffer, Value *Entry) {
return Builder.CreateCall(F, {Buffer, Entry});
}
+/// Emit a call to the function named "polly_getDevicePtr".
+///
+/// The callee is looked up in the module that contains the current insert
+/// block. If the module has no such function yet, it is declared with
+/// external linkage and the type i8* (i8*).
+///
+/// @param Allocation The value passed as the only argument of the call.
+///
+/// @returns The value produced by the emitted call.
+Value *GPUNodeBuilder::createCallGetDevicePtr(Value *Allocation) {
+  const char *Name = "polly_getDevicePtr";
+  Function *HostFn = Builder.GetInsertBlock()->getParent();
+  Module *M = HostFn->getParent();
+  Function *Callee = M->getFunction(Name);
+
+  // Declare the callee on first use.
+  if (Callee == nullptr) {
+    auto *Int8PtrTy = Builder.getInt8PtrTy();
+    FunctionType *CalleeTy =
+        FunctionType::get(Int8PtrTy, {Int8PtrTy}, /*isVarArg=*/false);
+    Callee = Function::Create(CalleeTy, Function::ExternalLinkage, Name, M);
+  }
+
+  return Builder.CreateCall(Callee, {Allocation});
+}
+
+/// Emit a call to the function named "polly_launchKernel".
+///
+/// The callee is looked up in the module that contains the current insert
+/// block. If the module has no such function yet, it is declared with
+/// external linkage and the type
+///   void (i8*, i32, i32, i32, i32, i32, i8*).
+///
+/// @param GPUKernel  The kernel to launch.
+/// @param GridDimX   The size of the first grid dimension.
+/// @param GridDimY   The size of the second grid dimension.
+/// @param BlockDimX  The size of the first block dimension.
+/// @param BlockDimY  The size of the second block dimension.
+/// @param BlockDimZ  The size of the third block dimension.
+/// @param Parameters The pointer passed as last argument of the call.
+void GPUNodeBuilder::createCallLaunchKernel(Value *GPUKernel, Value *GridDimX,
+                                            Value *GridDimY, Value *BlockDimX,
+                                            Value *BlockDimY, Value *BlockDimZ,
+                                            Value *Parameters) {
+  const char *Name = "polly_launchKernel";
+  Function *HostFn = Builder.GetInsertBlock()->getParent();
+  Module *M = HostFn->getParent();
+  Function *Callee = M->getFunction(Name);
+
+  // Declare the callee on first use.
+  if (Callee == nullptr) {
+    std::vector<Type *> ArgTypes;
+    ArgTypes.push_back(Builder.getInt8PtrTy());
+    // Two grid dimensions followed by three block dimensions.
+    for (int Dim = 0; Dim < 5; Dim++)
+      ArgTypes.push_back(Builder.getInt32Ty());
+    ArgTypes.push_back(Builder.getInt8PtrTy());
+
+    FunctionType *CalleeTy =
+        FunctionType::get(Builder.getVoidTy(), ArgTypes, /*isVarArg=*/false);
+    Callee = Function::Create(CalleeTy, Function::ExternalLinkage, Name, M);
+  }
+
+  Builder.CreateCall(Callee, {GPUKernel, GridDimX, GridDimY, BlockDimX,
+                              BlockDimY, BlockDimZ, Parameters});
+}
+
void GPUNodeBuilder::createCallFreeKernel(Value *GPUKernel) {
const char *Name = "polly_freeKernel";
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -755,6 +844,77 @@ void GPUNodeBuilder::clearLoops(Function *F) {
}
}
+/// Compute the sizes of the execution grid for a given kernel.
+///
+/// For each of the kernel's n_grid dimensions, the corresponding element of
+/// Kernel->grid_size is turned into an isl AST expression, code generated
+/// with the expression builder and truncated to i32. The list of sizes is
+/// then padded with the constant 1 up to three entries.
+///
+/// @param Kernel The kernel to compute grid sizes for.
+///
+/// @returns A tuple with the first two entries of the size list.
+std::tuple<Value *, Value *> GPUNodeBuilder::getGridSizes(ppcg_kernel *Kernel) {
+  auto *Int32Ty = Builder.getInt32Ty();
+  std::vector<Value *> GridDims;
+
+  isl_ast_build *Build = isl_ast_build_from_context(S.getContext());
+  for (long Dim = 0; Dim < Kernel->n_grid; Dim++) {
+    isl_pw_aff *DimSize = isl_multi_pw_aff_get_pw_aff(Kernel->grid_size, Dim);
+    isl_ast_expr *DimExpr = isl_ast_build_expr_from_pw_aff(Build, DimSize);
+    Value *Wide = ExprBuilder.create(DimExpr);
+    GridDims.push_back(Builder.CreateTrunc(Wide, Int32Ty));
+  }
+  isl_ast_build_free(Build);
+
+  // Dimensions the kernel does not provide get size one.
+  for (long Dim = Kernel->n_grid; Dim < 3; Dim++)
+    GridDims.push_back(ConstantInt::get(Int32Ty, 1));
+
+  return std::make_tuple(GridDims[0], GridDims[1]);
+}
+
+/// Compute the sizes of the thread blocks for a given kernel.
+///
+/// The first n_block entries are i32 constants taken from Kernel->block_dim.
+/// The list is then padded with the constant 1 up to three entries.
+///
+/// @param Kernel The kernel to compute thread block sizes for.
+///
+/// @returns A tuple with the first three entries of the size list.
+std::tuple<Value *, Value *, Value *>
+GPUNodeBuilder::getBlockSizes(ppcg_kernel *Kernel) {
+  auto *Int32Ty = Builder.getInt32Ty();
+  std::vector<Value *> BlockDims;
+
+  for (long Dim = 0; Dim < Kernel->n_block; Dim++)
+    BlockDims.push_back(ConstantInt::get(Int32Ty, Kernel->block_dim[Dim]));
+
+  // Dimensions the kernel does not provide get size one.
+  for (long Dim = Kernel->n_block; Dim < 3; Dim++)
+    BlockDims.push_back(ConstantInt::get(Int32Ty, 1));
+
+  return std::make_tuple(BlockDims[0], BlockDims[1], BlockDims[2]);
+}
+
+/// Create the parameter array that is passed to a kernel launch.
+///
+/// For each program array the kernel requires as argument, the device pointer
+/// of the corresponding device allocation is obtained through
+/// createCallGetDevicePtr and stored in a dedicated stack slot. The address
+/// of that stack slot is then written to the next free element of the
+/// parameter array. Both the array and the per-parameter slots are allocated
+/// in front of the terminator of the entry block of the function that
+/// contains the current insert point.
+///
+/// @param Kernel The kernel to create parameters for.
+/// @param F      The kernel function that has been created.
+///
+/// @returns An i8 pointer to the stack allocated parameter array.
+Value *GPUNodeBuilder::createLaunchParameters(ppcg_kernel *Kernel,
+                                              Function *F) {
+  // Reserve one element per formal argument of the kernel function. Note
+  // that Function::getNumOperands() would count the operands the function
+  // has as a User, which is unrelated to its number of arguments.
+  Type *ArrayTy = ArrayType::get(Builder.getInt8PtrTy(), F->arg_size());
+
+  BasicBlock *EntryBlock =
+      &Builder.GetInsertBlock()->getParent()->getEntryBlock();
+  std::string Launch = "polly_launch_" + std::to_string(Kernel->id);
+  Instruction *Parameters =
+      new AllocaInst(ArrayTy, Launch + "_params", EntryBlock->getTerminator());
+
+  // Position of the next parameter in the parameter array. This differs from
+  // the program array index as soon as an array is skipped.
+  int Index = 0;
+  for (long i = 0; i < Prog->n_array; i++) {
+    if (!ppcg_kernel_requires_array_argument(Kernel, i))
+      continue;
+
+    isl_id *Id = isl_space_get_tuple_id(Prog->array[i].space, isl_dim_set);
+    const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(Id);
+
+    Value *DevArray = DeviceAllocations[const_cast<ScopArrayInfo *>(SAI)];
+    DevArray = createCallGetDevicePtr(DevArray);
+
+    // The parameter array holds pointers to the parameter values, so the
+    // device pointer itself is first stored in its own stack slot.
+    Instruction *Param = new AllocaInst(
+        Builder.getInt8PtrTy(), Launch + "_param_" + std::to_string(Index),
+        EntryBlock->getTerminator());
+    Builder.CreateStore(DevArray, Param);
+
+    // Parameters are stored densely: use the running parameter index, not
+    // the program array index, to address the slot.
+    Value *Slot = Builder.CreateGEP(
+        Parameters, {Builder.getInt64(0), Builder.getInt64(Index)});
+    Value *ParamTyped =
+        Builder.CreatePointerCast(Param, Builder.getInt8PtrTy());
+    Builder.CreateStore(ParamTyped, Slot);
+    Index++;
+  }
+
+  auto Location = EntryBlock->getTerminator();
+  return new BitCastInst(Parameters, Builder.getInt8PtrTy(),
+                         Launch + "_params_i8ptr", Location);
+}
+
void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) {
isl_id *Id = isl_ast_node_get_annotation(KernelStmt);
ppcg_kernel *Kernel = (ppcg_kernel *)isl_id_get_user(Id);
@@ -805,11 +965,22 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) {
S.invalidateScopArrayInfo(BasePtr, ScopArrayInfo::MK_Array);
LocalArrays.clear();
+ Value *Parameters = createLaunchParameters(Kernel, F);
+
std::string ASMString = finalizeKernelFunction();
std::string Name = "kernel_" + std::to_string(Kernel->id);
Value *KernelString = Builder.CreateGlobalStringPtr(ASMString, Name);
Value *NameString = Builder.CreateGlobalStringPtr(Name, Name + "_name");
Value *GPUKernel = createCallGetKernel(KernelString, NameString);
+
+ Value *GridDimX, *GridDimY;
+ std::tie(GridDimX, GridDimY) = getGridSizes(Kernel);
+
+ Value *BlockDimX, *BlockDimY, *BlockDimZ;
+ std::tie(BlockDimX, BlockDimY, BlockDimZ) = getBlockSizes(Kernel);
+
+ createCallLaunchKernel(GPUKernel, GridDimX, GridDimY, BlockDimX, BlockDimY,
+ BlockDimZ, Parameters);
createCallFreeKernel(GPUKernel);
}
OpenPOWER on IntegriCloud