summaryrefslogtreecommitdiffstats
path: root/polly/lib/CodeGen/PPCGCodeGeneration.cpp
diff options
context:
space:
mode:
authorSiddharth Bhat <siddu.druid@gmail.com>2017-05-09 10:45:52 +0000
committerSiddharth Bhat <siddu.druid@gmail.com>2017-05-09 10:45:52 +0000
commita90be207c60cf3bcbcdf452f4443ba0b73d868ae (patch)
treeade3e615fc4891e9b72b19c9bad7eba8fa71e31e /polly/lib/CodeGen/PPCGCodeGeneration.cpp
parentcf9daa33a7870c235e0edc176dd40579f376cafc (diff)
downloadbcm5719-llvm-a90be207c60cf3bcbcdf452f4443ba0b73d868ae.tar.gz
bcm5719-llvm-a90be207c60cf3bcbcdf452f4443ba0b73d868ae.zip
[Polly][PPCGCodeGen] OpenCL now gets kernel argument size from PPCG CodeGen
Summary: PPCGCodeGeneration now appends the size of each kernel launch parameter to the end of the parameter list. The existing CUDA Runtime ignores these extra entries, but the OpenCL Runtime knows to look for the kernel-argument sizes at the end of the parameter list. (The resulting parameter list is twice as long; this has been accounted for in the corresponding test cases.) Reviewers: grosser, Meinersbur, bollu Reviewed By: bollu Subscribers: nemanjai, yaxunl, Anastasia, pollydev, llvm-commits Tags: #polly Differential Revision: https://reviews.llvm.org/D32961 llvm-svn: 302515
Diffstat (limited to 'polly/lib/CodeGen/PPCGCodeGeneration.cpp')
-rw-r--r--polly/lib/CodeGen/PPCGCodeGeneration.cpp71
1 files changed, 54 insertions, 17 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 45e570c90b5..4b09faabac5 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -142,6 +142,14 @@ static __isl_give isl_id_to_ast_expr *pollyBuildAstExprForStmt(
return RefToExpr;
}
+/// Given an LLVM Type, compute its size in whole bytes (bit width / 8, truncating).
+static int computeSizeInBytes(const Type *T) {
+ int bytes = T->getPrimitiveSizeInBits() / 8; // integer division: widths below 8 bits give 0
+ if (bytes == 0) // no whole byte from the primitive size: retry with the scalar width
+ bytes = T->getScalarSizeInBits() / 8;
+ return bytes; // NOTE(review): can still be 0 if both queries report < 8 bits; confirm callers accept that
+}
+
/// Generate code for a GPU specific isl AST.
///
/// The GPUNodeBuilder augments the general existing IslNodeBuilder, which
@@ -272,6 +280,16 @@ private:
/// @returns A tuple with thread block sizes for X, Y, and Z dimensions.
std::tuple<Value *, Value *, Value *> getBlockSizes(ppcg_kernel *Kernel);
+ /// Store the address of a kernel launch parameter in the array of kernel
+ /// launch parameters (the address is cast to i8* before being stored).
+ ///
+ /// @param Parameters The list of parameters in which to store.
+ /// @param Param The instruction yielding the address of the parameter.
+ /// @param Index The index in the parameter list, at which to store the
+ /// parameter.
+ void insertStoreParameter(Instruction *Parameters, Instruction *Param,
+ int Index);
+
/// Create kernel launch parameters.
///
/// @param Kernel The kernel to create parameters for.
@@ -1192,11 +1210,21 @@ GPUNodeBuilder::getBlockSizes(ppcg_kernel *Kernel) {
return std::make_tuple(Sizes[0], Sizes[1], Sizes[2]);
}
+void GPUNodeBuilder::insertStoreParameter(Instruction *Parameters,
+ Instruction *Param, int Index) {
+ Value *Slot = Builder.CreateGEP( // address of element `Index` within the object Parameters points to
+ Parameters, {Builder.getInt64(0), Builder.getInt64(Index)});
+ Value *ParamTyped = Builder.CreatePointerCast(Param, Builder.getInt8PtrTy()); // view Param as an i8*
+ Builder.CreateStore(ParamTyped, Slot); // the slot receives the pointer, not the pointed-to value
+}
+
Value *
GPUNodeBuilder::createLaunchParameters(ppcg_kernel *Kernel, Function *F,
SetVector<Value *> SubtreeValues) {
- Type *ArrayTy = ArrayType::get(Builder.getInt8PtrTy(),
- std::distance(F->arg_begin(), F->arg_end()));
+ const int NumArgs = F->arg_size();
+ std::vector<int> ArgSizes(NumArgs);
+
+ Type *ArrayTy = ArrayType::get(Builder.getInt8PtrTy(), 2 * NumArgs);
BasicBlock *EntryBlock =
&Builder.GetInsertBlock()->getParent()->getEntryBlock();
@@ -1213,6 +1241,8 @@ GPUNodeBuilder::createLaunchParameters(ppcg_kernel *Kernel, Function *F,
isl_id *Id = isl_space_get_tuple_id(Prog->array[i].space, isl_dim_set);
const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(Id);
+ ArgSizes[Index] = SAI->getElemSizeInBytes();
+
Value *DevArray = nullptr;
if (ManagedMemory) {
DevArray = getOrCreateManagedDeviceArray(
@@ -1265,16 +1295,15 @@ GPUNodeBuilder::createLaunchParameters(ppcg_kernel *Kernel, Function *F,
isl_id *Id = isl_space_get_dim_id(Kernel->space, isl_dim_set, i);
Value *Val = IDToValue[Id];
isl_id_free(Id);
+
+ ArgSizes[Index] = computeSizeInBytes(Val->getType());
+
Instruction *Param =
new AllocaInst(Val->getType(), AddressSpace,
Launch + "_param_" + std::to_string(Index),
EntryBlock->getTerminator());
Builder.CreateStore(Val, Param);
- Value *Slot = Builder.CreateGEP(
- Parameters, {Builder.getInt64(0), Builder.getInt64(Index)});
- Value *ParamTyped =
- Builder.CreatePointerCast(Param, Builder.getInt8PtrTy());
- Builder.CreateStore(ParamTyped, Slot);
+ insertStoreParameter(Parameters, Param, Index);
Index++;
}
@@ -1284,30 +1313,38 @@ GPUNodeBuilder::createLaunchParameters(ppcg_kernel *Kernel, Function *F,
isl_id *Id = isl_space_get_dim_id(Kernel->space, isl_dim_param, i);
Value *Val = IDToValue[Id];
isl_id_free(Id);
+
+ ArgSizes[Index] = computeSizeInBytes(Val->getType());
+
Instruction *Param =
new AllocaInst(Val->getType(), AddressSpace,
Launch + "_param_" + std::to_string(Index),
EntryBlock->getTerminator());
Builder.CreateStore(Val, Param);
- Value *Slot = Builder.CreateGEP(
- Parameters, {Builder.getInt64(0), Builder.getInt64(Index)});
- Value *ParamTyped =
- Builder.CreatePointerCast(Param, Builder.getInt8PtrTy());
- Builder.CreateStore(ParamTyped, Slot);
+ insertStoreParameter(Parameters, Param, Index);
Index++;
}
for (auto Val : SubtreeValues) {
+ ArgSizes[Index] = computeSizeInBytes(Val->getType());
+
Instruction *Param =
new AllocaInst(Val->getType(), AddressSpace,
Launch + "_param_" + std::to_string(Index),
EntryBlock->getTerminator());
Builder.CreateStore(Val, Param);
- Value *Slot = Builder.CreateGEP(
- Parameters, {Builder.getInt64(0), Builder.getInt64(Index)});
- Value *ParamTyped =
- Builder.CreatePointerCast(Param, Builder.getInt8PtrTy());
- Builder.CreateStore(ParamTyped, Slot);
+ insertStoreParameter(Parameters, Param, Index);
+ Index++;
+ }
+
+ for (int i = 0; i < NumArgs; i++) {
+ Value *Val = ConstantInt::get(Builder.getInt32Ty(), ArgSizes[i]);
+ Instruction *Param =
+ new AllocaInst(Builder.getInt32Ty(), AddressSpace,
+ Launch + "_param_size_" + std::to_string(i),
+ EntryBlock->getTerminator());
+ Builder.CreateStore(Val, Param);
+ insertStoreParameter(Parameters, Param, Index);
Index++;
}
OpenPOWER on IntegriCloud