summaryrefslogtreecommitdiffstats
path: root/polly/lib/CodeGen/PPCGCodeGeneration.cpp
diff options
context:
space:
mode:
authorTobias Grosser <tobias@grosser.es>2016-09-15 14:05:58 +0000
committerTobias Grosser <tobias@grosser.es>2016-09-15 14:05:58 +0000
commitaaabbbf8863827cb9e286e1e04cc41f088f9383f (patch)
treeec5c4ccb9685e196e83f55e7edb6e30a3357f7f7 /polly/lib/CodeGen/PPCGCodeGeneration.cpp
parent9edf96ec9b64f91a0d6b992a92f6762f87e3c67d (diff)
downloadbcm5719-llvm-aaabbbf8863827cb9e286e1e04cc41f088f9383f.tar.gz
bcm5719-llvm-aaabbbf8863827cb9e286e1e04cc41f088f9383f.zip
GPGPU: Do not assume arrays start at 0
Our alias checks precisely check that the minimal and maximal accessed elements do not overlap in a kernel. Hence, we must ensure that our host <-> device transfers do not touch additional memory locations that are not covered in the alias check. To ensure this, we make sure that the data we copy for a given array is only the data from the smallest element accessed to the largest element accessed. We also adjust the size of the array according to the offset at which the array is actually accessed. An interesting result of this is: In case arrays are accessed with negative subscripts, e.g., A[-100], we automatically allocate and transfer _more_ data to cover the full array. This is important as such code indeed exists in the wild. llvm-svn: 281611
Diffstat (limited to 'polly/lib/CodeGen/PPCGCodeGeneration.cpp')
-rw-r--r--polly/lib/CodeGen/PPCGCodeGeneration.cpp86
1 files changed, 86 insertions, 0 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 076a94add20..48444d60aa0 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -284,6 +284,21 @@ private:
/// @param Array The array for which to compute a size.
Value *getArraySize(gpu_array_info *Array);
+ /// Generate code to compute the minimal offset at which an array is accessed.
+ ///
+ /// The offset of an array is the minimal array location accessed in a scop.
+ ///
+ /// Example:
+ ///
+ /// for (long i = 0; i < 100; i++)
+ /// A[i + 42] += ...
+ ///
+ /// getArrayOffset(A) results in 42.
+ ///
+ /// @param Array The array for which to compute the offset.
+ /// @returns An llvm::Value that contains the offset of the array.
+ Value *getArrayOffset(gpu_array_info *Array);
+
/// Prepare the kernel arguments for kernel code generation
///
/// @param Kernel The kernel to generate code for.
@@ -468,6 +483,12 @@ void GPUNodeBuilder::allocateDeviceArrays() {
DevArrayName.append(Array->name);
Value *ArraySize = getArraySize(Array);
+ Value *Offset = getArrayOffset(Array);
+ if (Offset)
+ ArraySize = Builder.CreateSub(
+ ArraySize,
+ Builder.CreateMul(Offset,
+ Builder.getInt64(ScopArray->getElemSizeInBytes())));
Value *DevArray = createCallAllocateMemoryForDevice(ArraySize);
DevArray->setName(DevArrayName);
DeviceAllocations[ScopArray] = DevArray;
@@ -721,6 +742,48 @@ Value *GPUNodeBuilder::getArraySize(gpu_array_info *Array) {
return ArraySize;
}
+Value *GPUNodeBuilder::getArrayOffset(gpu_array_info *Array) { // Linearized minimal offset of Array, or nullptr when there is none.
+ if (gpu_array_is_scalar(Array))
+ return nullptr; // Scalars carry no offset.
+ // AST build used below to turn isl piecewise-affine expressions into AST expressions.
+ isl_ast_build *Build = isl_ast_build_from_context(S.getContext());
+ // Lexicographic minimum of (a copy of) the array's extent.
+ isl_set *Min = isl_set_lexmin(isl_set_copy(Array->extent));
+ // ZeroSet: universe of Min's space with every set dimension fixed to 0 below.
+ isl_set *ZeroSet = isl_set_universe(isl_set_get_space(Min));
+
+ for (long i = 0; i < isl_set_dim(Min, isl_dim_set); i++)
+ ZeroSet = isl_set_fix_si(ZeroSet, isl_dim_set, i, 0);
+ // If Min lies within ZeroSet there is no offset: release all isl objects and return nullptr.
+ if (isl_set_is_subset(Min, ZeroSet)) {
+ isl_set_free(Min);
+ isl_set_free(ZeroSet);
+ isl_ast_build_free(Build);
+ return nullptr;
+ }
+ isl_set_free(ZeroSet); // Only needed for the subset test above.
+ // Start the accumulated offset expression at the constant 0.
+ isl_ast_expr *Result =
+ isl_ast_expr_from_val(isl_val_int_from_si(isl_set_get_ctx(Min), 0));
+ // Fold the per-dimension minima into one expression: Result = Result * bound + min(dim i).
+ for (long i = 0; i < isl_set_dim(Min, isl_dim_set); i++) {
+ if (i > 0) { // The first dimension has nothing accumulated yet to scale.
+ isl_pw_aff *Bound_I = isl_pw_aff_copy(Array->bound[i - 1]); // NOTE(review): scales by bound[i - 1], not bound[i] - confirm against how Array->bound is populated.
+ isl_ast_expr *BExpr = isl_ast_build_expr_from_pw_aff(Build, Bound_I);
+ Result = isl_ast_expr_mul(Result, BExpr);
+ }
+ isl_pw_aff *DimMin = isl_set_dim_min(isl_set_copy(Min), i); // Minimum of dimension i, computed on a copy of Min.
+ isl_ast_expr *MExpr = isl_ast_build_expr_from_pw_aff(Build, DimMin);
+ Result = isl_ast_expr_add(Result, MExpr);
+ }
+ // Turn the AST expression into a Value. NOTE(review): ExprBuilder.create presumably takes ownership of Result (it is not freed here) - confirm.
+ Value *ResultValue = ExprBuilder.create(Result);
+ isl_set_free(Min);
+ isl_ast_build_free(Build);
+
+ return ResultValue;
+}
+
void GPUNodeBuilder::createDataTransfer(__isl_take isl_ast_node *TransferStmt,
enum DataDirection Direction) {
isl_ast_expr *Expr = isl_ast_node_user_get_expr(TransferStmt);
@@ -730,6 +793,7 @@ void GPUNodeBuilder::createDataTransfer(__isl_take isl_ast_node *TransferStmt,
auto ScopArray = (ScopArrayInfo *)(Array->user);
Value *Size = getArraySize(Array);
+ Value *Offset = getArrayOffset(Array);
Value *DevPtr = DeviceAllocations[ScopArray];
Value *HostPtr;
@@ -739,8 +803,20 @@ void GPUNodeBuilder::createDataTransfer(__isl_take isl_ast_node *TransferStmt,
else
HostPtr = ScopArray->getBasePtr();
+ if (Offset) {
+ HostPtr = Builder.CreatePointerCast(
+ HostPtr, ScopArray->getElementType()->getPointerTo());
+ HostPtr = Builder.CreateGEP(HostPtr, Offset);
+ }
+
HostPtr = Builder.CreatePointerCast(HostPtr, Builder.getInt8PtrTy());
+ if (Offset) {
+ Size = Builder.CreateSub(
+ Size, Builder.CreateMul(
+ Offset, Builder.getInt64(ScopArray->getElemSizeInBytes())));
+ }
+
if (Direction == HOST_TO_DEVICE)
createCallCopyFromHostToDevice(HostPtr, DevPtr, Size);
else
@@ -1000,6 +1076,16 @@ GPUNodeBuilder::createLaunchParameters(ppcg_kernel *Kernel, Function *F,
Value *DevArray = DeviceAllocations[const_cast<ScopArrayInfo *>(SAI)];
DevArray = createCallGetDevicePtr(DevArray);
+
+ Value *Offset = getArrayOffset(&Prog->array[i]);
+
+ if (Offset) {
+ DevArray = Builder.CreatePointerCast(
+ DevArray, SAI->getElementType()->getPointerTo());
+ DevArray = Builder.CreateGEP(DevArray, Builder.CreateNeg(Offset));
+ DevArray = Builder.CreatePointerCast(DevArray, Builder.getInt8PtrTy());
+ }
+
Instruction *Param = new AllocaInst(
Builder.getInt8PtrTy(), Launch + "_param_" + std::to_string(Index),
EntryBlock->getTerminator());
OpenPOWER on IntegriCloud