summaryrefslogtreecommitdiffstats
path: root/polly/lib/CodeGen/PPCGCodeGeneration.cpp
diff options
context:
space:
mode:
authorTobias Grosser <tobias@grosser.es>2016-07-25 12:47:39 +0000
committerTobias Grosser <tobias@grosser.es>2016-07-25 12:47:39 +0000
commit13c78e4d517dd6f4b8fa133f53e09a9e11596fc8 (patch)
tree23f9e7d631aad2fd45c81db67f1108302f5c3fdf /polly/lib/CodeGen/PPCGCodeGeneration.cpp
parent7287aeddf1e5973905461d3afbdd6648f02ede23 (diff)
downloadbcm5719-llvm-13c78e4d517dd6f4b8fa133f53e09a9e11596fc8.tar.gz
bcm5719-llvm-13c78e4d517dd6f4b8fa133f53e09a9e11596fc8.zip
GPGPU: Emit data-transfer code
Also factor out getArraySize() to avoid code dupliciation and reorder some function arguments to indicate the direction into which data is transferred. llvm-svn: 276636
Diffstat (limited to 'polly/lib/CodeGen/PPCGCodeGeneration.cpp')
-rw-r--r--polly/lib/CodeGen/PPCGCodeGeneration.cpp164
1 files changed, 139 insertions, 25 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 872b2f60a44..3b5a5d3aabe 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -148,8 +148,8 @@ private:
/// more.
std::vector<Value *> LocalArrays;
- /// A list of device arrays that has been allocated.
- std::vector<Value *> AllocatedDevArrays;
+ /// A map from ScopArrays to their corresponding device allocations.
+ std::map<ScopArrayInfo *, Value *> DeviceAllocations;
/// The current GPU context.
Value *GPUContext;
@@ -181,13 +181,22 @@ private:
///
/// - ScopStmt: A computational statement (TODO)
/// - Kernel: A GPU kernel call (TODO)
- /// - Data-Transfer: A GPU <-> CPU data-transfer (TODO)
+ /// - Data-Transfer: A GPU <-> CPU data-transfer
/// - In-kernel synchronization
/// - In-kernel memory copy statement
///
/// @param UserStmt The ast node to generate code for.
virtual void createUser(__isl_take isl_ast_node *UserStmt);
+ enum DataDirection { HOST_TO_DEVICE, DEVICE_TO_HOST };
+
+ /// Create code for a data transfer statement
+ ///
+ /// @param TransferStmt The data transfer statement.
+ /// @param Direction The direction in which to transfer data.
+ void createDataTransfer(__isl_take isl_ast_node *TransferStmt,
+ enum DataDirection Direction);
+
/// Find llvm::Values referenced in GPU kernel.
///
/// @param Kernel The kernel to scan for llvm::Values
@@ -202,6 +211,11 @@ private:
/// @param KernelStmt The ast node to generate kernel code for.
void createKernel(__isl_take isl_ast_node *KernelStmt);
+ /// Generate code that computes the size of an array.
+ ///
+ /// @param Array The array for which to compute a size.
+ Value *getArraySize(gpu_array_info *Array);
+
/// Create kernel function.
///
/// Create a kernel function located in a newly created module that can serve
@@ -296,6 +310,20 @@ private:
///
/// @param Array The device array to free.
void createCallFreeDeviceMemory(Value *Array);
+
+ /// Create a call to copy data from host to device.
+ ///
+ /// @param HostPtr A pointer to the host data that should be copied.
+ /// @param DevicePtr A device pointer specifying the location to copy to.
+ void createCallCopyFromHostToDevice(Value *HostPtr, Value *DevicePtr,
+ Value *Size);
+
+ /// Create a call to copy data from device to host.
+ ///
+ /// @param DevicePtr A pointer to the device data that should be copied.
+ /// @param HostPtr A host pointer specifying the location to copy to.
+ void createCallCopyFromDeviceToHost(Value *DevicePtr, Value *HostPtr,
+ Value *Size);
};
void GPUNodeBuilder::initializeAfterRTH() {
@@ -314,36 +342,22 @@ void GPUNodeBuilder::allocateDeviceArrays() {
for (int i = 0; i < Prog->n_array; ++i) {
gpu_array_info *Array = &Prog->array[i];
+ auto *ScopArray = (ScopArrayInfo *)Array->user;
std::string DevArrayName("p_dev_array_");
DevArrayName.append(Array->name);
- Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size);
-
- if (!gpu_array_is_scalar(Array)) {
- auto OffsetDimZero = isl_pw_aff_copy(Array->bound[0]);
- isl_ast_expr *Res = isl_ast_build_expr_from_pw_aff(Build, OffsetDimZero);
-
- for (unsigned int i = 1; i < Array->n_index; i++) {
- isl_pw_aff *Bound_I = isl_pw_aff_copy(Array->bound[i]);
- isl_ast_expr *Expr = isl_ast_build_expr_from_pw_aff(Build, Bound_I);
- Res = isl_ast_expr_mul(Res, Expr);
- }
-
- Value *NumElements = ExprBuilder.create(Res);
- ArraySize = Builder.CreateMul(ArraySize, NumElements);
- }
-
+ Value *ArraySize = getArraySize(Array);
Value *DevArray = createCallAllocateMemoryForDevice(ArraySize);
DevArray->setName(DevArrayName);
- AllocatedDevArrays.push_back(DevArray);
+ DeviceAllocations[ScopArray] = DevArray;
}
isl_ast_build_free(Build);
}
void GPUNodeBuilder::freeDeviceArrays() {
- for (auto &Array : AllocatedDevArrays)
- createCallFreeDeviceMemory(Array);
+ for (auto &Array : DeviceAllocations)
+ createCallFreeDeviceMemory(Array.second);
}
void GPUNodeBuilder::createCallFreeDeviceMemory(Value *Array) {
@@ -380,6 +394,48 @@ Value *GPUNodeBuilder::createCallAllocateMemoryForDevice(Value *Size) {
return Builder.CreateCall(F, {Size});
}
+void GPUNodeBuilder::createCallCopyFromHostToDevice(Value *HostData,
+ Value *DeviceData,
+ Value *Size) {
+ const char *Name = "polly_copyFromHostToDevice";
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Function *F = M->getFunction(Name);
+
+ // If F is not available, declare it.
+ if (!F) {
+ GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
+ std::vector<Type *> Args;
+ Args.push_back(Builder.getInt8PtrTy());
+ Args.push_back(Builder.getInt8PtrTy());
+ Args.push_back(Builder.getInt64Ty());
+ FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
+ F = Function::Create(Ty, Linkage, Name, M);
+ }
+
+ Builder.CreateCall(F, {HostData, DeviceData, Size});
+}
+
+void GPUNodeBuilder::createCallCopyFromDeviceToHost(Value *DeviceData,
+ Value *HostData,
+ Value *Size) {
+ const char *Name = "polly_copyFromDeviceToHost";
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Function *F = M->getFunction(Name);
+
+ // If F is not available, declare it.
+ if (!F) {
+ GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
+ std::vector<Type *> Args;
+ Args.push_back(Builder.getInt8PtrTy());
+ Args.push_back(Builder.getInt8PtrTy());
+ Args.push_back(Builder.getInt64Ty());
+ FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
+ F = Function::Create(Ty, Linkage, Name, M);
+ }
+
+ Builder.CreateCall(F, {DeviceData, HostData, Size});
+}
+
Value *GPUNodeBuilder::createCallInitContext() {
const char *Name = "polly_initContext";
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -421,6 +477,58 @@ static bool isPrefix(std::string String, std::string Prefix) {
return String.find(Prefix) == 0;
}
+Value *GPUNodeBuilder::getArraySize(gpu_array_info *Array) {
+ isl_ast_build *Build = isl_ast_build_from_context(S.getContext());
+ Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size);
+
+ if (!gpu_array_is_scalar(Array)) {
+ auto OffsetDimZero = isl_pw_aff_copy(Array->bound[0]);
+ isl_ast_expr *Res = isl_ast_build_expr_from_pw_aff(Build, OffsetDimZero);
+
+ for (unsigned int i = 1; i < Array->n_index; i++) {
+ isl_pw_aff *Bound_I = isl_pw_aff_copy(Array->bound[i]);
+ isl_ast_expr *Expr = isl_ast_build_expr_from_pw_aff(Build, Bound_I);
+ Res = isl_ast_expr_mul(Res, Expr);
+ }
+
+ Value *NumElements = ExprBuilder.create(Res);
+ ArraySize = Builder.CreateMul(ArraySize, NumElements);
+ }
+ isl_ast_build_free(Build);
+ return ArraySize;
+}
+
+void GPUNodeBuilder::createDataTransfer(__isl_take isl_ast_node *TransferStmt,
+ enum DataDirection Direction) {
+ isl_ast_expr *Expr = isl_ast_node_user_get_expr(TransferStmt);
+ isl_ast_expr *Arg = isl_ast_expr_get_op_arg(Expr, 0);
+ isl_id *Id = isl_ast_expr_get_id(Arg);
+ auto Array = (gpu_array_info *)isl_id_get_user(Id);
+ auto ScopArray = (ScopArrayInfo *)(Array->user);
+
+ Value *Size = getArraySize(Array);
+ Value *HostPtr = ScopArray->getBasePtr();
+
+ Value *DevPtr = DeviceAllocations[ScopArray];
+
+ if (gpu_array_is_scalar(Array)) {
+ HostPtr = Builder.CreateAlloca(ScopArray->getElementType());
+ Builder.CreateStore(ScopArray->getBasePtr(), HostPtr);
+ }
+
+ HostPtr = Builder.CreatePointerCast(HostPtr, Builder.getInt8PtrTy());
+
+ if (Direction == HOST_TO_DEVICE)
+ createCallCopyFromHostToDevice(HostPtr, DevPtr, Size);
+ else
+ createCallCopyFromDeviceToHost(DevPtr, HostPtr, Size);
+
+ isl_id_free(Id);
+ isl_ast_expr_free(Arg);
+ isl_ast_expr_free(Expr);
+ isl_ast_node_free(TransferStmt);
+}
+
void GPUNodeBuilder::createUser(__isl_take isl_ast_node *UserStmt) {
isl_ast_expr *Expr = isl_ast_node_user_get_expr(UserStmt);
isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
@@ -435,10 +543,15 @@ void GPUNodeBuilder::createUser(__isl_take isl_ast_node *UserStmt) {
return;
}
- if (isPrefix(Str, "to_device") || isPrefix(Str, "from_device")) {
- // TODO: Insert memory copies
+ if (isPrefix(Str, "to_device")) {
+ createDataTransfer(UserStmt, HOST_TO_DEVICE);
+ isl_ast_expr_free(Expr);
+ return;
+ }
+
+ if (isPrefix(Str, "from_device")) {
+ createDataTransfer(UserStmt, DEVICE_TO_HOST);
isl_ast_expr_free(Expr);
- isl_ast_node_free(UserStmt);
return;
}
@@ -1202,6 +1315,7 @@ public:
PPCGArray.global = false;
PPCGArray.linearize = false;
PPCGArray.dep_order = nullptr;
+ PPCGArray.user = Array;
setArrayBounds(PPCGArray, Array);
i++;
OpenPOWER on IntegriCloud