diff options
| author | Tobias Grosser <tobias@grosser.es> | 2016-07-25 12:47:39 +0000 |
|---|---|---|
| committer | Tobias Grosser <tobias@grosser.es> | 2016-07-25 12:47:39 +0000 |
| commit | 13c78e4d517dd6f4b8fa133f53e09a9e11596fc8 (patch) | |
| tree | 23f9e7d631aad2fd45c81db67f1108302f5c3fdf /polly/lib/CodeGen/PPCGCodeGeneration.cpp | |
| parent | 7287aeddf1e5973905461d3afbdd6648f02ede23 (diff) | |
| download | bcm5719-llvm-13c78e4d517dd6f4b8fa133f53e09a9e11596fc8.tar.gz bcm5719-llvm-13c78e4d517dd6f4b8fa133f53e09a9e11596fc8.zip | |
GPGPU: Emit data-transfer code
Also factor out getArraySize() to avoid code dupliciation and reorder some
function arguments to indicate the direction into which data is transferred.
llvm-svn: 276636
Diffstat (limited to 'polly/lib/CodeGen/PPCGCodeGeneration.cpp')
| -rw-r--r-- | polly/lib/CodeGen/PPCGCodeGeneration.cpp | 164 |
1 files changed, 139 insertions, 25 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 872b2f60a44..3b5a5d3aabe 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -148,8 +148,8 @@ private: /// more. std::vector<Value *> LocalArrays; - /// A list of device arrays that has been allocated. - std::vector<Value *> AllocatedDevArrays; + /// A map from ScopArrays to their corresponding device allocations. + std::map<ScopArrayInfo *, Value *> DeviceAllocations; /// The current GPU context. Value *GPUContext; @@ -181,13 +181,22 @@ private: /// /// - ScopStmt: A computational statement (TODO) /// - Kernel: A GPU kernel call (TODO) - /// - Data-Transfer: A GPU <-> CPU data-transfer (TODO) + /// - Data-Transfer: A GPU <-> CPU data-transfer /// - In-kernel synchronization /// - In-kernel memory copy statement /// /// @param UserStmt The ast node to generate code for. virtual void createUser(__isl_take isl_ast_node *UserStmt); + enum DataDirection { HOST_TO_DEVICE, DEVICE_TO_HOST }; + + /// Create code for a data transfer statement + /// + /// @param TransferStmt The data transfer statement. + /// @param Direction The direction in which to transfer data. + void createDataTransfer(__isl_take isl_ast_node *TransferStmt, + enum DataDirection Direction); + /// Find llvm::Values referenced in GPU kernel. /// /// @param Kernel The kernel to scan for llvm::Values @@ -202,6 +211,11 @@ private: /// @param KernelStmt The ast node to generate kernel code for. void createKernel(__isl_take isl_ast_node *KernelStmt); + /// Generate code that computes the size of an array. + /// + /// @param Array The array for which to compute a size. + Value *getArraySize(gpu_array_info *Array); + /// Create kernel function. /// /// Create a kernel function located in a newly created module that can serve @@ -296,6 +310,20 @@ private: /// /// @param Array The device array to free. void createCallFreeDeviceMemory(Value *Array); + + /// Create a call to copy data from host to device. + /// + /// @param HostPtr A pointer to the host data that should be copied. + /// @param DevicePtr A device pointer specifying the location to copy to. + void createCallCopyFromHostToDevice(Value *HostPtr, Value *DevicePtr, + Value *Size); + + /// Create a call to copy data from device to host. + /// + /// @param DevicePtr A pointer to the device data that should be copied. + /// @param HostPtr A host pointer specifying the location to copy to. + void createCallCopyFromDeviceToHost(Value *DevicePtr, Value *HostPtr, + Value *Size); }; void GPUNodeBuilder::initializeAfterRTH() { @@ -314,36 +342,22 @@ void GPUNodeBuilder::allocateDeviceArrays() { for (int i = 0; i < Prog->n_array; ++i) { gpu_array_info *Array = &Prog->array[i]; + auto *ScopArray = (ScopArrayInfo *)Array->user; std::string DevArrayName("p_dev_array_"); DevArrayName.append(Array->name); - Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size); - - if (!gpu_array_is_scalar(Array)) { - auto OffsetDimZero = isl_pw_aff_copy(Array->bound[0]); - isl_ast_expr *Res = isl_ast_build_expr_from_pw_aff(Build, OffsetDimZero); - - for (unsigned int i = 1; i < Array->n_index; i++) { - isl_pw_aff *Bound_I = isl_pw_aff_copy(Array->bound[i]); - isl_ast_expr *Expr = isl_ast_build_expr_from_pw_aff(Build, Bound_I); - Res = isl_ast_expr_mul(Res, Expr); - } - - Value *NumElements = ExprBuilder.create(Res); - ArraySize = Builder.CreateMul(ArraySize, NumElements); - } - + Value *ArraySize = getArraySize(Array); Value *DevArray = createCallAllocateMemoryForDevice(ArraySize); DevArray->setName(DevArrayName); - AllocatedDevArrays.push_back(DevArray); + DeviceAllocations[ScopArray] = DevArray; } isl_ast_build_free(Build); } void GPUNodeBuilder::freeDeviceArrays() { - for (auto &Array : AllocatedDevArrays) - createCallFreeDeviceMemory(Array); + for (auto &Array : DeviceAllocations) + createCallFreeDeviceMemory(Array.second); } void GPUNodeBuilder::createCallFreeDeviceMemory(Value *Array) { @@ -380,6 +394,48 @@ Value *GPUNodeBuilder::createCallAllocateMemoryForDevice(Value *Size) { return Builder.CreateCall(F, {Size}); } +void GPUNodeBuilder::createCallCopyFromHostToDevice(Value *HostData, + Value *DeviceData, + Value *Size) { + const char *Name = "polly_copyFromHostToDevice"; + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *F = M->getFunction(Name); + + // If F is not available, declare it. + if (!F) { + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + std::vector<Type *> Args; + Args.push_back(Builder.getInt8PtrTy()); + Args.push_back(Builder.getInt8PtrTy()); + Args.push_back(Builder.getInt64Ty()); + FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); + F = Function::Create(Ty, Linkage, Name, M); + } + + Builder.CreateCall(F, {HostData, DeviceData, Size}); +} + +void GPUNodeBuilder::createCallCopyFromDeviceToHost(Value *DeviceData, + Value *HostData, + Value *Size) { + const char *Name = "polly_copyFromDeviceToHost"; + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *F = M->getFunction(Name); + + // If F is not available, declare it. + if (!F) { + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + std::vector<Type *> Args; + Args.push_back(Builder.getInt8PtrTy()); + Args.push_back(Builder.getInt8PtrTy()); + Args.push_back(Builder.getInt64Ty()); + FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); + F = Function::Create(Ty, Linkage, Name, M); + } + + Builder.CreateCall(F, {DeviceData, HostData, Size}); +} + Value *GPUNodeBuilder::createCallInitContext() { const char *Name = "polly_initContext"; Module *M = Builder.GetInsertBlock()->getParent()->getParent(); @@ -421,6 +477,58 @@ static bool isPrefix(std::string String, std::string Prefix) { return String.find(Prefix) == 0; } +Value *GPUNodeBuilder::getArraySize(gpu_array_info *Array) { + isl_ast_build *Build = isl_ast_build_from_context(S.getContext()); + Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size); + + if (!gpu_array_is_scalar(Array)) { + auto OffsetDimZero = isl_pw_aff_copy(Array->bound[0]); + isl_ast_expr *Res = isl_ast_build_expr_from_pw_aff(Build, OffsetDimZero); + + for (unsigned int i = 1; i < Array->n_index; i++) { + isl_pw_aff *Bound_I = isl_pw_aff_copy(Array->bound[i]); + isl_ast_expr *Expr = isl_ast_build_expr_from_pw_aff(Build, Bound_I); + Res = isl_ast_expr_mul(Res, Expr); + } + + Value *NumElements = ExprBuilder.create(Res); + ArraySize = Builder.CreateMul(ArraySize, NumElements); + } + isl_ast_build_free(Build); + return ArraySize; +} + +void GPUNodeBuilder::createDataTransfer(__isl_take isl_ast_node *TransferStmt, + enum DataDirection Direction) { + isl_ast_expr *Expr = isl_ast_node_user_get_expr(TransferStmt); + isl_ast_expr *Arg = isl_ast_expr_get_op_arg(Expr, 0); + isl_id *Id = isl_ast_expr_get_id(Arg); + auto Array = (gpu_array_info *)isl_id_get_user(Id); + auto ScopArray = (ScopArrayInfo *)(Array->user); + + Value *Size = getArraySize(Array); + Value *HostPtr = ScopArray->getBasePtr(); + + Value *DevPtr = DeviceAllocations[ScopArray]; + + if (gpu_array_is_scalar(Array)) { + HostPtr = Builder.CreateAlloca(ScopArray->getElementType()); + Builder.CreateStore(ScopArray->getBasePtr(), HostPtr); + } + + HostPtr = Builder.CreatePointerCast(HostPtr, Builder.getInt8PtrTy()); + + if (Direction == HOST_TO_DEVICE) + createCallCopyFromHostToDevice(HostPtr, DevPtr, Size); + else + createCallCopyFromDeviceToHost(DevPtr, HostPtr, Size); + + isl_id_free(Id); + isl_ast_expr_free(Arg); + isl_ast_expr_free(Expr); + isl_ast_node_free(TransferStmt); +} + void GPUNodeBuilder::createUser(__isl_take isl_ast_node *UserStmt) { isl_ast_expr *Expr = isl_ast_node_user_get_expr(UserStmt); isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); @@ -435,10 +543,15 @@ void GPUNodeBuilder::createUser(__isl_take isl_ast_node *UserStmt) { return; } - if (isPrefix(Str, "to_device") || isPrefix(Str, "from_device")) { - // TODO: Insert memory copies + if (isPrefix(Str, "to_device")) { + createDataTransfer(UserStmt, HOST_TO_DEVICE); + isl_ast_expr_free(Expr); + return; + } + + if (isPrefix(Str, "from_device")) { + createDataTransfer(UserStmt, DEVICE_TO_HOST); isl_ast_expr_free(Expr); - isl_ast_node_free(UserStmt); return; } @@ -1202,6 +1315,7 @@ public: PPCGArray.global = false; PPCGArray.linearize = false; PPCGArray.dep_order = nullptr; + PPCGArray.user = Array; setArrayBounds(PPCGArray, Array); i++; |

