diff options
| -rw-r--r-- | polly/lib/CodeGen/PPCGCodeGeneration.cpp | 49 | ||||
| -rw-r--r-- | polly/test/GPGPU/double-parallel-loop.ll | 3 | ||||
| -rw-r--r-- | polly/tools/GPURuntime/GPUJIT.c | 26 | ||||
| -rw-r--r-- | polly/tools/GPURuntime/GPUJIT.h | 5 |
4 files changed, 77 insertions, 6 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 012ae34d0c9..872b2f60a44 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -148,6 +148,9 @@ private: /// more. std::vector<Value *> LocalArrays; + /// A list of device arrays that have been allocated. + std::vector<Value *> AllocatedDevArrays; + /// The current GPU context. Value *GPUContext; @@ -266,8 +269,12 @@ private: /// dump its IR to stderr. void finalizeKernelFunction(); + /// Create code that allocates memory to store arrays on device. void allocateDeviceArrays(); + /// Free all allocated device arrays. + void freeDeviceArrays(); + /// Create a call to initialize the GPU context. /// /// @returns A pointer to the newly initialized context. @@ -278,7 +285,17 @@ private: /// @param Context A pointer to an initialized GPU context. void createCallFreeContext(Value *Context); + /// Create a call to allocate memory on the device. + /// + /// @param Size The size of memory to allocate. + /// + /// @returns A pointer that identifies this allocation. Value *createCallAllocateMemoryForDevice(Value *Size); + + /// Create a call to free a device array. + /// + /// @param Array The device array to free. 
+ void createCallFreeDeviceMemory(Value *Array); }; void GPUNodeBuilder::initializeAfterRTH() { @@ -287,6 +304,7 @@ void GPUNodeBuilder::initializeAfterRTH() { } void GPUNodeBuilder::finalize() { + freeDeviceArrays(); createCallFreeContext(GPUContext); IslNodeBuilder::finalize(); } @@ -296,8 +314,8 @@ void GPUNodeBuilder::allocateDeviceArrays() { for (int i = 0; i < Prog->n_array; ++i) { gpu_array_info *Array = &Prog->array[i]; - std::string DevPtrName("p_devptr_"); - DevPtrName.append(Array->name); + std::string DevArrayName("p_dev_array_"); + DevArrayName.append(Array->name); Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size); @@ -315,13 +333,36 @@ void GPUNodeBuilder::allocateDeviceArrays() { ArraySize = Builder.CreateMul(ArraySize, NumElements); } - Value *DevPtr = createCallAllocateMemoryForDevice(ArraySize); - DevPtr->setName(DevPtrName); + Value *DevArray = createCallAllocateMemoryForDevice(ArraySize); + DevArray->setName(DevArrayName); + AllocatedDevArrays.push_back(DevArray); } isl_ast_build_free(Build); } +void GPUNodeBuilder::freeDeviceArrays() { + for (auto &Array : AllocatedDevArrays) + createCallFreeDeviceMemory(Array); +} + +void GPUNodeBuilder::createCallFreeDeviceMemory(Value *Array) { + const char *Name = "polly_freeDeviceMemory"; + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *F = M->getFunction(Name); + + // If F is not available, declare it. 
+ if (!F) { + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + std::vector<Type *> Args; + Args.push_back(Builder.getInt8PtrTy()); + FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); + F = Function::Create(Ty, Linkage, Name, M); + } + + Builder.CreateCall(F, {Array}); +} + Value *GPUNodeBuilder::createCallAllocateMemoryForDevice(Value *Size) { const char *Name = "polly_allocateMemoryForDevice"; Module *M = Builder.GetInsertBlock()->getParent()->getParent(); diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll index 33153aea0af..1563f0c2986 100644 --- a/polly/test/GPGPU/double-parallel-loop.ll +++ b/polly/test/GPGPU/double-parallel-loop.ll @@ -93,7 +93,8 @@ ; IR: polly.start: ; IR-NEXT: [[GPUContext:%.*]] = call i8* @polly_initContext() -; IR-NEXT: %p_devptr_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 4194304) +; IR-NEXT: %p_dev_array_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 4194304) +; IR-NEXT: call void @polly_freeDeviceMemory(i8* %p_dev_array_MemRef_A) ; IR-NEXT: call void @polly_freeContext(i8* [[GPUContext]]) ; IR-NEXT: br label %polly.exiting diff --git a/polly/tools/GPURuntime/GPUJIT.c b/polly/tools/GPURuntime/GPUJIT.c index ec971580e89..41801fa1f4c 100644 --- a/polly/tools/GPURuntime/GPUJIT.c +++ b/polly/tools/GPURuntime/GPUJIT.c @@ -339,6 +339,32 @@ void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth, debug_print("CUDA kernel launched.\n"); } +void polly_freeDeviceMemory(PollyGPUDevicePtr *Allocation) { + dump_function(); + CuMemFreeFcnPtr((CUdeviceptr)Allocation->Cuda); + free(Allocation); +} + +PollyGPUDevicePtr *polly_allocateMemoryForDevice(long MemSize) { + dump_function(); + + PollyGPUDevicePtr *DevData = malloc(sizeof(PollyGPUDevicePtr)); + + if (DevData == 0) { + fprintf(stdout, "Allocate memory for GPU device memory pointer failed.\n"); + exit(-1); + } + + CUresult Res = CuMemAllocFcnPtr(&(DevData->Cuda), MemSize); + + if (Res != 
CUDA_SUCCESS) { + fprintf(stdout, "Allocate memory for GPU device memory pointer failed.\n"); + exit(-1); + } + + return DevData; +} + void polly_freeContext(PollyGPUContext *Context) { dump_function(); diff --git a/polly/tools/GPURuntime/GPUJIT.h b/polly/tools/GPURuntime/GPUJIT.h index 0ded86b2e3c..516475d1696 100644 --- a/polly/tools/GPURuntime/GPUJIT.h +++ b/polly/tools/GPURuntime/GPUJIT.h @@ -47,7 +47,7 @@ * PollyGPUModule *Module; * PollyGPUFunction *Kernel; * PollyGPUContext *Context; - * PollyGPUDevicePtr *PtrDevData; + * PollyGPUDevicePtr *DevArray; * int *HostData; * int MemSize; * int BlockWidth = 16; @@ -57,11 +57,13 @@ * * MemSize = 256*64*sizeof(int); * Context = polly_initContext(); + * DevArray = polly_allocateMemoryForDevice(MemSize); * polly_getPTXModule(KernelString, &Module); * polly_getPTXKernelEntry(Entry, Module, &Kernel); * polly_setKernelParameters(Kernel, BlockWidth, BlockHeight, DevData); * polly_launchKernel(Kernel, GridWidth, GridHeight); * polly_copyFromDeviceToHost(HostData, DevData, MemSize); + * polly_freeDeviceMemory(DevArray); * polly_freeContext(Context); * } * @@ -84,5 +86,6 @@ void polly_setKernelParameters(PollyGPUFunction *Kernel, int BlockWidth, int BlockHeight, PollyGPUDevicePtr *DevData); void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth, int GridHeight); +void polly_freeDeviceMemory(PollyGPUDevicePtr *Allocation); void polly_freeContext(PollyGPUContext *Context); #endif /* GPUJIT_H_ */ |

