summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- polly/lib/CodeGen/PPCGCodeGeneration.cpp 49
-rw-r--r-- polly/test/GPGPU/double-parallel-loop.ll 3
-rw-r--r-- polly/tools/GPURuntime/GPUJIT.c 26
-rw-r--r-- polly/tools/GPURuntime/GPUJIT.h 5
4 files changed, 77 insertions, 6 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 012ae34d0c9..872b2f60a44 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -148,6 +148,9 @@ private:
/// more.
std::vector<Value *> LocalArrays;
+ /// A list of device arrays that have been allocated.
+ std::vector<Value *> AllocatedDevArrays;
+
/// The current GPU context.
Value *GPUContext;
@@ -266,8 +269,12 @@ private:
/// dump its IR to stderr.
void finalizeKernelFunction();
+ /// Create code that allocates memory to store arrays on device.
void allocateDeviceArrays();
+ /// Free all allocated device arrays.
+ void freeDeviceArrays();
+
/// Create a call to initialize the GPU context.
///
/// @returns A pointer to the newly initialized context.
@@ -278,7 +285,17 @@ private:
/// @param Context A pointer to an initialized GPU context.
void createCallFreeContext(Value *Context);
+ /// Create a call to allocate memory on the device.
+ ///
+ /// @param Size The size of memory to allocate
+ ///
+ /// @returns A pointer that identifies this allocation.
Value *createCallAllocateMemoryForDevice(Value *Size);
+
+ /// Create a call to free a device array.
+ ///
+ /// @param Array The device array to free.
+ void createCallFreeDeviceMemory(Value *Array);
};
void GPUNodeBuilder::initializeAfterRTH() {
@@ -287,6 +304,7 @@ void GPUNodeBuilder::initializeAfterRTH() {
}
void GPUNodeBuilder::finalize() {
+ freeDeviceArrays(); // Emit the device-array frees ahead of the context release below.
createCallFreeContext(GPUContext);
IslNodeBuilder::finalize();
}
@@ -296,8 +314,8 @@ void GPUNodeBuilder::allocateDeviceArrays() {
for (int i = 0; i < Prog->n_array; ++i) {
gpu_array_info *Array = &Prog->array[i];
- std::string DevPtrName("p_devptr_");
- DevPtrName.append(Array->name);
+ std::string DevArrayName("p_dev_array_");
+ DevArrayName.append(Array->name);
Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size);
@@ -315,13 +333,36 @@ void GPUNodeBuilder::allocateDeviceArrays() {
ArraySize = Builder.CreateMul(ArraySize, NumElements);
}
- Value *DevPtr = createCallAllocateMemoryForDevice(ArraySize);
- DevPtr->setName(DevPtrName);
+ Value *DevArray = createCallAllocateMemoryForDevice(ArraySize);
+ DevArray->setName(DevArrayName);
+ AllocatedDevArrays.push_back(DevArray);
}
isl_ast_build_free(Build);
}
+/// Emit one free call for every device array recorded in AllocatedDevArrays,
+/// in the order in which the arrays were recorded.
+void GPUNodeBuilder::freeDeviceArrays() {
+  for (Value *DevArray : AllocatedDevArrays)
+    createCallFreeDeviceMemory(DevArray);
+}
+
+/// Emit a call to the runtime function 'polly_freeDeviceMemory' with
+/// @p Array as its only argument, declaring the function in the current
+/// module first if no function of that name exists yet.
+void GPUNodeBuilder::createCallFreeDeviceMemory(Value *Array) {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  const char *Name = "polly_freeDeviceMemory";
+
+  // Reuse an existing function of this name; otherwise declare it as an
+  // external, non-variadic 'void (i8*)' function.
+  Function *F = M->getFunction(Name);
+  if (!F) {
+    std::vector<Type *> ParamTypes(1, Builder.getInt8PtrTy());
+    FunctionType *FnTy =
+        FunctionType::get(Builder.getVoidTy(), ParamTypes, false);
+    F = Function::Create(FnTy, Function::ExternalLinkage, Name, M);
+  }
+
+  Builder.CreateCall(F, {Array});
+}
+
Value *GPUNodeBuilder::createCallAllocateMemoryForDevice(Value *Size) {
const char *Name = "polly_allocateMemoryForDevice";
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll
index 33153aea0af..1563f0c2986 100644
--- a/polly/test/GPGPU/double-parallel-loop.ll
+++ b/polly/test/GPGPU/double-parallel-loop.ll
@@ -93,7 +93,8 @@
; IR: polly.start:
; IR-NEXT: [[GPUContext:%.*]] = call i8* @polly_initContext()
-; IR-NEXT: %p_devptr_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 4194304)
+; IR-NEXT: %p_dev_array_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 4194304)
+; IR-NEXT: call void @polly_freeDeviceMemory(i8* %p_dev_array_MemRef_A)
; IR-NEXT: call void @polly_freeContext(i8* [[GPUContext]])
; IR-NEXT: br label %polly.exiting
diff --git a/polly/tools/GPURuntime/GPUJIT.c b/polly/tools/GPURuntime/GPUJIT.c
index ec971580e89..41801fa1f4c 100644
--- a/polly/tools/GPURuntime/GPUJIT.c
+++ b/polly/tools/GPURuntime/GPUJIT.c
@@ -339,6 +339,32 @@ void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth,
debug_print("CUDA kernel launched.\n");
}
+/* Release a device allocation.
+ *
+ * Frees the device memory referenced by Allocation->Cuda and then the
+ * host-side PollyGPUDevicePtr wrapper itself. A null Allocation is ignored.
+ * A failing device free is reported on stdout; the wrapper is still released.
+ */
+void polly_freeDeviceMemory(PollyGPUDevicePtr *Allocation) {
+  dump_function();
+
+  /* Nothing to release; avoids dereferencing a null wrapper. */
+  if (Allocation == 0)
+    return;
+
+  CUresult Res = CuMemFreeFcnPtr((CUdeviceptr)Allocation->Cuda);
+
+  if (Res != CUDA_SUCCESS)
+    fprintf(stdout, "Freeing GPU device memory failed.\n");
+
+  /* Release the host-side wrapper regardless, so it does not leak. */
+  free(Allocation);
+}
+
+/* Allocate MemSize bytes of device memory.
+ *
+ * Returns a malloc'ed PollyGPUDevicePtr whose Cuda member receives the device
+ * allocation. If either the host-side wrapper or the device memory cannot be
+ * allocated, a message naming the failing step is printed to stdout and
+ * exit(-1) is called, so this function does not return null.
+ */
+PollyGPUDevicePtr *polly_allocateMemoryForDevice(long MemSize) {
+  dump_function();
+
+  /* Host-side wrapper that carries the device pointer. */
+  PollyGPUDevicePtr *DevData = malloc(sizeof(PollyGPUDevicePtr));
+
+  if (DevData == 0) {
+    fprintf(stdout, "Allocate memory for GPU device memory pointer failed.\n");
+    exit(-1);
+  }
+
+  CUresult Res = CuMemAllocFcnPtr(&(DevData->Cuda), MemSize);
+
+  if (Res != CUDA_SUCCESS) {
+    /* Distinct from the message above: here the device allocation failed. */
+    fprintf(stdout, "Allocate %ld bytes of GPU device memory failed.\n",
+            MemSize);
+    exit(-1);
+  }
+
+  return DevData;
+}
+
void polly_freeContext(PollyGPUContext *Context) {
dump_function();
diff --git a/polly/tools/GPURuntime/GPUJIT.h b/polly/tools/GPURuntime/GPUJIT.h
index 0ded86b2e3c..516475d1696 100644
--- a/polly/tools/GPURuntime/GPUJIT.h
+++ b/polly/tools/GPURuntime/GPUJIT.h
@@ -47,7 +47,7 @@
* PollyGPUModule *Module;
* PollyGPUFunction *Kernel;
* PollyGPUContext *Context;
- * PollyGPUDevicePtr *PtrDevData;
+ * PollyGPUDevicePtr *DevArray;
* int *HostData;
* int MemSize;
* int BlockWidth = 16;
@@ -57,11 +57,13 @@
*
* MemSize = 256*64*sizeof(int);
* Context = polly_initContext();
+ * DevArray = polly_allocateMemoryForDevice(MemSize);
* polly_getPTXModule(KernelString, &Module);
* polly_getPTXKernelEntry(Entry, Module, &Kernel);
* polly_setKernelParameters(Kernel, BlockWidth, BlockHeight, DevArray);
* polly_launchKernel(Kernel, GridWidth, GridHeight);
* polly_copyFromDeviceToHost(HostData, DevArray, MemSize);
+ * polly_freeDeviceMemory(DevArray);
* polly_freeContext(Context);
* }
*
@@ -84,5 +86,6 @@ void polly_setKernelParameters(PollyGPUFunction *Kernel, int BlockWidth,
int BlockHeight, PollyGPUDevicePtr *DevData);
void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth,
int GridHeight);
+void polly_freeDeviceMemory(PollyGPUDevicePtr *Allocation);
void polly_freeContext(PollyGPUContext *Context);
#endif /* GPUJIT_H_ */
OpenPOWER on IntegriCloud