summary | refs | log | tree | commit | diff | stats
path: root/polly/tools/GPURuntime/GPUJIT.c
diff options
context:
space:
mode:
Diffstat (limited to 'polly/tools/GPURuntime/GPUJIT.c')
-rw-r--r-- polly/tools/GPURuntime/GPUJIT.c 133
1 files changed, 63 insertions, 70 deletions
diff --git a/polly/tools/GPURuntime/GPUJIT.c b/polly/tools/GPURuntime/GPUJIT.c
index a863ea4e485..a03ebe9a654 100644
--- a/polly/tools/GPURuntime/GPUJIT.c
+++ b/polly/tools/GPURuntime/GPUJIT.c
@@ -1,4 +1,5 @@
-/******************** GPUJIT.cpp - GPUJIT Execution Engine ********************/
+/******************** GPUJIT.cpp - GPUJIT Execution Engine \
+ * ********************/
/* */
/* The LLVM Compiler Infrastructure */
/* */
@@ -53,8 +54,8 @@ static CuMemAllocFcnTy *CuMemAllocFcnPtr;
typedef CUresult CUDAAPI CuFuncSetBlockShapeFcnTy(CUfunction, int, int, int);
static CuFuncSetBlockShapeFcnTy *CuFuncSetBlockShapeFcnPtr;
-typedef CUresult CUDAAPI CuParamSetvFcnTy(CUfunction, int, void *,
- unsigned int);
+typedef CUresult CUDAAPI
+CuParamSetvFcnTy(CUfunction, int, void *, unsigned int);
static CuParamSetvFcnTy *CuParamSetvFcnPtr;
typedef CUresult CUDAAPI CuParamSetSizeFcnTy(CUfunction, unsigned int);
@@ -90,13 +91,12 @@ static CuCtxCreateFcnTy *CuCtxCreateFcnPtr;
typedef CUresult CUDAAPI CuDeviceGetFcnTy(CUdevice *, int);
static CuDeviceGetFcnTy *CuDeviceGetFcnPtr;
-typedef CUresult CUDAAPI CuModuleLoadDataExFcnTy(CUmodule *, const void *,
- unsigned int, CUjit_option *,
- void **);
+typedef CUresult CUDAAPI CuModuleLoadDataExFcnTy(
+ CUmodule *, const void *, unsigned int, CUjit_option *, void **);
static CuModuleLoadDataExFcnTy *CuModuleLoadDataExFcnPtr;
-typedef CUresult CUDAAPI CuModuleGetFunctionFcnTy(CUfunction *, CUmodule,
- const char *);
+typedef CUresult CUDAAPI
+CuModuleGetFunctionFcnTy(CUfunction *, CUmodule, const char *);
static CuModuleGetFunctionFcnTy *CuModuleGetFunctionFcnPtr;
typedef CUresult CUDAAPI CuDeviceComputeCapabilityFcnTy(int *, int *, CUdevice);
@@ -109,15 +109,14 @@ static CuDeviceGetNameFcnTy *CuDeviceGetNameFcnPtr;
typedef cudaError_t CUDARTAPI CudaEventCreateFcnTy(cudaEvent_t *);
static CudaEventCreateFcnTy *CudaEventCreateFcnPtr;
-typedef cudaError_t CUDARTAPI CudaEventRecordFcnTy(cudaEvent_t,
- cudaStream_t);
+typedef cudaError_t CUDARTAPI CudaEventRecordFcnTy(cudaEvent_t, cudaStream_t);
static CudaEventRecordFcnTy *CudaEventRecordFcnPtr;
typedef cudaError_t CUDARTAPI CudaEventSynchronizeFcnTy(cudaEvent_t);
static CudaEventSynchronizeFcnTy *CudaEventSynchronizeFcnPtr;
-typedef cudaError_t CUDARTAPI CudaEventElapsedTimeFcnTy(float *, cudaEvent_t,
- cudaEvent_t);
+typedef cudaError_t CUDARTAPI
+CudaEventElapsedTimeFcnTy(float *, cudaEvent_t, cudaEvent_t);
static CudaEventElapsedTimeFcnTy *CudaEventElapsedTimeFcnPtr;
typedef cudaError_t CUDARTAPI CudaEventDestroyFcnTy(cudaEvent_t);
@@ -166,81 +165,77 @@ static int initialDeviceAPIs() {
* of this kind of cast may not be emitted by clang and new versions of gcc
* as it is valid on POSIX 2008.
*/
- CuFuncSetBlockShapeFcnPtr =
- (CuFuncSetBlockShapeFcnTy *) getAPIHandle(HandleCuda,
- "cuFuncSetBlockShape");
+ CuFuncSetBlockShapeFcnPtr = (CuFuncSetBlockShapeFcnTy *)getAPIHandle(
+ HandleCuda, "cuFuncSetBlockShape");
- CuParamSetvFcnPtr = (CuParamSetvFcnTy *) getAPIHandle(HandleCuda,
- "cuParamSetv");
+ CuParamSetvFcnPtr =
+ (CuParamSetvFcnTy *)getAPIHandle(HandleCuda, "cuParamSetv");
- CuParamSetSizeFcnPtr = (CuParamSetSizeFcnTy *) getAPIHandle(HandleCuda,
- "cuParamSetSize");
+ CuParamSetSizeFcnPtr =
+ (CuParamSetSizeFcnTy *)getAPIHandle(HandleCuda, "cuParamSetSize");
- CuLaunchGridFcnPtr = (CuLaunchGridFcnTy *) getAPIHandle(HandleCuda,
- "cuLaunchGrid");
+ CuLaunchGridFcnPtr =
+ (CuLaunchGridFcnTy *)getAPIHandle(HandleCuda, "cuLaunchGrid");
- CuMemAllocFcnPtr = (CuMemAllocFcnTy *) getAPIHandle(HandleCuda,
- "cuMemAlloc_v2");
+ CuMemAllocFcnPtr =
+ (CuMemAllocFcnTy *)getAPIHandle(HandleCuda, "cuMemAlloc_v2");
- CuMemFreeFcnPtr = (CuMemFreeFcnTy *) getAPIHandle(HandleCuda, "cuMemFree_v2");
+ CuMemFreeFcnPtr = (CuMemFreeFcnTy *)getAPIHandle(HandleCuda, "cuMemFree_v2");
- CuMemcpyDtoHFcnPtr = (CuMemcpyDtoHFcnTy *) getAPIHandle(HandleCuda,
- "cuMemcpyDtoH_v2");
+ CuMemcpyDtoHFcnPtr =
+ (CuMemcpyDtoHFcnTy *)getAPIHandle(HandleCuda, "cuMemcpyDtoH_v2");
- CuMemcpyHtoDFcnPtr = (CuMemcpyHtoDFcnTy *) getAPIHandle(HandleCuda,
- "cuMemcpyHtoD_v2");
+ CuMemcpyHtoDFcnPtr =
+ (CuMemcpyHtoDFcnTy *)getAPIHandle(HandleCuda, "cuMemcpyHtoD_v2");
- CuModuleUnloadFcnPtr = (CuModuleUnloadFcnTy *) getAPIHandle(HandleCuda,
- "cuModuleUnload");
+ CuModuleUnloadFcnPtr =
+ (CuModuleUnloadFcnTy *)getAPIHandle(HandleCuda, "cuModuleUnload");
- CuCtxDestroyFcnPtr = (CuCtxDestroyFcnTy *) getAPIHandle(HandleCuda,
- "cuCtxDestroy");
+ CuCtxDestroyFcnPtr =
+ (CuCtxDestroyFcnTy *)getAPIHandle(HandleCuda, "cuCtxDestroy");
- CuInitFcnPtr = (CuInitFcnTy *) getAPIHandle(HandleCuda, "cuInit");
+ CuInitFcnPtr = (CuInitFcnTy *)getAPIHandle(HandleCuda, "cuInit");
- CuDeviceGetCountFcnPtr = (CuDeviceGetCountFcnTy *) getAPIHandle(HandleCuda,
- "cuDeviceGetCount");
+ CuDeviceGetCountFcnPtr =
+ (CuDeviceGetCountFcnTy *)getAPIHandle(HandleCuda, "cuDeviceGetCount");
- CuDeviceGetFcnPtr = (CuDeviceGetFcnTy *) getAPIHandle(HandleCuda,
- "cuDeviceGet");
+ CuDeviceGetFcnPtr =
+ (CuDeviceGetFcnTy *)getAPIHandle(HandleCuda, "cuDeviceGet");
- CuCtxCreateFcnPtr = (CuCtxCreateFcnTy *) getAPIHandle(HandleCuda,
- "cuCtxCreate_v2");
+ CuCtxCreateFcnPtr =
+ (CuCtxCreateFcnTy *)getAPIHandle(HandleCuda, "cuCtxCreate_v2");
CuModuleLoadDataExFcnPtr =
- (CuModuleLoadDataExFcnTy *) getAPIHandle(HandleCuda, "cuModuleLoadDataEx");
+ (CuModuleLoadDataExFcnTy *)getAPIHandle(HandleCuda, "cuModuleLoadDataEx");
- CuModuleGetFunctionFcnPtr =
- (CuModuleGetFunctionFcnTy *)getAPIHandle(HandleCuda, "cuModuleGetFunction");
+ CuModuleGetFunctionFcnPtr = (CuModuleGetFunctionFcnTy *)getAPIHandle(
+ HandleCuda, "cuModuleGetFunction");
CuDeviceComputeCapabilityFcnPtr =
- (CuDeviceComputeCapabilityFcnTy *)getAPIHandle(HandleCuda,
- "cuDeviceComputeCapability");
+ (CuDeviceComputeCapabilityFcnTy *)getAPIHandle(
+ HandleCuda, "cuDeviceComputeCapability");
CuDeviceGetNameFcnPtr =
- (CuDeviceGetNameFcnTy *) getAPIHandle(HandleCuda, "cuDeviceGetName");
+ (CuDeviceGetNameFcnTy *)getAPIHandle(HandleCuda, "cuDeviceGetName");
/* Get function pointer to CUDA Runtime APIs. */
CudaEventCreateFcnPtr =
- (CudaEventCreateFcnTy *) getAPIHandle(HandleCudaRT, "cudaEventCreate");
+ (CudaEventCreateFcnTy *)getAPIHandle(HandleCudaRT, "cudaEventCreate");
CudaEventRecordFcnPtr =
- (CudaEventRecordFcnTy *) getAPIHandle(HandleCudaRT, "cudaEventRecord");
+ (CudaEventRecordFcnTy *)getAPIHandle(HandleCudaRT, "cudaEventRecord");
- CudaEventSynchronizeFcnPtr =
- (CudaEventSynchronizeFcnTy *) getAPIHandle(HandleCudaRT,
- "cudaEventSynchronize");
+ CudaEventSynchronizeFcnPtr = (CudaEventSynchronizeFcnTy *)getAPIHandle(
+ HandleCudaRT, "cudaEventSynchronize");
- CudaEventElapsedTimeFcnPtr =
- (CudaEventElapsedTimeFcnTy *) getAPIHandle(HandleCudaRT,
- "cudaEventElapsedTime");
+ CudaEventElapsedTimeFcnPtr = (CudaEventElapsedTimeFcnTy *)getAPIHandle(
+ HandleCudaRT, "cudaEventElapsedTime");
CudaEventDestroyFcnPtr =
- (CudaEventDestroyFcnTy *) getAPIHandle(HandleCudaRT, "cudaEventDestroy");
+ (CudaEventDestroyFcnTy *)getAPIHandle(HandleCudaRT, "cudaEventDestroy");
- CudaThreadSynchronizeFcnPtr =
- (CudaThreadSynchronizeFcnTy *) getAPIHandle(HandleCudaRT,
- "cudaThreadSynchronize");
+ CudaThreadSynchronizeFcnPtr = (CudaThreadSynchronizeFcnTy *)getAPIHandle(
+ HandleCudaRT, "cudaThreadSynchronize");
return 1;
}
@@ -269,7 +264,7 @@ void polly_initDevice(PollyGPUContext **Context, PollyGPUDevice **Device) {
}
/* We select the 1st device as default. */
- *Device = malloc(sizeof(PollyGPUDevice));
+ *Device = malloc(sizeof(PollyGPUDevice));
if (*Device == 0) {
fprintf(stdout, "Allocate memory for Polly GPU device failed.\n");
exit(-1);
@@ -297,8 +292,8 @@ void polly_getPTXModule(void *PTXBuffer, PollyGPUModule **Module) {
exit(-1);
}
- if (CuModuleLoadDataExFcnPtr(&((*Module)->Cuda), PTXBuffer, 0, 0, 0)
- != CUDA_SUCCESS) {
+ if (CuModuleLoadDataExFcnPtr(&((*Module)->Cuda), PTXBuffer, 0, 0, 0) !=
+ CUDA_SUCCESS) {
fprintf(stdout, "Loading ptx assembly text failed.\n");
exit(-1);
}
@@ -313,8 +308,8 @@ void polly_getPTXKernelEntry(const char *KernelName, PollyGPUModule *Module,
}
/* Locate the kernel entry point. */
- if(CuModuleGetFunctionFcnPtr(&((*Kernel)->Cuda), Module->Cuda, KernelName)
- != CUDA_SUCCESS) {
+ if (CuModuleGetFunctionFcnPtr(&((*Kernel)->Cuda), Module->Cuda, KernelName) !=
+ CUDA_SUCCESS) {
fprintf(stdout, "Loading kernel function failed.\n");
exit(-1);
}
@@ -354,9 +349,8 @@ void polly_stopTimerByCudaEvent(PollyGPUEvent *Start, PollyGPUEvent *Stop,
free(Stop);
}
-void polly_allocateMemoryForHostAndDevice(void **HostData,
- PollyGPUDevicePtr **DevData,
- int MemSize) {
+void polly_allocateMemoryForHostAndDevice(
+ void **HostData, PollyGPUDevicePtr **DevData, int MemSize) {
if ((*HostData = (int *)malloc(MemSize)) == 0) {
fprintf(stdout, "Could not allocate host memory.\n");
exit(-1);
@@ -378,7 +372,7 @@ void polly_copyFromHostToDevice(PollyGPUDevicePtr *DevData, void *HostData,
void polly_copyFromDeviceToHost(void *HostData, PollyGPUDevicePtr *DevData,
int MemSize) {
- if(CuMemcpyDtoHFcnPtr(HostData, DevData->Cuda, MemSize) != CUDA_SUCCESS) {
+ if (CuMemcpyDtoHFcnPtr(HostData, DevData->Cuda, MemSize) != CUDA_SUCCESS) {
fprintf(stdout, "Copying results from device to host memory failed.\n");
exit(-1);
}
@@ -405,10 +399,9 @@ void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth,
fprintf(stdout, "CUDA kernel launched.\n");
}
-void polly_cleanupGPGPUResources(void *HostData, PollyGPUDevicePtr *DevData,
- PollyGPUModule *Module,
- PollyGPUContext *Context,
- PollyGPUFunction *Kernel) {
+void polly_cleanupGPGPUResources(
+ void *HostData, PollyGPUDevicePtr *DevData, PollyGPUModule *Module,
+ PollyGPUContext *Context, PollyGPUFunction *Kernel) {
if (HostData) {
free(HostData);
HostData = 0;
OpenPOWER on IntegriCloud