summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/tools/GPURuntime/GPUJIT.c56
1 files changed, 52 insertions, 4 deletions
diff --git a/polly/tools/GPURuntime/GPUJIT.c b/polly/tools/GPURuntime/GPUJIT.c
index 24320f88418..d066d917c80 100644
--- a/polly/tools/GPURuntime/GPUJIT.c
+++ b/polly/tools/GPURuntime/GPUJIT.c
@@ -20,6 +20,7 @@
#include <string.h>
static int DebugMode;
+static int CacheMode;
static void debug_print(const char *format, ...) {
if (!DebugMode)
@@ -40,6 +41,7 @@ struct PollyGPUContextT {
struct PollyGPUFunctionT {
CUfunction Cuda;
CUmodule CudaModule;
+ const char *PTXString;
};
struct PollyGPUDevicePtrT {
@@ -249,6 +251,11 @@ PollyGPUContext *polly_initContext() {
char DeviceName[256];
int DeviceCount = 0;
+ static __thread PollyGPUContext *CurrentContext = NULL;
+
+ if (CurrentContext)
+ return CurrentContext;
+
/* Get API handles. */
if (initialDeviceAPIs() == 0) {
fprintf(stdout, "Getting the \"handle\" for the CUDA driver API failed.\n");
@@ -282,13 +289,41 @@ PollyGPUContext *polly_initContext() {
}
CuCtxCreateFcnPtr(&(Context->Cuda), 0, Device);
+ CacheMode = getenv("POLLY_NOCACHE") == 0;
+
+ if (CacheMode)
+ CurrentContext = Context;
+
return Context;
}
+/* Release a kernel object: call CuModuleUnloadFcnPtr on its CUDA module
+ * (when one is set) and then free the PollyGPUFunction itself.
+ *
+ * Kernel may be NULL, in which case nothing happens. The NULL check must
+ * come before any member access; testing it only after reading
+ * Kernel->CudaModule would dereference a NULL pointer. */
+static void freeKernel(PollyGPUFunction *Kernel) {
+  if (!Kernel)
+    return;
+
+  if (Kernel->CudaModule)
+    CuModuleUnloadFcnPtr(Kernel->CudaModule);
+
+  free(Kernel);
+}
+
+#define KERNEL_CACHE_SIZE 10
+
PollyGPUFunction *polly_getKernel(const char *PTXBuffer,
const char *KernelName) {
dump_function();
+ static __thread PollyGPUFunction *KernelCache[KERNEL_CACHE_SIZE];
+ static __thread int NextCacheItem = 0;
+
+ for (long i = 0; i < KERNEL_CACHE_SIZE; i++) {
+ // We exploit here the property that all Polly-ACC kernels are allocated
+ // as global constants, hence a pointer comparison is sufficient to
+ // determine equality.
+ if (KernelCache[i] && KernelCache[i]->PTXString == PTXBuffer) {
+ debug_print(" -> using cached kernel\n");
+ return KernelCache[i];
+ }
+ }
+
PollyGPUFunction *Function = malloc(sizeof(PollyGPUFunction));
if (Function == 0) {
@@ -361,17 +396,27 @@ PollyGPUFunction *polly_getKernel(const char *PTXBuffer,
CuLinkDestroyFcnPtr(LState);
+ Function->PTXString = PTXBuffer;
+
+ if (CacheMode) {
+ if (KernelCache[NextCacheItem])
+ freeKernel(KernelCache[NextCacheItem]);
+
+ KernelCache[NextCacheItem] = Function;
+
+ NextCacheItem = (NextCacheItem + 1) % KERNEL_CACHE_SIZE;
+ }
+
return Function;
}
void polly_freeKernel(PollyGPUFunction *Kernel) {
dump_function();
- if (Kernel->CudaModule)
- CuModuleUnloadFcnPtr(Kernel->CudaModule);
+ if (CacheMode)
+ return;
- if (Kernel)
- free(Kernel);
+ freeKernel(Kernel);
}
void polly_copyFromHostToDevice(void *HostData, PollyGPUDevicePtr *DevData,
@@ -448,6 +493,9 @@ void *polly_getDevicePtr(PollyGPUDevicePtr *Allocation) {
void polly_freeContext(PollyGPUContext *Context) {
dump_function();
+ if (CacheMode)
+ return;
+
if (Context->Cuda) {
CuCtxDestroyFcnPtr(Context->Cuda);
free(Context);
OpenPOWER on IntegriCloud