diff options
author | Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> | 2018-02-07 18:21:55 +0000 |
---|---|---|
committer | Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> | 2018-02-07 18:21:55 +0000 |
commit | aaeab8d4efa59b62e47e062a34d567e3ca4ae567 (patch) | |
tree | 6a1905948bb46ba784332ea05d6e0c89a773a184 | |
parent | f4e3f3e31cacd412bcef0906da3c6a4bd503d0a2 (diff) | |
download | bcm5719-llvm-aaeab8d4efa59b62e47e062a34d567e3ca4ae567.tar.gz bcm5719-llvm-aaeab8d4efa59b62e47e062a34d567e3ca4ae567.zip |
[OpenMP][libomptarget] Add data sharing support in libomptarget
Summary: This patch extends the libomptarget functionality in patch D14254 with support for the data sharing scheme for supporting implicitly shared variables. The runtime therefore maintains a list of references to shared variables.
Reviewers: carlo.bertolli, ABataev, Hahnfeld, grokos, caomhin, hfinkel
Reviewed By: Hahnfeld, grokos
Subscribers: guansong, llvm-commits, openmp-commits
Differential Revision: https://reviews.llvm.org/D41485
llvm-svn: 324495
6 files changed, 65 insertions, 1 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h b/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h index f314443a8f2..4005353b17c 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h @@ -470,8 +470,9 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); EXTERN void __kmpc_spmd_kernel_deinit(); EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, + void ***SharedArgs, int32_t nArgs, int16_t IsOMPRuntimeInitialized); -EXTERN bool __kmpc_kernel_parallel(void **WorkFn, +EXTERN bool __kmpc_kernel_parallel(void **WorkFn, void ***SharedArgs, int16_t IsOMPRuntimeInitialized); EXTERN void __kmpc_kernel_end_parallel(); EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask, diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu index adedc0b3aa5..8ff8f7b4723 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu @@ -46,3 +46,8 @@ __device__ __shared__ DataSharingStateTy DataSharingState; // Scratchpad for teams reduction. //////////////////////////////////////////////////////////////////////////////// __device__ __shared__ void *ReductionScratchpadPtr; + +//////////////////////////////////////////////////////////////////////////////// +// Data sharing related variables. +//////////////////////////////////////////////////////////////////////////////// +__device__ __shared__ omptarget_nvptx_SharedArgs omptarget_nvptx_sharedArgs; diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu index 5c5c88b2cb6..fca2aa0e4ac 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu @@ -54,6 +54,9 @@ EXTERN void __kmpc_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime) { PRINT(LD_IO, "call to __kmpc_kernel_init with version %f\n", OMPTARGET_NVPTX_VERSION); + // init parallel work arguments + omptarget_nvptx_sharedArgs.Init(); + if (!RequiresOMPRuntime) { // If OMP runtime is not required don't initialize OMP state. setExecutionParameters(Generic, RuntimeUninitialized); @@ -107,6 +110,9 @@ EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized) { } // Done with work. Kill the workers. omptarget_nvptx_workFn = 0; + + // Deinit parallel work arguments + omptarget_nvptx_sharedArgs.DeInit(); } EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h index 3ee32f96480..43b8dd27bf5 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -62,6 +62,46 @@ #define __ACTIVEMASK() __ballot(1) #endif +// arguments needed for L0 parallelism only. +class omptarget_nvptx_SharedArgs { +public: + // All these methods must be called by the master thread only. + INLINE void Init() { + args = buffer; + nArgs = MAX_SHARED_ARGS; + } + INLINE void DeInit() { + // Free any memory allocated for outlined parallel function with a large + // number of arguments. + if (nArgs > MAX_SHARED_ARGS) { + SafeFree(args, (char *)"new extended args"); + Init(); + } + } + INLINE void EnsureSize(int size) { + if (size > nArgs) { + if (nArgs > MAX_SHARED_ARGS) { + SafeFree(args, (char *)"new extended args"); + } + args = (void **) SafeMalloc(size * sizeof(void *), + (char *)"new extended args"); + nArgs = size; + } + } + // Called by all threads. + INLINE void **GetArgs() { return args; }; +private: + // buffer of pre-allocated arguments. + void *buffer[MAX_SHARED_ARGS]; + // pointer to arguments buffer. + // starts off as a pointer to 'buffer' but can be dynamically allocated. + void **args; + // starts off as MAX_SHARED_ARGS but can increase in size. + uint32_t nArgs; +}; + +extern __device__ __shared__ omptarget_nvptx_SharedArgs omptarget_nvptx_sharedArgs; + // Data sharing related quantities, need to match what is used in the compiler. enum DATA_SHARING_SIZES { // The maximum number of workers in a kernel. diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/option.h b/openmp/libomptarget/deviceRTLs/nvptx/src/option.h index c5f5d9c04fb..43172ad45d0 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/option.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/option.h @@ -28,6 +28,10 @@ // region to synchronize with each other. #define L1_BARRIER (1) +// Maximum number of preallocated arguments to an outlined parallel/simd function. +// Anything more requires dynamic memory allocation. +#define MAX_SHARED_ARGS 20 + // Maximum number of omp state objects per SM allocated statically in global // memory. #if __CUDA_ARCH__ >= 600 diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu index d4546284fac..0b9ebd401f8 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -214,10 +214,16 @@ EXTERN void __kmpc_kernel_end_convergent_parallel(void *buffer) { // // This routine is always called by the team master.. EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, + void ***SharedArgs, int32_t nArgs, int16_t IsOMPRuntimeInitialized) { PRINT0(LD_IO, "call to __kmpc_kernel_prepare_parallel\n"); omptarget_nvptx_workFn = WorkFn; + if (nArgs > 0) { + omptarget_nvptx_sharedArgs.EnsureSize(nArgs); + *SharedArgs = omptarget_nvptx_sharedArgs.GetArgs(); + } + if (!IsOMPRuntimeInitialized) return; @@ -317,11 +323,13 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, // // Only the worker threads call this routine. EXTERN bool __kmpc_kernel_parallel(void **WorkFn, + void ***SharedArgs, int16_t IsOMPRuntimeInitialized) { PRINT0(LD_IO | LD_PAR, "call to __kmpc_kernel_parallel\n"); // Work function and arguments for L1 parallel region. *WorkFn = omptarget_nvptx_workFn; + *SharedArgs = omptarget_nvptx_sharedArgs.GetArgs(); if (!IsOMPRuntimeInitialized) return true; |