diff options
6 files changed, 11 insertions, 20 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu index 39dfebd92fe..3eff3a15bd4 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu @@ -765,7 +765,7 @@ INLINE void syncWorkersInGenericMode(uint32_t NumThreads) { // is started, so we don't need a barrier. if (NumThreads > 1) { #endif - named_sync(L1_BARRIER, WARPSIZE * NumWarps); + __kmpc_impl_named_sync(L1_BARRIER, WARPSIZE * NumWarps); #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700 } #endif diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu index cee3e5d6dd3..da7204df412 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu @@ -256,7 +256,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars, // If we guard this barrier as follows it leads to deadlock, probably // because of a compiler bug: if (!IsGenericMode()) __syncthreads(); uint16_t SyncWarps = (NumThreads + WARPSIZE - 1) / WARPSIZE; - named_sync(L1_BARRIER, SyncWarps * WARPSIZE); + __kmpc_impl_named_sync(L1_BARRIER, SyncWarps * WARPSIZE); // If this team is not the last, quit. if (/* Volatile read by all threads */ !IsLastTeam) diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h b/openmp/libomptarget/deviceRTLs/nvptx/src/support.h index e10f2a19d32..de685b89450 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.h @@ -84,11 +84,6 @@ INLINE unsigned long PadBytes(unsigned long size, unsigned long alignment); ((void *)((char *)((void *)(_addr)) - (_bytes))) //////////////////////////////////////////////////////////////////////////////// -// Named Barrier Routines -//////////////////////////////////////////////////////////////////////////////// -INLINE void named_sync(const int barrier, const int num_threads); - -//////////////////////////////////////////////////////////////////////////////// // Teams Reduction Scratchpad Helpers //////////////////////////////////////////////////////////////////////////////// INLINE unsigned int *GetTeamsReductionTimestamp(); diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h index d4da6ad73fa..6fa85789990 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h @@ -269,17 +269,6 @@ INLINE void *SafeFree(void *ptr, const char *msg) { } //////////////////////////////////////////////////////////////////////////////// -// Named Barrier Routines -//////////////////////////////////////////////////////////////////////////////// - -INLINE void named_sync(const int barrier, const int num_threads) { - asm volatile("bar.sync %0, %1;" - : - : "r"(barrier), "r"(num_threads) - : "memory"); -} - -//////////////////////////////////////////////////////////////////////////////// // Teams Reduction Scratchpad Helpers //////////////////////////////////////////////////////////////////////////////// diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu index 28a541901c3..f36e877bff1 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu @@ -61,7 +61,7 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) { "call kmpc_barrier with %d omp threads, sync parameter %d\n", (int)numberOfActiveOMPThreads, (int)threads); // Barrier #1 is for synchronization among active threads. - named_sync(L1_BARRIER, threads); + __kmpc_impl_named_sync(L1_BARRIER, threads); } } else { // Still need to flush the memory per the standard. @@ -92,7 +92,7 @@ EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid) { "%d\n", (int)numberOfActiveOMPThreads, (int)threads); // Barrier #1 is for synchronization among active threads. - named_sync(L1_BARRIER, threads); + __kmpc_impl_named_sync(L1_BARRIER, threads); PRINT0(LD_SYNC, "completed kmpc_barrier_simple_generic\n"); } diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h index bbce9f1c511..95fe2ad3d3d 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -153,4 +153,11 @@ INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) { #endif // CUDA_VERSION } +INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) { + asm volatile("bar.sync %0, %1;" + : + : "r"(barrier), "r"(num_threads) + : "memory"); +} + #endif |