diff options
| author | JonChesterfield <jonathanchesterfield@gmail.com> | 2019-12-06 15:41:17 +0000 |
|---|---|---|
| committer | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2019-12-06 15:41:18 +0000 |
| commit | 0dd62c5c2ec854997ca45f810175c5d1426b474e (patch) | |
| tree | b0645f46f29624c472ebc715ae956c0eeafe9e18 | |
| parent | cd90f49d708dbc8f367eb4c575f62ab614900f59 (diff) | |
| download | bcm5719-llvm-0dd62c5c2ec854997ca45f810175c5d1426b474e.tar.gz bcm5719-llvm-0dd62c5c2ec854997ca45f810175c5d1426b474e.zip | |
[libomptarget][nfc] Move cuda threadfence functions behind kmpc_impl
Summary:
[libomptarget][nfc] Move cuda threadfence functions behind kmpc_impl
Part of the effort to build the code under common/ without requiring a CUDA compiler.
Reviewers: ABataev, jdoerfert, grokos
Reviewed By: ABataev
Subscribers: jvesely, jfb, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D71102
7 files changed, 21 insertions, 13 deletions
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h index 475851ac9af..b10f34a1664 100644 --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h @@ -124,6 +124,10 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) { __builtin_amdgcn_s_barrier(); } +EXTERN void __kmpc_impl_threadfence(void); +EXTERN void __kmpc_impl_threadfence_block(void); +EXTERN void __kmpc_impl_threadfence_system(void); + // DEVICE versions of part of libc extern "C" { DEVICE __attribute__((noreturn)) void diff --git a/openmp/libomptarget/deviceRTLs/common/src/loop.cu b/openmp/libomptarget/deviceRTLs/common/src/loop.cu index 45bf8f40a92..59970a6db41 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/loop.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/loop.cu @@ -363,7 +363,7 @@ public: __kmpc_barrier(loc, threadId); if (tid == 0) { omptarget_nvptx_threadPrivateContext->Cnt() = 0; - __threadfence_block(); + __kmpc_impl_threadfence_block(); } __kmpc_barrier(loc, threadId); PRINT(LD_LOOP, diff --git a/openmp/libomptarget/deviceRTLs/common/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu index 44a42e172f2..2f992f2778e 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/support.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu @@ -212,7 +212,7 @@ DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) { if (Rank == 0) { parallelLevel[GetWarpId()] += (1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0)); - __threadfence(); + __kmpc_impl_threadfence(); } __kmpc_impl_syncwarp(Mask); } @@ -224,7 +224,7 @@ DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) { if (Rank == 0) { parallelLevel[GetWarpId()] -= (1 + (ActiveParallel ? 
OMP_ACTIVE_PARALLEL_LEVEL : 0)); - __threadfence(); + __kmpc_impl_threadfence(); } __kmpc_impl_syncwarp(Mask); } diff --git a/openmp/libomptarget/deviceRTLs/common/src/sync.cu b/openmp/libomptarget/deviceRTLs/common/src/sync.cu index 0ee29bf316b..691e3436a38 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/sync.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/sync.cu @@ -133,7 +133,7 @@ EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) { EXTERN void __kmpc_flush(kmp_Ident *loc) { PRINT0(LD_IO, "call kmpc_flush\n"); - __threadfence(); + __kmpc_impl_threadfence(); } //////////////////////////////////////////////////////////////////////////////// diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu index 75068c7814a..6549d76def7 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu @@ -211,7 +211,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin( } // FIXME: Need to see the impact of doing it here. - __threadfence_block(); + __kmpc_impl_threadfence_block(); DSPRINT0(DSFLAG, "Exiting __kmpc_data_sharing_environment_begin\n"); @@ -289,7 +289,7 @@ EXTERN void __kmpc_data_sharing_environment_end( } // FIXME: Need to see the impact of doing it here. 
- __threadfence_block(); + __kmpc_impl_threadfence_block(); DSPRINT0(DSFLAG, "Exiting __kmpc_data_sharing_environment_end\n"); return; @@ -357,7 +357,7 @@ EXTERN void __kmpc_data_sharing_init_stack_spmd() { if (GetThreadIdInBlock() == 0) data_sharing_init_stack_common(); - __threadfence_block(); + __kmpc_impl_threadfence_block(); } INLINE static void* data_sharing_push_stack_common(size_t PushSize) { @@ -474,7 +474,7 @@ EXTERN void *__kmpc_data_sharing_push_stack(size_t DataSize, EXTERN void __kmpc_data_sharing_pop_stack(void *FrameStart) { ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Expected initialized runtime."); - __threadfence_block(); + __kmpc_impl_threadfence_block(); if (GetThreadIdInBlock() % WARPSIZE == 0) { unsigned WID = GetWarpId(); @@ -555,7 +555,7 @@ EXTERN void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode, ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(), "Must be called only in the target master thread."); *frame = omptarget_nvptx_simpleMemoryManager.Acquire(buf, size); - __threadfence(); + __kmpc_impl_threadfence(); } EXTERN void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode, @@ -569,7 +569,7 @@ EXTERN void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode, } return; } - __threadfence(); + __kmpc_impl_threadfence(); ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(), "Must be called only in the target master thread."); omptarget_nvptx_simpleMemoryManager.Release(); diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu index dfa7c4db1a6..cfccf78c377 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu @@ -241,7 +241,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars, char *scratchpad = GetTeamsReductionScratchpad(); scratchFct(reduce_data, scratchpad, TeamId, NumTeams); - __threadfence(); + 
__kmpc_impl_threadfence(); // atomicInc increments 'timestamp' and has a range [0, NumTeams-1]. // It resets 'timestamp' back to 0 once the last team increments @@ -389,7 +389,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc, EXTERN void __kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc, int32_t global_tid, kmp_CriticalName *crit) { - __threadfence_system(); + __kmpc_impl_threadfence_system(); (void)atomicExch((uint32_t *)crit, 0); } @@ -446,7 +446,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2( lgcpyFct(global_buffer, ModBockId, reduce_data); else lgredFct(global_buffer, ModBockId, reduce_data); - __threadfence_system(); + __kmpc_impl_threadfence_system(); // Increment team counter. // This counter is incremented by all teams in the current diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h index 5daeb5ca829..fe36a46c5cd 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -163,4 +163,8 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) { : "memory"); } +INLINE void __kmpc_impl_threadfence(void) { __threadfence(); } +INLINE void __kmpc_impl_threadfence_block(void) { __threadfence_block(); } +INLINE void __kmpc_impl_threadfence_system(void) { __threadfence_system(); } + #endif |

