summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h | 4
-rw-r--r-- openmp/libomptarget/deviceRTLs/common/src/loop.cu | 2
-rw-r--r-- openmp/libomptarget/deviceRTLs/common/src/support.cu | 4
-rw-r--r-- openmp/libomptarget/deviceRTLs/common/src/sync.cu | 2
-rw-r--r-- openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu | 12
-rw-r--r-- openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu | 6
-rw-r--r-- openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h | 4
7 files changed, 21 insertions, 13 deletions
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index 475851ac9af..b10f34a1664 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -124,6 +124,10 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
__builtin_amdgcn_s_barrier();
}
+EXTERN void __kmpc_impl_threadfence(void);
+EXTERN void __kmpc_impl_threadfence_block(void);
+EXTERN void __kmpc_impl_threadfence_system(void);
+
// DEVICE versions of part of libc
extern "C" {
DEVICE __attribute__((noreturn)) void
diff --git a/openmp/libomptarget/deviceRTLs/common/src/loop.cu b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
index 45bf8f40a92..59970a6db41 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/loop.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
@@ -363,7 +363,7 @@ public:
__kmpc_barrier(loc, threadId);
if (tid == 0) {
omptarget_nvptx_threadPrivateContext->Cnt() = 0;
- __threadfence_block();
+ __kmpc_impl_threadfence_block();
}
__kmpc_barrier(loc, threadId);
PRINT(LD_LOOP,
diff --git a/openmp/libomptarget/deviceRTLs/common/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu
index 44a42e172f2..2f992f2778e 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/support.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu
@@ -212,7 +212,7 @@ DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
if (Rank == 0) {
parallelLevel[GetWarpId()] +=
(1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
- __threadfence();
+ __kmpc_impl_threadfence();
}
__kmpc_impl_syncwarp(Mask);
}
@@ -224,7 +224,7 @@ DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
if (Rank == 0) {
parallelLevel[GetWarpId()] -=
(1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
- __threadfence();
+ __kmpc_impl_threadfence();
}
__kmpc_impl_syncwarp(Mask);
}
diff --git a/openmp/libomptarget/deviceRTLs/common/src/sync.cu b/openmp/libomptarget/deviceRTLs/common/src/sync.cu
index 0ee29bf316b..691e3436a38 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/sync.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/sync.cu
@@ -133,7 +133,7 @@ EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
EXTERN void __kmpc_flush(kmp_Ident *loc) {
PRINT0(LD_IO, "call kmpc_flush\n");
- __threadfence();
+ __kmpc_impl_threadfence();
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index 75068c7814a..6549d76def7 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -211,7 +211,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
}
// FIXME: Need to see the impact of doing it here.
- __threadfence_block();
+ __kmpc_impl_threadfence_block();
DSPRINT0(DSFLAG, "Exiting __kmpc_data_sharing_environment_begin\n");
@@ -289,7 +289,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
}
// FIXME: Need to see the impact of doing it here.
- __threadfence_block();
+ __kmpc_impl_threadfence_block();
DSPRINT0(DSFLAG, "Exiting __kmpc_data_sharing_environment_end\n");
return;
@@ -357,7 +357,7 @@ EXTERN void __kmpc_data_sharing_init_stack_spmd() {
if (GetThreadIdInBlock() == 0)
data_sharing_init_stack_common();
- __threadfence_block();
+ __kmpc_impl_threadfence_block();
}
INLINE static void* data_sharing_push_stack_common(size_t PushSize) {
@@ -474,7 +474,7 @@ EXTERN void *__kmpc_data_sharing_push_stack(size_t DataSize,
EXTERN void __kmpc_data_sharing_pop_stack(void *FrameStart) {
ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Expected initialized runtime.");
- __threadfence_block();
+ __kmpc_impl_threadfence_block();
if (GetThreadIdInBlock() % WARPSIZE == 0) {
unsigned WID = GetWarpId();
@@ -555,7 +555,7 @@ EXTERN void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(),
"Must be called only in the target master thread.");
*frame = omptarget_nvptx_simpleMemoryManager.Acquire(buf, size);
- __threadfence();
+ __kmpc_impl_threadfence();
}
EXTERN void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
@@ -569,7 +569,7 @@ EXTERN void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
}
return;
}
- __threadfence();
+ __kmpc_impl_threadfence();
ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(),
"Must be called only in the target master thread.");
omptarget_nvptx_simpleMemoryManager.Release();
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
index dfa7c4db1a6..cfccf78c377 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -241,7 +241,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars,
char *scratchpad = GetTeamsReductionScratchpad();
scratchFct(reduce_data, scratchpad, TeamId, NumTeams);
- __threadfence();
+ __kmpc_impl_threadfence();
// atomicInc increments 'timestamp' and has a range [0, NumTeams-1].
// It resets 'timestamp' back to 0 once the last team increments
@@ -389,7 +389,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc,
EXTERN void
__kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc, int32_t global_tid,
kmp_CriticalName *crit) {
- __threadfence_system();
+ __kmpc_impl_threadfence_system();
(void)atomicExch((uint32_t *)crit, 0);
}
@@ -446,7 +446,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
lgcpyFct(global_buffer, ModBockId, reduce_data);
else
lgredFct(global_buffer, ModBockId, reduce_data);
- __threadfence_system();
+ __kmpc_impl_threadfence_system();
// Increment team counter.
// This counter is incremented by all teams in the current
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 5daeb5ca829..fe36a46c5cd 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -163,4 +163,8 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
: "memory");
}
+INLINE void __kmpc_impl_threadfence(void) { __threadfence(); }
+INLINE void __kmpc_impl_threadfence_block(void) { __threadfence_block(); }
+INLINE void __kmpc_impl_threadfence_system(void) { __threadfence_system(); }
+
#endif
OpenPOWER on IntegriCloud