summaryrefslogtreecommitdiffstats
path: root/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
diff options
context:
space:
mode:
Diffstat (limited to 'openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu')
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu19
1 files changed, 12 insertions, 7 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index f2892acb790..78b04ec5cff 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -13,6 +13,11 @@
#include "target_impl.h"
#include <stdio.h>
+// Warp ID in the CUDA block
+INLINE static unsigned getWarpId() { return threadIdx.x / WARPSIZE; }
+// Lane ID in the CUDA warp.
+INLINE static unsigned getLaneId() { return threadIdx.x % WARPSIZE; }
+
// Return true if this is the first active thread in the warp.
INLINE static bool IsWarpMasterActiveThread() {
unsigned long long Mask = __kmpc_impl_activemask();
@@ -62,7 +67,7 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS,
DSPRINT0(DSFLAG_INIT,
"Entering __kmpc_initialize_data_sharing_environment\n");
- unsigned WID = GetWarpId();
+ unsigned WID = getWarpId();
DSPRINT(DSFLAG_INIT, "Warp ID: %u\n", WID);
omptarget_nvptx_TeamDescr *teamDescr =
@@ -106,7 +111,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
DSPRINT(DSFLAG, "Default Data Size %016llx\n",
(unsigned long long)SharingDefaultDataSize);
- unsigned WID = GetWarpId();
+ unsigned WID = getWarpId();
__kmpc_impl_lanemask_t CurActiveThreads = __kmpc_impl_activemask();
__kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
@@ -226,7 +231,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_end\n");
- unsigned WID = GetWarpId();
+ unsigned WID = getWarpId();
if (IsEntryPoint) {
if (IsWarpMasterActiveThread()) {
@@ -354,7 +359,7 @@ EXTERN void __kmpc_data_sharing_init_stack_spmd() {
// This function initializes the stack pointer with the pointer to the
// statically allocated shared memory slots. The size of a shared memory
// slot is pre-determined to be 256 bytes.
- if (GetThreadIdInBlock() == 0)
+ if (threadIdx.x == 0)
data_sharing_init_stack_common();
__threadfence_block();
@@ -372,7 +377,7 @@ INLINE static void* data_sharing_push_stack_common(size_t PushSize) {
PushSize = (PushSize + (Alignment - 1)) / Alignment * Alignment;
// Frame pointer must be visible to all workers in the same warp.
- const unsigned WID = GetWarpId();
+ const unsigned WID = getWarpId();
void *FrameP = 0;
__kmpc_impl_lanemask_t CurActive = __kmpc_impl_activemask();
@@ -462,7 +467,7 @@ EXTERN void *__kmpc_data_sharing_push_stack(size_t DataSize,
// Compute the start address of the frame of each thread in the warp.
uintptr_t FrameStartAddress =
(uintptr_t) data_sharing_push_stack_common(PushSize);
- FrameStartAddress += (uintptr_t) (GetLaneId() * DataSize);
+ FrameStartAddress += (uintptr_t) (getLaneId() * DataSize);
return (void *)FrameStartAddress;
}
@@ -477,7 +482,7 @@ EXTERN void __kmpc_data_sharing_pop_stack(void *FrameStart) {
__threadfence_block();
if (GetThreadIdInBlock() % WARPSIZE == 0) {
- unsigned WID = GetWarpId();
+ unsigned WID = getWarpId();
// Current slot
__kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
OpenPOWER on IntegriCloud