diff options
Diffstat (limited to 'openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu')
-rw-r--r-- | openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu | 19 |
1 files changed, 12 insertions, 7 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu index f2892acb790..78b04ec5cff 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu @@ -13,6 +13,11 @@ #include "target_impl.h" #include <stdio.h> +// Warp ID in the CUDA block +INLINE static unsigned getWarpId() { return threadIdx.x / WARPSIZE; } +// Lane ID in the CUDA warp. +INLINE static unsigned getLaneId() { return threadIdx.x % WARPSIZE; } + // Return true if this is the first active thread in the warp. INLINE static bool IsWarpMasterActiveThread() { unsigned long long Mask = __kmpc_impl_activemask(); @@ -62,7 +67,7 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS, DSPRINT0(DSFLAG_INIT, "Entering __kmpc_initialize_data_sharing_environment\n"); - unsigned WID = GetWarpId(); + unsigned WID = getWarpId(); DSPRINT(DSFLAG_INIT, "Warp ID: %u\n", WID); omptarget_nvptx_TeamDescr *teamDescr = @@ -106,7 +111,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin( DSPRINT(DSFLAG, "Default Data Size %016llx\n", (unsigned long long)SharingDefaultDataSize); - unsigned WID = GetWarpId(); + unsigned WID = getWarpId(); __kmpc_impl_lanemask_t CurActiveThreads = __kmpc_impl_activemask(); __kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID]; @@ -226,7 +231,7 @@ EXTERN void __kmpc_data_sharing_environment_end( DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_end\n"); - unsigned WID = GetWarpId(); + unsigned WID = getWarpId(); if (IsEntryPoint) { if (IsWarpMasterActiveThread()) { @@ -354,7 +359,7 @@ EXTERN void __kmpc_data_sharing_init_stack_spmd() { // This function initializes the stack pointer with the pointer to the // statically allocated shared memory slots. The size of a shared memory // slot is pre-determined to be 256 bytes. - if (GetThreadIdInBlock() == 0) + if (threadIdx.x == 0) data_sharing_init_stack_common(); __threadfence_block(); @@ -372,7 +377,7 @@ INLINE static void* data_sharing_push_stack_common(size_t PushSize) { PushSize = (PushSize + (Alignment - 1)) / Alignment * Alignment; // Frame pointer must be visible to all workers in the same warp. - const unsigned WID = GetWarpId(); + const unsigned WID = getWarpId(); void *FrameP = 0; __kmpc_impl_lanemask_t CurActive = __kmpc_impl_activemask(); @@ -462,7 +467,7 @@ EXTERN void *__kmpc_data_sharing_push_stack(size_t DataSize, // Compute the start address of the frame of each thread in the warp. uintptr_t FrameStartAddress = (uintptr_t) data_sharing_push_stack_common(PushSize); - FrameStartAddress += (uintptr_t) (GetLaneId() * DataSize); + FrameStartAddress += (uintptr_t) (getLaneId() * DataSize); return (void *)FrameStartAddress; } @@ -477,7 +482,7 @@ EXTERN void __kmpc_data_sharing_pop_stack(void *FrameStart) { __threadfence_block(); if (GetThreadIdInBlock() % WARPSIZE == 0) { - unsigned WID = GetWarpId(); + unsigned WID = getWarpId(); // Current slot __kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID]; |