summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h | 4
-rw-r--r-- openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h | 18
2 files changed, 11 insertions, 11 deletions
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index e0d6bb1953c..4e8232974e7 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -101,8 +101,6 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
return __lanemask_gt();
}
-EXTERN bool __kmpc_impl_is_first_active_thread();
-
INLINE uint32_t __kmpc_impl_smid() {
return __smid();
}
@@ -126,6 +124,8 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
return __ballot64(1);
}
+EXTERN bool __kmpc_impl_is_first_active_thread();
+
EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
int32_t SrcLane);
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 6f6c38956a9..8461a93913a 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -94,15 +94,6 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
return res;
}
-// Return true if this is the first active thread in the warp.
-INLINE bool __kmpc_impl_is_first_active_thread() {
- unsigned long long Mask = __kmpc_impl_activemask();
- unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
- unsigned long long Sh = Mask << ShNum;
- // Truncate Sh to the 32 lower bits
- return (unsigned)Sh == 0;
-}
-
INLINE uint32_t __kmpc_impl_smid() {
uint32_t id;
asm("mov.u32 %0, %%smid;" : "=r"(id));
@@ -142,6 +133,15 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
#endif
}
+// Return true if this is the first active thread in the warp.
+INLINE bool __kmpc_impl_is_first_active_thread() {
+ unsigned long long Mask = __kmpc_impl_activemask();
+ unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
+ unsigned long long Sh = Mask << ShNum;
+ // Truncate Sh to the 32 lower bits
+ return (unsigned)Sh == 0;
+}
+
// In Cuda 9.0, the *_sync() version takes an extra argument 'mask'.
INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t Mask, int32_t Var,
OpenPOWER on IntegriCloud