[libomptarget] Refactor syncwarp macro to inline function

Summary:
[libomptarget] Refactor syncwarp macro to inline function

See also abandoned D66846, split into this diff and others.

Reviewers: jdoerfert, ABataev, grokos, ronlieb, gregrodgers
Subscribers: openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D66857
llvm-svn: 370149
author: Jon Chesterfield <jonathanchesterfield@gmail.com> 2019-08-28 02:02:53 +0000
committer: Jon Chesterfield <jonathanchesterfield@gmail.com> 2019-08-28 02:02:53 +0000
commit: be3d4873130096ee722ccad2eff392bc6955b333 (patch)
tree: dc1c7b44f4944c91d0dd040904b53b1d49a07514
parent: e73e3013a646b9c5d3e11fac10afcfff464cb895 (diff)
download: bcm5719-llvm-be3d4873130096ee722ccad2eff392bc6955b333.tar.gz
bcm5719-llvm-be3d4873130096ee722ccad2eff392bc6955b333.zip
3 files changed, 13 insertions, 8 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index 0a0c6cc4334..5519f647655 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -52,11 +52,8 @@
 #error CUDA_VERSION macro is undefined, something wrong with cuda.
 #elif CUDA_VERSION >= 9000
 #define __ACTIVEMASK() __activemask()
-#define __SYNCWARP(Mask) __syncwarp(Mask)
 #else
 #define __ACTIVEMASK() __ballot(1)
-// In Cuda < 9.0 no need to sync threads in warps.
-#define __SYNCWARP(Mask)
 #endif // CUDA_VERSION
 
 #define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory");
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
index ceed7d3f7c8..c1a84679649 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
@@ -14,6 +14,8 @@
 // Execution Parameters
 ////////////////////////////////////////////////////////////////////////////////
 
+#include "target_impl.h"
+
 INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
   execution_param = EMode;
   execution_param |= RMode;
@@ -203,7 +205,7 @@ INLINE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); }
 
 INLINE void IncParallelLevel(bool ActiveParallel) {
   unsigned Active = __ACTIVEMASK();
-  __SYNCWARP(Active);
+  __kmpc_impl_syncwarp(Active);
   unsigned LaneMaskLt;
   asm("mov.u32 %0, %%lanemask_lt;" : "=r"(LaneMaskLt));
   unsigned Rank = __popc(Active & LaneMaskLt);
@@ -212,12 +214,12 @@ INLINE void IncParallelLevel(bool ActiveParallel) {
         (1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
     __threadfence();
   }
-  __SYNCWARP(Active);
+  __kmpc_impl_syncwarp(Active);
 }
 
 INLINE void DecParallelLevel(bool ActiveParallel) {
   unsigned Active = __ACTIVEMASK();
-  __SYNCWARP(Active);
+  __kmpc_impl_syncwarp(Active);
   unsigned LaneMaskLt;
   asm("mov.u32 %0, %%lanemask_lt;" : "=r"(LaneMaskLt));
   unsigned Rank = __popc(Active & LaneMaskLt);
@@ -226,7 +228,7 @@ INLINE void DecParallelLevel(bool ActiveParallel) {
         (1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
     __threadfence();
   }
-  __SYNCWARP(Active);
+  __kmpc_impl_syncwarp(Active);
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 144f7ab1d79..91883eaea54 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -63,6 +63,12 @@ INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t Mask,
 #endif // CUDA_VERSION
 }
 
-INLINE void __kmpc_impl_syncwarp(int32_t Mask) { __SYNCWARP(Mask); }
+INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) {
+#if CUDA_VERSION >= 9000
+  __syncwarp(Mask);
+#else
+  // In Cuda < 9.0 no need to sync threads in warps.
+#endif // CUDA_VERSION
+}
 
 #endif
author	Jon Chesterfield <jonathanchesterfield@gmail.com>	2019-08-28 02:02:53 +0000
committer	Jon Chesterfield <jonathanchesterfield@gmail.com>	2019-08-28 02:02:53 +0000
commit	be3d4873130096ee722ccad2eff392bc6955b333 (patch)
tree	dc1c7b44f4944c91d0dd040904b53b1d49a07514
parent	e73e3013a646b9c5d3e11fac10afcfff464cb895 (diff)
download	bcm5719-llvm-be3d4873130096ee722ccad2eff392bc6955b333.tar.gz bcm5719-llvm-be3d4873130096ee722ccad2eff392bc6955b333.zip