summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alexey Bataev <a.bataev@hotmail.com> 2019-08-26 17:32:45 +0000
committer Alexey Bataev <a.bataev@hotmail.com> 2019-08-26 17:32:45 +0000
commit da8b5cc9f138b20c75f1b0592d0b046b8d5b08b6 (patch)
tree cfea28298c286930c7dc050bc94639a0fe792bb1
parent 98f9e94e57d62170ee5c7dcd4e4d90ddb84baf2d (diff)
downloadbcm5719-llvm-da8b5cc9f138b20c75f1b0592d0b046b8d5b08b6.tar.gz
bcm5719-llvm-da8b5cc9f138b20c75f1b0592d0b046b8d5b08b6.zip
[OPENMP][NVPTX]Add __kmpc_syncwarp(int32_t) function.
Summary: Added the function void __kmpc_syncwarp(int32_t) to expose it to the compiler. It is required to fix the problem with critical regions in CUDA 9.0+. We cannot use a barrier in the critical region, but we still need to reconverge the threads in the warp afterwards. This function allows us to do this. Reviewers: grokos, jdoerfert Subscribers: guansong, openmp-commits, kkwli0, caomhin Tags: #openmp Differential Revision: https://reviews.llvm.org/D66672 llvm-svn: 369933
-rw-r--r-- openmp/libomptarget/deviceRTLs/nvptx/src/interface.h | 2
-rw-r--r-- openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu | 10
-rw-r--r-- openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h | 2
3 files changed, 14 insertions, 0 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h b/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h
index b2a13a41f6d..ab57715592e 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h
@@ -423,6 +423,8 @@ EXTERN void __kmpc_flush(kmp_Ident *loc);
// vote
EXTERN int32_t __kmpc_warp_active_thread_mask();
+// syncwarp
+EXTERN void __kmpc_syncwarp(int32_t);
// tasks
EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Ident *loc,
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
index 4607d6a7d8a..fcfe272bc0a 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "omptarget-nvptx.h"
+#include "target_impl.h"
////////////////////////////////////////////////////////////////////////////////
// KMP Ordered calls
@@ -144,3 +145,12 @@ EXTERN int32_t __kmpc_warp_active_thread_mask() {
PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n");
return __ACTIVEMASK();
}
+
+////////////////////////////////////////////////////////////////////////////////
+// Syncwarp
+////////////////////////////////////////////////////////////////////////////////
+
+EXTERN void __kmpc_syncwarp(int32_t Mask) {
+ PRINT0(LD_IO, "call __kmpc_syncwarp\n");
+ __kmpc_impl_syncwarp(Mask);
+}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 610469527cf..a1b4c20653c 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -38,4 +38,6 @@ INLINE int __kmpc_impl_ffs(uint32_t x) { return __ffs(x); }
INLINE int __kmpc_impl_popc(uint32_t x) { return __popc(x); }
+INLINE void __kmpc_impl_syncwarp(int32_t Mask) { __SYNCWARP(Mask); }
+
#endif
OpenPOWER on IntegriCloud