summary | refs | log | tree | commit | diff | stats
path: root/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
diff options
context:
space:
mode:
Diffstat (limited to 'openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h')
-rw-r--r-- openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h | 10
1 files changed, 10 insertions, 0 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index caa9feafe03..0f548289f9c 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -43,6 +43,7 @@ INLINE int __kmpc_impl_popc(uint32_t x) { return __popc(x); }
#endif
// In Cuda 9.0, the *_sync() version takes an extra argument 'mask'.
+
INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t Mask, int32_t Var,
int32_t SrcLane) {
#if CUDA_VERSION >= 9000
@@ -50,6 +51,15 @@ INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t Mask, int32_t Var,
#else
return __shfl(Var, SrcLane);
#endif // CUDA_VERSION
+
+INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t Mask,
+ int32_t Var, uint32_t Delta,
+ int32_t Width) {
+#if CUDA_VERSION >= 9000
+ return __shfl_down_sync(Mask, Var, Delta, Width);
+#else
+ return __shfl_down(Var, Delta, Width);
+#endif // CUDA_VERSION
}
INLINE void __kmpc_impl_syncwarp(int32_t Mask) { __SYNCWARP(Mask); }
OpenPOWER on IntegriCloud