summary | refs | log | tree | commit | diff | stats
path: root/clang/lib/Headers/__clang_cuda_intrinsics.h
diff options
context:
space:
mode:
author: Artem Belevich <tra@google.com> 2017-09-25 18:53:57 +0000
committer: Artem Belevich <tra@google.com> 2017-09-25 18:53:57 +0000
commit: 9941ee9529da345f6d2ea2d8b586c6390ac4a0be (patch)
tree: 14f17d0e0aebc4f07e70cfbe9b051a641b78fe8b /clang/lib/Headers/__clang_cuda_intrinsics.h
parent: d72bd83479da9ca130514631a07fe25d0e5e381a (diff)
download: bcm5719-llvm-9941ee9529da345f6d2ea2d8b586c6390ac4a0be.tar.gz
bcm5719-llvm-9941ee9529da345f6d2ea2d8b586c6390ac4a0be.zip
[NVPTX] added match.{any,all}.sync instructions, intrinsics & builtins.
Differential Revision: https://reviews.llvm.org/D38191 llvm-svn: 314135
Diffstat (limited to 'clang/lib/Headers/__clang_cuda_intrinsics.h')
-rw-r--r-- clang/lib/Headers/__clang_cuda_intrinsics.h 32
1 files changed, 29 insertions, 3 deletions
diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index 0e2141a2a17..49cf384fd90 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -92,8 +92,9 @@ __MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f);
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
+#if CUDA_VERSION >= 9000
+#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)
// __shfl_sync_* variants available in CUDA-9
-#if CUDA_VERSION >= 9000 && (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)
#pragma push_macro("__MAKE_SYNC_SHUFFLES")
#define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, \
__Mask) \
@@ -187,8 +188,33 @@ inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {
inline __device__ unsigned int activemask() { return __nvvm_vote_ballot(1); }
-#endif // __CUDA_VERSION >= 9000 && (!defined(__CUDA_ARCH__) ||
- // __CUDA_ARCH__ >= 300)
+#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
+
+// Define __match* builtins CUDA-9 headers expect to see.
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
+inline __device__ unsigned int __match32_any_sync(unsigned int mask,
+ unsigned int value) {
+ return __nvvm_match_any_sync_i32(mask, value);
+}
+
+inline __device__ unsigned long long
+__match64_any_sync(unsigned int mask, unsigned long long value) {
+ return __nvvm_match_any_sync_i64(mask, value);
+}
+
+inline __device__ unsigned int
+__match32_all_sync(unsigned int mask, unsigned int value, int *pred) {
+ return __nvvm_match_all_sync_i32p(mask, value, pred);
+}
+
+inline __device__ unsigned long long
+__match64_all_sync(unsigned int mask, unsigned long long value, int *pred) {
+ return __nvvm_match_all_sync_i64p(mask, value, pred);
+}
+#include "crt/sm_70_rt.hpp"
+
+#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
+#endif // CUDA_VERSION >= 9000
// sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}.
OpenPOWER on IntegriCloud