diff options
| author | Artem Belevich <tra@google.com> | 2017-09-25 18:53:57 +0000 |
|---|---|---|
| committer | Artem Belevich <tra@google.com> | 2017-09-25 18:53:57 +0000 |
| commit | 9941ee9529da345f6d2ea2d8b586c6390ac4a0be (patch) | |
| tree | 14f17d0e0aebc4f07e70cfbe9b051a641b78fe8b /clang/lib/Headers/__clang_cuda_intrinsics.h | |
| parent | d72bd83479da9ca130514631a07fe25d0e5e381a (diff) | |
| download | bcm5719-llvm-9941ee9529da345f6d2ea2d8b586c6390ac4a0be.tar.gz bcm5719-llvm-9941ee9529da345f6d2ea2d8b586c6390ac4a0be.zip | |
[NVPTX] added match.{any,all}.sync instructions, intrinsics & builtins.
Differential Revision: https://reviews.llvm.org/D38191
llvm-svn: 314135
Diffstat (limited to 'clang/lib/Headers/__clang_cuda_intrinsics.h')
| -rw-r--r-- | clang/lib/Headers/__clang_cuda_intrinsics.h | 32 |
1 files changed, 29 insertions, 3 deletions
```diff
diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index 0e2141a2a17..49cf384fd90 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -92,8 +92,9 @@ __MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f);
 
 #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
 
+#if CUDA_VERSION >= 9000
+#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)
 // __shfl_sync_* variants available in CUDA-9
-#if CUDA_VERSION >= 9000 && (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)
 #pragma push_macro("__MAKE_SYNC_SHUFFLES")
 #define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic,       \
                              __Mask)                                           \
@@ -187,8 +188,33 @@ inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {
 
 inline __device__ unsigned int activemask() { return __nvvm_vote_ballot(1); }
 
-#endif // __CUDA_VERSION >= 9000 && (!defined(__CUDA_ARCH__) ||
-       // __CUDA_ARCH__ >= 300)
+#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
+
+// Define __match* builtins CUDA-9 headers expect to see.
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
+inline __device__ unsigned int __match32_any_sync(unsigned int mask,
+                                                  unsigned int value) {
+  return __nvvm_match_any_sync_i32(mask, value);
+}
+
+inline __device__ unsigned long long
+__match64_any_sync(unsigned int mask, unsigned long long value) {
+  return __nvvm_match_any_sync_i64(mask, value);
+}
+
+inline __device__ unsigned int
+__match32_all_sync(unsigned int mask, unsigned int value, int *pred) {
+  return __nvvm_match_all_sync_i32p(mask, value, pred);
+}
+
+inline __device__ unsigned long long
+__match64_all_sync(unsigned int mask, unsigned long long value, int *pred) {
+  return __nvvm_match_all_sync_i64p(mask, value, pred);
+}
+#include "crt/sm_70_rt.hpp"
+
+#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
+#endif // __CUDA_VERSION >= 9000
 
 // sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}.
 
```

