summary | refs | log | tree | commit | diff | stats
path: root/clang/lib/Headers/__clang_cuda_intrinsics.h
diff options
context:
space:
mode:
author: Justin Lebar <jlebar@google.com> 2017-09-25 19:41:56 +0000
committer: Justin Lebar <jlebar@google.com> 2017-09-25 19:41:56 +0000
commit: d31d5e6aa26fcdbb6b68c364cbe01fe3901566bd (patch)
tree: 608a7c285f9cbea4eae73324c97ef87086bb4395 /clang/lib/Headers/__clang_cuda_intrinsics.h
parent: f4963ff162638547f3d4dc27a47c1f5308ecc54a (diff)
download: bcm5719-llvm-d31d5e6aa26fcdbb6b68c364cbe01fe3901566bd.tar.gz
bcm5719-llvm-d31d5e6aa26fcdbb6b68c364cbe01fe3901566bd.zip
Revert "[NVPTX] added match.{any,all}.sync instructions, intrinsics & builtins.", rL314135.
Causing assertion failures on macOS:

> Assertion failed: (Num < NumOperands && "Invalid child # of SDNode!"),
> function getOperand, file
> /Users/buildslave/jenkins/workspace/clang-stage1-cmake-RA-incremental/llvm/include/llvm/CodeGen/SelectionDAGNodes.h,
> line 835.

http://green.lab.llvm.org/green/job/clang-stage1-cmake-RA-incremental/42739/testReport/LLVM/CodeGen_NVPTX/surf_read_cuda_ll/

llvm-svn: 314142
Diffstat (limited to 'clang/lib/Headers/__clang_cuda_intrinsics.h')
-rw-r--r-- clang/lib/Headers/__clang_cuda_intrinsics.h | 32
1 files changed, 3 insertions, 29 deletions
diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index 49cf384fd90..0e2141a2a17 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -92,9 +92,8 @@ __MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f);
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
-#if CUDA_VERSION >= 9000
-#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)
// __shfl_sync_* variants available in CUDA-9
+#if CUDA_VERSION >= 9000 && (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)
#pragma push_macro("__MAKE_SYNC_SHUFFLES")
#define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, \
__Mask) \
@@ -188,33 +187,8 @@ inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {
inline __device__ unsigned int activemask() { return __nvvm_vote_ballot(1); }
-#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
-
-// Define __match* builtins CUDA-9 headers expect to see.
-#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
-inline __device__ unsigned int __match32_any_sync(unsigned int mask,
- unsigned int value) {
- return __nvvm_match_any_sync_i32(mask, value);
-}
-
-inline __device__ unsigned long long
-__match64_any_sync(unsigned int mask, unsigned long long value) {
- return __nvvm_match_any_sync_i64(mask, value);
-}
-
-inline __device__ unsigned int
-__match32_all_sync(unsigned int mask, unsigned int value, int *pred) {
- return __nvvm_match_all_sync_i32p(mask, value, pred);
-}
-
-inline __device__ unsigned long long
-__match64_all_sync(unsigned int mask, unsigned long long value, int *pred) {
- return __nvvm_match_all_sync_i64p(mask, value, pred);
-}
-#include "crt/sm_70_rt.hpp"
-
-#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
-#endif // __CUDA_VERSION >= 9000
+#endif // __CUDA_VERSION >= 9000 && (!defined(__CUDA_ARCH__) ||
+ // __CUDA_ARCH__ >= 300)
// sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}.
OpenPOWER on IntegriCloud