summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
authorJustin Lebar <jlebar@google.com>2016-02-24 21:49:33 +0000
committerJustin Lebar <jlebar@google.com>2016-02-24 21:49:33 +0000
commitd7a35492ada22de1ce0b1681a669900de3c72439 (patch)
tree0c400e24e54bb0ab725323cef38fdc2954e02b97 /clang/lib
parentc8dae5378b6e5520e958adf1ae28805916665547 (diff)
downloadbcm5719-llvm-d7a35492ada22de1ce0b1681a669900de3c72439.tar.gz
bcm5719-llvm-d7a35492ada22de1ce0b1681a669900de3c72439.zip
[CUDA] Add conversion operators for threadIdx, blockIdx, gridDim, and blockDim to uint3 and dim3.
Summary: This lets you write, e.g. uint3 a = threadIdx; uint3 b = blockIdx; dim3 c = gridDim; dim3 d = blockDim; which is legal in nvcc, but was not legal in clang. The fact that e.g. the type of threadIdx is not actually uint3 is still observable, but now you have to try to observe it. Reviewers: tra Subscribers: echristo, cfe-commits Differential Revision: http://reviews.llvm.org/D17561 llvm-svn: 261777
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/Headers/__clang_cuda_runtime_wrapper.h27
-rw-r--r--clang/lib/Headers/cuda_builtin_vars.h18
2 files changed, 44 insertions, 1 deletions
diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
index fb527dc9abc..8753a8ce6e4 100644
--- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -245,6 +245,33 @@ __device__ static inline void *malloc(size_t __size) {
}
} // namespace std
+// Out-of-line implementations from cuda_builtin_vars.h. These need to come
+// after we've pulled in the definition of uint3 and dim3.
+
+__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {
+ uint3 ret;
+ ret.x = x;
+ ret.y = y;
+ ret.z = z;
+ return ret;
+}
+
+__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {
+ uint3 ret;
+ ret.x = x;
+ ret.y = y;
+ ret.z = z;
+ return ret;
+}
+
+__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {
+ return dim3(x, y, z);
+}
+
+__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
+ return dim3(x, y, z);
+}
+
#include <__clang_cuda_cmath.h>
// curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host
diff --git a/clang/lib/Headers/cuda_builtin_vars.h b/clang/lib/Headers/cuda_builtin_vars.h
index 901356b3d5c..ec830864036 100644
--- a/clang/lib/Headers/cuda_builtin_vars.h
+++ b/clang/lib/Headers/cuda_builtin_vars.h
@@ -24,10 +24,14 @@
#ifndef __CUDA_BUILTIN_VARS_H
#define __CUDA_BUILTIN_VARS_H
+// Forward declares from vector_types.h.
+struct uint3;
+struct dim3;
+
// The file implements built-in CUDA variables using __declspec(property).
// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
// All read accesses of built-in variable fields get converted into calls to a
-// getter function which in turn would call appropriate builtin to fetch the
+// getter function which in turn calls the appropriate builtin to fetch the
// value.
//
// Example:
@@ -63,6 +67,9 @@ struct __cuda_builtin_threadIdx_t {
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z());
+ // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
+ // uint3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator uint3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
};
@@ -71,6 +78,9 @@ struct __cuda_builtin_blockIdx_t {
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z());
+ // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
+ // uint3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator uint3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
};
@@ -79,6 +89,9 @@ struct __cuda_builtin_blockDim_t {
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z());
+ // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
+ // dim3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator dim3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
};
@@ -87,6 +100,9 @@ struct __cuda_builtin_gridDim_t {
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z());
+ // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
+ // dim3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator dim3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
};
OpenPOWER on IntegriCloud