diff options
| author | Jan Vesely <jan.vesely@rutgers.edu> | 2017-09-08 23:59:00 +0000 |
|---|---|---|
| committer | Jan Vesely <jan.vesely@rutgers.edu> | 2017-09-08 23:59:00 +0000 |
| commit | 285d2fb85c89a42dca49ea66af425f4deba352c2 (patch) | |
| tree | 0f805fbba8e62c67a5cbdf130f1361271b6f3c41 /libclc/generic/include/clc/shared | |
| parent | 661ac03a1bef271698a92fb39eff369063bfcca0 (diff) | |
| download | bcm5719-llvm-285d2fb85c89a42dca49ea66af425f4deba352c2.tar.gz bcm5719-llvm-285d2fb85c89a42dca49ea66af425f4deba352c2.zip | |
Implement vload_half{,n} and vload(half)
v2: add vload(half) as well
make the helpers amdgpu-specific (NVPTX uses a different numbering for the private address space)
use the clang builtin on clang >= 6
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Tom Stellard <tstellar@redhat.com>
llvm-svn: 312839
Diffstat (limited to 'libclc/generic/include/clc/shared')
| -rw-r--r-- | libclc/generic/include/clc/shared/vload.h | 55 |
1 file changed, 35 insertions, 20 deletions
```diff
diff --git a/libclc/generic/include/clc/shared/vload.h b/libclc/generic/include/clc/shared/vload.h
index 93d07501d4a..8c262ddbffc 100644
--- a/libclc/generic/include/clc/shared/vload.h
+++ b/libclc/generic/include/clc/shared/vload.h
@@ -1,18 +1,21 @@
-#define _CLC_VLOAD_DECL(PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
-  _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##WIDTH(size_t offset, const ADDR_SPACE PRIM_TYPE *x);
+#define _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
+  _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##SUFFIX##WIDTH(size_t offset, const ADDR_SPACE MEM_TYPE *x);
 
-#define _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, ADDR_SPACE) \
-  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
-  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
-  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
-  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
-  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
+#define _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE) \
+  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
+  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
+  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
+  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
+  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
+
+#define _CLC_VECTOR_VLOAD_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) \
+  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
+  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
+  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
+  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
 
 #define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
-  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __private) \
-  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __local) \
-  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __constant) \
-  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __global) \
+  _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE) \
 
 #define _CLC_VECTOR_VLOAD_PRIM() \
     _CLC_VECTOR_VLOAD_PRIM1(char) \
@@ -24,14 +27,26 @@
     _CLC_VECTOR_VLOAD_PRIM1(long) \
     _CLC_VECTOR_VLOAD_PRIM1(ulong) \
     _CLC_VECTOR_VLOAD_PRIM1(float) \
-
+    _CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
+
 #ifdef cl_khr_fp64
-#define _CLC_VECTOR_VLOAD() \
-  _CLC_VECTOR_VLOAD_PRIM1(double) \
-  _CLC_VECTOR_VLOAD_PRIM()
-#else
-#define _CLC_VECTOR_VLOAD() \
-  _CLC_VECTOR_VLOAD_PRIM()
+#pragma OPENCL EXTENSION cl_khr_fp64: enable
+  _CLC_VECTOR_VLOAD_PRIM1(double)
 #endif
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16: enable
+  _CLC_VECTOR_VLOAD_PRIM1(half)
+#endif
+
+_CLC_VECTOR_VLOAD_PRIM()
+// Plain vload_half also needs to be declared
+_CLC_VLOAD_DECL(_half, half, float, , __constant)
+_CLC_VLOAD_DECL(_half, half, float, , __global)
+_CLC_VLOAD_DECL(_half, half, float, , __local)
+_CLC_VLOAD_DECL(_half, half, float, , __private)
 
-_CLC_VECTOR_VLOAD()
+#undef _CLC_VLOAD_DECL
+#undef _CLC_VECTOR_VLOAD_DECL
+#undef _CLC_VECTOR_VLOAD_PRIM3
+#undef _CLC_VECTOR_VLOAD_PRIM1
+#undef _CLC_VECTOR_VLOAD_PRIM
```

