diff options
| author | Jan Vesely <jan.vesely@rutgers.edu> | 2017-09-08 23:59:00 +0000 |
|---|---|---|
| committer | Jan Vesely <jan.vesely@rutgers.edu> | 2017-09-08 23:59:00 +0000 |
| commit | 285d2fb85c89a42dca49ea66af425f4deba352c2 (patch) | |
| tree | 0f805fbba8e62c67a5cbdf130f1361271b6f3c41 /libclc/generic/lib | |
| parent | 661ac03a1bef271698a92fb39eff369063bfcca0 (diff) | |
| download | bcm5719-llvm-285d2fb85c89a42dca49ea66af425f4deba352c2.tar.gz bcm5719-llvm-285d2fb85c89a42dca49ea66af425f4deba352c2.zip | |
Implement vload_half{,n} and vload(half)
v2: add vload(half) as well;
make the helpers amdgpu-specific (NVPTX uses a different numbering for the private address space);
use the clang builtin on clang >= 6.
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Tom Stellard <tstellar@redhat.com>
llvm-svn: 312839
Diffstat (limited to 'libclc/generic/lib')
| -rw-r--r-- | libclc/generic/lib/shared/vload.cl | 59 | ||||
| -rw-r--r-- | libclc/generic/lib/shared/vload_half.inc | 13 |
2 files changed, 72 insertions, 0 deletions
diff --git a/libclc/generic/lib/shared/vload.cl b/libclc/generic/lib/shared/vload.cl index 88972005cfa..08922708fbe 100644 --- a/libclc/generic/lib/shared/vload.cl +++ b/libclc/generic/lib/shared/vload.cl @@ -50,3 +50,62 @@ VLOAD_TYPES() #pragma OPENCL EXTENSION cl_khr_fp64 : enable VLOAD_ADDR_SPACES(double) #endif +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + VLOAD_ADDR_SPACES(half) +#endif + +/* vload_half are legal even without cl_khr_fp16 */ +/* no vload_half for double */ +#if __clang_major__ < 6 +float __clc_vload_half_float_helper__constant(const __constant half *); +float __clc_vload_half_float_helper__global(const __global half *); +float __clc_vload_half_float_helper__local(const __local half *); +float __clc_vload_half_float_helper__private(const __private half *); + +#define VEC_LOAD1(val, AS) val = __clc_vload_half_float_helper##AS (&mem[offset++]); +#else +#define VEC_LOAD1(val, AS) val = __builtin_load_halff(&mem[offset++]); +#endif + +#define VEC_LOAD2(val, AS) \ + VEC_LOAD1(val.lo, AS) \ + VEC_LOAD1(val.hi, AS) +#define VEC_LOAD3(val, AS) \ + VEC_LOAD1(val.s0, AS) \ + VEC_LOAD1(val.s1, AS) \ + VEC_LOAD1(val.s2, AS) +#define VEC_LOAD4(val, AS) \ + VEC_LOAD2(val.lo, AS) \ + VEC_LOAD2(val.hi, AS) +#define VEC_LOAD8(val, AS) \ + VEC_LOAD4(val.lo, AS) \ + VEC_LOAD4(val.hi, AS) +#define VEC_LOAD16(val, AS) \ + VEC_LOAD8(val.lo, AS) \ + VEC_LOAD8(val.hi, AS) + +#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS half *mem) { \ + offset *= VEC_SIZE; \ + TYPE __tmp; \ + VEC_LOAD##VEC_SIZE(__tmp, AS) \ + return __tmp; \ + } + +#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) + +#define __CLC_BODY "vload_half.inc" +#include <clc/math/gentype.inc> +#undef __CLC_BODY +#undef FUNC +#undef __FUNC +#undef VEC_LOAD16 +#undef VEC_LOAD8 +#undef VEC_LOAD4 +#undef VEC_LOAD3 +#undef VEC_LOAD2 +#undef VEC_LOAD1 +#undef VLOAD_TYPES +#undef VLOAD_ADDR_SPACES 
+#undef VLOAD_VECTORIZE diff --git a/libclc/generic/lib/shared/vload_half.inc b/libclc/generic/lib/shared/vload_half.inc new file mode 100644 index 00000000000..00dae8ad337 --- /dev/null +++ b/libclc/generic/lib/shared/vload_half.inc @@ -0,0 +1,13 @@ +#if __CLC_FPSIZE == 32 +#ifdef __CLC_VECSIZE + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private); + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local); + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global); + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __constant); +#else + FUNC(, 1, __CLC_GENTYPE, __private); + FUNC(, 1, __CLC_GENTYPE, __local); + FUNC(, 1, __CLC_GENTYPE, __global); + FUNC(, 1, __CLC_GENTYPE, __constant); +#endif +#endif |

