summaryrefslogtreecommitdiffstats
path: root/libclc/generic/lib
diff options
context:
space:
mode:
authorJan Vesely <jan.vesely@rutgers.edu>2017-09-08 23:59:00 +0000
committerJan Vesely <jan.vesely@rutgers.edu>2017-09-08 23:59:00 +0000
commit285d2fb85c89a42dca49ea66af425f4deba352c2 (patch)
tree0f805fbba8e62c67a5cbdf130f1361271b6f3c41 /libclc/generic/lib
parent661ac03a1bef271698a92fb39eff369063bfcca0 (diff)
downloadbcm5719-llvm-285d2fb85c89a42dca49ea66af425f4deba352c2.tar.gz
bcm5719-llvm-285d2fb85c89a42dca49ea66af425f4deba352c2.zip
Implement vload_half{,n} and vload(half)
v2: add vload(half) as well;
    make helpers amdgpu specific (NVPTX uses different private AS numbering);
    use clang builtin on clang >= 6.
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Tom Stellard <tstellar@redhat.com>
llvm-svn: 312839
Diffstat (limited to 'libclc/generic/lib')
-rw-r--r-- libclc/generic/lib/shared/vload.cl 59
-rw-r--r-- libclc/generic/lib/shared/vload_half.inc 13
2 files changed, 72 insertions, 0 deletions
diff --git a/libclc/generic/lib/shared/vload.cl b/libclc/generic/lib/shared/vload.cl
index 88972005cfa..08922708fbe 100644
--- a/libclc/generic/lib/shared/vload.cl
+++ b/libclc/generic/lib/shared/vload.cl
@@ -50,3 +50,62 @@ VLOAD_TYPES()
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
VLOAD_ADDR_SPACES(double)
#endif
+#ifdef cl_khr_fp16 /* plain vload on half is only built when the fp16 extension is available */
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+ VLOAD_ADDR_SPACES(half) /* macro defined outside this view; presumably emits the half vload overloads per address space */
+#endif /* cl_khr_fp16 */
+
+/* The vload_half functions are legal even without cl_khr_fp16. */
+/* There is no vload_half variant for double. */
+#if __clang_major__ < 6 /* older clang: load through one helper function per address space */
+float __clc_vload_half_float_helper__constant(const __constant half *); /* the four helpers are only declared here */
+float __clc_vload_half_float_helper__global(const __global half *);
+float __clc_vload_half_float_helper__local(const __local half *);
+float __clc_vload_half_float_helper__private(const __private half *);
+
+#define VEC_LOAD1(val, AS) val = __clc_vload_half_float_helper##AS (&mem[offset++]); /* AS is pasted onto the helper name; reads mem[offset] from the enclosing function, then advances offset */
+#else /* clang >= 6: use the compiler builtin instead; AS is unused in this variant */
+#define VEC_LOAD1(val, AS) val = __builtin_load_halff(&mem[offset++]);
+#endif /* __clang_major__ < 6 */
+
+#define VEC_LOAD2(val, AS) /* two consecutive elements: val.lo first, then val.hi */ \
+ VEC_LOAD1(val.lo, AS) \
+ VEC_LOAD1(val.hi, AS)
+#define VEC_LOAD3(val, AS) /* three consecutive elements, addressed by component (.s0, .s1, .s2) */ \
+ VEC_LOAD1(val.s0, AS) \
+ VEC_LOAD1(val.s1, AS) \
+ VEC_LOAD1(val.s2, AS)
+#define VEC_LOAD4(val, AS) /* 4, 8 and 16 wide: load the .lo half, then the .hi half, with the next smaller macro */ \
+ VEC_LOAD2(val.lo, AS) \
+ VEC_LOAD2(val.hi, AS)
+#define VEC_LOAD8(val, AS) \
+ VEC_LOAD4(val.lo, AS) \
+ VEC_LOAD4(val.hi, AS)
+#define VEC_LOAD16(val, AS) \
+ VEC_LOAD8(val.lo, AS) \
+ VEC_LOAD8(val.hi, AS)
+
+#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) /* defines one vload_half<SUFFIX> overload returning TYPE for address space AS */ \
+ _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS half *mem) { \
+ offset *= VEC_SIZE; /* vector index -> index of its first half element in mem */ \
+ TYPE __tmp; \
+ VEC_LOAD##VEC_SIZE(__tmp, AS) /* selects VEC_LOAD1/2/3/4/8/16; each element load advances offset */ \
+ return __tmp; \
+ }
+
+#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) /* extra macro level so arguments are expanded before the ## pasting in __FUNC */
+
+#define __CLC_BODY "vload_half.inc" /* body file consumed by gentype.inc; presumably included once per generic type - confirm in gentype.inc */
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY
+#undef FUNC /* from here on: drop the helper macros defined above */
+#undef __FUNC
+#undef VEC_LOAD16
+#undef VEC_LOAD8
+#undef VEC_LOAD4
+#undef VEC_LOAD3
+#undef VEC_LOAD2
+#undef VEC_LOAD1
+#undef VLOAD_TYPES /* the VLOAD_* macros are not defined in the visible part; presumably set up earlier in vload.cl */
+#undef VLOAD_ADDR_SPACES
+#undef VLOAD_VECTORIZE
diff --git a/libclc/generic/lib/shared/vload_half.inc b/libclc/generic/lib/shared/vload_half.inc
new file mode 100644
index 00000000000..00dae8ad337
--- /dev/null
+++ b/libclc/generic/lib/shared/vload_half.inc
@@ -0,0 +1,13 @@
+#if __CLC_FPSIZE == 32 /* emit functions only for the 32-bit float generic types */
+#ifdef __CLC_VECSIZE /* vector type: the vector size is both the name suffix and the element count */
+ FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
+ FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
+ FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
+ FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __constant);
+#else /* scalar type: empty suffix (plain vload_half), one element */
+ FUNC(, 1, __CLC_GENTYPE, __private);
+ FUNC(, 1, __CLC_GENTYPE, __local);
+ FUNC(, 1, __CLC_GENTYPE, __global);
+ FUNC(, 1, __CLC_GENTYPE, __constant);
+#endif /* __CLC_VECSIZE */
+#endif /* __CLC_FPSIZE == 32 */
OpenPOWER on IntegriCloud