summary | refs | log | tree | commit | diff | stats
path: root/libclc/generic/include/clc/shared
diff options
context:
space:
mode:
author: Jan Vesely <jan.vesely@rutgers.edu> 2017-09-08 23:59:00 +0000
committer: Jan Vesely <jan.vesely@rutgers.edu> 2017-09-08 23:59:00 +0000
commit: 285d2fb85c89a42dca49ea66af425f4deba352c2 (patch)
tree: 0f805fbba8e62c67a5cbdf130f1361271b6f3c41 /libclc/generic/include/clc/shared
parent: 661ac03a1bef271698a92fb39eff369063bfcca0 (diff)
download: bcm5719-llvm-285d2fb85c89a42dca49ea66af425f4deba352c2.tar.gz
          bcm5719-llvm-285d2fb85c89a42dca49ea66af425f4deba352c2.zip
Implement vload_half{,n} and vload(half)
v2: add vload(half) as well; make helpers amdgpu specific (NVPTX uses different private AS numbering); use clang builtin on clang >= 6.
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Tom Stellard <tstellar@redhat.com>
llvm-svn: 312839
Diffstat (limited to 'libclc/generic/include/clc/shared')
-rw-r--r-- libclc/generic/include/clc/shared/vload.h | 55
1 files changed, 35 insertions, 20 deletions
diff --git a/libclc/generic/include/clc/shared/vload.h b/libclc/generic/include/clc/shared/vload.h
index 93d07501d4a..8c262ddbffc 100644
--- a/libclc/generic/include/clc/shared/vload.h
+++ b/libclc/generic/include/clc/shared/vload.h
@@ -1,18 +1,21 @@
-#define _CLC_VLOAD_DECL(PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
- _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##WIDTH(size_t offset, const ADDR_SPACE PRIM_TYPE *x);
+#define _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
+ _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##SUFFIX##WIDTH(size_t offset, const ADDR_SPACE MEM_TYPE *x);
-#define _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, ADDR_SPACE) \
- _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
- _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
- _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
- _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
- _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
+#define _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE) \
+ _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
+ _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
+ _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
+ _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
+ _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
+
+#define _CLC_VECTOR_VLOAD_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) \
+ _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
+ _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
+ _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
+ _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
#define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
- _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __private) \
- _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __local) \
- _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __constant) \
- _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __global) \
+ _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE) \
#define _CLC_VECTOR_VLOAD_PRIM() \
_CLC_VECTOR_VLOAD_PRIM1(char) \
@@ -24,14 +27,26 @@
_CLC_VECTOR_VLOAD_PRIM1(long) \
_CLC_VECTOR_VLOAD_PRIM1(ulong) \
_CLC_VECTOR_VLOAD_PRIM1(float) \
-
+ _CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
+
#ifdef cl_khr_fp64
-#define _CLC_VECTOR_VLOAD() \
- _CLC_VECTOR_VLOAD_PRIM1(double) \
- _CLC_VECTOR_VLOAD_PRIM()
-#else
-#define _CLC_VECTOR_VLOAD() \
- _CLC_VECTOR_VLOAD_PRIM()
+#pragma OPENCL EXTENSION cl_khr_fp64: enable
+ _CLC_VECTOR_VLOAD_PRIM1(double)
#endif
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16: enable
+ _CLC_VECTOR_VLOAD_PRIM1(half)
+#endif
+
+_CLC_VECTOR_VLOAD_PRIM()
+// Plain vload_half also needs to be declared
+_CLC_VLOAD_DECL(_half, half, float, , __constant)
+_CLC_VLOAD_DECL(_half, half, float, , __global)
+_CLC_VLOAD_DECL(_half, half, float, , __local)
+_CLC_VLOAD_DECL(_half, half, float, , __private)
-_CLC_VECTOR_VLOAD()
+#undef _CLC_VLOAD_DECL
+#undef _CLC_VECTOR_VLOAD_DECL
+#undef _CLC_VECTOR_VLOAD_PRIM3
+#undef _CLC_VECTOR_VLOAD_PRIM1
+#undef _CLC_VECTOR_VLOAD_PRIM
OpenPOWER on IntegriCloud