summaryrefslogtreecommitdiffstats
path: root/libclc/generic/lib
diff options
context:
space:
mode:
authorJan Vesely <jan.vesely@rutgers.edu>2018-05-17 22:55:30 +0000
committerJan Vesely <jan.vesely@rutgers.edu>2018-05-17 22:55:30 +0000
commit70a270da5f4a27b670cba1cb8148fcce5c094f4f (patch)
treeba5043901fab059f1a5ccfb2578156c62ec4c027 /libclc/generic/lib
parentc762666e8747841ab7dd785415e1354a1682e954 (diff)
downloadbcm5719-llvm-70a270da5f4a27b670cba1cb8148fcce5c094f4f.tar.gz
bcm5719-llvm-70a270da5f4a27b670cba1cb8148fcce5c094f4f.zip
Add initial support for half precision builtins
v2: fix fmax implementation
    use consistent checks for __CLC_FPSIZE
    add missing TODOs
    fix whitespace in definitions.h
v3: undef ZERO in modf.inc

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
reviewer: Jeroen Ketema <j.ketema@xs4all.nl>
Reviewed-by: Aaron Watry <awatry@gmail.com>
Tested-by: Aaron Watry <awatry@gmail.com>
llvm-svn: 332677
Diffstat (limited to 'libclc/generic/lib')
-rw-r--r--libclc/generic/lib/geometric/dot.cl22
-rw-r--r--libclc/generic/lib/geometric/length.cl42
-rw-r--r--libclc/generic/lib/math/acos.inc13
-rw-r--r--libclc/generic/lib/math/asin.inc14
-rw-r--r--libclc/generic/lib/math/clc_nextafter.cl7
-rw-r--r--libclc/generic/lib/math/clc_sqrt_impl.inc13
-rw-r--r--libclc/generic/lib/math/clc_sw_binary.inc3
-rw-r--r--libclc/generic/lib/math/clc_sw_unary.inc3
-rw-r--r--libclc/generic/lib/math/copysign.cl15
-rw-r--r--libclc/generic/lib/math/fmax.cl16
-rw-r--r--libclc/generic/lib/math/fmax.inc10
-rw-r--r--libclc/generic/lib/math/fmin.cl15
-rw-r--r--libclc/generic/lib/math/fmin.inc10
-rw-r--r--libclc/generic/lib/math/fract.inc14
-rw-r--r--libclc/generic/lib/math/ldexp.cl6
-rw-r--r--libclc/generic/lib/math/ldexp.inc5
-rw-r--r--libclc/generic/lib/math/lgamma_r.inc4
-rw-r--r--libclc/generic/lib/math/modf.inc12
-rw-r--r--libclc/generic/lib/math/nan.inc9
-rw-r--r--libclc/generic/lib/math/pown.inc3
-rw-r--r--libclc/generic/lib/math/remquo.inc3
-rw-r--r--libclc/generic/lib/math/rootn.inc3
-rw-r--r--libclc/generic/lib/math/sincos.inc3
-rw-r--r--libclc/generic/lib/math/sqrt.cl8
-rw-r--r--libclc/generic/lib/relational/isequal.cl16
-rw-r--r--libclc/generic/lib/relational/isfinite.cl13
-rw-r--r--libclc/generic/lib/relational/isgreater.cl15
-rw-r--r--libclc/generic/lib/relational/isgreaterequal.cl14
-rw-r--r--libclc/generic/lib/relational/isinf.cl12
-rw-r--r--libclc/generic/lib/relational/isless.cl14
-rw-r--r--libclc/generic/lib/relational/islessequal.cl14
-rw-r--r--libclc/generic/lib/relational/islessgreater.cl14
-rw-r--r--libclc/generic/lib/relational/isnan.cl14
-rw-r--r--libclc/generic/lib/relational/isnormal.cl13
-rw-r--r--libclc/generic/lib/relational/isnotequal.cl10
-rw-r--r--libclc/generic/lib/relational/isordered.cl10
-rw-r--r--libclc/generic/lib/relational/isunordered.cl14
-rw-r--r--libclc/generic/lib/relational/signbit.cl14
-rw-r--r--libclc/generic/lib/shared/vstore_half.inc4
39 files changed, 420 insertions, 24 deletions
diff --git a/libclc/generic/lib/geometric/dot.cl b/libclc/generic/lib/geometric/dot.cl
index 0d6fe6c9a4e..e58bc26f433 100644
--- a/libclc/generic/lib/geometric/dot.cl
+++ b/libclc/generic/lib/geometric/dot.cl
@@ -37,3 +37,25 @@ _CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
}
#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) {
+ return p0*p1;
+}
+
+_CLC_OVERLOAD _CLC_DEF half dot(half2 p0, half2 p1) {
+ return p0.x*p1.x + p0.y*p1.y;
+}
+
+_CLC_OVERLOAD _CLC_DEF half dot(half3 p0, half3 p1) {
+ return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+}
+
+_CLC_OVERLOAD _CLC_DEF half dot(half4 p0, half4 p1) {
+ return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+}
+
+#endif
diff --git a/libclc/generic/lib/geometric/length.cl b/libclc/generic/lib/geometric/length.cl
index e7f31b43ae1..170ec584fcf 100644
--- a/libclc/generic/lib/geometric/length.cl
+++ b/libclc/generic/lib/geometric/length.cl
@@ -79,9 +79,47 @@ _CLC_OVERLOAD _CLC_DEF double length(double3 p) {
V_DLENGTH(p);
}
-_CLC_OVERLOAD _CLC_DEF double
-length(double4 p) {
+_CLC_OVERLOAD _CLC_DEF double length(double4 p) {
V_DLENGTH(p);
}
#endif
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half length(half p){
+ return fabs(p);
+}
+
+// Only available in CLC1.2
+#ifndef HALF_MIN
+#define HALF_MIN 0x1.0p-14h
+#endif
+
+#define V_HLENGTH(p) \
+ half l2 = dot(p, p); \
+ /* Rescale and retry when the half sum of squares under- or overflows. */ \
+ if (l2 < HALF_MIN) { \
+ p *= 0x1.0p+12h; \
+ return sqrt(dot(p, p)) * 0x1.0p-12h; \
+ } else if (l2 == INFINITY) { \
+ p *= 0x1.0p-9h; /* 65504 * 2^-9 < 2^7: four squares sum to <= 65472 */ \
+ return sqrt(dot(p, p)) * 0x1.0p+9h; \
+ } \
+ \
+ return sqrt(l2);
+
+_CLC_OVERLOAD _CLC_DEF half length(half2 p) {
+ V_HLENGTH(p);
+}
+
+_CLC_OVERLOAD _CLC_DEF half length(half3 p) {
+ V_HLENGTH(p);
+}
+
+_CLC_OVERLOAD _CLC_DEF half length(half4 p) {
+ V_HLENGTH(p);
+}
+
+#endif
diff --git a/libclc/generic/lib/math/acos.inc b/libclc/generic/lib/math/acos.inc
index cac9499260f..f5586ea0939 100644
--- a/libclc/generic/lib/math/acos.inc
+++ b/libclc/generic/lib/math/acos.inc
@@ -11,10 +11,15 @@
* precision of #4 may be better.
*/
-#if __CLC_FPSIZE == 32
-#define __CLC_CONST(x) x ## f
-#else
+// TODO: Enable half precision when atan2 is implemented
+#if __CLC_FPSIZE > 16
+
+#if __CLC_FPSIZE == 64
#define __CLC_CONST(x) x
+#elif __CLC_FPSIZE == 32
+#define __CLC_CONST(x) x ## f
+#elif __CLC_FPSIZE == 16
+#define __CLC_CONST(x) x ## h
#endif
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE acos(__CLC_GENTYPE x) {
@@ -27,3 +32,5 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE acos(__CLC_GENTYPE x) {
}
#undef __CLC_CONST
+
+#endif
diff --git a/libclc/generic/lib/math/asin.inc b/libclc/generic/lib/math/asin.inc
index 4643cf816de..b08c7bd29ce 100644
--- a/libclc/generic/lib/math/asin.inc
+++ b/libclc/generic/lib/math/asin.inc
@@ -1,12 +1,18 @@
+// TODO: Enable half precision when atan2 is implemented
+#if __CLC_FPSIZE > 16
-#if __CLC_FPSIZE == 32
-#define __CLC_CONST(x) x ## f
-#else
+#if __CLC_FPSIZE == 64
#define __CLC_CONST(x) x
+#elif __CLC_FPSIZE == 32
+#define __CLC_CONST(x) x ## f
+#elif __CLC_FPSIZE == 16
+#define __CLC_CONST(x) x ## h
#endif
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE asin(__CLC_GENTYPE x) {
- return atan2(x, sqrt( (__CLC_GENTYPE)__CLC_CONST(1.0) -(x*x) ));
+ return atan2(x, sqrt( (__CLC_GENTYPE)__CLC_CONST(1.0) - (x*x) ));
}
#undef __CLC_CONST
+
+#endif
diff --git a/libclc/generic/lib/math/clc_nextafter.cl b/libclc/generic/lib/math/clc_nextafter.cl
index d5c0af040ad..d32ef7079e1 100644
--- a/libclc/generic/lib/math/clc_nextafter.cl
+++ b/libclc/generic/lib/math/clc_nextafter.cl
@@ -37,3 +37,10 @@ _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_nextafter, float, flo
NEXTAFTER(double, ulong, long)
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_nextafter, double, double)
#endif
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+NEXTAFTER(half, ushort, short)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_nextafter, half, half)
+#endif
diff --git a/libclc/generic/lib/math/clc_sqrt_impl.inc b/libclc/generic/lib/math/clc_sqrt_impl.inc
index e97b5403db0..fe724e8c143 100644
--- a/libclc/generic/lib/math/clc_sqrt_impl.inc
+++ b/libclc/generic/lib/math/clc_sqrt_impl.inc
@@ -20,14 +20,15 @@
* THE SOFTWARE.
*/
-#if __CLC_FPSIZE == 32
-#define __CLC_NAN NAN
-#define ZERO 0.0f
-#elif __CLC_FPSIZE == 64
+#if __CLC_FPSIZE == 64
#define __CLC_NAN __builtin_nan("")
#define ZERO 0.0
-#else
-#error "Invalid value for __CLC_FPSIZE"
+#elif __CLC_FPSIZE == 32
+#define __CLC_NAN NAN
+#define ZERO 0.0f
+#elif __CLC_FPSIZE == 16
+#define __CLC_NAN (half)NAN
+#define ZERO 0.0h
#endif
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) {
diff --git a/libclc/generic/lib/math/clc_sw_binary.inc b/libclc/generic/lib/math/clc_sw_binary.inc
index 0fe15244904..7741475c237 100644
--- a/libclc/generic/lib/math/clc_sw_binary.inc
+++ b/libclc/generic/lib/math/clc_sw_binary.inc
@@ -2,8 +2,11 @@
#define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x)
+// TODO: Enable half precision when the sw routine is implemented
+#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, __CLC_GENTYPE y) {
return __CLC_SW_FUNC(__CLC_FUNC)(x, y);
}
+#endif
#undef __CLC_SW_FUNC
diff --git a/libclc/generic/lib/math/clc_sw_unary.inc b/libclc/generic/lib/math/clc_sw_unary.inc
index 0f8467c0a1c..cd148b07a02 100644
--- a/libclc/generic/lib/math/clc_sw_unary.inc
+++ b/libclc/generic/lib/math/clc_sw_unary.inc
@@ -2,8 +2,11 @@
#define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x)
+// TODO: Enable half precision when the sw routine is implemented
+#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) {
return __CLC_SW_FUNC(__CLC_FUNC)(x);
}
+#endif
#undef __CLC_SW_FUNC
diff --git a/libclc/generic/lib/math/copysign.cl b/libclc/generic/lib/math/copysign.cl
index 4e0c51b0937..df65e9d7fec 100644
--- a/libclc/generic/lib/math/copysign.cl
+++ b/libclc/generic/lib/math/copysign.cl
@@ -10,3 +10,18 @@ _CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float)
_CLC_DEFINE_BINARY_BUILTIN(double, copysign, __builtin_copysign, double, double)
#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Returns x with its sign bit replaced by the sign bit of y.
+_CLC_DEF _CLC_OVERLOAD half copysign(half x, half y)
+{
+ ushort sign_y = as_ushort(y) & 0x8000u; // bit 15: sign
+ ushort magnitude_x = as_ushort(x) & 0x7fffu; // bits 0-14: exponent and mantissa
+ return as_half((ushort)(sign_y | magnitude_x));
+}
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, copysign, half, half)
+
+#endif
diff --git a/libclc/generic/lib/math/fmax.cl b/libclc/generic/lib/math/fmax.cl
index 239da3d3a61..5c269ceccdd 100644
--- a/libclc/generic/lib/math/fmax.cl
+++ b/libclc/generic/lib/math/fmax.cl
@@ -12,5 +12,21 @@ _CLC_DEFINE_BINARY_BUILTIN(double, fmax, __builtin_fmax, double, double);
#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEF _CLC_OVERLOAD half fmax(half x, half y)
+{
+ if (isnan(x))
+ return y;
+ if (isnan(y))
+ return x;
+ return (x < y) ? y : x;
+}
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half)
+
+#endif
+
#define __CLC_BODY <fmax.inc>
#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/fmax.inc b/libclc/generic/lib/math/fmax.inc
index 8315c5fb6e0..a91ad6b7ba7 100644
--- a/libclc/generic/lib/math/fmax.inc
+++ b/libclc/generic/lib/math/fmax.inc
@@ -15,4 +15,14 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, double y) {
#endif // ifdef cl_khr_fp64
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, half y) {
+ return fmax(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y));
+}
+
+#endif // ifdef cl_khr_fp16
+
#endif // !defined(__CLC_SCALAR)
diff --git a/libclc/generic/lib/math/fmin.cl b/libclc/generic/lib/math/fmin.cl
index 28c7d0125a9..45c112d991f 100644
--- a/libclc/generic/lib/math/fmin.cl
+++ b/libclc/generic/lib/math/fmin.cl
@@ -11,6 +11,21 @@ _CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float);
_CLC_DEFINE_BINARY_BUILTIN(double, fmin, __builtin_fmin, double, double);
#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEF _CLC_OVERLOAD half fmin(half x, half y)
+{
+ if (isnan(x))
+ return y;
+ if (isnan(y))
+ return x;
+ return (y < x) ? y : x;
+}
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half)
+
+#endif
#define __CLC_BODY <fmin.inc>
#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/fmin.inc b/libclc/generic/lib/math/fmin.inc
index d4b5ac2d62a..98756785196 100644
--- a/libclc/generic/lib/math/fmin.inc
+++ b/libclc/generic/lib/math/fmin.inc
@@ -15,4 +15,14 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, double y) {
#endif // ifdef cl_khr_fp64
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, half y) {
+ return fmin(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y));
+}
+
+#endif // ifdef cl_khr_fp16
+
#endif // !defined(__CLC_SCALAR)
diff --git a/libclc/generic/lib/math/fract.inc b/libclc/generic/lib/math/fract.inc
index 8d2a4d707a9..00d4674bfa2 100644
--- a/libclc/generic/lib/math/fract.inc
+++ b/libclc/generic/lib/math/fract.inc
@@ -20,16 +20,21 @@
* THE SOFTWARE.
*/
-#if __CLC_FPSIZE == 32
-#define MIN_CONSTANT 0x1.fffffep-1f
-#else
+#if __CLC_FPSIZE == 64
#define MIN_CONSTANT 0x1.fffffffffffffp-1
+#define ZERO 0.0
+#elif __CLC_FPSIZE == 32
+#define MIN_CONSTANT 0x1.fffffep-1f
+#define ZERO 0.0f
+#elif __CLC_FPSIZE == 16
+#define MIN_CONSTANT 0x1.ffcp-1h
+#define ZERO 0.0h
#endif
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) {
*iptr = floor(x);
__CLC_GENTYPE r = fmin(x - *iptr, MIN_CONSTANT);
- r = isinf(x) ? 0.0f : r;
+ r = isinf(x) ? ZERO : r;
r = isnan(x) ? x : r;
return r;
}
@@ -47,3 +52,4 @@ FRACT_DEF(local);
FRACT_DEF(global);
#undef MIN_CONSTANT
+#undef ZERO
diff --git a/libclc/generic/lib/math/ldexp.cl b/libclc/generic/lib/math/ldexp.cl
index 9be3127f46b..190a4d5f5fc 100644
--- a/libclc/generic/lib/math/ldexp.cl
+++ b/libclc/generic/lib/math/ldexp.cl
@@ -33,7 +33,13 @@ _CLC_DEFINE_BINARY_BUILTIN(float, ldexp, __clc_ldexp, float, int)
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_DEFINE_BINARY_BUILTIN(double, ldexp, __clc_ldexp, double, int)
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_DEFINE_BINARY_BUILTIN(half, ldexp, __clc_ldexp, half, int)
#endif
// This defines all the ldexp(GENTYPE, int) variants
diff --git a/libclc/generic/lib/math/ldexp.inc b/libclc/generic/lib/math/ldexp.inc
index 6e28fbb94ca..d6144d7cb6d 100644
--- a/libclc/generic/lib/math/ldexp.inc
+++ b/libclc/generic/lib/math/ldexp.inc
@@ -20,6 +20,9 @@
* THE SOFTWARE.
*/
+// TODO: Enable half precision when ldexp is implemented.
+#if __CLC_FPSIZE > 16
+
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE ldexp(__CLC_GENTYPE x, int n) {
@@ -27,3 +30,5 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE ldexp(__CLC_GENTYPE x, int n) {
}
#endif
+
+#endif
diff --git a/libclc/generic/lib/math/lgamma_r.inc b/libclc/generic/lib/math/lgamma_r.inc
index 316d4fa1539..0e19ba8fb2c 100644
--- a/libclc/generic/lib/math/lgamma_r.inc
+++ b/libclc/generic/lib/math/lgamma_r.inc
@@ -21,10 +21,12 @@
* THE SOFTWARE.
*/
-
+// TODO: Enable half precision when the base version is implemented.
+#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) {
__CLC_INTN private_iptr;
__CLC_GENTYPE ret = lgamma_r(x, &private_iptr);
*iptr = private_iptr;
return ret;
}
+#endif
diff --git a/libclc/generic/lib/math/modf.inc b/libclc/generic/lib/math/modf.inc
index 1486b765bba..1ffc6d9e851 100644
--- a/libclc/generic/lib/math/modf.inc
+++ b/libclc/generic/lib/math/modf.inc
@@ -20,9 +20,17 @@
* THE SOFTWARE.
*/
+#if __CLC_FPSIZE == 64
+#define ZERO 0.0
+#elif __CLC_FPSIZE == 32
+#define ZERO 0.0f
+#elif __CLC_FPSIZE == 16
+#define ZERO 0.0h
+#endif
+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, __CLC_GENTYPE *iptr) {
*iptr = trunc(x);
- return copysign(isinf(x) ? 0.0f : x - *iptr, x);
+ return copysign(isinf(x) ? ZERO : x - *iptr, x);
}
#define MODF_DEF(addrspace) \
@@ -35,3 +43,5 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, __CLC_GENTYPE *iptr)
MODF_DEF(local);
MODF_DEF(global);
+
+#undef ZERO
diff --git a/libclc/generic/lib/math/nan.inc b/libclc/generic/lib/math/nan.inc
index 6e4afc888d5..f6508c3ab52 100644
--- a/libclc/generic/lib/math/nan.inc
+++ b/libclc/generic/lib/math/nan.inc
@@ -7,11 +7,18 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(ulong, __CLC_VECSIZE) cod
{
return __CLC_AS_GENTYPE(code | 0x7ff0000000000000ul);
}
-#else
+#elif __CLC_FPSIZE == 32
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(uint, __CLC_VECSIZE) code)
{
return __CLC_AS_GENTYPE(code | 0x7fc00000);
}
+#elif __CLC_FPSIZE == 16
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(ushort, __CLC_VECSIZE) code)
+{
+ const ushort mask = 0x7e00;
+ const __CLC_XCONCAT(ushort, __CLC_VECSIZE) res = code | mask;
+ return __CLC_AS_GENTYPE(res);
+}
#endif
diff --git a/libclc/generic/lib/math/pown.inc b/libclc/generic/lib/math/pown.inc
index 84729d90a79..2add2c7459d 100644
--- a/libclc/generic/lib/math/pown.inc
+++ b/libclc/generic/lib/math/pown.inc
@@ -1,3 +1,6 @@
+// TODO: Enable half precision when the sw routine is implemented
+#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE pown(__CLC_GENTYPE x, __CLC_INTN y) {
return __clc_pown(x, y);
}
+#endif
diff --git a/libclc/generic/lib/math/remquo.inc b/libclc/generic/lib/math/remquo.inc
index c1de78a5e7f..c33b5ddab31 100644
--- a/libclc/generic/lib/math/remquo.inc
+++ b/libclc/generic/lib/math/remquo.inc
@@ -1,6 +1,9 @@
+// TODO: Enable half precision when the sw routine is implemented
+#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) {
__CLC_INTN local_q;
__CLC_GENTYPE ret = __clc_remquo(x, y, &local_q);
*q = local_q;
return ret;
}
+#endif
diff --git a/libclc/generic/lib/math/rootn.inc b/libclc/generic/lib/math/rootn.inc
index 3f5b00c082c..f788649685a 100644
--- a/libclc/generic/lib/math/rootn.inc
+++ b/libclc/generic/lib/math/rootn.inc
@@ -1,3 +1,6 @@
+// TODO: Enable half precision when the sw routine is implemented
+#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rootn(__CLC_GENTYPE x, __CLC_INTN y) {
return __clc_rootn(x, y);
}
+#endif
diff --git a/libclc/generic/lib/math/sincos.inc b/libclc/generic/lib/math/sincos.inc
index e97f0f9641c..2318ffb73f5 100644
--- a/libclc/generic/lib/math/sincos.inc
+++ b/libclc/generic/lib/math/sincos.inc
@@ -1,3 +1,5 @@
+// TODO: Enable half precision when sin/cos is implemented
+#if __CLC_FPSIZE > 16
#define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \
*cosval = cos(x); \
@@ -9,3 +11,4 @@ __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE)
__CLC_DECLARE_SINCOS(private, __CLC_GENTYPE)
#undef __CLC_DECLARE_SINCOS
+#endif
diff --git a/libclc/generic/lib/math/sqrt.cl b/libclc/generic/lib/math/sqrt.cl
index 300e2741ee0..b05d6bc0a06 100644
--- a/libclc/generic/lib/math/sqrt.cl
+++ b/libclc/generic/lib/math/sqrt.cl
@@ -33,3 +33,11 @@ _CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)
_CLC_DEFINE_UNARY_BUILTIN(double, sqrt, __clc_sqrt, double)
#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half)
+
+#endif
diff --git a/libclc/generic/lib/relational/isequal.cl b/libclc/generic/lib/relational/isequal.cl
index 9d79ba6b3db..3f14f945bfd 100644
--- a/libclc/generic/lib/relational/isequal.cl
+++ b/libclc/generic/lib/relational/isequal.cl
@@ -26,5 +26,19 @@ _CLC_DEFINE_ISEQUAL(long8, isequal, double8, double8)
_CLC_DEFINE_ISEQUAL(long16, isequal, double16, double16)
#endif
+#ifdef cl_khr_fp16
-#undef _CLC_DEFINE_ISEQUAL
\ No newline at end of file
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isequal(half) returns an int, but the vector versions
+// return short.
+_CLC_DEFINE_ISEQUAL(int, isequal, half, half)
+_CLC_DEFINE_ISEQUAL(short2, isequal, half2, half2)
+_CLC_DEFINE_ISEQUAL(short3, isequal, half3, half3)
+_CLC_DEFINE_ISEQUAL(short4, isequal, half4, half4)
+_CLC_DEFINE_ISEQUAL(short8, isequal, half8, half8)
+_CLC_DEFINE_ISEQUAL(short16, isequal, half16, half16)
+
+#endif
+
+#undef _CLC_DEFINE_ISEQUAL
diff --git a/libclc/generic/lib/relational/isfinite.cl b/libclc/generic/lib/relational/isfinite.cl
index d0658c01eac..15b92fa4129 100644
--- a/libclc/generic/lib/relational/isfinite.cl
+++ b/libclc/generic/lib/relational/isfinite.cl
@@ -16,3 +16,16 @@ _CLC_DEF _CLC_OVERLOAD int isfinite(double x) {
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isfinite, double)
#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isfinite(half) returns an int, but the vector versions
+// return short.
+_CLC_DEF _CLC_OVERLOAD int isfinite(half x) {
+ return __builtin_isfinite(x);
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isfinite, half)
+
+#endif
diff --git a/libclc/generic/lib/relational/isgreater.cl b/libclc/generic/lib/relational/isgreater.cl
index 79456e56d51..167d6f21355 100644
--- a/libclc/generic/lib/relational/isgreater.cl
+++ b/libclc/generic/lib/relational/isgreater.cl
@@ -20,3 +20,18 @@ _CLC_DEF _CLC_OVERLOAD int isgreater(double x, double y){
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreater, double, double)
#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isgreater(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEF _CLC_OVERLOAD int isgreater(half x, half y){
+ return __builtin_isgreater(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isgreater, half, half)
+
+#endif
diff --git a/libclc/generic/lib/relational/isgreaterequal.cl b/libclc/generic/lib/relational/isgreaterequal.cl
index 2d5ebe5770c..128a1d0de4e 100644
--- a/libclc/generic/lib/relational/isgreaterequal.cl
+++ b/libclc/generic/lib/relational/isgreaterequal.cl
@@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int isgreaterequal(double x, double y){
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreaterequal, double, double)
#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isgreaterequal(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEF _CLC_OVERLOAD int isgreaterequal(half x, half y){
+ return __builtin_isgreaterequal(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isgreaterequal, half, half)
+
+#endif
diff --git a/libclc/generic/lib/relational/isinf.cl b/libclc/generic/lib/relational/isinf.cl
index 1452d919cb8..96aae4aa700 100644
--- a/libclc/generic/lib/relational/isinf.cl
+++ b/libclc/generic/lib/relational/isinf.cl
@@ -14,5 +14,17 @@ _CLC_DEF _CLC_OVERLOAD int isinf(double x) {
}
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isinf, double)
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isinf(half) returns an int, but the vector versions
+// return short.
+_CLC_DEF _CLC_OVERLOAD int isinf(half x) {
+ return __builtin_isinf(x);
+}
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isinf, half)
#endif
diff --git a/libclc/generic/lib/relational/isless.cl b/libclc/generic/lib/relational/isless.cl
index 56a3e1329b4..1dbf7676060 100644
--- a/libclc/generic/lib/relational/isless.cl
+++ b/libclc/generic/lib/relational/isless.cl
@@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int isless(double x, double y){
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isless, double, double)
#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isless(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEF _CLC_OVERLOAD int isless(half x, half y){
+ return __builtin_isless(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isless, half, half)
+
+#endif
diff --git a/libclc/generic/lib/relational/islessequal.cl b/libclc/generic/lib/relational/islessequal.cl
index 259c307da45..db64bea5dcd 100644
--- a/libclc/generic/lib/relational/islessequal.cl
+++ b/libclc/generic/lib/relational/islessequal.cl
@@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int islessequal(double x, double y){
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessequal, double, double)
#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of islessequal(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEF _CLC_OVERLOAD int islessequal(half x, half y){
+ return __builtin_islessequal(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, islessequal, half, half)
+
+#endif
diff --git a/libclc/generic/lib/relational/islessgreater.cl b/libclc/generic/lib/relational/islessgreater.cl
index fc029f35b73..9e9b11ec9b6 100644
--- a/libclc/generic/lib/relational/islessgreater.cl
+++ b/libclc/generic/lib/relational/islessgreater.cl
@@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int islessgreater(double x, double y){
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessgreater, double, double)
#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of islessgreater(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEF _CLC_OVERLOAD int islessgreater(half x, half y){
+ return __builtin_islessgreater(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, islessgreater, half, half)
+
+#endif
diff --git a/libclc/generic/lib/relational/isnan.cl b/libclc/generic/lib/relational/isnan.cl
index f82dc5d59da..3d3104783b7 100644
--- a/libclc/generic/lib/relational/isnan.cl
+++ b/libclc/generic/lib/relational/isnan.cl
@@ -16,3 +16,17 @@ _CLC_DEF _CLC_OVERLOAD int isnan(double x) {
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnan, double)
#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isnan(half) returns an int, but the vector versions
+// return short.
+_CLC_DEF _CLC_OVERLOAD int isnan(half x) {
+ return __builtin_isnan(x);
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isnan, half)
+
+#endif
diff --git a/libclc/generic/lib/relational/isnormal.cl b/libclc/generic/lib/relational/isnormal.cl
index 2e6b42d0017..a3dbf661c61 100644
--- a/libclc/generic/lib/relational/isnormal.cl
+++ b/libclc/generic/lib/relational/isnormal.cl
@@ -16,3 +16,16 @@ _CLC_DEF _CLC_OVERLOAD int isnormal(double x) {
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnormal, double)
#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isnormal(half) returns an int, but the vector versions
+// return short.
+_CLC_DEF _CLC_OVERLOAD int isnormal(half x) {
+ return __builtin_isnormal(x);
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isnormal, half)
+
+#endif
diff --git a/libclc/generic/lib/relational/isnotequal.cl b/libclc/generic/lib/relational/isnotequal.cl
index 787fd8d53c2..afd293dda01 100644
--- a/libclc/generic/lib/relational/isnotequal.cl
+++ b/libclc/generic/lib/relational/isnotequal.cl
@@ -19,5 +19,15 @@ _CLC_DEFINE_ISNOTEQUAL(int, isnotequal, double, double)
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isnotequal, double, double)
#endif
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isnotequal(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEFINE_ISNOTEQUAL(int, isnotequal, half, half)
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isnotequal, half, half)
+
+#endif
#undef _CLC_DEFINE_ISNOTEQUAL
diff --git a/libclc/generic/lib/relational/isordered.cl b/libclc/generic/lib/relational/isordered.cl
index ebda2eb72ba..cedd05f6e04 100644
--- a/libclc/generic/lib/relational/isordered.cl
+++ b/libclc/generic/lib/relational/isordered.cl
@@ -19,5 +19,15 @@ _CLC_DEFINE_ISORDERED(int, isordered, double, double)
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isordered, double, double)
#endif
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isordered(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEFINE_ISORDERED(int, isordered, half, half)
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isordered, half, half)
+
+#endif
#undef _CLC_DEFINE_ISORDERED
diff --git a/libclc/generic/lib/relational/isunordered.cl b/libclc/generic/lib/relational/isunordered.cl
index 8bc5e3fa7f6..90939807ffd 100644
--- a/libclc/generic/lib/relational/isunordered.cl
+++ b/libclc/generic/lib/relational/isunordered.cl
@@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int isunordered(double x, double y){
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isunordered, double, double)
#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isunordered(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEF _CLC_OVERLOAD int isunordered(half x, half y){
+ return __builtin_isunordered(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isunordered, half, half)
+
+#endif
diff --git a/libclc/generic/lib/relational/signbit.cl b/libclc/generic/lib/relational/signbit.cl
index ab37d2f1288..a7378d7d605 100644
--- a/libclc/generic/lib/relational/signbit.cl
+++ b/libclc/generic/lib/relational/signbit.cl
@@ -17,3 +17,17 @@ _CLC_DEF _CLC_OVERLOAD int signbit(double x){
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, signbit, double)
#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of signbit(half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEF _CLC_OVERLOAD int signbit(half x){
+ return __builtin_signbit(x);
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, signbit, half)
+
+#endif
diff --git a/libclc/generic/lib/shared/vstore_half.inc b/libclc/generic/lib/shared/vstore_half.inc
index ee4e38b4c6c..2d833785623 100644
--- a/libclc/generic/lib/shared/vstore_half.inc
+++ b/libclc/generic/lib/shared/vstore_half.inc
@@ -1,4 +1,5 @@
-
+// This does not exist for fp16
+#if __CLC_FPSIZE > 16
#ifdef __CLC_VECSIZE
#if __CLC_VECSIZE == 3
@@ -17,3 +18,4 @@
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
#endif
+#endif
OpenPOWER on IntegriCloud