diff options
| author | Jan Vesely <jan.vesely@rutgers.edu> | 2018-05-17 22:55:30 +0000 |
|---|---|---|
| committer | Jan Vesely <jan.vesely@rutgers.edu> | 2018-05-17 22:55:30 +0000 |
| commit | 70a270da5f4a27b670cba1cb8148fcce5c094f4f (patch) | |
| tree | ba5043901fab059f1a5ccfb2578156c62ec4c027 /libclc/generic/lib/math | |
| parent | c762666e8747841ab7dd785415e1354a1682e954 (diff) | |
| download | bcm5719-llvm-70a270da5f4a27b670cba1cb8148fcce5c094f4f.tar.gz bcm5719-llvm-70a270da5f4a27b670cba1cb8148fcce5c094f4f.zip | |
Add initial support for half precision builtins
v2: fix fmax implementation
use consistent checks for __CLC_FPSIZE
add missing TODOs
fix whitespace in definitions.h
v3: undef ZERO in modf.inc
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
reviewer: Jeroen Ketema <j.ketema@xs4all.nl>
Reviewed-by: Aaron Watry <awatry@gmail.com>
Tested-by: Aaron Watry <awatry@gmail.com>
llvm-svn: 332677
Diffstat (limited to 'libclc/generic/lib/math')
22 files changed, 169 insertions, 20 deletions
diff --git a/libclc/generic/lib/math/acos.inc b/libclc/generic/lib/math/acos.inc index cac9499260f..f5586ea0939 100644 --- a/libclc/generic/lib/math/acos.inc +++ b/libclc/generic/lib/math/acos.inc @@ -11,10 +11,15 @@ * precision of #4 may be better. */ -#if __CLC_FPSIZE == 32 -#define __CLC_CONST(x) x ## f -#else +// TODO: Enable half precision when atan2 is implemented +#if __CLC_FPSIZE > 16 + +#if __CLC_FPSIZE == 64 #define __CLC_CONST(x) x +#elif __CLC_FPSIZE == 32 +#define __CLC_CONST(x) x ## f +#elif __CLC_FPSIZE == 16 +#define __CLC_CONST(x) x ## h #endif _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE acos(__CLC_GENTYPE x) { @@ -27,3 +32,5 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE acos(__CLC_GENTYPE x) { } #undef __CLC_CONST + +#endif diff --git a/libclc/generic/lib/math/asin.inc b/libclc/generic/lib/math/asin.inc index 4643cf816de..b08c7bd29ce 100644 --- a/libclc/generic/lib/math/asin.inc +++ b/libclc/generic/lib/math/asin.inc @@ -1,12 +1,18 @@ +// TODO: Enable half precision when atan2 is implemented +#if __CLC_FPSIZE > 16 -#if __CLC_FPSIZE == 32 -#define __CLC_CONST(x) x ## f -#else +#if __CLC_FPSIZE == 64 #define __CLC_CONST(x) x +#elif __CLC_FPSIZE == 32 +#define __CLC_CONST(x) x ## f +#elif __CLC_FPSIZE == 16 +#define __CLC_CONST(x) x ## h #endif _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE asin(__CLC_GENTYPE x) { - return atan2(x, sqrt( (__CLC_GENTYPE)__CLC_CONST(1.0) -(x*x) )); + return atan2(x, sqrt( (__CLC_GENTYPE)__CLC_CONST(1.0) - (x*x) )); } #undef __CLC_CONST + +#endif diff --git a/libclc/generic/lib/math/clc_nextafter.cl b/libclc/generic/lib/math/clc_nextafter.cl index d5c0af040ad..d32ef7079e1 100644 --- a/libclc/generic/lib/math/clc_nextafter.cl +++ b/libclc/generic/lib/math/clc_nextafter.cl @@ -37,3 +37,10 @@ _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_nextafter, float, flo NEXTAFTER(double, ulong, long) _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_nextafter, double, double) #endif + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION 
cl_khr_fp16 : enable + +NEXTAFTER(half, ushort, short) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_nextafter, half, half) +#endif diff --git a/libclc/generic/lib/math/clc_sqrt_impl.inc b/libclc/generic/lib/math/clc_sqrt_impl.inc index e97b5403db0..fe724e8c143 100644 --- a/libclc/generic/lib/math/clc_sqrt_impl.inc +++ b/libclc/generic/lib/math/clc_sqrt_impl.inc @@ -20,14 +20,15 @@ * THE SOFTWARE. */ -#if __CLC_FPSIZE == 32 -#define __CLC_NAN NAN -#define ZERO 0.0f -#elif __CLC_FPSIZE == 64 +#if __CLC_FPSIZE == 64 #define __CLC_NAN __builtin_nan("") #define ZERO 0.0 -#else -#error "Invalid value for __CLC_FPSIZE" +#elif __CLC_FPSIZE == 32 +#define __CLC_NAN NAN +#define ZERO 0.0f +#elif __CLC_FPSIZE == 16 +#define __CLC_NAN (half)NAN +#define ZERO 0.0h #endif _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) { diff --git a/libclc/generic/lib/math/clc_sw_binary.inc b/libclc/generic/lib/math/clc_sw_binary.inc index 0fe15244904..7741475c237 100644 --- a/libclc/generic/lib/math/clc_sw_binary.inc +++ b/libclc/generic/lib/math/clc_sw_binary.inc @@ -2,8 +2,11 @@ #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x) +// TODO: Enable half precision when the sw routine is implemented +#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, __CLC_GENTYPE y) { return __CLC_SW_FUNC(__CLC_FUNC)(x, y); } +#endif #undef __CLC_SW_FUNC diff --git a/libclc/generic/lib/math/clc_sw_unary.inc b/libclc/generic/lib/math/clc_sw_unary.inc index 0f8467c0a1c..cd148b07a02 100644 --- a/libclc/generic/lib/math/clc_sw_unary.inc +++ b/libclc/generic/lib/math/clc_sw_unary.inc @@ -2,8 +2,11 @@ #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x) +// TODO: Enable half precision when the sw routine is implemented +#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) { return __CLC_SW_FUNC(__CLC_FUNC)(x); } +#endif #undef __CLC_SW_FUNC diff --git a/libclc/generic/lib/math/copysign.cl 
b/libclc/generic/lib/math/copysign.cl index 4e0c51b0937..df65e9d7fec 100644 --- a/libclc/generic/lib/math/copysign.cl +++ b/libclc/generic/lib/math/copysign.cl @@ -10,3 +10,18 @@ _CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float) _CLC_DEFINE_BINARY_BUILTIN(double, copysign, __builtin_copysign, double, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEF _CLC_OVERLOAD half copysign(half x, half y) +{ + ushort sign_x = as_ushort(x) & 0x8000u; + ushort unsigned_y = as_ushort(y) & 0x7ffffu; + + return as_half((ushort)(sign_x | unsigned_y)); +} +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, copysign, half, half) + +#endif diff --git a/libclc/generic/lib/math/fmax.cl b/libclc/generic/lib/math/fmax.cl index 239da3d3a61..5c269ceccdd 100644 --- a/libclc/generic/lib/math/fmax.cl +++ b/libclc/generic/lib/math/fmax.cl @@ -12,5 +12,21 @@ _CLC_DEFINE_BINARY_BUILTIN(double, fmax, __builtin_fmax, double, double); #endif +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEF _CLC_OVERLOAD half fmax(half x, half y) +{ + if (isnan(x)) + return y; + if (isnan(y)) + return x; + return (x < y) ? 
y : x; +} +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half) + +#endif + #define __CLC_BODY <fmax.inc> #include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/fmax.inc b/libclc/generic/lib/math/fmax.inc index 8315c5fb6e0..a91ad6b7ba7 100644 --- a/libclc/generic/lib/math/fmax.inc +++ b/libclc/generic/lib/math/fmax.inc @@ -15,4 +15,14 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, double y) { #endif // ifdef cl_khr_fp64 +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, half y) { + return fmax(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); +} + +#endif // ifdef cl_khr_fp16 + #endif // !defined(__CLC_SCALAR) diff --git a/libclc/generic/lib/math/fmin.cl b/libclc/generic/lib/math/fmin.cl index 28c7d0125a9..45c112d991f 100644 --- a/libclc/generic/lib/math/fmin.cl +++ b/libclc/generic/lib/math/fmin.cl @@ -11,6 +11,21 @@ _CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float); _CLC_DEFINE_BINARY_BUILTIN(double, fmin, __builtin_fmin, double, double); #endif +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEF _CLC_OVERLOAD half fmin(half x, half y) +{ + if (isnan(x)) + return y; + if (isnan(y)) + return x; + return (y < x) ? 
y : x; +} +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half) + +#endif #define __CLC_BODY <fmin.inc> #include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/fmin.inc b/libclc/generic/lib/math/fmin.inc index d4b5ac2d62a..98756785196 100644 --- a/libclc/generic/lib/math/fmin.inc +++ b/libclc/generic/lib/math/fmin.inc @@ -15,4 +15,14 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, double y) { #endif // ifdef cl_khr_fp64 +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, half y) { + return fmin(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); +} + +#endif // ifdef cl_khr_fp16 + #endif // !defined(__CLC_SCALAR) diff --git a/libclc/generic/lib/math/fract.inc b/libclc/generic/lib/math/fract.inc index 8d2a4d707a9..00d4674bfa2 100644 --- a/libclc/generic/lib/math/fract.inc +++ b/libclc/generic/lib/math/fract.inc @@ -20,16 +20,21 @@ * THE SOFTWARE. */ -#if __CLC_FPSIZE == 32 -#define MIN_CONSTANT 0x1.fffffep-1f -#else +#if __CLC_FPSIZE == 64 #define MIN_CONSTANT 0x1.fffffffffffffp-1 +#define ZERO 0.0 +#elif __CLC_FPSIZE == 32 +#define MIN_CONSTANT 0x1.fffffep-1f +#define ZERO 0.0f +#elif __CLC_FPSIZE == 16 +#define MIN_CONSTANT 0x1.ffcp-1h +#define ZERO 0.0h #endif _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) { *iptr = floor(x); __CLC_GENTYPE r = fmin(x - *iptr, MIN_CONSTANT); - r = isinf(x) ? 0.0f : r; + r = isinf(x) ? ZERO : r; r = isnan(x) ? 
x : r; return r; } @@ -47,3 +52,4 @@ FRACT_DEF(local); FRACT_DEF(global); #undef MIN_CONSTANT +#undef ZERO diff --git a/libclc/generic/lib/math/ldexp.cl b/libclc/generic/lib/math/ldexp.cl index 9be3127f46b..190a4d5f5fc 100644 --- a/libclc/generic/lib/math/ldexp.cl +++ b/libclc/generic/lib/math/ldexp.cl @@ -33,7 +33,13 @@ _CLC_DEFINE_BINARY_BUILTIN(float, ldexp, __clc_ldexp, float, int) #pragma OPENCL EXTENSION cl_khr_fp64 : enable _CLC_DEFINE_BINARY_BUILTIN(double, ldexp, __clc_ldexp, double, int) +#endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +_CLC_DEFINE_BINARY_BUILTIN(half, ldexp, __clc_ldexp, half, int) #endif // This defines all the ldexp(GENTYPE, int) variants diff --git a/libclc/generic/lib/math/ldexp.inc b/libclc/generic/lib/math/ldexp.inc index 6e28fbb94ca..d6144d7cb6d 100644 --- a/libclc/generic/lib/math/ldexp.inc +++ b/libclc/generic/lib/math/ldexp.inc @@ -20,6 +20,9 @@ * THE SOFTWARE. */ +// TODO: Enable half precision when ldexp is implemented. +#if __CLC_FPSIZE > 16 + #ifndef __CLC_SCALAR _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE ldexp(__CLC_GENTYPE x, int n) { @@ -27,3 +30,5 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE ldexp(__CLC_GENTYPE x, int n) { } #endif + +#endif diff --git a/libclc/generic/lib/math/lgamma_r.inc b/libclc/generic/lib/math/lgamma_r.inc index 316d4fa1539..0e19ba8fb2c 100644 --- a/libclc/generic/lib/math/lgamma_r.inc +++ b/libclc/generic/lib/math/lgamma_r.inc @@ -21,10 +21,12 @@ * THE SOFTWARE. */ - +// TODO: Enable half precision when the base version is implemented. 
+#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) { __CLC_INTN private_iptr; __CLC_GENTYPE ret = lgamma_r(x, &private_iptr); *iptr = private_iptr; return ret; } +#endif diff --git a/libclc/generic/lib/math/modf.inc b/libclc/generic/lib/math/modf.inc index 1486b765bba..1ffc6d9e851 100644 --- a/libclc/generic/lib/math/modf.inc +++ b/libclc/generic/lib/math/modf.inc @@ -20,9 +20,17 @@ * THE SOFTWARE. */ +#if __CLC_FPSIZE == 64 +#define ZERO 0.0 +#elif __CLC_FPSIZE == 32 +#define ZERO 0.0f +#elif __CLC_FPSIZE == 16 +#define ZERO 0.0h +#endif + _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, __CLC_GENTYPE *iptr) { *iptr = trunc(x); - return copysign(isinf(x) ? 0.0f : x - *iptr, x); + return copysign(isinf(x) ? ZERO : x - *iptr, x); } #define MODF_DEF(addrspace) \ @@ -35,3 +43,5 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, __CLC_GENTYPE *iptr) MODF_DEF(local); MODF_DEF(global); + +#undef ZERO diff --git a/libclc/generic/lib/math/nan.inc b/libclc/generic/lib/math/nan.inc index 6e4afc888d5..f6508c3ab52 100644 --- a/libclc/generic/lib/math/nan.inc +++ b/libclc/generic/lib/math/nan.inc @@ -7,11 +7,18 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(ulong, __CLC_VECSIZE) cod { return __CLC_AS_GENTYPE(code | 0x7ff0000000000000ul); } -#else +#elif __CLC_FPSIZE == 32 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(uint, __CLC_VECSIZE) code) { return __CLC_AS_GENTYPE(code | 0x7fc00000); } +#elif __CLC_FPSIZE == 16 +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(ushort, __CLC_VECSIZE) code) +{ + const ushort mask = 0x7e00; + const __CLC_XCONCAT(ushort, __CLC_VECSIZE) res = code | mask; + return __CLC_AS_GENTYPE(res); +} #endif diff --git a/libclc/generic/lib/math/pown.inc b/libclc/generic/lib/math/pown.inc index 84729d90a79..2add2c7459d 100644 --- a/libclc/generic/lib/math/pown.inc +++ b/libclc/generic/lib/math/pown.inc @@ -1,3 +1,6 @@ +// TODO: Enable half 
precision when the sw routine is implemented +#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE pown(__CLC_GENTYPE x, __CLC_INTN y) { return __clc_pown(x, y); } +#endif diff --git a/libclc/generic/lib/math/remquo.inc b/libclc/generic/lib/math/remquo.inc index c1de78a5e7f..c33b5ddab31 100644 --- a/libclc/generic/lib/math/remquo.inc +++ b/libclc/generic/lib/math/remquo.inc @@ -1,6 +1,9 @@ +// TODO: Enable half precision when the sw routine is implemented +#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) { __CLC_INTN local_q; __CLC_GENTYPE ret = __clc_remquo(x, y, &local_q); *q = local_q; return ret; } +#endif diff --git a/libclc/generic/lib/math/rootn.inc b/libclc/generic/lib/math/rootn.inc index 3f5b00c082c..f788649685a 100644 --- a/libclc/generic/lib/math/rootn.inc +++ b/libclc/generic/lib/math/rootn.inc @@ -1,3 +1,6 @@ +// TODO: Enable half precision when the sw routine is implemented +#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rootn(__CLC_GENTYPE x, __CLC_INTN y) { return __clc_rootn(x, y); } +#endif diff --git a/libclc/generic/lib/math/sincos.inc b/libclc/generic/lib/math/sincos.inc index e97f0f9641c..2318ffb73f5 100644 --- a/libclc/generic/lib/math/sincos.inc +++ b/libclc/generic/lib/math/sincos.inc @@ -1,3 +1,5 @@ +// TODO: Enable half precision when sin/cos is implemented +#if __CLC_FPSIZE > 16 #define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \ _CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \ *cosval = cos(x); \ @@ -9,3 +11,4 @@ __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE) __CLC_DECLARE_SINCOS(private, __CLC_GENTYPE) #undef __CLC_DECLARE_SINCOS +#endif diff --git a/libclc/generic/lib/math/sqrt.cl b/libclc/generic/lib/math/sqrt.cl index 300e2741ee0..b05d6bc0a06 100644 --- a/libclc/generic/lib/math/sqrt.cl +++ b/libclc/generic/lib/math/sqrt.cl @@ -33,3 +33,11 @@ _CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float) 
_CLC_DEFINE_UNARY_BUILTIN(double, sqrt, __clc_sqrt, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half) + +#endif |

