diff options
author | Aaron Watry <awatry@gmail.com> | 2014-06-25 13:29:23 +0000 |
---|---|---|
committer | Aaron Watry <awatry@gmail.com> | 2014-06-25 13:29:23 +0000 |
commit | d9ee196eab5a00e48ef2022a0952d93f5edad2e1 (patch) | |
tree | ccd6441afcca005ed99bc4ccd89a0e306f32fc4b | |
parent | bfa644b91def589f1dd38668f3e094b581a44583 (diff) | |
download | bcm5719-llvm-d9ee196eab5a00e48ef2022a0952d93f5edad2e1.tar.gz bcm5719-llvm-d9ee196eab5a00e48ef2022a0952d93f5edad2e1.zip |
relational: Implement signbit
v2 Changes:
- use __builtin_signbit instead of shifting by hand
- significantly improve vector shuffling
- Works correctly now for signbit(float16) on radeonsi
Signed-off-by: Aaron Watry <awatry@gmail.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
llvm-svn: 211696
-rw-r--r-- | libclc/generic/include/clc/clc.h | 1 | ||||
-rw-r--r-- | libclc/generic/include/clc/relational/signbit.h | 18 | ||||
-rw-r--r-- | libclc/generic/lib/SOURCES | 1 | ||||
-rw-r--r-- | libclc/generic/lib/relational/signbit.cl | 87 |
4 files changed, 107 insertions, 0 deletions
diff --git a/libclc/generic/include/clc/clc.h b/libclc/generic/include/clc/clc.h
index e4fbfbaaa53..109be8228e6 100644
--- a/libclc/generic/include/clc/clc.h
+++ b/libclc/generic/include/clc/clc.h
@@ -112,6 +112,7 @@
 #include <clc/relational/isequal.h>
 #include <clc/relational/isnan.h>
 #include <clc/relational/select.h>
+#include <clc/relational/signbit.h>
 
 /* 6.11.8 Synchronization Functions */
 #include <clc/synchronization/cl_mem_fence_flags.h>
diff --git a/libclc/generic/include/clc/relational/signbit.h b/libclc/generic/include/clc/relational/signbit.h
new file mode 100644
index 00000000000..774d6e0433e
--- /dev/null
+++ b/libclc/generic/include/clc/relational/signbit.h
@@ -0,0 +1,18 @@
+/* Declares one overload: RETTYPE signbit(TYPE x). */
+#define _CLC_SIGNBIT_DECL(TYPE, RETTYPE) \
+  _CLC_OVERLOAD _CLC_DECL RETTYPE signbit(TYPE x);
+/* Declares the 2-, 3-, 4-, 8- and 16-wide overloads by pasting the width onto both type names. */
+#define _CLC_VECTOR_SIGNBIT_DECL(TYPE, RETTYPE) \
+  _CLC_SIGNBIT_DECL(TYPE##2, RETTYPE##2) \
+  _CLC_SIGNBIT_DECL(TYPE##3, RETTYPE##3) \
+  _CLC_SIGNBIT_DECL(TYPE##4, RETTYPE##4) \
+  _CLC_SIGNBIT_DECL(TYPE##8, RETTYPE##8) \
+  _CLC_SIGNBIT_DECL(TYPE##16, RETTYPE##16)
+
+_CLC_SIGNBIT_DECL(float, int)
+_CLC_VECTOR_SIGNBIT_DECL(float, int)
+/* double overloads (only when cl_khr_fp64 is defined): the scalar form returns int, the vector forms return longN. */
+#ifdef cl_khr_fp64
+_CLC_SIGNBIT_DECL(double, int)
+_CLC_VECTOR_SIGNBIT_DECL(double, long)
+#endif
\ No newline at end of file
diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES
index c3d07577d88..a6d16bd3ecb 100644
--- a/libclc/generic/lib/SOURCES
+++ b/libclc/generic/lib/SOURCES
@@ -42,6 +42,7 @@ relational/all.cl
 relational/any.cl
 relational/isequal.cl
 relational/isnan.cl
+relational/signbit.cl
 shared/clamp.cl
 shared/max.cl
 shared/min.cl
diff --git a/libclc/generic/lib/relational/signbit.cl b/libclc/generic/lib/relational/signbit.cl
new file mode 100644
index 00000000000..1f496d910de
--- /dev/null
+++ b/libclc/generic/lib/relational/signbit.cl
@@ -0,0 +1,87 @@
+#include <clc/clc.h>
+/* Scalar overload: FUNCTION(x) simply forwards to the compiler builtin BUILTIN_NAME. */
+#define _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, ARG_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x){ \
+  return BUILTIN_NAME(x); \
+} \
+
+/* Vector overloads (VEC2..VEC16): call FUNCTION on every component, gather the
+ * results into a RET_TYPE vector and compare that vector against zero, which per
+ * the OpenCL vector relational operators yields 0 (false) or -1 (true) per lane.
+ * The elements must be a (RET_TYPE){...} literal: a bare (a, b) is a comma expression. */
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, FUNCTION, ARG_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+  return (RET_TYPE)( (RET_TYPE){FUNCTION(x.lo), FUNCTION(x.hi)} != (RET_TYPE)0); \
+} \
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, FUNCTION, ARG_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+  return (RET_TYPE)( (RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2)} != (RET_TYPE)0); \
+} \
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, FUNCTION, ARG_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+  return (RET_TYPE)( \
+    (RET_TYPE){ \
+      FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3) \
+    } != (RET_TYPE)0); \
+} \
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, FUNCTION, ARG_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+  return (RET_TYPE)( \
+    (RET_TYPE){ \
+      FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \
+      FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7) \
+    } != (RET_TYPE)0); \
+} \
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, FUNCTION, ARG_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+  return (RET_TYPE)( \
+    (RET_TYPE){ \
+      FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \
+      FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7), \
+      FUNCTION(x.s8), FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb), \
+      FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), FUNCTION(x.sf) \
+    } != (RET_TYPE)0); \
+} \
+
+/* Emits the scalar overload plus the 2/3/4/8/16-wide vector overloads for one RET_TYPE/ARG_TYPE pair. */
+#define _CLC_DEFINE_RELATIONAL_UNARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG_TYPE) \
+_CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG_TYPE) \
+_CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, FUNCTION, ARG_TYPE##2) \
+_CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, FUNCTION, ARG_TYPE##3) \
+_CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, FUNCTION, ARG_TYPE##4) \
+_CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, FUNCTION, ARG_TYPE##8) \
+_CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, FUNCTION, ARG_TYPE##16) \
+
+_CLC_DEFINE_RELATIONAL_UNARY(int, signbit, __builtin_signbitf, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar version of signbit(double) returns an int, but the vector versions
+// return long.
+/* Scalar double overload: returns the value of __builtin_signbit(x) unchanged. */
+_CLC_DEF _CLC_OVERLOAD int signbit(double x){
+  return __builtin_signbit(x);
+}
+/* Vector double overloads: instantiate the per-width helper macros with longN result types. */
+_CLC_DEFINE_RELATIONAL_UNARY_VEC2(long2, signbit, double2)
+_CLC_DEFINE_RELATIONAL_UNARY_VEC3(long3, signbit, double3)
+_CLC_DEFINE_RELATIONAL_UNARY_VEC4(long4, signbit, double4)
+_CLC_DEFINE_RELATIONAL_UNARY_VEC8(long8, signbit, double8)
+_CLC_DEFINE_RELATIONAL_UNARY_VEC16(long16, signbit, double16)
+
+#endif
+/* Remove the helper macros listed below so they are no longer defined past this point. */
+#undef _CLC_DEFINE_RELATIONAL_UNARY
+#undef _CLC_DEFINE_RELATIONAL_UNARY_SCALAR
+#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC2
+#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC3
+#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC4
+#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC8
+#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC16
\ No newline at end of file |