diff options
author | Jan Vesely <jan.vesely@rutgers.edu> | 2014-09-02 17:55:02 +0000 |
---|---|---|
committer | Jan Vesely <jan.vesely@rutgers.edu> | 2014-09-02 17:55:02 +0000 |
commit | ef513d392b9670124a057b9705b7a012e574fe65 (patch) | |
tree | cf7f1db7e9f75c1d7bae469839fc5f4fa851128a | |
parent | 62496142d5ecd29dbb35778b84008512ad32d04b (diff) | |
download | bcm5719-llvm-ef513d392b9670124a057b9705b7a012e574fe65.tar.gz bcm5719-llvm-ef513d392b9670124a057b9705b7a012e574fe65.zip |
Implement generic mad_sat
v2: Fix trailing whitespace
Fix signed long overflow
improve comment
v3: fix typo
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Tom Stellard <tom@stellard.net>
llvm-svn: 216923
-rw-r--r-- | libclc/generic/include/clc/clc.h | 1 | ||||
-rw-r--r-- | libclc/generic/include/clc/integer/mad_sat.h | 3 | ||||
-rw-r--r-- | libclc/generic/include/clc/integer/mad_sat.inc | 1 | ||||
-rw-r--r-- | libclc/generic/lib/SOURCES | 1 | ||||
-rw-r--r-- | libclc/generic/lib/clcmacro.h | 22 | ||||
-rw-r--r-- | libclc/generic/lib/integer/mad_sat.cl | 72 |
6 files changed, 100 insertions, 0 deletions
```diff
diff --git a/libclc/generic/include/clc/clc.h b/libclc/generic/include/clc/clc.h
index 84b25ac77f0..20664f9b414 100644
--- a/libclc/generic/include/clc/clc.h
+++ b/libclc/generic/include/clc/clc.h
@@ -82,6 +82,7 @@
 #include <clc/integer/hadd.h>
 #include <clc/integer/mad24.h>
 #include <clc/integer/mad_hi.h>
+#include <clc/integer/mad_sat.h>
 #include <clc/integer/mul24.h>
 #include <clc/integer/mul_hi.h>
 #include <clc/integer/rhadd.h>
diff --git a/libclc/generic/include/clc/integer/mad_sat.h b/libclc/generic/include/clc/integer/mad_sat.h
new file mode 100644
index 00000000000..3e92372a27d
--- /dev/null
+++ b/libclc/generic/include/clc/integer/mad_sat.h
@@ -0,0 +1,3 @@
+#define __CLC_BODY <clc/integer/mad_sat.inc>
+#include <clc/integer/gentype.inc>
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/integer/mad_sat.inc b/libclc/generic/include/clc/integer/mad_sat.inc
new file mode 100644
index 00000000000..5da2bdf8908
--- /dev/null
+++ b/libclc/generic/include/clc/integer/mad_sat.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad_sat(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z);
diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES
index 21fc7ca796a..5ad823dd1fd 100644
--- a/libclc/generic/lib/SOURCES
+++ b/libclc/generic/lib/SOURCES
@@ -20,6 +20,7 @@ integer/clz_if.ll
 integer/clz_impl.ll
 integer/hadd.cl
 integer/mad24.cl
+integer/mad_sat.cl
 integer/mul24.cl
 integer/mul_hi.cl
 integer/rhadd.cl
diff --git a/libclc/generic/lib/clcmacro.h b/libclc/generic/lib/clcmacro.h
index 730073ae1f9..ef102ea54e9 100644
--- a/libclc/generic/lib/clcmacro.h
+++ b/libclc/generic/lib/clcmacro.h
@@ -41,6 +41,28 @@
     return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
   }
 
+#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE, ARG3_TYPE) \
+  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, ARG3_TYPE##2 z) { \
+    return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \
+  } \
+\
+  DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, ARG3_TYPE##3 z) { \
+    return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \
+                         FUNCTION(x.z, y.z, z.z)); \
+  } \
+\
+  DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, ARG3_TYPE##4 z) { \
+    return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
+  } \
+\
+  DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, ARG3_TYPE##8 z) { \
+    return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
+  } \
+\
+  DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y, ARG3_TYPE##16 z) { \
+    return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
+  }
+
 #define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
 _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
   return BUILTIN(x, y); \
diff --git a/libclc/generic/lib/integer/mad_sat.cl b/libclc/generic/lib/integer/mad_sat.cl
new file mode 100644
index 00000000000..1708b29efff
--- /dev/null
+++ b/libclc/generic/lib/integer/mad_sat.cl
@@ -0,0 +1,72 @@
+#include <clc/clc.h>
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF char mad_sat(char x, char y, char z) {
+  return clamp((short)mad24((short)x, (short)y, (short)z), (short)CHAR_MIN, (short) CHAR_MAX);
+}
+
+_CLC_OVERLOAD _CLC_DEF uchar mad_sat(uchar x, uchar y, uchar z) {
+  return clamp((ushort)mad24((ushort)x, (ushort)y, (ushort)z), (ushort)0, (ushort) UCHAR_MAX);
+}
+
+_CLC_OVERLOAD _CLC_DEF short mad_sat(short x, short y, short z) {
+  return clamp((int)mad24((int)x, (int)y, (int)z), (int)SHRT_MIN, (int) SHRT_MAX);
+}
+
+_CLC_OVERLOAD _CLC_DEF ushort mad_sat(ushort x, ushort y, ushort z) {
+  return clamp((uint)mad24((uint)x, (uint)y, (uint)z), (uint)0, (uint) USHRT_MAX);
+}
+
+_CLC_OVERLOAD _CLC_DEF int mad_sat(int x, int y, int z) {
+  int mhi = mul_hi(x, y);
+  uint mlo = x * y;
+  long m = upsample(mhi, mlo);
+  m += z;
+  if (m > INT_MAX)
+    return INT_MAX;
+  if (m < INT_MIN)
+    return INT_MIN;
+  return m;
+}
+
+_CLC_OVERLOAD _CLC_DEF uint mad_sat(uint x, uint y, uint z) {
+  if (mul_hi(x, y) != 0)
+    return UINT_MAX;
+  return add_sat(x * y, z);
+}
+
+_CLC_OVERLOAD _CLC_DEF long mad_sat(long x, long y, long z) {
+  long hi = mul_hi(x, y);
+  ulong ulo = x * y;
+  long slo = x * y;
+  /* Big overflow of more than 2 bits, add can't fix this */
+  if (((x < 0) == (y < 0)) && hi != 0)
+    return LONG_MAX;
+  /* Low overflow in mul and z not neg enough to correct it */
+  if (hi == 0 && ulo >= LONG_MAX && (z > 0 || (ulo + z) > LONG_MAX))
+    return LONG_MAX;
+  /* Big overflow of more than 2 bits, add can't fix this */
+  if (((x < 0) != (y < 0)) && hi != -1)
+    return LONG_MIN;
+  /* Low overflow in mul and z not pos enough to correct it */
+  if (hi == -1 && ulo <= ((ulong)LONG_MAX + 1UL) && (z < 0 || z < (LONG_MAX - ulo)))
+    return LONG_MIN;
+  /* We have checked all conditions, any overflow in addition returns
+   * the correct value */
+  return ulo + z;
+}
+
+_CLC_OVERLOAD _CLC_DEF ulong mad_sat(ulong x, ulong y, ulong z) {
+  if (mul_hi(x, y) != 0)
+    return ULONG_MAX;
+  return add_sat(x * y, z);
+}
+
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, mad_sat, char, char, char)
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, mad_sat, uchar, uchar, uchar)
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, mad_sat, short, short, short)
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, mad_sat, ushort, ushort, ushort)
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, mad_sat, int, int, int)
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, mad_sat, uint, uint, uint)
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, mad_sat, long, long, long)
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, mad_sat, ulong, ulong, ulong)
```