diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2015-05-13 03:55:07 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2015-05-13 03:55:07 +0000 |
commit | d538fdc217fd575790670953c9e0247125bf6740 (patch) | |
tree | 35d975391bac8499cee5176c71220764c32a75bd /libclc | |
parent | 0cabcf211a929a4003f75cfa3348713062232400 (diff) | |
download | bcm5719-llvm-d538fdc217fd575790670953c9e0247125bf6740.tar.gz bcm5719-llvm-d538fdc217fd575790670953c9e0247125bf6740.zip |
Implement exp2 using OpenCL C rather than using an intrinsic
Not all targets support the intrinsic, so it's better to have a
generic implementation which does not use it.
This exp2 implementation was ported from the AMD builtin library
and has been tested with piglit, OpenCV, and the ocl conformance tests.
llvm-svn: 237228
Diffstat (limited to 'libclc')
-rw-r--r-- | libclc/generic/include/clc/math/exp2.h | 28 | ||||
-rw-r--r-- | libclc/generic/include/clc/math/exp2.inc | 23 | ||||
-rw-r--r-- | libclc/generic/lib/SOURCES | 2 | ||||
-rw-r--r-- | libclc/generic/lib/math/exp2.cl | 86 | ||||
-rw-r--r-- | libclc/generic/lib/math/exp_helper.cl | 69 | ||||
-rw-r--r-- | libclc/generic/lib/math/exp_helper.h | 29 | ||||
-rw-r--r-- | libclc/generic/lib/math/tables.cl | 70 | ||||
-rw-r--r-- | libclc/generic/lib/math/tables.h | 2 |
8 files changed, 303 insertions, 6 deletions
diff --git a/libclc/generic/include/clc/math/exp2.h b/libclc/generic/include/clc/math/exp2.h index ec0dad268a7..14167e87f7d 100644 --- a/libclc/generic/include/clc/math/exp2.h +++ b/libclc/generic/include/clc/math/exp2.h @@ -1,6 +1,24 @@ -#undef exp2 -#define exp2 __clc_exp2 +/* + * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ -#define __CLC_FUNCTION __clc_exp2 -#define __CLC_INTRINSIC "llvm.exp2" -#include <clc/math/unary_intrin.inc> +#define __CLC_BODY <clc/math/exp2.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/generic/include/clc/math/exp2.inc b/libclc/generic/include/clc/math/exp2.inc new file mode 100644 index 00000000000..3ecaae621b3 --- /dev/null +++ b/libclc/generic/include/clc/math/exp2.inc @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE exp2(__CLC_GENTYPE x); diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index 6398d654c19..1e639948374 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -79,6 +79,8 @@ math/cospi.cl math/ep_log.cl math/erfc.cl math/exp.cl +math/exp_helper.cl +math/exp2.cl math/exp10.cl math/fmax.cl math/fmin.cl diff --git a/libclc/generic/lib/math/exp2.cl b/libclc/generic/lib/math/exp2.cl new file mode 100644 index 00000000000..1ddccbd3ee6 --- /dev/null +++ b/libclc/generic/lib/math/exp2.cl @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2014,2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <clc/clc.h> + +#include "math.h" +#include "../clcmacro.h" + +_CLC_OVERLOAD _CLC_DEF float exp2(float x) { + + // Reduce x + const float ln2HI = 0x1.62e300p-1f; + const float ln2LO = 0x1.2fefa2p-17f; + + float t = rint(x); + int p = (int)t; + float tt = x - t; + float hi = tt * ln2HI; + float lo = tt * ln2LO; + + // Evaluate poly + t = hi + lo; + tt = t*t; + float v = mad(tt, + -mad(tt, + mad(tt, + mad(tt, + mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f), + 0x1.1566aap-14f), + -0x1.6c16c2p-9f), + 0x1.555556p-3f), + t); + + float y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi); + + // Scale by 2^p + float r = as_float(as_int(y) + (p << 23)); + + const float ulim = 128.0f; + const float llim = -126.0f; + + r = x < llim ? 0.0f : r; + r = x < ulim ? r : as_float(0x7f800000); + return isnan(x) ? x : r; +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, exp2, float) + +#ifdef cl_khr_fp64 + +#include "exp_helper.h" + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +_CLC_OVERLOAD _CLC_DEF double exp2(double x) { + const double R_LN2 = 0x1.62e42fefa39efp-1; // ln(2) + const double R_1_BY_64 = 1.0 / 64.0; + + int n = convert_int(x * 64.0); + double r = R_LN2 * fma(-R_1_BY_64, (double)n, x); + return __clc_exp_helper(x, -1074.0, 1024.0, r, n); +} + + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp2, double) + +#endif diff --git a/libclc/generic/lib/math/exp_helper.cl b/libclc/generic/lib/math/exp_helper.cl new file mode 100644 index 00000000000..046f306466b --- /dev/null +++ b/libclc/generic/lib/math/exp_helper.cl @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <clc/clc.h> + +#include "math.h" +#include "tables.h" + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +_CLC_DEF double __clc_exp_helper(double x, double x_min, double x_max, double r, int n) { + + int j = n & 0x3f; + int m = n >> 6; + + // 6 term tail of Taylor expansion of e^r + double z2 = r * fma(r, + fma(r, + fma(r, + fma(r, + fma(r, 0x1.6c16c16c16c17p-10, 0x1.1111111111111p-7), + 0x1.5555555555555p-5), + 0x1.5555555555555p-3), + 0x1.0000000000000p-1), + 1.0); + + double2 tv = USE_TABLE(two_to_jby64_ep_tbl, j); + z2 = fma(tv.s0 + tv.s1, z2, tv.s1) + tv.s0; + + int small_value = (m < -1022) || ((m == -1022) && (z2 < 1.0)); + + int n1 = m >> 2; + int n2 = m-n1; + double z3= z2 * as_double(((long)n1 + 1023) << 52); + z3 *= as_double(((long)n2 + 1023) << 52); + + z2 = ldexp(z2, m); + z2 = small_value ? z3: z2; + + z2 = isnan(x) ? x : z2; + + z2 = x > x_max ? as_double(PINFBITPATT_DP64) : z2; + z2 = x < x_min ? 0.0 : z2; + + return z2; +} + +#endif // cl_khr_fp64 diff --git a/libclc/generic/lib/math/exp_helper.h b/libclc/generic/lib/math/exp_helper.h new file mode 100644 index 00000000000..e6df2fd905b --- /dev/null +++ b/libclc/generic/lib/math/exp_helper.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +_CLC_DECL double __clc_exp_helper(double x, double x_min, double x_max, double r, int n); + +#endif diff --git a/libclc/generic/lib/math/tables.cl b/libclc/generic/lib/math/tables.cl index 090e64aad39..5a620ec6189 100644 --- a/libclc/generic/lib/math/tables.cl +++ b/libclc/generic/lib/math/tables.cl @@ -634,6 +634,76 @@ DECLARE_TABLE(double2, ATAN_JBY256_TBL, 241) = { (double2)(0x1.921fb00000000p-1, 0x1.5110b4611a626p-23), }; +DECLARE_TABLE(double2, TWO_TO_JBY64_EP, 64) = { + (double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0), + (double2)(0x1.02c9a30000000p+0, 0x1.cef00c1dcdef9p-25), + (double2)(0x1.059b0d0000000p+0, 0x1.8ac2ba1d73e2ap-27), + (double2)(0x1.0874510000000p+0, 0x1.0eb37901186bep-25), + (double2)(0x1.0b55860000000p+0, 0x1.9f3121ec53172p-25), + (double2)(0x1.0e3ec30000000p+0, 0x1.69e8d10103a17p-27), + (double2)(0x1.11301d0000000p+0, 0x1.25b50a4ebbf1ap-32), + (double2)(0x1.1429aa0000000p+0, 0x1.d525bbf668203p-25), + (double2)(0x1.172b830000000p+0, 0x1.8faa2f5b9bef9p-25), + (double2)(0x1.1a35be0000000p+0, 0x1.6df96ea796d31p-25), + (double2)(0x1.1d48730000000p+0, 0x1.68b9aa7805b80p-28), + (double2)(0x1.2063b80000000p+0, 0x1.0c519ac771dd6p-25), + (double2)(0x1.2387a60000000p+0, 0x1.ceac470cd83f5p-25), + (double2)(0x1.26b4560000000p+0, 0x1.789f37495e99cp-26), + (double2)(0x1.29e9df0000000p+0, 0x1.47f7b84b09745p-26), + (double2)(0x1.2d285a0000000p+0, 0x1.b900c2d002475p-26), + (double2)(0x1.306fe00000000p+0, 0x1.4636e2a5bd1abp-25), + (double2)(0x1.33c08b0000000p+0, 0x1.320b7fa64e430p-27), + (double2)(0x1.371a730000000p+0, 0x1.ceaa72a9c5154p-26), + (double2)(0x1.3a7db30000000p+0, 0x1.3967fdba86f24p-26), + (double2)(0x1.3dea640000000p+0, 0x1.82468446b6824p-25), + (double2)(0x1.4160a20000000p+0, 0x1.f72e29f84325bp-28), + (double2)(0x1.44e0860000000p+0, 0x1.8624b40c4dbd0p-30), + (double2)(0x1.486a2b0000000p+0, 0x1.704f3404f068ep-26), + (double2)(0x1.4bfdad0000000p+0, 0x1.4d8a89c750e5ep-26), + (double2)(0x1.4f9b270000000p+0, 0x1.a74b29ab4cf62p-26), + (double2)(0x1.5342b50000000p+0, 0x1.a753e077c2a0fp-26), + (double2)(0x1.56f4730000000p+0, 0x1.ad49f699bb2c0p-26), + (double2)(0x1.5ab07d0000000p+0, 0x1.a90a852b19260p-25), + (double2)(0x1.5e76f10000000p+0, 0x1.6b48521ba6f93p-26), + (double2)(0x1.6247eb0000000p+0, 0x1.d2ac258f87d03p-31), + (double2)(0x1.6623880000000p+0, 0x1.2a91124893ecfp-27), + (double2)(0x1.6a09e60000000p+0, 0x1.9fcef32422cbep-26), + (double2)(0x1.6dfb230000000p+0, 0x1.8ca345de441c5p-25), + (double2)(0x1.71f75e0000000p+0, 0x1.1d8bee7ba46e1p-25), + (double2)(0x1.75feb50000000p+0, 0x1.9099f22fdba6ap-26), + (double2)(0x1.7a11470000000p+0, 0x1.f580c36bea881p-27), + (double2)(0x1.7e2f330000000p+0, 0x1.b3d398841740ap-26), + (double2)(0x1.8258990000000p+0, 0x1.2999c25159f11p-25), + (double2)(0x1.868d990000000p+0, 0x1.68925d901c83bp-25), + (double2)(0x1.8ace540000000p+0, 0x1.15506dadd3e2ap-27), + (double2)(0x1.8f1ae90000000p+0, 0x1.22aee6c57304ep-25), + (double2)(0x1.93737b0000000p+0, 0x1.9b8bc9e8a0387p-29), + (double2)(0x1.97d8290000000p+0, 0x1.fbc9c9f173d24p-25), + (double2)(0x1.9c49180000000p+0, 0x1.51f8480e3e235p-27), + (double2)(0x1.a0c6670000000p+0, 0x1.6bbcac96535b5p-25), + (double2)(0x1.a5503b0000000p+0, 0x1.1f12ae45a1224p-27), + (double2)(0x1.a9e6b50000000p+0, 0x1.5e7f6fd0fac90p-26), + (double2)(0x1.ae89f90000000p+0, 0x1.2b5a75abd0e69p-25), + (double2)(0x1.b33a2b0000000p+0, 0x1.09e2bf5ed7fa1p-25), + (double2)(0x1.b7f76f0000000p+0, 0x1.7daf237553d84p-27), + (double2)(0x1.bcc1e90000000p+0, 0x1.2f074891ee83dp-30), + (double2)(0x1.c199bd0000000p+0, 0x1.b0aa538444196p-25), + (double2)(0x1.c67f120000000p+0, 0x1.cafa29694426fp-25), + (double2)(0x1.cb720d0000000p+0, 0x1.9df20d22a0797p-25), + (double2)(0x1.d072d40000000p+0, 0x1.40f12f71a1e45p-25), + (double2)(0x1.d5818d0000000p+0, 0x1.9f7490e4bb40bp-25), + (double2)(0x1.da9e600000000p+0, 0x1.ed9942b84600dp-27), + (double2)(0x1.dfc9730000000p+0, 0x1.bdcdaf5cb4656p-27), + (double2)(0x1.e502ee0000000p+0, 0x1.e2cffd89cf44cp-26), + (double2)(0x1.ea4afa0000000p+0, 0x1.52486cc2c7b9dp-27), + (double2)(0x1.efa1be0000000p+0, 0x1.cc2b44eee3fa4p-25), + (double2)(0x1.f507650000000p+0, 0x1.6dc8a80ce9f09p-25), + (double2)(0x1.fa7c180000000p+0, 0x1.9e90d82e90a7ep-28) + +}; + + TABLE_FUNCTION(double2, ATAN_JBY256_TBL, atan_jby256_tbl); +TABLE_FUNCTION(double2, TWO_TO_JBY64_EP, two_to_jby64_ep_tbl); #endif // cl_khr_fp64 diff --git a/libclc/generic/lib/math/tables.h b/libclc/generic/lib/math/tables.h index d09adf15567..55ff8537faf 100644 --- a/libclc/generic/lib/math/tables.h +++ b/libclc/generic/lib/math/tables.h @@ -48,5 +48,5 @@ TABLE_FUNCTION_DECL(uint4, pibits_tbl); TABLE_FUNCTION_DECL(double2, ln_tbl); TABLE_FUNCTION_DECL(double2, atan_jby256_tbl); - +TABLE_FUNCTION_DECL(double2, two_to_jby64_ep_tbl); #endif // cl_khr_fp64 |