diff options
Diffstat (limited to 'libclc/generic')
| -rw-r--r-- | libclc/generic/include/clc/clc.h | 1 | ||||
| -rw-r--r-- | libclc/generic/include/clc/math/log1p.h | 24 | ||||
| -rw-r--r-- | libclc/generic/include/clc/math/log1p.inc | 23 | ||||
| -rw-r--r-- | libclc/generic/lib/SOURCES | 2 | ||||
| -rw-r--r-- | libclc/generic/lib/math/log1p.cl | 177 | ||||
| -rw-r--r-- | libclc/generic/lib/math/math.h | 26 | ||||
| -rw-r--r-- | libclc/generic/lib/math/tables.cl | 366 | ||||
| -rw-r--r-- | libclc/generic/lib/math/tables.h | 50 | 
8 files changed, 669 insertions, 0 deletions
| diff --git a/libclc/generic/include/clc/clc.h b/libclc/generic/include/clc/clc.h index ff63fb0c32f..bd92fdb12b5 100644 --- a/libclc/generic/include/clc/clc.h +++ b/libclc/generic/include/clc/clc.h @@ -50,6 +50,7 @@  #include <clc/math/fmod.h>  #include <clc/math/hypot.h>  #include <clc/math/log.h> +#include <clc/math/log1p.h>  #include <clc/math/log2.h>  #include <clc/math/mad.h>  #include <clc/math/mix.h> diff --git a/libclc/generic/include/clc/math/log1p.h b/libclc/generic/include/clc/math/log1p.h new file mode 100644 index 00000000000..4d716dd18d9 --- /dev/null +++ b/libclc/generic/include/clc/math/log1p.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#define __CLC_BODY <clc/math/log1p.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/generic/include/clc/math/log1p.inc b/libclc/generic/include/clc/math/log1p.inc new file mode 100644 index 00000000000..4cbfbf38fc1 --- /dev/null +++ b/libclc/generic/include/clc/math/log1p.inc @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE log1p(__CLC_GENTYPE a); diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index 5918989093c..b76fec98f63 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -63,8 +63,10 @@ math/fmax.cl  math/fmin.cl  math/fmod.cl  math/hypot.cl +math/log1p.cl  math/mad.cl  math/mix.cl +math/tables.cl  math/clc_nextafter.cl  math/nextafter.cl  math/pown.cl diff --git a/libclc/generic/lib/math/log1p.cl b/libclc/generic/lib/math/log1p.cl new file mode 100644 index 00000000000..be25c64bf6a --- /dev/null +++ b/libclc/generic/lib/math/log1p.cl @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <clc/clc.h> + +#include "math.h" +#include "tables.h" +#include "../clcmacro.h" + +_CLC_OVERLOAD _CLC_DEF float log1p(float x) +{ +    float w = x; +    uint ux = as_uint(x); +    uint ax = ux & EXSIGNBIT_SP32; + +    // |x| < 2^-4 +    float u2 = MATH_DIVIDE(x, 2.0f + x); +    float u = u2 + u2; +    float v = u * u; +    // 2/(5 * 2^5), 2/(3 * 2^3) +    float zsmall = mad(-u2, x, mad(v, 0x1.99999ap-7f, 0x1.555556p-4f) * v * u) + x; + +    // |x| >= 2^-4 +    ux = as_uint(x + 1.0f); + +    int m = (int)((ux >> EXPSHIFTBITS_SP32) & 0xff) - EXPBIAS_SP32; +    float mf = (float)m; +    uint indx = (ux & 0x007f0000) + ((ux & 0x00008000) << 1); +    float F = as_float(indx | 0x3f000000); + +    // x > 2^24 +    float fg24 = F - as_float(0x3f000000 | (ux & MANTBITS_SP32)); + +    // x <= 2^24 +    uint xhi = ux & 0xffff8000; +    float xh = as_float(xhi); +    float xt = (1.0f - xh) + w; +    uint xnm = ((~(xhi & 0x7f800000)) - 0x00800000) & 0x7f800000; +    xt = xt * as_float(xnm) * 0.5f; +    float fl24 = F - as_float(0x3f000000 | (xhi & MANTBITS_SP32)) - xt; + +    float f = mf > 24.0f ? fg24 : fl24; + +    indx = indx >> 16; +    float r = f * USE_TABLE(log_inv_tbl, indx); + +    // 1/3, 1/2 +    float poly = mad(mad(r, 0x1.555556p-2f, 0x1.0p-1f), r*r, r); + +    const float LOG2_HEAD = 0x1.62e000p-1f;   // 0.693115234 +    const float LOG2_TAIL = 0x1.0bfbe8p-15f;  // 0.0000319461833 + +    float2 tv = USE_TABLE(loge_tbl, indx); +    float z1 = mad(mf, LOG2_HEAD, tv.s0); +    float z2 = mad(mf, LOG2_TAIL, -poly) + tv.s1; +    float z = z1 + z2; + +    z = ax < 0x3d800000U ? zsmall : z; + + + +    // Edge cases +    z = ax >= PINFBITPATT_SP32 ? w : z; +    z = w  < -1.0f ? as_float(QNANBITPATT_SP32) : z; +    z = w == -1.0f ? as_float(NINFBITPATT_SP32) : z; +        //fix subnormals +        z = ax  < 0x33800000 ? x : z; + +    return z; +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log1p, float); + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +_CLC_OVERLOAD _CLC_DEF double log1p(double x) +{ +    // Computes natural log(1+x). Algorithm based on: +    // Ping-Tak Peter Tang +    // "Table-driven implementation of the logarithm function in IEEE +    // floating-point arithmetic" +    // ACM Transactions on Mathematical Software (TOMS) +    // Volume 16, Issue 4 (December 1990) +    // Note that we use a lookup table of size 64 rather than 128, +    // and compensate by having extra terms in the minimax polynomial +    // for the kernel approximation. + +    // Process Inside the threshold now +    ulong ux = as_ulong(1.0 + x); +    int xexp = ((as_int2(ux).hi >> 20) & 0x7ff) - EXPBIAS_DP64; +    double f = as_double(ONEEXPBITS_DP64 | (ux & MANTBITS_DP64)); + +    int j = as_int2(ux).hi >> 13; +    j = ((0x80 | (j & 0x7e)) >> 1) + (j & 0x1); +    double f1 = (double)j * 0x1.0p-6; +    j -= 64; + +    double f2temp = f - f1; +    double m2 = as_double(convert_ulong(0x3ff - xexp) << EXPSHIFTBITS_DP64); +    double f2l = fma(m2, x, m2 - f1); +    double f2g = fma(m2, x, -f1) + m2; +    double f2 = xexp <= MANTLENGTH_DP64-1 ? f2l : f2g; +    f2 = (xexp <= -2) | (xexp >= MANTLENGTH_DP64+8) ? f2temp : f2; + +    double2 tv = USE_TABLE(ln_tbl, j); +    double z1 = tv.s0; +    double q = tv.s1; + +    double u = MATH_DIVIDE(f2, fma(0.5, f2, f1)); +    double v = u * u; + +    double poly = v * fma(v, +                          fma(v, 2.23219810758559851206e-03, 1.24999999978138668903e-02), +                          8.33333333333333593622e-02); + +    // log2_lead and log2_tail sum to an extra-precise version of log(2) +    const double log2_lead = 6.93147122859954833984e-01; /* 0x3fe62e42e0000000 */ +    const double log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */ + +    double z2 = q + fma(u, poly, u); +    double dxexp = (double)xexp; +    double r1 = fma(dxexp, log2_lead, z1); +    double r2 = fma(dxexp, log2_tail, z2); +    double result1 = r1 + r2; + +    // Process Outside the threshold now +    double r = x; +    u = r / (2.0 + r); +    double correction = r * u; +    u = u + u; +    v = u * u; +    r1 = r; + +    poly = fma(v, +               fma(v, +                   fma(v, 4.34887777707614552256e-04, 2.23213998791944806202e-03), +                   1.25000000037717509602e-02), +               8.33333333333317923934e-02); + +    r2 = fma(u*v, poly, -correction); + +    // The values exp(-1/16)-1 and exp(1/16)-1 +    const double log1p_thresh1 = -0x1.f0540438fd5c3p-5; +    const double log1p_thresh2 =  0x1.082b577d34ed8p-4; +    double result2 = r1 + r2; +    result2 = x < log1p_thresh1 | x > log1p_thresh2 ? result1 : result2; + +    result2 = isinf(x) ? x : result2; +    result2 = x < -1.0 ? as_double(QNANBITPATT_DP64) : result2; +    result2 = x == -1.0 ? as_double(NINFBITPATT_DP64) : result2; +    return result2; +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log1p, double); + +#endif // cl_khr_fp64 diff --git a/libclc/generic/lib/math/math.h b/libclc/generic/lib/math/math.h index 53ed38a78a5..f46c7ea7a7d 100644 --- a/libclc/generic/lib/math/math.h +++ b/libclc/generic/lib/math/math.h @@ -61,4 +61,30 @@  #define MANTLENGTH_SP32   24  #define BASEDIGITS_SP32   7 +#ifdef cl_khr_fp64 + +#define SIGNBIT_DP64      0x8000000000000000L +#define EXSIGNBIT_DP64    0x7fffffffffffffffL +#define EXPBITS_DP64      0x7ff0000000000000L +#define MANTBITS_DP64     0x000fffffffffffffL +#define ONEEXPBITS_DP64   0x3ff0000000000000L +#define TWOEXPBITS_DP64   0x4000000000000000L +#define HALFEXPBITS_DP64  0x3fe0000000000000L +#define IMPBIT_DP64       0x0010000000000000L +#define QNANBITPATT_DP64  0x7ff8000000000000L +#define INDEFBITPATT_DP64 0xfff8000000000000L +#define PINFBITPATT_DP64  0x7ff0000000000000L +#define NINFBITPATT_DP64  0xfff0000000000000L +#define EXPBIAS_DP64      1023 +#define EXPSHIFTBITS_DP64 52 +#define BIASEDEMIN_DP64   1 +#define EMIN_DP64         -1022 +#define BIASEDEMAX_DP64   2046 /* 0x7fe */ +#define EMAX_DP64         1023 /* 0x3ff */ +#define LAMBDA_DP64       1.0e300 +#define MANTLENGTH_DP64   53 +#define BASEDIGITS_DP64   15 + +#endif // cl_khr_fp64 +  #define ALIGNED(x)	__attribute__((aligned(x))) diff --git a/libclc/generic/lib/math/tables.cl b/libclc/generic/lib/math/tables.cl new file mode 100644 index 00000000000..b5345a2cff1 --- /dev/null +++ b/libclc/generic/lib/math/tables.cl @@ -0,0 +1,366 @@ +/* + * Copyright (c) 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <clc/clc.h> + +#include "tables.h" + +DECLARE_TABLE(float2, LOGE_TBL, 129) = { +    (float2)(0x0.000000p+0f, 0x0.000000p+0f), +    (float2)(0x1.fe0000p-8f, 0x1.535882p-23f), +    (float2)(0x1.fc0000p-7f, 0x1.5161f8p-20f), +    (float2)(0x1.7b8000p-6f, 0x1.1b07d4p-18f), +    (float2)(0x1.f82000p-6f, 0x1.361cf0p-19f), +    (float2)(0x1.39e000p-5f, 0x1.0f73fcp-18f), +    (float2)(0x1.774000p-5f, 0x1.63d8cap-19f), +    (float2)(0x1.b42000p-5f, 0x1.bae232p-18f), +    (float2)(0x1.f0a000p-5f, 0x1.86008ap-20f), +    (float2)(0x1.164000p-4f, 0x1.36eea2p-16f), +    (float2)(0x1.340000p-4f, 0x1.d7961ap-16f), +    (float2)(0x1.51a000p-4f, 0x1.073f06p-16f), +    (float2)(0x1.6f0000p-4f, 0x1.a515cap-17f), +    (float2)(0x1.8c2000p-4f, 0x1.45d630p-16f), +    (float2)(0x1.a92000p-4f, 0x1.b4e92ap-18f), +    (float2)(0x1.c5e000p-4f, 0x1.523d6ep-18f), +    (float2)(0x1.e26000p-4f, 0x1.076e2ap-16f), +    (float2)(0x1.fec000p-4f, 0x1.2263b6p-17f), +    (float2)(0x1.0d6000p-3f, 0x1.7e7cd0p-15f), +    (float2)(0x1.1b6000p-3f, 0x1.2ad52ep-15f), +    (float2)(0x1.294000p-3f, 0x1.52f81ep-15f), +    (float2)(0x1.370000p-3f, 0x1.fc201ep-15f), +    (float2)(0x1.44c000p-3f, 0x1.2b6ccap-15f), +    (float2)(0x1.526000p-3f, 0x1.cbc742p-16f), +    (float2)(0x1.5fe000p-3f, 0x1.3070a6p-15f), +    (float2)(0x1.6d6000p-3f, 0x1.fce33ap-20f), +    (float2)(0x1.7aa000p-3f, 0x1.890210p-15f), +    (float2)(0x1.87e000p-3f, 0x1.a06520p-15f), +    (float2)(0x1.952000p-3f, 0x1.6a73d0p-17f), +    (float2)(0x1.a22000p-3f, 0x1.bc1fe2p-15f), +    (float2)(0x1.af2000p-3f, 0x1.c94e80p-15f), +    (float2)(0x1.bc2000p-3f, 0x1.0ce85ap-16f), +    (float2)(0x1.c8e000p-3f, 0x1.f7c79ap-15f), +    (float2)(0x1.d5c000p-3f, 0x1.0b5a7cp-18f), +    (float2)(0x1.e26000p-3f, 0x1.076e2ap-15f), +    (float2)(0x1.ef0000p-3f, 0x1.5b97b8p-16f), +    (float2)(0x1.fb8000p-3f, 0x1.186d5ep-15f), +    (float2)(0x1.040000p-2f, 0x1.2ca5a6p-17f), +    (float2)(0x1.0a2000p-2f, 0x1.24e272p-14f), +    (float2)(0x1.104000p-2f, 0x1.8bf9aep-14f), +    (float2)(0x1.166000p-2f, 0x1.5cabaap-14f), +    (float2)(0x1.1c8000p-2f, 0x1.3182d2p-15f), +    (float2)(0x1.228000p-2f, 0x1.41fbcep-14f), +    (float2)(0x1.288000p-2f, 0x1.5a13dep-14f), +    (float2)(0x1.2e8000p-2f, 0x1.c575c2p-15f), +    (float2)(0x1.346000p-2f, 0x1.dd9a98p-14f), +    (float2)(0x1.3a6000p-2f, 0x1.3155a4p-16f), +    (float2)(0x1.404000p-2f, 0x1.843434p-17f), +    (float2)(0x1.460000p-2f, 0x1.8bc21cp-14f), +    (float2)(0x1.4be000p-2f, 0x1.7e55dcp-16f), +    (float2)(0x1.51a000p-2f, 0x1.5b0e5ap-15f), +    (float2)(0x1.576000p-2f, 0x1.dc5d14p-16f), +    (float2)(0x1.5d0000p-2f, 0x1.bdbf58p-14f), +    (float2)(0x1.62c000p-2f, 0x1.05e572p-15f), +    (float2)(0x1.686000p-2f, 0x1.903d36p-15f), +    (float2)(0x1.6e0000p-2f, 0x1.1d5456p-15f), +    (float2)(0x1.738000p-2f, 0x1.d7f6bap-14f), +    (float2)(0x1.792000p-2f, 0x1.4abfbap-15f), +    (float2)(0x1.7ea000p-2f, 0x1.f07704p-15f), +    (float2)(0x1.842000p-2f, 0x1.a3b43cp-15f), +    (float2)(0x1.89a000p-2f, 0x1.9c360ap-17f), +    (float2)(0x1.8f0000p-2f, 0x1.1e8736p-14f), +    (float2)(0x1.946000p-2f, 0x1.941c20p-14f), +    (float2)(0x1.99c000p-2f, 0x1.958116p-14f), +    (float2)(0x1.9f2000p-2f, 0x1.23ecbep-14f), +    (float2)(0x1.a48000p-2f, 0x1.024396p-16f), +    (float2)(0x1.a9c000p-2f, 0x1.d93534p-15f), +    (float2)(0x1.af0000p-2f, 0x1.293246p-14f), +    (float2)(0x1.b44000p-2f, 0x1.eef798p-15f), +    (float2)(0x1.b98000p-2f, 0x1.625a4cp-16f), +    (float2)(0x1.bea000p-2f, 0x1.4d9da6p-14f), +    (float2)(0x1.c3c000p-2f, 0x1.d7a7ccp-14f), +    (float2)(0x1.c8e000p-2f, 0x1.f7c79ap-14f), +    (float2)(0x1.ce0000p-2f, 0x1.af0b84p-14f), +    (float2)(0x1.d32000p-2f, 0x1.fcfc00p-15f), +    (float2)(0x1.d82000p-2f, 0x1.e7258ap-14f), +    (float2)(0x1.dd4000p-2f, 0x1.a81306p-16f), +    (float2)(0x1.e24000p-2f, 0x1.1034f8p-15f), +    (float2)(0x1.e74000p-2f, 0x1.09875ap-16f), +    (float2)(0x1.ec2000p-2f, 0x1.99d246p-14f), +    (float2)(0x1.f12000p-2f, 0x1.1ebf5ep-15f), +    (float2)(0x1.f60000p-2f, 0x1.23fa70p-14f), +    (float2)(0x1.fae000p-2f, 0x1.588f78p-14f), +    (float2)(0x1.ffc000p-2f, 0x1.2e0856p-14f), +    (float2)(0x1.024000p-1f, 0x1.52a5a4p-13f), +    (float2)(0x1.04a000p-1f, 0x1.df9da8p-13f), +    (float2)(0x1.072000p-1f, 0x1.f2e0e6p-16f), +    (float2)(0x1.098000p-1f, 0x1.bd3d5cp-15f), +    (float2)(0x1.0be000p-1f, 0x1.cb9094p-15f), +    (float2)(0x1.0e4000p-1f, 0x1.261746p-15f), +    (float2)(0x1.108000p-1f, 0x1.f39e2cp-13f), +    (float2)(0x1.12e000p-1f, 0x1.719592p-13f), +    (float2)(0x1.154000p-1f, 0x1.87a5e8p-14f), +    (float2)(0x1.178000p-1f, 0x1.eabbd8p-13f), +    (float2)(0x1.19e000p-1f, 0x1.cd68cep-14f), +    (float2)(0x1.1c2000p-1f, 0x1.b81f70p-13f), +    (float2)(0x1.1e8000p-1f, 0x1.7d79c0p-15f), +    (float2)(0x1.20c000p-1f, 0x1.b9a324p-14f), +    (float2)(0x1.230000p-1f, 0x1.30d7bep-13f), +    (float2)(0x1.254000p-1f, 0x1.5bce98p-13f), +    (float2)(0x1.278000p-1f, 0x1.5e1288p-13f), +    (float2)(0x1.29c000p-1f, 0x1.37fec2p-13f), +    (float2)(0x1.2c0000p-1f, 0x1.d3da88p-14f), +    (float2)(0x1.2e4000p-1f, 0x1.d0db90p-15f), +    (float2)(0x1.306000p-1f, 0x1.d7334ep-13f), +    (float2)(0x1.32a000p-1f, 0x1.133912p-13f), +    (float2)(0x1.34e000p-1f, 0x1.44ece6p-16f), +    (float2)(0x1.370000p-1f, 0x1.17b546p-13f), +    (float2)(0x1.392000p-1f, 0x1.e0d356p-13f), +    (float2)(0x1.3b6000p-1f, 0x1.0893fep-14f), +    (float2)(0x1.3d8000p-1f, 0x1.026a70p-13f), +    (float2)(0x1.3fa000p-1f, 0x1.5b84d0p-13f), +    (float2)(0x1.41c000p-1f, 0x1.8fe846p-13f), +    (float2)(0x1.43e000p-1f, 0x1.9fe2f8p-13f), +    (float2)(0x1.460000p-1f, 0x1.8bc21cp-13f), +    (float2)(0x1.482000p-1f, 0x1.53d1eap-13f), +    (float2)(0x1.4a4000p-1f, 0x1.f0bb60p-14f), +    (float2)(0x1.4c6000p-1f, 0x1.e6bf32p-15f), +    (float2)(0x1.4e6000p-1f, 0x1.d811b6p-13f), +    (float2)(0x1.508000p-1f, 0x1.13cc00p-13f), +    (float2)(0x1.52a000p-1f, 0x1.6932dep-16f), +    (float2)(0x1.54a000p-1f, 0x1.246798p-13f), +    (float2)(0x1.56a000p-1f, 0x1.f9d5b2p-13f), +    (float2)(0x1.58c000p-1f, 0x1.5b6b9ap-14f), +    (float2)(0x1.5ac000p-1f, 0x1.404c34p-13f), +    (float2)(0x1.5cc000p-1f, 0x1.b1dc6cp-13f), +    (float2)(0x1.5ee000p-1f, 0x1.54920ap-20f), +    (float2)(0x1.60e000p-1f, 0x1.97a23cp-16f), +    (float2)(0x1.62e000p-1f, 0x1.0bfbe8p-15f), +}; + +DECLARE_TABLE(float, LOG_INV_TBL, 129) = { +    0x1.000000p+1f, +    0x1.fc07f0p+0f, +    0x1.f81f82p+0f, +    0x1.f4465ap+0f, +    0x1.f07c20p+0f, +    0x1.ecc07cp+0f, +    0x1.e9131ap+0f, +    0x1.e573acp+0f, +    0x1.e1e1e2p+0f, +    0x1.de5d6ep+0f, +    0x1.dae608p+0f, +    0x1.d77b66p+0f, +    0x1.d41d42p+0f, +    0x1.d0cb58p+0f, +    0x1.cd8568p+0f, +    0x1.ca4b30p+0f, +    0x1.c71c72p+0f, +    0x1.c3f8f0p+0f, +    0x1.c0e070p+0f, +    0x1.bdd2b8p+0f, +    0x1.bacf92p+0f, +    0x1.b7d6c4p+0f, +    0x1.b4e81cp+0f, +    0x1.b20364p+0f, +    0x1.af286cp+0f, +    0x1.ac5702p+0f, +    0x1.a98ef6p+0f, +    0x1.a6d01ap+0f, +    0x1.a41a42p+0f, +    0x1.a16d40p+0f, +    0x1.9ec8eap+0f, +    0x1.9c2d14p+0f, +    0x1.99999ap+0f, +    0x1.970e50p+0f, +    0x1.948b10p+0f, +    0x1.920fb4p+0f, +    0x1.8f9c18p+0f, +    0x1.8d3018p+0f, +    0x1.8acb90p+0f, +    0x1.886e60p+0f, +    0x1.861862p+0f, +    0x1.83c978p+0f, +    0x1.818182p+0f, +    0x1.7f4060p+0f, +    0x1.7d05f4p+0f, +    0x1.7ad220p+0f, +    0x1.78a4c8p+0f, +    0x1.767dcep+0f, +    0x1.745d18p+0f, +    0x1.724288p+0f, +    0x1.702e06p+0f, +    0x1.6e1f76p+0f, +    0x1.6c16c2p+0f, +    0x1.6a13cep+0f, +    0x1.681682p+0f, +    0x1.661ec6p+0f, +    0x1.642c86p+0f, +    0x1.623fa8p+0f, +    0x1.605816p+0f, +    0x1.5e75bcp+0f, +    0x1.5c9882p+0f, +    0x1.5ac056p+0f, +    0x1.58ed24p+0f, +    0x1.571ed4p+0f, +    0x1.555556p+0f, +    0x1.539094p+0f, +    0x1.51d07ep+0f, +    0x1.501502p+0f, +    0x1.4e5e0ap+0f, +    0x1.4cab88p+0f, +    0x1.4afd6ap+0f, +    0x1.49539ep+0f, +    0x1.47ae14p+0f, +    0x1.460cbcp+0f, +    0x1.446f86p+0f, +    0x1.42d662p+0f, +    0x1.414142p+0f, +    0x1.3fb014p+0f, +    0x1.3e22ccp+0f, +    0x1.3c995ap+0f, +    0x1.3b13b2p+0f, +    0x1.3991c2p+0f, +    0x1.381382p+0f, +    0x1.3698e0p+0f, +    0x1.3521d0p+0f, +    0x1.33ae46p+0f, +    0x1.323e34p+0f, +    0x1.30d190p+0f, +    0x1.2f684cp+0f, +    0x1.2e025cp+0f, +    0x1.2c9fb4p+0f, +    0x1.2b404ap+0f, +    0x1.29e412p+0f, +    0x1.288b02p+0f, +    0x1.27350cp+0f, +    0x1.25e228p+0f, +    0x1.24924ap+0f, +    0x1.234568p+0f, +    0x1.21fb78p+0f, +    0x1.20b470p+0f, +    0x1.1f7048p+0f, +    0x1.1e2ef4p+0f, +    0x1.1cf06ap+0f, +    0x1.1bb4a4p+0f, +    0x1.1a7b96p+0f, +    0x1.194538p+0f, +    0x1.181182p+0f, +    0x1.16e068p+0f, +    0x1.15b1e6p+0f, +    0x1.1485f0p+0f, +    0x1.135c82p+0f, +    0x1.12358ep+0f, +    0x1.111112p+0f, +    0x1.0fef02p+0f, +    0x1.0ecf56p+0f, +    0x1.0db20ap+0f, +    0x1.0c9714p+0f, +    0x1.0b7e6ep+0f, +    0x1.0a6810p+0f, +    0x1.0953f4p+0f, +    0x1.084210p+0f, +    0x1.073260p+0f, +    0x1.0624dep+0f, +    0x1.051980p+0f, +    0x1.041042p+0f, +    0x1.03091cp+0f, +    0x1.020408p+0f, +    0x1.010102p+0f, +    0x1.000000p+0f, +}; + +TABLE_FUNCTION(float2, LOGE_TBL, loge_tbl); +TABLE_FUNCTION(float, LOG_INV_TBL, log_inv_tbl); + +#ifdef cl_khr_fp64 + +DECLARE_TABLE(double2, LN_TBL, 65) = { +    (double2)(0x0.0000000000000p+0, 0x0.0000000000000p+0), +    (double2)(0x1.fc0a800000000p-7, 0x1.61f807c79f3dbp-28), +    (double2)(0x1.f829800000000p-6, 0x1.873c1980267c8p-25), +    (double2)(0x1.7745800000000p-5, 0x1.ec65b9f88c69ep-26), +    (double2)(0x1.f0a3000000000p-5, 0x1.8022c54cc2f99p-26), +    (double2)(0x1.341d700000000p-4, 0x1.2c37a3a125330p-25), +    (double2)(0x1.6f0d200000000p-4, 0x1.15cad69737c93p-25), +    (double2)(0x1.a926d00000000p-4, 0x1.d256ab1b285e9p-27), +    (double2)(0x1.e270700000000p-4, 0x1.b8abcb97a7aa2p-26), +    (double2)(0x1.0d77e00000000p-3, 0x1.f34239659a5dcp-25), +    (double2)(0x1.2955280000000p-3, 0x1.e07fd48d30177p-25), +    (double2)(0x1.44d2b00000000p-3, 0x1.b32df4799f4f6p-25), +    (double2)(0x1.5ff3000000000p-3, 0x1.c29e4f4f21cf8p-25), +    (double2)(0x1.7ab8900000000p-3, 0x1.086c848df1b59p-30), +    (double2)(0x1.9525a80000000p-3, 0x1.cf456b4764130p-27), +    (double2)(0x1.af3c900000000p-3, 0x1.3a02ffcb63398p-25), +    (double2)(0x1.c8ff780000000p-3, 0x1.1e6a6886b0976p-25), +    (double2)(0x1.e270700000000p-3, 0x1.b8abcb97a7aa2p-25), +    (double2)(0x1.fb91800000000p-3, 0x1.b578f8aa35552p-25), +    (double2)(0x1.0a324c0000000p-2, 0x1.139c871afb9fcp-25), +    (double2)(0x1.1675c80000000p-2, 0x1.5d5d30701ce64p-25), +    (double2)(0x1.22941c0000000p-2, 0x1.de7bcb2d12142p-25), +    (double2)(0x1.2e8e280000000p-2, 0x1.d708e984e1664p-25), +    (double2)(0x1.3a64c40000000p-2, 0x1.56945e9c72f36p-26), +    (double2)(0x1.4618bc0000000p-2, 0x1.0e2f613e85bdap-29), +    (double2)(0x1.51aad80000000p-2, 0x1.cb7e0b42724f6p-28), +    (double2)(0x1.5d1bd80000000p-2, 0x1.fac04e52846c7p-25), +    (double2)(0x1.686c800000000p-2, 0x1.e9b14aec442bep-26), +    (double2)(0x1.739d7c0000000p-2, 0x1.b5de8034e7126p-25), +    (double2)(0x1.7eaf800000000p-2, 0x1.dc157e1b259d3p-25), +    (double2)(0x1.89a3380000000p-2, 0x1.b05096ad69c62p-28), +    (double2)(0x1.9479400000000p-2, 0x1.c2116faba4cddp-26), +    (double2)(0x1.9f323c0000000p-2, 0x1.65fcc25f95b47p-25), +    (double2)(0x1.a9cec80000000p-2, 0x1.a9a08498d4850p-26), +    (double2)(0x1.b44f740000000p-2, 0x1.de647b1465f77p-25), +    (double2)(0x1.beb4d80000000p-2, 0x1.da71b7bf7861dp-26), +    (double2)(0x1.c8ff7c0000000p-2, 0x1.e6a6886b09760p-28), +    (double2)(0x1.d32fe40000000p-2, 0x1.f0075eab0ef64p-25), +    (double2)(0x1.dd46a00000000p-2, 0x1.3071282fb989bp-28), +    (double2)(0x1.e744240000000p-2, 0x1.0eb43c3f1bed2p-25), +    (double2)(0x1.f128f40000000p-2, 0x1.faf06ecb35c84p-26), +    (double2)(0x1.faf5880000000p-2, 0x1.ef1e63db35f68p-27), +    (double2)(0x1.02552a0000000p-1, 0x1.69743fb1a71a5p-27), +    (double2)(0x1.0723e40000000p-1, 0x1.c1cdf404e5796p-25), +    (double2)(0x1.0be72e0000000p-1, 0x1.094aa0ada625ep-27), +    (double2)(0x1.109f380000000p-1, 0x1.e2d4c96fde3ecp-25), +    (double2)(0x1.154c3c0000000p-1, 0x1.2f4d5e9a98f34p-25), +    (double2)(0x1.19ee6a0000000p-1, 0x1.467c96ecc5cbep-25), +    (double2)(0x1.1e85f40000000p-1, 0x1.e7040d03dec5ap-25), +    (double2)(0x1.23130c0000000p-1, 0x1.7bebf4282de36p-25), +    (double2)(0x1.2795e00000000p-1, 0x1.289b11aeb783fp-25), +    (double2)(0x1.2c0e9e0000000p-1, 0x1.a891d1772f538p-26), +    (double2)(0x1.307d720000000p-1, 0x1.34f10be1fb591p-25), +    (double2)(0x1.34e2880000000p-1, 0x1.d9ce1d316eb93p-25), +    (double2)(0x1.393e0c0000000p-1, 0x1.3562a19a9c442p-25), +    (double2)(0x1.3d90260000000p-1, 0x1.4e2adf548084cp-26), +    (double2)(0x1.41d8fe0000000p-1, 0x1.08ce55cc8c97ap-26), +    (double2)(0x1.4618bc0000000p-1, 0x1.0e2f613e85bdap-28), +    (double2)(0x1.4a4f840000000p-1, 0x1.db03ebb0227bfp-25), +    (double2)(0x1.4e7d800000000p-1, 0x1.1b75bb09cb098p-25), +    (double2)(0x1.52a2d20000000p-1, 0x1.96f16abb9df22p-27), +    (double2)(0x1.56bf9c0000000p-1, 0x1.5b3f399411c62p-25), +    (double2)(0x1.5ad4040000000p-1, 0x1.86b3e59f65355p-26), +    (double2)(0x1.5ee02a0000000p-1, 0x1.2482ceae1ac12p-26), +    (double2)(0x1.62e42e0000000p-1, 0x1.efa39ef35793cp-25), +}; + +TABLE_FUNCTION(double2, LN_TBL, ln_tbl); + +#endif // cl_khr_fp64 diff --git a/libclc/generic/lib/math/tables.h b/libclc/generic/lib/math/tables.h new file mode 100644 index 00000000000..925544064a5 --- /dev/null +++ b/libclc/generic/lib/math/tables.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#define TABLE_SPACE __constant + +#define TABLE_MANGLE(NAME) __clc_##NAME + +#define DECLARE_TABLE(TYPE,NAME,LENGTH) \ +    TABLE_SPACE TYPE NAME [ LENGTH ] + +#define TABLE_FUNCTION(TYPE,TABLE,NAME) \ +    TYPE TABLE_MANGLE(NAME)(size_t idx) { \ +        return TABLE[idx]; \ +    } + +#define TABLE_FUNCTION_DECL(TYPE, NAME) \ +    TYPE TABLE_MANGLE(NAME)(size_t idx); + +#define USE_TABLE(NAME, IDX) \ +    TABLE_MANGLE(NAME)(IDX) + +TABLE_FUNCTION_DECL(float2, loge_tbl); +TABLE_FUNCTION_DECL(float, log_inv_tbl); + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +TABLE_FUNCTION_DECL(double2, ln_tbl); + +#endif // cl_khr_fp64 | 

