diff options
Diffstat (limited to 'libclc/generic/lib/math')
-rw-r--r-- | libclc/generic/lib/math/log2.cl | 37 | ||||
-rw-r--r-- | libclc/generic/lib/math/log_base.h | 299 | ||||
-rw-r--r-- | libclc/generic/lib/math/tables.cl | 132 | ||||
-rw-r--r-- | libclc/generic/lib/math/tables.h | 1 |
4 files changed, 469 insertions, 0 deletions
diff --git a/libclc/generic/lib/math/log2.cl b/libclc/generic/lib/math/log2.cl new file mode 100644 index 00000000000..df6eeb28f6f --- /dev/null +++ b/libclc/generic/lib/math/log2.cl @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <clc/clc.h> +#include "../clcmacro.h" +#include "tables.h" + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +#endif // cl_khr_fp64 + +#define COMPILING_LOG2 +#include "log_base.h" +#undef COMPILING_LOG2 + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log2, float); + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log2, double); diff --git a/libclc/generic/lib/math/log_base.h b/libclc/generic/lib/math/log_base.h new file mode 100644 index 00000000000..bf2f82b7302 --- /dev/null +++ b/libclc/generic/lib/math/log_base.h @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2014,2015 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include "math.h"
+
+/*
+   Algorithm:
+
+   Based on:
+   Ping-Tak Peter Tang
+   "Table-driven implementation of the logarithm function in IEEE
+   floating-point arithmetic"
+   ACM Transactions on Mathematical Software (TOMS)
+   Volume 16, Issue 4 (December 1990)
+
+
+   x very close to 1.0 is handled differently, for x everywhere else
+   a brief explanation is given below
+
+   x = (2^m)*A
+   x = (2^m)*(G+g) with (1 <= G < 2) and (g <= 2^(-8))
+   x = (2^m)*2*(G/2+g/2)
+   x = (2^m)*2*(F+f) with (0.5 <= F < 1) and (f <= 2^(-9))
+
+   Y = (2^(-1))*(2^(-m))*(2^m)*A
+   Now, range of Y is: 0.5 <= Y < 1
+
+   F = 0x80 + (first 7 mantissa bits) + (8th mantissa bit)
+   Now, range of F is: 128 <= F <= 256
+   F = F / 256
+   Now, range of F is: 0.5 <= F <= 1
+
+   f = -(Y-F), with (f <= 2^(-9))
+
+   log(x) = m*log(2) + log(2) + log(F-f)
+   log(x) = m*log(2) + log(2) + log(F) + log(1-(f/F))
+   log(x) = m*log(2) + log(2*F) + log(1-r)
+
+   r = (f/F), with (r <= 2^(-8))
+   r = f*(1/F) with (1/F) precomputed to avoid division
+
+   log(x) = m*log(2) + log(G) - poly
+
+   log(G) is precomputed
+   poly = (r + (r^2)/2 + (r^3)/3 + (r^4)/4 + (r^5)/5)
+
+   log(2) and log(G) need to be maintained in extra precision
+   to avoid losing precision in the calculations
+
+
+   For x close to 1.0, we employ the following technique to
+   ensure faster convergence.
+
+   log(x) = log((1+s)/(1-s)) = 2*s + (2/3)*s^3 + (2/5)*s^5 + (2/7)*s^7
+   x = ((1+s)/(1-s))
+   x = 1 + r
+   s = r/(2+r)
+
+*/
+
+/*
+ * Single-precision logarithm template.
+ *
+ * The including file selects the function that is emitted: log2 when
+ * COMPILING_LOG2 is defined, log10 when COMPILING_LOG10 is defined, and the
+ * natural log otherwise.
+ *
+ * Both a "near 1" result (znear1) and a table-driven result (z1 + z2) are
+ * computed unconditionally; the near-1 result is selected when
+ * |x - 1| < 2^-4. The corner-case selects at the end are applied in order, so
+ * a later one overrides an earlier one.
+ *
+ * NOTE(review): the log10 and natural-log branches use a three-argument
+ * USE_TABLE(type, ptr, TABLE) declaration plus p_log[indx], while the log2
+ * branch uses the two-argument USE_TABLE(name, index) lookup. The macro
+ * definition is not visible here -- confirm both forms are supported before
+ * building this header without COMPILING_LOG2.
+ */
+_CLC_OVERLOAD _CLC_DEF float
+#if defined(COMPILING_LOG2)
+log2(float x)
+#elif defined(COMPILING_LOG10)
+log10(float x)
+#else
+log(float x)
+#endif
+{
+
+    // Base-conversion constants. Each *_HEAD/*_TAIL pair is a two-part split
+    // of one constant (e.g. LOG2E_HEAD + LOG2E_TAIL sums to LOG2E per the
+    // decimal values noted beside them).
+#if defined(COMPILING_LOG2)
+    const float LOG2E = 0x1.715476p+0f; // 1.4426950408889634
+    const float LOG2E_HEAD = 0x1.700000p+0f; // 1.4375
+    const float LOG2E_TAIL = 0x1.547652p-8f; // 0.00519504072
+#elif defined(COMPILING_LOG10)
+    USE_TABLE(float2, p_log, LOG10_TBL);
+    const float LOG10E = 0x1.bcb7b2p-2f; // 0.43429448190325182
+    const float LOG10E_HEAD = 0x1.bc0000p-2f; // 0.43359375
+    const float LOG10E_TAIL = 0x1.6f62a4p-11f; // 0.0007007319
+    const float LOG10_2_HEAD = 0x1.340000p-2f; // 0.30078125
+    const float LOG10_2_TAIL = 0x1.04d426p-12f; // 0.000248745637
+#else
+    USE_TABLE(float2, p_log, LOGE_TBL);
+    const float LOG2_HEAD = 0x1.62e000p-1f; // 0.693115234
+    const float LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833
+#endif
+
+    // xi: raw bits of x. ax: xi masked with EXSIGNBIT_SP32 (presumably the
+    // bits without the sign; the constant is defined outside this file).
+    uint xi = as_uint(x);
+    uint ax = xi & EXSIGNBIT_SP32;
+
+    // Calculations for |x-1| < 2^-4
+    // With r = x - 1: u2 = r/(2+r) is the "s" of the header comment and
+    // u = 2*s. corr = r*u2 equals r - u, so r + z2 below reconstructs
+    // u + u^3/12 + u^5/80.
+    float r = x - 1.0f;
+    int near1 = fabs(r) < 0x1.0p-4f;
+    float u2 = MATH_DIVIDE(r, 2.0f + r);
+    float corr = u2 * r;
+    float u = u2 + u2;
+    float v = u * u;
+    float znear1, z1, z2;
+
+    // 2/(5 * 2^5), 2/(3 * 2^3)
+    z2 = mad(u, mad(v, 0x1.99999ap-7f, 0x1.555556p-4f)*v, -corr);
+
+    // For log2/log10 the natural-log value r + z2 is multiplied by the
+    // conversion constant in split form: z1 is r with its low 16 bits cleared,
+    // the remainder is folded into z2, and the four head/tail partial
+    // products are accumulated smallest first.
+#if defined(COMPILING_LOG2)
+    z1 = as_float(as_int(r) & 0xffff0000);
+    z2 = z2 + (r - z1);
+    znear1 = mad(z1, LOG2E_HEAD, mad(z2, LOG2E_HEAD, mad(z1, LOG2E_TAIL, z2*LOG2E_TAIL)));
+#elif defined(COMPILING_LOG10)
+    z1 = as_float(as_int(r) & 0xffff0000);
+    z2 = z2 + (r - z1);
+    znear1 = mad(z1, LOG10E_HEAD, mad(z2, LOG10E_HEAD, mad(z1, LOG10E_TAIL, z2*LOG10E_TAIL)));
+#else
+    znear1 = z2 + r;
+#endif
+
+    // Calculations for x not near 1
+    // m: exponent field of x minus EXPBIAS_SP32.
+    int m = (int)(xi >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
+
+    // Normalize subnormal
+    // OR-ing in 0x3f800000 (the bits of 1.0f) and subtracting 1.0f yields a
+    // normalized float carrying the same mantissa bits; ms is the exponent
+    // recomputed from that value. The subnormal variants are used only when
+    // m == -127.
+    uint xis = as_uint(as_float(xi | 0x3f800000) - 1.0f);
+    int ms = (int)(xis >> EXPSHIFTBITS_SP32) - 253;
+    int c = m == -127;
+    m = c ? ms : m;
+    uint xin = c ? xis : xi;
+
+    float mf = (float)m;
+    // Top 7 mantissa bits plus the 8th bit added in as a round-up; after the
+    // later >> 16 this is a table index in 0..128.
+    uint indx = (xin & 0x007f0000) + ((xin & 0x00008000) << 1);
+
+    // F - Y
+    // Both operands are rebuilt with exponent bits 0x3f000000 (0.5f), i.e.
+    // scaled into [0.5, 1].
+    float f = as_float(0x3f000000 | indx) - as_float(0x3f000000 | (xin & MANTBITS_SP32));
+
+    indx = indx >> 16;
+    // r = f * (1/F), with the reciprocal read from log_inv_tbl.
+    r = f * USE_TABLE(log_inv_tbl, indx);
+
+    // 1/3, 1/2
+    // poly = r + r^2/2 + r^3/3
+    float poly = mad(mad(r, 0x1.555556p-2f, 0.5f), r*r, r);
+
+    // Combine exponent, table entry (tv.s0 = leading part, tv.s1 = trailing
+    // part) and polynomial. z1 collects the large terms, z2 the small ones.
+#if defined(COMPILING_LOG2)
+    float2 tv = USE_TABLE(log2_tbl, indx);
+    z1 = tv.s0 + mf;
+    z2 = mad(poly, -LOG2E, tv.s1);
+#elif defined(COMPILING_LOG10)
+    float2 tv = p_log[indx];
+    z1 = mad(mf, LOG10_2_HEAD, tv.s0);
+    z2 = mad(poly, -LOG10E, mf*LOG10_2_TAIL) + tv.s1;
+#else
+    float2 tv = p_log[indx];
+    z1 = mad(mf, LOG2_HEAD, tv.s0);
+    z2 = mad(mf, LOG2_TAIL, -poly) + tv.s1;
+#endif
+
+    float z = z1 + z2;
+    z = near1 ? znear1 : z;
+
+    // Corner cases
+    // 1) ax >= PINFBITPATT_SP32: return the input itself (presumably covers
+    //    infinity and NaN).
+    // 2) xi != ax: the mask removed a set bit (presumably the sign bit, i.e.
+    //    negative input): return the QNANBITPATT_SP32 pattern.
+    // 3) ax == 0: return the NINFBITPATT_SP32 pattern; this runs last so it
+    //    also overrides case 2 when only the masked-off bit was set.
+    z = ax >= PINFBITPATT_SP32 ? x : z;
+    z = xi != ax ? as_float(QNANBITPATT_SP32) : z;
+    z = ax == 0 ? as_float(NINFBITPATT_SP32) : z;
+
+    return z;
+}
+
+#ifdef cl_khr_fp64
+
+/*
+ * Double-precision logarithm template; only compiled when cl_khr_fp64 is
+ * defined. Function selection follows the same COMPILING_LOG2 /
+ * COMPILING_LOG10 / default scheme as the float version above.
+ *
+ * A near-1 result (ret_near) and a table-driven result (ret_far) are both
+ * computed; ret_near is selected when log_thresh1 <= x <= log_thresh2.
+ */
+_CLC_OVERLOAD _CLC_DEF double
+#if defined(COMPILING_LOG2)
+log2(double x)
+#elif defined(COMPILING_LOG10)
+log10(double x)
+#else
+log(double x)
+#endif
+{
+
+#ifndef COMPILING_LOG2
+    // log2_lead and log2_tail sum to an extra-precise version of ln(2)
+    const double log2_lead = 6.93147122859954833984e-01; /* 0x3fe62e42e0000000 */
+    const double log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */
+#endif
+
+#if defined(COMPILING_LOG10)
+    // log10e_lead and log10e_tail sum to an extra-precision version of log10(e) (19 bits in lead)
+    const double log10e_lead = 4.34293746948242187500e-01; /* 0x3fdbcb7800000000 */
+    const double log10e_tail = 7.3495500964015109100644e-7; /* 0x3ea8a93728719535 */
+#elif defined(COMPILING_LOG2)
+    // log2e_lead and log2e_tail sum to an extra-precision version of log2(e) (19 bits in lead)
+    const double log2e_lead = 1.44269180297851562500E+00; /* 0x3FF7154400000000 */
+    const double log2e_tail = 3.23791044778235969970E-06; /* 0x3ECB295C17F0BBBE */
+#endif
+
+    // log_thresh1 = 9.39412117004394531250e-1 = 0x3fee0faa00000000
+    // log_thresh2 = 1.06449508666992187500 = 0x3ff1082c00000000
+    const double log_thresh1 = 0x1.e0faap-1;
+    const double log_thresh2 = 0x1.1082cp+0;
+
+    // Bitwise & of the two comparison results: nonzero only when x lies in
+    // [log_thresh1, log_thresh2].
+    int is_near = x >= log_thresh1 & x <= log_thresh2;
+
+    // Near 1 code
+    // Same scheme as the float path: u becomes 2*r/(2+r) and
+    // correction = r - u, so r1 + r2 reconstructs u plus the odd-power series
+    // in u.
+    double r = x - 1.0;
+    double u = r / (2.0 + r);
+    double correction = r * u;
+    u = u + u;
+    double v = u * u;
+    double r1 = r;
+
+    // Coefficients of u^3, u^5, u^7, u^9; numerically close to
+    // 1/12, 1/80, 1/448 and 1/2304.
+    const double ca_1 = 8.33333333333317923934e-02; /* 0x3fb55555555554e6 */
+    const double ca_2 = 1.25000000037717509602e-02; /* 0x3f89999999bac6d4 */
+    const double ca_3 = 2.23213998791944806202e-03; /* 0x3f62492307f1519f */
+    const double ca_4 = 4.34887777707614552256e-04; /* 0x3f3c8034c85dfff0 */
+
+    double r2 = fma(u*v, fma(v, fma(v, fma(v, ca_4, ca_3), ca_2), ca_1), -correction);
+
+    // For log10/log2, r1 is truncated to its upper 32 bits, the remainder is
+    // moved into r2, and (r1 + r2) is multiplied by the lead/tail constant as
+    // four partial products.
+#if defined(COMPILING_LOG10)
+    r = r1;
+    r1 = as_double(as_ulong(r1) & 0xffffffff00000000);
+    r2 = r2 + (r - r1);
+    double ret_near = fma(log10e_lead, r1, fma(log10e_lead, r2, fma(log10e_tail, r1, log10e_tail * r2)));
+#elif defined(COMPILING_LOG2)
+    r = r1;
+    r1 = as_double(as_ulong(r1) & 0xffffffff00000000);
+    r2 = r2 + (r - r1);
+    double ret_near = fma(log2e_lead, r1, fma(log2e_lead, r2, fma(log2e_tail, r1, log2e_tail*r2)));
+#else
+    double ret_near = r1 + r2;
+#endif
+
+    // This is the far from 1 code
+
+    // Deal with subnormal
+    // Inputs with bits below IMPBIT_DP64 are rescaled: OR-ing in the exponent
+    // field 0x03d and subtracting 0x1.0p-962 produces a normalized value, and
+    // expadjust = 60 compensates for that rescaling in the exponent.
+    ulong ux = as_ulong(x);
+    ulong uxs = as_ulong(as_double(0x03d0000000000000UL | ux) - 0x1.0p-962);
+    int c = ux < IMPBIT_DP64;
+    ux = c ? uxs : ux;
+    int expadjust = c ? 60 : 0;
+
+    // xexp: 11-bit exponent field minus bias (and the subnormal adjustment).
+    // f: mantissa of x re-tagged with HALFEXPBITS_DP64 (presumably the
+    // exponent of 0.5, placing f in [0.5, 1) -- constant defined elsewhere).
+    int xexp = ((as_int2(ux).hi >> 20) & 0x7ff) - EXPBIAS_DP64 - expadjust;
+    double f = as_double(HALFEXPBITS_DP64 | (ux & MANTBITS_DP64));
+    // index = 64 + top 6 mantissa bits, plus the 7th mantissa bit as a
+    // round-up, giving a value in 64..128.
+    int index = as_int2(ux).hi >> 13;
+    index = ((0x80 | (index & 0x7e)) >> 1) + (index & 0x1);
+
+    // Table entry for this interval: z1 = leading part, q = trailing part.
+    double2 tv = USE_TABLE(ln_tbl, index - 64);
+    double z1 = tv.s0;
+    double q = tv.s1;
+
+    // f1 = index/128 is the table point; u = f2/(f1 + f2/2) equals
+    // 2*(f - f1)/(f + f1).
+    double f1 = index * 0x1.0p-7;
+    double f2 = f - f1;
+    u = f2 / fma(f2, 0.5, f1);
+    v = u * u;
+
+    // Coefficients of u^3, u^5, u^7; numerically close to 1/12, 1/80, 1/448.
+    const double cb_1 = 8.33333333333333593622e-02; /* 0x3fb5555555555557 */
+    const double cb_2 = 1.24999999978138668903e-02; /* 0x3f89999999865ede */
+    const double cb_3 = 2.23219810758559851206e-03; /* 0x3f6249423bd94741 */
+
+    double poly = v * fma(v, fma(v, cb_3, cb_2), cb_1);
+    double z2 = q + fma(u, poly, u);
+
+    double dxexp = (double)xexp;
+#if defined (COMPILING_LOG10)
+    // Add xexp * log(2) to z1,z2 to get log(x)
+    r1 = fma(dxexp, log2_lead, z1);
+    r2 = fma(dxexp, log2_tail, z2);
+    double ret_far = fma(log10e_lead, r1, fma(log10e_lead, r2, fma(log10e_tail, r1, log10e_tail*r2)));
+#elif defined(COMPILING_LOG2)
+    // log2(x) = xexp + (z1 + z2) * log2(e); the exponent is added to the
+    // largest product and the remaining partial products go into r2.
+    r1 = fma(log2e_lead, z1, dxexp);
+    r2 = fma(log2e_lead, z2, fma(log2e_tail, z1, log2e_tail*z2));
+    double ret_far = r1 + r2;
+#else
+    r1 = fma(dxexp, log2_lead, z1);
+    r2 = fma(dxexp, log2_tail, z2);
+    double ret_far = r1 + r2;
+#endif
+
+    double ret = is_near ? ret_near : ret_far;
+
+    // Special cases, applied in order (later selects override earlier ones):
+    // infinite input -> PINFBITPATT_DP64 pattern; NaN or x < 0 (which
+    // includes negative infinity) -> QNANBITPATT_DP64 pattern; x == 0.0 ->
+    // NINFBITPATT_DP64 pattern.
+    ret = isinf(x) ? as_double(PINFBITPATT_DP64) : ret;
+    ret = isnan(x) | (x < 0.0) ? as_double(QNANBITPATT_DP64) : ret;
+    ret = x == 0.0 ? as_double(NINFBITPATT_DP64) : ret;
+    return ret;
+}
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/math/tables.cl b/libclc/generic/lib/math/tables.cl
index 5a620ec6189..8286efb7518 100644
--- a/libclc/generic/lib/math/tables.cl
+++ b/libclc/generic/lib/math/tables.cl
@@ -288,6 +288,137 @@ DECLARE_TABLE(float, LOG_INV_TBL, 129) = {
     0x1.000000p+0f,
 };

+// LOG2_TBL: 129 (leading, trailing) float pairs, read by the single-precision
+// log2 code as tv.s0 / tv.s1 with an index in 0..128. Entries run from (0, 0)
+// at index 0 to (1, 0) at index 128; leading + trailing of entry i appears to
+// approximate log2(1 + i/128) -- NOTE(review): confirm against the table
+// generator.
+DECLARE_TABLE(float2, LOG2_TBL, 129) = {
+    (float2)(0x0.000000p+0f, 0x0.000000p+0f),
+    (float2)(0x1.6f8000p-7f, 0x1.942dbap-17f),
+    (float2)(0x1.6e0000p-6f, 0x1.e5a170p-16f),
+    (float2)(0x1.118000p-5f, 0x1.347544p-15f),
+    (float2)(0x1.6b8000p-5f, 0x1.69bac6p-16f),
+    (float2)(0x1.c48000p-5f, 0x1.7eae42p-15f),
+    (float2)(0x1.0e8000p-4f, 0x1.9c4fd0p-15f),
+    (float2)(0x1.3a8000p-4f, 0x1.17ee92p-15f),
+    (float2)(0x1.660000p-4f, 0x1.fb7d64p-15f),
+    (float2)(0x1.918000p-4f, 0x1.42dc8cp-17f),
+    (float2)(0x1.bc8000p-4f, 0x1.0902b6p-18f),
+    (float2)(0x1.e70000p-4f, 0x1.7608bep-15f),
+    (float2)(0x1.088000p-3f, 0x1.162336p-13f),
+    (float2)(0x1.1d8000p-3f, 0x1.3465d4p-13f),
+    (float2)(0x1.328000p-3f, 0x1.74f13cp-14f),
+    (float2)(0x1.470000p-3f, 0x1.aa7e60p-13f),
+    (float2)(0x1.5c0000p-3f, 0x1.a39fbcp-19f),
+    (float2)(0x1.700000p-3f, 0x1.d0b53ap-13f),
+    (float2)(0x1.848000p-3f, 0x1.0af40ap-13f),
+    (float2)(0x1.988000p-3f, 0x1.b741dep-13f),
+    (float2)(0x1.ac8000p-3f, 0x1.d78b6cp-13f),
+    (float2)(0x1.c08000p-3f, 0x1.6db376p-13f),
+    (float2)(0x1.d48000p-3f, 0x1.ee4c32p-15f),
+    (float2)(0x1.e80000p-3f, 0x1.02f9d2p-13f),
+    (float2)(0x1.fb8000p-3f, 0x1.05ae40p-13f),
+    (float2)(0x1.078000p-2f, 0x1.0adbb0p-14f),
+    (float2)(0x1.110000p-2f, 0x1.83ed68p-13f),
+    (float2)(0x1.1a8000p-2f, 0x1.016ca4p-12f),
+    (float2)(0x1.240000p-2f, 0x1.01eac2p-12f),
+    (float2)(0x1.2d8000p-2f, 0x1.887e26p-13f),
+    (float2)(0x1.370000p-2f, 0x1.24cea4p-14f),
+    (float2)(0x1.400000p-2f, 0x1.918ec6p-12f),
+    (float2)(0x1.498000p-2f, 0x1.3c25e6p-13f),
+    (float2)(0x1.528000p-2f, 0x1.6f7f12p-12f),
+    (float2)(0x1.5c0000p-2f, 0x1.a39fbcp-18f),
+    (float2)(0x1.650000p-2f, 0x1.8fe466p-14f),
+    (float2)(0x1.6e0000p-2f, 0x1.10e6cep-13f),
+    (float2)(0x1.770000p-2f, 0x1.d2ba7ep-14f),
+    (float2)(0x1.800000p-2f, 0x1.4ac62cp-15f),
+    (float2)(0x1.888000p-2f, 0x1.a71cb8p-12f),
+    (float2)(0x1.918000p-2f, 0x1.dd448ep-13f),
+    (float2)(0x1.9a8000p-2f, 0x1.1c8f10p-21f),
+    (float2)(0x1.a30000p-2f, 0x1.bb053ep-13f),
+    (float2)(0x1.ab8000p-2f, 0x1.861e5ep-12f),
+    (float2)(0x1.b40000p-2f, 0x1.fafdcep-12f),
+    (float2)(0x1.bd0000p-2f, 0x1.e5d3cep-15f),
+    (float2)(0x1.c58000p-2f, 0x1.2fad28p-14f),
+    (float2)(0x1.ce0000p-2f, 0x1.492474p-15f),
+    (float2)(0x1.d60000p-2f, 0x1.d4f80cp-12f),
+    (float2)(0x1.de8000p-2f, 0x1.4ff510p-12f),
+    (float2)(0x1.e70000p-2f, 0x1.3550f2p-13f),
+    (float2)(0x1.ef0000p-2f, 0x1.b59ccap-12f),
+    (float2)(0x1.f78000p-2f, 0x1.42b464p-13f),
+    (float2)(0x1.ff8000p-2f, 0x1.5e66a0p-12f),
+    (float2)(0x1.038000p-1f, 0x1.f6a2e4p-11f),
+    (float2)(0x1.080000p-1f, 0x1.39e4fep-14f),
+    (float2)(0x1.0c0000p-1f, 0x1.0500d6p-13f),
+    (float2)(0x1.100000p-1f, 0x1.13b152p-13f),
+    (float2)(0x1.140000p-1f, 0x1.93f542p-14f),
+    (float2)(0x1.180000p-1f, 0x1.467b94p-16f),
+    (float2)(0x1.1b8000p-1f, 0x1.cc47a4p-11f),
+    (float2)(0x1.1f8000p-1f, 0x1.78f4c2p-11f),
+    (float2)(0x1.238000p-1f, 0x1.107508p-11f),
+    (float2)(0x1.278000p-1f, 0x1.2602c2p-12f),
+    (float2)(0x1.2b8000p-1f, 0x1.a39fbcp-20f),
+    (float2)(0x1.2f0000p-1f, 0x1.5a1d7ap-11f),
+    (float2)(0x1.330000p-1f, 0x1.3e355ap-12f),
+    (float2)(0x1.368000p-1f, 0x1.cffedap-11f),
+    (float2)(0x1.3a8000p-1f, 0x1.d9fd50p-12f),
+    (float2)(0x1.3e0000p-1f, 0x1.f64de6p-11f),
+    (float2)(0x1.420000p-1f, 0x1.d83f4cp-12f),
+    (float2)(0x1.458000p-1f, 0x1.cea628p-11f),
+    (float2)(0x1.498000p-1f, 0x1.3c25e6p-12f),
+    (float2)(0x1.4d0000p-1f, 0x1.5a96ccp-11f),
+    (float2)(0x1.510000p-1f, 0x1.18708ap-17f),
+    (float2)(0x1.548000p-1f, 0x1.374652p-12f),
+    (float2)(0x1.580000p-1f, 0x1.2089a6p-11f),
+    (float2)(0x1.5b8000p-1f, 0x1.93432cp-11f),
+    (float2)(0x1.5f0000p-1f, 0x1.f3fd06p-11f),
+    (float2)(0x1.630000p-1f, 0x1.0b8f54p-13f),
+    (float2)(0x1.668000p-1f, 0x1.004722p-12f),
+    (float2)(0x1.6a0000p-1f, 0x1.57cf2cp-12f),
+    (float2)(0x1.6d8000p-1f, 0x1.8cb53ap-12f),
+    (float2)(0x1.710000p-1f, 0x1.9f4d8ap-12f),
+    (float2)(0x1.748000p-1f, 0x1.8feb26p-12f),
+    (float2)(0x1.780000p-1f, 0x1.5edfeep-12f),
+    (float2)(0x1.7b8000p-1f, 0x1.0c7c9ap-12f),
+    (float2)(0x1.7f0000p-1f, 0x1.322182p-13f),
+    (float2)(0x1.828000p-1f, 0x1.3ab7cep-18f),
+    (float2)(0x1.858000p-1f, 0x1.a82c2cp-11f),
+    (float2)(0x1.890000p-1f, 0x1.3dd2c0p-11f),
+    (float2)(0x1.8c8000p-1f, 0x1.871da4p-12f),
+    (float2)(0x1.900000p-1f, 0x1.cc2c00p-14f),
+    (float2)(0x1.930000p-1f, 0x1.9fdb68p-11f),
+    (float2)(0x1.968000p-1f, 0x1.ed6956p-12f),
+    (float2)(0x1.9a0000p-1f, 0x1.f1a760p-14f),
+    (float2)(0x1.9d0000p-1f, 0x1.767f54p-11f),
+    (float2)(0x1.a08000p-1f, 0x1.3f6d26p-12f),
+    (float2)(0x1.a38000p-1f, 0x1.b9fce2p-11f),
+    (float2)(0x1.a70000p-1f, 0x1.8ae816p-12f),
+    (float2)(0x1.aa0000p-1f, 0x1.c23d60p-11f),
+    (float2)(0x1.ad8000p-1f, 0x1.60f388p-12f),
+    (float2)(0x1.b08000p-1f, 0x1.9049aep-11f),
+    (float2)(0x1.b40000p-1f, 0x1.8734a8p-13f),
+    (float2)(0x1.b70000p-1f, 0x1.2523d4p-11f),
+    (float2)(0x1.ba0000p-1f, 0x1.da6ce6p-11f),
+    (float2)(0x1.bd8000p-1f, 0x1.038e62p-12f),
+    (float2)(0x1.c08000p-1f, 0x1.1b511ep-11f),
+    (float2)(0x1.c38000p-1f, 0x1.a728b8p-11f),
+    (float2)(0x1.c70000p-1f, 0x1.2b5d22p-14f),
+    (float2)(0x1.ca0000p-1f, 0x1.2c6e54p-12f),
+    (float2)(0x1.cd0000p-1f, 0x1.f35064p-12f),
+    (float2)(0x1.d00000p-1f, 0x1.4fdb48p-11f),
+    (float2)(0x1.d30000p-1f, 0x1.98ec9ep-11f),
+    (float2)(0x1.d60000p-1f, 0x1.d4f80cp-11f),
+    (float2)(0x1.d98000p-1f, 0x1.0643d6p-17f),
+    (float2)(0x1.dc8000p-1f, 0x1.33567ep-14f),
+    (float2)(0x1.df8000p-1f, 0x1.e0410cp-14f),
+    (float2)(0x1.e28000p-1f, 0x1.142e0ep-13f),
+    (float2)(0x1.e58000p-1f, 0x1.063c88p-13f),
+    (float2)(0x1.e88000p-1f, 0x1.8d66c4p-14f),
+    (float2)(0x1.eb8000p-1f, 0x1.57e32ap-15f),
+    (float2)(0x1.ee0000p-1f, 0x1.ed1c6cp-11f),
+    (float2)(0x1.f10000p-1f, 0x1.b8a076p-11f),
+    (float2)(0x1.f40000p-1f, 0x1.7822f2p-11f),
+    (float2)(0x1.f70000p-1f, 0x1.2bbc3ap-11f),
+    (float2)(0x1.fa0000p-1f, 0x1.a708bap-12f),
+    (float2)(0x1.fd0000p-1f, 0x1.be4c7ep-13f),
+    (float2)(0x1.000000p+0f, 0x0.000000p+0f)
+};

 DECLARE_TABLE(uchar, PIBITS_TBL, ) = {
     224, 241, 27, 193, 12, 88, 33, 116, 53, 126, 196, 126, 237, 175,
@@ -306,6 +437,7 @@ DECLARE_TABLE(uchar, PIBITS_TBL, ) = {

 TABLE_FUNCTION(float2, LOGE_TBL, loge_tbl);
 TABLE_FUNCTION(float, LOG_INV_TBL, log_inv_tbl);
+// Registers LOG2_TBL under the name log2_tbl, the name the log2 code passes
+// to USE_TABLE. NOTE(review): TABLE_FUNCTION is defined outside this hunk;
+// presumably it emits the indexed lookup function.
+TABLE_FUNCTION(float2, LOG2_TBL, log2_tbl);

 uint4 TABLE_MANGLE(pibits_tbl)(size_t idx) {
     return *(__constant uint4 *)(PIBITS_TBL + idx);
diff --git a/libclc/generic/lib/math/tables.h b/libclc/generic/lib/math/tables.h
index 55ff8537faf..1348fe162c1 100644
--- a/libclc/generic/lib/math/tables.h
+++ b/libclc/generic/lib/math/tables.h
@@ -40,6 +40,7 @@

 TABLE_FUNCTION_DECL(float2, loge_tbl);
 TABLE_FUNCTION_DECL(float, log_inv_tbl);
+// Declaration matching TABLE_FUNCTION(float2, LOG2_TBL, log2_tbl) in tables.cl.
+TABLE_FUNCTION_DECL(float2, log2_tbl);
 TABLE_FUNCTION_DECL(uint4, pibits_tbl);

 #ifdef cl_khr_fp64
|