diff options
Diffstat (limited to 'libclc/generic/lib/math')
| -rw-r--r-- | libclc/generic/lib/math/clc_sw_unary.inc | 9 | ||||
| -rw-r--r-- | libclc/generic/lib/math/clc_tan.cl | 71 | ||||
| -rw-r--r-- | libclc/generic/lib/math/sincosD_piby4.h | 55 | ||||
| -rw-r--r-- | libclc/generic/lib/math/sincos_helpers.cl | 17 | ||||
| -rw-r--r-- | libclc/generic/lib/math/sincos_helpers.h | 3 | ||||
| -rw-r--r-- | libclc/generic/lib/math/tan.cl | 7 | ||||
| -rw-r--r-- | libclc/generic/lib/math/tan.inc | 17 | 
7 files changed, 158 insertions, 21 deletions
| diff --git a/libclc/generic/lib/math/clc_sw_unary.inc b/libclc/generic/lib/math/clc_sw_unary.inc new file mode 100644 index 00000000000..0f8467c0a1c --- /dev/null +++ b/libclc/generic/lib/math/clc_sw_unary.inc @@ -0,0 +1,9 @@ +#include <utils.h> + +#define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x) + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) { +  return __CLC_SW_FUNC(__CLC_FUNC)(x); +} + +#undef __CLC_SW_FUNC diff --git a/libclc/generic/lib/math/clc_tan.cl b/libclc/generic/lib/math/clc_tan.cl new file mode 100644 index 00000000000..ebba36a0d25 --- /dev/null +++ b/libclc/generic/lib/math/clc_tan.cl @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include <clc/clc.h> + +#include "math.h" +#include "sincos_helpers.h" +#include "../clcmacro.h" +#include "tables.h" + +_CLC_DEF _CLC_OVERLOAD float __clc_tan(float x) +{ +    int ix = as_int(x); +    int ax = ix & 0x7fffffff; +    float dx = as_float(ax); + +    float r0, r1; +    int regn = __clc_argReductionS(&r0, &r1, dx); + +    float t = __clc_tanf_piby4(r0 + r1, regn); +    t = as_float(as_int(t) ^ (ix ^ ax)); + +    t = ax >= PINFBITPATT_SP32 ? as_float(QNANBITPATT_SP32) : t; +    //Take care of subnormals +    t = (x == 0.0f) ? x : t; +    return t; +} +_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_tan, float); + +#ifdef cl_khr_fp64 +#include "sincosD_piby4.h" + +_CLC_DEF _CLC_OVERLOAD double __clc_tan(double x) +{ +    double y = fabs(x); + +    double r, rr; +    int regn; + +    if (y < 0x1.0p+30) +        __clc_remainder_piby2_medium(y, &r, &rr, &regn); +    else +        __clc_remainder_piby2_large(y, &r, &rr, &regn); + +    double2 tt = __clc_tan_piby4(r, rr); + +    int2 t = as_int2(regn & 1 ? tt.y : tt.x); +    t.hi ^= (x < 0.0) << 31; + +    return isnan(x) || isinf(x) ? as_double(QNANBITPATT_DP64) : as_double(t); +} +_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_tan, double); +#endif diff --git a/libclc/generic/lib/math/sincosD_piby4.h b/libclc/generic/lib/math/sincosD_piby4.h index a00db854891..c98488b33ed 100644 --- a/libclc/generic/lib/math/sincosD_piby4.h +++ b/libclc/generic/lib/math/sincosD_piby4.h @@ -76,3 +76,58 @@ __libclc__sincos_piby4(double x, double xx)      return ret;  } + +_CLC_INLINE double2 +__clc_tan_piby4(double x, double xx) +{ +    const double piby4_lead = 7.85398163397448278999e-01; // 0x3fe921fb54442d18 +    const double piby4_tail = 3.06161699786838240164e-17; // 0x3c81a62633145c06 + +    // In order to maintain relative precision transform using the identity: +    // tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4. 
+    // Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4. + +    int ca = x >  0.68; +    int cb = x < -0.68; +    double transform = ca ?  1.0 : 0.0; +    transform = cb ? -1.0 : transform; + +    double tx = fma(-transform, x, piby4_lead) + fma(-transform, xx, piby4_tail); +    int c = ca | cb; +    x = c ? tx : x; +    xx = c ? 0.0 : xx; + +    // Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68]. +    double t1 = x; +    double r = fma(2.0, x*xx, x*x); + +    double a = fma(r, +                   fma(r, 0.224044448537022097264602535574e-3, -0.229345080057565662883358588111e-1), +                   0.372379159759792203640806338901e0); + +    double b = fma(r, +                   fma(r, +                       fma(r, -0.232371494088563558304549252913e-3, 0.260656620398645407524064091208e-1), +                       -0.515658515729031149329237816945e0), +                   0.111713747927937668539901657944e1); + +    double t2 = fma(MATH_DIVIDE(a, b), x*r, xx); + +    double tp = t1 + t2; + +    // Compute -1.0/(t1 + t2) accurately +    double z1 = as_double(as_long(tp) & 0xffffffff00000000L); +    double z2 = t2 - (z1 - t1); +    double trec = -MATH_RECIP(tp); +    double trec_top = as_double(as_long(trec) & 0xffffffff00000000L); + +    double tpr = fma(fma(trec_top, z2, fma(trec_top, z1, 1.0)), trec, trec_top); + +    double tpt = transform * (1.0 - MATH_DIVIDE(2.0*tp, 1.0 + tp)); +    double tptr = transform * (MATH_DIVIDE(2.0*tp, tp - 1.0) - 1.0); + +    double2 ret; +    ret.lo = c ? tpt : tp; +    ret.hi = c ? 
tptr : tpr; +    return ret; +} diff --git a/libclc/generic/lib/math/sincos_helpers.cl b/libclc/generic/lib/math/sincos_helpers.cl index 251b7f94efd..3c466bcf9f8 100644 --- a/libclc/generic/lib/math/sincos_helpers.cl +++ b/libclc/generic/lib/math/sincos_helpers.cl @@ -90,6 +90,23 @@ _CLC_DEF float __clc_cosf_piby4(float x, float y) {      return ret;  } +_CLC_DEF float __clc_tanf_piby4(float x, int regn) +{ +    // Core Remez [1,2] approximation to tan(x) on the interval [0,pi/4]. +    float r = x * x; + +    float a = mad(r, -0.0172032480471481694693109f, 0.385296071263995406715129f); + +    float b = mad(r, +	          mad(r, 0.01844239256901656082986661f, -0.51396505478854532132342f), +	          1.15588821434688393452299f); + +    float t = mad(x*r, native_divide(a, b), x); +    float tr = -MATH_RECIP(t); + +    return regn & 1 ? tr : t; +} +  _CLC_DEF void __clc_fullMulS(float *hi, float *lo, float a, float b, float bh, float bt)  {      if (HAVE_HW_FMA32()) { diff --git a/libclc/generic/lib/math/sincos_helpers.h b/libclc/generic/lib/math/sincos_helpers.h index 2565d44eb02..e307abc48b2 100644 --- a/libclc/generic/lib/math/sincos_helpers.h +++ b/libclc/generic/lib/math/sincos_helpers.h @@ -20,8 +20,11 @@   * THE SOFTWARE.   
*/ +#include "clc/clcfunc.h" +  _CLC_DECL float __clc_sinf_piby4(float x, float y);  _CLC_DECL float __clc_cosf_piby4(float x, float y); +_CLC_DECL float __clc_tanf_piby4(float x, int y);  _CLC_DECL int __clc_argReductionS(float *r, float *rr, float x);  #ifdef cl_khr_fp64 diff --git a/libclc/generic/lib/math/tan.cl b/libclc/generic/lib/math/tan.cl index a447999ea8b..380db67e364 100644 --- a/libclc/generic/lib/math/tan.cl +++ b/libclc/generic/lib/math/tan.cl @@ -1,8 +1,7 @@  #include <clc/clc.h> -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable -#endif +#include <math/clc_tan.h> -#define __CLC_BODY <tan.inc> +#define __CLC_FUNC tan +#define __CLC_BODY <clc_sw_unary.inc>  #include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/tan.inc b/libclc/generic/lib/math/tan.inc deleted file mode 100644 index b9ce33ef823..00000000000 --- a/libclc/generic/lib/math/tan.inc +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Note: tan(x) = sin(x)/cos(x) also, but the final assembly ends up being - *       twice as long for R600 (maybe for others as well). - */ - -#if __CLC_FPSIZE == 32 -#define __CLC_CONST(x) x ## f -#else -#define __CLC_CONST(x) x -#endif - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE tan(__CLC_GENTYPE x) { -  __CLC_GENTYPE sinx = sin(x); -  return sinx / sqrt( (__CLC_GENTYPE) __CLC_CONST(1.0) - (sinx*sinx) ); -} - -#undef __CLC_CONST | 

