diff options
| author | Jan Vesely <jan.vesely@rutgers.edu> | 2018-01-19 18:57:19 +0000 |
|---|---|---|
| committer | Jan Vesely <jan.vesely@rutgers.edu> | 2018-01-19 18:57:19 +0000 |
| commit | 03937bdec33db12afe8ce7ee862fade2f2ed4f04 (patch) | |
| tree | e914c5c54057f5af334114805cf1f08ea00841d2 | |
| parent | 30e1bbc10672808320876797afc45a0d8f2767bf (diff) | |
| download | bcm5719-llvm-03937bdec33db12afe8ce7ee862fade2f2ed4f04.tar.gz bcm5719-llvm-03937bdec33db12afe8ce7ee862fade2f2ed4f04.zip | |
tan: Port from amd_builtins
v2: fixup constant precision
Passes piglit on turks and carrizo.
Passes CTS on carrizo
Fixes half_tan to pass CTS on carrizo
Acked-By: Aaron Watry <awatry@gmail.com>
Tested-By: Aaron Watry <awatry@gmail.com>
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
llvm-svn: 322979
| -rw-r--r-- | libclc/generic/include/math/clc_tan.h | 5 | ||||
| -rw-r--r-- | libclc/generic/lib/SOURCES | 1 | ||||
| -rw-r--r-- | libclc/generic/lib/math/clc_sw_unary.inc | 9 | ||||
| -rw-r--r-- | libclc/generic/lib/math/clc_tan.cl | 71 | ||||
| -rw-r--r-- | libclc/generic/lib/math/sincosD_piby4.h | 55 | ||||
| -rw-r--r-- | libclc/generic/lib/math/sincos_helpers.cl | 17 | ||||
| -rw-r--r-- | libclc/generic/lib/math/sincos_helpers.h | 3 | ||||
| -rw-r--r-- | libclc/generic/lib/math/tan.cl | 7 | ||||
| -rw-r--r-- | libclc/generic/lib/math/tan.inc | 17 |
9 files changed, 164 insertions, 21 deletions
diff --git a/libclc/generic/include/math/clc_tan.h b/libclc/generic/include/math/clc_tan.h new file mode 100644 index 00000000000..bc97902965d --- /dev/null +++ b/libclc/generic/include/math/clc_tan.h @@ -0,0 +1,5 @@ +#define __CLC_FUNCTION __clc_tan +#define __CLC_BODY <clc/math/unary_decl.inc> +#include <clc/math/gentype.inc> +#undef __CLC_BODY +#undef __CLC_FUNCTION diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index 9dbd35e4f76..4546a9ef610 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -164,6 +164,7 @@ math/sinh.cl math/sinpi.cl math/clc_sqrt.cl math/sqrt.cl +math/clc_tan.cl math/tan.cl math/tanh.cl math/tgamma.cl diff --git a/libclc/generic/lib/math/clc_sw_unary.inc b/libclc/generic/lib/math/clc_sw_unary.inc new file mode 100644 index 00000000000..0f8467c0a1c --- /dev/null +++ b/libclc/generic/lib/math/clc_sw_unary.inc @@ -0,0 +1,9 @@ +#include <utils.h> + +#define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x) + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) { + return __CLC_SW_FUNC(__CLC_FUNC)(x); +} + +#undef __CLC_SW_FUNC diff --git a/libclc/generic/lib/math/clc_tan.cl b/libclc/generic/lib/math/clc_tan.cl new file mode 100644 index 00000000000..ebba36a0d25 --- /dev/null +++ b/libclc/generic/lib/math/clc_tan.cl @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include <clc/clc.h> + +#include "math.h" +#include "sincos_helpers.h" +#include "../clcmacro.h" +#include "tables.h" + +_CLC_DEF _CLC_OVERLOAD float __clc_tan(float x) +{ + int ix = as_int(x); + int ax = ix & 0x7fffffff; + float dx = as_float(ax); + + float r0, r1; + int regn = __clc_argReductionS(&r0, &r1, dx); + + float t = __clc_tanf_piby4(r0 + r1, regn); + t = as_float(as_int(t) ^ (ix ^ ax)); + + t = ax >= PINFBITPATT_SP32 ? as_float(QNANBITPATT_SP32) : t; + //Take care of subnormals + t = (x == 0.0f) ? x : t; + return t; +} +_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_tan, float); + +#ifdef cl_khr_fp64 +#include "sincosD_piby4.h" + +_CLC_DEF _CLC_OVERLOAD double __clc_tan(double x) +{ + double y = fabs(x); + + double r, rr; + int regn; + + if (y < 0x1.0p+30) + __clc_remainder_piby2_medium(y, &r, &rr, &regn); + else + __clc_remainder_piby2_large(y, &r, &rr, &regn); + + double2 tt = __clc_tan_piby4(r, rr); + + int2 t = as_int2(regn & 1 ? tt.y : tt.x); + t.hi ^= (x < 0.0) << 31; + + return isnan(x) || isinf(x) ? 
as_double(QNANBITPATT_DP64) : as_double(t); +} +_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_tan, double); +#endif diff --git a/libclc/generic/lib/math/sincosD_piby4.h b/libclc/generic/lib/math/sincosD_piby4.h index a00db854891..c98488b33ed 100644 --- a/libclc/generic/lib/math/sincosD_piby4.h +++ b/libclc/generic/lib/math/sincosD_piby4.h @@ -76,3 +76,58 @@ __libclc__sincos_piby4(double x, double xx) return ret; } + +_CLC_INLINE double2 +__clc_tan_piby4(double x, double xx) +{ + const double piby4_lead = 7.85398163397448278999e-01; // 0x3fe921fb54442d18 + const double piby4_tail = 3.06161699786838240164e-17; // 0x3c81a62633145c06 + + // In order to maintain relative precision transform using the identity: + // tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4. + // Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4. + + int ca = x > 0.68; + int cb = x < -0.68; + double transform = ca ? 1.0 : 0.0; + transform = cb ? -1.0 : transform; + + double tx = fma(-transform, x, piby4_lead) + fma(-transform, xx, piby4_tail); + int c = ca | cb; + x = c ? tx : x; + xx = c ? 0.0 : xx; + + // Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68]. 
+ double t1 = x; + double r = fma(2.0, x*xx, x*x); + + double a = fma(r, + fma(r, 0.224044448537022097264602535574e-3, -0.229345080057565662883358588111e-1), + 0.372379159759792203640806338901e0); + + double b = fma(r, + fma(r, + fma(r, -0.232371494088563558304549252913e-3, 0.260656620398645407524064091208e-1), + -0.515658515729031149329237816945e0), + 0.111713747927937668539901657944e1); + + double t2 = fma(MATH_DIVIDE(a, b), x*r, xx); + + double tp = t1 + t2; + + // Compute -1.0/(t1 + t2) accurately + double z1 = as_double(as_long(tp) & 0xffffffff00000000L); + double z2 = t2 - (z1 - t1); + double trec = -MATH_RECIP(tp); + double trec_top = as_double(as_long(trec) & 0xffffffff00000000L); + + double tpr = fma(fma(trec_top, z2, fma(trec_top, z1, 1.0)), trec, trec_top); + + double tpt = transform * (1.0 - MATH_DIVIDE(2.0*tp, 1.0 + tp)); + double tptr = transform * (MATH_DIVIDE(2.0*tp, tp - 1.0) - 1.0); + + double2 ret; + ret.lo = c ? tpt : tp; + ret.hi = c ? tptr : tpr; + return ret; +} diff --git a/libclc/generic/lib/math/sincos_helpers.cl b/libclc/generic/lib/math/sincos_helpers.cl index 251b7f94efd..3c466bcf9f8 100644 --- a/libclc/generic/lib/math/sincos_helpers.cl +++ b/libclc/generic/lib/math/sincos_helpers.cl @@ -90,6 +90,23 @@ _CLC_DEF float __clc_cosf_piby4(float x, float y) { return ret; } +_CLC_DEF float __clc_tanf_piby4(float x, int regn) +{ + // Core Remez [1,2] approximation to tan(x) on the interval [0,pi/4]. + float r = x * x; + + float a = mad(r, -0.0172032480471481694693109f, 0.385296071263995406715129f); + + float b = mad(r, + mad(r, 0.01844239256901656082986661f, -0.51396505478854532132342f), + 1.15588821434688393452299f); + + float t = mad(x*r, native_divide(a, b), x); + float tr = -MATH_RECIP(t); + + return regn & 1 ? 
tr : t; +} + _CLC_DEF void __clc_fullMulS(float *hi, float *lo, float a, float b, float bh, float bt) { if (HAVE_HW_FMA32()) { diff --git a/libclc/generic/lib/math/sincos_helpers.h b/libclc/generic/lib/math/sincos_helpers.h index 2565d44eb02..e307abc48b2 100644 --- a/libclc/generic/lib/math/sincos_helpers.h +++ b/libclc/generic/lib/math/sincos_helpers.h @@ -20,8 +20,11 @@ * THE SOFTWARE. */ +#include "clc/clcfunc.h" + _CLC_DECL float __clc_sinf_piby4(float x, float y); _CLC_DECL float __clc_cosf_piby4(float x, float y); +_CLC_DECL float __clc_tanf_piby4(float x, int y); _CLC_DECL int __clc_argReductionS(float *r, float *rr, float x); #ifdef cl_khr_fp64 diff --git a/libclc/generic/lib/math/tan.cl b/libclc/generic/lib/math/tan.cl index a447999ea8b..380db67e364 100644 --- a/libclc/generic/lib/math/tan.cl +++ b/libclc/generic/lib/math/tan.cl @@ -1,8 +1,7 @@ #include <clc/clc.h> -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable -#endif +#include <math/clc_tan.h> -#define __CLC_BODY <tan.inc> +#define __CLC_FUNC tan +#define __CLC_BODY <clc_sw_unary.inc> #include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/tan.inc b/libclc/generic/lib/math/tan.inc deleted file mode 100644 index b9ce33ef823..00000000000 --- a/libclc/generic/lib/math/tan.inc +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Note: tan(x) = sin(x)/cos(x) also, but the final assembly ends up being - * twice as long for R600 (maybe for others as well). - */ - -#if __CLC_FPSIZE == 32 -#define __CLC_CONST(x) x ## f -#else -#define __CLC_CONST(x) x -#endif - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE tan(__CLC_GENTYPE x) { - __CLC_GENTYPE sinx = sin(x); - return sinx / sqrt( (__CLC_GENTYPE) __CLC_CONST(1.0) - (sinx*sinx) ); -} - -#undef __CLC_CONST |

