From b03917e7efea6e2c6f3e17cafc00d3f65eeae075 Mon Sep 17 00:00:00 2001 From: Neven Sajko Date: Mon, 23 Mar 2020 21:53:15 +0000 Subject: [PATCH] add function sncs1cs to get accurate 1-cosine Currently in three places in the code there is an expression like: 1-cosine(subexpr). The problem with that is that it sacrifices quite a bit of precision the floating point type in use could provide. Because of the subtraction, and the fact that floating point is MUCH denser near zero than near one, 1-cosine(x) values that should be close to zero will just be truncated to exactly zero. The added function sncs1cs is not exported. The changes apply only if the user has not specified a custom mfloat_t. This improves mat3_rotation_axis, mat4_rotation_axis and sine_ease_in_out. A side improvement in the former two is that the computation of sine, cosine and 1-cosine should be cheaper when computing them all at the same time than when calling a separate function for each of the three (because the majority of the code is shared between the three). I checked that the improvements do materialize and, more importantly, at the same time searched for big regressions: https://github.com/nsajko/numericcompfricas (see the README.md) Basically, the results are that there are no significant differences for sin and cos (I think the accuracy only varies by one bit), but there are big improvements for 1-cos (AKA omc AKA 1cs). The repository contains the results of the check against glibc and against musl. FriCAS (for the check) can be found here: https://github.com/fricas/fricas --- mathc.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/mathc.c b/mathc.c index 888d9f3..ef92afe 100644 --- a/mathc.c +++ b/mathc.c @@ -20,6 +20,140 @@ the following restrictions: #include "mathc.h" +#if !defined(MATHC_FLOATING_POINT_TYPE) && defined(MATHC_USE_FLOATING_POINT) +/* The following code (sincos1cos and float and double versions of + * sncs1cs) is Copyright © 1985, 1995, 2000 Stephen L. Moshier and + * Copyright © 2020 Neven Sajko. The intention is to get accurate + * 1-cosine, while also getting the sine and cosine as a bonus. The + * implementation is derived from the Cephes Math Library's sin.c and + * sinf.c. To be more specific, I took Stephen Moshier's sin, cos, sinf + * and cosf (without changing the polynomials) and adapted them to give + * all three required function values (in double and float versions), + * without unnecessary accuracy losses. + * + * sncs1cs is not correct for values of x of huge magnitude. That can + * be fixed by more elaborate range reduction. + */ + +typedef struct { + /* Sine, cosine, 1-cosine */ + mfloat_t sin, cos, omc; +} sincos1cos; + +static +sincos1cos sncs1cs(mfloat_t x) +{ + const mfloat_t fourOverPi = 1.27323954473516268615; + + mfloat_t y, z, zz; + mint_t j, sign = 1, csign = 1; + sincos1cos r; + + /* Handle +-0. */ + if (x == (mfloat_t)0) { + r.sin = x; + r.cos = 1; + r.omc = 0; + return r; + } + if (isnan(x)) { + r.sin = r.cos = r.omc = x; + return r; + } + if (isinf(x)) { + r.sin = r.cos = r.omc = x - x; + return r; + } + if (x < 0) { + sign = -1; + x = -x; + } + j = (mint_t)(x * fourOverPi); + y = (mfloat_t)j; + /* map zeros to origin */ + if ((j & 1)) { + j += 1; + y += 1; + } + j = j & 7; /* octant modulo one turn */ + /* reflect in x axis */ + if (j > 3) { + sign = -sign; + csign = -csign; + j -= 4; + } + if (j > 1) { + csign = -csign; + } + +# if defined(MATHC_USE_SINGLE_FLOATING_POINT) + const float sc[] = {-1.9515295891E-4, 8.3321608736E-3, -1.6666654611E-1}; + + const float cc[] = {2.443315711809948E-5, -1.388731625493765E-3, 4.166664568298827E-2}; + + /* These are for a 24-bit significand: */ + const float DP1 = 0.78515625; + const float DP2 = 2.4187564849853515625e-4; + const float DP3 = 3.77489497744594108e-8; + + /* Extended precision modular arithmetic */ + z = ((x - y * DP1) - y * DP2) - y * DP3; + zz = z * z; + r.sin = z + zz*z*((sc[0]*zz + sc[1])*zz + sc[2]); + r.omc = (mfloat_t)0.5*zz - zz*zz*((cc[0]*zz + cc[1])*zz + cc[2]); +# elif defined(MATHC_USE_DOUBLE_FLOATING_POINT) + const double sc[] = { + 1.58962301576546568060E-10, + -2.50507477628578072866E-8, + 2.75573136213857245213E-6, + -1.98412698295895385996E-4, + 8.33333333332211858878E-3, + -1.66666666666666307295E-1, + }; + + const double cc[] = { + -1.13585365213876817300E-11, + 2.08757008419747316778E-9, + -2.75573141792967388112E-7, + 2.48015872888517045348E-5, + -1.38888888888730564116E-3, + 4.16666666666665929218E-2, + }; + + const double DP1 = 7.85398125648498535156E-1; + const double DP2 = 3.77489470793079817668E-8; + const double DP3 = 2.69515142907905952645E-15; + + /* Extended precision modular arithmetic */ + z = ((x - y * DP1) - y * DP2) - y * DP3; + zz = z * z; + r.sin = z + zz*z*(((((sc[0]*zz + sc[1])*zz + sc[2])*zz + sc[3])*zz + sc[4])*zz + sc[5]); + r.omc = (mfloat_t)0.5*zz - zz*zz*(((((cc[0]*zz + cc[1])*zz + cc[2])*zz + cc[3])*zz + cc[4])*zz + cc[5]); +# else +# error "Unknown preprocessing configuration." +# endif + if (j == 1 || j == 2) { + if (csign < 0) { + r.sin = -r.sin; + } + r.cos = r.sin; + r.sin = 1 - r.omc; + r.omc = 1 - r.cos; + } else { + if (csign < 0) { + r.cos = r.omc - 1; + r.omc = 1 - r.cos; + } else { + r.cos = 1 - r.omc; + } + } + if (sign < 0) { + r.sin = -r.sin; + } + return r; +} +#endif + #if defined(MATHC_USE_INT) mint_t clampi(mint_t value, mint_t min, mint_t max) { @@ -2520,9 +2654,15 @@ mfloat_t *mat3_rotation_z(mfloat_t *result, mfloat_t f) mfloat_t *mat3_rotation_axis(mfloat_t *result, mfloat_t *v0, mfloat_t f) { +#if !defined(MATHC_FLOATING_POINT_TYPE) + // Get higher accuracy 1-cosine when mfloat_t is known. + sincos1cos sc1c = sncs1cs(f); + mfloat_t c = sc1c.cos, s = sc1c.sin, one_c = sc1c.omc; +#else mfloat_t c = MCOS(f); mfloat_t s = MSIN(f); mfloat_t one_c = MFLOAT_C(1.0) - c; +#endif mfloat_t x = v0[0]; mfloat_t y = v0[4]; mfloat_t z = v0[8]; @@ -2977,9 +3117,15 @@ mfloat_t *mat4_rotation_z(mfloat_t *result, mfloat_t f) mfloat_t *mat4_rotation_axis(mfloat_t *result, mfloat_t *v0, mfloat_t f) { +#if !defined(MATHC_FLOATING_POINT_TYPE) + // Get higher accuracy 1-cosine when mfloat_t is known. + sincos1cos sc1c = sncs1cs(f); + mfloat_t c = sc1c.cos, s = sc1c.sin, one_c = sc1c.omc; +#else mfloat_t c = MCOS(f); mfloat_t s = MSIN(f); mfloat_t one_c = MFLOAT_C(1.0) - c; +#endif mfloat_t x = v0[0]; mfloat_t y = v0[1]; mfloat_t z = v0[2]; @@ -6753,7 +6899,13 @@ mfloat_t sine_ease_in(mfloat_t f) mfloat_t sine_ease_in_out(mfloat_t f) { +#if !defined(MATHC_FLOATING_POINT_TYPE) + // Get higher accuracy 1-cosine when mfloat_t is known. + sincos1cos sc1c = sncs1cs(f * MPI); + return MFLOAT_C(0.5) * sc1c.omc; +#else return MFLOAT_C(0.5) * (MFLOAT_C(1.0) - MCOS(f * MPI)); +#endif } mfloat_t circular_ease_out(mfloat_t f)