|
|
@@ -0,0 +1,706 @@
|
|
|
+#include "pocketpy/common/dmath.h"
|
|
|
+#include "pocketpy/common/algorithm.h"
|
|
|
+#include "pocketpy/common/_log_spline_tbl.h"
|
|
|
+#include <stdint.h>
|
|
|
+
|
|
|
+union Float64Bits {
|
|
|
+ double f;
|
|
|
+ uint64_t i;
|
|
|
+};
|
|
|
+
|
|
|
+/* IEEE 754 double precision floating point data manipulation */
|
|
|
+typedef union
|
|
|
+{
|
|
|
+ double f;
|
|
|
+ uint64_t u;
|
|
|
+ struct {int32_t i0,i1;} s;
|
|
|
+} udi_t;
|
|
|
+
|
|
|
+// https://github.com/akohlmey/fastermath/blob/master/src/exp.c#L63
|
|
|
+double dmath_exp2(double x) {
|
|
|
+ if (x > 1000) return DMATH_INFINITY;
|
|
|
+ if (x < -1000) return 0;
|
|
|
+ if (dmath_isnan(x)) return DMATH_NAN;
|
|
|
+
|
|
|
+ const int FM_DOUBLE_BIAS = 1023;
|
|
|
+
|
|
|
+ static const double fm_exp2_q[] = {
|
|
|
+ /* 1.00000000000000000000e0, */
|
|
|
+ 2.33184211722314911771e2,
|
|
|
+ 4.36821166879210612817e3
|
|
|
+ };
|
|
|
+ static const double fm_exp2_p[] = {
|
|
|
+ 2.30933477057345225087e-2,
|
|
|
+ 2.02020656693165307700e1,
|
|
|
+ 1.51390680115615096133e3
|
|
|
+ };
|
|
|
+
|
|
|
+ double ipart, fpart, px, qx;
|
|
|
+ udi_t epart;
|
|
|
+
|
|
|
+ ipart = dmath_floor(x+0.5);
|
|
|
+ fpart = x - ipart;
|
|
|
+
|
|
|
+ // FM_DOUBLE_INIT_EXP(epart,ipart);
|
|
|
+ epart.s.i0 = 0;
|
|
|
+ epart.s.i1 = (((int) ipart) + FM_DOUBLE_BIAS) << 20;
|
|
|
+
|
|
|
+ x = fpart*fpart;
|
|
|
+
|
|
|
+ px = fm_exp2_p[0];
|
|
|
+ px = px*x + fm_exp2_p[1];
|
|
|
+ qx = x + fm_exp2_q[0];
|
|
|
+ px = px*x + fm_exp2_p[2];
|
|
|
+ qx = qx*x + fm_exp2_q[1];
|
|
|
+
|
|
|
+ px = px * fpart;
|
|
|
+
|
|
|
+ x = 1.0 + 2.0*(px/(qx-px));
|
|
|
+ return epart.f*x;
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_log2(double x) {
|
|
|
+ if(x < 0) return DMATH_NAN;
|
|
|
+ if(x == 0) return -DMATH_INFINITY;
|
|
|
+ if(x == DMATH_INFINITY) return DMATH_INFINITY;
|
|
|
+ if(dmath_isnan(x)) return DMATH_NAN;
|
|
|
+
|
|
|
+ const double fm_log_dinv = 4.09600000000000000000e+03;
|
|
|
+ const double fm_log_dsq6 = 9.93410746256510361521e-09;
|
|
|
+
|
|
|
+ const int FM_DOUBLE_BIAS = 1023;
|
|
|
+ const int FM_DOUBLE_EMASK = 2146435072;
|
|
|
+ const int FM_DOUBLE_MBITS = 20;
|
|
|
+ const int FM_DOUBLE_MMASK = 1048575;
|
|
|
+ const int FM_DOUBLE_EZERO = 1072693248;
|
|
|
+
|
|
|
+ const int FM_SPLINE_SHIFT = 8;
|
|
|
+
|
|
|
+ udi_t val;
|
|
|
+ double a,b,y;
|
|
|
+ int32_t hx, ipart;
|
|
|
+
|
|
|
+ val.f = x;
|
|
|
+ hx = val.s.i1;
|
|
|
+
|
|
|
+ /* extract exponent and subtract bias */
|
|
|
+ ipart = (((hx & FM_DOUBLE_EMASK) >> FM_DOUBLE_MBITS) - FM_DOUBLE_BIAS);
|
|
|
+
|
|
|
+ /* mask out exponent to get the prefactor to 2**ipart */
|
|
|
+ hx &= FM_DOUBLE_MMASK;
|
|
|
+ val.s.i1 = hx | FM_DOUBLE_EZERO;
|
|
|
+ x = val.f;
|
|
|
+
|
|
|
+ /* table index */
|
|
|
+ hx >>= FM_SPLINE_SHIFT;
|
|
|
+
|
|
|
+ /* compute x value matching table index */
|
|
|
+ val.s.i0 = 0;
|
|
|
+ val.s.i1 = FM_DOUBLE_EZERO | (hx << FM_SPLINE_SHIFT);
|
|
|
+ b = (x - val.f) * fm_log_dinv;
|
|
|
+ a = 1.0 - b;
|
|
|
+
|
|
|
+ /* evaluate spline */
|
|
|
+ y = a * fm_log_q1[hx] + b * fm_log_q1[hx+1];
|
|
|
+ a = (a*a*a-a) * fm_log_q2[hx];
|
|
|
+ b = (b*b*b-b) * fm_log_q2[hx+1];
|
|
|
+ y += (a + b) * fm_log_dsq6;
|
|
|
+
|
|
|
+ return ((double) ipart) + (y * DMATH_LOG2_E);
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_exp(double x) {
|
|
|
+ return dmath_exp2(x * DMATH_LOG2_E); // log2(e)
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_exp10(double x) {
|
|
|
+ return dmath_exp2(x * 3.321928094887362); // log2(10)
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_log(double x) {
|
|
|
+ return dmath_log2(x) / DMATH_LOG2_E; // log2(e)
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_log10(double x) {
|
|
|
+ return dmath_log2(x) / 3.321928094887362; // log2(10)
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_pow(double base, double exp) {
|
|
|
+ int exp_int = (int)exp;
|
|
|
+ if(exp_int == exp) {
|
|
|
+ if(exp_int == 0) return 1;
|
|
|
+ if(exp_int < 0) {
|
|
|
+ base = 1 / base;
|
|
|
+ exp_int = -exp_int;
|
|
|
+ }
|
|
|
+ double res = 1;
|
|
|
+ while(exp_int > 0) {
|
|
|
+ if(exp_int & 1) res *= base;
|
|
|
+ base *= base;
|
|
|
+ exp_int >>= 1;
|
|
|
+ }
|
|
|
+ return res;
|
|
|
+ }
|
|
|
+ if (base > 0) {
|
|
|
+ return dmath_exp(exp * dmath_log(base));
|
|
|
+ }
|
|
|
+ if (base == 0) {
|
|
|
+ if (exp > 0) return 0;
|
|
|
+ if (exp == 0) return 1;
|
|
|
+ }
|
|
|
+ return DMATH_NAN;
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_sqrt(double x) {
|
|
|
+ return dmath_pow(x, 0.5);
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_cbrt(double x) {
|
|
|
+ return dmath_pow(x, 1.0 / 3.0);
|
|
|
+}
|
|
|
+
|
|
|
+// https://github.com/kraj/musl/blob/kraj/master/src/math/sincos.c
|
|
|
+static double __sin(double x, double y, int iy)
|
|
|
+{
|
|
|
+static const double
|
|
|
+S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
|
|
|
+S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
|
|
|
+S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
|
|
|
+S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
|
|
|
+S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
|
|
|
+S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
|
|
|
+
|
|
|
+ double z,r,v,w;
|
|
|
+
|
|
|
+ z = x*x;
|
|
|
+ w = z*z;
|
|
|
+ r = S2 + z*(S3 + z*S4) + z*w*(S5 + z*S6);
|
|
|
+ v = z*x;
|
|
|
+ if (iy == 0)
|
|
|
+ return x + v*(S1 + z*r);
|
|
|
+ else
|
|
|
+ return x - ((z*(0.5*y - v*r) - y) - v*S1);
|
|
|
+}
|
|
|
+
|
|
|
+static double __cos(double x, double y)
|
|
|
+{
|
|
|
+static const double
|
|
|
+C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
|
|
|
+C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
|
|
|
+C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
|
|
|
+C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
|
|
|
+C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
|
|
|
+C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
|
|
|
+
|
|
|
+ double hz,z,r,w;
|
|
|
+
|
|
|
+ z = x*x;
|
|
|
+ w = z*z;
|
|
|
+ r = z*(C1+z*(C2+z*C3)) + w*w*(C4+z*(C5+z*C6));
|
|
|
+ hz = 0.5*z;
|
|
|
+ w = 1.0-hz;
|
|
|
+ return w + (((1.0-w)-hz) + (z*r-x*y));
|
|
|
+}
|
|
|
+
|
|
|
+int __rem_pio2(double x, double *y)
|
|
|
+{
|
|
|
+static const double
|
|
|
+toint = 1.5/2.22044604925031308085e-16,
|
|
|
+pio4 = 0x1.921fb54442d18p-1,
|
|
|
+invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
|
|
|
+pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
|
|
|
+pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
|
|
|
+pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
|
|
|
+pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
|
|
|
+pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
|
|
|
+pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
|
|
|
+
|
|
|
+ union Float64Bits u = { .f = x };
|
|
|
+ double z,w,t,r,fn;
|
|
|
+ double tx[3],ty[2];
|
|
|
+ uint32_t ix;
|
|
|
+ int sign, n, ex, ey, i;
|
|
|
+
|
|
|
+ sign = u.i>>63;
|
|
|
+ ix = u.i>>32 & 0x7fffffff;
|
|
|
+ if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
|
|
|
+ if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
|
|
|
+ goto medium; /* cancellation -- use medium case */
|
|
|
+ if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */
|
|
|
+ if (!sign) {
|
|
|
+ z = x - pio2_1; /* one round good to 85 bits */
|
|
|
+ y[0] = z - pio2_1t;
|
|
|
+ y[1] = (z-y[0]) - pio2_1t;
|
|
|
+ return 1;
|
|
|
+ } else {
|
|
|
+ z = x + pio2_1;
|
|
|
+ y[0] = z + pio2_1t;
|
|
|
+ y[1] = (z-y[0]) + pio2_1t;
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if (!sign) {
|
|
|
+ z = x - 2*pio2_1;
|
|
|
+ y[0] = z - 2*pio2_1t;
|
|
|
+ y[1] = (z-y[0]) - 2*pio2_1t;
|
|
|
+ return 2;
|
|
|
+ } else {
|
|
|
+ z = x + 2*pio2_1;
|
|
|
+ y[0] = z + 2*pio2_1t;
|
|
|
+ y[1] = (z-y[0]) + 2*pio2_1t;
|
|
|
+ return -2;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */
|
|
|
+ if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
|
|
|
+ if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */
|
|
|
+ goto medium;
|
|
|
+ if (!sign) {
|
|
|
+ z = x - 3*pio2_1;
|
|
|
+ y[0] = z - 3*pio2_1t;
|
|
|
+ y[1] = (z-y[0]) - 3*pio2_1t;
|
|
|
+ return 3;
|
|
|
+ } else {
|
|
|
+ z = x + 3*pio2_1;
|
|
|
+ y[0] = z + 3*pio2_1t;
|
|
|
+ y[1] = (z-y[0]) + 3*pio2_1t;
|
|
|
+ return -3;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if (ix == 0x401921fb) /* |x| ~= 4pi/2 */
|
|
|
+ goto medium;
|
|
|
+ if (!sign) {
|
|
|
+ z = x - 4*pio2_1;
|
|
|
+ y[0] = z - 4*pio2_1t;
|
|
|
+ y[1] = (z-y[0]) - 4*pio2_1t;
|
|
|
+ return 4;
|
|
|
+ } else {
|
|
|
+ z = x + 4*pio2_1;
|
|
|
+ y[0] = z + 4*pio2_1t;
|
|
|
+ y[1] = (z-y[0]) + 4*pio2_1t;
|
|
|
+ return -4;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
|
|
|
+medium:
|
|
|
+ /* rint(x/(pi/2)) */
|
|
|
+ fn = (double)x*invpio2 + toint - toint;
|
|
|
+ n = (int32_t)fn;
|
|
|
+ r = x - fn*pio2_1;
|
|
|
+ w = fn*pio2_1t; /* 1st round, good to 85 bits */
|
|
|
+ /* Matters with directed rounding. */
|
|
|
+ if ((r - w < -pio4)) {
|
|
|
+ n--;
|
|
|
+ fn--;
|
|
|
+ r = x - fn*pio2_1;
|
|
|
+ w = fn*pio2_1t;
|
|
|
+ } else if ((r - w > pio4)) {
|
|
|
+ n++;
|
|
|
+ fn++;
|
|
|
+ r = x - fn*pio2_1;
|
|
|
+ w = fn*pio2_1t;
|
|
|
+ }
|
|
|
+ y[0] = r - w;
|
|
|
+ u.f = y[0];
|
|
|
+ ey = u.i>>52 & 0x7ff;
|
|
|
+ ex = ix>>20;
|
|
|
+ if (ex - ey > 16) { /* 2nd round, good to 118 bits */
|
|
|
+ t = r;
|
|
|
+ w = fn*pio2_2;
|
|
|
+ r = t - w;
|
|
|
+ w = fn*pio2_2t - ((t-r)-w);
|
|
|
+ y[0] = r - w;
|
|
|
+ u.f = y[0];
|
|
|
+ ey = u.i>>52 & 0x7ff;
|
|
|
+ if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */
|
|
|
+ t = r;
|
|
|
+ w = fn*pio2_3;
|
|
|
+ r = t - w;
|
|
|
+ w = fn*pio2_3t - ((t-r)-w);
|
|
|
+ y[0] = r - w;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ y[1] = (r - y[0]) - w;
|
|
|
+ return n;
|
|
|
+ }
|
|
|
+
|
|
|
+ (void)tx;
|
|
|
+ (void)ty;
|
|
|
+ (void)i;
|
|
|
+ return 0;
|
|
|
+#if 0
|
|
|
+ /*
|
|
|
+ * all other (large) arguments
|
|
|
+ */
|
|
|
+ if (ix >= 0x7ff00000) { /* x is inf or NaN */
|
|
|
+ y[0] = y[1] = x - x;
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ /* set z = scalbn(|x|,-ilogb(x)+23) */
|
|
|
+ u.f = x;
|
|
|
+ u.i &= (uint64_t)-1>>12;
|
|
|
+ u.i |= (uint64_t)(0x3ff + 23)<<52;
|
|
|
+ z = u.f;
|
|
|
+ for (i=0; i < 2; i++) {
|
|
|
+ tx[i] = (double)(int32_t)z;
|
|
|
+ z = (z-tx[i])*0x1p24;
|
|
|
+ }
|
|
|
+ tx[i] = z;
|
|
|
+ /* skip zero terms, first term is non-zero */
|
|
|
+ while (tx[i] == 0.0)
|
|
|
+ i--;
|
|
|
+ n = __rem_pio2_large(tx,ty,(int)(ix>>20)-(0x3ff+23),i+1,1);
|
|
|
+ if (sign) {
|
|
|
+ y[0] = -ty[0];
|
|
|
+ y[1] = -ty[1];
|
|
|
+ return -n;
|
|
|
+ }
|
|
|
+ y[0] = ty[0];
|
|
|
+ y[1] = ty[1];
|
|
|
+ return n;
|
|
|
+#endif
|
|
|
+}
|
|
|
+
|
|
|
+void dmath_sincos(double x, double *sin, double *cos) {
|
|
|
+ double y[2], s, c;
|
|
|
+ uint32_t ix;
|
|
|
+ unsigned n;
|
|
|
+
|
|
|
+ //GET_HIGH_WORD(ix, x);
|
|
|
+ union Float64Bits u = { .f = x };
|
|
|
+ ix = (uint32_t)(u.i >> 32);
|
|
|
+
|
|
|
+ ix &= 0x7fffffff;
|
|
|
+
|
|
|
+ /* |x| ~< pi/4 */
|
|
|
+ if (ix <= 0x3fe921fb) {
|
|
|
+ /* if |x| < 2**-27 * sqrt(2) */
|
|
|
+ if (ix < 0x3e46a09e) {
|
|
|
+ /* raise inexact if x!=0 and underflow if subnormal */
|
|
|
+
|
|
|
+ // FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
|
|
|
+ volatile double y_force_eval;
|
|
|
+ y_force_eval = ix < 0x00100000 ? x/0x1p120f : x+0x1p120f;
|
|
|
+ (void)y_force_eval;
|
|
|
+
|
|
|
+ *sin = x;
|
|
|
+ *cos = 1.0;
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ *sin = __sin(x, 0.0, 0);
|
|
|
+ *cos = __cos(x, 0.0);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* sincos(Inf or NaN) is NaN */
|
|
|
+ if (ix >= 0x7ff00000) {
|
|
|
+ *sin = *cos = x - x;
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* argument reduction needed */
|
|
|
+ n = __rem_pio2(x, y);
|
|
|
+ s = __sin(y[0], y[1], 1);
|
|
|
+ c = __cos(y[0], y[1]);
|
|
|
+ switch (n&3) {
|
|
|
+ case 0:
|
|
|
+ *sin = s;
|
|
|
+ *cos = c;
|
|
|
+ break;
|
|
|
+ case 1:
|
|
|
+ *sin = c;
|
|
|
+ *cos = -s;
|
|
|
+ break;
|
|
|
+ case 2:
|
|
|
+ *sin = -s;
|
|
|
+ *cos = -c;
|
|
|
+ break;
|
|
|
+ case 3:
|
|
|
+ default:
|
|
|
+ *sin = -c;
|
|
|
+ *cos = s;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_sin(double x) {
|
|
|
+ double s, c;
|
|
|
+ dmath_sincos(x, &s, &c);
|
|
|
+ return s;
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_cos(double x) {
|
|
|
+ double s, c;
|
|
|
+ dmath_sincos(x, &s, &c);
|
|
|
+ return c;
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_tan(double x) {
|
|
|
+ double s, c;
|
|
|
+ dmath_sincos(x, &s, &c);
|
|
|
+ return s / c;
|
|
|
+}
|
|
|
+
|
|
|
+static double zig_r64(double z) {
|
|
|
+ const double pS0 = 1.66666666666666657415e-01;
|
|
|
+ const double pS1 = -3.25565818622400915405e-01;
|
|
|
+ const double pS2 = 2.01212532134862925881e-01;
|
|
|
+ const double pS3 = -4.00555345006794114027e-02;
|
|
|
+ const double pS4 = 7.91534994289814532176e-04;
|
|
|
+ const double pS5 = 3.47933107596021167570e-05;
|
|
|
+ const double qS1 = -2.40339491173441421878e+00;
|
|
|
+ const double qS2 = 2.02094576023350569471e+00;
|
|
|
+ const double qS3 = -6.88283971605453293030e-01;
|
|
|
+ const double qS4 = 7.70381505559019352791e-02;
|
|
|
+
|
|
|
+ double p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
|
|
|
+ double q = 1.0 + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
|
|
|
+ return p / q;
|
|
|
+}
|
|
|
+
|
|
|
+// https://github.com/ziglang/zig/blob/master/lib/std/math/asin.zig
|
|
|
+double dmath_asin(double x) {
|
|
|
+ const double pio2_hi = 1.57079632679489655800e+00;
|
|
|
+ const double pio2_lo = 6.12323399573676603587e-17;
|
|
|
+
|
|
|
+ union Float64Bits ux_union;
|
|
|
+ ux_union.f = x;
|
|
|
+ uint64_t ux = ux_union.i;
|
|
|
+ uint32_t hx = (uint32_t)(ux >> 32);
|
|
|
+ uint32_t ix = hx & 0x7FFFFFFF;
|
|
|
+
|
|
|
+ /* |x| >= 1 or nan */
|
|
|
+ if (ix >= 0x3FF00000) {
|
|
|
+ uint32_t lx = (uint32_t)(ux & 0xFFFFFFFF);
|
|
|
+
|
|
|
+ /* asin(1) = +-pi/2 with inexact */
|
|
|
+ if (((ix - 0x3FF00000) | lx) == 0) {
|
|
|
+ return x * pio2_hi + 0x1.0p-120;
|
|
|
+ } else {
|
|
|
+ return DMATH_NAN;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* |x| < 0.5 */
|
|
|
+ if (ix < 0x3FE00000) {
|
|
|
+ /* if 0x1p-1022 <= |x| < 0x1p-26 avoid raising overflow */
|
|
|
+ if (ix < 0x3E500000 && ix >= 0x00100000) {
|
|
|
+ return x;
|
|
|
+ } else {
|
|
|
+ return x + x * zig_r64(x * x);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* 1 > |x| >= 0.5 */
|
|
|
+ double z = (1 - dmath_fabs(x)) * 0.5;
|
|
|
+ double s = dmath_sqrt(z);
|
|
|
+ double r = zig_r64(z);
|
|
|
+ double fx;
|
|
|
+
|
|
|
+ /* |x| > 0.975 */
|
|
|
+ if (ix >= 0x3FEF3333) {
|
|
|
+ fx = pio2_hi - 2 * (s + s * r);
|
|
|
+ } else {
|
|
|
+ union Float64Bits jx_union = { .f = s };
|
|
|
+ uint64_t jx = jx_union.i;
|
|
|
+ union Float64Bits df_union = { .i = jx & 0xFFFFFFFF00000000ULL };
|
|
|
+ double df = df_union.f;
|
|
|
+ double c = (z - df * df) / (s + df);
|
|
|
+ fx = 0.5 * pio2_hi - (2 * s * r - (pio2_lo - 2 * c) - (0.5 * pio2_hi - 2 * df));
|
|
|
+ }
|
|
|
+
|
|
|
+ if (hx >> 31 != 0) {
|
|
|
+ return -fx;
|
|
|
+ } else {
|
|
|
+ return fx;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_acos(double x) {
|
|
|
+ return DMATH_PI / 2 - dmath_asin(x);
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_atan(double x) {
|
|
|
+ return dmath_asin(x / dmath_sqrt(1 + x * x));
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_atan2(double y, double x) {
|
|
|
+ if (x > 0) {
|
|
|
+ return dmath_atan(y / x);
|
|
|
+ } else if (x < 0 && y >= 0) {
|
|
|
+ return dmath_atan(y / x) + DMATH_PI;
|
|
|
+ } else if (x < 0 && y < 0) {
|
|
|
+ return dmath_atan(y / x) - DMATH_PI;
|
|
|
+ } else if (x == 0 && y > 0) {
|
|
|
+ return DMATH_PI / 2;
|
|
|
+ } else if (x == 0 && y < 0) {
|
|
|
+ return -DMATH_PI / 2;
|
|
|
+ } else {
|
|
|
+ return DMATH_NAN;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+////////////////////////////////////////////////////////////////////
|
|
|
+
|
|
|
+int dmath_isinf(double x) {
|
|
|
+ union Float64Bits u = { .f = x };
|
|
|
+ return (u.i & -1ULL>>1) == 0x7ffULL<<52;
|
|
|
+}
|
|
|
+
|
|
|
+int dmath_isnan(double x) {
|
|
|
+ union Float64Bits u = { .f = x };
|
|
|
+ return (u.i & -1ULL>>1) > 0x7ffULL<<52;
|
|
|
+}
|
|
|
+
|
|
|
+int dmath_isnormal(double x) {
|
|
|
+ union Float64Bits u = { .f = x };
|
|
|
+ return ((u.i+(1ULL<<52)) & -1ULL>>1) >= 1ULL<<53;
|
|
|
+}
|
|
|
+
|
|
|
+int dmath_isfinite(double x) {
|
|
|
+ union Float64Bits u = { .f = x };
|
|
|
+ return (u.i & -1ULL>>1) < 0x7ffULL<<52;
|
|
|
+}
|
|
|
+
|
|
|
+// https://github.com/kraj/musl/blob/kraj/master/src/math/fmod.c
|
|
|
+double dmath_fmod(double x, double y) {
|
|
|
+ union Float64Bits ux = { .f = x }, uy = { .f = y };
|
|
|
+ int ex = ux.i>>52 & 0x7ff;
|
|
|
+ int ey = uy.i>>52 & 0x7ff;
|
|
|
+ int sx = ux.i>>63;
|
|
|
+ uint64_t i;
|
|
|
+
|
|
|
+ /* in the followings uxi should be ux.i, but then gcc wrongly adds */
|
|
|
+ /* float load/store to inner loops ruining performance and code size */
|
|
|
+ uint64_t uxi = ux.i;
|
|
|
+
|
|
|
+ if (uy.i<<1 == 0 || dmath_isnan(y) || ex == 0x7ff)
|
|
|
+ return (x*y)/(x*y);
|
|
|
+ if (uxi<<1 <= uy.i<<1) {
|
|
|
+ if (uxi<<1 == uy.i<<1)
|
|
|
+ return 0*x;
|
|
|
+ return x;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* normalize x and y */
|
|
|
+ if (!ex) {
|
|
|
+ for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1);
|
|
|
+ uxi <<= -ex + 1;
|
|
|
+ } else {
|
|
|
+ uxi &= -1ULL >> 12;
|
|
|
+ uxi |= 1ULL << 52;
|
|
|
+ }
|
|
|
+ if (!ey) {
|
|
|
+ for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1);
|
|
|
+ uy.i <<= -ey + 1;
|
|
|
+ } else {
|
|
|
+ uy.i &= -1ULL >> 12;
|
|
|
+ uy.i |= 1ULL << 52;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* x mod y */
|
|
|
+ for (; ex > ey; ex--) {
|
|
|
+ i = uxi - uy.i;
|
|
|
+ if (i >> 63 == 0) {
|
|
|
+ if (i == 0)
|
|
|
+ return 0*x;
|
|
|
+ uxi = i;
|
|
|
+ }
|
|
|
+ uxi <<= 1;
|
|
|
+ }
|
|
|
+ i = uxi - uy.i;
|
|
|
+ if (i >> 63 == 0) {
|
|
|
+ if (i == 0)
|
|
|
+ return 0*x;
|
|
|
+ uxi = i;
|
|
|
+ }
|
|
|
+ for (; uxi>>52 == 0; uxi <<= 1, ex--);
|
|
|
+
|
|
|
+ /* scale result */
|
|
|
+ if (ex > 0) {
|
|
|
+ uxi -= 1ULL << 52;
|
|
|
+ uxi |= (uint64_t)ex << 52;
|
|
|
+ } else {
|
|
|
+ uxi >>= -ex + 1;
|
|
|
+ }
|
|
|
+ uxi |= (uint64_t)sx << 63;
|
|
|
+ ux.i = uxi;
|
|
|
+ return ux.f;
|
|
|
+}
|
|
|
+
|
|
|
+// https://github.com/kraj/musl/blob/kraj/master/src/math/copysign.c
|
|
|
+double dmath_copysign(double x, double y) {
|
|
|
+ union Float64Bits ux = { .f = x }, uy = { .f = y };
|
|
|
+ ux.i &= -1ULL/2;
|
|
|
+ ux.i |= uy.i & 1ULL<<63;
|
|
|
+ return ux.f;
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_fabs(double x) {
|
|
|
+ return (x < 0) ? -x : x;
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_ceil(double x) {
|
|
|
+ if(!dmath_isfinite(x)) return x;
|
|
|
+ int int_part = (int)x;
|
|
|
+ if (x > 0 && x != (double)int_part) {
|
|
|
+ return (double)(int_part + 1);
|
|
|
+ }
|
|
|
+ return (double)int_part;
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_floor(double x) {
|
|
|
+ if(!dmath_isfinite(x)) return x;
|
|
|
+ int int_part = (int)x;
|
|
|
+ if (x < 0 && x != (double)int_part) {
|
|
|
+ return (double)(int_part - 1);
|
|
|
+ }
|
|
|
+ return (double)int_part;
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_trunc(double x) {
|
|
|
+ return (double)((int)x);
|
|
|
+}
|
|
|
+
|
|
|
+// https://github.com/kraj/musl/blob/kraj/master/src/math/modf.c
|
|
|
+double dmath_modf(double x, double* iptr) {
|
|
|
+ union Float64Bits u = { .f = x };
|
|
|
+ uint64_t mask;
|
|
|
+ int e = (int)(u.i>>52 & 0x7ff) - 0x3ff;
|
|
|
+
|
|
|
+ /* no fractional part */
|
|
|
+ if (e >= 52) {
|
|
|
+ *iptr = x;
|
|
|
+ if (e == 0x400 && u.i<<12 != 0) /* nan */
|
|
|
+ return x;
|
|
|
+ u.i &= 1ULL<<63;
|
|
|
+ return u.f;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* no integral part*/
|
|
|
+ if (e < 0) {
|
|
|
+ u.i &= 1ULL<<63;
|
|
|
+ *iptr = u.f;
|
|
|
+ return x;
|
|
|
+ }
|
|
|
+
|
|
|
+ mask = -1ULL>>12>>e;
|
|
|
+ if ((u.i & mask) == 0) {
|
|
|
+ *iptr = x;
|
|
|
+ u.i &= 1ULL<<63;
|
|
|
+ return u.f;
|
|
|
+ }
|
|
|
+ u.i &= ~mask;
|
|
|
+ *iptr = u.f;
|
|
|
+ return x - u.f;
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_fmin(double x, double y) {
|
|
|
+ return (x < y) ? x : y;
|
|
|
+}
|
|
|
+
|
|
|
+double dmath_fmax(double x, double y) {
|
|
|
+ return (x > y) ? x : y;
|
|
|
+}
|