summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOri Bernstein <ori@eigenstate.org>2018-07-31 20:17:31 -0700
committerOri Bernstein <ori@eigenstate.org>2018-07-31 20:17:31 -0700
commit3d4d7c4ca9eb30d468d71c26f4b4588c800c9d1c (patch)
tree901a96c69f1514e1d9551c10b90008bcc03a9fc4
parentd1e7671e6bd6a12d5444716bfbcd3e88ba7e3b5f (diff)
parent95d1f61428c4ba5176b341da851b2edee196cb09 (diff)
downloadmc-3d4d7c4ca9eb30d468d71c26f4b4588c800c9d1c.tar.gz
Merge remote-tracking branch 'npnth/libmath'
-rw-r--r--lib/math/ancillary/KM06-calc.gp39
-rw-r--r--lib/math/ancillary/README4
-rw-r--r--lib/math/ancillary/generate-arctan-tuples-for-GB91.c422
-rw-r--r--lib/math/ancillary/generate-minimax-by-Remez.gp178
-rw-r--r--lib/math/ancillary/generate-triples-for-GB91.c297
-rw-r--r--lib/math/ancillary/pi-constants.c93
-rw-r--r--lib/math/ancillary/ulp.gp27
-rw-r--r--lib/math/atan-impl.myr488
-rw-r--r--lib/math/bld.sub21
-rw-r--r--lib/math/exp-impl.myr78
-rw-r--r--lib/math/fpmath.myr168
-rw-r--r--lib/math/log-impl.myr588
-rw-r--r--lib/math/poly-impl.myr24
-rw-r--r--lib/math/powr-impl.myr406
-rw-r--r--lib/math/references27
-rw-r--r--lib/math/round-impl.myr12
-rw-r--r--lib/math/scale2-impl.myr17
-rw-r--r--lib/math/sin-impl.myr815
-rw-r--r--lib/math/sqrt-impl.myr94
-rw-r--r--lib/math/sum-impl.myr42
-rw-r--r--lib/math/tan-impl.myr510
-rw-r--r--lib/math/test/atan-impl.myr622
-rw-r--r--lib/math/test/log-impl.myr742
-rw-r--r--lib/math/test/powr-impl.myr76
-rw-r--r--lib/math/test/scale2-impl.myr5
-rw-r--r--lib/math/test/sin-impl.myr438
-rw-r--r--lib/math/test/tan-impl.myr494
-rw-r--r--lib/math/trunc-impl.myr60
-rw-r--r--lib/math/util.myr80
29 files changed, 6655 insertions, 212 deletions
diff --git a/lib/math/ancillary/KM06-calc.gp b/lib/math/ancillary/KM06-calc.gp
new file mode 100644
index 0000000..da74852
--- /dev/null
+++ b/lib/math/ancillary/KM06-calc.gp
@@ -0,0 +1,39 @@
+/*
+ Implementations of some functions from [KM06]. The exact ranges
+ were applied by manual fiddling.
+ */
+/*
+ With s = amin^(1/p) and l = amax^(1/p), return polroots() of the
+ polynomial in x formed as the DIFFERENCE of the two weighted
+ cubic terms below. The result is whatever polroots returns, so
+ the roots may be complex; beta() filters them.
+ */
+{ betap(amin, amax, p, n) = my(l, s);
+ l = amax^(1/p);
+ s = amin^(1/p);
+ return(polroots(l^(1 - 1/(2^(n-1))) * (3/s - (x - s) * (p+1)/(s^2)) * (x - s)^2 - s^(1 - 1/(2^(n-1))) * (3/l - (x - l) * (p+1)/(l^2)) * (x - l)^2));
+}
+/*
+ Same as betap(), except the two weighted cubic terms are ADDED
+ instead of subtracted before the roots are taken.
+ */
+{ betam(amin, amax, p, n) = my(l, s);
+ l = amax^(1/p);
+ s = amin^(1/p);
+ return(polroots(l^(1 - 1/(2^(n-1))) * (3/s - (x - s) * (p+1)/(s^2)) * (x - s)^2 + s^(1 - 1/(2^(n-1))) * (3/l - (x - l) * (p+1)/(l^2)) * (x - l)^2));
+}
+/*
+ Pick a root of betap()/betam() lying between amin^(1/p) and
+ amax^(1/p). Roots whose imaginary part is within 0.001 of zero
+ are treated as real. If several qualify, the last one scanned
+ wins; if none does, 0.0 is returned.
+ */
+{ beta(amin, amax, p, n) = my(plus, minus, alsmaller, allarger, l, r);
+ plus = betap(amin, amax, p, n);
+ minus = betam(amin, amax, p, n);
+ alsmaller = min(amax^(1/p), amin^(1/p));
+ allarger = max(amax^(1/p), amin^(1/p));
+ l = List();
+ for(i=1, length(plus), if(imag(plus[i]) < 0.001 && imag(plus[i]) > -0.001, listput(l, real(plus[i])), ));
+ for(i=1, length(minus), if(imag(minus[i]) < 0.001 && imag(minus[i]) > -0.001, listput(l, real(minus[i])), ));
+ r=0.0;
+ for(i=1, length(l), if(l[i] <= allarger && l[i] >= alsmaller, r=l[i], ));
+ return(r);
+}
+/*
+ Starting from beta(amin, amax, p, n), apply n steps of the
+ iteration x <- x/p * (p - 1 + a*x^(-p)) (Newton's method for
+ x^p = a) once with a = amin and once with a = amax, and return
+ the larger of the two absolute errors against a^(1/p).
+ */
+{ maxerr(amin, amax, p, n) = my(x1, x2, e1, e2);
+ x1 = List([beta(amin, amax, p, n)]);
+ x2 = List([beta(amin, amax, p, n)]);
+ for(i=1, n, listput(x1, x1[i]/p * (p - 1 + amin * x1[i]^(-p))));
+ for(i=1, n, listput(x2, x2[i]/p * (p - 1 + amax * x2[i]^(-p))));
+ e1 = abs(amin^(1/p) - x1[n + 1]);
+ e2 = abs(amax^(1/p) - x2[n + 1]);
+ return(max(e1, e2));
+}
diff --git a/lib/math/ancillary/README b/lib/math/ancillary/README
new file mode 100644
index 0000000..9e9e72a
--- /dev/null
+++ b/lib/math/ancillary/README
@@ -0,0 +1,4 @@
+The files in this directory are only useful for understanding the
+derivation of constants. If you wish to improve a file like
+sqrt-impl.myr, and are curious as to how certain constants were
+derived, this might hold some answers.
diff --git a/lib/math/ancillary/generate-arctan-tuples-for-GB91.c b/lib/math/ancillary/generate-arctan-tuples-for-GB91.c
new file mode 100644
index 0000000..67cd3a5
--- /dev/null
+++ b/lib/math/ancillary/generate-arctan-tuples-for-GB91.c
@@ -0,0 +1,422 @@
+/* cc -o generate-arctan-tuples-for-GB91 generate-arctan-tuples-for-GB91.c -lmpfr # -fno-strict-aliasing */
+/* cc -static -std=c99 -D_POSIX_C_SOURCE=999999999 -fno-strict-aliasing -O2 -o generate-arctan-tuples-for-GB91 generate-arctan-tuples-for-GB91.c -lmpfr -lgmp */
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <mpfr.h>
+
+/*
+ [GB91] treat arctan by a table + minimax method: the range [0,1]
+ is partitioned up, and on each partition arctan is approximated
+ by a minimax polynomial pi(x - xi) =~= arctan(x - xi).
+
+ The twist of [GB91], that of Highly Accurate Tables, here applies
+ to the first two coefficients of the minimax polynomial: by
+ adjusting xi and computing different polynomials, we obtain
+ coefficients cij for pi such that ci0 and ci1, in perfect accuracy,
+ have a bunch of zeroes in the binary expansion after the 53rd
+ bit. This gives the stored cij a bit more accuracy for free.
+
+ Note that there's a sign flip somewhere: so the even-degree
+ elements need to be negated for use in atan-impl.myr.
+ */
+
+/* Something something -fno-strict-aliasing */
+#define FLT64_TO_UINT64(f) (*((uint64_t *) ((char *) &f)))
+#define UINT64_TO_FLT64(u) (*((double *) ((char *) &u)))
+#define xmin(a, b) ((a) < (b) ? (a) : (b))
+#define xmax(a, b) ((a) > (b) ? (a) : (b))
+
+#define EXP_OF_FLT64(f) (((FLT64_TO_UINT64(f)) >> 52) & 0x7ff)
+
+typedef int (*mpfr_fn)(mpfr_ptr, mpfr_srcptr, mpfr_rnd_t);
+
+#define N 5
+/*
+ Measure how close f is to a double. d1 is f rounded to double and
+ d2 is the residual f - d1, also rounded to double; the result is
+ the gap between their biased exponent fields, less 52. A larger
+ value means the residual is smaller relative to d1.
+
+ temp is scratch space and is overwritten.
+ */
+static int leeway_of(mpfr_t temp, mpfr_t f)
+{
+ double d1 = mpfr_get_d(f, MPFR_RNDN);
+ double d2 = 0.0;
+
+ mpfr_set_d(temp, d1, MPFR_RNDN);
+ mpfr_sub(temp, f, temp, MPFR_RNDN);
+ d2 = mpfr_get_d(temp, MPFR_RNDN);
+
+ return EXP_OF_FLT64(d1) - 52 - EXP_OF_FLT64(d2);
+}
+
+/*
+ Compute the determinant of the (N+2)x(N+2) matrix A by the
+ Leibniz formula: walk every permutation sigma of the column
+ indices in lexicographic order and accumulate the signed product
+ of A[j][sigma[j]]. That is (N+2)! terms, hence "poorly".
+
+ det must already be initialized by the caller; it is overwritten.
+ */
+static void determinant_poorly(mpfr_t det, mpfr_t A[][N + 2])
+{
+ int sgn = 1;
+ int sigma[N + 2];
+ mpfr_t prod;
+
+ for (int j = 0; j < (N + 2); ++j) {
+ sigma[j] = j;
+ }
+
+ mpfr_set_si(det, 0, MPFR_RNDN);
+ mpfr_init2(prod, 200);
+
+ while (1) {
+ /* ∏ a_{j, sigma[j]} */
+ mpfr_set_si(prod, sgn, MPFR_RNDN);
+
+ for (int j = 0; j < (N + 2); ++j) {
+ mpfr_mul(prod, prod, A[j][sigma[j]], MPFR_RNDN);
+ }
+
+ mpfr_add(det, det, prod, MPFR_RNDN);
+
+ /* increment sigma: algorithm K/L/something... */
+ int k = N + 1;
+ int j = N + 1;
+ int t;
+
+ /* find the longest non-increasing tail; k is its first index */
+ while (k > 0 &&
+ sigma[k - 1] >= sigma[k]) {
+ k--;
+ }
+
+ /* whole array is non-increasing: that was the last permutation */
+ if (!k) {
+ break;
+ }
+
+ while (sigma[j] <= sigma[k - 1]) {
+ j--;
+ }
+
+ /* every transposition flips the sign of the permutation */
+ if (k - 1 != j) {
+ t = sigma[k - 1];
+ sigma[k - 1] = sigma[j];
+ sigma[j] = t;
+ sgn *= -1;
+ }
+
+ /* reverse the tail, one swap (and one sign flip) at a time */
+ for (int l = N + 1; l > k; --l, ++k) {
+ t = sigma[l];
+ sigma[l] = sigma[k];
+ sigma[k] = t;
+ sgn *= -1;
+ }
+ }
+
+ /* prod is initialized afresh on every call; release it so repeated calls do not leak */
+ mpfr_clear(prod);
+}
+
+/*
+ Invert the (N+2)x(N+2) matrix A into Ainv using cofactors:
+ Ainv[j][i] = (-1)^(i+j) * |minor(i, j)| / |A|.
+
+ Each minor is copied into the top-left (N+1)x(N+1) corner of a
+ scratch matrix whose last row and column are fixed to those of
+ the identity, so determinant_poorly() can be reused unchanged.
+
+ Every entry of Ainv must already be initialized by the caller.
+ */
+static void invert_poorly(mpfr_t A[][N + 2], mpfr_t Ainv[][N + 2])
+{
+ mpfr_t Mij[N + 2][N + 2];
+ mpfr_t det;
+ mpfr_t Mijdet;
+
+ mpfr_init2(det, 200);
+ mpfr_init2(Mijdet, 200);
+ determinant_poorly(det, A);
+
+ for (int i = 0; i < N + 2; ++i) {
+ for (int j = 0; j < N + 2; ++j) {
+ mpfr_init2(Mij[i][j], 200);
+
+ /* identity border: leaves the determinant equal to that of the minor */
+ if (i == (N + 1) &&
+ j == (N + 1)) {
+ mpfr_set_si(Mij[i][j], 1, MPFR_RNDN);
+ } else if (i == (N + 1) ||
+ j == (N + 1)) {
+ mpfr_set_si(Mij[i][j], 0, MPFR_RNDN);
+ }
+ }
+ }
+
+ /* Construct transpose adjugate poorly */
+ for (int i = 0; i < N + 2; ++i) {
+ for (int j = 0; j < N + 2; ++j) {
+ /* Copy over A, sans i, j, to Mij */
+ for (int ii = 0; ii < N + 2; ii++) {
+ if (ii == i) {
+ continue;
+ }
+
+ int ri = ii > i ? ii - 1 : ii;
+
+ for (int jj = 0; jj < N + 2; jj++) {
+ if (jj == j) {
+ continue;
+ }
+
+ int rj = jj > j ? jj - 1 : jj;
+
+ mpfr_set(Mij[ri][rj], A[ii][jj],
+ MPFR_RNDN);
+ }
+ }
+
+ /* Ainv[j][i] = | Mij | / det */
+ determinant_poorly(Mijdet, Mij);
+ mpfr_div(Ainv[j][i], Mijdet, det, MPFR_RNDN);
+
+ if ((i + j) % 2) {
+ mpfr_mul_si(Ainv[j][i], Ainv[j][i], -1,
+ MPFR_RNDN);
+ }
+ }
+ }
+
+ /* Release the scratch values; Ainv belongs to the caller */
+ for (int i = 0; i < N + 2; ++i) {
+ for (int j = 0; j < N + 2; ++j) {
+ mpfr_clear(Mij[i][j]);
+ }
+ }
+
+ mpfr_clear(det);
+ mpfr_clear(Mijdet);
+}
+/*
+ Search near ii/256 for a pivot xi whose fitted coefficients b0
+ and b1 lie unusually close to doubles, then print the best
+ (xi, b0, ..., bN) tuple found as raw 64-bit hex words.
+
+ Candidates are made by adding r = 0, 1, -1, 2, -2, ... to the
+ bit pattern of the double ii/256; the loop ends once r reaches
+ 1 << 28. Each candidate costs one matrix-vector product against
+ a matrix inverted once up front.
+
+ Returns 0 after printing a tuple, or -1 when the best leeway
+ found is below min_leeway.
+ */
+static int find_tuple(int ii, int min_leeway)
+{
+ int64_t r = 0;
+ double xi_orig_d = ii / 256.0;
+ uint64_t xi_orig = FLT64_TO_UINT64(xi_orig_d);
+ double range_a = -1 / 512.0;
+ double range_b = 1 / 512.0;
+ uint64_t xi;
+ double xi_d;
+ mpfr_t xi_m;
+ int best_lee = 0;
+ long int best_r = 0;
+ mpfr_t t[10];
+ mpfr_t cn[N + 2];
+ mpfr_t bi[N + 2];
+ mpfr_t best_bi[N + 2];
+ mpfr_t best_xi;
+ mpfr_t xij[N + 2][N + 2];
+ mpfr_t xijinv[N + 2][N + 2];
+ mpfr_t fxi[N + 2];
+ double t_d = 0.0;
+ uint64_t t_u = 0;
+ long ec = 1;
+ long start = time(0);
+ long end = start;
+
+ mpfr_init2(xi_m, 200);
+ mpfr_init2(best_xi, 200);
+
+ for (int i = 0; i < 10; ++i) {
+ mpfr_init2(t[i], 200);
+ }
+
+ mpfr_set_d(t[1], range_a, MPFR_RNDN);
+ mpfr_set_d(t[2], range_b, MPFR_RNDN);
+ mpfr_add(t[3], t[2], t[1], MPFR_RNDN);
+ mpfr_sub(t[4], t[2], t[1], MPFR_RNDN);
+ mpfr_div_si(t[3], t[3], 2, MPFR_RNDN);
+ mpfr_div_si(t[4], t[4], 2, MPFR_RNDN);
+
+ /*
+ Calculate Chebyshev nodes for the range: t[3] is the midpoint
+ and t[4] the half-width computed above.
+
+ NOTE(review): the argument handed to mpfr_cos below is
+ (2i - 1)/(2(N + 2)) with no factor of pi, unlike
+ chebyshev_node() in generate-minimax-by-Remez.gp. Confirm
+ whether that is intended before regenerating any tables.
+ */
+ for (int i = 0; i < (N + 2); ++i) {
+ mpfr_init2(cn[i], 200);
+ mpfr_init2(bi[i], 200);
+ mpfr_init2(best_bi[i], 200);
+ mpfr_set_si(best_bi[i], 0, MPFR_RNDN);
+ mpfr_init2(fxi[i], 200);
+ mpfr_set_si(cn[i], 2 * i - 1, MPFR_RNDN);
+ mpfr_div_si(cn[i], cn[i], 2 * (N + 2), MPFR_RNDN);
+ mpfr_cos(cn[i], cn[i], MPFR_RNDN);
+ mpfr_mul(cn[i], cn[i], t[4], MPFR_RNDN);
+ mpfr_add(cn[i], cn[i], t[3], MPFR_RNDN);
+ }
+
+ /*
+ Set up M×M (M = N+2) matrix for one step of Remez
+ algorithm: the cnI^Js in
+
+ b0 + b1·cn1 + ⋯ + bN·cn1^N + (-1)^1·E = f(cn1)
+ b0 + b1·cn2 + ⋯ + bN·cn2^N + (-1)^2·E = f(cn2)
+ ⋮ ⋮ ⋱ ⋮ ⋮ ⋮
+ b0 + b1·cnM + ⋯ + bN·cnM^N + (-1)^M·E = f(cnM)
+ */
+ for (int i = 0; i < (N + 2); ++i) {
+ mpfr_set_si(t[1], 1, MPFR_RNDN);
+
+ for (int j = 0; j < (N + 1); ++j) {
+ mpfr_init2(xij[i][j], 200);
+ mpfr_init2(xijinv[i][j], 200);
+ mpfr_set(xij[i][j], t[1], MPFR_RNDN);
+ mpfr_mul(t[1], t[1], cn[i], MPFR_RNDN);
+ }
+
+ mpfr_init2(xij[i][N + 1], 200);
+ mpfr_init2(xijinv[i][N + 1], 200);
+ mpfr_set_si(xij[i][N + 1], ec, MPFR_RNDN);
+ ec *= -1;
+ }
+
+ /* Compute (xij)^(-1) */
+ invert_poorly(xij, xijinv);
+
+ while (r < (1 << 28)) {
+ xi = xi_orig + r;
+ xi_d = UINT64_TO_FLT64(xi);
+ mpfr_set_d(xi_m, xi_d, MPFR_RNDN);
+
+ /* compute f(cn[i]) = atan(cn[i] - xi) */
+ for (int i = 0; i < (N + 2); ++i) {
+ mpfr_sub(fxi[i], cn[i], xi_m, MPFR_RNDN);
+ mpfr_atan(fxi[i], fxi[i], MPFR_RNDN);
+ }
+
+ /* Now solve the linear system above for bi */
+ for (int i = 0; i < (N + 2); ++i) {
+ mpfr_set_si(bi[i], 0, MPFR_RNDN);
+
+ for (int j = 0; j < (N + 2); ++j) {
+ mpfr_mul(t[i], xijinv[i][j], fxi[j], MPFR_RNDN);
+ mpfr_add(bi[i], bi[i], t[i], MPFR_RNDN);
+ }
+ }
+
+ /*
+ If the error term isn't close enough to 0, we
+ should, by all rights, try a few more iterations
+ of Remez. But that's incredibly slow, and we're
+ in a tight loop, so let's just bail.
+
+ NOTE(review): in the condition below `>` binds tighter
+ than `&`, so it evaluates as bits & (mask > 0x08), i.e.
+ bits & 1: the candidate is skipped whenever the lowest
+ bit of e is set. Confirm what was intended.
+ */
+ double e = mpfr_get_d(bi[N + 1], MPFR_RNDN);
+
+ if (FLT64_TO_UINT64(e) & 0x7fffffffffffffff > 0x08) {
+ goto next_r;
+ }
+
+ /* Test if b[0] and b[1] are very precise */
+ int leeA = leeway_of(t[0], bi[0]);
+ int leeB = 0;
+ int lee = 0;
+
+ if (leeA <= min_leeway) {
+ goto next_r;
+ }
+
+ leeB = leeway_of(t[0], bi[1]);
+
+ if (leeB + 4 <= min_leeway) {
+ goto next_r;
+ }
+
+ lee = xmin(leeA, leeB + 4);
+
+ if (lee <= best_lee) {
+ goto next_r;
+ }
+
+ best_lee = lee;
+ best_r = r;
+ mpfr_set(best_xi, xi_m, MPFR_RNDN);
+
+ for (int i = 0; i < (N + 2); ++i) {
+ mpfr_set(best_bi[i], bi[i], MPFR_RNDN);
+ }
+
+next_r:
+
+ /* increment r: 0, 1, -1, 2, -2, ... */
+ if (r <= 0) {
+ r = 1 - r;
+ } else {
+ r = -r;
+ }
+ }
+
+ end = time(0);
+
+ if (best_lee < min_leeway) {
+ return -1;
+ }
+
+ /* Recall the N+1 entry in output is the error, which we don't care about */
+ t_d = mpfr_get_d(best_xi, MPFR_RNDN);
+ t_u = FLT64_TO_UINT64(t_d);
+ printf("(%#018lx, ", t_u);
+
+ for (int i = 0; i < N; ++i) {
+ t_d = mpfr_get_d(best_bi[i], MPFR_RNDN);
+ printf("%#018lx, ", FLT64_TO_UINT64(t_d));
+ }
+
+ t_d = mpfr_get_d(best_bi[N], MPFR_RNDN);
+ t_u = FLT64_TO_UINT64(t_d);
+ printf("%#018lx), ", t_u);
+ printf("/* i = %03d, l = %02d, r = %010ld, t = %ld */\n", ii, best_lee,
+ best_r, end - start);
+
+ return 0;
+}
+/* Print the accepted command-line options to stdout. */
+static void usage(void)
+{
+ printf("generate-arctan-tuples-for-GB91\n");
+ printf(" [-i start_idx]\n");
+ printf(" [-j end_idx]\n");
+}
+/*
+ Echo the command line, parse -i/-j into an index range clamped
+ to [1, 256], and run find_tuple() for every index in that range,
+ reporting the indices for which no candidate was found.
+ */
+int main(int argc, char **argv)
+{
+ int c = 0;
+ long i_start_arg = 1;
+ long i_end_arg = 256;
+ int i_start = 1;
+ int i_end = 256;
+
+ for (int k = 0; k < argc; ++k) {
+ printf("%s ", argv[k]);
+ }
+
+ printf("\n");
+
+ while ((c = getopt(argc, argv, "i:j:")) != -1) {
+ switch (c) {
+ case 'i':
+ errno = 0;
+ i_start_arg = strtoll(optarg, 0, 0); /* NOTE(review): no end-pointer check, so only errno catches bad input */
+
+ if (errno) {
+ fprintf(stderr, "bad start index %s\n", optarg);
+
+ return 1;
+ }
+
+ break;
+ case 'j':
+ errno = 0;
+ i_end_arg = strtoll(optarg, 0, 0);
+
+ if (errno) {
+ fprintf(stderr, "bad end index %s\n", optarg);
+
+ return 1;
+ }
+
+ break;
+ default:
+ usage(); /* option parsing, and then the run, carry on after this */
+ break;
+ }
+ }
+
+ if (i_start_arg <= 0 ||
+ i_start_arg > 256) {
+ printf("truncating start to (0, %d]\n", 256);
+ i_start_arg = xmin(xmax(i_start_arg, 1), 256);
+ }
+
+ if (i_end_arg <= 0 ||
+ i_end_arg > 256) {
+ printf("truncating end to (0, %d]\n", 256);
+ i_end_arg = xmin(xmax(i_end_arg, 1), 256);
+ }
+
+ i_start = i_start_arg;
+ i_end = i_end_arg;
+
+ for (int i = i_start; i <= i_end; ++i) {
+ if (find_tuple(i, 1) < 0) {
+ printf("CANNOT FIND SUITABLE CANDIDATE FOR i = %03d\n",
+ i);
+ }
+ }
+
+ return 0;
+}
diff --git a/lib/math/ancillary/generate-minimax-by-Remez.gp b/lib/math/ancillary/generate-minimax-by-Remez.gp
new file mode 100644
index 0000000..a388f4e
--- /dev/null
+++ b/lib/math/ancillary/generate-minimax-by-Remez.gp
@@ -0,0 +1,178 @@
+/*
+ Attempts to find a minimax polynomial of degree n for f via Remez
+ algorithm. The Remez algorithm appears to be slightly shot, but
+ the initial step of approximating by Chebyshev nodes works, and
+ is usually good enough.
+ */
+/*
+ k-th of n Chebyshev nodes, mapped from [-1, 1] onto [a, b]:
+ midpoint + half-width * cos(Pi * (2k - 1) / (2n)).
+ */
+{ chebyshev_node(a, b, k, n) = my(p, m, c);
+ p = (b + a)/2;
+ m = (b - a)/2;
+ c = cos(Pi * (2*k - 1)/(2*n));
+ return(p + m*c);
+}
+/*
+ One linear solve of the Remez algorithm on the n + 2 reference
+ points in x. Row k of M is [1, x_k, ..., x_k^n, (-1)^k] and the
+ right-hand side is f(x_k). Returns a row vector: n + 1 polynomial
+ coefficients followed by the levelled error E. The arguments a
+ and b are not used here.
+ */
+{ remez_step(f, n, a, b, x) = my(M, xx, bvec, k);
+ M = matrix(n + 2, n + 2);
+ bvec = vector(n + 2);
+ for (k = 1, n + 2,
+ xx = x[k];
+ for (j = 1, n + 1,
+ M[k,j] = xx^(j - 1);
+ );
+ M[k, n + 2] = (-1)^k;
+ bvec[k] = f(xx);
+ );
+ return(mattranspose(M^-1 * mattranspose(bvec)));
+}
+/*
+ Evaluate the degree-n polynomial with coefficients v[1..n+1]
+ (lowest order first) at x. Entries of v past n + 1 are ignored.
+ */
+{ p_eval(n, v, x) = my(s, k);
+ s = 0;
+ for (k = 1, n + 1,
+ s = s + v[k]*x^(k - 1)
+ );
+
+ return(s);
+}
+/* Absolute error |f(x) - p(x)| of the polynomial described by (n, v). */
+{ err(f, n, v, x) =
+ return(abs(f(x) - p_eval(n, v, x)));
+}
+
+/*
+ Locate reference points for the next Remez step. [a, b] is
+ scanned on a grid of gran = 10000 * depth steps for interior
+ local maxima of |f - p|; the neighbourhood one grid cell either
+ side of each of the first n + 2 maxima is then rescanned with the
+ same number of steps to refine it.
+
+ Returns a vector of n + 2 abscissae, or the string "q" when fewer
+ than n + 2 maxima were found or the first one is below 2^-2000.
+ */
+{ find_M(f, n, v, a, b, depth) = my(X, gran, l, lnext, len, xprev, xcur, xnext, yprev, ycur, ynext, thisa, thisb, k, j);
+ gran = 10000 * depth;
+ l = listcreate();
+
+ xprev = a - (b - a)/gran;
+ yprev = err(f, n, v, xprev);
+
+ /* each sample must be taken at its own abscissa, or the first grid point is judged on stale values */
+ xcur = a;
+ ycur = err(f, n, v, xcur);
+
+ xnext = a + (b - a)/gran;
+ ynext = err(f, n, v, xnext);
+
+ for (k = 2, gran,
+ xprev = xcur;
+ yprev = ycur;
+
+ xcur = xnext;
+ ycur = ynext;
+
+ xnext = a + k*(b - a)/gran;
+ ynext = err(f, n, v, xnext);
+
+ if(ycur > yprev && ycur > ynext, listput(l, [xcur, abs(ycur)]),);
+ );
+
+ /* NOTE(review): vecsort returns a sorted copy; the result is discarded, so l stays in scan order. Confirm intent. */
+ vecsort(l, 2);
+ if(length(l) < n + 2 || l[1][2] < 2^(-2000), return("q"),);
+ len = length(l);
+
+ lnext = listcreate();
+ for(j = 1, n + 2,
+ thisa = l[j][1] - (b-a)/gran;
+ thisb = l[j][1] + (b-a)/gran;
+
+ /* step size of the refinement grid is (thisb - thisa)/gran */
+ xprev = thisa - (thisb - thisa)/gran;
+ yprev = err(f, n, v, xprev);
+
+ xcur = thisa;
+ ycur = err(f, n, v, xcur);
+
+ xnext = thisa + (thisb - thisa)/gran;
+ ynext = err(f, n, v, xnext);
+
+ for (k = 2, gran,
+ xprev = xcur;
+ yprev = ycur;
+
+ xcur = xnext;
+ ycur = ynext;
+
+ xnext = thisa + k*(thisb - thisa)/gran;
+ ynext = abs(f(xnext) - p_eval(n, v, xnext));
+
+ if(ycur > yprev && ycur > ynext, listput(lnext, xcur),);
+ );
+ );
+ /* NOTE(review): as above, the two vecsort calls below have no effect on lnext or X. */
+ vecsort(lnext, cmp);
+ listkill(l);
+ X = vector(n + 2);
+ for (k = 1, min(n + 2, length(lnext)), X[k] = lnext[k]);
+ listkill(lnext);
+ vecsort(X);
+ return(X);
+}
+/*
+ Drive the Remez iteration for f on [a, b] at degree n: start from
+ n + 2 Chebyshev nodes, then for up to 100 rounds solve for the
+ coefficients, print them, and ask find_M() for new reference
+ points, stopping early when find_M() returns "q". Nothing is
+ returned; the results are the printed "v = ..." lines.
+
+ v is not listed in my(), so it is assigned as a global.
+ */
+{ find_minimax(f, n, a, b) = my(c, k, j);
+ c = vector(n + 2);
+ for (k = 1, n + 2,
+ c[k] = chebyshev_node(a, b, k, n + 2);
+ );
+ for(j = 1, 100,
+ v = remez_step(f, n, a, b, c);
+ print("v = ", v);
+ c = find_M(f, n, v, a, b, j);
+ if(c == "q", return,);
+ print("c = ", c);
+ );
+}
+/* sin(x)/x, with the value 1 substituted at x == 0. */
+{ sinoverx(x) =
+ return(if(x == 0, 1, sin(x)/x));
+}
+/* tan(x)/x, with the value 1 substituted at x == 0. */
+{ tanoverx(x) =
+ return(if(x == 0, 1, tan(x)/x));
+}
+/* atan(x)/x, with the value 1 substituted at x == 0. */
+{ atanxoverx(x) =
+ return(if(x == 0, 1, atan(x)/x));
+}
+/* Reciprocal of tanoverx(x), i.e. x/tan(x); equals 1 at x == 0. */
+{ cotx(x) =
+ return(1/tanoverx(x));
+}
+
+/* Driver: run find_minimax() for each function/interval pair used by the trig implementations. */
+print("\n");
+print("Minimaxing sin(x) / x, degree 6, on [-Pi/(4 * 256), Pi/(4 * 256)]:");
+find_minimax(sinoverx, 6, -Pi/1024, Pi/1024)
+print("\n");
+print("(You'll need to add a 0x0 at the beginning to make a degree 7...\n");
+print("\n");
+print("---\n");
+print("\n");
+print("Minimaxing cos(x), degree 7, on [-Pi/(4 * 256), Pi/(4 * 256)]:");
+find_minimax(cos, 7, -Pi/1024, Pi/1024)
+print("\n");
+print("---\n");
+print("\n");
+print("Minmimaxing tan(x) / x, degree 6, on [-Pi/(4 * 256), Pi/(4 * 256)]:");
+find_minimax(tanoverx, 6, -Pi/1024, Pi/1024)
+print("\n");
+print("(You'll need to add a 0x0 at the beginning to make a degree 7...\n");
+print("\n");
+print("---\n");
+print("\n");
+print("Minmimaxing x*cot(x), degree 8, on [-Pi/(4 * 256), Pi/(4 * 256)]:");
+find_minimax(cotx, 8, -Pi/1024, Pi/1024)
+print("\n");
+print("(Take the first v, and remember to divide by x)\n");
+print("\n");
+print("---\n");
+print("\n");
+print("Minmimaxing tan(x) / x, degree 10, on [0, 15.5/256]:");
+find_minimax(tanoverx, 10, 0, 15.5/256)
+print("\n");
+print("(You'll need to add a 0x0 at the beginning to make a degree 11...\n");
+print("\n");
+print("---\n");
+print("\n");
+/* the label must name the interval actually passed below, which is [0, 1/16] */
+print("Minmimaxing atan(x) / x, degree 12, on [0, 1/16]:");
+find_minimax(atanxoverx, 12, 0, 1/16)
+print("\n");
+print("(You'll need to add a 0x0 at the beginning to make a degree 13...\n");
+print("\n");
+print("---\n");
+print("Remember that there's that extra, ugly E term at the end of the vector that you want to lop off.\n");
diff --git a/lib/math/ancillary/generate-triples-for-GB91.c b/lib/math/ancillary/generate-triples-for-GB91.c
new file mode 100644
index 0000000..82f1a45
--- /dev/null
+++ b/lib/math/ancillary/generate-triples-for-GB91.c
@@ -0,0 +1,297 @@
+/* cc -o generate-triples-for-GB91 generate-triples-for-GB91.c -lmpfr # -fno-strict-aliasing */
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <mpfr.h>
+
+/*
+ Find triples (xi, sin(xi), cos(xi)) very close to machine numbers
+ for use in the algorithm of [GB91]. See [Lut95] for a better way
+ of calculating these, which we don't follow.
+
+ The idea is that, by good fortune (or more persuasive arguments),
+ there exist floating-point numbers xi that are close to numbers
+ of the form (i/N)(π/4), such that, letting f(xi) = yi, the binary
+ expansion of yi has a bunch of zeroes after the 53rd bit.
+
+ Then, when we discretize the input for sin(x) to some xi + delta,
+ the precomputed sin(xi) and cos(xi) can be stored as single
+ floating-point numbers; the extra zeroes in the infinite-precision
+ result mean that we get some precision for free. Compare with
+ the constants for log-impl.myr or exp-impl.myr, where the constants
+ require more space to be stored.
+ */
+
+/* Something something -fno-strict-aliasing */
+#define FLT64_TO_UINT64(f) (*((uint64_t *) ((char *) &f)))
+#define UINT64_TO_FLT64(u) (*((double *) ((char *) &u)))
+#define xmin(a, b) ((a) < (b) ? (a) : (b))
+#define xmax(a, b) ((a) > (b) ? (a) : (b))
+
+#define EXP_OF_FLT64(f) (((FLT64_TO_UINT64(f)) >> 52) & 0x7ff)
+
+typedef int (*mpfr_fn)(mpfr_ptr, mpfr_srcptr, mpfr_rnd_t);
+
+static int N = 256;
+
+/*
+ Returns >= zero iff successful.
+
+ Search outward from the double nearest pi*i/(4N), one bit
+ pattern at a time (+0, -1, +1, -2, +2, ...), for an xi at which
+ both cos_fn(xi) and sin_fn(xi) round to double with a residual at
+ least 52 + min_leeway exponent steps below the value itself. The
+ best candidate is remembered; the search ends early once its
+ leeway reaches perfect_leeway, and gives up when r exceeds
+ 1 << 29 or the candidate passes the double for pi*(i+1)/(4N).
+
+ On success the triple (xi, cos, sin) is printed as hex words.
+ */
+static int find_triple_64(int i, int min_leeway, int perfect_leeway, mpfr_fn
+ sin_fn, mpfr_fn cos_fn)
+{
+ /*
+ Using mpfr is not entirely overkill for this; [Lut95]
+ includes PASCAL fragments that use almost entirely integer
+ arithmetic... but the error term in that only handles
+ up to 13 extra bits of zeroes or so. We proudly boast
+ at least 16 bits of extra zeroes in all cases.
+ */
+ mpfr_t xi;
+ mpfr_t xip1;
+ mpfr_t cos;
+ mpfr_t sin;
+ double xip1_d;
+ double t;
+ uint64_t sin_u;
+ uint64_t cos_u;
+ int e1;
+ int e2;
+ uint64_t xip1_u;
+ double xi_initial;
+ uint64_t xi_initial_u;
+ double xi_current;
+ uint64_t xi_current_u;
+ long int r = 0;
+ long int best_r = 0;
+ int sgn = 1;
+ int ml = min_leeway;
+ int best_l = 0;
+ uint64_t best_xi_u;
+ uint64_t best_sin_u;
+ uint64_t best_cos_u;
+ time_t start;
+ time_t end;
+
+ start = time(0);
+ mpfr_init2(xi, 100);
+ mpfr_init2(xip1, 100);
+ mpfr_init2(cos, 100);
+ mpfr_init2(sin, 100);
+
+ /* start out at xi = πi/(4N) */
+ mpfr_const_pi(xi, MPFR_RNDN);
+ mpfr_mul_si(xip1, xi, (long int) (i + 1), MPFR_RNDN);
+ mpfr_mul_si(xi, xi, (long int) i, MPFR_RNDN);
+ mpfr_div_si(xi, xi, (long int) 4 * N, MPFR_RNDN);
+ mpfr_div_si(xip1, xip1, (long int) 4 * N, MPFR_RNDN);
+ xip1_d = mpfr_get_d(xip1, MPFR_RNDN);
+ xip1_u = FLT64_TO_UINT64(xip1_d);
+ xi_initial = mpfr_get_d(xi, MPFR_RNDN);
+ xi_initial_u = FLT64_TO_UINT64(xi_initial);
+
+ while (1) {
+ xi_current_u = xi_initial_u + (sgn * r);
+ xi_current = UINT64_TO_FLT64(xi_current_u);
+ mpfr_set_d(xi, xi_current, MPFR_RNDN);
+
+ /* Test if cos(xi) has enough zeroes */
+ cos_fn(cos, xi, MPFR_RNDN);
+ t = mpfr_get_d(cos, MPFR_RNDN);
+ cos_u = FLT64_TO_UINT64(t);
+ e1 = EXP_OF_FLT64(t);
+ mpfr_sub_d(cos, cos, t, MPFR_RNDN);
+ t = mpfr_get_d(cos, MPFR_RNDN);
+ e2 = EXP_OF_FLT64(t);
+
+ if (e2 == -1024) {
+
+ /*
+ Damn; this is too close to a subnormal. i = 0 or N?
+ NOTE(review): EXP_OF_FLT64 masks with 0x7ff, so e2 is
+ never negative and this branch looks unreachable.
+ */
+ return -1;
+ }
+
+ if (e1 - e2 < (52 + min_leeway)) {
+ goto inc;
+ }
+
+ ml = xmax(min_leeway, e1 - e2 - 52);
+
+ /* Test if sin(xi) has enough zeroes */
+ sin_fn(sin, xi, MPFR_RNDN);
+ t = mpfr_get_d(sin, MPFR_RNDN);
+ sin_u = FLT64_TO_UINT64(t);
+ e1 = EXP_OF_FLT64(t);
+ mpfr_sub_d(sin, sin, t, MPFR_RNDN);
+ t = mpfr_get_d(sin, MPFR_RNDN);
+ e2 = EXP_OF_FLT64(t);
+
+ if (e2 == -1024) {
+
+ /* Damn; this is too close to a subnormal. i = 0 or N? (Same caveat as the cos check.) */
+ return -1;
+ }
+
+ if (e1 - e2 < (52 + min_leeway)) {
+ goto inc;
+ }
+
+ ml = xmin(ml, e1 - e2 - 52);
+
+ /* Hurrah, this is valid */
+ if (ml > best_l) {
+ best_l = ml;
+ best_xi_u = xi_current_u;
+ best_cos_u = cos_u;
+ best_sin_u = sin_u;
+ best_r = sgn * r;
+
+ /* If this is super-good, don't bother finding more */
+ if (best_l >= perfect_leeway) {
+ break;
+ }
+ }
+
+inc:
+
+ /* Increment: flip the side, and widen the offset every other step */
+ sgn *= -1;
+
+ if (sgn < 0) {
+ r++;
+ } else if (r > (1 << 29) ||
+ xi_current_u > xip1_u) {
+ /*
+ This is taking too long, give up looking
+ for perfection and take the best we've
+ got. A sweep of 1 << 28 finishes in ~60
+ hrs on my personal machine as I write
+ this.
+ */
+ break;
+ }
+ }
+
+ end = time(0);
+
+ if (best_l > min_leeway) {
+ printf(
+ "(%#018lx, %#018lx, %#018lx), /* i = %03d, l = %02d, r = %010ld, t = %ld */ \n",
+ best_xi_u, best_cos_u, best_sin_u, i, best_l, best_r,
+ end -
+ start);
+
+ return 0;
+ } else {
+ return -1;
+ }
+}
+/* Print the accepted command-line options to stdout. */
+static void usage(void)
+{
+ printf("generate-triples-for-GB91\n");
+ printf(" [-i start_idx]\n");
+ printf(" [-j end_idx]\n");
+ printf(" -f sin|tan\n");
+}
+
+/*
+ Echo the command line, parse -i/-j into an index range clamped
+ to [1, N] and -f into the pair of functions to tabulate, then run
+ find_triple_64() for every index in the range.
+
+ -f sin tabulates (mpfr_sin, mpfr_cos); -f tan tabulates
+ (mpfr_tan, mpfr_cot). -f is mandatory.
+ */
+int main(int argc, char **argv)
+{
+ int c = 0;
+ long i_start_arg = 1;
+ long i_end_arg = N;
+ int i_start = 1;
+ int i_end = N;
+ mpfr_fn sin_fn = 0;
+ mpfr_fn cos_fn = 0;
+
+ for (int k = 0; k < argc; ++k) {
+ printf("%s ", argv[k]);
+ }
+
+ printf("\n");
+
+ while ((c = getopt(argc, argv, "i:j:f:")) != -1) {
+ switch (c) {
+ case 'i':
+ errno = 0;
+ i_start_arg = strtoll(optarg, 0, 0);
+
+ if (errno) {
+ fprintf(stderr, "bad start index %s\n", optarg);
+
+ return 1;
+ }
+
+ break;
+ case 'j':
+ errno = 0;
+ i_end_arg = strtoll(optarg, 0, 0);
+
+ if (errno) {
+ fprintf(stderr, "bad end index %s\n", optarg);
+
+ return 1;
+ }
+
+ break;
+ case 'f':
+
+ if (!strcmp(optarg, "sin")) {
+ sin_fn = mpfr_sin;
+ cos_fn = mpfr_cos;
+ } else if (!strcmp(optarg, "tan")) {
+ sin_fn = mpfr_tan;
+ cos_fn = mpfr_cot;
+ } else {
+ fprintf(stderr, "unknown function %s\n",
+ optarg);
+
+ return 1;
+ }
+
+ break;
+ default:
+ usage();
+ break;
+ }
+ }
+
+ /* the upper bound must be tested on the start index itself, not on the end index */
+ if (i_start_arg <= 0 ||
+ i_start_arg > N) {
+ printf("truncating start to (0, %d]\n", N);
+ i_start_arg = xmin(xmax(i_start_arg, 1), N);
+ }
+
+ if (i_end_arg <= 0 ||
+ i_end_arg > N) {
+ printf("truncating end to (0, %d]\n", N);
+ i_end_arg = xmin(xmax(i_end_arg, 1), N);
+ }
+
+ i_start = i_start_arg;
+ i_end = i_end_arg;
+
+ if (!sin_fn ||
+ !cos_fn) {
+ fprintf(stderr, "-f required\n");
+
+ return 1;
+ }
+
+ for (int i = i_start; i <= i_end; ++i) {
+ if (find_triple_64(i, 11, 20, sin_fn, cos_fn) < 0) {
+ /*
+ This indicates you should drop the range
+ limitations on r, re-run, and come back
+ in a week.
+ */
+ printf("CANNOT FIND SUITABLE CANDIDATE FOR i = %03d\n",
+ i);
+ }
+ }
+
+ return 0;
+}
diff --git a/lib/math/ancillary/pi-constants.c b/lib/math/ancillary/pi-constants.c
new file mode 100644
index 0000000..ff092a5
--- /dev/null
+++ b/lib/math/ancillary/pi-constants.c
@@ -0,0 +1,93 @@
+/* cc -o pi-constants pi-constants.c -lmpfr */
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <mpfr.h>
+
+#define FLT64_TO_UINT64(f) (*((uint64_t *) ((char *) &f)))
+#define UINT64_TO_FLT64(u) (*((double *) ((char *) &u)))
+/*
+ Print pi-related constants as raw 64-bit hex words:
+ - 2^e mod 2pi for e = 25, 75, ..., 975, each as three doubles
+ (the rounded value and two successive rounding residuals);
+ - pi/2 and pi/4, each as a chain of doubles obtained by
+ repeatedly rounding and subtracting;
+ - 2/pi and 1024/pi as single doubles.
+ All arithmetic is done at 10000 bits of precision.
+ */
+int main(void)
+{
+ mpfr_t pi;
+ mpfr_t two_pi;
+ mpfr_t t;
+ mpfr_t t2;
+ mpfr_t perfect_n;
+ double d = 0;
+ uint64_t u = 0;
+
+ mpfr_init2(pi, 10000);
+ mpfr_init2(two_pi, 10000);
+ mpfr_init2(t, 10000);
+ mpfr_init2(t2, 10000);
+ mpfr_init2(perfect_n, 10000);
+ mpfr_const_pi(pi, MPFR_RNDN);
+ mpfr_mul_si(two_pi, pi, 2, MPFR_RNDN);
+
+ for (long e = 25; e <= 1023; e += 50) {
+ mpfr_set_si(t, e, MPFR_RNDN);
+ mpfr_exp2(t, t, MPFR_RNDN);
+ mpfr_fmod(t2, t, two_pi, MPFR_RNDN);
+ mpfr_set(t, t2, MPFR_RNDN);
+ d = mpfr_get_d(t, MPFR_RNDN);
+ u = FLT64_TO_UINT64(d);
+ printf("(%#018lx, ", u);
+ mpfr_set_d(t2, d, MPFR_RNDN);
+ mpfr_sub(t, t, t2, MPFR_RNDN);
+ d = mpfr_get_d(t, MPFR_RNDN);
+ u = FLT64_TO_UINT64(d);
+ printf("%#018lx, ", u);
+ mpfr_set_d(t2, d, MPFR_RNDN);
+ mpfr_sub(t, t, t2, MPFR_RNDN);
+ d = mpfr_get_d(t, MPFR_RNDN);
+ u = FLT64_TO_UINT64(d);
+ printf("%#018lx), /* 2^%ld mod 2pi */\n", u, e);
+ }
+
+ printf("\n");
+ printf("1000 bits of pi/2:\n");
+ mpfr_const_pi(pi, MPFR_RNDN);
+ mpfr_div_si(pi, pi, 2, MPFR_RNDN);
+
+ for (int bits_obtained = 0; bits_obtained < 1000; bits_obtained += 53) {
+ d = mpfr_get_d(pi, MPFR_RNDN);
+ u = FLT64_TO_UINT64(d);
+ printf("%#018lx\n", u);
+ mpfr_set_d(t, d, MPFR_RNDN);
+ mpfr_sub(pi, pi, t, MPFR_RNDN);
+ }
+
+ printf("\n");
+ printf("1000 bits of pi/4:\n");
+ mpfr_const_pi(pi, MPFR_RNDN);
+ mpfr_div_si(pi, pi, 4, MPFR_RNDN);
+
+ for (int bits_obtained = 0; bits_obtained < 1000; bits_obtained += 53) {
+ d = mpfr_get_d(pi, MPFR_RNDN);
+ u = FLT64_TO_UINT64(d);
+ printf("%#018lx\n", u);
+ mpfr_set_d(t, d, MPFR_RNDN);
+ mpfr_sub(pi, pi, t, MPFR_RNDN);
+ }
+
+ printf("\n");
+ printf("Pre-computed 2/pi:\n");
+ mpfr_const_pi(pi, MPFR_RNDN);
+ mpfr_set_si(t, 2, MPFR_RNDN);
+ mpfr_div(pi, t, pi, MPFR_RNDN);
+ d = mpfr_get_d(pi, MPFR_RNDN);
+ u = FLT64_TO_UINT64(d);
+ printf("%#018lx\n", u);
+ printf("\n");
+ printf("Pre-computed 1/(pi/1024):\n");
+ mpfr_const_pi(pi, MPFR_RNDN);
+ mpfr_set_si(t, 1024, MPFR_RNDN);
+ mpfr_div(pi, t, pi, MPFR_RNDN);
+ d = mpfr_get_d(pi, MPFR_RNDN);
+ u = FLT64_TO_UINT64(d);
+ printf("%#018lx\n", u);
+
+ return 0;
+}
diff --git a/lib/math/ancillary/ulp.gp b/lib/math/ancillary/ulp.gp
new file mode 100644
index 0000000..bf86208
--- /dev/null
+++ b/lib/math/ancillary/ulp.gp
@@ -0,0 +1,27 @@
+/*
+ I always end up needing this for debugging
+ */
+/*
+ Spacing used as "one ulp" at a for a 23-bit mantissa: 2^(q - 23)
+ with q = floor(log2 |a|), where q is clamped below at -126.
+ Inputs smaller in magnitude than 2^-150 return 2^-149 directly,
+ which also avoids taking log(0).
+ */
+{ ulp32(a) = my(aa, q);
+ aa = abs(a);
+ if(aa < 2^(-150),return(2^(-126 - 23)),);
+ q = floor(log(aa)/log(2));
+ if(q < -126,q=-126,);
+ return(2^(q-23));
+}
+/*
+ Spacing used as "one ulp" at a for a 52-bit mantissa: 2^(q - 52)
+ with q = floor(log2 |a|), where q is clamped below at -1022.
+ Inputs smaller in magnitude than 2^-2000 return 2^-1074 directly,
+ which also avoids taking log(0).
+ */
+{ ulp64(a) = my(aa, q);
+ aa = abs(a);
+ if(aa < 2^(-2000),return(2^(-1022 - 52)),);
+ q = floor(log(aa)/log(2));
+ if(q < -1022,q=-1022,);
+ return(2^(q-52));
+}
+/* |x - y| expressed in units of ulp32(x). */
+{ err32(x, y) =
+ return(abs(x-y)/ulp32(x));
+}
+/* |x - y| expressed in units of ulp64(x). */
+{ err64(x, y) =
+ return(abs(x-y)/ulp64(x));
+}
diff --git a/lib/math/atan-impl.myr b/lib/math/atan-impl.myr
new file mode 100644
index 0000000..78f235d
--- /dev/null
+++ b/lib/math/atan-impl.myr
@@ -0,0 +1,488 @@
+use std
+
+use "fpmath"
+
+use "sin-impl"
+use "util"
+
+/*
+ As with other trig functions, we follow [GB91] here. This is a
+ relatively straight-forward "tabulate and approximate by local
+ polynomials" approach. The (relative) lack of sophistication can
+ be seen by the huge number of constants required. The only fancy
+ trick is that
+
+ atan(x) + atan(1/x) = pi/2
+
+ which allows quick reduction to the range [-1, 1].
+
+ The Highly Accurate Table approach of [GB91] then appears in the
+ constants for the polynomial approximations. This acts as a few
+ bits of insurance against subtractive cancellation of the first
+ few terms of the polynomial approximations.
+
+ See files generate-arctan-tuples-for-GB91.c, and
+ generate-minimax-by-Remez.gp for where the constants come from.
+ */
+pkg math =
+ pkglocal const atan32 : (x : flt32 -> flt32) /* arctangent, flt32 in and out */
+ pkglocal const atan64 : (x : flt64 -> flt64) /* arctangent, flt64 in and out */
+
+ pkglocal const atan232 : (y : flt32, x : flt32 -> flt32) /* NOTE(review): presumably atan2(y, x) for flt32 -- confirm */
+ pkglocal const atan264 : (y : flt64, x : flt64 -> flt64) /* NOTE(review): presumably atan2(y, x) for flt64 -- confirm */
+;;
+
+/*
+ Coefficients for p(x), where arctan(x) is approximated in degree
+ 13 by x*p(x^2).
+ */
+const atan_coeffs : uint64[7] = [ /* NOTE(review): decoded values below assume IEEE-754 flt64 bit patterns -- confirm */
+ 0x3ff0000000000000, /* 1.0 */
+ 0xbfd5555555555555, /* ~ -1/3 */
+ 0x3fc9999999999128, /* ~ 1/5 */
+ 0xbfc2492493302077, /* ~ -1/7 */
+ 0x3fbc71bd4ec2b9ae, /* ~ 0.1111 */
+ 0xbfb750eefa902315, /* ~ -0.0911 */
+ 0x3fb226324f0122e0, /* ~ 0.0709 */
+]
+
+/*
+ Coefficients for degree 5 polynomial approximations of arctan(x)
+ on intervals of width 1/256, roughly filling the [0, 1] space.
+ Because of the slight shifting of the pivot xi variables by up
+ to 2^26 ulps, there are tiny gaps, of up to size 2*2^(-53 + 26)
+ = 2^-26, on which the minimax guarantees do not apply. This is
+ not expected to be a significant issue.
+
+ Note that the first few entries aren't actually used.
+ */
+const C : (uint64, uint64, uint64, uint64, uint64, uint64, uint64)[257] = [
+ (0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000),
+ (0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000),
+ (0x3f80000001f83362, 0x3f7fffd559ac119a, 0x3fefff8001ffd883, 0xbf7fff0009e129ba, 0xbfd55355689e3241, 0x3f7ffdcf9a784a0c, 0x3fc995f4bed14247),
+ (0x3f87ffffff37811c, 0x3f87ffb800bc4b32, 0x3feffee00a1fb7b6, 0xbf87fe5015f2c284, 0xbfd550d5b96bfd9c, 0x3f87fc076a4fd120, 0x3fc98fc8b892a7b2),
+ (0x3f8fffffffc1eade, 0x3f8fff555b7d615a, 0x3feffe001ffe07ed, 0xbf8ffc005fa9b585, 0xbfd54d5693a075a5, 0x3f8ff657f61b9dd7, 0x3fc9869ee5cadbbd),
+ (0x3f93fffffe0e9f70, 0x3f93ff595d2776fd, 0x3feffce04e18fad7, 0xbf93fc18907a7f60, 0xbfd548d85eda9a6c, 0x3f93f61bf2de1558, 0x3fc97abe5d2bf700),
+ (0x3f97ffffffec7ecd, 0x3f97fee01836ddc0, 0x3feffb80a1e94266, 0xbf97f9416c2a7c14, 0xbfd5435ba3ccdd9c, 0x3f97eefc49b67b90, 0x3fc96b9f97155700),
+ (0x3f9c000002043a2b, 0x3f9bfe36e12cee80, 0x3feff9e12be5b739, 0xbf9bf54b150fb215, 0xbfd53ce105d2bb59, 0x3f9be5104edd8ee0, 0x3fc9598af87d1c00),
+ (0x3fa00000022e99a5, 0x3f9ffd55c0059267, 0x3feff801ff7df250, 0xbf9ff006025a63b6, 0xbfd5356946bb4ad9, 0x3f9fd7e22b0f4cc0, 0x3fc94484728b9380),
+ (0x3fa200000058e037, 0x3fa1fe1a5c8788b8, 0x3feff5e3331c6ffa, 0xbfa1f4a166090962, 0xbfd52cf545c2c2d0, 0x3fa1e37e5b434560, 0x3fc92c909999c980),
+ (0x3fa3fffffe4fe9f8, 0x3fa3fd65efba5c58, 0x3feff384e01a9106, 0xbfa3f0692146d108, 0xbfd5238601c8043d, 0x3fa3d95263db398f, 0x3fc9116fa0344708),
+ (0x3fa5fffffd7d0899, 0x3fa5fc89a378731e, 0x3feff0e722c403ee, 0xbfa5eb42b2d510f3, 0xbfd5191c8f45f7ad, 0x3fa5cc83e91b1137, 0x3fc8f3b166e52899),
+ (0x3fa7fffffff47d57, 0x3fa7fb8184255df7, 0x3fefee0a1a5143a8, 0xbfa7e516b6ddd4cd, 0xbfd50dba2684300b, 0x3fa7bd2cb777bd9a, 0x3fc8d317587dd676),
+ (0x3fa9fffffc2c23a9, 0x3fa9fa4994982dbb, 0x3fefeaede8f76900, 0xbfa9ddcddc883e9e, 0xbfd501601a419c50, 0x3fa9ab141ce95d64, 0x3fc8afa883cd8ab3),
+ (0x3fabfffffd0f0cbd, 0x3fabf8ddee4b0fc3, 0x3fefe792b3b38c6e, 0xbfabd5510839a77b, 0xbfd4f40fd8f0a91b, 0x3fab9602138cdbe9, 0x3fc8896c8f290450),
+ (0x3fae000000ffdc11, 0x3fadf73aa09e1477, 0x3fefe3f8a27756be, 0xbfadcb8935b8415f, 0xbfd4e5cae94695f1, 0x3fad7d32cd68db68, 0x3fc860b09a86ff00),
+ (0x3fb0000002ad31a4, 0x3faff55bbc820c23, 0x3fefe01fe0154058, 0xbfafc05f85dd78f3, 0xbfd4d692f8f482ea, 0x3faf617ebeb00bd8, 0x3fc834f3a9931b00),
+ (0x3fb100000188824f, 0x3fb0f99ea8a41bcd, 0x3fefdc089a435d4f, 0xbfb0d9de9f1c8568, 0xbfd4c669ca505107, 0x3fb0a166428a466f, 0x3fc8063f36448e35),
+ (0x3fb20000009bc757, 0x3fb1f86dbfa33080, 0x3fefd7b3016f7596, 0xbfb1d2c5eedc6080, 0xbfd4b55130ac22d8, 0x3fb18fd8c2bb0ee4, 0x3fc7d526db38f039),
+ (0x3fb3000024c171ae, 0x3fb2f7195618302f, 0x3fefd31f48358ab4, 0xbfb2cadaa8a559c1, 0xbfd4a34b240cff5f, 0x3fb27c47c2ad49a6, 0x3fc7a1702e4d8d69),
+ (0x3fb4000000068d16, 0x3fb3f59f0e82d887, 0x3fefce4da6ab73c7, 0xbfb3c2114d1e9dc2, 0xbfd49059bce708bf, 0x3fb36699156cde19, 0x3fc76b264479966d),
+ (0x3fb4ffffffdaf503, 0x3fb4f3fd674dc761, 0x3fefc93e53a2ae2c, 0xbfb4b85f4a8c15c6, 0xbfd47c7f1d6673be, 0x3fb44eb3ca7ef180, 0x3fc7325487c15892),
+ (0x3fb60000005453f7, 0x3fb5f23250268d66, 0x3fefc3f18b52cb50, 0xbfb5adb99d22e5c3, 0xbfd467bd8b4de796, 0x3fb5347ee36faada, 0x3fc6f706fed2d6e1),
+ (0x3fb7000000057dfc, 0x3fb6f03bdcefbdc7, 0x3fefbe678c078564, 0xbfb6a2157e35172d, 0xbfd452176286cdb9, 0x3fb617e1fe0c1388, 0x3fc6b94a23d2539a),
+ (0x3fb7fffffe7c548b, 0x3fb7ee182482a642, 0x3fefb8a096b5e8ba, 0xbfb7956844e7782f, 0xbfd43b8f17095f3f, 0x3fb6f8c53a2a94dc, 0x3fc6792ae7109ee3),
+ (0x3fb9000000399ffa, 0x3fb8ebc544b20fc7, 0x3fefb29ceedb75bc, 0xbfb887a76b0a59ee, 0xbfd4242733f64aa6, 0x3fb7d7114183708e, 0x3fc636b6a9a36669),
+ (0x3fba000000503ea3, 0x3fb9e941541f49d9, 0x3fefac5cdacc38c0, 0xbfb978c880930264, 0xbfd40be25c5cc9fc, 0x3fb8b2af3f76ad1f, 0x3fc5f1fb3caa1a83),
+ (0x3fbafffffefd604e, 0x3fbae68a70c75b65, 0x3fefa5e0a3559ff9, 0xbfba68c13a88c726, 0xbfd3f2c349624f3b, 0x3fb98b88f1f263f7, 0x3fc5ab06d90ee046),
+ (0x3fbc0000027ada67, 0x3fbbe39ec0e260e9, 0x3fef9f2893aa9d84, 0xbfbb578774c87524, 0xbfd3d8ccc2282c36, 0x3fba6104a32547f8, 0x3fc5622c6b5cdc00),
+ (0x3fbcfffffe2aae0e, 0x3fbce07c5a6d6a81, 0x3fef9834fa03c879, 0xbfbc45111ba09bf8, 0xbfd3be01baf8bc7a, 0x3fbb3410872e3840, 0x3fc516f2437f8a00),
+ (0x3fbe000001dab907, 0x3fbddd2171f3051f, 0x3fef910626538a70, 0xbfbd3154591dc7f9, 0xbfd3a2652c4d0b90, 0x3fbc04a65d78b257, 0x3fc4c967e0fd9461),
+ (0x3fbeffffffa53a79, 0x3fbed98c21368e87, 0x3fef899c6bd1f677, 0xbfbe1c47601d7e8f, 0xbfd385fa159fbfca, 0x3fbcd19beca90b8c, 0x3fc47a256b36705a),
+ (0x3fc000000f142d5a, 0x3fbfd5bab85dc3d0, 0x3fef81f81e981366, 0xbfbf05e0b86e7006, 0xbfd368c39b4690e4, 0x3fbd9b66d4a9b7f8, 0x3fc428f6923eacde),
+ (0x3fc07ffffcdf1951, 0x3fc068d5810d5bd4, 0x3fef7a19994da3f1, 0xbfbfee169210a60e, 0xbfd34ac4fcd8967e, 0x3fbe61f4253945ff, 0x3fc3d5ebc0588a79),
+ (0x3fc1000002aa39f4, 0x3fc0e6adcf926f2e, 0x3fef7201326bccc1, 0xbfc06a70037899d2, 0xbfd32c01735432c2, 0x3fbf25323b0051d6, 0x3fc38115615f521e),
+ (0x3fc18000001fea3b, 0x3fc1646541255ca1, 0x3fef69af483d2f52, 0xbfc0dd19dadefc1f, 0xbfd30c7c5e62ba59, 0x3fbfe50f8f51460e, 0x3fc32a845cd3502a),
+ (0x3fc1fffffe8823bc, 0x3fc1e1faf993a4c8, 0x3fef612438bab95f, 0xbfc14f04587e458a, 0xbfd2ec3924d36b33, 0x3fc050bdc309e406, 0x3fc2d249bb3c927d),
+ (0x3fc27ffffc9337b0, 0x3fc25f6e13bf7bd7, 0x3fef586064c338f8, 0xbfc1c02b10b43292, 0xbfd2cb3b4055087c, 0x3fc0ad330fd21a61, 0x3fc27876c32ba8fe),
+ (0x3fc2fffffdc6bf7b, 0x3fc2dcbdb0cea731, 0x3fef4f642f0fe16c, 0xbfc23089b1286391, 0xbfd2a98638fb0d98, 0x3fc107e008ee7ac7, 0x3fc21d1ce9880c12),
+ (0x3fc3800000c2fb33, 0x3fc359e8eeaa28a7, 0x3fef462ffcd452fe, 0xbfc2a01bf88e9498, 0xbfd2871da71092b5, 0x3fc160bd5de46b03, 0x3fc1c04dd31f291d),
+ (0x3fc4000000d8f876, 0x3fc3d6eee99a2f23, 0x3fef3cc435a04a95, 0xbfc30eddb88397f1, 0xbfd26405321912f4, 0x3fc1b7c415010900, 0x3fc1621b4e780103),
+ (0x3fc48000000de728, 0x3fc453cec616b8c4, 0x3fef332142989e5d, 0xbfc37ccade8e0ca6, 0xbfd240408d609878, 0x3fc20ced92d9e381, 0x3fc1029747314f3d),
+ (0x3fc4ffffff0e7ab0, 0x3fc4d087a8ef1e63, 0x3fef29478efdfd42, 0xbfc3e9df6d60cba2, 0xbfd21bd3796a9ce3, 0x3fc26033953388f3, 0x3fc0a1d3c688050b),
+ (0x3fc57ffffc7377dc, 0x3fc54d18b69dbcdb, 0x3fef1f3788337a10, 0xbfc456177c933fea, 0xbfd1f6c1c37d2372, 0x3fc2b190331b73b1, 0x3fc03fe2eebeac95),
+ (0x3fc5ffffff069c34, 0x3fc5c9811d4c8683, 0x3fef14f19ce2618f, 0xbfc4c16f418df8ce, 0xbfd1d10f41eb9d16, 0x3fc300fde3b8367f, 0x3fbfb9addc08f782),
+ (0x3fc67ffffe3dace2, 0x3fc645bffdfed6b5, 0x3fef0a763ecf8d44, 0xbfc52be2fc09f17c, 0xbfd1aabfda46e407, 0x3fc34e776fde19e4, 0x3fbef18416e713f0),
+ (0x3fc6fffffe88eb88, 0x3fc6c1d4881ddaae, 0x3feeffc5e08c9b3d, 0xbfc5956f0e16385c, 0xbfd183d77857c309, 0x3fc399f803dfed36, 0x3fbe276d18332ee6),
+ (0x3fc77fffffbe804c, 0x3fc73dbde868750f, 0x3feef4e0f6d3f675, 0xbfc5fe0fed8db872, 0xbfd15c5a1291701e, 0x3fc3e37b24a92e8d, 0x3fbd5b8d76fa3afe),
+ (0x3fc7fffffc35d1d7, 0x3fc7b97b48251fed, 0x3feee9c7f89a7009, 0xbfc665c223be9680, 0xbfd1344ba9bdc601, 0x3fc42afcaf9262f8, 0x3fbc8e09cf02b355),
+ (0x3fc8800000a239dc, 0x3fc8350be43569e9, 0x3feede7b5cee6e27, 0xbfc6cc8260df5e45, 0xbfd10bb040c61a32, 0x3fc47078e771a1fe, 0x3fbbbf06908b1814),
+ (0x3fc90000027df7fa, 0x3fc8b06ee4ee22e7, 0x3feed2fb9e8bb11b, 0xbfc7324d5aa33c81, 0xbfd0e28be90da705, 0x3fc4b3ec5d56fe5c, 0x3fbaeea83980b70d),
+ (0x3fc9800002d31c2a, 0x3fc92ba37fbc8427, 0x3feec74938c8c904, 0xbfc7971fe886b4de, 0xbfd0b8e2b6d26476, 0x3fc4f55403930d95, 0x3fba1d131409b3ab),
+ (0x3fc9fffffcddfcfd, 0x3fc9a6a8e66a4ab7, 0x3feebb64a91453c7, 0xbfc7faf6f60a4367, 0xbfd08eb8c5fc4160, 0x3fc534ad23ed705c, 0x3fb94a6b493e7c18),
+ (0x3fca7fffff8b55ee, 0x3fca217e5fa0a315, 0x3feeaf4e6c86c253, 0xbfc85dcf96a2bb00, 0xbfd0641230fa59ed, 0x3fc571f56b9c5c8a, 0x3fb876d4adfc5ac3),
+ (0x3fcafffffd414cf5, 0x3fca9c23189f6ea5, 0x3feea30704596a99, 0xbfc8bfa6e0659b60, 0xbfd038f3200791fd, 0x3fc5ad2ad32c8d7a, 0x3fb7a273097623dd),
+ (0x3fcb7ffffe765c40, 0x3fcb169655d5327a, 0x3fee968ef1197db7, 0xbfc9207a13521ece, 0xbfd00d5fb7ad7b56, 0x3fc5e64bb64ff60a, 0x3fb6cd69b8f6e504),
+ (0x3fcc000000abc4ff, 0x3fcb90d753364d8b, 0x3fee89e6b5bbd593, 0xbfc980467fefa357, 0xbfcfc2b8463315e8, 0x3fc61d56c362a69c, 0x3fb5f7dbddfa3d81),
+ (0x3fcc7fffffda3e36, 0x3fcc0ae54d528b2b, 0x3fee7d0ed6b1420d, 0xbfc9df098ebe3358, 0xbfcf69d920a9cf8c, 0x3fc6524aff4b3332, 0x3fb521ec46597687),
+ (0x3fccfffffff895a3, 0x3fcc84bf8a6d20c0, 0x3fee7007d8e2c735, 0xbfca3cc0c754ee99, 0xbfcf102a3a0ed514, 0x3fc6849fe6a47028, 0x3fb44c40e5060800),
+ (0x3fcd800002e87b67, 0x3fccfe6550e0425b, 0x3fee62d242a27514, 0xbfca9969c88af14c, 0xbfceb5b42bd588bd, 0x3fc6b562a8259298, 0x3fb375f45f6e7800),
+ (0x3fce0000007c3fa1, 0x3fcd77d5df961e94, 0x3fee556e9c79c2d5, 0xbfcaf502433f724e, 0xbfce5a7f5c4a951a, 0x3fc6e49a1c28bf4d, 0x3fb29f2902804d52),
+ (0x3fce8000018c9844, 0x3fcdf11087c3e708, 0x3fee47dd6e4b27d1, 0xbfcb4f880dd31913, 0xbfcdfe93e4e6b22c, 0x3fc71130057e8a97, 0x3fb1c906570f0d02),
+ (0x3fcf0000011971a7, 0x3fce6a148fa0c60b, 0x3fee3a1f427d6bd1, 0xbfcba8f90d9b0ad8, 0xbfcda1fa4468eaa4, 0x3fc73baf35ad6fbc, 0x3fb0f329b1d71c3b),
+ (0x3fcf7fffffae07f0, 0x3fcee2e144d04e42, 0x3fee2c34a3e84e63, 0xbfcc015345086350, 0xbfcd44bad359c3db, 0x3fc76418a7b6f7b9, 0x3fb01db333c0e897),
+ (0x3fd0000003c8cfe7, 0x3fcf5b76004c2654, 0x3fee1e1e1d47929d, 0xbfcc5894d623e3cd, 0xbfcce6ddde8c38ac, 0x3fc78a6da87d134e, 0x3fae9184c911f786),
+ (0x3fd0400003e0f35b, 0x3fcfd3d2038a7c77, 0x3fee0fdc3d4407ce, 0xbfccaebbe9509c5a, 0xbfcc886bc113543a, 0x3fc7aeafca638093, 0x3face8ecd45e8a40),
+ (0x3fd08000032761cb, 0x3fd025fa53fb759e, 0x3fee016f90dfb3ea, 0xbfcd03c6cfa46b13, 0xbfcc296cbe2c693a, 0x3fc7d0e0f31c6cc9, 0x3fab41db5e90e29b),
+ (0x3fd0bffffd1bbae5, 0x3fd061ee9d889212, 0x3fedf2d8a7a1ddae, 0xbfcd57b3e8faad27, 0xbfcbc9e91c1a6c4e, 0x3fc7f1034f0598ee, 0x3fa99c8c39820b78),
+ (0x3fd0fffffda7b6b5, 0x3fd09dc595a7aa29, 0x3fede4180e167b90, 0xbfcdaa81c348841c, 0xbfcb69e8ff46f764, 0x3fc80f195bae93a5, 0x3fa7f9398b9c2669),
+ (0x3fd1400002df4b88, 0x3fd0d97ee7b72b9e, 0x3fedd52e54a32e74, 0xbfcdfc2ef2b6e464, 0xbfcb0974963c35fd, 0x3fc82b25d6d98068, 0x3fa6581c8d5f3f91),
+ (0x3fd18000007fef6f, 0x3fd1151a369b3ad0, 0x3fedc61c0ed3a8b3, 0xbfce4cba17180e39, 0xbfcaa89413bba312, 0x3fc8452bc0669ca5, 0x3fa4b96d672b82b0),
+ (0x3fd1bffffd750e56, 0x3fd1509738407215, 0x3fedb6e1cc5e56bc, 0xbfce9c22010b8ec2, 0xbfca474f8102a8b8, 0x3fc85d2e64dd2c0a, 0x3fa31d6268e4aace),
+ (0x3fd2000003ca0de4, 0x3fd18bf5a68ee3b3, 0x3feda7801cbd34e3, 0xbfceea659ca4de3f, 0xbfc9e5aed4b250a4, 0x3fc87331536725ba, 0x3fa184306c2c9ae3),
+ (0x3fd2400002036234, 0x3fd1c735230a790e, 0x3fed97f79692b2ec, 0xbfcf3783cc4f0a97, 0xbfc983ba20bb093a, 0x3fc887385275eb6c, 0x3f9fdc171f589263),
+ (0x3fd27fffffdfc6c4, 0x3fd2025567c6bf7b, 0x3fed8848caf35b89, 0xbfcf837ba93cd7cf, 0xbfc9217940ea8bf9, 0x3fc89947704663ae, 0x3f9cb64bbd3e6b8a),
+ (0x3fd2bffffed8628c, 0x3fd23d562a27d0d0, 0x3fed78744ce26842, 0xbfcfce4c5c6a3c83, 0xbfc8bef409ec6215, 0x3fc8a962f5b80e45, 0x3f999760297b6b81),
+ (0x3fd2fffffd9ac75c, 0x3fd278371e2461ca, 0x3fed687ab0799710, 0xbfd00bfa9178bb91, 0xbfc85c32431fb914, 0x3fc8b78f661ebb08, 0x3f967fb3033009f6),
+ (0x3fd33ffffe5a26b1, 0x3fd2b2f7fc188619, 0x3fed585c894c7f41, 0xbfd0303aaa9777f1, 0xbfc7f93b9bf3c072, 0x3fc8c3d17ee8c3ea, 0x3f936f9fb86fef86),
+ (0x3fd37ffffec1b143, 0x3fd2ed98795ef86b, 0x3fed481a6c610cdd, 0xbfd053e62ea7a861, 0xbfc79617b7755508, 0x3fc8ce2e3400f439, 0x3f90677ee0a18b9f),
+ (0x3fd3c000003531d6, 0x3fd328184fe61b16, 0x3fed37b4ee113238, 0xbfd076fce03b5660, 0xbfc732ce1eb44786, 0x3fc8d6aaaf852261, 0x3f8acf4b9ec59d46),
+ (0x3fd40000014b4f1a, 0x3fd362773835c112, 0x3fed272ca3a66d1e, 0xbfd0997e8b99570c, 0xbfc6cf664a09a01d, 0x3fc8dd4c4eb42bdc, 0x3f84e0cdba8b378e),
+ (0x3fd44000002a76ab, 0x3fd39cb4eb9caee5, 0x3fed168222b7fc82, 0xbfd0bb6b07fba0fc, 0xbfc66be79ca66f40, 0x3fc8e218a084b591, 0x3f7e0845137e79a8),
+ (0x3fd4800002f0ac40, 0x3fd3d6d12bd1b3a2, 0x3fed05b5fee9e15f, 0xbfd0dcc23bca443d, 0xbfc6085956bc3e59, 0x3fc8e5156401dd74, 0x3f7273c2f54049f0),
+ (0x3fd4c0000398c9cd, 0x3fd410cbb0adb15b, 0x3fecf4c8cf23b4a7, 0xbfd0fd8411e92f76, 0xbfc5a4c2b406c729, 0x3fc8e648851b1c4d, 0x3f5c14e6132786eb),
+ (0x3fd5000000d2b414, 0x3fd44aa43780e819, 0x3fece3bb2923ae3c, 0xbfd11db082866efc, 0xbfc5412ad0e80c83, 0x3fc8e5b81bf6df1b, 0xbf51090196534ac1),
+ (0x3fd5400000d97ce4, 0x3fd4845a8594a669, 0x3fecd28da0a43a9e, 0xbfd13d479421cbf8, 0xbfc4dd98a5510238, 0x3fc8e36a6a29d4e3, 0xbf6ec343ef3f561f),
+ (0x3fd580000083fbd8, 0x3fd4bdee58df2a56, 0x3fecc140cbd716cb, 0xbfd15c4952ee964a, 0xbfc47a131e7a5564, 0x3fc8df65d90205c1, 0xbf7a57ed6c513309),
+ (0x3fd5c0000018a579, 0x3fd4f75f739cb1b0, 0x3fecafd53ff0fa50, 0xbfd17ab5d724d2b5, 0xbfc416a10a536704, 0x3fc8d9b0f77c529d, 0xbf8292192630d9e3),
+ (0x3fd5fffffe824f64, 0x3fd530ad97fc72a4, 0x3fec9e4b92687e96, 0xbfd1988d427b2e5c, 0xbfc3b3491e63dea3, 0x3fc8d25278721643, 0xbf87e2c648beb0b9),
+ (0x3fd63fffff3a13ec, 0x3fd569d88d6ab865, 0x3fec8ca4574c4517, 0xbfd1b5cfc2bbdc5f, 0xbfc35011ee14077d, 0x3fc8c9512fe43c8d, 0xbf8d1d92ca8d1ca8),
+ (0x3fd67fffff856d0c, 0x3fd5a2e016f0f80e, 0x3fec7ae0242da62e, 0xbfd1d27d8ca121f4, 0xbfc2ed01fadabe3b, 0x3fc8beb4125cd6e8, 0xbf91210c8ebe0b06),
+ (0x3fd6bffffe40da9f, 0x3fd5dbc3fa317aac, 0x3fec68ff8df60300, 0xbfd1ee96df565754, 0xbfc28a1fa7a73942, 0x3fc8b282322cefbb, 0xbf93a7fd0b2cabf4),
+ (0x3fd7000000850018, 0x3fd61484037f999c, 0x3fec5703278a5d34, 0xbfd20a1c0634f09a, 0xbfc22771314a492b, 0x3fc8a4c2bc3a467a, 0xbf96236e9d5027af),
+ (0x3fd74000032958c9, 0x3fd64d1ff900be97, 0x3fec44eb85d056c0, 0xbfd2250d52489ac7, 0xbfc1c4fcc46a8260, 0x3fc8957cf8f5c2e9, 0xbf989337c85e3c31),
+ (0x3fd7800001f42af4, 0x3fd68597a1186566, 0x3fec32b93da9d11a, 0xbfd23f6b1d709665, 0xbfc162c871f645a7, 0x3fc884b849764b83, 0xbf9af73214886454),
+ (0x3fd7c000020a1e4e, 0x3fd6bdeacb96c300, 0x3fec206ce0efcd02, 0xbfd25935ce6835b1, 0xbfc100da1ecc4e75, 0x3fc8727c22994bf1, 0xbf9d4f3a6910e69e),
+ (0x3fd7ffffef4b942f, 0x3fd6f619333fae00, 0x3fec0e0708525d88, 0xbfd2726dcab05fb8, 0xbfc09f37b85d7a7c, 0x3fc85ed0151cb6ca, 0xbf9f9b2fc00147e3),
+ (0x3fd84000000e1b33, 0x3fd72e22d546f87e, 0x3febfb88369c3125, 0xbfd28b139d7910a3, 0xbfc03de69c5c8f9e, 0x3fc849bbac9f2806, 0xbfa0ed7b56485e08),
+ (0x3fd88000025752bc, 0x3fd76607548c2d8b, 0x3febe8f10c2b5b27, 0xbfd2a327b1acf5d8, 0xbfbfb9d951a88eda, 0x3fc83346a668b00a, 0xbfa207397a81490f),
+ (0x3fd8c000030eff1b, 0x3fd79dc68c3a3aab, 0x3febd64216bfa88e, 0xbfd2baaa9313a44f, 0xbfbef89e7a4b8711, 0x3fc81b78bad06d46, 0xbfa31ac6be01f8d3),
+ (0x3fd9000001646b31, 0x3fd7d5604c98efba, 0x3febc37be783ad55, 0xbfd2d19cd038f707, 0xbfbe38275d0e35cf, 0x3fc80259b3ae8770, 0xbfa42818bdbf14e0),
+ (0x3fd93fffff640ee2, 0x3fd80cd4698dc1aa, 0x3febb09f0e37b37e, 0xbfd2e7ff0093da3c, 0xbfbd787e5d4923f4, 0x3fc7e7f16509a908, 0xbfa52f26866c2b8d),
+ (0x3fd97ffffdf78491, 0x3fd84422b71e6925, 0x3feb9dac1a54d801, 0xbfd2fdd1c2de7d30, 0xbfbcb9ada303e230, 0x3fc7cc47acdded50, 0xbfa62fe87d13430b),
+ (0x3fd9c00001035746, 0x3fd87b4b0cfdf415, 0x3feb8aa399cd687a, 0xbfd31315be4d8968, 0xbfbbfbbf0e6d9a58, 0x3fc7af646fa7c2bc, 0xbfa72a586923b8a6),
+ (0x3fda000000ffe340, 0x3fd8b24d3a25bea9, 0x3feb77861d504643, 0xbfd327cb9da69e4a, 0xbfbb3ebc624b5dfa, 0x3fc7914f9d605de4, 0xbfa81e71361d0a89),
+ (0x3fda400000100095, 0x3fd8e92917038092, 0x3feb645431fd7c4a, 0xbfd33bf4161c483a, 0xbfba82af05253ec0, 0x3fc7721126702f2c, 0xbfa90c2f3fa9f863),
+ (0x3fda7fffffe222b8, 0x3fd91fde7cb74808, 0x3feb510e645d7829, 0xbfd34f8fe3a4d491, 0xbfb9c7a01f5fd899, 0x3fc751b0fe6efdd3, 0xbfa9f39021b08a68),
+ (0x3fdabffffed95691, 0x3fd9566d42a871d9, 0x3feb3db541373664, 0xbfd3629fc80afeac, 0xbfb90d98369c90d3, 0x3fc72f44fbe48540, 0xbfaad3a70aefe000),
+ (0x3fdb0000004583fc, 0x3fd98cd545886e51, 0x3feb2a49530ec627, 0xbfd375248cf7a4e4, 0xbfb854a0c7dada4b, 0x3fc70cb749bdaee0, 0xbfabae4bec818000),
+ (0x3fdb3ffffc2050fd, 0x3fd9c316597e02fc, 0x3feb16cb270de8bf, 0xbfd3871eff703673, 0xbfb79cc1ecb3e3f8, 0x3fc6e91fcc119ab0, 0xbfac829399932000),
+ (0x3fdb800003ac36ec, 0x3fd9f93069302d4b, 0x3feb033b41d7016a, 0xbfd3988ff9a15401, 0xbfb6e60411f5449e, 0x3fc6c57ea4699da3, 0xbfad516a451d0115),
+ (0x3fdbc00003fd5a1f, 0x3fda2f2341ba0bb7, 0x3feaef9a323ef369, 0xbfd3a97851fa0ac4, 0xbfb6306e7ee85ebe, 0x3fc69fed5d8e28b3, 0xbfae18ff332609af),
+ (0x3fdc0000039b3621, 0x3fda64eec6d30865, 0x3feadbe87e780073, 0xbfd3b9d8ebacdb82, 0xbfb57c08cdfb1b0a, 0x3fc6786dcb5c4d60, 0xbfaed958fd6d6000),
+ (0x3fdc400000a49b57, 0x3fda9a92d6285c97, 0x3feac826ae62f6c8, 0xbfd3c9b2ad31c963, 0xbfb4c8db56e0aecb, 0x3fc650fe5384d100, 0xbfaf944e806c6000),
+ (0x3fdc800001b0d7d2, 0x3fdad00f558b3c25, 0x3feab455465812ed, 0xbfd3d90684bfac31, 0xbfb416ecc8a08e19, 0x3fc628ac8bd8a750, 0xbfb0247eb73f7000),
+ (0x3fdcbfffff851ce5, 0x3fdb05642048528d, 0x3feaa074ce0698a1, 0xbfd3e7d5626ee3ae, 0xbfb366442a9359be, 0x3fc5ff804857c080, 0xbfb07bb6a4d84000),
+ (0x3fdcfffffe193401, 0x3fdb3a911c127cfd, 0x3fea8c85c8b1f3e1, 0xbfd3f6203e274e24, 0xbfb2b6e8269a6794, 0x3fc5d5814b5a6c10, 0xbfb0cfd354252000),
+ (0x3fdd400000235500, 0x3fdb6f962e90b921, 0x3fea7888b9051e04, 0xbfd403e8145c9461, 0xbfb208df2c5981f4, 0x3fc5aab74c19a030, 0xbfb120d984141000),
+ (0x3fdd80000286b038, 0x3fdba473399b82ae, 0x3fea647e227e11d3, 0xbfd4112de4ff3e7d, 0xbfb15c2f7dc2395e, 0x3fc57f29f8cba630, 0xbfb16ece5dded000),
+ (0x3fddc00001cef7fd, 0x3fdbd9281fcf3646, 0x3fea506687b5fab0, 0xbfd41df2b49c19d6, 0xbfb0b0df2042c910, 0x3fc552e0f214fba0, 0xbfb1b9b778d7e000),
+ (0x3fde0000004fc0e4, 0x3fdc0db4c9902cd8, 0x3fea3c42686ef7da, 0xbfd42a378d455f59, 0xbfb006f4457a3232, 0x3fc526eff3475f6d, 0xbfb2020c549fc7a9),
+ (0x3fde3fffff53e727, 0x3fdc42191f647305, 0x3fea281243b119b3, 0xbfd435fd7d3b143b, 0xbfaebce6fa454208, 0x3fc4f948161b503f, 0xbfb246f01632fcd7),
+ (0x3fde7ffffe4a0194, 0x3fdc7655094883b8, 0x3fea13d6980a5b5d, 0xbfd4414596489a02, 0xbfad6ec41b47fbf8, 0x3fc4c9eae58de1a0, 0xbfb2886a8dcd0000),
+ (0x3fdebfffff2d0b48, 0x3fdcaa687247ff28, 0x3fe9ff8fe22665c0, 0xbfd44c10ee631c8a, 0xbfac238be3c9f536, 0x3fc49afdfb9c85b0, 0xbfb2c764d01a0000),
+ (0x3fdefffffe2211e5, 0x3fdcde5341a8f7d7, 0x3fe9eb3e9f72a6e7, 0xbfd456609e708e83, 0xbfaadb471890deec, 0x3fc46b7a80ae40e0, 0xbfb303753a5c2000),
+ (0x3fdf400001e3314b, 0x3fdd1215683e1df1, 0x3fe9d6e348e546c7, 0xbfd46035c47f98ab, 0xbfa995fec6e063a5, 0x3fc43b67a5f30860, 0xbfb33ca3a9377000),
+ (0x3fdf800003a8d041, 0x3fdd45aeccdea6bf, 0x3fe9c27e5aa08505, 0xbfd469917fdb4a8f, 0xbfa853bbce31875a, 0x3fc40acc925a23f0, 0xbfb372f83dba2000),
+ (0x3fdfc000001f74ac, 0x3fdd791f5a2b6535, 0x3fe9ae104e8e2ac4, 0xbfd47274f3aafe74, 0xbfa714868a1a99ad, 0x3fc3d9b055497f10, 0xbfb3a67b69c57000),
+ (0x3fe0000003558fd9, 0x3fddac670ab76e44, 0x3fe999999777521d, 0xbfd47ae148877e35, 0xbfa5d867886713de, 0x3fc3a935919c77de, 0xbfb3d7a4b03bed7b),
+ (0x3fe020000337d712, 0x3fdddf85c0246f0a, 0x3fe9851ab14b1526, 0xbfd482d7a5dddc15, 0xbfa49f638c34686a, 0x3fc376100d9d1a30, 0xbfb40530d8726000),
+ (0x3fe0400000b62087, 0x3fde127b6c28d8fc, 0x3fe9709410c8e4b3, 0xbfd48a5937e72e68, 0xbfa36985896d6846, 0x3fc344b94377092e, 0xbfb430e35d9d8543),
+ (0x3fe06000020fc2e6, 0x3fde454809b1710d, 0x3fe95c062621c687, 0xbfd491672f00db4b, 0xbfa236d0d50efa28, 0x3fc311def4833020, 0xbfb4597aa3dcba29),
+ (0x3fe07ffffd1dd2ee, 0x3fde77eb7a8913af, 0x3fe947716adf56d1, 0xbfd49802ba11dc1c, 0xbfa1074bf3e6c5f4, 0x3fc2dd82145369e0, 0xbfb47f0161fcf000),
+ (0x3fe0a0000020ae23, 0x3fdeaa65c802a0b0, 0x3fe932d647a7f8b0, 0xbfd49e2d0f2d17c5, 0xbf9fb5fc526b9153, 0x3fc2a9edd8a10d00, 0xbfb4a25c9eca4000),
+ (0x3fe0bffffe62365e, 0x3fdedcb6d1b5ebed, 0x3fe91e35354713b3, 0xbfd4a3e76149657c, 0xbf9d63d88329f12e, 0x3fc276074acdc500, 0xbfb4c328bc780000),
+ (0x3fe0dfffff058c1a, 0x3fdf0ede976ba91e, 0x3fe9098e9e8a0791, 0xbfd4a932e90f89aa, 0xbf9b1835db6a2a2c, 0x3fc241d4acd80770, 0xbfb4e170117a3000),
+ (0x3fe1000000022af6, 0x3fdf40dd0b5775b6, 0x3fe8f4e2f2ee66e0, 0xbfd4ae10df38f0de, 0xbf98d31d9fe7cbba, 0x3fc20d5c35659f70, 0xbfb4fd3d151e8000),
+ (0x3fe11fffff8ab5a0, 0x3fdf72b220ee8980, 0x3fe8e032a092e141, 0xbfd4b2827e460418, 0xbf9694984ad2cdcb, 0x3fc1d8a4004008f0, 0xbfb5169a66a9f000),
+ (0x3fe14000015e176d, 0x3fdfa45dd5211a0b, 0x3fe8cb7e10c43f1d, 0xbfd4b68902f758ed, 0xbf945cad318f03e8, 0x3fc1a3b205241980, 0xbfb52d92cd84c000),
+ (0x3fe1600002c01245, 0x3fdfd5e01b9f65b5, 0x3fe8b6c5afc33871, 0xbfd4ba25aa85f813, 0xbf922b635f92cdb3, 0x3fc16e8c2b465870, 0xbfb542312c6ec000),
+ (0x3fe180000005b0df, 0x3fe0039c73c60584, 0x3fe8a209e92e4ca4, 0xbfd4bd59b35af61d, 0xbf9000c35a24191f, 0x3fc13a6a8ec6fa74, 0xbfb554eb109f6098),
+ (0x3fe19ffffcfd1f74, 0x3fe01c341c32ce05, 0x3fe88d4b228d3bc7, 0xbfd4c0265dfd8ea5, 0xbf8bb99bf9151606, 0x3fc104f00f064a5c, 0xbfb564f624f07b1f),
+ (0x3fe1bffffcc45037, 0x3fe034b706ac12f3, 0x3fe87889c0984b64, 0xbfd4c28ceb80b949, 0xbf877f0f7a3534d6, 0x3fc0ce1caea5dc80, 0xbfb5725eaa28f000),
+ (0x3fe1e000015d0939, 0x3fe04d25324d4a16, 0x3fe863c6277e23ec, 0xbfd4c48e9d66b268, 0xbf8351f1b06feba3, 0x3fc0986002b15e50, 0xbfb57e03f43a2000),
+ (0x3fe20000034260ab, 0x3fe0657e97550542, 0x3fe84f00c05a567c, 0xbfd4c62cb59d4924, 0xbf7e648be9263288, 0x3fc0628b48095ca0, 0xbfb5878723eec000),
+ (0x3fe220000196bcfb, 0x3fe07dc333828d0b, 0x3fe83a39eeb54b5a, 0xbfd4c76877223817, 0xbf7640237a6d6d30, 0x3fc02ca3ba99f440, 0xbfb58ef39ecd5000),
+ (0x3fe2400003756189, 0x3fe095f30afdbac3, 0x3fe825720fa6bb7d, 0xbfd4c8432598c0d4, 0xbf6c6d60d75bea04, 0x3fbfed5cd887c340, 0xbfb59454d68b4000),
+ (0x3fe26000013bdd1f, 0x3fe0ae0e172710bb, 0x3fe810a9889533b2, 0xbfd4c8be047e8fed, 0xbf5920e914dd15b8, 0x3fbf8160d625d180, 0xbfb597b64277d000),
+ (0x3fe28000026e7197, 0x3fe0c6145d2dc8dc, 0x3fe7fbe0b3cc857c, 0xbfd4c8da57d785da, 0x3f38b3db2033b920, 0x3fbf155d1d93f540, 0xbfb59923618c3000),
+ (0x3fe29ffffe4c855d, 0x3fe0de05d6652674, 0x3fe7e717f4e58b05, 0xbfd4c899639f2b70, 0x3f6287693d7864dc, 0x3fbea95b5bb7e400, 0xbfb598a7b4b93000),
+ (0x3fe2bfffffef534f, 0x3fe0f5e28b5b78df, 0x3fe7d24fa150837b, 0xbfd4c7fc6b9c5931, 0x3f70e125b375ae70, 0x3fbe3fedafd71237, 0xbfb596b5c99673e4),
+ (0x3fe2e000002e77a2, 0x3fe10daa7752f342, 0x3fe7bd8819154572, 0xbfd4c704b3dfd892, 0x3f7863a555ac2266, 0x3fbdd40f01365dd6, 0xbfb5928aaeb4bdf7),
+ (0x3fe2fffffd7c9d64, 0x3fe1255d9a2022cc, 0x3fe7a8c1b753a014, 0xbfd4c5b3801f820e, 0x3f7fcb39853a1bb2, 0x3fbd65bd8613d740, 0xbfb58c32edd58000),
+ (0x3fe31ffffd82cc9f, 0x3fe13cfbf94585b2, 0x3fe793fcd1396a06, 0xbfd4c40a1333413b, 0x3f838becc98b0eda, 0x3fbcfa1e649f96c0, 0xbfb5848705fe2000),
+ (0x3fe34000020760dc, 0x3fe1548597b4c2f9, 0x3fe77f39bd37da50, 0xbfd4c209afca8f6f, 0x3f8724ccc6f52d4f, 0x3fbc8ead87d86800, 0xbfb57b2bb11af000),
+ (0x3fe35fffffb1e89e, 0x3fe16bfa6f18133e, 0x3fe76a78d9002ba2, 0xbfd4bfb398e809ba, 0x3f8ab041c34cb79c, 0x3fbc23734d502d00, 0xbfb5702c550c2000),
+ (0x3fe37fffffa9dc9d, 0x3fe1835a887fabe0, 0x3fe755ba73b51888, 0xbfd4bd090f92bb18, 0x3f8e2e5415de4536, 0x3fbbb877951c57c0, 0xbfb5639444c5d000),
+ (0x3fe39fffffefd1ad, 0x3fe19aa5e51ddd70, 0x3fe740fee2743c7b, 0xbfd4ba0b54cf55f0, 0x3f90cf85e930af26, 0x3fbb4dc22c569940, 0xbfb5556ec8357000),
+ (0x3fe3bffffd025245, 0x3fe1b1dc8565ca41, 0x3fe72c467a13e900, 0xbfd4b6bba90e7725, 0x3f928138ee4f6ff2, 0x3fbae35aacf0e740, 0xbfb545c718118000),
+ (0x3fe3dffffcd8d6cc, 0x3fe1c8fe70fb7e35, 0x3fe717918808a566, 0xbfd4b31b4adbcf04, 0x3f942c4883ade35b, 0x3fba794859a0afc0, 0xbfb534a856177000),
+ (0x3fe3fffffd8e408d, 0x3fe1e00baa1d04da, 0x3fe702e05d9ffa6f, 0xbfd4af2b7871ca67, 0x3f95d0b7b3fb36e7, 0x3fba123e6124474e, 0xbfb522811fd8a133),
+ (0x3fe42000006e901a, 0x3fe1f70435a64d9d, 0x3fe6ee3348d92b91, 0xbfd4aaed6edf3dea, 0x3f976e91566dba37, 0x3fb9a8ef06a89b9d, 0xbfb50e94ff1867d4),
+ (0x3fe4400000a09097, 0x3fe20de8145acaa5, 0x3fe6d98a9a6f0264, 0xbfd4a6626ab55f2a, 0x3f9905db9ee4b8fd, 0x3fb93d56b7355c80, 0xbfb4f8efe7802000),
+ (0x3fe4600001861467, 0x3fe224b74d32a769, 0x3fe6c4e69c8fc28a, 0xbfd4a18ba5fdcca6, 0x3f9a9698b458c01f, 0x3fb8d4de4a82d700, 0xbfb4e262aeb99000),
+ (0x3fe47fffff909a16, 0x3fe23b71e27da2af, 0x3fe6b0479ca9c160, 0xbfd49c6a5abcf1e6, 0x3f9c20d20d8f992c, 0x3fb86cdc97970200, 0xbfb4ca94d76a4000),
+ (0x3fe49fffffb90e67, 0x3fe25217dce5c5c4, 0x3fe69bade1736aaf, 0xbfd496ffc020ed51, 0x3f9da48f8358d98f, 0x3fb805579f80a400, 0xbfb4b190f53f0000),
+ (0x3fe4c00000128cd2, 0x3fe268a940767cc7, 0x3fe68719b4de44dd, 0xbfd4914d0cf6c50f, 0x3f9f21d8f8715491, 0x3fb79e5545549d80, 0xbfb4976184a36000),
+ (0x3fe4dfffff3e5755, 0x3fe27f2611ebf889, 0x3fe6728b5f498085, 0xbfd48b53767a07ea, 0x3fa04c5b59e2627a, 0x3fb737db36653c00, 0xbfb47c10e4598000),
+ (0x3fe500000148f840, 0x3fe2958e5a167f56, 0x3fe65e032465797a, 0xbfd485142f44c956, 0x3fa10498ca140c07, 0x3fb6d1eedb71be40, 0xbfb45fa94fe02000),
+ (0x3fe52000018790be, 0x3fe2abe21bef8766, 0x3fe649814c4855c7, 0xbfd47e9069f4e9e3, 0x3fa1b9a9349c106a, 0x3fb66c95866404c0, 0xbfb44234ea994000),
+ (0x3fe540000064c499, 0x3fe2c2215e483288, 0x3fe635061a93197b, 0xbfd477c956655f3a, 0x3fa26b9005b791fd, 0x3fb60aa234b1e8ae, 0xbfb4241dc5417ce3),
+ (0x3fe5600002c99e32, 0x3fe2d84c2b4f57a0, 0x3fe62091ced9107e, 0xbfd470c021eda05c, 0x3fa31a54b048ef02, 0x3fb5a6812d9df714, 0xbfb404ad3b9865e5),
+ (0x3fe5800000512c22, 0x3fe2ee62843eb8ef, 0x3fe60c24b0014185, 0xbfd46975facafcfc, 0x3fa3c5fc054af685, 0x3fb5402d421a8b40, 0xbfb3e3ee1a090000),
+ (0x3fe5a0000334351e, 0x3fe30464776e19ff, 0x3fe5f7bef7e40401, 0xbfd461ec08f203ca, 0x3fa46e88d0626c5c, 0x3fb4dd50a5475c80, 0xbfb3c2a9098b8000),
+ (0x3fe5bffffe315006, 0x3fe31a52034bf998, 0x3fe5e360ee28562d, 0xbfd45a23782bc020, 0x3fa51401526e2908, 0x3fb47b1e8f091a80, 0xbfb3a087d2364000),
+ (0x3fe5e00000a1545d, 0x3fe3302b3a257b83, 0x3fe5cf0ac716d488, 0xbfd4521d6b4842ab, 0x3fa5b66b3eb3bc3a, 0x3fb4199af14bd180, 0xbfb37d93b34fe000),
+ (0x3fe5ffffff69b013, 0x3fe345f01c682620, 0x3fe5babcc6a742c0, 0xbfd449db09841112, 0x3fa655cbe07609ee, 0x3fb3b8c9e12c1d00, 0xbfb359d5e3704000),
+ (0x3fe61fffffac056a, 0x3fe35ba0b5d5fb7f, 0x3fe5a677255bb094, 0xbfd4415d73f9ab8d, 0x3fa6f228f4a18aac, 0x3fb358af15142d80, 0xbfb3355762050000),
+ (0x3fe63ffffff36e95, 0x3fe3713d0dee4c27, 0x3fe5923a1ec404b6, 0xbfd438a5cb4c88be, 0x3fa78b88346f22ca, 0x3fb2f94e2b0e0300, 0xbfb310210f69a000),
+ (0x3fe6600002e268dc, 0x3fe386c52f2da1f4, 0x3fe57e05eabaaeda, 0xbfd42fb52cd4f99a, 0x3fa821ef88cc1c46, 0x3fb29aaa894e96c0, 0xbfb2ea3ba114c000),
+ (0x3fe67ffffca43b52, 0x3fe39c391a94c1f4, 0x3fe569dac91c1aae, 0xbfd4268cb7b7c4c8, 0x3fa8b56360fbfb90, 0x3fb23fb635b482a9, 0xbfb2c40c5aced0ef),
+ (0x3fe6a0000087414b, 0x3fe3b198e63c8377, 0x3fe555b8e509d4d9, 0xbfd41d2d803d48ce, 0x3fa945ecc8324226, 0x3fb1e29a04de9b6e, 0xbfb29ce1f03b462c),
+ (0x3fe6bffffca9ca2a, 0x3fe3c6e48f902296, 0x3fe541a07ff88a58, 0xbfd41398a2ef107f, 0x3fa9d3919aace755, 0x3fb1834f7d89ce40, 0xbfb274c5bef82000),
+ (0x3fe6e00003fa3010, 0x3fe3dc1c2cba5e80, 0x3fe52d91c323fcee, 0xbfd409cf2ee4f9e8, 0x3faa5e55c45b939a, 0x3fb127c00d4bf6c0, 0xbfb24c780da48000),
+ (0x3fe6fffffff5c5b2, 0x3fe3f13fb897d886, 0x3fe5198cf0b1cf1f, 0xbfd3ffd23dbec556, 0x3faae6408c051ffe, 0x3fb0ccfcbf6e6a40, 0xbfb223a49e696000),
+ (0x3fe71fffffa86b8e, 0x3fe4064f471d169e, 0x3fe50592322d092f, 0xbfd3f5a2db1bb316, 0x3fab6b589c6aa3fa, 0x3fb07307e1378480, 0xbfb1fa5329170000),
+ (0x3fe7400001ee24e8, 0x3fe41b4ae1b35499, 0x3fe4f1a1ba383080, 0xbfd3eb42151dc89b, 0x3fabeda46cf6c88a, 0x3fb019e3c81c74c0, 0xbfb1d08b51ab4000),
+ (0x3fe76000004807ad, 0x3fe430328e7a1ebd, 0x3fe4ddbbbdbb66b6, 0xbfd3e0b0f944ff81, 0x3fac6d2a725bb64f, 0x3faf832565793f00, 0xbfb1a6549afec000),
+ (0x3fe7800000c376a9, 0x3fe445065bf8569b, 0x3fe4c9e068c4d1e4, 0xbfd3d5f08e7e91ba, 0x3face9f164bddc9a, 0x3faed42d30818400, 0xbfb17bb64ef82000),
+ (0x3fe7a00000bb278d, 0x3fe459c65333fe05, 0x3fe4b60febc4560b, 0xbfd3cb01dc145c05, 0x3fad63ffe9117670, 0x3fae26e2c80dc580, 0xbfb150b79aec4000),
+ (0x3fe7bffffdb2e507, 0x3fe46e727d826a87, 0x3fe4a24a7607bef5, 0xbfd3bfe5e6b19c89, 0x3faddb5b54b3040c, 0x3fad816604197de2, 0xbfb125b8c8b0881c),
+ (0x3fe7e0000252f58a, 0x3fe4830aecddb00e, 0x3fe48e902dbec701, 0xbfd3b49dac459984, 0x3fae500eb957d5e6, 0x3facd1650ee54e80, 0xbfb0f9b4d2d88000),
+ (0x3fe7fffffc963c19, 0x3fe4978fa0f7692e, 0x3fe47ae149c6e6d9, 0xbfd3a92a31ac4439, 0x3faec21c8e9fe30c, 0x3fac29381cb8a880, 0xbfb0cdbe52f5c000),
+ (0x3fe81ffffd933f52, 0x3fe4ac00b03b4c38, 0x3fe4673dea60cfe4, 0xbfd39d8c6d24080f, 0x3faf318d54f3de26, 0x3fab82c543a90280, 0xbfb0a1827562e000),
+ (0x3fe8400002a90411, 0x3fe4c05e248f2ba3, 0x3fe453a63af9e7a4, 0xbfd391c5592e5cad, 0x3faf9e67efa37f40, 0x3faade0f0b5e4c00, 0xbfb075079e842000),
+ (0x3fe85ffffeda28ed, 0x3fe4d4a8017a21db, 0x3fe4401a6c55b7ff, 0xbfd385d5f1a3c88b, 0x3fb0045995221920, 0x3faa3b17f114e780, 0xbfb048541a0ac000),
+ (0x3fe87ffffcb5e629, 0x3fe4e8de59a40615, 0x3fe42c9a9fd97bd7, 0xbfd379bf270aa8eb, 0x3fb0383b14344703, 0x3fa999e1bc66d880, 0xbfb01b6deb596000),
+ (0x3fe8a0000170c515, 0x3fe4fd013c656e86, 0x3fe41926f94766d6, 0xbfd36d81e923c791, 0x3fb06adc00899d63, 0x3fa8fa6e1683e780, 0xbfafdcb5ebc98000),
+ (0x3fe8c0000013189d, 0x3fe51110add1e051, 0x3fe405bfa67ab9da, 0xbfd3611f2c074f8a, 0x3fb09c3fcc701a50, 0x3fa85cbec910c700, 0xbfaf82421fc08000),
+ (0x3fe8dffffea5a4e4, 0x3fe5250cbe1a044e, 0x3fe3f264c8d246d0, 0xbfd35497d9e7b6f1, 0x3fb0cc6a0b4acaae, 0x3fa7c0d506895100, 0xbfaf278b9c710000),
+ (0x3fe8fffffff532e7, 0x3fe538f57b82510b, 0x3fe3df1682be020a, 0xbfd347ecdb613f48, 0x3fb0fb5d96a0b379, 0x3fa72d0add537aa4, 0xbfaecd493ac67089),
+ (0x3fe9200000e0371d, 0x3fe54ccaf0c0e17a, 0x3fe3cbd4f95a3d43, 0xbfd33b1f192ca5ae, 0x3fb129201bd9bcf5, 0x3fa68e563bf5ca00, 0xbfae7181cd9f0000),
+ (0x3fe93ffffed17bb8, 0x3fe5608d290c9c45, 0x3fe3b8a0508476b6, 0xbfd32e2f7884690d, 0x3fb155b3032aa8fb, 0x3fa5f7c2e4e4cd80, 0xbfae16438ba38000),
+ (0x3fe95fffffb19d46, 0x3fe5743c34fb13c6, 0x3fe3a578a61f34cc, 0xbfd3211ed8f518b9, 0x3fb1811a9c607ce3, 0x3fa562f8391c6200, 0xbfadbaec7dd30000),
+ (0x3fe9800001fbd6cc, 0x3fe587d820a9ca1d, 0x3fe3925e1ba3c385, 0xbfd313ee1a3ddcff, 0x3fb1ab5a76e24f11, 0x3fa4cff68dc4ae00, 0xbfad5f866b46c000),
+ (0x3fe9a000028625b6, 0x3fe59b60f7595cc7, 0x3fe37f50d2a59cb4, 0xbfd3069e1a26c510, 0x3fb1d47620a6ae76, 0x3fa43ebe0fa40580, 0xbfad041ad7158000),
+ (0x3fe9c0000013e88f, 0x3fe5aed6c59caa8f, 0x3fe36c50eab346d4, 0xbfd2f92fb30656ff, 0x3fb1fc712a84e7c4, 0x3fa3af4eb4269c00, 0xbfaca8b2f6dac000),
+ (0x3fe9e00000c59ba9, 0x3fe5c2399c9bbead, 0x3fe3595e7e2616dd, 0xbfd2eba3b9686805, 0x3fb2234f2e03dca7, 0x3fa321a82363d200, 0xbfac4d57a3e34000),
+ (0x3fe9ffffff9af828, 0x3fe5d58986d9bfee, 0x3fe34679ad1b9f34, 0xbfd2ddfb03da55d4, 0x3fb24913b63c63f4, 0x3fa295ca0bd98800, 0xbfabf211910ac000),
+ (0x3fea1ffffe6f5c7c, 0x3fe5e8c6931fdc41, 0x3fe333a292cda05f, 0xbfd2d036635848cf, 0x3fb26dc255bbabd0, 0x3fa20bb3d390e000, 0xbfab96e9186cc000),
+ (0x3fea3ffffddba306, 0x3fe5fbf0cf8e022d, 0x3fe320d94a65b14d, 0xbfd2c256a69b9f29, 0x3fb2915ddee0c8a5, 0x3fa189f836ccd898, 0xbfab3c8bd862fbc5),
+ (0x3fea5ffffe550ddd, 0x3fe60f084a48a440, 0x3fe30e1dee5569d2, 0xbfd2b45c9a3476c6, 0x3fb2b3ec17359215, 0x3fa0fcdbe6e84800, 0xbfaae111172ec000),
+ (0x3fea800000e4f98f, 0x3fe6220d11e54f19, 0x3fe2fb7097ede8aa, 0xbfd2a64907194c54, 0x3fb2d56e5363f0a3, 0x3fa07818437a3900, 0xbfaa8670f6f14000),
+ (0x3fea9ffffda78430, 0x3fe634ff2fca48bb, 0x3fe2e8d164e983b8, 0xbfd2981cb8011622, 0x3fb2f5e8d1d6530f, 0x3f9fea318f2ad100, 0xbfaa2c0d5c400000),
+ (0x3feac00000a7650e, 0x3fe647deb944952c, 0x3fe2d64066a2b0d9, 0xbfd289d86c0d2d1b, 0x3fb3155f2583e8e6, 0x3f9ee7b7e4800000, 0xbfa9d1ed38234000),
+ (0x3feadffffedf751c, 0x3fe65aabb61747e5, 0x3fe2c3bdbb183012, 0xbfd27b7ce9f8bb53, 0x3fb333d4c786c2ea, 0x3f9de8c0fc5ad200, 0xbfa978177dbb8000),
+ (0x3feafffffd65ea17, 0x3fe66d66379ec9f1, 0x3fe2b149762e2d21, 0xbfd26d0aeeb178ab, 0x3fb3514d3de8eaea, 0x3f9ced4987a4d100, 0xbfa91e92b3a98000),
+ (0x3feb200000caf460, 0x3fe6800e4ef44188, 0x3fe29ee3ab610acd, 0xbfd25e8334b45cff, 0x3fb36dcc0a58c590, 0x3f9bf54dfdf31300, 0xbfa8c56525bc0000),
+ (0x3feb4000000e26ac, 0x3fe692a4055f2f52, 0x3fe28c8c753d776e, 0xbfd24fe67a0bb733, 0x3fb389549efd4d08, 0x3f9b00cb02862100, 0xbfa86c9508b40000),
+ (0x3feb6000018849cd, 0x3fe6a5276d2d89d2, 0x3fe27a43e4c58f7d, 0xbfd24135732df900, 0x3fb3a3ea77d68263, 0x3f9a0fbc87055500, 0xbfa814282ba74000),
+ (0x3feb8000010d9519, 0x3fe6b79892a64df0, 0x3fe2680a104c3459, 0xbfd23270d6aa460d, 0x3fb3bd90404e0eb5, 0x3f992fb610f8b7a1, 0xbfa7bcc387688703),
+ (0x3feb9ffffe599af5, 0x3fe6c9f7846a2b45, 0x3fe255df0b3288ee, 0xbfd22399572c6060, 0x3fb3d64bb1660697, 0x3f9837ece0652d00, 0xbfa7648eb5fe0000),
+ (0x3febc00000284356, 0x3fe6dc44552c4ed1, 0x3fe243c2e433adbd, 0xbfd214afa0d6b3b1, 0x3fb3ee1def591e9a, 0x3f975122b84b5a00, 0xbfa70d6caea6c000),
+ (0x3febdffffe085993, 0x3fe6ee7f0f01eeef, 0x3fe231b5b1f500b7, 0xbfd205b464c5bfb6, 0x3fb4050b1cb5f2a6, 0x3f966dbba68c0400, 0xbfa6b6c3404c0000),
+ (0x3fec0000034fb3e6, 0x3fe700a7c7586398, 0x3fe21fb77f46182c, 0xbfd1f6a8482e18b6, 0x3fb41b16a1f46d60, 0x3f958db278506e00, 0xbfa66097156f0000),
+ (0x3fec1ffffdfe62e8, 0x3fe712be83078c02, 0x3fe20dc866cc799f, 0xbfd1e78bfb883685, 0x3fb43043ce52cf56, 0x3f94b1029a5ec700, 0xbfa60aecf2b30000),
+ (0x3fec3fffff94c963, 0x3fe724c35b136d78, 0x3fe1fbe86e98a7b2, 0xbfd1d8601c0fd209, 0x3fb4449603e9797b, 0x3f93d7a656cf6400, 0xbfa5b5c9094d0000),
+ (0x3fec60000082bdf4, 0x3fe736b65a605f56, 0x3fe1ea17aa267316, 0xbfd1c92550541ec4, 0x3fb458108eb64abd, 0x3f9301987b6f8700, 0xbfa5612f9c3b8000),
+ (0x3fec800000df6764, 0x3fe748979037234f, 0x3fe1d85628043564, 0xbfd1b9dc38ce0a23, 0x3fb46ab6ba64e101, 0x3f922ed378f00200, 0xbfa50d24a7678000),
+ (0x3fec9fffff08b5c1, 0x3fe75a670af97a58, 0x3fe1c6a3f714bf96, 0xbfd1aa8574538e0f, 0x3fb47c8bcc0c4e1c, 0x3f915f51a50a7500, 0xbfa4b9abfb4dc000),
+ (0x3fecbfffff264192, 0x3fe76c24dc4e49f1, 0x3fe1b501226bbf38, 0xbfd19b219c625478, 0x3fb48d923c79c7f8, 0x3f90a110cf52e4ae, 0xbfa4676263f4c032),
+ (0x3fece00000316893, 0x3fe77dd113055eab, 0x3fe1a36db76194ae, 0xbfd18bb14affb17d, 0x3fb49dcfa250598d, 0x3f8f93ff72a6f600, 0xbfa4147fa51d0000),
+ (0x3fed0000028a12de, 0x3fe78f6bbec21f96, 0x3fe191e9c1f0e9ca, 0xbfd17c351662872d, 0x3fb4ad44d4d39570, 0x3f8e08471e932a00, 0xbfa3c2d2ad154000),
+ (0x3fed200003373601, 0x3fe7a0f4ed5e56f4, 0x3fe180754f521d39, 0xbfd16cad947835ea, 0x3fb4bbf5ca4e213b, 0x3f8c82e4ca8e1c00, 0xbfa371c55ec44000),
+ (0x3fed400000e2fa29, 0x3fe7b26cada9f9ab, 0x3fe16f106b412869, 0xbfd15d1b57e6e3de, 0x3fb4c9e5aa0c4652, 0x3f8b03cbd541a800, 0xbfa3215aa7d68000),
+ (0x3fed5ffffc647157, 0x3fe7c3d30fb0d432, 0x3fe15dbb1fb975b9, 0xbfd14d7eefeac143, 0x3fb4d71795f22c94, 0x3f898aef4bb08a00, 0xbfa2d19546608000),
+ (0x3fed8000030878d4, 0x3fe7d5282a436004, 0x3fe14c756f72ef0b, 0xbfd13dd8e34e747c, 0x3fb4e38eae04b3a6, 0x3f881841786e0000, 0xbfa28277b07e0000),
+ (0x3feda0000038b7ba, 0x3fe7e66c01df9f9b, 0x3fe13b3f6ee50309, 0xbfd12e29c71cae76, 0x3fb4ef4dfe6e2bfe, 0x3f86abb5fe255e00, 0xbfa2340488c34000),
+ (0x3fedc000024cc650, 0x3fe7f79eadf54885, 0x3fe12a191f2548cb, 0xbfd11e721cfd633f, 0x3fb4fa589a26b592, 0x3f85453ebe0b9e00, 0xbfa1e63df3f04000),
+ (0x3fede000009d89d2, 0x3fe808c0399495a2, 0x3fe119028d1fda26, 0xbfd10eb26f95f279, 0x3fb504b184ed26c3, 0x3f83e4ce746a9e00, 0xbfa1992629544000),
+ (0x3fee00000039743a, 0x3fe819d0b7341e61, 0x3fe107fbbde28c9e, 0xbfd0feeb406dd82e, 0x3fb50e5af0d5593e, 0x3f82a72ef1e0c167, 0xbfa14d527b168f8a),
+ (0x3fee1ffffe5cc00a, 0x3fe82ad03521bbb6, 0x3fe0f704ba259f97, 0xbfd0ef1d13273fc2, 0x3fb5175a44a65f21, 0x3f8135cae0939400, 0xbfa1010aa6568000),
+ (0x3fee4000019b115b, 0x3fe83bbec6a702d7, 0x3fe0e61d852e5196, 0xbfd0df4863d03473, 0x3fb51fb00570e1c7, 0x3f7fce36d18fd400, 0xbfa0b60a74c20000),
+ (0x3fee5ffffdf4f7e1, 0x3fe84c9c7540d5f2, 0x3fe0d5462b922e3d, 0xbfd0cf6db5f37b20, 0x3fb5275fea93680c, 0x3f7d3c75fad99000, 0xbfa06bc035858000),
+ (0x3fee80000323ee64, 0x3fe85d6959120279, 0x3fe0c47eaad03167, 0xbfd0bf8d7d13d57e, 0x3fb52e6cdb8a684c, 0x3f7ab635583b0400, 0xbfa0222d3764c000),
+ (0x3feea000027f9467, 0x3fe86e257a464e49, 0x3fe0b3c70fe40a0e, 0xbfd0afa8399b9a17, 0x3fb534d9b198efb2, 0x3f783b593b7d2000, 0xbf9fb2a5de1a8000),
+ (0x3feec000016d28f0, 0x3fe87ed0eb9a3410, 0x3fe0a31f5cc9486a, 0xbfd09fbe5fe0dff2, 0x3fb53aa9435bdcf3, 0x3f75cbc42e130c00, 0xbf9f226507280000),
+ (0x3feee0000186b339, 0x3fe88f6bbdcc87be, 0x3fe092879506eb54, 0xbfd08fd063fd06b3, 0x3fb53fde5f2950c0, 0x3f736758dd6aac00, 0xbf9e939a0cb08000),
+ (0x3feeffffffce4723, 0x3fe89ff5ff3e4b8a, 0x3fe081ffbe119e2e, 0xbfd07fdeba3b0de8, 0x3fb5447bcb488540, 0x3f710dfa27bda400, 0xbf9e0646ebfc0000),
+ (0x3fef20000017d916, 0x3fe8b06fc1db7f25, 0x3fe07187d965874a, 0xbfd06fe9d16dfdc9, 0x3fb5488447a9ed9c, 0x3f6d7f1490008000, 0xbf9d7a6d4a5b0000),
+ (0x3fef3ffffe33911c, 0x3fe8c0d9137147b0, 0x3fe0611fec30af7f, 0xbfd05ff21a3a42d4, 0x3fb54bf9b699774b, 0x3f696e589b06ec6f, 0xbf9cf12a32a34060),
+ (0x3fef5ffffe084599, 0x3fe8d13205f7eff9, 0x3fe050c7f70b73f8, 0xbfd04ff7ffb69502, 0x3fb54ee148abcb5e, 0x3f648602474d4800, 0xbf9c672c6e7a8000),
+ (0x3fef7ffffdfbe7bb, 0x3fe8e17aa896a3fa, 0x3fe0407ffcf5ee0b, 0xbfd03ffbecf3660f, 0x3fb5513b2748a2d9, 0x3f6029586a500800, 0xbf9bdfc7884c0000),
+ (0x3fef9fffff0fa180, 0x3fe8f1b30bcb577c, 0x3fe03047ff31c004, 0xbfd02ffe4a3a5b50, 0x3fb5530ac95cb722, 0x3f57c33cda611000, 0xbf9b59e0e3a28000),
+ (0x3fefc00001a24bf4, 0x3fe901db3fc1bb6f, 0x3fe0201ffeecb132, 0xbfd01fff7e0f13c7, 0x3fb55452c9166f13, 0x3f4eba61be0c4000, 0xbf9ad57931778000),
+ (0x3fefdfffffe38639, 0x3fe911f3518b3813, 0x3fe01008000a410c, 0xbfd00ffff01bf387, 0x3fb55515b9099fdd, 0x3f3c805a37250000, 0xbf9a52910b908000),
+ (0x3feff0000008727f, 0x3fe919f953f31326, 0x3fe00801fffb8473, 0xbfd007fffdfb4766, 0x3fb555453ecbbd3f, 0x3f30045a21318845, 0xbf9a12c2520fd1de),
+]
+
+/*
+   Single-precision arctangent: evaluates the flt64 routine atan264
+   at (x, 1.0) and narrows the result back to flt32.
+ */
+const atan32 = {x
+	/*
+	   Two inputs (a bit pattern and the same pattern with the sign
+	   bit set) are answered from hard-coded results: rounding the
+	   flt64 value back to flt32 correctly for them would need more
+	   extra accuracy than it is worth.
+	 */
+	match std.flt32bits(x)
+	| 0x3d8d6b23: -> std.flt32frombits(0x3d8d31c3)
+	| 0xbd8d6b23: -> std.flt32frombits(0xbd8d31c3)
+	| _:
+	;;
+
+	-> (atan264((x : flt64), 1.0) : flt32)
+}
+
+/* Double-precision arctangent: forwards to atan264 with the second argument fixed at 1.0. */
+const atan64 = {x
+ -> atan264(x, 1.0)
+}
+
+/*
+   Single-precision two-argument arctangent: widens both arguments,
+   calls atan264, and narrows the result back to flt32.
+ */
+const atan232 = {y, x
+	/*
+	   When x is exactly 1.0 the call is equivalent to atan32(y), so
+	   the same two hard-coded inputs are answered identically here.
+	 */
+	if x == 1.0
+		match std.flt32bits(y)
+		| 0x3d8d6b23: -> std.flt32frombits(0x3d8d31c3)
+		| 0xbd8d6b23: -> std.flt32frombits(0xbd8d31c3)
+		| _:
+		;;
+	;;
+
+	-> (atan264((y : flt64), (x : flt64)) : flt32)
+}
+
+/*
+ Two-argument arctangent on flt64. NaN in either argument gives NaN;
+ signed zeros and infinities are answered from fixed constants; every
+ other input is reduced to a quotient in [0, 1], handed to atan_reduced,
+ and then mapped back using the recorded signs of x and y.
+
+ Constants used below, as flt64 bit patterns (the leading hex digit
+ 0xc.../0xb... variants are the same values with the sign bit set):
+ 0x400921fb54442d18 = pi, 0x3ff921fb54442d18 = pi/2,
+ 0x4002d97c7f3321d2 = 3*pi/4, 0x3fe921fb54442d18 = pi/4.
+ */
+const atan264 = {y, x
+ if std.isnan(y) || std.isnan(x)
+ -> std.flt64nan()
+ ;;
+
+ var xb = std.flt64bits(x)
+ var yb = std.flt64bits(y)
+ var xpos, xe, ypos, ye
+ (xpos, xe, _) = std.flt64explode(x)
+ (ypos, ye, _) = std.flt64explode(y)
+ /*
+ The first component from explode is inverted so that xpos/ypos read
+ as "not negative" (presumably explode reports true for a set sign
+ bit, including on -0.0 -- confirm against std.flt64explode).
+ */
+ xpos = !xpos
+ ypos = !ypos
+
+ /* All the special cases */
+ /* y is +0 or -0: the result is a signed zero or a signed pi, chosen by the sign of x */
+ if yb == 0x0000000000000000
+ if xpos
+ -> 0.0
+ else
+ -> std.flt64frombits(0x400921fb54442d18)
+ ;;
+ elif yb == 0x8000000000000000
+ if xpos
+ -> std.flt64frombits(0x8000000000000000)
+ else
+ -> std.flt64frombits(0xc00921fb54442d18)
+ ;;
+ /* x is +0 or -0 with y nonzero: the result is pi/2 carrying the sign of y */
+ elif xb == 0x0000000000000000 || xb == 0x8000000000000000
+ if ypos
+ -> std.flt64frombits(0x3ff921fb54442d18)
+ else
+ -> std.flt64frombits(0xbff921fb54442d18)
+ ;;
+ ;;
+
+ /*
+ NaN was rejected above, so an exponent of 1024 is taken to mean
+ infinity (assumes that is what std.flt64explode reports -- confirm).
+ */
+ var xinf = (xe == 1024)
+ var yinf = (ye == 1024)
+
+ /* Tuple order: (y infinite, y positive, x infinite, x positive) */
+ match (yinf, ypos, xinf, xpos)
+ | (true, true, true, false): -> std.flt64frombits(0x4002d97c7f3321d2)
+ | (true, false, true, false): -> std.flt64frombits(0xc002d97c7f3321d2)
+ | (true, true, true, true ): -> std.flt64frombits(0x3fe921fb54442d18)
+ | (true, false, true, true ): -> std.flt64frombits(0xbfe921fb54442d18)
+ | (true, true, false, _ ): -> std.flt64frombits(0x3ff921fb54442d18)
+ | (true, false, false, _ ): -> std.flt64frombits(0xbff921fb54442d18)
+ | (false, true, true, false): -> std.flt64frombits(0x400921fb54442d18)
+ | (false, false, true, false): -> std.flt64frombits(0xc00921fb54442d18)
+ | (false, true, true, true ): -> std.flt64frombits(0x0000000000000000)
+ | (false, false, true, true ): -> std.flt64frombits(0x8000000000000000)
+ | _:
+ ;;
+
+ /* Normal case. Here we just reduce y/x to [0, 1] */
+ /* Absolute values: the mask is meant to be all ones shifted right by one, i.e. it clears the sign bit */
+ var xabs = std.flt64frombits(std.flt64bits(x) & (~(0 : uint64)) >> 1)
+ var yabs = std.flt64frombits(std.flt64bits(y) & (~(0 : uint64)) >> 1)
+
+ /* Remember the original signs; they are applied after the reduced evaluation */
+ var then_negate = !ypos
+ var then_subtract_from_pi = !xpos
+
+ x = xabs
+ y = yabs
+
+
+ /* Keep the quotient at most 1 by dividing the smaller magnitude by the larger one */
+ var then_reverse = false
+ if y > x
+ then_reverse = true
+ std.swap(&x, &y)
+ ;;
+
+ /* Compute y/x = u + du */
+ var u = y / x
+ var du
+ if u * x == y
+ du = 0.0
+ else
+ /*
+ two_by_two presumably returns the product u*x as a two-part
+ (t1, t2) value, so that (y - t1) - t2 is the remainder of the
+ division -- confirm against its definition.
+ */
+ var t1, t2
+ (t1, t2) = two_by_two(u, x)
+ du = ((y - t1) - t2)/x
+ ;;
+
+ var ret = atan_reduced(u, du)
+
+ if then_reverse
+ /* Compute pi/2 - ret; pi_over_2 is in sin-impl.myr */
+ var po2_hi = std.flt64frombits(pi_over_2[0])
+ var po2_lo = std.flt64frombits(pi_over_2[1])
+ ret = (po2_hi - ret) + po2_lo
+ ;;
+ if then_subtract_from_pi
+ /* Compute pi - ret, or maybe -pi - ret */
+ /* pi is formed by scaling both parts of pi_over_2 by 2; the possible sign flip happens in the negation step below */
+ var pi_hi = scale2(std.flt64frombits(pi_over_2[0]), 1)
+ var pi_lo = scale2(std.flt64frombits(pi_over_2[1]), 1)
+ ret = (pi_hi - ret) + pi_lo
+ ;;
+
+ if then_negate
+ ret = -1.0 * ret
+ ;;
+
+ -> ret
+}
+
+/*
+   Handle arctan(z) with z in [0, 1], where z is supplied as u + du:
+   u is the leading value and du a small correction to it (atan264
+   passes the remainder of its division here).
+
+   Below 1/16 a single polynomial in u^2 is evaluated. Otherwise u
+   selects a row of the table C, which holds an expansion point xi
+   followed by six polynomial coefficients, and the polynomial is
+   evaluated at (u - xi) + du.
+ */
+const atan_reduced = {u : flt64, du : flt64
+	/*
+	   If u is less than 1/16, [GB91] uses a single polynomial
+	   approximation. I am not quite sure why they don't employ
+	   tables in this case: the speed appears to be about the
+	   same. Perhaps to save a few bits for the first entries
+	   of the table?
+	 */
+	if u < 0.0625
+		var s = u * u
+		var p = horner_polyu(s, atan_coeffs[:])
+		/*
+		   NOTE(review): the derivative of arctan near 0 is close
+		   to 1, so the correction might be expected to enter as
+		   du rather than u*du. Left exactly as written; confirm
+		   against [GB91] before changing.
+		 */
+		-> u*p + u*du
+	;;
+
+	/*
+	   Extract a polynomial from the table. The index is 256*u
+	   rounded to an integer, clamped to [0, 256].
+	 */
+	var j = rn(scale2(u, 8))
+	if j < 0
+		j = 0
+	elif j > 256
+		j = 256
+	;;
+
+	var xi_u : uint64, c : uint64[6]
+	(xi_u, c[0], c[1], c[2], c[3], c[4], c[5]) = C[j]
+	var xi : flt64 = std.flt64frombits(xi_u)
+
+	-> horner_polyu((u - xi) + du, c[:])
+}
diff --git a/lib/math/bld.sub b/lib/math/bld.sub
index a434011..c2e9da4 100644
--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -1,23 +1,35 @@
lib math =
fpmath.myr
- # exp
- exp-impl.myr
-
# rounding (to actual integers)
round-impl+posixy-x64-sse4.s
round-impl.myr
+ # atan and atan2
+ atan-impl.myr
+
+ # exp and expm1
+ exp-impl.myr
+
# fused-multiply-add
fma-impl+posixy-x64-fma.s
fma-impl.myr
+ # log and log1p
+ log-impl.myr
+
# polynomial evaluation methods
poly-impl.myr
+ # x^y
+ powr-impl.myr
+
# scalb (multiply x by 2^m)
scale2-impl.myr
+ # sin, cos
+ sin-impl.myr
+
# sqrt
sqrt-impl+posixy-x64-sse2.s
sqrt-impl.myr
@@ -29,6 +41,9 @@ lib math =
trunc-impl+posixy-x64-sse4.s
trunc-impl.myr
+ # tan, cot
+ tan-impl.myr
+
# util
util.myr
ftrap.myr
diff --git a/lib/math/exp-impl.myr b/lib/math/exp-impl.myr
index 26b8639..877abeb 100644
--- a/lib/math/exp-impl.myr
+++ b/lib/math/exp-impl.myr
@@ -10,22 +10,19 @@ use "util"
enough to be in the same function.
*/
pkg math =
- pkglocal const exp32 : (f : flt32 -> flt32)
- pkglocal const exp64 : (f : flt64 -> flt64)
+ pkglocal const exp32 : (x : flt32 -> flt32)
+ pkglocal const exp64 : (x : flt64 -> flt64)
- pkglocal const expm132 : (f : flt32 -> flt32)
- pkglocal const expm164 : (f : flt64 -> flt64)
+ pkglocal const expm132 : (x : flt32 -> flt32)
+ pkglocal const expm164 : (x : flt64 -> flt64)
;;
-extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)
-extern const fma64 : (x : flt64, y : flt64, z : flt64 -> flt64)
-extern const horner_polyu32 : (f : flt32, a : uint32[:] -> flt32)
-extern const horner_polyu64 : (f : flt64, a : uint64[:] -> flt64)
+extern const horner_polyu32 : (x : flt32, a : uint32[:] -> flt32)
+extern const horner_polyu64 : (x : flt64, a : uint64[:] -> flt64)
type fltdesc(@f, @u, @i) = struct
explode : (f : @f -> (bool, @i, @u))
assem : (n : bool, e : @i, s : @u -> @f)
- fma : (x : @f, y : @f, z : @f -> @f)
horner : (f : @f, a : @u[:] -> @f)
sgnmask : @u
tobits : (f : @f -> @u)
@@ -53,13 +50,11 @@ type fltdesc(@f, @u, @i) = struct
L1 : @u
L2 : @u
S : (@u, @u)[32]
-
;;
const desc32 : fltdesc(flt32, uint32, int32) = [
.explode = std.flt32explode,
.assem = std.flt32assem,
- .fma = fma32,
.horner = horner_polyu32,
.sgnmask = (1 << 31),
.tobits = std.flt32bits,
@@ -127,7 +122,6 @@ const desc32 : fltdesc(flt32, uint32, int32) = [
const desc64 : fltdesc(flt64, uint64, int64) = [
.explode = std.flt64explode,
.assem = std.flt64assem,
- .fma = fma64,
.horner = horner_polyu64,
.sgnmask = (1 << 63),
.tobits = std.flt64bits,
@@ -199,18 +193,18 @@ const desc64 : fltdesc(flt64, uint64, int64) = [
.precision = 53,
]
-const exp32 = {f : flt32
- -> expgen(f, desc32)
+const exp32 = {x : flt32
+ -> expgen(x, desc32)
}
-const exp64 = {f : flt64
- -> expgen(f, desc64)
+const exp64 = {x : flt64
+ -> expgen(x, desc64)
}
-generic expgen = {f : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i, roundable @f -> @i
- var b = d.tobits(f)
+generic expgen = {x : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i, roundable @f -> @i
+ var b = d.tobits(x)
var n, e, s
- (n, e, s) = d.explode(f)
+ (n, e, s) = d.explode(x)
/*
Detect if exp(f) would round to outside representability.
@@ -231,7 +225,7 @@ generic expgen = {f : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equata
/* Argument reduction to [ -ln(2)/64, ln(2)/64 ] */
var inv_L = d.frombits(d.inv_L)
- var N = rn(f * inv_L)
+ var N = rn(x * inv_L)
var N2 = N % (32 : @i)
if N2 < 0
N2 += (32 : @i)
@@ -247,9 +241,9 @@ generic expgen = {f : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equata
(very well) f reduced into [ -ln(2)/64, ln(2)/64 ]
*/
if std.abs(N) >= (1 << d.nabs)
- R1 = (f - (N1 : @f) * d.frombits(d.L1)) - ((N2 : @f) * d.frombits(d.L1))
+ R1 = (x - (N1 : @f) * d.frombits(d.L1)) - ((N2 : @f) * d.frombits(d.L1))
else
- R1 = f - (N : @f) * d.frombits(d.L1)
+ R1 = x - (N : @f) * d.frombits(d.L1)
;;
R2 = -1.0 * (N : @f) * d.frombits(d.L2)
@@ -280,37 +274,37 @@ generic expgen = {f : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equata
-> exp
}
-const expm132 = {f : flt32
- -> expm1gen(f, desc32)
+const expm132 = {x : flt32
+ -> expm1gen(x, desc32)
}
-const expm164 = {f : flt64
- -> expm1gen(f, desc64)
+const expm164 = {x : flt64
+ -> expm1gen(x, desc64)
}
-generic expm1gen = {f : @f, d : fltdesc(@f, @u, @i) :: \
+generic expm1gen = {x : @f, d : fltdesc(@f, @u, @i) :: \
numeric,floating,std.equatable @f,
numeric,integral @u,
numeric,integral @i,
roundable @f -> @i
- var b = d.tobits(f)
+ var b = d.tobits(x)
var n, e, s
- (n, e, s) = d.explode(f)
+ (n, e, s) = d.explode(x)
/* Special cases: +/- 0, inf, NaN, tiny, and huge */
if (b & ~d.sgnmask == 0)
- -> f
+ -> x
elif n && (b & ~d.sgnmask == d.inf)
-> (-1.0 : @f)
elif (b & ~d.sgnmask == d.inf)
- -> f
- elif std.isnan(f)
+ -> x
+ elif std.isnan(x)
-> d.frombits(d.nan)
elif (b & ~d.sgnmask) <= d.thresh_tiny
var two_to_large = d.assem(false, 100, 0)
var two_to_small = d.assem(false, -100, 0)
- var abs_f = d.assem(false, e, s)
- -> (two_to_large * f + abs_f) * two_to_small
+ var abs_x = d.assem(false, e, s)
+ -> (two_to_large * x + abs_x) * two_to_small
elif !n && b >= d.thresh_1_max /* exp(x) = oo <=> expm1(x) = oo, as it turns out */
-> d.frombits(d.inf)
elif n && b >= d.thresh_huge_neg
@@ -321,25 +315,25 @@ generic expm1gen = {f : @f, d : fltdesc(@f, @u, @i) :: \
/* Procedure 2 */
/* compute x^2 / 2 with extra precision */
- var u = round(f, d)
- var v = f - u
+ var u = round(x, d)
+ var v = x - u
var y = u * u * (0.5 : @f)
- var z = v * (f + u) * (0.5 : @f)
- var q = f * f * f * d.horner(f, d.Bi)
+ var z = v * (x + u) * (0.5 : @f)
+ var q = x * x * x * d.horner(x, d.Bi)
var yn, ye, ys
(yn, ye, ys) = d.explode(y)
if (ye >= -7)
-> (u + y) + (q + (v + z))
else
- -> f + (y + (q + z))
+ -> x + (y + (q + z))
;;
;;
/* Procedure 1 */
var inv_L = d.frombits(d.inv_L)
- var N = rn(f * inv_L)
+ var N = rn(x * inv_L)
var N2 = N % (32 : @i)
if N2 < 0
N2 += (32 : @i)
@@ -353,9 +347,9 @@ generic expm1gen = {f : @f, d : fltdesc(@f, @u, @i) :: \
reduced into [ -ln(2)/64, ln(2)/64 ]
*/
if std.abs(N) >= (1 << d.nabs)
- R1 = (f - (N1 : @f) * d.frombits(d.L1)) - ((N2 : @f) * d.frombits(d.L1))
+ R1 = (x - (N1 : @f) * d.frombits(d.L1)) - ((N2 : @f) * d.frombits(d.L1))
else
- R1 = f - (N : @f) * d.frombits(d.L1)
+ R1 = x - (N : @f) * d.frombits(d.L1)
;;
R2 = -1.0 * (N : @f) * d.frombits(d.L2)
diff --git a/lib/math/fpmath.myr b/lib/math/fpmath.myr
index bfc6b11..e470ec1 100644
--- a/lib/math/fpmath.myr
+++ b/lib/math/fpmath.myr
@@ -3,36 +3,56 @@ use std
pkg math =
trait fpmath @f =
+ /* atan-impl */
+ atan : (x : @f -> @f)
+ atan2 : (y : @f, x : @f -> @f)
+
/* exp-impl */
- exp : (f : @f -> @f)
- expm1 : (f : @f -> @f)
+ exp : (x : @f -> @f)
+ expm1 : (x : @f -> @f)
/* fma-impl */
fma : (x : @f, y : @f, z : @f -> @f)
+ /* log-impl */
+ log : (x : @f -> @f)
+ log1p : (x : @f -> @f)
+
/* poly-impl */
horner_poly : (x : @f, a : @f[:] -> @f)
horner_polyu : (x : @f, a : @u[:] -> @f)
+ /* powr-impl */
+ powr : (x : @f, y : @f -> @f)
+
/* scale2-impl */
- scale2 : (f : @f, m : @i -> @f)
+ scale2 : (x : @f, m : @i -> @f)
+
+ /* sin-impl */
+ sin : (x : @f -> @f)
+ cos : (x : @f -> @f)
+ sincos : (x : @f -> (@f, @f))
/* sqrt-impl */
- sqrt : (f : @f -> @f)
+ sqrt : (x : @f -> @f)
/* sum-impl */
kahan_sum : (a : @f[:] -> @f)
priest_sum : (a : @f[:] -> @f)
+ /* tan-impl */
+ tan : (x : @f -> @f)
+ cot : (x : @f -> @f)
+
/* trunc-impl */
- trunc : (f : @f -> @f)
- ceil : (f : @f -> @f)
- floor : (f : @f -> @f)
+ trunc : (x : @f -> @f)
+ ceil : (x : @f -> @f)
+ floor : (x : @f -> @f)
;;
trait roundable @f -> @i =
/* round-impl */
- rn : (f : @f -> @i)
+ rn : (x : @f -> @i)
;;
impl std.equatable flt32
@@ -60,58 +80,97 @@ impl std.equatable flt64 =
;;
impl roundable flt32 -> int32 =
- rn = {f : flt32; -> rn32(f) }
+ rn = {x : flt32; -> rn32(x) }
;;
impl roundable flt64 -> int64 =
- rn = {f : flt64; -> rn64(f) }
+ rn = {x : flt64; -> rn64(x) }
;;
impl fpmath flt32 =
+ atan = {x; -> atan32(x)}
+ atan2 = {y, x; -> atan232(y, x)}
+
fma = {x, y, z; -> fma32(x, y, z)}
- exp = {f; -> exp32(f)}
- expm1 = {f; -> expm132(f)}
+ exp = {x; -> exp32(x)}
+ expm1 = {x; -> expm132(x)}
+
+ log = {x; -> log32(x)}
+ log1p = {x; -> log1p32(x)}
+
+ horner_poly = {x, a; -> horner_poly32(x, a)}
+ horner_polyu = {x, a; -> horner_polyu32(x, a)}
- horner_poly = {f, a; -> horner_poly32(f, a)}
- horner_polyu = {f, a; -> horner_polyu32(f, a)}
+ powr = {x, y; -> powr32(x, y)}
- scale2 = {f, m; -> scale232(f, m)}
+ scale2 = {x, m; -> scale232(x, m)}
- sqrt = {f; -> sqrt32(f)}
+ sin = {x; -> sin32(x)}
+ cos = {x; -> cos32(x)}
+ sincos = {x; -> sincos32(x)}
+
+ sqrt = {x; -> sqrt32(x)}
kahan_sum = {l; -> kahan_sum32(l) }
priest_sum = {l; -> priest_sum32(l) }
- trunc = {f; -> trunc32(f)}
- floor = {f; -> floor32(f)}
- ceil = {f; -> ceil32(f)}
+ tan = {x; -> tan32(x)}
+ cot = {x; -> cot32(x)}
+
+ trunc = {x; -> trunc32(x)}
+ floor = {x; -> floor32(x)}
+ ceil = {x; -> ceil32(x)}
;;
impl fpmath flt64 =
+ atan = {x; -> atan64(x)}
+ atan2 = {y, x; -> atan264(y, x)}
+
fma = {x, y, z; -> fma64(x, y, z)}
- exp = {f; -> exp64(f)}
- expm1 = {f; -> expm164(f)}
+ exp = {x; -> exp64(x)}
+ expm1 = {x; -> expm164(x)}
+
+ log = {x; -> log64(x)}
+ log1p = {x; -> log1p64(x)}
- horner_poly = {f, a; -> horner_poly64(f, a)}
- horner_polyu = {f, a; -> horner_polyu64(f, a)}
+ horner_poly = {x, a; -> horner_poly64(x, a)}
+ horner_polyu = {x, a; -> horner_polyu64(x, a)}
- scale2 = {f, m; -> scale264(f, m)}
+ powr = {x, y; -> powr64(x, y)}
- sqrt = {f; -> sqrt64(f)}
+ scale2 = {x, m; -> scale264(x, m)}
+
+ sin = {x; -> sin64(x)}
+ cos = {x; -> cos64(x)}
+ sincos = {x; -> sincos64(x)}
+
+ sqrt = {x; -> sqrt64(x)}
kahan_sum = {l; -> kahan_sum64(l) }
priest_sum = {l; -> priest_sum64(l) }
- trunc = {f; -> trunc64(f)}
- floor = {f; -> floor64(f)}
- ceil = {f; -> ceil64(f)}
+ tan = {x; -> tan64(x)}
+ cot = {x; -> cot64(x)}
+
+ trunc = {x; -> trunc64(x)}
+ floor = {x; -> floor64(x)}
+ ceil = {x; -> ceil64(x)}
;;
-extern const rn32 : (f : flt32 -> int32)
-extern const rn64 : (f : flt64 -> int64)
+extern const rn32 : (x : flt32 -> int32)
+extern const rn64 : (x : flt64 -> int64)
+
+extern const atan32 : (x : flt32 -> flt32)
+extern const atan64 : (x : flt64 -> flt64)
+
+extern const atan232 : (y : flt32, x : flt32 -> flt32)
+extern const atan264 : (y : flt64, x : flt64 -> flt64)
+
+extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)
+extern const fma64 : (x : flt64, y : flt64, z : flt64 -> flt64)
extern const exp32 : (x : flt32 -> flt32)
extern const exp64 : (x : flt64 -> flt64)
@@ -119,17 +178,32 @@ extern const exp64 : (x : flt64 -> flt64)
extern const expm132 : (x : flt32 -> flt32)
extern const expm164 : (x : flt64 -> flt64)
-extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)
-extern const fma64 : (x : flt64, y : flt64, z : flt64 -> flt64)
+extern const log32 : (x : flt32 -> flt32)
+extern const log64 : (x : flt64 -> flt64)
-extern const horner_poly32 : (f : flt32, a : flt32[:] -> flt32)
-extern const horner_poly64 : (f : flt64, a : flt64[:] -> flt64)
+extern const log1p32 : (x : flt32 -> flt32)
+extern const log1p64 : (x : flt64 -> flt64)
-extern const horner_polyu32 : (f : flt32, a : uint32[:] -> flt32)
-extern const horner_polyu64 : (f : flt64, a : uint64[:] -> flt64)
+extern const horner_poly32 : (x : flt32, a : flt32[:] -> flt32)
+extern const horner_poly64 : (x : flt64, a : flt64[:] -> flt64)
-extern const scale232 : (f : flt32, m : int32 -> flt32)
-extern const scale264 : (f : flt64, m : int64 -> flt64)
+extern const horner_polyu32 : (x : flt32, a : uint32[:] -> flt32)
+extern const horner_polyu64 : (x : flt64, a : uint64[:] -> flt64)
+
+extern const powr32 : (x : flt32, y : flt32 -> flt32)
+extern const powr64 : (x : flt64, y : flt64 -> flt64)
+
+extern const scale232 : (x : flt32, m : int32 -> flt32)
+extern const scale264 : (x : flt64, m : int64 -> flt64)
+
+extern const sin32 : (x : flt32 -> flt32)
+extern const sin64 : (x : flt64 -> flt64)
+
+extern const cos32 : (x : flt32 -> flt32)
+extern const cos64 : (x : flt64 -> flt64)
+
+extern const sincos32 : (x : flt32 -> (flt32, flt32))
+extern const sincos64 : (x : flt64 -> (flt64, flt64))
extern const sqrt32 : (x : flt32 -> flt32)
extern const sqrt64 : (x : flt64 -> flt64)
@@ -140,11 +214,17 @@ extern const kahan_sum64 : (l : flt64[:] -> flt64)
extern const priest_sum32 : (l : flt32[:] -> flt32)
extern const priest_sum64 : (l : flt64[:] -> flt64)
-extern const trunc32 : (f : flt32 -> flt32)
-extern const trunc64 : (f : flt64 -> flt64)
+extern const tan32 : (x : flt32 -> flt32)
+extern const tan64 : (x : flt64 -> flt64)
+
+extern const cot32 : (x : flt32 -> flt32)
+extern const cot64 : (x : flt64 -> flt64)
+
+extern const trunc32 : (x : flt32 -> flt32)
+extern const trunc64 : (x : flt64 -> flt64)
-extern const floor32 : (f : flt32 -> flt32)
-extern const floor64 : (f : flt64 -> flt64)
+extern const floor32 : (x : flt32 -> flt32)
+extern const floor64 : (x : flt64 -> flt64)
-extern const ceil32 : (f : flt32 -> flt32)
-extern const ceil64 : (f : flt64 -> flt64)
+extern const ceil32 : (x : flt32 -> flt32)
+extern const ceil64 : (x : flt64 -> flt64)
diff --git a/lib/math/log-impl.myr b/lib/math/log-impl.myr
new file mode 100644
index 0000000..f2ff68e
--- /dev/null
+++ b/lib/math/log-impl.myr
@@ -0,0 +1,588 @@
+use std
+
+use "fpmath"
+use "util"
+
+/*
+ See [Mul16] (6.2.2) and [Tan90].
+ */
+pkg math =
+ /*
+ Logarithm entry points for flt32 and flt64. The tables below store
+ log(2) as roughly 0.693, so these are presumably natural logarithms
+ -- the implementations are outside this view.
+ */
+ pkglocal const log32 : (x : flt32 -> flt32)
+ pkglocal const log64 : (x : flt64 -> flt64)
+
+ /* Presumably log(1 + x) -- TODO confirm against the implementation */
+ pkglocal const log1p32 : (x : flt32 -> flt32)
+ pkglocal const log1p64 : (x : flt64 -> flt64)
+
+ /* Constants from [Tan90], note that [128] contains accurate log(2) */
+ /* Declared pkglocal, so other files of this package can use the tables as well */
+ pkglocal const accurate_logs32 : (uint32, uint32)[129]
+ pkglocal const accurate_logs64 : (uint64, uint64)[129]
+;;
+
+/* Polynomial evaluators; the argument is named x to match the declarations in fpmath.myr and exp-impl.myr */
+extern const horner_polyu32 : (x : flt32, a : uint32[:] -> flt32)
+extern const horner_polyu64 : (x : flt64, a : uint64[:] -> flt64)
+
+/*
+ Descriptor parameterised over a floating point type @f, an unsigned
+ integer type @u and a signed integer type @i. exp-impl.myr uses the
+ same pattern, instantiated as (flt32, uint32, int32) and
+ (flt64, uint64, int64); the instantiations for this file are outside
+ this view.
+
+ It bundles the conversion routines for @f with the masks, limits and
+ tables grouped by the comments below. Fields of type @u other than
+ the masks are presumably bit patterns meant to be passed through
+ frombits -- confirm against the code that reads them.
+ */
+type fltdesc(@f, @u, @i) = struct
+ /* Conversion and evaluation routines for this precision */
+ explode : (f : @f -> (bool, @i, @u))
+ assem : (n : bool, e : @i, s : @u -> @f)
+ horner : (f : @f, a : @u[:] -> @f)
+ tobits : (f : @f -> @u)
+ frombits : (u : @u -> @f)
+ /* Masks, exponent limits and special values */
+ sgnmask : @u
+ sig8mask : @u
+ sig8last : @u
+ emin : @i
+ emax : @i
+ precision : @u
+ inf : @u
+ ninf : @u
+ nan : @u
+
+ /* For log */
+ logT1 : @u
+ logT2 : @u
+
+ /* For log1p */
+ log1pT1 : @u
+ log1pT2 : @u
+ T3exp : @u
+
+ /* For procedure 1 */
+ C : (@u, @u)[:]
+ Ai : @u[:]
+
+ /* For procedure 2 */
+ Bi : @u[:]
+ Mtruncmask : @u
+;;
+
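+/* Accurate representations for log(1 + j/2^7), j = 0, ..., 128. Each entry is a pair of flt32 bit patterns: a leading part followed by a much smaller correction. */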
+const accurate_logs32 = [
+ (0000000000, 0000000000),
+ (0x3bff0000, 0x3429ac42),
+ (0x3c7e0000, 0x35a8b0fc),
+ (0x3cbdc000, 0x368d83eb),
+ (0x3cfc2000, 0xb6b278c4),
+ (0x3d1cf000, 0x3687b9ff),
+ (0x3d3ba000, 0x3631ec66),
+ (0x3d5a1000, 0x36dd7119),
+ (0x3d785000, 0x35c30046),
+ (0x3d8b2800, 0x365bba8e),
+ (0x3d9a1000, 0xb621a791),
+ (0x3da8d800, 0x34e7e0c3),
+ (0x3db78800, 0xb635d46a),
+ (0x3dc61800, 0x368bac63),
+ (0x3dd49000, 0x36da7496),
+ (0x3de2f000, 0x36a91eb8),
+ (0x3df13800, 0x34edc55e),
+ (0x3dff6800, 0xb6dd9c48),
+ (0x3e06bc00, 0xb44197b9),
+ (0x3e0db800, 0x36ab54be),
+ (0x3e14ac00, 0xb6b41f80),
+ (0x3e1b9000, 0xb4f7f85c),
+ (0x3e226800, 0x36adb32e),
+ (0x3e293800, 0xb650e2f2),
+ (0x3e2ff800, 0x36c1c29e),
+ (0x3e36b000, 0x35fe719d),
+ (0x3e3d5c00, 0x3590210e),
+ (0x3e43fc00, 0x36819483),
+ (0x3e4a9400, 0xb6958c2f),
+ (0x3e511c00, 0x36f07f8b),
+ (0x3e57a000, 0xb6dac5fd),
+ (0x3e5e1400, 0x354e85b2),
+ (0x3e648000, 0xb5838656),
+ (0x3e6ae000, 0x3685ad3f),
+ (0x3e713800, 0x356dc55e),
+ (0x3e778400, 0x36b72f71),
+ (0x3e7dc800, 0x36436af2),
+ (0x3e820200, 0xb6d35a59),
+ (0x3e851a00, 0xb6d8ec63),
+ (0x3e882c00, 0x363f9ae5),
+ (0x3e8b3a00, 0x36e55d5d),
+ (0x3e8e4400, 0x36c60b4d),
+ (0x3e914a00, 0x34fde7bd),
+ (0x3e944a00, 0x36d09ef4),
+ (0x3e974800, 0xb6ea28f7),
+ (0x3e9a3e00, 0x36ecd4c4),
+ (0x3e9d3200, 0x36455694),
+ (0x3ea02200, 0xb6779796),
+ (0x3ea30c00, 0x363c21c6),
+ (0x3ea5f200, 0x36fcabbc),
+ (0x3ea8d600, 0xb693c690),
+ (0x3eabb400, 0xb60e8baa),
+ (0x3eae8e00, 0xb51029fe),
+ (0x3eb16400, 0x353cae72),
+ (0x3eb43600, 0x3601e9b1),
+ (0x3eb70400, 0x366aa2ba),
+ (0x3eb9ce00, 0x36bfb5df),
+ (0x3ebc9600, 0xb6d50116),
+ (0x3ebf5800, 0xb5f88faa),
+ (0x3ec21600, 0x368ed0f4),
+ (0x3ec4d200, 0xb64793ec),
+ (0x3ec78800, 0x36f439b3),
+ (0x3eca3c00, 0x36a0e109),
+ (0x3eccec00, 0x36ac08bf),
+ (0x3ecf9a00, 0xb6e09a03),
+ (0x3ed24200, 0x3410e5bb),
+ (0x3ed4e800, 0xb69b2b30),
+ (0x3ed78a00, 0xb6b66dc4),
+ (0x3eda2800, 0xb6084337),
+ (0x3edcc200, 0x36c4b499),
+ (0x3edf5a00, 0x3659da72),
+ (0x3ee1ee00, 0x36bd3e6d),
+ (0x3ee48000, 0xb6038656),
+ (0x3ee70e00, 0xb687a3d0),
+ (0x3ee99800, 0xb4c0ff8a),
+ (0x3eec2000, 0xb6c6d3af),
+ (0x3eeea400, 0xb6afd9f2),
+ (0x3ef12400, 0x3601a7c7),
+ (0x3ef3a200, 0x351875a2),
+ (0x3ef61c00, 0x36ce9234),
+ (0x3ef89400, 0x3675faf0),
+ (0x3efb0a00, 0xb6e02c7f),
+ (0x3efd7a00, 0x36c47bc8),
+ (0x3effea00, 0xb68fbd40),
+ (0x3f012b00, 0xb6d5a5a3),
+ (0x3f025f00, 0xb444adb2),
+ (0x3f039200, 0xb551f190),
+ (0x3f04c300, 0x36f4f573),
+ (0x3f05f400, 0xb6d1bdad),
+ (0x3f072200, 0x36985d1d),
+ (0x3f085000, 0xb6c61d2b),
+ (0x3f097c00, 0xb6e6a6c1),
+ (0x3f0aa600, 0x35f4bd35),
+ (0x3f0bcf00, 0x36abbd8a),
+ (0x3f0cf700, 0x36568cf9),
+ (0x3f0e1e00, 0xb67c11d8),
+ (0x3f0f4300, 0xb4a18fbf),
+ (0x3f106700, 0xb5cb9b55),
+ (0x3f118a00, 0xb6f28414),
+ (0x3f12ab00, 0xb6062ce1),
+ (0x3f13cb00, 0xb576bb27),
+ (0x3f14ea00, 0xb68013d5),
+ (0x3f160700, 0x369ed449),
+ (0x3f172400, 0xb6bc91c0),
+ (0x3f183f00, 0xb68ccb0f),
+ (0x3f195900, 0xb6cc6ede),
+ (0x3f1a7100, 0x3689d9ce),
+ (0x3f1b8900, 0xb684ab8c),
+ (0x3f1c9f00, 0x34d3562a),
+ (0x3f1db400, 0x36094000),
+ (0x3f1ec800, 0x359a9c56),
+ (0x3f1fdb00, 0xb60f65d2),
+ (0x3f20ec00, 0x36fe8467),
+ (0x3f21fd00, 0xb368318d),
+ (0x3f230c00, 0x36bc21c6),
+ (0x3f241b00, 0xb6c2e157),
+ (0x3f252800, 0xb67449f8),
+ (0x3f263400, 0xb64a0662),
+ (0x3f273f00, 0xb67dc915),
+ (0x3f284900, 0xb6c33fe9),
+ (0x3f295100, 0x36d265bc),
+ (0x3f2a5900, 0x360cf333),
+ (0x3f2b6000, 0xb6454982),
+ (0x3f2c6500, 0x36db5cd8),
+ (0x3f2d6a00, 0x34186b3e),
+ (0x3f2e6e00, 0xb6e2393f),
+ (0x3f2f7000, 0x35aa4906),
+ (0x3f307200, 0xb6d0bb87),
+ (0x3f317200, 0x35bfbe8e), /* Note C[128] is log2 */
+]
+
+const accurate_logs64 = [
+ (000000000000000000, 000000000000000000),
+ (0x3f7fe02a6b200000, 0xbd6f30ee07912df9),
+ (0x3f8fc0a8b1000000, 0xbd5fe0e183092c59),
+ (0x3f97b91b07d80000, 0xbd62772ab6c0559c),
+ (0x3f9f829b0e780000, 0x3d2980267c7e09e4),
+ (0x3fa39e87ba000000, 0xbd642a056fea4dfd),
+ (0x3fa77458f6340000, 0xbd62303b9cb0d5e1),
+ (0x3fab42dd71180000, 0x3d671bec28d14c7e),
+ (0x3faf0a30c0100000, 0x3d662a6617cc9717),
+ (0x3fb16536eea40000, 0xbd60a3e2f3b47d18),
+ (0x3fb341d7961c0000, 0xbd4717b6b33e44f8),
+ (0x3fb51b073f060000, 0x3d383f69278e686a),
+ (0x3fb6f0d28ae60000, 0xbd62968c836cc8c2),
+ (0x3fb8c345d6320000, 0xbd5937c294d2f567),
+ (0x3fba926d3a4a0000, 0x3d6aac6ca17a4554),
+ (0x3fbc5e548f5c0000, 0xbd4c5e7514f4083f),
+ (0x3fbe27076e2a0000, 0x3d6e5cbd3d50fffc),
+ (0x3fbfec9131dc0000, 0xbd354555d1ae6607),
+ (0x3fc0d77e7cd10000, 0xbd6c69a65a23a170),
+ (0x3fc1b72ad52f0000, 0x3d69e80a41811a39),
+ (0x3fc29552f8200000, 0xbd35b967f4471dfc),
+ (0x3fc371fc201f0000, 0xbd6c22f10c9a4ea8),
+ (0x3fc44d2b6ccb0000, 0x3d6f4799f4f6543e),
+ (0x3fc526e5e3a20000, 0xbd62f21746ff8a47),
+ (0x3fc5ff3070a80000, 0xbd6b0b0de3077d7e),
+ (0x3fc6d60fe71a0000, 0xbd56f1b955c4d1da),
+ (0x3fc7ab8902110000, 0xbd537b720e4a694b),
+ (0x3fc87fa065210000, 0xbd5b77b7effb7f41),
+ (0x3fc9525a9cf40000, 0x3d65ad1d904c1d4e),
+ (0x3fca23bc1fe30000, 0xbd62a739b23b93e1),
+ (0x3fcaf3c94e810000, 0xbd600349cc67f9b2),
+ (0x3fcbc286742e0000, 0xbd6cca75818c5dbc),
+ (0x3fcc8ff7c79b0000, 0xbd697794f689f843),
+ (0x3fcd5c216b500000, 0xbd611ba91bbca682),
+ (0x3fce27076e2b0000, 0xbd3a342c2af0003c),
+ (0x3fcef0adcbdc0000, 0x3d664d948637950e),
+ (0x3fcfb9186d5e0000, 0x3d5f1546aaa3361c),
+ (0x3fd0402594b50000, 0xbd67df928ec217a5),
+ (0x3fd0a324e2738000, 0x3d50e35f73f7a018),
+ (0x3fd1058bf9ae8000, 0xbd6a9573b02faa5a),
+ (0x3fd1675cabab8000, 0x3d630701ce63eab9),
+ (0x3fd1c898c1698000, 0x3d59fafbc68e7540),
+ (0x3fd22941fbcf8000, 0xbd3a6976f5eb0963),
+ (0x3fd2895a13de8000, 0x3d3a8d7ad24c13f0),
+ (0x3fd2e8e2bae10000, 0x3d5d309c2cc91a85),
+ (0x3fd347dd9a988000, 0xbd25594dd4c58092),
+ (0x3fd3a64c55698000, 0xbd6d0b1c68651946),
+ (0x3fd4043086868000, 0x3d63f1de86093efa),
+ (0x3fd4618bc21c8000, 0xbd609ec17a426426),
+ (0x3fd4be5f95778000, 0xbd3d7c92cd9ad824),
+ (0x3fd51aad872e0000, 0xbd3f4bd8db0a7cc1),
+ (0x3fd5767717458000, 0xbd62c9d5b2a49af9),
+ (0x3fd5d1bdbf580000, 0x3d4394a11b1c1ee4),
+ (0x3fd62c82f2ba0000, 0xbd6c356848506ead),
+ (0x3fd686c81e9b0000, 0x3d54aec442be1015),
+ (0x3fd6e08eaa2b8000, 0x3d60f1c609c98c6c),
+ (0x3fd739d7f6bc0000, 0xbd67fcb18ed9d603),
+ (0x3fd792a55fdd8000, 0xbd6c2ec1f512dc03),
+ (0x3fd7eaf83b828000, 0x3d67e1b259d2f3da),
+ (0x3fd842d1da1e8000, 0x3d462e927628cbc2),
+ (0x3fd89a3386c18000, 0xbd6ed2a52c73bf78),
+ (0x3fd8f11e87368000, 0xbd5d3881e8962a96),
+ (0x3fd947941c210000, 0x3d56faba4cdd147d),
+ (0x3fd99d9581180000, 0xbd5f753456d113b8),
+ (0x3fd9f323ecbf8000, 0x3d584bf2b68d766f),
+ (0x3fda484090e58000, 0x3d6d8515fe535b87),
+ (0x3fda9cec9a9a0000, 0x3d40931a909fea5e),
+ (0x3fdaf12932478000, 0xbd3e53bb31eed7a9),
+ (0x3fdb44f77bcc8000, 0x3d4ec5197ddb55d3),
+ (0x3fdb985896930000, 0x3d50fb598fb14f89),
+ (0x3fdbeb4d9da70000, 0x3d5b7bf7861d37ac),
+ (0x3fdc3dd7a7cd8000, 0x3d66a6b9d9e0a5bd),
+ (0x3fdc8ff7c79a8000, 0x3d5a21ac25d81ef3),
+ (0x3fdce1af0b860000, 0xbd48290905a86aa6),
+ (0x3fdd32fe7e010000, 0xbd542a9e21373414),
+ (0x3fdd83e7258a0000, 0x3d679f2828add176),
+ (0x3fddd46a04c20000, 0xbd6dafa08cecadb1),
+ (0x3fde24881a7c8000, 0xbd53d9e34270ba6b),
+ (0x3fde744261d68000, 0x3d3e1f8df68dbcf3),
+ (0x3fdec399d2468000, 0x3d49802eb9dca7e7),
+ (0x3fdf128f5faf0000, 0x3d3bb2cd720ec44c),
+ (0x3fdf6123fa700000, 0x3d645630a2b61e5b),
+ (0x3fdfaf588f790000, 0xbd49c24ca098362b),
+ (0x3fdffd2e08580000, 0xbd46cf54d05f9367),
+ (0x3fe02552a5a5c000, 0x3d60fec69c695d7f),
+ (0x3fe04bdf9da94000, 0xbd692d9a033eff75),
+ (0x3fe0723e5c1cc000, 0x3d6f404e57963891),
+ (0x3fe0986f4f574000, 0xbd55be8dc04ad601),
+ (0x3fe0be72e4254000, 0xbd657d49676844cc),
+ (0x3fe0e44985d1c000, 0x3d5917edd5cbbd2d),
+ (0x3fe109f39e2d4000, 0x3d592dfbc7d93617),
+ (0x3fe12f7195940000, 0xbd6043acfedce638),
+ (0x3fe154c3d2f4c000, 0x3d65e9a98f33a396),
+ (0x3fe179eabbd88000, 0x3d69a0bfc60e6fa0),
+ (0x3fe19ee6b467c000, 0x3d52dd98b97baef0),
+ (0x3fe1c3b81f714000, 0xbd3eda1b58389902),
+ (0x3fe1e85f5e704000, 0x3d1a07bd8b34be7c),
+ (0x3fe20cdcd192c000, 0xbd64926cafc2f08a),
+ (0x3fe23130d7bec000, 0xbd17afa4392f1ba7),
+ (0x3fe2555bce990000, 0xbd506987f78a4a5e),
+ (0x3fe2795e1289c000, 0xbd5dca290f81848d),
+ (0x3fe29d37fec2c000, 0xbd5eea6f465268b4),
+ (0x3fe2c0e9ed448000, 0x3d5d1772f5386374),
+ (0x3fe2e47436e40000, 0x3d334202a10c3491),
+ (0x3fe307d7334f0000, 0x3d60be1fb590a1f5),
+ (0x3fe32b1339120000, 0x3d6d71320556b67b),
+ (0x3fe34e289d9d0000, 0xbd6e2ce9146d277a),
+ (0x3fe37117b5474000, 0x3d4ed71774092113),
+ (0x3fe393e0d3564000, 0xbd65e6563bbd9fc9),
+ (0x3fe3b6844a000000, 0xbd3eea838909f3d3),
+ (0x3fe3d9026a714000, 0x3d66faa404263d0b),
+ (0x3fe3fb5b84d18000, 0xbd60bda4b162afa3),
+ (0x3fe41d8fe8468000, 0xbd5aa33736867a17),
+ (0x3fe43f9fe2f9c000, 0x3d5ccef4e4f736c2),
+ (0x3fe4618bc21c4000, 0x3d6ec27d0b7b37b3),
+ (0x3fe48353d1ea8000, 0x3d51bee7abd17660),
+ (0x3fe4a4f85db04000, 0xbd244fdd840b8591),
+ (0x3fe4c679afcd0000, 0xbd61c64e971322ce),
+ (0x3fe4e7d811b74000, 0x3d6bb09cb0985646),
+ (0x3fe50913cc018000, 0xbd6794b434c5a4f5),
+ (0x3fe52a2d265bc000, 0x3d46abb9df22bc57),
+ (0x3fe54b2467998000, 0x3d6497a915428b44),
+ (0x3fe56bf9d5b40000, 0xbd58cd7dc73bd194),
+ (0x3fe58cadb5cd8000, 0xbd49db3db43689b4),
+ (0x3fe5ad404c358000, 0x3d6f2cfb29aaa5f0),
+ (0x3fe5cdb1dc6c0000, 0x3d67648cf6e3c5d7),
+ (0x3fe5ee02a9240000, 0x3d667570d6095fd2),
+ (0x3fe60e32f4478000, 0x3d51b194f912b417),
+ (0x3fe62e42fefa4000, 0xbd48432a1b0e2634),
+]
+
+/*
+ flt32 descriptor handed to loggen and log1pgen. The threshold
+ fields (logT1, logT2, log1pT1, log1pT2) are raw bit patterns: the
+ generic code compares them against d.tobits(x), not against the
+ floating-point value. C is the table indexed by procedure_1; Ai
+ and Bi are coefficient lists passed to d.horner by procedure_1
+ and procedure_2 respectively.
+ */
+const desc32 : fltdesc(flt32, uint32, int32) = [
+ .explode = std.flt32explode,
+ .assem = std.flt32assem,
+ .horner = horner_polyu32,
+ .tobits = std.flt32bits,
+ .frombits = std.flt32frombits,
+ .sgnmask = (1 << 31),
+ .sig8mask = 0xffff0000, /* Mask to get 8 significant bits */
+ .sig8last = 16, /* Last bit kept when masking */
+ .emin = -126,
+ .emax = 127,
+ .precision = 24,
+ .inf = 0x7f800000,
+ .ninf = 0xff800000,
+ .nan = 0x7fc00000,
+ .logT1 = 0x3f707d5f, /* Just smaller than e^(-1/16) ~= 0.939413 */
+ .logT2 = 0x3f88415b, /* Just larger than e^(1/16) ~= 1.06449 */
+ .log1pT1 = 0xbd782a03, /* Just smaller than e^(-1/16) - 1 ~= -0.0605869 */
+ .log1pT2 = 0x3d8415ac, /* Just larger than e^(1/16) - 1 ~= 0.06449445 */
+ .T3exp = 26, /* Beyond 2^T3exp, 1 + x rounds to x */
+ .C = accurate_logs32[:],
+ .Ai = [ 0x3daaaac2 ][:], /* Coefficients for log(1 + f/F) */
+ .Bi = [ /* Coefficients for log(1 + f) in terms of a = 2f/(2 + f) */
+ 0x3daaaaa9,
+ 0x3c4d0095,
+ ][:],
+ .Mtruncmask = 0xfffff000, /* Mask to get 12 significant bits */
+]
+
+/*
+ flt64 descriptor handed to loggen and log1pgen. The threshold
+ fields (logT1, logT2, log1pT1, log1pT2) are raw bit patterns: the
+ generic code compares them against d.tobits(x), not against the
+ floating-point value. C is the table indexed by procedure_1; Ai
+ and Bi are coefficient lists passed to d.horner by procedure_1
+ and procedure_2 respectively.
+ */
+const desc64 : fltdesc(flt64, uint64, int64) = [
+ .explode = std.flt64explode,
+ .assem = std.flt64assem,
+ .horner = horner_polyu64,
+ .tobits = std.flt64bits,
+ .frombits = std.flt64frombits,
+ .sgnmask = (1 << 63),
+ .sig8mask = 0xffffe00000000000, /* Mask to get 8 significant bits */
+ .sig8last = 45, /* Last bit kept when masking */
+ .emin = -1022,
+ .emax = 1023,
+ .precision = 53,
+ .inf = 0x7ff0000000000000,
+ .ninf = 0xfff0000000000000,
+ .nan = 0x7ff8000000000000,
+ .logT1 = 0x3fee0fabfbc702a3, /* Just smaller than e^(-1/16) ~= 0.939413 */
+ .logT2 = 0x3ff1082b577d34ee, /* Just larger than e^(1/16) ~= 1.06449 */
+ .log1pT1 = 0xbfaf0540428fd5c4, /* Just smaller than e^(-1/16) - 1 ~= -0.0605869 */
+ .log1pT2 = 0x3fb082b577d34ed8, /* Just larger than e^(1/16) - 1 ~= 0.06449445 */
+ .T3exp = 55, /* Beyond 2^T3exp, 1 + x rounds to x */
+ .C = accurate_logs64[:],
+ .Ai = [ /* Coefficients for log(1 + f/F) */
+ 0x3fb5555555550286,
+ 0x3f8999a0bc712416,
+ ][:],
+ .Bi = [ /* Coefficients for log(1 + f) in terms of a = 2f/(2 + f) */
+ 0x3fb55555555554e6,
+ 0x3f89999999bac6d4,
+ 0x3f62492307f1519f,
+ 0x3f3c8034c85dfff0,
+ ][:],
+ .Mtruncmask = 0xfffffffff0000000, /* Mask to get 24 significant bits */
+]
+
+/* Natural logarithm for flt32: runs loggen with the flt32 descriptor. */
+const log32 = {x : flt32
+	var result = loggen(x, desc32)
+	-> result
+}
+
+/* Natural logarithm for flt64: runs loggen with the flt64 descriptor. */
+const log64 = {x : flt64
+	var result = loggen(x, desc64)
+	-> result
+}
+
+/*
+ Generic natural logarithm behind log32 and log64.
+
+ x is the argument; d supplies the bit-level helpers, thresholds
+ and tables for x's floating-point type.
+
+ After the special cases, inputs whose bit pattern lies strictly
+ between d.logT1 and d.logT2 go to procedure_2 (polynomial in
+ x - 1). Everything else is reduced to 2^m * (F + f) and goes to
+ procedure_1 (table lookup plus a short polynomial).
+ */
+generic loggen = {x : @f, d : fltdesc(@f, @u, @i) :: numeric,floating @f, numeric,integral @u, numeric,integral @i, roundable @f -> @i
+ var b = d.tobits(x)
+ var n : bool, e : @i, s : @u
+ (n, e, s) = d.explode(x)
+
+ /*
+ Special cases for NaN, +/- 0, < 0, inf, and 1. There are
+ certain exceptions (inexact, division by 0, &c) that
+ should be flagged in these cases, which we do not honor
+ currently. See [Tan90].
+
+ In order: NaN -> NaN, +/- 0 -> -oo, negative -> NaN,
+ +oo -> +oo, 1 -> 0.
+ */
+ if std.isnan(x)
+ -> d.frombits(d.nan)
+ elif (b & ~d.sgnmask == 0)
+ -> d.frombits(d.ninf)
+ elif n
+ -> d.frombits(d.nan)
+ elif (b == d.inf)
+ -> x
+ elif std.eq(x, (1.0 : @f))
+ -> (0.0 : @f)
+ ;;
+
+ /* If x is close to 1, polynomial log1p(x - 1) will be sufficient */
+ if (d.logT1 < b && b < d.logT2)
+ -> procedure_2(x - (1.0 : @f), d)
+ ;;
+
+ /*
+ Reduce x to 2^m * (F + f), with (F + f) in [1, 2), so
+ procedure_1's tables work. We also require that F have
+ only 8 significant bits.
+ */
+ var m : @i, Y : @f, F : @f, f : @f
+
+ if e < d.emin
+ /* Normalize significand */
+ var first_1 = find_first1_64((s : uint64), (d.precision : int64))
+ var offset = (d.precision : @u) - 1 - (first_1 : @u)
+ s = s << offset
+ e = d.emin - offset
+ ;;
+
+ m = e
+ /* Y is x's significand re-assembled with exponent 0 */
+ Y = d.assem(false, 0, s)
+ /* F is Y cut to the bits kept by sig8mask, bumped by one unit of sig8last when need_round_away says so */
+ if need_round_away(0, (s : uint64), (d.sig8last : int64))
+ F = d.frombits((d.tobits(Y) & d.sig8mask) + (1 << d.sig8last))
+ else
+ F = d.frombits(d.tobits(Y) & d.sig8mask)
+ ;;
+
+ f = Y - F
+
+ -> procedure_1(m, F, f, Y, d)
+}
+
+/* log(1 + x) for flt32: runs log1pgen with the flt32 descriptor. */
+const log1p32 = {x : flt32
+	var result = log1pgen(x, desc32)
+	-> result
+}
+
+/* log(1 + x) for flt64: runs log1pgen with the flt64 descriptor. */
+const log1p64 = {x : flt64
+	var result = log1pgen(x, desc64)
+	-> result
+}
+
+/*
+ Generic log1p(x) = log(1 + x) behind log1p32 and log1p64.
+
+ x is the argument; d supplies the bit-level helpers, thresholds
+ and tables for x's floating-point type.
+
+ After the special cases, tiny x is returned unchanged, x whose
+ magnitude is below the d.log1pT1 / d.log1pT2 thresholds goes to
+ procedure_2 (polynomial), and everything else is reduced to
+ 2^m * (F + f) and goes to procedure_1 (table lookup).
+ */
+generic log1pgen = {x : @f, d : fltdesc(@f, @u, @i) :: numeric,floating @f, numeric,integral @u, numeric,integral @i, roundable @f -> @i
+	var b = d.tobits(x)
+	var n, e, s
+	(n, e, s) = d.explode(x)
+
+	/*
+	   Special cases for NaN, +/- 0, -1, < -1, and inf. There are
+	   certain exceptions (inexact, division by 0, &c) that
+	   should be flagged in these cases, which we do not honor
+	   currently. See [Tan90].
+	 */
+	if std.isnan(x)
+		-> d.frombits(d.nan)
+	elif (b & ~d.sgnmask == 0)
+		/* log1p(+/- 0) keeps the sign of the zero */
+		-> x
+	elif std.eq(x, (-1.0 : @f))
+		/* log1p(-1) = log(0) = -oo, the same value loggen returns for +/- 0 */
+		-> d.frombits(d.ninf)
+	elif x < (-1.0 : @f)
+		-> d.frombits(d.nan)
+	elif (b == d.inf)
+		-> x
+	;;
+
+	/* If x is small enough that 1 + x rounds to 1, return x */
+	if e < (-d.precision : @i)
+		-> x
+	;;
+
+	/*
+	   If x is close to 0, use polynomial. The thresholds are bit
+	   patterns; for negative x both b and log1pT1 carry the sign
+	   bit, so the unsigned comparison still orders by magnitude.
+	 */
+	if (n && b < d.log1pT1) || (!n && b < d.log1pT2)
+		-> procedure_2(x, d)
+	;;
+
+	/*
+	   Reduce x to m, F, f as in the log case. However, since we're
+	   approximating 1 + x, more care has to be taken (for
+	   example: 1 + x might be infinity).
+	 */
+	var Y, m, F, f
+	if e > d.T3exp
+		Y = x
+	else
+		Y = (1.0 : @f) + x
+	;;
+
+	/*
+	   Y must be normal, otherwise x would have been -1 +
+	   (subnormal), but that would round to -1.
+	 */
+	var ny, ey, sy
+	(ny, ey, sy) = d.explode(Y)
+	m = ey
+	Y = d.assem(ny, 0, sy)
+	if need_round_away(0, (sy : uint64), (d.sig8last : int64))
+		F = d.frombits((d.tobits(Y) & d.sig8mask) + (1 << d.sig8last))
+	else
+		F = d.frombits(d.tobits(Y) & d.sig8mask)
+	;;
+
+	/*
+	   f is trickier to compute than in the log case, because
+	   the scale of the 1 is unknown near x. d.assem(false, -m, 0)
+	   is the 1 scaled by 2^(-m); the order of the two additions
+	   is picked according to m.
+	 */
+	if m <= -2
+		f = Y - F
+	elif m <= d.precision - 1
+		f = (d.assem(false, -m, 0) - F) + scale2(x, -m)
+	else
+		f = (scale2(x, -m) - F) + d.assem(false, -m, 0)
+	;;
+
+	-> procedure_1(m, F, f, Y, d)
+}
+
+/*
+ Approximate log(2^m * (F + f)) by tables.
+
+ m is the power of two pulled out of the argument, F the table
+ point, f the remainder, Y the reduced argument (the callers in
+ this file build f so that F + f stands for Y), and d the
+ descriptor that supplies the table d.C and the coefficients d.Ai.
+
+ Returns L_hi + (u + (q + L_lo)); see the body for each piece.
+ */
+generic procedure_1 = {m : @i, F : @f, f : @f, Y : @f, d : fltdesc(@f, @u, @i) :: numeric,floating @f, numeric,integral @u, numeric,integral @i, roundable @f -> @i
+ /*
+ We must compute log(2^m * (F + f)) = m log(2) + log(F)
+ + log(1 + f/F). Only this last term need be approximated,
+ since log(2) and log(F) may be precomputed.
+
+ For computing log(1 + f/F), [Tan90] gives two alternatives.
+ We choose step 3', which requires floating-point division,
+ but allows us to save approximately 2.5 KiB of precomputed
+ values.
+
+ F is some 1 + j2^(-7), so first we compute j. Note that
+ j could actually be 128 (Ex: x = 0x4effac00.)
+ */
+ var j
+ var nF, eF, sF
+ (nF, eF, sF) = d.explode(F)
+ if eF != 0
+ /* F is no longer in the exponent-0 binade, so take the last table entry */
+ j = 128
+ else
+ j = 0x7f & (((d.sig8mask & sF) >> d.sig8last) - 0x80)
+ ;;
+
+ /* Table entries are (hi, lo) bit-pattern pairs; entry 128 is used as log(2) */
+ var Cu_hi, Cu_lo, log2_hi, log2_lo
+ (Cu_hi, Cu_lo) = d.C[j]
+ (log2_hi, log2_lo) = d.C[128]
+
+ /* m log(2) + log(F), with hi and lo parts accumulated separately */
+ var L_hi = (m : @f) * d.frombits(log2_hi) + d.frombits(Cu_hi)
+ var L_lo = (m : @f) * d.frombits(log2_lo) + d.frombits(Cu_lo)
+
+ /* u = 2f/(Y + F); q is the odd polynomial correction u^3 * P(u^2) */
+ var u = ((2.0 : @f) * f)/(Y + F)
+ var v = u * u
+ var q = u * v * d.horner(v, d.Ai)
+
+ /* Add the small terms first so they are not absorbed by L_hi */
+ -> L_hi + (u + (q + L_lo))
+}
+
+/*
+ Approximate log1p by polynomial.
+
+ f is the offset from 1 (loggen passes x - 1, log1pgen passes x
+ itself); d supplies the coefficients d.Bi and the truncation mask
+ d.Mtruncmask. Returns u1 + (u2 + q), where u1 + u2 is a split
+ value of u = 2f/(2 + f) and q is the polynomial correction.
+ */
+generic procedure_2 = {f : @f, d : fltdesc(@f, @u, @i) :: numeric,floating @f, numeric,integral @u, numeric,integral @i, roundable @f -> @i
+ var g = (1.0 : @f)/((2.0 : @f) + f)
+ var u = (2.0 : @f) * f * g
+ var v = u * u
+ var q = u * v * d.horner(v, d.Bi)
+
+ /*
+ 1 / (2 + f) in working precision was good enough for the
+ polynomial evaluation, but to complete the approximation
+ we need to add 2f/(2 + f) with higher precision than
+ working. So we go back and compute better, split u.
+ */
+ /* u1 and f1 are u and f with the low bits cleared by Mtruncmask */
+ var u1 = d.frombits(d.Mtruncmask & d.tobits(u))
+ var f1 = d.frombits(d.Mtruncmask & d.tobits(f))
+ var f2 = f - f1
+ /* u2 is the correction to u1: (2(f - u1) - u1*f) / (2 + f), with u1*f expanded over f1 + f2 */
+ var u2 = (((2.0 : @f) * (f - u1) - u1 * f1) - u1 * f2) * g
+ -> u1 + (u2 + q)
+}
diff --git a/lib/math/poly-impl.myr b/lib/math/poly-impl.myr
index e7d645b..94118a0 100644
--- a/lib/math/poly-impl.myr
+++ b/lib/math/poly-impl.myr
@@ -2,44 +2,44 @@ use std
/* See [Mul16], section 5.1 */
pkg math =
- pkglocal const horner_poly32 : (f : flt32, a : flt32[:] -> flt32)
- pkglocal const horner_poly64 : (f : flt64, a : flt64[:] -> flt64)
+ pkglocal const horner_poly32 : (x : flt32, a : flt32[:] -> flt32)
+ pkglocal const horner_poly64 : (x : flt64, a : flt64[:] -> flt64)
- pkglocal const horner_polyu32 : (f : flt32, a : uint32[:] -> flt32)
- pkglocal const horner_polyu64 : (f : flt64, a : uint64[:] -> flt64)
+ pkglocal const horner_polyu32 : (x : flt32, a : uint32[:] -> flt32)
+ pkglocal const horner_polyu64 : (x : flt64, a : uint64[:] -> flt64)
;;
extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)
extern const fma64 : (x : flt64, y : flt64, z : flt64 -> flt64)
-const horner_poly32 = {f : flt32, a : flt32[:]
+const horner_poly32 = {x : flt32, a : flt32[:]
var r : flt32 = 0.0
for var j = a.len - 1; j >= 0; j--
- r = fma32(r, f, a[j])
+ r = fma32(r, x, a[j])
;;
-> r
}
-const horner_poly64 = {f : flt64, a : flt64[:]
+const horner_poly64 = {x : flt64, a : flt64[:]
var r : flt64 = 0.0
for var j = a.len - 1; j >= 0; j--
- r = fma64(r, f, a[j])
+ r = fma64(r, x, a[j])
;;
-> r
}
-const horner_polyu32 = {f : flt32, a : uint32[:]
+const horner_polyu32 = {x : flt32, a : uint32[:]
var r : flt32 = 0.0
for var j = a.len - 1; j >= 0; j--
- r = fma32(r, f, std.flt32frombits(a[j]))
+ r = fma32(r, x, std.flt32frombits(a[j]))
;;
-> r
}
-const horner_polyu64 = {f : flt64, a : uint64[:]
+const horner_polyu64 = {x : flt64, a : uint64[:]
var r : flt64 = 0.0
for var j = a.len - 1; j >= 0; j--
- r = fma64(r, f, std.flt64frombits(a[j]))
+ r = fma64(r, x, std.flt64frombits(a[j]))
;;
-> r
}
diff --git a/lib/math/powr-impl.myr b/lib/math/powr-impl.myr
new file mode 100644
index 0000000..715f611
--- /dev/null
+++ b/lib/math/powr-impl.myr
@@ -0,0 +1,406 @@
+use std
+
+use "fpmath"
+use "log-impl"
+use "util"
+
+/*
+ This is an implementation of powr, not pow, so the special cases
+ are tailored more closely to the mathematical x^y = e^(y * log(x))
+ than to historical C implementations (pow was aligned to the C99
+ standard, which was aligned to codify existing practice).
+
+ Even then, some parts of the powr specification are unclear. For
+ example, IEEE 754-2008 does not specify what powr(infty, y) must
+ return when y is not 0.0 (an erratum was planned in 2010, but
+ does not appear to have been released as of 2018).
+
+ As a note: unlike many other functions in this library, there
+ has been no serious analysis of the accuracy and speed of this
+ particular implementation. Interested observers wishing to improve
+ this library will probably find this file goldmine of mistakes,
+ both theoretical and practical.
+ */
+/* Package-local entry points: powr for single and double precision. */
+pkg math =
+ pkglocal const powr32 : (x : flt32, y : flt32 -> flt32)
+ pkglocal const powr64 : (x : flt64, y : flt64 -> flt64)
+;;
+
+/*
+ Per-precision description of a floating-point type, consumed by
+ powrgen. @f is the float type, @u the same-width unsigned type
+ used for bit patterns, @i the signed type used for exponents.
+ Fields typed @u that stand for float values (nan, inf, the
+ *_border fields) are raw bit patterns, to be passed through
+ frombits before use.
+ */
+type fltdesc(@f, @u, @i) = struct
+ explode : (f : @f -> (bool, @i, @u)) /* split into (sign, exponent, significand) */
+ assem : (n : bool, e : @i, s : @u -> @f) /* inverse of explode */
+ tobits : (f : @f -> @u)
+ frombits : (u : @u -> @f)
+ nan : @u
+ inf : @u
+ expmask : @u /* mask to detect inf or NaN */
+ precision : @u
+ emax : @i
+ emin : @i
+ sgnmask : @u
+ sig8mask : @u /* mask to get 8 significant bits */
+ sig8last : @u /* last bit kept when masking with sig8mask */
+ split_prec_mask : @u /* mask used to split values into hi + lo parts */
+ split_prec_mask2 : @u /* mask used to limit the significant bits of P */
+ C : (@u, @u)[:] /* (hi, lo) table borrowed from log-impl.myr */
+ eps_inf_border : @u /* maximal y st. (1.00..1)^y < oo */
+ eps_zero_border : @u /* minimal y st. (0.99..9)^y > 0 */
+ exp_inf_border : @u /* maximal y such that e^y < oo */
+ exp_zero_border : @u /* minimal y such that e^y > 0 */
+ exp_subnormal_border : @u /* minimal y such that e^y is normal */
+ itercount : @u /* last index of the Taylor series loop for (1 + f)^y' */
+;;
+
+/*
+ flt32 descriptor handed to powrgen by powr32. Values that stand
+ for floats (nan, inf, the *_border fields) are raw bit patterns.
+ */
+const desc32 : fltdesc(flt32, uint32, int32) = [
+ .explode = std.flt32explode,
+ .assem = std.flt32assem,
+ .tobits = std.flt32bits,
+ .frombits = std.flt32frombits,
+ .nan = 0x7fc00000,
+ .inf = 0x7f800000,
+ .expmask = 0x7f800000, /* mask to detect inf or NaN (inf, repeated for clarity) */
+ .precision = 24,
+ .emax = 127,
+ .emin = -126,
+ .sgnmask = 1 << 31,
+ .sig8mask = 0xffff0000, /* Mask to get 8 significant bits */
+ .sig8last = 16, /* Last bit kept when masking */
+ .split_prec_mask = 0xffff0000, /* 16 trailing zeros */
+ .split_prec_mask2 = 0xfffff000, /* 12 trailing zeros */
+ .C = accurate_logs32[0:130], /* See log-impl.myr */
+ .eps_inf_border = 0x4eb00f34, /* maximal y st. (1.00..1)^y < oo */
+ .eps_zero_border = 0x4ecff1b4, /* minimal y st. (0.99..9)^y > 0 */
+ .exp_inf_border = 0x42b17218, /* maximal y such that e^y < oo */
+ .exp_zero_border = 0xc2cff1b4, /* minimal y such that e^y > 0 */
+ .exp_subnormal_border = 0xc2aeac50, /* minimal y such that e^y is normal */
+ .itercount = 4, /* How many iterations of Taylor series for (1 + f)^y' */
+]
+
+/*
+ flt64 descriptor handed to powrgen by powr64. Values that stand
+ for floats (nan, inf, the *_border fields) are raw bit patterns.
+ */
+const desc64 : fltdesc(flt64, uint64, int64) = [
+ .explode = std.flt64explode,
+ .assem = std.flt64assem,
+ .tobits = std.flt64bits,
+ .frombits = std.flt64frombits,
+ .nan = 0x7ff8000000000000,
+ .inf = 0x7ff0000000000000,
+ .expmask = 0x7ff0000000000000, /* mask to detect inf or NaN (inf, repeated for clarity) */
+ .precision = 53,
+ .emax = 1023,
+ .emin = -1022,
+ .sgnmask = 1 << 63,
+ .sig8mask = 0xffffe00000000000, /* Mask to get 8 significant bits */
+ .sig8last = 45, /* Last bit kept when masking */
+ .split_prec_mask = 0xffffff0000000000, /* 40 trailing zeroes */
+ .split_prec_mask2 = 0xfffffffffffc0000, /* 18 trailing zeroes */
+ .C = accurate_logs64[0:130], /* See log-impl.myr */
+ .eps_inf_border = 0x43d628b76e3a7b61, /* maximal y st. (1.00..1)^y < oo */
+ .eps_zero_border = 0x43d74e9c65eceee0, /* minimal y st. (0.99..9)^y > 0 */
+ .exp_inf_border = 0x40862e42fefa39ef, /* maximal y such that e^y < oo */
+ .exp_zero_border = 0xc0874910d52d3052, /* minimal y such that e^y > 0 */
+ .exp_subnormal_border = 0xc086232bdd7abcd2, /* minimal y such that e^y is normal */
+ .itercount = 8, /* How many iterations of Taylor series for (1 + f)^y' */
+]
+
+/* powr for flt32: runs powrgen with the flt32 descriptor. */
+const powr32 = {x : flt32, y : flt32
+	var result = powrgen(x, y, desc32)
+	-> result
+}
+
+/* powr for flt64: runs powrgen with the flt64 descriptor. */
+const powr64 = {x : flt64, y : flt64
+	var result = powrgen(x, y, desc64)
+	-> result
+}
+
+/*
+ Generic powr(x, y) behind powr32 and powr64.
+
+ x is the base, y the exponent, and d the descriptor supplying
+ bit-level helpers, tables and thresholds for the float type.
+
+ Special cases handled up front (IEEE exceptions are not raised):
+   NaN in either argument      -> NaN
+   0^0, oo^0, 1^(+/-oo)        -> NaN
+   (x < 0)^y                   -> NaN
+   0^(y < 0) -> oo,  0^(y > 0) -> 0
+   oo^(y < 0) -> 0,  oo^(y > 0) -> oo
+   x^1 -> x,  x^0 -> 1,  1^(finite y) -> 1
+   (0 < x < 1)^(+oo) -> 0,  (0 < x < 1)^(-oo) -> oo
+   (x > 1)^(+oo) -> oo,     (x > 1)^(-oo) -> 0
+
+ Everything else is computed from the decomposition described in
+ the body.
+ */
+generic powrgen = {x : @f, y : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i
+	var xb, yb
+	xb = d.tobits(x)
+	yb = d.tobits(y)
+
+	var xn : bool, xe : @i, xs : @u
+	var yn : bool, ye : @i, ys : @u
+	(xn, xe, xs) = d.explode(x)
+	(yn, ye, ys) = d.explode(y)
+
+	/*
+	   Special cases. Note we do not follow IEEE exceptions.
+	 */
+	if std.isnan(x) || std.isnan(y)
+		/* Propagate NaN */
+		-> d.frombits(d.nan)
+	elif (xb & ~d.sgnmask == 0)
+		if (yb & ~d.sgnmask == 0)
+			/* 0^0 is undefined. */
+			-> d.frombits(d.nan)
+		elif yn
+			/* 0^(< 0) is infinity */
+			-> d.frombits(d.inf)
+		else
+			/* otherwise, 0^y = 0. */
+			-> (0.0 : @f)
+		;;
+	elif xn
+		/*
+		   (< 0)^(anything) is undefined. This comes from
+		   thinking of floating-point numbers as representing
+		   small ranges of real numbers. If you really want
+		   to compute (-1.23)^5, use pown.
+		 */
+		-> d.frombits(d.nan)
+	elif (xb & ~d.sgnmask == d.inf)
+		/* x is +oo here: negative x was rejected above */
+		if (yb & ~d.sgnmask == 0)
+			/* oo^0 is undefined */
+			-> d.frombits(d.nan)
+		elif yn
+			/*
+			   oo^(< 0) is 0. Note that d.assem(n, 0, 0) is
+			   2^0 = 1, not 0, so the zero is spelled out.
+			 */
+			-> (0.0 : @f)
+		else
+			/* oo^(> 0) is oo */
+			-> d.frombits(d.inf)
+		;;
+	elif std.eq(y, (1.0 : @f))
+		/* x^1 = x */
+		-> x
+	elif yb & ~d.sgnmask == 0
+		/* (finite, positive)^0 = 1 */
+		-> (1.0 : @f)
+	elif std.eq(x, (1.0 : @f))
+		if yb & ~d.sgnmask == d.inf
+			/* 1^(+/-oo) is undefined */
+			-> d.frombits(d.nan)
+		else
+			/* 1^(finite) = 1 */
+			-> (1.0 : @f)
+		;;
+	elif yb & ~d.sgnmask == d.inf
+		/*
+		   x is finite, positive and not 1 here, so xe < 0
+		   exactly when x < 1. The sign of y picks the limit.
+		 */
+		if xe < 0
+			if yn
+				/* (0 < x < 1)^(-oo) = oo */
+				-> d.frombits(d.inf)
+			else
+				/* (0 < x < 1)^oo = 0 */
+				-> (0.0 : @f)
+			;;
+		else
+			if yn
+				/* (x > 1)^(-oo) = 0 */
+				-> (0.0 : @f)
+			else
+				/* (x > 1)^oo = oo */
+				-> d.frombits(d.inf)
+			;;
+		;;
+	;;
+
+	/* Normalize x and y */
+	if xe < d.emin
+		var first_1 = find_first1_64((xs : uint64), (d.precision : int64))
+		var offset = (d.precision : @u) - 1 - (first_1 : @u)
+		xs = xs << offset
+		xe = d.emin - offset
+	;;
+
+	if ye < d.emin
+		var first_1 = find_first1_64((ys : uint64), (d.precision : int64))
+		var offset = (d.precision : @u) - 1 - (first_1 : @u)
+		ys = ys << offset
+		ye = d.emin - offset
+	;;
+
+	/*
+	   Split x into 2^N * F * (1 + f), with F = 1 + j/128 (some
+	   j) and f tiny. Compute F by rounding x' to 8 significant
+	   bits, and f via f = (x' - F)/F, where x' is x/2^N. 128 is
+	   chosen so that we can borrow some constants from
+	   log-impl.myr.
+
+	   [Tan90] hints at a method of computing x^y which may be
+	   comparable to this approach, but which unfortunately
+	   has not been elaborated on (as far as I can discover).
+	 */
+	var N = xe
+	var j, F, Fn, Fe, Fs
+	var xprime = d.assem(false, 0, xs)
+
+	if need_round_away(0, (xs : uint64), (d.sig8last : int64))
+		F = d.frombits((d.tobits(xprime) & d.sig8mask) + (1 << d.sig8last))
+	else
+		F = d.frombits(d.tobits(xprime) & d.sig8mask)
+	;;
+
+	(Fn, Fe, Fs) = d.explode(F)
+
+	if Fe != 0
+		/* Rounding carried F out of the exponent-0 binade: last table entry */
+		j = 128
+	else
+		j = 0x7f & ((d.sig8mask & Fs) >> d.sig8last)
+	;;
+
+	var f = (xprime - F)/F
+
+	/*
+	   y could actually be above integer infinity, in which
+	   case x^y is most certainly infinity or 0. More importantly,
+	   we can't safely compute M (below).
+	 */
+	if x > (1.0 : @f)
+		if y > d.frombits(d.eps_inf_border)
+			-> d.frombits(d.inf)
+		elif -y > d.frombits(d.eps_inf_border)
+			-> (0.0 : @f)
+		;;
+	elif x < (1.0 : @f)
+		if y > d.frombits(d.eps_zero_border)
+			-> (0.0 : @f)
+		elif -y > d.frombits(d.eps_zero_border)
+			-> d.frombits(d.inf)
+		;;
+	;;
+
+	/* Split y into M + y', with |y'| <= 0.5 and M an integer */
+	var M = floor(y)
+	var yprime = y - M
+	if yprime > (0.5 : @f)
+		M += (1.0 : @f)
+		yprime = y - M
+	elif yprime < (-0.5 : @f)
+		M -= (1.0: @f)
+		yprime = y - M
+	;;
+
+	/*
+	   We'll multiply y' by log(2) and try to keep extra
+	   precision, so we need to split y'. Since the high word
+	   of C has 24 - 10 = 14 significant bits (53 - 16 = 37 in
+	   flt64 case), we ensure 15 (39) trailing zeroes in
+	   yprime_hi. (We also need this for y'*N, M, &c).
+	 */
+	var yprime_hi = d.frombits(d.tobits(yprime) & d.split_prec_mask)
+	var yprime_lo = yprime - yprime_hi
+	var yprimeN_hi = d.frombits(d.tobits((N : @f) * yprime) & d.split_prec_mask)
+	var yprimeN_lo = fma((N : @f), yprime, -yprimeN_hi)
+	var M_hi = d.frombits(d.tobits(M) & d.split_prec_mask)
+	var M_lo = M - M_hi
+
+	/*
+	   At this point, we've built out
+
+	     x^y = [ 2^N * F * (1 + f) ]^(M + y')
+
+	   where N, M are integers, F is well-known, and f, y' are
+	   tiny. So we can get to computing
+
+	     /-1-\     /-------------------2--------------------------\   /-3--\
+	     2^(N*M) * exp(log(F)*y' + log2*N*y' + log(F)*M + M*log(1+f)) * (1+f)^y'
+
+	   where 1 can be handled by scale2, 2 we can mostly fake
+	   by sticking high-precision values for log(F) and log(2)
+	   through exp(), and 3 is composed of small numbers,
+	   therefore can be reasonably approximated by a Taylor
+	   expansion.
+	 */
+
+	/* t2 */
+	var log2_lo, log2_hi, Cu_hi, Cu_lo
+	(log2_hi, log2_lo) = d.C[128]
+	(Cu_hi, Cu_lo) = d.C[j]
+
+	var es : @f[20]
+	std.slfill(es[:], (0.0 : @f))
+
+	/* log(F) * y' */
+	es[0] = d.frombits(Cu_hi) * yprime_hi
+	es[1] = d.frombits(Cu_lo) * yprime_hi
+	es[2] = d.frombits(Cu_hi) * yprime_lo
+	es[3] = d.frombits(Cu_lo) * yprime_lo
+
+	/* log(2) * N * y' */
+	es[4] = d.frombits(log2_hi) * yprimeN_hi
+	es[5] = d.frombits(log2_lo) * yprimeN_hi
+	es[6] = d.frombits(log2_hi) * yprimeN_lo
+	es[7] = d.frombits(log2_lo) * yprimeN_lo
+
+	/* log(F) * M */
+	es[8] = d.frombits(Cu_hi) * M_hi
+	es[9] = d.frombits(Cu_lo) * M_hi
+	es[10] = d.frombits(Cu_hi) * M_lo
+	es[11] = d.frombits(Cu_lo) * M_lo
+
+	/* log(1 + f) * M */
+	var lf = log1p(f)
+	var lf_hi = d.frombits(d.tobits(lf) & d.split_prec_mask)
+	var lf_lo = lf - lf_hi
+	es[12] = lf_hi * M_hi
+	es[13] = lf_lo * M_hi
+	es[14] = lf_hi * M_lo
+	es[15] = lf_lo * M_lo
+
+	/*
+	   The correct way to handle this would be to compare
+	   magnitudes of eis and parenthesize the additions correctly.
+	   We take the cheap way out.
+	 */
+	var exp_hi = priest_sum(es[0:16])
+
+	/*
+	   We would like to just compute exp(exp_hi) * exp(exp_lo).
+	   However, if that takes us into subnormal territory, yet
+	   N * M is large, that will throw away a few bits of
+	   information. We can correct for this by adding in a few
+	   copies of P*log(2), then subtract off P when we compute
+	   scale2() at the end.
+
+	   We also have to be careful that P doesn't have too many
+	   significant bits, otherwise we throw away some information
+	   of log2_hi.
+	 */
+	var P = -rn(exp_hi / d.frombits(log2_hi))
+	var P_f = (P : @f)
+	P_f = d.frombits(d.tobits(P_f) & d.split_prec_mask2)
+	P = rn(P_f)
+
+	es[16] = P_f * d.frombits(log2_hi)
+	es[17] = P_f * d.frombits(log2_lo)
+	exp_hi = priest_sum(es[0:18])
+	/* Subtract the rounded sum back out; what is left is the low part */
+	es[18] = -exp_hi
+	var exp_lo = priest_sum(es[0:19])
+
+
+	var t2 = exp(exp_hi) * exp(exp_lo)
+
+	/*
+	   t3: Abbreviated Taylor expansion for (1 + f)^y' - 1.
+	   Since f is on the order of 2^-7 (and y' is on the order
+	   of 2^-1), we need to go up to f^3 for single-precision,
+	   and f^7 for double. We can then compute (1 + t3) * t2
+
+	   The expansion is \Sum_{k=1}^{\infty} {y' \choose k} f^k
+	 */
+	var terms : @f[10] = [
+		(0.0 : @f), (0.0 : @f), (0.0 : @f), (0.0 : @f), (0.0 : @f),
+		(0.0 : @f), (0.0 : @f), (0.0 : @f), (0.0 : @f), (0.0 : @f),
+	]
+	var current = (1.0 : @f)
+	/* terms[k] holds {y' \choose k+1} f^(k+1), built from the previous term */
+	for var k = 0; k <= d.itercount; ++k
+		current = current * f * (yprime - (k : @f)) / ((k : @f) + (1.0 : @f))
+		terms[k] = current
+	;;
+	var t3 = priest_sum(terms[0:d.itercount + 1])
+
+	/* Bail out early when the final power of two is out of range */
+	var total_exp_f = (N : @f) * M - (P : @f)
+	if total_exp_f > ((d.emax - d.emin + d.precision + 1) : @f)
+		-> d.frombits(d.inf)
+	elif total_exp_f < -((d.emax - d.emin + d.precision + 1) : @f)
+		-> (0.0 : @f)
+	;;
+
+	/*
+	   Pull t2's exponent out so that we don't hit subnormal
+	   calculation with the t3 multiplication
+	 */
+	var t2n, t2e, t2s
+	(t2n, t2e, t2s) = d.explode(t2)
+
+	if t2e < d.emin
+		var t2_first_1 = find_first1_64((t2s : uint64), (d.precision : int64))
+		var t2_offset = (d.precision : @u) - 1 - (t2_first_1 : @u)
+		t2s = t2s << t2_offset
+		t2e = d.emin - (t2_offset : @i)
+	;;
+
+	t2 = d.assem(t2n, 0, t2s)
+	/* Fold t2's exponent into P so that the final scale2 restores it */
+	P -= t2e
+
+	/* base = t2 * (1 + t3) */
+	var base = fma(t2, t3, t2)
+	-> scale2(base, N * rn(M) - P)
+}
diff --git a/lib/math/references b/lib/math/references
index 7a0496c..9a44159 100644
--- a/lib/math/references
+++ b/lib/math/references
@@ -1,11 +1,26 @@
References
+[GB91]
+Shmuel Gal and Boris Bachelis. “An accurate elementary mathematical
+library for the IEEE floating point standard”. In: ACM Trans. Math.
+Software 17.1 (1991), pp. 26–45. issn: 0098-3500. doi:
+10.1145/103147.103151. url: https://doi.acm.org/10.1145/103147.103151.
+
[KM06]
Peter Kornerup and Jean-Michel Muller. “Choosing starting values
for certain Newton–Raphson iterations”. In: Theoretical Computer
Science 351 (1 2006), pp. 101–110. doi:
https://doi.org/10.1016/j.tcs.2005.09.056.
+[Lut95]
+Wolfram Luther. “Highly accurate tables for elementary functions”.
+In: BIT Numerical Mathematics 35.3 (Sept. 1995), pp. 352–360. doi:
+10.1007/BF01732609. url: https://doi.org/10.1007/BF01732609.
+
+[Mar00]
+Peter Markstein. IA-64 and elementary functions : speed and precision.
+Upper Saddle River, NJ: Prentice Hall, 2000. isbn: 9780130183484.
+
[Mul+10]
Jean-Michel Muller et al. Handbook of floating-point arithmetic.
Boston: Birkhäuser, 2010. isbn: 9780817647049.
@@ -17,11 +32,17 @@ Third edition. New York: Birkhäuser, 2016. isbn: 9781489979810.
[Tan89]
Ping-Tak Peter Tang. “Table-driven Implementation of the Exponential
Function in IEEE Floating-point Arithmetic”. In: ACM Trans. Math.
-Softw. 15.2 (June 1989), pp. 144–157. issn: 0098-3500. doi:
+Softw. 15.2 (June 1989), pp. 144–157. issn: 0098-3500. doi:
10.1145/63522.214389. url: http://doi.acm.org/10.1145/63522.214389.
+[Tan90]
+Ping-Tak Peter Tang. “Table-driven Implementation of the Logarithm
+Function in IEEE Floating-point Arithmetic”. In: ACM Trans. Math.
+Softw. 16.4 (Dec. 1990), pp. 378–400. issn: 0098-3500. doi:
+10.1145/98267.98294. url: http://doi.acm.org/10.1145/98267.98294.
+
[Tan92]
Ping Tak Peter Tang. “Table-driven Implementation of the Expm1
-Function in IEEE Floating- point Arithmetic”. In: ACM Trans. Math.
-Softw. 18.2 (June 1992), pp. 211–222. issn: 0098-3500. doi:
+Function in IEEE Floating-point Arithmetic”. In: ACM Trans. Math.
+Softw. 18.2 (June 1992), pp. 211–222. issn: 0098-3500. doi:
10.1145/146847.146928. url: http://doi.acm.org/10.1145/146847.146928.
diff --git a/lib/math/round-impl.myr b/lib/math/round-impl.myr
index dc35869..589344f 100644
--- a/lib/math/round-impl.myr
+++ b/lib/math/round-impl.myr
@@ -3,14 +3,14 @@ use std
use "util"
pkg math =
- const rn64 : (f : flt64 -> int64)
- const rn32 : (f : flt32 -> int32)
+ const rn64 : (x : flt64 -> int64)
+ const rn32 : (x : flt32 -> int32)
;;
-const rn64 = {f : flt64
+const rn64 = {x : flt64
var n : bool, e : int64, s : uint64
- (n, e, s) = std.flt64explode(f)
+ (n, e, s) = std.flt64explode(x)
if e >= 63
-> -9223372036854775808
@@ -38,10 +38,10 @@ const rn64 = {f : flt64
;;
}
-const rn32 = {f : flt32
+const rn32 = {x : flt32
var n : bool, e : int32, s : uint32
- (n, e, s) = std.flt32explode(f)
+ (n, e, s) = std.flt32explode(x)
if e >= 31
-> -2147483648
diff --git a/lib/math/scale2-impl.myr b/lib/math/scale2-impl.myr
index 6301e14..f75aac8 100644
--- a/lib/math/scale2-impl.myr
+++ b/lib/math/scale2-impl.myr
@@ -9,20 +9,20 @@ use "util"
approach works quite well.
*/
pkg math =
- const scale232 : (f : flt32, m : int32 -> flt32)
- const scale264 : (f : flt64, m : int64 -> flt64)
+ const scale232 : (x : flt32, m : int32 -> flt32)
+ const scale264 : (x : flt64, m : int64 -> flt64)
;;
-const scale232 = {f : flt32, m : int32
+const scale232 = {x : flt32, m : int32
var n, e, s
- (n, e, s) = std.flt32explode(f)
+ (n, e, s) = std.flt32explode(x)
(n, e, s) = scale2gen(n, e, s, -126, 127, 24, m)
-> std.flt32assem(n, e, s)
}
-const scale264 = {f : flt64, m : int64
+const scale264 = {x : flt64, m : int64
var n, e, s
- (n, e, s) = std.flt64explode(f)
+ (n, e, s) = std.flt64explode(x)
(n, e, s) = scale2gen(n, e, s, -1022, 1023, 53, m)
-> std.flt64assem(n, e, s)
}
@@ -41,8 +41,11 @@ generic scale2gen = {n : bool, e : @i, s : @u, emin : @i, emax : @i, p : @u, m :
sprime++
;;
eprime = emin - 1
+ elif e + m < emin - p - 2
+ sprime = 0
+ eprime = emin - 1
elif e + m < emin
- sprime = s >> (emin - m - e)
+ sprime = s >> ((emin - m - e) : @u)
if need_round_away(0, (s : uint64), ((emin - m - e) : int64))
sprime++
;;
diff --git a/lib/math/sin-impl.myr b/lib/math/sin-impl.myr
new file mode 100644
index 0000000..4a9b0bc
--- /dev/null
+++ b/lib/math/sin-impl.myr
@@ -0,0 +1,815 @@
+use std
+
+use "fpmath"
+use "fma-impl"
+use "scale2-impl"
+use "sum-impl"
+use "util"
+
+/*
+ This implementation of sin and cos uses the "Highly Accurate
+ Tables" method of [GB91]. It's not as fast as it could be (the
+ sorting and excessive summation really take a toll); however,
+ we seem to be consistently within +/-1 ulp of the correct result
+ in the flt64 case. In the flt32 case we are correctly rounded.
+
+ The idea is to tabulate [0, pi/4] at 256 slightly irregular
+ intervals xi, with the lucky property that the infinite binary
+ expansions of significands of sin(xi) and cos(xi) look like
+
+ / 53 bits \ / many '0's \ / noise \
+ 1.bbbbbb...bb00000000000...0???????????...
+
+ This allows us to use storage only for a single floating point
+ number, but get more than 53 bits of precision.
+
+ Using that, we express x as (N * pi/4) + (xi) + (h), where h is
+ tiny. Using identities
+
+ sin(u+v) = sin(u)cos(v) + cos(u)sin(v),
+ cos(u+v) = cos(u)cos(v) - sin(u)sin(v),
+
+ we arrive at a sum where every term is known with greater than
+ 53 bits of precision except for sin(h) and cos(h), which we can
+ approximate well.
+
+ As a note, everything is performed in double-precision. Storing
+ a second set of tables for single precision constants would
+ occupy another 3KiB of memory for a not-very-significant speed
+ gain.
+
+ See files pi-constants.c, generate-triples-for-GB91.c, and
+ generate-minimax-by-Remez.gp for where the constants come from.
+
+ Note that in a departure from [GB91], we use the smaller degree
+ polynomials over a smaller range for the input. This requires
+ that we store more C-values for smaller xi values, but makes
+ the implementation a bit simpler to read.
+ */
+pkg math =
+ /* sin(x), x in radians */
+ pkglocal const sin32 : (x : flt32 -> flt32)
+ pkglocal const sin64 : (x : flt64 -> flt64)
+
+ /* cos(x), x in radians */
+ pkglocal const cos32 : (x : flt32 -> flt32)
+ pkglocal const cos64 : (x : flt64 -> flt64)
+
+ /* both at once; the result is ordered (sin(x), cos(x)) */
+ pkglocal const sincos32 : (x : flt32 -> (flt32, flt32))
+ pkglocal const sincos64 : (x : flt64 -> (flt64, flt64))
+
+ /*
+ Building blocks of the reduction. trig_reduce returns
+ (N mod 4, x1, x2) with x ~ N*(pi/2) + x1 + x2;
+ trig_table_approx returns one (xi, cos(xi), sin(xi)) row
+ of the table it is given.
+ */
+ pkglocal const trig_reduce : (x : flt64 -> (int64, flt64, flt64))
+ pkglocal const trig_table_approx : (x1 : flt64, C : (uint64, uint64, uint64)[257] -> (uint64, uint64, uint64))
+
+ /* pi/2 as four flt64 bit patterns */
+ pkglocal const pi_over_2 : uint64[4]
+;;
+
+/*
+ Pi/2 in lots of detail, for range reducing sin(2^18) or so.
+ Each entry is the bit pattern of a flt64 (trig_reduce decodes
+ them with std.flt64frombits); the entries are listed from most
+ to least significant and are meant to be added together.
+ */
+const pi_over_2 = [
+ 0x3ff921fb54442d18,
+ 0x3c91a62633145c07,
+ 0xb91f1976b7ed8fbc,
+ 0x35b4cf98e804177d,
+]
+
+/*
+ Pi/4 in lots of detail, stored as flt64 bit patterns from most
+ to least significant. trig_reduce decodes all four entries,
+ but its loop test only compares against the first two.
+ */
+const pi_over_4 : uint64[4] = [
+ 0x3fe921fb54442d18,
+ 0x3c81a62633145c07,
+ 0xb90f1976b7ed8fbc,
+ 0x35a4cf98e804177d,
+]
+
+/*
+ Pre-computed inverses, stored as flt64 bit patterns. A value
+ multiplied by one of these and then rounded with rn gives,
+ respectively, a count of multiples of pi/2 (trig_reduce) and
+ an index into the 257-row table C (trig_table_approx).
+ */
+const two_over_pi : uint64 = 0x3fe45f306dc9c883 /* 1/(pi/2) */
+const oneohtwofour_over_pi : uint64 = 0x40745f306dc9c883 /* 1/(pi/(4 * 256)) */
+
+/*
+ Coefficients for minimax, degree 7 polynomials approximating sin
+ and cos on [-Pi/1024, Pi/1024] (generated by a Remez algorithm).
+
+ Entries are flt64 bit patterns; entry i is the coefficient of
+ the degree i term (w64 uses sin_coeff[1] as the degree 1
+ coefficient).
+ */
+const sin_coeff : uint64[8] = [
+ 0x0000000000000000,
+ 0x3ff0000000000000,
+ 0xb8c7400000000000,
+ 0xbfc5555555555555,
+ 0x39d0000000000000,
+ 0x3f81111111111061,
+ 0xbacc000000000000,
+ 0xbf2a019fa7ee0417,
+]
+
+/*
+ Polynomial coefficients for cos, as flt64 bit patterns; entry
+ i is the coefficient of the degree i term (w64 uses
+ cos_coeff[1] and cos_coeff[2] as the degree 1 and 2
+ coefficients).
+ */
+const cos_coeff : uint64[8] = [
+ 0x3ff0000000000000,
+ 0x38c0800000000000,
+ 0xbfe0000000000000,
+ 0x39a0000000000000,
+ 0x3fa55555555553ee,
+ 0x0000000000000000,
+ 0xbf56c16b9bfd9fd6,
+ 0xbbec000000000000,
+]
+
+/*
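+ The Highly Accurate Tables for use in a [GB91]-type algorithm;
+ generated by ancillary/generate-triples-for-GB91.c. Each row
+ holds the flt64 bit patterns of (xi, cos(xi), sin(xi)), with xi
+ increasing from 0 to roughly pi/4.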
+ */
+const C : (uint64, uint64, uint64)[257] = [
+ /* xi cos(xi) sin(xi) */
+ (0x0000000000000000, 0x3ff0000000000000, 0x0000000000000000),
+ (0x3f6921fb42e71072, 0x3feffff621622aa5, 0x3f6921f8ad6f8d6f),
+ (0x3f7921fb576a8e70, 0x3fefffd8858e80ad, 0x3f7921f1018d5de6),
+ (0x3f82d97c6d961293, 0x3fefffa72c98324f, 0x3f82d96afcb3b8a8),
+ (0x3f8921fba95a4ba5, 0x3fefff62169765a8, 0x3f8921d251f34230),
+ (0x3f8f6a79afcf7cc4, 0x3fefff0943ccb09c, 0x3f8f6a28f137852f),
+ (0x3f92d97d6168427b, 0x3feffe9cb42a0249, 0x3f92d9379e0e6011),
+ (0x3f95fdbbdcaafdf1, 0x3feffe1c68730a0e, 0x3f95fd4d14eace13),
+ (0x3f9921fb47134298, 0x3feffd886087640a, 0x3f992155ea73805c),
+ (0x3f9c463ab6204953, 0x3feffce09ce490e2, 0x3f9c454f4439aa60),
+ (0x3f9f6a7a230622cf, 0x3feffc251df3604f, 0x3f9f69372b837c03),
+ (0x3fa1475c20c40678, 0x3feffb55e4815141, 0x3fa1468532309197),
+ (0x3fa2d97c761e7dc7, 0x3feffa72f0045061, 0x3fa2d8656c814503),
+ (0x3fa46b9c4167ee58, 0x3feff97c42007eca, 0x3fa46a397ce648ce),
+ (0x3fa5fdbbb3fec154, 0x3feff871db006d07, 0x3fa5fc009ce09729),
+ (0x3fa78fdba698a573, 0x3feff753bb15f9f8, 0x3fa78dbaad1df29e),
+ (0x3fa921fb569e20a0, 0x3feff621e37794e9, 0x3fa91f65f36711fd),
+ (0x3faab41b20e054f3, 0x3feff4dc549e4649, 0x3faab101d4af47e4),
+ (0x3fac463aa9c96aef, 0x3feff3830f9fe5ee, 0x3fac428cfdc5c35c),
+ (0x3fadd85a67cbe296, 0x3feff21614eca1d2, 0x3fadd406ed40d193),
+ (0x3faf6a7a3eff7872, 0x3feff09565793013, 0x3faf656e8f97f0f9),
+ (0x3fb07e4ceacc9a97, 0x3fefef01028ba740, 0x3fb07b6149c87151),
+ (0x3fb1475c76962b85, 0x3fefed58ed6a54be, 0x3fb14400e1aeeb44),
+ (0x3fb2106c98abe6cd, 0x3fefeb9d254b1740, 0x3fb20c96690fbe9f),
+ (0x3fb2d97c6bdba26e, 0x3fefe9cdad2f1061, 0x3fb2d5207f840524),
+ (0x3fb3a28c18e279a9, 0x3fefe7ea85e76da6, 0x3fb39d9ed203bab6),
+ (0x3fb46b9c42a3d8e4, 0x3fefe5f3af0a1548, 0x3fb4661187480b43),
+ (0x3fb534abcd8d6c89, 0x3fefe3e92c9764ff, 0x3fb52e7708fabcdf),
+ (0x3fb5fdbbf6d380b0, 0x3fefe1cafc99687d, 0x3fb5f6d017a62147),
+ (0x3fb6c6cbcc2f7248, 0x3fefdf9922e0f7ad, 0x3fb6bf1b46436fc7),
+ (0x3fb78fdb8cff3236, 0x3fefdd53a026e6fa, 0x3fb78758587024ce),
+ (0x3fb858eb79d953d8, 0x3fefdafa7513ab8e, 0x3fb84f8712f838d4),
+ (0x3fb921fb5df13728, 0x3fefd88da3b2cbcb, 0x3fb917a6c5cad0c3),
+ (0x3fb9eb0b15ad6e7f, 0x3fefd60d2df8efac, 0x3fb9dfb6d20cd89d),
+ (0x3fbab41b0d8e04c2, 0x3fefd3791414a510, 0x3fbaa7b72849583a),
+ (0x3fbb7d2b02ec004d, 0x3fefd0d1586ee673, 0x3fbb6fa70acd0c55),
+ (0x3fbc463a9c1cda98, 0x3fefce15fde7feac, 0x3fbc3785a525059d),
+ (0x3fbd0f4a92881eaf, 0x3fefcb4703aa4711, 0x3fbcff5334552718),
+ (0x3fbdd85a7ea6b644, 0x3fefc8646cd750e6, 0x3fbdc70ed631fba7),
+ (0x3fbea16a4da0e792, 0x3fefc56e3b81b243, 0x3fbe8eb7fcd470b2),
+ (0x3fbf6a7a2082960a, 0x3fefc26471042f37, 0x3fbf564e4de7cd23),
+ (0x3fc019c4f4362172, 0x3fefbf470f79208c, 0x3fc00ee89fc60798),
+ (0x3fc07e4cb4549583, 0x3fefbc1619c9367a, 0x3fc072a00d3f81a5),
+ (0x3fc0e2d4df1f9ef0, 0x3fefb8d18d51928e, 0x3fc0d64dbf2ea47f),
+ (0x3fc1475c06879885, 0x3fefb5797828e1da, 0x3fc139f00d3ac360),
+ (0x3fc1abe531137149, 0x3fefb20dc250d36d, 0x3fc19d89b968e759),
+ (0x3fc2106c256f2705, 0x3fefae8e92bf458c, 0x3fc20116570e32c3),
+ (0x3fc274f5eafb17bd, 0x3fefaafbbea74df5, 0x3fc2649aa381628e),
+ (0x3fc2d97c80aa0f5b, 0x3fefa7557efae43c, 0x3fc2c81070013fe8),
+ (0x3fc33e0462663cd9, 0x3fefa39bacdb1026, 0x3fc32b7bef4456d4),
+ (0x3fc3a28c534e14b9, 0x3fef9fce55ed894d, 0x3fc38edbaa59fe3f),
+ (0x3fc40714a18f49e2, 0x3fef9bed79763508, 0x3fc3f22fb1298233),
+ (0x3fc46b9c38ac6ee7, 0x3fef97f9249e436c, 0x3fc45576b5509b55),
+ (0x3fc4d02432e80a59, 0x3fef93f14ed442ec, 0x3fc4b8b1905fbd16),
+ (0x3fc534ac0298d5e2, 0x3fef8fd600323753, 0x3fc51bdf7942e899),
+ (0x3fc5993416dbe628, 0x3fef8ba736af1693, 0x3fc57f00a065f728),
+ (0x3fc5fdbbf1d87ff8, 0x3fef8764fa1887ba, 0x3fc5e2144c8984d0),
+ (0x3fc66243dc0ae9e8, 0x3fef830f4a092d65, 0x3fc6451a8808363c),
+ (0x3fc6c6cbc286e3f6, 0x3fef7ea629fb13d9, 0x3fc6a8130326d53f),
+ (0x3fc72b53b7c96348, 0x3fef7a299bd3df70, 0x3fc70afd9309c784),
+ (0x3fc78fdba21869ed, 0x3fef7599a37cdcb3, 0x3fc76dd9e15bdb4a),
+ (0x3fc7f4642a07f677, 0x3fef70f63bf58052, 0x3fc7d0a856c8ccd4),
+ (0x3fc858eb5b649af0, 0x3fef6c3f7f63a632, 0x3fc83366caea942f),
+ (0x3fc8bd736951ad00, 0x3fef6775566b1c57, 0x3fc896172a175d2d),
+ (0x3fc921fb399259ea, 0x3fef6297d144aa53, 0x3fc8f8b8223b223a),
+ (0x3fc986835ce1c644, 0x3fef5da6ebe94da7, 0x3fc95b4a04783401),
+ (0x3fc9eb0b24569abc, 0x3fef58a2b2008d0c, 0x3fc9bdcbe883cc5f),
+ (0x3fca4f9323461762, 0x3fef538b1f52fbe6, 0x3fca203e2200e122),
+ (0x3fcab41b09c5898b, 0x3fef4e603b080549, 0x3fca82a025ebcacb),
+ (0x3fcb18a2e6d4292f, 0x3fef492207941b17, 0x3fcae4f1c649efbf),
+ (0x3fcb7d2ae7b9bf9f, 0x3fef43d085cf0736, 0x3fcb4732f2b7a6f7),
+ (0x3fcbe1b2cb7b8481, 0x3fef3e6bbc6eba27, 0x3fcba9632f158ca6),
+ (0x3fcc463ab6f78496, 0x3fef38f3acd2bb22, 0x3fcc0b8262d9d9f9),
+ (0x3fccaac29faeedb5, 0x3fef33685aeb67ba, 0x3fcc6d90473e1ca0),
+ (0x3fcd0f4ab0d5a9e9, 0x3fef2dc9c7b77e6a, 0x3fcccf8cc9dfcdca),
+ (0x3fcd73d299801a67, 0x3fef2817fb345701, 0x3fcd31775f6e7021),
+ (0x3fcdd85a5f03c93d, 0x3fef2252f8ad886d, 0x3fcd934fd0c9eb90),
+ (0x3fce3ce2615c01b0, 0x3fef1c7abe28a12c, 0x3fcdf5163efb2fd2),
+ (0x3fcea16a34d49aca, 0x3fef168f557f8c38, 0x3fce56ca04ee54fd),
+ (0x3fcf05f2396af361, 0x3fef1090bcb17909, 0x3fceb86b43a82046),
+ (0x3fcf6a7a34c8a42b, 0x3fef0a7efae01f19, 0x3fcf19f9863cf10b),
+ (0x3fcfcf02154123ee, 0x3fef045a14e56969, 0x3fcf7b747f630b74),
+ (0x3fd019c51e84ab67, 0x3feefe22087e60d4, 0x3fcfdcdc522606ad),
+ (0x3fd04c08e989dfc7, 0x3feef7d6e70399a4, 0x3fd01f17f81c5502),
+ (0x3fd07e4ceb775576, 0x3feef178a4618400, 0x3fd04fb80a82fe62),
+ (0x3fd0b090f60ca615, 0x3feeeb074a3d9810, 0x3fd0804e15777f94),
+ (0x3fd0e2d4c54314ef, 0x3feee482e5663962, 0x3fd0b0d9b95007d9),
+ (0x3fd11518d2106a76, 0x3feeddeb6a30657d, 0x3fd0e15b4cec5711),
+ (0x3fd1475cc633879f, 0x3feed740e7e7b002, 0x3fd111d25f193a47),
+ (0x3fd179a0ce17673e, 0x3feed0835cc79369, 0x3fd1423efcc68ac8),
+ (0x3fd1abe4bace84c5, 0x3feec9b2d347a043, 0x3fd172a0dae268af),
+ (0x3fd1de28a262f5c2, 0x3feec2cf4cb15d4b, 0x3fd1a2f7f0d5a412),
+ (0x3fd2106cb1425d8c, 0x3feebbd8c71a89f1, 0x3fd1d3444b7da503),
+ (0x3fd242b0905f83c0, 0x3feeb4cf52e27d33, 0x3fd20385796d7260),
+ (0x3fd274f4947838ba, 0x3feeadb2e8897f5d, 0x3fd233bbae3e8ad0),
+ (0x3fd2a7386b632300, 0x3feea6839816a5b1, 0x3fd263e67d68a111),
+ (0x3fd2d97c9e0be433, 0x3fee9f41524c0663, 0x3fd294064c5a5940),
+ (0x3fd30bc07884186a, 0x3fee97ec359da93b, 0x3fd2c41a511fcae1),
+ (0x3fd33e046121eaf8, 0x3fee908437cd8074, 0x3fd2f422d00c7726),
+ (0x3fd3704855d0fd6b, 0x3fee89095dacc360, 0x3fd3241fa9798465),
+ (0x3fd3a28c5798cbdd, 0x3fee817baba342be, 0x3fd35410c0bfc504),
+ (0x3fd3d4d04553249a, 0x3fee79db2b58ee3d, 0x3fd383f5d8b1345d),
+ (0x3fd407145ea979a2, 0x3fee7227d7cc183e, 0x3fd3b3cf1062e88d),
+ (0x3fd43958386293b4, 0x3fee6a61c6350b11, 0x3fd3e39be4473723),
+ (0x3fd46b9c38cc0bc5, 0x3fee6288eb9aff42, 0x3fd4135c98341878),
+ (0x3fd49de029090fc7, 0x3fee5a9d555912a2, 0x3fd44310da8ddcf9),
+ (0x3fd4d024217e697d, 0x3fee529f047e7434, 0x3fd472b8a510e608),
+ (0x3fd50267f89f138c, 0x3fee4a8e04a2f573, 0x3fd4a253b2fc94cc),
+ (0x3fd534ac160681bb, 0x3fee426a4a0b5efa, 0x3fd4d1e249057326),
+ (0x3fd566eff496a5e0, 0x3fee3a33ef471d66, 0x3fd50163cbe183bf),
+ (0x3fd59933ebfe75ec, 0x3fee31eaeb28cfda, 0x3fd530d871303f3b),
+ (0x3fd5cb77fe42d304, 0x3fee298f425adc95, 0x3fd560401d7fa792),
+ (0x3fd5fdbbd011e31d, 0x3fee212109492bba, 0x3fd58f9a5d81663b),
+ (0x3fd62ffff34dbbed, 0x3fee18a02ca5e0f9, 0x3fd5bee79d6734e6),
+ (0x3fd66243bdc0c9c2, 0x3fee100cce7f06b6, 0x3fd5ee271fdac6fd),
+ (0x3fd69487ca38ebb6, 0x3fee0766d9c23a82, 0x3fd61d595943641b),
+ (0x3fd6c6cbc58a707e, 0x3fedfeae61f95db0, 0x3fd64c7dde58f888),
+ (0x3fd6f90fbf3b4b9a, 0x3fedf5e369de7ac8, 0x3fd67b94a09dba36),
+ (0x3fd72b53a84c8c13, 0x3feded05f987af46, 0x3fd6aa9d7500e60d),
+ (0x3fd75d97a7d7826a, 0x3fede4160f84614d, 0x3fd6d99863129ab7),
+ (0x3fd78fdba723c71a, 0x3feddb13b555c5d8, 0x3fd7088538928031),
+ (0x3fd7c21faf2481ab, 0x3fedd1feeeeb3f77, 0x3fd73763e0e5bb7e),
+ (0x3fd7f46380443ef6, 0x3fedc8d7cd74eb35, 0x3fd7663403ecedb8),
+ (0x3fd826a78fee86a2, 0x3fedbf9e41325ad2, 0x3fd794f5f21f812c),
+ (0x3fd858eb6fc6c00e, 0x3fedb652640d194e, 0x3fd7c3a927f6e7be),
+ (0x3fd88b2f6091a911, 0x3fedacf42fd7881c, 0x3fd7f24dc4deb30b),
+ (0x3fd8bd73747fa82a, 0x3feda383a6d8b415, 0x3fd820e3bcdb4516),
+ (0x3fd8efb76ad3bcc6, 0x3fed9a00db088521, 0x3fd84f6ab72e0455),
+ (0x3fd921fb52f99571, 0x3fed906bcf71ced7, 0x3fd87de2a57d3bef),
+ (0x3fd9543f4c295528, 0x3fed86c484089c2c, 0x3fd8ac4b87fa1376),
+ (0x3fd98683387c2ef2, 0x3fed7d0b047d2a2d, 0x3fd8daa526666236),
+ (0x3fd9b8c74c734cd7, 0x3fed733f4c983086, 0x3fd908ef9488c638),
+ (0x3fd9eb0b316e257b, 0x3fed6961734a4759, 0x3fd9372a66100418),
+ (0x3fda1d4f1c5b13c6, 0x3fed5f71745bef9a, 0x3fd96555af393979),
+ (0x3fda4f93134bbffd, 0x3fed556f54b18a49, 0x3fd99371591464cc),
+ (0x3fda81d7079868bb, 0x3fed4b5b1d5d78c0, 0x3fd9c17d39ba9f53),
+ (0x3fdab41b223bd70a, 0x3fed4134cc4c88b2, 0x3fd9ef795a3dd82c),
+ (0x3fdae65f0bd2deeb, 0x3fed36fc796c6d1f, 0x3fda1d654e53c0c4),
+ (0x3fdb18a2faa54a8e, 0x3fed2cb220194f43, 0x3fda4b412b549e3d),
+ (0x3fdb4ae6e27ee94a, 0x3fed2255c92cb15e, 0x3fda790cc9d54d8e),
+ (0x3fdb7d2afabcbd96, 0x3fed17e76f8b1bff, 0x3fdaa6c83ff29436),
+ (0x3fdbaf6edd63f9e6, 0x3fed0d672ed022eb, 0x3fdad47314b13a5e),
+ (0x3fdbe1b2ee09849b, 0x3fed02d4f8ac5a8e, 0x3fdb020d86625c4a),
+ (0x3fdc13f6d9f827b3, 0x3fecf830e504ee27, 0x3fdb2f972dd6dd44),
+ (0x3fdc463ac85c4fc4, 0x3feced7af23188c8, 0x3fdb5d101285fa87),
+ (0x3fdc787e99315d1d, 0x3fece2b32dae84df, 0x3fdb8a77fb79b6c9),
+ (0x3fdcaac2c391b562, 0x3fecd7d984771033, 0x3fdbb7cf38286324),
+ (0x3fdcdd06bf1547a8, 0x3fecccee1a978b83, 0x3fdbe515317a2767),
+ (0x3fdd0f4a93c25d1a, 0x3fecc1f0f53b85ed, 0x3fdc1249d2e7d3d7),
+ (0x3fdd418e7cb2c1ac, 0x3fecb6e20e487888, 0x3fdc3f6d35bf5483),
+ (0x3fdd73d292000a16, 0x3fecabc16721278b, 0x3fdc6c7f53ab8367),
+ (0x3fdda616817e6870, 0x3feca08f18d00ef9, 0x3fdc997fc72e0f8c),
+ (0x3fddd85a5a1d76bb, 0x3fec954b270990c5, 0x3fdcc66e8203a2b0),
+ (0x3fde0a9e6cc874c6, 0x3fec89f5868b6c72, 0x3fdcf34bb0b7c500),
+ (0x3fde3ce26dc62595, 0x3fec7e8e4f51729a, 0x3fdd2016f3f2781b),
+ (0x3fde6f264279a0bc, 0x3fec73158e8e71da, 0x3fdd4cd0187c011d),
+ (0x3fdea16a5680fb27, 0x3fec678b32bc65ff, 0x3fdd797762744bde),
+ (0x3fded3ae3d42764c, 0x3fec5bef5bdf2d0e, 0x3fdda60c55ccc8d6),
+ (0x3fdf05f24a4f648b, 0x3fec5041fdd6d9d1, 0x3fddd28f21277b30),
+ (0x3fdf383651e91662, 0x3fec448329efd6a1, 0x3fddfeff8240fbd5),
+ (0x3fdf6a7a42ac94c9, 0x3fec38b2eb87ae38, 0x3fde2b5d4e604dad),
+ (0x3fdf9cbe1d3b3429, 0x3fec2cd149d960c0, 0x3fde57a86acebf99),
+ (0x3fdfcf020d397ddb, 0x3fec20de41e8a738, 0x3fde83e0e2af7102),
+ (0x3fe000a313205931, 0x3fec14d9d64b5fbe, 0x3fdeb006abd63bf5),
+ (0x3fe019c4f9037b82, 0x3fec08c42ac58524, 0x3fdedc194338385b),
+ (0x3fe032e70860dbf0, 0x3febfc9d20441910, 0x3fdf08191a1b3b78),
+ (0x3fe04c0906e9f028, 0x3febf064da5e176a, 0x3fdf3405af2bebc0),
+ (0x3fe0652b03fdfd24, 0x3febe41b5936a8f3, 0x3fdf5fdf024485e7),
+ (0x3fe07e4d0b91a0db, 0x3febd7c09e80889b, 0x3fdf8ba50d2a0c48),
+ (0x3fe0976ef1f9c5ae, 0x3febcb54c769186a, 0x3fdfb75768e7fa9b),
+ (0x3fe0b090e7d18f51, 0x3febbed7c3a63454, 0x3fdfe2f64f20fede),
+ (0x3fe0c9b2e9a39b2d, 0x3febb2499c87d6e9, 0x3fe00740cf66097a),
+ (0x3fe0e2d4ddcaea14, 0x3feba5aa669df22e, 0x3fe01cfc88506502),
+ (0x3fe0fbf6e1078c8e, 0x3feb98fa1b3ff547, 0x3fe032ae5dc3499b),
+ (0x3fe11518d98a4a52, 0x3feb8c38cf44e150, 0x3fe048562c12ec1d),
+ (0x3fe12e3add8e0dcc, 0x3feb7f667f41f3ed, 0x3fe05df3f90aa7ce),
+ (0x3fe1475ce227685c, 0x3feb728338a5b7b8, 0x3fe07387ade96093),
+ (0x3fe1607ec66ef221, 0x3feb658f1462d09f, 0x3fe0891121335eab),
+ (0x3fe179a0be834cc7, 0x3feb5889ffa66df4, 0x3fe09e9072510f34),
+ (0x3fe192c2b8568d3a, 0x3feb4b740bbd2fb5, 0x3fe0b405848141dd),
+ (0x3fe1abe4b59244a1, 0x3feb3e4d3fd6bd5a, 0x3fe0c9704befbcad),
+ (0x3fe1c506b2c3bd2e, 0x3feb3115a5da6c4f, 0x3fe0ded0b8742978),
+ (0x3fe1de289173a977, 0x3feb23cd5613980e, 0x3fe0f426a30829d9),
+ (0x3fe1f74a8f60e6e9, 0x3feb167438110e6e, 0x3fe1097232ed0851),
+ (0x3fe2106ca03e5fd0, 0x3feb090a5a650d01, 0x3fe11eb350745afe),
+ (0x3fe2298e9db917c4, 0x3feafb8fd9ca55cd, 0x3fe133e9ce192ca0),
+ (0x3fe242b08f9ff90f, 0x3feaee04ba8026f6, 0x3fe14915a5706056),
+ (0x3fe25bd28f640cf6, 0x3feae068f72931d9, 0x3fe15e36ded7bb02),
+ (0x3fe274f47c0699ab, 0x3fead2bcaa0d2e11, 0x3fe1734d518c9922),
+ (0x3fe28e1685bcfd71, 0x3feac4ffc15749f9, 0x3fe1885918f9ac8e),
+ (0x3fe2a736fe1e1494, 0x3feab7333233d9b8, 0x3fe19d58c0a87f59),
+ (0x3fe2c05a9f4eb35a, 0x3feaa9546b006d3c, 0x3fe1b2502def806a),
+ (0x3fe2d97c6af42411, 0x3fea9b66344e259f, 0x3fe1c73b28d8e8d8),
+ (0x3fe2f29e6e1ad75e, 0x3fea8d6775437b8d, 0x3fe1dc1b5a8d2e11),
+ (0x3fe30bc06e6776fd, 0x3fea7f5856cdc155, 0x3fe1f0f0859072c6),
+ (0x3fe324e1d749c5d3, 0x3fea7139354a6417, 0x3fe205ba2249d8d8),
+ (0x3fe33e045ca06605, 0x3fea63092400435f, 0x3fe21a798c19a7e7),
+ (0x3fe357267800e833, 0x3fea54c9076262f5, 0x3fe22f2d7377bfc7),
+ (0x3fe370485e5be5d6, 0x3fea4678cad14810, 0x3fe243d5f7a46407),
+ (0x3fe3896a4d91d9d2, 0x3fea3818540e8f5e, 0x3fe258733edb7371),
+ (0x3fe3a28c599d4a38, 0x3fea29a7a0666245, 0x3fe26d054caf4fb0),
+ (0x3fe3bbae6b7bfd61, 0x3fea1b26c5d7e6f3, 0x3fe2818c01832d38),
+ (0x3fe3d4d03ed37a30, 0x3fea0c95f4fd994c, 0x3fe29607190147e2),
+ (0x3fe3edf268105227, 0x3fe9fdf4e0b7d0c8, 0x3fe2aa76ff6bb39c),
+ (0x3fe4071445cf2ff6, 0x3fe9ef43eff2b264, 0x3fe2bedb24e4da22),
+ (0x3fe420363dd59357, 0x3fe9e082f06f9ebe, 0x3fe2d333cf8d19db),
+ (0x3fe4395843ba2fd5, 0x3fe9d1b1f26b4f3c, 0x3fe2e780e8af8d40),
+ (0x3fe4527a412b9192, 0x3fe9c2d10c2c8325, 0x3fe2fbc251bb901b),
+ (0x3fe46b9c27a00cd7, 0x3fe9b3e04f99cbd5, 0x3fe30ff7f2910508),
+ (0x3fe484be309a8d17, 0x3fe9a4dfa3af458c, 0x3fe32421ecef726e),
+ (0x3fe49de033475607, 0x3fe995cf29f2517a, 0x3fe33840139184b1),
+ (0x3fe4b7021f53f06e, 0x3fe986aef5919af2, 0x3fe34c524d121d49),
+ (0x3fe4d0240f052501, 0x3fe9777f00244ce3, 0x3fe36058a217b268),
+ (0x3fe4e9463716a53c, 0x3fe9683f32f2ae77, 0x3fe3745330215f46),
+ (0x3fe502681e9a6016, 0x3fe958efe0cb9eeb, 0x3fe388418ac15fff),
+ (0x3fe51b8a16255181, 0x3fe94990e237aa2e, 0x3fe39c23e5b6b244),
+ (0x3fe534ac09d21709, 0x3fe93a224cd1d5d1, 0x3fe3affa24f6eadd),
+ (0x3fe54dcdf3b3627e, 0x3fe92aa42dcf60f8, 0x3fe3c3c437a1dace),
+ (0x3fe566effef25aad, 0x3fe91b16740dcf8f, 0x3fe3d782336d7ad4),
+ (0x3fe5801215d3e4d3, 0x3fe90b79366e5e53, 0x3fe3eb33faf99080),
+ (0x3fe59933ff695620, 0x3fe8fbcca2107806, 0x3fe3fed9559bf7b0),
+ (0x3fe5b255ee2800cd, 0x3fe8ec10a14c3bbf, 0x3fe412725ec52f70),
+ (0x3fe5cb77fdabb08b, 0x3fe8dc452c6aa905, 0x3fe425ff1fc9c896),
+ (0x3fe5e499ef7e111b, 0x3fe8cc6a746824f6, 0x3fe4397f5c023a11),
+ (0x3fe5fdbbe1bc4e34, 0x3fe8bc807027ea41, 0x3fe44cf31eda7ea0),
+ (0x3fe616ddfb59ae0d, 0x3fe8ac8710ace7bf, 0x3fe4605a7a5aaeff),
+ (0x3fe62fffdd1c090a, 0x3fe89c7e9c66cac4, 0x3fe473b51912497a),
+ (0x3fe64921cfa4dad5, 0x3fe88c66ef074f3a, 0x3fe48703271ce28c),
+ (0x3fe66243d696fbd4, 0x3fe87c401005fc0b, 0x3fe49a449b40f2d3),
+ (0x3fe67b65d9d2cf92, 0x3fe86c0a18cb21f8, 0x3fe4ad795715a3e0),
+ (0x3fe69487ba5a4421, 0x3fe85bc52776b9fc, 0x3fe4c0a1373040c0),
+ (0x3fe6ada9c7509d39, 0x3fe84b7112ce79ff, 0x3fe4d3bc6c09e271),
+ (0x3fe6c6cbcacfb7d4, 0x3fe83b0e07c9e659, 0x3fe4e6cac0c53e06),
+ (0x3fe6dfedb933bfad, 0x3fe82a9c1836e690, 0x3fe4f9cc20e5450e),
+ (0x3fe6f90fb67efcd3, 0x3fe81a1b36b01830, 0x3fe50cc09bf06f81),
+ (0x3fe71231b7343738, 0x3fe8098b74dea97c, 0x3fe51fa81d7d14ee),
+ (0x3fe72b53a85aa5f7, 0x3fe7f8ece98512ef, 0x3fe532828ba63ede),
+ (0x3fe74475af974694, 0x3fe7e83f85f965d1, 0x3fe5454ff702d29b),
+ (0x3fe75d9799aac134, 0x3fe7d783768ce953, 0x3fe558102da86994),
+ (0x3fe776b9903c81f2, 0x3fe7c6b8a9e499af, 0x3fe56ac34326d2d0),
+ (0x3fe78fdba082343f, 0x3fe7b5df2167453d, 0x3fe57d6935ab2542),
+ (0x3fe7a8fdabde74d9, 0x3fe7a4f6fbedea5e, 0x3fe59001e2ecf285),
+ (0x3fe7c21f8108af2e, 0x3fe794006540f1ec, 0x3fe5a28d1b2b3ab1),
+ (0x3fe7db4178eb2ca1, 0x3fe782fb2be4619a, 0x3fe5b50b14a054d2),
+ (0x3fe7f4637bfce457, 0x3fe771e76a206b4b, 0x3fe5c77bb26e7244),
+ (0x3fe80d856a5e5235, 0x3fe760c5402e27c0, 0x3fe5d9ded1dab0d8),
+ (0x3fe826a77aed6885, 0x3fe74f94932c2aa4, 0x3fe5ec348fa6b9f0),
+ (0x3fe83fc98cbf9b15, 0x3fe73e55841a0699, 0x3fe5fe7cc8635d47),
+ (0x3fe858eb8885faf1, 0x3fe72d082dab83d0, 0x3fe610b75fe5f52f),
+ (0x3fe8720d606e4a1c, 0x3fe71baca44228b1, 0x3fe622e441189d58),
+ (0x3fe88b2f5ac151d8, 0x3fe70a42c20978bf, 0x3fe63503939a8c88),
+ (0x3fe8a4516df43f11, 0x3fe6f8ca982975b6, 0x3fe64715452c2eb4),
+ (0x3fe8bd735c5fca22, 0x3fe6e7445c4d6347, 0x3fe659191e5ec1a0),
+ (0x3fe8d69563ed711a, 0x3fe6d5afee23005e, 0x3fe66b0f407fe153),
+ (0x3fe8efb7598d0461, 0x3fe6c40d769b68c4, 0x3fe67cf781aec585),
+ (0x3fe908d9720f2113, 0x3fe6b25cdb7a4ca4, 0x3fe68ed1fc7319a2),
+ (0x3fe921fb5b6a3d21, 0x3fe6a09e61712f13, 0x3fe6a09e6b8d4885),
+]
+
+/*
+ The huge reduction tables. R[j] = (l1, l2, l3), such that l1+l2+l3
+ is less than 2pi, and 2^(50j + 25) is approximately l1+l2+l3
+ (mod 2pi).
+
+ Each l is stored as the bit pattern of a flt64. huge_reduce_2pi
+ picks the row with j = (e - 25)/50, where e is the exponent of
+ its argument, and also reads the one or two rows below it.
+
+ The stepping of 50 was chosen because it is a nice, round number,
+ close to 52. [GB91] make reference to a difficult-to-reduce
+ number, Xhard, which is in the range (-2^27, 2^27). By using an
+ offset of 25, we can ensure that huge_reduce returns results in
+ about that range. This allows us to reuse calculations in [GB91]
+ showing we have probably stored enough bits of pi (and of these
+ numbers).
+
+ TODO: There is always the chance that, for some x = x1*2^j +
+ x2*2^k, there is catastrophic cancellation which makes the
+ remainder sum x1*r[50j+25] + x2*r[50k+25] imprecise. That needs
+ to be checked; it shouldn't be too hard.
+ */
+const R : (uint64, uint64, uint64)[20] = [
+ (0x4011fb222e13e839, 0xbcbeecfb7d19df11, 0x3955f9708d867b5b), /* 2^25 mod 2pi */
+ (0x3ffee6639887604d, 0x3c88b437ad3f55e0, 0x39218522303457a8), /* 2^75 mod 2pi */
+ (0x3fc589bfb31f1687, 0x3c31994c1edb7977, 0xb8dd15afd80892a0), /* 2^125 mod 2pi */
+ (0x400663e27d2e0b47, 0xbca00a74acd21bf1, 0x39257592b34b8c25), /* 2^175 mod 2pi */
+ (0x40177ff13d560e79, 0xbc8403f354e865f6, 0x38e6e69984c4338e), /* 2^225 mod 2pi */
+ (0x4015a4a4671a3606, 0x3ca6fc3b1a2bddd5, 0xb9270923530d0279), /* 2^275 mod 2pi */
+ (0x3ff3bf65f73a0474, 0x3c67deda97eb4131, 0x38fa32118f8f578f), /* 2^325 mod 2pi */
+ (0x3ffa8e506685f311, 0x3c698a6391d9d31b, 0x38db58d212d28d0a), /* 2^375 mod 2pi */
+ (0x400d38d7ecc58385, 0x3c98b076242f0ed3, 0x38e04e83f1274a16), /* 2^425 mod 2pi */
+ (0x4017023cb671ed43, 0xbcbeb418af32f189, 0xb944d3aea8efaa5f), /* 2^475 mod 2pi */
+ (0x401655d13f672fe9, 0x3ca8e56fc8ad533e, 0x393d464a045591bf), /* 2^525 mod 2pi */
+ (0x3feec8dd916fa3b6, 0x3c82c834374ed2af, 0x39225586c285a0d8), /* 2^575 mod 2pi */
+ (0x400ed2900b47ba63, 0x3c697a0d5776cc4d, 0x390c831068ee85b1), /* 2^625 mod 2pi */
+ (0x4008563b8e99caac, 0xbc5ee03ee870ab9d, 0x38fb980dd9756063), /* 2^675 mod 2pi */
+ (0x4012d961cf7cd57d, 0xbcbdde0a1d27628e, 0xb95a8840599f8246), /* 2^725 mod 2pi */
+ (0x4016919aac4a18f4, 0xbc93552937a28b88, 0xb9145e70b28c0cbb), /* 2^775 mod 2pi */
+ (0x400c37d195196372, 0xbca948065398479d, 0xb94cadd4f6e80c1b), /* 2^825 mod 2pi */
+ (0x400d77a34a63ebae, 0x3ca8ca5d7ccc2874, 0xb91611b1e4b65369), /* 2^875 mod 2pi */
+ (0x3ff4cb08f62cb38b, 0xbc90f42df8fc967a, 0xb8ed7d71202d2f45), /* 2^925 mod 2pi */
+ (0x401848860f8742d6, 0x3c90337738c287b4, 0xb92f24fc614ec2f7), /* 2^975 mod 2pi */
+]
+
+/*
+ Single-precision sine. The computation is carried out in flt64
+ by w64 (sine only); the two-part sine it returns is handed to
+ round_down to produce the flt32 result.
+ */
+const sin32 = {x : flt32
+	var hi, lo
+
+	(hi, lo, _, _) = w64((x : flt64), true, false)
+	-> round_down(hi, lo)
+}
+
+/*
+ Double-precision sine: asks w64 for the sine only and returns
+ the first member of its two-part result.
+ */
+const sin64 = {x : flt64
+	var sn
+
+	(sn, _, _, _) = w64(x, true, false)
+	-> sn
+}
+
+/*
+ Single-precision cosine. The computation is carried out in
+ flt64 by w64 (cosine only); the two-part cosine it returns is
+ handed to round_down to produce the flt32 result.
+ */
+const cos32 = {x : flt32
+	var hi, lo
+
+	(_, _, hi, lo) = w64((x : flt64), false, true)
+	-> round_down(hi, lo)
+}
+
+/*
+ Double-precision cosine: asks w64 for the cosine only and
+ returns the first member of its two-part result.
+ */
+const cos64 = {x : flt64
+	var cs
+
+	(_, _, cs, _) = w64(x, false, true)
+	-> cs
+}
+
+/*
+ Single-precision sine and cosine from one call to w64. Each
+ two-part flt64 result is passed through round_down; the pair
+ returned is ordered (sin, cos).
+ */
+const sincos32 = {x : flt32
+	var sn : flt64, sn_lo : flt64, cs : flt64, cs_lo : flt64
+
+	(sn, sn_lo, cs, cs_lo) = w64((x : flt64), true, true)
+	-> (round_down(sn, sn_lo), round_down(cs, cs_lo))
+}
+
+/*
+ Double-precision sine and cosine from one call to w64; only
+ the first member of each two-part result is kept. The pair
+ returned is ordered (sin, cos).
+ */
+const sincos64 = {x : flt64
+	var sn, cs
+
+	(sn, _, cs, _) = w64(x, true, true)
+	-> (sn, cs)
+}
+
+/*
+ Calculate sin and/or cos of x.
+
+ want_sin and want_cos select which results are computed; a
+ result that was not requested is left at zero. The return
+ value is (sin, sin2, cos, cos2), where each (value, value2)
+ pair is what double_compensated_sum produced. The flt32
+ wrappers pass such a pair to round_down, the flt64 wrappers
+ keep only the first member.
+
+ Steps: reduce x with trig_reduce, fold the sign into x1 >= 0,
+ pick a table row (xi, cos(xi), sin(xi)), evaluate the sin and
+ cos polynomials at delta = x - xi, combine with the angle
+ addition formulas, then undo the sign and quadrant changes.
+ */
+const w64 = {x : flt64, want_sin : bool, want_cos : bool
+ var sin_ret : flt64 = 0.0
+ var cos_ret : flt64 = 0.0
+ var sin_ret2 : flt64 = 0.0
+ var cos_ret2 : flt64 = 0.0
+
+ var e : int64
+ (_, e, _) = std.flt64explode(x)
+
+ /* NOTE(review): e == 1024 presumably identifies infinities and NaNs -- confirm against std.flt64explode */
+ if e == 1024
+ -> (std.flt64nan(), 0.0, std.flt64nan(), 0.0)
+ ;;
+
+ var N : int64
+ var x1 : flt64, x2 : flt64
+ (N, x1, x2) = trig_reduce(x)
+
+ /* Handle multiples of pi/2 */
+ var swap_sin_cos : bool = false
+ var then_negate_sin : bool = false
+ var then_negate_cos : bool = false
+ match N
+ | 1:
+ /* sin(x + pi/2) = cos(x), cos(x + pi/2) = -sin(x) */
+ swap_sin_cos = true
+ then_negate_cos = true
+ | 2:
+ /* sin(x + pi) = -sin(x), cos(x + pi) = -cos(x) */
+ then_negate_cos = true
+ then_negate_sin = true
+ | 3:
+ /* sin(x + 3pi/2) = -cos(x), cos(x + 3pi/2) = sin(x) */
+ swap_sin_cos = true
+ then_negate_sin = true
+ | _:
+ ;;
+
+ /* Work with a non-negative x1; the sine's sign is restored at the end */
+ var first_negate_sin : bool = false
+ if x1 < 0.0
+ x1 = -x1
+ x2 = -x2
+ first_negate_sin = true
+ ;;
+
+ /* Figure out where in the C table x lies */
+ var xi : uint64, sin_xi : uint64, cos_xi : uint64, sin_delta, cos_delta
+ (xi, cos_xi, sin_xi) = trig_table_approx(x1, C)
+
+ /*
+ Compute x - xi with compensated summation. Because xi
+ and delta both lie in the same interval of width (pi/4)/256,
+ which is less than 1/256, we can use that |delta1| <
+ 2^-8: we need a polynomial approximation of at least
+ degree 7.
+
+ This also gives that |delta2| < 2^(-60), vanishing quickly
+ in the polynomial approximations.
+ */
+ var delta1, delta2, deltat
+ (delta1, deltat) = fast2sum(-std.flt64frombits(xi), x1)
+ (delta2, _) = fast2sum(deltat, x2)
+
+ /*
+ sin(delta); the degree 2 coefficient is near 0, so delta_2
+ only shows up in deg 1
+ */
+ sin_delta = horner_polyu(delta1, sin_coeff[:])
+ sin_delta += delta2 * std.flt64frombits(sin_coeff[1])
+
+ /*
+ cos(delta); delta_2 shows up in deg 1 and 2; the term
+ we care about is a1*d2 + 2*a2*d1*d2
+ */
+ cos_delta = horner_polyu(delta1, cos_coeff[:])
+ cos_delta += delta2 * fma64(delta1, 2.0*std.flt64frombits(cos_coeff[2]), std.flt64frombits(cos_coeff[1]))
+
+ var q : flt64[4]
+
+ /*
+ sin(xi + delta) = sin(xi)cos(delta) + cos(xi)sin(delta).
+ Needed when the sine is wanted directly, or when the
+ cosine is wanted and the quadrant swaps the two.
+ */
+ if (want_sin && !swap_sin_cos) || (want_cos && swap_sin_cos)
+ (q[0], q[1]) = two_by_two(std.flt64frombits(sin_xi), cos_delta)
+ (q[2], q[3]) = two_by_two(std.flt64frombits(cos_xi), sin_delta)
+ std.sort(q[:], fltabscmp)
+
+ (sin_ret, sin_ret2) = double_compensated_sum(q[:])
+ ;;
+
+ /*
+ cos(xi + delta) = cos(xi)cos(delta) - sin(xi)sin(delta).
+ Needed when the cosine is wanted directly, or when the
+ sine is wanted and the quadrant swaps the two.
+ */
+ if (want_cos && !swap_sin_cos) || (want_sin && swap_sin_cos)
+ (q[0], q[1]) = two_by_two(std.flt64frombits(cos_xi), cos_delta)
+ (q[2], q[3]) = two_by_two(std.flt64frombits(sin_xi), sin_delta)
+ q[2] = -q[2]
+ q[3] = -q[3]
+
+ /*
+ No need to sort; cos_xi and sin_xi are in [0,1],
+ cos_delta is close to 1, sin_delta is close to
+ 0.
+ */
+ std.swap(&q[1], &q[2])
+
+ (cos_ret, cos_ret2) = double_compensated_sum(q[:])
+ ;;
+
+ /* Undo the sign flip of x1 (sin is odd, cos is even) */
+ if first_negate_sin
+ sin_ret = -sin_ret
+ sin_ret2 = -sin_ret2
+ ;;
+
+ /* Apply the quadrant corrections selected from N above */
+ if swap_sin_cos
+ std.swap(&sin_ret, &cos_ret)
+ std.swap(&sin_ret2, &cos_ret2)
+ ;;
+
+ if then_negate_sin
+ sin_ret = -sin_ret
+ sin_ret2 = -sin_ret2
+ ;;
+
+ if then_negate_cos
+ cos_ret = -cos_ret
+ cos_ret2 = -cos_ret2
+ ;;
+
+ -> (sin_ret, sin_ret2, cos_ret, cos_ret2)
+}
+
+/*
+ Reduce x to N*(pi/2) + x', with x' in [-pi/4, pi/4].
+
+ Returns (N mod 4, x1, x2): the first member is normalized to
+ the range 0..3, and x' is returned as the two-part sum
+ x1 + x2 produced by double_compensated_sum.
+ */
+const trig_reduce = {x : flt64
+ var N : int64 = 0
+ var Nf : flt64
+
+ /*
+ We actually only care about N mod 4. If x is very large,
+ the ultimate N that we end up using might not be
+ representable (either as an int64 or flt64), so we instead
+ just keep track of the mod 4 part exactly.
+ */
+
+ /*
+ If we want to store pi in a form to properly reduce
+ 2^1000 or so, it turns out that there's some complication
+ with the final digits: they trail off beyond subnormality
+ and can't be represented, even though they are the digits
+ we need for the reduction. Therefore, for extremely high
+ values of x, we pre-compute the reduction and return it
+ here.
+
+ We get: x1 about in range 2^25, and that (x1 + x2 + x3)
+ approximates x (mod 2pi) very well. This is good enough
+ to ensure that N = x/(pi/2) is representable (int64 and
+ flt64). We do need to worry about catastrophic cancellation,
+ however.
+ */
+ var x1, x2, x3
+ (x1, x2, x3) = huge_reduce_2pi(x)
+
+ /* Decode the multi-part constants; only pi_o_4_0 and pi_o_4_1 are used below */
+ var pi_o_2_0 : flt64 = std.flt64frombits(pi_over_2[0])
+ var pi_o_2_1 : flt64 = std.flt64frombits(pi_over_2[1])
+ var pi_o_2_2 : flt64 = std.flt64frombits(pi_over_2[2])
+ var pi_o_2_3 : flt64 = std.flt64frombits(pi_over_2[3])
+ var pi_o_4_0 : flt64 = std.flt64frombits(pi_over_4[0])
+ var pi_o_4_1 : flt64 = std.flt64frombits(pi_over_4[1])
+ var pi_o_4_2 : flt64 = std.flt64frombits(pi_over_4[2])
+ var pi_o_4_3 : flt64 = std.flt64frombits(pi_over_4[3])
+
+ var total_N = 0
+ var q : flt64[11]
+
+ /* Compute initial reduction -- this might not be sufficient. */
+ total_N = rn(x1 * std.flt64frombits(two_over_pi))
+ Nf = (-total_N : flt64)
+ /* q collects x1 + x2 + x3 and the exact products of -N with each part of pi/2 */
+ (q[0], q[ 4], q[5]) = (x1, x2, x3)
+ (q[1], q[ 3]) = two_by_two(Nf, pi_o_2_0)
+ (q[2], q[ 6]) = two_by_two(Nf, pi_o_2_1)
+ (q[7], q[ 8]) = two_by_two(Nf, pi_o_2_2)
+ (q[9], q[10]) = two_by_two(Nf, pi_o_2_3)
+
+ /*
+ Sorting is very slow, but it's only the top five or so
+ that are in question
+ */
+ std.sort(q[0:5], fltabscmp)
+ (x1, x2) = double_compensated_sum(q[:])
+
+ /* Repeat until -pi/4 <= x1 + x2 <= pi/4, as judged by le_22 on two-part values */
+ while !(le_22(x1, x2, pi_o_4_0, pi_o_4_1) && le_22(-pi_o_4_0, -pi_o_4_1, x1, x2))
+ N = rn(x1 * std.flt64frombits(two_over_pi))
+ Nf = (-N : flt64)
+
+ /*
+ Sorting is slow. We know that x1 is roughly
+ cancelled by Nf * pi_o_2_0, so line those up.
+ */
+ (q[0], q[2]) = (x1, x2)
+ (q[1], q[3]) = two_by_two(Nf, pi_o_2_0)
+ (q[4], q[5]) = two_by_two(Nf, pi_o_2_1)
+ (q[6], q[7]) = two_by_two(Nf, pi_o_2_2)
+ (q[8], q[9]) = two_by_two(Nf, pi_o_2_3)
+ (x1, x2) = double_compensated_sum(q[0:10])
+ total_N += (N % 4)
+ ;;
+
+ /* total_N may be negative; map its remainder into 0..3 */
+ -> (((total_N % 4) + 4) % 4, x1, x2)
+}
+
+/*
+ Pre-reduce a possibly huge x modulo 2pi, using the table R.
+
+ Returns three flt64 values whose sum approximates x (mod 2pi).
+ If the exponent of x is below 25, x is returned unchanged as
+ (x, 0.0, 0.0). Otherwise x is cut into up to three pieces
+ aligned to the 50-bit stepping of R, each piece is multiplied
+ exactly (two_by_two) by the matching row of R, and the
+ eighteen partial products are added by triple_compensated_sum.
+ */
+const huge_reduce_2pi = {x : flt64
+	var e : int64
+	(_, e, _) = std.flt64explode(x)
+
+	if e < 25
+		-> (x, 0.0, 0.0)
+	;;
+
+	/*
+	   Since the stepping of R is 50, and x has 53 significant
+	   bits, we get a splitting of x into (at most) three
+	   components. We want
+
+	     x = [ xa * 2^(50j + 25) ] + [ xb * 2^(50(j-1) + 25) ]
+	         + [ xc * 2^(50(j-2) + 25) ]
+
+	   and {ai}, {bi}, {ci} such that
+
+	     a1 + a2 + a3 === ( 2^(50j + 25) ) mod 2pi
+
+	     b1 + b2 + b3 === ( 2^(50(j-1) + 25) ) mod 2pi
+
+	     c1 + c2 + c3 === ( 2^(50(j-2) + 25) ) mod 2pi
+
+	   If j is small enough, we can slack off a bit. We really
+	   just want xa(a1+a2+a3) + xb(b1+b2+b3) + xc(c1+c2+c3)
+	   === x (mod 2pi) with a heck of a lot of precision.
+	 */
+	var j : uint64 = (e - 25 : uint64) / 50
+	var xa : flt64 = 0.0, xb : flt64 = 0.0, xc : flt64 = 0.0
+	var a1 : flt64 = 0.0, a2 : flt64 = 0.0, a3 : flt64 = 0.0
+	var b1 : flt64 = 0.0, b2 : flt64 = 0.0, b3 : flt64 = 0.0
+	var c1 : flt64 = 0.0, c2 : flt64 = 0.0, c3 : flt64 = 0.0
+	var u1 : uint64, u2 : uint64, u3 : uint64
+
+	var e1 : int64 = 50*(j : int64) + 25
+	var e2 : int64 = e1 - 50
+	var e3 : int64 = e2 - 50
+
+	/*
+	   Integer parts of x at each scale. The order of the two
+	   subtractions matters: xc must be reduced with the
+	   not-yet-reduced xb.
+	 */
+	xa = trunc(scale2(x,-e1))
+	xb = trunc(scale2(x,-e2))
+	xc = trunc(scale2(x,-e3))
+	xc -= scale2(xb, 50)
+	xb -= scale2(xa, 50)
+
+	(u1, u2, u3) = R[j]
+	a1 = std.flt64frombits(u1)
+	a2 = std.flt64frombits(u2)
+	a3 = std.flt64frombits(u3)
+
+	/*
+	   When there is no row of R for a lower piece, whatever is
+	   left of x after removing the higher pieces is used
+	   directly, with a multiplier of 1.0.
+	 */
+	if j == 0
+		(b1, b2, b3) = (x - scale2(xa, e1), 0.0, 0.0)
+		xb = 1.0
+		(c1, c2, c3) = (0.0, 0.0, 0.0)
+		xc = 0.0
+	else
+		(u1, u2, u3) = R[j - 1]
+		b1 = std.flt64frombits(u1)
+		b2 = std.flt64frombits(u2)
+		b3 = std.flt64frombits(u3)
+
+		if j == 1
+			(c1, c2, c3) = (x - scale2(xa, e1) - scale2(xb, e2), 0.0, 0.0)
+			xc = 1.0
+		else
+			(u1, u2, u3) = R[j - 2]
+			c1 = std.flt64frombits(u1)
+			c2 = std.flt64frombits(u2)
+			c3 = std.flt64frombits(u3)
+		;;
+	;;
+
+	/*
+	   Now we need to combine all this. Even worse, when we
+	   multiply the two together, we need to keep full precision
+	   (at least for the high-ish bits), so we need some help.
+
+	   TODO: The c-type calculations can probably be optimized,
+	   since xc is so small.
+	 */
+	var q : flt64[18]
+	(q[ 0], q[ 1]) = two_by_two(xa, a1)
+	(q[ 2], q[ 3]) = two_by_two(xa, a2)
+	(q[ 4], q[ 5]) = two_by_two(xa, a3)
+	(q[ 6], q[ 7]) = two_by_two(xb, b1)
+	(q[ 8], q[ 9]) = two_by_two(xb, b2)
+	(q[10], q[11]) = two_by_two(xb, b3)
+	(q[12], q[13]) = two_by_two(xc, c1)
+	(q[14], q[15]) = two_by_two(xc, c2)
+	(q[16], q[17]) = two_by_two(xc, c3)
+
+	-> triple_compensated_sum(q[:])
+}
+
+/*
+ Pick the row of the table C to use for x1.
+
+ Returns the chosen row unchanged, i.e. the bit patterns
+ (xi, cos(xi), sin(xi)). w64 calls this with a non-negative x1.
+ */
+const trig_table_approx = {x1 : flt64, C : (uint64, uint64, uint64)[257]
+ /* x1 * 1024/pi, rounded: a first guess at the row index */
+ var j = (rn(x1 * std.flt64frombits(oneohtwofour_over_pi)) : uint64)
+ var x1u = std.flt64bits(x1)
+ var xi, sin_xi, cos_xi, test_cos_xi, test_sin_xi
+
+ /* We either want j or j - 1. */
+ /* NOTE(review): j is a uint64, so only the upper clamp at 256 can have an effect here */
+ j = std.max(std.min(j, 256), 0)
+
+ (xi, cos_xi, sin_xi) = C[j]
+ if j > 0
+ var test_xi
+ (test_xi, test_cos_xi, test_sin_xi) = C[j - 1]
+ /*
+ The distances are taken between bit patterns, not between
+ the floating point values. NOTE(review): the operands are
+ unsigned, so a "negative" difference presumably wraps
+ around to a huge value before std.abs sees it -- confirm
+ this selects the intended row.
+ */
+ if std.abs(x1u - test_xi) < std.abs(x1u - xi)
+ -> (test_xi, test_cos_xi, test_sin_xi)
+ ;;
+ ;;
+
+ -> (xi, cos_xi, sin_xi)
+}
+
+/*
+ Add up the elements of q, returning the total as a three-part
+ value (s1, s2, s3).
+
+ q is sorted in place with fltabscmp first, so the caller's
+ slice is reordered. q[0] is read unconditionally, so q must
+ not be empty.
+ */
+const triple_compensated_sum = {q : flt64[:]
+ /* TODO: verify, with GAPPA or something, that this is correct. */
+ std.sort(q, fltabscmp)
+ var s1 : flt64, s2 : flt64, s3
+ var t1 : flt64, t2 : flt64, t3 : flt64, t4 : flt64, t5 : flt64, t6
+ s1 = q[0]
+ s2 = 0.0
+ s3 = 0.0
+ for qq : q[1:]
+ /* Feed qq in at the lowest part and carry the result upwards: s3, then s2, then s1 */
+ (t5, t6) = fast2sum(s3, qq)
+ (t3, t4) = fast2sum(s2, t5)
+ (t1, t2) = fast2sum(s1, t3)
+ s1 = t1
+ /* The two leftovers t4 and t6 are merged with a plain addition */
+ (s2, s3) = fast2sum(t2, t4 + t6)
+ ;;
+
+ -> (s1, s2, s3)
+}
+
+/*
+ Comparison function for std.sort that orders flt64 values by
+ magnitude. The sign bit (bit 63) is masked off and the
+ remaining bit patterns are compared as integers; the value
+ with the larger pattern is reported as `std.Before.
+ */
+const fltabscmp = {x : flt64, y : flt64
+	var xmag = std.flt64bits(x) & ~(1 << 63)
+	var ymag = std.flt64bits(y) & ~(1 << 63)
+
+	if xmag > ymag
+		-> `std.Before
+	elif xmag < ymag
+		-> `std.After
+	;;
+
+	-> `std.Equal
+}
+
+/*
+   Decide whether the two-part value (a1 + a2) is <= (b1 + b2).
+
+   The comparison is lexicographic: the leading parts decide
+   unless neither is smaller than the other, in which case the
+   answer is true exactly when a2 > b2 does not hold.
+ */
+const le_22 = { a1, a2, b1, b2
+	if a1 > b1
+		-> false
+	elif a1 < b1
+		-> true
+	;;
+
+	-> !(a2 > b2)
+}
diff --git a/lib/math/sqrt-impl.myr b/lib/math/sqrt-impl.myr
index 4d6b40d..a8b243f 100644
--- a/lib/math/sqrt-impl.myr
+++ b/lib/math/sqrt-impl.myr
@@ -4,8 +4,8 @@ use "fpmath"
/* See [Mul+10], sections 5.4 and 8.7 */
pkg math =
- pkglocal const sqrt32 : (f : flt32 -> flt32)
- pkglocal const sqrt64 : (f : flt64 -> flt64)
+ pkglocal const sqrt32 : (x : flt32 -> flt32)
+ pkglocal const sqrt64 : (x : flt64 -> flt64)
;;
extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)
@@ -38,7 +38,7 @@ type fltdesc(@f, @u, @i) = struct
In the flt64 case, we need only one more iteration.
*/
-const ab32 : (uint32, uint32)[:] = [
+const ab32 : (uint32, uint32)[7] = [
(0x3f800000, 0x3f800000), /* Nothing should ever get normalized to < 1.0 */
(0x3fa66666, 0x3f6f30ae), /* [1.0, 1.3 ) -> 0.9343365431 */
(0x3fd9999a, 0x3f5173ca), /* [1.3, 1.7 ) -> 0.8181730509 */
@@ -46,9 +46,9 @@ const ab32 : (uint32, uint32)[:] = [
(0x40333333, 0x3f215342), /* [2.25, 2.8 ) -> 0.6301766634 */
(0x4059999a, 0x3f118e0e), /* [2.8, 3.4 ) -> 0.5685738325 */
(0x40800000, 0x3f053049), /* [3.4, 4.0 ) -> 0.520268023 */
-][:]
+]
-const ab64 : (uint64, uint64)[:] = [
+const ab64 : (uint64, uint64)[8] = [
(0x3ff0000000000000, 0x3ff0000000000000), /* < 1.0 */
(0x3ff3333333333333, 0x3fee892ce1608cbc), /* [1.0, 1.2) -> 0.954245033445111356940060431953 */
(0x3ff6666666666666, 0x3fec1513a2184094), /* [1.2, 1.4) -> 0.877572838393478438234751592972 */
@@ -57,56 +57,58 @@ const ab64 : (uint64, uint64)[:] = [
(0x400599999999999a, 0x3fe47717c17cd34f), /* [2.2, 2.7) -> 0.639537694840876969060161627567 */
(0x400b333333333333, 0x3fe258df212a8e9a), /* [2.7, 3.4) -> 0.573348583963212421465982515656 */
(0x4010000000000000, 0x3fe0a5989f2dc59a), /* [3.4, 4.0) -> 0.520214377304159869552790951275 */
-][:]
-
-const sqrt32 = {f : flt32
- const d : fltdesc(flt32, uint32, int32) = [
- .explode = std.flt32explode,
- .assem = std.flt32assem,
- .fma = fma32,
- .tobits = std.flt32bits,
- .frombits = std.flt32frombits,
- .nan = 0x7fc00000,
- .emin = -127,
- .emax = 128,
- .normmask = 1 << 23,
- .sgnmask = 1 << 31,
- .ab = ab32,
- .iterlim = 3,
- ]
- -> sqrtgen(f, d)
+]
+
+/*
+ Descriptor handed to sqrtgen for flt32: the std conversion routines
+ and fma32, plus the bit-level constants of the format. It is a
+ file-level constant, so sqrt32 does not rebuild it on every call.
+ */
+const desc32 : fltdesc(flt32, uint32, int32) = [
+ .explode = std.flt32explode,
+ .assem = std.flt32assem,
+ .fma = fma32,
+ .tobits = std.flt32bits,
+ .frombits = std.flt32frombits,
+ .nan = 0x7fc00000, /* bits of the quiet NaN sqrtgen returns for negative or NaN input */
+ .emin = -127, /* exploded exponent that, with a zero significand, sqrtgen treats as +/-0.0 */
+ .emax = 128, /* exploded exponent at which sqrtgen returns the input unchanged (+inf) */
+ .normmask = 1 << 23,
+ .sgnmask = 1 << 31, /* sign bit; sqrtgen tests it on the fma residuals */
+ .ab = ab32[0:7], /* all 7 entries of ab32, as a slice */
+ .iterlim = 3,
+]
+
+/*
+ Descriptor handed to sqrtgen for flt64: the std conversion routines
+ and fma64, plus the bit-level constants of the format. It is a
+ file-level constant, so sqrt64 does not rebuild it on every call.
+ */
+const desc64 : fltdesc(flt64, uint64, int64) = [
+ .explode = std.flt64explode,
+ .assem = std.flt64assem,
+ .fma = fma64,
+ .tobits = std.flt64bits,
+ .frombits = std.flt64frombits,
+ .nan = 0x7ff8000000000000, /* bits of the quiet NaN sqrtgen returns for negative or NaN input */
+ .emin = -1023, /* exploded exponent that, with a zero significand, sqrtgen treats as +/-0.0 */
+ .emax = 1024, /* exploded exponent at which sqrtgen returns the input unchanged (+inf) */
+ .normmask = 1 << 52,
+ .sgnmask = 1 << 63, /* sign bit; sqrtgen tests it on the fma residuals */
+ .ab = ab64[0:8], /* all 8 entries of ab64, as a slice */
+ .iterlim = 4,
+]
+
+/*
+ Square root for flt32: forwards to the generic sqrtgen with the
+ flt32 descriptor desc32. sqrtgen returns +/-0.0 and +inf unchanged
+ and a quiet NaN for negative or NaN input.
+ */
+const sqrt32 = {x : flt32
+ -> sqrtgen(x, desc32)
 }
-const sqrt64 = {f : flt64
- const d : fltdesc(flt64, uint64, int64) = [
- .explode = std.flt64explode,
- .assem = std.flt64assem,
- .fma = fma64,
- .tobits = std.flt64bits,
- .frombits = std.flt64frombits,
- .nan = 0x7ff8000000000000,
- .emin = -1023,
- .emax = 1024,
- .normmask = 1 << 52,
- .sgnmask = 1 << 63,
- .ab = ab64,
- .iterlim = 4,
- ]
- -> sqrtgen(f, d)
+/*
+ Square root for flt64: forwards to the generic sqrtgen with the
+ flt64 descriptor desc64. sqrtgen returns +/-0.0 and +inf unchanged
+ and a quiet NaN for negative or NaN input.
+ */
+const sqrt64 = {x : flt64
+ -> sqrtgen(x, desc64)
 }
-generic sqrtgen = {f : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i
+generic sqrtgen = {x : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i
var n : bool, e : @i, s : @u, e2 : @i
- (n, e, s) = d.explode(f)
+ (n, e, s) = d.explode(x)
/* Special cases: +/- 0.0, negative, NaN, and +inf */
if e == d.emin && s == 0
- -> f
- elif n || std.isnan(f)
+ -> x
+ elif n || std.isnan(x)
/* Make sure to return a quiet NaN */
-> d.frombits(d.nan)
elif e == d.emax
- -> f
+ -> x
;;
/*
@@ -172,11 +174,11 @@ generic sqrtgen = {f : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equat
var r_plus_ulp : @f = d.frombits(d.tobits(r) + 1)
var r_minus_ulp : @f = d.frombits(d.tobits(r) - 1)
- var delta_1 = d.fma(r, r_minus_ulp, -1.0 * f)
+ var delta_1 = d.fma(r, r_minus_ulp, -1.0 * x)
if d.tobits(delta_1) & d.sgnmask == 0
r = r_minus_ulp
else
- var delta_2 = d.fma(r, r_plus_ulp, -1.0 * f)
+ var delta_2 = d.fma(r, r_plus_ulp, -1.0 * x)
if d.tobits(delta_2) & d.sgnmask != 0
r = r_plus_ulp
else
diff --git a/lib/math/sum-impl.myr b/lib/math/sum-impl.myr
index 91aba19..9826947 100644
--- a/lib/math/sum-impl.myr
+++ b/lib/math/sum-impl.myr
@@ -3,10 +3,13 @@ use std
/* For references, see [Mul+10] section 6.3 */
pkg math =
pkglocal const kahan_sum32 : (l : flt32[:] -> flt32)
- pkglocal const priest_sum32 : (l : flt32[:] -> flt32)
+ pkglocal const kahan_sum64 : (l : flt64[:] -> flt64)
- pkglocal const kahan_sum64: (l : flt64[:] -> flt64)
+ pkglocal const priest_sum32 : (l : flt32[:] -> flt32)
pkglocal const priest_sum64 : (l : flt64[:] -> flt64)
+
+ /* Backend for priest_sum; currently not useful enough to expose */
+ pkglocal generic double_compensated_sum : (l : @f[:] -> (@f, @f)) :: numeric,floating @f
;;
type doomed_flt32_arr = flt32[:]
@@ -26,18 +29,18 @@ impl disposable doomed_flt64_arr =
something slower, but more accurate, use something like Priest's
doubly compensated sums.
*/
-pkglocal const kahan_sum32 = {l; -> kahan_sum_gen(l, (0.0 : flt32))}
-pkglocal const kahan_sum64 = {l; -> kahan_sum_gen(l, (0.0 : flt64))}
+pkglocal const kahan_sum32 = {l; -> kahan_sum_gen(l)}
+pkglocal const kahan_sum64 = {l; -> kahan_sum_gen(l)}
-generic kahan_sum_gen = {l : @f[:], zero : @f :: numeric,floating @f
+generic kahan_sum_gen = {l : @f[:] :: numeric,floating @f
if l.len == 0
- -> zero
+ -> (0.0 : @f)
;;
- var s = zero
- var c = zero
- var y = zero
- var t = zero
+ var s = (0.0 : @f)
+ var c = (0.0 : @f)
+ var y = (0.0 : @f)
+ var t = (0.0 : @f)
for x : l
y = x - c
@@ -59,7 +62,9 @@ pkglocal const priest_sum32 = {l : flt32[:]
var l2 = std.sldup(l)
std.sort(l2, mag_cmp32)
auto (l2 : doomed_flt32_arr)
- -> priest_sum_gen(l2, (0.0 : flt32))
+ var s, c
+ (s, c) = double_compensated_sum(l2)
+ -> s
}
const mag_cmp32 = {f : flt32, g : flt32
@@ -72,7 +77,9 @@ pkglocal const priest_sum64 = {l : flt64[:]
 var l2 = std.sldup(l)
- std.sort(l, mag_cmp64)
+ /*
+ Sort the private copy l2, not the caller's slice l: l2 is what is
+ summed below (and must be sorted for double_compensated_sum), and
+ the caller's data must not be reordered. This matches priest_sum32.
+ */
+ std.sort(l2, mag_cmp64)
 auto (l2 : doomed_flt64_arr)
- -> priest_sum_gen(l2, (0.0 : flt64))
+ var s, c
+ (s, c) = double_compensated_sum(l2)
+ -> s
 }
const mag_cmp64 = {f : flt64, g : flt64
@@ -81,14 +88,14 @@ const mag_cmp64 = {f : flt64, g : flt64
-> std.numcmp(v, u)
}
-generic priest_sum_gen = {l : @f[:], zero : @f :: numeric,floating @f
+generic double_compensated_sum = {l : @f[:] :: numeric,floating @f
/* l should be sorted in descending order */
if l.len == 0
- -> zero
+ -> ((0.0 : @f), (0.0 : @f))
;;
- var s = zero
- var c = zero
+ var s = (0.0 : @f)
+ var c = (0.0 : @f)
for x : l
var y = c + x
@@ -100,6 +107,5 @@ generic priest_sum_gen = {l : @f[:], zero : @f :: numeric,floating @f
c = z - (s - t)
;;
- -> s
+ -> (s, c)
}
-
diff --git a/lib/math/tan-impl.myr b/lib/math/tan-impl.myr
new file mode 100644
index 0000000..adbc0ff
--- /dev/null
+++ b/lib/math/tan-impl.myr
@@ -0,0 +1,510 @@
+use std
+
+use "fpmath"
+
+/* We need trig_reduce and trig_table_approx. */
+use "sin-impl"
+use "util"
+
+/*
+ See sin-impl.myr, this implementation is a very close copy;
+ [GB91] provides the guide for implementation. As with sin and
+ cos, our polynomials are of lower degree, and we restrict the
+ polynomial approximation to a smaller range than in [GB91].
+
+ See files pi-constants.c, generate-triples-for-GB91.c, and
+ generate-minimax-by-Remez.gp for where the constants come from.
+ */
+pkg math =
+ pkglocal const tan32 : (x : flt32 -> flt32)
+ pkglocal const tan64 : (x : flt64 -> flt64)
+
+ pkglocal const cot32 : (x : flt32 -> flt32)
+ pkglocal const cot64 : (x : flt64 -> flt64)
+;;
+
+/*
+ Coefficients for minimax polynomial approximating p(x), with
+ tan(x) = x*p(x^2), expanding out to a degree 7 polynomial for
+ tan(x).
+
+ Entries are flt64 bit patterns, constant term first; they are
+ evaluated with horner_polyu.
+ */
+const tan_coeff : uint64[4] = [
+ 0x3ff0000000000000, /* 1.0 */
+ 0x3fd5555555555555, /* ~ 1/3 */
+ 0x3fc1111111111111, /* ~ 2/15 */
+ 0x3faba1c7ec067952, /* ~ 0.05397, near 17/315 */
+]
+
+/*
+ Coefficients for minimax polynomial approximating p(x), with
+ cot(x) = p(x^2)/x, expanding to a degree 7 polynomial for cot(x).
+
+ Entries are flt64 bit patterns, constant term first; they are
+ evaluated with horner_polyu.
+ */
+const cot_coeff : uint64[5] = [
+ 0x3ff0000000000000, /* 1.0 */
+ 0xbfd5555555555555, /* ~ -1/3 */
+ 0xbf96c16c16c16c17, /* ~ -1/45 */
+ 0xbf61566abc00f016, /* ~ -0.002116, near -2/945 */
+ 0xbf2bbd7be567ea80, /* ~ -0.0002116, near -1/4725 */
+]
+
+/*
+ Coefficients for a minimax polynomial approximating p(x), with
+ tan(x) = x*p(x^2), expanding out to a degree 11 polynomial for
+ tan(x). This is slower than the tan_coeff version and overkill
+ for the typical range of delta.
+
+ Entries are flt64 bit patterns, constant term first. tanorcot uses
+ this table on its small-argument (x1 < 0.03) path.
+ */
+const tan_coeff_good : uint64[6] = [
+ 0x3ff0000000000000, /* 1.0 */
+ 0x3fd5555555555557, /* ~ 1/3 */
+ 0x3fc1111111124b1b, /* ~ 2/15 */
+ 0x3faba1ba5c206e2d, /* ~ 0.05397, near 17/315 */
+ 0x3f96660414be66b8, /* ~ 0.02187, near 62/2835 */
+ 0x3f829ece970a9ba2, /* ~ 0.00909 */
+]
+
+/*
+ Mask that clears the low 21 bits of a flt64 bit pattern. tanorcot
+ uses it in its special cot() computation to split a value into a
+ short high part and a remainder.
+ */
+const split_mask : uint64 = 0xffffffffffffffff << 21
+
+/*
+ The Highly Accurate Tables for use in a [GB91]-type algorithm;
+ generated by ancillary/generate-triples-for-GB91.c.
+
+ Note the 0th entry has infinite cotangent: we have to specially
+ handle arguments close to 0 to avoid inf pollution.
+ */
+const C : (uint64, uint64, uint64)[257] = [
+ /* xi cot(xi) tan(xi) */
+ (0x0000000000000000, 0x7ff0000000000000, 0x0000000000000000),
+ (0x3f6921fb43c5e5fb, 0x40745f2c4ad38437, 0x3f6922006eb65bc4),
+ (0x3f7921fb69404898, 0x40645f1f9b724978, 0x3f7922101511c956),
+ (0x3f82d97c65697c99, 0x405b2963c8d7c55e, 0x3f82d99f4785288e),
+ (0x3f8921fb51371df1, 0x40545eed6acdf109, 0x3f89224e01710949),
+ (0x3f8f6a7a2c57dd2f, 0x40504bd2f5b9fc24, 0x3f8f6b1badf01256),
+ (0x3f92d97c892e0401, 0x404b28ccc8515f8d, 0x3f92da0815417fe7),
+ (0x3f95fdbbfe00c048, 0x4047474caf184945, 0x3f95fe99994b31ca),
+ (0x3f9921fb362e82a0, 0x40445e246e48a7ee, 0x3f9923460660434c),
+ (0x3f9c463aa05ba8df, 0x40421a8bbcf0d59c, 0x3f9c4811ad83840e),
+ (0x3f9f6a7a32fd798b, 0x40404ad799698985, 0x3f9f6d0068177f47),
+ (0x3fa1475cadce672e, 0x403d9ed9b607478d, 0x3fa1490ac38b33a8),
+ (0x3fa2d97c804f3e96, 0x403b267194186780, 0x3fa2dbaaeabcdd1f),
+ (0x3fa46b9c34e4ab15, 0x40390f4a6551abeb, 0x3fa46e623ffdc4da),
+ (0x3fa5fdbbfbcb8cbd, 0x4037448cde3bdf75, 0x3fa60132e6a3c57c),
+ (0x3fa78fdba272466d, 0x4035b6f0dc2e1d02, 0x3fa7941e9f8a200e),
+ (0x3fa921fb46f590b9, 0x40345afff7eb9ed9, 0x3fa927277ce43c43),
+ (0x3faab41af9b5a4a1, 0x403327f64d5ddbe8, 0x3faaba4f83b868b9),
+ (0x3fac463aa15aa204, 0x40321702ba17ae76, 0x3fac4d988fd3c37d),
+ (0x3fadd85a541eb1e6, 0x403122c3623e77ea, 0x3fade104ad5a19ae),
+ (0x3faf6a7a28fb8b3c, 0x403046e9fe8f3bcb, 0x3faf7495e9e533ba),
+ (0x3fb07e4cd90f69c4, 0x402efff4fb017e86, 0x3fb08426e73b18e9),
+ (0x3fb1475ccf08bc32, 0x402d9634f7939f69, 0x3fb14e17812554ba),
+ (0x3fb2106c9ca80bcd, 0x402c4bde69326818, 0x3fb2181d64f21f1e),
+ (0x3fb2d97c6e8f0d99, 0x402b1d03d8431e54, 0x3fb2e239bc43f5ea),
+ (0x3fb3a28c5e9cda74, 0x402a0658d1258f8a, 0x3fb3ac6d9e61f454),
+ (0x3fb46b9c1af1ca7b, 0x40290513541f6bae, 0x3fb476b9b6a5b103),
+ (0x3fb534abf74c4f86, 0x402816d25c8e2911, 0x3fb5411f577b5e4b),
+ (0x3fb5fdbbe76972c5, 0x4027398c5a6a94ca, 0x3fb60b9f7341612c),
+ (0x3fb6c6cbcafad5b1, 0x40266b7fbfc9a854, 0x3fb6d63ae89faaee),
+ (0x3fb78fdb9c7d39ab, 0x4025ab26d260cbba, 0x3fb7a0f2b1ba77b4),
+ (0x3fb858eb82b930eb, 0x4024f72dfaa05197, 0x3fb86bc7f5f32f7d),
+ (0x3fb921fb549b731e, 0x40244e6c591405be, 0x3fb936bb8cb34c63),
+ (0x3fb9eb0b2e8adcf9, 0x4023afdcd1160af4, 0x3fba01ce94149e0a),
+ (0x3fbab41b1e68f64c, 0x40231a990b29843b, 0x3fbacd021c321ede),
+ (0x3fbb7d2ad90eb4c1, 0x40228dd5408b39fd, 0x3fbb9856dbc170d0),
+ (0x3fbc463aa9e0e6e1, 0x402208dbe8546fd1, 0x3fbc63ce22521e17),
+ (0x3fbd0f4a9ed6ac6b, 0x40218b0b447a840e, 0x3fbd2f69021ade8d),
+ (0x3fbdd85a6a1e3500, 0x402113d2d3340c0f, 0x3fbdfb283104590b),
+ (0x3fbea16a43111eb3, 0x4020a2b0974c728d, 0x3fbec70cec8d28e2),
+ (0x3fbf6a7a152ae159, 0x4020372fbdef0db6, 0x3fbf93182614dae9),
+ (0x3fc019c5091530ae, 0x401fa1cd6c211be1, 0x3fc02fa58b882864),
+ (0x3fc07e4cd60bb2ac, 0x401edeecb2e792c1, 0x3fc095d31b7ab15a),
+ (0x3fc0e2d4f9759641, 0x401e250f354f8dbe, 0x3fc0fc15d1664ec5),
+ (0x3fc1475ccb271aea, 0x401d7398cf43b80c, 0x3fc1626d86e71cc7),
+ (0x3fc1abe4ae6b768f, 0x401cc9f96a972d15, 0x3fc1c8db2614cbc0),
+ (0x3fc2106c905dda31, 0x401c27ae41d1edd4, 0x3fc22f5f2139f59d),
+ (0x3fc274f4896db29e, 0x401b8c3f6c6a4a71, 0x3fc295fa1740027b),
+ (0x3fc2d97c6d32e764, 0x401af73f66c3ead4, 0x3fc2fcac61406613),
+ (0x3fc33e045f0619c2, 0x401a6849297d859a, 0x3fc36376aa36e713),
+ (0x3fc3a28c3a37c404, 0x4019df00269fbec5, 0x3fc3ca5953fa7c1a),
+ (0x3fc407144075ebd8, 0x40195b0e8afc5158, 0x3fc4315529a44c20),
+ (0x3fc46b9c2c6b4a28, 0x4018dc25cb0e86a9, 0x3fc4986a6c8dde38),
+ (0x3fc4d02440498d08, 0x401861fca0c2727d, 0x3fc4ff99e978d677),
+ (0x3fc534ac1fd87d02, 0x4017ec4fef47d5ac, 0x3fc566e3cb1ac3b8),
+ (0x3fc59933e96f8d02, 0x40177ae0ebe39dcb, 0x3fc5ce48ba6cbc8c),
+ (0x3fc5fdbbe79e8588, 0x40170d751b4b5351, 0x3fc635c98ea0a06e),
+ (0x3fc66243c3de5553, 0x4016a3d6c893e89c, 0x3fc69d6679ac9090),
+ (0x3fc6c6cbd97ff2eb, 0x40163dd33b7cd8a1, 0x3fc70520654e8fbc),
+ (0x3fc72b53b2dfa886, 0x4015db3bfb9718c5, 0x3fc76cf7644f5456),
+ (0x3fc78fdba07a2b15, 0x40157be4e94511b4, 0x3fc7d4ec5682b6ec),
+ (0x3fc7f4639158cb7b, 0x40151fa5267b1359, 0x3fc83cffb7c5bd13),
+ (0x3fc858eb5d585717, 0x4014c6568b6182c0, 0x3fc8a531ec767b5b),
+ (0x3fc8bd735284b4b1, 0x40146fd4f60aa6ea, 0x3fc90d83d41787e6),
+ (0x3fc921fb44d8c003, 0x40141bfeeeeb615b, 0x3fc975f5d04d9510),
+ (0x3fc986833b7f08e5, 0x4013cab4e2b1ea03, 0x3fc9de8878736efe),
+ (0x3fc9eb0b3ba4fca5, 0x40137bd929ce2e37, 0x3fca473c62857446),
+ (0x3fca4f9336067d8d, 0x40132f4ff1744efc, 0x3fcab0120fe01a1b),
+ (0x3fcab41b018ad3ad, 0x4012e4ff1bf8fe76, 0x3fcb1909e77f6d3f),
+ (0x3fcb18a2f15fe343, 0x40129ccdb5a1a27c, 0x3fcb8224d2de4be1),
+ (0x3fcb7d2ae4498be8, 0x401256a48b1995d8, 0x3fcbeb6342b07ca5),
+ (0x3fcbe1b2c39aaa4c, 0x4012126daf49101c, 0x3fcc54c5b349b00d),
+ (0x3fcc463acb8ca010, 0x4011d01436249f34, 0x3fccbe4cf8afd08f),
+ (0x3fccaac2bf18961b, 0x40118f84a7fdac06, 0x3fcd27f968798e0c),
+ (0x3fcd0f4a797c05fe, 0x401150ac8980c6bc, 0x3fcd91cb72380077),
+ (0x3fcd73d270bdc038, 0x40111379fd8fc155, 0x3fcdfbc42953c563),
+ (0x3fcdd85a6542243b, 0x4010d7dc85aa36a5, 0x3fce65e3e2d8019c),
+ (0x3fce3ce24380c9db, 0x40109dc463970d1f, 0x3fced02b22de7ced),
+ (0x3fcea16a4eb76e81, 0x4010652276f8722a, 0x3fcf3a9aca25e284),
+ (0x3fcf05f225fcc8fc, 0x40102de8be5f5e9b, 0x3fcfa5330c823e20),
+ (0x3fcf6a7a2e7e6f81, 0x400ff01300763015, 0x3fd007fa78443530),
+ (0x3fcfcf021570cd37, 0x400f86f024238a5e, 0x3fd03d705d41a70c),
+ (0x3fd019c4e66f72bd, 0x400f205085a9838c, 0x3fd072fb7c3be48f),
+ (0x3fd04c08e54cf113, 0x400ebc1c6d564497, 0x3fd0a89c62f5e593),
+ (0x3fd07e4ceb3a2775, 0x400e5a3df148f481, 0x3fd0de53430437af),
+ (0x3fd0b090d14d1132, 0x400dfaa0416fb320, 0x3fd1142042a947fe),
+ (0x3fd0e2d4e9b68203, 0x400d9d2ea3b4f2db, 0x3fd14a040a4c0488),
+ (0x3fd11518dadb07fc, 0x400d41d68d4614b2, 0x3fd17ffe8abf93f8),
+ (0x3fd1475cabbbd1a5, 0x400ce8859a42336f, 0x3fd1b6101cb2785a),
+ (0x3fd179a0b7f1e7a4, 0x400c9129a990cfbb, 0x3fd1ec3974987375),
+ (0x3fd1abe4b50fd9a0, 0x400c3bb2863ab5e2, 0x3fd2227a94b976bd),
+ (0x3fd1de28a830b251, 0x400be8102968bfac, 0x3fd258d3d564ded0),
+ (0x3fd2106ca9bf313e, 0x400b96331fe26d7e, 0x3fd28f45a4688f6e),
+ (0x3fd242b0b89d73ec, 0x400b460cca6bd89f, 0x3fd2c5d0548f5454),
+ (0x3fd274f47ec75990, 0x400af78fac257637, 0x3fd2fc73dccd4043),
+ (0x3fd2a738a04751ef, 0x400aaaad59bb9e25, 0x3fd3333144835986),
+ (0x3fd2d97c85bb113f, 0x400a5f59df55d5a8, 0x3fd36a083c7e2ced),
+ (0x3fd30bc06f220a02, 0x400a1588855353e2, 0x3fd3a0f96085b6dd),
+ (0x3fd33e046e3c6329, 0x4009cd2d5e7d9094, 0x3fd3d8051ac8b850),
+ (0x3fd370486fc1fac0, 0x4009863d2da63ac8, 0x3fd40f2bad85dbce),
+ (0x3fd3a28c45f65785, 0x400940ad4be7678c, 0x3fd4466d3e4e392f),
+ (0x3fd3d4d04d809c18, 0x4008fc72c1faf287, 0x3fd47dca8b7b83b2),
+ (0x3fd407143dded9c0, 0x4008b983e519bfd8, 0x3fd4b5439e5d1536),
+ (0x3fd439583676eabf, 0x400877d6e093e9da, 0x3fd4ecd8f3427dcc),
+ (0x3fd46b9c4f298d45, 0x400837624b870e31, 0x3fd5248aff0c1bb9),
+ (0x3fd49de0389dbfc9, 0x4007f81d9a994879, 0x3fd55c59c4bc190f),
+ (0x3fd4d02415978a87, 0x4007ba0005be9705, 0x3fd59445c6583fea),
+ (0x3fd502680d976ddd, 0x40077d0114c36575, 0x3fd5cc4f8c28a849),
+ (0x3fd534ac009ad6ad, 0x40074118f5d5a027, 0x3fd604774f854e2e),
+ (0x3fd566effa9677a1, 0x4007063fec410192, 0x3fd63cbd7b6d7874),
+ (0x3fd59933e0e9ba24, 0x4006cc6eafc7e66d, 0x3fd675225058af7c),
+ (0x3fd5cb77eb6b2450, 0x4006939dddeba442, 0x3fd6ada66c018e76),
+ (0x3fd5fdbbdfcd5e78, 0x40065bc6d77251ea, 0x3fd6e649eca3c379),
+ (0x3fd62ffff08a2e0c, 0x400624e2c13b5a9b, 0x3fd71f0d6b9a2d05),
+ (0x3fd66243f23b0c45, 0x4005eeeb636b3b0e, 0x3fd757f1191ef14c),
+ (0x3fd69487b10a42f2, 0x4005b9dac4d6f74e, 0x3fd790f51c268135),
+ (0x3fd6c6cbae457fb1, 0x400585aa64d56be0, 0x3fd7ca1a6a0740e5),
+ (0x3fd6f90fbd5ea7f3, 0x40055254afeb627f, 0x3fd8036133dcb0d0),
+ (0x3fd72b539c1458e5, 0x40051fd4572bd740, 0x3fd83cc99243b1cc),
+ (0x3fd75d97a0772b20, 0x4004ee23a1eecaf5, 0x3fd876544c81706d),
+ (0x3fd78fdba1490a6d, 0x4004bd3d85c95693, 0x3fd8b001995400f4),
+ (0x3fd7c21f93c9a6f0, 0x40048d1d04c633fd, 0x3fd8e9d1d2f476b5),
+ (0x3fd7f463804f4cfb, 0x40045dbd38ab6945, 0x3fd923c56a5915d6),
+ (0x3fd826a7892f6d44, 0x40042f194c51167e, 0x3fd95ddcef7a54f2),
+ (0x3fd858eb80c2f052, 0x4004012cdbe1f350, 0x3fd9981896c629e4),
+ (0x3fd88b2f737b7ea4, 0x4003d3f372664ef1, 0x3fd9d278d890d001),
+ (0x3fd8bd735b872d13, 0x4003a768cf7d7edb, 0x3fda0cfe18e865e8),
+ (0x3fd8efb749b5b0fa, 0x40037b88c14a0190, 0x3fda47a8d71f4a59),
+ (0x3fd921fb4f73c582, 0x4003504f375b8a3b, 0x3fda827994591520),
+ (0x3fd9543f498e072b, 0x400325b86e2a7edc, 0x3fdabd70950976a6),
+ (0x3fd986833cb12fe2, 0x4002fbc09df08da7, 0x3fdaf88e4d1af906),
+ (0x3fd9b8c735da9e8f, 0x4002d26415c0dcf8, 0x3fdb33d33b48b23c),
+ (0x3fd9eb0b2b14f6b2, 0x4002a99f541052bd, 0x3fdb6f3fc4412f63),
+ (0x3fda1d4f1f259ed7, 0x4002816ee7faf9c9, 0x3fdbaad45ca7ae2f),
+ (0x3fda4f9317d51314, 0x400259cf78974618, 0x3fdbe6917dba0bac),
+ (0x3fda81d71d9a00d3, 0x400232bdc46748af, 0x3fdc2277a4fbcf04),
+ (0x3fdab41b1864b159, 0x40020c36bb584296, 0x3fdc5e872a26269b),
+ (0x3fdae65f0eadda20, 0x4001e6374cd17a1a, 0x3fdc9ac08a4bd320),
+ (0x3fdb18a2f6bdb980, 0x4001c0bc8b1148ee, 0x3fdcd724302664c9),
+ (0x3fdb4ae6f429a87d, 0x40019bc37c9e36a0, 0x3fdd13b2be0455c5),
+ (0x3fdb7d2adbb2f0d4, 0x400177497734de95, 0x3fdd506c786465b9),
+ (0x3fdbaf6ee37914fd, 0x4001534b9ddfc460, 0x3fdd8d521a67ad8d),
+ (0x3fdbe1b2dd912d13, 0x40012fc76f757964, 0x3fddca63e75202cb),
+ (0x3fdc13f6cc9e4889, 0x40010cba5a491b0c, 0x3fde07a25e238018),
+ (0x3fdc463ad9174142, 0x4000ea21c51caa0c, 0x3fde450e2d5365b9),
+ (0x3fdc787e9c84e6c4, 0x4000c7fb8a2ba964, 0x3fde82a755ab4856),
+ (0x3fdcaac2926527ca, 0x4000a644fa83f84e, 0x3fdec06eedc91cd2),
+ (0x3fdcdd06a43ce130, 0x400084fbdbdf1711, 0x3fdefe655b10819e),
+ (0x3fdd0f4a88e8826e, 0x4000641e23e987e1, 0x3fdf3c8ac50fd12d),
+ (0x3fdd418e8374c347, 0x400043a97b437c0b, 0x3fdf7ae00195ead5),
+ (0x3fdd73d27c65ab26, 0x4000239bd4e2229d, 0x3fdfb96577e9d701),
+ (0x3fdda6167d37836d, 0x400003f31ca39f94, 0x3fdff81bb96f0bef),
+ (0x3fddd85a68c19b42, 0x3fffc95ac8d38084, 0x3fe01b81944403fa),
+ (0x3fde0a9e587b4fd8, 0x3fff8b91526e80e7, 0x3fe03b0e368fb566),
+ (0x3fde3ce26298b772, 0x3fff4e85ef7772cc, 0x3fe05ab4165d6dbe),
+ (0x3fde6f26438b72e7, 0x3fff12353eaec03d, 0x3fe07a734e81ae40),
+ (0x3fdea16a54d3b255, 0x3ffed69b3a1ab4c7, 0x3fe09a4c5da07b69),
+ (0x3fded3ae2f825dc7, 0x3ffe9bb4d86851dd, 0x3fe0ba3f4945c2d6),
+ (0x3fdf05f21cdb9169, 0x3ffe617e5679952d, 0x3fe0da4c8763cb70),
+ (0x3fdf38363eaf5154, 0x3ffe27f4381597d6, 0x3fe0fa7475f5d590),
+ (0x3fdf6a7a098dc074, 0x3ffdef13dafcc870, 0x3fe11ab7049a4fea),
+ (0x3fdf9cbe1c32adf4, 0x3ffdb6d95ebfe258, 0x3fe13b14e2ceeb4a),
+ (0x3fdfcf01fc1a98f2, 0x3ffd7f4234582037, 0x3fe15b8e0c2a1e89),
+ (0x3fe000a30caf6ef5, 0x3ffd484adae8622e, 0x3fe17c23144ba5c9),
+ (0x3fe019c503e450d1, 0x3ffd11f0d6c93084, 0x3fe19cd4010aead7),
+ (0x3fe032e705d8ff9a, 0x3ffcdc30fd97dc6d, 0x3fe1bda14b7f7db5),
+ (0x3fe04c08e5135e13, 0x3ffca708e170a297, 0x3fe1de8b05a932bb),
+ (0x3fe0652af2ed30d5, 0x3ffc7275198adb9d, 0x3fe1ff91e87789c0),
+ (0x3fe07e4ce6bf0049, 0x3ffc3e7392215303, 0x3fe220b5e3b802d7),
+ (0x3fe0976f08fafefc, 0x3ffc0b0119a5b475, 0x3fe241f7a70e93a4),
+ (0x3fe0b090df3a9868, 0x3ffbd81c1d0ca878, 0x3fe26356e120198a),
+ (0x3fe0c9b2ead68147, 0x3ffba5c118435eb7, 0x3fe284d48f30abf1),
+ (0x3fe0e2d4dd7a7e38, 0x3ffb73ee3a8bbffd, 0x3fe2a6709b9c98c5),
+ (0x3fe0fbf6ec9a1e2c, 0x3ffb42a0b9cabf46, 0x3fe2c82ba151a704),
+ (0x3fe11518d95be884, 0x3ffb11d6bed203fe, 0x3fe2ea05a03d2b1c),
+ (0x3fe12e3acb91e4cc, 0x3ffae18db6b570dc, 0x3fe30bff230686f5),
+ (0x3fe1475cd97c8037, 0x3ffab1c3401e1903, 0x3fe32e189e288a82),
+ (0x3fe1607ecb17c3cc, 0x3ffa82759ac22a53, 0x3fe350521cba4339),
+ (0x3fe179a0be38d76d, 0x3ffa53a26f5af984, 0x3fe372ac1f34797e),
+ (0x3fe192c2b4ff17e0, 0x3ffa2547a7d6b75e, 0x3fe39527018c28f4),
+ (0x3fe1abe4a9d366b4, 0x3ff9f763480dbddc, 0x3fe3b7c316198532),
+ (0x3fe1c506b8926f4d, 0x3ff9c9f322aecbea, 0x3fe3da80de6b2be4),
+ (0x3fe1de289627ee62, 0x3ff99cf5cedc0b11, 0x3fe3fd604e8e3dcd),
+ (0x3fe1f74aa0f1e663, 0x3ff97068be5efcbc, 0x3fe4206246783e1e),
+ (0x3fe2106cc47cbcd5, 0x3ff9444a3c9eec82, 0x3fe4438708557940),
+ (0x3fe2298e9b74941b, 0x3ff9189929b725cd, 0x3fe466ce653433d7),
+ (0x3fe242b0875b781e, 0x3ff8ed53144242d5, 0x3fe48a3945bd5bdc),
+ (0x3fe25bd2a2ef236a, 0x3ff8c276131e7537, 0x3fe4adc83188d315),
+ (0x3fe274f49cd2ff63, 0x3ff89800fe076bc9, 0x3fe4d17b180d8e25),
+ (0x3fe28e16910c215f, 0x3ff86df1fa8545fa, 0x3fe4f552845a769c),
+ (0x3fe2a7387dfc7bb2, 0x3ff8444769c189a7, 0x3fe5194ed8c26526),
+ (0x3fe2c05a7c12d2de, 0x3ff81aff8aec0241, 0x3fe53d709e4082fa),
+ (0x3fe2d97c7c743349, 0x3ff7f218e6cd24d9, 0x3fe561b826bf35d4),
+ (0x3fe2f29e9716e420, 0x3ff7c991cf39c7a9, 0x3fe58625fd66563d),
+ (0x3fe30bc06e7f4a84, 0x3ff7a1695a457c1d, 0x3fe5aaba03e29b8c),
+ (0x3fe324e27374f758, 0x3ff7799d5b5545db, 0x3fe5cf7548f044c5),
+ (0x3fe33e047e936633, 0x3ff7522ca19d9740, 0x3fe5f457ff83db15),
+ (0x3fe35726746c3aee, 0x3ff72b15ef009b93, 0x3fe619626c900682),
+ (0x3fe3704880dc13de, 0x3ff704579db64f16, 0x3fe63e953f64e98f),
+ (0x3fe3896a79a01dfd, 0x3ff6ddf094709c2e, 0x3fe663f0a979932c),
+ (0x3fe3a28c5a0ef0b4, 0x3ff6b7df85cf5700, 0x3fe6897514cb6700),
+ (0x3fe3bbae72257473, 0x3ff69222ac668190, 0x3fe6af236bc6b21d),
+ (0x3fe3d4d05198d166, 0x3ff66cb96a79e48f, 0x3fe6d4fb7a2ab498),
+ (0x3fe3edf2390d7a1b, 0x3ff647a21ef19a42, 0x3fe6fafe165c04e5),
+ (0x3fe4071446b9849f, 0x3ff622db64677758, 0x3fe7212be57c166b),
+ (0x3fe4203660cc90a3, 0x3ff5fe642df6afa6, 0x3fe74785394a80fe),
+ (0x3fe4395830f053c5, 0x3ff5da3bca164098, 0x3fe76e0a075c1f61),
+ (0x3fe4527a51ef6abd, 0x3ff5b6603251fdeb, 0x3fe794bbb8782811),
+ (0x3fe46b9c35efc871, 0x3ff592d10f30a380, 0x3fe7bb99ef70fcb0),
+ (0x3fe484be1da35a2f, 0x3ff56f8ce774b6cf, 0x3fe7e2a58e4d7f2c),
+ (0x3fe49de0204a38f7, 0x3ff54c9283b95dee, 0x3fe809df39ac0a62),
+ (0x3fe4b70222176b17, 0x3ff529e0f9169404, 0x3fe83147483ce15a),
+ (0x3fe4d02423eab88d, 0x3ff50777396a8b69, 0x3fe858de3ecfe279),
+ (0x3fe4e9460a6bd40d, 0x3ff4e55461ce669f, 0x3fe880a47725d222),
+ (0x3fe50268221eb1c5, 0x3ff4c37707b189e0, 0x3fe8a89af16b273d),
+ (0x3fe51b8a1a015c9e, 0x3ff4a1de9a4d2715, 0x3fe8d0c1b5967d2c),
+ (0x3fe534abef35b431, 0x3ff4808a22df6aeb, 0x3fe8f91948a87d95),
+ (0x3fe54dce0e654a86, 0x3ff45f781cf2f1af, 0x3fe921a2e5a509da),
+ (0x3fe566effca9d1d5, 0x3ff43ea83a204e02, 0x3fe94a5e54941ccd),
+ (0x3fe58011f1e2647b, 0x3ff41e1944ab750f, 0x3fe9734c7f58c26c),
+ (0x3fe59933f5e86301, 0x3ff3fdca4ada27bb, 0x3fe99c6e04b5c577),
+ (0x3fe5b2560aab7515, 0x3ff3ddba66ec8460, 0x3fe9c5c37bdd6320),
+ (0x3fe5cb77e480bb2a, 0x3ff3bde919244f1b, 0x3fe9ef4cfd75f328),
+ (0x3fe5e499d2fe12b9, 0x3ff39e552085ea71, 0x3fea190ba428e394),
+ (0x3fe5fdbbd711540c, 0x3ff37efda4b7da0b, 0x3fea43000ba79067),
+ (0x3fe616dde7b3195c, 0x3ff35fe1dd7a4fd0, 0x3fea6d2ac0ff2200),
+ (0x3fe62fffe292e013, 0x3ff3410124e06ee8, 0x3fea978c284b69e9),
+ (0x3fe64921d3354ae1, 0x3ff3225a9fb1c71e, 0x3feac224f47e4717),
+ (0x3fe66243d9450683, 0x3ff303ed5e631dff, 0x3feaecf5fd84dff9),
+ (0x3fe67b65c258c493, 0x3ff2e5b8d84a4bf9, 0x3feb17ff91b6c931),
+ (0x3fe69487ced483eb, 0x3ff2c7bbfde79a7d, 0x3feb4342c6025af0),
+ (0x3fe6ada9d635f2e4, 0x3ff2a9f641170b2e, 0x3feb6ebffdf58dac),
+ (0x3fe6c6cbb61f0b15, 0x3ff28c670e616749, 0x3feb9a77a8b3dd2c),
+ (0x3fe6dfedafa2adc5, 0x3ff26f0d60a506c4, 0x3febc66ae4d5e2a0),
+ (0x3fe6f90fbe78ac86, 0x3ff251e88829ec96, 0x3febf29a5b89d1cc),
+ (0x3fe71231ac5b981e, 0x3ff234f81179eb17, 0x3fec1f065fb7e3ae),
+ (0x3fe72b539a5b2817, 0x3ff2183b26da1bf7, 0x3fec4bafe056a3e8),
+ (0x3fe74475a88b9513, 0x3ff1fbb0f7a7f5d2, 0x3fec7897ce6c8a57),
+ (0x3fe75d979d84d1b4, 0x3ff1df591bae6879, 0x3feca5be7daa52f0),
+ (0x3fe776b996729f58, 0x3ff1c332cb1b268b, 0x3fecd324def80a12),
+ (0x3fe78fdbb6c61400, 0x3ff1a73d3acc02d5, 0x3fed00cbf2860934),
+ (0x3fe7a8fd8e5473ec, 0x3ff18b7845e8ff44, 0x3fed2eb3ae59bf1f),
+ (0x3fe7c21fa821e9ae, 0x3ff16fe2b42697c2, 0x3fed5cddd5595691),
+ (0x3fe7db419cad411c, 0x3ff1547c5b66dd8f, 0x3fed8b4a71c71e74),
+ (0x3fe7f4638e3011f5, 0x3ff139447c65650b, 0x3fedb9fa8d11c553),
+ (0x3fe80d858178d451, 0x3ff11e3a7adfe5dc, 0x3fede8eefdefdd61),
+ (0x3fe826a79a4bde87, 0x3ff1035d9c2731dc, 0x3fee1828d8aa0d05),
+ (0x3fe83fc97febfc73, 0x3ff0e8adacafb5e2, 0x3fee47a84a31b400),
+ (0x3fe858eb90a17ea9, 0x3ff0ce29b858ff89, 0x3fee776edba13ed6),
+ (0x3fe8720d5f79bf77, 0x3ff0b3d1a433bc56, 0x3feea77c97bb286a),
+ (0x3fe88b2f85704599, 0x3ff099a444afc50a, 0x3feed7d380dfa51f),
+ (0x3fe8a4516bc9e19d, 0x3ff07fa1adcd5635, 0x3fef087356602ce4),
+ (0x3fe8bd7375248413, 0x3ff065c8f21e21d3, 0x3fef395dbac2fb6d),
+ (0x3fe8d69552cff0a1, 0x3ff04c19dd7ddf24, 0x3fef6a92fc644460),
+ (0x3fe8efb76cb96eab, 0x3ff0329382f2fcfc, 0x3fef9c14d23b84cb),
+ (0x3fe908d951162887, 0x3ff01935d537533b, 0x3fefcde34b070181),
+ (0x3fe921fb54442d18, 0x3ff0000000000000, 0x3ff0000000000000),
+]
+
+/*
+ tan for flt32. The argument is widened to flt64, evaluated by
+ tanorcot, and the resulting pair is narrowed by round_down. One
+ input bit pattern bypasses the computation and returns a hard-coded
+ result.
+ */
+const tan32 = {x : flt32
+	var xb = std.flt32bits(x)
+
+	if xb != 0xdffd33a4
+		var hi, lo
+		(hi, lo) = tanorcot((x : flt64), true)
+		-> round_down(hi, lo)
+	;;
+
+	/* A rather irritating special rounding case */
+	-> std.flt32frombits(0xbfd06c8c)
+}
+
+/*
+ cot for flt32. The argument is widened to flt64, evaluated by
+ tanorcot, and the resulting pair is narrowed by round_down. Two
+ input bit patterns (the same magnitude with either sign) bypass the
+ computation and return hard-coded results.
+ */
+const cot32 = {x : flt32
+	var xb = std.flt32bits(x)
+
+	/* Two more irritating special cases */
+	if xb == 0x33de86a9
+		-> std.flt32frombits(0x4b134133)
+	;;
+	if xb == 0xb3de86a9
+		-> std.flt32frombits(0xcb134133)
+	;;
+
+	var hi, lo
+	(hi, lo) = tanorcot((x : flt64), false)
+	-> round_down(hi, lo)
+}
+
+/*
+ tan for flt64: the first component of tanorcot's result pair; the
+ second component is discarded.
+ */
+const tan64 = {x : flt64
+	var hi
+
+	(hi, _) = tanorcot(x, true)
+	-> hi
+}
+
+/*
+ cot for flt64: the first component of tanorcot's result pair; the
+ second component is discarded.
+ */
+const cot64 = {x : flt64
+	var hi
+
+	(hi, _) = tanorcot(x, false)
+	-> hi
+}
+
+/*
+ Shared backend for tan and cot. Computes tan(x) when want_tan is
+ true and cot(x) otherwise, returning a pair (ret1, ret2). tan64 and
+ cot64 keep only ret1; tan32 and cot32 hand both to round_down.
+
+ Outline:
+   1. Exponent 1024 from flt64explode yields NaN; +/-0.0 yields a
+      signed zero (tan) or signed infinity (cot).
+   2. trig_reduce gives (N, x1, x2). An odd N swaps tan and cot and
+      negates the result; a negative x1 is folded to positive, again
+      negating the result.
+   3. trig_table_approx picks a row (xi, cot(xi), tan(xi)) of C.
+      Row 0 and x1 < 0.03 are handled by polynomials alone;
+      otherwise the table values are combined with a polynomial in
+      delta = (x1 + x2) - xi.
+ */
+const tanorcot = {x : flt64, want_tan : bool
+ var n : bool, e : int64, s : uint64
+ (n, e, s) = std.flt64explode(x)
+
+ /* presumably infinity or NaN input (exponent 1024): the result is NaN */
+ if e == 1024
+ -> (std.flt64nan(), 0.0)
+ ;;
+
+ if e == -1023 && s == 0x0
+ /* Special handling for +/-0.0 */
+ /* cot(+/-0) = +/-inf, tan(+/-0) = +/-0, as bit patterns */
+ match (n, want_tan)
+ | (false, false): -> (std.flt64frombits(0x7ff0000000000000), 0.0)
+ | (false, true ): -> (std.flt64frombits(0x0000000000000000), 0.0)
+ | (true , false): -> (std.flt64frombits(0xfff0000000000000), 0.0)
+ | (true , true ): -> (std.flt64frombits(0x8000000000000000), 0.0)
+ ;;
+ ;;
+
+
+
+ var N : int64
+ var x1 : flt64, x2 : flt64
+
+ (N, x1, x2) = trig_reduce(x)
+
+ var then_negate : bool = false
+
+ if (N % 2 != 0)
+ /* tan(x + Pi/2) = -cot(x) */
+ want_tan = !want_tan
+ then_negate = true
+ ;;
+
+ /* tan and cot are odd: work with a non-negative x1 and fix the sign at the end */
+ if (x1 < 0.0)
+ then_negate = !then_negate
+ x1 = -1.0 * x1
+ x2 = -1.0 * x2
+ ;;
+
+ /* from sin-impl.myr */
+ var xi, tan_xi, cot_xi
+ (xi, cot_xi, tan_xi) = trig_table_approx(x1, C)
+ var cot = std.flt64frombits(cot_xi)
+ var tan = std.flt64frombits(tan_xi)
+
+ var ret1 : flt64 = 0.0, ret2 : flt64 = 0.0
+
+ if xi == 0x0
+ /*
+ Special case to avoid infinity in cotan
+ */
+ if want_tan
+ (ret1, ret2) = ptan(x1, x2)
+ else
+ (ret1, ret2) = pcot(x1, x2)
+ ;;
+
+ goto have_result
+ ;;
+
+ /* (delta1, delta2) ~ (x1 + x2) - xi, the offset from the table point */
+ var delta1, delta2, deltat
+ (delta1, deltat) = fast2sum(-std.flt64frombits(xi), x1)
+ (delta2, _) = fast2sum(deltat, x2)
+ var p1, p2
+
+ /*
+ Since cot() can blow up close to 0, just fall back to
+ polynomial approximation. We use a stricter cutoff than
+ given in [GB91] since our C table is more accurate than
+ theirs, meaning we're safe to use it closer to 0.
+ */
+ if x1 < 0.03
+ var s = x1 * x1
+ p1 = x1 * horner_polyu(s, tan_coeff_good[:])
+ /*
+ NOTE(review): first-order propagation of x2 through tan would
+ give x2*(1 + s); confirm that the factor 3.0 is intended.
+ */
+ p2 = x2 + 3.0*s*x2
+
+ if want_tan
+ (ret1, ret2) = fast2sum(p1, p2)
+ goto have_result
+ ;;
+
+ (p1, p2) = fast2sum(p1, p2)
+
+ /*
+ cot = 1/(p1 + p2). Split p1 into f (low 21 bits cleared) and
+ fold the rest into g, take u0 = 1/f, then correct u0 both for
+ g and for its own rounding error (1 - u0*f, evaluated in two
+ pieces via the split u0 = u1 + u2).
+ */
+ var f = std.flt64frombits(std.flt64bits(p1) & split_mask)
+ var g = (p1 - f) + p2
+
+ var u0 = 1.0/f
+ var u1 = std.flt64frombits(std.flt64bits(u0) & split_mask)
+ var u2 = u0 - u1
+
+ (ret1, ret2) = fast2sum(u0 - u0*u0*g, u0*((1.0 - u1*f) - u2*f))
+ goto have_result
+ ;;
+
+ if want_tan
+ /*
+ With p = tan(delta):
+ tan(xi + delta) = tan(xi) + p*(cot(xi) + tan(xi))/(cot(xi) - p)
+ */
+ (p1, p2) = ptan(delta1, delta2)
+ var num = cot + tan
+ var den = (cot - p1) - p2
+ var f = num/den
+ var q1 = tan
+ var q2 = p1 * f
+ var q3 = p2 * f
+ (q1, q2) = fast2sum(q1, q2)
+ (ret1, ret2) = fast2sum(q1, q2 + q3)
+ else
+ /*
+ With p = tan(delta):
+ cot(xi + delta) = cot(xi) - p*(cot(xi) + tan(xi))/(tan(xi) + p)
+ */
+ (p1, p2) = ptan(delta1, delta2)
+ var num = cot + tan
+ var den = (tan + p1) + p2
+ var f = num/den
+ var q1 = cot
+ var q2 = -1.0 * p1 * f
+ var q3 = -1.0 * p2 * f
+ (q1, q2) = fast2sum(q1, q2)
+ (ret1, ret2) = fast2sum(q1, q2 + q3)
+ ;;
+
+:have_result
+
+ /* apply the sign accumulated from the quadrant swap and the fold of x1 */
+ if then_negate
+ ret1 = -1.0 * ret1
+ ret2 = -1.0 * ret2
+ ;;
+
+ -> (ret1, ret2)
+}
+
+/*
+ Polynomial tan of the small two-part argument x1 + x2, returned as
+ a fast2sum pair. The main term is x1 * p(x1^2) with p taken from
+ tan_coeff, formed with two_by_two; x2 is added to the low part
+ unchanged.
+ */
+const ptan = {x1 : flt64, x2 : flt64
+	var sq : flt64 = x1 * x1
+	var poly : flt64 = horner_polyu(sq, tan_coeff[:])
+	var hi, lo
+
+	(hi, lo) = two_by_two(poly, x1)
+	-> fast2sum(hi, lo + x2)
+}
+
+/*
+ Polynomial cot of the small two-part argument x1 + x2, returned as
+ a fast2sum pair. The main term is p(x1^2)/x1 with p taken from
+ cot_coeff; the second term is x2 scaled by the constant
+ 0x3fd5555555555555 (~ 1/3).
+
+ NOTE(review): the first-order sensitivity of cot to x2 is about
+ -x2/x1^2 - x2/3; confirm that the correction term used here is the
+ intended one.
+ */
+const pcot = {x1 : flt64, x2 : flt64
+	var third : flt64 = std.flt64frombits(0x3fd5555555555555)
+	var sq : flt64 = x1 * x1
+	var poly : flt64 = horner_polyu(sq, cot_coeff[:])
+
+	-> fast2sum(poly/x1, third*x2)
+}
diff --git a/lib/math/test/atan-impl.myr b/lib/math/test/atan-impl.myr
new file mode 100644
index 0000000..fa992c8
--- /dev/null
+++ b/lib/math/test/atan-impl.myr
@@ -0,0 +1,622 @@
+use std
+use math
+use testr
+
+/*
+ Test driver: registers the atan/atan2 test cases below with
+ testr and runs them.
+ */
+const main = {
+ /* NOTE(review): presumably turns floating-point traps off so the NaN/Inf cases don't abort -- confirm against math.fptrap */
+ math.fptrap(false)
+ testr.run([
+ [.name="atan-01", .fn = atan01], /* atan, flt32 */
+ [.name="atan-02", .fn = atan02], /* atan, flt64 */
+ [.name="atan-03", .fn = atan03], /* atan2, flt32 */
+ [.name="atan-04", .fn = atan04], /* atan2, flt64 */
+ [.name="atan-05", .fn = atan05], /* atan, off-by-1-ulp quarantine */
+ [.name="atan-06", .fn = atan06], /* atan2, off-by-1-ulp quarantine */
+ [.name="atan-07", .fn = atan07], /* exhaustively test C */
+ [.name="atan-08", .fn = atan08], /* NaN handling */
+ ][:])
+}
+
+/*
+ True when the two flt32 bit patterns are identical, or when
+ both of them decode to NaN (any NaN matches any other NaN).
+ */
+const same32 = {a, b
+ -> a == b || (std.isnan(std.flt32frombits(a)) && std.isnan(std.flt32frombits(b)))
+}
+
+/*
+ True when the two flt64 bit patterns are identical, or when
+ both of them decode to NaN (any NaN matches any other NaN).
+ */
+const same64 = {a, b
+ -> a == b || (std.isnan(std.flt64frombits(a)) && std.isnan(std.flt64frombits(b)))
+}
+
+/*
+ atan, flt32. Each table entry is (input bits, expected result
+ bits). For every input this checks that atan(x) and
+ atan2(x, 1.0) agree, and that atan(x) equals the expected value.
+ Comparisons go through same32, so any two NaNs count as equal.
+ */
+const atan01 = {c
+ var inputs : (uint32, uint32)[:] = [
+ (0x00000000, 0x00000000),
+ (0xec0c0000, 0xbfc90fdb),
+ (0xd30d0000, 0xbfc90fdb),
+ (0x4c120000, 0x3fc90fda),
+ (0x0c010000, 0x0c010000),
+ (0xc0070000, 0xbf9065b4),
+ (0x3d8d6b23, 0x3d8d31c3),
+ (0xbd8d6b23, 0xbd8d31c3),
+ (0xbf000000, 0xbeed6338),
+ (0xc1010000, 0xbfb94442),
+ ][:]
+
+ for (x, y) : inputs
+ var xf : flt32 = std.flt32frombits(x)
+ var a1f : flt32 = math.atan(xf)
+ var a2f : flt32 = math.atan2(xf, 1.0)
+ var a1u : uint32 = std.flt32bits(a1f)
+ var a2u : uint32 = std.flt32bits(a2f)
+
+ /* atan(x) and atan2(x, 1) must be the same value */
+ testr.check(c, same32(a1u, a2u),
+ "atan(0x{b=16,w=8,p=0}) = 0x{b=16,w=8,p=0}, but atan2(0x{b=16,w=8,p=0}, 1.0) = 0x{b=16,w=8,p=0}",
+ x, a1u, x, a2u)
+
+ /* the expected bits y are compared directly; no float conversion is needed */
+ testr.check(c, same32(a1u, y),
+ "atan(0x{b=16,w=8,p=0}) = 0x{b=16,w=8,p=0}, should be 0x{b=16,w=8,p=0}",
+ x, a1u, y)
+ ;;
+}
+
+/*
+ atan, flt64. Each table entry is (input bits, expected result
+ bits). For every input this checks that atan(x) and
+ atan2(x, 1.0) agree, and that atan(x) equals the expected value.
+ Comparisons go through same64, so any two NaNs count as equal.
+ */
+const atan02 = {c
+ var inputs : (uint64, uint64)[:] = [
+ (0x10ba5f16340a0bc9, 0x10ba5f16340a0bc9),
+ (0xd98ac92afca0e53d, 0xbff921fb54442d18),
+ (0xec7a77dca00c3b26, 0xbff921fb54442d18),
+ (0xee57f77337f8e709, 0xbff921fb54442d18),
+ (0x9e25782df360cddd, 0x9e25782df360cddd),
+ (0x9243219f73e1f319, 0x9243219f73e1f319),
+ (0xa4d1ffde70716ab8, 0xa4d1ffde70716ab8),
+ (0x48bcc07018965976, 0x3ff921fb54442d18),
+ (0x1f864ca16540f910, 0x1f864ca16540f910),
+ (0x15cb61e0de4d0245, 0x15cb61e0de4d0245),
+ (0xd9bd7e5a3d6bc814, 0xbff921fb54442d18),
+ (0x75cd7563af1d13f2, 0x3ff921fb54442d18),
+ (0xed91a27ee1cf7fd4, 0xbff921fb54442d18),
+ (0xc41d55b172ee68ee, 0xbff921fb54442d18),
+ (0x558fe35fd3a69e4b, 0x3ff921fb54442d18),
+ (0xa3cfec3df7c1c356, 0xa3cfec3df7c1c356),
+ (0x38de5d01bc664ea9, 0x38de5d01bc664ea9),
+ (0x190e8e5960b38edb, 0x190e8e5960b38edb),
+ (0x03bd0fd0a2cdf14e, 0x03bd0fd0a2cdf14e),
+ (0xb24e849d7122766b, 0xb24e849d7122766b),
+ (0xda4ee410df592fdd, 0xbff921fb54442d18),
+ (0xc76fb300ce286b3f, 0xbff921fb54442d18),
+ (0x9f3dc5e92e95d81b, 0x9f3dc5e92e95d81b),
+ (0x22ebeafb26c4e1cd, 0x22ebeafb26c4e1cd),
+ (0x85af77b27a6c9ebd, 0x85af77b27a6c9ebd),
+ (0x060bdb1f2d9809c8, 0x060bdb1f2d9809c8),
+ (0xeaefd066fb169d3e, 0xbff921fb54442d18),
+ (0x55036c3913a92f51, 0x3ff921fb54442d18),
+ (0x5049f3035f2e17b9, 0x3ff921fb54442d18),
+ (0xa6032d5682f84540, 0xa6032d5682f84540),
+ (0xf33fb18d1d5b162a, 0xbff921fb54442d18),
+ (0x9c88441ff7f72238, 0x9c88441ff7f72238),
+ (0xbbd17263c0f5c1dc, 0xbbd17263c0f5c1dc),
+ (0x4d82ee8ecba53872, 0x3ff921fb54442d18),
+ (0x32f4e9594348833e, 0x32f4e9594348833e),
+ (0xdc5ca2769af7825f, 0xbff921fb54442d18),
+ (0xcfcc1defc79fc30b, 0xbff921fb54442d18),
+ (0xcf7ce4fad9366c04, 0xbff921fb54442d18),
+ (0x2919d12d8e3ce4ea, 0x2919d12d8e3ce4ea),
+ (0x43914f32a6c79113, 0x3ff921fb54442d18),
+ (0x3dc0726cc985fcb4, 0x3dc0726cc985fcb4),
+ (0x39279f3510ab6c1c, 0x39279f3510ab6c1c),
+ (0x888f53f3ef5a64eb, 0x888f53f3ef5a64eb),
+ (0xf6698a51e74bc635, 0xbff921fb54442d18),
+ ][:]
+
+ for (x, y) : inputs
+ var xf : flt64 = std.flt64frombits(x)
+ var a1f : flt64 = math.atan(xf)
+ var a2f : flt64 = math.atan2(xf, 1.0)
+ var a1u : uint64 = std.flt64bits(a1f)
+ var a2u : uint64 = std.flt64bits(a2f)
+
+ /* atan(x) and atan2(x, 1) must be the same value */
+ testr.check(c, same64(a1u, a2u),
+ "atan(0x{b=16,w=16,p=0}) = 0x{b=16,w=16,p=0}, but atan2(0x{b=16,w=16,p=0}, 1.0) = 0x{b=16,w=16,p=0}",
+ x, a1u, x, a2u)
+
+ /* the expected bits y are compared directly; no float conversion is needed */
+ testr.check(c, same64(a1u, y),
+ "atan(0x{b=16,w=16,p=0}) = 0x{b=16,w=16,p=0}, should be 0x{b=16,w=16,p=0}",
+ x, a1u, y)
+ ;;
+}
+
+/*
+ atan2, flt32. Each table entry is (y bits, x bits, expected
+ atan2(y, x) bits). The first part of the table walks through
+ the signed-zero and infinity special cases named in the
+ per-row comments; the "misc" rows are ordinary inputs.
+ */
+const atan03 = {c
+ var inputs : (uint32, uint32, uint32)[:] = [
+ (0x00000000, 0x80000000, 0x40490fdb), /* atan2(+0, -0) = +Pi */
+ (0x80000000, 0x80000000, 0xc0490fdb), /* atan2(-0, -0) = -Pi */
+ (0x00000000, 0x00000000, 0x00000000), /* atan2(+0, +0) = +0 */
+ (0x80000000, 0x00000000, 0x80000000), /* atan2(-0, +0) = -0 */
+ (0x00000000, 0xc5a33ab8, 0x40490fdb), /* atan2(+0, x < 0) = +Pi */
+ (0x00000000, 0x80000002, 0x40490fdb),
+ (0x00000000, 0xdddddddd, 0x40490fdb),
+ (0x80000000, 0xc5a33ab8, 0xc0490fdb), /* atan2(-0, x < 0) = -Pi */
+ (0x80000000, 0x80000002, 0xc0490fdb),
+ (0x80000000, 0xdddddddd, 0xc0490fdb),
+ (0x00000000, 0x35a33ab8, 0x00000000), /* atan2(+0, x > 0) = +0 */
+ (0x00000000, 0x00000002, 0x00000000),
+ (0x00000000, 0x4ddddddd, 0x00000000),
+ (0x80000000, 0x35a33ab8, 0x80000000), /* atan2(-0, x > 0) = -0 */
+ (0x80000000, 0x00000002, 0x80000000),
+ (0x80000000, 0x4ddddddd, 0x80000000),
+ (0xdddddddd, 0x00000000, 0xbfc90fdb), /* atan2(y < 0, 0) = -Pi/2 */
+ (0xc5a33ab8, 0x00000000, 0xbfc90fdb),
+ (0x80000002, 0x00000000, 0xbfc90fdb),
+ (0x4ddddddd, 0x00000000, 0x3fc90fdb), /* atan2(y > 0, 0) = +Pi/2 */
+ (0x35a33ab8, 0x00000000, 0x3fc90fdb),
+ (0x00000002, 0x00000000, 0x3fc90fdb),
+ (0x7f800000, 0xff800000, 0x4016cbe4), /* atan2(+Inf, -Inf) = +3*Pi/4 */
+ (0xff800000, 0xff800000, 0xc016cbe4), /* atan2(-Inf, -Inf) = -3*Pi/4 */
+ (0x7f800000, 0x7f800000, 0x3f490fdb), /* atan2(+Inf, +Inf) = +Pi/4 */
+ (0xff800000, 0x7f800000, 0xbf490fdb), /* atan2(-Inf, +Inf) = -Pi/4 */
+ (0x7f800000, 0x4ddddddd, 0x3fc90fdb), /* atan2(+Inf, finite) = +Pi/2 */
+ (0x7f800000, 0x00000001, 0x3fc90fdb),
+ (0x7f800000, 0x80000004, 0x3fc90fdb),
+ (0x7f800000, 0xfedcba87, 0x3fc90fdb),
+ (0xff800000, 0x4ddddddd, 0xbfc90fdb), /* atan2(-Inf, finite) = -Pi/2 */
+ (0xff800000, 0x00000001, 0xbfc90fdb),
+ (0xff800000, 0x80000004, 0xbfc90fdb),
+ (0xff800000, 0xfedcba87, 0xbfc90fdb),
+ (0x6a520b4c, 0xff800000, 0x40490fdb), /* atan2(finite > 0, -Inf) = +Pi */
+ (0x35a33ab8, 0xff800000, 0x40490fdb),
+ (0x55a33ab8, 0xff800000, 0x40490fdb),
+ (0xea520b4c, 0xff800000, 0xc0490fdb), /* atan2(finite < 0, -Inf) = -Pi */
+ (0x95a33ab8, 0xff800000, 0xc0490fdb),
+ (0xc5a33ab8, 0xff800000, 0xc0490fdb),
+ (0x6a520b4c, 0x7f800000, 0x00000000), /* atan2(finite > 0, +Inf) = +0 */
+ (0x35a33ab8, 0x7f800000, 0x00000000),
+ (0x55a33ab8, 0x7f800000, 0x00000000),
+ (0xea520b4c, 0x7f800000, 0x80000000), /* atan2(finite < 0, +Inf) = -0 */
+ (0x95a33ab8, 0x7f800000, 0x80000000),
+ (0xc5a33ab8, 0x7f800000, 0x80000000),
+ (0x1aae129e, 0xde263fa8, 0x40490fdb), /* misc */
+ (0xb76e98b6, 0xdbeb6637, 0xc0490fdb),
+ (0x7112fd5b, 0x7509b252, 0x3b88a34d),
+ (0xe53215fe, 0xcd0f08fc, 0xbfc90fdb),
+ (0xcd47c963, 0x85268f36, 0xbfc90fdb),
+ (0xfacd1adc, 0x79fd5d79, 0xbfa2b8c8),
+ (0xfa3f79f2, 0xf5f06269, 0xbfc96033),
+ (0xddc7b749, 0x5f3d9db0, 0xbe060c09),
+ (0x63c8ee47, 0x792ac38f, 0x2a169cbe),
+ (0xe3c24a4f, 0xe0f9b02f, 0xbfcba1c1),
+ (0xe1f9385d, 0xe317764d, 0xc03c145d)
+ ][:]
+
+ for (y, x, z_exp) : inputs
+ var xf : flt32 = std.flt32frombits(x)
+ var yf : flt32 = std.flt32frombits(y)
+ var zf_act : flt32 = math.atan2(yf, xf)
+ var z_act : uint32 = std.flt32bits(zf_act)
+
+ /* the function under test is atan2, so the message names it as such */
+ testr.check(c, same32(z_act, z_exp),
+ "atan2(0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0}) = 0x{b=16,w=8,p=0}, should be 0x{b=16,w=8,p=0}",
+ y, x, z_act, z_exp)
+ ;;
+}
+
+/*
+ atan2, flt64. Each table entry is (y bits, x bits, expected
+ atan2(y, x) bits). The first part of the table walks through
+ the signed-zero and infinity special cases named in the
+ per-row comments; the "misc" rows are ordinary inputs.
+ */
+const atan04 = {c
+ var inputs : (uint64, uint64, uint64)[:] = [
+ (0x0000000000000000, 0x8000000000000000, 0x400921fb54442d18), /* atan2(+0, -0) = +Pi */
+ (0x8000000000000000, 0x8000000000000000, 0xc00921fb54442d18), /* atan2(-0, -0) = -Pi */
+ (0x0000000000000000, 0x0000000000000000, 0x0000000000000000), /* atan2(+0, +0) = +0 */
+ (0x8000000000000000, 0x0000000000000000, 0x8000000000000000), /* atan2(-0, +0) = -0 */
+ (0x0000000000000000, 0xc678962c36789c22, 0x400921fb54442d18), /* atan2(+0, x < 0) = +Pi */
+ (0x0000000000000000, 0x8000000200000000, 0x400921fb54442d18),
+ (0x0000000000000000, 0xdddddddddddddddd, 0x400921fb54442d18),
+ (0x8000000000000000, 0xc678962c36789c22, 0xc00921fb54442d18), /* atan2(-0, x < 0) = -Pi */
+ (0x8000000000000000, 0x8000000200000000, 0xc00921fb54442d18),
+ (0x8000000000000000, 0xdddddddddddddddd, 0xc00921fb54442d18),
+ (0x0000000000000000, 0x36789ac6987e678a, 0x0000000000000000), /* atan2(+0, x > 0) = +0 */
+ (0x0000000000000000, 0x0000000000000002, 0x0000000000000000),
+ (0x0000000000000000, 0x4ddddddd00000000, 0x0000000000000000),
+ (0x8000000000000000, 0x36789ac6987e678a, 0x8000000000000000), /* atan2(-0, x > 0) = -0 */
+ (0x8000000000000000, 0x0000000000000002, 0x8000000000000000),
+ (0x8000000000000000, 0x4ddddddddddddddd, 0x8000000000000000),
+ (0xdddddddddddddddd, 0x0000000000000000, 0xbff921fb54442d18), /* atan2(y < 0, 0) = -Pi/2 */
+ (0xc678962c36789c22, 0x0000000000000000, 0xbff921fb54442d18),
+ (0x8000000000000002, 0x0000000000000000, 0xbff921fb54442d18),
+ (0x4ddddddddddddddd, 0x0000000000000000, 0x3ff921fb54442d18), /* atan2(y > 0, 0) = +Pi/2 */
+ (0x36789ac6987e678a, 0x0000000000000000, 0x3ff921fb54442d18),
+ (0x0000000000000002, 0x0000000000000000, 0x3ff921fb54442d18),
+ (0x7ff0000000000000, 0xfff0000000000000, 0x4002d97c7f3321d2), /* atan2(+Inf, -Inf) = +3*Pi/4 */
+ (0xfff0000000000000, 0xfff0000000000000, 0xc002d97c7f3321d2), /* atan2(-Inf, -Inf) = -3*Pi/4 */
+ (0x7ff0000000000000, 0x7ff0000000000000, 0x3fe921fb54442d18), /* atan2(+Inf, +Inf) = +Pi/4 */
+ (0xfff0000000000000, 0x7ff0000000000000, 0xbfe921fb54442d18), /* atan2(-Inf, +Inf) = -Pi/4 */
+ (0x7ff0000000000000, 0x4ddddddddddddddd, 0x3ff921fb54442d18), /* atan2(+Inf, finite) = +Pi/2 */
+ (0x7ff0000000000000, 0x0000000000000001, 0x3ff921fb54442d18),
+ (0x7ff0000000000000, 0x8000000000000004, 0x3ff921fb54442d18),
+ (0x7ff0000000000000, 0xf6789ac6987e678a, 0x3ff921fb54442d18),
+ (0xfff0000000000000, 0x4ddddddddddddddd, 0xbff921fb54442d18), /* atan2(-Inf, finite) = -Pi/2 */
+ (0xfff0000000000000, 0x0000000000000001, 0xbff921fb54442d18),
+ (0xfff0000000000000, 0x8000000000000004, 0xbff921fb54442d18),
+ (0xfff0000000000000, 0xf6789ac6987e678a, 0xbff921fb54442d18),
+ (0x6a53a787aa520b4c, 0xfff0000000000000, 0x400921fb54442d18), /* atan2(finite > 0, -Inf) = +Pi */
+ (0x3553a787aaa33ab8, 0xfff0000000000000, 0x400921fb54442d18),
+ (0x5553a787aaa33ab8, 0xfff0000000000000, 0x400921fb54442d18),
+ (0xea53a787aa520b4c, 0xfff0000000000000, 0xc00921fb54442d18), /* atan2(finite < 0, -Inf) = -Pi */
+ (0x9553a787aaa33ab8, 0xfff0000000000000, 0xc00921fb54442d18),
+ (0xc553a787aaa33ab8, 0xfff0000000000000, 0xc00921fb54442d18),
+ (0x6a53a787aa520b4c, 0x7ff0000000000000, 0x0000000000000000), /* atan2(finite > 0, +Inf) = +0 */
+ (0x3553a787aaa33ab8, 0x7ff0000000000000, 0x0000000000000000),
+ (0x5553a787aaa33ab8, 0x7ff0000000000000, 0x0000000000000000),
+ (0xea53a787aa520b4c, 0x7ff0000000000000, 0x8000000000000000), /* atan2(finite < 0, +Inf) = -0 */
+ (0x9553a787aaa33ab8, 0x7ff0000000000000, 0x8000000000000000),
+ (0xc553a787aaa33ab8, 0x7ff0000000000000, 0x8000000000000000),
+ (0xc3a19e952471b77e, 0x3787d6fbb86b73be, 0xbff921fb54442d18), /* misc */
+ (0x27aa78e1d1f0495c, 0xddd13f5bd464f1a5, 0x400921fb54442d18),
+ (0xb8402fae1dfc652c, 0xba6fcc2cc1ed7e10, 0xc00921fb54432877),
+ (0x66dce7f72e6044ad, 0x275302c43652a638, 0x3ff921fb54442d18),
+ (0x4e4b96b837a90109, 0x0f6e0a35d7a1c7ae, 0x3ff921fb54442d18),
+ (0x39e3f59d2cdaf453, 0xc1c9eec772632c8b, 0x400921fb54442d18),
+ (0x0e80d207f7fc1dee, 0xdc3ccf9de59c1620, 0x400921fb54442d18),
+ (0xc53a12760a738704, 0x013d7cd6b38fd663, 0xbff921fb54442d18),
+ (0x24c9826368e2bab6, 0x673ee065b77f3411, 0x000000000000069c),
+ (0x8648f8f528372eef, 0x70bdc29525e17f3d, 0x8000000000000000),
+ (0x949d1ddd9d2c70d9, 0xed7a82c886f9b2b5, 0xc00921fb54442d18),
+ (0x953f916c39b9967c, 0x0e43ed486cdf330c, 0xbff921fb54442d18),
+ (0x9e3633326b5d0ef3, 0xf5c8913f8e308a65, 0xc00921fb54442d18),
+ (0xaf1c2b5cfb3e9c97, 0xae2582384f4c4364, 0xbff92213c33fcf98),
+ (0x13a48905d9e021e7, 0x764d46c054f3e5f8, 0x0000000000000000),
+ (0x1c8142b5bce1d2a8, 0xe7f6c7dc0f16862c, 0x400921fb54442d18),
+ (0xfb0f7a74951ff0d7, 0x0bb323507b3cbacf, 0xbff921fb54442d18),
+ (0x4eac767a5766fcd1, 0xf8b2a6b240350c57, 0x400921fb54442d18),
+ (0x2fb71a162e3ea604, 0x60d683328bfc7d57, 0x0ed06b3d4cc87ec9),
+ (0xc5e748de4fffadde, 0x10ba5f16340a0bc9, 0xbff921fb54442d18),
+ (0xd98ac92afca0e53d, 0xec7a77dca00c3b26, 0xc00921fb54442d18),
+ (0x9243219f73e1f319, 0xa4d1ffde70716ab8, 0xc00921fb54442d18),
+ (0x48bcc07018965976, 0x1f864ca16540f910, 0x3ff921fb54442d18),
+ (0x15cb61e0de4d0245, 0xd9bd7e5a3d6bc814, 0x400921fb54442d18),
+ (0xc41d55b172ee68ee, 0x558fe35fd3a69e4b, 0xae7d7006d1d45ebc),
+ (0xa3cfec3df7c1c356, 0x38de5d01bc664ea9, 0xaae0d260b47f817c),
+ (0xbee29ab450ee989f, 0x190e8e5960b38edb, 0xbff921fb54442d18),
+ (0x03bd0fd0a2cdf14e, 0xb24e849d7122766b, 0x400921fb54442d18),
+ (0x9f3dc5e92e95d81b, 0x22ebeafb26c4e1cd, 0xbc41102fc5241049),
+ (0x85af77b27a6c9ebd, 0x060bdb1f2d9809c8, 0xbf921294a138c466),
+ (0xeaefd066fb169d3e, 0x55036c3913a92f51, 0xbff921fb54442d18),
+ (0xbbd17263c0f5c1dc, 0x4d82ee8ecba53872, 0xae3d7d699b3da6ce),
+ (0xbef4c6bbd5b013f7, 0x32f4e9594348833e, 0xbff921fb54442d18),
+ (0xcf7ce4fad9366c04, 0x2919d12d8e3ce4ea, 0xbff921fb54442d18),
+ (0x3ff0de4d0245d9bd, 0x3ff07e5a3d6bc814, 0x3fe97e0262debf5b),
+ (0x3ff0060bdb1f2d98, 0x3ff009c8eaefd066, 0x3fe91e401d092a68),
+ (0x3ff04348833edc5c, 0x3ff0a2769af7825f, 0x3fe8c566f4d252eb),
+ (0x3ff0d9366c042919, 0x3ff0d12d8e3ce4ea, 0x3fe9299e6eb7eb7f),
+ ][:]
+
+ for (y, x, z_exp) : inputs
+ var xf : flt64 = std.flt64frombits(x)
+ var yf : flt64 = std.flt64frombits(y)
+ var zf_act : flt64 = math.atan2(yf, xf)
+ var z_act : uint64 = std.flt64bits(zf_act)
+
+ /* the function under test is atan2, so the message names it as such */
+ testr.check(c, same64(z_act, z_exp),
+ "atan2(0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}) = 0x{b=16,w=16,p=0}, should be 0x{b=16,w=16,p=0}",
+ y, x, z_act, z_exp)
+ ;;
+}
+
+/*
+ atan, off-by-1-ulp quarantine. Each table entry is (input bits,
+ perfect result bits, acceptable result bits); the two results
+ in each row differ by one in the last place. The test fails
+ only if atan(x) matches neither of them.
+ */
+const atan05 = {c
+ var inputs : (uint64, uint64, uint64)[:] = [
+ (0xbee29ab450ee989f, 0xbee29ab450ec8003, 0xbee29ab450ec8002),
+ (0xbef4c6bbd5b013f7, 0xbef4c6bbd5a46691, 0xbef4c6bbd5a46692),
+ ][:]
+
+ for (x, y_perfect, y_acceptable) : inputs
+ var xf : flt64 = std.flt64frombits(x)
+ var yf : flt64 = math.atan(xf)
+ var yu : uint64 = std.flt64bits(yf)
+
+ if !same64(yu, y_perfect) && !same64(yu, y_acceptable)
+ testr.fail(c,
+ "atan(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, will also accept 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+ x, y_perfect, y_acceptable, yu)
+ ;;
+ ;;
+}
+
+/*
+ atan2, off-by-1-ulp quarantine. Each table entry is (y bits,
+ x bits, perfect result bits, acceptable result bits); the two
+ results in each row differ by one in the last place. The test
+ fails only if atan2(y, x) matches neither of them.
+ */
+const atan06 = {c
+ var inputs : (uint64, uint64, uint64, uint64)[:] = [
+ (0x7ee54b8760669171, 0xb2be78b5d28bd697, 0x3ff921fb54442d18, 0x3ff921fb54442d19),
+ (0x6ac1e65e358fa249, 0xbd59edfa1607e504, 0x3ff921fb54442d18, 0x3ff921fb54442d19),
+ (0xd525a7f2ced4927f, 0x95ede4db7783fcc9, 0xbff921fb54442d18, 0xbff921fb54442d19),
+ (0xccd3a5e34176f426, 0xa8a777c89a5ef6f2, 0xbff921fb54442d18, 0xbff921fb54442d19),
+ (0x3ff075cd7563af1d, 0x3ff013f2ed91a27e, 0x3fe9823675699a92, 0x3fe9823675699a93),
+ (0x3ff0f7c1c35638de, 0x3ff05d01bc664ea9, 0x3fe9b68915c8bb31, 0x3fe9b68915c8bb32),
+ (0x3ff0bef4c6bbd5b0, 0x3ff013f732f4e959, 0x3fe9c8a841c1c952, 0x3fe9c8a841c1c953),
+ (0x3ff0888f53f3ef5a, 0x3ff064ebf6698a51, 0x3fe9449d9c8c9bc0, 0x3fe9449d9c8c9bc1),
+ ][:]
+
+ for (y, x, z_perfect, z_acceptable) : inputs
+ var yf : flt64 = std.flt64frombits(y)
+ var xf : flt64 = std.flt64frombits(x)
+ var zf : flt64 = math.atan2(yf, xf)
+ var zu : uint64 = std.flt64bits(zf)
+
+ if !same64(zu, z_perfect) && !same64(zu, z_acceptable)
+ testr.fail(c,
+ "atan2(0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, will also accept 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+ y, x, z_perfect, z_acceptable, zu)
+ ;;
+ ;;
+}
+
+/*
+ atan2, flt64, registered in main as "exhaustively test C". Each
+ table entry is (y bits, x bits, expected atan2(y, x) bits).
+ NOTE(review): the "j = 16" .. "j = 256" markers presumably
+ refer to indices into the implementation's C table, one row
+ per index -- confirm against atan-impl.myr.
+ */
+const atan07 = {c
+ var inputs : (uint64, uint64, uint64)[:] = [
+ (0x18426dc1da370147, 0x988212e3418dac39, 0x40089fa4e7718127), /* start at j = 16 */
+ (0x75f53f43ddf1bea3, 0xf5b664a94607233a, 0x3ffa2f66b9da6113),
+ (0xa70f3b26830c1a94, 0xa74c60b2a9f15203, 0xc0089555472fa904),
+ (0x3937099c00388aaa, 0x38faf7f608176196, 0x3ff7f6d4e7911d74),
+ (0xea2807836bcaf607, 0x69edc29120f8d3db, 0xbff7e58f64ee105c),
+ (0x8b4d90a9fe6bae02, 0x0b869a0bac14cf06, 0xbfb4e20b353d1cd2),
+ (0xb4b65db1cbab83cf, 0xb4efd7158af2665a, 0xc0086e9d9bb6e77d),
+ (0x4716d543d404ae35, 0xc7502d88541a937a, 0x40086dcab757f45c),
+ (0xd821ab3bf346da52, 0xd857a58e9faf7b83, 0xc00863404ebd1155),
+ (0x222a54c254087afe, 0xa260db8f64f01324, 0x40085aae7d9e5ede),
+ (0xbf77a88b96eed0c5, 0x3fad49845988038f, 0xbfb9c332e78b9982),
+ (0x4230048eaf9862bc, 0x41fb16739ee88b27, 0x3ff772a95cc1b5d0),
+ (0x1100f13181258d69, 0x10cd58b7c1ae82ff, 0x3ff76844d0627ee0),
+ (0x7b5c5a93ccb53cd6, 0xfb2a02a3c95263b7, 0x3ffaf59f0e4eb2a5),
+ (0xb247b96c86d56ece, 0x3279bb34f20c78d6, 0xbfbd5fded650f614),
+ (0x115f263dc39ac9a5, 0x112df4339ae6bb12, 0x3ff737fb844962b8),
+ (0x0868e008bbd86562, 0x0838b0561328de61, 0x3ff728666e0adb09),
+ (0xd4bb121d74efe2da, 0xd4ea966130621e4c, 0xc0081eb8c1a4153a),
+ (0x88e5d8329155d426, 0x88b7242e0fc48972, 0xbffb3d3d48336602),
+ (0x0d0ea0219a2c652d, 0x0d3c060e7f8db79d, 0x3fc160c1988f88aa),
+ (0xc643f7c8910fb2aa, 0x46163c97e80a13ef, 0xbff6eb72b043c981),
+ (0xf6e835bc9adeaf59, 0x76bc0ffdf6576c1d, 0xbff6d49ba0ac648b),
+ (0x8c6daee970ffa651, 0x0c9945452e078530, 0xbfc2a918953f600a),
+ (0xda75a3bf208dd25a, 0xdaa1ce8481388f38, 0xc007ed3cd8698716),
+ (0x256084badab69b6f, 0xa58a3e76a1a9ac0f, 0x4007e25710289717),
+ (0x192bbb1d943d1c72, 0x99558053a5ca0554, 0x4007daa06a0fe77a),
+ (0x5ac36b61305c29f0, 0x5a9937845b781436, 0x3ff68ee124470819),
+ (0x5c68b6c9ddb3ba95, 0xdc92798f09125a3b, 0x4007ceaaaa488a83),
+ (0x6391aa182e1d70d9, 0xe3b9d8cda07245bf, 0x4007c76a07bf2989),
+ (0xbc68b545ae310bd6, 0x3c41338754f714d5, 0xbff66029d8a55e48),
+ (0x7d5059dea1719a3a, 0x7d2786542fd88ffd, 0x3ff64924d073ac1c),
+ (0x53aab9beef29438b, 0x5383baac71cff20e, 0x3ff63676298edf36),
+ (0x30822d2315e384fa, 0xb0a8494f9e7c25f1, 0x4007a72c29d2ead8),
+ (0x5d88506b28721aeb, 0x5daff0d1a8860f63, 0x3fc8124991d9ed52),
+ (0x848effeaebeb397a, 0x04b3e7d641cd15d5, 0xbfc89c0bddbcf109),
+ (0xab474e4dc88b6320, 0xab6d3e4cb75879b9, 0xc0078f3604696ffb),
+ (0x5afc797a50fff365, 0xdb219c67f404cafd, 0x40078990f8464cb6),
+ (0x4e6bed9552b442f7, 0xce90cbb1e8f96d77, 0x40077e47244135f4),
+ (0xd23f67b8daeaa1cd, 0x521a57dcda8bd0b1, 0xbff5d34d1830378e),
+ (0x200f3502581f0c00, 0x1feadb65c0d29de5, 0x3ff5bdf3d5eeee34),
+ (0x2819e6a5a099f07c, 0xa7f6987e601acbee, 0x3ffc918895432ab5),
+ (0x6a06f6146c77eb36, 0xea29db6e2e2b221b, 0x400762945f726fc0),
+ (0x3df11cfec787e47d, 0x3dcf1da836e547fb, 0x3ff58e991eb48d58),
+ (0xd98eb9a768d3b13c, 0x596c4b654760f131, 0xbff58324124d2f0c),
+ (0x83ea73b5e71eb08d, 0x03c8cb5bb22e1b8c, 0xbff57329b63cf62d),
+ (0x5bf364f87b9850a1, 0x5c14713fe742a7b1, 0x3fcdcf07dd413790),
+ (0x4329fd6f8a5c9c09, 0xc34afddcf85aa8f3, 0x40073e2fdb6f0d04),
+ (0xc459a529603faebe, 0xc47a0903b2bab70a, 0xc007337dbdbc972c),
+ (0xf155d5bc8bbbc622, 0xf175fa349aeb35d6, 0xc0072f63e3e01cd1),
+ (0xc2ff7df0c28d2d98, 0x431f3588db6a0ce4, 0xbfcfa14c1091d92a),
+ (0xcb98fb0c1faefad6, 0x4b79ba56a9a81681, 0xbff519c591857427),
+ (0xc1f6eaccd468f9dc, 0xc215ed95c0b8dd82, 0xc007169655bd9847),
+ (0xcfaf375aaa174fd7, 0x4f90a24cd877f957, 0xbff4f773f34d6a52),
+ (0x5d404fa9511420bd, 0xdd5e26d9482f9752, 0x400704f98a04df78),
+ (0xc83d634df8159064, 0x481fecb010d984d8, 0xbff4e3cac9a2565f),
+ (0xf2acd33690651c1d, 0xf2c9dcbcd74009c7, 0xc006f571800acf60),
+ (0xa6d1c7979c5718c5, 0x26efa12e7273ea28, 0xbfd189143428723d),
+ (0xf0aca04752feb2b3, 0xf0903cbb018d78c1, 0xbffd8deb869dad07),
+ (0x13e6221db9bd1161, 0x940339a43410d0fc, 0x4006e40bcc4251a9),
+ (0xf1449d321262f59a, 0xf1282a47a2951a26, 0xbffdb1af654de151),
+ (0x8da10731ac7ff6c4, 0x8dbc847feeba9f63, 0xc006cfcb9724a57d),
+ (0x993a3fc8bc79deb7, 0x991f96b86734466b, 0xbffdcf002904b697),
+ (0x850ae8ca929d38cf, 0x85260ddf20107de1, 0xc006c3a0c653534f),
+ (0x550d39c0ed02033b, 0x5527bb5ae216a837, 0x3fd31d4ff5141bb4),
+ (0x69f04200fd15c44d, 0x6a09f5bbe0965894, 0x3fd36beb7ba30050),
+ (0xea5103cbcd2dc0f8, 0xea6addccedc4cad9, 0xc006aded9c63e371),
+ (0x66354277caa622f2, 0x661b2ecf586161fd, 0x3ff42eba1f1329a2),
+ (0x827944bfe027fc91, 0x029395925de79e23, 0xbfd3f8386b70acdc),
+ (0xc8578c33abd012b6, 0xc871f1ed754c80bb, 0xc00698c9033d2443),
+ (0x36c47150cf44174b, 0x36ded6df6fba558b, 0x3fd47b977c18f044),
+ (0x0392076753c5ad4d, 0x83aab570a430e601, 0x400687593a62215e),
+ (0xab409115af3336d0, 0x2b5877b08ee26b9a, 0xbfd4e44b158c63c8),
+ (0x025b46e2bba8aac6, 0x8273c70ac00f43f5, 0x400679fae4615e8f),
+ (0xcb83e4d6f4a0f930, 0x4b6bbbc7e601ddbc, 0xbff3c45f6ead2fc0),
+ (0x894070ba821a311d, 0x89576e66d85dfa8c, 0xc0066ef9792f16ef),
+ (0xbe34ef324e717dd4, 0x3e1de4d09ab52519, 0xbff3a57e9fb9e4d6),
+ (0xe18d9f521cb0e0bb, 0x61a49ca578fb4cd3, 0xbfd6133cf03299ed),
+ (0x980a8c64968d03c9, 0x98225772e9299381, 0xc0065ae999121db5),
+ (0x0a243a4c4e15733c, 0x8a0dbbfcdb7f1ba0, 0x3ffec47d07799e8b),
+ (0x30ebfe62d6baac2f, 0xb0d4c33a3d8aecac, 0x3ffed090244929ea),
+ (0x1ad0d3f4d1963ae1, 0x9ab93fd035e118a7, 0x3ffedfe5dfb819a7),
+ (0xf4c7b73c55323728, 0x74df33d218f0a341, 0xbfd73e3dd0e9d6b4),
+ (0x7fc7730f17b94425, 0x7fb1e36aa5f19e93, 0x3ff34d70518b2f0a),
+ (0xa45ae4c0b2526558, 0xa471685078380a57, 0xc0062f1ece927028),
+ (0x49031632c2e74c77, 0xc91858d8e8b972cf, 0x400624eb07ef7e19),
+ (0xc9fa2a90bbdf54fd, 0x49e49a101827560d, 0xbff321d2ed9c9bf2),
+ (0xcd3430e4ecbc9273, 0xcd49508246135777, 0xc00618d195a84255),
+ (0x593e0312105fe577, 0x592821625d7288f0, 0x3ff3045358e522e4),
+ (0x3c9d87375ce18bb9, 0x3c8804bcc678f476, 0x3ff2f3ceda086c71),
+ (0xd2364efc21d22d8d, 0xd24b2fbffa709218, 0xc006049bacf46bfd),
+ (0x455a71c01bcb719c, 0x456ff860d9a0d4a2, 0x3fd91915e32aa709),
+ (0xae00c21ee6a731f5, 0xae13f99538b94618, 0xc005f48a43984d8d),
+ (0x9ac24bc4af5dd567, 0x9aaef59c81be5029, 0xbfff8939b93cd223),
+ (0xcc2151b4908db683, 0x4c0d79efa1932ddd, 0xbff2b23af38d32a8),
+ (0x91f0443849a9df00, 0x11dbfefd41ae3d5d, 0xbff2a1a9f24970cc),
+ (0xa5e237687877a0fb, 0x25f506dcba88a63b, 0xbfda2951404b2019),
+ (0xdf7b27a4a6b4ee2e, 0xdf8f110382326777, 0xc005d625ecf371a1),
+ (0xae86d7602355270b, 0xae7432cdebdccba1, 0xbfffcb2a540c5fed),
+ (0x287d154fbdf9fc0f, 0xa869f56a3ef77a7e, 0x3fffd94f19c79d25),
+ (0x0560e0d82db59047, 0x8572d5c422c9c0cf, 0x4005c34a330f2ea9),
+ (0xf2d7467700a5f6f6, 0xf2e99ed266fb898d, 0xc005b8c77efcdd76),
+ (0x7b7aab211d5ae862, 0x7b685e3df97eab47, 0x3ff246a054d4ca30),
+ (0xbce01a15fc14603f, 0x3ccdbbf7cd56a322, 0xbff2366ffd36a688),
+ (0x3aba009f287274a8, 0x3aa835a8c9b86f42, 0x3ff229654835b3f6),
+ (0x74f69aa82ae6ae36, 0xf50804d5d9b96f4d, 0x40059d3ff942caf1),
+ (0xf00bd0188ab5f549, 0xeffa4a0666680629, 0xc000192c055d2cea),
+ (0xf966c40358ee231f, 0xf955a9e47ee17b2a, 0xc0001e80c754ecbf),
+ (0xef0eb2afdf5e914e, 0x6efd83223ec05254, 0xbff1f6a77f563010),
+ (0xeb402915a3acc0e7, 0xeb2f48bba99a1e8a, 0xc0002c17cb7296fe),
+ (0xe7119821252f5168, 0xe7220ebe8d4ade9a, 0xc005818f17dd93fe),
+ (0x2fe7b730d0019f73, 0xaff8153060f5e0eb, 0x400578f5a3c8b63d),
+ (0x421abce463ace518, 0xc22af76910fd4298, 0x400573632910fd19),
+ (0x1b8c7b64e857b17f, 0x9b7c720983ec6dcb, 0x4000457d58d0072a),
+ (0x4ed09b917d422bba, 0x4ec0bde86f0d6e2a, 0x3ff1a9ab56acdc13),
+ (0x1ac4929922665d01, 0x1ad4414eed995920, 0x3fde12d20b86172f),
+ (0xd3c9c0a93edc039b, 0x53d926a1de6138a9, 0xbfde486e28ca8ef5),
+ (0x998d665d7a57087a, 0x197e51760e709cfe, 0xbff184061e1f4dc9),
+ (0x845b25117034a928, 0x046a2b75ea771ab1, 0xbfde9ec075594604),
+ (0x4103349aefa22309, 0x40f41f2648539bbf, 0x3ff169784d6d111c),
+ (0xbb6eacc18d908188, 0x3b7d1de075b5f774, 0xbfdf0756f298d7b0),
+ (0x46d62b3c7dc84ee7, 0xc6c78930cb465f26, 0x4000786e39a79f00),
+ (0xd4403bcec8aee8ab, 0x544e56b935d9fb5b, 0xbfdf71b509d061d7),
+ (0x493307757157381f, 0xc9248bc78a541d30, 0x400086cbed68f834),
+ (0x3491c69fff9a59bb, 0xb48346e8b2cfe7dd, 0x40008a8bb3c6676f),
+ (0x04ec3f1f3d711c03, 0x84ded70b376da1ed, 0x4000905e0bb52c89),
+ (0xfcdb52d3d18e3a40, 0xfce8bd40675bf7a6, 0xc00518a7beaf01e8),
+ (0xe9065952a9652233, 0xe9142bf74757fcf9, 0xc00515eef025e8d7),
+ (0x937fb429117409d8, 0x938c5e4f3ea26f39, 0xc0050e650830c708),
+ (0xab95c177fc58320c, 0xaba3633be704d70e, 0xc0050ad48b6fc0ee),
+ (0xc1fd1980fb96cdbb, 0x41f081d2b0e3346c, 0xbff0e072570bab0c),
+ (0xfb3e1409a3aac153, 0x7b31299de4625efe, 0xbff0d620e73490c7),
+ (0x362445d91f2158c6, 0xb6174ee311b98730, 0x4000bd7e82f648aa),
+ (0xa973576024c5cffd, 0xa980ba86c0a0af2b, 0xc004f083ccd3f3e2),
+ (0x6e5366b29436f556, 0x6e60ad029064c091, 0x3fe0dc16f81f34f9),
+ (0x594f6a41578e84b4, 0x595ae034c59be57d, 0x3fe0ecccc9db1d49),
+ (0x578d5e6f4974d6fd, 0x5798df248e1d8929, 0x3fe111196f271e7a),
+ (0x537cb0e054b6923e, 0x5371029b86d81056, 0x3ff091f1a9dfd0c6),
+ (0xe1e6318db224d875, 0x61da859400b913e9, 0xbff083edca6c0ecd),
+ (0xdccc45f468c3e71c, 0xdcd77e2ee145872a, 0xc004cc956cd5a28e),
+ (0x67c3a7faad55a8ee, 0xe7d03d98b1a75491, 0x4004c7752b4fdcd9),
+ (0x2891970a3578028e, 0xa89ce872cfb4636f, 0x4004c27a3b67cdcf),
+ (0x8197efb3f923b946, 0x81a3825edc1edafc, 0xc004bb0d616fd574),
+ (0x0116a707885bea19, 0x01225827e6c86b5d, 0x3fe1b34228cc2534),
+ (0xc8151285ea7320cf, 0xc820fc98709a4ee5, 0xc004b0f7200e8a99),
+ (0xd73da3db3f61f8f1, 0x5747b5df8a698ef3, 0xbfe1e04ba329ad62),
+ (0x9265b496538d7585, 0x125b44a6f29fa8b0, 0xbff028b7a225c296),
+ (0xbd9516a13ead26dd, 0xbd8aaaaabfd125d9, 0xc00113a87fe993a5),
+ (0xedc420889150a1ef, 0x6dcf9a6066e34016, 0xbfe2259bce59e2ef),
+ (0x997d5b3eb41cba8d, 0x1972cccd840f3247, 0xbff004e5bad8a285),
+ (0x5f28fe17a9282e60, 0x5f336ac7a90ee043, 0x3fe24c876b3ecc7e),
+ (0xd3bf8d4067b1be95, 0x53c85bfb3f716245, 0xbfe26404b24839b8),
+ (0x25b895214b2b60e6, 0x25b0062fa5260ec0, 0x3fefc7a4f0f7a7d9),
+ (0xc530319eb1a1e2a5, 0xc538aaaf7c51c545, 0xc0047c3a760d1cd9),
+ (0xeb98221ee7d54a2f, 0xeba24cbee2b4d677, 0xc004781bcc01a10f),
+ (0xe2611e7acd950bd0, 0xe269bd3f709cf616, 0xc0046ffb487c0c68),
+ (0xf257d9cfc126807e, 0x724fe41944255eca, 0xbfef685db0489536),
+ (0x60a2c666176c460d, 0xe0abeccbd99ea353, 0x400465bbec65b35d),
+ (0x4b9e86470c8af801, 0xcba6946dd1f68f93, 0x400460ae560b89ee),
+ (0x6c00e2b8c8111aae, 0xec08d824cdc92e8b, 0x40045b7172d19dd8),
+ (0x9d6d541b54057b72, 0x1d7572d679275923, 0xbfe330baa38d160f),
+ (0x4ee3d469e11c3b1e, 0xceecd622cce943fb, 0x40045045ab0feb07),
+ (0x8df338c143e7fe4a, 0x8dfbcefefbf7b3b7, 0xc0044b5717874d94),
+ (0x2b3ce380a38fd495, 0xab44c4c877e2d851, 0x4004456e8c85e86c),
+ (0x35b6254148b0cc48, 0x35aef51eba25a22d, 0x3feebea390775bfc),
+ (0x0646804d41598e1c, 0x863f963de5cfd390, 0x4001765bc9bab45f),
+ (0xf5adf458b64d8736, 0x75b52ec33d04e25f, 0xbfe3b1b56ae7344b),
+ (0x7d60b41f76311796, 0xfd67815e15d963fd, 0x400430ad3c195d17),
+ (0x3611a21dc610b785, 0xb618aa3b1b3b0f86, 0x40042adcc77995cf),
+ (0x7021254ec8bc72f6, 0x7027e00c63e273e9, 0x3fe3edf444b73214),
+ (0x7e63b7e103cd0489, 0xfe6b3f53fceb3bfa, 0x40041f0862b0af2d),
+ (0x9d9ebb9f04cdf5d7, 0x1da52bc78ab56424, 0xbfe417560828f6d8),
+ (0xda9cfabc3c20417a, 0xda95246166d15ad5, 0xc0019bd2040f1c7e),
+ (0xf66766aa0d38e359, 0xf66fcec939e5a5bb, 0xc0040ef609989e8e),
+ (0xd4045e5d16392197, 0xd40b9412c98c303b, 0xc0040b27876f70db),
+ (0xc9b37a2a0a83c474, 0xc9ba326a8ba44628, 0xc00404a61a0681d2),
+ (0x21022b885f2bcb4d, 0xa10852810f14a7d9, 0x4003fff572516d6e),
+ (0xf5cac65697217a36, 0xf5d1df272cc38ccc, 0xc003fd4b443c03a2),
+ (0xb055c66f4fc8c399, 0xb05ced1fe2377c16, 0xc003f8739e21830a),
+ (0x71ad384d868e6b06, 0xf1a621cf2fe7d1dc, 0x4001c095f2a99696),
+ (0x4d17c1de6ac68d6b, 0xcd1213396573b855, 0x4001c4fe437c1196),
+ (0x9b0d3fb9cbf1b4c6, 0x1b0664d3b4eeb0d2, 0xbfed5b11c719fe27),
+ (0x0fb3fe727a21e5ac, 0x8fba01df1f586bf8, 0x4003e3b20f8de5db),
+ (0x3f21c2ca49b88075, 0xbf26f05cd43cab02, 0x4003dca664c31bed),
+ (0x6cc6f1d079f04b98, 0xeccd8e177ab76b6e, 0x4003da024b7a6ca8),
+ (0xb106c2d0afd0eaad, 0x310d28333e770e2d, 0xbfe535d434ff2257),
+ (0xeb08335b08e8334f, 0xeb0ecd205b92dea2, 0xc003ce17cbb48a9e),
+ (0x4250b2cc567b0093, 0x42552deaa34b7e5b, 0x3fe55d5a9c073f8a),
+ (0x1e91017ab850072a, 0x9e8af50a0be3b825, 0x4001ed927090ff6e),
+ (0x958b250c719e9fd7, 0x9585a086f26895d7, 0xc001f2c5bba0f280),
+ (0x7f42ee1989c19aa5, 0xff47a9a405acd3d9, 0x4003bc1d6d17b2f4),
+ (0xe0c9bf3add3dc69f, 0xe0c4b9d987749d38, 0xc001fd126d2f17ff),
+ (0x77fd24e199b5c560, 0x7802080019b6ad88, 0x3fe5c00c41b089be),
+ (0x1fb8dc0fd3ff8fca, 0x9fbea2df8431838b, 0x4003ade63a714362),
+ (0x83edb83bbda9ff38, 0x83e841c15223cf20, 0xc0020aeae788d82b),
+ (0x2f40b627343797bf, 0xaf3b72af6d17d8f0, 0x40021118e92733b6),
+ (0x22ad7d192e8784cb, 0x22a84d2fe4f68aa1, 0x3fec3586c884069b),
+ (0x4326ee8596b374ae, 0xc32bb17d8682f8f9, 0x4003998c58705302),
+ (0xaee48c63ca918372, 0xaee115755ee72125, 0xc0021d7816852a75),
+ (0x9c0bcb204ceabc61, 0x1c109f3e0a1b79d4, 0xbfe648603bcc455f),
+ (0x479a3a8c8f6446ff, 0x479f43821aefc385, 0x3fe65661082eeea4),
+ (0xd2b8fa499fed0f9f, 0xd2b50cd6798137ed, 0xc0022b274cd99e10),
+ (0x6ad25250a41576bb, 0xead59e36376d5d94, 0x400382238981cc06),
+ (0xb9219a8a8f7efe91, 0x3924ac87f8e6b194, 0xbfe6924cc8683709),
+ (0x37beda024f45aedc, 0x37ba5f9abbc7859b, 0x3feba1be0e2fe373),
+ (0x6fc8d5499dc46f95, 0xefccd7149f2bb89c, 0x40037218f702b002),
+ (0x7ca3becda1d57a3f, 0xfca6e07559fa362b, 0x40036faf147e21fe),
+ (0x9ef8de78ed7949ef, 0x9efca914a41f2409, 0xc0036a4d5a44860a),
+ (0x88bc103761961766, 0x88b86ecd68ae0774, 0xc0024c137e9920a2),
+ (0x5a04350fba24bc39, 0x5a071ac1f8f03b5c, 0x3fe6fecf73bc6287),
+ (0xb0da7bac5589300e, 0xb0de18d46a4b846c, 0xc0035c1e7a96f4bc),
+ (0xda8ca23fbc0a437e, 0xda8947fa661a2f3f, 0xc0025a4c5b426b40),
+ (0x3d6061ffc7256d60, 0x3d627ef1b2a33874, 0x3fe7324a015abfea),
+ (0x85aafe4c0caab70b, 0x85ae5951fb175351, 0xc0035131e1813858),
+ (0x5b472dc1329a819b, 0xdb49f252edb042d8, 0x40034cc10f47cfd3),
+ (0x5af048199a331d49, 0xdaf215c3b1ceecb8, 0x400344db170384a7),
+ (0x6d3983381694e0fd, 0x6d3c4bed9dbefd56, 0x3fe77a83f0976226),
+ (0x820385902a31f7d0, 0x82058f0517dfa1c0, 0xc0033ef3a4e28c4d),
+ (0x6b19ea8931b5c441, 0x6b1c7b28c26fbbcf, 0x3fe79fffc4138dcb),
+ (0x29e61553c19ddd07, 0x29e827f3d30d2850, 0x3fe7b301fac7e326),
+ (0xfac0e54d45520a82, 0x7ac266978d40aba1, 0xbfe7c4e2fc5c5a43),
+ (0xe058426c7875fb1e, 0xe05a4c0ce0f0e3c0, 0xc0032bfbb10cb569),
+ (0x8d49ec500d90d579, 0x8d4805cc07d433de, 0xc0028b98c8b802ca),
+ (0x0b15c458a6db43ce, 0x8b1762a8fd98d47a, 0x400322dcb52b5ef4),
+ (0x486a31a4c0d3a267, 0xc86c0a2479e4bc7e, 0x40031f2867d112f7),
+ (0x48917f21b9fcbe55, 0x489060b8296d7b50, 0x3fea3072865b102d),
+ (0xd9c2672a452fdbd9, 0xd9c1524843cb3b6c, 0xc0029b8045923d1f),
+ (0x9277a69bceb041df, 0x927657a5eb7794f3, 0xc0029f3d4968e8ec),
+ (0x0984c57a7e9d51da, 0x8985e00a293f6a14, 0x40030e7acd40c442),
+ (0xf0cb5b77a2c2f56a, 0xf0ccafe7a265b2f1, 0xc0030a129477250a),
+ (0x02740ba4fb4cdb85, 0x8274ef33fe8f8762, 0x400305e720d195af),
+ (0x9357aa6b6e40e4ce, 0x1356ba9fc9b8c04a, 0xbfe9c75ab7859d93),
+ (0xc39eaa3101026fc5, 0x439fcb8dfd282364, 0xbfe88dbfbb9e3a03),
+ (0xc06ef5bac1f0755a, 0xc06ff8851d5dfcf5, 0xc002fa61c91e15e9),
+ (0x03a8d2f6619e1128, 0x03a988403eb55bcd, 0x3fe8aec91f700e10),
+ (0x93cb2e10bb006567, 0x13cbcd7e9dcf755a, 0xbfe8c53297e66569),
+ (0x90ce665b5f887a3c, 0x10cdcdabe52970fd, 0xbfe97322edfec555),
+ (0x9252ad57a250df33, 0x1252fc2a219dff1c, 0xbfe8df02dad91818),
+ (0xa4f9e11b6cb66d86, 0xa4f9947d89a8a498, 0xc002cd934ed639b9),
+ (0x4eb98661a247fdce, 0xceb9babf2dbaa10b, 0x4002e1a8e0f0f5b7),
+ (0x13758224e135e1c5, 0x937596e7d8816f9c, 0x4002dd5714deeb1e),
+ (0x26ecf80bd6fb389b, 0xa6ecf11130353926, 0x4002d885ae886a69), /* j = 256 */
+ ][:]
+
+ for (y, x, z_exp) : inputs
+ var xf : flt64 = std.flt64frombits(x)
+ var yf : flt64 = std.flt64frombits(y)
+ var zf_act : flt64 = math.atan2(yf, xf)
+ var z_act : uint64 = std.flt64bits(zf_act)
+
+ /* the function under test is atan2, so the message names it as such */
+ testr.check(c, same64(z_act, z_exp),
+ "atan2(0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}) = 0x{b=16,w=16,p=0}, should be 0x{b=16,w=16,p=0}",
+ y, x, z_act, z_exp)
+ ;;
+}
+
+/*
+ NaN handling: a NaN in the argument of atan, or in either
+ argument of atan2, must yield a NaN, for both float widths.
+ */
+const atan08 = {c
+ var nan64 : flt64 = std.flt64nan()
+ var nan32 : flt32 = std.flt32nan()
+
+ /* single-argument atan */
+ testr.check(c, std.isnan(math.atan(nan64)), "atan(NaN64) should be NaN")
+ testr.check(c, std.isnan(math.atan(nan32)), "atan(NaN32) should be NaN")
+
+ /* NaN as the first argument of atan2 */
+ testr.check(c, std.isnan(math.atan2(nan64, 3.0)), "atan2(NaN64, 3.0) should be NaN")
+ testr.check(c, std.isnan(math.atan2(nan32, -2.0)), "atan2(NaN32, -2.0) should be NaN")
+
+ /* NaN as the second argument of atan2 */
+ testr.check(c, std.isnan(math.atan2(6.0, nan64)), "atan2(6.0, NaN64) should be NaN")
+ testr.check(c, std.isnan(math.atan2(4.0, nan32)), "atan2(4.0, NaN32) should be NaN")
+}
diff --git a/lib/math/test/log-impl.myr b/lib/math/test/log-impl.myr
new file mode 100644
index 0000000..8a9fdea
--- /dev/null
+++ b/lib/math/test/log-impl.myr
@@ -0,0 +1,742 @@
+use std
+use math
+use testr
+
+/*
+ Note: the C constants are a major part of the algorithms. They
+ are tested extensively in log{,1p}0{1,2}.
+ */
+const main = { /* entry point: passes the named log and log1p test functions of this file to testr.run */
+ testr.run([
+ [.name="log-01", .fn = log01],
+ [.name="log-02", .fn = log02],
+ [.name="log-03", .fn = log03],
+ [.name="log-04", .fn = log04],
+ [.name="log1p-01", .fn = log1p01],
+ [.name="log1p-02", .fn = log1p02],
+ [.name="log1p-03", .fn = log1p03],
+ [.name="log1p-04", .fn = log1p04],
+ ][:])
+}
+
+const log01 = {c /* flt32 math.log: each table row must compare equal (==) to its expected value; the inputs in the second loop must give NaN */
+ var inputs : (uint32, uint32)[:] = [ /* rows are (x, expected log(x)), both as flt32 bit patterns */
+ (0x00000000, 0xff800000), /* log(+0.0) = -inf */
+ (0x01000000, 0xc2ad496b),
+ (0x3f060000, 0xbf25b7eb),
+ (0x3f871b00, 0x3d5d49cd),
+ (0x3f710000, 0xbd77518e),
+ (0x4effac00, 0x41abe3e7),
+ (0x477fb7b6, 0x41316d93),
+ (0x41ff8653, 0x405db02b),
+ (0x7f800000, 0x7f800000), /* log(+inf) = +inf */
+ (0x3f800000, 0x00000000), /* log(1.0) = 0.0 */
+ (0x000009a4, 0xc2beef7f),
+ (0x00000002, 0xc2cd2bec), /* C[0] */
+ (0x61017aaf, 0x4239cf35), /* C[1] */
+ (0x5702333f, 0x4202613d), /* C[2] */
+ (0x27837177, 0xc204fa63), /* ... */
+ (0x3603beba, 0xc152415d),
+ (0x6c04905b, 0x4276e689),
+ (0x4805d58d, 0x413d3fca),
+ (0x3e0692bf, 0xc001e117),
+ (0x4f08632d, 0x41ac6883),
+ (0x4b88da49, 0x41859e88),
+ (0x0389bcca, 0xc2a6356c),
+ (0x720b6590, 0x428c2fb3),
+ (0x1c0c5e59, 0xc2447c1e),
+ (0x7b0cf7a7, 0x42a5297b),
+ (0x488e1f1c, 0x41494d03),
+ (0x7a8f5038, 0x42a3cf0a),
+ (0x35905a30, 0xc15be22b),
+ (0x49113e8d, 0x4154bd2b),
+ (0x5811f2be, 0x420861b9),
+ (0x73129cb1, 0x428f0f52),
+ (0x6f93b5c4, 0x42855ee6),
+ (0x5894d094, 0x420b3b6c),
+ (0x08967e92, 0xc2982b29),
+ (0x3396e716, 0xc183c476),
+ (0x68981c93, 0x42640ae9),
+ (0x6718afeb, 0x425bbd6e),
+ (0x000009a4, 0xc2beef7f),
+ (0x6c1a8cc2, 0x427783ac),
+ (0x609bff02, 0x4237c835),
+ (0x229d097e, 0xc21ffe0a),
+ (0x031e0ed1, 0xc2a751dc),
+ (0x491e82ab, 0x4156232c),
+ (0x629fabc8, 0x4242f72e),
+ (0x7b2155b9, 0x42a56e93),
+ (0x36a205aa, 0xc143daeb),
+ (0x2322aae2, 0xc21d142f),
+ (0x1fa46ee3, 0xc230719c),
+ (0x3ba4c8ff, 0xc0a95cb2),
+ (0x0e26770d, 0xc288b7b7),
+ (0x1d273361, 0xc23e3d6e),
+ (0x01283d1c, 0xc2acbd76),
+ (0x5d2967a1, 0x4224b42b),
+ (0x2c29e575, 0xc1d5ff25),
+ (0x12ab16d1, 0xc2785f53),
+ (0x39ac732d, 0xc10050a6),
+ (0x41ad00fe, 0x4044ba54),
+ (0x522da14e, 0x41cf9c59),
+ (0x6caf1807, 0x427ac941),
+ (0x6a30133d, 0x426cf210),
+ (0x4b309bc3, 0x41821d44),
+ (0x043197b1, 0xc2a45069),
+ (0x00b3484b, 0xc2adffcd),
+ (0x6d347bde, 0x427dae16),
+ (0x68348026, 0x4261f45a),
+ (0x5c35d8c1, 0x421f712d),
+ (0x0c372300, 0xc28e1269),
+ (0x6737b23e, 0x425c7ac2),
+ (0x5c38b8d6, 0x421f813d),
+ (0x753a038d, 0x429514c2),
+ (0x0abac215, 0xc292310f),
+ (0x333bad9e, 0xc187915f),
+ (0x623cc68a, 0x4240dcdc),
+ (0x2b3e04e8, 0xc1e03107),
+ (0x473ef1b8, 0x412cc12a),
+ (0x3b3f8f2e, 0xc0bab99c),
+ (0x7140973b, 0x428a0f6a),
+ (0x44c1d017, 0x40eb152c),
+ (0x2ec2d4dc, 0xc1b92cda),
+ (0x6bc47734, 0x4275b39e),
+ (0x5cc54778, 0x42228a5e),
+ (0x78463aba, 0x429d86ab),
+ (0x1a46981f, 0xc24e2fee),
+ (0x334802ab, 0xc1870f08),
+ (0x68c97d24, 0x42652ac6),
+ (0x464a3cc7, 0x41177e43),
+ (0x48cb418c, 0x414f067e),
+ (0x71cc037f, 0x428b8fcf),
+ (0x2d4d0ef7, 0xc1c966c5),
+ (0x6cce3dc1, 0x427b70e9),
+ (0x2e4f6e9c, 0xc1be3812),
+ (0x32cf8b91, 0xc18c4edd),
+ (0x0d515df5, 0xc28b0818),
+ (0x1cd1e03f, 0xc2401a70),
+ (0x75535300, 0x42955613),
+ (0x0cd3dd45, 0xc28c64ea),
+ (0x1cd566e5, 0xc2400960),
+ (0x57d628d5, 0x4207249d),
+ (0x75d75209, 0x4296c28e),
+ (0x0fd8044a, 0xc28409a1),
+ (0x0358fcb6, 0xc2a6af9e),
+ (0x4d5a2f36, 0x4199fc7c),
+ (0x3adb5dff, 0xc0cc9173),
+ (0x0bdc4079, 0xc28f16d1),
+ (0x12dcb6e2, 0xc2775a87),
+ (0x175e379b, 0xc25e5f89),
+ (0x38df6a31, 0xc1125a5c),
+ (0x3d601f07, 0xc039f502),
+ (0x4ae113c3, 0x417d04b7),
+ (0x77e22cfa, 0x429c674e),
+ (0x6fe31066, 0x42863b0d),
+ (0x486401b0, 0x4145c607),
+ (0x0f653fb1, 0xc2854e14),
+ (0x39659200, 0xc106d3e7),
+ (0x51e6f204, 0x41cc58fc),
+ (0x13e7d980, 0xc2719c90),
+ (0x5f691614, 0x42311213),
+ (0x68ea445b, 0x4265c51f),
+ (0x1c6b20bf, 0xc2426be1),
+ (0x7b6b9715, 0x42a6306d),
+ (0x606d0172, 0x4236aeb7),
+ (0x3a6dea49, 0xc0e026ca),
+ (0x0beecc4f, 0xc28eed6d),
+ (0x056fdd5a, 0xc2a0f0bb),
+ (0x4df0dd4c, 0x41a05296),
+ (0x3c71cfbc, 0xc086e8ac),
+ (0x5e736a8a, 0x422bb2ea),
+ (0x47f3e7ff, 0x413bc301),
+ (0x17f57b61, 0xc25b33cc),
+ (0x3675ceb6, 0xc14846c5),
+ (0x75f6c0f1, 0x42970853),
+ (0x1077f9f5, 0xc2826017),
+ (0x7278e907, 0x428d588a),
+ (0x62f99bda, 0x4244c0af),
+ (0x407af5c4, 0x3faee68b),
+ (0x067c2ef4, 0xc29e114e),
+ (0x7f7d456e, 0x42b16c9b),
+ (0x4ffe543f, 0x41b6f03f),
+ (0x4efebc08, 0x41abdc61),
+ (0x2f7ffc46, 0xc1b17236), /* C[128] */
+ ][:]
+
+ for (x, y) : inputs
+ var xf : flt32 = std.flt32frombits(x)
+ var yf : flt32 = std.flt32frombits(y)
+ var rf = math.log(xf)
+ testr.check(c, rf == yf,
+ "log(0x{b=16,w=8,p=0}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
+ x, y, std.flt32bits(rf))
+ ;;
+
+ for nan : [ 0x7fc00000, 0x7fd00000, 0x7fffffff, 0xc147a5e3 ][:] /* three NaN encodings, then a negative finite value */
+ testr.check(c, std.isnan(math.log(std.flt32frombits(nan))),
+ "log(0x{b=16,w=8,p=0}) should be NaN", nan)
+ ;;
+}
+
+const log02 = {c /* flt64 math.log: each table row must compare equal (==) to its expected value; the inputs in the second loop must give NaN */
+ var inputs : (uint64, uint64)[:] = [ /* rows are (x, expected log(x)), both as flt64 bit patterns */
+ (0x0000000000000000, 0xfff0000000000000), /* log(+0.0) = -inf */
+ (0x4000000000000000, 0x3fe62e42fefa39ef), /* log(2.0) */
+ (0x7ff0000000000000, 0x7ff0000000000000), /* log(+inf) = +inf */
+ (0x3ff0000000000000, 0x0000000000000000), /* log(1.0) = 0.0 */
+ (0x3fee0fabffc702a3, 0xbfafffffbbdf52b4),
+ (0x6834802690008002, 0x407bea2785dd467d),
+ (0x00000000000009a4, 0xc08705080132de98),
+ (0x49113e8d334802ab, 0x4059518f80c8b520),
+ (0x70fffac8f637436f, 0x408100f58e19ab8f),
+ (0x6900000000000002, 0x407c765cf8301757), /* C[0] */
+ (0x6a30133d035de442, 0x407d4927a622132e), /* C[1] */
+ (0x248037d89731238d, 0xc0730472f99e1f4a), /* C[2] */
+ (0x0ab05ff44dd62adb, 0xc082744e51a6f0b5), /* ... */
+ (0x2b108aea88779bcb, 0xc06cef4a2f1c6596),
+ (0x4b309bc3b92e2dfc, 0x405f3371b7645f6c),
+ (0x1990c9fa6a4c0713, 0xc07a98b52fca339e),
+ (0x0740d2f4341bcac4, 0xc083a512fd9739f7),
+ (0x2c4100ab8cb78c63, 0xc06b48fa89cd7e6c),
+ (0x4ae113c3a6bc3a99, 0x405e576b1bac2f98),
+ (0x49113e8d334802ab, 0x4059518f80c8b520),
+ (0x54815a632f7ffc46, 0x406c840d22b38727),
+ (0x482175028dd2b016, 0x4056b8ec87c62e57),
+ (0x3f7197c085acb2f4, 0xc015cd158b41fb6a),
+ (0x4651b338cfcc7ad5, 0x4051b353db2b8859),
+ (0x1cd1e03f717eff79, 0xc07857016bce40ea),
+ (0x0261f48674017371, 0xc0855513d4bd24c2),
+ (0x7b922a8c7d7b8896, 0x4084ab1d75ac1cd5),
+ (0x3fb23d2c3bf35ec8, 0xc0052208742a9d29),
+ (0x5f92651d0e7b356e, 0x4075edf38e954d24),
+ (0x7f7285f6b3e07dfa, 0x4086031261f39110),
+ (0x2322aae2d2150337, 0xc073f62fbbbec476),
+ (0x7eb2bb6da1727619, 0x4085c09e8f1dadd9),
+ (0x71d2e2eb43d8256d, 0x40814a60e0b279cb),
+ (0x74630f734df0dd4c, 0x40822dcdd6310ba8),
+ (0x6fe31066fa71ca7a, 0x40809e8d868734a7),
+ (0x00000000000009a4, 0xc08705080132de98),
+ (0x5e736a8abf6dda11, 0x40752730815a6166),
+ (0x6903781bd44c88f2, 0x407c7980c8a083ab),
+ (0x28b3963c1648a637, 0xc0701a602dded579),
+ (0x6f93b5c49bb1bad3, 0x40808317f139e8e2),
+ (0x6133e925e6bf8a12, 0x40770f91495b0b5f),
+ (0x3c44077c04158248, 0xc04455e5edbac018),
+ (0x6e042c14d7b5903c, 0x407ff14c8c3b9be2),
+ (0x0dc44db612e4d131, 0xc08162df301e5813),
+ (0x4f646b7b7f44f304, 0x40656e70d305a121),
+ (0x6834802690008002, 0x407bea2785dd467d),
+ (0x43d4a2bb177f4778, 0x40459d6223a7d41b),
+ (0x6634cc7aab4228cc, 0x407a877e7a78169b),
+ (0x5894d0947b2155b9, 0x407115cf1b30a32d),
+ (0x5624f4404ffe543f, 0x406ecac8a9c25223),
+ (0x58751ddd6052330d, 0x4070ffdbd350379f),
+ (0x7f75348c0f5498fb, 0x408604275047d724),
+ (0x7265517fa66418ae, 0x40817d410871254b),
+ (0x08158bfe5ae12b55, 0xc0835b01ec0d209d),
+ (0x39659200fba6c321, 0xc0521ed4910ed7c0),
+ (0x7635cba6522da14e, 0x4082cfafdb79820f),
+ (0x5c35d8c190aea62e, 0x407399d2e3f6c83c),
+ (0x36f6035fd99d5091, 0xc058dfa002d358b7),
+ (0x57d628d547f3e7ff, 0x407091b9f622cd4e),
+ (0x78463abad2eed0c7, 0x408386d5e3383f6b),
+ (0x4b76524c54f71c43, 0x405ff7cf887ecb05),
+ (0x524675375aaefb86, 0x40696dcc32c58d99),
+ (0x1806909d6b4d344f, 0xc07ba93c617cc0e8),
+ (0x7306c0360b296d7a, 0x4081b539dff3fe2b),
+ (0x3396e71654ada984, 0xc0611c4da154c729),
+ (0x51e6f204333bad9e, 0x4068e9668d521be8),
+ (0x1af71f4d0ff0979d, 0xc0799f9949497468),
+ (0x1d273361f32be48e, 0xc0781b61d584de4f),
+ (0x05776dbf84a723dc, 0xc084433c2faca744),
+ (0x541784f5f4f77091, 0x406bf2841f114315),
+ (0x2577a13952a873f2, 0xc07258125f58596d),
+ (0x01e7b0ed0c372300, 0xc0857f389ab75fb2),
+ (0x13e7d98039ac732d, 0xc07e8450364cae3a),
+ (0x1077f9f5782403b6, 0xc08073195883aa08),
+ (0x62e813931fa46ee3, 0x40783e0bf5cda73d),
+ (0x131831ae8a34c781, 0xc07f14422da7ff1f),
+ (0x4f08632da9f79134, 0x4064ef09d28eeda0),
+ (0x1eb87975b31b171e, 0xc0770544a50ec830),
+ (0x11989fbebae2eb0f, 0xc0800f1295e3f21e),
+ (0x5c38b8d6d96e1ffe, 0x40739bcd56c393a8),
+ (0x7278e907f500a3f8, 0x4081840b7f2ad445),
+ (0x73f90e51c4166808, 0x4082092d01e05c94),
+ (0x7a7927863ba4c8ff, 0x408449e7d7ea5790),
+ (0x504943add8ae6c70, 0x4066abc8767dbc43),
+ (0x481953e382eee069, 0x4056a4615edb9861),
+ (0x3e297530d3b5006a, 0xc033a307aa45595f),
+ (0x71799aa6bce3b976, 0x40812b8ab6921ace),
+ (0x28f9c062122d4255, 0xc06fd345b377c762),
+ (0x2c29e575a79ea752, 0xc06b67e06908d1f4),
+ (0x753a038d35905a30, 0x4082786127c0c498),
+ (0x3d9a1e1a1f21c1bc, 0xc039d97d98b10ccb),
+ (0x156a3759cb620ff3, 0xc07d78a18c7ae709),
+ (0x1bea5f7f0998cd3b, 0xc078f72383135970),
+ (0x6c1a8cc212dcb6e2, 0x407e9de4bd07d597),
+ (0x089a9e5f347220f2, 0xc0832cf47cf5e0f7),
+ (0x5c1abf7963c10a0c, 0x407386e1b0c37460),
+ (0x0f1ae87bb321fb89, 0xc080ec2b87d49467),
+ (0x407af5c43978f008, 0x4018448cf475c732),
+ (0x12ab16d1692d6449, 0xc07f601524bb16d3),
+ (0x48cb418c32cf8b91, 0x4058910d56870021),
+ (0x720b6590a8e3f54c, 0x40815dfd6485181b),
+ (0x46eb81a434b9f620, 0x40535ecb7294cc14),
+ (0x7b6b9715161c2442, 0x40849dd29d738baf),
+ (0x4eebb8fcb686a19f, 0x4064c6c75b7c96b0),
+ (0x4cfbe7fe8242657f, 0x4062176353551bf1),
+ (0x609bff020bdc4079, 0x4076a61dec83f7c3),
+ (0x0f5c17c620a239e8, 0xc080d5a506dd57ea),
+ (0x4f1c3cfb0a12764c, 0x406509e91a2092fc),
+ (0x2fac53e44bfddb93, 0xc0668ae29cc4834f),
+ (0x371c7eaeb3110aa5, 0xc05876628c3c6820),
+ (0x56fcaa4913caa832, 0x406ff52903fd288b),
+ (0x623cc68a255eedf9, 0x4077c6e7cd2766f5),
+ (0x4bfcde0e60de47ad, 0x4060b5948be5f1a9),
+ (0x285cf7151c6b20bf, 0xc07056a876ed47dd),
+ (0x44bd10aeecadc14d, 0x404aa358796a4dca),
+ (0x7f7d456ec4df8ff2, 0x408606bb7f61f92f),
+ (0x395d6841a6821a8a, 0xc052375b513417c1),
+ (0x33ed717b73129cb1, 0xc060a55c64cbc4e9),
+ (0x667dad261da3f8f5, 0x407ab98af553a20c),
+ (0x42cdc8a5274640b7, 0x403fd020a35da739),
+ (0x3a6dea498fa485ba, 0xc04e883bb08ae507),
+ (0x2b3e04e8f6266d83, 0xc06cafdc166f9da3),
+ (0x488e1f1c6db084f8, 0x4057e601112df1ff),
+ (0x6cce3dc1ca42a083, 0x407f19f86830564f),
+ (0x697e599a1e4afd5f, 0x407cce3d2d79ec2e),
+ (0x491e82abf353e79c, 0x40597613f67eb37a),
+ (0x169eabc19e6f453d, 0xc07ca3673fa3d24d),
+ (0x4efebc0847e20599, 0x4064e04285d5e27e),
+ (0x544eeb58043197b1, 0x406c3dcfe12bf3bb),
+ (0x27aeff1996633c47, 0xc070cf914713d900),
+ (0x6caf1807e61b6f03, 0x407f043c0805dba1),
+ (0x3aef3641195d57ab, 0xc04bbd04d4b5e1f2),
+ (0x2e4f6e9cea0e6903, 0xc0686f887e310813),
+ (0x4f5f7177b7abc69e, 0x40656612da92283f),
+ (0x629fabc84d5a2f36, 0x40780afb4bbfbe4c),
+ (0x418fce8b9cdac427, 0x40320409991d5e65),
+ (0x056fdd5a05ee0cf9, 0xc0844651eb4324d0),
+ (0x70fffac8f637436f, 0x408100f58e19ab8f), /* C[128] */
+ ][:]
+
+ for (x, y) : inputs
+ var xf : flt64 = std.flt64frombits(x)
+ var yf : flt64 = std.flt64frombits(y)
+ var rf = math.log(xf)
+ testr.check(c, rf == yf,
+ "log(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+ x, y, std.flt64bits(rf))
+ ;;
+
+ for nan : [ /* three NaN encodings, then a negative finite value */
+ 0x7ff8000000000000,
+ 0x7ff9000000000000,
+ 0x7fffffffffffffff,
+ 0xc147a5e354789328,
+ ][:]
+ testr.check(c, std.isnan(math.log(std.flt64frombits(nan))),
+ "log(0x{b=16,w=16,p=0}) should be NaN", nan)
+ ;;
+}
+
+const log03 = {c
+	/*
+	   The [Tan90] steps 1-3' implementation has an error bound of
+	   about 0.6 ulps, so flt32 log does not reach last-bit accuracy.
+	   Rows: (x, perfect result, also-accepted result), as bit patterns.
+	   NOTE(review): rows 1 and 3 are identical -- possibly unintended.
+	 */
+	var cases : (uint32, uint32, uint32)[:] = [
+		(0x3f610400, 0xbe041a91, 0xbe041a92),
+		(0x3fc70700, 0x3ee200bf, 0x3ee200c0),
+		(0x3f610400, 0xbe041a91, 0xbe041a92),
+		(0x3e360700, 0xbfdd18a7, 0xbfdd18a8),
+	][:]
+
+	for (xbits, best, alt) : cases
+		var got = math.log(std.flt32frombits(xbits))
+		var want : flt32 = std.flt32frombits(best)
+		var okay : flt32 = std.flt32frombits(alt)
+
+		if got != want && got != okay
+			testr.fail(c, "log(0x{b=16,w=8,p=0}) was 0x{b=16,w=8,p=0}. It should have been 0x{b=16,w=8,p=0}, although we will also accept 0x{b=16,w=8,p=0}",
+				xbits, std.flt32bits(got), best, alt)
+		;;
+	;;
+}
+
+const log04 = {c
+	/*
+	   The [Tan90] steps 1-3' implementation has an error bound of
+	   about 0.6 ulps, so flt64 log does not reach last-bit accuracy.
+	   Rows: (x, perfect result, also-accepted result), as bit patterns.
+	 */
+
+	var cases : (uint64, uint64, uint64)[:] = [
+		(0x3c71cfbc354934ae, 0xc0435ac0222f1703, 0xc0435ac0222f1704),
+		(0x35f0681e2059a1bb, 0xc05bb8387abe5fcf, 0xc05bb8387abe5fd0),
+		(0x40d268e6c4ad9588, 0x4023b04f15e91586, 0x4023b04f15e91585),
+	][:]
+
+	for (xbits, best, alt) : cases
+		var got = math.log(std.flt64frombits(xbits))
+		var want : flt64 = std.flt64frombits(best)
+		var okay : flt64 = std.flt64frombits(alt)
+
+		if got != want && got != okay
+			testr.fail(c, "log(0x{b=16,w=16,p=0}) was 0x{b=16,w=16,p=0}. It should have been 0x{b=16,w=16,p=0}, although we will also accept 0x{b=16,w=16,p=0}",
+				xbits, std.flt64bits(got), best, alt)
+		;;
+	;;
+}
+
+const log1p01 = {c /* flt32 math.log1p: each table row must compare equal (==) to its expected value */
+ var inputs : (uint32, uint32)[:] = [ /* rows are (x, expected log1p(x)), both as flt32 bit patterns */
+ (0x00000000, 0x00000000), /* log1p(0.0) = 0.0 */
+ (0xbf700700, 0xc0318e1e),
+ (0x69000000, 0x42661ff7), /* C[0] */
+ (0x61017aaf, 0x4239cf35), /* C[1] */
+ (0x5702333f, 0x4202613d), /* C[2] */
+ (0x6e030844, 0x4280f8e2), /* ... */
+ (0xbf5f1ae3, 0xc00351a2),
+ (0x6c04905b, 0x4276e689),
+ (0x4805d58d, 0x413d3fd2),
+ (0x7c06e112, 0x42a7d8a8),
+ (0x4f08632d, 0x41ac6883),
+ (0x4b88da49, 0x41859e88),
+ (0x6b898457, 0x42744651),
+ (0x720b6590, 0x428c2fb3),
+ (0x5c0c2c03, 0x421e66a2),
+ (0x7b0cf7a7, 0x42a5297b),
+ (0x488e1f1c, 0x41494d06),
+ (0x7a8f5038, 0x42a3cf0a),
+ (0x768fe527, 0x4298b9fb),
+ (0x49113e8d, 0x4154bd2d),
+ (0x5811f2be, 0x420861b9),
+ (0x73129cb1, 0x428f0f52),
+ (0x6f93b5c4, 0x42855ee6),
+ (0x5894d094, 0x420b3b6c),
+ (0x3facbad0, 0x3f5aaba7),
+ (0x6f16daf7, 0x428406cc),
+ (0x68981c93, 0x42640ae9),
+ (0x6718afeb, 0x425bbd6e),
+ (0x4b19a1ed, 0x4180ffd5),
+ (0x6c1a8cc2, 0x427783ac),
+ (0x609bff02, 0x4237c835),
+ (0x407af5c4, 0x3fcbf9dc),
+ (0x459de4ba, 0x41087217),
+ (0x491e82ab, 0x4156232d),
+ (0x629fabc8, 0x4242f72e),
+ (0x7b2155b9, 0x42a56e93),
+ (0x7ba20272, 0x42a6d39a),
+ (0x44229f49, 0x40cf561a),
+ (0x782403b6, 0x429d25a9),
+ (0x5624f440, 0x41fb8fe5),
+ (0x7e263c63, 0x42adcf3f),
+ (0x6026873d, 0x42354554),
+ (0x52a873f2, 0x41d4e9ec),
+ (0x5d2967a1, 0x4224b42b),
+ (0x41a1dbe2, 0x40438dc0),
+ (0x52ab609c, 0x41d50d2b),
+ (0x632ba15e, 0x424606ed),
+ (0x692d6449, 0x426756c6),
+ (0x522da14e, 0x41cf9c59),
+ (0x6caf1807, 0x427ac941),
+ (0x6a30133d, 0x426cf210),
+ (0x4b309bc3, 0x41821d44),
+ (0x47b19da7, 0x4136aff5),
+ (0x7eb2bb6d, 0x42af573f),
+ (0x6d347bde, 0x427dae16),
+ (0x68348026, 0x4261f45a),
+ (0x5c35d8c1, 0x421f712d),
+ (0x4fb68eb5, 0x41b44934),
+ (0x6737b23e, 0x425c7ac2),
+ (0x5c38b8d6, 0x421f813d),
+ (0x753a038d, 0x429514c2),
+ (0x733b43e1, 0x428f8ca0),
+ (0x613c001e, 0x423b4d15),
+ (0x623cc68a, 0x4240dcdc),
+ (0x6e3e4d6b, 0x4281b7f2),
+ (0x473ef1b8, 0x412cc13f),
+ (0x50c01f91, 0x41bfc8ef),
+ (0x7140973b, 0x428a0f6a),
+ (0x44c1d017, 0x40eb1a74),
+ (0x7d42efec, 0x42ab5b02),
+ (0x6bc47734, 0x4275b39e),
+ (0x5cc54778, 0x42228a5e),
+ (0x78463aba, 0x429d86ab),
+ (0x50473690, 0x41ba8795),
+ (0x4fc7e753, 0x41b5031a),
+ (0x68c97d24, 0x42652ac6),
+ (0x464a3cc7, 0x41177e94),
+ (0x48cb418c, 0x414f0681),
+ (0x71cc037f, 0x428b8fcf),
+ (0x6b4d344f, 0x42731a66),
+ (0x6cce3dc1, 0x427b70e9),
+ (0xbe43ff34, 0xbe598dc6),
+ (0x42cdc8a5, 0x40949654),
+ (0x5450d14b, 0x41e74489),
+ (0x6052330d, 0x423633cf),
+ (0x75535300, 0x42955613),
+ (0x67d3bac5, 0x425fd1fa),
+ (0x43d4a2bb, 0x40c1c32f),
+ (0x57d628d5, 0x4207249d),
+ (0x75d75209, 0x4296c28e),
+ (0x65d79618, 0x4254cd54),
+ (0x665934fc, 0x42579ac8),
+ (0x4d5a2f36, 0x4199fc7c),
+ (0x4e5b5517, 0x41a51e5d),
+ (0x51dba523, 0x41cbf23d),
+ (0x51dd3656, 0x41cc00cd),
+ (0x3f3b4d9e, 0x3f0c9047),
+ (0x745f3fc3, 0x4292ac65),
+ (0x43dfe6db, 0x40c36926),
+ (0x4ae113c3, 0x417d04b7),
+ (0x77e22cfa, 0x429c674e),
+ (0xbf6399e7, 0xc00cb9a1),
+ (0x486401b0, 0x4145c60b),
+ (0x76e50770, 0x4299a7f1),
+ (0x53661704, 0x41dcf415),
+ (0x51e6f204, 0x41cc58fc),
+ (0x62e81393, 0x4244761b),
+ (0x5f691614, 0x42311213),
+ (0x68ea445b, 0x4265c51f),
+ (0x4beb7a84, 0x4189f603),
+ (0x7b6b9715, 0x42a6306d),
+ (0x606d0172, 0x4236aeb7),
+ (0x6a6d8093, 0x426e2483),
+ (0x4aef46b9, 0x417dff4a),
+ (0x40cff51e, 0x4000f145),
+ (0x4df0dd4c, 0x41a05296),
+ (0x6771f0ee, 0x425d94c7),
+ (0x5e736a8a, 0x422bb2ea),
+ (0x47f3e7ff, 0x413bc30a),
+ (0x58751ddd, 0x420a74a6),
+ (0x6675ba2a, 0x4258191d),
+ (0x75f6c0f1, 0x42970853),
+ (0x6d77f9a2, 0x427ef365),
+ (0x7278e907, 0x428d588a),
+ (0x62f99bda, 0x4244c0af),
+ (0x5b7b5415, 0x421b30f9),
+ (0x4cfbe7fe, 0x41959740),
+ (0x3f79f629, 0x3f2e6894),
+ (0x4ffe543f, 0x41b6f03f),
+ (0x4efebc08, 0x41abdc61),
+ (0x70fffac8, 0x42893e34), /* C[128] */
+ ][:]
+
+ for (x, y) : inputs
+ var xf : flt32 = std.flt32frombits(x)
+ var yf : flt32 = std.flt32frombits(y)
+ var rf = math.log1p(xf)
+ testr.check(c, rf == yf,
+ "log1p(0x{b=16,w=8,p=0}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
+ x, y, std.flt32bits(rf))
+ ;;
+}
+
+const log1p02 = {c /* flt64 math.log1p: each table row must compare equal (==) to its expected value */
+ var inputs : (uint64, uint64)[:] = [ /* rows are (x, expected log1p(x)), both as flt64 bit patterns */
+ (0x0000000000000000, 0x0000000000000000), /* log1p(0.0) = 0.0 */
+ (0x6900000000000002, 0x407c765cf8301757), /* C[0] */
+ (0x6a30133d035de442, 0x407d4927a622132e), /* C[1] */
+ (0x545031c8b889228e, 0x406c3f4c487ae367), /* C[2] */
+ (0xbfdf2f686343ab7f, 0xbfe56047e9fe5abe), /* ... */
+ (0x52b08a1d750f04ff, 0x4069ff462f9f1cbc),
+ (0x4b309bc3b92e2dfc, 0x405f3371b7645f6c),
+ (0x5580cc2d69e348b5, 0x406de5e6ca65b1cd),
+ (0x4fe0e879a2736200, 0x406619d8f80fa200),
+ (0x4ad0fbf9d10cd8a2, 0x405e2ab53092727b),
+ (0x4ae113c3a6bc3a99, 0x405e576b1bac2f98),
+ (0x49113e8d334802ab, 0x4059518f80c8b520),
+ (0x54815a632f7ffc46, 0x406c840d22b38727),
+ (0x482175028dd2b016, 0x4056b8ec87c62e57),
+ (0x46719e36e5f0da58, 0x40520bc0c4781e1f),
+ (0x4651b338cfcc7ad5, 0x4051b353db2b8859),
+ (0x41a1dbe267002305, 0x4032d32be3bb9cb9),
+ (0x5811f2beb37f6007, 0x4070bab72614d157),
+ (0x7b922a8c7d7b8896, 0x4084ab1d75ac1cd5),
+ (0x74a2309e5aa7b2e9, 0x4082439c5e5fec3b),
+ (0x40d268e6c4ad9588, 0x4023b05609c9a18b),
+ (0x7f7285f6b3e07dfa, 0x4086031261f39110),
+ (0x54f29bb41ec2d2b9, 0x406d218d0b1c6484),
+ (0x7eb2bb6da1727619, 0x4085c09e8f1dadd9),
+ (0x71d2e2eb43d8256d, 0x40814a60e0b279cb),
+ (0x74630f734df0dd4c, 0x40822dcdd6310ba8),
+ (0x6fe31066fa71ca7a, 0x40809e8d868734a7),
+ (0x50e345290de2dd6c, 0x406780ec610f0c26),
+ (0x5e736a8abf6dda11, 0x40752730815a6166),
+ (0x6903781bd44c88f2, 0x407c7980c8a083ab),
+ (0x78b39b662df2692e, 0x4083aca5c3801c1c),
+ (0x6f93b5c49bb1bad3, 0x40808317f139e8e2),
+ (0x6133e925e6bf8a12, 0x40770f91495b0b5f),
+ (0x5a63fa758c92398a, 0x407256c5e940a2db),
+ (0x6e042c14d7b5903c, 0x407ff14c8c3b9be2),
+ (0x5f7445e6b0eee22e, 0x4075d9537ce0389b),
+ (0x4f646b7b7f44f304, 0x40656e70d305a121),
+ (0x6834802690008002, 0x407bea2785dd467d),
+ (0x43d4a2bb177f4778, 0x40459d6223a7d41b),
+ (0x6634cc7aab4228cc, 0x407a877e7a78169b),
+ (0x5894d0947b2155b9, 0x407115cf1b30a32d),
+ (0x5624f4404ffe543f, 0x406ecac8a9c25223),
+ (0x58751ddd6052330d, 0x4070ffdbd350379f),
+ (0x7f75348c0f5498fb, 0x408604275047d724),
+ (0x7265517fa66418ae, 0x40817d410871254b),
+ (0x7515838bfa241f5a, 0x40826bc50abc4fa5),
+ (0x45459662ae3323f5, 0x404d9bc7c072e470),
+ (0x7635cba6522da14e, 0x4082cfafdb79820f),
+ (0x5c35d8c190aea62e, 0x407399d2e3f6c83c),
+ (0x4555f61f1c0c5e59, 0x404df6b397ade3ba),
+ (0x57d628d547f3e7ff, 0x407091b9f622cd4e),
+ (0x78463abad2eed0c7, 0x408386d5e3383f6b),
+ (0x4b76524c54f71c43, 0x405ff7cf887ecb05),
+ (0x524675375aaefb86, 0x40696dcc32c58d99),
+ (0x68569f0d424db12a, 0x407c01e8fcc54a3b),
+ (0x7306c0360b296d7a, 0x4081b539dff3fe2b),
+ (0x4ad6e0cb2febd13d, 0x405e3dc5d9b757f7),
+ (0x51e6f204333bad9e, 0x4068e9668d521be8),
+ (0x4097178f382d0b5a, 0x401d32395ff3dc65),
+ (0x7f373de46ebc9a27, 0x4085eeb4db53f788),
+ (0x66d76e22a34e58cd, 0x407af84dc23a7f99),
+ (0x541784f5f4f77091, 0x406bf2841f114315),
+ (0x627797a59dd57660, 0x4077f016d94fac5c),
+ (0x7ab7bb38889422cf, 0x40845f9ed64c4859),
+ (0x4fc7e75368981c93, 0x4065f890d0df7af1),
+ (0x75d8018df3774adb, 0x4082af304ec36d3f),
+ (0x62e813931fa46ee3, 0x40783e0bf5cda73d),
+ (0x75984e941fdcceb2, 0x4082991b8dfdda1c),
+ (0x4f08632da9f79134, 0x4064ef09d28eeda0),
+ (0x4ee87d4f36b08d11, 0x4064c2cf83f46cf4),
+ (0x46a8931cf6708831, 0x4052a622d2cf5019),
+ (0x5c38b8d6d96e1ffe, 0x40739bcd56c393a8),
+ (0x7278e907f500a3f8, 0x4081840b7f2ad445),
+ (0x73f90e51c4166808, 0x4082092d01e05c94),
+ (0x7a7927863ba4c8ff, 0x408449e7d7ea5790),
+ (0x504943add8ae6c70, 0x4066abc8767dbc43),
+ (0x481953e382eee069, 0x4056a4615edb9861),
+ (0x6b8984577920fba9, 0x407e397207c69605),
+ (0x71799aa6bce3b976, 0x40812b8ab6921ace),
+ (0x6019b68e88bc2e04, 0x40764c0873609927),
+ (0x4bf9d05bd54aa5c3, 0x4060b200ae465dfa),
+ (0x753a038d35905a30, 0x4082786127c0c498),
+ (0x617a151591b1b79f, 0x4077403fbb1aa8c9),
+ (0x503a33bad761d1c1, 0x406696c4be84ff41),
+ (0x4d2a6cc069c8de6f, 0x4062582f440da781),
+ (0x6c1a8cc212dcb6e2, 0x407e9de4bd07d597),
+ (0x6b5a9349dd94a791, 0x407e18d31a0f5866),
+ (0x5c1abf7963c10a0c, 0x407386e1b0c37460),
+ (0x4d2ad7c63e651a60, 0x406258afdaa5e19a),
+ (0x407af5c43978f008, 0x401846ebf755d962),
+ (0x73cb2d9a04724837, 0x4081f930d140b8b3),
+ (0x48cb418c32cf8b91, 0x4058910d56870021),
+ (0x720b6590a8e3f54c, 0x40815dfd6485181b),
+ (0x46eb81a434b9f620, 0x40535ecb7294cc14),
+ (0x7b6b9715161c2442, 0x40849dd29d738baf),
+ (0x4eebb8fcb686a19f, 0x4064c6c75b7c96b0),
+ (0x4cfbe7fe8242657f, 0x4062176353551bf1),
+ (0x609bff020bdc4079, 0x4076a61dec83f7c3),
+ (0x7b9c14acd61754cb, 0x4084ae996873a75c),
+ (0x4f1c3cfb0a12764c, 0x406509e91a2092fc),
+ (0x6cfc6da63a428918, 0x407f3a4094ee1891),
+ (0x74ec84ae3e4aeb42, 0x40825d639229d899),
+ (0x56fcaa4913caa832, 0x406ff52903fd288b),
+ (0x623cc68a255eedf9, 0x4077c6e7cd2766f5),
+ (0x4bfcde0e60de47ad, 0x4060b5948be5f1a9),
+ (0x7b0cf7a77eff4d34, 0x40847cf0fbe4ff93),
+ (0x44bd10aeecadc14d, 0x404aa358796a4dca),
+ (0x7f7d456ec4df8ff2, 0x408606bb7f61f92f),
+ (0x7b8d69195829f3f8, 0x4084a96c99b76f20),
+ (0x7f6d78ed8101a55a, 0x4086013df53b153f),
+ (0x667dad261da3f8f5, 0x407ab98af553a20c),
+ (0x42cdc8a5274640b7, 0x403fd020a35da73e),
+ (0x5a6de65a705a667d, 0x40725d396e1dd345),
+ (0x795dfa770c749b2c, 0x4083e77ef991f95a),
+ (0x488e1f1c6db084f8, 0x4057e601112df1ff),
+ (0x6cce3dc1ca42a083, 0x407f19f86830564f),
+ (0x697e599a1e4afd5f, 0x407cce3d2d79ec2e),
+ (0x491e82abf353e79c, 0x40597613f67eb37a),
+ (0x722ea4ce7f96d2ff, 0x408169f9e9352797),
+ (0x4efebc0847e20599, 0x4064e04285d5e27e),
+ (0x544eeb58043197b1, 0x406c3dcfe12bf3bb),
+ (0x46ff046b11b6e7e0, 0x405392d817dda2d2),
+ (0x6caf1807e61b6f03, 0x407f043c0805dba1),
+ (0x50ef387e0fc4a5e1, 0x4067905d34c36a51),
+ (0x4cbf6bd74d8f3edf, 0x4061c276224e5d71),
+ (0x4f5f7177b7abc69e, 0x40656612da92283f),
+ (0x629fabc84d5a2f36, 0x40780afb4bbfbe4c),
+ (0x418fce8b9cdac427, 0x40320409995dc1e7),
+ (0x46cfe9b12985df6e, 0x40530f94e5ddae5e),
+ (0x70fffac8f637436f, 0x408100f58e19ab8f), /* C[128] */
+ ][:]
+
+ for (x, y) : inputs
+ var xf : flt64 = std.flt64frombits(x)
+ var yf : flt64 = std.flt64frombits(y)
+ var rf = math.log1p(xf)
+ testr.check(c, rf == yf,
+ "log1p(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+ x, y, std.flt64bits(rf))
+ ;;
+}
+
+const log1p03 = {c
+	/*
+	   As with log, flt32 log1p is allowed some error. Rows are
+	   (x, perfect result, also-accepted result), as bit patterns.
+	 */
+	var cases : (uint32, uint32, uint32)[:] = [
+		(0x49c68d15, 0x4164d4d5, 0x4164d4d4),
+		(0x3d86912c, 0x3d8254a9, 0x3d8254a8),
+		(0x3dd7210e, 0x3dcc905b, 0x3dcc905c),
+		(0x3d986e71, 0x3d93067e, 0x3d93067f),
+		(0xbe1eefcb, 0xbe2cb799, 0xbe2cb798),
+		(0x3e057287, 0x3dfae18d, 0x3dfae18c),
+		(0x424d8fe0, 0x407d5bc1, 0x407d5bc2),
+		(0xb95cb5e9, 0xb95cbbdb, 0xb95cbbdc),
+		(0x3de66745, 0x3dda56fd, 0x3dda56fc),
+	][:]
+
+	for (xbits, best, alt) : cases
+		var got = math.log1p(std.flt32frombits(xbits))
+		var want : flt32 = std.flt32frombits(best)
+		var okay : flt32 = std.flt32frombits(alt)
+
+		if got != want && got != okay
+			testr.fail(c, "log1p(0x{b=16,w=8,p=0}) was 0x{b=16,w=8,p=0}. It should have been 0x{b=16,w=8,p=0}, although we will also accept 0x{b=16,w=8,p=0}",
+				xbits, std.flt32bits(got), best, alt)
+		;;
+	;;
+}
+
+const log1p04 = {c
+	/*
+	   As with log, flt64 log1p is allowed some error. Rows are
+	   (x, perfect result, also-accepted result), as bit patterns.
+	 */
+	var cases : (uint64, uint64, uint64)[:] = [
+		(0xbf8d2fb5e91b21dc, 0xbf8d65764edb0cd6, 0xbf8d65764edb0cd5),
+		(0x3fc855690a4a67e1, 0x3fc64708ed6e9abb, 0x3fc64708ed6e9aba),
+		(0xbfafb59aa6bb5f14, 0xbfb05dee438595dd, 0xbfb05dee438595de),
+		(0x3f896e0154c1be37, 0x3f8945eb78442aa1, 0x3f8945eb78442aa0),
+		(0x3fb09ef0bcfe6932, 0x3fb01a8404c5051a, 0x3fb01a8404c50519),
+		(0x3fa071dec13893e8, 0x3fa02fad06dc3334, 0x3fa02fad06dc3335),
+		(0x4000d2445e953eb4, 0x3ff21dbfa8f28f5d, 0x3ff21dbfa8f28f5c),
+		(0xbfe37c5eda902f8d, 0xbfee0b40d5f061d7, 0xbfee0b40d5f061d6),
+		(0x400dd2fe516cced3, 0x3ff8db2a8f466eeb, 0x3ff8db2a8f466eea),
+		(0xbfb5d9612ba5b9bf, 0xbfb6d6962ad7508b, 0xbfb6d6962ad7508c),
+		(0x40c512345c72e7f9, 0x4022929892b71a96, 0x4022929892b71a95),
+		(0x47409b795894785f, 0x405448ab9f468935, 0x405448ab9f468936),
+	][:]
+
+	for (xbits, best, alt) : cases
+		var got = math.log1p(std.flt64frombits(xbits))
+		var want : flt64 = std.flt64frombits(best)
+		var okay : flt64 = std.flt64frombits(alt)
+
+		if got != want && got != okay
+			testr.fail(c, "log1p(0x{b=16,w=16,p=0}) was 0x{b=16,w=16,p=0}. It should have been 0x{b=16,w=16,p=0}, although we will also accept 0x{b=16,w=16,p=0}",
+				xbits, std.flt64bits(got), best, alt)
+		;;
+	;;
+}
diff --git a/lib/math/test/powr-impl.myr b/lib/math/test/powr-impl.myr
new file mode 100644
index 0000000..8e6110a
--- /dev/null
+++ b/lib/math/test/powr-impl.myr
@@ -0,0 +1,76 @@
+use std
+use math
+use testr
+
+const main = { /* entry point: passes the three powr test functions of this file to testr.run */
+ math.fptrap(false) /* NOTE(review): presumably turns floating-point exception trapping off before the tests run -- confirm against math.fptrap */
+ testr.run([
+ [.name="powr-01", .fn = powr01],
+ [.name="powr-02", .fn = powr02],
+ [.name="powr-03", .fn = powr03],
+ ][:])
+}
+
+const powr01 = {c
+	/* Rows are (x, y, expected powr(x, y)), all as flt32 bit patterns. */
+	var cases : (uint32, uint32, uint32)[:] = [
+		(0x08a38749, 0x2ffb67c0, 0x3f7fffff),
+		(0x01433ed5, 0x367caeda, 0x3f7feaba),
+		(0x7112fd5b, 0x7509b252, 0x7f800000),
+		(0x22b5f461, 0xc7335035, 0x7f800000),
+		(0x29529847, 0x43c6b361, 0x00000000),
+		(0x3fc1cc03, 0x64eb4c95, 0x7f800000),
+		(0x653f944a, 0xbf7c2388, 0x1a3c784b),
+		(0x545ba67c, 0xc0c7e947, 0x00000000),
+		(0x3fca6b0d, 0x44ff18e0, 0x7f800000),
+		/* The next two vectors are disabled in the source. */
+		// (0x3f74c7a7, 0x44feae20, 0x000265c6),
+		// (0x3f7ebd6c, 0xc5587884, 0x4bc9ab07),
+	][:]
+
+	for (xbits, ybits, want) : cases
+		var got = math.powr(std.flt32frombits(xbits), std.flt32frombits(ybits))
+
+		testr.check(c, got == std.flt32frombits(want),
+			"powr(0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
+			xbits, ybits, want, std.flt32bits(got))
+	;;
+}
+
+const powr02 = {c
+	/*
+	   Rows are (x, y, expected powr(x, y)) as flt64 bit patterns.
+	   NOTE(review): IEEE 754 lists powr(+-0, +-0) as invalid; confirm the 0 expected here.
+	 */
+	var cases : (uint64, uint64, uint64)[:] = [
+		(0x0000000000000000, 0x0000000000000000, 0x0000000000000000),
+	][:]
+	for (xbits, ybits, want) : cases
+		var got = math.powr(std.flt64frombits(xbits), std.flt64frombits(ybits))
+		testr.check(c, got == std.flt64frombits(want),
+			"powr(0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+			xbits, ybits, want, std.flt64bits(got))
+	;;
+}
+
+const powr03 = {c
+	/* Rows are (x, y, perfect result, also-accepted result), as flt32 bit patterns. */
+	var cases : (uint32, uint32, uint32, uint32)[:] = [
+		(0x1bd2244e, 0x3a647973, 0x3f7535a1, 0x3f7535a0),
+		(0x3f264a46, 0x423c927a, 0x30c9b8d3, 0x30c9b8d4),
+		(0x61fb73d0, 0xbfd2666c, 0x06c539f6, 0x06c539f7),
+		(0x3bbd11f6, 0x3cc159b1, 0x3f62ac1b, 0x3f62ac1a),
+		(0x3f7ca5b7, 0xc309857a, 0x40c41bbf, 0x40c41bc0),
+		(0x3f6a1a65, 0x43e16065, 0x226731e2, 0x226731e3),
+	][:]
+
+	for (xbits, ybits, best, alt) : cases
+		var got = math.powr(std.flt32frombits(xbits), std.flt32frombits(ybits))
+		var want : flt32 = std.flt32frombits(best)
+		var okay : flt32 = std.flt32frombits(alt)
+
+		testr.check(c, got == want || got == okay,
+			"powr(0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0}) should be 0x{b=16,w=8,p=0}, will also accept 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
+			xbits, ybits, best, alt, std.flt32bits(got))
+	;;
+}
diff --git a/lib/math/test/scale2-impl.myr b/lib/math/test/scale2-impl.myr
index 9c3bc1a..2e21fa5 100644
--- a/lib/math/test/scale2-impl.myr
+++ b/lib/math/test/scale2-impl.myr
@@ -13,6 +13,7 @@ const main = {
const scale201 = {c
var inputsf : (flt32, int32, flt32)[:] = [
+ (0.000000011971715, -246, 0.0),
(0.0, 1, 0.0),
(-0.0, 2, -0.0),
(1.0, 3, 8.0),
@@ -25,7 +26,9 @@ const scale201 = {c
][:]
for (f, m, g) : inputsf
- testr.eq(c, math.scale2(f, m), g)
+ var r = math.scale2(f, m)
+ testr.check(c, r == g, "scale2(0x{w=8,b=16,p=0}, {}) should be 0x{w=8,b=16,p=0}, was 0x{w=8,b=16,p=0}",
+ std.flt32bits(f), m, std.flt32bits(g), std.flt32bits(r))
;;
}
diff --git a/lib/math/test/sin-impl.myr b/lib/math/test/sin-impl.myr
new file mode 100644
index 0000000..458f7cc
--- /dev/null
+++ b/lib/math/test/sin-impl.myr
@@ -0,0 +1,438 @@
+use std
+use math
+use testr
+
+/*
+ Test driver for sin/cos/sincos. Calls math.fptrap(false) first
+ (presumably so floating-point exceptions do not trap while the
+ tests run -- confirm against math.fptrap), then hands the named
+ test cases below to testr.run.
+ */
+const main = {
+ math.fptrap(false)
+ testr.run([
+ [.name="sin-cos-01", .fn = sincos01], /* flt32 */
+ [.name="sin-cos-02", .fn = sincos02], /* flt64 */
+ [.name="sin-cos-03", .fn = sincos03], /* off-by-1-ulp quarantine */
+ [.name="sin-cos-04", .fn = sincos04], /* exhaustively test C */
+ [.name="sin-cos-05", .fn = sincos05], /* NaN handling */
+ ][:])
+}
+
+/*
+ Compares two flt32 bit patterns. They match when the bits are
+ identical, or when both patterns decode to NaN (any NaN encoding
+ is accepted in place of any other).
+ */
+const same32 = {a, b
+ -> a == b || (std.isnan(std.flt32frombits(a)) && std.isnan(std.flt32frombits(b)))
+}
+
+/*
+ Compares two flt64 bit patterns. They match when the bits are
+ identical, or when both patterns decode to NaN (any NaN encoding
+ is accepted in place of any other).
+ */
+const same64 = {a, b
+ -> a == b || (std.isnan(std.flt64frombits(a)) && std.isnan(std.flt64frombits(b)))
+}
+
+/*
+ flt32 sin/cos cases. Each row is (x, expected sin, expected cos) as
+ raw bit patterns. Two things are checked per row: that sincos(x)
+ agrees with the separate sin(x) and cos(x) calls, and that the
+ sincos(x) results match the expected encodings (via same32).
+ */
+const sincos01 = {c
+ var inputs : (uint32, uint32, uint32)[:] = [
+ (0x00000000, 0x00000000, 0x3f800000),
+ (0x3f000000, 0x3ef57744, 0x3f60a940),
+ (0x6e000000, 0xbec002e4, 0xbf6d50ea),
+ (0xeca5b501, 0x3f6e879c, 0x3eb9e60c),
+ (0x67a9242b, 0xbf7fab81, 0xbd4fee38),
+ (0xdf18b878, 0xbdad60f7, 0x3f7f14bb),
+ (0x5f18b878, 0x3dad60f7, 0x3f7f14bb),
+ ][:]
+
+ for (xb, want_s, want_c) : inputs
+ var arg = std.flt32frombits(xb)
+
+ /* combined call first, then the two individual entry points */
+ var s_pair, c_pair
+ (s_pair, c_pair) = math.sincos(arg)
+ var s_solo = math.sin(arg)
+ var c_solo = math.cos(arg)
+
+ var s_pair_bits = std.flt32bits(s_pair)
+ var c_pair_bits = std.flt32bits(c_pair)
+ var s_solo_bits = std.flt32bits(s_solo)
+ var c_solo_bits = std.flt32bits(c_solo)
+
+ var consistent = (s_pair == s_solo) && (c_pair == c_solo)
+ testr.check(c, consistent,
+ "sincos(0x{b=16,w=8,p=0}) is (0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0}), individual results (0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0})",
+ xb, s_pair_bits, c_pair_bits, s_solo_bits, c_solo_bits)
+
+ var expected = same32(s_pair_bits, want_s) && same32(c_pair_bits, want_c)
+ testr.check(c, expected,
+ "sincos(0x{b=16,w=8,p=0}) should be (0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0}), was (0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0})",
+ xb, want_s, want_c, s_pair_bits, c_pair_bits)
+ ;;
+}
+
+/*
+ flt64 sin/cos cases. Each row is (x, expected sin, expected cos) as
+ raw bit patterns. Two things are checked per row: that sincos(x)
+ agrees with the separate sin(x) and cos(x) calls, and that the
+ sincos(x) results match the expected encodings (via same64).
+ */
+const sincos02 = {c
+ var inputs : (uint64, uint64, uint64)[:] = [
+ (0x0000000000000000, 0x0000000000000000, 0x3ff0000000000000),
+ (0x4100000000000000, 0xbfeff8bd7b10d6b0, 0x3fa58ced65ec8b50),
+ (0x4b11000000000000, 0xbfef2cb48ed49aa6, 0x3fcce246843789ad),
+ (0x020400000a0c0000, 0x020400000a0c0000, 0x3ff0000000000000),
+ (0xbfeff57020000000, 0xbfeae79e2eb87020, 0x3fe1530a59ef0400),
+ (0x44f5248560000000, 0xbfeff57010000001, 0xbfa9fdc6fcf27758),
+ (0xc3e3170f00000000, 0xbfb5ac1ed995c7c4, 0x3fefe29770000000),
+ (0x41bb951f1572eba5, 0xbc8f54f5227a4e84, 0x3ff0000000000000), /* [GB91]'s "Xhard" */
+ ][:]
+
+ for (xb, want_s, want_c) : inputs
+ var arg = std.flt64frombits(xb)
+
+ /* combined call first, then the two individual entry points */
+ var s_pair, c_pair
+ (s_pair, c_pair) = math.sincos(arg)
+ var s_solo = math.sin(arg)
+ var c_solo = math.cos(arg)
+
+ var s_pair_bits = std.flt64bits(s_pair)
+ var c_pair_bits = std.flt64bits(c_pair)
+ var s_solo_bits = std.flt64bits(s_solo)
+ var c_solo_bits = std.flt64bits(c_solo)
+
+ var consistent = (s_pair == s_solo) && (c_pair == c_solo)
+ testr.check(c, consistent,
+ "sincos(0x{b=16,w=16,p=0}) is (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}), individual results (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0})",
+ xb, s_pair_bits, c_pair_bits, s_solo_bits, c_solo_bits)
+
+ var expected = same64(s_pair_bits, want_s) && same64(c_pair_bits, want_c)
+ testr.check(c, expected,
+ "sincos(0x{b=16,w=16,p=0}) should be (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}), was (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0})",
+ xb, want_s, want_c, s_pair_bits, c_pair_bits)
+ ;;
+}
+
+/*
+ flt64 sin/cos cases with a two-candidate tolerance. Each row is
+ (x, preferred sin, preferred cos, alternate sin, alternate cos) as
+ raw bit patterns. sincos(x) must agree with the separate sin(x) and
+ cos(x) calls, and each component must match either its preferred or
+ its alternate encoding.
+ */
+const sincos03 = {c
+ var inputs : (uint64, uint64, uint64, uint64, uint64)[:] = [
+ (0x5101000000000000, 0x3fe9706123d509f1, 0xbfe369af9695aba1, 0x3fe9706123d509f0, 0xbfe369af9695aba0),
+ (0xf83b13a6a142b6d5, 0xbf5a86f4edeb02f2, 0x3feffffd404efc20, 0xbf5a86f4edeb02f1, 0x3feffffd404efc20),
+ (0x4b01000000000000, 0xbfe3e9527dc75f12, 0x3fe90cf80997c963, 0xbfe3e9527dc75f13, 0x3fe90cf80997c964),
+ ][:]
+
+ for (xb, s_best, c_best, s_alt, c_alt) : inputs
+ var arg = std.flt64frombits(xb)
+
+ /* combined call first, then the two individual entry points */
+ var s_pair, c_pair
+ (s_pair, c_pair) = math.sincos(arg)
+ var s_solo = math.sin(arg)
+ var c_solo = math.cos(arg)
+
+ var s_pair_bits = std.flt64bits(s_pair)
+ var c_pair_bits = std.flt64bits(c_pair)
+ var s_solo_bits = std.flt64bits(s_solo)
+ var c_solo_bits = std.flt64bits(c_solo)
+
+ var consistent = (s_pair == s_solo) && (c_pair == c_solo)
+ testr.check(c, consistent,
+ "sincos(0x{b=16,w=16,p=0}) is (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}), individual results (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0})",
+ xb, s_pair_bits, c_pair_bits, s_solo_bits, c_solo_bits)
+
+ /* each component may land on either of its two listed encodings */
+ var s_ok = same64(s_pair_bits, s_best) || same64(s_pair_bits, s_alt)
+ var c_ok = same64(c_pair_bits, c_best) || same64(c_pair_bits, c_alt)
+ testr.check(c, s_ok && c_ok,
+ "sincos(0x{b=16,w=16,p=0}) should be (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}), will also accept (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}), was (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0})",
+ xb, s_best, c_best, s_alt, c_alt, s_pair_bits, c_pair_bits)
+ ;;
+}
+
+/*
+ Table-driven flt64 regression test for sin/cos. Each row is
+ (x, expected sin, expected cos) as raw bit patterns. Per row it
+ checks that sincos(x) agrees with the separate sin(x)/cos(x) calls
+ and that the sincos(x) results match the expected encodings (via
+ same64).
+ */
+const sincos04 = {c
+ /*
+ There should be one of these for each j, each corresponding
+ to the appropriate xi. This should ensure that, when
+ upgrading the C tables, things don't get too terribly
+ broken.
+ */
+ var inputs : (uint64, uint64, uint64)[:] = [
+ (0x00000000000000f6, 0x00000000000000f6, 0x3ff0000000000000),
+ (0x512272897288fc3f, 0xbf74c1fa7a394e5a, 0xbfefffe511d2bf60),
+ (0xbf7bf1bf57973983, 0xbf7bf1b122b6a164, 0x3fefffcf31e20334),
+ (0x54b7c50a7a9883cb, 0xbfefff7870aeb34e, 0xbf874920cd9ace22),
+ (0xdace0787a600a1ca, 0x3f8b9ca34b1db23a, 0xbfefff416335fa92),
+ (0x72864769869c2318, 0x3feffeddc1ece35c, 0xbf910931b7190c35),
+ (0xd91723aa83b79cb9, 0xbf942999d22d35b9, 0x3feffe696f1dc7b2),
+ (0x475f423e817765ea, 0x3f96b423c56f5447, 0xbfeffdfc7adf3a9d),
+ (0xe6ea21331235f850, 0xbf9c0c0fa6749f60, 0xbfeffced3639af5a),
+ (0x6d2a3c2098179905, 0xbf9f11a72b55a41b, 0xbfeffc3a7f66553b),
+ (0xf28fa3bd259e1376, 0xbfeffc23303dea74, 0xbf9f71111b7fd616),
+ (0x7f26be7ff1138291, 0xbfa185023632479d, 0xbfeffb33e657eb59),
+ (0x675c32e626080a1e, 0x3feff9b5b177af51, 0xbfa40f7a6e6b7994),
+ (0x52924d2cbb5901d1, 0x3fa48c4b82d66c9b, 0x3feff9666f07db89),
+ (0x417ec533964706ec, 0x3fa64f235d1be278, 0x3feff83843b411af),
+ (0x52107ea0ff1110db, 0xbfeff6a3fc169867, 0x3fa877b2347f0a28),
+ (0xd71b728c627f815d, 0x3faa8d784340586e, 0xbfeff4f9ef7c5107),
+ (0xda739024569252ce, 0x3feff3eee1e5663a, 0x3fabc79919273897),
+ (0xdb96494b55f3f081, 0xbfeff24bb7274d9e, 0x3fad9a5e1c7820a2),
+ (0x739f3c2a746a1b50, 0xbfeff19570cd1d86, 0x3fae5c932d2f3605),
+ (0x447aa716086a8c88, 0xbfb05ec5a49745a1, 0x3fefef3bdef16841),
+ (0xf968280b3a414e54, 0xbfb12ed0ae4aeb09, 0xbfefed86a66a999f),
+ (0xd02fb5bd9a62c870, 0x3fefebdb551c9f8e, 0x3fb1f1029914d91e),
+ (0x60ffe411eafbc6da, 0x3fb245acb57399e6, 0x3fefeb1b39d132e8),
+ (0x4f318e7d4aea96dc, 0xbfefe7f62e48f0eb, 0x3fb398e0aeb9c445),
+ (0xc404978c116309d8, 0x3fb423d6b3b85040, 0x3fefe69c043f3e90),
+ (0x43f2630170779582, 0x3fb5177a797d0e87, 0xbfefe4261d504451),
+ (0x5fdbbf61f00b7f03, 0xbfefe38d527600ca, 0xbfb550ed5bee6d60),
+ (0xc0197a1d458871ce, 0xbfb6018646c93b7b, 0x3fefe1ad70c7618a),
+ (0xfec0c7c70c5fef38, 0xbfb72b204c19e155, 0xbfefde61e65655a4),
+ (0x536118c6d0da429d, 0xbfb813122d1d4d33, 0x3fefdbb21abcadb1),
+ (0x468bde452261d739, 0xbfefd95e3ccfcb94, 0x3fb8d51e4332cdff),
+ (0x640531fd736dc271, 0xbfb9cb4924d9690e, 0xbfefd64f7c8c42d5),
+ (0x45882463c54dff4d, 0x3fba4eb0d809a756, 0xbfefd4a15a83accd),
+ (0xe47b0287f9f402c4, 0x3fbb4f875473f88e, 0xbfefd13fe58f7d94),
+ (0x776a9cb59bc714b1, 0x3fefd05ef4cb673b, 0x3fbb90bc6355a021),
+ (0x6e03d54eae350b2b, 0x3fbc40e4b1fc64b5, 0x3fefcdf4b6ba3d60),
+ (0xd7e13510abfa4b8f, 0xbfbd8c17b8daf635, 0x3fefc9408a0ed455),
+ (0xf5b0b388d525a652, 0x3fbe06c30febf5ed, 0xbfefc774aca78be1),
+ (0xc872046ad7b74e5d, 0xbfefc4762a3df5f2, 0xbfbeceee72729a95),
+ (0xe7de05b2f658e70f, 0xbfbf705c3c62b429, 0xbfefc1fd7092a24b),
+ (0xcc324b95dbc8fd01, 0x3fc0705192fdc54c, 0xbfefbc2939080e96),
+ (0xf721d915cf272a16, 0xbfefbb27b38fdf96, 0xbfc08f4641cf4d68),
+ (0x727b9da90c1b363d, 0x3fefb822c42d1549, 0xbfc0ead5e194ff63),
+ (0x76833092f555af28, 0xbfc16fc26448ec57, 0xbfefb3a2bb1657ce),
+ (0x4aee1902b654832c, 0x3fefb04eed8111ec, 0x3fc1cf8146eca7eb),
+ (0x4baf9ffda9dc11e3, 0x3fefac8684367fbe, 0xbfc239ecfe4bf73d),
+ (0x5657a3059c18f2d9, 0xbfc297363782e038, 0xbfefa922de862569),
+ (0x7bff45dd88b13800, 0xbfc2d478620b4e02, 0xbfefa6df905ff50e),
+ (0x5dee435102358eaf, 0x3fefa1b101e4c684, 0xbfc35dd586187ce4),
+ (0xde9d9f0cc9f77177, 0x3fc3defc1325ae88, 0x3fef9caef7a753cc),
+ (0xfdda28f4488d7a83, 0xbfc415283aea05a2, 0x3fef9a8b282c574d),
+ (0xf50af38a50f30e4c, 0xbfc4730a16260b3b, 0xbfef96c7b1abe323),
+ (0xfb496fa21c023920, 0x3fc4cea2d28cfdfb, 0x3fef930a70b822c6),
+ (0xe505ecdfee427dbd, 0x3fc554226a887900, 0x3fef8d78b9f34b72),
+ (0xeb0d8c0df28dd71b, 0x3fc5b337c0fb9e5e, 0xbfef896b2289c5b2),
+ (0x7509b2527112fd5b, 0xbfc630b105366287, 0x3fef83f5ac088905),
+ (0xffd6c9165e18ac33, 0x3fef81c853e8136a, 0xbfc661f1ad0ce950),
+ (0xe1c86c04b0706291, 0xbfef7d873a6519fe, 0x3fc6c0f2cce48329),
+ (0x7f4ab100fd15ade5, 0x3fc7148e0c9e3182, 0x3fef79b97d3d745b),
+ (0x4b1f0b0b22dfeef3, 0xbfc7c4eeb85ba3ff, 0x3fef718430c69651),
+ (0xcb9bb37fc0f2a235, 0xbfc7d6435e271ea1, 0x3fef70b2445b8479),
+ (0xd6e8414566d4c52e, 0xbfef6a8b61c9b366, 0x3fc856b04dcf03ea),
+ (0xe79fb9553990f8be, 0xbfef64d85f36d000, 0xbfc8cb42fe24375f),
+ (0x6dcb241574906b9f, 0x3fc9451b9809f028, 0x3fef5ec551a7ce80),
+ (0xd35310f2b682fb52, 0x3fc976452c4ba67d, 0x3fef5c492d13c71d),
+ (0x49b456519453bc2e, 0xbfef5770a5a4ac32, 0x3fc9d50bf14496e0),
+ (0x59d7185ea4188d7a, 0x3fca6cd56e57cc8a, 0xbfef4f86fdc2aa14),
+ (0xfddefd05372bf5b1, 0xbfca83aa03c4c548, 0x3fef4e52288add54),
+ (0x7b7322413d2ae5f8, 0x3fef46fc203fc847, 0xbfcb0ccdf3f26a28),
+ (0x6360cbf4cc0e423a, 0xbfcb8abc4d3fbf70, 0xbfef401cf3b2c8ad),
+ (0xc2ef40aac8c0caff, 0xbfef3b40e2b1c574, 0x3fcbe2633bec6338),
+ (0xc2ef40aac8c0caff, 0xbfef3b40e2b1c574, 0x3fcbe2633bec6338), /* NOTE(review): identical to the row above; possibly a copy/paste slip for a different j -- confirm against the C table */
+ (0x7517b932422321b3, 0xbfcc7c0a66359b8f, 0x3fef329516485b7e),
+ (0x4d40e5da1750b47b, 0x3fccecafe584ca7a, 0x3fef2c1a191c72da),
+ (0xd3eeab6f1ce0c65f, 0x3fef275dde80b9a5, 0xbfcd3ddea87ad082),
+ (0x7332b1c80505c25d, 0x3fcdd243aeac42e6, 0xbfef1e91f34d9e70),
+ (0xdd7f47ace75b1278, 0xbfef1ba6ee071cfd, 0x3fce02d275470464),
+ (0xf6deca320321040f, 0x3fef1188ee0ff435, 0xbfcea8b76971a173),
+ (0xde7018284f03e03f, 0x3fef0acf75273337, 0x3fcf14f3f457127c),
+ (0x5a35f121d3d2a2e7, 0x3fcf3fb1a0b5d9eb, 0x3fef0820c67c621a),
+ (0x5f8789fe1295b431, 0xbfcf801328302267, 0xbfef040f08a74b86),
+ (0xeb60edaa99fb631b, 0xbfcff6c4e0ab7ecf, 0xbfeefc7723cce8e8),
+ (0x740043f1ff2cdc39, 0x3feef58264f03ec2, 0xbfd030f4ca52cdfd),
+ (0x755909d7a66ec123, 0x3fd05b0aacb3547b, 0xbfeeeffa179a7126),
+ (0xce7378a1c330830a, 0xbfd092eb3679cee6, 0xbfeee889fe5671b2),
+ (0x61229f2e883ad502, 0x3feee354dcbf5384, 0xbfd0b993622b89d8),
+ (0xc308cff80474c43c, 0x3fd0f14e25183c0c, 0xbfeedbbc1b54a4a4),
+ (0x5474ef4a0497c293, 0x3fd11f8458f1b696, 0xbfeed55af85d48b3),
+ (0x64a85b58557d81d6, 0xbfd16ead3eb9741d, 0xbfeeca421433b493),
+ (0xf77724543b8b3885, 0x3fd19ed46e8f52e9, 0x3feec36716fdec27),
+ (0x464183db1f445d09, 0x3feec26f93025caa, 0xbfd1a593925c8dc8),
+ (0x778eae02a44000a2, 0xbfeebb73eb5aed84, 0x3fd1d5fba8930f8f),
+ (0x5cdc5f4b5e0d1940, 0xbfd224373cb42c70, 0x3feeafff121ff829),
+ (0x525e3d38881ed63d, 0x3feea6cd7f0438c8, 0xbfd261f9b2e4fa14),
+ (0x5a10e8f3294cb8da, 0xbfeea58f1791757a, 0x3fd26a434e6e77e4),
+ (0x4fc23cca0a71ea3a, 0x3fee9b1b63dc509c, 0x3fd2af49e08c3126),
+ (0xf60fe4b87e3c1da9, 0xbfd2d96cd21892d2, 0xbfee94a51d4d7187),
+ (0xf57c4045e43ccaf9, 0xbfee8e15317f07be, 0x3fd303ce72c3ddcb),
+ (0xfbba53b7ccff6358, 0xbfd337ace7d8abf2, 0x3fee85f74296d316),
+ (0x7c92ab74e6d83f53, 0x3fee7e33cfcfc9c9, 0x3fd368bac190b88b),
+ (0x6aa52ba172654ed1, 0x3fd3a3c7bd76153b, 0x3fee74be8564f496),
+ (0x5c6e0f3ec20f8e7d, 0x3fee6c801bff99d9, 0x3fd3d6a1b9e825a9),
+ (0x44438c0b81cf287e, 0xbfd3f7fb8049f941, 0xbfee670b3f6d116a),
+ (0xe9eeb63337ece529, 0x3fd42bb1af27955a, 0x3fee5e814f8fa4ee),
+ (0xd05cf079494581b4, 0xbfee58a3bd5aaaad, 0x3fd44ee2b0defb88),
+ (0x685f1a87b750f254, 0xbfd4a144f9e741c4, 0xbfee4abc1dcdf720),
+ (0x536b65af59fbc622, 0x3fee477e3635fcdf, 0x3fd4b4458e9e69ef),
+ (0xdd2564daf3c4e467, 0x3fd4f0c8e7efe5da, 0xbfee3d15289f8043),
+ (0x7fa52667c976d82e, 0xbfee3a08ee598072, 0xbfd5025b4e066b88),
+ (0x72f883ad82e4a3ba, 0xbfee2fdf26ce794e, 0xbfd53c7e2146d269),
+ (0xf6e835bc9adeaf59, 0xbfd57c6ac4883fb5, 0x3fee248e035dacb0),
+ (0x42ea8e91bba3889d, 0x3fee202b02dd668d, 0xbfd594f8cca01aba),
+ (0x58b1d3f25c054d48, 0x3fee179309c7e2fe, 0xbfd5c4b8add182bf),
+ (0xe9e2cd65f67e2ee0, 0xbfee0dd7bea36726, 0x3fd5fa3dc3a4f8c7),
+ (0x6b9ffac3db03a107, 0x3fd631fb285e219d, 0xbfee039865991e53),
+ (0x6eb4d607fe115dcf, 0x3fedfa4b939ca0ac, 0x3fd66409024e6323),
+ (0xf7917095f9a51925, 0xbfd69796d53fd17f, 0xbfedf09e862de5d1),
+ (0x7118f8c15e2991c4, 0x3fedeb8848f6cf5b, 0xbfd6b27ba1696913),
+ (0x79b55f3637cc4586, 0xbfd6f5638f450935, 0x3feddec291264c5d),
+ (0x5fd1cfe5b08757a6, 0xbfd70ffb893ab566, 0x3fedd9a2fedb6c24),
+ (0xc733503522b5f461, 0xbfd7554c3b69eebf, 0xbfedcc2869cbf541),
+ (0x446d7c038f34a64a, 0x3fedc88bba34e66a, 0x3fd767b74c87b19e),
+ (0xc3a23c3b9032b8f3, 0x3fd79831044581c9, 0xbfedbefa4c29de60),
+ (0xc48d690c9e9f2305, 0xbfd7e8fd81ac1776, 0xbfedaed4c0b2b8f3),
+ (0x6936ad82b0657c14, 0x3fd7f3be25e63b3f, 0xbfedaca9dc90a41b),
+ (0x4d4c51754594c5a7, 0xbfed9c8f1505a0bd, 0xbfd842f47bbbff65),
+ (0x53af975d069ceebf, 0x3fed98ae77ed0af4, 0xbfd855d9e1b87671),
+ (0xbfd929812210b263, 0xbfd884d5b1521b6a, 0x3fed8efb1e579d8f),
+ (0xf311b32b3a68939e, 0xbfed856f902db836, 0xbfd8b2aaa6160a9f),
+ (0xc8bb45b934289a6f, 0xbfd8e9ef0de3dc36, 0xbfed79d12674cd54),
+ (0xcf7f65187e57f8e0, 0x3fed6bc8263fc275, 0x3fd92bf37a97badc),
+ (0xc8e046f199566a7b, 0x3fed61acdd8efca5, 0x3fd95aff9d017381),
+ (0xe63586b4c42da14f, 0x3fd992a9570938bd, 0xbfed559aec9c0440),
+ (0x7570d9f0f3faa4e7, 0x3fed51bace294110, 0xbfd9a46a32ec9b5d),
+ (0xf6bb3c9916ba9b7b, 0x3fed454543bc2de4, 0x3fd9dd1b108a60cc),
+ (0xd660ed6e55136cdf, 0x3fed37afb7050a25, 0x3fda1a4302c68bf6),
+ (0x429a8fa0df460917, 0x3fed2f17f136daf1, 0xbfda409a52187af7),
+ (0xebf471a9b1b6caa5, 0xbfda64f02f41d321, 0x3fed26e544de1649),
+ (0xe65e21ead3c2ba60, 0xbfed22498d657c09, 0x3fda7942a404499c),
+ (0x6ae3d5db498749f6, 0xbfed0e459db364cf, 0xbfdad0af537af3fc),
+ (0x76bc0ffdf6576c1d, 0xbfdaf074aec69954, 0xbfed06ebcd22c8e7),
+ (0x576061576fcdadfd, 0x3fecfe0996e5d249, 0x3fdb169da2f019c9),
+ (0xe9922b0dd12503ba, 0x3fdb3e68e31cf5d6, 0x3fecf4b5b421be75),
+ (0xf090512b08c64f2b, 0x3fece4479edc03c5, 0xbfdb83d63f9cb587),
+ (0xc9064cce58144c06, 0x3fdbb290774db3ef, 0x3fecd91bf6c9a299),
+ (0xd2c65def3f53a9b7, 0x3fdbd52702603513, 0x3fecd0c83afda1b0),
+ (0xc366379b9a957cdb, 0x3fdc01b0c4b0e4d4, 0x3fecc5fc650dfa55),
+ (0x739d5edf97b83065, 0xbfdc2b7a687242b7, 0x3fecbbc7dbedc827),
+ (0xc40021d66b2f4d61, 0xbfdc4d9c4f8f51e7, 0xbfecb363f1bca281),
+ (0xc76aa2fdbe146487, 0x3feca866fe836c4e, 0xbfdc7a02cee3d6de),
+ (0x7738dad760be3d56, 0xbfec9933d62aa560, 0xbfdcb6e1350e6fb2),
+ (0x666928445c6724e3, 0xbfec91c8c33109d5, 0x3fdcd45bdedee292),
+ (0xc1a41b02da9ba0ea, 0x3fec863b137bd6e1, 0x3fdd01fa8ac7aa24),
+ (0x7848fe3e78cb663c, 0xbfec79f540706d37, 0x3fdd320e30bad112),
+ (0x5be9ac5535d046cc, 0xbfdd69dbc3730780, 0x3fec6b965f72d6d9),
+ (0x7d00750f7493ec14, 0x3fec63edee9d4c7e, 0x3fdd8761eaf89474),
+ (0x70b08a0803a2e4f7, 0x3fddab0ae8940bb3, 0x3fec5aa10a89a08b),
+ (0x55fbd54cc4be5f3c, 0x3fdddcb4a70293c4, 0x3fec4d956bba1326),
+ (0xf3554e8402f6265a, 0x3fde1f406db596ad, 0xbfec3beecfce2cf7),
+ (0xd1c4f6c0576b4ceb, 0x3fde321eb7b4e647, 0x3fec36e47718b312),
+ (0x7d59507f71b8f889, 0xbfec27de7468cb95, 0x3fde6a02a16464fe),
+ (0xdc7e5afce57e9cd7, 0x3fdea90fe172c6c6, 0xbfec16c09e490bad),
+ (0xeb073e7d1af5222d, 0xbfdec9544c37d497, 0xbfec0dec6f134e6c),
+ (0xc60882772b865dc5, 0x3fec07c509e4da5b, 0x3fdedfb80fb87768),
+ (0x75dc9fbbd1640b6a, 0xbfebf854a6961d91, 0xbfdf1787a24fb4b4),
+ (0x633901c12d4cb6e1, 0x3fdf4c84cada936a, 0x3febe98a62583ac2),
+ (0x7f04dacd2c2ca8ce, 0xbfebe3a53f98fb9a, 0xbfdf6182e7252bac),
+ (0x4332ecebbe152c9a, 0xbfebd268556dddea, 0xbfdf9e7c5618737b),
+ (0x5ac66c5d9fa15663, 0x3fdfc21149cd81f9, 0x3febc844c23cdafa),
+ (0xc404cdd7bc992e30, 0x3febb629d34f27fa, 0xbfe0008c6d07e1bd),
+ (0xc53d64204fdf1db3, 0xbfebafa459158ca0, 0x3fe00bd23ae3931f),
+ (0xf9d4afa07a8a6ce7, 0xbfeba1a3c12f4839, 0x3fe023e2f03b637d),
+ (0x637f1d1420fea3f8, 0xbfe0448291240078, 0x3feb8e7b76ad81a3),
+ (0x72eae57421307d45, 0xbfeb849a47abb68b, 0x3fe05533525f37a3),
+ (0xc2e6fa2f34fd750b, 0x3feb7482e375b78d, 0xbfe070318df28781),
+ (0x6d7ed298ca67fbc5, 0x3feb65bbbbb39c6c, 0x3fe088c7245680ca),
+ (0x4d744a5ecf95fd93, 0xbfe08d4873b04602, 0xbfeb630334f1eee3),
+ (0xd0b010cb5af6b5b1, 0xbfe0a62065b173bc, 0x3feb53effed8251e),
+ (0xfa89d1a75f768cd0, 0x3feb42d7b90ec7ad, 0x3fe0c20f7c61cec5),
+ (0x6ce2de1511d237eb, 0x3fe0ce8b7293b359, 0xbfeb3b2725681250),
+ (0x56a33b90bd51784c, 0x3fe0e8cc84a7b798, 0x3feb2ae16eac76c8),
+ (0xd998ed50ac13add1, 0x3feb20901b10690f, 0x3fe0f954fc1fd74a),
+ (0xd2573e1a86722c43, 0xbfeb0fa0f7a8064b, 0xbfe114476b668554),
+ (0x498c3881301c9450, 0xbfeafe2803de2552, 0xbfe12fd761d178a8),
+ (0x676028d2e9aa9de4, 0x3feaf378b320bee9, 0xbfe14093da4a17db),
+ (0x4e22da5810e48e2c, 0xbfeae18365d0ce13, 0xbfe15c81b1c187a4),
+ (0xfc2b1173d2f9d7c5, 0x3fe16349ac5ca8bf, 0x3feadd20fb79cee5),
+ (0x7473a4f18d083564, 0xbfe1853ad10a9564, 0x3feac70a53c134e8),
+ (0xfb85676d181e0881, 0xbfe19133a8342b55, 0xbfeabf312f9ba36c),
+ (0x6269a55fd3dcfce2, 0x3fe19f8d1330bb0a, 0xbfeab5befc7864f0),
+ (0x4598ddfdd67bf239, 0x3fea9eb39a3896ce, 0x3fe1c248efe0508e),
+ (0xd95338bde4d930aa, 0xbfe1d9d6772be796, 0x3fea8eee09962254),
+ (0x611503484c027608, 0x3fea8b32e1adee9a, 0xbfe1df6247815710),
+ (0x5c2ee2e8eb412270, 0xbfe1f6da5410afd9, 0x3fea7b56645112df),
+ (0xcb58f3d014fca0aa, 0xbfea66537cfebec2, 0x3fe215acd67cbd00),
+ (0x43c6b36129529847, 0xbfea610121a2376e, 0xbfe21d6f232602fd),
+ (0xfa1614294a9f72c8, 0x3fea49e8d6077dc7, 0x3fe23ee2fa2c432c),
+ (0x473b0851ec7a71fc, 0xbfea3ae9bc9717ab, 0xbfe2546b8f878bb2),
+ (0xf6d74bab0f0688d3, 0x3fe26ad4d6d860df, 0x3fea2b3235d28e02),
+ (0xe9bee3b9d2c67c03, 0xbfe26e2812e3c037, 0x3fea28dacda84df7),
+ (0xcc92e4c94a84e291, 0x3fe285c9915a00bf, 0xbfea1824b911e6c7),
+ (0xf8aab297bc40c6d3, 0xbfe2a8efe687cdd7, 0x3fe9ff0dac00a59e),
+ (0x6bfb68a736eeb1b6, 0x3fe2b1176d2ab489, 0x3fe9f931533ac9e4),
+ (0xcf94cd7e76b5d43b, 0x3fe9eefebc7842b3, 0x3fe2bf3ae0fb4f01),
+ (0x58e19d8279e73d8c, 0x3fe2e269a7c9fff8, 0x3fe9d56b655c37fc),
+ (0x4c2f18aaaae1a9a5, 0x3fe9c6856bb54b0c, 0x3fe2f6ba53166329),
+ (0x4473bbb450eb9885, 0xbfe9bd919cb7c452, 0xbfe302df564ec155),
+ (0x5352f27ad3264f74, 0xbfe3105c54f5cb4c, 0xbfe9b395db2fe1b5),
+ (0xd70c16903457ac91, 0xbfe32d9f9dbb21f9, 0xbfe99dc756e89b3d),
+ (0xcac2e8c8c586b2d1, 0xbfe345aa02d0e659, 0xbfe98bb610c6fd7a),
+ (0x72d7978807cae706, 0xbfe35d2b78fbd0b9, 0x3fe979e9612ca3a5),
+ (0xe6c2f09d247a94a9, 0xbfe36b250c0e3485, 0x3fe96f44150abf2c),
+ (0xfa6723c5149699bc, 0xbfe95f43f91aa66f, 0xbfe380085ac23ee0),
+ (0xff8223b58fd12c2f, 0x3fe94bba9f307406, 0xbfe39959860b58ec),
+ (0x75296e4dd81e95f4, 0x3fe3a3e75e16aa44, 0x3fe94389b95f2efa),
+ (0x7e5b65a329da9204, 0x3fe3c1f15c9eb331, 0xbfe92c12b84a4ade),
+ (0xee3f0726908f6f7e, 0xbfe91cc6ea4e16dc, 0x3fe3d55ed2290c82),
+ (0x7af275b9d87202c4, 0x3fe3d7db0cdad96d, 0x3fe91ad03ab42e84),
+ (0x58404c6817fb0387, 0xbfe3ecf399bb98a3, 0x3fe90a151c0f3a99),
+ (0x4e24e759fc383d3b, 0x3fe8f9c1d2e98eb7, 0xbfe401664bfe107a),
+ (0x6e6e238154fe510b, 0xbfe8dc876efe2cea, 0xbfe425ad5d3088a4),
+ (0x51115721fcb875a5, 0x3fe8ce38b96c431d, 0xbfe4374857dced36),
+ (0xcd0f08fce53215fe, 0x3fe44b7fa906b663, 0x3fe8bdb137695bac),
+ (0xc793de849b92e545, 0x3fe8ae29fd1807b2, 0xbfe45e5f09a79710),
+ (0xf14f3d838006f1f3, 0x3fe8a0e6c0cb53d4, 0xbfe46e665c9bfa98),
+ (0x485b3050ec24e894, 0xbfe89ac1501c4664, 0x3fe475ccc67a5a6f),
+ (0x53cdd772c04825b7, 0xbfe499d932904180, 0xbfe87c9a6ee0e5f4),
+ (0xf3830e1d9fcb8a63, 0x3fe49ee7f784dc34, 0xbfe878583c6d1b17),
+ (0x4fd516dfb479c284, 0xbfe4b8fa646a8702, 0x3fe86247ec1b5405),
+ (0x401c9636ba305733, 0x3fe852f4e454503a, 0x3fe4caf517981ddf),
+ (0x74e6629d161dcdf0, 0x3fe84b4f58913c4f, 0x3fe4d3e3c394b4c3),
+ (0xc831ccafeb5988ca, 0xbfe4e9f00dc7029d, 0x3fe8385704515fc8),
+ (0xea8b6c2e5b7438e9, 0xbfe821d634afb0ed, 0x3fe503e361ffde43),
+ (0x50ddbc2d3d77331b, 0xbfe51e8f21a24dd4, 0xbfe80a82564c99e1),
+ (0xd31f8e19edda2f9a, 0x3fe802894421cfd0, 0x3fe5279f1e05c5c6),
+ (0xfbb16b339d0a17fe, 0xbfe542cec6203a63, 0xbfe7ea79c4f28160),
+ (0xfe93c58449c8fd51, 0x3fe7e653ec9effe6, 0xbfe547784cf0eeb5),
+ (0xe125d9535e1d7d96, 0x3fe56a646c9bc577, 0xbfe7c70e160b76ba),
+ (0xe9492784719b40f6, 0xbfe7bd8d74e13697, 0x3fe574eca79f247d),
+ (0x76693dbf45a5c11c, 0x3fe7a6aca203b146, 0x3fe58e21d55361dd),
+ (0xca01a90ba147a7a8, 0x3fe7a0e9b1cc2382, 0x3fe594727d17c430),
+ (0xeba7b196f2ce8a86, 0x3fe792dcb3fd68a0, 0x3fe5a3caee7bef45),
+ (0x65cec0767168cebc, 0xbfe781dbdad094ee, 0xbfe5b64237c79451),
+ (0xe46e9ddaa8d41951, 0xbfe76df89a6184ca, 0x3fe5cbb6beaf19d3),
+ (0xc394901bb60cd165, 0x3fe5e7e07a75103e, 0xbfe753a5e9dd67ee),
+ (0xefe39e4669e6b8e3, 0xbfe5ee1e7c908e91, 0xbfe74dc7ad48a603),
+ (0x6606d5cab1344ed4, 0xbfe60bbb919e2739, 0x3fe731c5d36a20ec),
+ (0xe151f4cddef4501f, 0xbfe7201c54d669d7, 0xbfe61e41c6dae1e5),
+ (0xd9381aa73bb196ba, 0xbfe719018e9ef492, 0xbfe625acfdb6514e),
+ (0x4de08bca51af503b, 0xbfe7060f07286185, 0xbfe6395ed52b7540),
+ (0x43ba67829cbf5ddd, 0xbfe64d04587eee54, 0xbfe6f307ef31a64f),
+ (0x51c3d8b11e8e3186, 0x3fe664fbc2d6363c, 0x3fe6dba5765c9f8b),
+ (0x762158955b8d3e1f, 0x3fe67b88d77e47ab, 0xbfe6c577911c77f9),
+ (0x7e1f8126bc820e3d, 0x3fe685a6ed43a0f0, 0x3fe6bb75e2686a13),
+ (0x77c0705a72b5023d, 0x3fe69d4e28e80836, 0x3fe6a3ee27f25da6),
+ ][:]
+
+ for (x, ys, yc) : inputs
+ var xf : flt64 = std.flt64frombits(x)
+ var rsf1, rcf1, rsf2, rcf2
+ /* results of the combined call vs. the two separate calls */
+ (rsf1, rcf1) = math.sincos(xf)
+ rsf2 = math.sin(xf)
+ rcf2 = math.cos(xf)
+
+ var rsu1 = std.flt64bits(rsf1)
+ var rcu1 = std.flt64bits(rcf1)
+ var rsu2 = std.flt64bits(rsf2)
+ var rcu2 = std.flt64bits(rcf2)
+
+ /* sincos must agree with the individual sin and cos */
+ testr.check(c, rsf1 == rsf2 && rcf1 == rcf2,
+ "sincos(0x{b=16,w=16,p=0}) is (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}), individual results (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0})",
+ x, rsu1, rcu1, rsu2, rcu2)
+
+ /* and must reproduce the tabulated encodings */
+ testr.check(c, same64(rsu1, ys) && same64(rcu1, yc),
+ "sincos(0x{b=16,w=16,p=0}) should be (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}), was (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0})",
+ x, ys, yc, rsu1, rcu1)
+ ;;
+}
+
+/*
+ NaN propagation: sin, cos and both components of sincos must
+ return NaN when given a NaN argument, for flt64 and flt32 alike.
+ */
+const sincos05 = {c
+ var nan64 = std.flt64nan()
+ var nan32 = std.flt32nan()
+
+ testr.check(c, std.isnan(math.cos(nan64)), "cos(NaN64) should be NaN")
+ testr.check(c, std.isnan(math.sin(nan64)), "sin(NaN64) should be NaN")
+ testr.check(c, std.isnan(math.cos(nan32)), "cos(NaN32) should be NaN")
+ testr.check(c, std.isnan(math.sin(nan32)), "sin(NaN32) should be NaN")
+
+ var sin64, cos64
+ (sin64, cos64) = math.sincos(nan64)
+ testr.check(c, std.isnan(sin64), "sincos(NaN64) should be (NaN, NaN)")
+ testr.check(c, std.isnan(cos64), "sincos(NaN64) should be (NaN, NaN)")
+
+ var sin32, cos32
+ (sin32, cos32) = math.sincos(nan32)
+ testr.check(c, std.isnan(sin32), "sincos(NaN32) should be (NaN, NaN)")
+ testr.check(c, std.isnan(cos32), "sincos(NaN32) should be (NaN, NaN)")
+}
diff --git a/lib/math/test/tan-impl.myr b/lib/math/test/tan-impl.myr
new file mode 100644
index 0000000..3901fe7
--- /dev/null
+++ b/lib/math/test/tan-impl.myr
@@ -0,0 +1,494 @@
+use std
+use math
+use testr
+
+/*
+ Test driver for tan/cot. Calls math.fptrap(false) first
+ (presumably so floating-point exceptions do not trap while the
+ tests run -- confirm against math.fptrap), then hands the named
+ test cases below to testr.run.
+ */
+const main = {
+ math.fptrap(false)
+ testr.run([
+ [.name="tan-cot-01", .fn = tancot01], /* flt32 */
+ [.name="tan-cot-02", .fn = tancot02], /* flt64 */
+ [.name="tan-cot-03", .fn = tancot03], /* off-by-1-ulp quarantine */
+ [.name="tan-cot-04", .fn = tancot04], /* exhaustively test C */
+ [.name="tan-cot-05", .fn = tancot05], /* NaN handling */
+ ][:])
+}
+
+/*
+ Compares two flt32 bit patterns. They match when the bits are
+ identical, or when both patterns decode to NaN (any NaN encoding
+ is accepted in place of any other).
+ */
+const same32 = {a, b
+ -> a == b || (std.isnan(std.flt32frombits(a)) && std.isnan(std.flt32frombits(b)))
+}
+
+/*
+ Compares two flt64 bit patterns. They match when the bits are
+ identical, or when both patterns decode to NaN (any NaN encoding
+ is accepted in place of any other).
+ */
+const same64 = {a, b
+ -> a == b || (std.isnan(std.flt64frombits(a)) && std.isnan(std.flt64frombits(b)))
+}
+
+/*
+ flt32 tan/cot cases. Each row is (x, expected tan, expected cot) as
+ raw bit patterns; tan(x) and cot(x) are each compared against the
+ expected encoding via same32.
+ */
+const tancot01 = {c
+ var inputs : (uint32, uint32, uint32)[:] = [
+ (0x00000000, 0x00000000, 0x7f800000),
+ (0x80000000, 0x80000000, 0xff800000),
+ (0x01000000, 0x01000000, 0x7e000000),
+ (0x3c000000, 0x3c0000ab, 0x42fffeab),
+ (0x9e95c3a1, 0x9e95c3a1, 0xe05acc1d),
+ (0xb77e2471, 0xb77e2471, 0xc780ef84),
+ (0xd6fb3787, 0x3fc3e4b0, 0x3f274655),
+ (0x73beb86b, 0xc03fdfc0, 0xbeaac75a),
+ (0x78e127aa, 0xbd743d4f, 0xc18629d5),
+ (0x495cd1f0, 0xc036296b, 0xbeb3e251),
+ (0x3f5bddd1, 0x3f9455b7, 0x3f5ce7d8),
+ (0xf1a5d464, 0x3faec70a, 0x3f3b7bed),
+ (0x4b877ee5, 0xbea1d1b9, 0xc04a7f60),
+ (0x91716066, 0x91716066, 0xed87c133),
+ (0x78b5b2be, 0xbf33a6ca, 0xbfb665c7),
+ (0xd697d28b, 0x3f2a1d37, 0x3fc09fa6),
+ (0x2faeb840, 0x2faeb840, 0x4f3b8bcc),
+ (0x652c1dfc, 0x3e06264d, 0x40f443bd),
+ (0xcc2cba6f, 0x41b5c9c3, 0x3d3440f9),
+ (0x7e10c1ed, 0x3f3ae4bf, 0x3faf546b),
+ (0xe65e6ac1, 0xbeff4f29, 0xc00058a9),
+ (0xa249358f, 0xa249358f, 0xdca2daf9),
+ (0xedfabd59, 0x41135f72, 0x3dde58fe),
+ (0xe5041607, 0x40162a53, 0x3eda367b),
+ (0xe7f766dc, 0x421af490, 0x3cd377a8),
+ (0x44ad2e60, 0x3be0be50, 0x4311cd45),
+ (0x02c42753, 0x02c42753, 0x7c270d81),
+ (0xa6383652, 0xa6383652, 0xd8b1e1bf),
+ (0x96b84e4b, 0x96b84e4b, 0xe831ca9c),
+ (0x010937a9, 0x010937a9, 0x7deecdb2),
+ (0x0a350f6e, 0x0a350f6e, 0x74b4fa79),
+ (0xc7aed7a1, 0x3ee7a189, 0x400d7765),
+ (0xf59d39e3, 0xbd1aa1aa, 0xc1d3e906),
+ (0xf4532cda, 0x3e247e88, 0x40c7345b),
+ (0xeec7c1c9, 0xbe7caa4b, 0xc081b07d),
+ (0x2c8b7263, 0x2c8b7263, 0x526afc52),
+ (0xa7f2d525, 0xa7f2d525, 0xd706f0d2),
+ (0x927fced4, 0x927fced4, 0xec80189b),
+ (0xe4db95ed, 0x3e092efb, 0x40eedcce),
+ (0xfcc97783, 0x3ffc8e75, 0x3f01bec8),
+ (0xa5e3ccd3, 0xa5e3ccd3, 0xd90fd86d),
+ (0xf4264176, 0x4077503e, 0x3e847eee),
+ (0x77c8a8a7, 0x3f261644, 0x3fc54b51),
+ (0xf6f29a5e, 0xbef8da53, 0xc003ad1d),
+ ][:]
+
+ for (xb, want_tan, want_cot) : inputs
+ var arg = std.flt32frombits(xb)
+
+ /* evaluate both functions, then compare on the raw encodings */
+ var tan_bits = std.flt32bits(math.tan(arg))
+ var cot_bits = std.flt32bits(math.cot(arg))
+
+ var tan_ok = same32(tan_bits, want_tan)
+ testr.check(c, tan_ok,
+ "tan(0x{b=16,w=8,p=0}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
+ xb, want_tan, tan_bits)
+
+ var cot_ok = same32(cot_bits, want_cot)
+ testr.check(c, cot_ok,
+ "cot(0x{b=16,w=8,p=0}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
+ xb, want_cot, cot_bits)
+ ;;
+}
+
+/*
+ flt64 tan/cot cases. Each row is (x, expected tan, expected cot) as
+ raw bit patterns; tan(x) and cot(x) are each compared against the
+ expected encoding via same64.
+ */
+const tancot02 = {c
+ var inputs : (uint64, uint64, uint64)[:] = [
+ (0x0000000000000000, 0x0000000000000000, 0x7ff0000000000000),
+ (0x8000000000000000, 0x8000000000000000, 0xfff0000000000000),
+ (0x41bb951f1572eba5, 0xbc8f54f5227a4e84, 0xc35057584c429b3a), /* [GB91]'s "Xhard" */
+ (0x5101000000000000, 0xbff4f77bbc53c8f9, 0xbfe86b6d64c43ec0),
+ (0x4b01000000000000, 0xbfe96f60bbc6c837, 0xbff421332f057cb5),
+ (0xc3a19e952471b77e, 0xbfecc29cbf8278ef, 0xbff1cd6894a73f54),
+ (0x3787d6fbb86b73be, 0x3787d6fbb86b73be, 0x48557a09a4de8cf1),
+ (0x27aa78e1d1f0495c, 0x27aa78e1d1f0495c, 0x5833574ee69506a6),
+ (0x7ee54b8760669171, 0xbfe9c97ba8512160, 0xbff3dadcdaf63a4f),
+ (0xb2be78b5d28bd697, 0xb2be78b5d28bd697, 0xcd20cd75656cae19),
+ (0xb8402fae1dfc652c, 0xb8402fae1dfc652c, 0xc79fa1bcab9bfa3a),
+ (0xba6fcc2cc1ed7e10, 0xba6fcc2cc1ed7e10, 0xc5701a13dad38280),
+ (0x6ac1e65e358fa249, 0x3fc7cd0dbfe6d3df, 0x401582ff610d2706),
+ (0xbd59edfa1607e504, 0xbd59edfa1607e504, 0xc283beeb209192c9),
+ (0x66dce7f72e6044ad, 0x3ff4fbabd2ab23f9, 0x3fe8668db91e9b7e),
+ (0x275302c43652a638, 0x275302c43652a638, 0x588aee9adce0c415),
+ (0x4e4b96b837a90109, 0xc01f79ba64664aea, 0xbfc04441334c23a7),
+ (0x0f6e0a35d7a1c7ae, 0x0f6e0a35d7a1c7ae, 0x70710b440e1aaef5),
+ (0x39e3f59d2cdaf453, 0x39e3f59d2cdaf453, 0x45f9a6ebd03c22d5),
+ (0xc1c9eec772632c8b, 0x3ff0f7dd116e0647, 0x3fee2c8f34e6790d),
+ (0xd525a7f2ced4927f, 0x3ff9b655c6a6b5fe, 0x3fe3e9a613743163),
+ (0x95ede4db7783fcc9, 0x95ede4db7783fcc9, 0xe9f1209007b04e55),
+ (0xccd3a5e34176f426, 0x3fde5ad1ab611c65, 0x4000de012b1b5082),
+ (0xa8a777c89a5ef6f2, 0xa8a777c89a5ef6f2, 0xd735d128ee518a7a),
+ (0x0e80d207f7fc1dee, 0x0e80d207f7fc1dee, 0x715e706d3d44d3cb),
+ (0xdc3ccf9de59c1620, 0xc025240a96dce158, 0xbfb837f561f3466c),
+ (0xc53a12760a738704, 0xbfd33aa6f3b5cde8, 0xc00aa05505b58fed),
+ (0x013d7cd6b38fd663, 0x013d7cd6b38fd663, 0x7ea15cfab65b2026),
+ (0x24c9826368e2bab6, 0x24c9826368e2bab6, 0x5b141232d64139ff),
+ (0x673ee065b77f3411, 0xc002680c95540481, 0xbfdbd0fc321d37ee),
+ (0x8648f8f528372eef, 0x8648f8f528372eef, 0xf99480a7cbbaf84f),
+ (0x70bdc29525e17f3d, 0xbfcab0378314cf75, 0xc0132f3505ba9e89),
+ (0x949d1ddd9d2c70d9, 0x949d1ddd9d2c70d9, 0xeb41959d6300f232),
+ (0xed7a82c886f9b2b5, 0xbffc586c9dee2029, 0xbfe210199dcba8ea),
+ (0x953f916c39b9967c, 0x953f916c39b9967c, 0xeaa0380b8daa847f),
+ (0x0e43ed486cdf330c, 0x0e43ed486cdf330c, 0x7199b1a551a932e8),
+ (0xb626a5410afb3c6e, 0xb626a5410afb3c6e, 0xc9b69bfcb4e83197),
+ (0x9e3633326b5d0ef3, 0x9e3633326b5d0ef3, 0xe1a71025b63c78d7),
+ (0xf5c8913f8e308a65, 0xbfcb0c899ca207ec, 0xc012edba9bf64e3d),
+ (0xaf1c2b5cfb3e9c97, 0xaf1c2b5cfb3e9c97, 0xd0c22cfe8b4e4af6),
+ (0xae2582384f4c4364, 0xae2582384f4c4364, 0xd1b7cdea08fb3afc),
+ (0x13a48905d9e021e7, 0x13a48905d9e021e7, 0x6c38eec83f657958),
+ (0x764d46c054f3e5f8, 0xc00236c5e85323a3, 0xbfdc1c3d52cfee80),
+ (0x1c8142b5bce1d2a8, 0x1c8142b5bce1d2a8, 0x635da9b7849a7998),
+ ][:]
+
+ for (x, yt, yc) : inputs
+ var xf : flt64 = std.flt64frombits(x)
+ var rtf, rcf
+ rtf = math.tan(xf)
+ rcf = math.cot(xf)
+
+ /* compare on raw encodings so failures print exact bits */
+ var rtu = std.flt64bits(rtf)
+ var rcu = std.flt64bits(rcf)
+
+ testr.check(c, same64(rtu, yt),
+ "tan(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+ x, yt, rtu)
+
+ testr.check(c, same64(rcu, yc),
+ "cot(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+ x, yc, rcu)
+ ;;
+}
+
+/*
+ flt64 tan/cot cases with a two-candidate tolerance. Each row is
+ (x, preferred tan, preferred cot, alternate tan, alternate cot) as
+ raw bit patterns; each function passes when its result matches
+ either its preferred or its alternate encoding.
+ */
+const tancot03 = {c
+ var inputs : (uint64, uint64, uint64, uint64, uint64)[:] = [
+ (0xf83b13a6a142b6d5, 0xbf5a86f73542c78a, 0xc0834d0a344cbe85, 0xbf5a86f73542c789, 0xc0834d0a344cbe85),
+ (0x3f80000000000000, 0x3f800015557777af, 0x405fffd55549f49b, 0x3f800015557777af, 0x405fffd55549f49a),
+ (0x77709d0579b8480f, 0x40657b00018b5542, 0x3f77d5ea4c97113d, 0x40657b00018b5543, 0x3f77d5ea4c97113d),
+ (0xd5de89ca329a1145, 0x4068d90e2dd81e00, 0x3f749afa8f6b9cf9, 0x4068d90e2dd81e00, 0x3f749afa8f6b9cf8),
+ ][:]
+
+ for (xb, tan_best, cot_best, tan_alt, cot_alt) : inputs
+ var arg = std.flt64frombits(xb)
+
+ /* evaluate both functions, then compare on the raw encodings */
+ var tan_bits = std.flt64bits(math.tan(arg))
+ var cot_bits = std.flt64bits(math.cot(arg))
+
+ var tan_ok = same64(tan_bits, tan_best) || same64(tan_bits, tan_alt)
+ testr.check(c, tan_ok,
+ "tan(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, will also accept 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+ xb, tan_best, tan_alt, tan_bits)
+
+ var cot_ok = same64(cot_bits, cot_best) || same64(cot_bits, cot_alt)
+ testr.check(c, cot_ok,
+ "cot(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, will also accept 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+ xb, cot_best, cot_alt, cot_bits)
+ ;;
+}
+
+const tancot04 = {c
+ /*
+ There should be one of these for each j, each corresponding
+ to the appropriate xi. This should ensure that, when
+ upgrading the C tables, things don't get too terribly
+ broken.
+ */
+ var inputs : (uint64, uint64, uint64)[:] = [
+ (0x3c79565a4a53b896, 0x3c79565a4a53b896, 0x43643514e5e6151d), /* j = 0 */
+ (0x521071413389fce0, 0xc069c888b4021d3b, 0xbf73db97f25fd281),
+ (0xbf7c2388653f944a, 0xbf7c23a56814d6a2, 0xc06231fa9c94d936),
+ (0x503ece41b1165990, 0x40564720618519f6, 0x3f86fb83eb45eb36),
+ (0xf549b46989519196, 0x405362a6d937767d, 0x3f8a6964419f2412),
+ (0xcd512459433a0231, 0x3f91b4eac5967922, 0x404cea6483e23c58),
+ (0xcaf4adbc5ee7cd89, 0xc0483c7c5380436e, 0xbf952017aadab226),
+ (0x475f423e817765ea, 0xbf96b5919e08c282, 0xc0468bbe82b62f6c),
+ (0xfcf1de6e51eb3a2a, 0xbf9a4811a09dd560, 0xc0437b3b137bbcbf),
+ (0x6d2a3c2098179905, 0x3f9f1551031330ae, 0x404078cd715b3aff),
+ (0x62e01a08ac6db91f, 0x403e0e0bcee51c2c, 0x3fa109172f8fb6e5),
+ (0x5516368407fe18ac, 0xbfa2a9ddeb41fdb2, 0xc03b6ee3c2e19447),
+ (0x5c7c4e1923c950ec, 0x403a340e131a9b8c, 0x3fa38a1c47557d66),
+ (0x52924d2cbb5901d1, 0x3fa490894ccf2c23, 0x4038e5ac3c9ea8a7),
+ (0x77352e83554977a2, 0x3fa741604124d6ca, 0x40360433cfc3b416),
+ (0xeab236c01f08c19f, 0x4034ed5f1b1f4d57, 0x3fa8773a05bad8e8),
+ (0x54b55622172ba5a0, 0x3faa765b3f6b8f73, 0x40335927794e9bd3),
+ (0xda739024569252ce, 0x40326751a22b67ca, 0x3fabd216c378b36d),
+ (0xdb96494b55f3f081, 0xc0314440acaa21e0, 0xbfada7110aa502e2),
+ (0x5a446ae100d4ad56, 0x40308389b1b086a8, 0x3faf011c18275726),
+ (0x447aa716086a8c88, 0xbfb0675ddd971236, 0xc02f365afa1f8a00),
+ (0xf968280b3a414e54, 0x3fb138c1ea322780, 0x402dbadbee341cac),
+ (0x79a51c7ac9f069e8, 0x402d72743ce098d6, 0x3fb1631a402c8bcd),
+ (0x60ffe411eafbc6da, 0x3fb251a2b23e2026, 0x402bf304e674f736),
+ (0x4f318e7d4aea96dc, 0xc02a0caf0e7cbf34, 0xbfb3a7a47204e201),
+ (0xc404978c116309d8, 0x3fb433de58d623c0, 0x402957dfc56925ab),
+ (0xe66830cbfd75b885, 0x4028733a310ce62e, 0x3fb4f0cb6b3bb391),
+ (0x5aa8e5159f4c635a, 0xc027399eacbe42ec, 0xbfb60b8e0f2a47b3),
+ (0xc0197a1d458871ce, 0xbfb616745039ebcf, 0xc0272e28bde38084),
+ (0xfec0c7c70c5fef38, 0x3fb74390e9cea2bf, 0x4026022137bd7bbb),
+ (0x536118c6d0da429d, 0xbfb82e815bcb8454, 0xc0252c4e4ad03fb7),
+ (0xc0c5332662ea8467, 0xbfb8fb3bbbe9507a, 0xc0247ec9aabe16a4),
+ (0x640531fd736dc271, 0x3fb9ed0fe38c96f4, 0x4023bf9d7f18200a),
+ (0x4223b2a56016896f, 0x3fbaa24c8e7074dc, 0x4023393b6983f060),
+ (0xe47b0287f9f402c4, 0xbfbb77a83aebd88c, 0xc022a3e8d92ebb66),
+ (0x720e26846afc74a5, 0xbfbbdb0ae1cd9bfc, 0xc02261674852b84a),
+ (0x6e03d54eae350b2b, 0x3fbc6d5994a2db4d, 0x402202cdbfabb7c2),
+ (0xe223c42c81a9e6a1, 0x3fbde6dc34d255c1, 0x40211f6a57a2ab8b),
+ (0xf5b0b388d525a652, 0xbfbe3c30082a93b9, 0xc020ef17c4c56fb5),
+ (0xc872046ad7b74e5d, 0x40207f837f2bb080, 0x3fbf08ac1c355dae),
+ (0x69df9989dca89d7f, 0x401fbfeee53b8ab8, 0x3fc0204931062ab3),
+ (0xcc324b95dbc8fd01, 0xbfc09375925b4df4, 0xc01ee3546f5cc75d),
+ (0xf721d915cf272a16, 0x401ea89e0f1b1f2b, 0x3fc0b333f1b3aa2d),
+ (0x727b9da90c1b363d, 0xc01dffd43f5fbb8a, 0xbfc11129f51ec3b0),
+ (0x76833092f555af28, 0x3fc199c2dfe24290, 0x401d17019925c551),
+ (0x4aee1902b654832c, 0x401c77a2efc4a270, 0x3fc1fc4ba79a254b),
+ (0xfc53224870aff4ac, 0xc01bf5d5327767d8, 0xbfc24fcac6864d6e),
+ (0x5657a3059c18f2d9, 0x3fc2ca3784efa8a1, 0x401b3fa8abd32c27),
+ (0x7bff45dd88b13800, 0x3fc3097e149ad48c, 0x401ae516e8c1b891),
+ (0x5dee435102358eaf, 0xc01a220c17076f81, 0xbfc39793235ac8be),
+ (0xde9d9f0cc9f77177, 0x3fc41d6a02d1d67d, 0x4019742a3de2e8a8),
+ (0xfdda28f4488d7a83, 0xbfc455a0bdcd9658, 0xc0192dcc96c09118),
+ (0xf50af38a50f30e4c, 0x3fc4b7278305fffc, 0x4018b742599f99bd),
+ (0x5cb2267a738b9206, 0xc01802b6ce056aa1, 0xbfc552ec0075b902),
+ (0xc989143e4bf260b6, 0x4017a9b2fb7a0491, 0x3fc5a32358ce4607),
+ (0xf819600bb7324a38, 0xbfc5fc033f10df52, 0xc0174a0a066b5f52),
+ (0x7509b2527112fd5b, 0xbfc688076788f691, 0xc016b9503b90cf6c),
+ (0x73028e0de36d1132, 0xc01642d4ff378b19, 0xbfc6fff2ee5689dc),
+ (0xe1c86c04b0706291, 0xc01624b91bb4a760, 0xbfc71f38e7f04bea),
+ (0x423c927a3f264a46, 0xbfc7934c85e3eedd, 0xc015b7b260999f02),
+ (0x4b1f0b0b22dfeef3, 0xbfc830a4286c0b3e, 0xc0152a6fb053767d),
+ (0xcb9bb37fc0f2a235, 0xbfc842e95009eb8c, 0xc0151a7f4f9abae8),
+ (0xd6e8414566d4c52e, 0xc014a7191ae3c278, 0xbfc8ca796bb67c9c),
+ (0xe79fb9553990f8be, 0x401442516c878b1e, 0x3fc945cc6022d782),
+ (0x6dcb241574906b9f, 0x3fc9c6fc0108ec47, 0x4013dcc98c43328e),
+ (0x4bb2a2d62ad20180, 0xbfca2782885fb0ee, 0xc013937b923e0d8d),
+ (0xe62474e683c786e6, 0xc0134de7ff797441, 0xbfca85c66da38626),
+ (0x468f3f43adc7a648, 0xbfcb08c355d5d83b, 0xc012f05f2b6d3816),
+ (0xfddefd05372bf5b1, 0xbfcb1a25e121b44c, 0xc012e439221e5f3e),
+ (0xde30e787f515ee90, 0x3fcbe4216eeb9065, 0x40125b6a11095216),
+ (0x6360cbf4cc0e423a, 0x3fcc33d9e47482b3, 0x4012278629c47e06),
+ (0x78a35210c57021ed, 0xc011f1def6fbdf9c, 0xbfcc882c7fe33bf1),
+ (0x431b628966118011, 0x4011c83151013186, 0x3fcccb0c5fbba2bf),
+ (0x7517b932422321b3, 0xbfcd37982a92dd89, 0xc01186214075d675),
+ (0x71ce2347503f237b, 0xbfcdef2b5b49c97d, 0xc0111aa996e77d3d),
+ (0xd3eeab6f1ce0c65f, 0xc0110bd5cf4078be, 0xbfce0934fc2e3a6e),
+ (0x7332b1c80505c25d, 0xbfceaa4a800b7861, 0xc010b24a9e60f783),
+ (0x50afa8bba1d961cd, 0x3fcef110d2a3c406, 0x40108c19c7572e79),
+ (0xf6deca320321040f, 0xc01036b3933681fe, 0xbfcf9409f3ae8d15),
+ (0xde7018284f03e03f, 0x400ff58ecd97b1cc, 0x3fd0053a4dea9fb4),
+ (0x5a35f121d3d2a2e7, 0x3fd01ca658f621d6, 0x400fc7193193f7c8),
+ (0x5f8789fe1295b431, 0x3fd03ff9c0820c6f, 0x400f82043cd6360d),
+ (0x6b03c18f1a7b6e8a, 0x3fd0a52e5904da84, 0x400ec271b160fcbc),
+ (0x67e5a3e87999944c, 0xbfd0c25d63bbab57, 0xc00e8ce1807d4ce5),
+ (0x755909d7a66ec123, 0xbfd0eada0993264b, 0xc00e43c47922f882),
+ (0xce7378a1c330830a, 0x3fd128c5fbe43f9f, 0x400dd68d8dd1fc71),
+ (0x61229f2e883ad502, 0xc00d8c9aec55bb8d, 0xbfd153b704e7a90b),
+ (0xced4e84706b222f4, 0x400d30974b14de74, 0x3fd18a5597eef25d),
+ (0x5474ef4a0497c293, 0xbfd1c55dfa04175a, 0xc00ccfa05aea5cad),
+ (0x64a85b58557d81d6, 0x3fd21e0ade9fa6aa, 0x400c429c5c6d1fd2),
+ (0xf77724543b8b3885, 0x3fd2542b2e3b002c, 0x400bef2809369f45),
+ (0x464183db1f445d09, 0xc00be3999eb1362e, 0xbfd25bc3761d6ba4),
+ (0x778eae02a44000a2, 0xc00b91a5151bc992, 0xbfd29256a1f0ff97),
+ (0x5cdc5f4b5e0d1940, 0xbfd2ead9b759215a, 0xc00b10a741a8f24e),
+ (0x525e3d38881ed63d, 0xc00aadb8d95bd5b5, 0xbfd3310047d6f099),
+ (0x5a10e8f3294cb8da, 0xc00aa0a28dbee61b, 0xbfd33a6ef6650b66),
+ (0x69f6fc4f0ca39ec6, 0x3fd39ac21be3213c, 0x400a1dcdba18b71c),
+ (0xf60fe4b87e3c1da9, 0x3fd3b963dc04a281, 0x4009f53e9c589989),
+ (0xf57c4045e43ccaf9, 0xc009b5ddc506f67d, 0xbfd3ea0305520474),
+ (0x73c33cf71af08874, 0x40097b0f54c56a4e, 0x3fd417f89e98496b),
+ (0xfdbeacaff2a4886d, 0xbfd453456f2c976b, 0xc00930b7f6d7f86a),
+ (0xf6c928e27302222b, 0xbfd4a6a6d3b97c7e, 0xc008cb029bb3ec39),
+ (0x5c6e0f3ec20f8e7d, 0x4008898dbbd99fd5, 0x3fd4ddbdab41d462),
+ (0x44438c0b81cf287e, 0x3fd50496c406f2ca, 0x40085c335bad6163),
+ (0xe9eeb63337ece529, 0x3fd540fcb11eb4a5, 0x400816f9a23c2445),
+ (0xd05cf079494581b4, 0xc007e89c0579cff4, 0xbfd56a345270dd18),
+ (0xfeb5da20af64f306, 0x4007a27c85a6b271, 0x3fd5a9bdc193fe25),
+ (0x536b65af59fbc622, 0x40076647dcdfdc29, 0x3fd5e17aa3dc4f94),
+ (0xfcd5a50b4eafe1f6, 0x3fd638c6e1863d76, 0x40070a5b2ba01912),
+ (0x7e36add9e924c14b, 0x3fd66668ebe3fd5a, 0x4006db6b2431c502),
+ (0x72f883ad82e4a3ba, 0x4006be62fab1f37f, 0x3fd68300c86cfbc2),
+ (0xf6e835bc9adeaf59, 0xbfd6cf5164e5b38b, 0xc006724b03e11174),
+ (0x42ea8e91bba3889d, 0xc0065580a01f30a5, 0xbfd6ecb8d0b4f4bd),
+ (0xe28b1ead713c1846, 0x4005f81eef90c072, 0x3fd74e2a329ce9c5),
+ (0x5bf9ffb760361ee7, 0x4005ddd8910dabf0, 0x3fd76a2b2380d2ae),
+ (0x6b9ffac3db03a107, 0xbfd7a9f1a170ad6f, 0xc005a2ea1057b86a),
+ (0x6eb4d607fe115dcf, 0x40056be6a4971a46, 0x3fd7e6b7460e031b),
+ (0xf7917095f9a51925, 0x3fd8258aa65c5906, 0x4005342a6a586d87),
+ (0x76d55c397cac2f9d, 0xbfd86bb7872939f1, 0xc004f73c163572bd),
+ (0xe8c4f20489ec8ffa, 0xc004d3779bebab91, 0xbfd895a87acf8c8a),
+ (0x5fd1cfe5b08757a6, 0xbfd8b93277c1bfa9, 0xc004b587d2303bad),
+ (0xc733503522b5f461, 0x3fd90ed1c765c07b, 0x40046ec49898d8d0),
+ (0x697a2fac6afef92f, 0x3fd946b41ef58f1a, 0x40044197ada15b60),
+ (0xc3a23c3b9032b8f3, 0xbfd961e286c44ef0, 0xc0042be69206e0dc),
+ (0xc48d690c9e9f2305, 0x3fd9c6cc521e03df, 0x4003dcee4a750686),
+ (0x56be9f39eac7f817, 0x4003c478b9f59a35, 0x3fd9e6b1549b7658),
+ (0x4d4c51754594c5a7, 0x40038739eb41f268, 0x3fda37ecd1c42e19),
+ (0x53af975d069ceebf, 0xc00375837628fefd, 0xbfda4fca655fc1f0),
+ (0xbfd929812210b263, 0xbfda8b4a401ecbee, 0xc00349e53e631bbe),
+ (0xf311b32b3a68939e, 0x40031feb00197de6, 0x3fdac58d8019fd53),
+ (0xc8bb45b934289a6f, 0x3fdb0c1b08eecc8d, 0x4002ee07feabd489),
+ (0xcf7f65187e57f8e0, 0x4002b37691192ade, 0x3fdb60d005c0ebf0),
+ (0xd869ac083d0a16a5, 0xbfdb72cf0c12ea21, 0xc002a733bc84bf9e),
+ (0xe63586b4c42da14f, 0xbfdbe58dd31b8bff, 0xc0025a7a487dad5b),
+ (0x71bf7ddcf4d0e040, 0xc0024ba3e95ec017, 0xbfdbfc2d8849f915),
+ (0xf6bb3c9916ba9b7b, 0x40021b8b9ffa115b, 0x3fdc468204bfdeb5),
+ (0xd660ed6e55136cdf, 0x4001e8cb5555e3e4, 0x3fdc96a258c6d38b),
+ (0xc6f8b9cd5c389bec, 0x4001dc0f3763963d, 0x3fdcab04ea35603b),
+ (0x4e2d2d544b4e9749, 0xc001a8e4711d8cb3, 0xbfdcfe14e4b1dcb8),
+ (0x457b0455f480dbf3, 0xbfdd3a851ad22002, 0xc00184603a2c2e09),
+ (0xcb84660f30d05c41, 0xbfdd5500361e5c20, 0xc001748fbe8527f4),
+ (0x76bc0ffdf6576c1d, 0x3fddb2cb62781ad6, 0x40013d6f1df2661f),
+ (0x576061576fcdadfd, 0x40011fe6863ad307, 0x3fdde6035e4f9836),
+ (0xe9922b0dd12503ba, 0x3fde1b9f362deb6d, 0x40010168bfcfe60a),
+ (0xf090512b08c64f2b, 0xc000ccf22ac45b1a, 0xbfde79a3d5d64425),
+ (0xc9064cce58144c06, 0x3fdeb945fad69a7d, 0x4000aa26459366ea),
+ (0x7f2ba4659829232b, 0xc0008e9aaebc8235, 0xbfdeec631b7b2781),
+ (0xc366379b9a957cdb, 0x3fdf25b1353833ee, 0x4000702471ca91a3),
+ (0xf9f816554910c126, 0xc00044255f350059, 0xbfdf79f03db1f5f6),
+ (0xc40021d66b2f4d61, 0x3fdf8e852ce9efce, 0x4000398973c3656f),
+ (0xc76aa2fdbe146487, 0xc0001a10cf5b0ea1, 0xbfdfcc32c535fbc5),
+ (0x7738dad760be3d56, 0x3fffdeed4df15e90, 0x3fe0109a8210be31),
+ (0x666928445c6724e3, 0xbfffb61a9ba6cf8e, 0xbfe02548cb82b62f),
+ (0xc1a41b02da9ba0ea, 0x3fff777ca9858298, 0x3fe04569c9bb4266),
+ (0x7848fe3e78cb663c, 0xbfff3637f050767c, 0xbfe0677047cb2dbb),
+ (0x5be9ac5535d046cc, 0xbfe08f262f95b8a2, 0xbffeeb5e979f7bf6),
+ (0x7d00750f7493ec14, 0x3ffec4280177abce, 0x3fe0a44135df1990),
+ (0x70b08a0803a2e4f7, 0x3fe0bdd60b0c4039, 0x3ffe952519d0df4a),
+ (0x55fbd54cc4be5f3c, 0x3fe0e1a0aad88c06, 0x3ffe544de9778904),
+ (0xf3554e8402f6265a, 0xbfe111e4621d3a85, 0xbffdfe8c9d7f0ba0),
+ (0xd1c4f6c0576b4ceb, 0x3fe11fa43d2b9635, 0x3ffde6771af75c65),
+ (0x7577c5523259981f, 0xbfe142d0f9becfee, 0xbffda988b5c492be),
+ (0x75b6af03b453c0ef, 0xbfe16fdd915c91fb, 0xbffd5ce67b5edd2e),
+ (0xeb073e7d1af5222d, 0x3fe18ee2b608d90e, 0x3ffd290653690906),
+ (0x550e8ceaf98764b8, 0x3ffce3611c108856, 0x3fe1b9373efe3caf),
+ (0x75dc9fbbd1640b6a, 0x3ffcc975824760e0, 0x3fe1c92ca429ec72),
+ (0x633901c12d4cb6e1, 0x3fe1f0f9270d1256, 0x3ffc8999f7e80c9f),
+ (0x7f04dacd2c2ca8ce, 0x3ffc707fe49c2ec9, 0x3fe200cf1aa3a42b),
+ (0x4332ecebbe152c9a, 0x3ffc2836137657d3, 0x3fe22f07698081d6),
+ (0xc94f80eb836c1307, 0x3ffbf0338691284f, 0x3fe2537bb1fa7042),
+ (0xc404cdd7bc992e30, 0xbffbb536a4e0dad3, 0xbfe27a7f7c6c6344),
+ (0xc53d64204fdf1db3, 0xbffb9b3ee2a4f02d, 0xbfe28be132c06bf3),
+ (0xe4efa91ba9b9e968, 0xbfe2b45dea407296, 0xbffb5f7d64e821e0),
+ (0x637f1d1420fea3f8, 0xbfe2e406e3980e76, 0xbffb1a6df6baf1a2),
+ (0x72eae57421307d45, 0xbffaf50df59e0bba, 0xbfe2fe37d31e7efd),
+ (0xc2e6fa2f34fd750b, 0xbffab9203dbe4805, 0xbfe328cfb306dd71),
+ (0x61ff0525749ceb45, 0xbffa87c166523e00, 0xbfe34c771bd73054),
+ (0x4d744a5ecf95fd93, 0x3fe3570aef0e3b1f, 0x3ffa793eda0a7fe5),
+ (0x62fcbf662333deb3, 0xbffa515ad324e0c2, 0xbfe3745b63c100ee),
+ (0xfa89d1a75f768cd0, 0x3ffa0727c5c4f9f4, 0x3fe3abd130534148),
+ (0x6ce2de1511d237eb, 0xbfe3c00ae9db31f0, 0xbff9ec8041d9a2e7),
+ (0x56a33b90bd51784c, 0x3fe3eacbe508524c, 0x3ff9b4da77df56fd),
+ (0x4c71f17579861fde, 0xbff978184060ca52, 0xbfe41a4f7d1be713),
+ (0xd2573e1a86722c43, 0x3ff959e15efdd2e6, 0x3fe432450d7c4a5f),
+ (0x498c3881301c9450, 0x3ff920f594aa06e6, 0x3fe46004a0bcc88c),
+ (0xde3e4fc147357cf2, 0x3fe47a694e965949, 0x3ff9009276eb7768),
+ (0x4e22da5810e48e2c, 0x3ff8c5e9f63dadb2, 0x3fe4aae663098ea7),
+ (0xfc2b1173d2f9d7c5, 0x3fe4b659cd95ff4a, 0x3ff8b837d228b883),
+ (0x5c782c96a47e6797, 0xbff8951c12bcea82, 0xbfe4d3ee8f6c5d58),
+ (0xfb85676d181e0881, 0x3fe50477049116da, 0x3ff85c5827c4e282),
+ (0x6269a55fd3dcfce2, 0xbfe51d16cd6fc97d, 0xbff83feee767409d),
+ (0x4598ddfdd67bf239, 0x3ff7fbbe057723d2, 0x3fe5591ee0085ea3),
+ (0xd95338bde4d930aa, 0xbfe5822d4e25b75c, 0xbff7cdf636d6b0c8),
+ (0x611503484c027608, 0xbff7c33c14f7c2d5, 0xbfe58be2f042fb3b),
+ (0x5c2ee2e8eb412270, 0xbfe5b526acefe84a, 0xbff796104c692ece),
+ (0xcb58f3d014fca0aa, 0xbff75b46900e6ad1, 0xbfe5ebc9ee50344d),
+ (0x43c6b36129529847, 0x3ff74c923ad7544f, 0x3fe5f99f9e0b3d0a),
+ (0xfa1614294a9f72c8, 0x3ff70d99dd8a1f18, 0x3fe635a624809779),
+ (0x6d769e17aec9b01c, 0xbff6f700728c5edf, 0xbfe64b81491e4d3b),
+ (0xda3899b5876d7e4c, 0x3fe665a0157cdfcc, 0x3ff6dc381ade767d),
+ (0xdea2d627e9529863, 0x3ff6b24ede764eb2, 0x3fe68efbb7f423ba),
+ (0xcc92e4c94a84e291, 0xbfe6b6f40b3bd7cd, 0xbff68a5eb7b8ee06),
+ (0xf8aab297bc40c6d3, 0xbfe6f824ba593202, 0xbff64a65961cc5b3),
+ (0x6bfb68a736eeb1b6, 0x3fe7075f6fd332d8, 0x3ff63ba7d9b8357b),
+ (0xcf94cd7e76b5d43b, 0x3ff6222f4b54bb14, 0x3fe721dfbd310dc3),
+ (0x605cb87016a4858e, 0xbfe75dd6ab966b3b, 0xbff5e9626960a874),
+ (0x4c2f18aaaae1a9a5, 0x3ff5bf431f6a202f, 0x3fe78b18f4dd86cc),
+ (0x5d83a57bdc3cdd0e, 0xbfe7b8fb49fd3969, 0xbff59532f3461951),
+ (0xcfb59ea70736bcbf, 0xbff5840f8b17744d, 0xbfe7cbe0ae02d204),
+ (0xc1089dc2ab12c9b4, 0xbfe7f5a1e9bfb06b, 0xbff55e908266f122),
+ (0x68affef34749279e, 0x3fe8209711c62ee3, 0x3ff5388466d3ef9f),
+ (0x72d7978807cae706, 0xbfe85291c100af7b, 0xbff50ce960ab4ab7),
+ (0xc4a750b852001a47, 0x3ff4f6b641a2a8e6, 0x3fe86c536d8567a6),
+ (0x66cfeadab1736af0, 0xbff4e155036c93d7, 0xbfe885555e56c040),
+ (0xff8223b58fd12c2f, 0xbff4a692b6776ff1, 0xbfe8cb1ac1da9263),
+ (0x74ea533d2ebb1aac, 0xbff4893fc1ab4b47, 0xbfe8ee81f1f3c4fb),
+ (0x7e5b65a329da9204, 0xbfe91de34b0c25b5, 0xbff4628279c98889),
+ (0xee3f0726908f6f7e, 0xbff44233d2075f4e, 0xbfe945f14e54dcc8),
+ (0x7af275b9d87202c4, 0x3fe94b1657573c6c, 0x3ff43e14f1e4ba73),
+ (0x5ab6206f24450152, 0x3ff41215d5e23105, 0x3fe9828845174281),
+ (0x4e24e759fc383d3b, 0xbff3f99b9c0d4208, 0xbfe9a1cacc4b3f71),
+ (0x6e6e238154fe510b, 0x3ff3be6db5d3c2e5, 0x3fe9ee9ecbc9c72c),
+ (0x51115721fcb875a5, 0xbff3a1e91c8e1a9a, 0xbfea144a011e41f3),
+ (0xcd0f08fce53215fe, 0x3fea3fdc281037d2, 0x3ff38152b5b8b26c),
+ (0xc793de849b92e545, 0xbff3630d84adfdc4, 0xbfea68d8627834ab),
+ (0xd8fdb9e8f5a2831b, 0x3ff35d39f575b15e, 0x3fea70caa0a7598a),
+ (0x485b3050ec24e894, 0xbff33dacd8e66b2d, 0xbfea9c2613ca1aca),
+ (0x53cdd772c04825b7, 0x3feaec063c5537cf, 0x3ff30496b5e6234f),
+ (0xf3830e1d9fcb8a63, 0xbfeaf752dcf645db, 0xbff2fc9ead41769c),
+ (0x4fd516dfb479c284, 0xbfeb31f070d00bb3, 0xbff2d3b23a11d388),
+ (0x401c9636ba305733, 0x3ff2b79fee475ac3, 0x3feb5ab9b110014b),
+ (0x74e6629d161dcdf0, 0x3ff2a9b9289117f3, 0x3feb6f19cbe183c1),
+ (0xc831ccafeb5988ca, 0xbfeba1b7e0c9a4be, 0xbff2878906543222),
+ (0x4c6c1ddb028d0a24, 0xbfebda5d682a67dc, 0xbff261d9c2b6d631),
+ (0x50ddbc2d3d77331b, 0x3fec1c6fa9dfd3f2, 0x3ff236a54a6f7473),
+ (0x7f475a5c2b07cd78, 0xbff225167cb98c22, 0xbfec37a3454ef052),
+ (0xfbb16b339d0a17fe, 0x3fec729712255b64, 0x3ff1ff7c6a466364),
+ (0xfe93c58449c8fd51, 0xbff1f86c842db0af, 0xbfec7dc50581afe9),
+ (0xe125d9535e1d7d96, 0xbfecd23da36c0bcc, 0xbff1c3c14d3af49c),
+ (0xe9492784719b40f6, 0xbff1b3f3088363ee, 0xbfecebf92a8f36c0),
+ (0x76693dbf45a5c11c, 0x3ff18e43bdf45f9b, 0x3fed2a0e601005bf),
+ (0x4815c52cf555dcc6, 0xbff17e592952677b, 0xbfed44974ac6a894),
+ (0x49147fcad588e154, 0xbff15f3dded8be9a, 0xbfed78ffa064b041),
+ (0x65cec0767168cebc, 0x3ff152b06f8600d4, 0x3fed8e5ad53d0ccb),
+ (0x7c5a50d620432646, 0xbff125f49b9c4132, 0xbfeddb74c0de0fdb),
+ (0xc394901bb60cd165, 0xbfee0cf91359537e, 0xbff109b2ee01353b),
+ (0xefe39e4669e6b8e3, 0x3fee1d1c905094f4, 0x3ff100916565e23e),
+ (0x6606d5cab1344ed4, 0xbfee6a547b6e0341, 0xbff0d5671b0a47dc),
+ (0xe151f4cddef4501f, 0x3ff0ba872431bfa7, 0x3fee9b315f5e68d6),
+ (0xdbeb6637b76e98b6, 0xbfeec87a200cacbf, 0xbff0a1eb3b8b38a8),
+ (0x4de08bca51af503b, 0x3ff0935cf27fb96c, 0x3feee382525684bd),
+ (0x43ba67829cbf5ddd, 0x3fef18839c0e31e2, 0x3ff0771bd1cb28aa),
+ (0x51c3d8b11e8e3186, 0x3fef59e0cd085215, 0x3ff054c7b82fd1b6),
+ (0x762158955b8d3e1f, 0xbfef981ac2ad0844, 0xbff0349d725d26aa),
+ (0x6b4a97418aacc176, 0xbfefbb6cc26e10da, 0xbff02293b7c89d69),
+ (0xdf22beeedddcdfc1, 0xbfefe7a8fffac117, 0xbff00c34c8c8b9e2),
+ ][:]
+
+ for (x, yt, yc) : inputs
+ var xf : flt64 = std.flt64frombits(x)
+ var rtf, rcf
+ rtf = math.tan(xf)
+ rcf = math.cot(xf)
+
+ var rtu = std.flt64bits(rtf)
+ var rcu = std.flt64bits(rcf)
+
+ testr.check(c, same64(rtu, yt),
+ "tan(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+ x, yt, rtu)
+
+ testr.check(c, same64(rcu, yc),
+ "cot(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+ x, yc, rcu)
+ ;;
+
+}
+
+const tancot05 = {c
+ /* NaN in must give NaN out, for both precisions and both functions */
+ var nan64 : flt64 = std.flt64nan()
+ var nan32 : flt32 = std.flt32nan()
+
+ testr.check(c, std.isnan(math.cot(nan64)), "cot(NaN64) should be NaN")
+ testr.check(c, std.isnan(math.tan(nan64)), "tan(NaN64) should be NaN")
+ testr.check(c, std.isnan(math.cot(nan32)), "cot(NaN32) should be NaN")
+ testr.check(c, std.isnan(math.tan(nan32)), "tan(NaN32) should be NaN")
+}
diff --git a/lib/math/trunc-impl.myr b/lib/math/trunc-impl.myr
index 0ce41ca..cf8fb1d 100644
--- a/lib/math/trunc-impl.myr
+++ b/lib/math/trunc-impl.myr
@@ -1,12 +1,12 @@
use std
pkg math =
- pkglocal const trunc32 : (f : flt32 -> flt32)
- pkglocal const floor32 : (f : flt32 -> flt32)
- pkglocal const ceil32 : (f : flt32 -> flt32)
- pkglocal const trunc64 : (f : flt64 -> flt64)
- pkglocal const floor64 : (f : flt64 -> flt64)
- pkglocal const ceil64 : (f : flt64 -> flt64)
+ pkglocal const trunc32 : (x : flt32 -> flt32)
+ pkglocal const floor32 : (x : flt32 -> flt32)
+ pkglocal const ceil32 : (x : flt32 -> flt32)
+ pkglocal const trunc64 : (x : flt64 -> flt64)
+ pkglocal const floor64 : (x : flt64 -> flt64)
+ pkglocal const ceil64 : (x : flt64 -> flt64)
;;
const Flt32NegMask : uint32 = (1 << 31)
@@ -15,13 +15,13 @@ const Flt32SigMask : uint32 = (1 << 23) - 1
const Flt64NegMask : uint64 = (1 << 63)
const Flt64SigMask : uint64 = (1 << 52) - 1
-pkglocal const floor32 = {f : flt32
+pkglocal const floor32 = {x : flt32
var n, e, s
- (n, e, s) = std.flt32explode(f)
+ (n, e, s) = std.flt32explode(x)
/* Many special cases */
- if e >= 23 || f == -0.0
- -> f
+ if e >= 23 || x == -0.0
+ -> x
elif e < 0
if n
-> -1.0
@@ -33,10 +33,10 @@ pkglocal const floor32 = {f : flt32
if n
var fractional_mask = Flt32SigMask >> (e : uint32)
if s & fractional_mask == 0
- -> f
+ -> x
else
/* Turns out the packing of exp and sig is useful */
- var u : uint32 = std.flt32bits(f) & ~fractional_mask
+ var u : uint32 = std.flt32bits(x) & ~fractional_mask
u += ((1 << 23) >> (e : uint32))
-> std.flt32frombits(u)
;;
@@ -46,25 +46,25 @@ pkglocal const floor32 = {f : flt32
-> std.flt32assem(n, e, s & ~m)
}
-pkglocal const trunc32 = {f : flt32
- if std.flt32bits(f) & Flt32NegMask != 0
- -> -floor32(-f)
+pkglocal const trunc32 = {x : flt32
+ if std.flt32bits(x) & Flt32NegMask != 0
+ -> -floor32(-x)
else
- -> floor32(f)
+ -> floor32(x)
;;
}
-pkglocal const ceil32 = {f : flt32
- -> -floor32(-f)
+pkglocal const ceil32 = {x : flt32
+ -> -floor32(-x)
}
-pkglocal const floor64 = {f : flt64
+pkglocal const floor64 = {x : flt64
var n, e, s
- (n, e, s) = std.flt64explode(f)
+ (n, e, s) = std.flt64explode(x)
/* Many special cases */
- if e >= 52 || f == -0.0
- -> f
+ if e >= 52 || x == -0.0
+ -> x
elif e < 0
if n
-> -1.0
@@ -76,10 +76,10 @@ pkglocal const floor64 = {f : flt64
if n
var fractional_mask = Flt64SigMask >> (e : uint64)
if s & fractional_mask == 0
- -> f
+ -> x
else
/* Turns out the packing of exp and sig is useful */
- var u : uint64 = std.flt64bits(f) & ~fractional_mask
+ var u : uint64 = std.flt64bits(x) & ~fractional_mask
u += ((1 << 52) >> (e : uint64))
-> std.flt64frombits(u)
;;
@@ -89,15 +89,15 @@ pkglocal const floor64 = {f : flt64
-> std.flt64assem(n, e, s & ~m)
}
-pkglocal const trunc64 = {f : flt64
- if std.flt64bits(f) & Flt64NegMask != 0
- -> -floor64(-f)
+pkglocal const trunc64 = {x : flt64
+ if std.flt64bits(x) & Flt64NegMask != 0
+ -> -floor64(-x)
else
- -> floor64(f)
+ -> floor64(x)
;;
}
-pkglocal const ceil64 = {f : flt64
- -> -floor64(-f)
+pkglocal const ceil64 = {x : flt64
+ -> -floor64(-x)
}
diff --git a/lib/math/util.myr b/lib/math/util.myr
index d2e82c8..dadd180 100644
--- a/lib/math/util.myr
+++ b/lib/math/util.myr
@@ -14,8 +14,20 @@ pkg math =
/* Whether RN() requires incrementing after truncating */
const need_round_away : (h : uint64, l : uint64, bitpos_last : int64 -> bool)
+
+ /* Multiply x * y to z1 + z2 */
+ const two_by_two : (x : flt64, y : flt64 -> (flt64, flt64))
+
+ /* Return (s, t) such that s + t = x + y, with s = rn(x + y). */
+ generic fast2sum : (x : @f, y : @f -> (@f, @f)) :: floating, numeric @f
+
+ /* Rounds a + b (as flt64s) to a flt32. */
+ const round_down : (a : flt64, b : flt64 -> flt32)
;;
+/* Split precision down the middle */
+const twentysix_bits_mask = (0xffffffffffffffff << 27)
+
const flt64fromflt32 = {f : flt32
var n, e, s
(n, e, s) = std.flt32explode(f)
@@ -172,3 +184,71 @@ const need_round_away = {h : uint64, l : uint64, bitpos_last : int64
-> hl_is_odd
}
+
+/*
+ Perform high-prec multiplication: x * y = z1 + z2.
+
+ x and y are each split into a high part (the bit pattern with
+ twentysix_bits_mask applied) and a low remainder (the value
+ minus its high part). The four partial products are then
+ accumulated with fast2sum, and the running sum together with
+ its correction term is returned as (z1, z2).
+ */
+const two_by_two = {x : flt64, y : flt64
+ var xh : flt64 = std.flt64frombits(std.flt64bits(x) & twentysix_bits_mask)
+ var xl : flt64 = x - xh
+ var yh : flt64 = std.flt64frombits(std.flt64bits(y) & twentysix_bits_mask)
+ var yl : flt64 = y - yh
+
+ /* Multiply out: a1 is high * high, a2 and a3 are the cross terms, a4 is low * low */
+ var a1 : flt64 = xh * yh
+ var a2 : flt64 = xh * yl
+ var a3 : flt64 = xl * yh
+ var a4 : flt64 = xl * yl
+
+ /*
+ By-hand compensated summation. The cross terms are first
+ ordered so that a2 >= a3.
+ NOTE(review): the comparison is on signed values, not on
+ magnitudes -- confirm this is the ordering fast2sum needs.
+ */
+ var yy, u, t, v, z, s, c
+ if a2 < a3
+ std.swap(&a3, &a2)
+ ;;
+
+ s = a1
+ c = 0.0
+
+ /* a2: s becomes the rounded sum a1 + a2, c the error term from fast2sum */
+ (s, c) = fast2sum(s, a2)
+
+ /*
+ a3: fold it into the correction term first, add that to the
+ running sum, then renormalize the result back into (s, c)
+ */
+ (yy, u) = fast2sum(c, a3)
+ (t, v) = fast2sum(s, yy)
+ z = u + v
+ (s, c) = fast2sum(t, z)
+
+ /* a4: same sequence of steps as for a3 */
+ (yy, u) = fast2sum(c, a4)
+ (t, v) = fast2sum(s, yy)
+ z = u + v
+ (s, c) = fast2sum(t, z)
+
+ -> (s, c)
+}
+
+/*
+ Return (s, t) such that s + t = a + b, with s = rn(a + b).
+
+ s is the computed sum and t = b - (s - a) is the recovered
+ rounding error. This is the Fast2Sum sequence, whose exactness
+ guarantee classically assumes |a| >= |b|; nothing here checks
+ that, so it is presumably left to callers -- TODO confirm.
+ */
+generic fast2sum = {a : @f, b : @f :: floating, numeric @f
+ var s = a + b
+ var z = s - a
+ var t = b - z
+ -> (s, t)
+}
+
+/*
+ Round a + b to a flt32. Only notable if round(a) is a rounding
+ tie, and b is non-zero
+
+ a is taken as the main value and only the sign of b is used:
+ when the bit pattern of a has bits 28, 29 and 30 all set, the
+ pattern is bumped by one (up if b > 0, down if b < 0, left
+ alone otherwise) before the conversion to flt32. In every other
+ case a is converted directly and b is ignored.
+
+ NOTE(review): a flt64 -> flt32 conversion drops the low 29
+ significand bits, so an exact tie would look like
+ (bits & 0x1fffffff) == 0x10000000; the test below checks
+ 0x70000000 instead -- confirm the mask. Also, incrementing the
+ bit pattern of a negative a increases its magnitude -- confirm
+ the adjustment direction is what is wanted for a < 0.
+ */
+const round_down = {a : flt64, b : flt64
+ var au : uint64 = std.flt64bits(a)
+ if au & 0x0000000070000000 == 0x0000000070000000
+ if b > 0.0
+ au++
+ elif b < 0.0
+ au--
+ ;;
+ -> (std.flt64frombits(au) : flt32)
+ ;;
+
+ -> (a : flt32)
+}