| author | Ori Bernstein <ori@eigenstate.org> | 2019-06-09 18:02:59 -0700 |
|---|---|---|
| committer | Ori Bernstein <ori@eigenstate.org> | 2019-06-09 18:02:59 -0700 |
| commit | 4a8bc7aa008f753c082a914a0f95ea3df09d2123 (patch) | |
| tree | 9147165ffc40f7a49ea69584024f8fab3078fdb6 /lib | |
| parent | 9bf446ab7795f7397bc4fb7ae9e46ce14c3f679c (diff) | |
| parent | e09c6b53f3b1928d3752c53c813025dbcbb976e0 (diff) | |
| download | mc-4a8bc7aa008f753c082a914a0f95ea3df09d2123.tar.gz | |
Merge commit 'e09c6b53f3b1928d3752c53c813025dbcbb976e0'
Diffstat (limited to 'lib')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | lib/math/ancillary/generate-minimax-by-Remez.gp | 45 |
| -rw-r--r-- | lib/math/ancillary/log-overkill-constants.c | 102 |
| -rw-r--r-- | lib/math/ancillary/ulp.gp | 4 |
| -rw-r--r-- | lib/math/log-overkill.myr | 609 |
| -rw-r--r-- | lib/math/pown-impl.myr | 192 |
| -rw-r--r-- | lib/math/powr-impl.myr | 290 |
| -rw-r--r-- | lib/math/test/log-overkill.myr | 6 |
| -rw-r--r-- | lib/math/test/pown-impl.myr | 64 |
| -rw-r--r-- | lib/math/test/powr-impl.myr | 165 |
| -rw-r--r-- | lib/math/util.myr | 50 |
10 files changed, 853 insertions, 674 deletions
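The bulk of this merge replaces the old iterative `log-overkill` algorithm with the table-driven reduction described in the new header comment of `lib/math/log-overkill.myr`: reduce x to 2^e · xs with xs ∈ [1, 2), then peel off four successive factors F_i = 1 + j_i · 2^(-5i), with log(F_i) and 1/F_i read from the 32-entry tables C1..C4 as hi/lo pairs, leaving a remainder below 2^-20 that a degree-5 polynomial handles. `powr` then reduces to essentially exp(log_overkill(x) · y), and `pown`/`rootn` are reworked on top of the same primitive. The sketch below shows only the reduction structure, in plain C doubles rather than the (hi, lo) double-double pairs the Myrddin code carries; the function and variable names are illustrative, and `log1p()` plus a division stand in for the table lookups.

```c
/*
 * Reduction-only sketch of the new log-overkill scheme, in ordinary
 * double precision.  The real code keeps every quantity as a (hi, lo)
 * pair and reads log(F) and 1/F from the C1..C4 tables; here log1p()
 * and a division stand in for those lookups, so this only shows the
 * structure.  Assumes x is positive, finite, and normal.
 */
#include <math.h>
#include <stdio.h>

static double log_by_stages(double x)
{
	int e;
	double xs = 2.0 * frexp(x, &e);	/* xs in [1, 2), x = 2^(e-1) * xs */
	e -= 1;

	double r = xs - 1.0;		/* log(xs) = log(1 + r), r in [0, 1) */
	double acc = e * log(2.0);	/* accumulates e*log(2) + sum of log(F_i) */

	for (int i = 1; i <= 4; i++) {
		double step = ldexp(1.0, -5 * i);	/* 2^(-5i) */
		int j = (int)floor(r / step);		/* j in {0, ..., 31}; the Myrddin
							   code masks significand bits */
		if (j > 31)
			j = 31;				/* guard against rounding at the edge */
		double Fm1 = j * step;			/* F_i - 1 = j * 2^(-5i) */
		acc += log1p(Fm1);			/* table lookup in the real code */
		r = (r - Fm1) / (1.0 + Fm1);		/* 1 + r := 1 + f_i/F_i, now r < 2^(-5i) */
	}

	return acc + log1p(r);	/* r < 2^-20: a degree-5 polynomial suffices in the real code */
}

int main(void)
{
	double x = 123.456;
	printf("%.17g\n%.17g\n", log_by_stages(x), log(x));
	return 0;
}
```

In the actual patch the precision comes from carrying every intermediate as a hi/lo pair (`fast2sum`, `slow2sum`, `two_by_two64`, `hl_add`, `hl_mult`) and from tabulating log(F_i) and 1/F_i as hi/lo pairs; the sketch only illustrates why four 5-bit stages leave a remainder small enough for a short polynomial.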
diff --git a/lib/math/ancillary/generate-minimax-by-Remez.gp b/lib/math/ancillary/generate-minimax-by-Remez.gp index a388f4e..01784b3 100644 --- a/lib/math/ancillary/generate-minimax-by-Remez.gp +++ b/lib/math/ancillary/generate-minimax-by-Remez.gp @@ -1,8 +1,6 @@ /* Attempts to find a minimax polynomial of degree n for f via Remez - algorithm. The Remez algorithm appears to be slightly shot, but - the initial step of approximating by Chebyshev nodes works, and - is usually good enough. + algorithm. */ { chebyshev_node(a, b, k, n) = my(p, m, c); @@ -62,19 +60,21 @@ xnext = a + k*(b - a)/gran; ynext = err(f, n, v, xnext); - if(ycur > yprev && ycur > ynext, listput(l, [xcur, abs(ycur)]),); + if(ycur > yprev && ycur > ynext, listput(l, [xcur, ycur]),); ); vecsort(l, 2); if(length(l) < n + 2 || l[1][2] < 2^(-2000), return("q"),); len = length(l); - lnext = listcreate(); + X = vector(n + 2); + for(j = 1, n + 2, + lnext = listcreate(); thisa = l[j][1] - (b-a)/gran; thisb = l[j][1] + (b-a)/gran; - xprev = thisa - (thisb - a)/gran; + xprev = thisa - (thisb - thisa)/gran; yprev = err(f, n, v, xprev); xcur = thisa; @@ -93,14 +93,15 @@ xnext = thisa + k*(thisb - thisa)/gran; ynext = abs(f(xnext) - p_eval(n, v, xnext)); - if(ycur > yprev && ycur > ynext, listput(lnext, xcur),); + if(ycur > yprev && ycur > ynext, listput(lnext, [xcur, ycur]),); ); + + vecsort(lnext, 2); + if(length(lnext) < 1, return("q"),); + X[j] = lnext[1][1]; + listkill(lnext); ); - vecsort(lnext, cmp); listkill(l); - X = vector(n + 2); - for (k = 1, min(n + 2, length(lnext)), X[k] = lnext[k]); - listkill(lnext); vecsort(X); return(X); } @@ -135,6 +136,14 @@ return(1/tanoverx(x)); } +{ log2xoverx(x) = + return(if(x == 1,1,log(x)/(x-1))/log(2)); +} + +{ log1p(x) = + return(log(1 + x)); +} + print("\n"); print("Minimaxing sin(x) / x, degree 6, on [-Pi/(4 * 256), Pi/(4 * 256)]:"); find_minimax(sinoverx, 6, -Pi/1024, Pi/1024) @@ -158,7 +167,7 @@ print("\n"); print("Minmimaxing x*cot(x), degree 8, on [-Pi/(4 * 256), Pi/(4 * 256)]:"); find_minimax(cotx, 8, -Pi/1024, Pi/1024) print("\n"); -print("(Take the first v, and remember to divide by x)\n"); +print("(Remember to divide by x)\n"); print("\n"); print("---\n"); print("\n"); @@ -175,4 +184,16 @@ print("\n"); print("(You'll need to add a 0x0 at the beginning to make a degree 13...\n"); print("\n"); print("---\n"); +print("Minmimaxing log_2(x) / (x - 1), degree 7, on [1, 2^(1/8)]:"); +find_minimax(log2xoverx, 7, 1, 2^(1/8)) +print("\n"); +/* print("(You'll need to add a 0x0 at the beginning to make a degree 13...\n"); */ +/* print("\n"); */ +print("---\n"); +print("Minmimaxing log(1 + x), degree 5, on [0, 2^-20) [it's just going to give the Taylor expansion]:"); +find_minimax(log1p, 5, 0, 2^-20) +print("\n"); +/* print("(You'll need to add a 0x0 at the beginning to make a degree 13...\n"); */ +/* print("\n"); */ +print("---\n"); print("Remember that there's that extra, ugly E term at the end of the vector that you want to lop off.\n"); diff --git a/lib/math/ancillary/log-overkill-constants.c b/lib/math/ancillary/log-overkill-constants.c index ac145cc..1af4480 100644 --- a/lib/math/ancillary/log-overkill-constants.c +++ b/lib/math/ancillary/log-overkill-constants.c @@ -8,80 +8,60 @@ #define FLT64_TO_UINT64(f) (*((uint64_t *) ((char *) &f))) #define UINT64_TO_FLT64(u) (*((double *) ((char *) &u))) -#define FLT32_TO_UINT32(f) (*((uint32_t *) ((char *) &f))) -#define UINT32_TO_FLT32(u) (*((double *) ((char *) &u))) - -int main(void) +int +main(void) { - mpfr_t one; - mpfr_t 
two_to_the_minus_k; - mpfr_t one_plus_two_to_the_minus_k; - mpfr_t ln_one_plus_two_to_the_minus_k; - mpfr_t one_minus_two_to_the_minus_k; - mpfr_t ln_one_minus_two_to_the_minus_k; mpfr_t t1; mpfr_t t2; + mpfr_t t3; double d = 0; uint64_t u = 0; + size_t j = 0; + long int k = 0; - mpfr_init2(one, 10000); - mpfr_init2(two_to_the_minus_k, 10000); - mpfr_init2(one_plus_two_to_the_minus_k, 10000); - mpfr_init2(ln_one_plus_two_to_the_minus_k, 10000); - mpfr_init2(one_minus_two_to_the_minus_k, 10000); - mpfr_init2(ln_one_minus_two_to_the_minus_k, 10000); mpfr_init2(t1, 10000); mpfr_init2(t2, 10000); + mpfr_init2(t3, 10000); - printf("/* C_plus */\n"); - printf("(0x0000000000000000, 0x0000000000000000, 0x0000000000000000), /* dummy */\n"); - - for (size_t k = 1; k <= 27; ++k) { - mpfr_set_si(one, 1, MPFR_RNDN); - mpfr_mul_2si(two_to_the_minus_k, one, -k, MPFR_RNDN); - mpfr_add(one_plus_two_to_the_minus_k, one, two_to_the_minus_k, MPFR_RNDN); - mpfr_log(ln_one_plus_two_to_the_minus_k, one_plus_two_to_the_minus_k, MPFR_RNDN); + /* C1 */ + for (k = -5; k >= -20; k -= 5) { + printf( + "const C%ld : (uint64, uint64, uint64, uint64)[32] = [\n", + (k / + ( + -5))); - mpfr_set(t1, ln_one_plus_two_to_the_minus_k, MPFR_RNDN); - d = mpfr_get_d(t1, MPFR_RNDN); - u = FLT64_TO_UINT64(d); - printf("(%#018lx, ", u); - mpfr_set_d(t2, d, MPFR_RNDN); - mpfr_sub(t1, t1, t2, MPFR_RNDN); - d = mpfr_get_d(t1, MPFR_RNDN); - u = FLT64_TO_UINT64(d); - printf("%#018lx, ", u); - mpfr_set_d(t2, d, MPFR_RNDN); - mpfr_sub(t1, t1, t2, MPFR_RNDN); - d = mpfr_get_d(t1, MPFR_RNDN); - u = FLT64_TO_UINT64(d); - printf("%#018lx), /* k = %zu */\n", u, k); - } + for (j = 0; j < 32; ++j) { + mpfr_set_si(t1, 1, MPFR_RNDN); + mpfr_set_si(t2, k, MPFR_RNDN); + mpfr_exp2(t2, t2, MPFR_RNDN); + mpfr_mul_si(t2, t2, j, MPFR_RNDN); + mpfr_add(t1, t1, t2, MPFR_RNDN); - printf("\n"); - printf("/* C_minus */\n"); - printf("(0x0000000000000000, 0x0000000000000000, 0x0000000000000000), /* dummy */\n"); + /* first, log(1 + ...) */ + mpfr_log(t2, t1, MPFR_RNDN); + d = mpfr_get_d(t2, MPFR_RNDN); + u = FLT64_TO_UINT64(d); + printf(" (%#018lx, ", u); + mpfr_set_d(t3, d, MPFR_RNDN); + mpfr_sub(t2, t2, t3, MPFR_RNDN); + d = mpfr_get_d(t2, MPFR_RNDN); + u = FLT64_TO_UINT64(d); + printf("%#018lx, ", u); - for (size_t k = 1; k <= 27; ++k) { - mpfr_set_si(one, 1, MPFR_RNDN); - mpfr_mul_2si(two_to_the_minus_k, one, -k, MPFR_RNDN); - mpfr_sub(one_minus_two_to_the_minus_k, one, two_to_the_minus_k, MPFR_RNDN); - mpfr_log(ln_one_minus_two_to_the_minus_k, one_minus_two_to_the_minus_k, MPFR_RNDN); + /* now, 1/(1 + ...) 
*/ + mpfr_pow_si(t2, t1, -1, MPFR_RNDN); + d = mpfr_get_d(t2, MPFR_RNDN); + u = FLT64_TO_UINT64(d); + printf(" %#018lx, ", u); + mpfr_set_d(t3, d, MPFR_RNDN); + mpfr_sub(t2, t2, t3, MPFR_RNDN); + d = mpfr_get_d(t2, MPFR_RNDN); + u = FLT64_TO_UINT64(d); + printf("%#018lx), /* j = %zu */\n", u, j); + } - mpfr_set(t1, ln_one_minus_two_to_the_minus_k, MPFR_RNDN); - d = mpfr_get_d(t1, MPFR_RNDN); - u = FLT64_TO_UINT64(d); - printf("(%#018lx, ", u); - mpfr_set_d(t2, d, MPFR_RNDN); - mpfr_sub(t1, t1, t2, MPFR_RNDN); - d = mpfr_get_d(t1, MPFR_RNDN); - u = FLT64_TO_UINT64(d); - printf("%#018lx, ", u); - mpfr_set_d(t2, d, MPFR_RNDN); - mpfr_sub(t1, t1, t2, MPFR_RNDN); - d = mpfr_get_d(t1, MPFR_RNDN); - u = FLT64_TO_UINT64(d); - printf("%#018lx), /* k = %zu */\n", u, k); + printf("]\n\n"); } return 0; diff --git a/lib/math/ancillary/ulp.gp b/lib/math/ancillary/ulp.gp index 9a4b155..c4861b4 100644 --- a/lib/math/ancillary/ulp.gp +++ b/lib/math/ancillary/ulp.gp @@ -31,7 +31,7 @@ e = bitand(a, 0x7f800000) >> 23; s = bitand(a, 0x007fffff); - if(e != 0, s = bitor(s, 0x00800000),); + if(e != 0, s = bitor(s, 0x00800000), s = 2.0 * s); s = s * 2.0^(-23); e = e - 127; return((-1)^n * s * 2^(e)); @@ -42,7 +42,7 @@ e = bitand(a, 0x7ff0000000000000) >> 52; s = bitand(a, 0x000fffffffffffff); - if(e != 0, s = bitor(s, 0x0010000000000000),); + if(e != 0, s = bitor(s, 0x0010000000000000), s = 2.0 * s); s = s * 2.0^(-52); e = e - 1023; return((-1)^n * 2^(e) * s); diff --git a/lib/math/log-overkill.myr b/lib/math/log-overkill.myr index a7e27ed..dcc2569 100644 --- a/lib/math/log-overkill.myr +++ b/lib/math/log-overkill.myr @@ -1,48 +1,39 @@ use std use "fpmath" -use "log-impl" -use "exp-impl" -use "sum-impl" +use "impls" use "util" /* - This is an implementation of log following [Mul16] 8.3.2, returning - far too much precision. These are slower than the - table-and-low-degree-polynomial implementations of exp-impl.myr and - log-impl.myr, but are needed to handle the powr, pown, and rootn - functions. + This is an implementation of log(x) using the following idea, based on [Tan90] - This is only for flt64, because if you want this for flt32 you should - just cast to flt64, use the fast functions, and then split back. + First, reduce to 2^e · xs, with xs ∈ [1, 2). - Note that the notation of [Mul16] 8.3.2 is confusing, to say the - least. [NM96] is, perhaps, a clearer presentation. + xs = F1 + f1, with + F1 = 1 + j1/2^5 + j1 ∈ {1, 2, …, 2^5 - 1} + f1 ∈ [0, 2^-5) - To recap, we use an iteration, starting with t_1 = 0, L_1 = x, and - iterate as + log(xs) = log(F1) + log(1 + f1/F1) - t_{n+1} = t_{n} - ln(1 + d_n 2^{-n}) - L_{n+1} = L_n (1 + d_n 2^{-n}) + 1 + f1/F1 = F2 + f2, with + F2 = 1 + j1/2^10 + j2 ∈ {1, 2, …, 2^5 - 1} + f2 ∈ [0, 2^-10) - where d_n is in {-1, 0, 1}, chosen so that as n -> oo, L_n approaches - 1, then t_n approaches ln(x). + log(xs) = log(F1) + log(F2) + log(1 + f2/F2) - If we let l_n = L_n - 1, we initialize l_1 = x - 1 and iterate as + … - l_{n+1} = l_n (1 + d_n 2^{-n}) + d_n 2^{-n} + log(xs) = log(F1) + log(F2) + log(F3) + log(F4) + log(1 + f4/F4) - If we further consider l'_n = 2^n l_n, we initialize l'_1 = x - 1, - and iterate as + And f4/F4 < 2^-20, so we can get 100 bits of precision using a + degree 5 polynomial. 
- l'_{n + 1} = 2 l'_{n} (1 + d_n 2^{-n}) + 2 d_n - - The nice thing about this is that we can pick d_n easily based on - comparing l'_n to some easy constants: - - { +1 if [l'_n] <= -1/2 - d_n = { 0 if [l'_n] = 0 or 1/2 - { -1 if [l'_n] >= 1 + The specific choice of using 4 tables, each with 2^5 entries, may + be improvable. It's a trade-off between storage for the tables and + the number of floating point operations to chain the results + together. */ pkg math = pkglocal const logoverkill32 : (x : flt32 -> (flt32, flt32)) @@ -50,83 +41,148 @@ pkg math = ;; /* - Tables of 1 +/- 2^-k, for k = 0 to 159, with k = 0 a dummy row. 159 - is chosen as the first k such that the error between 2^(-53 * 2) and - 2^(-53 * 2) + log(1 + 2^(-k)) is less than 1 ulp, therefore we'll - have a full 53 * 2 bits of precision available with these tables. The - layout for C_plus is - - ( ln(1 + 2^-k)[hi], ln(1 + 2^-k)[lo], ln(1 + 2^-k)[very lo]) , - - and for C_minus it is similar, but handling ln(1 - 2^-k). + Ci is a table such that, for Ci[j] = (L1, L2, I1, I2), + L1, L2 are log(1 + j·2^-(5i)) + I1, I2 are 1/(1 + j·2^-(5i)) + */ +const C1 : (uint64, uint64, uint64, uint64)[32] = [ + (000000000000000000, 000000000000000000, 0x3ff0000000000000, 000000000000000000), /* j = 0 */ + (0x3f9f829b0e783300, 0x3c333e3f04f1ef23, 0x3fef07c1f07c1f08, 0xbc7f07c1f07c1f08), /* j = 1 */ + (0x3faf0a30c01162a6, 0x3c485f325c5bbacd, 0x3fee1e1e1e1e1e1e, 0x3c6e1e1e1e1e1e1e), /* j = 2 */ + (0x3fb6f0d28ae56b4c, 0xbc5906d99184b992, 0x3fed41d41d41d41d, 0x3c80750750750750), /* j = 3 */ + (0x3fbe27076e2af2e6, 0xbc361578001e0162, 0x3fec71c71c71c71c, 0x3c8c71c71c71c71c), /* j = 4 */ + (0x3fc29552f81ff523, 0x3c6301771c407dbf, 0x3febacf914c1bad0, 0xbc8bacf914c1bad0), /* j = 5 */ + (0x3fc5ff3070a793d4, 0xbc5bc60efafc6f6e, 0x3feaf286bca1af28, 0x3c8af286bca1af28), /* j = 6 */ + (0x3fc9525a9cf456b4, 0x3c6d904c1d4e2e26, 0x3fea41a41a41a41a, 0x3c80690690690690), /* j = 7 */ + (0x3fcc8ff7c79a9a22, 0xbc64f689f8434012, 0x3fe999999999999a, 0xbc8999999999999a), /* j = 8 */ + (0x3fcfb9186d5e3e2b, 0xbc6caaae64f21acb, 0x3fe8f9c18f9c18fa, 0xbc7f3831f3831f38), /* j = 9 */ + (0x3fd1675cababa60e, 0x3c2ce63eab883717, 0x3fe8618618618618, 0x3c88618618618618), /* j = 10 */ + (0x3fd2e8e2bae11d31, 0xbc78f4cdb95ebdf9, 0x3fe7d05f417d05f4, 0x3c67d05f417d05f4), /* j = 11 */ + (0x3fd4618bc21c5ec2, 0x3c7f42decdeccf1d, 0x3fe745d1745d1746, 0xbc7745d1745d1746), /* j = 12 */ + (0x3fd5d1bdbf5809ca, 0x3c74236383dc7fe1, 0x3fe6c16c16c16c17, 0xbc7f49f49f49f49f), /* j = 13 */ + (0x3fd739d7f6bbd007, 0xbc78c76ceb014b04, 0x3fe642c8590b2164, 0x3c7642c8590b2164), /* j = 14 */ + (0x3fd89a3386c1425b, 0xbc729639dfbbf0fb, 0x3fe5c9882b931057, 0x3c7310572620ae4c), /* j = 15 */ + (0x3fd9f323ecbf984c, 0xbc4a92e513217f5c, 0x3fe5555555555555, 0x3c85555555555555), /* j = 16 */ + (0x3fdb44f77bcc8f63, 0xbc7cd04495459c78, 0x3fe4e5e0a72f0539, 0x3c8e0a72f0539783), /* j = 17 */ + (0x3fdc8ff7c79a9a22, 0xbc74f689f8434012, 0x3fe47ae147ae147b, 0xbc6eb851eb851eb8), /* j = 18 */ + (0x3fddd46a04c1c4a1, 0xbc70467656d8b892, 0x3fe4141414141414, 0x3c64141414141414), /* j = 19 */ + (0x3fdf128f5faf06ed, 0xbc7328df13bb38c3, 0x3fe3b13b13b13b14, 0xbc83b13b13b13b14), /* j = 20 */ + (0x3fe02552a5a5d0ff, 0xbc7cb1cb51408c00, 0x3fe3521cfb2b78c1, 0x3c7a90e7d95bc60a), /* j = 21 */ + (0x3fe0be72e4252a83, 0xbc8259da11330801, 0x3fe2f684bda12f68, 0x3c82f684bda12f68), /* j = 22 */ + (0x3fe154c3d2f4d5ea, 0xbc859c33171a6876, 0x3fe29e4129e4129e, 0x3c804a7904a7904a), /* j = 23 */ + (0x3fe1e85f5e7040d0, 0x3c7ef62cd2f9f1e3, 
0x3fe2492492492492, 0x3c82492492492492), /* j = 24 */ + (0x3fe2795e1289b11b, 0xbc6487c0c246978e, 0x3fe1f7047dc11f70, 0x3c81f7047dc11f70), /* j = 25 */ + (0x3fe307d7334f10be, 0x3c6fb590a1f566da, 0x3fe1a7b9611a7b96, 0x3c61a7b9611a7b96), /* j = 26 */ + (0x3fe393e0d3562a1a, 0xbc858eef67f2483a, 0x3fe15b1e5f75270d, 0x3c415b1e5f75270d), /* j = 27 */ + (0x3fe41d8fe84672ae, 0x3c89192f30bd1806, 0x3fe1111111111111, 0x3c61111111111111), /* j = 28 */ + (0x3fe4a4f85db03ebb, 0x3c313dfa3d3761b6, 0x3fe0c9714fbcda3b, 0xbc7f79b47582192e), /* j = 29 */ + (0x3fe52a2d265bc5ab, 0xbc61883750ea4d0a, 0x3fe0842108421084, 0x3c70842108421084), /* j = 30 */ + (0x3fe5ad404c359f2d, 0xbc435955683f7196, 0x3fe0410410410410, 0x3c80410410410410), /* j = 31 */ +] - They are generated by ancillary/log-overkill-constants.c. +const C2 : (uint64, uint64, uint64, uint64)[32] = [ + (000000000000000000, 000000000000000000, 0x3ff0000000000000, 000000000000000000), /* j = 0 */ + (0x3f4ffc00aa8ab110, 0xbbe0fecbeb9b6cdb, 0x3feff801ff801ff8, 0x3c2ff801ff801ff8), /* j = 1 */ + (0x3f5ff802a9ab10e6, 0x3bfe29e3a153e3b2, 0x3feff007fc01ff00, 0x3c8ff007fc01ff00), /* j = 2 */ + (0x3f67f7047d7983da, 0x3c0a275a19204e80, 0x3fefe811f28a186e, 0xbc849093915301bf), /* j = 3 */ + (0x3f6ff00aa2b10bc0, 0x3c02821ad5a6d353, 0x3fefe01fe01fe020, 0xbc6fe01fe01fe020), /* j = 4 */ + (0x3f73f38a60f06489, 0x3c1693c937494046, 0x3fefd831c1cdbed1, 0x3c8e89d3b75ace7e), /* j = 5 */ + (0x3f77ee11ebd82e94, 0xbc161e96e2fc5d90, 0x3fefd04794a10e6a, 0x3c881bd63ea20ced), /* j = 6 */ + (0x3f7be79c70058ec9, 0xbbd964fefef02b62, 0x3fefc86155aa1659, 0xbc6b8fc468497f61), /* j = 7 */ + (0x3f7fe02a6b106789, 0xbbce44b7e3711ebf, 0x3fefc07f01fc07f0, 0x3c6fc07f01fc07f0), /* j = 8 */ + (0x3f81ebde2d1997e6, 0xbbfffa46e1b2ec81, 0x3fefb8a096acfacc, 0xbc82962e18495af3), /* j = 9 */ + (0x3f83e7295d25a7d9, 0xbbeff29a11443a06, 0x3fefb0c610d5e939, 0xbc5cb8337f41db5c), /* j = 10 */ + (0x3f85e1f703ecbe50, 0x3c23eb0bb43693b9, 0x3fefa8ef6d92aca5, 0x3c7cd0c1eaba7f22), /* j = 11 */ + (0x3f87dc475f810a77, 0xbc116d7687d3df21, 0x3fefa11caa01fa12, 0xbc7aaff02f71aaff), /* j = 12 */ + (0x3f89d61aadc6bd8d, 0xbc239b097b525947, 0x3fef994dc3455e8d, 0xbc82546d9bc5bd59), /* j = 13 */ + (0x3f8bcf712c74384c, 0xbc1f6842688f499a, 0x3fef9182b6813baf, 0x3c6b210c54d70f4a), /* j = 14 */ + (0x3f8dc84b19123815, 0xbc25f0e2d267d821, 0x3fef89bb80dcc421, 0xbc8e7da8c7156f9d), /* j = 15 */ + (0x3f8fc0a8b0fc03e4, 0xbc183092c59642a1, 0x3fef81f81f81f820, 0xbc8f81f81f81f820), /* j = 16 */ + (0x3f90dc4518afcc88, 0xbc379c0189fdfe78, 0x3fef7a388f9da20f, 0x3c7f99b2c82d3fb1), /* j = 17 */ + (0x3f91d7f7eb9eebe7, 0xbc2d41fe63d2dbf9, 0x3fef727cce5f530a, 0x3c84643cedd1cfd9), /* j = 18 */ + (0x3f92d36cefb557c3, 0xbc132fa3e4f20cf7, 0x3fef6ac4d8f95f7a, 0x3c8e8ed9770a8dde), /* j = 19 */ + (0x3f93cea44346a575, 0xbc10cb5a902b3a1c, 0x3fef6310aca0dbb5, 0x3c8d2e19807d8c43), /* j = 20 */ + (0x3f94c99e04901ded, 0xbc2cd10505ada0d6, 0x3fef5b60468d989f, 0xbc805a26b4cad717), /* j = 21 */ + (0x3f95c45a51b8d389, 0xbc3b10b6c3ec21b4, 0x3fef53b3a3fa204e, 0x3c8450467c5430f3), /* j = 22 */ + (0x3f96bed948d1b7d1, 0xbc1058290fde6de1, 0x3fef4c0ac223b2bc, 0x3c815c2df7afcd24), /* j = 23 */ + (0x3f97b91b07d5b11b, 0xbc35b602ace3a510, 0x3fef44659e4a4271, 0x3c85fc17734c36b8), /* j = 24 */ + (0x3f98b31faca9b00e, 0x3c1d5a46da6f6772, 0x3fef3cc435b0713c, 0x3c81d0a7e69ea094), /* j = 25 */ + (0x3f99ace7551cc514, 0x3c33409c1df8167f, 0x3fef3526859b8cec, 0x3c2f3526859b8cec), /* j = 26 */ + (0x3f9aa6721ee835aa, 0xbc24a3a50b6c5621, 0x3fef2d8c8b538c0f, 0xbc88a987ac35964a), /* j = 27 */ 
+ (0x3f9b9fc027af9198, 0xbbf0ae69229dc868, 0x3fef25f644230ab5, 0x3c594ed8175c78b3), /* j = 28 */ + (0x3f9c98d18d00c814, 0xbc250589df0f25bf, 0x3fef1e63ad57473c, 0xbc8bf54d8dbc6a00), /* j = 29 */ + (0x3f9d91a66c543cc4, 0xbc1d34e608cbdaab, 0x3fef16d4c4401f17, 0xbc759ddff074959e), /* j = 30 */ + (0x3f9e8a3ee30cdcac, 0x3c07086b1c00b395, 0x3fef0f4986300ba6, 0xbc811b6b7ee8766a), /* j = 31 */ +] - Conveniently, for k > 27, we can calculate the entry exactly using a - few terms of the Taylor expansion for ln(1 + x), with the x^{2+} - terms vanishing past k = 53. This is possible since we only care - about two flt64s worth of precision. - */ -const C_plus : (uint64, uint64, uint64)[28] = [ - (0x0000000000000000, 0x0000000000000000, 0x0000000000000000), /* dummy */ - (0x3fd9f323ecbf984c, 0xbc4a92e513217f5c, 0x38e0c0cfa41ff669), /* k = 1 */ - (0x3fcc8ff7c79a9a22, 0xbc64f689f8434012, 0x390a24ae3b2f53a1), /* k = 2 */ - (0x3fbe27076e2af2e6, 0xbc361578001e0162, 0x38b55db94ebc4018), /* k = 3 */ - (0x3faf0a30c01162a6, 0x3c485f325c5bbacd, 0xb8e0ece597165991), /* k = 4 */ - (0x3f9f829b0e783300, 0x3c333e3f04f1ef23, 0xb8d814544147acc9), /* k = 5 */ - (0x3f8fc0a8b0fc03e4, 0xbc183092c59642a1, 0xb8b52414fc416fc2), /* k = 6 */ - (0x3f7fe02a6b106789, 0xbbce44b7e3711ebf, 0x386a567b6587df34), /* k = 7 */ - (0x3f6ff00aa2b10bc0, 0x3c02821ad5a6d353, 0xb8912dcccb588a4a), /* k = 8 */ - (0x3f5ff802a9ab10e6, 0x3bfe29e3a153e3b2, 0xb89538d49c4f745e), /* k = 9 */ - (0x3f4ffc00aa8ab110, 0xbbe0fecbeb9b6cdb, 0xb877171cf29e89d1), /* k = 10 */ - (0x3f3ffe002aa6ab11, 0x3b999e2b62cc632d, 0xb81eae851c58687c), /* k = 11 */ - (0x3f2fff000aaa2ab1, 0x3ba0bbc04dc4e3dc, 0x38152723342e000b), /* k = 12 */ - (0x3f1fff8002aa9aab, 0x3b910e6678af0afc, 0x382ed521af29bc8d), /* k = 13 */ - (0x3f0fffc000aaa8ab, 0xbba3bbc110fec82c, 0xb84f79185e42fbaf), /* k = 14 */ - (0x3effffe0002aaa6b, 0xbb953bbbe6661d42, 0xb835071791df7d3e), /* k = 15 */ - (0x3eeffff0000aaaa3, 0xbb8553bbbd110fec, 0xb82ff1fae6cea01a), /* k = 16 */ - (0x3edffff80002aaaa, 0xbb75553bbbc66662, 0x3805f05f166325ff), /* k = 17 */ - (0x3ecffffc0000aaab, 0xbb6d5553bbbc1111, 0x37a380f8138f70f4), /* k = 18 */ - (0x3ebffffe00002aab, 0xbb5655553bbbbe66, 0xb7f987507707503c), /* k = 19 */ - (0x3eafffff00000aab, 0xbb45755553bbbbd1, 0xb7c10fec7ed7ec7e), /* k = 20 */ - (0x3e9fffff800002ab, 0xbb355955553bbbbc, 0xb7d9999875075875), /* k = 21 */ - (0x3e8fffffc00000ab, 0xbb2555d55553bbbc, 0x37bf7777809c09a1), /* k = 22 */ - (0x3e7fffffe000002b, 0xbb15556555553bbc, 0x37b106666678af8b), /* k = 23 */ - (0x3e6ffffff000000b, 0xbb055557555553bc, 0x37a110bbbbbc04e0), /* k = 24 */ - (0x3e5ffffff8000003, 0xbaf555559555553c, 0x3791110e6666678b), /* k = 25 */ - (0x3e4ffffffc000001, 0xbae555555d555554, 0x37811110fbbbbbc0), /* k = 26 */ - (0x3e3ffffffe000000, 0x3ac5555553555556, 0xb76ddddddf333333), /* k = 27 */ +const C3 : (uint64, uint64, uint64, uint64)[32] = [ + (000000000000000000, 000000000000000000, 0x3ff0000000000000, 000000000000000000), /* j = 0 */ + (0x3effffe0002aaa6b, 0xbb953bbbe6661d42, 0x3fefffc0007fff00, 0x3c2fffc0007fff00), /* j = 1 */ + (0x3f0fffc000aaa8ab, 0xbba3bbc110fec82c, 0x3fefff8001fff800, 0x3c6fff8001fff800), /* j = 2 */ + (0x3f17ffb8011ffaf0, 0x3b984c534f3d9b6a, 0x3fefff40047fe501, 0xbc8780f2fa4e222b), /* j = 3 */ + (0x3f1fff8002aa9aab, 0x3b910e6678af0afc, 0x3fefff0007ffc002, 0xbbdfff0007ffc002), /* j = 4 */ + (0x3f23ff9c029a9723, 0x3bc1b965303b23b1, 0x3feffec00c7f8305, 0xbc6e30d217cb1211), /* j = 5 */ + (0x3f27ff70047fd782, 0xbbced098a5c0aff0, 0x3feffe8011ff280a, 0x3c6f8685b1bbab34), /* j = 
6 */ + (0x3f2bff3c07250a51, 0xbbc89dd6d6bad8c1, 0x3feffe40187ea913, 0xbc7f8346d2208239), /* j = 7 */ + (0x3f2fff000aaa2ab1, 0x3ba0bbc04dc4e3dc, 0x3feffe001ffe0020, 0xbc2ffe001ffe0020), /* j = 8 */ + (0x3f31ff5e07979982, 0xbbce0e704817ebcd, 0x3feffdc0287d2733, 0x3c7f32ce6d7c4d43), /* j = 9 */ + (0x3f33ff380a6a0e74, 0x3bdb81fcb95bc1fe, 0x3feffd8031fc184e, 0x3c69e5fa087756ad), /* j = 10 */ + (0x3f35ff0e0ddc70a1, 0x3bacf6f3d97a3c05, 0x3feffd403c7acd72, 0x3c860b1b0bacff22), /* j = 11 */ + (0x3f37fee011febc18, 0x3bd2b9bcf5d3f323, 0x3feffd0047f940a2, 0xbc5e5d274451985a), /* j = 12 */ + (0x3f39feae16e0ec8b, 0xbbb6137aceeb34b1, 0x3feffcc054776bdf, 0x3c56b1b1f3ed39e8), /* j = 13 */ + (0x3f3bfe781c92fd4a, 0xbbc4ed10713cc126, 0x3feffc8061f5492c, 0xbc19fd284f974b74), /* j = 14 */ + (0x3f3dfe3e2324e946, 0x3bc0916462dd5deb, 0x3feffc407072d28b, 0x3c84eb0c748a57ca), /* j = 15 */ + (0x3f3ffe002aa6ab11, 0x3b999e2b62cc632d, 0x3feffc007ff00200, 0xbc7ffc007ff00200), /* j = 16 */ + (0x3f40fedf19941e6e, 0xbbb194c2e0aa6338, 0x3feffbc0906cd18c, 0x3c75b11e79f3cd9f), /* j = 17 */ + (0x3f41febc1e5ccc3c, 0x3bdc657d895d3592, 0x3feffb80a1e93b34, 0xbc84d11299626e29), /* j = 18 */ + (0x3f42fe9723b55bac, 0x3bd6cfb73e538464, 0x3feffb40b46538fa, 0xbc8d42a81b0bfc39), /* j = 19 */ + (0x3f43fe7029a5c947, 0xbbd4d578bf46e36a, 0x3feffb00c7e0c4e1, 0x3c7e673fde054f2c), /* j = 20 */ + (0x3f44fe4730361165, 0x3be400d77e93f2fd, 0x3feffac0dc5bd8ee, 0x3c8a38b2b2aeaf57), /* j = 21 */ + (0x3f45fe1c376e3031, 0xbbd524eb8a5ae7f6, 0x3feffa80f1d66f25, 0xbc6a5778f73582ce), /* j = 22 */ + (0x3f46fdef3f5621a3, 0xbbdf09d734886d52, 0x3feffa4108508189, 0xbc81a45478d24a37), /* j = 23 */ + (0x3f47fdc047f5e185, 0xbbebfa5c57d202d3, 0x3feffa011fca0a1e, 0x3c6a5b0eed338657), /* j = 24 */ + (0x3f48fd8f51556b70, 0xbbd0f4f2e08fd201, 0x3feff9c1384302e9, 0x3c8b9a1be68cf877), /* j = 25 */ + (0x3f49fd5c5b7cbace, 0x3beb6ed49f17d42d, 0x3feff98151bb65ef, 0x3c82d92be315df8f), /* j = 26 */ + (0x3f4afd276673cada, 0x3bd3222545da594f, 0x3feff9416c332d34, 0x3c8dbbba66ae573a), /* j = 27 */ + (0x3f4bfcf07242969d, 0x3bc5db4d2b3efe1c, 0x3feff90187aa52be, 0xbc698a69b8df8f19), /* j = 28 */ + (0x3f4cfcb77ef118f1, 0x3becc55406f300fb, 0x3feff8c1a420d091, 0xbc8032d47bdbf02c), /* j = 29 */ + (0x3f4dfc7c8c874c82, 0xbbe863e9d57a176f, 0x3feff881c196a0b2, 0x3c858cf2f70e18b2), /* j = 30 */ + (0x3f4efc3f9b0d2bc8, 0x3bd1e8e8a5f5b8b7, 0x3feff841e00bbd28, 0x3c782227ba60dc8b), /* j = 31 */ ] -const C_minus : (uint64, uint64, uint64)[28] = [ - (0x0000000000000000, 0x0000000000000000, 0x0000000000000000), /* dummy */ - (0xbfe62e42fefa39ef, 0xbc7abc9e3b39803f, 0xb907b57a079a1934), /* k = 1 */ - (0xbfd269621134db92, 0xbc7e0efadd9db02b, 0x39163d5cf0b6f233), /* k = 2 */ - (0xbfc1178e8227e47c, 0x3c50e63a5f01c691, 0xb8f03c776a3fb0f1), /* k = 3 */ - (0xbfb08598b59e3a07, 0x3c5dd7009902bf32, 0x38ea7da07274e01d), /* k = 4 */ - (0xbfa0415d89e74444, 0xbc4c05cf1d753622, 0xb8d3bc1c184cef0a), /* k = 5 */ - (0xbf90205658935847, 0xbc327c8e8416e71f, 0x38b19642aac1310f), /* k = 6 */ - (0xbf8010157588de71, 0xbc146662d417ced0, 0xb87e91702f8418af), /* k = 7 */ - (0xbf70080559588b35, 0xbc1f96638cf63677, 0x38a90badb5e868b4), /* k = 8 */ - (0xbf60040155d5889e, 0x3be8f98e1113f403, 0x38601ac2204fbf4b), /* k = 9 */ - (0xbf50020055655889, 0xbbe9abe6bf0fa436, 0x3867c7d335b216f3), /* k = 10 */ - (0xbf40010015575589, 0x3bec8863f23ef222, 0x38852c36a3d20146), /* k = 11 */ - (0xbf30008005559559, 0x3bddd332a0e20e2f, 0x385c8b6b9ff05329), /* k = 12 */ - (0xbf20004001555d56, 0x3bcddd88863f53f6, 0xb859332cbe6e6ac5), /* k = 13 */ - 
(0xbf10002000555655, 0xbbb62224ccd5f17f, 0xb8366327cc029156), /* k = 14 */ - (0xbf00001000155575, 0xbba5622237779c0a, 0xb7d38f7110a9391d), /* k = 15 */ - (0xbef0000800055559, 0xbb95562222cccd5f, 0xb816715f87b8e1ee), /* k = 16 */ - (0xbee0000400015556, 0x3b75553bbbb1110c, 0x381fb17b1f791778), /* k = 17 */ - (0xbed0000200005555, 0xbb79555622224ccd, 0x3805074f75071791), /* k = 18 */ - (0xbec0000100001555, 0xbb65d55562222377, 0xb80de7027127028d), /* k = 19 */ - (0xbeb0000080000555, 0xbb5565555622222d, 0x37e9995075035075), /* k = 20 */ - (0xbea0000040000155, 0xbb45575555622222, 0xb7edddde70270670), /* k = 21 */ - (0xbe90000020000055, 0xbb35559555562222, 0xb7c266666af8af9b), /* k = 22 */ - (0xbe80000010000015, 0xbb25555d55556222, 0xb7b11bbbbbce04e0), /* k = 23 */ - (0xbe70000008000005, 0xbb15555655555622, 0xb7a111666666af8b), /* k = 24 */ - (0xbe60000004000001, 0xbb05555575555562, 0xb7911113bbbbbce0), /* k = 25 */ - (0xbe50000002000000, 0xbaf5555559555556, 0xb78111112666666b), /* k = 26 */ - (0xbe40000001000000, 0xbac5555557555556, 0x376ddddddc888888), /* k = 27 */ +const C4 : (uint64, uint64, uint64, uint64)[32] = [ + (000000000000000000, 000000000000000000, 0x3ff0000000000000, 000000000000000000), /* j = 0 */ + (0x3eafffff00000aab, 0xbb45755553bbbbd1, 0x3feffffe00002000, 0xbc2ffffe00002000), /* j = 1 */ + (0x3ebffffe00002aab, 0xbb5655553bbbbe66, 0x3feffffc00008000, 0xbc5ffffc00008000), /* j = 2 */ + (0x3ec7fffdc0004800, 0xbb343ffcf666dfe6, 0x3feffffa00012000, 0xbc7afffaf000f300), /* j = 3 */ + (0x3ecffffc0000aaab, 0xbb6d5553bbbc1111, 0x3feffff800020000, 0xbc8ffff800020000), /* j = 4 */ + (0x3ed3fffce000a6ab, 0xbb7f1952e455f818, 0x3feffff600031fff, 0x3c4801387f9e581f), /* j = 5 */ + (0x3ed7fffb80012000, 0xbb743ff9ecceb2cc, 0x3feffff400047ffe, 0x3c8400287ff0d006), /* j = 6 */ + (0x3edbfff9e001c955, 0xbb702e9d89490dc5, 0x3feffff200061ffd, 0x3c84804b07df2c8e), /* j = 7 */ + (0x3edffff80002aaaa, 0xbb75553bbbc66662, 0x3feffff00007fffc, 0x3baffff00007fffc), /* j = 8 */ + (0x3ee1fffaf001e5ff, 0x3b797c2e21b72cff, 0x3fefffee000a1ffa, 0x3c8380cd078cabc1), /* j = 9 */ + (0x3ee3fff9c0029aa9, 0x3b8c8ad1ba965260, 0x3fefffec000c7ff8, 0x3c780270fe7960f4), /* j = 10 */ + (0x3ee5fff870037754, 0xbb8d0c6bc1b51bdd, 0x3fefffea000f1ff6, 0xbc897e36793a8ca8), /* j = 11 */ + (0x3ee7fff700047ffd, 0x3b8e006132f6735a, 0x3fefffe80011fff3, 0xbc8ffd7801e5fe94), /* j = 12 */ + (0x3ee9fff57005b8a7, 0x3b771277672a8835, 0x3fefffe600151fef, 0xbc74f906f5aa5866), /* j = 13 */ + (0x3eebfff3c0072551, 0xbb86c9d894dd7427, 0x3fefffe400187feb, 0xbc8bfb4f841a6c69), /* j = 14 */ + (0x3eedfff1f008c9fa, 0xbb7701aebecf7ae4, 0x3fefffe2001c1fe6, 0xbc8779d1fdcb2212), /* j = 15 */ + (0x3eeffff0000aaaa3, 0xbb8553bbbd110fec, 0x3fefffe0001fffe0, 0x3befffe0001fffe0), /* j = 16 */ + (0x3ef0fff6f80665a6, 0xbb9b95400571451d, 0x3fefffde00241fda, 0xbc8875ce02d51cfe), /* j = 17 */ + (0x3ef1fff5e00797fa, 0xbb9a0e8ef2f395cc, 0x3fefffdc00287fd2, 0x3c8c0cd071958038), /* j = 18 */ + (0x3ef2fff4b808ee4d, 0x3b984638f069f71f, 0x3fefffda002d1fca, 0x3c8a8fe8751bf4ef), /* j = 19 */ + (0x3ef3fff3800a6aa1, 0xbb794b915f81751a, 0x3fefffd80031ffc2, 0xbc8fec781869e17c), /* j = 20 */ + (0x3ef4fff2380c0ef4, 0x3b80a43b5348b6b9, 0x3fefffd600371fb8, 0xbc8668429728999b), /* j = 21 */ + (0x3ef5fff0e00ddd47, 0x3b624a1f136cc09c, 0x3fefffd4003c7fad, 0xbc77c6cf4ea2f3e0), /* j = 22 */ + (0x3ef6ffef780fd79a, 0xbb9a716c4210cdd0, 0x3fefffd200421fa1, 0xbc5aeeb948d5a74d), /* j = 23 */ + (0x3ef7ffee0011ffec, 0xbb8ff3d9a8c98613, 0x3fefffd00047ff94, 0x3c143fe1a02d8fbc), /* j = 24 */ + 
(0x3ef8ffec7814583d, 0x3b9f7bc8a4e1db73, 0x3fefffce004e1f86, 0xbc6141450a04205a), /* j = 25 */ + (0x3ef9ffeae016e28f, 0xbb8cb889999dd735, 0x3fefffcc00547f77, 0xbc83c837daa53cb3), /* j = 26 */ + (0x3efaffe93819a0e0, 0xbb9be60d8a0b5ba8, 0x3fefffca005b1f66, 0x3c7d81be350f0677), /* j = 27 */ + (0x3efbffe7801c9530, 0xbb873b12aed4646c, 0x3fefffc80061ff55, 0xbc8fb4f8834d1a39), /* j = 28 */ + (0x3efcffe5b81fc17f, 0x3b9fe950a87b2785, 0x3fefffc600691f41, 0x3c8dd655eb844520), /* j = 29 */ + (0x3efdffe3e02327cf, 0xbb9bfd7604f796f2, 0x3fefffc400707f2d, 0x3c618b7f1a71ae6b), /* j = 30 */ + (0x3efeffe1f826ca1d, 0xbb60ae99630e566e, 0x3fefffc200781f17, 0x3c80f0bb2d9557af), /* j = 31 */ ] const logoverkill32 = {x : flt32 @@ -140,204 +196,209 @@ const logoverkill32 = {x : flt32 const logoverkill64 = {x : flt64 var xn, xe, xs (xn, xe, xs) = std.flt64explode(x) + var xsf = std.flt64assem(false, 0, xs) + var xb = std.flt64bits(x) + var tn, te, ts + var t1, t2 /* Special cases */ if std.isnan(x) -> (std.flt64frombits(0x7ff8000000000000), std.flt64frombits(0x7ff8000000000000)) - elif xe == -1024 && xs == 0 + elif xe <= -1023 && xs == 0 /* log (+/- 0) is -infinity */ -> (std.flt64frombits(0xfff0000000000000), std.flt64frombits(0xfff0000000000000)) - elif xe == 1023 - -> (std.flt64frombits(0xfff8000000000000), std.flt64frombits(0xfff8000000000000)) elif xn /* log(-anything) is NaN */ -> (std.flt64frombits(0x7ff8000000000000), std.flt64frombits(0x7ff8000000000000)) ;; - /* - Deal with 2^xe up front: multiply xe by a high-precision log(2). We'll - add them back in to the giant mess of tis later. - */ - var xef : flt64 = (xe : flt64) - var log_2_hi, log_2_lo - (log_2_hi, log_2_lo) = accurate_logs64[128] /* See log-impl.myr */ - var lxe_1, lxe_2, lxe_3, lxe_4 - (lxe_1, lxe_2) = two_by_two64(xef, std.flt64frombits(log_2_hi)) - (lxe_3, lxe_4) = two_by_two64(xef, std.flt64frombits(log_2_lo)) - - /* - We split t into three parts, so that we can gradually build up two - flt64s worth of information - */ - var t1 = 0.0 - var t2 = 0.0 - var t3 = 0.0 - - /* - We also split lprime. - */ - var lprime1 - var lprime2 - (lprime1, lprime2) = slow2sum(std.flt64assem(false, 1, xs), -2.0) - var lprime3 = 0.0 - - for var k = 1; k <= 107; ++k - /* Calculate d_k and some quanitites for iteration */ - var d = 0.0 - var ln_hi : flt64, ln_mi : flt64, ln_lo : flt64 - + if xe <= -1023 /* - Note the truncation method for [Mul16] is for signed-digit systems, - which we don't have. This comparison follows from the remark following - (8.23), though. + We depend on being able to pick bits out of xs as if it were normal, so + normalize any subnormals. */ - if lprime1 <= -0.5 - d = 1.0 - (ln_hi, ln_mi, ln_lo) = get_C_plus(k) - elif lprime1 < 1.0 - d = 0.0 - - /* In this case, t_n is unchanged, and we just scale lprime by 2 */ - lprime1 = lprime1 * 2.0 - lprime2 = lprime2 * 2.0 - lprime3 = lprime3 * 2.0 - - /* - If you're looking for a way to speed this up, calculate how many k we - can skip here, preferably by a lookup table. 
- */ - continue - else - d = -1.0 - (ln_hi, ln_mi, ln_lo) = get_C_minus(k) + xe++ + var check = 1 << 52 + while xs & check == 0 + xs <<= 1 + xe-- ;; - - /* t_{n + 1} */ - (t1, t2, t3) = foursum(t1, t2, t3, -1.0 * ln_hi) - (t1, t2, t3) = foursum(t1, t2, t3, -1.0 * ln_mi) - (t1, t2, t3) = foursum(t1, t2, t3, -1.0 * ln_lo) - - /* lprime_{n + 1} */ - lprime1 *= 2.0 - lprime2 *= 2.0 - lprime3 *= 2.0 - - var lp1m = d * scale2(lprime1, -k) - var lp2m = d * scale2(lprime2, -k) - var lp3m = d * scale2(lprime3, -k) - (lprime1, lprime2, lprime3) = foursum(lprime1, lprime2, lprime3, lp1m) - (lprime1, lprime2, lprime3) = foursum(lprime1, lprime2, lprime3, lp2m) - (lprime1, lprime2, lprime3) = foursum(lprime1, lprime2, lprime3, lp3m) - (lprime1, lprime2, lprime3) = foursum(lprime1, lprime2, lprime3, 2.0 * d) + xsf = std.flt64assem(false, 0, xs) ;; - var l : flt64[:] = [t1, t2, t3, lxe_1, lxe_2, lxe_3, lxe_4][:] - std.sort(l, mag_cmp64) - -> double_compensated_sum(l) -} - -/* significand for 1/3 (if you reconstruct without compensating, you get 4/3) */ -const one_third_sig = 0x0015555555555555 - -/* and for 1/5 (if you reconstruct, you get 8/5) */ -const one_fifth_sig = 0x001999999999999a + var shift = 0 + var non_trivial = 0 + var then_invert = false + var j1, F1, f1, logF1_hi, logF1_lo, F1_inv_hi, F1_inv_lo, fF1_hi, fF1_lo -/* - These calculations are incredibly slow. Somebody should speed them up. - */ -const get_C_plus = {k : int64 - if k < 0 - -> (0.0, 0.0, 0.0) - elif k < 28 - var t1, t2, t3 - (t1, t2, t3) = C_plus[k] - -> (std.flt64frombits(t1), std.flt64frombits(t2), std.flt64frombits(t3)) - elif k < 36 - var t1 = std.flt64assem(false, -k, 1 << 53) /* x [ = 2^-k ] */ - var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ - var t3 = std.flt64assem(false, -3*k - 2, one_third_sig) /* x^3 / 3 */ - var t4 = std.flt64assem(true, -4*k - 2, 1 << 53) /* -x^4 / 4 */ - var t5 = std.flt64assem(false, -5*k - 3, one_fifth_sig) /* x^5 / 5 */ - -> fast_fivesum(t1, t2, t3, t4, t5) - elif k < 54 - var t1 = std.flt64assem(false, -k, 1 << 53) /* x [ = 2^-k ] */ - var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ - var t3 = std.flt64assem(false, -3*k - 2, one_third_sig) /* x^3 / 3 */ - var t4 = std.flt64assem(true, -4*k - 2, 1 << 53) /* -x^4 / 4 */ - -> fast_foursum(t1, t2, t3, t4) + /* F1 */ + if xe == -1 && xs > 0x001d1ff05e41cfba + /* + If we reduced to [1, 2) unconditionally, then values of x like 0.999… = + 2^-1 · 1.999… would cause subtractive cancellation; we'd compute + log(1.999…), then subtract out log(2) at the end. They'd agree on the + first n bits, and we'd lose n bits of precision. + + This is only a problem for exponent -1, and for xs large enough; + outside that, the numbers are so different that we won't lose precision + by cancelling. But here, we compute 1/x, proceed (with exponent 0), and + flip all the signs at the end. 
+ */ + xe = 0 + var xinv_hi = 1.0 / x + var xinv_lo = fma64(-1.0 * xinv_hi, x, 1.0) / x + (tn, te, ts) = std.flt64explode(xinv_hi) + non_trivial = ((47 >= te) : uint64) * ((47 - te < 64) : uint64) + shift = non_trivial * ((47 - te) : uint64) + j1 = non_trivial * ((ts >> shift) & 0x1f) + var F1m1 = scale2((j1 : flt64), -5) + F1 = 1.0 + F1m1 + var f1_hi, f1_lo + (f1_hi, f1_lo) = fast2sum(xinv_hi - F1, xinv_lo) + (logF1_hi, logF1_lo, F1_inv_hi, F1_inv_lo) = C1[j1] + + /* Compute 1 + f1/F1 */ + (fF1_hi, fF1_lo) = two_by_two64(f1_hi, std.flt64frombits(F1_inv_hi)) + (fF1_lo, t1) = slow2sum(fF1_lo, f1_lo * std.flt64frombits(F1_inv_hi)) + (fF1_lo, t2) = slow2sum(fF1_lo, f1_hi * std.flt64frombits(F1_inv_lo)) + (fF1_hi, fF1_lo) = fast2sum(fF1_hi, fF1_lo + (t1 + t2)) + then_invert = true else - var t1 = std.flt64assem(false, -k, 1 << 53) /* x [ = 2^-k ] */ - var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ - var t3 = std.flt64assem(false, -3*k - 2, one_third_sig) /* x^3 / 3 */ - -> (t1, t2, t3) + j1 = (xs & 0x000f800000000000) >> 47 + F1 = std.flt64assem(false, 0, xs & 0xffff800000000000) + f1 = xsf - F1 + (logF1_hi, logF1_lo, F1_inv_hi, F1_inv_lo) = C1[j1] + + /* Compute 1 + f1/F1 */ + (fF1_hi, fF1_lo) = two_by_two64(f1, std.flt64frombits(F1_inv_hi)) + (fF1_lo, t1) = slow2sum(fF1_lo, f1 * std.flt64frombits(F1_inv_lo)) + (fF1_hi, fF1_lo) = fast2sum(fF1_hi, fF1_lo) + fF1_lo += t1 ;; -} -const get_C_minus = {k : int64 - if k < 0 - -> (0.0, 0.0, 0.0) - elif k < 28 - var t1, t2, t3 - (t1, t2, t3) = C_minus[k] - -> (std.flt64frombits(t1), std.flt64frombits(t2), std.flt64frombits(t3)) - elif k < 36 - var t1 = std.flt64assem(true, -k, 1 << 53) /* x [ = 2^-k ] */ - var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ - var t3 = std.flt64assem(true, -3*k - 2, one_third_sig) /* x^3 / 3 */ - var t4 = std.flt64assem(true, -4*k - 2, 1 << 53) /* -x^4 / 4 */ - var t5 = std.flt64assem(true, -5*k - 3, one_fifth_sig) /* x^5 / 5 */ - -> fast_fivesum(t1, t2, t3, t4, t5) - elif k < 54 - var t1 = std.flt64assem(true, -k, 1 << 53) /* x [ = 2^-k ] */ - var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ - var t3 = std.flt64assem(true, -3*k - 2, one_third_sig) /* x^3 / 3 */ - var t4 = std.flt64assem(true, -4*k - 2, 1 << 53) /* -x^4 / 4 */ - -> fast_foursum(t1, t2, t3, t4) - else - var t1 = std.flt64assem(true, -k, 1 << 53) /* x [ = 2^-k ] */ - var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ - var t3 = std.flt64assem(true, -3*k - 2, one_third_sig) /* x^3 / 3 */ - -> (t1, t2, t3) - ;; -} + /* F2 */ + (tn, te, ts) = std.flt64explode(fF1_hi) + non_trivial = ((42 >= te) : uint64) * ((42 - te < 64) : uint64) + shift = non_trivial * ((42 - te) : uint64) + var j2 = non_trivial * ((ts >> shift) & 0x1f) + var F2m1 = scale2((j2 : flt64), -10) + var F2 = 1.0 + F2m1 + var f2_hi, f2_lo + (f2_hi, f2_lo) = fast2sum(fF1_hi - F2m1, fF1_lo) + var logF2_hi, logF2_lo, F2_inv_hi, F2_inv_lo + (logF2_hi, logF2_lo, F2_inv_hi, F2_inv_lo) = C2[j2] + + /* Compute 1 + f2/F2 */ + var fF2_hi, fF2_lo + (fF2_hi, fF2_lo) = two_by_two64(f2_hi, std.flt64frombits(F2_inv_hi)) + (fF2_lo, t1) = slow2sum(fF2_lo, f2_lo * std.flt64frombits(F2_inv_hi)) + (fF2_lo, t2) = slow2sum(fF2_lo, f2_hi * std.flt64frombits(F2_inv_lo)) + (fF2_hi, fF2_lo) = fast2sum(fF2_hi, fF2_lo + (t1 + t2)) + + /* F3 (just like F2) */ + (tn, te, ts) = std.flt64explode(fF2_hi) + non_trivial = ((37 >= te) : uint64) * ((37 - te < 64) : uint64) + shift = non_trivial * ((37 - te) : uint64) + var j3 = non_trivial * ((ts >> shift) & 0x1f) + var F3m1 
= scale2((j3 : flt64), -15) + var F3 = 1.0 + F3m1 + var f3_hi, f3_lo + (f3_hi, f3_lo) = fast2sum(fF2_hi - F3m1, fF2_lo) + var logF3_hi, logF3_lo, F3_inv_hi, F3_inv_lo + (logF3_hi, logF3_lo, F3_inv_hi, F3_inv_lo) = C3[j3] + + /* Compute 1 + f3/F3 */ + var fF3_hi, fF3_lo + (fF3_hi, fF3_lo) = two_by_two64(f3_hi, std.flt64frombits(F3_inv_hi)) + (fF3_lo, t1) = slow2sum(fF3_lo, f3_lo * std.flt64frombits(F3_inv_hi)) + (fF3_lo, t2) = slow2sum(fF3_lo, f3_hi * std.flt64frombits(F3_inv_lo)) + (fF3_hi, fF3_lo) = fast2sum(fF3_hi, fF3_lo + (t1 + t2)) + + /* F4 (just like F2) */ + (tn, te, ts) = std.flt64explode(fF3_hi) + non_trivial = ((32 >= te) : uint64) * ((32 - te < 64) : uint64) + shift = non_trivial * ((32 - te) : uint64) + var j4 = non_trivial * ((ts >> shift) & 0x1f) + var F4m1 = scale2((j4 : flt64), -20) + var F4 = 1.0 + F4m1 + var f4_hi, f4_lo + (f4_hi, f4_lo) = fast2sum(fF3_hi - F4m1, fF3_lo) + var logF4_hi, logF4_lo, F4_inv_hi, F4_inv_lo + (logF4_hi, logF4_lo, F4_inv_hi, F4_inv_lo) = C4[j4] + + /* Compute 1 + f4/F4 */ + var fF4_hi, fF4_lo + (fF4_hi, fF4_lo) = two_by_two64(f4_hi, std.flt64frombits(F4_inv_hi)) + (fF4_lo, t1) = slow2sum(fF4_lo, f4_lo * std.flt64frombits(F4_inv_hi)) + (fF4_lo, t2) = slow2sum(fF4_lo, f4_hi * std.flt64frombits(F4_inv_lo)) + (fF4_hi, fF4_lo) = fast2sum(fF4_hi, fF4_lo + (t1 + t2)) -const foursum = {a1 : flt64, a2 : flt64, a3 : flt64, x : flt64 - var t1, t2, t3, t4, t5, t6, s1, s2, s3, s4 + /* + L = log(1 + f4/F4); we'd like to use horner_polyu, but since we have + _hi and _lo, it becomes more complicated. + */ + var L_hi, L_lo + /* r = (1/5) · x */ + (L_hi, L_lo) = hl_mult(std.flt64frombits(0x3fc999999999999a), std.flt64frombits(0xbc6999999999999a), fF4_hi, fF4_lo) - (t5, t6) = slow2sum(a3, x) - (t3, t4) = slow2sum(a2, t5) - (t1, t2) = slow2sum(a1, t3) - (s3, s4) = slow2sum(t4, t6) - (s1, s2) = slow2sum(t2, s3) + /* r = r - 1/4 */ + (t1, t2) = fast2sum(std.flt64frombits(0xbfd0000000000000), L_lo) + (L_hi, L_lo) = fast2sum(t1, L_hi) + L_lo += t2 - -> (t1, s1, s2 + s4) -} + /* r = r · x */ + (L_hi, L_lo) = hl_mult(L_hi, L_lo, fF4_hi, fF4_lo) -/* - Specifically for use in get_C_{plus,minus}, in which we know the - magnitude orders of the ais. - */ -const fast_foursum = {a1 : flt64, a2 : flt64, a3 : flt64, a4 : flt64 - (a3, a4) = fast2sum(a3, a4) - (a2, a3) = fast2sum(a2, a3) - (a1, a2) = fast2sum(a1, a2) + /* r = r + 1/3 */ + (L_hi, L_lo) = hl_add(std.flt64frombits(0x3fd5555555555555), std.flt64frombits(0x3c75555555555555), L_hi, L_lo) - (a3, a4) = slow2sum(a3, a4) - (a2, a3) = slow2sum(a2, a3) + /* r = r · x */ + (L_hi, L_lo) = hl_mult(L_hi, L_lo, fF4_hi, fF4_lo) - -> (a1, a2, a3 + a4) -} + /* r = r - 1/2 */ + (t1, t2) = fast2sum(std.flt64frombits(0xbfe0000000000000), L_lo) + (L_hi, L_lo) = fast2sum(t1, L_hi) + L_lo += t2 + + /* r = r · x */ + (L_hi, L_lo) = hl_mult(L_hi, L_lo, fF4_hi, fF4_lo) -const fast_fivesum = {a1 : flt64, a2 : flt64, a3 : flt64, a4 : flt64, a5 : flt64 - (a4, a5) = fast2sum(a4, a5) - (a3, a4) = fast2sum(a3, a4) - (a2, a3) = fast2sum(a2, a3) - (a1, a2) = fast2sum(a1, a2) + /* r = r + 1 */ + (t1, t2) = fast2sum(1.0, L_lo) + (L_hi, L_lo) = fast2sum(t1, L_hi) + L_lo += t2 + /* r = r · x */ + (L_hi, L_lo) = hl_mult(L_hi, L_lo, fF4_hi, fF4_lo) - (a4, a5) = slow2sum(a4, a5) - (a3, a4) = slow2sum(a3, a4) - (a2, a3) = slow2sum(a2, a3) + /* + Finally, compute log(F1) + log(F2) + log(F3) + log(F4) + L. 
We may + assume F1 > F2 > F3 > F4 > F5, since the only way this is disrupted is + if some Fi == 1.0, in which case the log is 0 and the fast2sum works + out either way. We can also assume each F1,2,3 > L. + */ + var lsig_hi, lsig_lo + + /* log(F4) + L, slow because they're the same order of magnitude */ + (t1, t2) = slow2sum(std.flt64frombits(logF4_lo), L_lo) + (lsig_lo, t1) = slow2sum(L_hi, t1) + (lsig_hi, lsig_lo) = slow2sum(std.flt64frombits(logF4_hi), lsig_lo) + + (lsig_hi, lsig_lo) = hl_add(std.flt64frombits(logF3_hi), std.flt64frombits(logF3_lo), lsig_hi, lsig_lo + (t1 + t2)) + (lsig_hi, lsig_lo) = hl_add(std.flt64frombits(logF2_hi), std.flt64frombits(logF2_lo), lsig_hi, lsig_lo) + (lsig_hi, lsig_lo) = hl_add(std.flt64frombits(logF1_hi), std.flt64frombits(logF1_lo), lsig_hi, lsig_lo) + + /* Oh yeah, and we need xe * log(2) */ + var xel2_hi, xel2_lo, lx_hi, lx_lo + (xel2_hi, xel2_lo) = hl_mult((xe : flt64), 0.0, std.flt64frombits(0x3fe62e42fefa39ef), std.flt64frombits(0x3c7abc9e3b39803f)) + + (t1, t2) = slow2sum(xel2_lo, lsig_lo) + (lx_lo, t1) = slow2sum(lsig_hi, t1) + (lx_hi, lx_lo) = slow2sum(xel2_hi, lx_lo) + (lx_hi, lx_lo) = slow2sum(lx_hi, lx_lo + (t1 + t2)) + + if then_invert + -> (-1.0 * lx_hi, -1.0 * lx_lo) + ;; - (a4, a5) = slow2sum(a4, a5) - -> (a1, a2, a3 + a4) + -> (lx_hi, lx_lo) } diff --git a/lib/math/pown-impl.myr b/lib/math/pown-impl.myr index 2feecb5..6dfec19 100644 --- a/lib/math/pown-impl.myr +++ b/lib/math/pown-impl.myr @@ -1,8 +1,9 @@ use std use "fpmath" -use "log-impl" +use "impls" use "log-overkill" +use "log-impl" use "sum-impl" use "util" @@ -32,7 +33,13 @@ type fltdesc(@f, @u, @i) = struct neginf : @u magcmp : (f : @f, g : @f -> std.order) two_by_two : (x : @f, y : @f -> (@f, @f)) + split_add : (x_h : @f, x_l : @f, y_h : @f, y_l : @f -> (@f, @f)) + split_mul : (x_h : @f, x_l : @f, y_h : @f, y_l : @f -> (@f, @f)) + floor : (x : @f -> @f) log_overkill : (x : @f -> (@f, @f)) + precision : @i + nosgn_mask : @u + implicit_bit : @u emin : @i emax : @i imax : @i @@ -52,7 +59,13 @@ const desc32 : fltdesc(flt32, uint32, int32) = [ .neginf = 0xff800000, .magcmp = mag_cmp32, .two_by_two = two_by_two32, + .split_add = split_add32, + .split_mul = split_mul32, + .floor = floor32, .log_overkill = logoverkill32, + .precision = 24, + .nosgn_mask = 0x7fffffff, + .implicit_bit = 23, .emin = -126, .emax = 127, .imax = 2147483647, /* For detecting overflow in final exponent */ @@ -72,13 +85,37 @@ const desc64 : fltdesc(flt64, uint64, int64) = [ .neginf = 0xfff0000000000000, .magcmp = mag_cmp64, .two_by_two = two_by_two64, + .split_add = hl_add, + .split_mul = hl_mult, + .floor = floor64, .log_overkill = logoverkill64, + .precision = 53, + .nosgn_mask = 0x7fffffffffffffff, + .implicit_bit = 52, .emin = -1022, .emax = 1023, .imax = 9223372036854775807, .imin = -9223372036854775808, ] +const split_add32 = {x_h : flt32, x_l : flt32, y_h : flt32, y_l : flt32 + var x : flt64 = (x_h : flt64) + (x_l : flt64) + var y : flt64 = (y_h : flt64) + (y_l : flt64) + var z = x + y + var z_h : flt32 = (z : flt32) + var z_l : flt32 = ((z - (z_h : flt64)) : flt32) + -> (z_h, z_l) +} + +const split_mul32 = {x_h : flt32, x_l : flt32, y_h : flt32, y_l : flt32 + var x : flt64 = (x_h : flt64) + (x_l : flt64) + var y : flt64 = (y_h : flt64) + (y_l : flt64) + var z = x * y + var z_h : flt32 = (z : flt32) + var z_l : flt32 = ((z - (z_h : flt64)) : flt32) + -> (z_h, z_l) +} + const pown32 = {x : flt32, n : int32 -> powngen(x, n, desc32) } @@ -109,7 +146,7 @@ generic powngen = {x : @f, n : @i, d : fltdesc(@f, 
@u, @i) :: numeric,floating,s /* Propagate NaN (why doesn't this come first? Ask IEEE.) */ -> d.frombits(d.nan) elif (x == 0.0 || x == -0.0) - if n < 0 && (n % 2 == 1) && xn + if n < 0 && (n % 2 == -1) && xn /* (+/- 0)^n = +/- oo */ -> d.frombits(d.neginf) elif n < 0 @@ -123,6 +160,9 @@ generic powngen = {x : @f, n : @i, d : fltdesc(@f, @u, @i) :: numeric,floating,s elif n == 1 /* Anything^1 is itself */ -> x + elif n == -1 + /* The CPU is probably better at division than we are at pow(). */ + -> 1.0/x ;; /* (-f)^n = (-1)^n * (f)^n. Figure this out now, then pretend f >= 0.0 */ @@ -142,62 +182,58 @@ generic powngen = {x : @f, n : @i, d : fltdesc(@f, @u, @i) :: numeric,floating,s Since n and e, and I are all integers, we can get the last part from scale2. The hard part is computing I and F, and then computing 2^F. */ + if xe > 0 + /* + But first: do some rough calculations: if we can show n*log(xs) has the + same sign as n*e, and n*e would cause overflow, then we might as well + return right now. + + This also takes care of subnormals very nicely, so we don't have to do + any special handling to reconstitute xs "right", as we do in rootn. + */ + var exp_rough_estimate = n * xe + if n > 0 && (exp_rough_estimate > d.emax + 1 || (exp_rough_estimate / n != xe)) + -> ult_sgn * d.frombits(d.inf) + elif n < 0 && (exp_rough_estimate < d.emin - d.precision - 1 || (exp_rough_estimate / n != xe)) + -> ult_sgn * 0.0 + ;; + elif xe < 0 + /* + Also, if consider xs/2 and xe + 1, we can analyze the case in which + n*log(xs) has a different sign from n*e. + */ + var exp_rough_estimate = n * (xe + 1) + if n > 0 && (exp_rough_estimate < d.emin - d.precision - 1 || (exp_rough_estimate / n != (xe + 1))) + -> ult_sgn * 0.0 + elif n < 0 && (exp_rough_estimate > d.emax + 1 || (exp_rough_estimate / n != (xe + 1))) + -> ult_sgn * d.frombits(d.inf) + ;; + ;; + var ln_xs_hi, ln_xs_lo (ln_xs_hi, ln_xs_lo) = d.log_overkill(d.assem(false, 0, xs)) /* Now x^n = 2^(n * [ ln_xs / ln(2) ]) * 2^(n + e) */ - - var ls1 : @f[8] - (ls1[0], ls1[1]) = d.two_by_two(ln_xs_hi, d.frombits(d.one_over_ln2_hi)) - (ls1[2], ls1[3]) = d.two_by_two(ln_xs_hi, d.frombits(d.one_over_ln2_lo)) - (ls1[4], ls1[5]) = d.two_by_two(ln_xs_lo, d.frombits(d.one_over_ln2_hi)) - (ls1[6], ls1[7]) = d.two_by_two(ln_xs_lo, d.frombits(d.one_over_ln2_lo)) + var E1, E2 + (E1, E2) = d.split_mul(ln_xs_hi, ln_xs_lo, d.frombits(d.one_over_ln2_hi), d.frombits(d.one_over_ln2_lo)) /* - Now log2(xs) = Sum(ls1), so + Now log2(xs) = E1 + E2, so - x^n = 2^(n * Sum(ls1)) * 2^(n * e) + x^n = 2^(n * E1 + E2) * 2^(n * e) */ - var E1, E2 - (E1, E2) = double_compensated_sum(ls1[0:8]) - var ls2 : @f[5] - var ls2s : @f[5] - var I = 0 - (ls2[0], ls2[1]) = d.two_by_two(E1, nf) - (ls2[2], ls2[3]) = d.two_by_two(E2, nf) - ls2[4] = 0.0 - - /* Now x^n = 2^(Sum(ls2)) * 2^(n + e) */ - - for var j = 0; j < 5; ++j - var i = rn(ls2[j]) - I += i - ls2[j] -= (i : @f) - ;; var F1, F2 - std.slcp(ls2s[0:5], ls2[0:5]) - std.sort(ls2s[0:5], d.magcmp) - (F1, F2) = double_compensated_sum(ls2s[0:5]) - - if (F1 < 0.0 || F1 > 1.0) - var i = rn(F1) - I += i - ls2[4] -= (i : @f) - std.slcp(ls2s[0:5], ls2[0:5]) - std.sort(ls2s[0:5], d.magcmp) - (F1, F2) = double_compensated_sum(ls2s[0:5]) - ;; + (F1, F2) = d.split_mul(E1, E2, nf, 0.0) + + var I = rn(F1) + (F1, F2) = d.split_add(-1.0 * (I : @f), 0.0, F1, F2) /* Now, x^n = 2^(F1 + F2) * 2^(I + n*e). 
*/ - var ls3 : @f[6] var log2_hi, log2_lo (log2_hi, log2_lo) = d.C[128] - (ls3[0], ls3[1]) = d.two_by_two(F1, d.frombits(log2_hi)) - (ls3[2], ls3[3]) = d.two_by_two(F1, d.frombits(log2_lo)) - (ls3[4], ls3[5]) = d.two_by_two(F2, d.frombits(log2_hi)) var G1, G2 - (G1, G2) = double_compensated_sum(ls3[0:6]) + (G1, G2) = d.split_mul(F1, F2, d.frombits(log2_hi), d.frombits(log2_lo)) var base = exp(G1) + G2 var pow_xen = xe * n @@ -259,6 +295,19 @@ generic rootngen = {x : @f, q : @u, d : fltdesc(@f, @u, @i) :: numeric,floating, elif q == 1 /* Anything^1/1 is itself */ -> x + elif xe < d.emin + /* + Subnormals are actually a problem. If we naively reconstitute xs, it + will be wildly wrong and won't match up with the exponent. So let's + pretend we have unbounded exponent range. We know the loop terminates + because we covered the +/-0.0 case above. + */ + xe++ + var check = 1 << d.implicit_bit + while xs & check == 0 + xs <<= 1 + xe-- + ;; ;; /* As in pown */ @@ -267,6 +316,33 @@ generic rootngen = {x : @f, q : @u, d : fltdesc(@f, @u, @i) :: numeric,floating, ult_sgn = -1.0 ;; + /* + If we're looking at (1 + 2^-h)^1/q, and the answer will be 1 + e, with + (1 + e)^q = 1 + 2^-h, then for q and h large enough, e might be below + the representable range. Specifically, + + (1 + e)^q ≅ 1 + qe + (q choose 2)e^2 + ... + + So (using single-precision as the example) + + (1 + 2^-23)^q ≅ 1 + q 2^-23 + (absolutely tiny terms) + + And anything in [1, 1 + q 2^-24) will just truncate to 1.0 when + calculated. + */ + if xe == 0 + var cutoff = scale2(qf, -1 * d.precision - 1) + 1.0 + if (xb & d.nosgn_mask) < d.tobits(cutoff) + -> 1.0 + ;; + elif xe == -1 + /* Something similar for (1 - e)^q */ + var cutoff = 1.0 - scale2(qf, -1 * d.precision - 1) + if (xb & d.nosgn_mask) > d.tobits(cutoff) + -> 1.0 + ;; + ;; + /* Similar to pown. Let e/q = E + psi, with E an integer. 
x^(1/q) = e^(log(xs)/q) * 2^(e/q) @@ -280,14 +356,13 @@ generic rootngen = {x : @f, q : @u, d : fltdesc(@f, @u, @i) :: numeric,floating, */ /* Calculate 1/q in very high precision */ - var r1 = 1.0 / qf - var r2 = -math.fma(r1, qf, -1.0) / qf + var qinv_hi = 1.0 / qf + var qinv_lo = -math.fma(qinv_hi, qf, -1.0) / qf var ln_xs_hi, ln_xs_lo (ln_xs_hi, ln_xs_lo) = d.log_overkill(d.assem(false, 0, xs)) - var ls1 : @f[12] - (ls1[0], ls1[1]) = d.two_by_two(ln_xs_hi, r1) - (ls1[2], ls1[3]) = d.two_by_two(ln_xs_hi, r2) - (ls1[4], ls1[5]) = d.two_by_two(ln_xs_lo, r1) + + var G1, G2 + (G1, G2) = d.split_mul(ln_xs_hi, ln_xs_lo, qinv_hi, qinv_lo) var E : @i if q > std.abs(xe) @@ -301,15 +376,20 @@ generic rootngen = {x : @f, q : @u, d : fltdesc(@f, @u, @i) :: numeric,floating, var psi_lo = -math.fma(psi_hi, qf, -(qpsi : @f)) / qf var log2_hi, log2_lo (log2_hi, log2_lo) = d.C[128] - (ls1[ 6], ls1[ 7]) = d.two_by_two(psi_hi, d.frombits(log2_hi)) - (ls1[ 8], ls1[ 9]) = d.two_by_two(psi_hi, d.frombits(log2_lo)) - (ls1[10], ls1[11]) = d.two_by_two(psi_lo, d.frombits(log2_hi)) + var H1, H2 + (H1, H2) = d.split_mul(psi_hi, psi_lo, d.frombits(log2_hi), d.frombits(log2_lo)) - var G1, G2 - (G1, G2) = double_compensated_sum(ls1[0:12]) - /* G1 + G2 approximates log(xs)/q + log(2)*psi */ + var J1, J2, t1, t2 + /* + We can't use split_add; we don't kow the relative magitudes of G and H + */ + (t1, t2) = slow2sum(G2, H2) + (J2, t1) = slow2sum(H1, t1) + (J1, J2) = slow2sum(G1, J2) + J2 = J2 + (t1 + t2) - var base = exp(G1) + G2 + /* J1 + J2 approximates log(xs)/q + log(2)*psi */ + var base = exp(J1) + J2 -> ult_sgn * scale2(base, E) } diff --git a/lib/math/powr-impl.myr b/lib/math/powr-impl.myr index 715f611..dadd23b 100644 --- a/lib/math/powr-impl.myr +++ b/lib/math/powr-impl.myr @@ -1,7 +1,9 @@ use std use "fpmath" +use "impls" use "log-impl" +use "log-overkill" use "util" /* @@ -14,12 +16,6 @@ use "util" example, IEEE 754-2008 does not specify what powr(infty, y) must return when y is not 0.0 (an erratum was planned in 2010, but does not appear to have been released as of 2018). - - As a note: unlike many other functions in this library, there - has been no serious analysis of the accuracy and speed of this - particular implementation. Interested observers wishing to improve - this library will probably find this file goldmine of mistakes, - both theoretical and practical. */ pkg math = pkglocal const powr32 : (x : flt32, y : flt32 -> flt32) @@ -38,17 +34,10 @@ type fltdesc(@f, @u, @i) = struct emax : @i emin : @i sgnmask : @u - sig8mask : @u - sig8last : @u - split_prec_mask : @u - split_prec_mask2 : @u - C : (@u, @u)[:] - eps_inf_border : @u - eps_zero_border : @u - exp_inf_border : @u + log_overkill : (x : @f -> (@f, @f)) + fma : (x : @f, y : @f, z : @f -> @f) + split_mul : (x_h : @f, x_l : @f, y_h : @f, y_l : @f -> (@f, @f)) exp_zero_border : @u - exp_subnormal_border : @u - itercount : @u ;; const desc32 : fltdesc(flt32, uint32, int32) = [ @@ -63,17 +52,10 @@ const desc32 : fltdesc(flt32, uint32, int32) = [ .emax = 127, .emin = -126, .sgnmask = 1 << 31, - .sig8mask = 0xffff0000, /* Mask to get 8 significant bits */ - .sig8last = 16, /* Last bit kept when masking */ - .split_prec_mask = 0xffff0000, /* 16 trailing zeros */ - .split_prec_mask2 = 0xfffff000, /* 12 trailing zeros */ - .C = accurate_logs32[0:130], /* See log-impl.myr */ - .eps_inf_border = 0x4eb00f34, /* maximal y st. (1.00..1)^y < oo */ - .eps_zero_border = 0x4ecff1b4, /* minimal y st. 
(0.99..9)^y > 0 */ - .exp_inf_border = 0x42b17218, /* maximal y such that e^y < oo */ + .log_overkill = logoverkill32, + .fma = fma32, + .split_mul = split_mul32, .exp_zero_border = 0xc2cff1b4, /* minimal y such that e^y > 0 */ - .exp_subnormal_border = 0xc2aeac50, /* minimal y such that e^y is normal */ - .itercount = 4, /* How many iterations of Taylor series for (1 + f)^y' */ ] const desc64 : fltdesc(flt64, uint64, int64) = [ @@ -88,19 +70,21 @@ const desc64 : fltdesc(flt64, uint64, int64) = [ .emax = 1023, .emin = -1022, .sgnmask = 1 << 63, - .sig8mask = 0xffffe00000000000, /* Mask to get 8 significant bits */ - .sig8last = 45, /* Last bit kept when masking */ - .split_prec_mask = 0xffffff0000000000, /* 40 trailing zeroes */ - .split_prec_mask2 = 0xfffffffffffc0000, /* 18 trailing zeroes */ - .C = accurate_logs64[0:130], /* See log-impl.myr */ - .eps_inf_border = 0x43d628b76e3a7b61, /* maximal y st. (1.00..1)^y < oo */ - .eps_zero_border = 0x43d74e9c65eceee0, /* minimal y st. (0.99..9)^y > 0 */ - .exp_inf_border = 0x40862e42fefa39ef, /* maximal y such that e^y < oo */ + .log_overkill = logoverkill64, + .fma = fma64, + .split_mul = hl_mult, .exp_zero_border = 0xc0874910d52d3052, /* minimal y such that e^y > 0 */ - .exp_subnormal_border = 0xc086232bdd7abcd2, /* minimal y such that e^y is normal */ - .itercount = 8, ] +const split_mul32 = {x_h : flt32, x_l : flt32, y_h : flt32, y_l : flt32 + var x : flt64 = (x_h : flt64) + (x_l : flt64) + var y : flt64 = (y_h : flt64) + (y_l : flt64) + var z = x * y + var z_h : flt32 = (z : flt32) + var z_l : flt32 = ((z - (z_h : flt64)) : flt32) + -> (z_h, z_l) +} + const powr32 = {x : flt32, y : flt32 -> powrgen(x, y, desc32) } @@ -182,225 +166,41 @@ generic powrgen = {x : @f, y : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,s ;; ;; - /* Normalize x and y */ - if xe < d.emin - var first_1 = find_first1_64((xs : uint64), (d.precision : int64)) - var offset = (d.precision : @u) - 1 - (first_1 : @u) - xs = xs << offset - xe = d.emin - offset - ;; - - if ye < d.emin - var first_1 = find_first1_64((ys : uint64), (d.precision : int64)) - var offset = (d.precision : @u) - 1 - (first_1 : @u) - ys = ys << offset - ye = d.emin - offset - ;; - /* - Split x into 2^N * F * (1 + f), with F = 1 + j/128 (some - j) and f tiny. Compute F naively by truncation. Compute - f via f = (x' - 1 - F)/(1 + F), where 1/(1 + F) is - precomputed and x' is x/2^N. 128 is chosen so that we - can borrow some constants from log-impl.myr. - - [Tan90] hints at a method of computing x^y which may be - comparable to this approach, but which is unfortunately - has not been elaborated on (as far as I can discover). + Just do the dumb thing: compute exp( log(x) · y ). All the hard work + goes into computing log(x) with high enough precision that our exp() + implementation becomes the weakest link. The Table Maker's Dilemma + says that quantifying "high enough" is a very difficult problem, but + experimentally twice the precision of @f appears quite good enough. 
*/ - var N = xe - var j, F, Fn, Fe, Fs - var xprime = d.assem(false, 0, xs) - - if need_round_away(0, (xs : uint64), (d.sig8last : int64)) - F = d.frombits((d.tobits(xprime) & d.sig8mask) + (1 << d.sig8last)) - else - F = d.frombits(d.tobits(xprime) & d.sig8mask) - ;; + var ln_x_hi, ln_x_lo + (ln_x_hi, ln_x_lo) = d.log_overkill(x) - (Fn, Fe, Fs) = d.explode(F) + var final_exp_hi, final_exp_lo + (final_exp_hi, final_exp_lo) = d.split_mul(ln_x_hi, ln_x_lo, y, 0.0) - if Fe != 0 - j = 128 - else - j = 0x7f & ((d.sig8mask & Fs) >> d.sig8last) - ;; - - var f = (xprime - F)/F - - /* - y could actually be above integer infinity, in which - case x^y is most certainly infinity of 0. More importantly, - we can't safely compute M (below). - */ - if x > (1.0 : @f) - if y > d.frombits(d.eps_inf_border) - -> d.frombits(d.inf) - elif -y > d.frombits(d.eps_inf_border) - -> (0.0 : @f) - ;; - elif x < (1.0 : @f) - if y > d.frombits(d.eps_zero_border) && x < (1.0 : @f) - -> (0.0 : @f) - elif -y > d.frombits(d.eps_zero_border) && x < (1.0 : @f) + if d.tobits(final_exp_hi) & d.expmask == d.inf + /* + split_mul doesn't actually preserve the sign of infinity, so we can't + trust final_exp_hi to get it. + */ + if (d.tobits(ln_x_hi) & d.sgnmask) == (yb & d.sgnmask) + /* e^+Inf */ -> d.frombits(d.inf) + else + /* e^-Inf */ + -> 0.0 ;; ;; - /* Split y into M + y', with |y'| <= 0.5 and M an integer */ - var M = floor(y) - var yprime = y - M - if yprime > (0.5 : @f) - M += (1.0 : @f) - yprime = y - M - elif yprime < (-0.5 : @f) - M -= (1.0: @f) - yprime = y - M - ;; - - /* - We'll multiply y' by log(2) and try to keep extra - precision, so we need to split y'. Since the high word - of C has 24 - 10 = 14 significant bits (53 - 16 = 37 in - flt64 case), we ensure 15 (39) trailing zeroes in - yprime_hi. (We also need this for y'*N, M, &c). - */ - var yprime_hi = d.frombits(d.tobits(yprime) & d.split_prec_mask) - var yprime_lo = yprime - yprime_hi - var yprimeN_hi = d.frombits(d.tobits((N : @f) * yprime) & d.split_prec_mask) - var yprimeN_lo = fma((N : @f), yprime, -yprimeN_hi) - var M_hi = d.frombits(d.tobits(M) & d.split_prec_mask) - var M_lo = M - M_hi - - /* - At this point, we've built out - - x^y = [ 2^N * F * (1 + f) ]^(M + y') - - where N, M are integers, F is well-known, and f, y' are - tiny. So we can get to computing - - /-1-\ /-------------------2--------------------------\ /-3--\ - 2^(N*M) * exp(log(F)*y' + log2*N*y' + log(F)*M + M*log(1+f)) * (1+f)^y' - - where 1 can be handled by scale2, 2 we can mostly fake - by sticking high-precision values for log(F) and log(2) - through exp(), and 3 is composed of small numbers, - therefore can be reasonably approximated by a Taylor - expansion. 
- */ - - /* t2 */ - var log2_lo, log2_hi, Cu_hi, Cu_lo - (log2_hi, log2_lo) = d.C[128] - (Cu_hi, Cu_lo) = d.C[j] - - var es : @f[20] - std.slfill(es[:], (0.0 : @f)) - - /* log(F) * y' */ - es[0] = d.frombits(Cu_hi) * yprime_hi - es[1] = d.frombits(Cu_lo) * yprime_hi - es[2] = d.frombits(Cu_hi) * yprime_lo - es[3] = d.frombits(Cu_lo) * yprime_lo - - /* log(2) * N * y' */ - es[4] = d.frombits(log2_hi) * yprimeN_hi - es[5] = d.frombits(log2_lo) * yprimeN_hi - es[6] = d.frombits(log2_hi) * yprimeN_lo - es[7] = d.frombits(log2_lo) * yprimeN_lo - - /* log(F) * M */ - es[8] = d.frombits(Cu_hi) * M_hi - es[9] = d.frombits(Cu_lo) * M_hi - es[10] = d.frombits(Cu_hi) * M_lo - es[11] = d.frombits(Cu_lo) * M_lo - - /* log(1 + f) * M */ - var lf = log1p(f) - var lf_hi = d.frombits(d.tobits(lf) & d.split_prec_mask) - var lf_lo = lf - lf_hi - es[12] = lf_hi * M_hi - es[13] = lf_lo * M_hi - es[14] = lf_hi * M_lo - es[15] = lf_lo * M_lo - - /* - The correct way to handle this would be to compare - magnitudes of eis and parenthesize the additions correctly. - We take the cheap way out. - */ - var exp_hi = priest_sum(es[0:16]) - - /* - We would like to just compute exp(exp_hi) * exp(exp_lo). - However, if that takes us into subnormal territory, yet - N * M is large, that will throw away a few bits of - information. We can correct for this by adding in a few - copies of P*log(2), then subtract off P when we compute - scale2() at the end. - - We also have to be careful that P doesn't have too many - significant bits, otherwise we throw away some information - of log2_hi. - */ - var P = -rn(exp_hi / d.frombits(log2_hi)) - var P_f = (P : @f) - P_f = d.frombits(d.tobits(P_f) & d.split_prec_mask2) - P = rn(P_f) - - es[16] = P_f * d.frombits(log2_hi) - es[17] = P_f * d.frombits(log2_lo) - exp_hi = priest_sum(es[0:18]) - es[18] = -exp_hi - var exp_lo = priest_sum(es[0:19]) - - - var t2 = exp(exp_hi) * exp(exp_lo) - - /* - t3: Abbreviated Taylor expansion for (1 + f)^y' - 1. - Since f is on the order of 2^-7 (and y' is on the order - of 2^-1), we need to go up to f^3 for single-precision, - and f^7 for double. 
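The f^3 and f^7 cutoffs quoted here follow from a quick bound on the binomial coefficients, using the |f| on the order of 2^-7 and |y'| <= 0.5 estimates from this comment (a back-of-the-envelope check, written in the comment's own notation):

    (1 + f)^{y'} - 1 = \Sum_{k=1}^{\infty} {y' \choose k} f^k,
    with |{y' \choose k}| <= 1/2 for |y'| <= 1/2 and k >= 1,

so truncating after degree d leaves an error of at most about |f|^(d+1)/2: roughly 2^-29 for d = 3 and 2^-57 for d = 7, safely below one ulp of a flt32 or flt64 result near 1.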
We can then compute (1 + t3) * t2 - - The expansion is \Sum_{k=1}^{\infty} {y' \choose k} x^k - */ - var terms : @f[10] = [ - (0.0 : @f), (0.0 : @f), (0.0 : @f), (0.0 : @f), (0.0 : @f), - (0.0 : @f), (0.0 : @f), (0.0 : @f), (0.0 : @f), (0.0 : @f), - ] - var current = (1.0 : @f) - for var j = 0; j <= d.itercount; ++j - current = current * f * (yprime - (j : @f)) / ((j : @f) + (1.0 : @f)) - terms[j] = current - ;; - var t3 = priest_sum(terms[0:d.itercount + 1]) - - var total_exp_f = (N : @f) * M - (P : @f) - if total_exp_f > ((d.emax - d.emin + d.precision + 1) : @f) - -> d.frombits(d.inf) - elif total_exp_f < -((d.emax - d.emin + d.precision + 1) : @f) - -> (0.0 : @f) + if final_exp_hi < d.frombits(d.exp_zero_border) + -> 0.0 ;; - /* - Pull t2's exponent out so that we don't hit subnormal - calculation with the t3 multiplication - */ - var t2n, t2e, t2s - (t2n, t2e, t2s) = d.explode(t2) - - if t2e < d.emin - var t2_first_1 = find_first1_64((t2s : uint64), (d.precision : int64)) - var t2_offset = (d.precision : @u) - 1 - (t2_first_1 : @u) - t2s = t2s << t2_offset - t2e = d.emin - (t2_offset : @i) + var z_hi = exp(final_exp_hi) + if d.tobits(z_hi) & d.expmask == d.inf + -> z_hi ;; - t2 = d.assem(t2n, 0, t2s) - P -= t2e - - var base = fma(t2, t3, t2) - -> scale2(base, N * rn(M) - P) + -> d.fma(z_hi, final_exp_lo, z_hi) } diff --git a/lib/math/test/log-overkill.myr b/lib/math/test/log-overkill.myr index f9162a9..03e67ef 100644 --- a/lib/math/test/log-overkill.myr +++ b/lib/math/test/log-overkill.myr @@ -142,7 +142,10 @@ const log02 = {c (0x3f974b5311aeae57, 0xc00e4420e231d7f0, 0xbc9d614ed9b94484), (0x3fe28aed659dab73, 0xbfe1760d162fed7e, 0xbc64a0ff30250148), (0x403273d9892e62d3, 0x40075255633e0533, 0xbc91eb9834046d7b), + + /* This one catches naive catastrophic cancellation */ (0x3fee1d239d2061d7, 0xbfaf1ad3961ab8ba, 0xbbc9bff82ae3fde7), + (0x3fbc0666ebc60265, 0xc001b257198142d0, 0xbca1cf93360a27f6), (0x3f53267a24ceab6a, 0xc01b01c8ad09c3c1, 0xbca0d85af74df975), (0x3fd2005446cb268e, 0xbff44b879f2ec561, 0x3c66e8eff64f40a1), @@ -224,6 +227,9 @@ const log02 = {c (0x4014a5ce06d7df05, 0x3ffa42cc8df38d10, 0xbc5e54e0ca2ed44c), (0x3f68d1447d5a29e8, 0xc017328d1195bac4, 0x3cb4fd5db024d1da), (0x404963f9a80919eb, 0x400f6b9160b05ec4, 0x3c4526374db12c53), + (0x3fe78000b3cf1a39, 0xbfd3c2508d81ebf9, 0x3c7d6df43454d213), + (0x7fe6c53d8cef3d27, 0x40862b8a1ec909c8, 0x3cf9a8752da53a7e), + (0x000342cdeeb18fc9, 0xc0862fe5598ee7e6, 0xbd2bf7df7d1e9517), ][:] for (x, y1, y2) : inputs diff --git a/lib/math/test/pown-impl.myr b/lib/math/test/pown-impl.myr index a7dbeb4..7308d25 100644 --- a/lib/math/test/pown-impl.myr +++ b/lib/math/test/pown-impl.myr @@ -21,7 +21,20 @@ const int64fromuint64 = {b; -> (&b : int64#)#} const pown01 = {c var inputs : (uint32, uint32, uint32)[:] = [ (0x000000f6, 0x00000000, 0x3f800000), + (0x7fc00000, 0x00000001, 0x7fc00000), + (0x7fc00000, 0x00000021, 0x7fc00000), + (0x7fc00000, 0x00030021, 0x7fc00000), + (0x7fc00000, 0xfacecafe, 0x7fc00000), (0x00000000, 0x3d800000, 0x00000000), + (0x80000000, 0x00000124, 0x00000000), + (0x80000000, 0x00000123, 0x80000000), + (0x00000000, 0x00000124, 0x00000000), + (0x00000000, 0x00000123, 0x00000000), + (0x00000000, 0xad800001, 0x7f800000), + (0x80000000, 0x80000123, 0xff800000), + (0x80000000, 0x80000122, 0x7f800000), + (0x00000000, 0x80000127, 0x7f800000), + (0x00000000, 0x80000128, 0x7f800000), (0x946fc13b, 0x3b21efc7, 0x80000000), (0xb76e98b6, 0xdbeb6637, 0xff800000), (0xc04825b7, 0x53cdd772, 0x7f800000), @@ -52,11 +65,11 @@ const pown01 = {c for (x, y, 
z) : inputs var xf : flt32 = std.flt32frombits(x) var yi : int32 = int32fromuint32(y) - var zf : flt32 = std.flt32frombits(z) var rf = math.pown(xf, yi) - testr.check(c, rf == zf, + var ru : uint32 = std.flt32bits(rf) + testr.check(c, ru == z, "pown(0x{b=16,w=8,p=0}, {}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}", - x, yi, z, std.flt32bits(rf)) + x, yi, z, ru) ;; } @@ -87,16 +100,24 @@ const pown02 = {c (0xc017043172d0152b, 0x00000000000000e9, 0xe4b2c1666379afdc), (0xc0325800cfeffb8e, 0x00000000000000d8, 0x78983c24a5e29e19), (0xbfee2ae3cd3208ec, 0x00000000000006b7, 0xb6cb06585f39893d), + (0x3f7dd2994731f21f, 0x0000000000000097, 0x0000000000000003), + (0x61696e53830d02af, 0xfffffffffffffffe, 0x0000000000000006), + (0xc0e60abfce171c2e, 0xffffffffffffffbb, 0x800000000000008a), + (0x32dbf16a23293407, 0x0000000000000005, 0x00000000103f2cd6), + (0xb95741e695eb8ab2, 0x000000000000000a, 0x00000000000a873c), + (0x000aa88b5c2dd078, 0xffffffffffffffff, 0x7fd804c764025003), + (0x800cd2d56c4a4074, 0xffffffffffffffff, 0xffd3f696f65f6596), + (0x8000d6838a5a8463, 0xffffffffffffffff, 0xfff0000000000000), ][:] for (x, y, z) : inputs var xf : flt64 = std.flt64frombits(x) var yi : int64 = int64fromuint64(y) - var zf : flt64 = std.flt64frombits(z) - var rf = math.pown(xf, yi) - testr.check(c, rf == zf, + var rf : flt64 = math.pown(xf, yi) + var ru : uint64 = std.flt64bits(rf) + testr.check(c, ru == z, "pown(0x{b=16,w=16,p=0}, {}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}", - x, yi, z, std.flt64bits(rf)) + x, yi, z, ru) ;; } @@ -112,12 +133,11 @@ const pown03 = {c for (x, y, z_perfect, z_accepted) : inputs var xf : flt32 = std.flt32frombits(x) var yi : int32 = int32fromuint32(y) - var zf_perfect : flt32 = std.flt32frombits(z_perfect) - var zf_accepted : flt32 = std.flt32frombits(z_accepted) - var rf = math.pown(xf, yi) - testr.check(c, rf == zf_perfect || rf == zf_accepted, + var rf : flt32 = math.pown(xf, yi) + var ru : uint32 = std.flt32bits(rf) + testr.check(c, ru == z_perfect || ru == z_accepted, "pown(0x{b=16,w=8,p=0}, {}) should be 0x{b=16,w=8,p=0}, will also accept 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}", - x, yi, z_perfect, z_accepted, std.flt32bits(rf)) + x, yi, z_perfect, z_accepted, ru) ;; } @@ -279,10 +299,11 @@ const rootn01 = {c for (x, y, z) : inputs var xf : flt32 = std.flt32frombits(x) var zf : flt32 = std.flt32frombits(z) - var rf = math.rootn(xf, y) - testr.check(c, rf == zf, + var rf : flt32 = math.rootn(xf, y) + var ru : uint32 = std.flt32bits(rf) + testr.check(c, ru == z, "rootn(0x{b=16,w=8,p=0}, {}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}", - x, y, z, std.flt32bits(rf)) + x, y, z, ru) ;; } @@ -436,15 +457,22 @@ const rootn02 = {c (0xe0bc4cbf6bd74d8f, 0x000000000000bd8b, 0xbff01ed2c4e821fc), (0x31d4f2baa91a9e8e, 0x000000000000244d, 0x3fef774c954b40bf), (0x01283d1c679f5652, 0x0000000000008647, 0x3fef5bc18f5e292f), + (0x80003d8a341ee060, 0x0000000000009c71, 0xbfef6f873f76b7cd), + (0xbfecf0fc4dc97b93, 0x0000000000005f4b, 0xbfeffff75d0a25fe), + (0xbfe2fb84944a35ee, 0x000000000000e947, 0xbfefffeda94c6d07), + (0xbfe0ef0c05bd84ab, 0x0000000000007165, 0xbfefffd205e2a1c8), + (0xbfe7354e962bdcb3, 0x000000000000076b, 0xbfeffe9d4844aad6), + (0xbfea556dd1eb1e58, 0x00000000000095cb, 0xbfeffff557890356), ][:] for (x, y, z) : inputs var xf : flt64 = std.flt64frombits(x) var zf : flt64 = std.flt64frombits(z) - var rf = math.rootn(xf, y) - testr.check(c, rf == zf, + var rf : flt64 = math.rootn(xf, y) + var ru : uint64 = std.flt64bits(rf) + testr.check(c, ru == z, 
"rootn(0x{b=16,w=16,p=0}, {}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}", - x, y, z, std.flt64bits(rf)) + x, y, z, ru) ;; } diff --git a/lib/math/test/powr-impl.myr b/lib/math/test/powr-impl.myr index 8e6110a..fb54d84 100644 --- a/lib/math/test/powr-impl.myr +++ b/lib/math/test/powr-impl.myr @@ -8,6 +8,7 @@ const main = { [.name="powr-01", .fn = powr01], [.name="powr-02", .fn = powr02], [.name="powr-03", .fn = powr03], + [.name="powr-04", .fn = powr04], ][:]) } @@ -22,8 +23,6 @@ const powr01 = {c (0x653f944a, 0xbf7c2388, 0x1a3c784b), (0x545ba67c, 0xc0c7e947, 0x00000000), (0x3fca6b0d, 0x44ff18e0, 0x7f800000), - // (0x3f74c7a7, 0x44feae20, 0x000265c6), - // (0x3f7ebd6c, 0xc5587884, 0x4bc9ab07), ][:] for (x, y, z) : inputs @@ -32,7 +31,7 @@ const powr01 = {c var zf : flt32 = std.flt32frombits(z) var rf = math.powr(xf, yf) testr.check(c, rf == zf, - "powr(0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}", + "powr(0x{w=8,p=0,x}, 0x{w=8,p=0,x}) should be 0x{w=8,p=0,x}, was 0x{w=8,p=0,x}", x, y, z, std.flt32bits(rf)) ;; } @@ -40,6 +39,131 @@ const powr01 = {c const powr02 = {c var inputs : (uint64, uint64, uint64)[:] = [ (0x0000000000000000, 0x0000000000000000, 0x0000000000000000), + (0x0d30ad40d8223045, 0xffb56d6e33cd6ad2, 0x7ff0000000000000), + (0x1f192b55704d3500, 0xff8a52f877359f3c, 0x7ff0000000000000), + (0x7fe6c53d8cef3d27, 0x4bcca2e651c57788, 0x7ff0000000000000), + (0x7fe78a3740493383, 0xe84e801c38a71fc9, 0x0000000000000000), + (0x7fea162ffbabd3bc, 0x02414c7fa33dd7db, 0x3ff0000000000000), + (0x7fe087a9112a21d8, 0x0f2b7a9584736b41, 0x3ff0000000000000), + (0x7fe5d78f049c0918, 0xd3a145ba5b0fdb9b, 0x0000000000000000), + (0x7febb342860b3386, 0xe758bd2af063aec2, 0x0000000000000000), + (0x000342cdeeb18fc9, 0xbe645d513ed4670d, 0x3ff0001c3d5eaa62), + (0x3e086c8c9160ccfe, 0xc027b2f7021e0508, 0x567134b75886e1ce), + (0x3d799a9014c5c710, 0xc0294dc8ea46772e, 0x5f068df47efc8583), + (0x3fca93f7f9ca25b6, 0x3ff5847df0338da2, 0x3fbee98550a085a0), + (0x3fabb5869cd3c59e, 0x3ff36f822e577f69, 0x3f9da0409e2f391c), + (0x3f77f7f7131769cb, 0x3feaf3f718dd7ebe, 0x3f8af6517cbfdc36), + (0x3fcac3d4060de9e2, 0x4005f5e897c9aeb8, 0x3f8be74e7fa8bdd6), + (0x406f0b978c302c5e, 0x4035705781be3d35, 0x4a97d0db24be1855), + (0x40644d87e8676e6c, 0x404b0add51b6a1db, 0x58c21ad2028c4bcc), + (0x4196f9c67efe9a1f, 0x3fbb8bebec6ccd29, 0x401ceae1ef1736e2), + + /* x in (-0.1, 0.1), y in (-50, 50) */ + (0x85c8e7c348119f30, 0x895b661cd3f6bf12, 0xfff8000000000000), + (0x3ddec142f9c6c4ab, 0xb5eb602a73c886a4, 0x3ff0000000000000), + (0x808f2728666a17cc, 0x2034c24921d546bc, 0xfff8000000000000), + (0x19d8ae8ebf85e0e8, 0x3f07eb136357cb57, 0x3fef63a7defaa877), + (0x26e70f36e0237155, 0xaa4492e648bc196b, 0x3ff0000000000000), + + /* x in (0, 0.001), y in (-50, 50) */ + (0x3a8c503f997b1d9e, 0x9130be1b1bddafac, 0x3ff0000000000000), + (0x26bbb433c53c87cc, 0x3cea3bfe4404e194, 0x3fefffffffffe35c), + (0x1bfde85e7bef5604, 0x3cb5719f71329990, 0x3feffffffffffbd3), + (0x277cfd17030c8328, 0x2a74030781333076, 0x3ff0000000000000), + (0x19d8ae8ebf85e0e8, 0x3f07eb136357cb57, 0x3fef63a7defaa877), + (0x1ef50bc4cddd2717, 0xbee4389ff84a8f05, 0x3ff00e780c600a2f), + (0x01f66f97fb1a8dbb, 0x3d44ec5cc95c9f87, 0x3feffffffff1f50b), + + /* x in (1e5, 1e9), y in (0.001, 0.2) */ + (0x414d7781220eed89, 0x3f8e612e27ccef27, 0x3ff4096a3251e2a2), + (0x4151aeaf0db9e5e1, 0x3f75fc2b0f18576c, 0x3ff15fbe532b3ad6), + (0x41524b52b1ee2ea5, 0x7ff33dad40d7b286, 0xfff8000000000000), + (0xfffe3ddc1a7f09a7, 0x3f845a6294f3a890, 0xfff8000000000000), + 
(0x41477fe19e4e0642, 0xfff07c620f9b4061, 0xfff8000000000000), + (0x4130cad83460ac8d, 0x3f88f49d9474e1b7, 0x3ff2f4a5838051aa), + (0x4190c048c037d848, 0x3fc5f01bff5f120e, 0x40361f86b9ec8c03), + (0x4122532606cab2c2, 0x3f8c598493e15271, 0x3ff33c5ae51249f2), + (0x40ff7a13e85cc78c, 0x3f6e18c00e65b64d, 0x3ff0b4f518ad7a02), + (0x412aee1389285ddb, 0x3f56e92836547637, 0x3ff04f2b8a876442), + (0xfff7ceefbe9fcc65, 0x3faa28ac719649eb, 0xfff8000000000000), + (0x417113d63f0c2c95, 0x3f9c349235da5418, 0x3ff9586d156467dc), + (0x416e11f180357015, 0x3fa36745e43f0a12, 0x3ffdfbf8fdbce7b0), + (0x41cb75c7b1047ff0, 0x3f8e735375e7a550, 0x3ff5bf54bad30bd9), + (0x41bf0008e5b87821, 0x3fbb4108f504a250, 0x4020f10b4cd6005e), + (0x41cbe5fe5c674130, 0x3fa3e99fb287ea63, 0x4001dd6b2714c7b8), + (0x4174c2a767af0aff, 0x3f9d5bfb33b9c6f9, 0x3ff9f8d15a26d76f), + (0x41a51631969c7dc9, 0x3f92d1ce6076d806, 0x3ff6aed7a9d9b770), + (0x41101c4bb112bd60, 0x3fa24751eb557f8b, 0x3ff8fc0821e9cc12), + (0x41537e074bb931da, 0x7ff77c70e127f089, 0xfff8000000000000), + (0x412de8565421cfeb, 0x3fb3561b7eeae244, 0x4006add19c50cb4a), + (0x41aa670479d6802f, 0x3f9b9492b7c63280, 0x3ffad8c8dd5f6602), + (0x4166c5a1e9dd6276, 0x3fc0b0d8d4896f0a, 0x4020be53d9626333), + (0x41aee3dafeb3da8f, 0x3f88813f6c025f4b, 0x3ff42c84ca07e6dd), + + /* x in (1e5, 1e9), y in (-100, -0.1) */ + (0x417460e6ed428d99, 0xc04da3a1370b5e66, 000000000000000000), + (0x4186804e4380d00b, 0xbff51daac36e8cbd, 0x3dd47ebddadb4770), + (0x415c4dbac0e0621b, 0xbfff189d3f3bd269, 0x3d28fe34109afe0b), + (0x41477fe19e4e0642, 0xfff07c620f9b4061, 0xfff8000000000000), + (0x419ecd479467fb54, 0xbff4cd29a369ac90, 0x3dbf52e03dcc6f2f), + (0x41ac20c06ee35e4f, 0xbfdbcc358bb0fb18, 0x3f2e427c8b80ed9c), + (0xfff4326b9f6b3410, 0xc03d77ad8d9dd6b1, 0xfff8000000000000), + (0x4197910978aa8b96, 0xc03b9cd283f1294a, 0x12190b7d6c88576b), + (0x41094a4327327bff, 0xc011131c033fdfd5, 0x3b3879e23ee63805), + (0x40f8a138d93dfa6f, 0xc001413283b35d5f, 0x3db1bbb215bf42ae), + (0x414539311cf511ff, 0xbfbca848c2d205ed, 0x3fc84fc70f28d73e), + (0xfff3bc381986a30c, 0xbfdca95e011cebe9, 0xfff8000000000000), + (0x412e44a5346a1be6, 0xbffdcb72cdad7356, 0x3d9dfbadec90e3fa), + (0x418e5e64d3edf4c6, 0xc01674baa108856c, 0x36d602089075174d), + (0x416b58e89ad71a84, 0xbfca78a8fcb854b3, 0x3fa0f412b7918686), + + /* x and y both in (0, 10) */ + (0x7ff693c2af30864a, 0x3fa16af316e2d88c, 0xfff8000000000000), + (0x3fdde3d0f357d227, 0x3fd4e93bd57a2981, 0x3fe8f3d387c3cfe2), + (0x3fea1f57254b2d12, 0x3ff33aaf58690aa3, 0x3fe912f746775394), + (0x3fd19a840c634304, 0x3fba064281d93290, 0x3fec1099957576a6), + (0x3fce944eca01df35, 0x3f746940bfb8cacf, 0x3fefc5c3319cd7dd), + (0x3fa0030cd217ac73, 0x3fff10ee29f4ba92, 0x3f539d8ddb010192), + (0x3f748b091924c15d, 0x3fbb4ab930d8139f, 0x3fe2323de643196b), + (0x3f74b789b403b8f1, 0x3f5a0719b998a149, 0x3fefbb7c7865b589), + (0x4008bdfdc4bdf83c, 0x3fa84fdf9bbc5ad1, 0x3ff0e197a48356a8), + (0x3f707ab79cd549eb, 0x3fc776bf069fbf9f, 0x3fd748e843888881), + (0x40215d0d73f3daaf, 0x3f6909363b8c583a, 0x3ff01b24ca035345), + (0x400a5fe7abf90e94, 0x400552a0bcbf22fb, 0x403809d5d2eaca93), + (0x3fd48e701b8afa9a, 0xfffdf7de16212166, 0xfff8000000000000), + (0x3fdbed39bc744363, 0x401da621f6a5f1cc, 0x3f6187c614da505b), + (0x3ffd74273040c698, 0x3f9a6c6d49dd529c, 0x3ff0410227112c44), + (0x3faaeac0f7d4763a, 0xfff0a23bbb29b144, 0xfff8000000000000), + (0x7ff4035e90ccdb5d, 0x3fe19bd7bfc5fc57, 0xfff8000000000000), + (0x3fdf5a05d19f39f8, 0x3f5d97918fcfa553, 0x3feff572b799a75e), + (0x40139fa494d5c102, 0x3fff9e51a66d6c9c, 0x40372c0ec85cdb67), + 
(0x3fb64cb543db5008, 0x3fb6e061cb1f1a13, 0x3fe9bac3bc838dff), + (0x4002619e676678da, 0x3ff78e803ae2b8ec, 0x400b3a4285ab2635), + (0x3fe318b748070dba, 0x3fb7f9f73d9ab7fd, 0x3fee7d592f800221), + (0x3feb56ee622481b8, 0x3f7c19c1249a1d42, 0x3feff7289e9534f9), + (0x3f6d1ed0f9c04a9f, 0x3f62ffb9e40680c7, 0x3fef958dbae35a0f), + (0xfffe3ddc1a7f09a7, 0x3f845a6294f3a890, 0xfff8000000000000), + (0x3ff041e289ac5b8c, 0x3f57490a9fd58f95, 0x3ff00017c7d7ad4b), + (0x4020bc3c2d25bb94, 0xfff903dc5ef7b5da, 0xfff8000000000000), + (0x4003d467c525071c, 0x3f98c1fe89a5ecc4, 0x3ff05ae362b9a84a), + (0x3fc238b70f4e7216, 0x3f8a75e2fd6e64f4, 0x3fef343ee42df045), + (0x3fe001d1da500969, 0x400ff32df1b36945, 0x3fb0191d9c03801f), + (0x3ffb954dbc226673, 0x400408855a1fc270, 0x400f49e30f335764), + (0x400bb9f936dad207, 0xfffb58709426aaec, 0xfff8000000000000), + (0x3f7bbe6db78c8014, 0xfff26b1c8b0746a0, 0xfff8000000000000), + (0x3f6a3223e6ed8897, 0x3f5fdcdd7023700e, 0x3fefa4fa8d17ce4d), + (0x3fa7156dd9119631, 0x3fbe50c7d9ea5796, 0x3fe62b747aab2686), + + /* x and y both in (10, 1000) */ + (0x4041c47b9cc539bc, 0x406e00257d16c8da, 0x7ff0000000000000), + (0x40633d5af37862b4, 0x402b00f8698b8065, 0x46113505e72b8731), + (0xfff31fb18f68c667, 0x7ffd331caa2b7f2c, 0xfff8000000000000), + (0x405104222d3cd0f1, 0x4060abf88a1f4bfd, 0x72b10f2bdd0d952d), + (0x406868520ca4881f, 0x408785f3d85e41e2, 0x7ff0000000000000), + (0x4087586e5fc62ee6, 0xfff0f261e9e9c83d, 0xfff8000000000000), + (0x4024634aeecdfa76, 0x7ff10c2e3cf5cee6, 0xfff8000000000000), + (0x40410a0b21390465, 0x7ffef5998f6203e6, 0xfff8000000000000), + (0xfff6ddae433e1801, 0x403064702d5f4ed8, 0xfff8000000000000), ][:] for (x, y, z) : inputs @@ -48,7 +172,7 @@ const powr02 = {c var zf : flt64 = std.flt64frombits(z) var rf = math.powr(xf, yf) testr.check(c, rf == zf, - "powr(0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}", + "powr(0x{w=16,p=0,x}, 0x{w=16,p=0,x}) should be 0x{w=16,p=0,x}, was 0x{w=16,p=0,x}", x, y, z, std.flt64bits(rf)) ;; } @@ -57,7 +181,7 @@ const powr03 = {c var inputs : (uint32, uint32, uint32, uint32)[:] = [ (0x1bd2244e, 0x3a647973, 0x3f7535a1, 0x3f7535a0), (0x3f264a46, 0x423c927a, 0x30c9b8d3, 0x30c9b8d4), - (0x61fb73d0, 0xbfd2666c, 0x06c539f6, 0x06c539f7), + (0x61fb73d0, 0xbfd2666c, 0x06c539f6, 0x06c539f5), (0x3bbd11f6, 0x3cc159b1, 0x3f62ac1b, 0x3f62ac1a), (0x3f7ca5b7, 0xc309857a, 0x40c41bbf, 0x40c41bc0), (0x3f6a1a65, 0x43e16065, 0x226731e2, 0x226731e3), @@ -70,7 +194,36 @@ const powr03 = {c var zf_accepted : flt32 = std.flt32frombits(z_accepted) var rf = math.powr(xf, yf) testr.check(c, rf == zf_perfect || rf == zf_accepted, - "powr(0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0}) should be 0x{b=16,w=8,p=0}, will also accept 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}", + "powr(0x{w=8,p=0,x}, 0x{w=8,p=0,x}) should be 0x{w=8,p=0,x}, will also accept 0x{w=8,p=0,x}, was 0x{w=8,p=0,x}", x, y, z_perfect, z_accepted, std.flt32bits(rf)) ;; } + +const powr04 = {c + var inputs : (uint64, uint64, uint64, uint64)[:] = [ + (0x3f8627bbf0b2534e, 0x3fab532501422efb, 0x3fe921e86671e519, 0x3fe921e86671e518), + (0x41c84ac138a030ef, 0x3f91da7f2b3e4605, 0x3ff6e1b47e9ed782, 0x3ff6e1b47e9ed781), + (0x41ca9d0efec9e036, 0x3fa8f2f672d68769, 0x4005d70b6fe1084b, 0x4005d70b6fe1084a), + (0x40f949e1394ba90c, 0x3fb52c7ac7e9fb25, 0x4004cad8a0151dff, 0x4004cad8a0151e00), + (0x41341d6a23c92414, 0xc00943b1e82bb55e, 0x3bebc88b57f77f8f, 0x3bebc88b57f77f8e), + (0x41242aa370d444b6, 0xbff214c0dfc7a867, 0x3e91c53dfd314590, 0x3e91c53dfd31458f), + (0x418f00a1df7a23d0, 0xc033fc3dd88f7505, 
0x1f83a0afed046038, 0x1f83a0afed046039), + (0x4169769b9e71e521, 0xc00ddf0ecaa33de6, 0x3a6889acb36be574, 0x3a6889acb36be573), + (0x417c6bc9e89d9c1d, 0xc01992c87dc70f2c, 0x36032a3faeb94526, 0x36032a3faeb94525), + (0x41c7ac7f3f65ea65, 0xc01e7dbcfb5c724c, 0x31d8c5031b0424e3, 0x31d8c5031b0424e2), + (0x3fa351c48c3746ce, 0x3fe729ab4b99e801, 0x3fb7e116428f44c0, 0x3fb7e116428f44c1), + (0x3fc9ffe250089ea1, 0x3ff88bf79a9dc33c, 0x3fb63170e54074b8, 0x3fb63170e54074b9), + (0x3f55ee3142fec6bf, 0x401cdc101b6b2276, 0x3ba18abf782d7bc4, 0x3ba18abf782d7bc5), +][:] + + for (x, y, z_perfect, z_accepted) : inputs + var xf : flt64 = std.flt64frombits(x) + var yf : flt64 = std.flt64frombits(y) + var zf_perfect : flt64 = std.flt64frombits(z_perfect) + var zf_accepted : flt64 = std.flt64frombits(z_accepted) + var rf = math.powr(xf, yf) + testr.check(c, rf == zf_perfect || rf == zf_accepted, + "powr(0x{w=16,p=0,x}, 0x{w=16,p=0,x}) should be 0x{w=16,p=0,x}, will also accept 0x{w=16,p=0,x}, was 0x{w=16,p=0,x}", + x, y, z_perfect, z_accepted, std.flt64bits(rf)) + ;; +} diff --git a/lib/math/util.myr b/lib/math/util.myr index 8223d13..b36715a 100644 --- a/lib/math/util.myr +++ b/lib/math/util.myr @@ -19,6 +19,12 @@ pkg math = const two_by_two64 : (x : flt64, y : flt64 -> (flt64, flt64)) const two_by_two32 : (x : flt32, y : flt32 -> (flt32, flt32)) + /* Multiply (a_hi + a_lo) * (b_hi + b_lo) to (z_hi + z_lo) */ + const hl_mult : (a_h : flt64, a_l : flt64, b_h : flt64, b_l : flt64 -> (flt64, flt64)) + + /* Add (a_hi + a_lo) * (b_hi + b_lo) to (z_hi + z_lo). Must have |a| > |b|. */ + const hl_add : (a_h : flt64, a_l : flt64, b_h : flt64, b_l : flt64 -> (flt64, flt64)) + /* Compare by magnitude */ const mag_cmp32 : (f : flt32, g : flt32 -> std.order) const mag_cmp64 : (f : flt64, g : flt64 -> std.order) @@ -251,6 +257,50 @@ const two_by_two32 = {x : flt32, y : flt32 -> (s, c) } +const hl_mult = {a_h : flt64, a_l : flt64, b_h : flt64, b_l : flt64 + /* + [ a_h ][ a_l ] * [ b_h ][ b_l ] + = + (A) [ a_h*b_h ] + (B) + [ a_h*b_l ] + (C) + [ a_l*b_h ] + (D) + [ a_l*b_l ] + + We therefore want to keep all of A, and the top halves of the two + smaller products B and C. + + To be pedantic, *_l could be much smaller than pictured above; *_h and + *_l need not butt up right against each other. But that won't cause + any problems; there's no way we'll ignore important information. + */ + var Ah, Al + (Ah, Al) = two_by_two64(a_h, b_h) + var Bh = a_h * b_l + var Ch = a_l * b_h + var resh, resl, t1, t2 + (resh, resl) = slow2sum(Bh, Ch) + (resl, t1) = fast2sum(Al, resl) + (resh, t2) = slow2sum(resh, resl) + (resh, resl) = slow2sum(Ah, resh) + -> (resh, resl + (t1 + t2)) +} + +const hl_add = {a_h : flt64, a_l : flt64, b_h : flt64, b_l : flt64 + /* + Not terribly clever, we just chain a couple of 2sums together. We are + free to impose the requirement that |a_h| > |b_h|, because we'll only + be using this for a = 1/5, 1/3, and the log(Fi)s from the C tables. + However, we can't guarantee that a_l > b_l. For example, compare C1[10] + to C2[18]. + */ + + var resh, resl, t1, t2 + (t1, t2) = slow2sum(a_l, b_l) + (resl, t1) = slow2sum(b_h, t1) + (resh, resl) = fast2sum(a_h, resl) + -> (resh, resl + (t1 + t2)) +} + const mag_cmp32 = {f : flt32, g : flt32 var u = std.flt32bits(f) & ~(1 << 31) var v = std.flt32bits(g) & ~(1 << 31) |
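The util.myr additions are standard double-word ("hi/lo", or double-double) arithmetic: hl_mult keeps all of a_h*b_h plus the high halves of the two cross products, and hl_add chains error-free sums, leaning on fast2sum and hence on the |a| > |b| requirement called out in its comment. Below is a generic C sketch of the same shape, built from an fma-based exact product and Knuth's unconditional two-sum (which is why the ordering requirement drops out here); the helper names are mine and the summation order differs a little from the Myrddin code, so treat it as an illustration rather than a transcription.

    #include <math.h>

    /* s + e == a + b exactly (Knuth's two-sum, no ordering assumption) */
    static void two_sum(double a, double b, double *s, double *e)
    {
            *s = a + b;
            double t = *s - a;
            *e = (a - (*s - t)) + (b - t);
    }

    /* p + e == a*b exactly, via one fma */
    static void two_prod(double a, double b, double *p, double *e)
    {
            *p = a * b;
            *e = fma(a, b, -*p);
    }

    /* (zh + zl) ~= (ah + al) * (bh + bl): the hl_mult idea */
    static void hl_mult_sketch(double ah, double al, double bh, double bl,
                               double *zh, double *zl)
    {
            double ph, pl;
            two_prod(ah, bh, &ph, &pl);     /* keep all of a_h*b_h */
            pl += ah * bl + al * bh;        /* high halves of the cross terms; al*bl is negligible */
            two_sum(ph, pl, zh, zl);        /* renormalize into a hi/lo pair */
    }

    /* (zh + zl) ~= (ah + al) + (bh + bl): the hl_add idea */
    static void hl_add_sketch(double ah, double al, double bh, double bl,
                              double *zh, double *zl)
    {
            double sh, sl;
            two_sum(ah, bh, &sh, &sl);      /* exact sum of the high words */
            sl += al + bl;                  /* fold in the low words */
            two_sum(sh, sl, zh, zl);        /* renormalize */
    }

The Myrddin versions get their exact product from two_by_two64 rather than an fma, and mix fast2sum with slow2sum so the cheaper sum is used wherever the dominant operand is already known; the overall computation is the same.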