diff options
author | S. Gilles <sgilles@math.umd.edu> | 2019-05-01 16:03:44 -0400 |
---|---|---|
committer | S. Gilles <sgilles@math.umd.edu> | 2019-05-01 16:03:44 -0400 |
commit | 1d77b810242a5824469d6f8cf462ee708ea25fe3 (patch) | |
tree | 8243c263019916c0ea9942fe183f422373a061cc | |
parent | 303798469e8afe0e514c667f3c8c7ae2c65e58c5 (diff) | |
download | mc-1d77b810242a5824469d6f8cf462ee708ea25fe3.tar.gz |
Add pown function.
-rw-r--r-- | lib/math/bld.sub | 1 | ||||
-rw-r--r-- | lib/math/fpmath.myr | 7 | ||||
-rw-r--r-- | lib/math/impls.myr | 3 | ||||
-rw-r--r-- | lib/math/pown-impl.myr | 219 | ||||
-rw-r--r-- | lib/math/test/pown-impl.myr | 119 |
5 files changed, 349 insertions, 0 deletions
diff --git a/lib/math/bld.sub b/lib/math/bld.sub index b1806f7..f89a333 100644 --- a/lib/math/bld.sub +++ b/lib/math/bld.sub @@ -24,6 +24,7 @@ lib math = # x^n log-overkill.myr + pown-impl.myr # x^y powr-impl.myr diff --git a/lib/math/fpmath.myr b/lib/math/fpmath.myr index f49918b..0e7c3f1 100644 --- a/lib/math/fpmath.myr +++ b/lib/math/fpmath.myr @@ -23,6 +23,9 @@ pkg math = horner_poly : (x : @f, a : @f[:] -> @f) horner_polyu : (x : @f, a : @u[:] -> @f) + /* pown-impl */ + pown : (x : @f, n : @i -> @f) + /* powr-impl */ powr : (x : @f, y : @f -> @f) @@ -103,6 +106,8 @@ impl fpmath flt32 = horner_poly = {x, a; -> horner_poly32(x, a)} horner_polyu = {x, a; -> horner_polyu32(x, a)} + pown = {x, n; -> pown32(x, n)} + powr = {x, y; -> powr32(x, y)} scale2 = {x, m; -> scale232(x, m)} @@ -140,6 +145,8 @@ impl fpmath flt64 = horner_poly = {x, a; -> horner_poly64(x, a)} horner_polyu = {x, a; -> horner_polyu64(x, a)} + pown = {x, n; -> pown64(x, n)} + powr = {x, y; -> powr64(x, y)} scale2 = {x, m; -> scale264(x, m)} diff --git a/lib/math/impls.myr b/lib/math/impls.myr index 87abd33..86b81c0 100644 --- a/lib/math/impls.myr +++ b/lib/math/impls.myr @@ -29,6 +29,9 @@ pkg math = pkglocal extern const horner_polyu32 : (x : flt32, a : uint32[:] -> flt32) pkglocal extern const horner_polyu64 : (x : flt64, a : uint64[:] -> flt64) + pkglocal extern const pown32 : (x : flt32, n : int32 -> flt32) + pkglocal extern const pown64 : (x : flt64, n : int64 -> flt64) + pkglocal extern const powr32 : (x : flt32, y : flt32 -> flt32) pkglocal extern const powr64 : (x : flt64, y : flt64 -> flt64) diff --git a/lib/math/pown-impl.myr b/lib/math/pown-impl.myr new file mode 100644 index 0000000..f20ceff --- /dev/null +++ b/lib/math/pown-impl.myr @@ -0,0 +1,219 @@ +use std + +use "fpmath" +use "log-impl" +use "log-overkill" +use "sum-impl" +use "util" + +/* + This is an implementation of pown: computing x^n where n is an + integer. We sort of follow [PEB04], but without their high-radix + log_2. + */ +pkg math = + pkglocal const pown32 : (x : flt32, n : int32 -> flt32) + pkglocal const pown64 : (x : flt64, n : int64 -> flt64) +;; + +type fltdesc(@f, @u, @i) = struct + explode : (f : @f -> (bool, @i, @u)) + assem : (n : bool, e : @i, s : @u -> @f) + tobits : (f : @f -> @u) + frombits : (u : @u -> @f) + C : (@u, @u)[:] + one_over_ln2_hi : @u + one_over_ln2_lo : @u + nan : @u + inf : @u + neginf : @u + magcmp : (f : @f, g : @f -> std.order) + two_by_two : (x : @f, y : @f -> (@f, @f)) + log_overkill : (x : @f -> (@f, @f)) + emin : @i + emax : @i + imax : @i + imin : @i +;; + +const desc32 : fltdesc(flt32, uint32, int32) = [ + .explode = std.flt32explode, + .assem = std.flt32assem, + .tobits = std.flt32bits, + .frombits = std.flt32frombits, + .C = accurate_logs32[0:130], /* See log-impl.myr */ + .one_over_ln2_hi = 0x3fb8aa3b, /* 1/ln(2), top part */ + .one_over_ln2_lo = 0x32a57060, /* 1/ln(2), bottom part */ + .nan = 0x7fc00000, + .inf = 0x7f800000, + .neginf = 0xff800000, + .magcmp = mag_cmp32, + .two_by_two = two_by_two32, + .log_overkill = logoverkill32, + .emin = -126, + .emax = 127, + .imax = 2147483647, /* For detecting overflow in final exponent */ + .imin = -2147483648, +] + +const desc64 : fltdesc(flt64, uint64, int64) = [ + .explode = std.flt64explode, + .assem = std.flt64assem, + .tobits = std.flt64bits, + .frombits = std.flt64frombits, + .C = accurate_logs64[0:130], /* See log-impl.myr */ + .one_over_ln2_hi = 0x3ff71547652b82fe, + .one_over_ln2_lo = 0x3c7777d0ffda0d24, + .nan = 0x7ff8000000000000, + .inf = 0x7ff0000000000000, + .neginf = 0xfff0000000000000, + .magcmp = mag_cmp64, + .two_by_two = two_by_two64, + .log_overkill = logoverkill64, + .emin = -1022, + .emax = 1023, + .imax = 9223372036854775807, + .imin = -9223372036854775808, +] + +const pown32 = {x : flt32, n : int32 + -> powngen(x, n, desc32) +} + +const pown64 = {x : flt64, n : int64 + -> powngen(x, n, desc64) +} + +generic powngen = {x : @f, n : @i, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i + var xb + xb = d.tobits(x) + + var xn : bool, xe : @i, xs : @u + (xn, xe, xs) = d.explode(x) + + var nf : @f = (n : @f) + + /* + Special cases. Note we do not follow IEEE exceptions. + */ + if n == 0 + /* + Anything^0 is 1. We're taking the view that x is a tiny range of reals, + so a dense subset of them are 1, even if x is 0.0. + */ + -> 1.0 + elif std.isnan(x) + /* Propagate NaN (why doesn't this come first? Ask IEEE.) */ + -> d.frombits(d.nan) + elif (x == 0.0) + if n < 0 && (n % 2 == 1) && xn + /* (+/- 0)^n = +/- oo */ + -> d.frombits(d.neginf) + elif n < 0 + -> d.frombits(d.inf) + elif n % 2 == 1 + /* (+/- 0)^n = +/- 0 (n odd) */ + -> d.assem(xn, d.emin - 1, 0) + else + -> 0.0 + ;; + elif n == 1 + /* Anything^1 is itself */ + -> x + ;; + + /* (-f)^n = (-1)^n * (f)^n. Figure this out now, then pretend f >= 0.0 */ + var ult_sgn = 1.0 + if xn && (n % 2 == 1 || n % 2 == -1) + ult_sgn = -1.0 + ;; + + /* + Compute (with x = xs * 2^e) + + x^n = 2^(n*log2(xs)) * 2^(n*e) + + = 2^(I + F) * 2^(n*e) + = 2^(F) * 2^(I+n*e) + + Since n and e, and I are all integers, we can get the last part from + scale2. The hard part is computing I and F, and then computing 2^F. + */ + var ln_xs_hi, ln_xs_lo + (ln_xs_hi, ln_xs_lo) = d.log_overkill(d.assem(false, 0, xs)) + + /* Now x^n = 2^(n * [ ln_xs / ln(2) ]) * 2^(n + e) */ + + var ls1 : @f[8] + (ls1[0], ls1[1]) = d.two_by_two(ln_xs_hi, d.frombits(d.one_over_ln2_hi)) + (ls1[2], ls1[3]) = d.two_by_two(ln_xs_hi, d.frombits(d.one_over_ln2_lo)) + (ls1[4], ls1[5]) = d.two_by_two(ln_xs_lo, d.frombits(d.one_over_ln2_hi)) + (ls1[6], ls1[7]) = d.two_by_two(ln_xs_lo, d.frombits(d.one_over_ln2_lo)) + + /* + Now log2(xs) = Sum(ls1), so + + x^n = 2^(n * Sum(ls1)) * 2^(n * e) + */ + var E1, E2 + (E1, E2) = double_compensated_sum(ls1[0:8]) + var ls2 : @f[5] + var ls2s : @f[5] + var I = 0 + (ls2[0], ls2[1]) = d.two_by_two(E1, nf) + (ls2[2], ls2[3]) = d.two_by_two(E2, nf) + ls2[4] = 0.0 + + /* Now x^n = 2^(Sum(ls2)) * 2^(n + e) */ + + for var j = 0; j < 5; ++j + var i = rn(ls2[j]) + I += i + ls2[j] -= (i : @f) + ;; + + var F1, F2 + std.slcp(ls2s[0:5], ls2[0:5]) + std.sort(ls2s[0:5], d.magcmp) + (F1, F2) = double_compensated_sum(ls2s[0:5]) + + if (F1 < 0.0 || F1 > 1.0) + var i = rn(F1) + I += i + ls2[4] -= (i : @f) + std.slcp(ls2s[0:5], ls2[0:5]) + std.sort(ls2s[0:5], d.magcmp) + (F1, F2) = double_compensated_sum(ls2s[0:5]) + ;; + + /* Now, x^n = 2^(F1 + F2) * 2^(I + n*e). */ + var ls3 : @f[6] + var log2_hi, log2_lo + (log2_hi, log2_lo) = d.C[128] + (ls3[0], ls3[1]) = d.two_by_two(F1, d.frombits(log2_hi)) + (ls3[2], ls3[3]) = d.two_by_two(F1, d.frombits(log2_lo)) + (ls3[4], ls3[5]) = d.two_by_two(F2, d.frombits(log2_hi)) + var G1, G2 + (G1, G2) = double_compensated_sum(ls3[0:6]) + + var base1 = exp(G1) + var base2 = exp(G2) + var pow_xen = xe * n + var pow = pow_xen + I + if pow_xen / n != xe || (I > 0 && d.imax - I < pow_xen) || (I < 0 && d.imin - I > pow_xen) + /* + The exponent overflowed. There's no way this is representable. We need + to at least recover the correct sign. If the overflow was from the + multiplication, then the sign we want is the sign that pow_xen should + have been. If the overflow was from the addition, then we still want + the sign that pow_xen should have had. + */ + if (xe > 0) == (n > 0) + pow = 2 * d.emax + else + pow = 2 * d.emin + ;; + ;; + + -> ult_sgn * scale2(base1 * base2, pow) +} diff --git a/lib/math/test/pown-impl.myr b/lib/math/test/pown-impl.myr new file mode 100644 index 0000000..8fd4495 --- /dev/null +++ b/lib/math/test/pown-impl.myr @@ -0,0 +1,119 @@ +use std +use math +use testr + +const main = { + math.fptrap(false) + testr.run([ + [.name="pown-01", .fn = pown01], + [.name="pown-02", .fn = pown02], + [.name="pown-03", .fn = pown03], + ][:]) +} + +const int32fromuint32 = {b; -> (&b : int32#)#} +const int64fromuint64 = {b; -> (&b : int64#)#} + +/* Mostly obtained from fpmath-consensus */ +const pown01 = {c + var inputs : (uint32, uint32, uint32)[:] = [ + (0x000000f6, 0x00000000, 0x3f800000), + (0x00000000, 0x3d800000, 0x00000000), + (0x946fc13b, 0x3b21efc7, 0x80000000), + (0xb76e98b6, 0xdbeb6637, 0xff800000), + (0xc04825b7, 0x53cdd772, 0x7f800000), + (0x3f7ff8c0, 0x000a53ba, 0x097c15ec), + (0xbefb4dd0, 0xffffffc6, 0x5d3b4cbc), + (0xbf77a88b, 0x0000038f, 0xa9b0342c), + (0xbd5cdf23, 0xffffffeb, 0xebb17f33), + (0x3fb66246, 0x000000e0, 0x78ac13ae), + (0x3bfec3ab, 0x00000005, 0x2df9e189), + (0x3f7762db, 0x000002b8, 0x2e466343), + (0x3e245431, 0xfffffff7, 0x4b582b71), + (0xbf73903f, 0x00000328, 0x2276ffa9), + (0x3f6a088a, 0xfffffe4d, 0x5b9dcb2e), + (0xc06650d6, 0x00000019, 0xd691b004), + (0x3f751050, 0x00000731, 0x0583b3e2), + (0x3f8124aa, 0x00001e6d, 0x7171d834), + (0xbf6c06b0, 0x000001b2, 0x260cb0e7), + (0x38307acf, 0x00000003, 0x29a7bd3a), + (0x3f7fafa1, 0x00005c97, 0x2a835867), + (0xbf80fb78, 0xfffffdcb, 0xbc5a0a5b), + (0xbff532bd, 0xffffffe7, 0xb3bc09eb), + (0xbf804fe0, 0x00002cbb, 0xd39529b4), + (0xbf7eaac3, 0x000026c1, 0x9a1b5779), + (0xb6ecbef2, 0xfffffff9, 0xfb5d43d7), + (0x40c1d332, 0xffffffef, 0x29628a12), + ][:] + + for (x, y, z) : inputs + var xf : flt32 = std.flt32frombits(x) + var yi : int32 = int32fromuint32(y) + var zf : flt32 = std.flt32frombits(z) + var rf = math.pown(xf, yi) + testr.check(c, rf == zf, + "pown(0x{b=16,w=8,p=0}, {}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}", + x, yi, z, std.flt32bits(rf)) + ;; +} + +/* Mostly obtained from fpmath-consensus */ +const pown02 = {c + var inputs : (uint64, uint64, uint64)[:] = [ + (0x212c65442137286a, 0x000000007f47df7c, 0x0000000000000000), + (0x9add65abd325b6eb, 0xfc2b1173d2f9d7c5, 0xfff0000000000000), + (0xc00616fb023b43b5, 0x57ce36258314fd54, 0x7ff0000000000000), + (0xc12300696c144c98, 0x0000000000000032, 0x7c152603516d90a8), + (0xbfd3dfdfc81e60e0, 0xfffffffffffffe8a, 0x675fe457fecdc77b), + (0xbff03687f3c6d148, 0xffffffffffffa5ce, 0x2465ab45a663c6e7), + (0x3feae36a83f91b30, 0xfffffffffffff2a9, 0x75864016b705ed6e), + (0x3feb9a4d0abc8192, 0xffffffffffffff7f, 0x41a6cb5da02a95f7), + (0xbfbf733faaeccb42, 0xffffffffffffffc8, 0x4a851d5e1c674b7c), + (0xbff0a5cfa8cc163b, 0xfffffffffffffc2c, 0x3c6dbc034c342e8e), + (0xbfee9beac6a3caf0, 0xffffffffffffef92, 0x50c94fc31f862949), + (0x4cba94fc5ab93856, 0xfffffffffffffffe, 0x26572fe2438caeb6), + (0x3feccc63016da0b8, 0x00000000000010a3, 0x17735a1a2b8d1115), + (0x3ff06cc390fe6413, 0x0000000000006183, 0x7aec68e2525e0756), + (0xbfef7da61a3fc45e, 0x0000000000003c20, 0x29ac311e57d77bb2), + (0xbd17d3142424b4ce, 0x000000000000000d, 0x9b061d29ecc312d7), + (0xbd9b46e7dcf5ddd7, 0xffffffffffffffee, 0x69d1b75168c2d601), + (0x3fb885ed105fdf42, 0x000000000000004b, 0x301272ca384d27ab), + (0xbfedd75714c016db, 0x00000000000004a5, 0xb8723796de107a57), + (0x3ff25198b971bb27, 0x00000000000012f3, 0x7b21bc435390825e), + (0x3fef791958603e0e, 0xffffffffffff85b6, 0x6ecebfe2dc493669), + (0xc017043172d0152b, 0x00000000000000e9, 0xe4b2c1666379afdc), + (0xc0325800cfeffb8e, 0x00000000000000d8, 0x78983c24a5e29e19), + (0xbfee2ae3cd3208ec, 0x00000000000006b7, 0xb6cb06585f39893d), + ][:] + + for (x, y, z) : inputs + var xf : flt64 = std.flt64frombits(x) + var yi : int64 = int64fromuint64(y) + var zf : flt64 = std.flt64frombits(z) + var rf = math.pown(xf, yi) + testr.check(c, rf == zf, + "pown(0x{b=16,w=16,p=0}, {}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}", + x, yi, z, std.flt64bits(rf)) + ;; +} + +/* Here we quarantine off some known-bad results */ +const pown03 = {c + var inputs : (uint32, uint32, uint32, uint32)[:] = [ + (0x3f80040a, 0xfff6d0a0, 0x09f93f64, 0x09f93f63), + (0x409d7f5a, 0xffffffd5, 0x0e0c8fa0, 0x0e0c8f9f), + (0xbfb588b4, 0x0000008c, 0x62be78b9, 0x62be78ba), + (0xb5e0d8c0, 0x00000002, 0x2c457c08, 0x2c457c07), + ][:] + + for (x, y, z_perfect, z_accepted) : inputs + var xf : flt32 = std.flt32frombits(x) + var yi : int32 = int32fromuint32(y) + var zf_perfect : flt32 = std.flt32frombits(z_perfect) + var zf_accepted : flt32 = std.flt32frombits(z_accepted) + var rf = math.pown(xf, yi) + testr.check(c, rf == zf_perfect || rf == zf_accepted, + "pown(0x{b=16,w=8,p=0}, {}) should be 0x{b=16,w=8,p=0}, will also accept 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}", + x, yi, z_perfect, z_accepted, std.flt32bits(rf)) + ;; +} |