summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorS. Gilles <sgilles@math.umd.edu>2019-06-09 03:18:07 -0400
committerS. Gilles <sgilles@math.umd.edu>2019-06-09 04:23:42 -0400
commit6ac5eec951dc80bc612e5c6784429e43ffff02ad (patch)
treedf0fcfe154d620902d18587622c81c2d93b97ff1
parent2002d2fa999d7efeb71aa64a32a1a87c2f4b8399 (diff)
downloadmc-6ac5eec951dc80bc612e5c6784429e43ffff02ad.tar.gz
Rewrite powr to use log-overkill.
-rw-r--r--lib/math/powr-impl.myr286
-rw-r--r--lib/math/test/powr-impl.myr14
2 files changed, 52 insertions, 248 deletions
diff --git a/lib/math/powr-impl.myr b/lib/math/powr-impl.myr
index 5df69cd..32ee678 100644
--- a/lib/math/powr-impl.myr
+++ b/lib/math/powr-impl.myr
@@ -2,6 +2,7 @@ use std
use "fpmath"
use "log-impl"
+use "log-overkill"
use "util"
/*
@@ -14,12 +15,6 @@ use "util"
example, IEEE 754-2008 does not specify what powr(infty, y) must
return when y is not 0.0 (an erratum was planned in 2010, but
does not appear to have been released as of 2018).
-
- As a note: unlike many other functions in this library, there
- has been no serious analysis of the accuracy and speed of this
- particular implementation. Interested observers wishing to improve
- this library will probably find this file goldmine of mistakes,
- both theoretical and practical.
*/
pkg math =
pkglocal const powr32 : (x : flt32, y : flt32 -> flt32)
@@ -38,17 +33,9 @@ type fltdesc(@f, @u, @i) = struct
emax : @i
emin : @i
sgnmask : @u
- sig8mask : @u
- sig8last : @u
- split_prec_mask : @u
- split_prec_mask2 : @u
- C : (@u, @u)[:]
- eps_inf_border : @u
- eps_zero_border : @u
- exp_inf_border : @u
+ log_overkill : (x : @f -> (@f, @f))
+ split_mul : (x_h : @f, x_l : @f, y_h : @f, y_l : @f -> (@f, @f))
exp_zero_border : @u
- exp_subnormal_border : @u
- itercount : @u
;;
const desc32 : fltdesc(flt32, uint32, int32) = [
@@ -63,17 +50,9 @@ const desc32 : fltdesc(flt32, uint32, int32) = [
.emax = 127,
.emin = -126,
.sgnmask = 1 << 31,
- .sig8mask = 0xffff0000, /* Mask to get 8 significant bits */
- .sig8last = 16, /* Last bit kept when masking */
- .split_prec_mask = 0xffff0000, /* 16 trailing zeros */
- .split_prec_mask2 = 0xfffff000, /* 12 trailing zeros */
- .C = accurate_logs32[0:130], /* See log-impl.myr */
- .eps_inf_border = 0x4eb00f34, /* maximal y st. (1.00..1)^y < oo */
- .eps_zero_border = 0x4ecff1b4, /* minimal y st. (0.99..9)^y > 0 */
- .exp_inf_border = 0x42b17218, /* maximal y such that e^y < oo */
+ .log_overkill = logoverkill32,
+ .split_mul = split_mul32,
.exp_zero_border = 0xc2cff1b4, /* minimal y such that e^y > 0 */
- .exp_subnormal_border = 0xc2aeac50, /* minimal y such that e^y is normal */
- .itercount = 4, /* How many iterations of Taylor series for (1 + f)^y' */
]
const desc64 : fltdesc(flt64, uint64, int64) = [
@@ -88,19 +67,20 @@ const desc64 : fltdesc(flt64, uint64, int64) = [
.emax = 1023,
.emin = -1022,
.sgnmask = 1 << 63,
- .sig8mask = 0xffffe00000000000, /* Mask to get 8 significant bits */
- .sig8last = 45, /* Last bit kept when masking */
- .split_prec_mask = 0xffffff0000000000, /* 40 trailing zeroes */
- .split_prec_mask2 = 0xfffffffffffc0000, /* 18 trailing zeroes */
- .C = accurate_logs64[0:130], /* See log-impl.myr */
- .eps_inf_border = 0x43d628b76e3a7b61, /* maximal y st. (1.00..1)^y < oo */
- .eps_zero_border = 0x43d74e9c65eceee0, /* minimal y st. (0.99..9)^y > 0 */
- .exp_inf_border = 0x40862e42fefa39ef, /* maximal y such that e^y < oo */
+ .log_overkill = logoverkill64,
+ .split_mul = hl_mult,
.exp_zero_border = 0xc0874910d52d3052, /* minimal y such that e^y > 0 */
- .exp_subnormal_border = 0xc086232bdd7abcd2, /* minimal y such that e^y is normal */
- .itercount = 8,
]
+const split_mul32 = {x_h : flt32, x_l : flt32, y_h : flt32, y_l : flt32
+ var x : flt64 = (x_h : flt64) + (x_l : flt64)
+ var y : flt64 = (y_h : flt64) + (y_l : flt64)
+ var z = x * y
+ var z_h : flt32 = (z : flt32)
+ var z_l : flt32 = ((z - (z_h : flt64)) : flt32)
+ -> (z_h, z_l)
+}
+
const powr32 = {x : flt32, y : flt32
-> powrgen(x, y, desc32)
}
@@ -182,225 +162,41 @@ generic powrgen = {x : @f, y : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,s
;;
;;
- /* Normalize x and y */
- if xe < d.emin
- var first_1 = find_first1_64((xs : uint64), (d.precision : int64))
- var offset = (d.precision : @u) - 1 - (first_1 : @u)
- xs = xs << offset
- xe = d.emin - offset
- ;;
-
- if ye < d.emin
- var first_1 = find_first1_64((ys : uint64), (d.precision : int64))
- var offset = (d.precision : @u) - 1 - (first_1 : @u)
- ys = ys << offset
- ye = d.emin - offset
- ;;
-
/*
- Split x into 2^N * F * (1 + f), with F = 1 + j/128 (some
- j) and f tiny. Compute F naively by truncation. Compute
- f via f = (x' - 1 - F)/(1 + F), where 1/(1 + F) is
- precomputed and x' is x/2^N. 128 is chosen so that we
- can borrow some constants from log-impl.myr.
-
- [Tan90] hints at a method of computing x^y which may be
- comparable to this approach, but which is unfortunately
- has not been elaborated on (as far as I can discover).
+ Just do the dumb thing: compute exp( log(x) ยท y ). All the hard work
+ goes into computing log(x) with high enough precision that our exp()
+ implementation becomes the weakest link. The Table Maker's Dilemma
+ says that quantifying "high enough" is a very difficult problem, but
+ experimentally twice the precision of @f appears quite good enough.
*/
- var N = xe
- var j, F, Fn, Fe, Fs
- var xprime = d.assem(false, 0, xs)
-
- if need_round_away(0, (xs : uint64), (d.sig8last : int64))
- F = d.frombits((d.tobits(xprime) & d.sig8mask) + (1 << d.sig8last))
- else
- F = d.frombits(d.tobits(xprime) & d.sig8mask)
- ;;
+ var ln_x_hi, ln_x_lo
+ (ln_x_hi, ln_x_lo) = d.log_overkill(x)
- (Fn, Fe, Fs) = d.explode(F)
+ var final_exp_hi, final_exp_lo
+ (final_exp_hi, final_exp_lo) = d.split_mul(ln_x_hi, ln_x_lo, y, 0.0)
- if Fe != 0
- j = 128
- else
- j = 0x7f & ((d.sig8mask & Fs) >> d.sig8last)
- ;;
-
- var f = (xprime - F)/F
-
- /*
- y could actually be above integer infinity, in which
- case x^y is most certainly infinity or 0. More importantly,
- we can't safely compute M (below).
- */
- if x > (1.0 : @f)
- if y > d.frombits(d.eps_inf_border)
- -> d.frombits(d.inf)
- elif -y > d.frombits(d.eps_inf_border)
- -> (0.0 : @f)
- ;;
- elif x < (1.0 : @f)
- if y > d.frombits(d.eps_zero_border) && x < (1.0 : @f)
- -> (0.0 : @f)
- elif -y > d.frombits(d.eps_zero_border) && x < (1.0 : @f)
+ if d.tobits(final_exp_hi) & d.expmask == d.inf
+ /*
+ split_mul doesn't actually preserve the sign of infinity, so we can't
+ trust final_exp_hi to get it.
+ */
+ if (d.tobits(ln_x_hi) & d.sgnmask) == (yb & d.sgnmask)
+ /* e^+Inf */
-> d.frombits(d.inf)
+ else
+ /* e^-Inf */
+ -> 0.0
;;
;;
- /* Split y into M + y', with |y'| <= 0.5 and M an integer */
- var M = floor(y)
- var yprime = y - M
- if yprime > (0.5 : @f)
- M += (1.0 : @f)
- yprime = y - M
- elif yprime < (-0.5 : @f)
- M -= (1.0: @f)
- yprime = y - M
- ;;
-
- /*
- We'll multiply y' by log(2) and try to keep extra
- precision, so we need to split y'. Since the high word
- of C has 24 - 10 = 14 significant bits (53 - 16 = 37 in
- flt64 case), we ensure 15 (39) trailing zeroes in
- yprime_hi. (We also need this for y'*N, M, &c).
- */
- var yprime_hi = d.frombits(d.tobits(yprime) & d.split_prec_mask)
- var yprime_lo = yprime - yprime_hi
- var yprimeN_hi = d.frombits(d.tobits((N : @f) * yprime) & d.split_prec_mask)
- var yprimeN_lo = fma((N : @f), yprime, -yprimeN_hi)
- var M_hi = d.frombits(d.tobits(M) & d.split_prec_mask)
- var M_lo = M - M_hi
-
- /*
- At this point, we've built out
-
- x^y = [ 2^N * F * (1 + f) ]^(M + y')
-
- where N, M are integers, F is well-known, and f, y' are
- tiny. So we can get to computing
-
- /-1-\ /-------------------2--------------------------\ /-3--\
- 2^(N*M) * exp(log(F)*y' + log2*N*y' + log(F)*M + M*log(1+f)) * (1+f)^y'
-
- where 1 can be handled by scale2, 2 we can mostly fake
- by sticking high-precision values for log(F) and log(2)
- through exp(), and 3 is composed of small numbers,
- therefore can be reasonably approximated by a Taylor
- expansion.
- */
-
- /* t2 */
- var log2_lo, log2_hi, Cu_hi, Cu_lo
- (log2_hi, log2_lo) = d.C[128]
- (Cu_hi, Cu_lo) = d.C[j]
-
- var es : @f[20]
- std.slfill(es[:], (0.0 : @f))
-
- /* log(F) * y' */
- es[0] = d.frombits(Cu_hi) * yprime_hi
- es[1] = d.frombits(Cu_lo) * yprime_hi
- es[2] = d.frombits(Cu_hi) * yprime_lo
- es[3] = d.frombits(Cu_lo) * yprime_lo
-
- /* log(2) * N * y' */
- es[4] = d.frombits(log2_hi) * yprimeN_hi
- es[5] = d.frombits(log2_lo) * yprimeN_hi
- es[6] = d.frombits(log2_hi) * yprimeN_lo
- es[7] = d.frombits(log2_lo) * yprimeN_lo
-
- /* log(F) * M */
- es[8] = d.frombits(Cu_hi) * M_hi
- es[9] = d.frombits(Cu_lo) * M_hi
- es[10] = d.frombits(Cu_hi) * M_lo
- es[11] = d.frombits(Cu_lo) * M_lo
-
- /* log(1 + f) * M */
- var lf = log1p(f)
- var lf_hi = d.frombits(d.tobits(lf) & d.split_prec_mask)
- var lf_lo = lf - lf_hi
- es[12] = lf_hi * M_hi
- es[13] = lf_lo * M_hi
- es[14] = lf_hi * M_lo
- es[15] = lf_lo * M_lo
-
- /*
- The correct way to handle this would be to compare
- magnitudes of eis and parenthesize the additions correctly.
- We take the cheap way out.
- */
- var exp_hi = priest_sum(es[0:16])
-
- /*
- We would like to just compute exp(exp_hi) * exp(exp_lo).
- However, if that takes us into subnormal territory, yet
- N * M is large, that will throw away a few bits of
- information. We can correct for this by adding in a few
- copies of P*log(2), then subtract off P when we compute
- scale2() at the end.
-
- We also have to be careful that P doesn't have too many
- significant bits, otherwise we throw away some information
- of log2_hi.
- */
- var P = -rn(exp_hi / d.frombits(log2_hi))
- var P_f = (P : @f)
- P_f = d.frombits(d.tobits(P_f) & d.split_prec_mask2)
- P = rn(P_f)
-
- es[16] = P_f * d.frombits(log2_hi)
- es[17] = P_f * d.frombits(log2_lo)
- exp_hi = priest_sum(es[0:18])
- es[18] = -exp_hi
- var exp_lo = priest_sum(es[0:19])
-
-
- var t2 = exp(exp_hi) * exp(exp_lo)
-
- /*
- t3: Abbreviated Taylor expansion for (1 + f)^y' - 1.
- Since f is on the order of 2^-7 (and y' is on the order
- of 2^-1), we need to go up to f^3 for single-precision,
- and f^7 for double. We can then compute (1 + t3) * t2
-
- The expansion is \Sum_{k=1}^{\infty} {y' \choose k} x^k
- */
- var terms : @f[10] = [
- (0.0 : @f), (0.0 : @f), (0.0 : @f), (0.0 : @f), (0.0 : @f),
- (0.0 : @f), (0.0 : @f), (0.0 : @f), (0.0 : @f), (0.0 : @f),
- ]
- var current = (1.0 : @f)
- for var j = 0; j <= d.itercount; ++j
- current = current * f * (yprime - (j : @f)) / ((j : @f) + (1.0 : @f))
- terms[j] = current
- ;;
- var t3 = priest_sum(terms[0:d.itercount + 1])
-
- var total_exp_f = (N : @f) * M - (P : @f)
- if total_exp_f > ((d.emax - d.emin + d.precision + 1) : @f)
- -> d.frombits(d.inf)
- elif total_exp_f < -((d.emax - d.emin + d.precision + 1) : @f)
- -> (0.0 : @f)
+ if final_exp_hi < d.frombits(d.exp_zero_border)
+ -> 0.0
;;
- /*
- Pull t2's exponent out so that we don't hit subnormal
- calculation with the t3 multiplication
- */
- var t2n, t2e, t2s
- (t2n, t2e, t2s) = d.explode(t2)
-
- if t2e < d.emin
- var t2_first_1 = find_first1_64((t2s : uint64), (d.precision : int64))
- var t2_offset = (d.precision : @u) - 1 - (t2_first_1 : @u)
- t2s = t2s << t2_offset
- t2e = d.emin - (t2_offset : @i)
+ var z_hi = exp(final_exp_hi)
+ if d.tobits(z_hi) & d.expmask == d.inf
+ -> z_hi
;;
- t2 = d.assem(t2n, 0, t2s)
- P -= t2e
-
- var base = fma(t2, t3, t2)
- -> scale2(base, N * rn(M) - P)
+ -> z_hi * (1.0 + final_exp_lo)
}
diff --git a/lib/math/test/powr-impl.myr b/lib/math/test/powr-impl.myr
index 8e6110a..b3fa0b8 100644
--- a/lib/math/test/powr-impl.myr
+++ b/lib/math/test/powr-impl.myr
@@ -22,8 +22,6 @@ const powr01 = {c
(0x653f944a, 0xbf7c2388, 0x1a3c784b),
(0x545ba67c, 0xc0c7e947, 0x00000000),
(0x3fca6b0d, 0x44ff18e0, 0x7f800000),
- // (0x3f74c7a7, 0x44feae20, 0x000265c6),
- // (0x3f7ebd6c, 0xc5587884, 0x4bc9ab07),
][:]
for (x, y, z) : inputs
@@ -40,6 +38,16 @@ const powr01 = {c
const powr02 = {c
var inputs : (uint64, uint64, uint64)[:] = [
(0x0000000000000000, 0x0000000000000000, 0x0000000000000000),
+ (0x0d30ad40d8223045, 0xffb56d6e33cd6ad2, 0x7ff0000000000000),
+ (0x1f192b55704d3500, 0xff8a52f877359f3c, 0x7ff0000000000000),
+ (0x7fe6c53d8cef3d27, 0x4bcca2e651c57788, 0x7ff0000000000000),
+ (0x7fe78a3740493383, 0xe84e801c38a71fc9, 0x0000000000000000),
+ (0x7fea162ffbabd3bc, 0x02414c7fa33dd7db, 0x3ff0000000000000),
+ (0x7fe087a9112a21d8, 0x0f2b7a9584736b41, 0x3ff0000000000000),
+ (0x7fe5d78f049c0918, 0xd3a145ba5b0fdb9b, 0x0000000000000000),
+ (0x7febb342860b3386, 0xe758bd2af063aec2, 0x0000000000000000),
+
+ (0x000342cdeeb18fc9, 0xbe645d513ed4670d, 0x3ff0001c3d5eaa62),
][:]
for (x, y, z) : inputs
@@ -57,7 +65,7 @@ const powr03 = {c
var inputs : (uint32, uint32, uint32, uint32)[:] = [
(0x1bd2244e, 0x3a647973, 0x3f7535a1, 0x3f7535a0),
(0x3f264a46, 0x423c927a, 0x30c9b8d3, 0x30c9b8d4),
- (0x61fb73d0, 0xbfd2666c, 0x06c539f6, 0x06c539f7),
+ (0x61fb73d0, 0xbfd2666c, 0x06c539f6, 0x06c539f5),
(0x3bbd11f6, 0x3cc159b1, 0x3f62ac1b, 0x3f62ac1a),
(0x3f7ca5b7, 0xc309857a, 0x40c41bbf, 0x40c41bc0),
(0x3f6a1a65, 0x43e16065, 0x226731e2, 0x226731e3),