Diffstat (limited to 'lib')
-rw-r--r--  lib/math/atan-impl.myr |  2
-rw-r--r--  lib/math/sin-impl.myr  | 81
-rw-r--r--  lib/math/sum-impl.myr  | 14
-rw-r--r--  lib/math/tan-impl.myr  | 10
-rw-r--r--  lib/math/util.myr      | 74

5 files changed, 103 insertions(+), 78 deletions(-)
diff --git a/lib/math/atan-impl.myr b/lib/math/atan-impl.myr
index 78f235d..38b973a 100644
--- a/lib/math/atan-impl.myr
+++ b/lib/math/atan-impl.myr
@@ -427,7 +427,7 @@ const atan264 = {y, x
         du = 0.0
     else
         var t1, t2
-        (t1, t2) = two_by_two(u, x)
+        (t1, t2) = two_by_two64(u, x)
         du = ((y - t1) - t2)/x
     ;;
diff --git a/lib/math/sin-impl.myr b/lib/math/sin-impl.myr
index ba7099b..5a321a4 100644
--- a/lib/math/sin-impl.myr
+++ b/lib/math/sin-impl.myr
@@ -508,8 +508,8 @@ const w64 = {x : flt64, want_sin : bool, want_cos : bool
        in the polynomial approximations.
      */
     var delta1, delta2, deltat
-    (delta1, deltat) = fast2sum(-std.flt64frombits(xi), x1)
-    (delta2, _) = fast2sum(deltat, x2)
+    (delta1, deltat) = slow2sum(-std.flt64frombits(xi), x1)
+    (delta2, _) = slow2sum(deltat, x2)

     /*
        sin(delta); the degree 2 coefficient is near 0, so delta_2
@@ -528,16 +528,16 @@
     var q : flt64[4]

     if (want_sin && !swap_sin_cos) || (want_cos && swap_sin_cos)
-        (q[0], q[1]) = two_by_two(std.flt64frombits(sin_xi), cos_delta)
-        (q[2], q[3]) = two_by_two(std.flt64frombits(cos_xi), sin_delta)
-        std.sort(q[:], fltabscmp)
+        (q[0], q[1]) = two_by_two64(std.flt64frombits(sin_xi), cos_delta)
+        (q[2], q[3]) = two_by_two64(std.flt64frombits(cos_xi), sin_delta)
+        std.sort(q[:], mag_cmp64)
         (sin_ret, sin_ret2) = double_compensated_sum(q[:])
     ;;

     if (want_cos && !swap_sin_cos) || (want_sin && swap_sin_cos)
-        (q[0], q[1]) = two_by_two(std.flt64frombits(cos_xi), cos_delta)
-        (q[2], q[3]) = two_by_two(std.flt64frombits(sin_xi), sin_delta)
+        (q[0], q[1]) = two_by_two64(std.flt64frombits(cos_xi), cos_delta)
+        (q[2], q[3]) = two_by_two64(std.flt64frombits(sin_xi), sin_delta)
         q[2] = -q[2]
         q[3] = -q[3]
@@ -620,16 +620,16 @@ const trig_reduce = {x : flt64
         total_N = rn(x1 * std.flt64frombits(two_over_pi))
         Nf = (-total_N : flt64)
         (q[0], q[ 4], q[5]) = (x1, x2, x3)
-        (q[1], q[ 3]) = two_by_two(Nf, pi_o_2_0)
-        (q[2], q[ 6]) = two_by_two(Nf, pi_o_2_1)
-        (q[7], q[ 8]) = two_by_two(Nf, pi_o_2_2)
-        (q[9], q[10]) = two_by_two(Nf, pi_o_2_3)
+        (q[1], q[ 3]) = two_by_two64(Nf, pi_o_2_0)
+        (q[2], q[ 6]) = two_by_two64(Nf, pi_o_2_1)
+        (q[7], q[ 8]) = two_by_two64(Nf, pi_o_2_2)
+        (q[9], q[10]) = two_by_two64(Nf, pi_o_2_3)

         /*
            Sorting is very slow, but it's only the top five or so
            that are in question
          */
-        std.sort(q[0:5], fltabscmp)
+        std.sort(q[0:5], mag_cmp64)
         (x1, x2) = double_compensated_sum(q[:])

         while !(le_22(x1, x2, pi_o_4_0, pi_o_4_1) && le_22(-pi_o_4_0, -pi_o_4_1, x1, x2))
@@ -641,10 +641,10 @@ const trig_reduce = {x : flt64
                cancelled by Nf * pi_o_2_0, so line those up.
              */
             (q[0], q[2]) = (x1, x2)
-            (q[1], q[3]) = two_by_two(Nf, pi_o_2_0)
-            (q[4], q[5]) = two_by_two(Nf, pi_o_2_1)
-            (q[6], q[7]) = two_by_two(Nf, pi_o_2_2)
-            (q[8], q[9]) = two_by_two(Nf, pi_o_2_3)
+            (q[1], q[3]) = two_by_two64(Nf, pi_o_2_0)
+            (q[4], q[5]) = two_by_two64(Nf, pi_o_2_1)
+            (q[6], q[7]) = two_by_two64(Nf, pi_o_2_2)
+            (q[8], q[9]) = two_by_two64(Nf, pi_o_2_3)
             (x1, x2) = double_compensated_sum(q[0:10])
             total_N += (N % 4)
         ;;
@@ -731,15 +731,15 @@ const huge_reduce_2pi = {x : flt64
        since xc is so small.
      */
     var q : flt64[18]
-    (q[ 0], q[ 1]) = two_by_two(xa, a1)
-    (q[ 2], q[ 3]) = two_by_two(xa, a2)
-    (q[ 4], q[ 5]) = two_by_two(xa, a3)
-    (q[ 6], q[ 7]) = two_by_two(xb, b1)
-    (q[ 8], q[ 9]) = two_by_two(xb, b2)
-    (q[10], q[11]) = two_by_two(xb, b3)
-    (q[12], q[13]) = two_by_two(xc, c1)
-    (q[14], q[15]) = two_by_two(xc, c2)
-    (q[16], q[17]) = two_by_two(xc, c3)
+    (q[ 0], q[ 1]) = two_by_two64(xa, a1)
+    (q[ 2], q[ 3]) = two_by_two64(xa, a2)
+    (q[ 4], q[ 5]) = two_by_two64(xa, a3)
+    (q[ 6], q[ 7]) = two_by_two64(xb, b1)
+    (q[ 8], q[ 9]) = two_by_two64(xb, b2)
+    (q[10], q[11]) = two_by_two64(xb, b3)
+    (q[12], q[13]) = two_by_two64(xc, c1)
+    (q[14], q[15]) = two_by_two64(xc, c2)
+    (q[16], q[17]) = two_by_two64(xc, c3)

     -> triple_compensated_sum(q[:])
 }
@@ -764,37 +764,6 @@ const trig_table_approx = {x1 : flt64, C : (uint64, uint64, uint64)[257]
     -> (xi, cos_xi, sin_xi)
 }

-const triple_compensated_sum = {q : flt64[:]
-    /* TODO: verify, with GAPPA or something, that this is correct. */
-    std.sort(q, fltabscmp)
-    var s1 : flt64, s2 : flt64, s3
-    var t1 : flt64, t2 : flt64, t3 : flt64, t4 : flt64, t5 : flt64, t6
-    s1 = q[0]
-    s2 = 0.0
-    s3 = 0.0
-    for qq : q[1:]
-        (t5, t6) = fast2sum(s3, qq)
-        (t3, t4) = fast2sum(s2, t5)
-        (t1, t2) = fast2sum(s1, t3)
-        s1 = t1
-        (s2, s3) = fast2sum(t2, t4 + t6)
-    ;;
-
-    -> (s1, s2, s3)
-}
-
-const fltabscmp = {x : flt64, y : flt64
-    var xb = std.flt64bits(x) & ~(1 << 63)
-    var yb = std.flt64bits(y) & ~(1 << 63)
-    if xb == yb
-        -> `std.Equal
-    elif xb > yb
-        -> `std.Before
-    else
-        -> `std.After
-    ;;
-}
-
 /*
    Return true iff (a1 + a2) <= (b1 + b2)
  */
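The recurring substitution in this commit -- here, and again in tan-impl.myr and util.myr below -- is fast2sum -> slow2sum. Both are error-free transformations: each returns (s, t) with s = rn(a + b) and s + t = a + b exactly. But Dekker's Fast2Sum is only guaranteed when |a| >= |b|, while Knuth's 2Sum (slow2sum here) spends three extra additions to drop that precondition. At these call sites nothing pins down the relative magnitudes of the operands, so the cheaper transformation was not obviously safe. A small standalone C illustration of the difference (function names are descriptive, not the library's):

    #include <stdio.h>

    /* Fast2Sum (Dekker): s + t == a + b exactly, but ONLY if |a| >= |b|. */
    static void fast_two_sum(double a, double b, double *s, double *t)
    {
        *s = a + b;
        double z = *s - a;
        *t = b - z;
    }

    /* 2Sum (Knuth): the same guarantee, with no precondition on magnitudes. */
    static void two_sum(double a, double b, double *s, double *t)
    {
        double sum = a + b;
        double a2 = sum - b;
        double b2 = sum - a2;
        *s = sum;
        *t = (a - a2) + (b - b2);
    }

    int main(void)
    {
        double s, t;
        fast_two_sum(1.0, 1.0e17, &s, &t); /* misordered: low part lost */
        printf("fast2sum: s=%g t=%g\n", s, t); /* t = 0 */
        two_sum(1.0, 1.0e17, &s, &t); /* rounding error recovered exactly */
        printf("2sum:     s=%g t=%g\n", s, t); /* t = 1 */
        return 0;
    }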
diff --git a/lib/math/sum-impl.myr b/lib/math/sum-impl.myr
index 9826947..81f9a23 100644
--- a/lib/math/sum-impl.myr
+++ b/lib/math/sum-impl.myr
@@ -1,5 +1,7 @@
 use std

+use "util"
+
 /* For references, see [Mul+10] section 6.3 */
 pkg math =
     pkglocal const kahan_sum32 : (l : flt32[:] -> flt32)
@@ -67,12 +69,6 @@ pkglocal const priest_sum32 = {l : flt32[:]
     -> s
 }

-const mag_cmp32 = {f : flt32, g : flt32
-    var u = std.flt32bits(f) & ~(1 << 31)
-    var v = std.flt32bits(g) & ~(1 << 31)
-    -> std.numcmp(v, u)
-}
-
 pkglocal const priest_sum64 = {l : flt64[:]
     var l2 = std.sldup(l)
     std.sort(l, mag_cmp64)
@@ -82,12 +78,6 @@ pkglocal const priest_sum64 = {l : flt64[:]
     -> s
 }

-const mag_cmp64 = {f : flt64, g : flt64
-    var u = std.flt64bits(f) & ~(1 << 63)
-    var v = std.flt64bits(g) & ~(1 << 63)
-    -> std.numcmp(v, u)
-}
-
 generic double_compensated_sum = {l : @f[:] :: numeric,floating @f
     /* l should be sorted in descending order */
     if l.len == 0
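The comparators sum-impl.myr loses here were private duplicates; they move to util.myr (last file in this diff), where sin-impl.myr can share them. The trick in mag_cmp64: for finite IEEE 754 doubles, clearing the sign bit leaves a bit pattern whose unsigned-integer order agrees with magnitude order, and passing the operands to numcmp swapped makes the resulting sort descending -- the order priest_sum64 and double_compensated_sum expect. A rough C equivalent (the memcpy bit-pun stands in for std.flt64bits; NaNs are deliberately ignored):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Compare doubles by descending magnitude: clear the sign bit and
       compare the remaining bits as unsigned integers, swapped so the
       larger magnitude sorts first. Finite values only. */
    static int mag_cmp64(const void *pa, const void *pb)
    {
        uint64_t u, v;
        memcpy(&u, pa, sizeof u);
        memcpy(&v, pb, sizeof v);
        u &= ~(1ULL << 63);
        v &= ~(1ULL << 63);
        return (v > u) - (v < u);
    }

    int main(void)
    {
        double q[] = { 1.0e-9, -3.0e4, 2.0, -5.0e-3 };
        qsort(q, 4, sizeof q[0], mag_cmp64);
        for (int i = 0; i < 4; i++)
            printf("%g\n", q[i]); /* -30000, 2, -0.005, 1e-09 */
        return 0;
    }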
diff --git a/lib/math/tan-impl.myr b/lib/math/tan-impl.myr
index adbc0ff..e2cbadf 100644
--- a/lib/math/tan-impl.myr
+++ b/lib/math/tan-impl.myr
@@ -430,8 +430,8 @@ const tanorcot = {x : flt64, want_tan : bool
     ;;

     var delta1, delta2, deltat
-    (delta1, deltat) = fast2sum(-std.flt64frombits(xi), x1)
-    (delta2, _) = fast2sum(deltat, x2)
+    (delta1, deltat) = slow2sum(-std.flt64frombits(xi), x1)
+    (delta2, _) = slow2sum(deltat, x2)

     var p1, p2
     /*
@@ -459,7 +459,7 @@ const tanorcot = {x : flt64, want_tan : bool
         var u1 = std.flt64frombits(std.flt64bits(u0) & split_mask)
         var u2 = u0 - u1

-        (ret1, ret2) = fast2sum(u0 - u0*u0*g, u0*((1.0 - u1*f) - u2*f))
+        (ret1, ret2) = slow2sum(u0 - u0*u0*g, u0*((1.0 - u1*f) - u2*f))
         goto have_result
     ;;
@@ -499,12 +499,12 @@ const ptan = {x1 : flt64, x2 : flt64
     var s : flt64 = x1 * x1
     var p : flt64 = horner_polyu(s, tan_coeff[:])
     var r1, r2
-    (r1, r2) = two_by_two(p, x1)
+    (r1, r2) = two_by_two64(p, x1)

     -> fast2sum(r1, r2 + x2)
 }

 const pcot = {x1 : flt64, x2 : flt64
     var s : flt64 = x1 * x1
     var p : flt64 = horner_polyu(s, cot_coeff[:])

-    -> fast2sum(p/x1, std.flt64frombits(0x3fd5555555555555)*x2)
+    -> slow2sum(p/x1, std.flt64frombits(0x3fd5555555555555)*x2)
 }
diff --git a/lib/math/util.myr b/lib/math/util.myr
index dadd180..8223d13 100644
--- a/lib/math/util.myr
+++ b/lib/math/util.myr
@@ -16,10 +16,19 @@ pkg math =
     const need_round_away : (h : uint64, l : uint64, bitpos_last : int64 -> bool)

     /* Multiply x * y to z1 + z2 */
-    const two_by_two : (x : flt64, y : flt64 -> (flt64, flt64))
+    const two_by_two64 : (x : flt64, y : flt64 -> (flt64, flt64))
+    const two_by_two32 : (x : flt32, y : flt32 -> (flt32, flt32))
+
+    /* Compare by magnitude */
+    const mag_cmp32 : (f : flt32, g : flt32 -> std.order)
+    const mag_cmp64 : (f : flt64, g : flt64 -> std.order)

     /* Return (s, t) such that s + t = a + b, with s = rn(a + b). */
     generic fast2sum : (x : @f, y : @f -> (@f, @f)) :: floating, numeric @f
+    generic slow2sum : (x : @f, y : @f -> (@f, @f)) :: floating, numeric @f
+
+    /* return (a, b, c), a decent sum for q */
+    const triple_compensated_sum : (q : flt64[:] -> (flt64, flt64, flt64))

     /* Rounds a + b (as flt64s) to a flt32. */
     const round_down : (a : flt64, b : flt64 -> flt32)
@@ -188,7 +197,7 @@
 /*
    Perform high-prec multiplication: x * y = z1 + z2.
  */
-const two_by_two = {x : flt64, y : flt64
+const two_by_two64 = {x : flt64, y : flt64
     var xh : flt64 = std.flt64frombits(std.flt64bits(x) & twentysix_bits_mask)
     var xl : flt64 = x - xh
     var yh : flt64 = std.flt64frombits(std.flt64bits(y) & twentysix_bits_mask)
@@ -213,13 +222,13 @@ const two_by_two = {x : flt64, y : flt64
     (s, c) = fast2sum(s, a2)

     /* a3 */
-    (yy, u) = fast2sum(c, a3)
+    (yy, u) = slow2sum(c, a3)
     (t, v) = fast2sum(s, yy)
     z = u + v
     (s, c) = fast2sum(t, z)

     /* a4 */
-    (yy, u) = fast2sum(c, a4)
+    (yy, u) = slow2sum(c, a4)
     (t, v) = fast2sum(s, yy)
     z = u + v
     (s, c) = fast2sum(t, z)
@@ -227,7 +236,45 @@
     -> (s, c)
 }

+/*
+   The same, for flt32s
+ */
+const two_by_two32 = {x : flt32, y : flt32
+    var xL : flt64 = (x : flt64)
+    var yL : flt64 = (y : flt64)
+    var zL : flt64 = xL * yL
+    var s : flt32 = (zL : flt32)
+    var sL : flt64 = (s : flt64)
+    var cL : flt64 = zL - sL
+    var c : flt32 = (cL : flt32)
+
+    -> (s, c)
+}
+
+const mag_cmp32 = {f : flt32, g : flt32
+    var u = std.flt32bits(f) & ~(1 << 31)
+    var v = std.flt32bits(g) & ~(1 << 31)
+    -> std.numcmp(v, u)
+}
+
+const mag_cmp64 = {f : flt64, g : flt64
+    var u = std.flt64bits(f) & ~(1 << 63)
+    var v = std.flt64bits(g) & ~(1 << 63)
+    -> std.numcmp(v, u)
+}
+
 /* Return (s, t) such that s + t = a + b, with s = rn(a + b). */
+generic slow2sum = {a : @f, b : @f :: floating, numeric @f
+    var s = a + b
+    var aa = s - b
+    var bb = s - aa
+    var da = a - aa
+    var db = b - bb
+    var t = da + db
+    -> (s, t)
+}
+
+/* Return (s, t) such that s + t = a + b, with s = rn(a + b), when you KNOW |a| > |b|. */
 generic fast2sum = {a : @f, b : @f :: floating, numeric @f
     var s = a + b
     var z = s - a
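The new two_by_two32 gets an exact product far more cheaply than the 26-bit splitting two_by_two64 has to do: two 24-bit flt32 significands multiply into at most 48 bits, which fits inside a flt64's 53-bit significand, so the widened product is exact, and subtracting the rounded-back flt32 recovers the rounding error. A C sketch of the same argument (illustrative names, not the Myrddin API):

    #include <stdio.h>

    /* Exact float product via widening: the double product of two floats
       is exact (24 + 24 <= 53 significand bits), s is the correctly
       rounded float product, and c the rounding error, so s + c == x*y. */
    static void two_by_two32(float x, float y, float *s, float *c)
    {
        double z = (double)x * (double)y;
        *s = (float)z;
        *c = (float)(z - (double)*s);
    }

    int main(void)
    {
        float s, c;
        two_by_two32(1.0f + 0x1p-23f, 1.0f + 0x1p-23f, &s, &c);
        printf("s=%.9g c=%g\n", s, c); /* c = 2^-46, the bit rounding discarded */
        return 0;
    }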
@@ -235,6 +282,25 @@ generic fast2sum = {a : @f, b : @f :: floating, numeric @f
     var t = b - z
     -> (s, t)
 }

+const triple_compensated_sum = {q : flt64[:]
+    /* TODO: verify, with GAPPA or something, that this is correct. */
+    std.sort(q, mag_cmp64)
+    var s1 : flt64, s2 : flt64, s3
+    var t1 : flt64, t2 : flt64, t3 : flt64, t4 : flt64, t5 : flt64, t6
+    s1 = q[0]
+    s2 = 0.0
+    s3 = 0.0
+    for qq : q[1:]
+        (t5, t6) = slow2sum(s3, qq)
+        (t3, t4) = slow2sum(s2, t5)
+        (t1, t2) = slow2sum(s1, t3)
+        s1 = t1
+        (s2, s3) = slow2sum(t2, t4 + t6)
+    ;;
+
+    -> (s1, s2, s3)
+}
+
 /*
    Round a + b to a flt32. Only notable if round(a) is a
    rounding tie, and b is non-zero
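triple_compensated_sum keeps a three-term running expansion s1 + s2 + s3 and folds each addend in through a cascade of two-term sums; the switch to slow2sum matters because the compensation terms fed back through the cascade are not obviously ordered by magnitude, which fast2sum would require. A self-contained C paraphrase of the loop (the caller is responsible for the descending-magnitude sort):

    #include <stdio.h>

    /* 2Sum again (see the first sketch): unconditional error-free add. */
    static void two_sum(double a, double b, double *s, double *t)
    {
        double sum = a + b;
        double a2 = sum - b;
        double b2 = sum - a2;
        *s = sum;
        *t = (a - a2) + (b - b2);
    }

    /* Fold q[0..n-1] (sorted by descending magnitude) into a three-term
       expansion s1 + s2 + s3 -- a paraphrase of triple_compensated_sum. */
    static void triple_sum(const double *q, int n,
                           double *s1, double *s2, double *s3)
    {
        double t1, t2, t3, t4, t5, t6;
        *s1 = q[0];
        *s2 = 0.0;
        *s3 = 0.0;
        for (int i = 1; i < n; i++) {
            two_sum(*s3, q[i], &t5, &t6);
            two_sum(*s2, t5, &t3, &t4);
            two_sum(*s1, t3, &t1, &t2);
            *s1 = t1;
            two_sum(t2, t4 + t6, s2, s3);
        }
    }

    int main(void)
    {
        double q[] = { 1.0e20, -1.0e20, 3.0, 1.0e-20 };
        double s1, s2, s3;
        triple_sum(q, 4, &s1, &s2, &s3);
        printf("%g + %g + %g\n", s1, s2, s3); /* 3 + 1e-20 + 0 */
        return 0;
    }

On that input the 1e-20 survives in s2, where an ordinary running sum would round it away after the large terms cancel.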