author     S. Gilles <sgilles@math.umd.edu>    2019-05-01 15:57:06 -0400
committer  S. Gilles <sgilles@math.umd.edu>    2019-05-01 15:57:59 -0400
commit     218258e4ac2940303be597272245cb09662fd759 (patch)
tree       a82aa527a29f863e16216bfc1725efbf209fbc50 /lib
parent     fb2a06c91f60a948687939741aa884b17219f715 (diff)
download   mc-218258e4ac2940303be597272245cb09662fd759.tar.gz
Add 2sum in math util
Fast2sum is only valid when the relative magnitudes of the arguments are known in advance (the larger-magnitude argument must come first). 2sum (here called "slow2sum") avoids that restriction, at the cost of a few extra floating-point operations. It ends up being necessary in the forthcoming log-overkill function.
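As a rough illustration (a sketch, not part of this patch; it assumes these end up visible as math.fast2sum and math.slow2sum), the difference shows up when the smaller-magnitude argument comes first:

use std
use math

const main = {
	/* hypothetical usage sketch, not code from this commit */
	/* 2^60: its ulp is 2^8 = 256, so adding 1.0 to it is inexact */
	var big : flt64 = 1152921504606846976.0
	var s, t

	/* fast2sum assumes the larger argument comes first; here it does not, and the 1.0 is silently dropped */
	(s, t) = math.fast2sum(1.0, big)	/* s = 2^60, t = 0.0 */

	/* slow2sum makes no ordering assumption and recovers the rounding error exactly */
	(s, t) = math.slow2sum(1.0, big)	/* s = 2^60, t = 1.0, so s + t = big + 1.0 exactly */

	std.put("{} + {}\n", s, t)
}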
Diffstat (limited to 'lib')
-rw-r--r--  lib/math/atan-impl.myr  |  2
-rw-r--r--  lib/math/sin-impl.myr   | 81
-rw-r--r--  lib/math/sum-impl.myr   | 14
-rw-r--r--  lib/math/tan-impl.myr   | 10
-rw-r--r--  lib/math/util.myr       | 74
5 files changed, 103 insertions, 78 deletions
diff --git a/lib/math/atan-impl.myr b/lib/math/atan-impl.myr
index 78f235d..38b973a 100644
--- a/lib/math/atan-impl.myr
+++ b/lib/math/atan-impl.myr
@@ -427,7 +427,7 @@ const atan264 = {y, x
du = 0.0
else
var t1, t2
- (t1, t2) = two_by_two(u, x)
+ (t1, t2) = two_by_two64(u, x)
du = ((y - t1) - t2)/x
;;
diff --git a/lib/math/sin-impl.myr b/lib/math/sin-impl.myr
index ba7099b..5a321a4 100644
--- a/lib/math/sin-impl.myr
+++ b/lib/math/sin-impl.myr
@@ -508,8 +508,8 @@ const w64 = {x : flt64, want_sin : bool, want_cos : bool
in the polynomial approximations.
*/
var delta1, delta2, deltat
- (delta1, deltat) = fast2sum(-std.flt64frombits(xi), x1)
- (delta2, _) = fast2sum(deltat, x2)
+ (delta1, deltat) = slow2sum(-std.flt64frombits(xi), x1)
+ (delta2, _) = slow2sum(deltat, x2)
/*
sin(delta); the degree 2 coefficient is near 0, so delta_2
@@ -528,16 +528,16 @@ const w64 = {x : flt64, want_sin : bool, want_cos : bool
var q : flt64[4]
if (want_sin && !swap_sin_cos) || (want_cos && swap_sin_cos)
- (q[0], q[1]) = two_by_two(std.flt64frombits(sin_xi), cos_delta)
- (q[2], q[3]) = two_by_two(std.flt64frombits(cos_xi), sin_delta)
- std.sort(q[:], fltabscmp)
+ (q[0], q[1]) = two_by_two64(std.flt64frombits(sin_xi), cos_delta)
+ (q[2], q[3]) = two_by_two64(std.flt64frombits(cos_xi), sin_delta)
+ std.sort(q[:], mag_cmp64)
(sin_ret, sin_ret2) = double_compensated_sum(q[:])
;;
if (want_cos && !swap_sin_cos) || (want_sin && swap_sin_cos)
- (q[0], q[1]) = two_by_two(std.flt64frombits(cos_xi), cos_delta)
- (q[2], q[3]) = two_by_two(std.flt64frombits(sin_xi), sin_delta)
+ (q[0], q[1]) = two_by_two64(std.flt64frombits(cos_xi), cos_delta)
+ (q[2], q[3]) = two_by_two64(std.flt64frombits(sin_xi), sin_delta)
q[2] = -q[2]
q[3] = -q[3]
@@ -620,16 +620,16 @@ const trig_reduce = {x : flt64
total_N = rn(x1 * std.flt64frombits(two_over_pi))
Nf = (-total_N : flt64)
(q[0], q[ 4], q[5]) = (x1, x2, x3)
- (q[1], q[ 3]) = two_by_two(Nf, pi_o_2_0)
- (q[2], q[ 6]) = two_by_two(Nf, pi_o_2_1)
- (q[7], q[ 8]) = two_by_two(Nf, pi_o_2_2)
- (q[9], q[10]) = two_by_two(Nf, pi_o_2_3)
+ (q[1], q[ 3]) = two_by_two64(Nf, pi_o_2_0)
+ (q[2], q[ 6]) = two_by_two64(Nf, pi_o_2_1)
+ (q[7], q[ 8]) = two_by_two64(Nf, pi_o_2_2)
+ (q[9], q[10]) = two_by_two64(Nf, pi_o_2_3)
/*
Sorting is very slow, but it's only the top five or so
that are in question
*/
- std.sort(q[0:5], fltabscmp)
+ std.sort(q[0:5], mag_cmp64)
(x1, x2) = double_compensated_sum(q[:])
while !(le_22(x1, x2, pi_o_4_0, pi_o_4_1) && le_22(-pi_o_4_0, -pi_o_4_1, x1, x2))
@@ -641,10 +641,10 @@ const trig_reduce = {x : flt64
cancelled by Nf * pi_o_2_0, so line those up.
*/
(q[0], q[2]) = (x1, x2)
- (q[1], q[3]) = two_by_two(Nf, pi_o_2_0)
- (q[4], q[5]) = two_by_two(Nf, pi_o_2_1)
- (q[6], q[7]) = two_by_two(Nf, pi_o_2_2)
- (q[8], q[9]) = two_by_two(Nf, pi_o_2_3)
+ (q[1], q[3]) = two_by_two64(Nf, pi_o_2_0)
+ (q[4], q[5]) = two_by_two64(Nf, pi_o_2_1)
+ (q[6], q[7]) = two_by_two64(Nf, pi_o_2_2)
+ (q[8], q[9]) = two_by_two64(Nf, pi_o_2_3)
(x1, x2) = double_compensated_sum(q[0:10])
total_N += (N % 4)
;;
@@ -731,15 +731,15 @@ const huge_reduce_2pi = {x : flt64
since xc is so small.
*/
var q : flt64[18]
- (q[ 0], q[ 1]) = two_by_two(xa, a1)
- (q[ 2], q[ 3]) = two_by_two(xa, a2)
- (q[ 4], q[ 5]) = two_by_two(xa, a3)
- (q[ 6], q[ 7]) = two_by_two(xb, b1)
- (q[ 8], q[ 9]) = two_by_two(xb, b2)
- (q[10], q[11]) = two_by_two(xb, b3)
- (q[12], q[13]) = two_by_two(xc, c1)
- (q[14], q[15]) = two_by_two(xc, c2)
- (q[16], q[17]) = two_by_two(xc, c3)
+ (q[ 0], q[ 1]) = two_by_two64(xa, a1)
+ (q[ 2], q[ 3]) = two_by_two64(xa, a2)
+ (q[ 4], q[ 5]) = two_by_two64(xa, a3)
+ (q[ 6], q[ 7]) = two_by_two64(xb, b1)
+ (q[ 8], q[ 9]) = two_by_two64(xb, b2)
+ (q[10], q[11]) = two_by_two64(xb, b3)
+ (q[12], q[13]) = two_by_two64(xc, c1)
+ (q[14], q[15]) = two_by_two64(xc, c2)
+ (q[16], q[17]) = two_by_two64(xc, c3)
-> triple_compensated_sum(q[:])
}
@@ -764,37 +764,6 @@ const trig_table_approx = {x1 : flt64, C : (uint64, uint64, uint64)[257]
-> (xi, cos_xi, sin_xi)
}
-const triple_compensated_sum = {q : flt64[:]
- /* TODO: verify, with GAPPA or something, that this is correct. */
- std.sort(q, fltabscmp)
- var s1 : flt64, s2 : flt64, s3
- var t1 : flt64, t2 : flt64, t3 : flt64, t4 : flt64, t5 : flt64, t6
- s1 = q[0]
- s2 = 0.0
- s3 = 0.0
- for qq : q[1:]
- (t5, t6) = fast2sum(s3, qq)
- (t3, t4) = fast2sum(s2, t5)
- (t1, t2) = fast2sum(s1, t3)
- s1 = t1
- (s2, s3) = fast2sum(t2, t4 + t6)
- ;;
-
- -> (s1, s2, s3)
-}
-
-const fltabscmp = {x : flt64, y : flt64
- var xb = std.flt64bits(x) & ~(1 << 63)
- var yb = std.flt64bits(y) & ~(1 << 63)
- if xb == yb
- -> `std.Equal
- elif xb > yb
- -> `std.Before
- else
- -> `std.After
- ;;
-}
-
/*
Return true iff (a1 + a2) <= (b1 + b2)
*/
diff --git a/lib/math/sum-impl.myr b/lib/math/sum-impl.myr
index 9826947..81f9a23 100644
--- a/lib/math/sum-impl.myr
+++ b/lib/math/sum-impl.myr
@@ -1,5 +1,7 @@
use std
+use "util"
+
/* For references, see [Mul+10] section 6.3 */
pkg math =
pkglocal const kahan_sum32 : (l : flt32[:] -> flt32)
@@ -67,12 +69,6 @@ pkglocal const priest_sum32 = {l : flt32[:]
-> s
}
-const mag_cmp32 = {f : flt32, g : flt32
- var u = std.flt32bits(f) & ~(1 << 31)
- var v = std.flt32bits(g) & ~(1 << 31)
- -> std.numcmp(v, u)
-}
-
pkglocal const priest_sum64 = {l : flt64[:]
var l2 = std.sldup(l)
std.sort(l, mag_cmp64)
@@ -82,12 +78,6 @@ pkglocal const priest_sum64 = {l : flt64[:]
-> s
}
-const mag_cmp64 = {f : flt64, g : flt64
- var u = std.flt64bits(f) & ~(1 << 63)
- var v = std.flt64bits(g) & ~(1 << 63)
- -> std.numcmp(v, u)
-}
-
generic double_compensated_sum = {l : @f[:] :: numeric,floating @f
/* l should be sorted in descending order */
if l.len == 0
diff --git a/lib/math/tan-impl.myr b/lib/math/tan-impl.myr
index adbc0ff..e2cbadf 100644
--- a/lib/math/tan-impl.myr
+++ b/lib/math/tan-impl.myr
@@ -430,8 +430,8 @@ const tanorcot = {x : flt64, want_tan : bool
;;
var delta1, delta2, deltat
- (delta1, deltat) = fast2sum(-std.flt64frombits(xi), x1)
- (delta2, _) = fast2sum(deltat, x2)
+ (delta1, deltat) = slow2sum(-std.flt64frombits(xi), x1)
+ (delta2, _) = slow2sum(deltat, x2)
var p1, p2
/*
@@ -459,7 +459,7 @@ const tanorcot = {x : flt64, want_tan : bool
var u1 = std.flt64frombits(std.flt64bits(u0) & split_mask)
var u2 = u0 - u1
- (ret1, ret2) = fast2sum(u0 - u0*u0*g, u0*((1.0 - u1*f) - u2*f))
+ (ret1, ret2) = slow2sum(u0 - u0*u0*g, u0*((1.0 - u1*f) - u2*f))
goto have_result
;;
@@ -499,12 +499,12 @@ const ptan = {x1 : flt64, x2 : flt64
var s : flt64 = x1 * x1
var p : flt64 = horner_polyu(s, tan_coeff[:])
var r1, r2
- (r1, r2) = two_by_two(p, x1)
+ (r1, r2) = two_by_two64(p, x1)
-> fast2sum(r1, r2 + x2)
}
const pcot = {x1 : flt64, x2 : flt64
var s : flt64 = x1 * x1
var p : flt64 = horner_polyu(s, cot_coeff[:])
- -> fast2sum(p/x1, std.flt64frombits(0x3fd5555555555555)*x2)
+ -> slow2sum(p/x1, std.flt64frombits(0x3fd5555555555555)*x2)
}
diff --git a/lib/math/util.myr b/lib/math/util.myr
index dadd180..8223d13 100644
--- a/lib/math/util.myr
+++ b/lib/math/util.myr
@@ -16,10 +16,19 @@ pkg math =
const need_round_away : (h : uint64, l : uint64, bitpos_last : int64 -> bool)
/* Multiply x * y to z1 + z2 */
- const two_by_two : (x : flt64, y : flt64 -> (flt64, flt64))
+ const two_by_two64 : (x : flt64, y : flt64 -> (flt64, flt64))
+ const two_by_two32 : (x : flt32, y : flt32 -> (flt32, flt32))
+
+ /* Compare by magnitude */
+ const mag_cmp32 : (f : flt32, g : flt32 -> std.order)
+ const mag_cmp64 : (f : flt64, g : flt64 -> std.order)
/* Return (s, t) such that s + t = a + b, with s = rn(a + b). */
generic fast2sum : (x : @f, y : @f -> (@f, @f)) :: floating, numeric @f
+ generic slow2sum : (x : @f, y : @f -> (@f, @f)) :: floating, numeric @f
+
+ /* return (a, b, c), a decent sum for q */
+ const triple_compensated_sum : (q : flt64[:] -> (flt64, flt64, flt64))
/* Rounds a + b (as flt64s) to a flt32. */
const round_down : (a : flt64, b : flt64 -> flt32)
@@ -188,7 +197,7 @@ const need_round_away = {h : uint64, l : uint64, bitpos_last : int64
/*
Perform high-prec multiplication: x * y = z1 + z2.
*/
-const two_by_two = {x : flt64, y : flt64
+const two_by_two64 = {x : flt64, y : flt64
var xh : flt64 = std.flt64frombits(std.flt64bits(x) & twentysix_bits_mask)
var xl : flt64 = x - xh
var yh : flt64 = std.flt64frombits(std.flt64bits(y) & twentysix_bits_mask)
@@ -213,13 +222,13 @@ const two_by_two = {x : flt64, y : flt64
(s, c) = fast2sum(s, a2)
/* a3 */
- (yy, u) = fast2sum(c, a3)
+ (yy, u) = slow2sum(c, a3)
(t, v) = fast2sum(s, yy)
z = u + v
(s, c) = fast2sum(t, z)
/* a4 */
- (yy, u) = fast2sum(c, a4)
+ (yy, u) = slow2sum(c, a4)
(t, v) = fast2sum(s, yy)
z = u + v
(s, c) = fast2sum(t, z)
@@ -227,7 +236,45 @@ const two_by_two = {x : flt64, y : flt64
-> (s, c)
}
+/*
+ The same, for flt32s
+ */
+const two_by_two32 = {x : flt32, y : flt32
+ var xL : flt64 = (x : flt64)
+ var yL : flt64 = (y : flt64)
+ var zL : flt64 = xL * yL
+ var s : flt32 = (zL : flt32)
+ var sL : flt64 = (s : flt64)
+ var cL : flt64 = zL - sL
+ var c : flt32 = (cL : flt32)
+
+ -> (s, c)
+}
+
+const mag_cmp32 = {f : flt32, g : flt32
+ var u = std.flt32bits(f) & ~(1 << 31)
+ var v = std.flt32bits(g) & ~(1 << 31)
+ -> std.numcmp(v, u)
+}
+
+const mag_cmp64 = {f : flt64, g : flt64
+ var u = std.flt64bits(f) & ~(1 << 63)
+ var v = std.flt64bits(g) & ~(1 << 63)
+ -> std.numcmp(v, u)
+}
+
/* Return (s, t) such that s + t = a + b, with s = rn(a + b). */
+generic slow2sum = {a : @f, b : @f :: floating, numeric @f
+ var s = a + b
+ var aa = s - b
+ var bb = s - aa
+ var da = a - aa
+ var db = b - bb
+ var t = da + db
+ -> (s, t)
+}
+
+/* Return (s, t) such that s + t = a + b, with s = rn(a + b), when you KNOW |a| > |b|. */
generic fast2sum = {a : @f, b : @f :: floating, numeric @f
var s = a + b
var z = s - a
@@ -235,6 +282,25 @@ generic fast2sum = {a : @f, b : @f :: floating, numeric @f
-> (s, t)
}
+const triple_compensated_sum = {q : flt64[:]
+ /* TODO: verify, with GAPPA or something, that this is correct. */
+ std.sort(q, mag_cmp64)
+ var s1 : flt64, s2 : flt64, s3
+ var t1 : flt64, t2 : flt64, t3 : flt64, t4 : flt64, t5 : flt64, t6
+ s1 = q[0]
+ s2 = 0.0
+ s3 = 0.0
+ for qq : q[1:]
+ (t5, t6) = slow2sum(s3, qq)
+ (t3, t4) = slow2sum(s2, t5)
+ (t1, t2) = slow2sum(s1, t3)
+ s1 = t1
+ (s2, s3) = slow2sum(t2, t4 + t6)
+ ;;
+
+ -> (s1, s2, s3)
+}
+
/*
Round a + b to a flt32. Only notable if round(a) is a rounding
tie, and b is non-zero