-rw-r--r--  lib/math/atan-impl.myr   2
-rw-r--r--  lib/math/sin-impl.myr   81
-rw-r--r--  lib/math/sum-impl.myr   14
-rw-r--r--  lib/math/tan-impl.myr   10
-rw-r--r--  lib/math/util.myr       74
5 files changed, 103 insertions, 78 deletions
diff --git a/lib/math/atan-impl.myr b/lib/math/atan-impl.myr
index 78f235d..38b973a 100644
--- a/lib/math/atan-impl.myr
+++ b/lib/math/atan-impl.myr
@@ -427,7 +427,7 @@ const atan264 = {y, x
du = 0.0
else
var t1, t2
- (t1, t2) = two_by_two(u, x)
+ (t1, t2) = two_by_two64(u, x)
du = ((y - t1) - t2)/x
;;
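
[Aside from the two_by_two -> two_by_two64 rename, the logic in this hunk is the standard quotient-refinement step: u = rn(y/x) is formed first, two_by_two64 writes u*x exactly as t1 + t2, and du = ((y - t1) - t2)/x is the first-order correction to u. A minimal Python sketch of the same step; names are illustrative, Python floats are binary64, and math.fma needs Python 3.13+:

    import math

    def two_prod(a, b):
        # error-free product: p + e == a*b exactly
        p = a * b
        e = math.fma(a, b, -p)
        return p, e

    def refined_quotient(y, x):
        u = y / x                 # rounded quotient
        t1, t2 = two_prod(u, x)   # u*x exactly, as t1 + t2
        du = ((y - t1) - t2) / x  # residual correction, as in atan264
        return u, du
]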
diff --git a/lib/math/sin-impl.myr b/lib/math/sin-impl.myr
index ba7099b..5a321a4 100644
--- a/lib/math/sin-impl.myr
+++ b/lib/math/sin-impl.myr
@@ -508,8 +508,8 @@ const w64 = {x : flt64, want_sin : bool, want_cos : bool
in the polynomial approximations.
*/
var delta1, delta2, deltat
- (delta1, deltat) = fast2sum(-std.flt64frombits(xi), x1)
- (delta2, _) = fast2sum(deltat, x2)
+ (delta1, deltat) = slow2sum(-std.flt64frombits(xi), x1)
+ (delta2, _) = slow2sum(deltat, x2)
/*
sin(delta); the degree 2 coefficient is near 0, so delta_2
@@ -528,16 +528,16 @@ const w64 = {x : flt64, want_sin : bool, want_cos : bool
var q : flt64[4]
if (want_sin && !swap_sin_cos) || (want_cos && swap_sin_cos)
- (q[0], q[1]) = two_by_two(std.flt64frombits(sin_xi), cos_delta)
- (q[2], q[3]) = two_by_two(std.flt64frombits(cos_xi), sin_delta)
- std.sort(q[:], fltabscmp)
+ (q[0], q[1]) = two_by_two64(std.flt64frombits(sin_xi), cos_delta)
+ (q[2], q[3]) = two_by_two64(std.flt64frombits(cos_xi), sin_delta)
+ std.sort(q[:], mag_cmp64)
(sin_ret, sin_ret2) = double_compensated_sum(q[:])
;;
if (want_cos && !swap_sin_cos) || (want_sin && swap_sin_cos)
- (q[0], q[1]) = two_by_two(std.flt64frombits(cos_xi), cos_delta)
- (q[2], q[3]) = two_by_two(std.flt64frombits(sin_xi), sin_delta)
+ (q[0], q[1]) = two_by_two64(std.flt64frombits(cos_xi), cos_delta)
+ (q[2], q[3]) = two_by_two64(std.flt64frombits(sin_xi), sin_delta)
q[2] = -q[2]
q[3] = -q[3]
@@ -620,16 +620,16 @@ const trig_reduce = {x : flt64
total_N = rn(x1 * std.flt64frombits(two_over_pi))
Nf = (-total_N : flt64)
(q[0], q[ 4], q[5]) = (x1, x2, x3)
- (q[1], q[ 3]) = two_by_two(Nf, pi_o_2_0)
- (q[2], q[ 6]) = two_by_two(Nf, pi_o_2_1)
- (q[7], q[ 8]) = two_by_two(Nf, pi_o_2_2)
- (q[9], q[10]) = two_by_two(Nf, pi_o_2_3)
+ (q[1], q[ 3]) = two_by_two64(Nf, pi_o_2_0)
+ (q[2], q[ 6]) = two_by_two64(Nf, pi_o_2_1)
+ (q[7], q[ 8]) = two_by_two64(Nf, pi_o_2_2)
+ (q[9], q[10]) = two_by_two64(Nf, pi_o_2_3)
/*
Sorting is very slow, but it's only the top five or so
that are in question
*/
- std.sort(q[0:5], fltabscmp)
+ std.sort(q[0:5], mag_cmp64)
(x1, x2) = double_compensated_sum(q[:])
while !(le_22(x1, x2, pi_o_4_0, pi_o_4_1) && le_22(-pi_o_4_0, -pi_o_4_1, x1, x2))
@@ -641,10 +641,10 @@ const trig_reduce = {x : flt64
cancelled by Nf * pi_o_2_0, so line those up.
*/
(q[0], q[2]) = (x1, x2)
- (q[1], q[3]) = two_by_two(Nf, pi_o_2_0)
- (q[4], q[5]) = two_by_two(Nf, pi_o_2_1)
- (q[6], q[7]) = two_by_two(Nf, pi_o_2_2)
- (q[8], q[9]) = two_by_two(Nf, pi_o_2_3)
+ (q[1], q[3]) = two_by_two64(Nf, pi_o_2_0)
+ (q[4], q[5]) = two_by_two64(Nf, pi_o_2_1)
+ (q[6], q[7]) = two_by_two64(Nf, pi_o_2_2)
+ (q[8], q[9]) = two_by_two64(Nf, pi_o_2_3)
(x1, x2) = double_compensated_sum(q[0:10])
total_N += (N % 4)
;;
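
[The reduction above is a Cody-Waite scheme pushed to extended precision: pi/2 is carried as four non-overlapping pieces pi_o_2_0..pi_o_2_3, each product Nf * pi_o_2_i is made exact with two_by_two64, and the pieces are recombined with a compensated sum. A two-piece Python sketch of the underlying idea; the constants and the 24-bit split are assumptions for illustration, while the real code keeps four pieces and exact products:

    import math, struct

    def truncate_significand(c, bits):
        # zero the low (52 - bits) stored significand bits, so that
        # small-integer multiples of the result are exact
        u = struct.unpack('<Q', struct.pack('<d', c))[0]
        u &= ~((1 << (52 - bits)) - 1)
        return struct.unpack('<d', struct.pack('<Q', u))[0]

    hi = truncate_significand(math.pi / 2, 24)
    lo = math.pi / 2 - hi            # exact remainder (within binary64)

    x = 10.0
    N = round(x / (math.pi / 2))
    r = (x - N * hi) - N * lo        # x - N*pi/2, with N*hi exact
    print(N, r)                      # 6, ~0.5752220392306207
]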
@@ -731,15 +731,15 @@ const huge_reduce_2pi = {x : flt64
since xc is so small.
*/
var q : flt64[18]
- (q[ 0], q[ 1]) = two_by_two(xa, a1)
- (q[ 2], q[ 3]) = two_by_two(xa, a2)
- (q[ 4], q[ 5]) = two_by_two(xa, a3)
- (q[ 6], q[ 7]) = two_by_two(xb, b1)
- (q[ 8], q[ 9]) = two_by_two(xb, b2)
- (q[10], q[11]) = two_by_two(xb, b3)
- (q[12], q[13]) = two_by_two(xc, c1)
- (q[14], q[15]) = two_by_two(xc, c2)
- (q[16], q[17]) = two_by_two(xc, c3)
+ (q[ 0], q[ 1]) = two_by_two64(xa, a1)
+ (q[ 2], q[ 3]) = two_by_two64(xa, a2)
+ (q[ 4], q[ 5]) = two_by_two64(xa, a3)
+ (q[ 6], q[ 7]) = two_by_two64(xb, b1)
+ (q[ 8], q[ 9]) = two_by_two64(xb, b2)
+ (q[10], q[11]) = two_by_two64(xb, b3)
+ (q[12], q[13]) = two_by_two64(xc, c1)
+ (q[14], q[15]) = two_by_two64(xc, c2)
+ (q[16], q[17]) = two_by_two64(xc, c3)
-> triple_compensated_sum(q[:])
}
@@ -764,37 +764,6 @@ const trig_table_approx = {x1 : flt64, C : (uint64, uint64, uint64)[257]
-> (xi, cos_xi, sin_xi)
}
-const triple_compensated_sum = {q : flt64[:]
- /* TODO: verify, with GAPPA or something, that this is correct. */
- std.sort(q, fltabscmp)
- var s1 : flt64, s2 : flt64, s3
- var t1 : flt64, t2 : flt64, t3 : flt64, t4 : flt64, t5 : flt64, t6
- s1 = q[0]
- s2 = 0.0
- s3 = 0.0
- for qq : q[1:]
- (t5, t6) = fast2sum(s3, qq)
- (t3, t4) = fast2sum(s2, t5)
- (t1, t2) = fast2sum(s1, t3)
- s1 = t1
- (s2, s3) = fast2sum(t2, t4 + t6)
- ;;
-
- -> (s1, s2, s3)
-}
-
-const fltabscmp = {x : flt64, y : flt64
- var xb = std.flt64bits(x) & ~(1 << 63)
- var yb = std.flt64bits(y) & ~(1 << 63)
- if xb == yb
- -> `std.Equal
- elif xb > yb
- -> `std.Before
- else
- -> `std.After
- ;;
-}
-
/*
Return true iff (a1 + a2) <= (b1 + b2)
*/
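
[The fast2sum -> slow2sum swaps are the substantive fix in this file: Dekker's Fast2Sum is an error-free transformation only when |a| >= |b|, and here -std.flt64frombits(xi) is not guaranteed to dominate x1. Knuth's branch-free 2Sum (added to util.myr below as slow2sum) carries no such precondition. A small Python demonstration of the failure mode; helper names are illustrative, Python floats are binary64:

    def fast2sum(a, b):            # valid only when |a| >= |b|
        s = a + b
        return s, b - (s - a)

    def slow2sum(a, b):            # unconditional 2Sum
        s = a + b
        aa = s - b
        bb = s - aa
        return s, (a - aa) + (b - bb)

    a, b = 1e-16, 1.0              # |a| < |b|: precondition violated
    print(fast2sum(a, b))          # (1.0, 2**-53): s + t != a + b
    print(slow2sum(a, b))          # (1.0, 1e-16):  s + t == a + b exactly
]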
diff --git a/lib/math/sum-impl.myr b/lib/math/sum-impl.myr
index 9826947..81f9a23 100644
--- a/lib/math/sum-impl.myr
+++ b/lib/math/sum-impl.myr
@@ -1,5 +1,7 @@
use std
+use "util"
+
/* For references, see [Mul+10] section 6.3 */
pkg math =
pkglocal const kahan_sum32 : (l : flt32[:] -> flt32)
@@ -67,12 +69,6 @@ pkglocal const priest_sum32 = {l : flt32[:]
-> s
}
-const mag_cmp32 = {f : flt32, g : flt32
- var u = std.flt32bits(f) & ~(1 << 31)
- var v = std.flt32bits(g) & ~(1 << 31)
- -> std.numcmp(v, u)
-}
-
pkglocal const priest_sum64 = {l : flt64[:]
var l2 = std.sldup(l)
std.sort(l, mag_cmp64)
@@ -82,12 +78,6 @@ pkglocal const priest_sum64 = {l : flt64[:]
-> s
}
-const mag_cmp64 = {f : flt64, g : flt64
- var u = std.flt64bits(f) & ~(1 << 63)
- var v = std.flt64bits(g) & ~(1 << 63)
- -> std.numcmp(v, u)
-}
-
generic double_compensated_sum = {l : @f[:] :: numeric,floating @f
/* l should be sorted in descending order */
if l.len == 0
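
[For reference, double_compensated_sum (whose body is elided by this hunk) follows Priest's doubly compensated summation from [Mul+10] section 6.3, which is why its input must be sorted in descending magnitude; mag_cmp64's reversed comparison does exactly that. A Python sketch of the classic formulation, not a transcription of the Myrddin body:

    def fast2sum(a, b):            # valid when |a| >= |b|
        s = a + b
        return s, b - (s - a)

    def doubly_compensated_sum(xs):
        # xs must be sorted by decreasing magnitude (cf. mag_cmp64)
        s, c = xs[0], 0.0
        for x in xs[1:]:
            y, u = fast2sum(c, x)
            t, v = fast2sum(s, y)
            s, c = fast2sum(t, u + v)
        return s, c
]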
diff --git a/lib/math/tan-impl.myr b/lib/math/tan-impl.myr
index adbc0ff..e2cbadf 100644
--- a/lib/math/tan-impl.myr
+++ b/lib/math/tan-impl.myr
@@ -430,8 +430,8 @@ const tanorcot = {x : flt64, want_tan : bool
;;
var delta1, delta2, deltat
- (delta1, deltat) = fast2sum(-std.flt64frombits(xi), x1)
- (delta2, _) = fast2sum(deltat, x2)
+ (delta1, deltat) = slow2sum(-std.flt64frombits(xi), x1)
+ (delta2, _) = slow2sum(deltat, x2)
var p1, p2
/*
@@ -459,7 +459,7 @@ const tanorcot = {x : flt64, want_tan : bool
var u1 = std.flt64frombits(std.flt64bits(u0) & split_mask)
var u2 = u0 - u1
- (ret1, ret2) = fast2sum(u0 - u0*u0*g, u0*((1.0 - u1*f) - u2*f))
+ (ret1, ret2) = slow2sum(u0 - u0*u0*g, u0*((1.0 - u1*f) - u2*f))
goto have_result
;;
@@ -499,12 +499,12 @@ const ptan = {x1 : flt64, x2 : flt64
var s : flt64 = x1 * x1
var p : flt64 = horner_polyu(s, tan_coeff[:])
var r1, r2
- (r1, r2) = two_by_two(p, x1)
+ (r1, r2) = two_by_two64(p, x1)
-> fast2sum(r1, r2 + x2)
}
const pcot = {x1 : flt64, x2 : flt64
var s : flt64 = x1 * x1
var p : flt64 = horner_polyu(s, cot_coeff[:])
- -> fast2sum(p/x1, std.flt64frombits(0x3fd5555555555555)*x2)
+ -> slow2sum(p/x1, std.flt64frombits(0x3fd5555555555555)*x2)
}
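
[ptan and pcot evaluate their polynomials with horner_polyu, which takes coefficients as uint64 bit patterns so they round-trip exactly through source code; the lone constant in pcot, std.flt64frombits(0x3fd5555555555555), is the binary64 value nearest 1/3. A Python sketch of that evaluation scheme; the highest-degree-first coefficient order is an assumption:

    import struct

    def flt64frombits(u):
        # reinterpret a uint64 bit pattern as a binary64 float
        return struct.unpack('<d', struct.pack('<Q', u))[0]

    def horner_polyu(x, coeffs):
        # Horner evaluation; assumes highest-degree coefficient first
        r = 0.0
        for c in coeffs:
            r = r * x + flt64frombits(c)
        return r

    print(flt64frombits(0x3fd5555555555555))   # 0.3333333333333333
]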
diff --git a/lib/math/util.myr b/lib/math/util.myr
index dadd180..8223d13 100644
--- a/lib/math/util.myr
+++ b/lib/math/util.myr
@@ -16,10 +16,19 @@ pkg math =
const need_round_away : (h : uint64, l : uint64, bitpos_last : int64 -> bool)
/* Multiply x * y to z1 + z2 */
- const two_by_two : (x : flt64, y : flt64 -> (flt64, flt64))
+ const two_by_two64 : (x : flt64, y : flt64 -> (flt64, flt64))
+ const two_by_two32 : (x : flt32, y : flt32 -> (flt32, flt32))
+
+ /* Compare by magnitude */
+ const mag_cmp32 : (f : flt32, g : flt32 -> std.order)
+ const mag_cmp64 : (f : flt64, g : flt64 -> std.order)
/* Return (s, t) such that s + t = a + b, with s = rn(a + b). */
generic fast2sum : (x : @f, y : @f -> (@f, @f)) :: floating, numeric @f
+ generic slow2sum : (x : @f, y : @f -> (@f, @f)) :: floating, numeric @f
+
+ /* return (a, b, c), a decent sum for q */
+ const triple_compensated_sum : (q : flt64[:] -> (flt64, flt64, flt64))
/* Rounds a + b (as flt64s) to a flt32. */
const round_down : (a : flt64, b : flt64 -> flt32)
@@ -188,7 +197,7 @@ const need_round_away = {h : uint64, l : uint64, bitpos_last : int64
/*
Perform high-prec multiplication: x * y = z1 + z2.
*/
-const two_by_two = {x : flt64, y : flt64
+const two_by_two64 = {x : flt64, y : flt64
var xh : flt64 = std.flt64frombits(std.flt64bits(x) & twentysix_bits_mask)
var xl : flt64 = x - xh
var yh : flt64 = std.flt64frombits(std.flt64bits(y) & twentysix_bits_mask)
@@ -213,13 +222,13 @@ const two_by_two = {x : flt64, y : flt64
(s, c) = fast2sum(s, a2)
/* a3 */
- (yy, u) = fast2sum(c, a3)
+ (yy, u) = slow2sum(c, a3)
(t, v) = fast2sum(s, yy)
z = u + v
(s, c) = fast2sum(t, z)
/* a4 */
- (yy, u) = fast2sum(c, a4)
+ (yy, u) = slow2sum(c, a4)
(t, v) = fast2sum(s, yy)
z = u + v
(s, c) = fast2sum(t, z)
@@ -227,7 +236,45 @@ const two_by_two = {x : flt64, y : flt64
-> (s, c)
}
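
[two_by_two64 is Dekker's exact product: the masked splits above produce halves whose partial products are individually exact, and the compensated accumulation (a2 through a4) recovers everything the rounded product drops. The textbook variant below uses the multiplicative Veltkamp split rather than the bit mask, but computes the same thing; a Python sketch with illustrative names:

    def split(a):
        # Veltkamp split: ah + al == a, each half small enough that
        # the cross products below are exact (C = 2**27 + 1 for binary64)
        c = 134217729.0 * a
        ah = c - (c - a)
        return ah, a - ah

    def two_prod(x, y):
        xh, xl = split(x)
        yh, yl = split(y)
        p = x * y
        e = ((xh*yh - p) + xh*yl + xl*yh) + xl*yl
        return p, e          # p + e == x*y, exactly
]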
+/*
+ The same, for flt32s
+ */
+const two_by_two32 = {x : flt32, y : flt32
+ var xL : flt64 = (x : flt64)
+ var yL : flt64 = (y : flt64)
+ var zL : flt64 = xL * yL
+ var s : flt32 = (zL : flt32)
+ var sL : flt64 = (s : flt64)
+ var cL : flt64 = zL - sL
+ var c : flt32 = (cL : flt32)
+
+ -> (s, c)
+}
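
[two_by_two32 gets exactness for free from binary64: a product of two binary32 values needs at most 24 + 24 = 48 significand bits, and binary64 provides 53, so zL is exact and s + c == x*y with no splitting at all. The same trick in Python, simulating the binary32 casts with struct; names are illustrative:

    import struct

    def as_f32(x):
        # round a binary64 value to binary32 and back
        return struct.unpack('<f', struct.pack('<f', x))[0]

    def two_by_two32(x, y):
        # x and y are assumed already representable in binary32
        z = x * y          # exact: 24 + 24 = 48 <= 53 significand bits
        s = as_f32(z)      # rounded head
        c = as_f32(z - s)  # z - s is exact and fits in binary32
        return s, c
]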
+
+const mag_cmp32 = {f : flt32, g : flt32
+ var u = std.flt32bits(f) & ~(1 << 31)
+ var v = std.flt32bits(g) & ~(1 << 31)
+ -> std.numcmp(v, u)
+}
+
+const mag_cmp64 = {f : flt64, g : flt64
+ var u = std.flt64bits(f) & ~(1 << 63)
+ var v = std.flt64bits(g) & ~(1 << 63)
+ -> std.numcmp(v, u)
+}
+
/* Return (s, t) such that s + t = a + b, with s = rn(a + b). */
+generic slow2sum = {a : @f, b : @f :: floating, numeric @f
+ var s = a + b
+ var aa = s - b
+ var bb = s - aa
+ var da = a - aa
+ var db = b - bb
+ var t = da + db
+ -> (s, t)
+}
+
+/* Return (s, t) such that s + t = a + b, with s = rn(a + b), when you KNOW |a| >= |b|. */
generic fast2sum = {a : @f, b : @f :: floating, numeric @f
var s = a + b
var z = s - a
@@ -235,6 +282,25 @@ generic fast2sum = {a : @f, b : @f :: floating, numeric @f
-> (s, t)
}
+const triple_compensated_sum = {q : flt64[:]
+ /* TODO: verify, with GAPPA or something, that this is correct. */
+ std.sort(q, mag_cmp64)
+ var s1 : flt64, s2 : flt64, s3
+ var t1 : flt64, t2 : flt64, t3 : flt64, t4 : flt64, t5 : flt64, t6
+ s1 = q[0]
+ s2 = 0.0
+ s3 = 0.0
+ for qq : q[1:]
+ (t5, t6) = slow2sum(s3, qq)
+ (t3, t4) = slow2sum(s2, t5)
+ (t1, t2) = slow2sum(s1, t3)
+ s1 = t1
+ (s2, s3) = slow2sum(t2, t4 + t6)
+ ;;
+
+ -> (s1, s2, s3)
+}
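
[A direct Python port of triple_compensated_sum can help when experimenting with the accumulation pattern: each slow2sum pushes its rounding error one slot down the (s1, s2, s3) triple. This mirrors the Myrddin above; only the sort call differs, using descending magnitude as mag_cmp64 does:

    def slow2sum(a, b):
        s = a + b
        aa = s - b
        bb = s - aa
        return s, (a - aa) + (b - bb)

    def triple_compensated_sum(q):
        q = sorted(q, key=abs, reverse=True)   # mag_cmp64 order
        s1, s2, s3 = q[0], 0.0, 0.0
        for x in q[1:]:
            t5, t6 = slow2sum(s3, x)
            t3, t4 = slow2sum(s2, t5)
            s1, t2 = slow2sum(s1, t3)
            s2, s3 = slow2sum(t2, t4 + t6)
        return s1, s2, s3
]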
+
/*
Round a + b to a flt32. Only notable if round(a) is a rounding
tie, and b is non-zero.
 */