diff options
author | S. Gilles <sgilles@math.umd.edu> | 2019-05-02 07:20:05 -0400 |
---|---|---|
committer | S. Gilles <sgilles@math.umd.edu> | 2019-05-02 07:23:27 -0400 |
commit | 9dd2a40974da19018eeb33338cfcc486f4eeca8d (patch) | |
tree | e0924ee372ad376ef339bdd559a5d2c75bb75b80 | |
parent | ac2d8aa21e1e6ae45573b268151fda57cada110d (diff) | |
download | mc-9dd2a40974da19018eeb33338cfcc486f4eeca8d.tar.gz |
Replace costly sum with slightly faster sums in log-overkill.
This still doesn't make the function fast, but it's less embarassingly
slow now.
-rw-r--r-- | lib/math/log-overkill.myr | 49 |
1 files changed, 45 insertions, 4 deletions
diff --git a/lib/math/log-overkill.myr b/lib/math/log-overkill.myr index 17fb7f9..a7e27ed 100644 --- a/lib/math/log-overkill.myr +++ b/lib/math/log-overkill.myr @@ -252,13 +252,19 @@ const get_C_plus = {k : int64 var t1, t2, t3 (t1, t2, t3) = C_plus[k] -> (std.flt64frombits(t1), std.flt64frombits(t2), std.flt64frombits(t3)) - elif k < 54 + elif k < 36 var t1 = std.flt64assem(false, -k, 1 << 53) /* x [ = 2^-k ] */ var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ var t3 = std.flt64assem(false, -3*k - 2, one_third_sig) /* x^3 / 3 */ var t4 = std.flt64assem(true, -4*k - 2, 1 << 53) /* -x^4 / 4 */ var t5 = std.flt64assem(false, -5*k - 3, one_fifth_sig) /* x^5 / 5 */ - -> triple_compensated_sum([t1, t2, t3, t4, t5][:]) + -> fast_fivesum(t1, t2, t3, t4, t5) + elif k < 54 + var t1 = std.flt64assem(false, -k, 1 << 53) /* x [ = 2^-k ] */ + var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ + var t3 = std.flt64assem(false, -3*k - 2, one_third_sig) /* x^3 / 3 */ + var t4 = std.flt64assem(true, -4*k - 2, 1 << 53) /* -x^4 / 4 */ + -> fast_foursum(t1, t2, t3, t4) else var t1 = std.flt64assem(false, -k, 1 << 53) /* x [ = 2^-k ] */ var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ @@ -274,13 +280,19 @@ const get_C_minus = {k : int64 var t1, t2, t3 (t1, t2, t3) = C_minus[k] -> (std.flt64frombits(t1), std.flt64frombits(t2), std.flt64frombits(t3)) - elif k < 54 + elif k < 36 var t1 = std.flt64assem(true, -k, 1 << 53) /* x [ = 2^-k ] */ var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ var t3 = std.flt64assem(true, -3*k - 2, one_third_sig) /* x^3 / 3 */ var t4 = std.flt64assem(true, -4*k - 2, 1 << 53) /* -x^4 / 4 */ var t5 = std.flt64assem(true, -5*k - 3, one_fifth_sig) /* x^5 / 5 */ - -> triple_compensated_sum([t1, t2, t3, t4, t5][:]) + -> fast_fivesum(t1, t2, t3, t4, t5) + elif k < 54 + var t1 = std.flt64assem(true, -k, 1 << 53) /* x [ = 2^-k ] */ + var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ + var t3 = std.flt64assem(true, -3*k - 2, one_third_sig) /* x^3 / 3 */ + var t4 = std.flt64assem(true, -4*k - 2, 1 << 53) /* -x^4 / 4 */ + -> fast_foursum(t1, t2, t3, t4) else var t1 = std.flt64assem(true, -k, 1 << 53) /* x [ = 2^-k ] */ var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ @@ -300,3 +312,32 @@ const foursum = {a1 : flt64, a2 : flt64, a3 : flt64, x : flt64 -> (t1, s1, s2 + s4) } + +/* + Specifically for use in get_C_{plus,minus}, in which we know the + magnitude orders of the ais. + */ +const fast_foursum = {a1 : flt64, a2 : flt64, a3 : flt64, a4 : flt64 + (a3, a4) = fast2sum(a3, a4) + (a2, a3) = fast2sum(a2, a3) + (a1, a2) = fast2sum(a1, a2) + + (a3, a4) = slow2sum(a3, a4) + (a2, a3) = slow2sum(a2, a3) + + -> (a1, a2, a3 + a4) +} + +const fast_fivesum = {a1 : flt64, a2 : flt64, a3 : flt64, a4 : flt64, a5 : flt64 + (a4, a5) = fast2sum(a4, a5) + (a3, a4) = fast2sum(a3, a4) + (a2, a3) = fast2sum(a2, a3) + (a1, a2) = fast2sum(a1, a2) + + (a4, a5) = slow2sum(a4, a5) + (a3, a4) = slow2sum(a3, a4) + (a2, a3) = slow2sum(a2, a3) + + (a4, a5) = slow2sum(a4, a5) + -> (a1, a2, a3 + a4) +} |