summaryrefslogtreecommitdiff log msg author committer range
path: root/lib/math/log-overkill.myr
blob: db7fb7eff443cf3601756a4c38f7da83f31fa8e4 (plain)
 ```1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 ``` ``````use std use "fpmath" use "log-impl" use "exp-impl" use "sum-impl" use "util" /* This is an implementation of log following [Mul16] 8.3.2, returning far too much precision. These are slower than the table-and-low-degree-polynomial implementations of exp-impl.myr and log-impl.myr, but are needed to handle the powr, pown, and rootn functions. This is only for flt64, because if you want this for flt32 you should just cast to flt64, use the fast functions, and then split back. Note that the notation of [Mul16] 8.3.2 is confusing, to say the least. [NM96] is, perhaps, a clearer presentation. To recap, we use an iteration, starting with t_1 = 0, L_1 = x, and iterate as t_{n+1} = t_{n} - ln(1 + d_n 2^{-n}) L_{n+1} = L_n (1 + d_n 2^{-n}) where d_n is in {-1, 0, 1}, chosen so that as n -> oo, L_n approaches 1, then t_n approaches ln(x). If we let l_n = L_n - 1, we initialize l_1 = x - 1 and iterate as l_{n+1} = l_n (1 + d_n 2^{-n}) + d_n 2^{-n} If we further consider l'_n = 2^n l_n, we initialize l'_1 = x - 1, and iterate as l'_{n + 1} = 2 l'_{n} (1 + d_n 2^{-n}) + 2 d_n The nice thing about this is that we can pick d_n easily based on truncating l'_n to the first fractional digit: [l'_n]. Then { +1 if [l'_n] <= -1/2 d_n = { 0 if [l'_n] = 0 or 1/2 { -1 if [l'_n] >= 1 */ pkg math = pkglocal const logoverkill32 : (x : flt32 -> (flt32, flt32)) /*pkglocal*/ const logoverkill64 : (x : flt64 -> (flt64, flt64)) ;; /* Tables of 1 +/- 2^-k, for k = 0 to 159, with k = 0 a dummy row. 159 is chosen as the first k such that the error between 2^(-53 * 2) and 2^(-53 * 2) + log(1 + 2^(-k)) is less than 1 ulp, therefore we'll have a full 53 * 2 bits of precision available with these tables. The layout for C_plus is ( ln(1 + 2^-k)[hi], ln(1 + 2^-k)[lo], ln(1 + 2^-k)[very lo]) , and for C_minus it is similar, but handling ln(1 - 2^-k). They are generated by ancillary/log-overkill-constants.c. Conveniently, for k > 27, we can calculate the entry exactly using a few terms of the Taylor expansion for ln(1 + x), with the x^{2+} terms vanishing past k = 53. This is possible since we only care about two flt64s worth of precision. */ const C_plus : (uint64, uint64, uint64)[28] = [ (0x0000000000000000, 0x0000000000000000, 0x0000000000000000), /* dummy */ (0x3fd9f323ecbf984c, 0xbc4a92e513217f5c, 0x38e0c0cfa41ff669), /* k = 1 */ (0x3fcc8ff7c79a9a22, 0xbc64f689f8434012, 0x390a24ae3b2f53a1), /* k = 2 */ (0x3fbe27076e2af2e6, 0xbc361578001e0162, 0x38b55db94ebc4018), /* k = 3 */ (0x3faf0a30c01162a6, 0x3c485f325c5bbacd, 0xb8e0ece597165991), /* k = 4 */ (0x3f9f829b0e783300, 0x3c333e3f04f1ef23, 0xb8d814544147acc9), /* k = 5 */ (0x3f8fc0a8b0fc03e4, 0xbc183092c59642a1, 0xb8b52414fc416fc2), /* k = 6 */ (0x3f7fe02a6b106789, 0xbbce44b7e3711ebf, 0x386a567b6587df34), /* k = 7 */ (0x3f6ff00aa2b10bc0, 0x3c02821ad5a6d353, 0xb8912dcccb588a4a), /* k = 8 */ (0x3f5ff802a9ab10e6, 0x3bfe29e3a153e3b2, 0xb89538d49c4f745e), /* k = 9 */ (0x3f4ffc00aa8ab110, 0xbbe0fecbeb9b6cdb, 0xb877171cf29e89d1), /* k = 10 */ (0x3f3ffe002aa6ab11, 0x3b999e2b62cc632d, 0xb81eae851c58687c), /* k = 11 */ (0x3f2fff000aaa2ab1, 0x3ba0bbc04dc4e3dc, 0x38152723342e000b), /* k = 12 */ (0x3f1fff8002aa9aab, 0x3b910e6678af0afc, 0x382ed521af29bc8d), /* k = 13 */ (0x3f0fffc000aaa8ab, 0xbba3bbc110fec82c, 0xb84f79185e42fbaf), /* k = 14 */ (0x3effffe0002aaa6b, 0xbb953bbbe6661d42, 0xb835071791df7d3e), /* k = 15 */ (0x3eeffff0000aaaa3, 0xbb8553bbbd110fec, 0xb82ff1fae6cea01a), /* k = 16 */ (0x3edffff80002aaaa, 0xbb75553bbbc66662, 0x3805f05f166325ff), /* k = 17 */ (0x3ecffffc0000aaab, 0xbb6d5553bbbc1111, 0x37a380f8138f70f4), /* k = 18 */ (0x3ebffffe00002aab, 0xbb5655553bbbbe66, 0xb7f987507707503c), /* k = 19 */ (0x3eafffff00000aab, 0xbb45755553bbbbd1, 0xb7c10fec7ed7ec7e), /* k = 20 */ (0x3e9fffff800002ab, 0xbb355955553bbbbc, 0xb7d9999875075875), /* k = 21 */ (0x3e8fffffc00000ab, 0xbb2555d55553bbbc, 0x37bf7777809c09a1), /* k = 22 */ (0x3e7fffffe000002b, 0xbb15556555553bbc, 0x37b106666678af8b), /* k = 23 */ (0x3e6ffffff000000b, 0xbb055557555553bc, 0x37a110bbbbbc04e0), /* k = 24 */ (0x3e5ffffff8000003, 0xbaf555559555553c, 0x3791110e6666678b), /* k = 25 */ (0x3e4ffffffc000001, 0xbae555555d555554, 0x37811110fbbbbbc0), /* k = 26 */ (0x3e3ffffffe000000, 0x3ac5555553555556, 0xb76ddddddf333333), /* k = 27 */ ] const C_minus : (uint64, uint64, uint64)[28] = [ (0x0000000000000000, 0x0000000000000000, 0x0000000000000000), /* dummy */ (0xbfe62e42fefa39ef, 0xbc7abc9e3b39803f, 0xb907b57a079a1934), /* k = 1 */ (0xbfd269621134db92, 0xbc7e0efadd9db02b, 0x39163d5cf0b6f233), /* k = 2 */ (0xbfc1178e8227e47c, 0x3c50e63a5f01c691, 0xb8f03c776a3fb0f1), /* k = 3 */ (0xbfb08598b59e3a07, 0x3c5dd7009902bf32, 0x38ea7da07274e01d), /* k = 4 */ (0xbfa0415d89e74444, 0xbc4c05cf1d753622, 0xb8d3bc1c184cef0a), /* k = 5 */ (0xbf90205658935847, 0xbc327c8e8416e71f, 0x38b19642aac1310f), /* k = 6 */ (0xbf8010157588de71, 0xbc146662d417ced0, 0xb87e91702f8418af), /* k = 7 */ (0xbf70080559588b35, 0xbc1f96638cf63677, 0x38a90badb5e868b4), /* k = 8 */ (0xbf60040155d5889e, 0x3be8f98e1113f403, 0x38601ac2204fbf4b), /* k = 9 */ (0xbf50020055655889, 0xbbe9abe6bf0fa436, 0x3867c7d335b216f3), /* k = 10 */ (0xbf40010015575589, 0x3bec8863f23ef222, 0x38852c36a3d20146), /* k = 11 */ (0xbf30008005559559, 0x3bddd332a0e20e2f, 0x385c8b6b9ff05329), /* k = 12 */ (0xbf20004001555d56, 0x3bcddd88863f53f6, 0xb859332cbe6e6ac5), /* k = 13 */ (0xbf10002000555655, 0xbbb62224ccd5f17f, 0xb8366327cc029156), /* k = 14 */ (0xbf00001000155575, 0xbba5622237779c0a, 0xb7d38f7110a9391d), /* k = 15 */ (0xbef0000800055559, 0xbb95562222cccd5f, 0xb816715f87b8e1ee), /* k = 16 */ (0xbee0000400015556, 0x3b75553bbbb1110c, 0x381fb17b1f791778), /* k = 17 */ (0xbed0000200005555, 0xbb79555622224ccd, 0x3805074f75071791), /* k = 18 */ (0xbec0000100001555, 0xbb65d55562222377, 0xb80de7027127028d), /* k = 19 */ (0xbeb0000080000555, 0xbb5565555622222d, 0x37e9995075035075), /* k = 20 */ (0xbea0000040000155, 0xbb45575555622222, 0xb7edddde70270670), /* k = 21 */ (0xbe90000020000055, 0xbb35559555562222, 0xb7c266666af8af9b), /* k = 22 */ (0xbe80000010000015, 0xbb25555d55556222, 0xb7b11bbbbbce04e0), /* k = 23 */ (0xbe70000008000005, 0xbb15555655555622, 0xb7a111666666af8b), /* k = 24 */ (0xbe60000004000001, 0xbb05555575555562, 0xb7911113bbbbbce0), /* k = 25 */ (0xbe50000002000000, 0xbaf5555559555556, 0xb78111112666666b), /* k = 26 */ (0xbe40000001000000, 0xbac5555557555556, 0x376ddddddc888888), /* k = 27 */ ] const logoverkill32 = {x : flt32 var x64 : flt64 = (x : flt64) var l64 : flt64 = log64(x64) var y1 : flt32 = (l64 : flt32) var y2 : flt32 = ((l64 - (y1 : flt64)) : flt32) -> (y1, y2) } const logoverkill64 = {x : flt64 var xn, xe, xs (xn, xe, xs) = std.flt64explode(x) /* Special cases */ if std.isnan(x) -> (std.flt64frombits(0x7ff8000000000000), std.flt64frombits(0x7ff8000000000000)) elif xe == -1024 && xs == 0 /* log (+/- 0) is -infinity */ -> (std.flt64frombits(0xfff0000000000000), std.flt64frombits(0xfff0000000000000)) elif xe == 1023 -> (std.flt64frombits(0xfff8000000000000), std.flt64frombits(0xfff8000000000000)) elif xn /* log(-anything) is NaN */ -> (std.flt64frombits(0x7ff8000000000000), std.flt64frombits(0x7ff8000000000000)) ;; /* Deal with 2^xe up front: multiply xe by a high-precision log(2). We'll add them back in to the giant mess of tis later. */ var xef : flt64 = (xe : flt64) var log_2_hi, log_2_lo (log_2_hi, log_2_lo) = accurate_logs64[128] /* See log-impl.myr */ var lxe_1, lxe_2, lxe_3, lxe_4 (lxe_1, lxe_2) = two_by_two64(xef, std.flt64frombits(log_2_hi)) (lxe_3, lxe_4) = two_by_two64(xef, std.flt64frombits(log_2_lo)) /* We split t into three parts, so that we can gradually build up two flt64s worth of information */ var t1 = 0.0 var t2 = 0.0 var t3 = 0.0 /* We also split lprime. */ var lprime1 var lprime2 (lprime1, lprime2) = slow2sum(std.flt64assem(false, 1, xs), -2.0) var lprime3 = 0.0 for var k = 1; k <= 107; ++k /* Calculate d_k and some quanitites for iteration */ var d = 0.0 var ln_hi : flt64, ln_mi : flt64, ln_lo : flt64 /* Note the truncation method for [Mul16] is for signed-digit systems, which we don't have. This comparison follows from the remark following (8.23), though. */ if lprime1 <= -0.5 d = 1.0 (ln_hi, ln_mi, ln_lo) = get_C_plus(k) elif lprime1 < 1.0 d = 0.0 /* In this case, t_n is unchanged, and we just scale lprime by 2 */ lprime1 = lprime1 * 2.0 lprime2 = lprime2 * 2.0 lprime3 = lprime3 * 2.0 /* If you're looking for a way to speed this up, calculate how many k we can skip here, preferably by a lookup table. */ continue else d = -1.0 (ln_hi, ln_mi, ln_lo) = get_C_minus(k) ;; /* t_{n + 1} */ (t1, t2, t3) = foursum(t1, t2, t3, -1.0 * ln_hi) (t1, t2, t3) = foursum(t1, t2, t3, -1.0 * ln_mi) (t1, t2, t3) = foursum(t1, t2, t3, -1.0 * ln_lo) /* lprime_{n + 1} */ lprime1 *= 2.0 lprime2 *= 2.0 lprime3 *= 2.0 var lp1m = d * scale2(lprime1, -k) var lp2m = d * scale2(lprime2, -k) var lp3m = d * scale2(lprime3, -k) (lprime1, lprime2, lprime3) = foursum(lprime1, lprime2, lprime3, lp1m) (lprime1, lprime2, lprime3) = foursum(lprime1, lprime2, lprime3, lp2m) (lprime1, lprime2, lprime3) = foursum(lprime1, lprime2, lprime3, lp3m) (lprime1, lprime2, lprime3) = foursum(lprime1, lprime2, lprime3, 2.0 * d) ;; var l : flt64[:] = [t1, t2, t3, lxe_1, lxe_2, lxe_3, lxe_4][:] std.sort(l, mag_cmp64) -> double_compensated_sum(l) } /* significand for 1/3 (if you reconstruct without compensating, you get 4/3) */ const one_third_sig = 0x0015555555555555 /* and for 1/5 (if you reconstruct, you get 8/5) */ const one_fifth_sig = 0x001999999999999a /* These calculations are incredibly slow. Somebody should speed them up. */ const get_C_plus = {k : int64 if k < 0 -> (0.0, 0.0, 0.0) elif k < 28 var t1, t2, t3 (t1, t2, t3) = C_plus[k] -> (std.flt64frombits(t1), std.flt64frombits(t2), std.flt64frombits(t3)) elif k < 54 var t1 = std.flt64assem(false, -k, 1 << 53) /* x [ = 2^-k ] */ var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ var t3 = std.flt64assem(false, -3*k - 2, one_third_sig) /* x^3 / 3 */ var t4 = std.flt64assem(true, -4*k - 2, 1 << 53) /* -x^4 / 4 */ var t5 = std.flt64assem(false, -5*k - 3, one_fifth_sig) /* x^5 / 5 */ -> triple_compensated_sum([t1, t2, t3, t4, t5][:]) else var t1 = std.flt64assem(false, -k, 1 << 53) /* x [ = 2^-k ] */ var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ var t3 = std.flt64assem(false, -3*k - 2, one_third_sig) /* x^3 / 3 */ -> (t1, t2, t3) ;; } const get_C_minus = {k : int64 if k < 0 -> (0.0, 0.0, 0.0) elif k < 28 var t1, t2, t3 (t1, t2, t3) = C_minus[k] -> (std.flt64frombits(t1), std.flt64frombits(t2), std.flt64frombits(t3)) elif k < 54 var t1 = std.flt64assem(true, -k, 1 << 53) /* x [ = 2^-k ] */ var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ var t3 = std.flt64assem(true, -3*k - 2, one_third_sig) /* x^3 / 3 */ var t4 = std.flt64assem(true, -4*k - 2, 1 << 53) /* -x^4 / 4 */ var t5 = std.flt64assem(true, -5*k - 3, one_fifth_sig) /* x^5 / 5 */ -> triple_compensated_sum([t1, t2, t3, t4, t5][:]) else var t1 = std.flt64assem(true, -k, 1 << 53) /* x [ = 2^-k ] */ var t2 = std.flt64assem(true, -2*k - 1, 1 << 53) /* -x^2 / 2 */ var t3 = std.flt64assem(true, -3*k - 2, one_third_sig) /* x^3 / 3 */ -> (t1, t2, t3) ;; } const foursum = {a1 : flt64, a2 : flt64, a3 : flt64, x : flt64 var t1, t2, t3, t4, t5, t6, s1, s2, s3, s4 (t5, t6) = slow2sum(a3, x) (t3, t4) = slow2sum(a2, t5) (t1, t2) = slow2sum(a1, t3) (s3, s4) = slow2sum(t4, t6) (s1, s2) = slow2sum(t2, s3) -> (t1, s1, s2 + s4) } ``````