summaryrefslogtreecommitdiff log msg author committer range
path: root/lib/math/powr-impl.myr
 ```1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 ``` ``````use std use "fpmath" use "impls" use "log-impl" use "log-overkill" use "util" /* This is an implementation of powr, not pow, so the special cases are tailored more closely to the mathematical x^y = e^(y * log(x)) than to historical C implementations (pow was aligned to the C99 standard, which was aligned to codify existing practice). Even then, some parts of the powr specification are unclear. For example, IEEE 754-2008 does not specify what powr(infty, y) must return when y is not 0.0 (an erratum was planned in 2010, but does not appear to have been released as of 2018). */ pkg math = pkglocal const powr32 : (x : flt32, y : flt32 -> flt32) pkglocal const powr64 : (x : flt64, y : flt64 -> flt64) ;; type fltdesc(@f, @u, @i) = struct explode : (f : @f -> (bool, @i, @u)) assem : (n : bool, e : @i, s : @u -> @f) tobits : (f : @f -> @u) frombits : (u : @u -> @f) nan : @u inf : @u expmask : @u precision : @u emax : @i emin : @i sgnmask : @u log_overkill : (x : @f -> (@f, @f)) fma : (x : @f, y : @f, z : @f -> @f) split_mul : (x_h : @f, x_l : @f, y_h : @f, y_l : @f -> (@f, @f)) exp_zero_border : @u ;; const desc32 : fltdesc(flt32, uint32, int32) = [ .explode = std.flt32explode, .assem = std.flt32assem, .tobits = std.flt32bits, .frombits = std.flt32frombits, .nan = 0x7fc00000, .inf = 0x7f800000, .expmask = 0x7f800000, /* mask to detect inf or NaN (inf, repeated for clarity) */ .precision = 24, .emax = 127, .emin = -126, .sgnmask = 1 << 31, .log_overkill = logoverkill32, .fma = fma32, .split_mul = split_mul32, .exp_zero_border = 0xc2cff1b4, /* minimal y such that e^y > 0 */ ] const desc64 : fltdesc(flt64, uint64, int64) = [ .explode = std.flt64explode, .assem = std.flt64assem, .tobits = std.flt64bits, .frombits = std.flt64frombits, .nan = 0x7ff8000000000000, .inf = 0x7ff0000000000000, .expmask = 0x7ff0000000000000, .precision = 53, .emax = 1023, .emin = -1022, .sgnmask = 1 << 63, .log_overkill = logoverkill64, .fma = fma64, .split_mul = hl_mult, .exp_zero_border = 0xc0874910d52d3052, /* minimal y such that e^y > 0 */ ] const split_mul32 = {x_h : flt32, x_l : flt32, y_h : flt32, y_l : flt32 var x : flt64 = (x_h : flt64) + (x_l : flt64) var y : flt64 = (y_h : flt64) + (y_l : flt64) var z = x * y var z_h : flt32 = (z : flt32) var z_l : flt32 = ((z - (z_h : flt64)) : flt32) -> (z_h, z_l) } const powr32 = {x : flt32, y : flt32 -> powrgen(x, y, desc32) } const powr64 = {x : flt64, y : flt64 -> powrgen(x, y, desc64) } generic powrgen = {x : @f, y : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i var xb, yb xb = d.tobits(x) yb = d.tobits(y) var xn : bool, xe : @i, xs : @u var yn : bool, ye : @i, ys : @u (xn, xe, xs) = d.explode(x) (yn, ye, ys) = d.explode(y) /* Special cases. Note we do not follow IEEE exceptions. */ if std.isnan(x) || std.isnan(y) /* Propagate NaN */ -> d.frombits(d.nan) elif (xb & ~d.sgnmask == 0) if (yb & ~d.sgnmask == 0) /* 0^0 is undefined. */ -> d.frombits(d.nan) elif yn /* 0^(< 0) is infinity */ -> d.frombits(d.inf) else /* otherwise, 0^y = 0. */ -> (0.0 : @f) ;; elif xn /* (< 0)^(anything) is undefined. This comes from thinking of floating-point numbers as representing small ranges of real numbers. If you really want to compute (-1.23)^5, use pown. */ -> d.frombits(d.nan) elif (xb & ~d.sgnmask == d.inf) if (yb & ~d.sgnmask == 0) /* oo^0 is undefined */ -> d.frombits(d.nan) elif yn /* +/-oo^(< 0) is +/-0 */ -> d.assem(xn, 0, 0) elif xn /* (-oo)^(anything) is undefined */ -> d.frombits(d.nan) else /* oo^(> 0) is oo */ -> d.frombits(d.inf) ;; elif std.eq(y, (1.0 : @f)) /* x^1 = x */ -> x elif yb & ~d.sgnmask == 0 /* (finite, positive)^0 = 1 */ -> (1.0 : @f) elif std.eq(x, (1.0 : @f)) if yb & ~d.sgnmask == d.inf /* 1^oo is undefined */ -> d.frombits(d.nan) else /* 1^(finite, positive) = 1 */ -> (1.0 : @f) ;; elif yb & ~d.sgnmask == d.inf if xe < 0 /* (0 < x < 1)^oo = 0 */ -> (0.0 : @f) else /* (x > 1)^oo = oo */ -> d.frombits(d.inf) ;; ;; /* Just do the dumb thing: compute exp( log(x) ยท y ). All the hard work goes into computing log(x) with high enough precision that our exp() implementation becomes the weakest link. The Table Maker's Dilemma says that quantifying "high enough" is a very difficult problem, but experimentally twice the precision of @f appears quite good enough. */ var ln_x_hi, ln_x_lo (ln_x_hi, ln_x_lo) = d.log_overkill(x) var final_exp_hi, final_exp_lo (final_exp_hi, final_exp_lo) = d.split_mul(ln_x_hi, ln_x_lo, y, 0.0) if d.tobits(final_exp_hi) & d.expmask == d.inf /* split_mul doesn't actually preserve the sign of infinity, so we can't trust final_exp_hi to get it. */ if (d.tobits(ln_x_hi) & d.sgnmask) == (yb & d.sgnmask) /* e^+Inf */ -> d.frombits(d.inf) else /* e^-Inf */ -> 0.0 ;; ;; if final_exp_hi < d.frombits(d.exp_zero_border) -> 0.0 ;; var z_hi = exp(final_exp_hi) if d.tobits(z_hi) & d.expmask == d.inf -> z_hi ;; -> d.fma(z_hi, final_exp_lo, z_hi) } ``````