diff options
author | S. Gilles <sgilles@math.umd.edu> | 2019-06-10 08:32:51 -0400 |
---|---|---|
committer | Ori Bernstein <ori@markovcorp.com> | 2019-06-13 15:20:28 -0700 |
commit | cfb284970e8c164d43d814496a5363b92782fb15 (patch) | |
tree | d82a6725fad55c9ab5fb70239876bde6805e9067 | |
parent | 4a8bc7aa008f753c082a914a0f95ea3df09d2123 (diff) | |
download | mc-cfb284970e8c164d43d814496a5363b92782fb15.tar.gz |
Correctly Fused Multiply Add when all top bits cancel.
-rw-r--r-- | lib/math/fma-impl.myr | 15 | ||||
-rw-r--r-- | lib/math/test/fma-impl.myr | 2 |
2 files changed, 14 insertions, 3 deletions
diff --git a/lib/math/fma-impl.myr b/lib/math/fma-impl.myr index 8dfecb5..cbb0872 100644 --- a/lib/math/fma-impl.myr +++ b/lib/math/fma-impl.myr @@ -180,12 +180,21 @@ pkglocal const fma64 = {x : flt64, y : flt64, z : flt64 var t_m : uint64 = xs_l * ys_h + xs_h * ys_l var t_h : uint64 = xs_h * ys_h + var prod_l : uint64 = t_l + (t_m << 32) var prod_h : uint64 = t_h + (t_m >> 32) if t_l > prod_l prod_h++ ;; + /* + At this point, we should have + + xs * ys = (2^32 * xs_h + xs_l) * (2^32 * ys_h + ys_l) + = (2^64 * t_h) + (2^32 * t_m) + (t_l) + = (2^64 * prod_h) + (prod_l) + */ + var prod_n = xn != yn var prod_lastbit_e = (xe - 52) + (ye - 52) var prod_first1 = find_first1_64_hl(prod_h, prod_l, 105) @@ -254,12 +263,12 @@ pkglocal const fma64 = {x : flt64, y : flt64, z : flt64 match compare_hl_z(prod_h, prod_l, prod_firstbit_e, prod_lastbit_e, zs, z_firstbit_e, z_lastbit_e) | `std.Equal: -> 0.0 | `std.Before: - /* prod > z */ + /* |prod| > |z| */ res_n = prod_n res_lastbit_e = prod_lastbit_e (res_h, res_l) = sub_shifted(prod_h, prod_l, zs, z_lastbit_e - prod_lastbit_e) | `std.After: - /* z > prod */ + /* |z| > |prod| */ res_n = zn res_lastbit_e = z_lastbit_e - 64 (res_h, res_l) = sub_shifted(zs, 0, prod_h, prod_lastbit_e + 64 - (z_lastbit_e - 64)) @@ -325,7 +334,7 @@ pkglocal const fma64 = {x : flt64, y : flt64, z : flt64 res_s++ ;; else - res_s = shl(res_h, res_first1 - 52) + res_s = shl(res_l, 52 - res_first1) ;; /* The res_s++s might have messed everything up */ diff --git a/lib/math/test/fma-impl.myr b/lib/math/test/fma-impl.myr index f5d7472..02233ae 100644 --- a/lib/math/test/fma-impl.myr +++ b/lib/math/test/fma-impl.myr @@ -91,6 +91,8 @@ const fma02 = {c (0x0be868cb5a7180c8, 0x3357a30707ed947c, 0x80000050d6b86ac6, 0x000000cfa41cb229), (0xbe535f4f8a7498af, 0x00d24adee12217b8, 0x0000005729e93fb0, 0x800000016d975af3), (0x39d1968eb883f088, 0x856f286e3b268f0e, 0x800000d7cdd0ed70, 0x800001e9cf01a0ae), + + (0xbfb1983143bd2412, 0x40bc6b0000000000, 0x407f400000000000, 0xbd270c0000000000), ][:] for (x, y, z, r) : inputs |