summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lib/math/fma-impl.myr 9
-rw-r--r-- lib/math/fpmath.myr 6
-rw-r--r-- lib/math/test/fma-impl.myr 3
3 files changed, 12 insertions, 6 deletions
diff --git a/lib/math/fma-impl.myr b/lib/math/fma-impl.myr
index 12f748c..bb055e0 100644
--- a/lib/math/fma-impl.myr
+++ b/lib/math/fma-impl.myr
@@ -107,10 +107,9 @@ pkglocal const fma32 = {x : flt32, y : flt32, z : flt32
base is 2, subtraction or addition are equally
useful.)
*/
- if (larger ^ shr(smaller, larger_e - smaller_e)) & 0x1 != 0
- mask >>= 1
+ if (larger ^ shr(smaller, larger_e - smaller_e)) & 0x1 == 0
+ prevent_rounding = smaller & mask != 0
;;
- prevent_rounding = smaller & mask != 0
else
/*
The prospective rounding agrees with the signage.
@@ -206,6 +205,10 @@ const flt32fromflt64 = {f : flt64
ts = (ts1 : uint32)
if need_round_away(0, s, shift)
ts++
+ if ts & (1 << 23) != 0
+ /* false alarm, it's normal again */
+ te++
+ ;;
;;
-> std.flt32assem(n, te, ts)
}
diff --git a/lib/math/fpmath.myr b/lib/math/fpmath.myr
index 1e084de..1b500de 100644
--- a/lib/math/fpmath.myr
+++ b/lib/math/fpmath.myr
@@ -3,15 +3,15 @@ use std
pkg math =
trait fpmath @f =
- /* fpmath-fma-impl */
+ /* fma-impl */
fma : (x : @f, y : @f, z : @f -> @f)
- /* fpmath-trunc-impl */
+ /* trunc-impl */
trunc : (f : @f -> @f)
ceil : (f : @f -> @f)
floor : (f : @f -> @f)
- /* fpmath-sum-impl */
+ /* sum-impl */
kahan_sum : (a : @f[:] -> @f)
priest_sum : (a : @f[:] -> @f)
;;
diff --git a/lib/math/test/fma-impl.myr b/lib/math/test/fma-impl.myr
index 15c1395..0bb30bb 100644
--- a/lib/math/test/fma-impl.myr
+++ b/lib/math/test/fma-impl.myr
@@ -44,10 +44,13 @@ const fma01 = {c
(0xa19e9a6f, 0xb49af3e3, 0xa2468b59, 0xa2468b57),
(0xd119e996, 0x8e5ad0e3, 0x247e0028, 0x247e83b7),
(0x381adbc6, 0x00ee4f61, 0x005f2aeb, 0x005f2d2c),
+ (0x7008233c, 0x2a9613fb, 0x46affd02, 0x5b1f9e8a),
+ (0xe85018a1, 0x2cbd53ed, 0x3fcffab8, 0xd599e668),
/* These ones are especially tricky */
(0x65dbf098, 0xd5beb8b4, 0x7c23db61, 0x73027654),
(0xa4932927, 0xc565bc34, 0x316887af, 0x31688bcf),
+ (0xb080a420, 0x09e2e5ca, 0x807ff1bf, 0x80800000),
][:]
for (x, y, z, r) : inputs