summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorS. Gilles <sgilles@math.umd.edu>2018-04-17 21:59:22 -0400
committerS. Gilles <sgilles@math.umd.edu>2018-04-18 08:32:39 -0400
commit8092374b4e87b78053a68b2e5cc7159941a3f46f (patch)
tree27b5afc82d1f2e3c419cd1b0d12b6d3579b3d4ee
parentd25b147ccbeff5b7f086b2e43d835543aaa8e6dd (diff)
downloadmc-8092374b4e87b78053a68b2e5cc7159941a3f46f.tar.gz
Implement Horner's Scheme for polynomial evaluation.
-rw-r--r--lib/math/bld.sub3
-rw-r--r--lib/math/fpmath.myr10
-rw-r--r--lib/math/poly-impl.myr28
-rw-r--r--lib/math/references6
-rw-r--r--lib/math/test/poly-impl.myr66
5 files changed, 112 insertions, 1 deletions
diff --git a/lib/math/bld.sub b/lib/math/bld.sub
index 68a118c..ddd3e49 100644
--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -9,6 +9,9 @@ lib math =
fma-impl+posixy-x64-fma.s
fma-impl.myr
+ # polynomial evaluation methods
+ poly-impl.myr
+
# sqrt
sqrt-impl+posixy-x64-sse2.s
sqrt-impl.myr
diff --git a/lib/math/fpmath.myr b/lib/math/fpmath.myr
index 96e7169..aab64fa 100644
--- a/lib/math/fpmath.myr
+++ b/lib/math/fpmath.myr
@@ -6,6 +6,9 @@ pkg math =
/* fma-impl */
fma : (x : @f, y : @f, z : @f -> @f)
+ /* poly-impl */
+ horner_poly : (x : @f, a : @f[:] -> @f)
+
/* sqrt-impl */
sqrt : (f : @f -> @f)
@@ -59,6 +62,8 @@ impl roundable flt64 -> int64 =
impl fpmath flt32 =
fma = {x, y, z; -> fma32(x, y, z)}
+ horner_poly = {f, a; -> horner_poly32(f, a)}
+
sqrt = {f; -> sqrt32(f)}
kahan_sum = {l; -> kahan_sum32(l) }
@@ -73,6 +78,8 @@ impl fpmath flt32 =
impl fpmath flt64 =
fma = {x, y, z; -> fma64(x, y, z)}
+ horner_poly = {f, a; -> horner_poly64(f, a)}
+
sqrt = {f; -> sqrt64(f)}
kahan_sum = {l; -> kahan_sum64(l) }
@@ -89,6 +96,9 @@ extern const rn64 : (f : flt64 -> int64)
extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)
extern const fma64 : (x : flt64, y : flt64, z : flt64 -> flt64)
+extern const horner_poly32 : (f : flt32, a : flt32[:] -> flt32)
+extern const horner_poly64 : (f : flt64, a : flt64[:] -> flt64)
+
extern const sqrt32 : (x : flt32 -> flt32)
extern const sqrt64 : (x : flt64 -> flt64)
diff --git a/lib/math/poly-impl.myr b/lib/math/poly-impl.myr
new file mode 100644
index 0000000..4d96de8
--- /dev/null
+++ b/lib/math/poly-impl.myr
@@ -0,0 +1,28 @@
+use std
+
+/* See [Mul16], section 5.1 */
+pkg math =
+ pkglocal const horner_poly32 : (f : flt32, a : flt32[:] -> flt32)
+ pkglocal const horner_poly64 : (f : flt64, a : flt64[:] -> flt64)
+;;
+
+extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)
+extern const fma64 : (x : flt64, y : flt64, z : flt64 -> flt64)
+
+const horner_poly32 = {f : flt32, a : flt32[:]
+ var r : flt32 = 0.0
+ for var j = a.len - 1; j >= 0; j--
+ r = fma32(r, f, a[j])
+ ;;
+ -> r
+}
+
+const horner_poly64 = {f : flt64, a : flt64[:]
+ var r : flt64 = 0.0
+ for var j = a.len - 1; j >= 0; j--
+ r = fma64(r, f, a[j])
+ ;;
+ -> r
+}
+
+/* TODO: add Knuth's method */
diff --git a/lib/math/references b/lib/math/references
index d000fb2..822da6e 100644
--- a/lib/math/references
+++ b/lib/math/references
@@ -8,4 +8,8 @@ https://doi.org/10.1016/j.tcs.2005.09.056.
[Mul+10]
Jean-Michel Muller et al. Handbook of floating-point arithmetic.
-Boston: Birkhauser, 2010. isbn: 9780817647049.
+Boston: Birkhäuser, 2010. isbn: 9780817647049.
+
+[Mul16]
+J. M. Muller. Elementary functions : algorithms and implementation.
+Third edition. New York: Birkhäuser, 2016. isbn: 9781489979810.
diff --git a/lib/math/test/poly-impl.myr b/lib/math/test/poly-impl.myr
new file mode 100644
index 0000000..565a658
--- /dev/null
+++ b/lib/math/test/poly-impl.myr
@@ -0,0 +1,66 @@
+use std
+use math
+use testr
+
+const main = {
+ testr.run([
+ [.name="horner-01", .fn = horner01],
+ [.name="horner-02", .fn = horner02],
+ ][:])
+}
+
+const horner01 = {c
+ var inputs : (uint32, uint32[:], uint32)[:] = [
+ (0x00000000, [][:], 0x00000000),
+ (0xbf800000, [0x3f715545, 0x3f715546, 0x3f715544, 0x3f715545][:], 0xb4000000),
+ (0xbf800001, [0x3f715545, 0x3f715546, 0x3f715544, 0x3f715545][:], 0xb4bc5552),
+ ][:]
+
+ for (f, a, r) : inputs
+ var f2 = std.flt32frombits(f)
+ var a2 = [][:]
+ for aa : a
+ std.slpush(&a2, std.flt32frombits(aa))
+ ;;
+ var sf = math.horner_poly(f2, a2)
+ var s : uint32 = std.flt32bits(sf)
+ testr.eq(c, s, r)
+ ;;
+}
+
+const horner02 = {c
+ var inputs : (uint64, uint64[:], uint64)[:] = [
+ (0x0000000000000000, [][:], 0x0000000000000000),
+ (0xc0003d3368ee1111, [
+ 0x3ff0000000000000,
+ 0x3fe0000000000000,
+ 0x3fc5555555555555,
+ 0x3fa5555555555555,
+ 0x3f81a7b9611a7b96,
+ 0x3f59c2d14ee4a102,
+ 0x3f3136f054ff42a4,
+ 0x3f05902ed525b6ee
+ ][:], 0x3fdb64730ab8fa29),
+ (0x40003d3368ee1111, [
+ 0x3ff0000000000000,
+ 0x3fe0000000000000,
+ 0x3fc5555555555555,
+ 0x3fa5555555555555,
+ 0x3f81a7b9611a7b96,
+ 0x3f59c2d14ee4a102,
+ 0x3f3136f054ff42a4,
+ 0x3f05902ed525b6ee
+ ][:], 0x400a331575ee40db),
+ ][:]
+
+ for (f, a, r) : inputs
+ var f2 = std.flt64frombits(f)
+ var a2 = [][:]
+ for aa : a
+ std.slpush(&a2, std.flt64frombits(aa))
+ ;;
+ var sf = math.horner_poly(f2, a2)
+ var s : uint64 = std.flt64bits(sf)
+ testr.eq(c, s, r)
+ ;;
+}