summaryrefslogtreecommitdiff
path: root/lib/math/sum-impl.myr
blob: 91aba19ad67176de321627d3c0d881f02cf49284 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
use std

/* For references, see [Mul+10] section 6.3 */
pkg math =
	pkglocal const kahan_sum32 : (l : flt32[:] -> flt32)
	pkglocal const priest_sum32 : (l : flt32[:] -> flt32)

	pkglocal const kahan_sum64: (l : flt64[:] -> flt64)
	pkglocal const priest_sum64 : (l : flt64[:] -> flt64)
;;

type doomed_flt32_arr = flt32[:]
type doomed_flt64_arr = flt64[:]

impl disposable doomed_flt32_arr =
	__dispose__ = {a : doomed_flt32_arr; std.slfree((a : flt32[:])) }
;;

impl disposable doomed_flt64_arr =
	__dispose__ = {a : doomed_flt64_arr; std.slfree((a : flt64[:])) }
;;

/*
   Kahan's compensated summation. Fast and reasonably accurate,
   although cancellation can cause relative error blowup. For
   something slower, but more accurate, use something like Priest's
   doubly compensated sums.
 */
pkglocal const kahan_sum32 = {l; -> kahan_sum_gen(l, (0.0 : flt32))}
pkglocal const kahan_sum64 = {l; -> kahan_sum_gen(l, (0.0 : flt64))}

generic kahan_sum_gen = {l : @f[:], zero : @f :: numeric,floating @f
	if l.len == 0
		-> zero
	;;

	var s = zero
	var c = zero
	var y = zero
	var t = zero

	for x : l
		y = x - c
		t = s + y
		c = (t - s) - y
		s = t
	;;

	-> s
}

/*
   Priest's doubly compensated summation. Extremely accurate, but
   relatively slow. For situations in which cancellation is not
   expected, something like Kahan's compensated summation may be
   more useful.
 */
pkglocal const priest_sum32 = {l : flt32[:]
	var l2 = std.sldup(l)
	std.sort(l2, mag_cmp32)
	auto (l2 : doomed_flt32_arr)
	-> priest_sum_gen(l2, (0.0 : flt32))
}

const mag_cmp32 = {f : flt32, g : flt32
	var u = std.flt32bits(f) & ~(1 << 31)
	var v = std.flt32bits(g) & ~(1 << 31)
	-> std.numcmp(v, u)
}

pkglocal const priest_sum64 = {l : flt64[:]
	var l2 = std.sldup(l)
	std.sort(l, mag_cmp64)
	auto (l2 : doomed_flt64_arr)
	-> priest_sum_gen(l2, (0.0 : flt64))
}

const mag_cmp64 = {f : flt64, g : flt64
	var u = std.flt64bits(f) & ~(1 << 63)
	var v = std.flt64bits(g) & ~(1 << 63)
	-> std.numcmp(v, u)
}

generic priest_sum_gen = {l : @f[:], zero : @f :: numeric,floating @f
	/* l should be sorted in descending order */
	if l.len == 0
		-> zero
	;;

	var s = zero
	var c = zero

	for x : l
		var y = c + x
		var u = x - (y - c)
		var t = (y + s)
		var v = (y - (t - s))
		var z = u + v
		s = t + z
		c = z - (s - t)
	;;

	-> s
}