summaryrefslogtreecommitdiff
path: root/lib/math/sum-impl.myr
blob: 9826947a46c1134337453547caec2f26a4a5a5ae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
use std

/* For references, see [Mul+10] section 6.3 */
pkg math =
	pkglocal const kahan_sum32 : (l : flt32[:] -> flt32)
	pkglocal const kahan_sum64 : (l : flt64[:] -> flt64)

	pkglocal const priest_sum32 : (l : flt32[:] -> flt32)
	pkglocal const priest_sum64 : (l : flt64[:] -> flt64)

	/* Backend for priest_sum; currently not useful enough to expose */
	pkglocal generic double_compensated_sum : (l : @f[:] -> (@f, @f)) :: numeric,floating @f
;;

type doomed_flt32_arr = flt32[:]
type doomed_flt64_arr = flt64[:]

impl disposable doomed_flt32_arr =
	__dispose__ = {a : doomed_flt32_arr; std.slfree((a : flt32[:])) }
;;

impl disposable doomed_flt64_arr =
	__dispose__ = {a : doomed_flt64_arr; std.slfree((a : flt64[:])) }
;;

/*
   Kahan's compensated summation. Fast and reasonably accurate,
   although cancellation can cause relative error blowup. For
   something slower, but more accurate, use something like Priest's
   doubly compensated sums.
 */
pkglocal const kahan_sum32 = {l; -> kahan_sum_gen(l)}
pkglocal const kahan_sum64 = {l; -> kahan_sum_gen(l)}

generic kahan_sum_gen = {l : @f[:] :: numeric,floating @f
	if l.len == 0
		-> (0.0 : @f)
	;;

	var s = (0.0 : @f)
	var c = (0.0 : @f)
	var y = (0.0 : @f)
	var t = (0.0 : @f)

	for x : l
		y = x - c
		t = s + y
		c = (t - s) - y
		s = t
	;;

	-> s
}

/*
   Priest's doubly compensated summation. Extremely accurate, but
   relatively slow. For situations in which cancellation is not
   expected, something like Kahan's compensated summation may be
   more useful.
 */
pkglocal const priest_sum32 = {l : flt32[:]
	var l2 = std.sldup(l)
	std.sort(l2, mag_cmp32)
	auto (l2 : doomed_flt32_arr)
	var s, c
	(s, c) = double_compensated_sum(l2)
	-> s
}

const mag_cmp32 = {f : flt32, g : flt32
	var u = std.flt32bits(f) & ~(1 << 31)
	var v = std.flt32bits(g) & ~(1 << 31)
	-> std.numcmp(v, u)
}

pkglocal const priest_sum64 = {l : flt64[:]
	var l2 = std.sldup(l)
	std.sort(l, mag_cmp64)
	auto (l2 : doomed_flt64_arr)
	var s, c
	(s, c) = double_compensated_sum(l2)
	-> s
}

const mag_cmp64 = {f : flt64, g : flt64
	var u = std.flt64bits(f) & ~(1 << 63)
	var v = std.flt64bits(g) & ~(1 << 63)
	-> std.numcmp(v, u)
}

generic double_compensated_sum = {l : @f[:] :: numeric,floating @f
	/* l should be sorted in descending order */
	if l.len == 0
		-> ((0.0 : @f), (0.0 : @f))
	;;

	var s = (0.0 : @f)
	var c = (0.0 : @f)

	for x : l
		var y = c + x
		var u = x - (y - c)
		var t = (y + s)
		var v = (y - (t - s))
		var z = u + v
		s = t + z
		c = z - (s - t)
	;;

	-> (s, c)
}