1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
pkg std =
/* reinterpret the storage of a float as an unsigned integer of the same width */
const flt64bits : (flt : flt64 -> uint64)
const flt32bits : (flt : flt32 -> uint32)
/* constructors for the infinity and NaN bit patterns defined in this file */
const flt64inf : (-> flt64)
const flt64nan : (-> flt64)
const flt32inf : (-> flt32)
const flt32nan : (-> flt32)
/* true when the value, converted to flt64, has an all-ones exponent field and a nonzero mantissa */
generic isnan : (f : @a -> bool) ::floating @a
/* reinterpret the storage of an unsigned integer as a float of the same width */
const flt64frombits : (bits : uint64 -> flt64)
const flt32frombits : (bits : uint32 -> flt32)
/* split a float into (is-negative, mantissa, unbiased exponent) */
const flt64explode : (flt : flt64 -> (bool, uint64, int64))
const flt32explode : (flt : flt32 -> (bool, uint32, int32))
/* build a float from a sign flag, a mantissa and an unbiased exponent */
const flt64assem : (sign : bool, mant : uint64, exp : int64 -> flt64)
const flt32assem : (sign : bool, mant : uint32, exp : int32 -> flt32)
;;
/* Returns the 64 bits that make up `flt`, read back as a uint64. */
const flt64bits = {flt
	var p

	/* view the argument's storage through a uint64 pointer */
	p = (&flt : uint64#)
	-> p#
}
/* Returns the 32 bits that make up `flt`, read back as a uint32. */
const flt32bits = {flt
	var p

	/* view the argument's storage through a uint32 pointer */
	p = (&flt : uint32#)
	-> p#
}
/* Returns the flt64 whose storage is the 64 bits given in `bits`. */
const flt64frombits = {bits
	var p

	/* view the argument's storage through a flt64 pointer */
	p = (&bits : flt64#)
	-> p#
}
/* Returns the flt32 whose storage is the 32 bits given in `bits`. */
const flt32frombits = {bits
	var p

	/* view the argument's storage through a flt32 pointer */
	p = (&bits : flt32#)
	-> p#
}
/* exponent bias subtracted by flt64explode and added back by flt64assem */
const Dblbias = 1023
/* exponent bias subtracted by flt32explode and added back by flt32assem */
const Fltbias = 127
/*
  Breaks a flt64 into its three fields.

  Returns (isneg, mant, exp):
    isneg - true when bit 63 (the sign bit) is set
    mant  - the low 52 bits, with bit 52 set as well whenever the
            stored exponent field is nonzero
    exp   - the stored exponent field minus Dblbias
*/
const flt64explode = {flt
	var raw, neg, frac, biased, e

	raw = flt64bits(flt)

	/* the stored mantissa occupies bits [0..51] */
	frac = raw & ((1ul << 52) - 1)
	/* the biased exponent is the 11 bit field in bits [52..62] */
	biased = (raw >> 52) & 0x7ff
	/* the top bit carries the sign */
	neg = (raw >> 63) != 0

	/* a nonzero exponent field means the leading one is implicit: restore it */
	if biased != 0
		frac |= 1ul << 52
	;;

	/* strip the bias to get the signed exponent */
	e = (biased : int64) - Dblbias
	-> (neg, frac, e)
}
/*
  Breaks a flt32 into its three fields.

  Returns (isneg, mant, exp):
    isneg - true when bit 31 (the sign bit) is set
    mant  - the low 23 bits, with bit 23 set as well whenever the
            stored exponent field is nonzero
    exp   - the stored exponent field minus Fltbias
*/
const flt32explode = {flt
	var raw, neg, frac, biased, e

	raw = flt32bits(flt)

	/* the stored mantissa occupies bits [0..22] */
	frac = raw & ((1 << 23) - 1)
	/* the biased exponent is the 8 bit field in bits [23..30] */
	biased = (raw >> 23) & 0xff
	/* the top bit carries the sign */
	neg = (raw >> 31) != 0

	/* a nonzero exponent field means the leading one is implicit: restore it */
	if biased != 0
		frac |= 1 << 23
	;;

	/* strip the bias to get the signed exponent */
	e = (biased : int32) - Fltbias
	-> (neg, frac, e)
}
/*
  Packs a sign flag, mantissa and unbiased exponent into a flt64.

  Only the low 52 bits of `mant` are stored, so an explicit leading
  bit at position 52 is dropped. `exp` has Dblbias added and is then
  truncated to the 11 bit exponent field.
*/
const flt64assem = {sign, mant, exp
	var signbit, frac, biased, word

	frac = (mant : uint64) & ((1ul<<52) - 1)
	biased = (exp + Dblbias : uint64) & 0x7ff
	signbit = (sign : uint64)

	/* layout: sign in bit 63, exponent in bits [52..62], mantissa below */
	word = (signbit << 63) | (biased << 52) | frac
	-> std.flt64frombits(word)
}
/*
  Packs a sign flag, mantissa and unbiased exponent into a flt32.

  Only the low 23 bits of `mant` are stored, so an explicit leading
  bit at position 23 is dropped. `exp` has Fltbias added and is then
  truncated to the 8 bit exponent field.
*/
const flt32assem = {sign, mant, exp
	var signbit, frac, biased, word

	frac = (mant : uint32) & ((1<<23) - 1)
	biased = (exp + Fltbias : uint32) & 0xff
	signbit = (sign : uint32)

	/* layout: sign in bit 31, exponent in bits [23..30], mantissa below */
	word = (signbit << 31) | (biased << 23) | frac
	-> std.flt32frombits(word)
}
/*
  Reports whether `f` is a NaN. The value is converted to flt64 and
  its bit pattern inspected: the result is true when the 11 bit
  exponent field is all ones and the 52 bit mantissa is nonzero.
*/
generic isnan = {f
	var raw, expfield, frac

	raw = flt64bits((f : flt64))
	/* exponent field, with the sign bit masked off */
	expfield = (raw >> 52) & 0x7fful
	/* everything below the sign and exponent bits */
	frac = raw & ~(0xffful << 52)
	-> expfield == 0x7fful && frac != 0
}
/* Returns the flt64 with an all-ones exponent field, a zero mantissa and a clear sign bit. */
const flt64inf = {
	/* 0x7ff << 52 == 0x7ff0000000000000 */
	-> std.flt64frombits(0x7fful << 52)
}
/* Returns the flt64 with an all-ones exponent field and only the top mantissa bit set. */
const flt64nan = {
	/* (0x7ff << 52) | (1 << 51) == 0x7ff8000000000000 */
	-> std.flt64frombits((0x7fful << 52) | (1ul << 51))
}
/* Returns the flt32 with an all-ones exponent field, a zero mantissa and a clear sign bit. */
const flt32inf = {
	/* 0xff << 23 == 0x7f800000 */
	-> std.flt32frombits(0xff << 23)
}
/* Returns the flt32 with an all-ones exponent field and only the top mantissa bit set. */
const flt32nan = {
	/* (0xff << 23) | (1 << 22) == 0x7fc00000 */
	-> std.flt32frombits((0xff << 23) | (1 << 22))
}
|