summaryrefslogtreecommitdiff
path: root/lib/math/sin-impl.myr
blob: 5a321a4f96e6087db7101acbc68e2cc49170bb1d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
use std

use "fpmath"
use "sum-impl" /* we use generics from here */
use "util"

/*
   This implementation of sin and cos uses the "Highly Accurate
   Tables" method of [GB91]. It's not as fast as it could be (the
   sorting and excessive summation really takes a toll), however
   we seem to be consistently within +/-1 ulp of the correct result
   in flt64 case. In flt32 case we are correctly rounded.

   The idea is to tabulate [0, pi/4] at 256 slightly irregular
   intervals xi, with the lucky property that the infinite binary
   expansions of significands of sin(xi) and cos(xi) look like

      / 53 bits \  / many '0's \  / noise \
     1.bbbbbb...bb00000000000...0???????????...

   This allows us to use storage only for a single floating point
   number, but get more than 53 bits of precision.

   Using that, we express x as (N * pi/4) + (xi) + (h), where h is
   tiny. Using identities

      sin(u+v) = sin(u)cos(v) + cos(u)sin(v),
      cos(u+v) = cos(u)cos(v) - sin(u)sin(v),

   we arrive at a sum where every term is known with greater than
   53 bits of precision except for sin(h) and cos(h), which we can
   approximate well.

   As a note, everything is performed in double-precision. Storing
   a second set of tables for single precision constants would
   occupy another 3KiB of memory for a not-very-significant speed
   gain.

   See files pi-constants.c, generate-triples-for-GB91.c, and
   generate-minimax-by-Remez.gp for where the constants come from.

   Note that in a departure from [GB91], we use the smaller degree
   polynomials over a smaller range for the input. This requires
   that we store more C-values for smaller xi values, but makes
   the implementation a bit simpler to read.
 */
pkg math =
	pkglocal const sin32 : (x : flt32 -> flt32)
	pkglocal const sin64 : (x : flt64 -> flt64)

	pkglocal const cos32 : (x : flt32 -> flt32)
	pkglocal const cos64 : (x : flt64 -> flt64)

	pkglocal const sincos32 : (x : flt32 -> (flt32, flt32))
	pkglocal const sincos64 : (x : flt64 -> (flt64, flt64))

	pkglocal const trig_reduce : (x : flt64 -> (int64, flt64, flt64))
	pkglocal const trig_table_approx : (x1 : flt64, C : (uint64, uint64, uint64)[257] -> (uint64, uint64, uint64))

	pkglocal const pi_over_2 : uint64[4]
;;

/* Pi/2 in lots of detail, for range reducing sin(2^18) or so */
const pi_over_2 = [
	0x3ff921fb54442d18,
	0x3c91a62633145c07,
	0xb91f1976b7ed8fbc,
	0x35b4cf98e804177d,
]

/* Pi/4 in lots of detail */
const pi_over_4 : uint64[4] = [
	0x3fe921fb54442d18,
	0x3c81a62633145c07,
	0xb90f1976b7ed8fbc,
	0x35a4cf98e804177d,
]

/* Pre-computed inverses */
const two_over_pi : uint64 = 0x3fe45f306dc9c883 /* 1/(pi/2) */
const oneohtwofour_over_pi : uint64 = 0x40745f306dc9c883 /* 1/(pi/(4 * 256)) */

/*
   Coefficients for minimax, degree 7 polynomials approximating sin
   and cos on [-Pi/1024, Pi/1024] (generated by a Remez algorithm).
 */
const sin_coeff : uint64[8] = [
	0x0000000000000000,
	0x3ff0000000000000,
	0xb8c7400000000000,
	0xbfc5555555555555,
	0x39d0000000000000,
	0x3f81111111111061,
	0xbacc000000000000,
	0xbf2a019fa7ee0417,
]

const cos_coeff : uint64[8] = [
	0x3ff0000000000000,
	0x38c0800000000000,
	0xbfe0000000000000,
	0x39a0000000000000,
	0x3fa55555555553ee,
	0x0000000000000000,
	0xbf56c16b9bfd9fd6,
	0xbbec000000000000,
]

/*
   The Highly Accurate Tables for use in a [GB91]-type algorithm;
   generated by ancillary/generate-triples-for-GB91.c.
 */
const C : (uint64, uint64, uint64)[257] = [
	/*       xi               cos(xi)            sin(xi)      */
	(0x0000000000000000, 0x3ff0000000000000, 0x0000000000000000),
	(0x3f6921fb42e71072, 0x3feffff621622aa5, 0x3f6921f8ad6f8d6f),
	(0x3f7921fb576a8e70, 0x3fefffd8858e80ad, 0x3f7921f1018d5de6),
	(0x3f82d97c6d961293, 0x3fefffa72c98324f, 0x3f82d96afcb3b8a8),
	(0x3f8921fba95a4ba5, 0x3fefff62169765a8, 0x3f8921d251f34230),
	(0x3f8f6a79afcf7cc4, 0x3fefff0943ccb09c, 0x3f8f6a28f137852f),
	(0x3f92d97d6168427b, 0x3feffe9cb42a0249, 0x3f92d9379e0e6011),
	(0x3f95fdbbdcaafdf1, 0x3feffe1c68730a0e, 0x3f95fd4d14eace13),
	(0x3f9921fb47134298, 0x3feffd886087640a, 0x3f992155ea73805c),
	(0x3f9c463ab6204953, 0x3feffce09ce490e2, 0x3f9c454f4439aa60),
	(0x3f9f6a7a230622cf, 0x3feffc251df3604f, 0x3f9f69372b837c03),
	(0x3fa1475c20c40678, 0x3feffb55e4815141, 0x3fa1468532309197),
	(0x3fa2d97c761e7dc7, 0x3feffa72f0045061, 0x3fa2d8656c814503),
	(0x3fa46b9c4167ee58, 0x3feff97c42007eca, 0x3fa46a397ce648ce),
	(0x3fa5fdbbb3fec154, 0x3feff871db006d07, 0x3fa5fc009ce09729),
	(0x3fa78fdba698a573, 0x3feff753bb15f9f8, 0x3fa78dbaad1df29e),
	(0x3fa921fb569e20a0, 0x3feff621e37794e9, 0x3fa91f65f36711fd),
	(0x3faab41b20e054f3, 0x3feff4dc549e4649, 0x3faab101d4af47e4),
	(0x3fac463aa9c96aef, 0x3feff3830f9fe5ee, 0x3fac428cfdc5c35c),
	(0x3fadd85a67cbe296, 0x3feff21614eca1d2, 0x3fadd406ed40d193),
	(0x3faf6a7a3eff7872, 0x3feff09565793013, 0x3faf656e8f97f0f9),
	(0x3fb07e4ceacc9a97, 0x3fefef01028ba740, 0x3fb07b6149c87151),
	(0x3fb1475c76962b85, 0x3fefed58ed6a54be, 0x3fb14400e1aeeb44),
	(0x3fb2106c98abe6cd, 0x3fefeb9d254b1740, 0x3fb20c96690fbe9f),
	(0x3fb2d97c6bdba26e, 0x3fefe9cdad2f1061, 0x3fb2d5207f840524),
	(0x3fb3a28c18e279a9, 0x3fefe7ea85e76da6, 0x3fb39d9ed203bab6),
	(0x3fb46b9c42a3d8e4, 0x3fefe5f3af0a1548, 0x3fb4661187480b43),
	(0x3fb534abcd8d6c89, 0x3fefe3e92c9764ff, 0x3fb52e7708fabcdf),
	(0x3fb5fdbbf6d380b0, 0x3fefe1cafc99687d, 0x3fb5f6d017a62147),
	(0x3fb6c6cbcc2f7248, 0x3fefdf9922e0f7ad, 0x3fb6bf1b46436fc7),
	(0x3fb78fdb8cff3236, 0x3fefdd53a026e6fa, 0x3fb78758587024ce),
	(0x3fb858eb79d953d8, 0x3fefdafa7513ab8e, 0x3fb84f8712f838d4),
	(0x3fb921fb5df13728, 0x3fefd88da3b2cbcb, 0x3fb917a6c5cad0c3),
	(0x3fb9eb0b15ad6e7f, 0x3fefd60d2df8efac, 0x3fb9dfb6d20cd89d),
	(0x3fbab41b0d8e04c2, 0x3fefd3791414a510, 0x3fbaa7b72849583a),
	(0x3fbb7d2b02ec004d, 0x3fefd0d1586ee673, 0x3fbb6fa70acd0c55),
	(0x3fbc463a9c1cda98, 0x3fefce15fde7feac, 0x3fbc3785a525059d),
	(0x3fbd0f4a92881eaf, 0x3fefcb4703aa4711, 0x3fbcff5334552718),
	(0x3fbdd85a7ea6b644, 0x3fefc8646cd750e6, 0x3fbdc70ed631fba7),
	(0x3fbea16a4da0e792, 0x3fefc56e3b81b243, 0x3fbe8eb7fcd470b2),
	(0x3fbf6a7a2082960a, 0x3fefc26471042f37, 0x3fbf564e4de7cd23),
	(0x3fc019c4f4362172, 0x3fefbf470f79208c, 0x3fc00ee89fc60798),
	(0x3fc07e4cb4549583, 0x3fefbc1619c9367a, 0x3fc072a00d3f81a5),
	(0x3fc0e2d4df1f9ef0, 0x3fefb8d18d51928e, 0x3fc0d64dbf2ea47f),
	(0x3fc1475c06879885, 0x3fefb5797828e1da, 0x3fc139f00d3ac360),
	(0x3fc1abe531137149, 0x3fefb20dc250d36d, 0x3fc19d89b968e759),
	(0x3fc2106c256f2705, 0x3fefae8e92bf458c, 0x3fc20116570e32c3),
	(0x3fc274f5eafb17bd, 0x3fefaafbbea74df5, 0x3fc2649aa381628e),
	(0x3fc2d97c80aa0f5b, 0x3fefa7557efae43c, 0x3fc2c81070013fe8),
	(0x3fc33e0462663cd9, 0x3fefa39bacdb1026, 0x3fc32b7bef4456d4),
	(0x3fc3a28c534e14b9, 0x3fef9fce55ed894d, 0x3fc38edbaa59fe3f),
	(0x3fc40714a18f49e2, 0x3fef9bed79763508, 0x3fc3f22fb1298233),
	(0x3fc46b9c38ac6ee7, 0x3fef97f9249e436c, 0x3fc45576b5509b55),
	(0x3fc4d02432e80a59, 0x3fef93f14ed442ec, 0x3fc4b8b1905fbd16),
	(0x3fc534ac0298d5e2, 0x3fef8fd600323753, 0x3fc51bdf7942e899),
	(0x3fc5993416dbe628, 0x3fef8ba736af1693, 0x3fc57f00a065f728),
	(0x3fc5fdbbf1d87ff8, 0x3fef8764fa1887ba, 0x3fc5e2144c8984d0),
	(0x3fc66243dc0ae9e8, 0x3fef830f4a092d65, 0x3fc6451a8808363c),
	(0x3fc6c6cbc286e3f6, 0x3fef7ea629fb13d9, 0x3fc6a8130326d53f),
	(0x3fc72b53b7c96348, 0x3fef7a299bd3df70, 0x3fc70afd9309c784),
	(0x3fc78fdba21869ed, 0x3fef7599a37cdcb3, 0x3fc76dd9e15bdb4a),
	(0x3fc7f4642a07f677, 0x3fef70f63bf58052, 0x3fc7d0a856c8ccd4),
	(0x3fc858eb5b649af0, 0x3fef6c3f7f63a632, 0x3fc83366caea942f),
	(0x3fc8bd736951ad00, 0x3fef6775566b1c57, 0x3fc896172a175d2d),
	(0x3fc921fb399259ea, 0x3fef6297d144aa53, 0x3fc8f8b8223b223a),
	(0x3fc986835ce1c644, 0x3fef5da6ebe94da7, 0x3fc95b4a04783401),
	(0x3fc9eb0b24569abc, 0x3fef58a2b2008d0c, 0x3fc9bdcbe883cc5f),
	(0x3fca4f9323461762, 0x3fef538b1f52fbe6, 0x3fca203e2200e122),
	(0x3fcab41b09c5898b, 0x3fef4e603b080549, 0x3fca82a025ebcacb),
	(0x3fcb18a2e6d4292f, 0x3fef492207941b17, 0x3fcae4f1c649efbf),
	(0x3fcb7d2ae7b9bf9f, 0x3fef43d085cf0736, 0x3fcb4732f2b7a6f7),
	(0x3fcbe1b2cb7b8481, 0x3fef3e6bbc6eba27, 0x3fcba9632f158ca6),
	(0x3fcc463ab6f78496, 0x3fef38f3acd2bb22, 0x3fcc0b8262d9d9f9),
	(0x3fccaac29faeedb5, 0x3fef33685aeb67ba, 0x3fcc6d90473e1ca0),
	(0x3fcd0f4ab0d5a9e9, 0x3fef2dc9c7b77e6a, 0x3fcccf8cc9dfcdca),
	(0x3fcd73d299801a67, 0x3fef2817fb345701, 0x3fcd31775f6e7021),
	(0x3fcdd85a5f03c93d, 0x3fef2252f8ad886d, 0x3fcd934fd0c9eb90),
	(0x3fce3ce2615c01b0, 0x3fef1c7abe28a12c, 0x3fcdf5163efb2fd2),
	(0x3fcea16a34d49aca, 0x3fef168f557f8c38, 0x3fce56ca04ee54fd),
	(0x3fcf05f2396af361, 0x3fef1090bcb17909, 0x3fceb86b43a82046),
	(0x3fcf6a7a34c8a42b, 0x3fef0a7efae01f19, 0x3fcf19f9863cf10b),
	(0x3fcfcf02154123ee, 0x3fef045a14e56969, 0x3fcf7b747f630b74),
	(0x3fd019c51e84ab67, 0x3feefe22087e60d4, 0x3fcfdcdc522606ad),
	(0x3fd04c08e989dfc7, 0x3feef7d6e70399a4, 0x3fd01f17f81c5502),
	(0x3fd07e4ceb775576, 0x3feef178a4618400, 0x3fd04fb80a82fe62),
	(0x3fd0b090f60ca615, 0x3feeeb074a3d9810, 0x3fd0804e15777f94),
	(0x3fd0e2d4c54314ef, 0x3feee482e5663962, 0x3fd0b0d9b95007d9),
	(0x3fd11518d2106a76, 0x3feeddeb6a30657d, 0x3fd0e15b4cec5711),
	(0x3fd1475cc633879f, 0x3feed740e7e7b002, 0x3fd111d25f193a47),
	(0x3fd179a0ce17673e, 0x3feed0835cc79369, 0x3fd1423efcc68ac8),
	(0x3fd1abe4bace84c5, 0x3feec9b2d347a043, 0x3fd172a0dae268af),
	(0x3fd1de28a262f5c2, 0x3feec2cf4cb15d4b, 0x3fd1a2f7f0d5a412),
	(0x3fd2106cb1425d8c, 0x3feebbd8c71a89f1, 0x3fd1d3444b7da503),
	(0x3fd242b0905f83c0, 0x3feeb4cf52e27d33, 0x3fd20385796d7260),
	(0x3fd274f4947838ba, 0x3feeadb2e8897f5d, 0x3fd233bbae3e8ad0),
	(0x3fd2a7386b632300, 0x3feea6839816a5b1, 0x3fd263e67d68a111),
	(0x3fd2d97c9e0be433, 0x3fee9f41524c0663, 0x3fd294064c5a5940),
	(0x3fd30bc07884186a, 0x3fee97ec359da93b, 0x3fd2c41a511fcae1),
	(0x3fd33e046121eaf8, 0x3fee908437cd8074, 0x3fd2f422d00c7726),
	(0x3fd3704855d0fd6b, 0x3fee89095dacc360, 0x3fd3241fa9798465),
	(0x3fd3a28c5798cbdd, 0x3fee817baba342be, 0x3fd35410c0bfc504),
	(0x3fd3d4d04553249a, 0x3fee79db2b58ee3d, 0x3fd383f5d8b1345d),
	(0x3fd407145ea979a2, 0x3fee7227d7cc183e, 0x3fd3b3cf1062e88d),
	(0x3fd43958386293b4, 0x3fee6a61c6350b11, 0x3fd3e39be4473723),
	(0x3fd46b9c38cc0bc5, 0x3fee6288eb9aff42, 0x3fd4135c98341878),
	(0x3fd49de029090fc7, 0x3fee5a9d555912a2, 0x3fd44310da8ddcf9),
	(0x3fd4d024217e697d, 0x3fee529f047e7434, 0x3fd472b8a510e608),
	(0x3fd50267f89f138c, 0x3fee4a8e04a2f573, 0x3fd4a253b2fc94cc),
	(0x3fd534ac160681bb, 0x3fee426a4a0b5efa, 0x3fd4d1e249057326),
	(0x3fd566eff496a5e0, 0x3fee3a33ef471d66, 0x3fd50163cbe183bf),
	(0x3fd59933ebfe75ec, 0x3fee31eaeb28cfda, 0x3fd530d871303f3b),
	(0x3fd5cb77fe42d304, 0x3fee298f425adc95, 0x3fd560401d7fa792),
	(0x3fd5fdbbd011e31d, 0x3fee212109492bba, 0x3fd58f9a5d81663b),
	(0x3fd62ffff34dbbed, 0x3fee18a02ca5e0f9, 0x3fd5bee79d6734e6),
	(0x3fd66243bdc0c9c2, 0x3fee100cce7f06b6, 0x3fd5ee271fdac6fd),
	(0x3fd69487ca38ebb6, 0x3fee0766d9c23a82, 0x3fd61d595943641b),
	(0x3fd6c6cbc58a707e, 0x3fedfeae61f95db0, 0x3fd64c7dde58f888),
	(0x3fd6f90fbf3b4b9a, 0x3fedf5e369de7ac8, 0x3fd67b94a09dba36),
	(0x3fd72b53a84c8c13, 0x3feded05f987af46, 0x3fd6aa9d7500e60d),
	(0x3fd75d97a7d7826a, 0x3fede4160f84614d, 0x3fd6d99863129ab7),
	(0x3fd78fdba723c71a, 0x3feddb13b555c5d8, 0x3fd7088538928031),
	(0x3fd7c21faf2481ab, 0x3fedd1feeeeb3f77, 0x3fd73763e0e5bb7e),
	(0x3fd7f46380443ef6, 0x3fedc8d7cd74eb35, 0x3fd7663403ecedb8),
	(0x3fd826a78fee86a2, 0x3fedbf9e41325ad2, 0x3fd794f5f21f812c),
	(0x3fd858eb6fc6c00e, 0x3fedb652640d194e, 0x3fd7c3a927f6e7be),
	(0x3fd88b2f6091a911, 0x3fedacf42fd7881c, 0x3fd7f24dc4deb30b),
	(0x3fd8bd73747fa82a, 0x3feda383a6d8b415, 0x3fd820e3bcdb4516),
	(0x3fd8efb76ad3bcc6, 0x3fed9a00db088521, 0x3fd84f6ab72e0455),
	(0x3fd921fb52f99571, 0x3fed906bcf71ced7, 0x3fd87de2a57d3bef),
	(0x3fd9543f4c295528, 0x3fed86c484089c2c, 0x3fd8ac4b87fa1376),
	(0x3fd98683387c2ef2, 0x3fed7d0b047d2a2d, 0x3fd8daa526666236),
	(0x3fd9b8c74c734cd7, 0x3fed733f4c983086, 0x3fd908ef9488c638),
	(0x3fd9eb0b316e257b, 0x3fed6961734a4759, 0x3fd9372a66100418),
	(0x3fda1d4f1c5b13c6, 0x3fed5f71745bef9a, 0x3fd96555af393979),
	(0x3fda4f93134bbffd, 0x3fed556f54b18a49, 0x3fd99371591464cc),
	(0x3fda81d7079868bb, 0x3fed4b5b1d5d78c0, 0x3fd9c17d39ba9f53),
	(0x3fdab41b223bd70a, 0x3fed4134cc4c88b2, 0x3fd9ef795a3dd82c),
	(0x3fdae65f0bd2deeb, 0x3fed36fc796c6d1f, 0x3fda1d654e53c0c4),
	(0x3fdb18a2faa54a8e, 0x3fed2cb220194f43, 0x3fda4b412b549e3d),
	(0x3fdb4ae6e27ee94a, 0x3fed2255c92cb15e, 0x3fda790cc9d54d8e),
	(0x3fdb7d2afabcbd96, 0x3fed17e76f8b1bff, 0x3fdaa6c83ff29436),
	(0x3fdbaf6edd63f9e6, 0x3fed0d672ed022eb, 0x3fdad47314b13a5e),
	(0x3fdbe1b2ee09849b, 0x3fed02d4f8ac5a8e, 0x3fdb020d86625c4a),
	(0x3fdc13f6d9f827b3, 0x3fecf830e504ee27, 0x3fdb2f972dd6dd44),
	(0x3fdc463ac85c4fc4, 0x3feced7af23188c8, 0x3fdb5d101285fa87),
	(0x3fdc787e99315d1d, 0x3fece2b32dae84df, 0x3fdb8a77fb79b6c9),
	(0x3fdcaac2c391b562, 0x3fecd7d984771033, 0x3fdbb7cf38286324),
	(0x3fdcdd06bf1547a8, 0x3fecccee1a978b83, 0x3fdbe515317a2767),
	(0x3fdd0f4a93c25d1a, 0x3fecc1f0f53b85ed, 0x3fdc1249d2e7d3d7),
	(0x3fdd418e7cb2c1ac, 0x3fecb6e20e487888, 0x3fdc3f6d35bf5483),
	(0x3fdd73d292000a16, 0x3fecabc16721278b, 0x3fdc6c7f53ab8367),
	(0x3fdda616817e6870, 0x3feca08f18d00ef9, 0x3fdc997fc72e0f8c),
	(0x3fddd85a5a1d76bb, 0x3fec954b270990c5, 0x3fdcc66e8203a2b0),
	(0x3fde0a9e6cc874c6, 0x3fec89f5868b6c72, 0x3fdcf34bb0b7c500),
	(0x3fde3ce26dc62595, 0x3fec7e8e4f51729a, 0x3fdd2016f3f2781b),
	(0x3fde6f264279a0bc, 0x3fec73158e8e71da, 0x3fdd4cd0187c011d),
	(0x3fdea16a5680fb27, 0x3fec678b32bc65ff, 0x3fdd797762744bde),
	(0x3fded3ae3d42764c, 0x3fec5bef5bdf2d0e, 0x3fdda60c55ccc8d6),
	(0x3fdf05f24a4f648b, 0x3fec5041fdd6d9d1, 0x3fddd28f21277b30),
	(0x3fdf383651e91662, 0x3fec448329efd6a1, 0x3fddfeff8240fbd5),
	(0x3fdf6a7a42ac94c9, 0x3fec38b2eb87ae38, 0x3fde2b5d4e604dad),
	(0x3fdf9cbe1d3b3429, 0x3fec2cd149d960c0, 0x3fde57a86acebf99),
	(0x3fdfcf020d397ddb, 0x3fec20de41e8a738, 0x3fde83e0e2af7102),
	(0x3fe000a313205931, 0x3fec14d9d64b5fbe, 0x3fdeb006abd63bf5),
	(0x3fe019c4f9037b82, 0x3fec08c42ac58524, 0x3fdedc194338385b),
	(0x3fe032e70860dbf0, 0x3febfc9d20441910, 0x3fdf08191a1b3b78),
	(0x3fe04c0906e9f028, 0x3febf064da5e176a, 0x3fdf3405af2bebc0),
	(0x3fe0652b03fdfd24, 0x3febe41b5936a8f3, 0x3fdf5fdf024485e7),
	(0x3fe07e4d0b91a0db, 0x3febd7c09e80889b, 0x3fdf8ba50d2a0c48),
	(0x3fe0976ef1f9c5ae, 0x3febcb54c769186a, 0x3fdfb75768e7fa9b),
	(0x3fe0b090e7d18f51, 0x3febbed7c3a63454, 0x3fdfe2f64f20fede),
	(0x3fe0c9b2e9a39b2d, 0x3febb2499c87d6e9, 0x3fe00740cf66097a),
	(0x3fe0e2d4ddcaea14, 0x3feba5aa669df22e, 0x3fe01cfc88506502),
	(0x3fe0fbf6e1078c8e, 0x3feb98fa1b3ff547, 0x3fe032ae5dc3499b),
	(0x3fe11518d98a4a52, 0x3feb8c38cf44e150, 0x3fe048562c12ec1d),
	(0x3fe12e3add8e0dcc, 0x3feb7f667f41f3ed, 0x3fe05df3f90aa7ce),
	(0x3fe1475ce227685c, 0x3feb728338a5b7b8, 0x3fe07387ade96093),
	(0x3fe1607ec66ef221, 0x3feb658f1462d09f, 0x3fe0891121335eab),
	(0x3fe179a0be834cc7, 0x3feb5889ffa66df4, 0x3fe09e9072510f34),
	(0x3fe192c2b8568d3a, 0x3feb4b740bbd2fb5, 0x3fe0b405848141dd),
	(0x3fe1abe4b59244a1, 0x3feb3e4d3fd6bd5a, 0x3fe0c9704befbcad),
	(0x3fe1c506b2c3bd2e, 0x3feb3115a5da6c4f, 0x3fe0ded0b8742978),
	(0x3fe1de289173a977, 0x3feb23cd5613980e, 0x3fe0f426a30829d9),
	(0x3fe1f74a8f60e6e9, 0x3feb167438110e6e, 0x3fe1097232ed0851),
	(0x3fe2106ca03e5fd0, 0x3feb090a5a650d01, 0x3fe11eb350745afe),
	(0x3fe2298e9db917c4, 0x3feafb8fd9ca55cd, 0x3fe133e9ce192ca0),
	(0x3fe242b08f9ff90f, 0x3feaee04ba8026f6, 0x3fe14915a5706056),
	(0x3fe25bd28f640cf6, 0x3feae068f72931d9, 0x3fe15e36ded7bb02),
	(0x3fe274f47c0699ab, 0x3fead2bcaa0d2e11, 0x3fe1734d518c9922),
	(0x3fe28e1685bcfd71, 0x3feac4ffc15749f9, 0x3fe1885918f9ac8e),
	(0x3fe2a736fe1e1494, 0x3feab7333233d9b8, 0x3fe19d58c0a87f59),
	(0x3fe2c05a9f4eb35a, 0x3feaa9546b006d3c, 0x3fe1b2502def806a),
	(0x3fe2d97c6af42411, 0x3fea9b66344e259f, 0x3fe1c73b28d8e8d8),
	(0x3fe2f29e6e1ad75e, 0x3fea8d6775437b8d, 0x3fe1dc1b5a8d2e11),
	(0x3fe30bc06e6776fd, 0x3fea7f5856cdc155, 0x3fe1f0f0859072c6),
	(0x3fe324e1d749c5d3, 0x3fea7139354a6417, 0x3fe205ba2249d8d8),
	(0x3fe33e045ca06605, 0x3fea63092400435f, 0x3fe21a798c19a7e7),
	(0x3fe357267800e833, 0x3fea54c9076262f5, 0x3fe22f2d7377bfc7),
	(0x3fe370485e5be5d6, 0x3fea4678cad14810, 0x3fe243d5f7a46407),
	(0x3fe3896a4d91d9d2, 0x3fea3818540e8f5e, 0x3fe258733edb7371),
	(0x3fe3a28c599d4a38, 0x3fea29a7a0666245, 0x3fe26d054caf4fb0),
	(0x3fe3bbae6b7bfd61, 0x3fea1b26c5d7e6f3, 0x3fe2818c01832d38),
	(0x3fe3d4d03ed37a30, 0x3fea0c95f4fd994c, 0x3fe29607190147e2),
	(0x3fe3edf268105227, 0x3fe9fdf4e0b7d0c8, 0x3fe2aa76ff6bb39c),
	(0x3fe4071445cf2ff6, 0x3fe9ef43eff2b264, 0x3fe2bedb24e4da22),
	(0x3fe420363dd59357, 0x3fe9e082f06f9ebe, 0x3fe2d333cf8d19db),
	(0x3fe4395843ba2fd5, 0x3fe9d1b1f26b4f3c, 0x3fe2e780e8af8d40),
	(0x3fe4527a412b9192, 0x3fe9c2d10c2c8325, 0x3fe2fbc251bb901b),
	(0x3fe46b9c27a00cd7, 0x3fe9b3e04f99cbd5, 0x3fe30ff7f2910508),
	(0x3fe484be309a8d17, 0x3fe9a4dfa3af458c, 0x3fe32421ecef726e),
	(0x3fe49de033475607, 0x3fe995cf29f2517a, 0x3fe33840139184b1),
	(0x3fe4b7021f53f06e, 0x3fe986aef5919af2, 0x3fe34c524d121d49),
	(0x3fe4d0240f052501, 0x3fe9777f00244ce3, 0x3fe36058a217b268),
	(0x3fe4e9463716a53c, 0x3fe9683f32f2ae77, 0x3fe3745330215f46),
	(0x3fe502681e9a6016, 0x3fe958efe0cb9eeb, 0x3fe388418ac15fff),
	(0x3fe51b8a16255181, 0x3fe94990e237aa2e, 0x3fe39c23e5b6b244),
	(0x3fe534ac09d21709, 0x3fe93a224cd1d5d1, 0x3fe3affa24f6eadd),
	(0x3fe54dcdf3b3627e, 0x3fe92aa42dcf60f8, 0x3fe3c3c437a1dace),
	(0x3fe566effef25aad, 0x3fe91b16740dcf8f, 0x3fe3d782336d7ad4),
	(0x3fe5801215d3e4d3, 0x3fe90b79366e5e53, 0x3fe3eb33faf99080),
	(0x3fe59933ff695620, 0x3fe8fbcca2107806, 0x3fe3fed9559bf7b0),
	(0x3fe5b255ee2800cd, 0x3fe8ec10a14c3bbf, 0x3fe412725ec52f70),
	(0x3fe5cb77fdabb08b, 0x3fe8dc452c6aa905, 0x3fe425ff1fc9c896),
	(0x3fe5e499ef7e111b, 0x3fe8cc6a746824f6, 0x3fe4397f5c023a11),
	(0x3fe5fdbbe1bc4e34, 0x3fe8bc807027ea41, 0x3fe44cf31eda7ea0),
	(0x3fe616ddfb59ae0d, 0x3fe8ac8710ace7bf, 0x3fe4605a7a5aaeff),
	(0x3fe62fffdd1c090a, 0x3fe89c7e9c66cac4, 0x3fe473b51912497a),
	(0x3fe64921cfa4dad5, 0x3fe88c66ef074f3a, 0x3fe48703271ce28c),
	(0x3fe66243d696fbd4, 0x3fe87c401005fc0b, 0x3fe49a449b40f2d3),
	(0x3fe67b65d9d2cf92, 0x3fe86c0a18cb21f8, 0x3fe4ad795715a3e0),
	(0x3fe69487ba5a4421, 0x3fe85bc52776b9fc, 0x3fe4c0a1373040c0),
	(0x3fe6ada9c7509d39, 0x3fe84b7112ce79ff, 0x3fe4d3bc6c09e271),
	(0x3fe6c6cbcacfb7d4, 0x3fe83b0e07c9e659, 0x3fe4e6cac0c53e06),
	(0x3fe6dfedb933bfad, 0x3fe82a9c1836e690, 0x3fe4f9cc20e5450e),
	(0x3fe6f90fb67efcd3, 0x3fe81a1b36b01830, 0x3fe50cc09bf06f81),
	(0x3fe71231b7343738, 0x3fe8098b74dea97c, 0x3fe51fa81d7d14ee),
	(0x3fe72b53a85aa5f7, 0x3fe7f8ece98512ef, 0x3fe532828ba63ede),
	(0x3fe74475af974694, 0x3fe7e83f85f965d1, 0x3fe5454ff702d29b),
	(0x3fe75d9799aac134, 0x3fe7d783768ce953, 0x3fe558102da86994),
	(0x3fe776b9903c81f2, 0x3fe7c6b8a9e499af, 0x3fe56ac34326d2d0),
	(0x3fe78fdba082343f, 0x3fe7b5df2167453d, 0x3fe57d6935ab2542),
	(0x3fe7a8fdabde74d9, 0x3fe7a4f6fbedea5e, 0x3fe59001e2ecf285),
	(0x3fe7c21f8108af2e, 0x3fe794006540f1ec, 0x3fe5a28d1b2b3ab1),
	(0x3fe7db4178eb2ca1, 0x3fe782fb2be4619a, 0x3fe5b50b14a054d2),
	(0x3fe7f4637bfce457, 0x3fe771e76a206b4b, 0x3fe5c77bb26e7244),
	(0x3fe80d856a5e5235, 0x3fe760c5402e27c0, 0x3fe5d9ded1dab0d8),
	(0x3fe826a77aed6885, 0x3fe74f94932c2aa4, 0x3fe5ec348fa6b9f0),
	(0x3fe83fc98cbf9b15, 0x3fe73e55841a0699, 0x3fe5fe7cc8635d47),
	(0x3fe858eb8885faf1, 0x3fe72d082dab83d0, 0x3fe610b75fe5f52f),
	(0x3fe8720d606e4a1c, 0x3fe71baca44228b1, 0x3fe622e441189d58),
	(0x3fe88b2f5ac151d8, 0x3fe70a42c20978bf, 0x3fe63503939a8c88),
	(0x3fe8a4516df43f11, 0x3fe6f8ca982975b6, 0x3fe64715452c2eb4),
	(0x3fe8bd735c5fca22, 0x3fe6e7445c4d6347, 0x3fe659191e5ec1a0),
	(0x3fe8d69563ed711a, 0x3fe6d5afee23005e, 0x3fe66b0f407fe153),
	(0x3fe8efb7598d0461, 0x3fe6c40d769b68c4, 0x3fe67cf781aec585),
	(0x3fe908d9720f2113, 0x3fe6b25cdb7a4ca4, 0x3fe68ed1fc7319a2),
	(0x3fe921fb5b6a3d21, 0x3fe6a09e61712f13, 0x3fe6a09e6b8d4885),
]

/*
   The huge reduction tables. R[j] = (l1, l2, l3), such that l1+l2+l3
   is less than 2pi, and 2^(50j + 25) is approximately l1+l2+l3
   (mod 2pi).

   The stepping of 50 was chosen because it is a nice, round number,
   close to 52.  [GB91] make reference to a difficult-to-reduce
   number, Xhard, which is in the range (-2^27, 2^27). By using an
   offset of 25, we can ensure that huge_reduce returns results in
   about that range.  This allows us to reuse calculations in [GB91]
   showing we have probably stored enough bits of pi (and of these
   numbers).

   TODO: There is always the chance that, for some x = x1*2^j +
   x2*2^k, there is catastrophic cancellation which makes the
   remainder sum x1*r[50j+25] + x2*r[50k+25] imprecise. That needs
   to be checked; it shouldn't be too hard.
 */
const R : (uint64, uint64, uint64)[20] = [
	(0x4011fb222e13e839, 0xbcbeecfb7d19df11, 0x3955f9708d867b5b), /* 2^25 mod 2pi */
	(0x3ffee6639887604d, 0x3c88b437ad3f55e0, 0x39218522303457a8), /* 2^75 mod 2pi */
	(0x3fc589bfb31f1687, 0x3c31994c1edb7977, 0xb8dd15afd80892a0), /* 2^125 mod 2pi */
	(0x400663e27d2e0b47, 0xbca00a74acd21bf1, 0x39257592b34b8c25), /* 2^175 mod 2pi */
	(0x40177ff13d560e79, 0xbc8403f354e865f6, 0x38e6e69984c4338e), /* 2^225 mod 2pi */
	(0x4015a4a4671a3606, 0x3ca6fc3b1a2bddd5, 0xb9270923530d0279), /* 2^275 mod 2pi */
	(0x3ff3bf65f73a0474, 0x3c67deda97eb4131, 0x38fa32118f8f578f), /* 2^325 mod 2pi */
	(0x3ffa8e506685f311, 0x3c698a6391d9d31b, 0x38db58d212d28d0a), /* 2^375 mod 2pi */
	(0x400d38d7ecc58385, 0x3c98b076242f0ed3, 0x38e04e83f1274a16), /* 2^425 mod 2pi */
	(0x4017023cb671ed43, 0xbcbeb418af32f189, 0xb944d3aea8efaa5f), /* 2^475 mod 2pi */
	(0x401655d13f672fe9, 0x3ca8e56fc8ad533e, 0x393d464a045591bf), /* 2^525 mod 2pi */
	(0x3feec8dd916fa3b6, 0x3c82c834374ed2af, 0x39225586c285a0d8), /* 2^575 mod 2pi */
	(0x400ed2900b47ba63, 0x3c697a0d5776cc4d, 0x390c831068ee85b1), /* 2^625 mod 2pi */
	(0x4008563b8e99caac, 0xbc5ee03ee870ab9d, 0x38fb980dd9756063), /* 2^675 mod 2pi */
	(0x4012d961cf7cd57d, 0xbcbdde0a1d27628e, 0xb95a8840599f8246), /* 2^725 mod 2pi */
	(0x4016919aac4a18f4, 0xbc93552937a28b88, 0xb9145e70b28c0cbb), /* 2^775 mod 2pi */
	(0x400c37d195196372, 0xbca948065398479d, 0xb94cadd4f6e80c1b), /* 2^825 mod 2pi */
	(0x400d77a34a63ebae, 0x3ca8ca5d7ccc2874, 0xb91611b1e4b65369), /* 2^875 mod 2pi */
	(0x3ff4cb08f62cb38b, 0xbc90f42df8fc967a, 0xb8ed7d71202d2f45), /* 2^925 mod 2pi */
	(0x401848860f8742d6, 0x3c90337738c287b4, 0xb92f24fc614ec2f7), /* 2^975 mod 2pi */
]

const sin32 = {x : flt32
	var s, s2
	(s, s2, _, _) = w64((x : flt64), true, false)
	-> round_down(s, s2)
}

const sin64 = {x : flt64
	var s
	(s, _, _, _) = w64(x, true, false)
	-> s
}

const cos32 = {x : flt32
	var c, c2
	(_, _, c, c2) = w64((x : flt64), false, true)
	-> round_down(c, c2)
}

const cos64 = {x : flt64
	var c
	(_, _, c, _) = w64(x, false, true)
	-> c
}

const sincos32 = {x : flt32
	var s : flt64, s2 : flt64, c : flt64, c2 : flt64
	(s, s2, c, c2) = w64((x : flt64), true, true)
	-> (round_down(s, s2), round_down(c, c2))
}

const sincos64 = {x : flt64
	var s, c
	(s, _, c, _) = w64(x, true, true)
	-> (s, c)
}

/* Calculate sin and/or cos */
const w64 = {x : flt64, want_sin : bool, want_cos : bool
	var sin_ret : flt64 = 0.0
	var cos_ret : flt64 = 0.0
	var sin_ret2 : flt64 = 0.0
	var cos_ret2 : flt64 = 0.0

	var e : int64
	(_, e, _) = std.flt64explode(x)

	if e == 1024
		-> (std.flt64nan(), 0.0, std.flt64nan(), 0.0)
	;;

	var N : int64
	var x1 : flt64, x2 : flt64
	(N, x1, x2) = trig_reduce(x)

	/* Handle multiples of pi/2 */
	var swap_sin_cos : bool = false
	var then_negate_sin : bool = false
	var then_negate_cos : bool = false
	match N
	| 1:
		/* sin(x + pi/2) = cos(x), cos(x + pi/2) = -sin(x) */
		swap_sin_cos = true
		then_negate_cos = true
	| 2:
		/* sin(x + pi) = -sin(x), cos(x + pi) = -cos(x) */
		then_negate_cos = true
		then_negate_sin = true
	| 3:
		/* sin(x + 3pi/2) = -cos(x), cos(x + 3pi/2) = sin(x) */
		swap_sin_cos = true
		then_negate_sin = true
	| _:
	;;

	var first_negate_sin : bool = false
	if x1 < 0.0
		x1 = -x1
		x2 = -x2
		first_negate_sin = true
	;;

	/* Figure out where in the C table x lies */
	var xi : uint64, sin_xi : uint64, cos_xi : uint64, sin_delta, cos_delta
	(xi, cos_xi, sin_xi) = trig_table_approx(x1, C)

	/*
	   Compute x - xi with compensated summation. Because xi
	   and delta both lie in the same interval of width (pi/4)/256,
	   which is less than 1/256, we can use that |delta1| <
	   2^-8: we need a polynomial approximation of at least
	   degree 7.

	   This also gives that |delta2| < 2^(-60), vanishing quickly
	   in the polynomial approximations.
	 */
	var delta1, delta2, deltat
	(delta1, deltat) = slow2sum(-std.flt64frombits(xi), x1)
	(delta2, _) = slow2sum(deltat, x2)

	/*
	   sin(delta); the degree 2 coefficient is near 0, so delta_2
	   only shows up in deg 1
	 */
	sin_delta = horner_polyu(delta1, sin_coeff[:])
	sin_delta += delta2 * std.flt64frombits(sin_coeff[1])

	/*
	   cos(delta); delta_2 shows up in deg 1 and 2; the term
	   we care about is a1*d2 + 2*a2*d1*d2
	 */
	cos_delta = horner_polyu(delta1, cos_coeff[:])
	cos_delta += delta2 * fma(delta1, 2.0*std.flt64frombits(cos_coeff[2]), std.flt64frombits(cos_coeff[1]))

	var q : flt64[4]

	if (want_sin && !swap_sin_cos) || (want_cos && swap_sin_cos)
		(q[0], q[1]) = two_by_two64(std.flt64frombits(sin_xi), cos_delta)
		(q[2], q[3]) = two_by_two64(std.flt64frombits(cos_xi), sin_delta)
		std.sort(q[:], mag_cmp64)

		(sin_ret, sin_ret2) = double_compensated_sum(q[:])
	;;

	if (want_cos && !swap_sin_cos) || (want_sin && swap_sin_cos)
		(q[0], q[1]) = two_by_two64(std.flt64frombits(cos_xi), cos_delta)
		(q[2], q[3]) = two_by_two64(std.flt64frombits(sin_xi), sin_delta)
		q[2] = -q[2]
		q[3] = -q[3]

		/*
		   No need to sort; cos_xi and sin_xi are in [0,1],
		   cos_delta is close to 1, sin_delta is close to
		   0.
		*/
		std.swap(&q[1], &q[2])

		(cos_ret, cos_ret2) = double_compensated_sum(q[:])
	;;

	if first_negate_sin
		sin_ret = -sin_ret
		sin_ret2 = -sin_ret2
	;;

	if swap_sin_cos
		std.swap(&sin_ret, &cos_ret)
		std.swap(&sin_ret2, &cos_ret2)
	;;

	if then_negate_sin
		sin_ret = -sin_ret
		sin_ret2 = -sin_ret2
	;;

	if then_negate_cos
		cos_ret = -cos_ret
		cos_ret2 = -cos_ret2
	;;

	-> (sin_ret, sin_ret2, cos_ret, cos_ret2)
}

/* Reduce x to N*(pi/2) + x', with x' in [-pi/4, pi/4] */
const trig_reduce = {x : flt64
	var N : int64 = 0
	var Nf : flt64

	/*
	   We actually only care about N mod 4. If x is very large,
	   the ultimate N that we end up using might not be
	   representable (either as an int64 or flt64), so we instead
	   just keep track of the mod 4 part exactly.
	 */

	/*
	   If we want to store pi in a form to properly reduce
	   2^1000 or so, it turns out that there's some complication
	   with the final digits: they trail off beyond subnormality
	   and can't be represented, even though they are the digits
	   we need for the reduction. Therefore, for extremely high
	   values of x, we pre-compute the reduction and return it
	   here.

	   We get: x1 about in range 2^25, and that (x1 + x2 + x3)
	   approximates x (mod 2pi) very well. This is good enough
	   to ensure that N = x/pi is representable (int64 and
	   flt64).  We do need to worry about catastrophic cancellation,
	   however.
	 */
	var x1, x2, x3
	(x1, x2, x3) = huge_reduce_2pi(x)

	var pi_o_2_0 : flt64 = std.flt64frombits(pi_over_2[0])
	var pi_o_2_1 : flt64 = std.flt64frombits(pi_over_2[1])
	var pi_o_2_2 : flt64 = std.flt64frombits(pi_over_2[2])
	var pi_o_2_3 : flt64 = std.flt64frombits(pi_over_2[3])
	var pi_o_4_0 : flt64 = std.flt64frombits(pi_over_4[0])
	var pi_o_4_1 : flt64 = std.flt64frombits(pi_over_4[1])
	var pi_o_4_2 : flt64 = std.flt64frombits(pi_over_4[2])
	var pi_o_4_3 : flt64 = std.flt64frombits(pi_over_4[3])

	var total_N = 0
	var q : flt64[11]

	/* Compute initial reduction -- this might not be sufficient. */
	total_N = rn(x1 * std.flt64frombits(two_over_pi))
	Nf = (-total_N : flt64)
	(q[0], q[ 4], q[5]) = (x1, x2, x3)
	(q[1], q[ 3]) = two_by_two64(Nf, pi_o_2_0)
	(q[2], q[ 6]) = two_by_two64(Nf, pi_o_2_1)
	(q[7], q[ 8]) = two_by_two64(Nf, pi_o_2_2)
	(q[9], q[10]) = two_by_two64(Nf, pi_o_2_3)

	/*
	  Sorting is very slow, but it's only the top five or so
	  that are in question
	 */
	std.sort(q[0:5], mag_cmp64)
	(x1, x2) = double_compensated_sum(q[:])

	while !(le_22(x1, x2, pi_o_4_0, pi_o_4_1) && le_22(-pi_o_4_0, -pi_o_4_1, x1, x2))
		N = rn(x1 * std.flt64frombits(two_over_pi))
		Nf = (-N : flt64)

		/*
		   Sorting is slow. We know that x1 is roughly
		   cancelled by Nf * pi_o_2_0, so line those up.
		 */
		(q[0], q[2]) = (x1, x2)
		(q[1], q[3]) = two_by_two64(Nf, pi_o_2_0)
		(q[4], q[5]) = two_by_two64(Nf, pi_o_2_1)
		(q[6], q[7]) = two_by_two64(Nf, pi_o_2_2)
		(q[8], q[9]) = two_by_two64(Nf, pi_o_2_3)
		(x1, x2) = double_compensated_sum(q[0:10])
		total_N += (N % 4)
	;;

	-> (((total_N % 4) + 4) % 4, x1, x2)
}

const huge_reduce_2pi = {x : flt64
	var e : int64
	var b : uint64 = std.flt64bits(x)
	(_, e, _) = std.flt64explode(x)

	if e < 25
		-> (x, 0.0, 0.0)
	;;

	/*
	   Since the stepping of R is 50, and x has 53 significant
	   bits, we get a splitting of x into two components. We
	   want

	       x = [ xa * 2^(50j + 25) ] + [ xb * 2^(50(j-1) + 25) ]

	   and {ai}, {bi} such that

	      a1 + a2 + a3 === ( 2^(50j + 25) ) mod 2pi

	      b1 + b2 + b3 === ( 2^(50(j-1) + 25) ) mod 2pi

	   If j is small enough, we can slack off a bit. We really
	   just want xa(a1+a2+a3) + xb(b1+b2+b3) === x (mod 2pi)
	   with a heck of a lot of precision.
	 */
	var j : uint64 = (e - 25 : uint64) / 50
	var xa : flt64 = 0.0, xb : flt64 = 0.0, xc : flt64 = 0.0
	var a1 : flt64 = 0.0, a2 : flt64 = 0.0, a3 : flt64 = 0.0
	var b1 : flt64 = 0.0, b2 : flt64 = 0.0, b3 : flt64 = 0.0
	var c1 : flt64 = 0.0, c2 : flt64 = 0.0, c3 : flt64 = 0.0
	var u1 : uint64, u2 : uint64, u3 : uint64
	var xcur = x

	var e1 : int64 = 50*(j : int64) + 25
	var e2 : int64 = e1 - 50
	var e3 : int64 = e2 - 50
	xa = trunc(scale2(x,-e1))
	xb = trunc(scale2(x,-e2))
	xc = trunc(scale2(x,-e3))
	xc -= scale2(xb, 50)
	xb -= scale2(xa, 50)

	(u1, u2, u3) = R[j]
	a1 = std.flt64frombits(u1)
	a2 = std.flt64frombits(u2)
	a3 = std.flt64frombits(u3)

	if j == 0
		(b1, b2, b3) = (x - scale2(xa, e1), 0.0, 0.0)
		xb = 1.0
		(c1, c2, c3) = (0.0, 0.0, 0.0)
		xc = 0.0
	else
		(u1, u2, u3) = R[j - 1]
		b1 = std.flt64frombits(u1)
		b2 = std.flt64frombits(u2)
		b3 = std.flt64frombits(u3)

		if j == 1
			(c1, c2, c3) = (x - scale2(xa, e1) - scale2(xb, e2), 0.0, 0.0)
			xc = 1.0
		else
			(u1, u2, u3) = R[j - 2]
			c1 = std.flt64frombits(u1)
			c2 = std.flt64frombits(u2)
			c3 = std.flt64frombits(u3)
		;;
	;;

	/*
	   Now we need to combine all this. Even worse, when we
	   multiply the two together, we need to keep full precision
	   (at least for the high-ish bits), so we need some help.

	   TODO: The c-type calculations can probably be optimized,
	   since xc is so small.
	 */
	var q : flt64[18]
	(q[ 0], q[ 1]) = two_by_two64(xa, a1)
	(q[ 2], q[ 3]) = two_by_two64(xa, a2)
	(q[ 4], q[ 5]) = two_by_two64(xa, a3)
	(q[ 6], q[ 7]) = two_by_two64(xb, b1)
	(q[ 8], q[ 9]) = two_by_two64(xb, b2)
	(q[10], q[11]) = two_by_two64(xb, b3)
	(q[12], q[13]) = two_by_two64(xc, c1)
	(q[14], q[15]) = two_by_two64(xc, c2)
	(q[16], q[17]) = two_by_two64(xc, c3)

	-> triple_compensated_sum(q[:])
}

const trig_table_approx = {x1 : flt64, C : (uint64, uint64, uint64)[257]
	var j = (rn(x1 * std.flt64frombits(oneohtwofour_over_pi)) : uint64)
	var x1u = std.flt64bits(x1)
	var xi, sin_xi, cos_xi, test_cos_xi, test_sin_xi

	/* We either want j or j - 1. */
	j = std.max(std.min(j, 256), 0)

	(xi, cos_xi, sin_xi) = C[j]
	if j > 0
		var test_xi
		(test_xi, test_cos_xi, test_sin_xi) = C[j - 1]
		if std.abs(x1u - test_xi) < std.abs(x1u - xi)
			-> (test_xi, test_cos_xi, test_sin_xi)
		;;
	;;

	-> (xi, cos_xi, sin_xi)
}

/*
   Return true iff (a1 + a2) <= (b1 + b2)
 */
const le_22 = { a1, a2, b1, b2
	if a1 < b1
		-> true
	elif a1 > b1
		-> false
	;;

	if a2 > b2
		-> false
	;;

	-> true
}