-
Notifications
You must be signed in to change notification settings - Fork 54
/
dec64.s
1237 lines (910 loc) · 38.3 KB
/
dec64.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
; dec64.s
; 2024-04-09
; Public Domain
; No warranty expressed or implied. Use at your own risk. You have been warned.
; This file contains the source assembly code for the ARM64 implementation of
; the DEC64 library, being the elementary arithmetic operations for DEC64, a
; decimal floating point type.
;
; This file has been tested with Visual Studio 19.
; DEC64 uses 64 bits to represent a number. The low order 8 bits contain an
; exponent that ranges from -127 thru 127. The exponent is not biased. The
; exponent -128 is reserved for nan (not a number). The remaining 56 bits,
; including the sign bit, are the coefficient in the range
; -36_028_797_018_963_968 thru 36_028_797_018_963_967. The exponent and the
; coefficient are both twos complement signed numbers.
;
; The value of any non-nan DEC64 number is coefficient * (10 ** exponent).
;
; Rounding is to the nearest value. Ties are rounded away from zero. Integer
; division is floored. The result of modulo has the sign of the divisor.
; There is no negative zero.
;
; All 72_057_594_037_927_936 values with an exponent of -128 are nan values.
; When these functions return nan, they always return DEC64_NULL,
; the normal nan value.
; These operations produce a result of DEC64_NULL:
;
; dec64_abs(nan)
; dec64_ceiling(nan)
; dec64_floor(nan)
; dec64_neg(nan)
; dec64_normal(nan)
; dec64_signum(nan)
;
; These operations produce a result of zero for all values of n,
; even if n is nan:
;
; dec64_divide(0, n)
; dec64_integer_divide(0, n)
; dec64_modulo(0, n)
; dec64_multiply(0, n)
; dec64_multiply(n, 0)
;
; These operations produce a result of DEC64_NULL for all values of n
; except zero:
;
; dec64_divide(n, 0)
; dec64_divide(n, nan)
; dec64_integer_divide(n, 0)
; dec64_integer_divide(n, nan)
; dec64_modulo(n, 0)
; dec64_modulo(n, nan)
; dec64_multiply(n, nan)
; dec64_multiply(nan, n)
;
; These operations produce a result of normal nan for all values of n:
;
; dec64_add(n, nan)
; dec64_add(nan, n)
; dec64_divide(nan, n)
; dec64_integer_divide(nan, n)
; dec64_modulo(nan, n)
; dec64_round(nan, n)
; dec64_subtract(n, nan)
; dec64_subtract(nan, n)
;
; You know what goes great with those defective pentium chips?
; Defective pentium salsa! (David Letterman)
; All public symbols have a dec64_ prefix. All other symbols are private.
global dec64_abs [func];(number: dec64)
; returns absolution: dec64
global dec64_add [func];(augend: dec64, addend: dec64)
; returns sum: dec64
global dec64_ceiling [func];(number: dec64)
; returns integer: dec64
global dec64_coefficient [func];(number: dec64)
; returns coefficient: int64
global dec64_divide [func];(dividend: dec64, divisor: dec64)
; returns quotient: dec64
global dec64_exponent [func];(number: dec64)
; returns exponent: int64
global dec64_floor [func];(number: dec64)
; returns integer: dec64
global dec64_integer_divide [func];(dividend: dec64, divisor: dec64)
; returns quotient: dec64
global dec64_is_equal [func];(comparahend: dec64, comparator: dec64)
; returns comparison: dec64
global dec64_is_false [func];(boolean: dec64)
; returns comparison: dec64
global dec64_is_integer [func];(number: dec64)
; returns comparison: dec64
global dec64_is_less [func];(comparahend: dec64, comparator: dec64)
; returns comparison: dec64
global dec64_is_nan [func];(number: dec64)
; returns comparison: dec64
global dec64_is_zero [func];(number: dec64)
; returns comparison: dec64
global dec64_modulo [func];(dividend: dec64, divisor: dec64)
; returns modulus: dec64
global dec64_multiply [func];(multiplicand: dec64, multiplier: dec64)
; returns product: dec64
global dec64_neg [func];(number: dec64)
; returns negation: dec64
global dec64_new [func];(coefficient: int64, exponent: int64)
; returns number: dec64
global dec64_normal [func];(number: dec64)
; returns normalization: dec64
global dec64_round [func];(number: dec64, place: dec64)
; returns quantization: dec64
global dec64_signum [func];(number: dec64)
; returns signature: dec64
global dec64_subtract [func];(minuend: dec64, subtrahend: dec64)
; returns difference: dec64
; All of the public functions in this file accept up to two arguments,
; which are passed in registers (x0, x1), returning a result in x0.
; Registers x0 thru x15 may be clobbered.
; The other registers are not disturbed.
; The stack is not touched in any way.
area dec64, align=8, code, readonly
power ; the powers of 10
dcq 1 ; 0
dcq 10 ; 1
dcq 100 ; 2
dcq 1000 ; 3
dcq 10000 ; 4
dcq 100000 ; 5
dcq 1000000 ; 6
dcq 10000000 ; 7
dcq 100000000 ; 8
dcq 1000000000 ; 9
dcq 10000000000 ; 10
dcq 100000000000 ; 11
dcq 1000000000000 ; 12
dcq 10000000000000 ; 13
dcq 100000000000000 ; 14
dcq 1000000000000000 ; 15
dcq 10000000000000000 ; 16
dcq 100000000000000000 ; 17
dcq 1000000000000000000 ; 18
dcq 10000000000000000000 ; 19
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_coefficient;(number: dec64) returns coefficient: int64
; Return the coefficient part from a dec64 number.
asr x0, x0, 8
ret
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_exponent;(number: dec64) returns exponent: int64
; Return the exponent part, sign extended to 64 bits, from a dec64 number.
; dec64_exponent(nan) returns -128.
sxtb x0, w0
ret
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_new;(coefficient: int64, exponent: int64) returns number: dec64
; The dec64_new function combines the coefficient and exponent into a dec64.
; Numbers that are too tiny to be contained in this format become zero.
; Numbers that are too huge to be contained in this format become nan.
; Clobbers x1, x4 thru x7, x11.
; The coefficient is in x0.
; The exponent is in x1.
mov x11, x1 ; x11 is the exponent
mov x1, xzr ; x1 is zero
new
mov x7, 10
; If the exponent is less than -127, or if abs(coefficient) >= 2**55 then we
; must shrink the coefficient.
asr x4, x0, 55 ; x4 is the 9 msb of exponent, sign extended
add x4, x4, x4, lsr 63 ; x4 is zero if the number fits
sub x5, x11, -127 ; x5 is negative if exponent is too negative
orr x4, x4, x5, asr 63
cbnz x4, new_shrink
new_almost_done
; If the exponent is too large, then we must grow the coefficient.
subs xzr, x11, 127
b.gt new_grow
new_done
; Pack the exponent and coefficient together to form a new dec64.
cbz x0, return
and x4, x11, 0xFF
add x0, x4, x0, lsl 8
ret
new_grow
; The coefficient is good, but the exponent is too big.
; We try to grow the coefficient by multiplying by ten.
mul x0, x0, x7
sub x11, x11, 1
; Is the coefficient still in range?
asr x4, x0, 55 ; x4 is the 9 msb of exponent, sign extended
add x4, x4, x4, lsr 63 ; x4 is zero if the number fits
; If so, we are almost done.
cbz x4, new_almost_done
; The number is too big to represent as a DEC64. So sad.
b return_null
new_shrink
; Divide the coefficient by 10 (remembering its old value in x6)
; and increment the exponent.
mov x6, x0
sdiv x0, x0, x7
add x11, x11, 1
; Are the coefficient and exponent now in range? If not, keep shrinking.
asr x4, x0, 55 ; x4 is the 9 msb of exponent, sign extended
add x4, x4, x4, lsr 63 ; x4 is zero if the number fits
sub x5, x11, -127 ; x5 is negative if exponent is too negative
orr x4, x4, x5, asr 63
cbnz x4, new_shrink
; Is the absolute value of the remainder greater than or equal to 5?
msub x5, x0, x7, x6 ; x5 is old coefficient - coefficient * 10
ands xzr, x5, x5 ; is the remainder negative?
cneg x5, x5, mi ; x5 is abs(remainder)
subs xzr, x5, 5 ; is the remainder 5 or more?
b.ge new_round ; rounding is required
subs xzr, x11, 127 ; is the exponent still in range?
b.le new_done
b return_null
new_round
; If so, round the coefficient away from zero.
asr x6, x6, 63 ; x6 is the sign extension
orr x6, x6, 1 ; x6 is -1 or 1
add x0, x0, x6 ; x0 is the rounded coefficient
b new
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_round;(number: dec64, place: dec64) returns quantization: dec64
; The place argument indicates at what decimal place to round.
; -2 nearest cent
; 0 nearest integer
; 3 nearest thousand
; 6 nearest million
; 9 nearest billion
; The place should be between -16 and 16.
asr x4, x0, 8 ; x4 is coefficient of number
sxtb x11, w0 ; x11 is exponent of number
asr x6, x1, 8 ; x6 is coefficient places
sxtb x5, w1 ; x5 is exponent of places
subs xzr, x11, -128
b.eq return_null
cbz x4, return_zero ; is number zero?
; If places is null, use zero.
subs xzr, x5, -128
csel x5, xzr, x5, eq
csel x6, xzr, x6, eq
cbnz x5, round_normal ; if places is not an integer, normalize
subs xzr, x11, x6 ; are we done?
b.ge return
mov x10, 10
round_loop
; Increment the exponent and divide the coefficient by 10 until the target
; exponent is reached.
cbz x4, return_zero
mov x5, x4 ; x5 is old coefficient
sdiv x4, x4, x10 ; x4 is coefficient / 10
add x11, x11, 1
subs xzr, x11, x6
b.lt round_loop
; If abs(remainder) is 5 or more, bump the coefficient.
msub x5, x4, x10, x5 ; x5 is the remainder
ands xzr, x4, x4 ; is the number negative?
cneg x5, x5, mi ; x5 is abs(remainder)
asr x8, x4, 63 ; x8 is -1 or 0
orr x8, x8, 1 ; x8 is -1 or 1
subs xzr, x5, 5
csel x8, x8, xzr, ge ; x8 is zero if no rounding needed
add x0, x4, x8
b new
round_normal
; If places is not obviously an integer, then attempt to normalize it.
mov x14, x0 ; x14 is the number
mov x15, x30 ; x15 is return address
mov x0, x1
adr x30, 8
b dec64_normal
mov x30, x15
ands xzr, x0, 0xFF
b.ne return_null
mov x1, x0
mov x0, x14
b dec64_round
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_add;(augend: dec64, addend: dec64) returns sum: dec64
; Add two dec64 numbers together.
; If the two exponents are both zero (which is usually the case for integers)
; we can take the fastest path. Since the exponents are both zero, we can
; simply add the numbers together and check for overflow.
; Clobbers x4 thru x11.
orr x5, x0, x1
ands xzr, x5, 255
b.ne add_begin
adds x0, x0, x1
b.vs add_overflow
ret
add_overflow
; The fast add overflowed. This is very uncommon. The exponent is in the bottom
; of x1. x0 contains the scaled coefficient, missing its most significant bit.
sub x0, x0, x1 ; undo the addition
sxtb x11, w1 ; x11 is the exponent
asr x0, x0, 8
add x0, x0, x1, asr 8
b new
add_begin
; If the exponents are equal, then we can add fast
sxtb x5, w0 ; x5 is the first exponent
subs xzr, x5, -128 ; Make sure the augend is not nan.
b.eq return_null
sxtb x7, w1 ; x7 is the second exponent
bfxil x0, xzr, 0, 8 ; clear the x0 exponent
subs xzr, x5, x7 ; are the exponents equal?
b.eq add_fast
; The exponents must be made equal before we can add.
mov x10, 10 ; x10 is 10
asr x4, x0, 8 ; x4 is the first coefficient
asr x6, x1, 8 ; x6 is the second coefficient
add_slow
; Make sure that the number with the larger exponent is in (x4, x5).
; The other goes in (x6, x7).
subs xzr, x7, -128 ; Make sure the addend is not nan.
b.eq return_null
subs xzr, x5, x7
b.ge add_grow
mov x8, x4
mov x9, x5
mov x4, x6
mov x5, x7
mov x6, x8
mov x7, x9
add_grow
; If the exponents are not equal, try growing x4.
; First make sure there is some headroom.
subs xzr, x5, x7
b.eq add_ready
asr x11, x4, 58
eor x11, x11, x4, asr 63
cbnz x11, add_shrink
mul x4, x4, x10
sub x5, x5, 1
b add_grow
add_shrink
; If the exponents are not equal yet, try shrinking x6.
sdiv x6, x6, x10
add x7, x7, 1
subs xzr, x5, x7
b.ne add_shrink
add_ready
; The exponents are equal. We are ready to add.
add x0, x4, x6
mov x11, x5
b new
add_fast
adds x0, x0, x1
b.vs add_overflow
asr x10, x0, 8
cbz x10, return_zero
ret
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_subtract;(minuend: dec64, subtrahend: dec64) returns difference: dec64
; Subtract two dec64 by negating the subtrahend and adding. The complication
; is that the coeffient -36028797018963968 is not like the other coefficients.
; Negate the subtrahend coefficient without changing the exponent.
eor x1, x1, 0xFFFFFFFFFFFFFF00
adds x1, x1, 256 ; 2s complement adds 1 after a 'not'
b.vc dec64_add ; if it did not overflow, we can add
; Negating the subtrahend caused an overflow.
; Set things up to jump into the add slow path.
asr x4, x0, 8 ; x4 is the first coefficient
sxtb x5, w0 ; x5 is the first exponent
mov x6, 0x80000000000000
sxtb x7, w1 ; x7 is the second exponent
subs xzr, x5, -128 ; make sure the minuend is not nan
b.ne add_slow
b return_null
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_ceiling;(number: dec64) returns integer: dec64
; Produce the smallest integer that is greater than or equal to the number.
; In the result, the exponent will be greater than or equal to zero unless
; it is nan. Numbers with positive exponents are not modified, even if
; the numbers are outside of the safe integer range.
mov x7, 1 ; x7 is the incrementor (round up)
b floor_begin
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_floor;(number: dec64) returns integer: dec64
; Produce the largest integer that is less than or equal to the number. This
; is sometimes called the entier function. In the result, the exponent will
; be greater than or equal to zero unless it is nan. Numbers with positive
; exponents are not modified, even if the numbers are outside of the safe
; integer range.
; Clobbers x4 thru x10.
mov x7, -1 ; x7 is the incrementor (round down)
floor_begin
asr x4, x0, 8 ; x4 is the coefficient
tbnz x0, 7, floor_fractional
cbz x4, return_zero
ret
floor_fractional
sxtb x5, w0 ; x5 is the exponent
sub x5, xzr, x5 ; x5 is abs(exponent)
subs xzr, x5, 16 ; is the number super dinky?
b.gt floor_dinky
adr x10, power
ldr x10, [x10, x5 lsl 3] ; get a power of 10
sdiv x0, x4, x10 ; x0 is coefficient / power of 10
msub x8, x0, x10, x4 ; x8 is the remainder
; Three things determine if x0 needs to be incremented, decremented,
; or left alone:
; Is this 'floor' or 'ceiling' (x7)
; Is the remainder zero (x8)
; Is the coefficient negative (x4)
floor_inc
eor x9, x4, x7 ; if number & incrementor are not same sign
bic x7, x7, x9, asr 63 ; then incrementor is zero
subs xzr, x8, xzr ; if the remainder is zero
csel x7, xzr, x7, eq ; then incrementor is zero
add x0, x0, x7
lsl x0, x0, 8
ret
floor_dinky
subs xzr, x5, 0x80
b.ge return_null
mov x8, x4
mov x0, xzr
b floor_inc
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_multiply;(multiplicand: dec64, multiplier: dec64) returns product: dec64
; Multiply two dec64 numbers together.
; Clobbers x4 thru x11.
asr x4, x0, 8 ; x4 is the first coefficient
sxtb x5, w0 ; x5 is the first exponent
asr x6, x1, 8 ; x6 is the second coefficient
sxtb x7, w1 ; x7 is the second exponent
subs xzr, x5, -128
csetm x8, ne ; x8 is 0 if first exponent is null
subs xzr, x7, -128
csel x8, x8, xzr, ne ; x8 is 0 if either exponent is null
cbz x8, multiply_null
ands xzr, x4, x4
csetm x8, ne ; x8 is 0 if first coefficient is 0
ands xzr, x6, x6
csel x8, x8, xzr, ne ; x8 is 0 if either coefficient is 0
cbz x8, return_zero
eor x0, x4, x6 ; x0 is the sign of the result
ands xzr, x4, x4
cneg x4, x4, mi ; x4 is abs(first coefficient)
ands xzr, x6, x6
cneg x6, x6, mi ; x6 is abs(second coefficient)
mul x8, x4, x6 ; x8 is the low part of the product
umulh x9, x4, x6 ; x9 is the high part of the product
add x11, x5, x7 ; x11 is the new exponent
multiply_size
cbnz x9, multiply_reduce ; is the product too big?
tbz x8, 63, multiply_sign ; does the product fit?
; The product coefficient contains one too many bits. Divide it by 10 and
; increment the exponent.
mov x10, 10
udiv x8, x8, x10
add x11, x11, 1
multiply_sign
ands xzr, x0, x0
cneg x0, x8, mi ; x0 is signed product coefficient
b new
multiply_reduce
; The product coefficent is in two words (x9, x8). We need to get it down to one
; word. Count the number of leading zero bits to get an estimate of the number
; of excess digits. Then divide.
clz x5, x9 ; x5 is the number of leading zeros
mov x4, 69 ; x4 is 69 (64 + fudge)
mov x7, 77 ; x7 is 77
sub x4, x4, x5 ; x4 is number of excess bits
mul x4, x4, x7 ; x4 is x5 * 77
lsr x4, x4, 8 ; x4 is number of excess digits
adr x10, power
ldr x6, [x10, x4, lsl 3] ; x6 is a power of ten
add x11, x11, x4 ; pump up the exponent
clz x7, x6 ; count the leading 0 in high dividend
mov x4, 64
sub x7, x4, x7 ; x7 is sigbits in power of ten
; x0 is the sign of the product
; x6 is the power of ten
; x7 is the sigbits in the power of ten
; x9, x8 is the oversized product
; x11 is the exponent
b divide_big
multiply_null
subs xzr, x0, 0x80
csel x4, x0, x4, eq
subs xzr, x1, 0x80
csel x6, x1, x6, eq
cbz x4, return_zero
cbz x6, return_zero
b return_null
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_divide;(dividend: dec64, divisor: dec64) returns quotient: dec64
; Divide a dec64 number by another.
; Clobbers x4 thru x11.
asr x4, x0, 8 ; x4 is the dividend coefficient
cbz x4, divide_zero ; is the dividend 0?
sxtb x5, w0 ; x5 is the dividend exponent
asr x6, x1, 8 ; x6 is the divisor coefficient
sxtb x7, w1 ; x7 is the divisor exponent
; If the divisor is zero, or if either number is nan, the result is null.
subs xzr, x5, -128
csetm x9, ne ; x9 is 0 if dividend exponent is nan
subs xzr, x7, -128
csel x9, x9, xzr, ne ; x9 is 0 if divisor exponent is nan
ands xzr, x6, x6
csel x9, x9, xzr, ne ; x9 is 0 if divisor coefficient is 0
cbz x9, return_null
; There are a couple of interesting special cases.
subs xzr, x1, 0x200 ; is the divisor 2?
b.eq divide_two
subs xzr, x1, 0x100 ; is the divisor 1?
b.eq return
; The exponent of the quotient is the difference of the input exponents.
sub x11, x5, x7 ; x11 is the quotient exponent
; Save the sign of the quotient. We are mostly using unsigned arithmetic.
eor x0, x4, x6 ; x0 is the sign of the quotient
; Make the arguments positive.
ands xzr, x4, x4
cneg x4, x4, mi ; x4 is abs(dividend coefficient)
ands xzr, x6, x6
cneg x6, x6, mi ; x6 is abs(divisor coefficient)
; This is a floating point divide, so we want to preserve as much information
; in the quotient as possible. To do this, we scale up the dividend by a
; suitable power of ten, reducing the exponent by a corresponding amount.
clz x5, x4 ; x5 is leading zeros of dividend
mov x9, 64 ; x9 is 64
clz x7, x6 ; x7 is leading zeros of divisor
sub x5, x9, x5 ; x5 is sigbits in dividend
sub x7, x9, x7 ; x7 is sigbits in divisor
add x8, x7, 59 ; x8 is sigbits needed in dividend
sub x8, x8, x5 ; x8 is additional sigbits required
; To convert bits to digits, we multiply by log10/log2 (0.30103), which is almost
; equal to 77/256 (0.30078).
mov x10, 77
mul x8, x8, x10
lsr x8, x8, 8 ; x8 is the number of digits to inflate
sub x11, x11, x8 ; subtract the new digits from the exponent
; The number of new digits could be as great as 34, but the ARM64 multiplier can
; only take multipliers as great as 19 digits. So it might be necessary to split
; the multiplication into two parts.
; The optional first part produces a product that fits in one word.
adr x9, power ; x9 is the address of the power of 10 table
subs x10, x8, 19 ; is the number of digits more than 19?
b.le divide_inflate ; if not, then a single multiply is needed
ldr x10, [x9, x10, lsl 3] ; x10 is the first power of ten
mul x4, x4, x10 ; multiply the dividend by the first part
mov x8, 19 ; the next multiply will be by 1e19
divide_inflate
; Put the inflated dividend in (x9, x8).
ldr x10, [x9, x8, lsl 3] ; load the power of ten
mul x8, x4, x10 ; x8 is the low half of the dividend
umulh x9, x4, x10 ; x9 is the high half of the dividend
; Align the dividend and divisor by their leading 1 bits.
; How we do this depends on the size of the dividend.
cbnz x9, divide_big
; If the dividend has only 1 word, then shift the divisor.
clz x5, x8 ; count the leading 0 in low dividend
mov x4, 64 ; x4 is 64
mov x9, x8 ; move the low part to the high part
sub x5, x4, x5 ; x5 is sigbits in dividend
mov x8, xzr ; zero out the low part
sub x10, x5, x7 ; x10 is the countdown
lsl x6, x6, x10 ; align the divisor
b divide_ready
divide_big
; If the dividend has two words, then shift the dividend.
clz x5, x9 ; count the leading 0 in high dividend
mov x4, 64 ; x4 is 64 (word size)
sub x5, x4, x5 ; x5 is sigbits in high dividend
sub x10, x7, x5 ; x10 is left shift distance
sub x4, x4, x10 ; x4 is right shift distance
lsl x9, x9, x10 ; shift high dividend
lsr x4, x8, x4 ; x4 is the carry
orr x9, x9, x4 ; insert the carry into the high dividend
lsl x8, x8, x10 ; shift the low dividend
add x10, x5, 64 ; x10 is sigbits in whole dividend (x9, x8)
sub x10, x10, x7 ; x10 is the countdown
divide_ready
mov x7, xzr ; x7 is the quotient
divide_step
; In each divide step:
; Double the quotient
; Find the difference between the aligned dividend and divisor
; If the difference is not negative
; Add 1 to the quotient
; Subtract the divisor from the dividend
; Double the dividend (x9, x8)
; Decrement the countdown
subs x4, x9, x6 ; x4 is high dividend - divisor
cset x5, pl ; x5 is 1 if difference is positive
csel x9, x4, x9, pl ; x9 is the difference if positive
add x7, x5, x7, lsl 1 ; double quotient and + 1 if positive diff
lsr x5, x8, 63 ; x5 is carry (high bit of low dividend)
orr x9, x5, x9, lsl 1 ; shift high dividend and insert carry
lsl x8, x8, 1 ; shift low dividend
subs x10, x10, 1 ; decrement countdown
b.pl divide_step ; is it done?
; Correct the sign and get out.
ands xzr, x0, x0 ; should it negative?
cneg x0, x7, mi ; correct the sign
b new
divide_zero
; If x0 is not 0x80, return zero.
subs xzr, x0, 0x80
b.ne return_zero
b return_null
divide_two
; Divide a dec64 number by two. If it is even, we can do a shift. If it is odd,
; then we decrement the exponent and multiply the coefficient by 5.
tbnz x4, 0, divide_two_odd ; the coefficient is odd
cbz x4, return_zero
lsl x0, x4, 7
bfi x0, x5, 0, 8
ret
divide_two_odd
; Multiply by 5 and divide by 10.
sub x11, x5, 1 ; decrease the exponent
add x0, x4, x4, lsl 2 ; x0 is coefficient * 5
b new
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_integer_divide;(dividend: dec64, divisor: dec64) returns quotient: dec64
; Divide, with a floored integer result. It produces the same result as
; dec64_floor(dec64_divide(dividend, divisor))
; Clobbers x12. It can also clobber more via dec64_divide and dec64_floor.
; If either exponent is not zero, or if either coefficient is negative, then do
; it the hard way.
orr x12, x0, x1
ands xzr, x12, 255
orr x12, x12, x12, asr 63
cbnz x12, integer_divide_hard
cbz x0, return
asr x12, x1, 8 ; x12 is the divisor
cbz x12, return_null ; divide by zero
sdiv x0, x0, x12
lsl x0, x0, 8
ret
integer_divide_hard
mov x12, x30 ; save the return address in x12
bl dec64_divide
mov x30, x12 ; restore the return address
b dec64_floor ; tail call
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_modulo;(dividend: dec64, divisor: dec64) returns modulus: dec64
; Modulo. It produces the same result as
; dec64_subtract(
; dividend,
; dec64_multiply(
; dec64_floor(
; dec64_divide(
; dividend,
; divisor
; )
; ),
; divisor
; )
; )
asr x4, x0, 8 ; x4 is dividend coefficient
cbz x4, modulo_zero
asr x6, x1, 8 ; x6 is divisor coefficient
cbz x6, return_null
mov x14, x0 ; x14 is dividend
mov x15, x30 ; x15 is return address
adr x30, 8
b dec64_integer_divide
adr x30, 8
b dec64_multiply
mov x1, x0
mov x0, x14
mov x30, x15
b dec64_subtract
modulo_zero
subs xzr, x0, 0x80
b.ne return_zero
b return_null
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_signum;(number: dec64) returns signature: dec64
; If the number is nan, the result is nan.
; If the number is less than zero, the result is -1.
; If the number is zero, the result is 0.
; If the number is greater than zero, the result is 1.
and x5, x0, 0xFF ; x5 is the exponent
subs xzr, x5, 0x80 ; is it nan?
b.eq return_null
adds xzr, xzr, x0, asr 8 ; is the coefficient zero?
cset x4, ne ; x4 is either 1 or 0
asr x0, x0, 63 ; x0 is either -1 or 0
orr x0, x0, x4 ; x0 is either -1, 0, or 1
lsl x0, x0, 8
ret
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_abs;(number: dec64) returns absolution: dec64
; Find the absolute value of a number. If the number is negative, hand it off
; to dec64_neg. Otherwise, return the number unless it is nan or zero.
tbnz x0, 63, dec64_neg
and x5, x0, 0xFF ; x5 is the exponent
subs xzr, x5, 0x80 ; is it nan?
b.eq return_null
adds x4, xzr, x0, asr 8 ; is the coefficient zero?
csel x0, x0, x4, ne ; x0 is either the number or zero
ret
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_neg;(number: dec64) returns negation: dec64
; Negate a number.
sxtb x11, w0 ; x11 is the exponent
subs xzr, x11, -128
b.eq return_null
sub x0, xzr, x0, asr 8
b new
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_normal;(number: dec64) returns normalization: dec64
; Make the exponent as close to zero as possible without losing any signficance.
; Usually normalization is not needed since it does not materially change the
; value of a number.
; Clobbers x4 thru x7.
sxtb x5, w0 ; x5 is the exponent
tbnz x0, 7, normal_micro ; is the exponent negative?
cbz x5, return ; is the number an integer?
asr x0, x0, 8 ; x0 is the coefficient
cbz x0, return_zero ; is the coefficient zero?
mov x7, 10
normal_grow
; The exponent is greater than zero. If we subtract 1 from the exponent, we must
; multiply the coefficient by 10.
mul x4, x0, x7
asr x6, x4, 55
adds xzr, x6, x6, asr 63
b.ne normal_done
mov x0, x4
subs x5, x5, 1
b.gt normal_grow
lsl x0, x0, 8
ret
normal_micro
; The exponent is negative. Is it nan?
subs xzr, x5, -128
b.eq return_null
mov x7, 10
asr x0, x0, 8 ; x0 is the coefficient
normal_shrink
; If we add 1 to the exponent, we must divide the coefficient by 10.
cbz x0, return_zero ; is the coefficient zero?
sdiv x4, x0, x7
mul x6, x4, x7
subs xzr, x6, x0
b.ne normal_done
mov x0, x4
adds x5, x5, 1