Commit | Line | Data |
---|---|---|
2bb2136a ZAL |
1 | # |
2 | # Copyright (c) 1987 Regents of the University of California. | |
3 | # | |
4 | # Use and reproduction of this software are granted in accordance with | |
5 | # the terms and conditions specified in the Berkeley Software License | |
6 | # Agreement (in particular, this entails acknowledgement of the programs' | |
7 | # source, and inclusion of this notice) with the additional understanding | |
8 | # that all recipients should regard themselves as participants in an | |
9 | # ongoing research project and hence should feel obligated to report | |
10 | # their experiences (good or bad) with these elementary function codes, | |
11 | # using "sendbug 4bsd-bugs@BERKELEY", to the authors. | |
12 | # | |
13 | .data | |
14 | .align 2 | |
15 | _sccsid: | |
f47ef219 | 16 | .asciz "@(#)cbrt.s 1.2 (ucb.elefunt) %G%" |
2bb2136a ZAL |
17 | |
18 | # double cbrt(double arg) | |
19 | # W. Kahan, 10/13/80. revised 1/13/84 for keeping sign symmetry | |
aa6843f5 | 20 | # Re-coded in tahoe assembly language by Z. Alex Liu (7/13/87) |
2bb2136a ZAL |
21 | # Max error less than 0.667 ULPs _if_ +,-,*,/ were all correctly rounded... |
22 | .globl _cbrt | |
23 | .globl _d_cbrt | |
24 | .globl _dcbrt_ | |
25 | .text | |
aa6843f5 | 26 | .align 2 |
2bb2136a ZAL |
27 | _cbrt: |
28 | _d_cbrt: | |
aa6843f5 | 29 | .word 0x01fc # register save mask, bits 2-8 set: save r2-r8 |
2bb2136a ZAL |
30 | movl 4(fp),r0 # by-value entry: argument words 4(fp),8(fp) are x itself; r0:r1 = x |
31 | movl 8(fp),r1 | |
32 | brb 1f | |
33 | _dcbrt_: | |
aa6843f5 | 34 | .word 0x01fc # register save mask, bits 2-8 set: save r2-r8 |
2bb2136a ZAL |
35 | movl 4(fp),r8 |
36 | movl (r8),r0 | |
37 | movl 4(r8),r1 # by-reference entry: r0:r1 = x, fetched through the pointer loaded into r8 | |
38 | ||
39 | 1: andl3 $0x7f800000,r0,r2 # r2 = biased exponent field of x (bits 30-23 of the high word) | |
40 | beql return # exponent field is zero: return x unchanged in r0:r1; dcbrt(0)=0 dcbrt(res. operand)=res. operand | |
41 | andl3 $0x80000000,r0,r8 # r8 = sign bit of x, kept until the end to restore sign symmetry | |
42 | xorl2 r8,r0 # clear the sign bit: r0:r1 = abs(x) | |
43 | movl r0,r2 # r2 = high word of abs(x), treated as an integer below | |
44 | divl2 $3,r2 # integer divide by 3: rough dcbrt, but the exponent bias is now bias/3 | |
45 | addl2 B,r2 # add B to restore the full bias and diminish the fraction | |
46 | ldf r2 # acc = |q|=|dcbrt| to 5 bits (single float from here to the 23-bit q) | |
47 | mulf r2 # acc = qq | |
48 | divf r0 # acc = qq/|x| (high word of |x| used as a single float) | |
49 | mulf r2 # acc = qqq/|x| | |
50 | addf C # acc = C+qqq/|x| | |
51 | stf r3 # r3 = s = C+qqq/|x| | |
52 | ldf D # acc = D | |
53 | divf r3 # acc = D/s | |
54 | addf E # acc = E+D/s | |
55 | addf r3 # acc = s+E+D/s | |
56 | stf r3 # r3 = s+E+D/s (s itself is no longer needed) | |
57 | ldf F # acc = F | |
58 | divf r3 # acc = F/(s+E+D/s) | |
59 | addf G # acc = G+F/(s+E+D/s) | |
60 | mulf r2 # acc = q*(G+F/(s+E+D/s)) = new q to 23 bits | |
61 | stf r2 # r2 = q*(G+F/(s+E+D/s)) = new q to 23 bits | |
62 | clrl r3 # zero the low word: r2:r3 = q as double float | |
63 | ldd r2 # acc = q as double float (double precision from here on) | |
64 | muld r2 # acc = qq exactly | |
65 | std r4 # r4:r5 = qq exactly | |
66 | ldd r0 # acc = |x| | |
67 | divd r4 # acc = |x|/(q*q) rounded | |
68 | std r0 # r0:r1 = |x|/(q*q) rounded (overwrites |x|) | |
69 | subd r2 # acc = |x|/(q*q)-q exactly | |
70 | std r6 # r6:r7 = |x|/(q*q)-q exactly | |
71 | movl r2,r4 | |
72 | clrl r5 # r4:r5 = q as double float | |
73 | addl2 $0x800000,r4 # add 1 to the exponent field: r4:r5 = 2*q | |
74 | ldd r4 # acc = 2*q | |
75 | addd r0 # acc = 2*q+|x|/(q*q) | |
76 | std r4 # r4:r5 = 2*q+|x|/(q*q) | |
77 | ldd r6 # acc = |x|/(q*q)-q | |
78 | divd r4 # acc = (|x|/(q*q)-q)/(2*q+|x|/(q*q)) | |
79 | muld r2 # acc = q*(|x|/(q*q)-q)/(2*q+|x|/(q*q)) = correction to q | |
80 | addd r2 # acc = q+q*(|x|/(q*q)-q)/(2*q+|x|/(q*q)) | |
81 | std r0 # r0:r1 = |result| | |
82 | orl2 r8,r0 # restore the sign bit saved in r8: r0:r1 = result with the sign of x | |
83 | return: ret # value is returned in r0:r1; error less than 0.667ULPs? | |
84 | ||
85 | .data | |
86 | .align 2 | |
87 | B : .long 721142941 # integer (86-0.03306235651)*(2^23); added after the divide by 3 to restore the exponent bias and diminish the fraction | |
88 | .align 2 | |
89 | C: .long 0x400af8b0 # single-float bit pattern for .float 0f0.5428571429 = 19/35; the C in s = C+qqq/|x| | |
90 | .align 2 | |
91 | D: .long 0xc0348ef1 # single-float bit pattern for .float 0f-0.7053061224 = -864/1225; the D in D/s | |
92 | .align 2 | |
93 | E: .long 0x40b50750 # single-float bit pattern for .float 0f1.414285714 = 99/70; the E in s+E+D/s | |
94 | .align 2 | |
95 | F: .long 0x40cdb6db # single-float bit pattern for .float 0f1.607142857 = 45/28; the F in F/(s+E+D/s) | |
96 | .align 2 | |
97 | G: .long 0x3fb6db6e # single-float bit pattern for .float 0f0.3571428571 = 5/14; the G in G+F/(s+E+D/s) |