Commit | Line | Data |
---|---|---|
bc0f2b0e | 1 | /* |
2be22526 | 2 | * Copyright (c) 1987 Regents of the University of California. |
bc0f2b0e KB |
3 | * All rights reserved. |
4 | * | |
7e3eac84 | 5 | * %sccs.include.redist.c% |
2be22526 ZAL |
6 | */ |
7 | .data | |
8 | .align 2 | |
9 | _sccsid: | |
2bc65d90 | 10 | .asciz "@(#)sqrt.s 5.6 (ucb.elefunt) %G%" |
2be22526 ZAL |
11 | |
12 | /* | |
13 | * double sqrt(arg) revised August 15,1982 | |
14 | * double arg; | |
15 | * if(arg<0.0) { _errno = EDOM; return(<a reserved operand>); } | |
16 | * if arg is a reserved operand it is returned as it is | |
17 | * W. Kahan's magic square root | |
f47ef219 ZAL |
18 | * Coded by Heidi Stettner and revised by Emile LeBlanc 8/18/82. |
19 | * Re-coded in tahoe assembly language by Z. Alex Liu 7/13/87. | |
2be22526 ZAL |
20 | * |
21 | * entry points:_d_sqrt address of double arg is on the stack | |
22 | * _sqrt double arg is on the stack | |
23 | */ | |
24 | .text | |
f47ef219 | 25 | .align 2 |
2be22526 ZAL |
26 | .globl _sqrt |
27 | .globl _d_sqrt | |
28 | .globl libm$dsqrt_r5 | |
29 | .set EDOM,33 | |
30 | ||
31 | _d_sqrt: | |
32 | .word 0x003c # save r2-r5 | |
33 | movl 4(fp),r2 | |
34 | movl (r2),r0 | |
35 | movl 4(r2),r1 # r0:r1 = x | |
36 | brb 1f | |
37 | _sqrt: | |
38 | .word 0x003c # save r2-r5 | |
39 | movl 4(fp),r0 | |
40 | movl 8(fp),r1 # r0:r1 = x | |
41 | 1: andl3 $0x7f800000,r0,r2 # r2 = biased exponent | |
42 | bneq 2f | |
43 | ret # biased exponent is zero -> 0 or reserved op. | |
44 | /* | |
45 | * # internal procedure | |
f47ef219 | 46 | * # ENTRY POINT FOR cdabs and cdsqrt |
2be22526 | 47 | */ |
f47ef219 ZAL |
48 | libm$dsqrt_r5: # returns double square root scaled by 2^r6 |
49 | .word 0x0000 # save nothing | |
2be22526 ZAL |
50 | 2: ldd r0 |
51 | std r4 | |
52 | bleq nonpos # argument is not positive | |
53 | andl3 $0xfffe0000,r4,r2 | |
54 | shar $1,r2,r0 | |
55 | addl2 $0x203c0000,r0 # r0 has magic initial approximation | |
56 | /* | |
57 | * # Do two steps of Heron's rule | |
58 | * # ((arg/guess)+guess)/2 = better guess | |
59 | */ | |
60 | ldf r4 | |
61 | divf r0 | |
62 | addf r0 | |
63 | stf r0 | |
64 | subl2 $0x800000,r0 # divide by two | |
65 | ldf r4 | |
66 | divf r0 | |
67 | addf r0 | |
68 | stf r0 | |
69 | subl2 $0x800000,r0 # divide by two | |
70 | /* | |
71 | * # Scale argument and approximation | |
72 | * # to prevent over/underflow | |
73 | */ | |
74 | andl3 $0x7f800000,r4,r1 | |
75 | subl2 $0x40800000,r1 # r1 contains scaling factor | |
76 | subl2 r1,r4 # r4:r5 = n/s | |
77 | movl r0,r2 | |
78 | subl2 r1,r2 # r2 = a/s | |
79 | /* | |
80 | * # Cubic step | |
81 | * # b = a+2*a*(n-a*a)/(n+3*a*a) where | |
82 | * # b is better approximation, a is approximation | |
83 | * # and n is the original argument. | |
84 | * # s := scale factor. | |
85 | */ | |
86 | clrl r1 # r0:r1 = a | |
87 | clrl r3 # r2:r3 = a/s | |
88 | ldd r0 # acc = a | |
89 | muld r2 # acc = a*a/s | |
90 | std r2 # r2:r3 = a*a/s | |
91 | negd # acc = -a*a/s | |
92 | addd r4 # acc = n/s-a*a/s | |
93 | std r4 # r4:r5 = n/s-a*a/s | |
94 | addl2 $0x1000000,r2 # r2:r3 = 4*a*a/s | |
95 | ldd r2 # acc = 4*a*a/s | |
96 | addd r4 # acc = n/s+3*a*a/s | |
97 | std r2 # r2:r3 = n/s+3*a*a/s | |
98 | ldd r0 # acc = a | |
99 | muld r4 # acc = a*n/s-a*a*a/s | |
100 | divd r2 # acc = a*(n-a*a)/(n+3*a*a) | |
101 | std r4 # r4:r5 = a*(n-a*a)/(n+3*a*a) | |
102 | addl2 $0x800000,r4 # r4:r5 = 2*a*(n-a*a)/(n+3*a*a) | |
103 | ldd r4 # acc = 2*a*(n-a*a)/(n+3*a*a) | |
104 | addd r0 # acc = a+2*a*(n-a*a)/(n+3*a*a) | |
105 | std r0 # r0:r1 = a+2*a*(n-a*a)/(n+3*a*a) | |
106 | ret # rsb | |
107 | nonpos: | |
108 | bneq negarg | |
109 | ret # argument and root are zero | |
110 | negarg: | |
111 | pushl $EDOM | |
112 | callf $8,_infnan # generate the reserved op fault | |
113 | ret |