Commit | Line | Data |
---|---|---|
fe5be67b KB |
1 | # Copyright (c) 1985 Regents of the University of California. |
2 | # All rights reserved. | |
3 | # | |
2bc65d90 | 4 | # %sccs.include.redist.sh% |
fe5be67b | 5 | # |
2bc65d90 | 6 | # @(#)sqrt.s 5.4 (Berkeley) %G% |
fe5be67b | 7 | # |
7c0a3811 GK |
8 | .data |
9 | .align 2 | |
10 | _sccsid: | |
2bc65d90 | 11 | .asciz "@(#)sqrt.s 1.1 (Berkeley) 8/21/85; 5.4 (ucb.elefunt) %G%" |
7c0a3811 GK |
12 | |
13 | /* | |
e5e86c79 ZAL |
14 | * double sqrt(arg) revised August 15,1982 |
15 | * double arg; | |
16 | * if(arg<0.0) { _errno = EDOM; return(<a reserved operand>); } | |
17 | * if arg is a reserved operand it is returned as it is | |
18 | * W. Kahan's magic square root | |
19 | * coded by Heidi Stettner and revised by Emile LeBlanc 8/18/82 | |
20 | * | |
21 | * entry points:_d_sqrt address of double arg is on the stack | |
22 | * _sqrt double arg is on the stack | |
23 | */ | |
24 | .text | |
25 | .align 1 | |
26 | .globl _sqrt | |
27 | .globl _d_sqrt | |
28 | .globl libm$dsqrt_r5 | |
29 | .set EDOM,33 | |
30 | ||
31 | _d_sqrt: | |
32 | .word 0x003c # save r5,r4,r3,r2 | |
33 | movq *4(ap),r0 | |
34 | jmp dsqrt2 | |
35 | _sqrt: | |
36 | .word 0x003c # save r5,r4,r3,r2 | |
37 | movq 4(ap),r0 | |
38 | dsqrt2: bicw3 $0x807f,r0,r2 # check exponent of input | |
39 | jeql noexp # biased exponent is zero -> 0.0 or reserved | |
40 | bsbb libm$dsqrt_r5 | |
41 | noexp: ret | |
42 | ||
43 | /* **************************** internal procedure */ | |
44 | ||
45 | libm$dsqrt_r5: # ENTRY POINT FOR cdabs and cdsqrt | |
46 | # returns double square root scaled by | |
47 | # 2^r6 | |
48 | ||
49 | movd r0,r4 | |
50 | jleq nonpos # argument is not positive | |
51 | movzwl r4,r2 | |
52 | ashl $-1,r2,r0 | |
53 | addw2 $0x203c,r0 # r0 has magic initial approximation | |
54 | /* | |
55 | * Do two steps of Heron's rule | |
56 | * ((arg/guess) + guess) / 2 = better guess | |
57 | */ | |
58 | divf3 r0,r4,r2 | |
59 | addf2 r2,r0 | |
60 | subw2 $0x80,r0 # divide by two | |
61 | ||
62 | divf3 r0,r4,r2 | |
63 | addf2 r2,r0 | |
64 | subw2 $0x80,r0 # divide by two | |
65 | ||
66 | /* Scale argument and approximation to prevent over/underflow */ | |
67 | ||
68 | bicw3 $0x807f,r4,r1 | |
69 | subw2 $0x4080,r1 # r1 contains scaling factor | |
70 | subw2 r1,r4 | |
71 | movl r0,r2 | |
72 | subw2 r1,r2 | |
73 | ||
74 | /* Cubic step | |
75 | * | |
76 | * b = a + 2*a*(n-a*a)/(n+3*a*a) where b is better approximation, | |
77 | * a is approximation, and n is the original argument. | |
78 | * (let s be scale factor in the following comments) | |
79 | */ | |
80 | clrl r1 | |
81 | clrl r3 | |
82 | muld2 r0,r2 # r2:r3 = a*a/s | |
83 | subd2 r2,r4 # r4:r5 = n/s - a*a/s | |
84 | addw2 $0x100,r2 # r2:r3 = 4*a*a/s | |
85 | addd2 r4,r2 # r2:r3 = n/s + 3*a*a/s | |
86 | muld2 r0,r4 # r4:r5 = a*n/s - a*a*a/s | |
87 | divd2 r2,r4 # r4:r5 = a*(n-a*a)/(n+3*a*a) | |
88 | addw2 $0x80,r4 # r4:r5 = 2*a*(n-a*a)/(n+3*a*a) | |
89 | addd2 r4,r0 # r0:r1 = a + 2*a*(n-a*a)/(n+3*a*a) | |
90 | rsb # DONE! | |
91 | nonpos: | |
92 | jneq negarg | |
93 | ret # argument and root are zero | |
94 | negarg: | |
95 | pushl $EDOM | |
96 | calls $1,_infnan # generate the reserved op fault | |
97 | ret |