Commit | Line | Data |
---|---|---|
bc0f2b0e KB |
1 | ; Copyright (c) 1985 Regents of the University of California. |
2 | ; All rights reserved. | |
3 | ; | |
2bc65d90 | 4 | ; %sccs.include.redist.semicolon% |
bc0f2b0e | 5 | ; |
2bc65d90 | 6 | ; @(#)sqrt.s 5.4 (Berkeley) %G% |
a399f6c8 | 7 | ; |
bc0f2b0e | 8 | |
3cd28ebc GK |
9 | ; double sqrt(x) |
10 | ; double x; | |
11 | ; IEEE double precision sqrt | |
12 | ; code in NSC assembly by K.C. Ng | |
13 | ; 12/13/85 | |
14 | ; | |
15 | ; Method: | |
16 | ; Use Kahan's trick to get 8 bits initial approximation | |
17 | ; by integer shift and add/subtract. Then three Newton | |
18 | ; iterations to get down error to within one ulp. Finally | |
19 | ; twiddle the last bit to make to correctly rounded | |
20 | ; according to the rounding mode. | |
21 | ; | |
22 | .vers 2 | |
23 | .text | |
24 | .align 2 | |
25 | .globl _sqrt | |
26 | _sqrt: | |
27 | enter [r3,r4,r5,r6,r7],44 | |
28 | movl f4,tos | |
29 | movl f6,tos | |
30 | movd 2146435072,r2 ; r2 = 0x7ff00000 | |
31 | movl 8(fp),f0 ; f2 = x | |
32 | movd 12(fp),r3 ; r3 = high part of x | |
33 | movd r3,r4 ; make a copy of high part of x in r4 | |
34 | andd r2,r3 ; r3 become the bias exponent of x | |
35 | cmpd r2,r3 ; if r3 = 0x7ff00000 then x is INF or NAN | |
36 | bne L22 | |
37 | ; to see if x is INF | |
38 | movd 8(fp),r0 ; r0 = low part of x | |
39 | movd r4,r1 ; r1 is high part of x again | |
40 | andd 0xfff00000,r1 ; mask off the sign and exponent of x | |
41 | ord r0,r1 ; or with low part, if 0 then x is INF | |
42 | cmpqd 0,r1 ; | |
43 | bne L1 ; not 0; therefore x is NaN; return x. | |
44 | cmpqd 0,r4 ; now x is Inf, is it +inf? | |
45 | blt L1 ; +INF, return x | |
46 | ; -INF, return NaN by doing 0/0 | |
47 | nan: movl 0f0.0,f0 ; | |
48 | divl f0,f0 | |
49 | br L1 | |
50 | L22: ; now x is finite | |
51 | cmpl 0f0.0,f0 ; x = 0 ? | |
52 | beq L1 ; return x if x is +0 or -0 | |
53 | cmpqd 0,r4 ; Is x < 0 ? | |
54 | bgt nan ; if x < 0 return NaN | |
55 | movqd 0,r5 ; r5 == scalx initialize to zero | |
56 | cmpqd 0,r3 ; is x is subnormal ? (r3 is the exponent) | |
57 | bne L21 ; if x is normal, goto L21 | |
58 | movl L30,f2 ; f2 = 2**54 | |
59 | mull f2,f0 ; scale up x by 2**54 | |
60 | subd 0x1b00000,r5 ; off set the scale factor by -27 in exponent | |
61 | L21: | |
62 | ; now x is normal | |
63 | ; notations: | |
64 | ; r1 == copy of fsr | |
65 | ; r2 == mask of e inexact enable flag | |
66 | ; r3 == mask of i inexact flag | |
67 | ; r4 == mask of r rounding mode | |
68 | ; r5 == x's scale factor (already defined) | |
69 | ||
70 | movd 0x20,r2 | |
71 | movd 0x40,r3 | |
72 | movd 0x180,r4 | |
73 | sfsr r0 ; store fsr to r0 | |
74 | movd r0,r1 ; make a copy of fsr to r1 | |
75 | bicd [5,6,7,8],r0 ; clear e,i, and set r to round to nearest | |
76 | lfsr r0 | |
77 | ||
78 | ; begin to compute sqrt(x) | |
79 | movl f0,-8(fp) | |
80 | movd -4(fp),r0 ; r0 the high part of modified x | |
81 | lshd -1,r0 ; r0 >> 1 | |
82 | addd 0x1ff80000,r0 ; add correction to r0 ...got 5 bits approx. | |
83 | movd r0,r6 | |
84 | lshd -13,r6 ; r6 = r0>>-15 | |
85 | andd 0x7c,r6 ; obtain 4*leading 5 bits of r0 | |
86 | addrd L29,r7 ; r7 = address of L29 = table[0] | |
87 | addd r6,r7 ; r6 = address of L29[r6] = table[r6] | |
88 | subd 0(r7),r0 ; r0 = r0 - table[r6] | |
89 | movd r0,-4(fp) | |
90 | movl -8(fp),f2 ; now f2 = y approximate sqrt(f0) to 8 bits | |
91 | ||
92 | movl 0f0.5,f6 ; f6 = 0.5 | |
93 | movl f0,f4 | |
94 | divl f2,f4 ; t = x/y | |
95 | addl f4,f2 ; y = y + x/y | |
96 | mull f6,f2 ; y = 0.5(y+x/y) got 17 bits approx. | |
97 | movl f0,f4 | |
98 | divl f2,f4 ; t = x/y | |
99 | addl f4,f2 ; y = y + x/y | |
100 | mull f6,f2 ; y = 0.5(y+x/y) got 35 bits approx. | |
101 | movl f0,f4 | |
102 | divl f2,f4 ; t = x/y | |
103 | subl f2,f4 ; t = x/y - y | |
104 | mull f6,f4 ; t = 0.5(x/y-y) | |
105 | addl f4,f2 ; y = y + 0.5(x/y -y) | |
106 | ; now y approx. sqrt(x) to within 1 ulp | |
107 | ||
108 | ; twiddle last bit to force y correctly rounded | |
109 | movd r1,r0 ; restore the old fsr | |
110 | bicd [6,7,8],r0 ; clear inexact bit but retain inexact enable | |
111 | ord 0x80,r0 ; set rounding mode to round to zero | |
112 | lfsr r0 | |
113 | ||
114 | movl f0,f4 | |
115 | divl f2,f4 ; f4 = x/y | |
116 | sfsr r0 | |
117 | andd r3,r0 ; get the inexact flag | |
118 | cmpqd 0,r0 | |
119 | bne L18 | |
120 | ; if x/y exact, then ... | |
121 | cmpl f2,f4 ; if y == x/y | |
122 | beq L2 | |
123 | movl f4,-8(fp) | |
124 | subd 1,-8(fp) | |
125 | subcd 0,-4(fp) | |
126 | movl -8(fp),f4 ; f4 = f4 - ulp | |
127 | L18: | |
128 | bicd [6],r1 | |
129 | ord r3,r1 ; set inexact flag in r1 | |
130 | ||
131 | andd r1,r4 ; r4 = the old rounding mode | |
132 | cmpqd 0,r4 ; round to nearest? | |
133 | bne L17 | |
134 | movl f4,-8(fp) | |
135 | addd 1,-8(fp) | |
136 | addcd 0,-4(fp) | |
137 | movl -8(fp),f4 ; f4 = f4 + ulp | |
138 | br L16 | |
139 | L17: | |
140 | cmpd 0x100,r4 ; round to positive inf ? | |
141 | bne L16 | |
142 | movl f4,-8(fp) | |
143 | addd 1,-8(fp) | |
144 | addcd 0,-4(fp) | |
145 | movl -8(fp),f4 ; f4 = f4 + ulp | |
146 | ||
147 | movl f2,-8(fp) | |
148 | addd 1,-8(fp) | |
149 | addcd 0,-4(fp) | |
150 | movl -8(fp),f2 ; f2 = f2 + ulp | |
151 | L16: | |
152 | addl f4,f2 ; y = y + t | |
153 | subd 0x100000,r5 ; scalx = scalx - 1 | |
154 | L2: | |
155 | movl f2,-8(fp) | |
156 | addd r5,-4(fp) | |
157 | movl -8(fp),f0 | |
158 | lfsr r1 | |
159 | L1: | |
160 | movl tos,f6 | |
161 | movl tos,f4 | |
162 | exit [r3,r4,r5,r6,r7] | |
163 | ret 0 | |
164 | .data | |
165 | L28: .byte 64,40,35,41,115,113,114,116,46,99 | |
166 | .byte 9,49,46,49,32,40,117,99,98,46 | |
167 | .byte 101,108,101,102,117,110,116,41,32,57 | |
168 | .byte 47,49,57,47,56,53,0 | |
169 | L29: .blkb 4 | |
170 | .double 1204 | |
171 | .double 3062 | |
172 | .double 5746 | |
173 | .double 9193 | |
174 | .double 13348 | |
175 | .double 18162 | |
176 | .double 23592 | |
177 | .double 29598 | |
178 | .double 36145 | |
179 | .double 43202 | |
180 | .double 50740 | |
181 | .double 58733 | |
182 | .double 67158 | |
183 | .double 75992 | |
184 | .double 85215 | |
185 | .double 83599 | |
186 | .double 71378 | |
187 | .double 60428 | |
188 | .double 50647 | |
189 | .double 41945 | |
190 | .double 34246 | |
191 | .double 27478 | |
192 | .double 21581 | |
193 | .double 16499 | |
194 | .double 12183 | |
195 | .double 8588 | |
196 | .double 5674 | |
197 | .double 3403 | |
198 | .double 1742 | |
199 | .double 661 | |
200 | .double 130 | |
201 | L30: .blkb 4 | |
202 | .double 1129316352 ;L30: .double 0,0x43500000 | |
203 | L31: .blkb 4 | |
204 | .double 0x1ff00000 | |
205 | L32: .blkb 4 | |
206 | .double 0x5ff00000 |