Commit | Line | Data |
---|---|---|
3cd28ebc GK |
1 | ; @(#)sqrt.s 1.1 (ucb.elefunt) %G% |
2 | ; | |
3 | ; double sqrt(x) | |
4 | ; double x; | |
5 | ; IEEE double precision sqrt | |
6 | ; code in NSC assembly by K.C. Ng | |
7 | ; 12/13/85 | |
8 | ; | |
9 | ; Method: | |
10 | ; Use Kahan's trick to get 8 bits initial approximation | |
11 | ; by integer shift and add/subtract. Then three Newton | |
12 | ; iterations to get down error to within one ulp. Finally | |
13 | ; twiddle the last bit to make to correctly rounded | |
14 | ; according to the rounding mode. | |
15 | ; | |
16 | .vers 2 | |
17 | .text | |
18 | .align 2 | |
19 | .globl _sqrt | |
20 | _sqrt: | |
21 | enter [r3,r4,r5,r6,r7],44 | |
22 | movl f4,tos | |
23 | movl f6,tos | |
24 | movd 2146435072,r2 ; r2 = 0x7ff00000 | |
25 | movl 8(fp),f0 ; f2 = x | |
26 | movd 12(fp),r3 ; r3 = high part of x | |
27 | movd r3,r4 ; make a copy of high part of x in r4 | |
28 | andd r2,r3 ; r3 become the bias exponent of x | |
29 | cmpd r2,r3 ; if r3 = 0x7ff00000 then x is INF or NAN | |
30 | bne L22 | |
31 | ; to see if x is INF | |
32 | movd 8(fp),r0 ; r0 = low part of x | |
33 | movd r4,r1 ; r1 is high part of x again | |
34 | andd 0xfff00000,r1 ; mask off the sign and exponent of x | |
35 | ord r0,r1 ; or with low part, if 0 then x is INF | |
36 | cmpqd 0,r1 ; | |
37 | bne L1 ; not 0; therefore x is NaN; return x. | |
38 | cmpqd 0,r4 ; now x is Inf, is it +inf? | |
39 | blt L1 ; +INF, return x | |
40 | ; -INF, return NaN by doing 0/0 | |
41 | nan: movl 0f0.0,f0 ; | |
42 | divl f0,f0 | |
43 | br L1 | |
44 | L22: ; now x is finite | |
45 | cmpl 0f0.0,f0 ; x = 0 ? | |
46 | beq L1 ; return x if x is +0 or -0 | |
47 | cmpqd 0,r4 ; Is x < 0 ? | |
48 | bgt nan ; if x < 0 return NaN | |
49 | movqd 0,r5 ; r5 == scalx initialize to zero | |
50 | cmpqd 0,r3 ; is x is subnormal ? (r3 is the exponent) | |
51 | bne L21 ; if x is normal, goto L21 | |
52 | movl L30,f2 ; f2 = 2**54 | |
53 | mull f2,f0 ; scale up x by 2**54 | |
54 | subd 0x1b00000,r5 ; off set the scale factor by -27 in exponent | |
55 | L21: | |
56 | ; now x is normal | |
57 | ; notations: | |
58 | ; r1 == copy of fsr | |
59 | ; r2 == mask of e inexact enable flag | |
60 | ; r3 == mask of i inexact flag | |
61 | ; r4 == mask of r rounding mode | |
62 | ; r5 == x's scale factor (already defined) | |
63 | ||
64 | movd 0x20,r2 | |
65 | movd 0x40,r3 | |
66 | movd 0x180,r4 | |
67 | sfsr r0 ; store fsr to r0 | |
68 | movd r0,r1 ; make a copy of fsr to r1 | |
69 | bicd [5,6,7,8],r0 ; clear e,i, and set r to round to nearest | |
70 | lfsr r0 | |
71 | ||
72 | ; begin to compute sqrt(x) | |
73 | movl f0,-8(fp) | |
74 | movd -4(fp),r0 ; r0 the high part of modified x | |
75 | lshd -1,r0 ; r0 >> 1 | |
76 | addd 0x1ff80000,r0 ; add correction to r0 ...got 5 bits approx. | |
77 | movd r0,r6 | |
78 | lshd -13,r6 ; r6 = r0>>-15 | |
79 | andd 0x7c,r6 ; obtain 4*leading 5 bits of r0 | |
80 | addrd L29,r7 ; r7 = address of L29 = table[0] | |
81 | addd r6,r7 ; r6 = address of L29[r6] = table[r6] | |
82 | subd 0(r7),r0 ; r0 = r0 - table[r6] | |
83 | movd r0,-4(fp) | |
84 | movl -8(fp),f2 ; now f2 = y approximate sqrt(f0) to 8 bits | |
85 | ||
86 | movl 0f0.5,f6 ; f6 = 0.5 | |
87 | movl f0,f4 | |
88 | divl f2,f4 ; t = x/y | |
89 | addl f4,f2 ; y = y + x/y | |
90 | mull f6,f2 ; y = 0.5(y+x/y) got 17 bits approx. | |
91 | movl f0,f4 | |
92 | divl f2,f4 ; t = x/y | |
93 | addl f4,f2 ; y = y + x/y | |
94 | mull f6,f2 ; y = 0.5(y+x/y) got 35 bits approx. | |
95 | movl f0,f4 | |
96 | divl f2,f4 ; t = x/y | |
97 | subl f2,f4 ; t = x/y - y | |
98 | mull f6,f4 ; t = 0.5(x/y-y) | |
99 | addl f4,f2 ; y = y + 0.5(x/y -y) | |
100 | ; now y approx. sqrt(x) to within 1 ulp | |
101 | ||
102 | ; twiddle last bit to force y correctly rounded | |
103 | movd r1,r0 ; restore the old fsr | |
104 | bicd [6,7,8],r0 ; clear inexact bit but retain inexact enable | |
105 | ord 0x80,r0 ; set rounding mode to round to zero | |
106 | lfsr r0 | |
107 | ||
108 | movl f0,f4 | |
109 | divl f2,f4 ; f4 = x/y | |
110 | sfsr r0 | |
111 | andd r3,r0 ; get the inexact flag | |
112 | cmpqd 0,r0 | |
113 | bne L18 | |
114 | ; if x/y exact, then ... | |
115 | cmpl f2,f4 ; if y == x/y | |
116 | beq L2 | |
117 | movl f4,-8(fp) | |
118 | subd 1,-8(fp) | |
119 | subcd 0,-4(fp) | |
120 | movl -8(fp),f4 ; f4 = f4 - ulp | |
121 | L18: | |
122 | bicd [6],r1 | |
123 | ord r3,r1 ; set inexact flag in r1 | |
124 | ||
125 | andd r1,r4 ; r4 = the old rounding mode | |
126 | cmpqd 0,r4 ; round to nearest? | |
127 | bne L17 | |
128 | movl f4,-8(fp) | |
129 | addd 1,-8(fp) | |
130 | addcd 0,-4(fp) | |
131 | movl -8(fp),f4 ; f4 = f4 + ulp | |
132 | br L16 | |
133 | L17: | |
134 | cmpd 0x100,r4 ; round to positive inf ? | |
135 | bne L16 | |
136 | movl f4,-8(fp) | |
137 | addd 1,-8(fp) | |
138 | addcd 0,-4(fp) | |
139 | movl -8(fp),f4 ; f4 = f4 + ulp | |
140 | ||
141 | movl f2,-8(fp) | |
142 | addd 1,-8(fp) | |
143 | addcd 0,-4(fp) | |
144 | movl -8(fp),f2 ; f2 = f2 + ulp | |
145 | L16: | |
146 | addl f4,f2 ; y = y + t | |
147 | subd 0x100000,r5 ; scalx = scalx - 1 | |
148 | L2: | |
149 | movl f2,-8(fp) | |
150 | addd r5,-4(fp) | |
151 | movl -8(fp),f0 | |
152 | lfsr r1 | |
153 | L1: | |
154 | movl tos,f6 | |
155 | movl tos,f4 | |
156 | exit [r3,r4,r5,r6,r7] | |
157 | ret 0 | |
158 | .data | |
159 | L28: .byte 64,40,35,41,115,113,114,116,46,99 | |
160 | .byte 9,49,46,49,32,40,117,99,98,46 | |
161 | .byte 101,108,101,102,117,110,116,41,32,57 | |
162 | .byte 47,49,57,47,56,53,0 | |
163 | L29: .blkb 4 | |
164 | .double 1204 | |
165 | .double 3062 | |
166 | .double 5746 | |
167 | .double 9193 | |
168 | .double 13348 | |
169 | .double 18162 | |
170 | .double 23592 | |
171 | .double 29598 | |
172 | .double 36145 | |
173 | .double 43202 | |
174 | .double 50740 | |
175 | .double 58733 | |
176 | .double 67158 | |
177 | .double 75992 | |
178 | .double 85215 | |
179 | .double 83599 | |
180 | .double 71378 | |
181 | .double 60428 | |
182 | .double 50647 | |
183 | .double 41945 | |
184 | .double 34246 | |
185 | .double 27478 | |
186 | .double 21581 | |
187 | .double 16499 | |
188 | .double 12183 | |
189 | .double 8588 | |
190 | .double 5674 | |
191 | .double 3403 | |
192 | .double 1742 | |
193 | .double 661 | |
194 | .double 130 | |
195 | L30: .blkb 4 | |
196 | .double 1129316352 ;L30: .double 0,0x43500000 | |
197 | L31: .blkb 4 | |
198 | .double 0x1ff00000 | |
199 | L32: .blkb 4 | |
200 | .double 0x5ff00000 |