don't copyright the sed script
[unix-history] / usr / src / lib / libm / national / sqrt.s
CommitLineData
bc0f2b0e
KB
1; Copyright (c) 1985 Regents of the University of California.
2; All rights reserved.
3;
2bc65d90 4; %sccs.include.redist.semicolon%
bc0f2b0e 5;
2bc65d90 6; @(#)sqrt.s 5.4 (Berkeley) %G%
a399f6c8 7;
bc0f2b0e 8
3cd28ebc
GK
9; double sqrt(x)
10; double x;
11; IEEE double precision sqrt
12; code in NSC assembly by K.C. Ng
13; 12/13/85
14;
15; Method:
16; Use Kahan's trick to get 8 bits initial approximation
17; by integer shift and add/subtract. Then three Newton
18; iterations to get down error to within one ulp. Finally
19; twiddle the last bit to make to correctly rounded
20; according to the rounding mode.
21;
22 .vers 2
23 .text
24 .align 2
25 .globl _sqrt
26_sqrt:
27 enter [r3,r4,r5,r6,r7],44
28 movl f4,tos
29 movl f6,tos
30 movd 2146435072,r2 ; r2 = 0x7ff00000
31 movl 8(fp),f0 ; f2 = x
32 movd 12(fp),r3 ; r3 = high part of x
33 movd r3,r4 ; make a copy of high part of x in r4
34 andd r2,r3 ; r3 become the bias exponent of x
35 cmpd r2,r3 ; if r3 = 0x7ff00000 then x is INF or NAN
36 bne L22
37 ; to see if x is INF
38 movd 8(fp),r0 ; r0 = low part of x
39 movd r4,r1 ; r1 is high part of x again
40 andd 0xfff00000,r1 ; mask off the sign and exponent of x
41 ord r0,r1 ; or with low part, if 0 then x is INF
42 cmpqd 0,r1 ;
43 bne L1 ; not 0; therefore x is NaN; return x.
44 cmpqd 0,r4 ; now x is Inf, is it +inf?
45 blt L1 ; +INF, return x
46 ; -INF, return NaN by doing 0/0
47nan: movl 0f0.0,f0 ;
48 divl f0,f0
49 br L1
50L22: ; now x is finite
51 cmpl 0f0.0,f0 ; x = 0 ?
52 beq L1 ; return x if x is +0 or -0
53 cmpqd 0,r4 ; Is x < 0 ?
54 bgt nan ; if x < 0 return NaN
55 movqd 0,r5 ; r5 == scalx initialize to zero
56 cmpqd 0,r3 ; is x is subnormal ? (r3 is the exponent)
57 bne L21 ; if x is normal, goto L21
58 movl L30,f2 ; f2 = 2**54
59 mull f2,f0 ; scale up x by 2**54
60 subd 0x1b00000,r5 ; off set the scale factor by -27 in exponent
61L21:
62 ; now x is normal
63 ; notations:
64 ; r1 == copy of fsr
65 ; r2 == mask of e inexact enable flag
66 ; r3 == mask of i inexact flag
67 ; r4 == mask of r rounding mode
68 ; r5 == x's scale factor (already defined)
69
70 movd 0x20,r2
71 movd 0x40,r3
72 movd 0x180,r4
73 sfsr r0 ; store fsr to r0
74 movd r0,r1 ; make a copy of fsr to r1
75 bicd [5,6,7,8],r0 ; clear e,i, and set r to round to nearest
76 lfsr r0
77
78 ; begin to compute sqrt(x)
79 movl f0,-8(fp)
80 movd -4(fp),r0 ; r0 the high part of modified x
81 lshd -1,r0 ; r0 >> 1
82 addd 0x1ff80000,r0 ; add correction to r0 ...got 5 bits approx.
83 movd r0,r6
84 lshd -13,r6 ; r6 = r0>>-15
85 andd 0x7c,r6 ; obtain 4*leading 5 bits of r0
86 addrd L29,r7 ; r7 = address of L29 = table[0]
87 addd r6,r7 ; r6 = address of L29[r6] = table[r6]
88 subd 0(r7),r0 ; r0 = r0 - table[r6]
89 movd r0,-4(fp)
90 movl -8(fp),f2 ; now f2 = y approximate sqrt(f0) to 8 bits
91
92 movl 0f0.5,f6 ; f6 = 0.5
93 movl f0,f4
94 divl f2,f4 ; t = x/y
95 addl f4,f2 ; y = y + x/y
96 mull f6,f2 ; y = 0.5(y+x/y) got 17 bits approx.
97 movl f0,f4
98 divl f2,f4 ; t = x/y
99 addl f4,f2 ; y = y + x/y
100 mull f6,f2 ; y = 0.5(y+x/y) got 35 bits approx.
101 movl f0,f4
102 divl f2,f4 ; t = x/y
103 subl f2,f4 ; t = x/y - y
104 mull f6,f4 ; t = 0.5(x/y-y)
105 addl f4,f2 ; y = y + 0.5(x/y -y)
106 ; now y approx. sqrt(x) to within 1 ulp
107
108 ; twiddle last bit to force y correctly rounded
109 movd r1,r0 ; restore the old fsr
110 bicd [6,7,8],r0 ; clear inexact bit but retain inexact enable
111 ord 0x80,r0 ; set rounding mode to round to zero
112 lfsr r0
113
114 movl f0,f4
115 divl f2,f4 ; f4 = x/y
116 sfsr r0
117 andd r3,r0 ; get the inexact flag
118 cmpqd 0,r0
119 bne L18
120 ; if x/y exact, then ...
121 cmpl f2,f4 ; if y == x/y
122 beq L2
123 movl f4,-8(fp)
124 subd 1,-8(fp)
125 subcd 0,-4(fp)
126 movl -8(fp),f4 ; f4 = f4 - ulp
127L18:
128 bicd [6],r1
129 ord r3,r1 ; set inexact flag in r1
130
131 andd r1,r4 ; r4 = the old rounding mode
132 cmpqd 0,r4 ; round to nearest?
133 bne L17
134 movl f4,-8(fp)
135 addd 1,-8(fp)
136 addcd 0,-4(fp)
137 movl -8(fp),f4 ; f4 = f4 + ulp
138 br L16
139L17:
140 cmpd 0x100,r4 ; round to positive inf ?
141 bne L16
142 movl f4,-8(fp)
143 addd 1,-8(fp)
144 addcd 0,-4(fp)
145 movl -8(fp),f4 ; f4 = f4 + ulp
146
147 movl f2,-8(fp)
148 addd 1,-8(fp)
149 addcd 0,-4(fp)
150 movl -8(fp),f2 ; f2 = f2 + ulp
151L16:
152 addl f4,f2 ; y = y + t
153 subd 0x100000,r5 ; scalx = scalx - 1
154L2:
155 movl f2,-8(fp)
156 addd r5,-4(fp)
157 movl -8(fp),f0
158 lfsr r1
159L1:
160 movl tos,f6
161 movl tos,f4
162 exit [r3,r4,r5,r6,r7]
163 ret 0
164 .data
165L28: .byte 64,40,35,41,115,113,114,116,46,99
166 .byte 9,49,46,49,32,40,117,99,98,46
167 .byte 101,108,101,102,117,110,116,41,32,57
168 .byte 47,49,57,47,56,53,0
169L29: .blkb 4
170 .double 1204
171 .double 3062
172 .double 5746
173 .double 9193
174 .double 13348
175 .double 18162
176 .double 23592
177 .double 29598
178 .double 36145
179 .double 43202
180 .double 50740
181 .double 58733
182 .double 67158
183 .double 75992
184 .double 85215
185 .double 83599
186 .double 71378
187 .double 60428
188 .double 50647
189 .double 41945
190 .double 34246
191 .double 27478
192 .double 21581
193 .double 16499
194 .double 12183
195 .double 8588
196 .double 5674
197 .double 3403
198 .double 1742
199 .double 661
200 .double 130
201L30: .blkb 4
202 .double 1129316352 ;L30: .double 0,0x43500000
203L31: .blkb 4
204 .double 0x1ff00000
205L32: .blkb 4
206 .double 0x5ff00000