clean up awk syntax
[unix-history] / usr / src / lib / libm / national / sqrt.s
CommitLineData
3cd28ebc
GK
1; @(#)sqrt.s 1.1 (ucb.elefunt) %G%
2;
3; double sqrt(x)
4; double x;
5; IEEE double precision sqrt
6; code in NSC assembly by K.C. Ng
7; 12/13/85
8;
9; Method:
10; Use Kahan's trick to get 8 bits initial approximation
11; by integer shift and add/subtract. Then three Newton
12; iterations to get down error to within one ulp. Finally
13; twiddle the last bit to make to correctly rounded
14; according to the rounding mode.
15;
16 .vers 2
17 .text
18 .align 2
19 .globl _sqrt
20_sqrt:
21 enter [r3,r4,r5,r6,r7],44
22 movl f4,tos
23 movl f6,tos
24 movd 2146435072,r2 ; r2 = 0x7ff00000
25 movl 8(fp),f0 ; f2 = x
26 movd 12(fp),r3 ; r3 = high part of x
27 movd r3,r4 ; make a copy of high part of x in r4
28 andd r2,r3 ; r3 become the bias exponent of x
29 cmpd r2,r3 ; if r3 = 0x7ff00000 then x is INF or NAN
30 bne L22
31 ; to see if x is INF
32 movd 8(fp),r0 ; r0 = low part of x
33 movd r4,r1 ; r1 is high part of x again
34 andd 0xfff00000,r1 ; mask off the sign and exponent of x
35 ord r0,r1 ; or with low part, if 0 then x is INF
36 cmpqd 0,r1 ;
37 bne L1 ; not 0; therefore x is NaN; return x.
38 cmpqd 0,r4 ; now x is Inf, is it +inf?
39 blt L1 ; +INF, return x
40 ; -INF, return NaN by doing 0/0
41nan: movl 0f0.0,f0 ;
42 divl f0,f0
43 br L1
44L22: ; now x is finite
45 cmpl 0f0.0,f0 ; x = 0 ?
46 beq L1 ; return x if x is +0 or -0
47 cmpqd 0,r4 ; Is x < 0 ?
48 bgt nan ; if x < 0 return NaN
49 movqd 0,r5 ; r5 == scalx initialize to zero
50 cmpqd 0,r3 ; is x is subnormal ? (r3 is the exponent)
51 bne L21 ; if x is normal, goto L21
52 movl L30,f2 ; f2 = 2**54
53 mull f2,f0 ; scale up x by 2**54
54 subd 0x1b00000,r5 ; off set the scale factor by -27 in exponent
55L21:
56 ; now x is normal
57 ; notations:
58 ; r1 == copy of fsr
59 ; r2 == mask of e inexact enable flag
60 ; r3 == mask of i inexact flag
61 ; r4 == mask of r rounding mode
62 ; r5 == x's scale factor (already defined)
63
64 movd 0x20,r2
65 movd 0x40,r3
66 movd 0x180,r4
67 sfsr r0 ; store fsr to r0
68 movd r0,r1 ; make a copy of fsr to r1
69 bicd [5,6,7,8],r0 ; clear e,i, and set r to round to nearest
70 lfsr r0
71
72 ; begin to compute sqrt(x)
73 movl f0,-8(fp)
74 movd -4(fp),r0 ; r0 the high part of modified x
75 lshd -1,r0 ; r0 >> 1
76 addd 0x1ff80000,r0 ; add correction to r0 ...got 5 bits approx.
77 movd r0,r6
78 lshd -13,r6 ; r6 = r0>>-15
79 andd 0x7c,r6 ; obtain 4*leading 5 bits of r0
80 addrd L29,r7 ; r7 = address of L29 = table[0]
81 addd r6,r7 ; r6 = address of L29[r6] = table[r6]
82 subd 0(r7),r0 ; r0 = r0 - table[r6]
83 movd r0,-4(fp)
84 movl -8(fp),f2 ; now f2 = y approximate sqrt(f0) to 8 bits
85
86 movl 0f0.5,f6 ; f6 = 0.5
87 movl f0,f4
88 divl f2,f4 ; t = x/y
89 addl f4,f2 ; y = y + x/y
90 mull f6,f2 ; y = 0.5(y+x/y) got 17 bits approx.
91 movl f0,f4
92 divl f2,f4 ; t = x/y
93 addl f4,f2 ; y = y + x/y
94 mull f6,f2 ; y = 0.5(y+x/y) got 35 bits approx.
95 movl f0,f4
96 divl f2,f4 ; t = x/y
97 subl f2,f4 ; t = x/y - y
98 mull f6,f4 ; t = 0.5(x/y-y)
99 addl f4,f2 ; y = y + 0.5(x/y -y)
100 ; now y approx. sqrt(x) to within 1 ulp
101
102 ; twiddle last bit to force y correctly rounded
103 movd r1,r0 ; restore the old fsr
104 bicd [6,7,8],r0 ; clear inexact bit but retain inexact enable
105 ord 0x80,r0 ; set rounding mode to round to zero
106 lfsr r0
107
108 movl f0,f4
109 divl f2,f4 ; f4 = x/y
110 sfsr r0
111 andd r3,r0 ; get the inexact flag
112 cmpqd 0,r0
113 bne L18
114 ; if x/y exact, then ...
115 cmpl f2,f4 ; if y == x/y
116 beq L2
117 movl f4,-8(fp)
118 subd 1,-8(fp)
119 subcd 0,-4(fp)
120 movl -8(fp),f4 ; f4 = f4 - ulp
121L18:
122 bicd [6],r1
123 ord r3,r1 ; set inexact flag in r1
124
125 andd r1,r4 ; r4 = the old rounding mode
126 cmpqd 0,r4 ; round to nearest?
127 bne L17
128 movl f4,-8(fp)
129 addd 1,-8(fp)
130 addcd 0,-4(fp)
131 movl -8(fp),f4 ; f4 = f4 + ulp
132 br L16
133L17:
134 cmpd 0x100,r4 ; round to positive inf ?
135 bne L16
136 movl f4,-8(fp)
137 addd 1,-8(fp)
138 addcd 0,-4(fp)
139 movl -8(fp),f4 ; f4 = f4 + ulp
140
141 movl f2,-8(fp)
142 addd 1,-8(fp)
143 addcd 0,-4(fp)
144 movl -8(fp),f2 ; f2 = f2 + ulp
145L16:
146 addl f4,f2 ; y = y + t
147 subd 0x100000,r5 ; scalx = scalx - 1
148L2:
149 movl f2,-8(fp)
150 addd r5,-4(fp)
151 movl -8(fp),f0
152 lfsr r1
153L1:
154 movl tos,f6
155 movl tos,f4
156 exit [r3,r4,r5,r6,r7]
157 ret 0
158 .data
159L28: .byte 64,40,35,41,115,113,114,116,46,99
160 .byte 9,49,46,49,32,40,117,99,98,46
161 .byte 101,108,101,102,117,110,116,41,32,57
162 .byte 47,49,57,47,56,53,0
163L29: .blkb 4
164 .double 1204
165 .double 3062
166 .double 5746
167 .double 9193
168 .double 13348
169 .double 18162
170 .double 23592
171 .double 29598
172 .double 36145
173 .double 43202
174 .double 50740
175 .double 58733
176 .double 67158
177 .double 75992
178 .double 85215
179 .double 83599
180 .double 71378
181 .double 60428
182 .double 50647
183 .double 41945
184 .double 34246
185 .double 27478
186 .double 21581
187 .double 16499
188 .double 12183
189 .double 8588
190 .double 5674
191 .double 3403
192 .double 1742
193 .double 661
194 .double 130
195L30: .blkb 4
196 .double 1129316352 ;L30: .double 0,0x43500000
197L31: .blkb 4
198 .double 0x1ff00000
199L32: .blkb 4
200 .double 0x5ff00000