Commit | Line | Data |
---|---|---|
f24c459c JH |
1 | /* |
2 | * poly_l2.c | |
3 | * | |
4 | * Compute the base 2 log of a FPU_REG, using a polynomial approximation. | |
5 | * | |
6 | * | |
7 | * Copyright (C) 1992, 1993 W. Metzenthen, 22 Parker St, Ormond, | |
8 | * Vic 3163, Australia. | |
9 | * E-mail apm233m@vaxc.cc.monash.edu.au | |
10 | * All rights reserved. | |
11 | * | |
12 | * This copyright notice covers the redistribution and use of the | |
13 | * FPU emulator developed by W. Metzenthen. It covers only its use | |
14 | * in the 386BSD operating system. Any other use is not permitted | |
15 | * under this copyright. | |
16 | * | |
17 | * Redistribution and use in source and binary forms, with or without | |
18 | * modification, are permitted provided that the following conditions | |
19 | * are met: | |
20 | * 1. Redistributions of source code must retain the above copyright | |
21 | * notice, this list of conditions and the following disclaimer. | |
22 | * 2. Redistributions in binary form must include information specifying | |
23 | * that source code for the emulator is freely available and include | |
24 | * either: | |
25 | * a) an offer to provide the source code for a nominal distribution | |
26 | * fee, or | |
27 | * b) list at least two alternative methods whereby the source | |
28 | * can be obtained, e.g. a publically accessible bulletin board | |
29 | * and an anonymous ftp site from which the software can be | |
30 | * downloaded. | |
31 | * 3. All advertising materials specifically mentioning features or use of | |
32 | * this emulator must acknowledge that it was developed by W. Metzenthen. | |
33 | * 4. The name of W. Metzenthen may not be used to endorse or promote | |
34 | * products derived from this software without specific prior written | |
35 | * permission. | |
36 | * | |
37 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, | |
38 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | |
39 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL | |
40 | * W. METZENTHEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
41 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
42 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
43 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
44 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
45 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
46 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
47 | * | |
48 | */ | |
49 | ||
50 | ||
51 | #include "exception.h" | |
52 | #include "reg_constant.h" | |
53 | #include "fpu_emu.h" | |
54 | #include "control_w.h" | |
55 | ||
56 | ||
57 | ||
58 | #define HIPOWER 9 | |
59 | static unsigned short lterms[HIPOWER][4] = | |
60 | { | |
61 | /* Ideal computation with these coeffs gives about 64.6 bit rel | |
62 | * accuracy. */ | |
63 | {0xe177, 0xb82f, 0x7652, 0x7154}, | |
64 | {0xee0f, 0xe80f, 0x2770, 0x7b1c}, | |
65 | {0x0fc0, 0xbe87, 0xb143, 0x49dd}, | |
66 | {0x78b9, 0xdadd, 0xec54, 0x34c2}, | |
67 | {0x003a, 0x5de9, 0x628b, 0x2909}, | |
68 | {0x5588, 0xed16, 0x4abf, 0x2193}, | |
69 | {0xb461, 0x85f7, 0x347a, 0x1c6a}, | |
70 | {0x0975, 0x87b3, 0xd5bf, 0x1876}, | |
71 | {0xe85c, 0xcec9, 0x84e7, 0x187d} | |
72 | }; | |
73 | ||
74 | ||
75 | ||
76 | ||
77 | /*--- poly_l2() -------------------------------------------------------------+ | |
78 | | Base 2 logarithm by a polynomial approximation. | | |
79 | +---------------------------------------------------------------------------*/ | |
80 | void | |
81 | poly_l2(FPU_REG * arg, FPU_REG * result) | |
82 | { | |
83 | short exponent; | |
84 | char zero; /* flag for an Xx == 0 */ | |
85 | unsigned short bits, shift; | |
86 | long long Xsq; | |
87 | FPU_REG accum, denom, num, Xx; | |
88 | ||
89 | ||
90 | exponent = arg->exp - EXP_BIAS; | |
91 | ||
92 | accum.tag = TW_Valid; /* set the tags to Valid */ | |
93 | ||
94 | if (arg->sigh > (unsigned) 0xb504f334) { | |
95 | /* This is good enough for the computation of the polynomial | |
96 | * sum, but actually results in a loss of precision for the | |
97 | * computation of Xx. This will matter only if exponent | |
98 | * becomes zero. */ | |
99 | exponent++; | |
100 | accum.sign = 1; /* sign to negative */ | |
101 | num.exp = EXP_BIAS; /* needed to prevent errors in div | |
102 | * routine */ | |
103 | reg_u_div(&CONST_1, arg, &num, FULL_PRECISION); | |
104 | } else { | |
105 | accum.sign = 0; /* set the sign to positive */ | |
106 | num.sigl = arg->sigl; /* copy the mantissa */ | |
107 | num.sigh = arg->sigh; | |
108 | } | |
109 | ||
110 | ||
111 | /* shift num left, lose the ms bit */ | |
112 | num.sigh <<= 1; | |
113 | if (num.sigl & 0x80000000) | |
114 | num.sigh |= 1; | |
115 | num.sigl <<= 1; | |
116 | ||
117 | denom.sigl = num.sigl; | |
118 | denom.sigh = num.sigh; | |
119 | poly_div4((long long *) &(denom.sigl)); | |
120 | denom.sigh += 0x80000000; /* set the msb */ | |
121 | Xx.exp = EXP_BIAS; /* needed to prevent errors in div routine */ | |
122 | reg_u_div(&num, &denom, &Xx, FULL_PRECISION); | |
123 | ||
124 | zero = !(Xx.sigh | Xx.sigl); | |
125 | ||
126 | mul64((long long *) &Xx.sigl, (long long *) &Xx.sigl, &Xsq); | |
127 | poly_div16(&Xsq); | |
128 | ||
129 | accum.exp = -1; /* exponent of accum */ | |
130 | ||
131 | /* Do the basic fixed point polynomial evaluation */ | |
132 | polynomial((unsigned *) &accum.sigl, (unsigned *) &Xsq, lterms, HIPOWER - 1); | |
133 | ||
134 | if (!exponent) { | |
135 | /* If the exponent is zero, then we would lose precision by | |
136 | * sticking to fixed point computation here */ | |
137 | /* We need to re-compute Xx because of loss of precision. */ | |
138 | FPU_REG lXx; | |
139 | char sign; | |
140 | ||
141 | sign = accum.sign; | |
142 | accum.sign = 0; | |
143 | ||
144 | /* make accum compatible and normalize */ | |
145 | accum.exp = EXP_BIAS + accum.exp; | |
146 | normalize(&accum); | |
147 | ||
148 | if (zero) { | |
149 | reg_move(&CONST_Z, result); | |
150 | } else { | |
151 | /* we need to re-compute lXx to better accuracy */ | |
152 | num.tag = TW_Valid; /* set the tags to Vaild */ | |
153 | num.sign = 0; /* set the sign to positive */ | |
154 | num.exp = EXP_BIAS - 1; | |
155 | if (sign) { | |
156 | /* The argument is of the form 1-x */ | |
157 | /* Use 1-1/(1-x) = x/(1-x) */ | |
158 | *((long long *) &num.sigl) = -*((long long *) &(arg->sigl)); | |
159 | normalize(&num); | |
160 | reg_div(&num, arg, &num, FULL_PRECISION); | |
161 | } else { | |
162 | normalize(&num); | |
163 | } | |
164 | ||
165 | denom.tag = TW_Valid; /* set the tags to Valid */ | |
166 | denom.sign = SIGN_POS; /* set the sign to positive */ | |
167 | denom.exp = EXP_BIAS; | |
168 | ||
169 | reg_div(&num, &denom, &lXx, FULL_PRECISION); | |
170 | ||
171 | reg_u_mul(&lXx, &accum, &accum, FULL_PRECISION); | |
172 | ||
173 | reg_u_add(&lXx, &accum, result, FULL_PRECISION); | |
174 | ||
175 | normalize(result); | |
176 | } | |
177 | ||
178 | result->sign = sign; | |
179 | return; | |
180 | } | |
181 | mul64((long long *) &accum.sigl, | |
182 | (long long *) &Xx.sigl, (long long *) &accum.sigl); | |
183 | ||
184 | *((long long *) (&accum.sigl)) += *((long long *) (&Xx.sigl)); | |
185 | ||
186 | if (Xx.sigh > accum.sigh) { | |
187 | /* There was an overflow */ | |
188 | ||
189 | poly_div2((long long *) &accum.sigl); | |
190 | accum.sigh |= 0x80000000; | |
191 | accum.exp++; | |
192 | } | |
193 | /* When we add the exponent to the accum result later, we will require | |
194 | * that their signs are the same. Here we ensure that this is so. */ | |
195 | if (exponent && ((exponent < 0) ^ (accum.sign))) { | |
196 | /* signs are different */ | |
197 | ||
198 | accum.sign = !accum.sign; | |
199 | ||
200 | /* An exceptional case is when accum is zero */ | |
201 | if (accum.sigl | accum.sigh) { | |
202 | /* find 1-accum */ | |
203 | /* Shift to get exponent == 0 */ | |
204 | if (accum.exp < 0) { | |
205 | poly_div2((long long *) &accum.sigl); | |
206 | accum.exp++; | |
207 | } | |
208 | /* Just negate, but throw away the sign */ | |
209 | *((long long *) &(accum.sigl)) = -*((long long *) &(accum.sigl)); | |
210 | if (exponent < 0) | |
211 | exponent++; | |
212 | else | |
213 | exponent--; | |
214 | } | |
215 | } | |
216 | shift = exponent >= 0 ? exponent : -exponent; | |
217 | bits = 0; | |
218 | if (shift) { | |
219 | if (accum.exp) { | |
220 | accum.exp++; | |
221 | poly_div2((long long *) &accum.sigl); | |
222 | } | |
223 | while (shift) { | |
224 | poly_div2((long long *) &accum.sigl); | |
225 | if (shift & 1) | |
226 | accum.sigh |= 0x80000000; | |
227 | shift >>= 1; | |
228 | bits++; | |
229 | } | |
230 | } | |
231 | /* Convert to 64 bit signed-compatible */ | |
232 | accum.exp += bits + EXP_BIAS - 1; | |
233 | ||
234 | reg_move(&accum, result); | |
235 | normalize(result); | |
236 | ||
237 | return; | |
238 | } | |
239 | ||
240 | ||
241 | /*--- poly_l2p1() -----------------------------------------------------------+ | |
242 | | Base 2 logarithm by a polynomial approximation. | | |
243 | | log2(x+1) | | |
244 | +---------------------------------------------------------------------------*/ | |
245 | int | |
246 | poly_l2p1(FPU_REG * arg, FPU_REG * result) | |
247 | { | |
248 | char sign = 0; | |
249 | long long Xsq; | |
250 | FPU_REG arg_pl1, denom, accum, local_arg, poly_arg; | |
251 | ||
252 | ||
253 | sign = arg->sign; | |
254 | ||
255 | reg_add(arg, &CONST_1, &arg_pl1, FULL_PRECISION); | |
256 | ||
257 | if ((arg_pl1.sign) | (arg_pl1.tag)) { /* We need a valid positive | |
258 | * number! */ | |
259 | return 1; | |
260 | } | |
261 | reg_add(&CONST_1, &arg_pl1, &denom, FULL_PRECISION); | |
262 | reg_div(arg, &denom, &local_arg, FULL_PRECISION); | |
263 | local_arg.sign = 0; /* Make the sign positive */ | |
264 | ||
265 | /* Now we need to check that |local_arg| is less than 3-2*sqrt(2) = | |
266 | * 0.17157.. = .0xafb0ccc0 * 2^-2 */ | |
267 | ||
268 | if (local_arg.exp >= EXP_BIAS - 3) { | |
269 | if ((local_arg.exp > EXP_BIAS - 3) || | |
270 | (local_arg.sigh > (unsigned) 0xafb0ccc0)) { | |
271 | /* The argument is large */ | |
272 | poly_l2(&arg_pl1, result); | |
273 | return 0; | |
274 | } | |
275 | } | |
276 | /* Make a copy of local_arg */ | |
277 | reg_move(&local_arg, &poly_arg); | |
278 | ||
279 | /* Get poly_arg bits aligned as required */ | |
280 | shrx((unsigned *) &(poly_arg.sigl), -(poly_arg.exp - EXP_BIAS + 3)); | |
281 | ||
282 | mul64((long long *) &(poly_arg.sigl), (long long *) &(poly_arg.sigl), &Xsq); | |
283 | poly_div16(&Xsq); | |
284 | ||
285 | /* Do the basic fixed point polynomial evaluation */ | |
286 | polynomial(&(accum.sigl), (unsigned *) &Xsq, lterms, HIPOWER - 1); | |
287 | ||
288 | accum.tag = TW_Valid; /* set the tags to Valid */ | |
289 | accum.sign = SIGN_POS; /* and make accum positive */ | |
290 | ||
291 | /* make accum compatible and normalize */ | |
292 | accum.exp = EXP_BIAS - 1; | |
293 | normalize(&accum); | |
294 | ||
295 | reg_u_mul(&local_arg, &accum, &accum, FULL_PRECISION); | |
296 | ||
297 | reg_u_add(&local_arg, &accum, result, FULL_PRECISION); | |
298 | ||
299 | /* Multiply the result by 2 */ | |
300 | result->exp++; | |
301 | ||
302 | result->sign = sign; | |
303 | ||
304 | return 0; | |
305 | } |