Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | /* |
2 | * ========== Copyright Header Begin ========================================== | |
3 | * | |
4 | * OpenSPARC T2 Processor File: fpsim_n2.c | |
5 | * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. | |
6 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. | |
7 | * | |
8 | * The above named program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public | |
10 | * License version 2 as published by the Free Software Foundation. | |
11 | * | |
12 | * The above named program is distributed in the hope that it will be | |
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public | |
18 | * License along with this work; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
20 | * | |
21 | * ========== Copyright Header End ============================================ | |
22 | */ | |
23 | /**************************************************************************** | |
24 | * fpsim_n2.c -- Floating-point Simulation Library for SPARC (Niagara2) | |
25 | * | |
26 | * Author -- | |
27 | * Robert Rethemeyer - Sun Microsystems, Inc. | |
28 | * | |
29 | * Date -- | |
30 | * Aug 12, 2005 | |
31 | * | |
32 | * Design -- dynamically loaded shared object; compile with C or C++. | |
33 | * Models FP instruction behavior according to N2 PRM Appendix I. | |
34 | * The general strategy is to use the SPARC FP instructions | |
35 | * while filtering out the cases where N2 requires exceptions | |
36 | * but Solaris would simulate the operation (e.g. subnormals). | |
37 | * | |
38 | * (c) COPYRIGHT 2005-2006 Sun Microsystems, Inc. | |
39 | * Sun Confidential: Sun SSG Only | |
40 | ***************************************************************************/ | |
41 | ||
42 | /********************************************************************* | |
43 | * ATTENTION: This code is part of a library shared by multiple | |
44 | * projects. DO NOT MAKE CHANGES TO THIS CODE WITHIN YOUR PROJECT. | |
45 | * Instead, contact the owner/maintainer of the library, currently: | |
46 | * Robert.Rethemeyer@Sun.COM +1-408-616-5717 (x45717) | |
47 | * Systems Group: TVT: FrontEnd Technologies | |
48 | * The CVS source code repository for the library is at: | |
49 | * /import/ftap-blimp1/cvs/fpsim | |
50 | * DO NOT COMMIT CHANGES TO THAT REPOSITORY: contact the maintainer. | |
51 | ********************************************************************/ | |
52 | ||
53 | /*tab length=4*/ | |
54 | ||
/* CVS revision stamp embedded in the object file for identification. */
static const char cvsid[] = "$Id: fpsim_n2.c,v 1.6 2006/12/07 20:52:04 bobsmail Exp $";
57 | ||
58 | ||
59 | #include "fpsim_support.h" | |
60 | #include "fpsim.h" | |
61 | ||
62 | ||
63 | #ifdef __cplusplus | |
64 | extern "C" { | |
65 | #endif | |
66 | static int dissect_double( double fpnum, fpdouble* data, int std, int* exc ); | |
67 | static int dissect_single( float fpnum, fpsingle* data, int std, int* exc ); | |
68 | static void overflow( void* rslt, int dbl, int rm, int si ); | |
69 | #ifdef __cplusplus | |
70 | } | |
71 | #endif | |
72 | ||
#define FSR_NS   0x00400000   /*FSR nonstandard bit*/
#define GSR_IM   0x08000000   /*GSR interval mode bit*/
#define STDONLY  1            /*Standard mode only*/

/* STDMODE: 1 when operating in standard mode, i.e. FSR bit 22 (NS) is clear
   or GSR bit 27 (IM) is set; 0 otherwise.
   Arguments are fully parenthesized so that expressions such as (a|b)
   can be passed safely. */
#define STDMODE(FSR,GSR) (((((FSR)>>22)^1)|((GSR)>>27)) & 1)

/* RNDMODE: 2-bit rounding mode as an int.  Taken from GSR bits 26:25 when
   GSR_IM is set, otherwise from FSR bits 31:30. */
#define RNDMODE(FSR,GSR) ((int)((((GSR) & GSR_IM) ? ((GSR)>>25) : ((FSR)>>30)) & 3))
78 | ||
79 | /*===========================================================================*/ | |
80 | ||
81 | ||
82 | ||
83 | //------------------------------------------------------------------- | |
84 | // FADDD | |
85 | //------------------------------------------------------------------- | |
86 | int | |
87 | fpsim_faddd( const double* p_op1, const double* p_op2, double* p_res, | |
88 | uint64 p_fsr, uint64 p_gsr ) | |
89 | { | |
90 | fpdouble op1, op2, res; | |
91 | int stdmode = STDMODE(p_fsr,p_gsr); | |
92 | int scr[2], exc=0; | |
93 | ||
94 | int type1 = dissect_double(*p_op1, &op1, stdmode, &exc); | |
95 | int type2 = dissect_double(*p_op2, &op2, stdmode, &exc); | |
96 | ||
97 | if(type1 < fp_infinity | |
98 | && type2 < fp_infinity | |
99 | &&(fp_subnormal==type1 || fp_subnormal==type2)) | |
100 | { | |
101 | return FPX_UN; | |
102 | } | |
103 | if(type1>=fp_infinity || type2>=fp_infinity) exc = 0; | |
104 | ||
105 | double result; | |
106 | int rnd = RNDMODE(p_fsr,p_gsr); | |
107 | exc |= asm_faddd(&op1.fp.num, &op2.fp.num, &result, rnd, scr); | |
108 | ||
109 | int rtype = dissect_double(result, &res, STDONLY, NULL); | |
110 | if(fp_subnormal == rtype | |
111 | || (exc & FPX_UF) ) | |
112 | { | |
113 | if(stdmode) return FPX_UN; | |
114 | //else gross underflow, zero result | |
115 | res.fp.inte = (uint64)res.sign << 63; | |
116 | exc = FPX_UF|FPX_NX; | |
117 | } | |
118 | *p_res = res.fp.num; | |
119 | return exc; | |
120 | } | |
121 | ||
122 | ||
123 | ||
124 | //------------------------------------------------------------------- | |
125 | // FSUBD | |
126 | //------------------------------------------------------------------- | |
127 | int | |
128 | fpsim_fsubd( const double* p_op1, const double* p_op2, double* p_res, | |
129 | uint64 p_fsr, uint64 p_gsr ) | |
130 | { | |
131 | fpdouble op1, op2, res; | |
132 | int stdmode = STDMODE(p_fsr,p_gsr); | |
133 | int scr[2], exc=0; | |
134 | ||
135 | int type1 = dissect_double(*p_op1, &op1, stdmode, &exc); | |
136 | int type2 = dissect_double(*p_op2, &op2, stdmode, &exc); | |
137 | ||
138 | if(type1 < fp_infinity | |
139 | && type2 < fp_infinity | |
140 | &&(fp_subnormal==type1 || fp_subnormal==type2)) | |
141 | { | |
142 | return FPX_UN; | |
143 | } | |
144 | if(type1>=fp_infinity || type2>=fp_infinity) exc = 0; | |
145 | ||
146 | double result; | |
147 | int rnd = RNDMODE(p_fsr,p_gsr); | |
148 | exc |= asm_fsubd(&op1.fp.num, &op2.fp.num, &result, rnd, scr); | |
149 | ||
150 | int rtype = dissect_double(result, &res, STDONLY, NULL); | |
151 | if(fp_subnormal == rtype | |
152 | || (exc & FPX_UF) ) | |
153 | { | |
154 | if(stdmode) return FPX_UN; | |
155 | //else gross underflow, zero result | |
156 | res.fp.inte = (uint64)res.sign << 63; | |
157 | exc = FPX_UF|FPX_NX; | |
158 | } | |
159 | *p_res = res.fp.num; | |
160 | return exc; | |
161 | } | |
162 | ||
163 | ||
164 | ||
165 | //------------------------------------------------------------------- | |
166 | // FMULD | |
167 | //------------------------------------------------------------------- | |
168 | int | |
169 | fpsim_fmuld( const double* p_op1, const double* p_op2, double* p_res, | |
170 | uint64 p_fsr, uint64 p_gsr ) | |
171 | { | |
172 | fpdouble op1, op2, res; | |
173 | int stdmode = STDMODE(p_fsr,p_gsr); | |
174 | int scr[2], exc1=0, exc2=0; | |
175 | ||
176 | int type1 = dissect_double(*p_op1, &op1, stdmode, &exc1); | |
177 | int type2 = dissect_double(*p_op2, &op2, stdmode, &exc2); | |
178 | int exc = exc1|exc2; | |
179 | ||
180 | int er = op1.exp + op2.exp - 1023; | |
181 | int si = op1.sign ^ op2.sign; | |
182 | int rnd = RNDMODE(p_fsr,p_gsr); | |
183 | ||
184 | if((fp_subnormal==type1 && type2<fp_infinity && type2!=fp_zero) | |
185 | || (fp_subnormal==type2 && type1<fp_infinity && type1!=fp_zero)) | |
186 | { | |
187 | if((er > -54) || (si ? (rnd==FP_RM) : (rnd==FP_RP))) | |
188 | return FPX_UN; | |
189 | //else gross underflow, zero result | |
190 | res.fp.inte = (uint64)si << 63; | |
191 | *p_res = res.fp.num; | |
192 | return FPX_UF|FPX_NX; | |
193 | } | |
194 | // some operands preclude setting NX for flushed subnormal | |
195 | if(type1>=fp_infinity | |
196 | || type2>=fp_infinity | |
197 | || (fp_zero==type1 && 0==exc1) // either op exactly zero? | |
198 | || (fp_zero==type2 && 0==exc2)) exc = 0; // cancel NX | |
199 | ||
200 | double result; | |
201 | exc |= asm_fmuld(&op1.fp.num, &op2.fp.num, &result, rnd, scr); | |
202 | ||
203 | int rtype = dissect_double(result, &res, STDONLY, NULL); | |
204 | if(rtype <= fp_subnormal | |
205 | && type1 == fp_normal | |
206 | && type2 == fp_normal | |
207 | && er <= 0) //subnormal result? | |
208 | { | |
209 | if(stdmode) | |
210 | { | |
211 | if((er > -54) || (si ? (rnd==FP_RM) : (rnd==FP_RP))) | |
212 | return FPX_UN; | |
213 | } | |
214 | //else gross underflow, zero result | |
215 | res.fp.inte = (uint64)si << 63; | |
216 | exc = FPX_UF|FPX_NX; | |
217 | } | |
218 | *p_res = res.fp.num; | |
219 | return exc; | |
220 | } | |
221 | ||
222 | ||
223 | ||
224 | //------------------------------------------------------------------- | |
225 | // FDIVD | |
226 | //------------------------------------------------------------------- | |
227 | int | |
228 | fpsim_fdivd( const double* p_op1, const double* p_op2, double* p_res, | |
229 | uint64 p_fsr, uint64 p_gsr ) | |
230 | { | |
231 | fpdouble op1, op2, res; | |
232 | int stdmode = STDMODE(p_fsr,p_gsr); | |
233 | int scr[2], exc=0; | |
234 | ||
235 | int type1 = dissect_double(*p_op1, &op1, stdmode, &exc); | |
236 | int type2 = dissect_double(*p_op2, &op2, stdmode, &exc); | |
237 | ||
238 | int er = op1.exp - op2.exp + 1023 - 1; | |
239 | int si = op1.sign ^ op2.sign; | |
240 | int rnd = RNDMODE(p_fsr,p_gsr); | |
241 | ||
242 | if((fp_subnormal==type1 && type2<fp_infinity && type2!=fp_zero) | |
243 | || (fp_subnormal==type2 && type1<fp_infinity && type1!=fp_zero)) | |
244 | { | |
245 | int ef = er + 1; | |
246 | uint64 frac1 = op1.frac; | |
247 | uint64 frac2 = op2.frac; | |
248 | if(fp_normal == type1) frac1 |= 0x0010000000000000; | |
249 | if(fp_normal == type2) frac2 |= 0x0010000000000000; | |
250 | if(frac1 < frac2) ef--; | |
251 | /*** if(ef > 2046) */ | |
252 | if(ef > 2047) // **PRM error now frozen in HW** Metrax 109086 | |
253 | { | |
254 | overflow(p_res, 1, rnd, si); | |
255 | return FPX_OF|FPX_NX; | |
256 | } | |
257 | if((er > -54) || (si ? (rnd==FP_RM) : (rnd==FP_RP))) | |
258 | return FPX_UN; | |
259 | //else gross underflow, zero result | |
260 | res.fp.inte = (uint64)si << 63; | |
261 | *p_res = res.fp.num; | |
262 | return FPX_UF|FPX_NX; | |
263 | } | |
264 | // some operands preclude setting NX for flushed subnormal | |
265 | if(fp_zero==type2 //DZ cancels NX | |
266 | || type1>=fp_infinity | |
267 | || type2>=fp_infinity) exc = 0; | |
268 | ||
269 | double result; | |
270 | exc |= asm_fdivd(&op1.fp.num, &op2.fp.num, &result, rnd, scr); | |
271 | ||
272 | int rtype = dissect_double(result, &res, STDONLY, NULL); | |
273 | if(rtype <= fp_subnormal | |
274 | && type1 == fp_normal | |
275 | && type2 == fp_normal | |
276 | && er <= 0) //subnormal result? | |
277 | { | |
278 | if(stdmode) | |
279 | { | |
280 | if((er > -54) || (si ? (rnd==FP_RM) : (rnd==FP_RP))) | |
281 | return FPX_UN; | |
282 | } | |
283 | //else gross underflow, zero result | |
284 | res.fp.inte = (uint64)si << 63; | |
285 | exc = FPX_UF|FPX_NX; | |
286 | } | |
287 | *p_res = res.fp.num; | |
288 | return exc; | |
289 | } | |
290 | ||
291 | ||
292 | ||
293 | //------------------------------------------------------------------- | |
294 | // FSQRTD | |
295 | //------------------------------------------------------------------- | |
296 | int | |
297 | fpsim_fsqrtd( const double* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr ) | |
298 | { | |
299 | fpdouble op2; | |
300 | int stdmode = STDMODE(p_fsr,p_gsr); | |
301 | int scr[2], exc=0; | |
302 | ||
303 | int type2 = dissect_double(*p_op2, &op2, stdmode, &exc); | |
304 | ||
305 | if(stdmode | |
306 | && (fp_subnormal==type2) | |
307 | && (0==op2.sign)) | |
308 | { | |
309 | return FPX_UN; | |
310 | } | |
311 | ||
312 | int rnd = RNDMODE(p_fsr,p_gsr); | |
313 | exc |= asm_fsqrtd(&op2.fp.num, p_res, rnd, scr); | |
314 | return exc; | |
315 | } | |
316 | ||
317 | ||
318 | ||
319 | //------------------------------------------------------------------- | |
320 | // FADDS | |
321 | //------------------------------------------------------------------- | |
322 | int | |
323 | fpsim_fadds( const float* p_op1, const float* p_op2, float* p_res, | |
324 | uint64 p_fsr, uint64 p_gsr ) | |
325 | { | |
326 | fpsingle op1, op2, res; | |
327 | int stdmode = STDMODE(p_fsr,p_gsr); | |
328 | int scr[2], exc=0; | |
329 | ||
330 | int type1 = dissect_single(*p_op1, &op1, stdmode, &exc); | |
331 | int type2 = dissect_single(*p_op2, &op2, stdmode, &exc); | |
332 | ||
333 | if(type1 < fp_infinity | |
334 | && type2 < fp_infinity | |
335 | &&(fp_subnormal==type1 || fp_subnormal==type2)) | |
336 | { | |
337 | return FPX_UN; | |
338 | } | |
339 | if(type1>=fp_infinity || type2>=fp_infinity) exc = 0; | |
340 | ||
341 | float result; | |
342 | int rnd = RNDMODE(p_fsr,p_gsr); | |
343 | exc |= asm_fadds(&op1.fp.num, &op2.fp.num, &result, rnd, scr); | |
344 | ||
345 | int rtype = dissect_single(result, &res, STDONLY, NULL); | |
346 | if(fp_subnormal == rtype | |
347 | || (exc & FPX_UF) ) | |
348 | { | |
349 | if(stdmode) return FPX_UN; | |
350 | //else gross underflow, zero result | |
351 | res.fp.inte = res.sign << 31; | |
352 | exc = FPX_UF|FPX_NX; | |
353 | } | |
354 | *p_res = res.fp.num; | |
355 | return exc; | |
356 | } | |
357 | ||
358 | ||
359 | ||
360 | //------------------------------------------------------------------- | |
361 | // FSUBS | |
362 | //------------------------------------------------------------------- | |
363 | int | |
364 | fpsim_fsubs( const float* p_op1, const float* p_op2, float* p_res, | |
365 | uint64 p_fsr, uint64 p_gsr ) | |
366 | { | |
367 | fpsingle op1, op2, res; | |
368 | int stdmode = STDMODE(p_fsr,p_gsr); | |
369 | int scr[2], exc=0; | |
370 | ||
371 | int type1 = dissect_single(*p_op1, &op1, stdmode, &exc); | |
372 | int type2 = dissect_single(*p_op2, &op2, stdmode, &exc); | |
373 | ||
374 | if(type1 < fp_infinity | |
375 | && type2 < fp_infinity | |
376 | &&(fp_subnormal==type1 || fp_subnormal==type2)) | |
377 | { | |
378 | return FPX_UN; | |
379 | } | |
380 | if(type1>=fp_infinity || type2>=fp_infinity) exc = 0; | |
381 | ||
382 | float result; | |
383 | int rnd = RNDMODE(p_fsr,p_gsr); | |
384 | exc |= asm_fsubs(&op1.fp.num, &op2.fp.num, &result, rnd, scr); | |
385 | ||
386 | int rtype = dissect_single(result, &res, STDONLY, NULL); | |
387 | if(fp_subnormal == rtype | |
388 | || (exc & FPX_UF) ) | |
389 | { | |
390 | if(stdmode) return FPX_UN; | |
391 | //else gross underflow, zero result | |
392 | res.fp.inte = res.sign << 31; | |
393 | exc = FPX_UF|FPX_NX; | |
394 | } | |
395 | *p_res = res.fp.num; | |
396 | return exc; | |
397 | } | |
398 | ||
399 | ||
400 | ||
401 | //------------------------------------------------------------------- | |
402 | // FMULS | |
403 | //------------------------------------------------------------------- | |
404 | int | |
405 | fpsim_fmuls( const float* p_op1, const float* p_op2, float* p_res, | |
406 | uint64 p_fsr, uint64 p_gsr ) | |
407 | { | |
408 | fpsingle op1, op2, res; | |
409 | int stdmode = STDMODE(p_fsr,p_gsr); | |
410 | int scr[2], exc1=0, exc2=0; | |
411 | ||
412 | int type1 = dissect_single(*p_op1, &op1, stdmode, &exc1); | |
413 | int type2 = dissect_single(*p_op2, &op2, stdmode, &exc2); | |
414 | int exc = exc1|exc2; | |
415 | ||
416 | int er = op1.exp + op2.exp - 127; | |
417 | int si = op1.sign ^ op2.sign; | |
418 | int rnd = RNDMODE(p_fsr,p_gsr); | |
419 | ||
420 | if((fp_subnormal==type1 && type2<fp_infinity && type2!=fp_zero) | |
421 | || (fp_subnormal==type2 && type1<fp_infinity && type1!=fp_zero)) | |
422 | { | |
423 | if((er > -25) || (si ? (rnd==FP_RM) : (rnd==FP_RP))) | |
424 | return FPX_UN; | |
425 | //else gross underflow, zero result | |
426 | res.fp.inte = (uint)si << 31; | |
427 | *p_res = res.fp.num; | |
428 | return FPX_UF|FPX_NX; | |
429 | } | |
430 | // some operands preclude setting NX for flushed subnormal | |
431 | if(type1>=fp_infinity | |
432 | || type2>=fp_infinity | |
433 | || (fp_zero==type1 && 0==exc1) // either op exactly zero? | |
434 | || (fp_zero==type2 && 0==exc2)) exc = 0; // cancel NX | |
435 | ||
436 | float result; | |
437 | exc |= asm_fmuls(&op1.fp.num, &op2.fp.num, &result, rnd, scr); | |
438 | ||
439 | int rtype = dissect_single(result, &res, STDONLY, NULL); | |
440 | if(rtype <= fp_subnormal | |
441 | && type1 == fp_normal | |
442 | && type2 == fp_normal | |
443 | && er <= 0) //subnormal result? | |
444 | { | |
445 | if(stdmode) | |
446 | { | |
447 | if((er > -25) || (si ? (rnd==FP_RM) : (rnd==FP_RP))) | |
448 | return FPX_UN; | |
449 | } | |
450 | //else gross underflow, zero result | |
451 | res.fp.inte = (uint)si << 31; | |
452 | exc = FPX_UF|FPX_NX; | |
453 | } | |
454 | *p_res = res.fp.num; | |
455 | return exc; | |
456 | } | |
457 | ||
458 | ||
459 | ||
460 | //------------------------------------------------------------------- | |
461 | // FSMULD | |
462 | //------------------------------------------------------------------- | |
463 | int | |
464 | fpsim_fsmuld( const float* p_op1, const float* p_op2, double* p_res, | |
465 | uint64 p_fsr, uint64 p_gsr ) | |
466 | { | |
467 | fpsingle op1, op2; | |
468 | int stdmode = STDMODE(p_fsr,p_gsr); | |
469 | int scr[2], exc1=0, exc2=0; | |
470 | ||
471 | int type1 = dissect_single(*p_op1, &op1, stdmode, &exc1); | |
472 | int type2 = dissect_single(*p_op2, &op2, stdmode, &exc2); | |
473 | int exc = exc1|exc2; | |
474 | ||
475 | if((fp_subnormal==type1 && type2<fp_infinity && type2!=fp_zero) | |
476 | || (fp_subnormal==type2 && type1<fp_infinity && type1!=fp_zero)) | |
477 | { | |
478 | return FPX_UN; | |
479 | } | |
480 | // some operands preclude setting NX for flushed subnormal | |
481 | if(type1>=fp_infinity | |
482 | || type2>=fp_infinity | |
483 | || (fp_zero==type1 && 0==exc1) // either op exactly zero? | |
484 | || (fp_zero==type2 && 0==exc2)) exc = 0; // cancel NX | |
485 | ||
486 | exc |= asm_fsmuld(&op1.fp.num, &op2.fp.num, p_res, scr); | |
487 | return exc; | |
488 | } | |
489 | ||
490 | ||
491 | ||
492 | //------------------------------------------------------------------- | |
493 | // FDIVS | |
494 | //------------------------------------------------------------------- | |
495 | int | |
496 | fpsim_fdivs( const float* p_op1, const float* p_op2, float* p_res, | |
497 | uint64 p_fsr, uint64 p_gsr ) | |
498 | { | |
499 | fpsingle op1, op2, res; | |
500 | int stdmode = STDMODE(p_fsr,p_gsr); | |
501 | int scr[2], exc=0; | |
502 | ||
503 | int type1 = dissect_single(*p_op1, &op1, stdmode, &exc); | |
504 | int type2 = dissect_single(*p_op2, &op2, stdmode, &exc); | |
505 | ||
506 | int er = op1.exp - op2.exp + 127 - 1; | |
507 | int si = op1.sign ^ op2.sign; | |
508 | int rnd = RNDMODE(p_fsr,p_gsr); | |
509 | ||
510 | if((fp_subnormal==type1 && type2<fp_infinity && type2!=fp_zero) | |
511 | || (fp_subnormal==type2 && type1<fp_infinity && type1!=fp_zero)) | |
512 | { | |
513 | int ef = er + 1; | |
514 | int frac1 = op1.frac; | |
515 | int frac2 = op2.frac; | |
516 | if(fp_normal == type1) frac1 |= 0x800000; | |
517 | if(fp_normal == type2) frac2 |= 0x800000; | |
518 | if(frac1 < frac2) ef--; | |
519 | /*** if(ef > 254) */ | |
520 | if(ef > 255) // **PRM error now frozen in HW** Metrax 109086 | |
521 | { | |
522 | overflow(p_res, 0, rnd, si); | |
523 | return FPX_OF|FPX_NX; | |
524 | } | |
525 | if((er > -25) || (si ? (rnd==FP_RM) : (rnd==FP_RP))) | |
526 | return FPX_UN; | |
527 | //else gross underflow, zero result | |
528 | res.fp.inte = (uint)si << 31; | |
529 | *p_res = res.fp.num; | |
530 | return FPX_UF|FPX_NX; | |
531 | } | |
532 | // some operands preclude setting NX for flushed subnormal | |
533 | if(fp_zero==type2 //DZ cancels NX | |
534 | || type1>=fp_infinity | |
535 | || type2>=fp_infinity) exc = 0; | |
536 | ||
537 | float result; | |
538 | exc |= asm_fdivs(&op1.fp.num, &op2.fp.num, &result, rnd, scr); | |
539 | ||
540 | int rtype = dissect_single(result, &res, STDONLY, NULL); | |
541 | if(rtype <= fp_subnormal | |
542 | && type1 == fp_normal | |
543 | && type2 == fp_normal | |
544 | && er <= 0) //subnormal result? | |
545 | { | |
546 | if(stdmode) | |
547 | { | |
548 | if((er > -25) || (si ? (rnd==FP_RM) : (rnd==FP_RP))) | |
549 | return FPX_UN; | |
550 | } | |
551 | //else gross underflow, zero result | |
552 | res.fp.inte = (uint)si << 31; | |
553 | exc = FPX_UF|FPX_NX; | |
554 | } | |
555 | *p_res = res.fp.num; | |
556 | return exc; | |
557 | } | |
558 | ||
559 | ||
560 | ||
561 | //------------------------------------------------------------------- | |
562 | // FSQRTS | |
563 | //------------------------------------------------------------------- | |
564 | int | |
565 | fpsim_fsqrts( const float* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr ) | |
566 | { | |
567 | fpsingle op2; | |
568 | int stdmode = STDMODE(p_fsr,p_gsr); | |
569 | int scr[2], exc=0; | |
570 | ||
571 | int type2 = dissect_single(*p_op2, &op2, stdmode, &exc); | |
572 | ||
573 | if(stdmode | |
574 | && (fp_subnormal==type2) | |
575 | && (0==op2.sign)) | |
576 | { | |
577 | return FPX_UN; | |
578 | } | |
579 | ||
580 | int rnd = RNDMODE(p_fsr,p_gsr); | |
581 | exc |= asm_fsqrts(&op2.fp.num, p_res, rnd, scr); | |
582 | return exc; | |
583 | } | |
584 | ||
585 | ||
586 | ||
587 | //------------------------------------------------------------------- | |
588 | // FSTOD | |
589 | //------------------------------------------------------------------- | |
590 | int | |
591 | fpsim_fstod( const float* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr ) | |
592 | { | |
593 | fpsingle op2; | |
594 | int stdmode = STDMODE(p_fsr,p_gsr); | |
595 | int scr[2], exc=0; | |
596 | ||
597 | int type2 = dissect_single(*p_op2, &op2, stdmode, &exc); | |
598 | if(fp_subnormal==type2) return FPX_UN; | |
599 | ||
600 | int rnd = RNDMODE(p_fsr,p_gsr); | |
601 | exc |= asm_fstod(&op2.fp.num, p_res, rnd, scr); | |
602 | return exc; | |
603 | } | |
604 | ||
605 | ||
606 | ||
607 | //------------------------------------------------------------------- | |
608 | // FDTOS | |
609 | //------------------------------------------------------------------- | |
610 | int | |
611 | fpsim_fdtos( const double* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr ) | |
612 | { | |
613 | fpdouble op2; | |
614 | fpsingle res; | |
615 | int stdmode = STDMODE(p_fsr,p_gsr); | |
616 | int scr[2], exc=0; | |
617 | ||
618 | int type2 = dissect_double(*p_op2, &op2, stdmode, &exc); | |
619 | int er = op2.exp - 1023 + 127; | |
620 | int rnd = RNDMODE(p_fsr,p_gsr); | |
621 | ||
622 | if(fp_subnormal==type2) | |
623 | { | |
624 | if(op2.sign ? (rnd==FP_RM) : (rnd==FP_RP)) return FPX_UN; | |
625 | //else gross underflow, zero result | |
626 | res.fp.inte = (uint)op2.sign << 31; | |
627 | *p_res = res.fp.num; | |
628 | return FPX_UF|FPX_NX; | |
629 | } | |
630 | ||
631 | float result; | |
632 | exc |= asm_fdtos(&op2.fp.num, &result, rnd, scr); | |
633 | ||
634 | int rtype = dissect_single(result, &res, STDONLY, NULL); | |
635 | if(rtype <= fp_subnormal | |
636 | && type2 == fp_normal | |
637 | && er <= 0) //subnormal result? | |
638 | { | |
639 | if(stdmode) | |
640 | { | |
641 | if((er > -25) || (res.sign ? (rnd==FP_RM) : (rnd==FP_RP))) | |
642 | return FPX_UN; | |
643 | } | |
644 | //else gross underflow, zero result | |
645 | res.fp.inte = (uint)res.sign << 31; | |
646 | exc = FPX_UF|FPX_NX; | |
647 | } | |
648 | *p_res = res.fp.num; | |
649 | return exc; | |
650 | } | |
651 | ||
652 | ||
653 | ||
654 | //------------------------------------------------------------------- | |
655 | // FSTOX | |
656 | //------------------------------------------------------------------- | |
657 | int | |
658 | fpsim_fstox( const float* p_op2, uint64* p_res, uint64 p_fsr, uint64 p_gsr ) | |
659 | { | |
660 | fpsingle op2; | |
661 | int stdmode = STDMODE(p_fsr,p_gsr); | |
662 | int scr[2], exc=0; | |
663 | ||
664 | int type2 = dissect_single(*p_op2, &op2, stdmode, &exc); | |
665 | if(fp_subnormal==type2) return FPX_UN; | |
666 | ||
667 | int rnd = RNDMODE(p_fsr,p_gsr); | |
668 | exc |= asm_fstox(&op2.fp.num, p_res, rnd, scr); | |
669 | return exc; | |
670 | } | |
671 | ||
672 | ||
673 | ||
674 | //------------------------------------------------------------------- | |
675 | // FDTOX | |
676 | //------------------------------------------------------------------- | |
677 | int | |
678 | fpsim_fdtox( const double* p_op2, uint64* p_res, uint64 p_fsr, uint64 p_gsr ) | |
679 | { | |
680 | fpdouble op2; | |
681 | int stdmode = STDMODE(p_fsr,p_gsr); | |
682 | int scr[2], exc=0; | |
683 | ||
684 | int type2 = dissect_double(*p_op2, &op2, stdmode, &exc); | |
685 | if(fp_subnormal==type2) return FPX_UN; | |
686 | ||
687 | int rnd = RNDMODE(p_fsr,p_gsr); | |
688 | exc |= asm_fdtox(&op2.fp.num, p_res, rnd, scr); | |
689 | return exc; | |
690 | } | |
691 | ||
692 | ||
693 | ||
694 | //------------------------------------------------------------------- | |
695 | // FSTOI | |
696 | //------------------------------------------------------------------- | |
697 | int | |
698 | fpsim_fstoi( const float* p_op2, uint* p_res, uint64 p_fsr, uint64 p_gsr ) | |
699 | { | |
700 | fpsingle op2; | |
701 | int stdmode = STDMODE(p_fsr,p_gsr); | |
702 | int scr[2], exc=0; | |
703 | ||
704 | int type2 = dissect_single(*p_op2, &op2, stdmode, &exc); | |
705 | if(fp_subnormal==type2) return FPX_UN; | |
706 | ||
707 | int rnd = RNDMODE(p_fsr,p_gsr); | |
708 | exc |= asm_fstoi(&op2.fp.num, p_res, rnd, scr); | |
709 | return exc; | |
710 | } | |
711 | ||
712 | ||
713 | ||
714 | //------------------------------------------------------------------- | |
715 | // FDTOI | |
716 | //------------------------------------------------------------------- | |
717 | int | |
718 | fpsim_fdtoi( const double* p_op2, uint* p_res, uint64 p_fsr, uint64 p_gsr ) | |
719 | { | |
720 | fpdouble op2; | |
721 | int stdmode = STDMODE(p_fsr,p_gsr); | |
722 | int scr[2], exc=0; | |
723 | ||
724 | int type2 = dissect_double(*p_op2, &op2, stdmode, &exc); | |
725 | if(fp_subnormal==type2) return FPX_UN; | |
726 | ||
727 | int rnd = RNDMODE(p_fsr,p_gsr); | |
728 | exc |= asm_fdtoi(&op2.fp.num, p_res, rnd, scr); | |
729 | return exc; | |
730 | } | |
731 | ||
732 | ||
733 | ||
734 | //------------------------------------------------------------------- | |
735 | // FXTOS | |
736 | //------------------------------------------------------------------- | |
737 | int | |
738 | fpsim_fxtos( const uint64* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr ) | |
739 | { | |
740 | int scr[2]; | |
741 | int rnd = RNDMODE(p_fsr,p_gsr); | |
742 | int exc = asm_fxtos(p_op2, p_res, rnd, scr); | |
743 | return exc; | |
744 | } | |
745 | ||
746 | ||
747 | ||
748 | //------------------------------------------------------------------- | |
749 | // FXTOD | |
750 | //------------------------------------------------------------------- | |
751 | int | |
752 | fpsim_fxtod( const uint64* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr ) | |
753 | { | |
754 | int scr[2]; | |
755 | int rnd = RNDMODE(p_fsr,p_gsr); | |
756 | int exc = asm_fxtod(p_op2, p_res, rnd, scr); | |
757 | return exc; | |
758 | } | |
759 | ||
760 | ||
761 | ||
762 | //------------------------------------------------------------------- | |
763 | // FITOS | |
764 | //------------------------------------------------------------------- | |
765 | int | |
766 | fpsim_fitos( const uint* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr ) | |
767 | { | |
768 | int scr[2]; | |
769 | int rnd = RNDMODE(p_fsr,p_gsr); | |
770 | int exc = asm_fitos(p_op2, p_res, rnd, scr); | |
771 | return exc; | |
772 | } | |
773 | ||
774 | ||
775 | ||
776 | //------------------------------------------------------------------- | |
777 | // FITOD | |
778 | //------------------------------------------------------------------- | |
779 | int | |
780 | fpsim_fitod( const uint* p_op2, double* p_res ) | |
781 | { | |
782 | asm_fitod(p_op2, p_res); | |
783 | return 0; | |
784 | } | |
785 | ||
786 | ||
787 | ||
788 | //------------------------------------------------------------------- | |
789 | // GSR_MASK | |
790 | //------------------------------------------------------------------- | |
791 | ||
792 | uint64 fpsim_gsr_mask(void) { return 0xFFFFFFFF0E0000FF; } | |
793 | ||
794 | ||
795 | //------------------------------------------------------------------- | |
796 | // UPDATE_FSR | |
797 | //------------------------------------------------------------------- | |
798 | ||
799 | /* Merges a FP exception returned by one of the instruction sim routines | |
800 | into the caller's FSR, and indicates whether the caller should | |
801 | post a trap (nonzero return value is trap-type code). | |
802 | */ | |
803 | ||
804 | int | |
805 | fpsim_update_fsr( int p_exc, uint64* p_fsr ) | |
806 | { | |
807 | int trap; | |
808 | uint64 fsr = *p_fsr; | |
809 | ||
810 | // non-IEEE trap? (unfinished or illegal) | |
811 | if(p_exc & FPX_TRAP) | |
812 | { | |
813 | trap = p_exc & 0xFF; // trap reason in bits 7:0 | |
814 | if(0x10 == trap) return trap; // illegal_instr: no fsr update | |
815 | ||
816 | // FP_other trap: ftt in reason code | |
817 | fsr = (fsr & ~0x1C000) | (trap << 14); | |
818 | trap = 0x22; | |
819 | } | |
820 | else // IEEE trap or completion | |
821 | { | |
822 | int taken = p_exc & (fsr>>23) & 0x1F; // mask with fsr.tem | |
823 | ||
824 | fsr &= ~0x1C01F; // clear old fsr.cexc and fsr.ftt | |
825 | ||
826 | // To Trap or Not To Trap? | |
827 | if(taken != 0) // trap taken? | |
828 | { | |
829 | // cancel NX bit for OF/UF trap | |
830 | if(taken & (FPX_OF|FPX_UF)) | |
831 | { | |
832 | taken &= ~FPX_NX; | |
833 | } | |
834 | fsr |= (taken | 0x04000); // set fsr.cexc only; fsr.ftt=1 | |
835 | trap = 0x21; // IEEE_754_exception | |
836 | } | |
837 | else // no trap | |
838 | { | |
839 | // set both cexc,aexc identically | |
840 | fsr |= (p_exc | (p_exc<<5)); | |
841 | trap = 0; | |
842 | } | |
843 | } | |
844 | *p_fsr = fsr; // update caller's FSR | |
845 | return trap; | |
846 | } | |
847 | ||
848 | ||
849 | ||
850 | /*===========================================================================*/ | |
851 | ||
852 | /* Dissection routines: picks apart the FP number into sign,exp,fraction | |
853 | and flushes subnormal numbers to zero in nonstandard mode. | |
854 | Returns FP number class: 0=zero 1=subnorm 2=normal 3=inf 4=qnan 5=snan | |
855 | */ | |
856 | ||
857 | static int | |
858 | dissect_double( double p_fpnum, fpdouble* p_data, int p_std, int* p_exc ) | |
859 | { | |
860 | p_data->fp.num = p_fpnum; | |
861 | p_data->sign = p_data->fp.inte >> 63; | |
862 | p_data->frac = p_data->fp.inte & 0x000FFFFFFFFFFFFF; | |
863 | p_data->exp = (p_data->fp.inte >> 52) & 0x7FF; | |
864 | ||
865 | // classify the number | |
866 | int fpclass = fp_normal; | |
867 | if(0 == p_data->exp) // exponent zero? | |
868 | { | |
869 | fpclass = (0 == p_data->frac) | |
870 | ? fp_zero //true | |
871 | : fp_subnormal; //false | |
872 | } | |
873 | else if(0x7FF == p_data->exp) // exponent all ones? | |
874 | { | |
875 | if(0 == p_data->frac) fpclass = fp_infinity; | |
876 | else | |
877 | { | |
878 | fpclass = (p_data->frac & 0x0008000000000000) //frac.msb==1? | |
879 | ? fp_quiet //true | |
880 | : fp_signaling; //false | |
881 | } | |
882 | } | |
883 | ||
884 | // nonstandard mode: flush subnormals to 0 | |
885 | if(0 == p_std | |
886 | && fp_subnormal == fpclass) | |
887 | { | |
888 | p_data->fp.inte = (uint64)p_data->sign << 63; | |
889 | p_data->frac = 0; | |
890 | fpclass = fp_zero; | |
891 | *p_exc = FPX_NX; | |
892 | } | |
893 | return fpclass; | |
894 | } | |
895 | ||
896 | static int | |
897 | dissect_single( float p_fpnum, fpsingle* p_data, int p_std, int* p_exc ) | |
898 | { | |
899 | p_data->fp.num = p_fpnum; | |
900 | p_data->sign = p_data->fp.inte >> 31; | |
901 | p_data->frac = p_data->fp.inte & 0x007FFFFF; | |
902 | p_data->exp = (p_data->fp.inte >> 23) & 0xFF; | |
903 | ||
904 | // classify the number | |
905 | int fpclass = fp_normal; | |
906 | if(0 == p_data->exp) // exponent zero? | |
907 | { | |
908 | fpclass = (0 == p_data->frac) | |
909 | ? fp_zero //true | |
910 | : fp_subnormal; //false | |
911 | } | |
912 | else if(0xFF == p_data->exp) // exponent all ones? | |
913 | { | |
914 | if(0 == p_data->frac) fpclass = fp_infinity; | |
915 | else | |
916 | { | |
917 | fpclass = (p_data->frac & 0x00400000) //frac.msb==1? | |
918 | ? fp_quiet //true | |
919 | : fp_signaling; //false | |
920 | } | |
921 | } | |
922 | ||
923 | // nonstandard mode: flush subnormals to 0 | |
924 | if(0 == p_std | |
925 | && fp_subnormal == fpclass) | |
926 | { | |
927 | p_data->fp.inte = p_data->sign << 31; | |
928 | p_data->frac = 0; | |
929 | fpclass = fp_zero; | |
930 | *p_exc = FPX_NX; | |
931 | } | |
932 | return fpclass; | |
933 | } | |
934 | ||
935 | ||
936 | /*=======================================================================*/ | |
937 | ||
938 | /* overflow routine: returns proper result for overflow in rounding mode | |
939 | */ | |
940 | ||
941 | static void | |
942 | overflow( void* p_res, int p_dbl, int p_rnd, int p_sign ) | |
943 | { | |
944 | enum { MAXV=0, INFV=1 } rtype; // result type: 1=infinity 0=max_value | |
945 | ||
946 | switch(p_rnd) //rounding mode? | |
947 | { | |
948 | case FP_RN: rtype = INFV; break; | |
949 | case FP_RZ: rtype = MAXV; break; | |
950 | case FP_RP: rtype = p_sign ? MAXV : INFV; break; | |
951 | case FP_RM: rtype = p_sign ? INFV : MAXV; break; | |
952 | } | |
953 | ||
954 | if(p_dbl) //double | |
955 | { | |
956 | uint64 d_res = rtype ? 0x7FF0000000000000 : 0x7FEFFFFFFFFFFFFF; | |
957 | *(uint64*)p_res = d_res | ((uint64)p_sign << 63); | |
958 | } | |
959 | else //single | |
960 | { | |
961 | uint s_res = rtype ? 0x7F800000 : 0x7F7FFFFF; | |
962 | *(uint*)p_res = s_res | (p_sign << 31); | |
963 | } | |
964 | } | |
965 | ||
966 | ||
967 | //------------------------------------------------------------------- | |
968 | // un-implemented ops return illegal_instruction trap if called: | |
969 | //------------------------------------------------------------------- | |
970 | ||
971 | int fpsim_fnaddd(const double* a,const double* b,double* c,uint64 d,uint64 e) | |
972 | {return FPX_ILL;} | |
973 | int fpsim_fnadds(const float* a,const float* b,float* c,uint64 d,uint64 e) | |
974 | {return FPX_ILL;} | |
975 | int fpsim_fnmuld(const double* a,const double* b,double* c,uint64 d,uint64 e) | |
976 | {return FPX_ILL;} | |
977 | int fpsim_fnmuls(const float* a,const float* b,float* c,uint64 d,uint64 e) | |
978 | {return FPX_ILL;} | |
979 | int fpsim_fnsmuld(const float* a,const float* b,double* c,uint64 d,uint64 e) | |
980 | {return FPX_ILL;} | |
981 | int fpsim_fhaddd(const double* a,const double* b,double* c,uint64 d,uint64 e, | |
982 | fpsim_fha_subtype f) {return FPX_ILL;} | |
983 | int fpsim_fhadds(const float* a,const float* b,float* c,uint64 d,uint64 e, | |
984 | fpsim_fha_subtype f) {return FPX_ILL;} | |
985 | int fpsim_fmaddd(const double* a,const double* b,const double* c,double* d, | |
986 | uint64 e,uint64 f,fpsim_fma_subtype g) {return FPX_ILL;} | |
987 | int fpsim_fmadds(const float* a,const float* b,const float* c,float* d, | |
988 | uint64 e,uint64 f,fpsim_fma_subtype g) {return FPX_ILL;} | |
989 | int fpsim_fumaddd(const double* a,const double* b,const double* c,double* d, | |
990 | uint64 e,uint64 f,fpsim_fma_subtype g) {return FPX_ILL;} | |
991 | int fpsim_fumadds(const float* a,const float* b,const float* c,float* d, | |
992 | uint64 e,uint64 f,fpsim_fma_subtype g) {return FPX_ILL;} | |
993 | ||
994 | ||
995 | /*===========================================================================*/ | |
996 | ||
997 | // Function pointer structure | |
998 | ||
999 | struct fpsim_functions fpsim_funclist = | |
1000 | { | |
1001 | fpsim_update_fsr, fpsim_gsr_mask, | |
1002 | ||
1003 | fpsim_faddd, fpsim_fsubd, fpsim_fmuld, fpsim_fdivd, fpsim_fsqrtd, | |
1004 | fpsim_fadds, fpsim_fsubs, fpsim_fmuls, fpsim_fdivs, fpsim_fsqrts, | |
1005 | fpsim_fsmuld, | |
1006 | ||
1007 | fpsim_fstod, fpsim_fdtos, fpsim_fstox, fpsim_fdtox, fpsim_fstoi, | |
1008 | fpsim_fdtoi, fpsim_fxtos, fpsim_fxtod, fpsim_fitos, fpsim_fitod, | |
1009 | ||
1010 | fpsim_fnaddd, fpsim_fnadds, fpsim_fnmuld, fpsim_fnmuls, fpsim_fnsmuld, | |
1011 | fpsim_fhaddd, fpsim_fhadds, | |
1012 | fpsim_fmaddd, fpsim_fmadds, fpsim_fumaddd, fpsim_fumadds | |
1013 | }; | |
1014 | ||
/* Model identification string: a fixed model name followed by the date on
   which this translation unit was compiled. */
char fpsim_fp_model[] =
    "FPSIM Niagara2 (N2) "
    __DATE__;