Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / sam / cpus / vonk / ss / lib / cpu / src / SS_Fpu.cc
CommitLineData
920dae64
AT
1// ========== Copyright Header Begin ==========================================
2//
3// OpenSPARC T2 Processor File: SS_Fpu.cc
4// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
5// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
6//
7// The above named program is free software; you can redistribute it and/or
8// modify it under the terms of the GNU General Public
9// License version 2 as published by the Free Software Foundation.
10//
11// The above named program is distributed in the hope that it will be
12// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14// General Public License for more details.
15//
16// You should have received a copy of the GNU General Public
17// License along with this work; if not, write to the Free Software
18// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19//
20// ========== Copyright Header End ============================================
21/*
22===============================================================================
23
24This C source file is adapted from the SoftFloat IEC/IEEE Floating-point
25Arithmetic Package, Release 2a.
26
27Written by John R. Hauser. This work was made possible in part by the
28International Computer Science Institute, located at Suite 600, 1947 Center
29Street, Berkeley, California 94704. Funding was partially provided by the
30National Science Foundation under grant MIP-9311980. The original version
31of this code was written as part of a project to build a fixed-point vector
32processor in collaboration with the University of California at Berkeley,
33overseen by Profs. Nelson Morgan and John Wawrzynek. More information
34is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
35arithmetic/SoftFloat.html'.
36
37THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
38has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
39TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
40PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
41AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
42
43Derivative works are acceptable, even for commercial purposes, so long as
44(1) they include prominent notice that the work is derivative, and (2) they
45include prominent notice akin to these four paragraphs for those parts of
46this code that are retained.
47
48===============================================================================
49*/
50
51#include "SS_Fpu.h"
52
53SS_Fpu::SS_Fpu()/*{{{*/
54 :
55 float_rounding_mode(ROUND_NEAREST),
56 float_exception_flags(EXC_NONE),
57 float_partial_exception_flags(EXC_NONE),
58 float_round_needed(0),
59 float_detect_tininess(TINY_BEFORE_ROUNDING)
60{}
61/*}}}*/
62
63int32_t SS_Fpu::roundAndPackInt32( int zSign, uint64_t absZ )/*{{{*/
64{
65 /* -------------------------------------------------------------------------------
66Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
67and 7, and returns the properly rounded 32-bit integer corresponding to the
68input. If `zSign' is 1, the input is negated before being converted to an
69integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
70is simply rounded to an integer, with the inexact exception raised if the
71input cannot be represented exactly as an integer. However, if the fixed-
72point input is too large, the invalid exception is raised and the largest
73positive or negative integer is returned.
74------------------------------------------------------------------------------- */
75
76 int8_t roundingMode;
77 int roundNearestEven;
78 int8_t roundIncrement, roundBits;
79 int32_t z;
80
81 roundingMode = float_rounding_mode;
82 roundNearestEven = ( roundingMode == ROUND_NEAREST );
83 roundIncrement = 0x40;
84 if ( ! roundNearestEven ) {
85 if ( roundingMode == ROUND_TO_ZERO ) {
86 roundIncrement = 0;
87 }
88 else {
89 roundIncrement = 0x7F;
90 if ( zSign ) {
91 if ( roundingMode == ROUND_UP ) roundIncrement = 0;
92 }
93 else {
94 if ( roundingMode == ROUND_DOWN ) roundIncrement = 0;
95 }
96 }
97 }
98 roundBits = absZ & 0x7F;
99 absZ = ( absZ + roundIncrement )>>7;
100 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
101 z = (int32_t) absZ;
102 if ( zSign ) z = - z;
103 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
104 float_raise( EXC_INVALID );
105 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
106 }
107 if ( roundBits ) float_exception_flags = float_exception_flags | EXC_INEXACT;
108 return z;
109
110}
111/*}}}*/
112int64_t SS_Fpu::roundAndPackInt64( int zSign, uint64_t absZ0, uint64_t absZ1 )/*{{{*/
113{
114/* -------------------------------------------------------------------------------
115Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
116`absZ1', with binary point between bits 63 and 64 (between the input words),
117and returns the properly rounded 64-bit integer corresponding to the input.
118If `zSign' is 1, the input is negated before being converted to an integer.
119Ordinarily, the fixed-point input is simply rounded to an integer, with
120the inexact exception raised if the input cannot be represented exactly as
121an integer. However, if the fixed-point input is too large, the invalid
122exception is raised and the largest positive or negative integer is
123returned.
124------------------------------------------------------------------------------- */
125
126 int8_t roundingMode;
127 int roundNearestEven, increment;
128 int64_t z;
129
130 roundingMode = float_rounding_mode;
131 roundNearestEven = ( roundingMode == ROUND_NEAREST );
132 increment = ( (int64_t) absZ1 < 0 );
133 if ( ! roundNearestEven ) {
134 if ( roundingMode == ROUND_TO_ZERO ) {
135 increment = 0;
136 }
137 else {
138 if ( zSign ) {
139 increment = ( roundingMode == ROUND_DOWN ) && absZ1;
140 }
141 else {
142 increment = ( roundingMode == ROUND_UP ) && absZ1;
143 }
144 }
145 }
146 if ( increment ) {
147 ++absZ0;
148 if ( absZ0 == 0 ) goto overflow;
149 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
150 }
151 z = absZ0;
152 if ( zSign ) z = - z;
153 if ( z && ( ( z < 0 ) ^ zSign ) ) {
154 overflow:
155 float_raise( EXC_INVALID );
156 return
157 zSign ? (int64_t) 0x8000000000000000
158 : 0x7FFFFFFFFFFFFFFF ;
159 }
160 if ( absZ1 ) float_exception_flags = float_exception_flags | EXC_INEXACT;
161 return z;
162
163}
164/*}}}*/
165
166uint32_t SS_Fpu::roundAndPackFloat32( int zSign, int16_t zExp, uint32_t zSig )/*{{{*/
167{
168/* -------------------------------------------------------------------------------
169Takes an abstract floating-point value having sign `zSign', exponent `zExp',
170and significand `zSig', and returns the proper single-precision floating-
171point value corresponding to the abstract input. Ordinarily, the abstract
172value is simply rounded and packed into the single-precision format, with
173the inexact exception raised if the abstract input cannot be represented
174exactly. However, if the abstract value is too large, the overflow and
175inexact exceptions are raised and an infinity or maximal finite value is
176returned. If the abstract value is too small, the input value is rounded to
177a subnormal number, and the underflow and inexact exceptions are raised if
178the abstract input cannot be represented exactly as a subnormal single-
179precision floating-point number.
180 The input significand `zSig' has its binary point between bits 30
181and 29, which is 7 bits to the left of the usual location. This shifted
182significand must be normalized or smaller. If `zSig' is not normalized,
183`zExp' must be 0; in that case, the result returned is a subnormal number,
184and it must not require rounding. In the usual case that `zSig' is
185normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
186The handling of underflow and overflow follows the IEC/IEEE Standard for
187Binary Floating-Point Arithmetic.
188-------------------------------------------------------------------------------
189*/
190 int8_t roundingMode;
191 int roundNearestEven;
192 int8_t roundIncrement, roundBits;
193 int isTiny;
194
195 roundingMode = float_rounding_mode;
196 roundNearestEven = ( roundingMode == ROUND_NEAREST );
197 roundIncrement = 0x40;
198 if ( ! roundNearestEven ) {
199 if ( roundingMode == ROUND_TO_ZERO ) {
200 roundIncrement = 0;
201 }
202 else {
203 roundIncrement = 0x7F;
204 if ( zSign ) {
205 if ( roundingMode == ROUND_UP ) roundIncrement = 0;
206 }
207 else {
208 if ( roundingMode == ROUND_DOWN ) roundIncrement = 0;
209 }
210 }
211 }
212 roundBits = zSig & 0x7F;
213 if ( 0xFD <= (uint16_t) zExp ) {
214 if ( ( 0xFD < zExp )
215 || ( ( zExp == 0xFD )
216 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
217 ) {
218 float_raise( EXC_OVERFLOW | EXC_INEXACT );
219 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
220 }
221 if ( zExp < 0 ) {
222 isTiny =
223 ( float_detect_tininess == TINY_BEFORE_ROUNDING )
224 || ( zExp < -1 )
225 || ( zSig + roundIncrement < 0x80000000 );
226 shift32RightJamming( zSig, - zExp, &zSig );
227 zExp = 0;
228 roundBits = zSig & 0x7F;
229 if ( isTiny && roundBits ) float_raise( EXC_UNDERFLOW );
230 }
231 }
232 if ( roundBits ) float_exception_flags = float_exception_flags | EXC_INEXACT;
233 zSig = ( zSig + roundIncrement )>>7;
234 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
235 if ( zSig == 0 ) zExp = 0;
236 return packFloat32( zSign, zExp, zSig );
237
238}
239/*}}}*/
240uint64_t SS_Fpu::roundAndPackFloat64( int zSign, int16_t zExp, uint64_t zSig )/*{{{*/
241{
242/* -------------------------------------------------------------------------------
243Takes an abstract floating-point value having sign `zSign', exponent `zExp',
244and significand `zSig', and returns the proper double-precision floating-
245point value corresponding to the abstract input. Ordinarily, the abstract
246value is simply rounded and packed into the double-precision format, with
247the inexact exception raised if the abstract input cannot be represented
248exactly. However, if the abstract value is too large, the overflow and
249inexact exceptions are raised and an infinity or maximal finite value is
250returned. If the abstract value is too small, the input value is rounded to
251a subnormal number, and the underflow and inexact exceptions are raised if
252the abstract input cannot be represented exactly as a subnormal double-
253precision floating-point number.
254 The input significand `zSig' has its binary point between bits 62
255and 61, which is 10 bits to the left of the usual location. This shifted
256significand must be normalized or smaller. If `zSig' is not normalized,
257`zExp' must be 0; in that case, the result returned is a subnormal number,
258and it must not require rounding. In the usual case that `zSig' is
259normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
260The handling of underflow and overflow follows the IEC/IEEE Standard for
261Binary Floating-Point Arithmetic.
262-------------------------------------------------------------------------------
263*/
264 int8_t roundingMode;
265 int roundNearestEven;
266 int16_t roundIncrement, roundBits;
267 int isTiny;
268
269 roundingMode = float_rounding_mode;
270 roundNearestEven = ( roundingMode == ROUND_NEAREST );
271 roundIncrement = 0x200;
272 if ( ! roundNearestEven ) {
273 if ( roundingMode == ROUND_TO_ZERO ) {
274 roundIncrement = 0;
275 }
276 else {
277 roundIncrement = 0x3FF;
278 if ( zSign ) {
279 if ( roundingMode == ROUND_UP ) roundIncrement = 0;
280 }
281 else {
282 if ( roundingMode == ROUND_DOWN ) roundIncrement = 0;
283 }
284 }
285 }
286 roundBits = zSig & 0x3FF;
287 if ( 0x7FD <= (uint16_t) zExp ) {
288 if ( ( 0x7FD < zExp )
289 || ( ( zExp == 0x7FD )
290 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
291 ) {
292 float_raise( EXC_OVERFLOW | EXC_INEXACT );
293 return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
294 }
295 if ( zExp < 0 ) {
296 isTiny =
297 ( float_detect_tininess == TINY_BEFORE_ROUNDING )
298 || ( zExp < -1 )
299 || ( zSig + roundIncrement < 0x8000000000000000 );
300 shift64RightJamming( zSig, - zExp, &zSig );
301 zExp = 0;
302 roundBits = zSig & 0x3FF;
303 if ( isTiny && roundBits ) float_raise( EXC_UNDERFLOW );
304 }
305 }
306 if ( roundBits ) float_exception_flags = float_exception_flags | EXC_INEXACT;
307 zSig = ( zSig + roundIncrement )>>10;
308 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
309 if ( zSig == 0 ) zExp = 0;
310 return packFloat64( zSign, zExp, zSig );
311
312}
313/*}}}*/
314
315uint32_t SS_Fpu::normalizeRoundAndPackFloat32( int zSign, int16_t zExp, uint32_t zSig )/*{{{*/
316{
317/* -------------------------------------------------------------------------------
318Takes an abstract floating-point value having sign `zSign', exponent `zExp',
319and significand `zSig', and returns the proper single-precision floating-
320point value corresponding to the abstract input. This routine is just like
321`roundAndPackFloat32' except that `zSig' does not have to be normalized.
322Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
323floating-point exponent.
324------------------------------------------------------------------------------- */
325
326 int8_t shiftCount;
327
328 shiftCount = countLeadingZeros32( zSig ) - 1;
329 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
330
331}
332/*}}}*/
333uint64_t SS_Fpu::normalizeRoundAndPackFloat64( int zSign, int16_t zExp, uint64_t zSig )/*{{{*/
334{
335/* -------------------------------------------------------------------------------
336Takes an abstract floating-point value having sign `zSign', exponent `zExp',
337and significand `zSig', and returns the proper double-precision floating-
338point value corresponding to the abstract input. This routine is just like
339`roundAndPackFloat64' except that `zSig' does not have to be normalized.
340Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
341floating-point exponent.
342------------------------------------------------------------------------------- */
343
344 int8_t shiftCount;
345
346 shiftCount = countLeadingZeros64( zSig ) - 1;
347 return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
348
349}
350/*}}}*/
351
352void SS_Fpu::normalizeFloat32Subnormal( uint32_t aSig, int16_t *zExpPtr, uint32_t *zSigPtr )/*{{{*/
353{
354/* -------------------------------------------------------------------------------
355Normalizes the subnormal single-precision floating-point value represented
356by the denormalized significand `aSig'. The normalized exponent and
357significand are stored at the locations pointed to by `zExpPtr' and
358`zSigPtr', respectively.
359------------------------------------------------------------------------------- */
360
361 int8_t shiftCount;
362
363 shiftCount = countLeadingZeros32( aSig ) - 8;
364 *zSigPtr = aSig<<shiftCount;
365 *zExpPtr = 1 - shiftCount;
366
367}
368/*}}}*/
369void SS_Fpu::normalizeFloat64Subnormal( uint64_t aSig, int16_t *zExpPtr, uint64_t *zSigPtr )/*{{{*/
370{
371/* -------------------------------------------------------------------------------
372Normalizes the subnormal double-precision floating-point value represented
373by the denormalized significand `aSig'. The normalized exponent and
374significand are stored at the locations pointed to by `zExpPtr' and
375`zSigPtr', respectively.
376------------------------------------------------------------------------------- */
377
378 int8_t shiftCount;
379
380 shiftCount = countLeadingZeros64( aSig ) - 11;
381 *zSigPtr = aSig<<shiftCount;
382 *zExpPtr = 1 - shiftCount;
383
384}
385/*}}}*/
386
387uint32_t SS_Fpu::addFloat32Sigs( uint32_t a, uint32_t b, int zSign, int half )/*{{{*/
388{
389/* -------------------------------------------------------------------------------
390Returns the result of adding the absolute values of the single-precision
391floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
392before being returned. `zSign' is ignored if the result is a NaN.
393The addition is performed according to the IEC/IEEE Standard for Binary
394Floating-Point Arithmetic.
395------------------------------------------------------------------------------- */
396 int16_t aExp, bExp, zExp;
397 uint32_t aSig, bSig, zSig;
398 int16_t expDiff;
399
400 aSig = extractFloat32Frac( a );
401 aExp = extractFloat32Exp( a );
402 bSig = extractFloat32Frac( b );
403 bExp = extractFloat32Exp( b );
404 expDiff = aExp - bExp;
405 aSig <<= 6;
406 bSig <<= 6;
407 if ( 0 < expDiff ) {
408 if ( aExp == 0xFF ) {
409 if ( aSig ) return propagateFloat32NaN( a, b );
410 return packFloat32( zSign, 0xFF, 0 );
411 }
412 if ( bExp == 0 ) {
413 --expDiff;
414 }
415 else {
416 bSig |= 0x20000000;
417 }
418 shift32RightJamming( bSig, expDiff, &bSig );
419 zExp = aExp;
420 }
421 else if ( expDiff < 0 ) {
422 if ( bExp == 0xFF ) {
423 if ( bSig ) return propagateFloat32NaN( a, b );
424 return packFloat32( zSign, 0xFF, 0 );
425 }
426 if ( aExp == 0 ) {
427 ++expDiff;
428 }
429 else {
430 aSig |= 0x20000000;
431 }
432 shift32RightJamming( aSig, - expDiff, &aSig );
433 zExp = bExp;
434 }
435 else {
436 if ( aExp == 0xFF ) {
437 if ( aSig | bSig ) return propagateFloat32NaN( a, b );
438 return packFloat32( zSign, 0xFF, 0 );
439 }
440 if ( aExp == 0 ) {
441 if (half) {
442 zSig = aSig + bSig;
443 if (zSig) {
444 int8_t shiftCount = countLeadingZeros32(zSig) - 1;
445 zSig <<= shiftCount;
446 aExp -= shiftCount;
447 return roundAndPackFloat32( zSign, aExp, zSig );
448 }
449 else
450 return packFloat32( zSign, 0, 0 );
451 }
452 else
453 return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
454 }
455 zSig = 0x40000000 + aSig + bSig;
456 zExp = aExp;
457 goto roundAndPack;
458 }
459 aSig |= 0x20000000;
460 zSig = ( aSig + bSig )<<1;
461 --zExp;
462 if ( (int32_t) zSig < 0 ) {
463 zSig = aSig + bSig;
464 ++zExp;
465 }
466 roundAndPack:
467 if (half)
468 --zExp;
469 return roundAndPackFloat32( zSign, zExp, zSig );
470
471}
472/*}}}*/
473uint32_t SS_Fpu::subFloat32Sigs( uint32_t a, uint32_t b, int zSign, int half )/*{{{*/
474{
475/* -------------------------------------------------------------------------------
476Returns the result of subtracting the absolute values of the single-
477precision floating-point values `a' and `b'. If `zSign' is 1, the
478difference is negated before being returned. `zSign' is ignored if the
479result is a NaN. The subtraction is performed according to the IEC/IEEE
480Standard for Binary Floating-Point Arithmetic.
481------------------------------------------------------------------------------- */
482 int16_t aExp, bExp, zExp;
483 uint32_t aSig, bSig, zSig;
484 int16_t expDiff;
485
486 aSig = extractFloat32Frac( a );
487 aExp = extractFloat32Exp( a );
488 bSig = extractFloat32Frac( b );
489 bExp = extractFloat32Exp( b );
490 expDiff = aExp - bExp;
491 aSig <<= 7;
492 bSig <<= 7;
493 if ( 0 < expDiff ) goto aExpBigger;
494 if ( expDiff < 0 ) goto bExpBigger;
495 if ( aExp == 0xFF ) {
496 if ( aSig | bSig ) return propagateFloat32NaN( a, b );
497 float_raise( EXC_INVALID );
498 return 0x7FFFFFFF;
499 }
500 if ( aExp == 0 ) {
501 aExp = 1;
502 bExp = 1;
503 }
504 if ( bSig < aSig ) goto aBigger;
505 if ( aSig < bSig ) goto bBigger;
506 return packFloat32( float_rounding_mode == ROUND_DOWN, 0, 0 );
507 bExpBigger:
508 if ( bExp == 0xFF ) {
509 if ( bSig ) return propagateFloat32NaN( a, b );
510 return packFloat32( zSign ^ 1, 0xFF, 0 );
511 }
512 if ( aExp == 0 ) {
513 ++expDiff;
514 }
515 else {
516 aSig |= 0x40000000;
517 }
518 shift32RightJamming( aSig, - expDiff, &aSig );
519 bSig |= 0x40000000;
520 bBigger:
521 zSig = bSig - aSig;
522 zExp = bExp;
523 zSign ^= 1;
524 goto normalizeRoundAndPack;
525 aExpBigger:
526 if ( aExp == 0xFF ) {
527 if ( aSig ) return propagateFloat32NaN( a, b );
528 return packFloat32( zSign, 0xFF, 0 );
529 }
530 if ( bExp == 0 ) {
531 --expDiff;
532 }
533 else {
534 bSig |= 0x40000000;
535 }
536 shift32RightJamming( bSig, expDiff, &bSig );
537 aSig |= 0x40000000;
538 aBigger:
539 zSig = aSig - bSig;
540 zExp = aExp;
541 normalizeRoundAndPack:
542 if (half)
543 zExp -= 2;
544 else
545 --zExp;
546 return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
547
548}
549/*}}}*/
550uint64_t SS_Fpu::addFloat64Sigs( uint64_t a, uint64_t b, int zSign, int half )/*{{{*/
551{
552/* -------------------------------------------------------------------------------
553Returns the result of adding the absolute values of the double-precision
554floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
555before being returned. `zSign' is ignored if the result is a NaN.
556The addition is performed according to the IEC/IEEE Standard for Binary
557Floating-Point Arithmetic.
558------------------------------------------------------------------------------- */
559 int16_t aExp, bExp, zExp;
560 uint64_t aSig, bSig, zSig;
561 int16_t expDiff;
562
563 aSig = extractFloat64Frac( a );
564 aExp = extractFloat64Exp( a );
565 bSig = extractFloat64Frac( b );
566 bExp = extractFloat64Exp( b );
567 expDiff = aExp - bExp;
568 aSig <<= 9;
569 bSig <<= 9;
570 if ( 0 < expDiff ) {
571 if ( aExp == 0x7FF ) {
572 if ( aSig ) return propagateFloat64NaN( a, b );
573 return packFloat64( zSign, 0x7FF, 0 );
574 }
575 if ( bExp == 0 ) {
576 --expDiff;
577 }
578 else {
579 bSig |= 0x2000000000000000 ;
580 }
581 shift64RightJamming( bSig, expDiff, &bSig );
582 zExp = aExp;
583 }
584 else if ( expDiff < 0 ) {
585 if ( bExp == 0x7FF ) {
586 if ( bSig ) return propagateFloat64NaN( a, b );
587 return packFloat64( zSign, 0x7FF, 0 );
588 }
589 if ( aExp == 0 ) {
590 ++expDiff;
591 }
592 else {
593 aSig |= 0x2000000000000000 ;
594 }
595 shift64RightJamming( aSig, - expDiff, &aSig );
596 zExp = bExp;
597 }
598 else {
599 if ( aExp == 0x7FF ) {
600 if ( aSig | bSig ) return propagateFloat64NaN( a, b );
601 return packFloat64( zSign, 0x7FF, 0 );
602 }
603 if ( aExp == 0 ) {
604 if (half) {
605 zSig = aSig + bSig;
606 if (zSig) {
607 int8_t shiftCount = countLeadingZeros64(zSig) - 1;
608 zSig <<= shiftCount;
609 aExp -= shiftCount;
610 return roundAndPackFloat64( zSign, aExp, zSig );
611 }
612 else
613 return packFloat64( zSign, 0, 0 );
614 }
615 else
616 return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
617 }
618 zSig = 0x4000000000000000 + aSig + bSig;
619 zExp = aExp;
620 goto roundAndPack;
621 }
622 aSig |= 0x2000000000000000 ;
623 zSig = ( aSig + bSig )<<1;
624 --zExp;
625 if ( (int64_t) zSig < 0 ) {
626 zSig = aSig + bSig;
627 ++zExp;
628 }
629 roundAndPack:
630 if (half)
631 --zExp;
632 return roundAndPackFloat64( zSign, zExp, zSig );
633
634}
635/*}}}*/
636uint64_t SS_Fpu::subFloat64Sigs( uint64_t a, uint64_t b, int zSign, int half )/*{{{*/
637{
638/* -------------------------------------------------------------------------------
639Returns the result of subtracting the absolute values of the double-
640precision floating-point values `a' and `b'. If `zSign' is 1, the
641difference is negated before being returned. `zSign' is ignored if the
642result is a NaN. The subtraction is performed according to the IEC/IEEE
643Standard for Binary Floating-Point Arithmetic.
644------------------------------------------------------------------------------- */
645 int16_t aExp, bExp, zExp;
646 uint64_t aSig, bSig, zSig;
647 int16_t expDiff;
648
649 aSig = extractFloat64Frac( a );
650 aExp = extractFloat64Exp( a );
651 bSig = extractFloat64Frac( b );
652 bExp = extractFloat64Exp( b );
653 expDiff = aExp - bExp;
654 aSig <<= 10;
655 bSig <<= 10;
656 if ( 0 < expDiff ) goto aExpBigger;
657 if ( expDiff < 0 ) goto bExpBigger;
658 if ( aExp == 0x7FF ) {
659 if ( aSig | bSig ) return propagateFloat64NaN( a, b );
660 float_raise( EXC_INVALID );
661 return 0x7FFFFFFFFFFFFFFF;
662 }
663 if ( aExp == 0 ) {
664 aExp = 1;
665 bExp = 1;
666 }
667 if ( bSig < aSig ) goto aBigger;
668 if ( aSig < bSig ) goto bBigger;
669 return packFloat64( float_rounding_mode == ROUND_DOWN, 0, 0 );
670 bExpBigger:
671 if ( bExp == 0x7FF ) {
672 if ( bSig ) return propagateFloat64NaN( a, b );
673 return packFloat64( zSign ^ 1, 0x7FF, 0 );
674 }
675 if ( aExp == 0 ) {
676 ++expDiff;
677 }
678 else {
679 aSig |= 0x4000000000000000 ;
680 }
681 shift64RightJamming( aSig, - expDiff, &aSig );
682 bSig |= 0x4000000000000000 ;
683 bBigger:
684 zSig = bSig - aSig;
685 zExp = bExp;
686 zSign ^= 1;
687 goto normalizeRoundAndPack;
688 aExpBigger:
689 if ( aExp == 0x7FF ) {
690 if ( aSig ) return propagateFloat64NaN( a, b );
691 return packFloat64( zSign, 0x7FF, 0 );
692 }
693 if ( bExp == 0 ) {
694 --expDiff;
695 }
696 else {
697 bSig |= 0x4000000000000000 ;
698 }
699 shift64RightJamming( bSig, expDiff, &bSig );
700 aSig |= 0x4000000000000000 ;
701 aBigger:
702 zSig = aSig - bSig;
703 zExp = aExp;
704 normalizeRoundAndPack:
705 if (half)
706 zExp -= 2;
707 else
708 --zExp;
709 return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
710
711}
712/*}}}*/
713
714uint32_t SS_Fpu::int32_to_float32( int32_t a )/*{{{*/
715{
716/* -------------------------------------------------------------------------------
717Returns the result of converting the 32-bit two's complement integer `a'
718to the single-precision floating-point format. The conversion is performed
719according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
720------------------------------------------------------------------------------- */
721 int zSign;
722
723 if ( a == 0 ) return 0;
724 if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
725 zSign = ( a < 0 );
726 return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
727
728}
729/*}}}*/
730uint64_t SS_Fpu::int32_to_float64( int32_t a )/*{{{*/
731{
732/* -------------------------------------------------------------------------------
733Returns the result of converting the 32-bit two's complement integer `a'
734to the double-precision floating-point format. The conversion is performed
735according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
736------------------------------------------------------------------------------- */
737 int zSign;
738 uint32_t absA;
739 int8_t shiftCount;
740 uint64_t zSig;
741
742 if ( a == 0 ) return 0;
743 zSign = ( a < 0 );
744 absA = zSign ? - a : a;
745 shiftCount = countLeadingZeros32( absA ) + 21;
746 zSig = absA;
747 return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
748
749}
750/*}}}*/
751
752uint32_t SS_Fpu::int64_to_float32( int64_t a )/*{{{*/
753{
754/* -------------------------------------------------------------------------------
755Returns the result of converting the 64-bit two's complement integer `a'
756to the single-precision floating-point format. The conversion is performed
757according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
758------------------------------------------------------------------------------- */
759 int zSign;
760 uint64_t absA;
761 int8_t shiftCount;
762
763 if ( a == 0 ) return 0;
764 zSign = ( a < 0 );
765 absA = zSign ? - a : a;
766 shiftCount = countLeadingZeros64( absA ) - 40;
767 if ( 0 <= shiftCount ) {
768 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
769 }
770 else {
771 shiftCount += 7;
772 if ( shiftCount < 0 ) {
773 shift64RightJamming( absA, - shiftCount, &absA );
774 }
775 else {
776 absA <<= shiftCount;
777 }
778 return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
779 }
780
781}
782/*}}}*/
783uint64_t SS_Fpu::int64_to_float64( int64_t a )/*{{{*/
784{
785/* -------------------------------------------------------------------------------
786Returns the result of converting the 64-bit two's complement integer `a'
787to the double-precision floating-point format. The conversion is performed
788according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
789------------------------------------------------------------------------------- */
790 int zSign;
791
792 if ( a == 0 ) return 0;
793 if ( a == (int64_t) 0x8000000000000000 ) {
794 return packFloat64( 1, 0x43E, 0 );
795 }
796 zSign = ( a < 0 );
797 return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
798
799}
800/*}}}*/
801
802int32_t SS_Fpu::float32_to_int32( uint32_t a )/*{{{*/
803{
804/* -------------------------------------------------------------------------------
805Returns the result of converting the single-precision floating-point value
806`a' to the 32-bit two's complement integer format. The conversion is
807performed according to the IEC/IEEE Standard for Binary Floating-Point
808Arithmetic---which means in particular that the conversion is rounded
809according to the current rounding mode. If `a' is a NaN, the largest
810positive integer is returned. Otherwise, if the conversion overflows, the
811largest integer with the same sign as `a' is returned.
812------------------------------------------------------------------------------- */
813
814 int aSign;
815 int16_t aExp, shiftCount;
816 uint32_t aSig;
817 uint64_t aSig64;
818
819 aSig = extractFloat32Frac( a );
820 aExp = extractFloat32Exp( a );
821 aSign = extractFloat32Sign( a );
822 //if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
823 if ( aExp ) aSig |= 0x00800000;
824 shiftCount = 0xAF - aExp;
825 aSig64 = aSig;
826 aSig64 <<= 32;
827 if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
828 return roundAndPackInt32( aSign, aSig64 );
829
830}
831/*}}}*/
832int64_t SS_Fpu::float32_to_int64( uint32_t a )/*{{{*/
833{
834/* -------------------------------------------------------------------------------
835Returns the result of converting the single-precision floating-point value
836`a' to the 64-bit two's complement integer format. The conversion is
837performed according to the IEC/IEEE Standard for Binary Floating-Point
838Arithmetic---which means in particular that the conversion is rounded
839according to the current rounding mode. If `a' is a NaN, the largest
840positive integer is returned. Otherwise, if the conversion overflows, the
841largest integer with the same sign as `a' is returned.
842------------------------------------------------------------------------------- */
843 int aSign;
844 int16_t aExp, shiftCount;
845 uint32_t aSig;
846 uint64_t aSig64, aSigExtra;
847
848 aSig = extractFloat32Frac( a );
849 aExp = extractFloat32Exp( a );
850 aSign = extractFloat32Sign( a );
851 shiftCount = 0xBE - aExp;
852 if ( shiftCount < 0 ) {
853 float_raise( EXC_INVALID );
854 if ( ! aSign ) {
855 return 0x7FFFFFFFFFFFFFFF ;
856 }
857 return (int64_t) 0x8000000000000000 ;
858 }
859 if ( aExp ) aSig |= 0x00800000;
860 aSig64 = aSig;
861 aSig64 <<= 40;
862 shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
863 return roundAndPackInt64( aSign, aSig64, aSigExtra );
864
865}
866/*}}}*/
867uint64_t SS_Fpu::float32_to_float64( uint32_t a )/*{{{*/
868{
869/* -------------------------------------------------------------------------------
870Returns the result of converting the single-precision floating-point value
871`a' to the double-precision floating-point format. The conversion is
872performed according to the IEC/IEEE Standard for Binary Floating-Point
873Arithmetic.
874------------------------------------------------------------------------------- */
875 int aSign;
876 int16_t aExp;
877 uint32_t aSig;
878
879 aSig = extractFloat32Frac( a );
880 aExp = extractFloat32Exp( a );
881 aSign = extractFloat32Sign( a );
882 if ( aExp == 0xFF ) {
883 if ( aSig )
884 {
885 if ( float32_is_signaling_nan( a ) ) float_raise( EXC_INVALID );
886 return (((uint64_t)(a >> 31)) <<63 ) | 0x7FF8000000000000 | (uint64_t(aSig) << 29);
887 }
888 return packFloat64( aSign, 0x7FF, 0 );
889 }
890 if ( aExp == 0 ) {
891 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
892 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
893 --aExp;
894 }
895 return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
896
897}
898/*}}}*/
899
900int32_t SS_Fpu::float64_to_int32( uint64_t a )/*{{{*/
901{
902/* -------------------------------------------------------------------------------
903Returns the result of converting the double-precision floating-point value
904`a' to the 32-bit two's complement integer format. The conversion is
905performed according to the IEC/IEEE Standard for Binary Floating-Point
906Arithmetic---which means in particular that the conversion is rounded
907according to the current rounding mode. If `a' is a NaN, the largest
908positive integer is returned. Otherwise, if the conversion overflows, the
909largest integer with the same sign as `a' is returned.
910------------------------------------------------------------------------------- */
911 int aSign;
912 int16_t aExp, shiftCount;
913 uint64_t aSig;
914
915 aSig = extractFloat64Frac( a );
916 aExp = extractFloat64Exp( a );
917 aSign = extractFloat64Sign( a );
918 //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
919 if ( aExp ) aSig |= 0x0010000000000000 ;
920 shiftCount = 0x42C - aExp;
921 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
922 return roundAndPackInt32( aSign, aSig );
923
924}
925/*}}}*/
926int64_t SS_Fpu::float64_to_int64( uint64_t a )/*{{{*/
927{
928/* -------------------------------------------------------------------------------
929Returns the result of converting the double-precision floating-point value
930`a' to the 64-bit two's complement integer format. The conversion is
931performed according to the IEC/IEEE Standard for Binary Floating-Point
932Arithmetic---which means in particular that the conversion is rounded
933according to the current rounding mode. If `a' is a NaN, the largest
934positive integer is returned. Otherwise, if the conversion overflows, the
935largest integer with the same sign as `a' is returned.
936------------------------------------------------------------------------------- */
937 int aSign;
938 int16_t aExp, shiftCount;
939 uint64_t aSig, aSigExtra;
940
941 aSig = extractFloat64Frac( a );
942 aExp = extractFloat64Exp( a );
943 aSign = extractFloat64Sign( a );
944 if ( aExp ) aSig |= 0x0010000000000000 ;
945 shiftCount = 0x433 - aExp;
946 if ( shiftCount <= 0 ) {
947 if ( 0x43E < aExp ) {
948 float_raise( EXC_INVALID );
949 if (! aSign) {
950 return 0x7FFFFFFFFFFFFFFF ;
951 }
952 return (int64_t) 0x8000000000000000 ;
953 }
954 aSigExtra = 0;
955 aSig <<= - shiftCount;
956 }
957 else {
958 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
959 }
960 return roundAndPackInt64( aSign, aSig, aSigExtra );
961
962}
963/*}}}*/
964uint32_t SS_Fpu::float64_to_float32( uint64_t a )/*{{{*/
965{
966/* -------------------------------------------------------------------------------
967Returns the result of converting the double-precision floating-point value
968`a' to the single-precision floating-point format. The conversion is
969performed according to the IEC/IEEE Standard for Binary Floating-Point
970Arithmetic.
971------------------------------------------------------------------------------- */
972 int aSign;
973 int16_t aExp;
974 uint64_t aSig;
975 uint32_t zSig;
976
977 aSig = extractFloat64Frac( a );
978 aExp = extractFloat64Exp( a );
979 aSign = extractFloat64Sign( a );
980 if ( aExp == 0x7FF ) {
981 if ( aSig )
982 {
983 if ( float64_is_signaling_nan( a ) ) float_raise( EXC_INVALID );
984 return (uint32_t(a >>63) << 31) | 0x7FC00000 | ((a << 12) >> 41);
985 }
986 return packFloat32( aSign, 0xFF, 0 );
987 }
988 shift64RightJamming( aSig, 22, &aSig );
989 zSig = aSig;
990 if ( aExp || zSig ) {
991 zSig |= 0x40000000;
992 aExp -= 0x381;
993 }
994 return roundAndPackFloat32( aSign, aExp, zSig );
995
996}
997/*}}}*/
998
999uint32_t SS_Fpu::float32_add( uint32_t a, uint32_t b, int flipSign, int half )/*{{{*/
1000{
1001/* -------------------------------------------------------------------------------
1002Returns the result of adding the single-precision floating-point values `a'
1003and `b'. The operation is performed according to the IEC/IEEE Standard for
1004Binary Floating-Point Arithmetic.
1005------------------------------------------------------------------------------- */
1006 int aSign, bSign, zSign;
1007
1008 aSign = extractFloat32Sign( a );
1009 bSign = extractFloat32Sign( b );
1010 zSign = aSign;
1011 if (flipSign)
1012 zSign ^= 1;
1013 if ( aSign == bSign )
1014 return addFloat32Sigs( a, b, zSign, half );
1015 else
1016 return subFloat32Sigs( a, b, zSign, half );
1017}
1018/*}}}*/
1019uint32_t SS_Fpu::float32_sub( uint32_t a, uint32_t b, int flipSign, int half )/*{{{*/
1020{
1021/* -------------------------------------------------------------------------------
1022Returns the result of subtracting the single-precision floating-point values
1023`a' and `b'. The operation is performed according to the IEC/IEEE Standard
1024for Binary Floating-Point Arithmetic.
1025------------------------------------------------------------------------------- */
1026 int aSign, bSign, zSign;
1027
1028 aSign = extractFloat32Sign( a );
1029 bSign = extractFloat32Sign( b );
1030 zSign = aSign;
1031 if (flipSign)
1032 zSign ^= 1;
1033 if ( aSign == bSign )
1034 return subFloat32Sigs( a, b, zSign, half );
1035 else
1036 return addFloat32Sigs( a, b, zSign, half );
1037}
1038/*}}}*/
1039uint32_t SS_Fpu::float32_mul( uint32_t a, uint32_t b, int flipSign )/*{{{*/
1040{
1041/* -------------------------------------------------------------------------------
1042Returns the result of multiplying the single-precision floating-point values
1043`a' and `b'. The operation is performed according to the IEC/IEEE Standard
1044for Binary Floating-Point Arithmetic.
1045------------------------------------------------------------------------------- */
1046 int aSign, bSign, zSign;
1047 int16_t aExp, bExp, zExp;
1048 uint32_t aSig, bSig;
1049 uint64_t zSig64;
1050 uint32_t zSig;
1051 uint32_t ra, rb;
1052 int check_float_round_needed = 0;
1053
1054 float_round_needed = 0;
1055 aSig = extractFloat32Frac( a );
1056 aExp = extractFloat32Exp( a );
1057 aSign = extractFloat32Sign( a );
1058 bSig = extractFloat32Frac( b );
1059 bExp = extractFloat32Exp( b );
1060 bSign = extractFloat32Sign( b );
1061 zSign = aSign ^ bSign;
1062 if (flipSign)
1063 zSign ^= 1;
1064 if ( aExp == 0xFF ) {
1065 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1066 return propagateFloat32NaN( a, b );
1067 }
1068 if ( ( bExp | bSig ) == 0 ) {
1069 float_raise( EXC_INVALID );
1070 return 0x7FFFFFFF;
1071 }
1072 return packFloat32( zSign, 0xFF, 0 );
1073 }
1074 if ( bExp == 0xFF ) {
1075 if ( bSig ) return propagateFloat32NaN( a, b );
1076 if ( ( aExp | aSig ) == 0 ) {
1077 float_raise( EXC_INVALID );
1078 return 0x7FFFFFFF;
1079 }
1080 return packFloat32( zSign, 0xFF, 0 );
1081 }
1082 if ( aExp == 0 ) {
1083 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1084 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1085 }
1086 if ( bExp == 0 ) {
1087 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1088 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1089 }
1090 zExp = aExp + bExp - 0x7F;
1091 aSig = ( aSig | 0x00800000 )<<7;
1092 bSig = ( bSig | 0x00800000 )<<8;
1093 shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
1094 zSig = zSig64;
1095 if ( 0 <= (int32_t) ( zSig<<1 ) ) {
1096 zSig <<= 1;
1097 --zExp;
1098 if (((zSig >> 7) & 0x7) == 0x7)
1099 check_float_round_needed = 1;
1100 }
1101 /* Check overflow/underflow before rounding for unfused multiply. Remember,
1102 * zExp is one less than true value. */
1103 if (zExp > 0xFD)
1104 float_partial_exception_flags = float_partial_exception_flags | EXC_OVERFLOW;
1105 else if (zExp < 0)
1106 float_partial_exception_flags = float_partial_exception_flags | EXC_UNDERFLOW;
1107 if (check_float_round_needed && (zExp == 0xFD)) {
1108 ra = packFloat32( zSign, zExp, zSig >> 7 );
1109 rb = roundAndPackFloat32( zSign, zExp, zSig );
1110 if (ra != rb)
1111 float_round_needed = 1;
1112 return rb;
1113 }
1114 return roundAndPackFloat32( zSign, zExp, zSig );
1115
1116}
1117/*}}}*/
1118uint32_t SS_Fpu::float32_div( uint32_t a, uint32_t b )/*{{{*/
1119{
1120/* -------------------------------------------------------------------------------
1121Returns the result of dividing the single-precision floating-point value `a'
1122by the corresponding value `b'. The operation is performed according to the
1123IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1124------------------------------------------------------------------------------- */
1125 int aSign, bSign, zSign;
1126 int16_t aExp, bExp, zExp;
1127 uint32_t aSig, bSig, zSig;
1128
1129 aSig = extractFloat32Frac( a );
1130 aExp = extractFloat32Exp( a );
1131 aSign = extractFloat32Sign( a );
1132 bSig = extractFloat32Frac( b );
1133 bExp = extractFloat32Exp( b );
1134 bSign = extractFloat32Sign( b );
1135 zSign = aSign ^ bSign;
1136 if ( aExp == 0xFF ) {
1137 if ( aSig ) return propagateFloat32NaN( a, b );
1138 if ( bExp == 0xFF ) {
1139 if ( bSig ) return propagateFloat32NaN( a, b );
1140 float_raise( EXC_INVALID );
1141 return 0x7FFFFFFF;
1142 }
1143 return packFloat32( zSign, 0xFF, 0 );
1144 }
1145 if ( bExp == 0xFF ) {
1146 if ( bSig ) return propagateFloat32NaN( a, b );
1147 return packFloat32( zSign, 0, 0 );
1148 }
1149 if ( bExp == 0 ) {
1150 if ( bSig == 0 ) {
1151 if ( ( aExp | aSig ) == 0 ) {
1152 float_raise( EXC_INVALID );
1153 return 0x7FFFFFFF;
1154 }
1155 float_raise( EXC_DIVBYZERO );
1156 return packFloat32( zSign, 0xFF, 0 );
1157 }
1158 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1159 }
1160 if ( aExp == 0 ) {
1161 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1162 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1163 }
1164 zExp = aExp - bExp + 0x7D;
1165 aSig = ( aSig | 0x00800000 )<<7;
1166 bSig = ( bSig | 0x00800000 )<<8;
1167 if ( bSig <= ( aSig + aSig ) ) {
1168 aSig >>= 1;
1169 ++zExp;
1170 }
1171 zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
1172 if ( ( zSig & 0x3F ) == 0 ) {
1173 zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
1174 }
1175 return roundAndPackFloat32( zSign, zExp, zSig );
1176
1177}
1178/*}}}*/
1179uint32_t SS_Fpu::float32_sqrt( uint32_t a )/*{{{*/
1180{
1181/* -------------------------------------------------------------------------------
1182Returns the square root of the single-precision floating-point value `a'.
1183The operation is performed according to the IEC/IEEE Standard for Binary
1184Floating-Point Arithmetic.
1185------------------------------------------------------------------------------- */
1186 int aSign;
1187 int16_t aExp, zExp;
1188 uint32_t aSig, zSig;
1189 uint64_t rem, term;
1190
1191 aSig = extractFloat32Frac( a );
1192 aExp = extractFloat32Exp( a );
1193 aSign = extractFloat32Sign( a );
1194 if ( aExp == 0xFF ) {
1195 if ( aSig ) return propagateFloat32NaN( a, 0 );
1196 if ( ! aSign ) return a;
1197 float_raise( EXC_INVALID );
1198 return 0x7FFFFFFF;
1199 }
1200 if ( aSign ) {
1201 if ( ( aExp | aSig ) == 0 ) return a;
1202 float_raise( EXC_INVALID );
1203 return 0x7FFFFFFF;
1204 }
1205 if ( aExp == 0 ) {
1206 if ( aSig == 0 ) return 0;
1207 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1208 }
1209 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1210 aSig = ( aSig | 0x00800000 )<<8;
1211 zSig = estimateSqrt32( aExp, aSig ) + 2;
1212 if ( ( zSig & 0x7F ) <= 5 ) {
1213 if ( zSig < 2 ) {
1214 zSig = 0x7FFFFFFF;
1215 goto roundAndPack;
1216 }
1217 aSig >>= aExp & 1;
1218 term = ( (uint64_t) zSig ) * zSig;
1219 rem = ( ( (uint64_t) aSig )<<32 ) - term;
1220 while ( (int64_t) rem < 0 ) {
1221 --zSig;
1222 rem += ( ( (uint64_t) zSig )<<1 ) | 1;
1223 }
1224 zSig |= ( rem != 0 );
1225 }
1226 shift32RightJamming( zSig, 1, &zSig );
1227 roundAndPack:
1228 return roundAndPackFloat32( 0, zExp, zSig );
1229
1230}
1231/*}}}*/
1232uint32_t SS_Fpu::float32_rsqrt( uint32_t a )/*{{{*/
1233{
1234/* -------------------------------------------------------------------------------
1235Reciprocal square root.
1236------------------------------------------------------------------------------- */
1237 int aSign;
1238 int16_t aExp, tExp, zExp;
1239 uint32_t aSig, tSig, oneSig, zSig;
1240 uint64_t rem, term;
1241
1242 aSig = extractFloat32Frac( a );
1243 aExp = extractFloat32Exp( a );
1244 aSign = extractFloat32Sign( a );
1245 if ( aExp == 0xFF ) {
1246 if ( aSig ) return propagateFloat32NaN( a, 0 );
1247 if ( ! aSign ) return 0;
1248 float_raise( EXC_INVALID );
1249 return 0x7FFFFFFF;
1250 }
1251 if ( aSign ) {
1252 float_raise( EXC_INVALID );
1253 return 0x7FFFFFFF;
1254 }
1255 if ( aExp == 0 ) {
1256 if ( aSig == 0 ) {
1257 float_raise( EXC_DIVBYZERO );
1258 return 0x7F800000;
1259 }
1260 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1261 }
1262 tExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1263 aSig = ( aSig | 0x00800000 )<<8;
1264 tSig = estimateSqrt32( aExp, aSig ) + 2;
1265 if ( ( tSig & 0x7F ) <= 5 ) {
1266 if ( tSig < 2 ) {
1267 tSig = 0x7FFFFFFF;
1268 goto cont1;
1269 }
1270 aSig >>= aExp & 1;
1271 term = ( (uint64_t) tSig ) * tSig;
1272 rem = ( ( (uint64_t) aSig )<<32 ) - term;
1273 while ( (int64_t) rem < 0 ) {
1274 --tSig;
1275 rem += ( ( (uint64_t) tSig )<<1 ) | 1;
1276 }
1277 tSig |= ( rem != 0 );
1278 }
1279 shift32RightJamming( tSig, 1, &tSig );
1280
1281 cont1:
1282 /* We have just finished sqrt() now comes reciprocal 1/sqrt().
1283 * Remember that tExp is 1 less than true value.
1284 */
1285 zExp = 0xFB - tExp;
1286 oneSig = 0x40000000;
1287 tSig <<= 1;
1288 if ( tSig <= ( oneSig + oneSig ) ) {
1289 oneSig >>= 1;
1290 ++zExp;
1291 }
1292 zSig = ( ( (int64_t) oneSig )<<32 ) / tSig;
1293 if ( ( zSig & 0x3F ) == 0 ) {
1294 zSig |= ( (int64_t) tSig * zSig != ( (int64_t) oneSig )<<32 );
1295 }
1296 return roundAndPackFloat32( 0, zExp, zSig );
1297}
1298/*}}}*/
1299
1300uint64_t SS_Fpu::float64_add( uint64_t a, uint64_t b, int flipSign, int half )/*{{{*/
1301{
1302/* -------------------------------------------------------------------------------
1303Returns the result of adding the double-precision floating-point values `a'
1304and `b'. The operation is performed according to the IEC/IEEE Standard for
1305Binary Floating-Point Arithmetic.
1306------------------------------------------------------------------------------- */
1307 int aSign, bSign, zSign;
1308
1309 aSign = extractFloat64Sign( a );
1310 bSign = extractFloat64Sign( b );
1311 zSign = aSign;
1312 if (flipSign)
1313 zSign ^= 1;
1314 if ( aSign == bSign )
1315 return addFloat64Sigs( a, b, zSign, half );
1316 else
1317 return subFloat64Sigs( a, b, zSign, half );
1318}
1319/*}}}*/
1320uint64_t SS_Fpu::float64_sub( uint64_t a, uint64_t b, int flipSign, int half )/*{{{*/
1321{
1322/* -------------------------------------------------------------------------------
1323Returns the result of subtracting the double-precision floating-point values
1324`a' and `b'. The operation is performed according to the IEC/IEEE Standard
1325for Binary Floating-Point Arithmetic.
1326------------------------------------------------------------------------------- */
1327 int aSign, bSign, zSign;
1328
1329 aSign = extractFloat64Sign( a );
1330 bSign = extractFloat64Sign( b );
1331 zSign = aSign;
1332 if (flipSign)
1333 zSign ^= 1;
1334 if ( aSign == bSign )
1335 return subFloat64Sigs( a, b, zSign, half );
1336 else
1337 return addFloat64Sigs( a, b, zSign, half );
1338}
1339/*}}}*/
1340uint64_t SS_Fpu::float64_mul( uint64_t a, uint64_t b, int flipSign )/*{{{*/
1341{
1342/* -------------------------------------------------------------------------------
1343Returns the result of multiplying the double-precision floating-point values
1344`a' and `b'. The operation is performed according to the IEC/IEEE Standard
1345for Binary Floating-Point Arithmetic.
1346------------------------------------------------------------------------------- */
1347 int aSign, bSign, zSign;
1348 int16_t aExp, bExp, zExp;
1349 uint64_t aSig, bSig, zSig0, zSig1;
1350 uint64_t ra, rb;
1351 int check_float_round_needed = 0;
1352
1353 float_round_needed = 0;
1354 aSig = extractFloat64Frac( a );
1355 aExp = extractFloat64Exp( a );
1356 aSign = extractFloat64Sign( a );
1357 bSig = extractFloat64Frac( b );
1358 bExp = extractFloat64Exp( b );
1359 bSign = extractFloat64Sign( b );
1360 zSign = aSign ^ bSign;
1361 if (flipSign)
1362 zSign ^= 1;
1363 if ( aExp == 0x7FF ) {
1364 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
1365 return propagateFloat64NaN( a, b );
1366 }
1367 if ( ( bExp | bSig ) == 0 ) {
1368 float_raise( EXC_INVALID );
1369 return 0x7FFFFFFFFFFFFFFF;
1370 }
1371 return packFloat64( zSign, 0x7FF, 0 );
1372 }
1373 if ( bExp == 0x7FF ) {
1374 if ( bSig ) return propagateFloat64NaN( a, b );
1375 if ( ( aExp | aSig ) == 0 ) {
1376 float_raise( EXC_INVALID );
1377 return 0x7FFFFFFFFFFFFFFF;
1378 }
1379 return packFloat64( zSign, 0x7FF, 0 );
1380 }
1381 if ( aExp == 0 ) {
1382 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
1383 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
1384 }
1385 if ( bExp == 0 ) {
1386 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
1387 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
1388 }
1389 zExp = aExp + bExp - 0x3FF;
1390 aSig = ( aSig | 0x0010000000000000 )<<10;
1391 bSig = ( bSig | 0x0010000000000000 )<<11;
1392 mul64To128( aSig, bSig, &zSig0, &zSig1 );
1393 zSig0 |= ( zSig1 != 0 );
1394 if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
1395 zSig0 <<= 1;
1396 --zExp;
1397 if (((zSig0 >> 10) & 0x7) == 0x7)
1398 check_float_round_needed = 1;
1399 }
1400 /* Check overflow/underflow before rounding for unfused multiply. Remember,
1401 * zExp is one less than true value. */
1402 if (zExp > 0x7FD)
1403 float_partial_exception_flags = float_partial_exception_flags | EXC_OVERFLOW;
1404 else if (zExp < 0)
1405 float_partial_exception_flags = float_partial_exception_flags | EXC_UNDERFLOW;
1406 if (check_float_round_needed && (zExp == 0x7FD)) {
1407 ra = packFloat64( zSign, zExp, zSig0 >> 10 );
1408 rb = roundAndPackFloat64( zSign, zExp, zSig0 );
1409 if (ra != rb)
1410 float_round_needed = 1;
1411 return rb;
1412 }
1413 return roundAndPackFloat64( zSign, zExp, zSig0 );
1414
1415}
1416/*}}}*/
1417uint64_t SS_Fpu::float64_div( uint64_t a, uint64_t b )/*{{{*/
1418{
1419/* -------------------------------------------------------------------------------
1420Returns the result of dividing the double-precision floating-point value `a'
1421by the corresponding value `b'. The operation is performed according to
1422the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1423------------------------------------------------------------------------------- */
1424 int aSign, bSign, zSign;
1425 int16_t aExp, bExp, zExp;
1426 uint64_t aSig, bSig, zSig;
1427 uint64_t rem0, rem1;
1428 uint64_t term0, term1;
1429
1430 aSig = extractFloat64Frac( a );
1431 aExp = extractFloat64Exp( a );
1432 aSign = extractFloat64Sign( a );
1433 bSig = extractFloat64Frac( b );
1434 bExp = extractFloat64Exp( b );
1435 bSign = extractFloat64Sign( b );
1436 zSign = aSign ^ bSign;
1437 if ( aExp == 0x7FF ) {
1438 if ( aSig ) return propagateFloat64NaN( a, b );
1439 if ( bExp == 0x7FF ) {
1440 if ( bSig ) return propagateFloat64NaN( a, b );
1441 float_raise( EXC_INVALID );
1442 return 0x7FFFFFFFFFFFFFFF;
1443 }
1444 return packFloat64( zSign, 0x7FF, 0 );
1445 }
1446 if ( bExp == 0x7FF ) {
1447 if ( bSig ) return propagateFloat64NaN( a, b );
1448 return packFloat64( zSign, 0, 0 );
1449 }
1450 if ( bExp == 0 ) {
1451 if ( bSig == 0 ) {
1452 if ( ( aExp | aSig ) == 0 ) {
1453 float_raise( EXC_INVALID );
1454 return 0x7FFFFFFFFFFFFFFF;
1455 }
1456 float_raise( EXC_DIVBYZERO );
1457 return packFloat64( zSign, 0x7FF, 0 );
1458 }
1459 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
1460 }
1461 if ( aExp == 0 ) {
1462 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
1463 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
1464 }
1465 zExp = aExp - bExp + 0x3FD;
1466 aSig = ( aSig | 0x0010000000000000 )<<10;
1467 bSig = ( bSig | 0x0010000000000000 )<<11;
1468 if ( bSig <= ( aSig + aSig ) ) {
1469 aSig >>= 1;
1470 ++zExp;
1471 }
1472 zSig = estimateDiv128To64( aSig, 0, bSig );
1473 if ( ( zSig & 0x1FF ) <= 2 ) {
1474 mul64To128( bSig, zSig, &term0, &term1 );
1475 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
1476 while ( (int64_t) rem0 < 0 ) {
1477 --zSig;
1478 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
1479 }
1480 zSig |= ( rem1 != 0 );
1481 }
1482 return roundAndPackFloat64( zSign, zExp, zSig );
1483
1484}
1485/*}}}*/
1486uint64_t SS_Fpu::float64_sqrt( uint64_t a )/*{{{*/
1487{
1488/* -------------------------------------------------------------------------------
1489Returns the square root of the double-precision floating-point value `a'.
1490The operation is performed according to the IEC/IEEE Standard for Binary
1491Floating-Point Arithmetic.
1492------------------------------------------------------------------------------- */
1493 int aSign;
1494 int16_t aExp, zExp;
1495 uint64_t aSig, zSig, doubleZSig;
1496 uint64_t rem0, rem1, term0, term1;
1497
1498 aSig = extractFloat64Frac( a );
1499 aExp = extractFloat64Exp( a );
1500 aSign = extractFloat64Sign( a );
1501 if ( aExp == 0x7FF ) {
1502 if ( aSig ) return propagateFloat64NaN( a, a );
1503 if ( ! aSign ) return a;
1504 float_raise( EXC_INVALID );
1505 return 0x7FFFFFFFFFFFFFFF;
1506 }
1507 if ( aSign ) {
1508 if ( ( aExp | aSig ) == 0 ) return a;
1509 float_raise( EXC_INVALID );
1510 return 0x7FFFFFFFFFFFFFFF;
1511 }
1512 if ( aExp == 0 ) {
1513 if ( aSig == 0 ) return 0;
1514 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
1515 }
1516 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
1517 aSig |= 0x0010000000000000 ;
1518 zSig = estimateSqrt32( aExp, aSig>>21 );
1519 aSig <<= 9 - ( aExp & 1 );
1520 zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
1521 if ( ( zSig & 0x1FF ) <= 5 ) {
1522 doubleZSig = zSig<<1;
1523 mul64To128( zSig, zSig, &term0, &term1 );
1524 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
1525 while ( (int64_t) rem0 < 0 ) {
1526 --zSig;
1527 doubleZSig -= 2;
1528 add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
1529 }
1530 zSig |= ( ( rem0 | rem1 ) != 0 );
1531 }
1532 return roundAndPackFloat64( 0, zExp, zSig );
1533
1534}
1535/*}}}*/
1536uint64_t SS_Fpu::float64_rsqrt( uint64_t a )/*{{{*/
1537{
1538/* -------------------------------------------------------------------------------
1539Reciprocal square root.
1540------------------------------------------------------------------------------- */
1541 int aSign;
1542 int16_t aExp, tExp, zExp;
1543 uint64_t aSig, tSig, zSig, doubleZSig, oneSig;
1544 uint64_t rem0, rem1, term0, term1;
1545
1546 aSig = extractFloat64Frac( a );
1547 aExp = extractFloat64Exp( a );
1548 aSign = extractFloat64Sign( a );
1549 if ( aExp == 0x7FF ) {
1550 if ( aSig ) return propagateFloat64NaN( a, a );
1551 if ( ! aSign ) return 0;
1552 float_raise( EXC_INVALID );
1553 return 0x7FFFFFFFFFFFFFFF;
1554 }
1555 if ( aSign ) {
1556 float_raise( EXC_INVALID );
1557 return 0x7FFFFFFFFFFFFFFF;
1558 }
1559 if ( aExp == 0 ) {
1560 if ( aSig == 0 ) {
1561 float_raise( EXC_DIVBYZERO );
1562 return 0x7FF0000000000000;
1563 }
1564 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
1565 }
1566 tExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
1567 aSig |= 0x0010000000000000 ;
1568 tSig = estimateSqrt32( aExp, aSig>>21 );
1569 aSig <<= 9 - ( aExp & 1 );
1570 tSig = estimateDiv128To64( aSig, 0, tSig<<32 ) + ( tSig<<30 );
1571 if ( ( tSig & 0x1FF ) <= 5 ) {
1572 doubleZSig = tSig<<1;
1573 mul64To128( tSig, tSig, &term0, &term1 );
1574 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
1575 while ( (int64_t) rem0 < 0 ) {
1576 --tSig;
1577 doubleZSig -= 2;
1578 add128( rem0, rem1, tSig>>63, doubleZSig | 1, &rem0, &rem1 );
1579 }
1580 tSig |= ( ( rem0 | rem1 ) != 0 );
1581 }
1582
1583 /* We have just finished sqrt() now comes reciprocal 1/sqrt().
1584 * Remember that tExp is 1 less than true value.
1585 */
1586 zExp = 0x7FB - tExp;
1587 oneSig = 0x4000000000000000;
1588 tSig <<= 1;
1589 if ( tSig <= ( oneSig + oneSig ) ) {
1590 oneSig >>= 1;
1591 ++zExp;
1592 }
1593 zSig = estimateDiv128To64( oneSig, 0, tSig );
1594 if ( ( zSig & 0x1FF ) <= 2 ) {
1595 mul64To128( tSig, zSig, &term0, &term1 );
1596 sub128( oneSig, 0, term0, term1, &rem0, &rem1 );
1597 while ( (int64_t) rem0 < 0 ) {
1598 --zSig;
1599 add128( rem0, rem1, 0, tSig, &rem0, &rem1 );
1600 }
1601 zSig |= ( rem1 != 0 );
1602 }
1603
1604 return roundAndPackFloat64( 0, zExp, zSig );
1605}
1606/*}}}*/
1607
1608int SS_Fpu::float32_eq( uint32_t a, uint32_t b )/*{{{*/
1609{
1610/* -------------------------------------------------------------------------------
1611Returns 1 if the single-precision floating-point value `a' is equal to
1612the corresponding value `b', and 0 otherwise. The comparison is performed
1613according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1614------------------------------------------------------------------------------- */
1615
1616 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1617 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1618 ) {
1619 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1620 float_raise( EXC_INVALID );
1621 }
1622 return 0;
1623 }
1624 return ( a == b ) || ( (uint32_t) ( ( a | b )<<1 ) == 0 );
1625
1626}
1627/*}}}*/
1628int SS_Fpu::float32_lt( uint32_t a, uint32_t b )/*{{{*/
1629{
1630/* -------------------------------------------------------------------------------
1631Returns 1 if the single-precision floating-point value `a' is less than
1632the corresponding value `b', and 0 otherwise. The comparison is performed
1633according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1634------------------------------------------------------------------------------- */
1635 int aSign, bSign;
1636
1637 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1638 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1639 ) {
1640 float_raise( EXC_INVALID );
1641 return 0;
1642 }
1643 aSign = extractFloat32Sign( a );
1644 bSign = extractFloat32Sign( b );
1645 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( a | b )<<1 ) != 0 );
1646 return ( a != b ) && ( aSign ^ ( a < b ) );
1647
1648}
1649/*}}}*/
1650int SS_Fpu::float32_eq_signaling( uint32_t a, uint32_t b )/*{{{*/
1651{
1652/* -------------------------------------------------------------------------------
1653Returns 1 if the single-precision floating-point value `a' is equal to
1654the corresponding value `b', and 0 otherwise. The invalid exception is
1655raised if either operand is a NaN. Otherwise, the comparison is performed
1656according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1657------------------------------------------------------------------------------- */
1658
1659 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1660 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1661 ) {
1662 float_raise( EXC_INVALID );
1663 return 0;
1664 }
1665 return ( a == b ) || ( (uint32_t) ( ( a | b )<<1 ) == 0 );
1666
1667}
1668/*}}}*/
1669int SS_Fpu::float32_lt_quiet( uint32_t a, uint32_t b )/*{{{*/
1670{
1671/* -------------------------------------------------------------------------------
1672Returns 1 if the single-precision floating-point value `a' is less than
1673the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
1674exception. Otherwise, the comparison is performed according to the IEC/IEEE
1675Standard for Binary Floating-Point Arithmetic.
1676------------------------------------------------------------------------------- */
1677 int aSign, bSign;
1678
1679 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1680 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1681 ) {
1682 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1683 float_raise( EXC_INVALID );
1684 }
1685 return 0;
1686 }
1687 aSign = extractFloat32Sign( a );
1688 bSign = extractFloat32Sign( b );
1689 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( a | b )<<1 ) != 0 );
1690 return ( a != b ) && ( aSign ^ ( a < b ) );
1691
1692}
1693/*}}}*/
1694
1695int SS_Fpu::float64_eq( uint64_t a, uint64_t b )/*{{{*/
1696{
1697/* -------------------------------------------------------------------------------
1698Returns 1 if the double-precision floating-point value `a' is equal to the
1699corresponding value `b', and 0 otherwise. The comparison is performed
1700according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1701------------------------------------------------------------------------------- */
1702
1703 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
1704 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
1705 ) {
1706 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
1707 float_raise( EXC_INVALID );
1708 }
1709 return 0;
1710 }
1711 return ( a == b ) || ( (uint64_t) ( ( a | b )<<1 ) == 0 );
1712
1713}
1714/*}}}*/
1715int SS_Fpu::float64_lt( uint64_t a, uint64_t b )/*{{{*/
1716{
1717/* -------------------------------------------------------------------------------
1718Returns 1 if the double-precision floating-point value `a' is less than
1719the corresponding value `b', and 0 otherwise. The comparison is performed
1720according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1721------------------------------------------------------------------------------- */
1722 int aSign, bSign;
1723
1724 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
1725 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
1726 ) {
1727 float_raise( EXC_INVALID );
1728 return 0;
1729 }
1730 aSign = extractFloat64Sign( a );
1731 bSign = extractFloat64Sign( b );
1732 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( a | b )<<1 ) != 0 );
1733 return ( a != b ) && ( aSign ^ ( a < b ) );
1734
1735}
1736/*}}}*/
1737int SS_Fpu::float64_eq_signaling( uint64_t a, uint64_t b )/*{{{*/
1738{
1739/* -------------------------------------------------------------------------------
1740Returns 1 if the double-precision floating-point value `a' is equal to the
1741corresponding value `b', and 0 otherwise. The invalid exception is raised
1742if either operand is a NaN. Otherwise, the comparison is performed
1743according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1744------------------------------------------------------------------------------- */
1745
1746 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
1747 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
1748 ) {
1749 float_raise( EXC_INVALID );
1750 return 0;
1751 }
1752 return ( a == b ) || ( (uint64_t) ( ( a | b )<<1 ) == 0 );
1753
1754}
1755/*}}}*/
1756int SS_Fpu::float64_lt_quiet( uint64_t a, uint64_t b )/*{{{*/
1757{
1758/* -------------------------------------------------------------------------------
1759Returns 1 if the double-precision floating-point value `a' is less than
1760the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
1761exception. Otherwise, the comparison is performed according to the IEC/IEEE
1762Standard for Binary Floating-Point Arithmetic.
1763------------------------------------------------------------------------------- */
1764 int aSign, bSign;
1765
1766 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
1767 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
1768 ) {
1769 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
1770 float_raise( EXC_INVALID );
1771 }
1772 return 0;
1773 }
1774 aSign = extractFloat64Sign( a );
1775 bSign = extractFloat64Sign( b );
1776 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( a | b )<<1 ) != 0 );
1777 return ( a != b ) && ( aSign ^ ( a < b ) );
1778
1779}
1780/*}}}*/
1781uint32_t SS_Fpu::float32_madd(uint32_t a, uint32_t b, uint32_t c, int isSub, int flipSign)/*{{{*/
1782{
1783/*
1784-------------------------------------------------------------------------------
1785Returns the result (a*b+c) of fused multiply-add operation on the
1786single-precision floating-point values `a', `b', and `c'. The operation is
1787performed according to the IEC/IEEE Standard for Binary Floating-Point
1788Arithmetic.
1789-------------------------------------------------------------------------------
1790*/
1791 int aSign, bSign, cSign, pSign, zSign;
1792 int16_t aExp, bExp, cExp, pExp, zExp, expDiff;
1793 uint32_t aSig, bSig, cSig, zSig;
1794 uint64_t pSig64, cSig64, zSig64;
1795 uint32_t pRes;
1796 int8_t shiftCount;
1797
1798 aSign = extractFloat32Sign(a);
1799 aExp = extractFloat32Exp(a);
1800 aSig = extractFloat32Frac(a);
1801 bSign = extractFloat32Sign(b);
1802 bExp = extractFloat32Exp(b);
1803 bSig = extractFloat32Frac(b);
1804
1805 /* Checking for invalid exception 0 * inf */
1806 if ((aExp == 0xFF) && (aSig == 0)) {
1807 if ((bExp | bSig) == 0) {
1808 /* a is infinity and b is zero -> invalid operation exception */
1809 float_raise( EXC_INVALID );
1810 return propagateFloat32NaN(0x7FFFFFFF, c);
1811 }
1812 }
1813 if ((bExp == 0xFF) && (bSig == 0)) {
1814 if ((aExp | aSig) == 0) {
1815 /* b is infinity and a is zero -> invalid operation exception */
1816 float_raise( EXC_INVALID );
1817 return propagateFloat32NaN(0x7FFFFFFF, c);
1818 }
1819 }
1820
1821 /* NaN propagation */
1822 if (float32_is_nan(a) || float32_is_nan(b) || float32_is_nan(c))
1823 return propagate3Float32NaN(a, b, c);
1824
1825 if (isSub)
1826 c ^= 0x80000000;
1827
1828 cSign = extractFloat32Sign(c);
1829 cExp = extractFloat32Exp(c);
1830 cSig = extractFloat32Frac(c);
1831 pSign = aSign ^ bSign;
1832
1833 /* Checking for infinities */
1834 if (aExp == 0xFF) {
1835 if (cExp == 0xFF) {
1836 if (pSign != cSign) {
1837 /* sum of infinities of different signs -> invalid operation */
1838 float_raise( EXC_INVALID );
1839 return 0x7FFFFFFF;
1840 }
1841 }
1842 return packFloat32(pSign ^ flipSign, 0xFF, 0);
1843 }
1844 if (bExp == 0xFF) {
1845 if (cExp == 0xFF) {
1846 if (pSign != cSign) {
1847 /* sum of infinities of different signs -> invalid operation */
1848 float_raise( EXC_INVALID );
1849 return 0x7FFFFFFF;
1850 }
1851 }
1852 return packFloat32(pSign ^ flipSign, 0xFF, 0);
1853 }
1854 if (cExp == 0xFF) {
1855 return packFloat32(cSign ^ flipSign, 0xFF, 0);
1856 }
1857
1858 /* Case a*b is zero */
1859 if (((aExp | aSig) == 0) || ((bExp | bSig) == 0)) {
1860 if ((cExp | cSig) == 0) { /* c is zero */
1861 if (pSign == cSign)
1862 return packFloat32(pSign ^ flipSign, 0, 0);
1863 else
1864 return packFloat32(float_rounding_mode == ROUND_DOWN, 0, 0);
1865 }
1866 /* return c, flip sign bit if necessary */
1867 if (flipSign)
1868 c ^= 0x80000000;
1869 return c;
1870 }
1871
1872 /* Normal computation */
1873 if (aExp == 0)
1874 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
1875 if (bExp == 0)
1876 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
1877 pExp = aExp + bExp - 0x7F;
1878 aSig = (aSig | 0x00800000) << 7;
1879 bSig = (bSig | 0x00800000) << 7;
1880 pSig64 = ((uint64_t) aSig) * bSig; /* binary pt btwn bits 60-59 */
1881 /* Normalize pSig64 with bin pt btwn bits 61-60 */
1882 if (pSig64 & 0x2000000000000000)
1883 ++pExp;
1884 else
1885 pSig64 <<= 1;
1886
1887 if (cExp == 0) {
1888 if (cSig == 0) {
1889 /* c is zero, returns the product */
1890 --pExp;
1891 shift64RightJamming(pSig64, 31, &pSig64);
1892 zSig = pSig64;
1893 return roundAndPackFloat32(pSign ^ flipSign, pExp, zSig);
1894 }
1895 else
1896 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
1897 }
1898 else
1899 cSig |= 0x00800000;
1900 cSig64 = ((uint64_t) cSig) << 38;/* binary pt btwn bits 61-60 */
1901 expDiff = pExp - cExp;
1902 if (0 < expDiff) {
1903 /* product exp is bigger */
1904 zExp = pExp;
1905 zSign = pSign;
1906 shift64RightJamming(cSig64, expDiff, &cSig64);
1907 if (pSign == cSign)
1908 zSig64 = pSig64 + cSig64;
1909 else
1910 zSig64 = pSig64 - cSig64;
1911 }
1912 else if (expDiff < 0) {
1913 /* c exp is bigger */
1914 zExp = cExp;
1915 zSign = cSign;
1916 shift64RightJamming(pSig64, -expDiff, &pSig64);
1917 if (pSign == cSign)
1918 zSig64 = cSig64 + pSig64;
1919 else
1920 zSig64 = cSig64 - pSig64;
1921 }
1922 else {
1923 /* p exp and c exp are equal */
1924 zExp = pExp;
1925 zSign = pSign;
1926 if (pSign == cSign)
1927 zSig64 = pSig64 + cSig64;
1928 else {
1929 if (pSig64 > cSig64) {
1930 zSig64 = pSig64 - cSig64;
1931 }
1932 else if (cSig64 > pSig64) {
1933 zSign = cSign;
1934 zSig64 = cSig64 - pSig64;
1935 }
1936 else {
1937 /* return zero value */
1938 return packFloat32(float_rounding_mode == ROUND_DOWN, 0, 0);
1939 }
1940 }
1941 }
1942
1943 shiftCount = countLeadingZeros64(zSig64);
1944 zSig64 <<= (shiftCount -1);
1945 zExp -= (shiftCount - 1);
1946 shift64RightJamming(zSig64, 32, &zSig64);
1947 zSig = zSig64;
1948 return roundAndPackFloat32(zSign ^ flipSign, zExp, zSig);
1949}
1950/*}}}*/
1951uint64_t SS_Fpu::float64_madd(uint64_t a, uint64_t b, uint64_t c, int isSub, int flipSign)/*{{{*/
1952{
1953/*
1954-------------------------------------------------------------------------------
1955Returns the result (a*b+c) of fused multiply-add operation on the
1956double-precision floating-point values `a', `b', and `c'. The operation is
1957performed according to the IEC/IEEE Standard for Binary Floating-Point
1958Arithmetic.
1959-------------------------------------------------------------------------------
1960*/
1961 int aSign, bSign, cSign, pSign, zSign;
1962 int16_t aExp, bExp, cExp, pExp, zExp, expDiff;
1963 uint64_t aSig, bSig, cSig0, cSig1, pSig0, pSig1, zSig0, zSig1;
1964 uint64_t pRes;
1965 int8_t shiftCount;
1966
1967 aSign = extractFloat64Sign(a);
1968 aExp = extractFloat64Exp(a);
1969 aSig = extractFloat64Frac(a);
1970 bSign = extractFloat64Sign(b);
1971 bExp = extractFloat64Exp(b);
1972 bSig = extractFloat64Frac(b);
1973
1974 /* Checking for invalid exception 0 * inf */
1975 if ((aExp == 0x7FF) && (aSig == 0)) {
1976 if ((bExp | bSig) == 0) {
1977 /* a is infinity and b is zero -> invalid operation exception */
1978 float_raise( EXC_INVALID );
1979 return propagateFloat64NaN(0x7FFFFFFFFFFFFFFF, c);
1980 }
1981 }
1982 if ((bExp == 0x7FF) && (bSig == 0)) {
1983 if ((aExp | aSig) == 0) {
1984 /* b is infinity and a is zero -> invalid operation exception */
1985 float_raise( EXC_INVALID );
1986 return propagateFloat64NaN(0x7FFFFFFFFFFFFFFF, c);
1987 }
1988 }
1989
1990 /* NaN propagation */
1991 if (float64_is_nan(a) || float64_is_nan(b) || float64_is_nan(c))
1992 return propagate3Float64NaN(a, b, c);
1993
1994 if (isSub)
1995 c ^= 0x8000000000000000;
1996
1997 cSign = extractFloat64Sign(c);
1998 cExp = extractFloat64Exp(c);
1999 cSig0 = extractFloat64Frac(c);
2000 pSign = aSign ^ bSign;
2001
2002 /* Checking for infinities */
2003 if (aExp == 0x7FF) {
2004 if (cExp == 0x7FF) {
2005 if (pSign != cSign) {
2006 /* sum of infinities of different signs -> invalid operation */
2007 float_raise( EXC_INVALID );
2008 return 0x7FFFFFFFFFFFFFFF;
2009 }
2010 }
2011 return packFloat64(pSign ^ flipSign, 0x7FF, 0);
2012 }
2013 if (bExp == 0x7FF) {
2014 if (cExp == 0x7FF) {
2015 if (pSign != cSign) {
2016 /* sum of infinities of different signs -> invalid operation */
2017 float_raise( EXC_INVALID );
2018 return 0x7FFFFFFFFFFFFFFF;
2019 }
2020 }
2021 return packFloat64(pSign ^ flipSign, 0x7FF, 0);
2022 }
2023 if (cExp == 0x7FF) {
2024 return packFloat64(cSign ^ flipSign, 0x7FF, 0);
2025 }
2026
2027 /* Case a*b is zero */
2028 if (((aExp | aSig) == 0) || ((bExp | bSig) == 0)) {
2029 if ((cExp | cSig0) == 0) { /* c is zero */
2030 if (pSign == cSign)
2031 return packFloat64(pSign ^ flipSign, 0, 0);
2032 else
2033 return packFloat64(float_rounding_mode == ROUND_DOWN, 0, 0);
2034 }
2035 /* return c, flip sign bit if necessary */
2036 if (flipSign)
2037 c ^= 0x8000000000000000;
2038 return c;
2039 }
2040
2041 /* Normal computation */
2042 if (aExp == 0)
2043 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
2044 if (bExp == 0)
2045 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
2046 pExp = aExp + bExp - 0x3FF;
2047 aSig = (aSig | 0x0010000000000000) << 10;
2048 bSig = (bSig | 0x0010000000000000) << 10;
2049 mul64To128(aSig, bSig, &pSig0, &pSig1); /* bin pt btwn bits 60-59 of pSig0 */
2050 /* Normalize pSig with bin pt btwn bits 61-60 of pSig0 */
2051 if (pSig0 & 0x2000000000000000)
2052 ++pExp;
2053 else
2054 shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
2055
2056 if (cExp == 0) {
2057 if (cSig0 == 0) {
2058 /* c is zero, returns the product */
2059 --pExp;
2060 shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
2061 pSig0 |= (pSig1 != 0);
2062 return roundAndPackFloat64(pSign ^ flipSign, pExp, pSig0);
2063 }
2064 else
2065 normalizeFloat64Subnormal(cSig0, &cExp, &cSig0);
2066 }
2067 else
2068 cSig0 |= 0x0010000000000000;
2069 cSig0 <<= 9; /* bin pt btwn bits 61-60 */
2070 cSig1 = 0;
2071 expDiff = pExp - cExp;
2072 if (0 < expDiff) {
2073 /* product exp is bigger */
2074 zExp = pExp;
2075 zSign = pSign;
2076 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
2077 if (pSign == cSign)
2078 add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
2079 else
2080 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
2081 }
2082 else if (expDiff < 0) {
2083 /* c exp is bigger */
2084 zExp = cExp;
2085 zSign = cSign;
2086 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
2087 if (pSign == cSign)
2088 add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
2089 else
2090 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
2091 }
2092 else {
2093 /* p exp and c exp are equal */
2094 zExp = pExp;
2095 zSign = pSign;
2096 if (pSign == cSign)
2097 add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
2098 else {
2099 if (lt128(cSig0, cSig1, pSig0, pSig1)) {
2100 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
2101 }
2102 else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
2103 zSign = cSign;
2104 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
2105 }
2106 else {
2107 /* return zero value */
2108 return packFloat64(float_rounding_mode == ROUND_DOWN, 0, 0);
2109 }
2110 }
2111 }
2112
2113 shiftCount = countLeadingZeros64(zSig0);
2114 if (shiftCount != 64) {
2115 shortShift128Left(zSig0, zSig1, shiftCount-1, &zSig0, &zSig1);
2116 zExp -= (shiftCount-1);
2117 zSig0 |= (zSig1 != 0);
2118 }
2119 else {
2120 shiftCount = countLeadingZeros64(zSig1);
2121 if (shiftCount == 0) {
2122 zSig1 >>= 1;
2123 zExp -= 63;
2124 }
2125 else {
2126 zSig1 <<= (shiftCount-1);
2127 zExp -= (63 + shiftCount);
2128 }
2129 zSig0 = zSig1;
2130 }
2131 return roundAndPackFloat64(zSign ^ flipSign, zExp, zSig0);
2132}
2133/*}}}*/