* ========== Copyright Header Begin ==========================================
* OpenSPARC T2 Processor File: SS_Fpu.h
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
* ========== Copyright Header End ============================================
enum Tininess
// Global configuration for rounding
enum Rounding
// Field value of fsr.rd and gsr.irnd
enum Exception
// Fields in fsr.cexc, fsr.aexc, fsr.tem
// Floating-point trap type codes, as stored in the fsr.ftt field.
// NOTE(review): FTT_NOTRAP and FTT_UNFINISHED_FPOP are referenced elsewhere in
// this file but their enumerators are not visible in this view.
enum FloatTrapType
// Field value of fsr.ftt
FTT_IEEE_754_EXCEPTION
= 1,
FTT_UNIMPLEMENTED_FPOP
= 3, // Reserved, used in V9
FTT_SEQUENCE_ERROR
= 4, // Reserved, used in V9
FTT_HARDWARE_ERROR
= 5, // Reserved, used in V9
FTT_INVALID_FP_REGISTER
= 6, // Only used in quad precision, i.e. never
FTT_RESERVED
= 7 // Reserved
// Bitwise OR of two Exception masks; the combined bits are returned as an
// Exception again so that flag sets can be built without explicit casts.
friend Exception operator|( Exception a, Exception b )
{
  const int merged = static_cast<int>(a) | static_cast<int>(b);
  return static_cast<Exception>(merged);
}
// Bitwise AND of two Exception masks; the common bits are returned as an
// Exception again so that flag sets can be masked without explicit casts.
friend Exception operator&( Exception a, Exception b )
{
  const int common = static_cast<int>(a) & static_cast<int>(b);
  return static_cast<Exception>(common);
}
enum ConditionCode
// Field value of fsr.fcc0, fsr.fcc1, fsr.fcc2, fsr.fcc3
enum ConditionField
// The condition fields in fsr
// Stores condition code `cc' into the fsr condition field selected by `cr':
// FCC0..FCC3 are written through fsr.fcc0()..fsr.fcc3() respectively.
// NOTE(review): the enclosing switch statement and braces are not visible in
// this view; only the case labels are.
void set_fcc( SS_Fsr
& fsr
, ConditionField cr
, ConditionCode cc
)
case FCC0
: fsr
.fcc0(cc
); break;
case FCC1
: fsr
.fcc1(cc
); break;
case FCC2
: fsr
.fcc2(cc
); break;
case FCC3
: fsr
.fcc3(cc
); break;
// Completes a floating-point operation by updating s->fsr according to `ftt'.
//
//   FTT_UNFINISHED_FPOP    : fsr.ftt is set to FTT_UNFINISHED_FPOP and the
//                            result of s->trap(..., FP_EXCEPTION_OTHER) is
//                            returned.
//   FTT_IEEE_754_EXCEPTION : fsr.cexc is set to `exc', fsr.ftt to `ftt', and
//                            the result of s->trap(..., FP_EXCEPTION_IEEE_754)
//                            is returned.
//   remaining statements   : `exc' is OR-ed into fsr.aexc, fsr.cexc is set to
//                            `exc' and fsr.ftt to FTT_NOTRAP.
//
// NOTE(review): the switch header, any default label and the final return are
// not visible in this view; presumably the last group is the no-trap path --
// confirm against the full source.
SS_Vaddr
exe_end( SS_Vaddr pc
, SS_Vaddr npc
, SS_Strand
* s
, SS_Instr
* i
, FloatTrapType ftt
, int exc
)
case FTT_UNFINISHED_FPOP
:
s
->fsr
.ftt(FTT_UNFINISHED_FPOP
);
return (s
->trap
)(pc
,npc
,s
,i
,SS_Trap::FP_EXCEPTION_OTHER
);
case FTT_IEEE_754_EXCEPTION
:
s
->fsr
.cexc(exc
).ftt(ftt
);
return (s
->trap
)(pc
,npc
,s
,i
,SS_Trap::FP_EXCEPTION_IEEE_754
);
s
->fsr
.aexc(s
->fsr
.aexc()|exc
).cexc(exc
).ftt(FTT_NOTRAP
);
// Declarations only; the implementations are not part of this view.
// Floating-point values are passed and returned as raw bit patterns
// (presumably uint32_t for single and uint64_t for double precision, judging
// by the names -- TODO confirm).

// Conversions between integers and floating-point formats.
uint32_t int32_to_float32( int32_t );
uint64_t int32_to_float64( int32_t );
uint32_t int64_to_float32( int64_t );
uint64_t int64_to_float64( int64_t );
int32_t float32_to_int32( uint32_t );
int64_t float32_to_int64( uint32_t );
uint64_t float32_to_float64( uint32_t );
int32_t float64_to_int32( uint64_t );
int64_t float64_to_int64( uint64_t );
uint32_t float64_to_float32( uint64_t );
// Single-precision arithmetic. The meaning of the unnamed int parameters is
// not visible here; `half' defaults to 0.
uint32_t float32_add( uint32_t, uint32_t, int, int half
=0 );
uint32_t float32_sub( uint32_t, uint32_t, int, int half
=0 );
uint32_t float32_mul( uint32_t, uint32_t, int );
uint32_t float32_div( uint32_t, uint32_t );
uint32_t float32_sqrt( uint32_t );
uint32_t float32_rsqrt( uint32_t );
uint32_t float32_madd( uint32_t, uint32_t, uint32_t, int, int);
// Double-precision arithmetic, mirroring the single-precision set above.
uint64_t float64_add( uint64_t, uint64_t, int, int half
=0 );
uint64_t float64_sub( uint64_t, uint64_t, int, int half
=0 );
uint64_t float64_mul( uint64_t, uint64_t, int );
uint64_t float64_div( uint64_t, uint64_t );
uint64_t float64_sqrt( uint64_t );
uint64_t float64_rsqrt( uint64_t );
uint64_t float64_madd( uint64_t, uint64_t, uint64_t, int, int );
// Comparison predicates; each returns an int used as a boolean.
int float32_eq( uint32_t, uint32_t );
int float32_lt( uint32_t, uint32_t );
int float32_eq_signaling( uint32_t, uint32_t );
int float32_lt_quiet( uint32_t, uint32_t );
int float64_eq( uint64_t, uint64_t );
int float64_lt( uint64_t, uint64_t );
int float64_eq_signaling( uint64_t, uint64_t );
int float64_lt_quiet( uint64_t, uint64_t );
// Parameters and results
Rounding float_rounding_mode
;
int float_exception_flags
;
int float_partial_exception_flags
;
Tininess float_detect_tininess
;
// to_sgn() takes the sign from the number and returns it in bit 0
// to_exp() takes the exponent and returns it in the lower bits
// to_mnt() takes the mantissa and returns it
// Returns 1 when bit 31 (the sign bit) of `f' is set, otherwise 0.
uint32_t to_sgn( uint32_t f )
{
  const uint32_t sign_mask = 0x80000000u;
  return (f & sign_mask) != 0;
}
// Returns the 8-bit exponent field (bits 30..23) of `f' in the low bits.
uint32_t to_exp( uint32_t f )
{
  const uint32_t without_fraction = f >> 23;
  return without_fraction & 0xffu;
}
// Returns the 23-bit mantissa field (bits 22..0) of `f'.
uint32_t to_mnt( uint32_t f )
{
  const uint32_t mantissa_mask = 0x007fffffu;
  return f & mantissa_mask;
}
// Returns 1 when bit 63 (the sign bit) of `f' is set, otherwise 0.
uint64_t to_sgn( uint64_t f )
{
  const uint64_t sign_mask = (uint64_t)1 << 63;
  return (f & sign_mask) != 0;
}
// Returns the 11-bit exponent field (bits 62..52) of `f' in the low bits.
uint64_t to_exp( uint64_t f )
{
  const uint64_t without_fraction = f >> 52;
  return without_fraction & 0x7ffu;
}
// Returns the 52-bit mantissa field (bits 51..0) of `f'.
uint64_t to_mnt( uint64_t f )
{
  const uint64_t mantissa_mask = 0x000fffffffffffffULL;
  return f & mantissa_mask;
}
// Assembles a 32-bit pattern from sign `s', exponent `e' and mantissa `m'.
// The fields are added (not OR-ed), so out-of-range inputs carry into the
// neighbouring field; all arithmetic wraps modulo 2^32.
uint32_t to_num( uint32_t s, uint32_t e, uint32_t m )
{
  uint32_t packed = m;
  packed += e << 23;
  packed += s << 31;
  return packed;
}
// Assembles a 64-bit pattern from sign `s', exponent `e' and mantissa `m'.
// The fields are added (not OR-ed), so out-of-range inputs carry into the
// neighbouring field; all arithmetic wraps modulo 2^64.
uint64_t to_num( uint64_t s, uint64_t e, uint64_t m )
{
  uint64_t packed = m;
  packed += e << 52;
  packed += s << 63;
  return packed;
}
// Returns a zero carrying the sign of `f': every bit except bit 31 is cleared.
uint32_t to_nil( uint32_t f )
{
  return f & 0x80000000u;
}
// Returns a zero carrying the sign of `f': every bit except bit 63 is cleared.
uint64_t to_nil( uint64_t f )
{
  return f & ((uint64_t)1 << 63);
}
// Returns `f' with the sign bit (bit 31) cleared.
uint32_t to_abs( uint32_t f )
{
  return f & 0x7fffffffu;
}
// Returns `f' with the sign bit (bit 63) cleared.
uint64_t to_abs( uint64_t f )
{
  return f & 0x7fffffffffffffffULL;
}
// Returns `f' with the sign bit (bit 31) inverted.
uint32_t to_neg( uint32_t f )
{
  const uint32_t sign_mask = 0x80000000u;
  return f ^ sign_mask;
}
// Returns `f' with the sign bit (bit 63) inverted.
uint64_t to_neg( uint64_t f )
{
  const uint64_t sign_mask = 0x8000000000000000ULL;
  return f ^ sign_mask;
}
// Builds a 32-bit pattern from a 64-bit one: fields `s', `e' and `m' are
// re-packed with to_num(), with `m' shifted right by 52 - 23 = 29 bits and
// truncated to 32 bits.
// NOTE(review): the declarations of s, e and m are not visible in this view;
// presumably they are the sign, exponent and mantissa derived from `f', and
// the name suggests the input is a double-precision NaN -- confirm.
uint32_t to_nans( uint64_t f
)
return to_num(s
,e
,uint32_t(m
>> (52 - 23)));
// Builds a 64-bit pattern from a 32-bit one: fields `s', `e' and `m' are
// re-packed with to_num(), with `m' shifted left by 52 - 23 = 29 bits.
// NOTE(review): the declarations of s, e and m are not visible in this view;
// presumably they are the sign, exponent and mantissa derived from `f', and
// the name suggests the input is a single-precision NaN -- confirm.
uint64_t to_nand( uint32_t f
)
return to_num(s
,e
,m
<< (52 - 23));
// Returns `f' with bit 22 (the quiet bit, the mantissa's top bit) set.
uint32_t to_qnan( uint32_t f )
{
  const uint32_t quiet_bit = 0x00400000u;
  return f | quiet_bit;
}
// Returns `f' with bit 22 (the quiet bit, the mantissa's top bit) cleared.
uint32_t to_snan( uint32_t f )
{
  const uint32_t all_but_quiet_bit = 0xffbfffffu;
  return f & all_but_quiet_bit;
}
// Returns `f' with bit 51 (the quiet bit, the mantissa's top bit) set.
uint64_t to_qnan( uint64_t f )
{
  const uint64_t quiet_bit = 0x0008000000000000ULL;
  return f | quiet_bit;
}
// Returns `f' with bit 51 (the quiet bit, the mantissa's top bit) cleared.
uint64_t to_snan( uint64_t f )
{
  const uint64_t all_but_quiet_bit = 0xfff7ffffffffffffULL;
  return f & all_but_quiet_bit;
}
// s11111111qmmmmmmmmmmmmmmmmmmmmmm nan q=1 ? qnan : snan
// s1111111100000000000000000000000 inf
// seeeeeeeemmmmmmmmmmmmmmmmmmmmmmm num 0 < e < 0xff
// s00000000mmmmmmmmmmmmmmmmmmmmmmm sub
// s0000000000000000000000000000000 nil
// True when the 8-bit exponent field of `f' is all ones (NaN or infinity).
int is_nan_or_inf( uint32_t f )
{
  const uint32_t exponent_mask = 0x7f800000u;
  return (f & exponent_mask) == exponent_mask;
}
// True when the 8-bit exponent field of `f' is zero (subnormal or zero).
int is_sub_or_nil( uint32_t f )
{
  const uint32_t exponent_mask = 0x7f800000u;
  return (f & exponent_mask) == 0;
}
// True when the 11-bit exponent field of `f' is all ones (NaN or infinity).
int is_nan_or_inf( uint64_t f )
{
  const uint64_t exponent_mask = 0x7ff0000000000000ULL;
  return (f & exponent_mask) == exponent_mask;
}
// True when the 11-bit exponent field of `f' is zero (subnormal or zero).
int is_sub_or_nil( uint64_t f )
{
  const uint64_t exponent_mask = 0x7ff0000000000000ULL;
  return (f & exponent_mask) == 0;
}
// is_nil() tests for zero
// is_sub() tests for subnormals
// is_inf() tests for infinite
// is_nan() tests for nan (not-a-number) and
// is_qnan() tests for quiet-nan
// is_snan() tests for signalling-nan.
// True when `f' is +0 or -0, i.e. every bit other than the sign is clear.
int is_nil( uint32_t f )
{
  return (f & 0x7fffffffu) == 0;
}
// True when `f' is subnormal: exponent field zero and mantissa nonzero.
int is_sub( uint32_t f )
{
  const uint32_t exponent = (f >> 23) & 0xffu;
  const uint32_t fraction = f & 0x007fffffu;
  return (exponent == 0x00) && (fraction != 0);
}
// True when `f' is +/-infinity: exponent field all ones and mantissa zero.
int is_inf( uint32_t f )
{
  const uint32_t exponent = (f >> 23) & 0xffu;
  const uint32_t fraction = f & 0x007fffffu;
  return (exponent == 0xff) && (fraction == 0);
}
// True when `f' is any NaN: exponent field all ones and mantissa nonzero.
int is_nan( uint32_t f )
{
  const uint32_t exponent = (f >> 23) & 0xffu;
  const uint32_t fraction = f & 0x007fffffu;
  return (exponent == 0xff) && (fraction != 0);
}
// True when `f' is a quiet NaN: exponent field all ones and bit 22 set.
// A set bit 22 already makes the mantissa nonzero, so no separate mantissa
// test is needed.
int is_qnan( uint32_t f )
{
  const uint32_t exponent_and_quiet = 0x7fc00000u;
  return (f & exponent_and_quiet) == exponent_and_quiet;
}
// True when `f' is a signalling NaN: exponent field all ones, bit 22 clear,
// and at least one of the remaining mantissa bits (21..0) set.
int is_snan( uint32_t f )
{
  const uint32_t exponent_and_quiet = f & 0x7fc00000u;
  const uint32_t payload = f & 0x003fffffu;
  return (exponent_and_quiet == 0x7f800000u) && (payload != 0);
}
// True when `f' is +0 or -0, i.e. every bit other than the sign is clear.
int is_nil( uint64_t f )
{
  return (f & 0x7fffffffffffffffULL) == 0;
}
// True when `f' is subnormal: exponent field zero and mantissa nonzero.
int is_sub( uint64_t f )
{
  const uint64_t exponent = (f >> 52) & 0x7ffu;
  const uint64_t fraction = f & 0x000fffffffffffffULL;
  return (exponent == 0x000) && (fraction != 0);
}
// True when `f' is +/-infinity: exponent field all ones and mantissa zero.
int is_inf( uint64_t f )
{
  const uint64_t exponent = (f >> 52) & 0x7ffu;
  const uint64_t fraction = f & 0x000fffffffffffffULL;
  return (exponent == 0x7ff) && (fraction == 0);
}
// True when `f' is any NaN: exponent field all ones and mantissa nonzero.
int is_nan( uint64_t f )
{
  const uint64_t exponent = (f >> 52) & 0x7ffu;
  const uint64_t fraction = f & 0x000fffffffffffffULL;
  return (exponent == 0x7ff) && (fraction != 0);
}
// True when `f' is a quiet NaN: exponent field all ones and bit 51 set.
// A set bit 51 already makes the mantissa nonzero, so no separate mantissa
// test is needed.
int is_qnan( uint64_t f )
{
  const uint64_t exponent_and_quiet = 0x7ff8000000000000ULL;
  return (f & exponent_and_quiet) == exponent_and_quiet;
}
// True when `f' is a signalling NaN: exponent field all ones, bit 51 clear,
// and at least one of the remaining mantissa bits (50..0) set.
int is_snan( uint64_t f )
{
  const uint64_t exponent_and_quiet = f & 0x7ff8000000000000ULL;
  const uint64_t payload = f & 0x0007ffffffffffffULL;
  return (exponent_and_quiet == 0x7ff0000000000000ULL) && (payload != 0);
}
// Accumulates the given exception bits into float_exception_flags; bits that
// are already set stay set.
void float_raise( Exception flags )
{
  float_exception_flags |= flags;
}
// Internal rounding, packing and normalisation helpers. Only the declarations
// are visible in this view; the descriptions below are inferred from the names
// and parameter lists and should be confirmed against the implementations.

// Round an absolute value with sign `zSign' to a 32- or 64-bit integer.
int32_t roundAndPackInt32( int zSign
, uint64_t absZ
);
int64_t roundAndPackInt64( int zSign
, uint64_t absZ0
, uint64_t absZ1
);
// Round sign/exponent/significand to a packed single or double pattern.
uint32_t roundAndPackFloat32( int zSign
, int16_t zExp
, uint32_t zSig
);
uint64_t roundAndPackFloat64( int zSign
, int16_t zExp
, uint64_t zSig
);
// As above, presumably normalising the significand first.
uint32_t normalizeRoundAndPackFloat32( int zSign
, int16_t zExp
, uint32_t zSig
);
uint64_t normalizeRoundAndPackFloat64( int zSign
, int16_t zExp
, uint64_t zSig
);
// Normalise a subnormal significand; results are written through the
// zExpPtr and zSigPtr output pointers.
void normalizeFloat32Subnormal( uint32_t aSig
, int16_t *zExpPtr
, uint32_t *zSigPtr
);
void normalizeFloat64Subnormal( uint64_t aSig
, int16_t *zExpPtr
, uint64_t *zSigPtr
);
// Magnitude add/subtract of two operands with result sign `zSign'. The
// meaning of `half' is not visible here.
uint32_t addFloat32Sigs( uint32_t a
, uint32_t b
, int zSign
, int half
);
uint32_t subFloat32Sigs( uint32_t a
, uint32_t b
, int zSign
, int half
);
uint64_t addFloat64Sigs( uint64_t a
, uint64_t b
, int zSign
, int half
);
uint64_t subFloat64Sigs( uint64_t a
, uint64_t b
, int zSign
, int half
);
// Quiet-NaN bit patterns: sign clear, every exponent and mantissa bit set.
static const uint32_t QNAN_32 = ~(uint32_t)0 >> 1;
static const uint64_t QNAN_64 = ~(uint64_t)0 >> 1;
// is_nil() returns true when both f and g are zero ignoring the sign.
// Note that +0.0 and -0.0 compare equal. This is a quick test for that.
// True when both `f' and `g' are zero, whatever their signs: no bit other
// than the sign bit may be set in either operand.
static int is_nil( uint32_t f, uint32_t g )
{
  const uint32_t combined = f | g;
  return (combined & 0x7fffffffu) == 0;
}
// True when both `f' and `g' are zero, whatever their signs: no bit other
// than the sign bit may be set in either operand.
static int is_nil( uint64_t f, uint64_t g )
{
  const uint64_t combined = f | g;
  return (combined & 0x7fffffffffffffffULL) == 0;
}
// abs() and neg() do the obvious thing to the floating point argument
// Orders two 32-bit floating-point bit patterns and returns a CondCode.
// The operands are first tested for being identical or both zero; the value
// returned in that case is not visible in this view (presumably "equal").
// Otherwise the raw patterns are compared as unsigned integers and the result
// is corrected by sign: if f < g the answer is GT when sgn(g) is set, else LT;
// if f >= g the answer is LT when sgn(f) is set, else GT.
// NOTE(review): sgn() is defined elsewhere; presumably it tests the sign bit.
// No NaN handling is visible here.
static CondCode
cmp( uint32_t f
, uint32_t g
)
if ((f
== g
) || is_nil(f
,g
))
return (f
< g
) ? (sgn(g
) ? GT
: LT
) : (sgn(f
) ? LT
: GT
);
// Orders two 64-bit floating-point bit patterns and returns a CondCode.
// The operands are first tested for being identical or both zero; the value
// returned in that case is not visible in this view (presumably "equal").
// Otherwise the raw patterns are compared as unsigned integers and the result
// is corrected by sign: if f < g the answer is GT when sgn(g) is set, else LT;
// if f >= g the answer is LT when sgn(f) is set, else GT.
// NOTE(review): sgn() is defined elsewhere; presumably it tests the sign bit.
// No NaN handling is visible here.
static CondCode
cmp( uint64_t f
, uint64_t g
)
if ((f
== g
) || is_nil(f
,g
))
return (f
< g
) ? (sgn(g
) ? GT
: LT
) : (sgn(f
) ? LT
: GT
);
// Shifts `a' right by the number of bits given in `count'. If any nonzero
// bits are shifted off, they are ``jammed'' into the least significant bit of
// the result by setting the least significant bit to 1. The value of `count'
// can be arbitrarily large; in particular, if `count' is greater than 32, the
// result will be either 0 or 1, depending on whether `a' is zero or nonzero.
// The result is stored in the location pointed to by `z'.
void shift32RightJamming( uint32_t a
, int16_t count
, uint32_t *z
)
*z
= ( a
>>count
) | ( ( a
<<( ( - count
) & 31 ) ) != 0 );
// Shifts `a' right by the number of bits given in `count'. If any nonzero
// bits are shifted off, they are ``jammed'' into the least significant bit of
// the result by setting the least significant bit to 1. The value of `count'
// can be arbitrarily large; in particular, if `count' is greater than 64, the
// result will be either 0 or 1, depending on whether `a' is zero or nonzero.
// The result is stored in the location pointed to by `z'.
void shift64RightJamming( uint64_t a
, int16_t count
, uint64_t *z
)
*z
= ( a
>>count
) | ( ( a
<<( ( - count
) & 63 ) ) != 0 );
// Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
// _plus_ the number of bits given in `count'. The shifted result is at most
// 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
// bits shifted off form a second 64-bit result as follows: The _last_ bit
// shifted off is the most-significant bit of the extra result, and the other
// 63 bits of the extra result are all zero if and only if _all_but_the_last_
// bits shifted off were all zero. This extra result is stored in the location
// pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
// (This routine makes more sense if `a0' and `a1' are considered to form a
// fixed-point value with binary point between `a0' and `a1'. This fixed-point
// value is shifted right by the number of bits given in `count', and the
// integer part of the result is returned at the location pointed to by
// `z0Ptr'. The fractional part of the result may be slightly corrupted as
// described above, and is returned at the location pointed to by `z1Ptr'.)
void shift64ExtraRightJamming( uint64_t a0
, uint64_t a1
, int16_t count
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
int8_t negCount
= ( - count
) & 63;
z1
= ( a0
<<negCount
) | ( a1
!= 0 );
z1
= ( ( a0
| a1
) != 0 );
// Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
// number of bits given in `count'. If any nonzero bits are shifted off, they
// are ``jammed'' into the least significant bit of the result by setting the
// least significant bit to 1. The value of `count' can be arbitrarily large;
// in particular, if `count' is greater than 128, the result will be either
// 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
// nonzero. The result is broken into two 64-bit pieces which are stored at
// the locations pointed to by `z0Ptr' and `z1Ptr'.
void shift128RightJamming( uint64_t a0
, uint64_t a1
, int16_t count
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
int8_t negCount
= ( - count
) & 63;
z1
= ( a0
<<negCount
) | ( a1
>>count
) | ( ( a1
<<negCount
) != 0 );
z1
= ( a0
>>( count
& 63 ) ) | ( ( ( a0
<<negCount
) | a1
) != 0 );
z1
= ( ( a0
| a1
) != 0 );
// Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
// number of bits given in `count'. Any bits shifted off are lost. The value
// of `count' must be less than 64. The result is broken into two 64-bit
// pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
void shortShift128Left( uint64_t a0
, uint64_t a1
, int16_t count
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
*z0Ptr
= ( count
== 0 ) ? a0
: ( a0
<<count
) | ( a1
>>( ( - count
) & 63 ) );
// Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
// value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
// any carry out is lost. The result is broken into two 64-bit pieces which
// are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
void add128( uint64_t a0
, uint64_t a1
, uint64_t b0
, uint64_t b1
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
*z0Ptr
= a0
+ b0
+ ( z1
< a1
);
// Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
// 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
// 2^128, so any borrow out (carry out) is lost. The result is broken into two
// 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
// `z1Ptr'.
void sub128( uint64_t a0
, uint64_t a1
, uint64_t b0
, uint64_t b1
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
*z0Ptr
= a0
- b0
- ( a1
< b1
);
// Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
// into two 64-bit pieces which are stored at the locations pointed to by
// `z0Ptr' and `z1Ptr'.
void mul64To128( uint64_t a
, uint64_t b
, uint64_t *z0Ptr
, uint64_t *z1Ptr
)
uint32_t aHigh
, aLow
, bHigh
, bLow
;
uint64_t z0
, zMiddleA
, zMiddleB
, z1
;
z1
= ( (uint64_t) aLow
) * bLow
;
zMiddleA
= ( (uint64_t) aLow
) * bHigh
;
zMiddleB
= ( (uint64_t) aHigh
) * bLow
;
z0
= ( (uint64_t) aHigh
) * bHigh
;
z0
+= ( ( (uint64_t) ( zMiddleA
< zMiddleB
) )<<32 ) + ( zMiddleA
>>32 );
// Returns an approximation to the 64-bit integer quotient obtained by dividing
// `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
// divisor `b' must be at least 2^63. If q is the exact quotient truncated
// toward zero, the approximation returned lies between q and q + 2 inclusive.
// If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
// unsigned integer is returned.
uint64_t estimateDiv128To64( uint64_t a0
, uint64_t a1
, uint64_t b
)
uint64_t rem0
, rem1
, term0
, term1
;
if ( b
<= a0
) return 0xFFFFFFFFFFFFFFFF ;
z
= ( b0
<<32 <= a0
) ? 0xFFFFFFFF00000000 : ( a0
/ b0
)<<32;
mul64To128( b
, z
, &term0
, &term1
);
sub128( a0
, a1
, term0
, term1
, &rem0
, &rem1
);
while ( ( (int64_t) rem0
) < 0 ) {
add128( rem0
, rem1
, b0
, b1
, &rem0
, &rem1
);
rem0
= ( rem0
<<32 ) | ( rem1
>>32 );
z
|= ( b0
<<32 <= rem0
) ? 0xFFFFFFFF : rem0
/ b0
;
// Returns an approximation to the square root of the 32-bit significand given
// by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
// `aExp' (the least significant bit) is 1, the integer returned approximates
// 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
// is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
// case, the approximation returned lies strictly within +/-2 of the exact
// value.
uint32_t estimateSqrt32( int16_t aExp
, uint32_t a
)
static const uint16_t sqrtOddAdjustments
[] = {
0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
static const uint16_t sqrtEvenAdjustments
[] = {
0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
z
= 0x4000 + ( a
>>17 ) - sqrtOddAdjustments
[ index
];
z
= ( ( a
/ z
)<<14 ) + ( z
<<15 );
z
= 0x8000 + ( a
>>17 ) - sqrtEvenAdjustments
[ index
];
z
= ( 0x20000 <= z
) ? 0xFFFF8000 : ( z
<<15 );
if ( z
<= a
) return (uint32_t) ( ( (int32_t) a
)>>1 );
return ( (uint32_t) ( ( ( (uint64_t) a
)<<31 ) / z
) ) + ( z
>>1 );
// Returns the number of leading 0 bits before the most-significant 1 bit of
// `a'. If `a' is zero, 32 is returned.
int8_t countLeadingZeros32( uint32_t a
)
static const int8_t countLeadingZerosHigh
[] =
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
shiftCount
+= countLeadingZerosHigh
[ a
>>24 ];
// Returns the number of leading 0 bits before the most-significant 1 bit of
// `a'. If `a' is zero, 64 is returned.
int8_t countLeadingZeros64( uint64_t a
)
if ( a
< ( (uint64_t) 1 )<<32 ) {
shiftCount
+= countLeadingZeros32( a
);
// Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
// than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
// returns 0.
int lt128( uint64_t a0
, uint64_t a1
, uint64_t b0
, uint64_t b1
)
return ( a0
< b0
) || ( ( a0
== b0
) && ( a1
< b1
) );
// Functions and definitions to determine: (1) whether tininess for underflow
// is detected before or after rounding by default, (2) what (if anything)
// happens when exceptions are raised, (3) how signaling NaNs are distinguished
// from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
// are propagated from function inputs to output. These details are target-
// specific.
// Returns nonzero when `a' is a NaN. Shifting left by one discards the sign;
// the remainder exceeds 0xFF000000 only if the exponent field is all ones and
// at least one fraction bit is set.
int float32_is_nan( uint32_t a
)
return ( 0xFF000000 < (uint32_t) ( a
<<1 ) );
// Returns nonzero when `a' is a signaling NaN: bits 30..22 must equal 0x1FE
// (exponent all ones, bit 22 clear) and at least one of bits 21..0 must be
// set, which excludes infinity.
int float32_is_signaling_nan( uint32_t a
)
return ( ( ( a
>>22 ) & 0x1FF ) == 0x1FE ) && ( a
& 0x003FFFFF );
// Returns nonzero when `a' is a NaN. Shifting left by one discards the sign;
// the remainder exceeds 0xFFE0000000000000 only if the exponent field is all
// ones and at least one fraction bit is set.
int float64_is_nan( uint64_t a
)
return ( 0xFFE0000000000000 < (uint64_t) ( a
<<1 ) );
// Returns nonzero when `a' is a signaling NaN: bits 62..51 must equal 0xFFE
// (exponent all ones, bit 51 clear) and at least one of bits 50..0 must be
// set, which excludes infinity.
int float64_is_signaling_nan( uint64_t a
)
return ( ( ( a
>>51 ) & 0xFFF ) == 0xFFE ) && ( a
& 0x0007FFFFFFFFFFFF );
// Selects the NaN to return for a two-operand single-precision operation.
// EXC_INVALID is raised via float_raise() if either operand is a signaling
// NaN. The result is chosen in this priority order: `b' if it is a signaling
// NaN, else `a' if it is a signaling NaN, else `b' if it is any NaN, else `a'.
// NOTE(review): no statement that quiets the operands is visible in this
// view -- confirm against the full source.
uint32_t propagateFloat32NaN( uint32_t a
, uint32_t b
)
int aIsSignalingNaN
, bIsNaN
, bIsSignalingNaN
;
aIsSignalingNaN
= float32_is_signaling_nan( a
);
bIsNaN
= float32_is_nan( b
);
bIsSignalingNaN
= float32_is_signaling_nan( b
);
if ( aIsSignalingNaN
| bIsSignalingNaN
) float_raise( EXC_INVALID
);
return bIsSignalingNaN
? b
: aIsSignalingNaN
? a
: bIsNaN
? b
: a
;
// Selects the NaN to return for a three-operand single-precision operation.
// EXC_INVALID is raised via float_raise() if any operand is a signaling NaN.
// Signaling NaNs take priority in the order c, b, a; failing that, `c' is
// returned if it is a NaN, else `b' if it is a NaN, else `a'.
// aIsNaN is computed but does not take part in the visible selection.
uint32_t propagate3Float32NaN( uint32_t a
, uint32_t b
, uint32_t c
)
int aIsNaN
, bIsNaN
, cIsNaN
;
int aIsSignalingNaN
, bIsSignalingNaN
, cIsSignalingNaN
;
aIsNaN
= float32_is_nan( a
);
aIsSignalingNaN
= float32_is_signaling_nan( a
);
bIsNaN
= float32_is_nan( b
);
bIsSignalingNaN
= float32_is_signaling_nan( b
);
cIsNaN
= float32_is_nan( c
);
cIsSignalingNaN
= float32_is_signaling_nan( c
);
if ( aIsSignalingNaN
| bIsSignalingNaN
| cIsSignalingNaN
)
float_raise( EXC_INVALID
);
return cIsSignalingNaN
? c
: bIsSignalingNaN
? b
: aIsSignalingNaN
? a
:
cIsNaN
? c
: bIsNaN
? b
: a
;
// Selects the NaN to return for a two-operand double-precision operation.
// The operands are classified first; bit 51 is then set in both `a' and `b'
// so the value returned always has that bit set. EXC_INVALID is raised via
// float_raise() if either original operand was a signaling NaN. The result is
// chosen in this priority order: `b' if it was a signaling NaN, else `a' if it
// was a signaling NaN, else `b' if it is any NaN, else `a'.
uint64_t propagateFloat64NaN( uint64_t a
, uint64_t b
)
int aIsSignalingNaN
, bIsNaN
, bIsSignalingNaN
;
aIsSignalingNaN
= float64_is_signaling_nan( a
);
bIsNaN
= float64_is_nan( b
);
bIsSignalingNaN
= float64_is_signaling_nan( b
);
a
|= 0x0008000000000000 ;
b
|= 0x0008000000000000 ;
if ( aIsSignalingNaN
| bIsSignalingNaN
) float_raise( EXC_INVALID
);
return bIsSignalingNaN
? b
: aIsSignalingNaN
? a
: bIsNaN
? b
: a
;
// Selects the NaN to return for a three-operand double-precision operation.
// The operands are classified first; bit 51 is then set in `a', `b' and `c'
// so the value returned always has that bit set. EXC_INVALID is raised via
// float_raise() if any original operand was a signaling NaN. Signaling NaNs
// take priority in the order c, b, a; failing that, `c' is returned if it is
// a NaN, else `b' if it is a NaN, else `a'.
// aIsNaN is computed but does not take part in the visible selection.
uint64_t propagate3Float64NaN( uint64_t a
, uint64_t b
, uint64_t c
)
int aIsNaN
, bIsNaN
, cIsNaN
;
int aIsSignalingNaN
, bIsSignalingNaN
, cIsSignalingNaN
;
aIsNaN
= float64_is_nan( a
);
aIsSignalingNaN
= float64_is_signaling_nan( a
);
bIsNaN
= float64_is_nan( b
);
bIsSignalingNaN
= float64_is_signaling_nan( b
);
cIsNaN
= float64_is_nan( c
);
cIsSignalingNaN
= float64_is_signaling_nan( c
);
a
|= 0x0008000000000000 ;
b
|= 0x0008000000000000 ;
c
|= 0x0008000000000000 ;
if ( aIsSignalingNaN
| bIsSignalingNaN
| cIsSignalingNaN
)
float_raise( EXC_INVALID
);
return cIsSignalingNaN
? c
: bIsSignalingNaN
? b
: aIsSignalingNaN
? a
:
cIsNaN
? c
: bIsNaN
? b
: a
;
// Returns the 23-bit fraction field (bits 22..0) of `a'.
uint32_t extractFloat32Frac( uint32_t a )
{
  // Push the sign and exponent out at the top, then shift back down.
  const uint32_t fraction_at_top = a << 9;
  return fraction_at_top >> 9;
}
// Returns the 8-bit biased exponent field (bits 30..23) of `a'.
int16_t extractFloat32Exp( uint32_t a )
{
  // Drop the sign bit at the top, then bring the exponent down to bit 0.
  const uint32_t without_sign = a << 1;
  return (int16_t) ( without_sign >> 24 );
}
// Returns the sign bit (bit 31) of `a' as 0 or 1.
int extractFloat32Sign( uint32_t a )
{
  return ( a & 0x80000000u ) != 0;
}
// Assembles a single-precision bit pattern as
//   (zSign << 31) + (zExp << 23) + zSig.
// The fields are combined by addition, not OR, so any bits of `zSig' at or
// above bit 23 carry into the exponent field. `zExp' is converted to uint32_t
// before shifting.
uint32_t packFloat32( int zSign
, int16_t zExp
, uint32_t zSig
)
return ( ( (uint32_t) zSign
)<<31 ) + ( ( (uint32_t) zExp
)<<23 ) + zSig
;
// Returns the 52-bit fraction field (bits 51..0) of `a'.
uint64_t extractFloat64Frac( uint64_t a )
{
  // Push the sign and exponent out at the top, then shift back down.
  const uint64_t fraction_at_top = a << 12;
  return fraction_at_top >> 12;
}
// Returns the 11-bit biased exponent field (bits 62..52) of `a'.
int16_t extractFloat64Exp( uint64_t a )
{
  // Drop the sign bit at the top, then bring the exponent down to bit 0.
  const uint64_t without_sign = a << 1;
  return (int16_t) ( without_sign >> 53 );
}
// Returns the sign bit (bit 63) of `a' as 0 or 1.
int extractFloat64Sign( uint64_t a )
{
  return ( a & 0x8000000000000000ULL ) != 0;
}
// Assembles a double-precision bit pattern as
//   (zSign << 63) + (zExp << 52) + zSig.
// The fields are combined by addition, not OR, so any bits of `zSig' at or
// above bit 52 carry into the exponent field. `zExp' is converted to uint64_t
// before shifting.
uint64_t packFloat64( int zSign
, int16_t zExp
, uint64_t zSig
)
return ( ( (uint64_t) zSign
)<<63 ) + ( ( (uint64_t) zExp
)<<52 ) + zSig
;