Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / sam / cpus / vonk / ss / lib / cpu / src / SS_Fpu.h
/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: SS_Fpu.h
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
#ifndef __SS_Fpu_h__
#define __SS_Fpu_h__
#include "SS_Types.h"
#include "SS_Strand.h"
class SS_Fpu
{
public:
// Selects whether tininess (the underflow condition) is detected before or
// after rounding. The class keeps the active choice in float_detect_tininess.
enum Tininess // Global configuration for rounding
{
TINY_AFTER_ROUNDING = 0,
TINY_BEFORE_ROUNDING = 1
};
// Rounding direction. The enumerator values are the raw encodings of the
// fsr.rd and gsr.irnd fields, so a field value can be converted directly.
// The class keeps the active mode in float_rounding_mode.
enum Rounding // Field value of fsr.rd and gsr.irnd
{
ROUND_NEAREST = 0,
ROUND_TO_ZERO = 1,
ROUND_UP = 2,
ROUND_DOWN = 3
};
// Floating point exception flags. Every enumerator is a distinct single bit,
// so several exceptions can be held in one value; the friend operator| and
// operator& of this class combine and mask them.
enum Exception // Fields in fsr.cexc, fsr.aexc, fsr.tem
{
EXC_NONE = 0,
EXC_INEXACT = 1, // nx
EXC_DIVBYZERO = 2, // dz
EXC_UNDERFLOW = 4, // uf
EXC_OVERFLOW = 8, // of
EXC_INVALID = 16 // nv
};
// Floating point trap type as stored in fsr.ftt. exe_end() in this class
// handles only FTT_NOTRAP, FTT_IEEE_754_EXCEPTION and FTT_UNFINISHED_FPOP;
// every other value runs into its assert.
enum FloatTrapType // Field value of fsr.ftt
{
FTT_NOTRAP = 0,
FTT_IEEE_754_EXCEPTION = 1,
FTT_UNFINISHED_FPOP = 2,
FTT_UNIMPLEMENTED_FPOP = 3, // Reserved, used in V9
FTT_SEQUENCE_ERROR = 4, // Reserved, used in V9
FTT_HARDWARE_ERROR = 5, // Reserved, used in V9
FTT_INVALID_FP_REGISTER = 6, // Only used in quad precision, i.e. never
FTT_RESERVED = 7 // Reserved
};
// Union of two Exception flag sets, computed on the underlying integers.
friend Exception operator|( Exception a, Exception b )
{
    const int merged = static_cast<int>( a ) | static_cast<int>( b );
    return static_cast<Exception>( merged );
}
// Intersection of two Exception flag sets, computed on the underlying integers.
friend Exception operator&( Exception a, Exception b )
{
    const int common = static_cast<int>( a ) & static_cast<int>( b );
    return static_cast<Exception>( common );
}
// Result of a floating point compare as written into one of the fsr.fccN
// fields by set_fcc().
// NOTE(review): EQ/LT/GT/UN presumably mean equal / less / greater /
// unordered - confirm against the architecture manual.
enum ConditionCode // Field value of fsr.fcc0, fsr.fcc1, fsr.fcc2, fsr.fcc3
{
EQ = 0,
LT = 1,
GT = 2,
UN = 3
};
// Selects which of the four condition code fields of the fsr set_fcc()
// writes to.
enum ConditionField // The condition fields in fsr
{
FCC0 = 0,
FCC1 = 1,
FCC2 = 2,
FCC3 = 3
};
// Default constructor; only declared here, the body lives outside this header.
SS_Fpu();
// Writes condition code `cc' into the fsr condition field selected by `cr'.
// A selector outside FCC0..FCC3 runs into the assert.
void set_fcc( SS_Fsr& fsr, ConditionField cr, ConditionCode cc )
{
    if ( cr == FCC0 )
    {
        fsr.fcc0(cc);
    }
    else if ( cr == FCC1 )
    {
        fsr.fcc1(cc);
    }
    else if ( cr == FCC2 )
    {
        fsr.fcc2(cc);
    }
    else if ( cr == FCC3 )
    {
        fsr.fcc3(cc);
    }
    else
    {
        assert(0);
    }
}
// Finishes a floating point instruction on strand `s' according to `ftt':
//   FTT_NOTRAP             - ORs `exc' into fsr.aexc, sets fsr.cexc to `exc',
//                            clears fsr.ftt, commits the fsr, sets s->npc to
//                            npc + 4 and returns `npc'.
//   FTT_IEEE_754_EXCEPTION - sets fsr.cexc to `exc' and fsr.ftt to `ftt',
//                            commits the fsr and returns the result of the
//                            strand's trap routine for FP_EXCEPTION_IEEE_754.
//   FTT_UNFINISHED_FPOP    - sets fsr.ftt, commits the fsr and returns the
//                            result of the trap routine for FP_EXCEPTION_OTHER.
// Any other trap type runs into the assert and yields -1.
SS_Vaddr exe_end( SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* i, FloatTrapType ftt, int exc )
{
    if ( ftt == FTT_NOTRAP )
    {
        s->fsr.aexc(s->fsr.aexc()|exc).cexc(exc).ftt(FTT_NOTRAP);
        s->set_fsr();
        s->npc = npc + 4;
        return npc;
    }

    if ( ftt == FTT_IEEE_754_EXCEPTION )
    {
        s->fsr.cexc(exc).ftt(ftt);
        s->set_fsr();
        return (s->trap)(pc,npc,s,i,SS_Trap::FP_EXCEPTION_IEEE_754);
    }

    if ( ftt == FTT_UNFINISHED_FPOP )
    {
        s->fsr.ftt(FTT_UNFINISHED_FPOP);
        s->set_fsr();
        return (s->trap)(pc,npc,s,i,SS_Trap::FP_EXCEPTION_OTHER);
    }

    assert(0);
    return -1;
}
// Conversion methods
// Only declared here; the bodies are outside this header. Floating point
// operands and results travel as raw bit images in uint32_t (float32) and
// uint64_t (float64), matching the field layout used by the to_*() and
// is_*() helpers of this class.
// NOTE(review): presumably these honour float_rounding_mode and report
// through float_exception_flags - confirm against the implementation.
uint32_t int32_to_float32( int32_t );
uint64_t int32_to_float64( int32_t );
uint32_t int64_to_float32( int64_t );
uint64_t int64_to_float64( int64_t );
int32_t float32_to_int32( uint32_t );
int64_t float32_to_int64( uint32_t );
uint64_t float32_to_float64( uint32_t );
int32_t float64_to_int32( uint64_t );
int64_t float64_to_int64( uint64_t );
uint32_t float64_to_float32( uint64_t );
// Computation methods
// Only declared here; the bodies are outside this header. Operands and
// results are raw float32 / float64 bit images.
// NOTE(review): the unnamed trailing int parameters are not described
// anywhere in this header - look up their meaning in the implementation
// before calling. `half' defaults to 0 for add and sub.
uint32_t float32_add( uint32_t, uint32_t, int, int half=0 );
uint32_t float32_sub( uint32_t, uint32_t, int, int half=0 );
uint32_t float32_mul( uint32_t, uint32_t, int );
uint32_t float32_div( uint32_t, uint32_t );
uint32_t float32_sqrt( uint32_t );
uint32_t float32_rsqrt( uint32_t );
uint32_t float32_madd( uint32_t, uint32_t, uint32_t, int, int);
uint64_t float64_add( uint64_t, uint64_t, int, int half=0 );
uint64_t float64_sub( uint64_t, uint64_t, int, int half=0 );
uint64_t float64_mul( uint64_t, uint64_t, int );
uint64_t float64_div( uint64_t, uint64_t );
uint64_t float64_sqrt( uint64_t );
uint64_t float64_rsqrt( uint64_t );
uint64_t float64_madd( uint64_t, uint64_t, uint64_t, int, int );
// Comparisons
// Only declared here; the bodies are outside this header. Operands are raw
// float32 / float64 bit images.
// NOTE(review): presumably the result is nonzero when the relation holds,
// and the _signaling / _quiet suffixes select whether quiet NaN operands
// raise the invalid exception - confirm against the implementation.
int float32_eq( uint32_t, uint32_t );
int float32_lt( uint32_t, uint32_t );
int float32_eq_signaling( uint32_t, uint32_t );
int float32_lt_quiet( uint32_t, uint32_t );
int float64_eq( uint64_t, uint64_t );
int float64_lt( uint64_t, uint64_t );
int float64_eq_signaling( uint64_t, uint64_t );
int float64_lt_quiet( uint64_t, uint64_t );
// Parameters and results
// Public state shared between the caller and the arithmetic routines.
// Active rounding direction (see enum Rounding).
Rounding float_rounding_mode;
// Exception bits collected so far; float_raise() ORs Exception values in.
int float_exception_flags;
// NOTE(review): the next three fields are not touched anywhere in this
// header; their exact meaning has to be taken from the implementation.
int float_partial_exception_flags;
int float_round_needed;
int float_unfinished_op;
// Tininess detection policy (see enum Tininess).
Tininess float_detect_tininess;
// Field extraction from a 32 bit float image (the uint64_t overloads of the
// same names do the same for a 64 bit image):
//   to_sgn() returns the sign bit in bit 0
//   to_exp() returns the 8 bit exponent field in the low bits
//   to_mnt() returns the 23 bit mantissa field
uint32_t to_sgn( uint32_t f )
{
    return ( f & 0x80000000u ) ? 1 : 0;
}
uint32_t to_exp( uint32_t f )
{
    return ( f >> 23 ) & 0xffu;
}
uint32_t to_mnt( uint32_t f )
{
    return f & 0x007fffffu;
}
// Field extraction from a 64 bit float image: sign bit in bit 0, the 11 bit
// exponent field in the low bits, and the 52 bit mantissa field.
uint64_t to_sgn( uint64_t f )
{
    return ( f & ( (uint64_t) 1 << 63 ) ) ? 1 : 0;
}
uint64_t to_exp( uint64_t f )
{
    return ( f >> 52 ) & 0x7ff;
}
uint64_t to_mnt( uint64_t f )
{
    return f & 0x000fffffffffffff;
}
// Assembles a 32 bit float image from sign `s', exponent `e' and mantissa
// `m'. The parts are added, not OR-ed, so a mantissa wider than 23 bits
// carries into the exponent field.
uint32_t to_num( uint32_t s, uint32_t e, uint32_t m )
{
    const uint32_t sign_part = s << 31;
    const uint32_t exp_part = e << 23;
    return sign_part + exp_part + m;
}
// Assembles a 64 bit float image from sign `s', exponent `e' and mantissa
// `m'. The parts are added, not OR-ed, so a mantissa wider than 52 bits
// carries into the exponent field.
uint64_t to_num( uint64_t s, uint64_t e, uint64_t m )
{
    const uint64_t sign_part = s << 63;
    const uint64_t exp_part = e << 52;
    return sign_part + exp_part + m;
}
// Returns a zero carrying the sign of `f' (everything but the sign bit cleared).
uint32_t to_nil( uint32_t f )
{
    return f & 0x80000000u;
}
// Returns a zero carrying the sign of `f' (everything but the sign bit cleared).
uint64_t to_nil( uint64_t f )
{
    const uint64_t sign_mask = (uint64_t) 1 << 63;
    return f & sign_mask;
}
// Absolute value: clears the sign bit of a 32 bit float image.
uint32_t to_abs( uint32_t f )
{
    return f & 0x7fffffffu;
}
// Absolute value: clears the sign bit of a 64 bit float image.
uint64_t to_abs( uint64_t f )
{
    const uint64_t sign_mask = (uint64_t) 1 << 63;
    return f & ~sign_mask;
}
// Negation: flips the sign bit of a 32 bit float image.
uint32_t to_neg( uint32_t f )
{
    return f ^ 0x80000000u;
}
// Negation: flips the sign bit of a 64 bit float image.
uint64_t to_neg( uint64_t f )
{
    const uint64_t sign_mask = (uint64_t) 1 << 63;
    return f ^ sign_mask;
}
// Narrows a 64 bit image to a 32 bit image with an all-ones exponent: the
// sign is kept and the upper 23 bits of the 52 bit mantissa become the new
// mantissa (the lower 29 mantissa bits are dropped).
uint32_t to_nans( uint64_t f )
{
    const uint32_t sign = (uint32_t) to_sgn(f);
    const uint32_t frac = (uint32_t) ( to_mnt(f) >> (52 - 23) );
    return to_num(sign, (uint32_t) 0x0ff, frac);
}
// Widens a 32 bit image to a 64 bit image with an all-ones exponent: the
// sign is kept and the 23 bit mantissa is placed in the upper bits of the
// 52 bit mantissa.
uint64_t to_nand( uint32_t f )
{
    const uint64_t sign = to_sgn(f);
    const uint64_t frac = to_mnt(f);
    return to_num(sign, (uint64_t) 0x7ff, frac << (52 - 23));
}
// Bit 22 is the quiet bit of a 32 bit NaN image: to_qnan() sets it,
// to_snan() clears it. All other bits pass through unchanged.
uint32_t to_qnan( uint32_t f )
{
    return f | 0x00400000u;
}
uint32_t to_snan( uint32_t f )
{
    return f & 0xffbfffffu;
}
// Bit 51 is the quiet bit of a 64 bit NaN image: to_qnan() sets it,
// to_snan() clears it. All other bits pass through unchanged.
uint64_t to_qnan( uint64_t f )
{
    const uint64_t quiet_bit = (uint64_t) 1 << 51;
    return f | quiet_bit;
}
uint64_t to_snan( uint64_t f )
{
    const uint64_t quiet_bit = (uint64_t) 1 << 51;
    return f & ~quiet_bit;
}
// Layout of a 32 bit float image (s = sign, e = exponent, m = mantissa):
// s11111111qmmmmmmmmmmmmmmmmmmmmmm nan q=1 ? qnan : snan
// s1111111100000000000000000000000 inf
// seeeeeeeemmmmmmmmmmmmmmmmmmmmmmm num 0 < e < 0xff
// s00000000mmmmmmmmmmmmmmmmmmmmmmm sub
// s0000000000000000000000000000000 nil
// is_nan_or_inf() is true when the exponent field is all ones,
// is_sub_or_nil() is true when the exponent field is all zeros.
int is_nan_or_inf( uint32_t f )
{
    return ( f & 0x7f800000u ) == 0x7f800000u;
}
int is_sub_or_nil( uint32_t f )
{
    return ( f & 0x7f800000u ) == 0;
}
// 64 bit variants: is_nan_or_inf() is true when the 11 bit exponent field is
// all ones, is_sub_or_nil() is true when it is all zeros.
int is_nan_or_inf( uint64_t f )
{
    return ( f & 0x7ff0000000000000 ) == 0x7ff0000000000000;
}
int is_sub_or_nil( uint64_t f )
{
    return ( f & 0x7ff0000000000000 ) == 0;
}
// Classification of a 32 bit float image; the sign bit is always ignored.
// is_nil()  tests for zero
// is_sub()  tests for subnormals (zero exponent, nonzero mantissa)
// is_inf()  tests for infinity
// is_nan()  tests for any nan (not-a-number)
// is_qnan() tests for a quiet nan (quiet bit 22 set)
// is_snan() tests for a signalling nan (quiet bit 22 clear)
int is_nil( uint32_t f )
{
    return ( f & 0x7fffffffu ) == 0;
}
int is_sub( uint32_t f )
{
    return ( ( f & 0x7f800000u ) == 0 ) && ( ( f & 0x007fffffu ) != 0 );
}
int is_inf( uint32_t f )
{
    return ( f & 0x7fffffffu ) == 0x7f800000u;
}
int is_nan( uint32_t f )
{
    return ( f & 0x7fffffffu ) > 0x7f800000u;
}
int is_qnan( uint32_t f )
{
    return ( f & 0x7fc00000u ) == 0x7fc00000u;
}
int is_snan( uint32_t f )
{
    return ( ( f & 0x7fc00000u ) == 0x7f800000u ) && ( ( f & 0x003fffffu ) != 0 );
}
// Classification of a 64 bit float image; the sign bit is always ignored.
// Same tests as the 32 bit overloads, with an 11 bit exponent field, a 52
// bit mantissa field and the quiet bit at bit 51.
int is_nil( uint64_t f )
{
    return ( f & 0x7fffffffffffffff ) == 0;
}
int is_sub( uint64_t f )
{
    return ( ( f & 0x7ff0000000000000 ) == 0 ) && ( ( f & 0x000fffffffffffff ) != 0 );
}
int is_inf( uint64_t f )
{
    return ( f & 0x7fffffffffffffff ) == 0x7ff0000000000000;
}
int is_nan( uint64_t f )
{
    return ( f & 0x7fffffffffffffff ) > 0x7ff0000000000000;
}
int is_qnan( uint64_t f )
{
    return ( f & 0x7ff8000000000000 ) == 0x7ff8000000000000;
}
int is_snan( uint64_t f )
{
    return ( ( f & 0x7ff8000000000000 ) == 0x7ff0000000000000 ) && ( ( f & 0x0007ffffffffffff ) != 0 );
}
protected:
// Records the given exception bits by OR-ing them into
// float_exception_flags; bits that are already set stay set.
void float_raise( Exception flags )
{
    float_exception_flags |= static_cast<int>( flags );
}
// Internal rounding, normalisation and significand add/subtract helpers.
// Only declared here; the bodies are outside this header.
// NOTE(review): the z/a prefixes and the Sign/Exp/Sig suffixes presumably
// follow the same convention as packFloat32()/packFloat64() in this class
// (sign, biased exponent, significand) - confirm against the implementation.
int32_t roundAndPackInt32( int zSign, uint64_t absZ );
int64_t roundAndPackInt64( int zSign, uint64_t absZ0, uint64_t absZ1 );
uint32_t roundAndPackFloat32( int zSign, int16_t zExp, uint32_t zSig );
uint64_t roundAndPackFloat64( int zSign, int16_t zExp, uint64_t zSig );
uint32_t normalizeRoundAndPackFloat32( int zSign, int16_t zExp, uint32_t zSig );
uint64_t normalizeRoundAndPackFloat64( int zSign, int16_t zExp, uint64_t zSig );
// The two Subnormal helpers return their results through the pointer arguments.
void normalizeFloat32Subnormal( uint32_t aSig, int16_t *zExpPtr, uint32_t *zSigPtr );
void normalizeFloat64Subnormal( uint64_t aSig, int16_t *zExpPtr, uint64_t *zSigPtr );
uint32_t addFloat32Sigs( uint32_t a, uint32_t b, int zSign, int half );
uint32_t subFloat32Sigs( uint32_t a, uint32_t b, int zSign, int half );
uint64_t addFloat64Sigs( uint64_t a, uint64_t b, int zSign, int half );
uint64_t subFloat64Sigs( uint64_t a, uint64_t b, int zSign, int half );
// Quiet NaN bit images: sign bit clear, every exponent and mantissa bit set
// (so the quiet bit is set as well).
// NOTE(review): presumably the default NaN produced for invalid operations -
// confirm against the implementation.
static const uint32_t QNAN_32 = 0x7FFFFFFF;
static const uint64_t QNAN_64 = 0x7FFFFFFFFFFFFFFF;
// Disabled code, never compiled. NOTE(review): it refers to `CondCode' and
// `sgn()', neither of which is declared in this class (the class has
// ConditionCode and to_sgn()), so it would not build if simply re-enabled.
#if 0
// is_nil() returns true when both f and g are zero ignoring the sign.
// Note that +0.0 and -0.0 compare equal. This is a quick test for that.
static int is_nil( uint32_t f, uint32_t g ) { return ((f | g) << 1) == 0; }
static int is_nil( uint64_t f, uint64_t g ) { return ((f | g) << 1) == 0; }
// abs() and neg() do the obvious thing to the floating point argument
// cmp() orders two bit images: equal images and any pair of zeros give EQ;
// otherwise the unsigned comparison of the images is corrected by the sign
// of the larger image. There is no NaN handling in here.
static CondCode cmp( uint32_t f, uint32_t g )
{
if ((f == g) || is_nil(f,g))
return EQ;
else
return (f < g) ? (sgn(g) ? GT : LT) : (sgn(f) ? LT : GT);
}
static CondCode cmp( uint64_t f, uint64_t g )
{
if ((f == g) || is_nil(f,g))
return EQ;
else
return (f < g) ? (sgn(g) ? GT : LT) : (sgn(f) ? LT : GT);
}
#endif
// Shifts `a' right by `count' bits with a sticky least significant bit: if
// any nonzero bit is shifted out, bit 0 of the result is set. `count' may be
// arbitrarily large; for a count of 32 or more the result is 1 when `a' is
// nonzero and 0 otherwise. The result is stored through `z'.
void shift32RightJamming( uint32_t a, int16_t count, uint32_t *z )
{
    uint32_t result;

    if ( count >= 32 ) {
        result = ( a != 0 ) ? 1 : 0;
    }
    else if ( count != 0 ) {
        // Bits that fall off the right end, moved to the top for testing.
        const uint32_t lost = a << ( ( - count ) & 31 );
        result = ( a >> count ) | ( ( lost != 0 ) ? 1 : 0 );
    }
    else {
        result = a;
    }
    *z = result;
}
// Shifts `a' right by `count' bits with a sticky least significant bit: if
// any nonzero bit is shifted out, bit 0 of the result is set. `count' may be
// arbitrarily large; for a count of 64 or more the result is 1 when `a' is
// nonzero and 0 otherwise. The result is stored through `z'.
void shift64RightJamming( uint64_t a, int16_t count, uint64_t *z )
{
    uint64_t result;

    if ( count >= 64 ) {
        result = ( a != 0 ) ? 1 : 0;
    }
    else if ( count != 0 ) {
        // Bits that fall off the right end, moved to the top for testing.
        const uint64_t lost = a << ( ( - count ) & 63 );
        result = ( a >> count ) | ( ( lost != 0 ) ? 1 : 0 );
    }
    else {
        result = a;
    }
    *z = result;
}
// Treats `a0'.`a1' as a fixed point value with the binary point between the
// two words and shifts it right by `count' bits. The integer part of the
// result is stored through `z0Ptr'. The fraction is stored through `z1Ptr' in
// a condensed form: its most significant bit is the last bit shifted out of
// the integer part, and its remaining bits are all zero if and only if every
// other bit shifted out was zero. `count' may be arbitrarily large.
void shift64ExtraRightJamming( uint64_t a0, uint64_t a1, int16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    const int complement = ( - count ) & 63;
    const uint64_t lowSticky = ( a1 != 0 ) ? 1 : 0;
    uint64_t whole = 0;
    uint64_t extra;

    if ( count > 64 ) {
        // Everything is shifted out; only "was anything nonzero" survives.
        extra = ( ( a0 | a1 ) != 0 ) ? 1 : 0;
    }
    else if ( count == 64 ) {
        extra = a0 | lowSticky;
    }
    else if ( count != 0 ) {
        extra = ( a0 << complement ) | lowSticky;
        whole = a0 >> count;
    }
    else {
        extra = a1;
        whole = a0;
    }
    *z1Ptr = extra;
    *z0Ptr = whole;
}
// Shifts the 128 bit value `a0':`a1' (a0 is the upper word) right by `count'
// bits with a sticky least significant bit: if any nonzero bit is shifted
// out, bit 0 of the result is set. `count' may be arbitrarily large; for a
// count of 128 or more the result is 1 when the input is nonzero and 0
// otherwise. The upper word of the result is stored through `z0Ptr', the
// lower word through `z1Ptr'.
void shift128RightJamming( uint64_t a0, uint64_t a1, int16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    const int complement = ( - count ) & 63;
    uint64_t upper = 0;
    uint64_t lower;

    if ( count >= 128 ) {
        lower = ( ( a0 | a1 ) != 0 ) ? 1 : 0;
    }
    else if ( count > 64 ) {
        // The whole lower word and part of the upper word are shifted out.
        const uint64_t lost = ( a0 << complement ) | a1;
        lower = ( a0 >> ( count & 63 ) ) | ( ( lost != 0 ) ? 1 : 0 );
    }
    else if ( count == 64 ) {
        lower = a0 | ( ( a1 != 0 ) ? 1 : 0 );
    }
    else if ( count != 0 ) {
        const uint64_t lost = a1 << complement;
        lower = ( a0 << complement ) | ( a1 >> count ) | ( ( lost != 0 ) ? 1 : 0 );
        upper = a0 >> count;
    }
    else {
        lower = a1;
        upper = a0;
    }
    *z1Ptr = lower;
    *z0Ptr = upper;
}
// Shifts the 128 bit value `a0':`a1' (a0 is the upper word) left by `count'
// bits; bits shifted out at the top are lost. `count' must be less than 64.
// The upper word of the result is stored through `z0Ptr', the lower word
// through `z1Ptr'.
void shortShift128Left( uint64_t a0, uint64_t a1, int16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    const uint64_t lower = a1 << count;
    uint64_t upper = a0;

    if ( count != 0 ) {
        // Top bits of the lower word move into the bottom of the upper word.
        upper = ( a0 << count ) | ( a1 >> ( ( - count ) & 63 ) );
    }
    *z1Ptr = lower;
    *z0Ptr = upper;
}
// 128 bit addition modulo 2^128: `a0':`a1' plus `b0':`b1' (the first word of
// each pair is the upper one). A carry out of the top is lost. The upper
// word of the sum is stored through `z0Ptr', the lower word through `z1Ptr'.
void add128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    const uint64_t lowSum = a1 + b1;
    // The low sum wrapped around exactly when it ended up below an operand.
    const uint64_t carry = ( lowSum < a1 ) ? 1 : 0;

    *z1Ptr = lowSum;
    *z0Ptr = a0 + b0 + carry;
}
// 128 bit subtraction modulo 2^128: `a0':`a1' minus `b0':`b1' (the first
// word of each pair is the upper one). A borrow out of the top is lost. The
// upper word of the difference is stored through `z0Ptr', the lower word
// through `z1Ptr'.
void sub128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    // The low words borrow from the high words when a1 is the smaller one.
    const uint64_t borrow = ( a1 < b1 ) ? 1 : 0;

    *z1Ptr = a1 - b1;
    *z0Ptr = a0 - b0 - borrow;
}
// Full 64 x 64 -> 128 bit unsigned multiplication, built from four 32 x 32
// partial products. The upper word of the product is stored through `z0Ptr',
// the lower word through `z1Ptr'.
void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    const uint64_t aLo = a & 0xFFFFFFFFu;
    const uint64_t aHi = a >> 32;
    const uint64_t bLo = b & 0xFFFFFFFFu;
    const uint64_t bHi = b >> 32;

    uint64_t lower = aLo * bLo;
    uint64_t upper = aHi * bHi;

    // Sum of the two cross products; it can overflow 64 bits, in which case
    // the lost carry is worth 2^32 in the upper word.
    const uint64_t crossB = aHi * bLo;
    uint64_t cross = aLo * bHi + crossB;
    if ( cross < crossB ) {
        upper += ( (uint64_t) 1 ) << 32;
    }
    upper += cross >> 32;

    // The low half of the cross sum lands in the top half of the lower word.
    cross <<= 32;
    lower += cross;
    if ( lower < cross ) {
        upper += 1;
    }

    *z1Ptr = lower;
    *z0Ptr = upper;
}
// Returns an approximation to the 64-bit integer quotient obtained by dividing
// `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
// divisor `b' must be at least 2^63. If q is the exact quotient truncated
// toward zero, the approximation returned lies between q and q + 2 inclusive.
// If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
// unsigned integer is returned.
// The quotient is built in two 32 bit halves, each estimated by dividing by
// only the upper 32 bits of `b'.
uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
{
uint64_t b0, b1;
uint64_t rem0, rem1, term0, term1;
uint64_t z;
// Quotient does not fit in 64 bits: saturate.
if ( b <= a0 ) return 0xFFFFFFFFFFFFFFFF ;
// b0 = upper 32 bits of the divisor.
b0 = b>>32;
// Estimate of the upper 32 quotient bits, saturated at 0xFFFFFFFF.
z = ( b0<<32 <= a0 ) ? 0xFFFFFFFF00000000 : ( a0 / b0 )<<32;
// 128 bit remainder rem0:rem1 = a0:a1 - b * z.
mul64To128( b, z, &term0, &term1 );
sub128( a0, a1, term0, term1, &rem0, &rem1 );
// A negative remainder means the estimate was too large: step the upper
// half of z down by one and add b * 2^32 (the pair b0:b1) back each time.
while ( ( (int64_t) rem0 ) < 0 ) {
z -= 0x100000000 ;
b1 = b<<32;
add128( rem0, rem1, b0, b1, &rem0, &rem1 );
}
// Take the middle 64 bits of the remainder and estimate the lower 32
// quotient bits from them, again saturated at 0xFFFFFFFF.
rem0 = ( rem0<<32 ) | ( rem1>>32 );
z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
return z;
}
// Returns an approximation to the square root of the 32-bit significand given
// by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
// `aExp' (the least significant bit) is 1, the integer returned approximates
// 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
// is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
// case, the approximation returned lies strictly within +/-2 of the exact
// value.
uint32_t estimateSqrt32( int16_t aExp, uint32_t a )
{
// Correction terms for the initial estimate, one table per exponent parity,
// each indexed by bits 27..30 of `a'.
static const uint16_t sqrtOddAdjustments[] = {
0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
};
static const uint16_t sqrtEvenAdjustments[] = {
0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
};
int8_t index;
uint32_t z;
index = ( a>>27 ) & 15;
if ( aExp & 1 ) {
// Odd exponent: table based first estimate, one refinement step using
// a / z, then `a' is halved for the final division below.
z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
z = ( ( a / z )<<14 ) + ( z<<15 );
a >>= 1;
}
else {
// Even exponent: table based first estimate and one refinement step; the
// scaled estimate is clamped to 0xFFFF8000 when it would not fit.
z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
z = a / z + z;
z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
// Early exit when the estimate does not exceed `a'. Note that `a' is
// reinterpreted as signed before the shift, so this relies on how the
// compiler shifts negative values right.
if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
}
// Final refinement step, done with a 64 bit dividend.
return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
}
// Returns the number of zero bits above the most significant set bit of `a';
// returns 32 when `a' is zero. The position is found by halving the search
// window: 16, 8, 4, 2 and finally 1 bit.
int8_t countLeadingZeros32( uint32_t a )
{
    int8_t zeros = 0;

    if ( a == 0 ) {
        return 32;
    }
    if ( ( a & 0xFFFF0000u ) == 0 ) {
        zeros += 16;
        a <<= 16;
    }
    if ( ( a & 0xFF000000u ) == 0 ) {
        zeros += 8;
        a <<= 8;
    }
    if ( ( a & 0xF0000000u ) == 0 ) {
        zeros += 4;
        a <<= 4;
    }
    if ( ( a & 0xC0000000u ) == 0 ) {
        zeros += 2;
        a <<= 2;
    }
    if ( ( a & 0x80000000u ) == 0 ) {
        zeros += 1;
    }
    return zeros;
}
// Returns the number of zero bits above the most significant set bit of `a';
// returns 64 when `a' is zero. Delegates to countLeadingZeros32() on the
// upper word when that is nonzero, otherwise on the lower word plus 32.
int8_t countLeadingZeros64( uint64_t a )
{
    const uint32_t upperWord = (uint32_t) ( a >> 32 );

    if ( upperWord != 0 ) {
        return countLeadingZeros32( upperWord );
    }
    return (int8_t) ( 32 + countLeadingZeros32( (uint32_t) a ) );
}
// Unsigned 128 bit "less than": returns 1 when `a0':`a1' is smaller than
// `b0':`b1' (the first word of each pair is the upper one), otherwise 0.
int lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
{
    // The upper words decide unless they are equal.
    if ( a0 != b0 ) {
        return ( a0 < b0 ) ? 1 : 0;
    }
    return ( a1 < b1 ) ? 1 : 0;
}
// The functions from here on fix the target specific details: (1) whether
// tininess for underflow is detected before or after rounding by default,
// (2) what (if anything) happens when exceptions are raised, (3) how
// signaling NaNs are told apart from quiet NaNs, (4) the default quiet NaNs
// that are generated, and (5) how NaNs propagate from inputs to the output.
//
// float32_is_nan() returns 1 when `a' is any NaN (exponent all ones and a
// nonzero fraction), otherwise 0. The sign bit is ignored.
int float32_is_nan( uint32_t a )
{
    const uint32_t magnitude = a & 0x7FFFFFFFu;
    return magnitude > 0x7F800000u;
}
// Returns 1 when `a' is a signaling NaN: exponent all ones, quiet bit (bit
// 22) clear and at least one of the lower 22 fraction bits set. Otherwise 0.
int float32_is_signaling_nan( uint32_t a )
{
    if ( ( a & 0x7FC00000u ) != 0x7F800000u ) {
        return 0;
    }
    return ( a & 0x003FFFFFu ) != 0;
}
// Returns 1 when `a' is any NaN (exponent all ones and a nonzero fraction),
// otherwise 0. The sign bit is ignored.
int float64_is_nan( uint64_t a )
{
    const uint64_t magnitude = a & 0x7FFFFFFFFFFFFFFF;
    return magnitude > 0x7FF0000000000000;
}
// Returns 1 when `a' is a signaling NaN: exponent all ones, quiet bit (bit
// 51) clear and at least one of the lower 51 fraction bits set. Otherwise 0.
int float64_is_signaling_nan( uint64_t a )
{
    if ( ( a & 0x7FF8000000000000 ) != 0x7FF0000000000000 ) {
        return 0;
    }
    return ( a & 0x0007FFFFFFFFFFFF ) != 0;
}
// Chooses the NaN result for a two operand float32 operation. Raises
// EXC_INVALID when either operand is a signaling NaN. The returned value
// always has its quiet bit set and is picked in this order: `b' if it is
// signaling, else `a' if it is signaling, else `b' if it is any NaN, else `a'.
uint32_t propagateFloat32NaN( uint32_t a, uint32_t b )
{
    const int aSignals = float32_is_signaling_nan( a );
    const int bSignals = float32_is_signaling_nan( b );
    const int bNaN = float32_is_nan( b );
    const uint32_t quietA = a | 0x00400000;
    const uint32_t quietB = b | 0x00400000;

    if ( aSignals || bSignals ) {
        float_raise( EXC_INVALID );
    }
    if ( bSignals ) {
        return quietB;
    }
    if ( aSignals ) {
        return quietA;
    }
    return bNaN ? quietB : quietA;
}
// Chooses the NaN result for a three operand float32 operation. Raises
// EXC_INVALID when any operand is a signaling NaN. The returned value always
// has its quiet bit set and is picked in this order: a signaling `c', `b' or
// `a' (in that priority), else `c' if it is any NaN, else `b' if it is any
// NaN, else `a'. Since `a' is the final fallback no NaN test on `a' is
// needed.
uint32_t propagate3Float32NaN( uint32_t a, uint32_t b, uint32_t c )
{
    int bIsNaN, cIsNaN;
    int aIsSignalingNaN, bIsSignalingNaN, cIsSignalingNaN;

    aIsSignalingNaN = float32_is_signaling_nan( a );
    bIsNaN = float32_is_nan( b );
    bIsSignalingNaN = float32_is_signaling_nan( b );
    cIsNaN = float32_is_nan( c );
    cIsSignalingNaN = float32_is_signaling_nan( c );
    // Quieten all candidates up front; the classification above used the
    // unmodified operands.
    a |= 0x00400000;
    b |= 0x00400000;
    c |= 0x00400000;
    if ( aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN )
        float_raise( EXC_INVALID );
    return cIsSignalingNaN ? c : bIsSignalingNaN ? b : aIsSignalingNaN ? a :
           cIsNaN ? c : bIsNaN ? b : a;
}
// Chooses the NaN result for a two operand float64 operation. Raises
// EXC_INVALID when either operand is a signaling NaN. The returned value
// always has its quiet bit set and is picked in this order: `b' if it is
// signaling, else `a' if it is signaling, else `b' if it is any NaN, else `a'.
uint64_t propagateFloat64NaN( uint64_t a, uint64_t b )
{
    const int aSignals = float64_is_signaling_nan( a );
    const int bSignals = float64_is_signaling_nan( b );
    const int bNaN = float64_is_nan( b );
    const uint64_t quietA = a | 0x0008000000000000;
    const uint64_t quietB = b | 0x0008000000000000;

    if ( aSignals || bSignals ) {
        float_raise( EXC_INVALID );
    }
    if ( bSignals ) {
        return quietB;
    }
    if ( aSignals ) {
        return quietA;
    }
    return bNaN ? quietB : quietA;
}
// Chooses the NaN result for a three operand float64 operation. Raises
// EXC_INVALID when any operand is a signaling NaN. The returned value always
// has its quiet bit set and is picked in this order: a signaling `c', `b' or
// `a' (in that priority), else `c' if it is any NaN, else `b' if it is any
// NaN, else `a'. Since `a' is the final fallback no NaN test on `a' is
// needed.
uint64_t propagate3Float64NaN( uint64_t a, uint64_t b, uint64_t c )
{
    int bIsNaN, cIsNaN;
    int aIsSignalingNaN, bIsSignalingNaN, cIsSignalingNaN;

    aIsSignalingNaN = float64_is_signaling_nan( a );
    bIsNaN = float64_is_nan( b );
    bIsSignalingNaN = float64_is_signaling_nan( b );
    cIsNaN = float64_is_nan( c );
    cIsSignalingNaN = float64_is_signaling_nan( c );
    // Quieten all candidates up front; the classification above used the
    // unmodified operands.
    a |= 0x0008000000000000 ;
    b |= 0x0008000000000000 ;
    c |= 0x0008000000000000 ;
    if ( aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN )
        float_raise( EXC_INVALID );
    return cIsSignalingNaN ? c : bIsSignalingNaN ? b : aIsSignalingNaN ? a :
           cIsNaN ? c : bIsNaN ? b : a;
}
// Returns the 23 bit fraction field of a float32 image.
uint32_t extractFloat32Frac( uint32_t a )
{
    return ( a << 9 ) >> 9;
}
// Returns the 8 bit biased exponent field of a float32 image.
int16_t extractFloat32Exp( uint32_t a )
{
    return (int16_t) ( ( a << 1 ) >> 24 );
}
// Returns the sign bit of a float32 image as 0 or 1.
int extractFloat32Sign( uint32_t a )
{
    return ( a & 0x80000000u ) != 0;
}
// Assembles a float32 image from sign, exponent and significand. The three
// parts are added, not OR-ed: a significand that reaches into bit 23 or
// above carries into the exponent field.
uint32_t packFloat32( int zSign, int16_t zExp, uint32_t zSig )
{
    const uint32_t signPart = ( (uint32_t) zSign ) << 31;
    const uint32_t expPart = ( (uint32_t) zExp ) << 23;
    return signPart + expPart + zSig;
}
// Returns the 52 bit fraction field of a float64 image.
uint64_t extractFloat64Frac( uint64_t a )
{
    return ( a << 12 ) >> 12;
}
// Returns the 11 bit biased exponent field of a float64 image.
int16_t extractFloat64Exp( uint64_t a )
{
    return (int16_t) ( ( a << 1 ) >> 53 );
}
// Returns the sign bit of a float64 image as 0 or 1.
int extractFloat64Sign( uint64_t a )
{
    return ( a & ( (uint64_t) 1 << 63 ) ) != 0;
}
// Assembles a float64 image from sign, exponent and significand. The three
// parts are added, not OR-ed: a significand that reaches into bit 52 or
// above carries into the exponent field.
uint64_t packFloat64( int zSign, int16_t zExp, uint64_t zSig )
{
    const uint64_t signPart = ( (uint64_t) zSign ) << 63;
    const uint64_t expPart = ( (uint64_t) zExp ) << 52;
    return signPart + expPart + zSig;
}
};
#endif