[OpenSPARC-T2-SAM] / sam-t2 / sam / cpus / vonk / ss / lib / cpu / src / SS_Fpu.h

/*
* ========== Copyright Header Begin ==========================================
* 
* OpenSPARC T2 Processor File: SS_Fpu.h
* Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
* 
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
* 
* The above named program is distributed in the hope that it will be 
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* General Public License for more details.
* 
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
* 
* ========== Copyright Header End ============================================
*/

#ifndef __SS_Fpu_h__
#define __SS_Fpu_h__

#include "SS_Types.h"
#include "SS_Strand.h"

class SS_Fpu
{
  public:
    // Selects whether tininess (the underflow condition) is detected before
    // or after rounding.  This is the type of float_detect_tininess.
    enum Tininess	// Global configuration for tininess detection
    {
      TINY_AFTER_ROUNDING  = 0,
      TINY_BEFORE_ROUNDING = 1
    };
    // Rounding direction.  This is the type of float_rounding_mode; the
    // numeric values are the encodings used by the register fields named below.
    enum Rounding	// Field value of fsr.rd and gsr.irnd
    {
      ROUND_NEAREST = 0,
      ROUND_TO_ZERO = 1,
      ROUND_UP      = 2,
      ROUND_DOWN    = 3
    };
    // Floating point exception flags.  Every value is a distinct single bit,
    // so flags can be combined with the operator| / operator& overloads
    // declared for this enum further down in the class.
    enum Exception	// Fields in fsr.cexc, fsr.aexc, fsr.tem
    {
      EXC_NONE      =  0,
      EXC_INEXACT   =  1,	// nx
      EXC_DIVBYZERO =  2,	// dz
      EXC_UNDERFLOW =  4,	// uf
      EXC_OVERFLOW  =  8,	// of
      EXC_INVALID   = 16	// nv
    };
    // Floating point trap type.  exe_end() in this class only handles
    // FTT_NOTRAP, FTT_IEEE_754_EXCEPTION and FTT_UNFINISHED_FPOP; every other
    // value ends up in its asserting default branch.
    enum FloatTrapType	// Field value of fsr.ftt
    {
      FTT_NOTRAP              = 0,
      FTT_IEEE_754_EXCEPTION  = 1,
      FTT_UNFINISHED_FPOP     = 2,
      FTT_UNIMPLEMENTED_FPOP  = 3,	// Reserved, used in V9
      FTT_SEQUENCE_ERROR      = 4,	// Reserved, used in V9
      FTT_HARDWARE_ERROR      = 5,	// Reserved, used in V9
      FTT_INVALID_FP_REGISTER = 6,	// Only used for quad precision, i.e. never here
      FTT_RESERVED            = 7	// Reserved
    };

    // Bitwise OR of two exception flag sets; the integer result is converted
    // back to Exception.
    friend Exception operator|( Exception a, Exception b )
    {
      return static_cast<Exception>( static_cast<int>(a) | static_cast<int>(b) );
    }

    // Bitwise AND of two exception flag sets; the integer result is converted
    // back to Exception.
    friend Exception operator&( Exception a, Exception b )
    {
      return static_cast<Exception>( static_cast<int>(a) & static_cast<int>(b) );
    }

    // Result of a floating point compare, as stored in one fcc field.
    // NOTE(review): the names presumably stand for equal / less than /
    // greater than / unordered -- confirm against the compare implementation.
    enum ConditionCode	// Field value of fsr.fcc0, fsr.fcc1, fsr.fcc2, fsr.fcc3
    {
      EQ = 0, 
      LT = 1, 
      GT = 2, 
      UN = 3 
    };
    // Selector for one of the four fcc fields; set_fcc() maps each value to
    // the matching fsr.fccN() setter.
    enum ConditionField	// The condition fields in fsr
    {
      FCC0 = 0,
      FCC1 = 1,
      FCC2 = 2,
      FCC3 = 3
    };

    SS_Fpu();

    // Stores condition code `cc' into the fsr condition field selected by
    // `cr'.  A selector outside FCC0..FCC3 trips the assert.
    void set_fcc( SS_Fsr& fsr, ConditionField cr, ConditionCode cc )
    {
      if (cr == FCC0)
        fsr.fcc0(cc);
      else if (cr == FCC1)
        fsr.fcc1(cc);
      else if (cr == FCC2)
        fsr.fcc2(cc);
      else if (cr == FCC3)
        fsr.fcc3(cc);
      else
        assert(0);
    }

    // Completes a floating point instruction according to `ftt':
    //
    //   FTT_NOTRAP             ORs `exc' into fsr.aexc, stores `exc' in
    //                          fsr.cexc, clears fsr.ftt, sets s->npc to
    //                          npc + 4 and returns npc.
    //   FTT_IEEE_754_EXCEPTION stores `exc' in fsr.cexc and `ftt' in fsr.ftt,
    //                          then returns the result of the strand's trap
    //                          handler for FP_EXCEPTION_IEEE_754.
    //   FTT_UNFINISHED_FPOP    stores the trap type in fsr.ftt (`exc' is not
    //                          recorded), then returns the result of the
    //                          trap handler for FP_EXCEPTION_OTHER.
    //
    // In every handled case s->set_fsr() is called after the fsr update.
    // Any other trap type trips the assert and yields -1.
    SS_Vaddr exe_end( SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* i, FloatTrapType ftt, int exc )
    {
      if (ftt == FTT_NOTRAP)
      {
        s->fsr.aexc(s->fsr.aexc()|exc).cexc(exc).ftt(FTT_NOTRAP);
        s->set_fsr();
        s->npc = npc + 4;
        return npc;
      }

      if (ftt == FTT_IEEE_754_EXCEPTION)
      {
        s->fsr.cexc(exc).ftt(ftt);
        s->set_fsr();
        return (s->trap)(pc,npc,s,i,SS_Trap::FP_EXCEPTION_IEEE_754);
      }

      if (ftt == FTT_UNFINISHED_FPOP)
      {
        s->fsr.ftt(FTT_UNFINISHED_FPOP);
        s->set_fsr();
        return (s->trap)(pc,npc,s,i,SS_Trap::FP_EXCEPTION_OTHER);
      }

      assert(0);
      return -1;
    }

    // Conversion methods
    //
    // Declarations only; the definitions are not part of this header.
    // NOTE(review): the uint32_t / uint64_t "float" arguments and results are
    // presumably raw single / double precision bit patterns, like the inline
    // helpers in this class use -- confirm against the implementation.
 
    uint32_t int32_to_float32( int32_t );
    uint64_t int32_to_float64( int32_t );
    
    uint32_t int64_to_float32( int64_t );
    uint64_t int64_to_float64( int64_t );

    int32_t  float32_to_int32( uint32_t );
    int64_t  float32_to_int64( uint32_t );
    uint64_t float32_to_float64( uint32_t );

    int32_t  float64_to_int32( uint64_t );
    int64_t  float64_to_int64( uint64_t );
    uint32_t float64_to_float32( uint64_t );

    // Computation methods
    //
    // Declarations only; the definitions are not part of this header.
    // NOTE(review): the meaning of the unnamed trailing int arguments (and of
    // `half', which defaults to 0) cannot be determined from this header --
    // check the implementation before relying on them.
 
    uint32_t float32_add( uint32_t, uint32_t, int, int half=0 );
    uint32_t float32_sub( uint32_t, uint32_t, int, int half=0 );
    uint32_t float32_mul( uint32_t, uint32_t, int );
    uint32_t float32_div( uint32_t, uint32_t );
    uint32_t float32_sqrt( uint32_t );
    uint32_t float32_rsqrt( uint32_t );
    uint32_t float32_madd( uint32_t, uint32_t, uint32_t, int, int);

    uint64_t float64_add( uint64_t, uint64_t, int, int half=0 );
    uint64_t float64_sub( uint64_t, uint64_t, int, int half=0 );
    uint64_t float64_mul( uint64_t, uint64_t, int );
    uint64_t float64_div( uint64_t, uint64_t );
    uint64_t float64_sqrt( uint64_t );
    uint64_t float64_rsqrt( uint64_t );
    uint64_t float64_madd( uint64_t, uint64_t, uint64_t, int, int );
 
    // Comparisons
    //
    // Declarations only; the definitions are not part of this header.
    // NOTE(review): presumably each returns non-zero when the relation holds,
    // and the _signaling / _quiet variants differ in how NaN operands raise
    // EXC_INVALID -- confirm against the implementation.

    int      float32_eq( uint32_t, uint32_t );
    int      float32_lt( uint32_t, uint32_t );
    int      float32_eq_signaling( uint32_t, uint32_t );
    int      float32_lt_quiet( uint32_t, uint32_t );

    int      float64_eq( uint64_t, uint64_t );
    int      float64_lt( uint64_t, uint64_t );
    int      float64_eq_signaling( uint64_t, uint64_t );
    int      float64_lt_quiet( uint64_t, uint64_t );

    // Parameters and results
    //
    // Public state shared between the caller and the operations above.
    // NOTE(review): the exact roles of float_partial_exception_flags,
    // float_round_needed and float_unfinished_op are not visible in this
    // header -- see the implementation.
 
    Rounding  float_rounding_mode;		// Rounding direction (see enum Rounding)
    int       float_exception_flags;		// Exception bits OR-ed in by float_raise()
    int       float_partial_exception_flags;
    int       float_round_needed;
    int       float_unfinished_op;
    Tininess  float_detect_tininess;		// Tininess detection mode (see enum Tininess)
 
    // Field extraction on raw patterns (single: 1/8/23 bits, double: 1/11/52 bits):
    //   to_sgn() returns the sign bit in bit 0
    //   to_exp() returns the raw exponent field in the low bits
    //   to_mnt() returns the mantissa field

    uint32_t to_sgn( uint32_t f )	{ return (f >> 31) & 0x1u; }
    uint32_t to_exp( uint32_t f )	{ return (f >> 23) & 0xffu; }
    uint32_t to_mnt( uint32_t f )	{ return f & 0x007fffffu; }

    uint64_t to_sgn( uint64_t f )	{ return (f >> 63) & 0x1u; }
    uint64_t to_exp( uint64_t f )	{ return (f >> 52) & 0x7ffu; }
    uint64_t to_mnt( uint64_t f )	{ return f & 0x000fffffffffffffULL; }
 
    // to_num() assembles a pattern from sign `s', exponent `e' and mantissa
    // `m'.  The parts are added, not OR-ed, so a mantissa wider than its field
    // carries into the exponent.
    uint32_t to_num( uint32_t s, uint32_t e, uint32_t m )
    {
      const uint32_t sign_part = s << 31;
      const uint32_t exp_part  = e << 23;
      return sign_part + exp_part + m;
    }
    uint64_t to_num( uint64_t s, uint64_t e, uint64_t m )
    {
      const uint64_t sign_part = s << 63;
      const uint64_t exp_part  = e << 52;
      return sign_part + exp_part + m;
    }

    // to_nil() keeps only the sign bit (a zero with the sign of f)
    uint32_t to_nil( uint32_t f )	{ return f & 0x80000000u; }
    uint64_t to_nil( uint64_t f )	{ return f & 0x8000000000000000ULL; }

    // to_abs() clears the sign bit
    uint32_t to_abs( uint32_t f )	{ return f & 0x7fffffffu; }
    uint64_t to_abs( uint64_t f )	{ return f & 0x7fffffffffffffffULL; }

    // to_neg() flips the sign bit
    uint32_t to_neg( uint32_t f )	{ return f ^ 0x80000000u; }
    uint64_t to_neg( uint64_t f )	{ return f ^ 0x8000000000000000ULL; }

    // to_nans() builds a single precision pattern from a double precision
    // one: the sign is kept, the exponent is forced to all ones (0xff) and the
    // upper 23 of the 52 mantissa bits are kept.
    uint32_t to_nans( uint64_t f )
    {
      const uint32_t sign     = static_cast<uint32_t>( to_sgn(f) );
      const uint32_t exponent = 0x0ff;
      const uint32_t mantissa = static_cast<uint32_t>( to_mnt(f) >> (52 - 23) );
      return to_num(sign, exponent, mantissa);
    }

    // to_nand() builds a double precision pattern from a single precision
    // one: the sign is kept, the exponent is forced to all ones (0x7ff) and
    // the 23 mantissa bits become the upper bits of the 52 bit mantissa.
    uint64_t to_nand( uint32_t f )
    {
      const uint64_t sign     = to_sgn(f);
      const uint64_t exponent = 0x7ff;
      const uint64_t mantissa = static_cast<uint64_t>( to_mnt(f) ) << (52 - 23);
      return to_num(sign, exponent, mantissa);
    }

    // to_qnan() sets and to_snan() clears the top mantissa bit (the `q' bit:
    // bit 22 in single precision, bit 51 in double precision).
    uint32_t to_qnan( uint32_t f )	{ return f | 0x00400000u; }
    uint32_t to_snan( uint32_t f )	{ return f & 0xffbfffffu; }
    uint64_t to_qnan( uint64_t f )	{ return f | 0x0008000000000000ULL; }
    uint64_t to_snan( uint64_t f )	{ return f & 0xfff7ffffffffffffULL; }

    // s11111111qmmmmmmmmmmmmmmmmmmmmmm    nan  q=1 ? qnan : snan
    // s1111111100000000000000000000000    inf
    // seeeeeeeemmmmmmmmmmmmmmmmmmmmmmm    num  0 < e < 0xff
    // s00000000mmmmmmmmmmmmmmmmmmmmmmm    sub
    // s0000000000000000000000000000000    nil
    
    // is_nan_or_inf() is true when the exponent field is all ones,
    // is_sub_or_nil() is true when the exponent field is all zeroes.
    int is_nan_or_inf( uint32_t f )	{ return ((f >> 23) & 0xffu) == 0xffu; }
    int is_sub_or_nil( uint32_t f )	{ return ((f >> 23) & 0xffu) == 0x00u; }

    int is_nan_or_inf( uint64_t f )	{ return ((f >> 52) & 0x7ffu) == 0x7ffu; }
    int is_sub_or_nil( uint64_t f )	{ return ((f >> 52) & 0x7ffu) == 0x000u; }
      
    // is_nil() tests for zero,
    // is_sub() tests for subnormals,
    // is_inf() tests for infinity,
    // is_nan() tests for nan (not-a-number),
    // is_qnan() tests for quiet-nan and
    // is_snan() tests for signalling-nan.

    // Single precision classification.  The sign bit is ignored throughout.
    int is_nil( uint32_t f )	{ return (f & 0x7fffffffu) == 0; }
    int is_sub( uint32_t f )	{ return ((f & 0x7f800000u) == 0) && ((f & 0x007fffffu) != 0); }
    int is_inf( uint32_t f )	{ return (f & 0x7fffffffu) == 0x7f800000u; }
    int is_nan( uint32_t f )	{ return (f & 0x7fffffffu) > 0x7f800000u; }
    int is_qnan( uint32_t f )	{ return (f & 0x7fc00000u) == 0x7fc00000u; }
    int is_snan( uint32_t f )	{ return ((f & 0x7fc00000u) == 0x7f800000u) && ((f & 0x003fffffu) != 0); }

    // Double precision classification.  The sign bit is ignored throughout.
    int is_nil( uint64_t f )	{ return (f & 0x7fffffffffffffffULL) == 0; }
    int is_sub( uint64_t f )	{ return ((f & 0x7ff0000000000000ULL) == 0) && ((f & 0x000fffffffffffffULL) != 0); }
    int is_inf( uint64_t f )	{ return (f & 0x7fffffffffffffffULL) == 0x7ff0000000000000ULL; }
    int is_nan( uint64_t f )	{ return (f & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL; }
    int is_qnan( uint64_t f )	{ return (f & 0x7ff8000000000000ULL) == 0x7ff8000000000000ULL; }
    int is_snan( uint64_t f )	{ return ((f & 0x7ff8000000000000ULL) == 0x7ff0000000000000ULL) && ((f & 0x0007ffffffffffffULL) != 0); }


  protected:
    // Accumulates `flags' into float_exception_flags; bits already set stay set.
    void float_raise( Exception flags )
    {
      float_exception_flags |= static_cast<int>(flags);
    }

    // Internal helpers; declarations only, the definitions are not part of
    // this header.
    // NOTE(review): judging by the names these round and pack intermediate
    // results, normalize subnormal significands and add/subtract
    // significands -- confirm against the implementation.
    int32_t  roundAndPackInt32( int zSign, uint64_t absZ );
    int64_t  roundAndPackInt64( int zSign, uint64_t absZ0, uint64_t absZ1 );
    uint32_t roundAndPackFloat32( int zSign, int16_t zExp, uint32_t zSig );
    uint64_t roundAndPackFloat64( int zSign, int16_t zExp, uint64_t zSig );
    uint32_t normalizeRoundAndPackFloat32( int zSign, int16_t zExp, uint32_t zSig );
    uint64_t normalizeRoundAndPackFloat64( int zSign, int16_t zExp, uint64_t zSig );
    void     normalizeFloat32Subnormal( uint32_t aSig, int16_t *zExpPtr, uint32_t *zSigPtr );
    void     normalizeFloat64Subnormal( uint64_t aSig, int16_t *zExpPtr, uint64_t *zSigPtr );

    uint32_t addFloat32Sigs( uint32_t a, uint32_t b, int zSign, int half );
    uint32_t subFloat32Sigs( uint32_t a, uint32_t b, int zSign, int half );
    uint64_t addFloat64Sigs( uint64_t a, uint64_t b, int zSign, int half );
    uint64_t subFloat64Sigs( uint64_t a, uint64_t b, int zSign, int half );

    // Quiet NaN patterns: sign bit clear, every exponent and mantissa bit set.
    // NOTE(review): presumably the default NaN result -- confirm where used.
    static const uint32_t QNAN_32 =   0x7FFFFFFF;
    static const uint64_t QNAN_64 =   0x7FFFFFFFFFFFFFFF;
#if 0
    // NOTE(review): this section is compiled out.  It refers to CondCode and
    // sgn(), neither of which is declared in this header, so it would need
    // updating before it could be re-enabled.
    
    // is_nil() returns true when both f and g are zero ignoring the sign. 
    // Note that +0.0 and -0.0 compare equal. This is a quick test for that.
 
    static int is_nil( uint32_t f, uint32_t g ) { return ((f | g) << 1) == 0; }
    static int is_nil( uint64_t f, uint64_t g ) { return ((f | g) << 1) == 0; }

    // cmp() compares two raw patterns and yields EQ, LT or GT.  It never
    // yields UN, so NaN operands are not treated specially here.

    static CondCode cmp( uint32_t f, uint32_t g )
    {
      if ((f == g) || is_nil(f,g))
	return EQ;
      else 
	return (f < g) ? (sgn(g) ? GT : LT) : (sgn(f) ? LT : GT);
    }

    static CondCode cmp( uint64_t f, uint64_t g )
    {
      if ((f == g) || is_nil(f,g))
	return EQ;
      else 
	return (f < g) ? (sgn(g) ? GT : LT) : (sgn(f) ? LT : GT);
    }
#endif


    // Right shift of the 32-bit value `a' by `count' bits with a sticky bit:
    // when any 1 bit is shifted out, bit 0 of the result is set.  A `count'
    // of 32 or more gives 1 for a nonzero `a' and 0 otherwise.  `count' is
    // expected to be non-negative.  The result is written through `z'.

    void shift32RightJamming( uint32_t a, int16_t count, uint32_t *z )
    {
      uint32_t result;

      if ( count == 0 )
      {
        result = a;
      }
      else if ( count < 32 )
      {
        // The low `count' bits are the ones that fall off.
        const uint32_t dropped = a & ( ( uint32_t(1) << count ) - 1 );
        result = ( a >> count ) | ( dropped != 0 ? 1u : 0u );
      }
      else
      {
        result = ( a != 0 ) ? 1u : 0u;
      }

      *z = result;
    }

    // Right shift of the 64-bit value `a' by `count' bits with a sticky bit:
    // when any 1 bit is shifted out, bit 0 of the result is set.  A `count'
    // of 64 or more gives 1 for a nonzero `a' and 0 otherwise.  `count' is
    // expected to be non-negative.  The result is written through `z'.

    void shift64RightJamming( uint64_t a, int16_t count, uint64_t *z )
    {
      uint64_t result;

      if ( count == 0 )
      {
        result = a;
      }
      else if ( count < 64 )
      {
        // The low `count' bits are the ones that fall off.
        const uint64_t dropped = a & ( ( uint64_t(1) << count ) - 1 );
        result = ( a >> count ) | ( dropped != 0 ? 1u : 0u );
      }
      else
      {
        result = ( a != 0 ) ? 1u : 0u;
      }

      *z = result;
    }

    // Treats `a0'.`a1' as a fixed-point value with the binary point between
    // the two words and shifts it right by `count' bits.  The integer part of
    // the result goes to `*z0Ptr'.  The fraction goes to `*z1Ptr' in a
    // compressed form: its most significant bit is the last bit shifted out
    // of `a0', and its remaining bits are zero only when every other bit that
    // was shifted off was zero.  `count' may be arbitrarily large but is
    // expected to be non-negative.  `*z1Ptr' is written before `*z0Ptr'.

    void shift64ExtraRightJamming( uint64_t a0, uint64_t a1, int16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr )
    {
      const int8_t   complement = ( - count ) & 63;
      const uint64_t lowSticky  = ( a1 != 0 ) ? 1u : 0u;
      uint64_t integerPart = 0;
      uint64_t extraPart;

      if ( count == 0 )
      {
        integerPart = a0;
        extraPart   = a1;
      }
      else if ( count < 64 )
      {
        integerPart = a0>>count;
        extraPart   = ( a0<<complement ) | lowSticky;
      }
      else if ( count == 64 )
      {
        extraPart = a0 | lowSticky;
      }
      else
      {
        extraPart = ( ( a0 | a1 ) != 0 ) ? 1u : 0u;
      }

      *z1Ptr = extraPart;
      *z0Ptr = integerPart;
    }

    // Right shift of the 128-bit value `a0':`a1' (`a0' is the upper word) by
    // `count' bits with a sticky bit: when any 1 bit is shifted out, bit 0 of
    // the result is set.  A `count' of 128 or more gives 1 for a nonzero
    // input and 0 otherwise.  `count' is expected to be non-negative.  The
    // upper word of the result goes to `*z0Ptr', the lower word to `*z1Ptr';
    // `*z1Ptr' is written first.

    void shift128RightJamming( uint64_t a0, uint64_t a1, int16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr )
    {
      const int8_t complement = ( - count ) & 63;
      uint64_t upper = 0;
      uint64_t lower;

      if ( count == 0 )
      {
        upper = a0;
        lower = a1;
      }
      else if ( count < 64 )
      {
        const uint64_t sticky = ( ( a1<<complement ) != 0 ) ? 1u : 0u;
        upper = a0>>count;
        lower = ( a0<<complement ) | ( a1>>count ) | sticky;
      }
      else if ( count == 64 )
      {
        lower = a0 | ( ( a1 != 0 ) ? 1u : 0u );
      }
      else if ( count < 128 )
      {
        const uint64_t sticky = ( ( ( a0<<complement ) | a1 ) != 0 ) ? 1u : 0u;
        lower = ( a0>>( count & 63 ) ) | sticky;
      }
      else
      {
        lower = ( ( a0 | a1 ) != 0 ) ? 1u : 0u;
      }

      *z1Ptr = lower;
      *z0Ptr = upper;
    }

    // Left shift of the 128-bit value `a0':`a1' (`a0' is the upper word) by
    // `count' bits; bits shifted out at the top are discarded.  `count' must
    // be less than 64.  The upper word of the result goes to `*z0Ptr', the
    // lower word to `*z1Ptr'; `*z1Ptr' is written first.

    void shortShift128Left( uint64_t a0, uint64_t a1, int16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr )
    {
      const uint64_t lower = a1<<count;
      uint64_t upper = a0;

      // With a zero count there is nothing to carry over from the lower word.
      if ( count != 0 )
        upper = ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );

      *z1Ptr = lower;
      *z0Ptr = upper;
    }

    // 128-bit addition modulo 2^128: (`a0':`a1') + (`b0':`b1'), where the
    // first word of each pair is the upper one.  A carry out of the top is
    // lost.  The upper word of the sum goes to `*z0Ptr', the lower word to
    // `*z1Ptr'; `*z1Ptr' is written first.

    void add128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
    {
      const uint64_t lowerSum = a1 + b1;
      // The lower sum wrapped around exactly when it ended up below an addend.
      const uint64_t carry = ( lowerSum < a1 ) ? 1u : 0u;

      *z1Ptr = lowerSum;
      *z0Ptr = a0 + b0 + carry;
    }

    // 128-bit subtraction modulo 2^128: (`a0':`a1') - (`b0':`b1'), where the
    // first word of each pair is the upper one.  A borrow out of the top is
    // lost.  The upper word of the difference goes to `*z0Ptr', the lower
    // word to `*z1Ptr'; `*z1Ptr' is written first.

    void sub128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
    {
      // The lower words need a borrow when the subtrahend is the larger one.
      const uint64_t borrow = ( a1 < b1 ) ? 1u : 0u;

      *z1Ptr = a1 - b1;
      *z0Ptr = a0 - b0 - borrow;
    }

    // Full 64 x 64 -> 128 bit multiplication of `a' and `b', built from four
    // 32 x 32 -> 64 bit partial products.  The upper word of the product
    // goes to `*z0Ptr', the lower word to `*z1Ptr'; `*z1Ptr' is written
    // first.

    void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
    {
      // Split both operands into 32-bit halves, widened so that each partial
      // product is computed in 64 bits.
      const uint64_t aLo = static_cast<uint32_t>( a );
      const uint64_t aHi = a>>32;
      const uint64_t bLo = static_cast<uint32_t>( b );
      const uint64_t bHi = b>>32;

      uint64_t lower = aLo * bLo;
      uint64_t upper = aHi * bHi;

      // The sum of the two cross products can wrap; the lost carry is worth
      // 2^32 in the upper word.
      const uint64_t crossB = aHi * bLo;
      uint64_t cross = aLo * bHi + crossB;
      const uint64_t crossCarry = ( cross < crossB ) ? 1u : 0u;

      upper += ( crossCarry<<32 ) + ( cross>>32 );

      // The low half of the cross sum lands in the top half of the lower word.
      cross <<= 32;
      lower += cross;
      upper += ( lower < cross ) ? 1u : 0u;

      *z1Ptr = lower;
      *z0Ptr = upper;
    }

    // Returns an approximation to the 64-bit integer quotient obtained by dividing
    // `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
    // divisor `b' must be at least 2^63.  If q is the exact quotient truncated
    // toward zero, the approximation returned lies between q and q + 2 inclusive.
    // If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
    // unsigned integer is returned.

    uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
    {
      uint64_t b0, b1;
      uint64_t rem0, rem1, term0, term1;
      uint64_t z;

      // The quotient would need more than 64 bits: return the maximum value.
      if ( b <= a0 ) return  0xFFFFFFFFFFFFFFFF ;
      b0 = b>>32;	// upper 32 bits of the divisor
      // First guess for the upper 32 quotient bits, from a0 and the upper
      // half of the divisor only (saturated when that division would overflow).
      z = ( b0<<32 <= a0 ) ?  0xFFFFFFFF00000000  : ( a0 / b0 )<<32;
      // rem = a - b * z, computed in 128 bits.
      mul64To128( b, z, &term0, &term1 );
      sub128( a0, a1, term0, term1, &rem0, &rem1 );
      // A negative remainder means the guess was too large: lower it by 2^32
      // and add b * 2^32 (the word pair b0 : b<<32) back until it is not.
      while ( ( (int64_t) rem0 ) < 0 ) {
	z -=  0x100000000 ;
	b1 = b<<32;
	add128( rem0, rem1, b0, b1, &rem0, &rem1 );
      }
      // Shift the remainder up by 32 bits and estimate the lower 32 quotient
      // bits the same way (saturated to 0xFFFFFFFF).
      rem0 = ( rem0<<32 ) | ( rem1>>32 );
      z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
      return z;
    }

    // Returns an approximation to the square root of the 32-bit significand given
    // by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
    // `aExp' (the least significant bit) is 1, the integer returned approximates
    // 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
    // is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
    // case, the approximation returned lies strictly within +/-2 of the exact
    // value.

    uint32_t estimateSqrt32( int16_t aExp, uint32_t a )
    {
      // Correction tables, one per exponent parity, indexed by bits 30..27
      // of the significand.
      static const uint16_t sqrtOddAdjustments[] = {
	  0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
	  0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
      };
      static const uint16_t sqrtEvenAdjustments[] = {
	  0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
	  0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
      };
      int8_t index;
      uint32_t z;

      index = ( a>>27 ) & 15;
      if ( aExp & 1 ) {
	// Odd exponent: initial guess from the top bits of `a' minus the table
	// correction, refined once using a / z; `a' is then halved for the
	// final step.
	z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
	z = ( ( a / z )<<14 ) + ( z<<15 );
	a >>= 1;
      }
      else {
	// Even exponent: same scheme with the other table; the refined guess
	// is saturated to 0xFFFF8000 once it reaches 0x20000.
	z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
	z = a / z + z;
	z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
	// Early exit: when the guess does not exceed `a', return `a'
	// reinterpreted as signed and shifted right by one.
	if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
      }
      // Final refinement step, with the division carried out in 64 bits.
      return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
    }

    // Counts the 0 bits above the most significant 1 bit of `a'.  Yields 32
    // when `a' is zero.

    int8_t countLeadingZeros32( uint32_t a )
    {
      if ( a == 0 )
        return 32;

      // Binary search: whenever the upper part of the remaining window is
      // empty, count it and move the lower part up.
      int8_t zeros = 0;
      if ( ( a & 0xFFFF0000u ) == 0 ) { zeros += 16; a <<= 16; }
      if ( ( a & 0xFF000000u ) == 0 ) { zeros += 8;  a <<= 8;  }
      if ( ( a & 0xF0000000u ) == 0 ) { zeros += 4;  a <<= 4;  }
      if ( ( a & 0xC0000000u ) == 0 ) { zeros += 2;  a <<= 2;  }
      if ( ( a & 0x80000000u ) == 0 ) { zeros += 1; }
      return zeros;
    }

    // Counts the 0 bits above the most significant 1 bit of `a'.  Yields 64
    // when `a' is zero.

    int8_t countLeadingZeros64( uint64_t a )
    {
      const uint32_t upperWord = static_cast<uint32_t>( a>>32 );

      // A 1 bit in the upper word decides the count on its own.
      if ( upperWord != 0 )
        return countLeadingZeros32( upperWord );

      // Otherwise all 32 upper bits are zero; continue in the lower word.
      return 32 + countLeadingZeros32( static_cast<uint32_t>( a ) );
    }

    // Unsigned 128-bit "less than": yields 1 when `a0':`a1' is smaller than
    // `b0':`b1' (the first word of each pair is the upper one), else 0.

    int lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
    {
      // The upper words decide unless they are equal.
      if ( a0 != b0 )
        return a0 < b0;
      return a1 < b1;
    }

    // Functions and definitions to determine:  (1) whether tininess for underflow
    // is detected before or after rounding by default, (2) what (if anything)
    // happens when exceptions are raised, (3) how signaling NaNs are distinguished
    // from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
    // are propagated from function inputs to output.  These details are target-
    // specific.

    // Yields 1 when `a' is any NaN: exponent all ones, mantissa not zero.
    int float32_is_nan( uint32_t a )
    {
      const uint32_t magnitude = a & 0x7FFFFFFFu;
      return magnitude > 0x7F800000u;
    }

    // Yields 1 when `a' is a signaling NaN: exponent all ones, top mantissa
    // bit (bit 22) clear and at least one lower mantissa bit set.
    int float32_is_signaling_nan( uint32_t a )
    {
      const bool quietBitClear = ( a & 0x7FC00000u ) == 0x7F800000u;
      const bool hasPayload    = ( a & 0x003FFFFFu ) != 0;
      return quietBitClear && hasPayload;
    }
    // Yields 1 when `a' is any NaN: exponent all ones, mantissa not zero.
    int float64_is_nan( uint64_t a )
    {
      const uint64_t magnitude = a & 0x7FFFFFFFFFFFFFFFULL;
      return magnitude > 0x7FF0000000000000ULL;
    }

    // Yields 1 when `a' is a signaling NaN: exponent all ones, top mantissa
    // bit (bit 51) clear and at least one lower mantissa bit set.
    int float64_is_signaling_nan( uint64_t a )
    {
      const bool quietBitClear = ( a & 0x7FF8000000000000ULL ) == 0x7FF0000000000000ULL;
      const bool hasPayload    = ( a & 0x0007FFFFFFFFFFFFULL ) != 0;
      return quietBitClear && hasPayload;
    }

    // Chooses the NaN result for a two operand single precision operation.
    // Raises EXC_INVALID when either operand is a signaling NaN.  The chosen
    // operand is returned with bit 22 set; the priority is: signaling `b',
    // signaling `a', `b' when it is any NaN, otherwise `a'.
    uint32_t propagateFloat32NaN( uint32_t a, uint32_t b )
    {
      const bool aSignals = float32_is_signaling_nan( a ) != 0;
      const bool bAnyNaN  = float32_is_nan( b ) != 0;
      const bool bSignals = float32_is_signaling_nan( b ) != 0;

      const uint32_t quietA = a | 0x00400000;
      const uint32_t quietB = b | 0x00400000;

      if ( aSignals || bSignals )
        float_raise( EXC_INVALID );

      if ( bSignals ) return quietB;
      if ( aSignals ) return quietA;
      if ( bAnyNaN )  return quietB;
      return quietA;
    }
    // Chooses the NaN result for a three operand single precision operation.
    // Raises EXC_INVALID when any operand is a signaling NaN.  The chosen
    // operand is returned with bit 22 set; the priority is: signaling `c',
    // `b', `a', then `c' or `b' when they are any NaN, otherwise `a'.
    uint32_t propagate3Float32NaN( uint32_t a, uint32_t b, uint32_t c )
    {
      const bool aSignals = float32_is_signaling_nan( a ) != 0;
      const bool bAnyNaN  = float32_is_nan( b ) != 0;
      const bool bSignals = float32_is_signaling_nan( b ) != 0;
      const bool cAnyNaN  = float32_is_nan( c ) != 0;
      const bool cSignals = float32_is_signaling_nan( c ) != 0;

      const uint32_t quietA = a | 0x00400000;
      const uint32_t quietB = b | 0x00400000;
      const uint32_t quietC = c | 0x00400000;

      if ( aSignals || bSignals || cSignals )
        float_raise( EXC_INVALID );

      if ( cSignals ) return quietC;
      if ( bSignals ) return quietB;
      if ( aSignals ) return quietA;
      if ( cAnyNaN )  return quietC;
      if ( bAnyNaN )  return quietB;
      return quietA;
    }

    // Chooses the NaN result for a two operand double precision operation.
    // Raises EXC_INVALID when either operand is a signaling NaN.  The chosen
    // operand is returned with bit 51 set; the priority is: signaling `b',
    // signaling `a', `b' when it is any NaN, otherwise `a'.
    uint64_t propagateFloat64NaN( uint64_t a, uint64_t b )
    {
      const bool aSignals = float64_is_signaling_nan( a ) != 0;
      const bool bAnyNaN  = float64_is_nan( b ) != 0;
      const bool bSignals = float64_is_signaling_nan( b ) != 0;

      const uint64_t quietA = a | 0x0008000000000000;
      const uint64_t quietB = b | 0x0008000000000000;

      if ( aSignals || bSignals )
        float_raise( EXC_INVALID );

      if ( bSignals ) return quietB;
      if ( aSignals ) return quietA;
      if ( bAnyNaN )  return quietB;
      return quietA;
    }
    // Chooses the NaN result for a three operand double precision operation.
    // Raises EXC_INVALID when any operand is a signaling NaN.  The chosen
    // operand is returned with bit 51 set; the priority is: signaling `c',
    // `b', `a', then `c' or `b' when they are any NaN, otherwise `a'.
    uint64_t propagate3Float64NaN( uint64_t a, uint64_t b, uint64_t c )
    {
      const bool aSignals = float64_is_signaling_nan( a ) != 0;
      const bool bAnyNaN  = float64_is_nan( b ) != 0;
      const bool bSignals = float64_is_signaling_nan( b ) != 0;
      const bool cAnyNaN  = float64_is_nan( c ) != 0;
      const bool cSignals = float64_is_signaling_nan( c ) != 0;

      const uint64_t quietA = a | 0x0008000000000000;
      const uint64_t quietB = b | 0x0008000000000000;
      const uint64_t quietC = c | 0x0008000000000000;

      if ( aSignals || bSignals || cSignals )
        float_raise( EXC_INVALID );

      if ( cSignals ) return quietC;
      if ( bSignals ) return quietB;
      if ( aSignals ) return quietA;
      if ( cAnyNaN )  return quietC;
      if ( bAnyNaN )  return quietB;
      return quietA;
    }

    // Single precision field access: 23 bit fraction, 8 bit exponent, sign.
    uint32_t extractFloat32Frac( uint32_t a ) { return ( a<<9 )>>9; }
    int16_t extractFloat32Exp( uint32_t a ) { return ( a<<1 )>>24; }
    int extractFloat32Sign( uint32_t a ) { return ( a & 0x80000000u ) ? 1 : 0; }

    // Assembles a single precision pattern.  The parts are added, so a
    // significand with bit 23 set carries into the exponent field.
    uint32_t packFloat32( int zSign, int16_t zExp, uint32_t zSig )
    {
      const uint32_t signPart = static_cast<uint32_t>( zSign )<<31;
      const uint32_t expPart  = static_cast<uint32_t>( zExp )<<23;
      return signPart + expPart + zSig;
    }

    // Double precision field access: 52 bit fraction, 11 bit exponent, sign.
    uint64_t extractFloat64Frac( uint64_t a ) { return ( a<<12 )>>12; }
    int16_t extractFloat64Exp( uint64_t a ) { return ( a<<1 )>>53; }
    int extractFloat64Sign( uint64_t a ) { return ( a & 0x8000000000000000ULL ) ? 1 : 0; }

    // Assembles a double precision pattern.  The parts are added, so a
    // significand with bit 52 set carries into the exponent field.
    uint64_t packFloat64( int zSign, int16_t zExp, uint64_t zSig )
    {
      const uint64_t signPart = static_cast<uint64_t>( zSign )<<63;
      const uint64_t expPart  = static_cast<uint64_t>( zExp )<<52;
      return signPart + expPart + zSig;
    }


};

#endif