/* legion/src/procs/sunsparc/libniagara2/fpsim_n2.c */

/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: fpsim_n2.c
* Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
/****************************************************************************
 *  fpsim_n2.c -- Floating-point Simulation Library for SPARC (Niagara2)
 *
 *  Author --
 *  Robert Rethemeyer - Sun Microsystems, Inc.
 *
 *  Date --
 *  Aug 12, 2005
 *
 *  Design -- dynamically loaded shared object; compile with C or C++.
 *   Models FP instruction behavior according to N2 PRM Appendix I.
 *       The general strategy is to use the SPARC FP instructions
 *   while filtering out the cases where N2 requires exceptions
 *   but Solaris would simulate the operation (e.g. subnormals).
 *
 *  (c) COPYRIGHT 2005-2006 Sun Microsystems, Inc.
 *  Sun Confidential: Sun SSG Only
 ***************************************************************************/

/*********************************************************************
 * ATTENTION: This code is part of a library shared by multiple
 * projects.  DO NOT MAKE CHANGES TO THIS CODE WITHIN YOUR PROJECT.
 * Instead, contact the owner/maintainer of the library, currently:
 *    Robert.Rethemeyer@Sun.COM   +1-408-616-5717  (x45717)
 *    Systems Group: TVT: FrontEnd Technologies
 * The CVS source code repository for the library is at:
 *    /import/ftap-blimp1/cvs/fpsim
 * DO NOT COMMIT CHANGES TO THAT REPOSITORY: contact the maintainer.
 ********************************************************************/

/*tab length=4*/

/* Revision-control "$Id$" keyword string recording this file's name,
   revision and check-in date.  Nothing in the visible source reads it. */
static const char cvsid[] =
 "$Id: fpsim_n2.c,v 1.6 2006/12/07 20:52:04 bobsmail Exp $";


#include "fpsim_support.h"
#include "fpsim.h"


/* Forward declarations of the file-local helpers defined after the
   instruction routines that call them. */
#ifdef __cplusplus
extern "C" {
#endif
/* Split a double into sign/exp/frac; returns the FP class of the number. */
static int dissect_double( double fpnum, fpdouble* data, int std, int* exc );
/* Single-precision counterpart of dissect_double. */
static int dissect_single( float  fpnum, fpsingle* data, int std, int* exc );
/* Store the overflow result (infinity or max value) for a rounding mode. */
static void overflow( void* rslt, int dbl, int rm, int si );
#ifdef __cplusplus
}
#endif

#define FSR_NS 0x00400000  /*FSR nonstandard bit*/
#define GSR_IM 0x08000000  /*GSR interval mode bit*/
#define STDONLY 1          /*Standard mode only*/
/* 1 when operating in standard mode: FSR bit 22 (FSR_NS) is clear, or
   GSR bit 27 (GSR_IM) is set.  Arguments are parenthesized so that an
   expression argument (e.g. a|b) is shifted as a whole. */
#define STDMODE(FSR,GSR) (((((FSR)>>22)^1)|((GSR)>>27)) & 1)
/* 2-bit rounding mode: GSR bits 26:25 when GSR_IM is set, otherwise
   FSR bits 31:30. */
#define RNDMODE(FSR,GSR) \
        ((int)((((GSR) & GSR_IM) ? ((GSR)>>25) : ((FSR)>>30)) & 3))

/*===========================================================================*/


//-------------------------------------------------------------------
// FADDD
//-------------------------------------------------------------------
/* Double-precision add: *p_res = *p_op1 + *p_op2.
   Returns the accumulated exception flags.  Returns FPX_UN, leaving
   *p_res unwritten, when a subnormal operand meets a finite operand or
   when (in standard mode) the result is subnormal / underflowed. */
int
fpsim_faddd( const double* p_op1, const double* p_op2, double* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        const int rmode    = RNDMODE(p_fsr,p_gsr);
        fpdouble a, b, sum;
        int scratch[2];   // work area handed to the asm helper
        int flags = 0;

        // in nonstandard mode dissect flushes subnormals to zero and sets NX
        const int cls_a = dissect_double(*p_op1, &a, std_mode, &flags);
        const int cls_b = dissect_double(*p_op2, &b, std_mode, &flags);
        const int inf_or_nan = (cls_a >= fp_infinity) || (cls_b >= fp_infinity);

        if(inf_or_nan)
        {
                flags = 0;  // inf/NaN operand cancels NX from a flushed operand
        }
        else if(fp_subnormal==cls_a || fp_subnormal==cls_b)
        {
                return FPX_UN;  // subnormal operand, both operands finite
        }

        double raw;
        flags |= asm_faddd(&a.fp.num, &b.fp.num, &raw, rmode, scratch);

        const int cls_sum = dissect_double(raw, &sum, STDONLY, NULL);
        if((flags & FPX_UF) || fp_subnormal == cls_sum)
        {
                if(std_mode) return FPX_UN;
                // nonstandard mode: gross underflow, result is a signed zero
                sum.fp.inte = (uint64)sum.sign << 63;
                flags = FPX_UF|FPX_NX;
        }
        *p_res = sum.fp.num;
        return flags;
}


//-------------------------------------------------------------------
// FSUBD
//-------------------------------------------------------------------
/* Double-precision subtract: *p_res = *p_op1 - *p_op2.
   Returns the accumulated exception flags.  Returns FPX_UN, leaving
   *p_res unwritten, when a subnormal operand meets a finite operand or
   when (in standard mode) the result is subnormal / underflowed. */
int
fpsim_fsubd( const double* p_op1, const double* p_op2, double* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        const int rmode    = RNDMODE(p_fsr,p_gsr);
        fpdouble a, b, diff;
        int scratch[2];   // work area handed to the asm helper
        int flags = 0;

        // in nonstandard mode dissect flushes subnormals to zero and sets NX
        const int cls_a = dissect_double(*p_op1, &a, std_mode, &flags);
        const int cls_b = dissect_double(*p_op2, &b, std_mode, &flags);
        const int inf_or_nan = (cls_a >= fp_infinity) || (cls_b >= fp_infinity);

        if(inf_or_nan)
        {
                flags = 0;  // inf/NaN operand cancels NX from a flushed operand
        }
        else if(fp_subnormal==cls_a || fp_subnormal==cls_b)
        {
                return FPX_UN;  // subnormal operand, both operands finite
        }

        double raw;
        flags |= asm_fsubd(&a.fp.num, &b.fp.num, &raw, rmode, scratch);

        const int cls_diff = dissect_double(raw, &diff, STDONLY, NULL);
        if((flags & FPX_UF) || fp_subnormal == cls_diff)
        {
                if(std_mode) return FPX_UN;
                // nonstandard mode: gross underflow, result is a signed zero
                diff.fp.inte = (uint64)diff.sign << 63;
                flags = FPX_UF|FPX_NX;
        }
        *p_res = diff.fp.num;
        return flags;
}


//-------------------------------------------------------------------
// FMULD
//-------------------------------------------------------------------
/* Double-precision multiply: *p_res = *p_op1 * *p_op2.
   Returns the accumulated exception flags, FPX_UN when the subnormal
   case is not completed here (*p_res unwritten), or FPX_UF|FPX_NX with
   a signed-zero result on gross underflow. */
int
fpsim_fmuld( const double* p_op1, const double* p_op2, double* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        const int rmode    = RNDMODE(p_fsr,p_gsr);
        fpdouble a, b, prod;
        int scratch[2];          // work area handed to the asm helper
        int nx_a = 0, nx_b = 0;  // per-operand NX from subnormal flushing

        const int cls_a = dissect_double(*p_op1, &a, std_mode, &nx_a);
        const int cls_b = dissect_double(*p_op2, &b, std_mode, &nx_b);
        int flags = nx_a | nx_b;

        // biased exponent estimate and sign of the product
        const int exp_est = a.exp + b.exp - 1023;
        const int neg     = a.sign ^ b.sign;
        // rounding mode that pushes a result of this sign away from zero
        const int away    = neg ? (rmode==FP_RM) : (rmode==FP_RP);
        // not a gross underflow that can simply be flushed to zero
        const int unfinished = (exp_est > -54) || away;

        // subnormal operand times a nonzero finite operand
        if((fp_subnormal==cls_a && cls_b<fp_infinity && cls_b!=fp_zero)
        || (fp_subnormal==cls_b && cls_a<fp_infinity && cls_a!=fp_zero))
        {
                if(unfinished) return FPX_UN;
                // gross underflow, zero result
                prod.fp.inte = (uint64)neg << 63;
                *p_res = prod.fp.num;
                return FPX_UF|FPX_NX;
        }

        // some operands preclude setting NX for a flushed subnormal:
        // inf/NaN, or an operand that was exactly zero to begin with
        if(cls_a>=fp_infinity
        || cls_b>=fp_infinity
        || (fp_zero==cls_a && 0==nx_a)
        || (fp_zero==cls_b && 0==nx_b))
        {
                flags = 0;
        }

        double raw;
        flags |= asm_fmuld(&a.fp.num, &b.fp.num, &raw, rmode, scratch);

        const int cls_prod = dissect_double(raw, &prod, STDONLY, NULL);
        if(cls_prod <= fp_subnormal
        && fp_normal == cls_a
        && fp_normal == cls_b
        && exp_est <= 0)  // subnormal result from normal operands?
        {
                if(std_mode && unfinished) return FPX_UN;
                // gross underflow (or nonstandard mode): zero result
                prod.fp.inte = (uint64)neg << 63;
                flags = FPX_UF|FPX_NX;
        }
        *p_res = prod.fp.num;
        return flags;
}


//-------------------------------------------------------------------
// FDIVD
//-------------------------------------------------------------------
/* Double-precision divide: *p_res = *p_op1 / *p_op2.
   Returns the accumulated exception flags, FPX_UN when the subnormal
   case is not completed here (*p_res unwritten), FPX_OF|FPX_NX when a
   subnormal divisor overflows the quotient, or FPX_UF|FPX_NX with a
   signed-zero result on gross underflow. */
int
fpsim_fdivd( const double* p_op1, const double* p_op2, double* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        const int rmode    = RNDMODE(p_fsr,p_gsr);
        fpdouble a, b, quot;
        int scratch[2];   // work area handed to the asm helper
        int flags = 0;

        const int cls_a = dissect_double(*p_op1, &a, std_mode, &flags);
        const int cls_b = dissect_double(*p_op2, &b, std_mode, &flags);

        // biased exponent estimate and sign of the quotient
        const int exp_est = a.exp - b.exp + 1023 - 1;
        const int neg     = a.sign ^ b.sign;
        // rounding mode that pushes a result of this sign away from zero
        const int away    = neg ? (rmode==FP_RM) : (rmode==FP_RP);
        // not a gross underflow that can simply be flushed to zero
        const int unfinished = (exp_est > -54) || away;

        // subnormal operand paired with a nonzero finite operand
        if((fp_subnormal==cls_a && cls_b<fp_infinity && cls_b!=fp_zero)
        || (fp_subnormal==cls_b && cls_a<fp_infinity && cls_a!=fp_zero))
        {
                // significands, with the hidden bit restored for normals
                uint64 sig_a = a.frac;
                uint64 sig_b = b.frac;
                int exp_ovf = exp_est + 1;
                if(fp_normal == cls_a) sig_a |= 0x0010000000000000;
                if(fp_normal == cls_b) sig_b |= 0x0010000000000000;
                if(sig_a < sig_b) exp_ovf--;
                // threshold was (> 2046):
                // **PRM error now frozen in HW** Metrax 109086
                if(exp_ovf > 2047)
                {
                        overflow(p_res, 1, rmode, neg);
                        return FPX_OF|FPX_NX;
                }
                if(unfinished) return FPX_UN;
                // gross underflow, zero result
                quot.fp.inte = (uint64)neg << 63;
                *p_res = quot.fp.num;
                return FPX_UF|FPX_NX;
        }

        // some operands preclude setting NX for a flushed subnormal:
        // a zero divisor (DZ cancels NX) or any inf/NaN operand
        if(fp_zero==cls_b
        || cls_a>=fp_infinity
        || cls_b>=fp_infinity)
        {
                flags = 0;
        }

        double raw;
        flags |= asm_fdivd(&a.fp.num, &b.fp.num, &raw, rmode, scratch);

        const int cls_quot = dissect_double(raw, &quot, STDONLY, NULL);
        if(cls_quot <= fp_subnormal
        && fp_normal == cls_a
        && fp_normal == cls_b
        && exp_est <= 0)  // subnormal result from normal operands?
        {
                if(std_mode && unfinished) return FPX_UN;
                // gross underflow (or nonstandard mode): zero result
                quot.fp.inte = (uint64)neg << 63;
                flags = FPX_UF|FPX_NX;
        }
        *p_res = quot.fp.num;
        return flags;
}


//-------------------------------------------------------------------
// FSQRTD
//-------------------------------------------------------------------
/* Double-precision square root of *p_op2 into *p_res.
   Returns FPX_UN (without writing *p_res) for a positive subnormal
   operand in standard mode; otherwise the accumulated exception flags. */
int
fpsim_fsqrtd( const double* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        fpdouble arg;
        int scratch[2];   // work area handed to the asm helper
        int flags = 0;

        const int cls = dissect_double(*p_op2, &arg, std_mode, &flags);
        const int positive_subnormal = (fp_subnormal==cls) && (0==arg.sign);

        if(std_mode && positive_subnormal)
                return FPX_UN;

        return flags | asm_fsqrtd(&arg.fp.num, p_res,
                                  RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FADDS
//-------------------------------------------------------------------
/* Single-precision add: *p_res = *p_op1 + *p_op2.
   Returns the accumulated exception flags.  Returns FPX_UN, leaving
   *p_res unwritten, when a subnormal operand meets a finite operand or
   when (in standard mode) the result is subnormal / underflowed. */
int
fpsim_fadds( const float* p_op1, const float* p_op2, float* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        fpsingle op1, op2, res;
        int stdmode = STDMODE(p_fsr,p_gsr);
        int scr[2], exc=0;   // scr: work area handed to the asm helper

        // in nonstandard mode dissect flushes subnormals to zero and sets NX
        int type1 = dissect_single(*p_op1, &op1, stdmode, &exc);
        int type2 = dissect_single(*p_op2, &op2, stdmode, &exc);

        // subnormal operand with both operands finite
        if(type1 < fp_infinity
        && type2 < fp_infinity
        &&(fp_subnormal==type1 || fp_subnormal==type2))
        {
                return FPX_UN;
        }
        // inf/NaN operand cancels NX from a flushed operand
        if(type1>=fp_infinity || type2>=fp_infinity) exc = 0;

        float result;
        int rnd = RNDMODE(p_fsr,p_gsr);
        exc |= asm_fadds(&op1.fp.num, &op2.fp.num, &result, rnd, scr);

        int rtype = dissect_single(result, &res, STDONLY, NULL);
        if(fp_subnormal == rtype
        || (exc & FPX_UF) )
        {
                if(stdmode) return FPX_UN;
                //else gross underflow, zero result
                // shift in unsigned arithmetic, as the other single-precision
                // routines do: a signed 1<<31 is not representable in int
                res.fp.inte = (unsigned int)res.sign << 31;
                exc = FPX_UF|FPX_NX;
        }
        *p_res = res.fp.num;
        return exc;
}


//-------------------------------------------------------------------
// FSUBS
//-------------------------------------------------------------------
/* Single-precision subtract: *p_res = *p_op1 - *p_op2.
   Returns the accumulated exception flags.  Returns FPX_UN, leaving
   *p_res unwritten, when a subnormal operand meets a finite operand or
   when (in standard mode) the result is subnormal / underflowed. */
int
fpsim_fsubs( const float* p_op1, const float* p_op2, float* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        fpsingle op1, op2, res;
        int stdmode = STDMODE(p_fsr,p_gsr);
        int scr[2], exc=0;   // scr: work area handed to the asm helper

        // in nonstandard mode dissect flushes subnormals to zero and sets NX
        int type1 = dissect_single(*p_op1, &op1, stdmode, &exc);
        int type2 = dissect_single(*p_op2, &op2, stdmode, &exc);

        // subnormal operand with both operands finite
        if(type1 < fp_infinity
        && type2 < fp_infinity
        &&(fp_subnormal==type1 || fp_subnormal==type2))
        {
                return FPX_UN;
        }
        // inf/NaN operand cancels NX from a flushed operand
        if(type1>=fp_infinity || type2>=fp_infinity) exc = 0;

        float result;
        int rnd = RNDMODE(p_fsr,p_gsr);
        exc |= asm_fsubs(&op1.fp.num, &op2.fp.num, &result, rnd, scr);

        int rtype = dissect_single(result, &res, STDONLY, NULL);
        if(fp_subnormal == rtype
        || (exc & FPX_UF) )
        {
                if(stdmode) return FPX_UN;
                //else gross underflow, zero result
                // shift in unsigned arithmetic, as the other single-precision
                // routines do: a signed 1<<31 is not representable in int
                res.fp.inte = (unsigned int)res.sign << 31;
                exc = FPX_UF|FPX_NX;
        }
        *p_res = res.fp.num;
        return exc;
}


//-------------------------------------------------------------------
// FMULS
//-------------------------------------------------------------------
/* Single-precision multiply: *p_res = *p_op1 * *p_op2.
   Returns the accumulated exception flags, FPX_UN when the subnormal
   case is not completed here (*p_res unwritten), or FPX_UF|FPX_NX with
   a signed-zero result on gross underflow. */
int
fpsim_fmuls( const float* p_op1, const float* p_op2, float* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        const int rmode    = RNDMODE(p_fsr,p_gsr);
        fpsingle a, b, prod;
        int scratch[2];          // work area handed to the asm helper
        int nx_a = 0, nx_b = 0;  // per-operand NX from subnormal flushing

        const int cls_a = dissect_single(*p_op1, &a, std_mode, &nx_a);
        const int cls_b = dissect_single(*p_op2, &b, std_mode, &nx_b);
        int flags = nx_a | nx_b;

        // biased exponent estimate and sign of the product
        const int exp_est = a.exp + b.exp - 127;
        const int neg     = a.sign ^ b.sign;
        // rounding mode that pushes a result of this sign away from zero
        const int away    = neg ? (rmode==FP_RM) : (rmode==FP_RP);
        // not a gross underflow that can simply be flushed to zero
        const int unfinished = (exp_est > -25) || away;

        // subnormal operand times a nonzero finite operand
        if((fp_subnormal==cls_a && cls_b<fp_infinity && cls_b!=fp_zero)
        || (fp_subnormal==cls_b && cls_a<fp_infinity && cls_a!=fp_zero))
        {
                if(unfinished) return FPX_UN;
                // gross underflow, zero result
                prod.fp.inte = (uint)neg << 31;
                *p_res = prod.fp.num;
                return FPX_UF|FPX_NX;
        }

        // some operands preclude setting NX for a flushed subnormal:
        // inf/NaN, or an operand that was exactly zero to begin with
        if(cls_a>=fp_infinity
        || cls_b>=fp_infinity
        || (fp_zero==cls_a && 0==nx_a)
        || (fp_zero==cls_b && 0==nx_b))
        {
                flags = 0;
        }

        float raw;
        flags |= asm_fmuls(&a.fp.num, &b.fp.num, &raw, rmode, scratch);

        const int cls_prod = dissect_single(raw, &prod, STDONLY, NULL);
        if(cls_prod <= fp_subnormal
        && fp_normal == cls_a
        && fp_normal == cls_b
        && exp_est <= 0)  // subnormal result from normal operands?
        {
                if(std_mode && unfinished) return FPX_UN;
                // gross underflow (or nonstandard mode): zero result
                prod.fp.inte = (uint)neg << 31;
                flags = FPX_UF|FPX_NX;
        }
        *p_res = prod.fp.num;
        return flags;
}


//-------------------------------------------------------------------
// FSMULD
//-------------------------------------------------------------------
/* Single x single -> double multiply: *p_res = *p_op1 * *p_op2.
   Returns FPX_UN (without writing *p_res) when a subnormal operand is
   paired with a nonzero finite operand; otherwise the exception flags. */
int
fpsim_fsmuld( const float* p_op1, const float* p_op2, double* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        fpsingle a, b;
        int scratch[2];          // work area handed to the asm helper
        int nx_a = 0, nx_b = 0;  // per-operand NX from subnormal flushing

        const int cls_a = dissect_single(*p_op1, &a, std_mode, &nx_a);
        const int cls_b = dissect_single(*p_op2, &b, std_mode, &nx_b);
        int flags = nx_a | nx_b;

        if((fp_subnormal==cls_a && cls_b<fp_infinity && cls_b!=fp_zero)
        || (fp_subnormal==cls_b && cls_a<fp_infinity && cls_a!=fp_zero))
                return FPX_UN;

        // some operands preclude setting NX for a flushed subnormal:
        // inf/NaN, or an operand that was exactly zero to begin with
        if(cls_a>=fp_infinity
        || cls_b>=fp_infinity
        || (fp_zero==cls_a && 0==nx_a)
        || (fp_zero==cls_b && 0==nx_b))
        {
                flags = 0;
        }

        return flags | asm_fsmuld(&a.fp.num, &b.fp.num, p_res, scratch);
}


//-------------------------------------------------------------------
// FDIVS
//-------------------------------------------------------------------
/* Single-precision divide: *p_res = *p_op1 / *p_op2.
   Returns the accumulated exception flags, FPX_UN when the subnormal
   case is not completed here (*p_res unwritten), FPX_OF|FPX_NX when a
   subnormal divisor overflows the quotient, or FPX_UF|FPX_NX with a
   signed-zero result on gross underflow. */
int
fpsim_fdivs( const float* p_op1, const float* p_op2, float* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        const int rmode    = RNDMODE(p_fsr,p_gsr);
        fpsingle a, b, quot;
        int scratch[2];   // work area handed to the asm helper
        int flags = 0;

        const int cls_a = dissect_single(*p_op1, &a, std_mode, &flags);
        const int cls_b = dissect_single(*p_op2, &b, std_mode, &flags);

        // biased exponent estimate and sign of the quotient
        const int exp_est = a.exp - b.exp + 127 - 1;
        const int neg     = a.sign ^ b.sign;
        // rounding mode that pushes a result of this sign away from zero
        const int away    = neg ? (rmode==FP_RM) : (rmode==FP_RP);
        // not a gross underflow that can simply be flushed to zero
        const int unfinished = (exp_est > -25) || away;

        // subnormal operand paired with a nonzero finite operand
        if((fp_subnormal==cls_a && cls_b<fp_infinity && cls_b!=fp_zero)
        || (fp_subnormal==cls_b && cls_a<fp_infinity && cls_a!=fp_zero))
        {
                // significands, with the hidden bit restored for normals
                int sig_a = a.frac;
                int sig_b = b.frac;
                int exp_ovf = exp_est + 1;
                if(fp_normal == cls_a) sig_a |= 0x800000;
                if(fp_normal == cls_b) sig_b |= 0x800000;
                if(sig_a < sig_b) exp_ovf--;
                // threshold was (> 254):
                // **PRM error now frozen in HW** Metrax 109086
                if(exp_ovf > 255)
                {
                        overflow(p_res, 0, rmode, neg);
                        return FPX_OF|FPX_NX;
                }
                if(unfinished) return FPX_UN;
                // gross underflow, zero result
                quot.fp.inte = (uint)neg << 31;
                *p_res = quot.fp.num;
                return FPX_UF|FPX_NX;
        }

        // some operands preclude setting NX for a flushed subnormal:
        // a zero divisor (DZ cancels NX) or any inf/NaN operand
        if(fp_zero==cls_b
        || cls_a>=fp_infinity
        || cls_b>=fp_infinity)
        {
                flags = 0;
        }

        float raw;
        flags |= asm_fdivs(&a.fp.num, &b.fp.num, &raw, rmode, scratch);

        const int cls_quot = dissect_single(raw, &quot, STDONLY, NULL);
        if(cls_quot <= fp_subnormal
        && fp_normal == cls_a
        && fp_normal == cls_b
        && exp_est <= 0)  // subnormal result from normal operands?
        {
                if(std_mode && unfinished) return FPX_UN;
                // gross underflow (or nonstandard mode): zero result
                quot.fp.inte = (uint)neg << 31;
                flags = FPX_UF|FPX_NX;
        }
        *p_res = quot.fp.num;
        return flags;
}


//-------------------------------------------------------------------
// FSQRTS
//-------------------------------------------------------------------
/* Single-precision square root of *p_op2 into *p_res.
   Returns FPX_UN (without writing *p_res) for a positive subnormal
   operand in standard mode; otherwise the accumulated exception flags. */
int
fpsim_fsqrts( const float* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        fpsingle arg;
        int scratch[2];   // work area handed to the asm helper
        int flags = 0;

        const int cls = dissect_single(*p_op2, &arg, std_mode, &flags);
        const int positive_subnormal = (fp_subnormal==cls) && (0==arg.sign);

        if(std_mode && positive_subnormal)
                return FPX_UN;

        return flags | asm_fsqrts(&arg.fp.num, p_res,
                                  RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FSTOD
//-------------------------------------------------------------------
/* Convert single *p_op2 to double *p_res.
   Returns FPX_UN (without writing *p_res) for a subnormal operand;
   otherwise the accumulated exception flags. */
int
fpsim_fstod( const float* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        fpsingle src;
        int scratch[2];   // work area handed to the asm helper
        int flags = 0;

        // nonstandard mode never reports subnormal: dissect flushes to zero
        if(fp_subnormal == dissect_single(*p_op2, &src, std_mode, &flags))
                return FPX_UN;

        return flags | asm_fstod(&src.fp.num, p_res,
                                 RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FDTOS
//-------------------------------------------------------------------
/* Convert double *p_op2 to single *p_res.
   Returns the accumulated exception flags, FPX_UN when the subnormal
   case is not completed here (*p_res unwritten), or FPX_UF|FPX_NX with
   a signed-zero result on gross underflow. */
int
fpsim_fdtos( const double* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        const int rmode    = RNDMODE(p_fsr,p_gsr);
        fpdouble src;
        fpsingle dst;
        int scratch[2];   // work area handed to the asm helper
        int flags = 0;

        const int cls_src = dissect_double(*p_op2, &src, std_mode, &flags);
        // source exponent rebiased from double (1023) to single (127)
        const int exp_est = src.exp - 1023 + 127;

        if(fp_subnormal==cls_src)
        {
                // rounding mode that pushes this sign away from zero
                const int away = src.sign ? (rmode==FP_RM) : (rmode==FP_RP);
                if(away) return FPX_UN;
                // gross underflow, zero result
                dst.fp.inte = (uint)src.sign << 31;
                *p_res = dst.fp.num;
                return FPX_UF|FPX_NX;
        }

        float raw;
        flags |= asm_fdtos(&src.fp.num, &raw, rmode, scratch);

        const int cls_dst = dissect_single(raw, &dst, STDONLY, NULL);
        if(cls_dst <= fp_subnormal
        && fp_normal == cls_src
        && exp_est <= 0)  // subnormal result from a normal operand?
        {
                const int away = dst.sign ? (rmode==FP_RM) : (rmode==FP_RP);
                if(std_mode && ((exp_est > -25) || away))
                        return FPX_UN;
                // gross underflow (or nonstandard mode): zero result
                dst.fp.inte = (uint)dst.sign << 31;
                flags = FPX_UF|FPX_NX;
        }
        *p_res = dst.fp.num;
        return flags;
}


//-------------------------------------------------------------------
// FSTOX
//-------------------------------------------------------------------
/* Convert single *p_op2 to a 64-bit integer in *p_res.
   Returns FPX_UN (without writing *p_res) for a subnormal operand;
   otherwise the accumulated exception flags. */
int
fpsim_fstox( const float* p_op2, uint64* p_res, uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        fpsingle src;
        int scratch[2];   // work area handed to the asm helper
        int flags = 0;

        // nonstandard mode never reports subnormal: dissect flushes to zero
        if(fp_subnormal == dissect_single(*p_op2, &src, std_mode, &flags))
                return FPX_UN;

        return flags | asm_fstox(&src.fp.num, p_res,
                                 RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FDTOX
//-------------------------------------------------------------------
/* Convert double *p_op2 to a 64-bit integer in *p_res.
   Returns FPX_UN (without writing *p_res) for a subnormal operand;
   otherwise the accumulated exception flags. */
int
fpsim_fdtox( const double* p_op2, uint64* p_res, uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        fpdouble src;
        int scratch[2];   // work area handed to the asm helper
        int flags = 0;

        // nonstandard mode never reports subnormal: dissect flushes to zero
        if(fp_subnormal == dissect_double(*p_op2, &src, std_mode, &flags))
                return FPX_UN;

        return flags | asm_fdtox(&src.fp.num, p_res,
                                 RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FSTOI
//-------------------------------------------------------------------
/* Convert single *p_op2 to a 32-bit integer in *p_res.
   Returns FPX_UN (without writing *p_res) for a subnormal operand;
   otherwise the accumulated exception flags. */
int
fpsim_fstoi( const float* p_op2, uint* p_res, uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        fpsingle src;
        int scratch[2];   // work area handed to the asm helper
        int flags = 0;

        // nonstandard mode never reports subnormal: dissect flushes to zero
        if(fp_subnormal == dissect_single(*p_op2, &src, std_mode, &flags))
                return FPX_UN;

        return flags | asm_fstoi(&src.fp.num, p_res,
                                 RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FDTOI
//-------------------------------------------------------------------
/* Convert double *p_op2 to a 32-bit integer in *p_res.
   Returns FPX_UN (without writing *p_res) for a subnormal operand;
   otherwise the accumulated exception flags. */
int
fpsim_fdtoi( const double* p_op2, uint* p_res, uint64 p_fsr, uint64 p_gsr )
{
        const int std_mode = STDMODE(p_fsr,p_gsr);
        fpdouble src;
        int scratch[2];   // work area handed to the asm helper
        int flags = 0;

        // nonstandard mode never reports subnormal: dissect flushes to zero
        if(fp_subnormal == dissect_double(*p_op2, &src, std_mode, &flags))
                return FPX_UN;

        return flags | asm_fdtoi(&src.fp.num, p_res,
                                 RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FXTOS
//-------------------------------------------------------------------
/* Convert the 64-bit integer *p_op2 to single *p_res using the current
   rounding mode; returns whatever exception flags the asm helper reports. */
int
fpsim_fxtos( const uint64* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
{
        int scratch[2];   // work area handed to the asm helper

        return asm_fxtos(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FXTOD
//-------------------------------------------------------------------
/* Convert the 64-bit integer *p_op2 to double *p_res using the current
   rounding mode; returns whatever exception flags the asm helper reports. */
int
fpsim_fxtod( const uint64* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr )
{
        int scratch[2];   // work area handed to the asm helper

        return asm_fxtod(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FITOS
//-------------------------------------------------------------------
/* Convert the 32-bit integer *p_op2 to single *p_res using the current
   rounding mode; returns whatever exception flags the asm helper reports. */
int
fpsim_fitos( const uint* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
{
        int scratch[2];   // work area handed to the asm helper

        return asm_fitos(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FITOD
//-------------------------------------------------------------------
/* Convert the 32-bit integer *p_op2 to double *p_res.
   Takes no FSR/GSR and always reports no exceptions (returns 0) --
   presumably because every 32-bit integer converts exactly. */
int
fpsim_fitod( const uint* p_op2, double* p_res )
{
        asm_fitod( p_op2, p_res );

        return 0;
}


//-------------------------------------------------------------------
// GSR_MASK
//-------------------------------------------------------------------

/* Returns this model's GSR mask: bits 63:32, 27:25 and 7:0 set.
   NOTE(review): presumably the GSR bits implemented by N2 -- confirm
   against the caller's use of the mask. */
uint64
fpsim_gsr_mask(void)
{
        return 0xFFFFFFFF0E0000FF;
}


//-------------------------------------------------------------------
// UPDATE_FSR
//-------------------------------------------------------------------

/* Merges a FP exception returned by one of the instruction sim routines
   into the caller's FSR, and indicates whether the caller should
   post a trap (nonzero return value is trap-type code).
*/

/* Folds the exception code p_exc into the caller's FSR (*p_fsr).
   Returns the trap-type code the caller should post, or 0 for none:
     0x10  illegal instruction (FSR left untouched)
     0x22  FP_other trap, reason stored in fsr.ftt
     0x21  IEEE_754_exception, enabled exceptions stored in fsr.cexc */
int
fpsim_update_fsr( int p_exc, uint64* p_fsr )
{
        uint64 new_fsr = *p_fsr;
        int trap_code;

        if(0 == (p_exc & FPX_TRAP))
        {
                // IEEE trap or completion: which raised bits are enabled
                // by fsr.tem?
                int enabled = p_exc & (new_fsr>>23) & 0x1F;

                new_fsr &= ~0x1C01F; // clear old fsr.cexc and fsr.ftt

                if(0 == enabled)
                {
                        // no trap: set both cexc and aexc identically
                        new_fsr |= (p_exc | (p_exc<<5));
                        trap_code = 0;
                }
                else
                {
                        // an OF/UF trap cancels the accompanying NX bit
                        if(enabled & (FPX_OF|FPX_UF))
                                enabled &= ~FPX_NX;
                        // set fsr.cexc only; fsr.ftt=1
                        new_fsr |= (enabled | 0x04000);
                        trap_code = 0x21;  // IEEE_754_exception
                }
        }
        else
        {
                // non-IEEE trap (unfinished or illegal);
                // trap reason is in bits 7:0
                int reason = p_exc & 0xFF;
                if(0x10 == reason) return reason; // illegal_instr: no fsr update

                // FP_other trap: the reason code becomes fsr.ftt
                new_fsr = (new_fsr & ~0x1C000) | (reason << 14);
                trap_code = 0x22;
        }

        *p_fsr = new_fsr; // update caller's FSR
        return trap_code;
}


/*===========================================================================*/

/* Dissection routines:  pick apart the FP number into sign, exp, fraction
    and flush subnormal numbers to zero in nonstandard mode.
        Return the FP number class:  0=zero 1=subnorm 2=normal 3=inf 4=qnan 5=snan
*/

/* Splits a double into sign, exponent and fraction and classifies it.
     p_fpnum  value to dissect
     p_data   receives the raw bits (fp), sign, frac and exp fields
     p_std    nonzero = standard mode; zero = nonstandard mode, in which
              a subnormal is replaced by a same-signed zero
     p_exc    set to FPX_NX when a subnormal was flushed; may be NULL
   Returns the class (fp_zero, fp_subnormal, fp_normal, fp_infinity,
   fp_quiet or fp_signaling); a flushed subnormal is reported as fp_zero. */
static int
dissect_double( double p_fpnum, fpdouble* p_data, int p_std, int* p_exc )
{
        p_data->fp.num = p_fpnum;
        p_data->sign = p_data->fp.inte >> 63;
        p_data->frac = p_data->fp.inte & 0x000FFFFFFFFFFFFF;
        p_data->exp  = (p_data->fp.inte >> 52) & 0x7FF;

        // classify the number
        int fpclass = fp_normal;
        if(0 == p_data->exp) // exponent zero?
        {
                fpclass = (0 == p_data->frac)
                                ? fp_zero               //true
                                : fp_subnormal; //false
        }
        else if(0x7FF == p_data->exp) // exponent all ones?
        {
                if(0 == p_data->frac) fpclass = fp_infinity;
                else
                {
                        fpclass = (p_data->frac & 0x0008000000000000) //frac.msb==1?
                                        ? fp_quiet              //true
                                        : fp_signaling; //false
                }
        }

        // nonstandard mode: flush subnormals to 0
        if(0 == p_std
        && fp_subnormal == fpclass)
        {
                p_data->fp.inte = (uint64)p_data->sign << 63;
                p_data->frac = 0;
                fpclass = fp_zero;
                // callers in this file pass NULL when dissecting results,
                // so do not rely on them also passing standard mode
                if(p_exc) *p_exc = FPX_NX;
        }
        return fpclass;
}

/* Splits a float into sign, exponent and fraction and classifies it.
     p_fpnum  value to dissect
     p_data   receives the raw bits (fp), sign, frac and exp fields
     p_std    nonzero = standard mode; zero = nonstandard mode, in which
              a subnormal is replaced by a same-signed zero
     p_exc    set to FPX_NX when a subnormal was flushed; may be NULL
   Returns the class (fp_zero, fp_subnormal, fp_normal, fp_infinity,
   fp_quiet or fp_signaling); a flushed subnormal is reported as fp_zero. */
static int
dissect_single( float p_fpnum, fpsingle* p_data, int p_std, int* p_exc )
{
        p_data->fp.num = p_fpnum;
        p_data->sign = p_data->fp.inte >> 31;
        p_data->frac = p_data->fp.inte & 0x007FFFFF;
        p_data->exp  = (p_data->fp.inte >> 23) & 0xFF;

        // classify the number
        int fpclass = fp_normal;
        if(0 == p_data->exp) // exponent zero?
        {
                fpclass = (0 == p_data->frac)
                                ? fp_zero               //true
                                : fp_subnormal; //false
        }
        else if(0xFF == p_data->exp) // exponent all ones?
        {
                if(0 == p_data->frac) fpclass = fp_infinity;
                else
                {
                        fpclass = (p_data->frac & 0x00400000) //frac.msb==1?
                                        ? fp_quiet              //true
                                        : fp_signaling; //false
                }
        }

        // nonstandard mode: flush subnormals to 0
        if(0 == p_std
        && fp_subnormal == fpclass)
        {
                // shift in unsigned arithmetic: a signed 1<<31 is not
                // representable in int
                p_data->fp.inte = (unsigned int)p_data->sign << 31;
                p_data->frac = 0;
                fpclass = fp_zero;
                // callers in this file pass NULL when dissecting results,
                // so do not rely on them also passing standard mode
                if(p_exc) *p_exc = FPX_NX;
        }
        return fpclass;
}


/*=======================================================================*/

/* overflow routine:  returns proper result for overflow in rounding mode
*/

/* Stores the overflow result for the given rounding mode into *p_res.
     p_res   destination; written as a 64-bit pattern when p_dbl is
             nonzero, otherwise as a 32-bit pattern
     p_dbl   nonzero = double precision, zero = single precision
     p_rnd   rounding mode (FP_RN, FP_RZ, FP_RP or FP_RM)
     p_sign  sign of the result, 0 or 1
   The result is infinity or the largest finite value, with the sign
   bit taken from p_sign. */
static void
overflow( void* p_res, int p_dbl, int p_rnd, int p_sign )
{
        // result type: 1=infinity 0=max_value.  Initialized so that an
        // unexpected p_rnd cannot leave it indeterminate.
        enum { MAXV=0, INFV=1 } rtype = INFV;

        switch(p_rnd) //rounding mode?
        {
        case FP_RN:  rtype = INFV;  break;
        case FP_RZ:  rtype = MAXV;  break;
        case FP_RP:  rtype = p_sign ? MAXV : INFV;  break;
        case FP_RM:  rtype = p_sign ? INFV : MAXV;  break;
        default:     break;  // keep the initial value
        }

        if(p_dbl) //double
        {
                uint64 d_res = rtype ? 0x7FF0000000000000 : 0x7FEFFFFFFFFFFFFF;
                *(uint64*)p_res = d_res | ((uint64)p_sign << 63);
        }
        else //single
        {
                uint s_res = rtype ? 0x7F800000 : 0x7F7FFFFF;
                // shift in unsigned arithmetic: a signed 1<<31 is not
                // representable in int
                *(uint*)p_res = s_res | ((uint)p_sign << 31);
        }
}


//-------------------------------------------------------------------
// un-implemented ops return illegal_instruction trap if called:
//-------------------------------------------------------------------

/* Each stub below ignores its arguments and returns FPX_ILL. */

int
fpsim_fnaddd(const double* a,const double* b,double* c,uint64 d,uint64 e)
{
        return FPX_ILL;
}

int
fpsim_fnadds(const float* a,const float* b,float* c,uint64 d,uint64 e)
{
        return FPX_ILL;
}

int
fpsim_fnmuld(const double* a,const double* b,double* c,uint64 d,uint64 e)
{
        return FPX_ILL;
}

int
fpsim_fnmuls(const float* a,const float* b,float* c,uint64 d,uint64 e)
{
        return FPX_ILL;
}

int
fpsim_fnsmuld(const float* a,const float* b,double* c,uint64 d,uint64 e)
{
        return FPX_ILL;
}

int
fpsim_fhaddd(const double* a,const double* b,double* c,uint64 d,uint64 e,
                        fpsim_fha_subtype f)
{
        return FPX_ILL;
}

int
fpsim_fhadds(const float* a,const float* b,float* c,uint64 d,uint64 e,
                        fpsim_fha_subtype f)
{
        return FPX_ILL;
}

int
fpsim_fmaddd(const double* a,const double* b,const double* c,double* d,
                        uint64 e,uint64 f,fpsim_fma_subtype g)
{
        return FPX_ILL;
}

int
fpsim_fmadds(const float* a,const float* b,const float* c,float* d,
                        uint64 e,uint64 f,fpsim_fma_subtype g)
{
        return FPX_ILL;
}

int
fpsim_fumaddd(const double* a,const double* b,const double* c,double* d,
                        uint64 e,uint64 f,fpsim_fma_subtype g)
{
        return FPX_ILL;
}

int
fpsim_fumadds(const float* a,const float* b,const float* c,float* d,
                        uint64 e,uint64 f,fpsim_fma_subtype g)
{
        return FPX_ILL;
}


/*===========================================================================*/

// Function pointer structure

/* Table of this model's entry points.  The initializer is positional,
   so the order of the names below has to match the member order of
   struct fpsim_functions, which is declared outside this file
   (presumably in fpsim.h -- confirm before reordering or inserting). */
struct fpsim_functions  fpsim_funclist =
{
        /* FSR/GSR support */
        fpsim_update_fsr, fpsim_gsr_mask,

        /* arithmetic: double, single, then single x single -> double */
        fpsim_faddd, fpsim_fsubd, fpsim_fmuld, fpsim_fdivd, fpsim_fsqrtd,
        fpsim_fadds, fpsim_fsubs, fpsim_fmuls, fpsim_fdivs, fpsim_fsqrts,
        fpsim_fsmuld,

        /* format conversions */
        fpsim_fstod, fpsim_fdtos, fpsim_fstox, fpsim_fdtox, fpsim_fstoi,
        fpsim_fdtoi, fpsim_fxtos, fpsim_fxtod, fpsim_fitos, fpsim_fitod,

        /* ops not implemented by this model: each returns FPX_ILL */
        fpsim_fnaddd, fpsim_fnadds, fpsim_fnmuld, fpsim_fnmuls, fpsim_fnsmuld,
        fpsim_fhaddd, fpsim_fhadds,
        fpsim_fmaddd, fpsim_fmadds, fpsim_fumaddd, fpsim_fumadds
};

/* Model identification string: the model name followed by the
   compilation date (__DATE__). */
char fpsim_fp_model[] = "FPSIM Niagara2 (N2) " __DATE__ ;