/* [OpenSPARC-T2-SAM] / legion / src / procs / sunsparc / libniagara2 / fpsim_n2.c */

/*
* ========== Copyright Header Begin ==========================================
* 
* OpenSPARC T2 Processor File: fpsim_n2.c
* Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
* 
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
* 
* The above named program is distributed in the hope that it will be 
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* General Public License for more details.
* 
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
* 
* ========== Copyright Header End ============================================
*/
/****************************************************************************
 *  fpsim_n2.c -- Floating-point Simulation Library for SPARC (Niagara2)
 *
 *  Author --
 *  Robert Rethemeyer - Sun Microsystems, Inc.
 *
 *  Date --
 *  Aug 12, 2005
 *
 *  Design -- dynamically loaded shared object; compile with C or C++.
 *   Models FP instruction behavior according to N2 PRM Appendix I.
 *	 The general strategy is to use the SPARC FP instructions
 *   while filtering out the cases where N2 requires exceptions
 *   but Solaris would simulate the operation (e.g. subnormals).
 *
 *  (c) COPYRIGHT 2005-2006 Sun Microsystems, Inc.
 *  Sun Confidential: Sun SSG Only
 ***************************************************************************/

/*********************************************************************
 * ATTENTION: This code is part of a library shared by multiple
 * projects.  DO NOT MAKE CHANGES TO THIS CODE WITHIN YOUR PROJECT.
 * Instead, contact the owner/maintainer of the library, currently:
 *    Robert.Rethemeyer@Sun.COM   +1-408-616-5717  (x45717)
 *    Systems Group: TVT: FrontEnd Technologies
 * The CVS source code repository for the library is at:
 *    /import/ftap-blimp1/cvs/fpsim
 * DO NOT COMMIT CHANGES TO THAT REPOSITORY: contact the maintainer.
 ********************************************************************/

/*tab length=4*/

/* CVS revision identification string for this source file ($Id$ keyword). */
static const char cvsid[] = 
 "$Id: fpsim_n2.c,v 1.6 2006/12/07 20:52:04 bobsmail Exp $";


#include "fpsim_support.h"
#include "fpsim.h"


/* Prototypes for the file-local helpers defined near the end of this file.
   They are wrapped in extern "C" when the file is compiled as C++. */
#ifdef __cplusplus
extern "C" {
#endif
/* dissect_*: split an FP value into sign/exp/frac, return its class, and
   (when std==0) flush a subnormal to zero, reporting FPX_NX through exc. */
static int dissect_double( double fpnum, fpdouble* data, int std, int* exc );
static int dissect_single( float  fpnum, fpsingle* data, int std, int* exc );
/* overflow: store infinity or the largest finite value into rslt,
   chosen by rounding mode rm and sign si; dbl selects double vs. single. */
static void overflow( void* rslt, int dbl, int rm, int si );
#ifdef __cplusplus
}
#endif

#define FSR_NS 0x00400000  /*FSR nonstandard bit (bit 22)*/
#define GSR_IM 0x08000000  /*GSR interval mode bit (bit 27)*/
#define STDONLY 1          /*Standard mode only*/
/* STDMODE: 1 when FSR.NS (bit 22) is clear or GSR.IM (bit 27) is set, else 0.
   Arguments are fully parenthesized so compound expressions evaluate
   correctly. */
#define STDMODE(FSR,GSR) (((((FSR)>>22)^1)|((GSR)>>27)) & 1)
/* RNDMODE: 2-bit rounding mode, taken from GSR bits 26:25 when GSR.IM is
   set, otherwise from FSR bits 31:30. */
#define RNDMODE(FSR,GSR) (int)((((GSR) & GSR_IM) ? ((GSR)>>25) : ((FSR)>>30)) & 3)

/*===========================================================================*/


//-------------------------------------------------------------------
// FADDD -- double-precision add
//
// Inputs:  p_op1, p_op2  operands;  p_fsr, p_gsr  caller's FSR/GSR
// Output:  *p_res        sum (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), or FPX_UN when a subnormal operand
//          or a subnormal/underflowed result is met in standard mode.
//-------------------------------------------------------------------
int 
fpsim_faddd( const double* p_op1, const double* p_op2, double* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	const int std_mode   = STDMODE(p_fsr,p_gsr);
	const int round_mode = RNDMODE(p_fsr,p_gsr);
	fpdouble lhs, rhs, out;
	int scratch[2];
	int flags = 0;
	double sum;

	// classify the operands (nonstandard mode flushes subnormals, sets NX)
	const int lhs_class = dissect_double(*p_op1, &lhs, std_mode, &flags);
	const int rhs_class = dissect_double(*p_op2, &rhs, std_mode, &flags);

	if(lhs_class >= fp_infinity || rhs_class >= fp_infinity)
	{
		// an infinity/NaN operand cancels any NX from a flushed subnormal
		flags = 0;
	}
	else if(fp_subnormal == lhs_class || fp_subnormal == rhs_class)
	{
		// subnormal operand together with finite operands only
		return FPX_UN;
	}

	flags |= asm_faddd(&lhs.fp.num, &rhs.fp.num, &sum, round_mode, scratch);

	const int sum_class = dissect_double(sum, &out, STDONLY, NULL);
	if(fp_subnormal != sum_class && 0 == (flags & FPX_UF))
	{
		// ordinary result: pass it straight through
		*p_res = out.fp.num;
		return flags;
	}

	// subnormal result or underflow reported by the add
	if(std_mode) return FPX_UN;

	// nonstandard mode: gross underflow, signed zero result
	out.fp.inte = (uint64)out.sign << 63;
	*p_res = out.fp.num;
	return FPX_UF|FPX_NX;
}


//-------------------------------------------------------------------
// FSUBD -- double-precision subtract (op1 - op2 as performed by asm_fsubd)
//
// Inputs:  p_op1, p_op2  operands;  p_fsr, p_gsr  caller's FSR/GSR
// Output:  *p_res        difference (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), or FPX_UN when a subnormal operand
//          or a subnormal/underflowed result is met in standard mode.
//-------------------------------------------------------------------
int 
fpsim_fsubd( const double* p_op1, const double* p_op2, double* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	const int std_mode   = STDMODE(p_fsr,p_gsr);
	const int round_mode = RNDMODE(p_fsr,p_gsr);
	fpdouble lhs, rhs, out;
	int scratch[2];
	int flags = 0;
	double diff;

	// classify the operands (nonstandard mode flushes subnormals, sets NX)
	const int lhs_class = dissect_double(*p_op1, &lhs, std_mode, &flags);
	const int rhs_class = dissect_double(*p_op2, &rhs, std_mode, &flags);

	if(lhs_class >= fp_infinity || rhs_class >= fp_infinity)
	{
		// an infinity/NaN operand cancels any NX from a flushed subnormal
		flags = 0;
	}
	else if(fp_subnormal == lhs_class || fp_subnormal == rhs_class)
	{
		// subnormal operand together with finite operands only
		return FPX_UN;
	}

	flags |= asm_fsubd(&lhs.fp.num, &rhs.fp.num, &diff, round_mode, scratch);

	const int diff_class = dissect_double(diff, &out, STDONLY, NULL);
	if(fp_subnormal != diff_class && 0 == (flags & FPX_UF))
	{
		// ordinary result: pass it straight through
		*p_res = out.fp.num;
		return flags;
	}

	// subnormal result or underflow reported by the subtract
	if(std_mode) return FPX_UN;

	// nonstandard mode: gross underflow, signed zero result
	out.fp.inte = (uint64)out.sign << 63;
	*p_res = out.fp.num;
	return FPX_UF|FPX_NX;
}


//-------------------------------------------------------------------
// FMULD -- double-precision multiply
//
// Inputs:  p_op1, p_op2  operands;  p_fsr, p_gsr  caller's FSR/GSR
// Output:  *p_res        product (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), FPX_UN for the subnormal cases that
//          are not resolved here, or FPX_UF|FPX_NX with a signed zero
//          result for gross underflow.
//-------------------------------------------------------------------
int 
fpsim_fmuld( const double* p_op1, const double* p_op2, double* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	const int std_mode   = STDMODE(p_fsr,p_gsr);
	const int round_mode = RNDMODE(p_fsr,p_gsr);
	fpdouble lhs, rhs, out;
	int scratch[2];
	int lhs_flags = 0, rhs_flags = 0;
	double product;

	// separate flag words: needed later to tell exact zeros from flushed ones
	const int lhs_class = dissect_double(*p_op1, &lhs, std_mode, &lhs_flags);
	const int rhs_class = dissect_double(*p_op2, &rhs, std_mode, &rhs_flags);
	int flags = lhs_flags | rhs_flags;

	// estimated biased result exponent and result sign
	const int exp_est = lhs.exp + rhs.exp - 1023;
	const int neg     = lhs.sign ^ rhs.sign;

	// rounding directed away from zero for this sign?
	const int dir_away = neg ? (round_mode==FP_RM) : (round_mode==FP_RP);
	// the tiny result cannot simply be replaced by zero
	const int unfinished = (exp_est > -54) || dir_away;

	const int lhs_finite_nz = (lhs_class < fp_infinity) && (lhs_class != fp_zero);
	const int rhs_finite_nz = (rhs_class < fp_infinity) && (rhs_class != fp_zero);

	// subnormal operand times a finite nonzero operand
	if((fp_subnormal == lhs_class && rhs_finite_nz)
	|| (fp_subnormal == rhs_class && lhs_finite_nz))
	{
		if(unfinished) return FPX_UN;
		// gross underflow, zero result
		out.fp.inte = (uint64)neg << 63;
		*p_res = out.fp.num;
		return FPX_UF|FPX_NX;
	}

	// some operands preclude setting NX for a flushed subnormal:
	// infinity/NaN, or an operand that was exactly zero to begin with
	{
		const int any_special = (lhs_class >= fp_infinity)
							 || (rhs_class >= fp_infinity);
		const int exact_zero  = (fp_zero == lhs_class && 0 == lhs_flags)
							 || (fp_zero == rhs_class && 0 == rhs_flags);
		if(any_special || exact_zero) flags = 0; // cancel NX
	}

	flags |= asm_fmuld(&lhs.fp.num, &rhs.fp.num, &product, round_mode, scratch);

	const int out_class = dissect_double(product, &out, STDONLY, NULL);
	const int tiny_result = (out_class <= fp_subnormal)
						 && (fp_normal == lhs_class)
						 && (fp_normal == rhs_class)
						 && (exp_est <= 0);
	if(tiny_result) // subnormal result from two normal operands
	{
		if(std_mode && unfinished) return FPX_UN;
		// gross underflow (or nonstandard mode): zero result
		out.fp.inte = (uint64)neg << 63;
		flags = FPX_UF|FPX_NX;
	}
	*p_res = out.fp.num;
	return flags;
}


//-------------------------------------------------------------------
// FDIVD -- double-precision divide (op1 / op2)
//
// Inputs:  p_op1, p_op2  dividend, divisor;  p_fsr, p_gsr  FSR/GSR
// Output:  *p_res        quotient (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), FPX_UN for the subnormal cases that
//          are not resolved here, FPX_OF|FPX_NX for overflow with a
//          subnormal operand, or FPX_UF|FPX_NX with a signed zero result
//          for gross underflow.
//-------------------------------------------------------------------
int 
fpsim_fdivd( const double* p_op1, const double* p_op2, double* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	const int std_mode   = STDMODE(p_fsr,p_gsr);
	const int round_mode = RNDMODE(p_fsr,p_gsr);
	fpdouble num, den, out;
	int scratch[2];
	int flags = 0;
	double quotient;

	const int num_class = dissect_double(*p_op1, &num, std_mode, &flags);
	const int den_class = dissect_double(*p_op2, &den, std_mode, &flags);

	// estimated biased result exponent and result sign
	const int exp_est = num.exp - den.exp + 1023 - 1;
	const int neg     = num.sign ^ den.sign;

	// rounding directed away from zero for this sign?
	const int dir_away = neg ? (round_mode==FP_RM) : (round_mode==FP_RP);
	// the tiny result cannot simply be replaced by zero
	const int unfinished = (exp_est > -54) || dir_away;

	const int num_finite_nz = (num_class < fp_infinity) && (num_class != fp_zero);
	const int den_finite_nz = (den_class < fp_infinity) && (den_class != fp_zero);

	// subnormal operand with a finite nonzero partner
	if((fp_subnormal == num_class && den_finite_nz)
	|| (fp_subnormal == den_class && num_finite_nz))
	{
		uint64 num_sig = num.frac;
		uint64 den_sig = den.frac;
		int exp_full = exp_est + 1;

		// add the implicit leading one for normal operands
		if(fp_normal == num_class) num_sig |= 0x0010000000000000;
		if(fp_normal == den_class) den_sig |= 0x0010000000000000;
		if(num_sig < den_sig) exp_full--;

		// The PRM threshold would be 2046; the check uses 2047 because the
		// PRM error is now frozen in HW (Metrax 109086).
		if(exp_full > 2047)
		{
			overflow(p_res, 1, round_mode, neg);
			return FPX_OF|FPX_NX;
		}
		if(unfinished) return FPX_UN;
		// gross underflow, zero result
		out.fp.inte = (uint64)neg << 63;
		*p_res = out.fp.num;
		return FPX_UF|FPX_NX;
	}

	// some operands preclude setting NX for a flushed subnormal:
	// a zero divisor (DZ cancels NX) or any infinity/NaN
	if(fp_zero == den_class
	|| num_class >= fp_infinity
	|| den_class >= fp_infinity)
	{
		flags = 0;
	}

	flags |= asm_fdivd(&num.fp.num, &den.fp.num, &quotient, round_mode, scratch);

	const int out_class = dissect_double(quotient, &out, STDONLY, NULL);
	const int tiny_result = (out_class <= fp_subnormal)
						 && (fp_normal == num_class)
						 && (fp_normal == den_class)
						 && (exp_est <= 0);
	if(tiny_result) // subnormal result from two normal operands
	{
		if(std_mode && unfinished) return FPX_UN;
		// gross underflow (or nonstandard mode): zero result
		out.fp.inte = (uint64)neg << 63;
		flags = FPX_UF|FPX_NX;
	}
	*p_res = out.fp.num;
	return flags;
}


//-------------------------------------------------------------------
// FSQRTD -- double-precision square root
//
// Inputs:  p_op2  operand;  p_fsr, p_gsr  caller's FSR/GSR
// Output:  *p_res result (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), or FPX_UN for a positive subnormal
//          operand in standard mode.
//-------------------------------------------------------------------
int 
fpsim_fsqrtd( const double* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr )
{
	const int std_mode = STDMODE(p_fsr,p_gsr);
	fpdouble arg;
	int scratch[2];
	int flags = 0;

	const int arg_class = dissect_double(*p_op2, &arg, std_mode, &flags);
	const int positive_subnormal = (fp_subnormal == arg_class) && (0 == arg.sign);

	if(std_mode && positive_subnormal)
	{
		return FPX_UN;
	}

	// every other operand goes to the square-root routine
	return flags | asm_fsqrtd(&arg.fp.num, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FADDS -- single-precision add
//
// Inputs:  p_op1, p_op2  operands;  p_fsr, p_gsr  caller's FSR/GSR
// Output:  *p_res        sum (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), or FPX_UN when a subnormal operand
//          or a subnormal/underflowed result is met in standard mode.
//-------------------------------------------------------------------
int 
fpsim_fadds( const float* p_op1, const float* p_op2, float* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	fpsingle op1, op2, res;
	int stdmode = STDMODE(p_fsr,p_gsr);
	int scr[2], exc=0;

	// classify the operands (nonstandard mode flushes subnormals, sets NX)
	int type1 = dissect_single(*p_op1, &op1, stdmode, &exc);
	int type2 = dissect_single(*p_op2, &op2, stdmode, &exc);
	
	// subnormal operand together with finite operands only
	if(type1 < fp_infinity
	&& type2 < fp_infinity
	&&(fp_subnormal==type1 || fp_subnormal==type2))
	{
		return FPX_UN;
	}
	// an infinity/NaN operand cancels any NX from a flushed subnormal
	if(type1>=fp_infinity || type2>=fp_infinity) exc = 0;

	float result;
	int rnd = RNDMODE(p_fsr,p_gsr);
	exc |= asm_fadds(&op1.fp.num, &op2.fp.num, &result, rnd, scr);

	int rtype = dissect_single(result, &res, STDONLY, NULL);
	if(fp_subnormal == rtype
	|| (exc & FPX_UF) )
	{
		if(stdmode) return FPX_UN;
		//else gross underflow, zero result
		// shift as unsigned (as the other single-precision routines do):
		// shifting a signed 1 into bit 31 is undefined behavior
		res.fp.inte = (uint)res.sign << 31;
		exc = FPX_UF|FPX_NX;
	}
	*p_res = res.fp.num;
	return exc;
}


//-------------------------------------------------------------------
// FSUBS -- single-precision subtract (op1 - op2 as performed by asm_fsubs)
//
// Inputs:  p_op1, p_op2  operands;  p_fsr, p_gsr  caller's FSR/GSR
// Output:  *p_res        difference (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), or FPX_UN when a subnormal operand
//          or a subnormal/underflowed result is met in standard mode.
//-------------------------------------------------------------------
int 
fpsim_fsubs( const float* p_op1, const float* p_op2, float* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	fpsingle op1, op2, res;
	int stdmode = STDMODE(p_fsr,p_gsr);
	int scr[2], exc=0;

	// classify the operands (nonstandard mode flushes subnormals, sets NX)
	int type1 = dissect_single(*p_op1, &op1, stdmode, &exc);
	int type2 = dissect_single(*p_op2, &op2, stdmode, &exc);
	
	// subnormal operand together with finite operands only
	if(type1 < fp_infinity
	&& type2 < fp_infinity
	&&(fp_subnormal==type1 || fp_subnormal==type2))
	{
		return FPX_UN;
	}
	// an infinity/NaN operand cancels any NX from a flushed subnormal
	if(type1>=fp_infinity || type2>=fp_infinity) exc = 0;

	float result;
	int rnd = RNDMODE(p_fsr,p_gsr);
	exc |= asm_fsubs(&op1.fp.num, &op2.fp.num, &result, rnd, scr);

	int rtype = dissect_single(result, &res, STDONLY, NULL);
	if(fp_subnormal == rtype
	|| (exc & FPX_UF) )
	{
		if(stdmode) return FPX_UN;
		//else gross underflow, zero result
		// shift as unsigned (as the other single-precision routines do):
		// shifting a signed 1 into bit 31 is undefined behavior
		res.fp.inte = (uint)res.sign << 31;
		exc = FPX_UF|FPX_NX;
	}
	*p_res = res.fp.num;
	return exc;
}


//-------------------------------------------------------------------
// FMULS -- single-precision multiply
//
// Inputs:  p_op1, p_op2  operands;  p_fsr, p_gsr  caller's FSR/GSR
// Output:  *p_res        product (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), FPX_UN for the subnormal cases that
//          are not resolved here, or FPX_UF|FPX_NX with a signed zero
//          result for gross underflow.
//-------------------------------------------------------------------
int 
fpsim_fmuls( const float* p_op1, const float* p_op2, float* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	const int std_mode   = STDMODE(p_fsr,p_gsr);
	const int round_mode = RNDMODE(p_fsr,p_gsr);
	fpsingle lhs, rhs, out;
	int scratch[2];
	int lhs_flags = 0, rhs_flags = 0;
	float product;

	// separate flag words: needed later to tell exact zeros from flushed ones
	const int lhs_class = dissect_single(*p_op1, &lhs, std_mode, &lhs_flags);
	const int rhs_class = dissect_single(*p_op2, &rhs, std_mode, &rhs_flags);
	int flags = lhs_flags | rhs_flags;

	// estimated biased result exponent and result sign
	const int exp_est = lhs.exp + rhs.exp - 127;
	const int neg     = lhs.sign ^ rhs.sign;

	// rounding directed away from zero for this sign?
	const int dir_away = neg ? (round_mode==FP_RM) : (round_mode==FP_RP);
	// the tiny result cannot simply be replaced by zero
	const int unfinished = (exp_est > -25) || dir_away;

	const int lhs_finite_nz = (lhs_class < fp_infinity) && (lhs_class != fp_zero);
	const int rhs_finite_nz = (rhs_class < fp_infinity) && (rhs_class != fp_zero);

	// subnormal operand times a finite nonzero operand
	if((fp_subnormal == lhs_class && rhs_finite_nz)
	|| (fp_subnormal == rhs_class && lhs_finite_nz))
	{
		if(unfinished) return FPX_UN;
		// gross underflow, zero result
		out.fp.inte = (uint)neg << 31;
		*p_res = out.fp.num;
		return FPX_UF|FPX_NX;
	}

	// some operands preclude setting NX for a flushed subnormal:
	// infinity/NaN, or an operand that was exactly zero to begin with
	{
		const int any_special = (lhs_class >= fp_infinity)
							 || (rhs_class >= fp_infinity);
		const int exact_zero  = (fp_zero == lhs_class && 0 == lhs_flags)
							 || (fp_zero == rhs_class && 0 == rhs_flags);
		if(any_special || exact_zero) flags = 0; // cancel NX
	}

	flags |= asm_fmuls(&lhs.fp.num, &rhs.fp.num, &product, round_mode, scratch);

	const int out_class = dissect_single(product, &out, STDONLY, NULL);
	const int tiny_result = (out_class <= fp_subnormal)
						 && (fp_normal == lhs_class)
						 && (fp_normal == rhs_class)
						 && (exp_est <= 0);
	if(tiny_result) // subnormal result from two normal operands
	{
		if(std_mode && unfinished) return FPX_UN;
		// gross underflow (or nonstandard mode): zero result
		out.fp.inte = (uint)neg << 31;
		flags = FPX_UF|FPX_NX;
	}
	*p_res = out.fp.num;
	return flags;
}


//-------------------------------------------------------------------
// FSMULD -- multiply two singles giving a double
//
// Inputs:  p_op1, p_op2  single operands;  p_fsr, p_gsr  FSR/GSR
// Output:  *p_res        double product (not written on FPX_UN)
// Returns: exception flags (FPX_*), or FPX_UN when a subnormal operand
//          meets a finite nonzero operand.
//-------------------------------------------------------------------
int 
fpsim_fsmuld( const float* p_op1, const float* p_op2, double* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	const int std_mode = STDMODE(p_fsr,p_gsr);
	fpsingle lhs, rhs;
	int scratch[2];
	int lhs_flags = 0, rhs_flags = 0;

	// separate flag words: needed later to tell exact zeros from flushed ones
	const int lhs_class = dissect_single(*p_op1, &lhs, std_mode, &lhs_flags);
	const int rhs_class = dissect_single(*p_op2, &rhs, std_mode, &rhs_flags);
	int flags = lhs_flags | rhs_flags;

	const int lhs_finite_nz = (lhs_class < fp_infinity) && (lhs_class != fp_zero);
	const int rhs_finite_nz = (rhs_class < fp_infinity) && (rhs_class != fp_zero);

	if((fp_subnormal == lhs_class && rhs_finite_nz)
	|| (fp_subnormal == rhs_class && lhs_finite_nz))
	{
		return FPX_UN;
	}

	// some operands preclude setting NX for a flushed subnormal:
	// infinity/NaN, or an operand that was exactly zero to begin with
	{
		const int any_special = (lhs_class >= fp_infinity)
							 || (rhs_class >= fp_infinity);
		const int exact_zero  = (fp_zero == lhs_class && 0 == lhs_flags)
							 || (fp_zero == rhs_class && 0 == rhs_flags);
		if(any_special || exact_zero) flags = 0; // cancel NX
	}

	return flags | asm_fsmuld(&lhs.fp.num, &rhs.fp.num, p_res, scratch);
}


//-------------------------------------------------------------------
// FDIVS -- single-precision divide (op1 / op2)
//
// Inputs:  p_op1, p_op2  dividend, divisor;  p_fsr, p_gsr  FSR/GSR
// Output:  *p_res        quotient (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), FPX_UN for the subnormal cases that
//          are not resolved here, FPX_OF|FPX_NX for overflow with a
//          subnormal operand, or FPX_UF|FPX_NX with a signed zero result
//          for gross underflow.
//-------------------------------------------------------------------
int 
fpsim_fdivs( const float* p_op1, const float* p_op2, float* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	const int std_mode   = STDMODE(p_fsr,p_gsr);
	const int round_mode = RNDMODE(p_fsr,p_gsr);
	fpsingle num, den, out;
	int scratch[2];
	int flags = 0;
	float quotient;

	const int num_class = dissect_single(*p_op1, &num, std_mode, &flags);
	const int den_class = dissect_single(*p_op2, &den, std_mode, &flags);

	// estimated biased result exponent and result sign
	const int exp_est = num.exp - den.exp + 127 - 1;
	const int neg     = num.sign ^ den.sign;

	// rounding directed away from zero for this sign?
	const int dir_away = neg ? (round_mode==FP_RM) : (round_mode==FP_RP);
	// the tiny result cannot simply be replaced by zero
	const int unfinished = (exp_est > -25) || dir_away;

	const int num_finite_nz = (num_class < fp_infinity) && (num_class != fp_zero);
	const int den_finite_nz = (den_class < fp_infinity) && (den_class != fp_zero);

	// subnormal operand with a finite nonzero partner
	if((fp_subnormal == num_class && den_finite_nz)
	|| (fp_subnormal == den_class && num_finite_nz))
	{
		int num_sig = num.frac;
		int den_sig = den.frac;
		int exp_full = exp_est + 1;

		// add the implicit leading one for normal operands
		if(fp_normal == num_class) num_sig |= 0x800000;
		if(fp_normal == den_class) den_sig |= 0x800000;
		if(num_sig < den_sig) exp_full--;

		// The PRM threshold would be 254; the check uses 255 because the
		// PRM error is now frozen in HW (Metrax 109086).
		if(exp_full > 255)
		{
			overflow(p_res, 0, round_mode, neg);
			return FPX_OF|FPX_NX;
		}
		if(unfinished) return FPX_UN;
		// gross underflow, zero result
		out.fp.inte = (uint)neg << 31;
		*p_res = out.fp.num;
		return FPX_UF|FPX_NX;
	}

	// some operands preclude setting NX for a flushed subnormal:
	// a zero divisor (DZ cancels NX) or any infinity/NaN
	if(fp_zero == den_class
	|| num_class >= fp_infinity
	|| den_class >= fp_infinity)
	{
		flags = 0;
	}

	flags |= asm_fdivs(&num.fp.num, &den.fp.num, &quotient, round_mode, scratch);

	const int out_class = dissect_single(quotient, &out, STDONLY, NULL);
	const int tiny_result = (out_class <= fp_subnormal)
						 && (fp_normal == num_class)
						 && (fp_normal == den_class)
						 && (exp_est <= 0);
	if(tiny_result) // subnormal result from two normal operands
	{
		if(std_mode && unfinished) return FPX_UN;
		// gross underflow (or nonstandard mode): zero result
		out.fp.inte = (uint)neg << 31;
		flags = FPX_UF|FPX_NX;
	}
	*p_res = out.fp.num;
	return flags;
}


//-------------------------------------------------------------------
// FSQRTS -- single-precision square root
//
// Inputs:  p_op2  operand;  p_fsr, p_gsr  caller's FSR/GSR
// Output:  *p_res result (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), or FPX_UN for a positive subnormal
//          operand in standard mode.
//-------------------------------------------------------------------
int 
fpsim_fsqrts( const float* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
{
	const int std_mode = STDMODE(p_fsr,p_gsr);
	fpsingle arg;
	int scratch[2];
	int flags = 0;

	const int arg_class = dissect_single(*p_op2, &arg, std_mode, &flags);
	const int positive_subnormal = (fp_subnormal == arg_class) && (0 == arg.sign);

	if(std_mode && positive_subnormal)
	{
		return FPX_UN;
	}

	// every other operand goes to the square-root routine
	return flags | asm_fsqrts(&arg.fp.num, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FSTOD -- convert single to double
//
// Inputs:  p_op2  single operand;  p_fsr, p_gsr  caller's FSR/GSR
// Output:  *p_res double result (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), or FPX_UN for a subnormal operand
//          (only possible in standard mode; otherwise it is flushed).
//-------------------------------------------------------------------
int 
fpsim_fstod( const float* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr )
{
	fpsingle src;
	int scratch[2];
	int flags = 0;

	const int src_class =
		dissect_single(*p_op2, &src, STDMODE(p_fsr,p_gsr), &flags);

	if(fp_subnormal == src_class)
	{
		return FPX_UN;
	}
	return flags | asm_fstod(&src.fp.num, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FDTOS -- convert double to single
//
// Inputs:  p_op2  double operand;  p_fsr, p_gsr  caller's FSR/GSR
// Output:  *p_res single result (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), FPX_UN for the subnormal cases that
//          are not resolved here, or FPX_UF|FPX_NX with a signed zero
//          result for gross underflow.
//-------------------------------------------------------------------
int 
fpsim_fdtos( const double* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
{
	const int std_mode   = STDMODE(p_fsr,p_gsr);
	const int round_mode = RNDMODE(p_fsr,p_gsr);
	fpdouble src;
	fpsingle out;
	int scratch[2];
	int flags = 0;
	float narrowed;

	const int src_class = dissect_double(*p_op2, &src, std_mode, &flags);
	// operand exponent rebiased from double (1023) to single (127)
	const int exp_est = src.exp - 1023 + 127;

	if(fp_subnormal == src_class)
	{
		// rounding directed away from zero for the operand's sign?
		const int dir_away = src.sign ? (round_mode==FP_RM)
									  : (round_mode==FP_RP);
		if(dir_away) return FPX_UN;
		// gross underflow, zero result
		out.fp.inte = (uint)src.sign << 31;
		*p_res = out.fp.num;
		return FPX_UF|FPX_NX;
	}

	flags |= asm_fdtos(&src.fp.num, &narrowed, round_mode, scratch);

	const int out_class = dissect_single(narrowed, &out, STDONLY, NULL);
	if(out_class <= fp_subnormal
	&& fp_normal == src_class
	&& exp_est <= 0) // subnormal result from a normal operand
	{
		if(std_mode)
		{
			const int dir_away = out.sign ? (round_mode==FP_RM)
										  : (round_mode==FP_RP);
			if((exp_est > -25) || dir_away) return FPX_UN;
		}
		// gross underflow (or nonstandard mode): zero result
		out.fp.inte = (uint)out.sign << 31;
		flags = FPX_UF|FPX_NX;
	}
	*p_res = out.fp.num;
	return flags;
}


//-------------------------------------------------------------------
// FSTOX -- convert single to 64-bit integer
//
// Inputs:  p_op2  single operand;  p_fsr, p_gsr  caller's FSR/GSR
// Output:  *p_res integer result (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), or FPX_UN for a subnormal operand
//          (only possible in standard mode; otherwise it is flushed).
//-------------------------------------------------------------------
int 
fpsim_fstox( const float* p_op2, uint64* p_res, uint64 p_fsr, uint64 p_gsr )
{
	fpsingle src;
	int scratch[2];
	int flags = 0;

	const int src_class =
		dissect_single(*p_op2, &src, STDMODE(p_fsr,p_gsr), &flags);

	if(fp_subnormal == src_class)
	{
		return FPX_UN;
	}
	return flags | asm_fstox(&src.fp.num, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FDTOX -- convert double to 64-bit integer
//
// Inputs:  p_op2  double operand;  p_fsr, p_gsr  caller's FSR/GSR
// Output:  *p_res integer result (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), or FPX_UN for a subnormal operand
//          (only possible in standard mode; otherwise it is flushed).
//-------------------------------------------------------------------
int 
fpsim_fdtox( const double* p_op2, uint64* p_res, uint64 p_fsr, uint64 p_gsr )
{
	fpdouble src;
	int scratch[2];
	int flags = 0;

	const int src_class =
		dissect_double(*p_op2, &src, STDMODE(p_fsr,p_gsr), &flags);

	if(fp_subnormal == src_class)
	{
		return FPX_UN;
	}
	return flags | asm_fdtox(&src.fp.num, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FSTOI -- convert single to 32-bit integer
//
// Inputs:  p_op2  single operand;  p_fsr, p_gsr  caller's FSR/GSR
// Output:  *p_res integer result (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), or FPX_UN for a subnormal operand
//          (only possible in standard mode; otherwise it is flushed).
//-------------------------------------------------------------------
int 
fpsim_fstoi( const float* p_op2, uint* p_res, uint64 p_fsr, uint64 p_gsr )
{
	fpsingle src;
	int scratch[2];
	int flags = 0;

	const int src_class =
		dissect_single(*p_op2, &src, STDMODE(p_fsr,p_gsr), &flags);

	if(fp_subnormal == src_class)
	{
		return FPX_UN;
	}
	return flags | asm_fstoi(&src.fp.num, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FDTOI -- convert double to 32-bit integer
//
// Inputs:  p_op2  double operand;  p_fsr, p_gsr  caller's FSR/GSR
// Output:  *p_res integer result (not written when FPX_UN is returned)
// Returns: exception flags (FPX_*), or FPX_UN for a subnormal operand
//          (only possible in standard mode; otherwise it is flushed).
//-------------------------------------------------------------------
int 
fpsim_fdtoi( const double* p_op2, uint* p_res, uint64 p_fsr, uint64 p_gsr )
{
	fpdouble src;
	int scratch[2];
	int flags = 0;

	const int src_class =
		dissect_double(*p_op2, &src, STDMODE(p_fsr,p_gsr), &flags);

	if(fp_subnormal == src_class)
	{
		return FPX_UN;
	}
	return flags | asm_fdtoi(&src.fp.num, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FXTOS -- convert 64-bit integer to single
//
// Passes the operand straight to asm_fxtos with the rounding mode
// selected by FSR/GSR and returns the exception flags it reports.
//-------------------------------------------------------------------
int 
fpsim_fxtos( const uint64* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
{
	int scratch[2];
	const int round_mode = RNDMODE(p_fsr,p_gsr);

	return asm_fxtos(p_op2, p_res, round_mode, scratch);
}


//-------------------------------------------------------------------
// FXTOD -- convert 64-bit integer to double
//
// Passes the operand straight to asm_fxtod with the rounding mode
// selected by FSR/GSR and returns the exception flags it reports.
//-------------------------------------------------------------------
int 
fpsim_fxtod( const uint64* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr )
{
	int scratch[2];
	const int round_mode = RNDMODE(p_fsr,p_gsr);

	return asm_fxtod(p_op2, p_res, round_mode, scratch);
}


//-------------------------------------------------------------------
// FITOS -- convert 32-bit integer to single
//
// Passes the operand straight to asm_fitos with the rounding mode
// selected by FSR/GSR and returns the exception flags it reports.
//-------------------------------------------------------------------
int 
fpsim_fitos( const uint* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
{
	int scratch[2];
	const int round_mode = RNDMODE(p_fsr,p_gsr);

	return asm_fitos(p_op2, p_res, round_mode, scratch);
}


//-------------------------------------------------------------------
// FITOD -- convert 32-bit integer to double
//
// Takes no FSR/GSR arguments and always reports zero exception flags.
//-------------------------------------------------------------------
int 
fpsim_fitod( const uint* p_op2, double* p_res )
{
	const int no_exceptions = 0;

	asm_fitod(p_op2, p_res);
	return no_exceptions;
}


//-------------------------------------------------------------------
// GSR_MASK
//
// Returns a constant 64-bit mask with bits 63:32, 27:25 and 7:0 set.
// NOTE(review): presumably the GSR bits implemented by this model --
// confirm against the caller.
//-------------------------------------------------------------------

uint64 fpsim_gsr_mask(void)
{
	const uint64 mask = 0xFFFFFFFF0E0000FF;
	return mask;
}


//-------------------------------------------------------------------
// UPDATE_FSR
//-------------------------------------------------------------------

/* Folds the exception code returned by one of the instruction sim
   routines into the caller's FSR and reports which trap, if any, the
   caller should post.

   p_exc  exception code from an fpsim_* routine
   p_fsr  caller's FSR; updated in place, except for illegal_instr

   Returns 0 for no trap, otherwise the trap-type code:
   0x10 (illegal_instr, FSR untouched), 0x21 (IEEE_754_exception)
   or 0x22 (FP_other).
*/

int
fpsim_update_fsr( int p_exc, uint64* p_fsr )
{
	uint64 new_fsr = *p_fsr;

	// non-IEEE trap?  (unfinished or illegal)
	if(p_exc & FPX_TRAP)
	{
		const int reason = p_exc & 0xFF; // trap reason in bits 7:0
		if(0x10 == reason) return reason; // illegal_instr: no fsr update

		// FP_other trap: the reason code becomes fsr.ftt (bits 16:14)
		*p_fsr = (new_fsr & ~0x1C000) | (reason << 14);
		return 0x22;
	}

	// IEEE trap or completion
	{
		int enabled = p_exc & (new_fsr>>23) & 0x1F; // mask with fsr.tem

		new_fsr &= ~0x1C01F; // clear old fsr.cexc and fsr.ftt

		if(0 == enabled) // no trap
		{
			// set both cexc,aexc identically
			new_fsr |= (p_exc | (p_exc<<5));
			*p_fsr = new_fsr;
			return 0;
		}

		// trap taken: an OF/UF trap cancels the NX bit
		if(enabled & (FPX_OF|FPX_UF))
		{
			enabled &= ~FPX_NX;
		}
		new_fsr |= (enabled | 0x04000); // set fsr.cexc only; fsr.ftt=1
		*p_fsr = new_fsr;
		return 0x21;  // IEEE_754_exception
	}
}


/*===========================================================================*/

/* Dissection routines:  pick apart the FP number into sign, exp, fraction
    and flush subnormal numbers to zero in nonstandard mode.
	Return the FP number class:  0=zero 1=subnorm 2=normal 3=inf 4=qnan 5=snan
*/

/* dissect_double: split a double into sign, exponent and fraction fields
   and classify it.

   p_fpnum  value to dissect
   p_data   receives the raw value plus its sign/exp/frac fields
   p_std    nonzero = standard mode; zero = nonstandard mode, in which a
            subnormal is replaced by a same-signed zero
   p_exc    set to FPX_NX when a subnormal is flushed; may be NULL, in
            which case the flush is performed but not reported

   Returns the class: fp_zero, fp_subnormal, fp_normal, fp_infinity,
   fp_quiet or fp_signaling.
*/
static int 
dissect_double( double p_fpnum, fpdouble* p_data, int p_std, int* p_exc )
{
	p_data->fp.num = p_fpnum;
	p_data->sign = p_data->fp.inte >> 63;
	p_data->frac = p_data->fp.inte & 0x000FFFFFFFFFFFFF;
	p_data->exp  = (p_data->fp.inte >> 52) & 0x7FF;

	// classify the number
	int fpclass = fp_normal;
	if(0 == p_data->exp) // exponent zero?
	{
		fpclass = (0 == p_data->frac)
				? fp_zero		//true
				: fp_subnormal; //false
	}
	else if(0x7FF == p_data->exp) // exponent all ones?
	{
		if(0 == p_data->frac) fpclass = fp_infinity;
		else
		{
			fpclass = (p_data->frac & 0x0008000000000000) //frac.msb==1?
					? fp_quiet		//true
					: fp_signaling; //false
		}
	}

	// nonstandard mode: flush subnormals to 0
	if(0 == p_std
	&& fp_subnormal == fpclass)
	{
		p_data->fp.inte = (uint64)p_data->sign << 63;
		p_data->frac = 0;
		fpclass = fp_zero;
		// callers that pass NULL do so with STDONLY today; guard anyway
		if(p_exc != NULL) *p_exc = FPX_NX;
	}
	return fpclass;
}

/* dissect_single: split a float into sign, exponent and fraction fields
   and classify it.

   p_fpnum  value to dissect
   p_data   receives the raw value plus its sign/exp/frac fields
   p_std    nonzero = standard mode; zero = nonstandard mode, in which a
            subnormal is replaced by a same-signed zero
   p_exc    set to FPX_NX when a subnormal is flushed; may be NULL, in
            which case the flush is performed but not reported

   Returns the class: fp_zero, fp_subnormal, fp_normal, fp_infinity,
   fp_quiet or fp_signaling.
*/
static int 
dissect_single( float p_fpnum, fpsingle* p_data, int p_std, int* p_exc )
{
	p_data->fp.num = p_fpnum;
	p_data->sign = p_data->fp.inte >> 31;
	p_data->frac = p_data->fp.inte & 0x007FFFFF;
	p_data->exp  = (p_data->fp.inte >> 23) & 0xFF;

	// classify the number
	int fpclass = fp_normal;
	if(0 == p_data->exp) // exponent zero?
	{
		fpclass = (0 == p_data->frac)
				? fp_zero		//true
				: fp_subnormal; //false
	}
	else if(0xFF == p_data->exp) // exponent all ones?
	{
		if(0 == p_data->frac) fpclass = fp_infinity;
		else
		{
			fpclass = (p_data->frac & 0x00400000) //frac.msb==1?
					? fp_quiet		//true
					: fp_signaling; //false
		}
	}

	// nonstandard mode: flush subnormals to 0
	if(0 == p_std
	&& fp_subnormal == fpclass)
	{
		// shift as unsigned: moving a signed 1 into bit 31 is undefined
		p_data->fp.inte = (uint)p_data->sign << 31;
		p_data->frac = 0;
		fpclass = fp_zero;
		// callers that pass NULL do so with STDONLY today; guard anyway
		if(p_exc != NULL) *p_exc = FPX_NX;
	}
	return fpclass;
}


/*=======================================================================*/

/* overflow routine:  returns proper result for overflow in rounding mode
*/

static void 
overflow( void* p_res, int p_dbl, int p_rnd, int p_sign )
{
	enum { MAXV=0, INFV=1 } rtype;  // result type: 1=infinity 0=max_value
	
	switch(p_rnd) //rounding mode?
	{
	case FP_RN:  rtype = INFV;  break;
	case FP_RZ:  rtype = MAXV;  break;
	case FP_RP:  rtype = p_sign ? MAXV : INFV;  break;
	case FP_RM:  rtype = p_sign ? INFV : MAXV;  break;
	}
	
	if(p_dbl) //double
	{
		uint64 d_res = rtype ? 0x7FF0000000000000 : 0x7FEFFFFFFFFFFFFF;
		*(uint64*)p_res = d_res | ((uint64)p_sign << 63);
	}
	else //single
	{
		uint s_res = rtype ? 0x7F800000 : 0x7F7FFFFF;
		*(uint*)p_res = s_res | (p_sign << 31);
	}
}


//-------------------------------------------------------------------
// Operations not implemented by this model.  Each stub ignores all of
// its arguments and returns FPX_ILL (illegal_instruction) if called.
//-------------------------------------------------------------------

int fpsim_fnaddd( const double* a, const double* b, double* c,
			uint64 d, uint64 e )
{
	return FPX_ILL;
}

int fpsim_fnadds( const float* a, const float* b, float* c,
			uint64 d, uint64 e )
{
	return FPX_ILL;
}

int fpsim_fnmuld( const double* a, const double* b, double* c,
			uint64 d, uint64 e )
{
	return FPX_ILL;
}

int fpsim_fnmuls( const float* a, const float* b, float* c,
			uint64 d, uint64 e )
{
	return FPX_ILL;
}

int fpsim_fnsmuld( const float* a, const float* b, double* c,
			uint64 d, uint64 e )
{
	return FPX_ILL;
}

int fpsim_fhaddd( const double* a, const double* b, double* c,
			uint64 d, uint64 e, fpsim_fha_subtype f )
{
	return FPX_ILL;
}

int fpsim_fhadds( const float* a, const float* b, float* c,
			uint64 d, uint64 e, fpsim_fha_subtype f )
{
	return FPX_ILL;
}

int fpsim_fmaddd( const double* a, const double* b, const double* c,
			double* d, uint64 e, uint64 f, fpsim_fma_subtype g )
{
	return FPX_ILL;
}

int fpsim_fmadds( const float* a, const float* b, const float* c,
			float* d, uint64 e, uint64 f, fpsim_fma_subtype g )
{
	return FPX_ILL;
}

int fpsim_fumaddd( const double* a, const double* b, const double* c,
			double* d, uint64 e, uint64 f, fpsim_fma_subtype g )
{
	return FPX_ILL;
}

int fpsim_fumadds( const float* a, const float* b, const float* c,
			float* d, uint64 e, uint64 f, fpsim_fma_subtype g )
{
	return FPX_ILL;
}


/*===========================================================================*/

// Function pointer structure
//
// Table of this model's entry points.  The initializer is positional, so
// the order of the entries must match the member order of
// struct fpsim_functions (declared outside this file -- presumably in
// fpsim.h; confirm before reordering anything here).

struct fpsim_functions  fpsim_funclist =
{
	// FSR update and GSR mask
	fpsim_update_fsr, fpsim_gsr_mask,

	// arithmetic: double precision, single precision, single*single->double
	fpsim_faddd, fpsim_fsubd, fpsim_fmuld, fpsim_fdivd, fpsim_fsqrtd,
	fpsim_fadds, fpsim_fsubs, fpsim_fmuls, fpsim_fdivs, fpsim_fsqrts, 
	fpsim_fsmuld,
	
	// format conversions
	fpsim_fstod, fpsim_fdtos, fpsim_fstox, fpsim_fdtox, fpsim_fstoi,  
	fpsim_fdtoi, fpsim_fxtos, fpsim_fxtod, fpsim_fitos, fpsim_fitod,

	// ops not implemented by this model (stubs return FPX_ILL)
	fpsim_fnaddd, fpsim_fnadds, fpsim_fnmuld, fpsim_fnmuls, fpsim_fnsmuld,
	fpsim_fhaddd, fpsim_fhadds,
	fpsim_fmaddd, fpsim_fmadds, fpsim_fumaddd, fpsim_fumadds
};

/* Model identification string, with the compilation date (__DATE__) appended. */
char fpsim_fp_model[] = "FPSIM Niagara2 (N2) " __DATE__ ;