[OpenSPARC-T2-SAM] / legion / src / procs / sunsparc / libniagara / fpsim_bw.c

/*
* ========== Copyright Header Begin ==========================================
* 
* OpenSPARC T2 Processor File: fpsim_bw.c
* Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
* 
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
* 
* The above named program is distributed in the hope that it will be 
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* General Public License for more details.
* 
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
* 
* ========== Copyright Header End ============================================
*/
/****************************************************************************
 *  fpsim_bw.c -- Floating-point Simulation Library for SPARC (Niagara-1)
 *
 *  Author --
 *  Robert Rethemeyer - Sun Microsystems, Inc.
 *
 *  Date --
 *  Mar 2, 2006
 *
 *  Design -- dynamically loaded shared object; compile with C or C++.
 *   Models FP instruction behavior according to:
 *    - UltraSPARC Architecture 2005
 *    - Niagara PRM Appendix I
 *	 The general strategy is to use the SPARC FP instructions
 *   with special handling for the cases where Niagara requires exceptions.
 *
 *  (c) COPYRIGHT 2005-2006 Sun Microsystems, Inc.
 *  Sun Confidential: Sun SSG Only
 ***************************************************************************/

/*********************************************************************
 * ATTENTION: This code is part of a library shared by multiple
 * projects.  DO NOT MAKE CHANGES TO THIS CODE WITHIN YOUR PROJECT.
 * Instead, contact the owner/maintainer of the library, currently:
 *    Robert.Rethemeyer@Sun.COM   +1-408-616-5717  (x45717)
 *    Systems Group: TVT: FrontEnd Technologies
 * The CVS source code repository for the library is at:
 *    /import/ftap-blimp1/cvs/fpsim
 * DO NOT COMMIT CHANGES TO THAT REPOSITORY: contact the maintainer.
 ********************************************************************/

/*tab length=4*/

/* Revision-control identification string for this file (CVS "$Id$" keyword).
   Nothing in this file reads it. */
static const char cvsid[] = 
 "$Id: fpsim_bw.c,v 1.6 2006/12/07 20:52:04 bobsmail Exp $";


#include "fpsim_support.h"
#include "fpsim.h"


/* Forward declarations of the file-local dissection helpers defined near
   the end of this file.  Parameter names match the definitions. */
#ifdef __cplusplus
extern "C" {
#endif
static int dissect_double( double p_fpnum, fpdouble* p_data, int p_std, int* p_rc );
static int dissect_single( float  p_fpnum, fpsingle* p_data, int p_std, int* p_rc );
#ifdef __cplusplus
}
#endif


#define FSR_UFM 0x2000000  /*FSR.TEM underflow mask*/
#define GSR_IM 0x08000000  /*GSR interval mode bit*/
#define STDONLY 1          /*Standard mode only*/

/* Selects the 2-bit rounding mode: when the GSR interval-mode bit is set
   the mode comes from GSR bits 26:25, otherwise from FSR bits 31:30.
   Every use of an argument is parenthesized so that callers may pass
   arbitrary expressions (e.g. "a | b") without precedence surprises.
   Note: GSR is evaluated twice on the interval-mode path; do not pass
   expressions with side effects. */
#define RNDMODE(FSR,GSR) \
	((int)((((GSR) & GSR_IM) ? ((GSR)>>25) : ((FSR)>>30)) & 3))

/*===========================================================================*/


//-------------------------------------------------------------------
// FADDD
//-------------------------------------------------------------------
/* FADDD: double-precision add.
 *   p_op1, p_op2  source operands
 *   p_res         receives the result
 *   p_fsr, p_gsr  select the rounding mode (see RNDMODE); p_fsr also
 *                 supplies the underflow trap-enable bit (FSR_UFM)
 * Returns the exception code produced by asm_faddd, or FPX_UF when that
 * code is zero, the result is subnormal, and FSR_UFM is set in p_fsr.
 */
int 
fpsim_faddd( const double* p_op1, const double* p_op2, double* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	double   raw;         // value produced by the asm helper
	fpdouble parts;       // raw, picked apart by dissect_double
	int      scratch[2];  // scratch array handed to the asm helper
	int      status;

	status = asm_faddd(p_op1, p_op2, &raw, RNDMODE(p_fsr,p_gsr), scratch);

	// A subnormal result that raised nothing is still reported as
	// underflow when the underflow trap is enabled in the FSR.
	if(fp_subnormal == dissect_double(raw, &parts, STDONLY, NULL)
		&& 0 == status
		&& (p_fsr & FSR_UFM))
	{
		status = FPX_UF;
	}

	*p_res = parts.fp.num;  // dissect_double stored raw here
	return status;
}


//-------------------------------------------------------------------
// FSUBD
//-------------------------------------------------------------------
/* FSUBD: double-precision subtract.
 *   p_op1, p_op2  source operands
 *   p_res         receives the result
 *   p_fsr, p_gsr  select the rounding mode (see RNDMODE); p_fsr also
 *                 supplies the underflow trap-enable bit (FSR_UFM)
 * Returns the exception code produced by asm_fsubd, or FPX_UF when that
 * code is zero, the result is subnormal, and FSR_UFM is set in p_fsr.
 */
int 
fpsim_fsubd( const double* p_op1, const double* p_op2, double* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	double   raw;         // value produced by the asm helper
	fpdouble parts;       // raw, picked apart by dissect_double
	int      scratch[2];  // scratch array handed to the asm helper
	int      status;

	status = asm_fsubd(p_op1, p_op2, &raw, RNDMODE(p_fsr,p_gsr), scratch);

	// Promote a silent subnormal result to underflow if UFM is enabled.
	if(fp_subnormal == dissect_double(raw, &parts, STDONLY, NULL)
		&& 0 == status
		&& (p_fsr & FSR_UFM))
	{
		status = FPX_UF;
	}

	*p_res = parts.fp.num;  // dissect_double stored raw here
	return status;
}


//-------------------------------------------------------------------
// FMULD
//-------------------------------------------------------------------
/* FMULD: double-precision multiply.
 *   p_op1, p_op2  source operands
 *   p_res         receives the result
 *   p_fsr, p_gsr  select the rounding mode (see RNDMODE); p_fsr also
 *                 supplies the underflow trap-enable bit (FSR_UFM)
 * Returns the exception code produced by asm_fmuld, or FPX_UF when that
 * code is zero, the result is subnormal, and FSR_UFM is set in p_fsr.
 */
int 
fpsim_fmuld( const double* p_op1, const double* p_op2, double* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	double   raw;         // value produced by the asm helper
	fpdouble parts;       // raw, picked apart by dissect_double
	int      scratch[2];  // scratch array handed to the asm helper
	int      status;

	status = asm_fmuld(p_op1, p_op2, &raw, RNDMODE(p_fsr,p_gsr), scratch);

	// Promote a silent subnormal result to underflow if UFM is enabled.
	if(fp_subnormal == dissect_double(raw, &parts, STDONLY, NULL)
		&& 0 == status
		&& (p_fsr & FSR_UFM))
	{
		status = FPX_UF;
	}

	*p_res = parts.fp.num;  // dissect_double stored raw here
	return status;
}


//-------------------------------------------------------------------
// FDIVD
//-------------------------------------------------------------------
/* FDIVD: double-precision divide (p_op1 / p_op2 operand order is whatever
 * asm_fdivd implements; it is passed through unchanged).
 *   p_res         receives the result
 *   p_fsr, p_gsr  select the rounding mode (see RNDMODE); p_fsr also
 *                 supplies the underflow trap-enable bit (FSR_UFM)
 * Returns the exception code produced by asm_fdivd, or FPX_UF when that
 * code is zero, the result is subnormal, and FSR_UFM is set in p_fsr.
 */
int 
fpsim_fdivd( const double* p_op1, const double* p_op2, double* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	double   raw;         // value produced by the asm helper
	fpdouble parts;       // raw, picked apart by dissect_double
	int      scratch[2];  // scratch array handed to the asm helper
	int      status;

	status = asm_fdivd(p_op1, p_op2, &raw, RNDMODE(p_fsr,p_gsr), scratch);

	// Promote a silent subnormal result to underflow if UFM is enabled.
	if(fp_subnormal == dissect_double(raw, &parts, STDONLY, NULL)
		&& 0 == status
		&& (p_fsr & FSR_UFM))
	{
		status = FPX_UF;
	}

	*p_res = parts.fp.num;  // dissect_double stored raw here
	return status;
}


//-------------------------------------------------------------------
// FADDS
//-------------------------------------------------------------------
/* FADDS: single-precision add.
 *   p_op1, p_op2  source operands
 *   p_res         receives the result
 *   p_fsr, p_gsr  select the rounding mode (see RNDMODE); p_fsr also
 *                 supplies the underflow trap-enable bit (FSR_UFM)
 * Returns the exception code produced by asm_fadds, or FPX_UF when that
 * code is zero, the result is subnormal, and FSR_UFM is set in p_fsr.
 */
int 
fpsim_fadds( const float* p_op1, const float* p_op2, float* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	float    raw;         // value produced by the asm helper
	fpsingle parts;       // raw, picked apart by dissect_single
	int      scratch[2];  // scratch array handed to the asm helper
	int      status;

	status = asm_fadds(p_op1, p_op2, &raw, RNDMODE(p_fsr,p_gsr), scratch);

	// Promote a silent subnormal result to underflow if UFM is enabled.
	if(fp_subnormal == dissect_single(raw, &parts, STDONLY, NULL)
		&& 0 == status
		&& (p_fsr & FSR_UFM))
	{
		status = FPX_UF;
	}

	*p_res = parts.fp.num;  // dissect_single stored raw here
	return status;
}


//-------------------------------------------------------------------
// FSUBS
//-------------------------------------------------------------------
/* FSUBS: single-precision subtract.
 *   p_op1, p_op2  source operands
 *   p_res         receives the result
 *   p_fsr, p_gsr  select the rounding mode (see RNDMODE); p_fsr also
 *                 supplies the underflow trap-enable bit (FSR_UFM)
 * Returns the exception code produced by asm_fsubs, or FPX_UF when that
 * code is zero, the result is subnormal, and FSR_UFM is set in p_fsr.
 */
int 
fpsim_fsubs( const float* p_op1, const float* p_op2, float* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	float    raw;         // value produced by the asm helper
	fpsingle parts;       // raw, picked apart by dissect_single
	int      scratch[2];  // scratch array handed to the asm helper
	int      status;

	status = asm_fsubs(p_op1, p_op2, &raw, RNDMODE(p_fsr,p_gsr), scratch);

	// Promote a silent subnormal result to underflow if UFM is enabled.
	if(fp_subnormal == dissect_single(raw, &parts, STDONLY, NULL)
		&& 0 == status
		&& (p_fsr & FSR_UFM))
	{
		status = FPX_UF;
	}

	*p_res = parts.fp.num;  // dissect_single stored raw here
	return status;
}


//-------------------------------------------------------------------
// FMULS
//-------------------------------------------------------------------
/* FMULS: single-precision multiply.
 *   p_op1, p_op2  source operands
 *   p_res         receives the result
 *   p_fsr, p_gsr  select the rounding mode (see RNDMODE); p_fsr also
 *                 supplies the underflow trap-enable bit (FSR_UFM)
 * Returns the exception code produced by asm_fmuls, or FPX_UF when that
 * code is zero, the result is subnormal, and FSR_UFM is set in p_fsr.
 */
int 
fpsim_fmuls( const float* p_op1, const float* p_op2, float* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	float    raw;         // value produced by the asm helper
	fpsingle parts;       // raw, picked apart by dissect_single
	int      scratch[2];  // scratch array handed to the asm helper
	int      status;

	status = asm_fmuls(p_op1, p_op2, &raw, RNDMODE(p_fsr,p_gsr), scratch);

	// Promote a silent subnormal result to underflow if UFM is enabled.
	if(fp_subnormal == dissect_single(raw, &parts, STDONLY, NULL)
		&& 0 == status
		&& (p_fsr & FSR_UFM))
	{
		status = FPX_UF;
	}

	*p_res = parts.fp.num;  // dissect_single stored raw here
	return status;
}


//-------------------------------------------------------------------
// FSMULD
//-------------------------------------------------------------------
/* FSMULD: multiply two singles into a double result.
 *   p_op1, p_op2  single-precision source operands
 *   p_res         receives the double-precision result (written by the
 *                 asm helper directly)
 *   p_fsr, p_gsr  accepted for signature uniformity; not used here
 * Returns whatever asm_fsmuld returns, with no post-processing.
 */
int 
fpsim_fsmuld( const float* p_op1, const float* p_op2, double* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	int scratch[2];  // scratch array handed to the asm helper

	return asm_fsmuld(p_op1, p_op2, p_res, scratch);
}


//-------------------------------------------------------------------
// FDIVS
//-------------------------------------------------------------------
/* FDIVS: single-precision divide (operands are passed to asm_fdivs in the
 * order received).
 *   p_res         receives the result
 *   p_fsr, p_gsr  select the rounding mode (see RNDMODE); p_fsr also
 *                 supplies the underflow trap-enable bit (FSR_UFM)
 * Returns the exception code produced by asm_fdivs, or FPX_UF when that
 * code is zero, the result is subnormal, and FSR_UFM is set in p_fsr.
 */
int 
fpsim_fdivs( const float* p_op1, const float* p_op2, float* p_res,
			uint64 p_fsr, uint64 p_gsr )
{
	float    raw;         // value produced by the asm helper
	fpsingle parts;       // raw, picked apart by dissect_single
	int      scratch[2];  // scratch array handed to the asm helper
	int      status;

	status = asm_fdivs(p_op1, p_op2, &raw, RNDMODE(p_fsr,p_gsr), scratch);

	// Promote a silent subnormal result to underflow if UFM is enabled.
	if(fp_subnormal == dissect_single(raw, &parts, STDONLY, NULL)
		&& 0 == status
		&& (p_fsr & FSR_UFM))
	{
		status = FPX_UF;
	}

	*p_res = parts.fp.num;  // dissect_single stored raw here
	return status;
}


//-------------------------------------------------------------------
// FSTOD
//-------------------------------------------------------------------
/* FSTOD: convert single to double.
 *   p_op2         source operand
 *   p_res         receives the result (written by the asm helper)
 *   p_fsr, p_gsr  select the rounding mode (see RNDMODE)
 * Returns whatever asm_fstod returns, with no post-processing.
 */
int 
fpsim_fstod( const float* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr )
{
	int scratch[2];  // scratch array handed to the asm helper

	return asm_fstod(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FDTOS
//-------------------------------------------------------------------
/* FDTOS: convert double to single.
 *   p_op2         source operand
 *   p_res         receives the result
 *   p_fsr, p_gsr  select the rounding mode (see RNDMODE); p_fsr also
 *                 supplies the underflow trap-enable bit (FSR_UFM)
 * Returns the exception code produced by asm_fdtos, or FPX_UF when that
 * code is zero, the result is subnormal, and FSR_UFM is set in p_fsr.
 */
int 
fpsim_fdtos( const double* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
{
	float    raw;         // value produced by the asm helper
	fpsingle parts;       // raw, picked apart by dissect_single
	int      scratch[2];  // scratch array handed to the asm helper
	int      status;

	status = asm_fdtos(p_op2, &raw, RNDMODE(p_fsr,p_gsr), scratch);

	// Promote a silent subnormal result to underflow if UFM is enabled.
	if(fp_subnormal == dissect_single(raw, &parts, STDONLY, NULL)
		&& 0 == status
		&& (p_fsr & FSR_UFM))
	{
		status = FPX_UF;
	}

	*p_res = parts.fp.num;  // dissect_single stored raw here
	return status;
}


//-------------------------------------------------------------------
// FSTOX
//-------------------------------------------------------------------
/* FSTOX: convert single to 64-bit integer.
 *   p_op2         source operand
 *   p_res         receives the result (written by the asm helper)
 *   p_fsr, p_gsr  select the rounding mode passed to the helper
 * Returns whatever asm_fstox returns, with no post-processing.
 */
int 
fpsim_fstox( const float* p_op2, uint64* p_res, uint64 p_fsr, uint64 p_gsr )
{
	int scratch[2];  // scratch array handed to the asm helper

	return asm_fstox(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FDTOX
//-------------------------------------------------------------------
/* FDTOX: convert double to 64-bit integer.
 *   p_op2         source operand
 *   p_res         receives the result (written by the asm helper)
 *   p_fsr, p_gsr  select the rounding mode passed to the helper
 * Returns whatever asm_fdtox returns, with no post-processing.
 */
int 
fpsim_fdtox( const double* p_op2, uint64* p_res, uint64 p_fsr, uint64 p_gsr )
{
	int scratch[2];  // scratch array handed to the asm helper

	return asm_fdtox(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FSTOI
//-------------------------------------------------------------------
/* FSTOI: convert single to 32-bit integer.
 *   p_op2         source operand
 *   p_res         receives the result (written by the asm helper)
 *   p_fsr, p_gsr  select the rounding mode passed to the helper
 * Returns whatever asm_fstoi returns, with no post-processing.
 */
int 
fpsim_fstoi( const float* p_op2, uint* p_res, uint64 p_fsr, uint64 p_gsr )
{
	int scratch[2];  // scratch array handed to the asm helper

	return asm_fstoi(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FDTOI
//-------------------------------------------------------------------
/* FDTOI: convert double to 32-bit integer.
 *   p_op2         source operand
 *   p_res         receives the result (written by the asm helper)
 *   p_fsr, p_gsr  select the rounding mode passed to the helper
 * Returns whatever asm_fdtoi returns, with no post-processing.
 */
int 
fpsim_fdtoi( const double* p_op2, uint* p_res, uint64 p_fsr, uint64 p_gsr )
{
	int scratch[2];  // scratch array handed to the asm helper

	return asm_fdtoi(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FXTOS
//-------------------------------------------------------------------
/* FXTOS: convert 64-bit integer to single.
 *   p_op2         source operand
 *   p_res         receives the result (written by the asm helper)
 *   p_fsr, p_gsr  select the rounding mode (see RNDMODE)
 * Returns whatever asm_fxtos returns, with no post-processing.
 */
int 
fpsim_fxtos( const uint64* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
{
	int scratch[2];  // scratch array handed to the asm helper

	return asm_fxtos(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FXTOD
//-------------------------------------------------------------------
/* FXTOD: convert 64-bit integer to double.
 *   p_op2         source operand
 *   p_res         receives the result (written by the asm helper)
 *   p_fsr, p_gsr  select the rounding mode (see RNDMODE)
 * Returns whatever asm_fxtod returns, with no post-processing.
 */
int 
fpsim_fxtod( const uint64* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr )
{
	int scratch[2];  // scratch array handed to the asm helper

	return asm_fxtod(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FITOS
//-------------------------------------------------------------------
/* FITOS: convert 32-bit integer to single.
 *   p_op2         source operand
 *   p_res         receives the result (written by the asm helper)
 *   p_fsr, p_gsr  select the rounding mode (see RNDMODE)
 * Returns whatever asm_fitos returns, with no post-processing.
 */
int 
fpsim_fitos( const uint* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
{
	int scratch[2];  // scratch array handed to the asm helper

	return asm_fitos(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FITOD
//-------------------------------------------------------------------
/* FITOD: convert 32-bit integer to double.
 *   p_op2  source operand
 *   p_res  receives the result (written by asm_fitod)
 * Takes no FSR/GSR and always returns 0 (no exception is ever reported).
 */
int 
fpsim_fitod( const uint* p_op2, double* p_res )
{
	const int no_exception = 0;

	asm_fitod(p_op2, p_res);
	return no_exception;
}


//-------------------------------------------------------------------
// GSR_MASK
//-------------------------------------------------------------------

/* Returns a constant GSR bit mask: the upper 32 bits, bits 27:25 (the
   interval-mode bit GSR_IM plus the two rounding bits RNDMODE reads),
   and bits 7:0.  Presumably the set of GSR bits this model implements;
   confirm against the caller. */
uint64 fpsim_gsr_mask(void)
{
	const uint64 mask = 0xFFFFFFFF0E0000FF;
	return mask;
}


//-------------------------------------------------------------------
// UPDATE_FSR
//-------------------------------------------------------------------

/* Merges a FP exception returned by one of the instruction sim routines
   into the caller's FSR, and indicates whether the caller should
   post a trap (nonzero return value is trap-type code). 
*/

/* Folds the exception code p_exc into the FSR pointed to by p_fsr.
 * Returns the trap-type code the caller should post, or 0 for none:
 *   0x10  illegal_instruction  (FSR left untouched)
 *   0x22  fp_exception_other   (fsr.ftt = reason code from p_exc)
 *   0x21  IEEE_754_exception   (fsr.ftt = 1, fsr.cexc = trapping bits)
 *   0     completed            (p_exc OR-ed into both cexc and aexc)
 */
int
fpsim_update_fsr( int p_exc, uint64* p_fsr )
{
	uint64 new_fsr = *p_fsr;
	int enabled;

	// ---- non-IEEE trap (unfinished or illegal) ----
	if(p_exc & FPX_TRAP)
	{
		int reason = p_exc & 0xFF;  // trap reason lives in bits 7:0

		// illegal_instr: report it without touching the FSR
		if(0x10 == reason) return reason;

		// fp_exception_other: replace fsr.ftt (bits 16:14) with the reason
		new_fsr = (new_fsr & ~0x1C000) | (reason << 14);
		*p_fsr = new_fsr;
		return 0x22;
	}

	// ---- IEEE exception bits: trap or completion ----
	// Which raised exceptions have their trap enabled in fsr.tem (27:23)?
	// Must be computed before the FSR is modified below.
	enabled = p_exc & (new_fsr>>23) & 0x1F;

	new_fsr &= ~0x1C01F;  // wipe previous fsr.cexc and fsr.ftt

	if(0 == enabled)
	{
		// No trap: record the bits in cexc and accumulate them in aexc
		new_fsr |= (p_exc | (p_exc<<5));
		*p_fsr = new_fsr;
		return 0;
	}

	// Trap taken: an OF/UF trap suppresses the accompanying NX bit
	if(enabled & (FPX_OF|FPX_UF)) enabled &= ~FPX_NX;

	new_fsr |= (enabled | 0x04000);  // cexc only (aexc untouched); ftt=1
	*p_fsr = new_fsr;
	return 0x21;
}


/*=======================================================================*/

/* Dissection routines:  picks apart the FP number into sign,exp,fraction
	Returns FP number class:  0=zero 1=subnorm 2=normal 3=inf 4=qnan 5=snan
*/

/* Splits a double into sign, exponent and fraction fields of *p_data and
 * returns its class (fp_zero, fp_subnormal, fp_normal, fp_infinity,
 * fp_quiet or fp_signaling).  p_std and p_rc are accepted but unused.
 */
static int 
dissect_double( double p_fpnum, fpdouble* p_data, int p_std, int* p_rc )
{
	// Store the value, then slice its integer image:
	// sign = bit 63, exponent = bits 62:52, fraction = bits 51:0
	p_data->fp.num = p_fpnum;
	p_data->sign = p_data->fp.inte >> 63;
	p_data->frac = p_data->fp.inte & 0x000FFFFFFFFFFFFF;
	p_data->exp  = (p_data->fp.inte >> 52) & 0x7FF;

	// Exponent all ones: infinity or NaN
	if(0x7FF == p_data->exp)
	{
		if(0 == p_data->frac) return fp_infinity;

		// Fraction MSB distinguishes quiet from signaling NaN
		if(p_data->frac & 0x0008000000000000) return fp_quiet;
		return fp_signaling;
	}

	// Any other nonzero exponent: ordinary normal number
	if(0 != p_data->exp) return fp_normal;

	// Exponent zero: true zero or subnormal
	return (0 == p_data->frac) ? fp_zero : fp_subnormal;
}

/* Splits a float into sign, exponent and fraction fields of *p_data and
 * returns its class (fp_zero, fp_subnormal, fp_normal, fp_infinity,
 * fp_quiet or fp_signaling).  p_std and p_rc are accepted but unused.
 */
static int 
dissect_single( float p_fpnum, fpsingle* p_data, int p_std, int* p_rc )
{
	// Store the value, then slice its integer image:
	// sign = bit 31, exponent = bits 30:23, fraction = bits 22:0
	p_data->fp.num = p_fpnum;
	p_data->sign = p_data->fp.inte >> 31;
	p_data->frac = p_data->fp.inte & 0x007FFFFF;
	p_data->exp  = (p_data->fp.inte >> 23) & 0xFF;

	// Exponent all ones: infinity or NaN
	if(0xFF == p_data->exp)
	{
		if(0 == p_data->frac) return fp_infinity;

		// Fraction MSB distinguishes quiet from signaling NaN
		if(p_data->frac & 0x00400000) return fp_quiet;
		return fp_signaling;
	}

	// Any other nonzero exponent: ordinary normal number
	if(0 != p_data->exp) return fp_normal;

	// Exponent zero: true zero or subnormal
	return (0 == p_data->frac) ? fp_zero : fp_subnormal;
}


//-------------------------------------------------------------------
// un-implemented ops return fp_exc_other/unimplemented:
//-------------------------------------------------------------------

/* Square root is not implemented by this model: both entry points ignore
   their arguments and return FPX_UIMP. */
int
fpsim_fsqrtd( const double* a, double* b, uint64 c, uint64 d )
{
	return FPX_UIMP;
}

int
fpsim_fsqrts( const float* a, float* b, uint64 c, uint64 d )
{
	return FPX_UIMP;
}

//-------------------------------------------------------------------
// newer ops return illegal_instruction trap if called:
//-------------------------------------------------------------------

/* The entry points below ignore all of their arguments and return
   FPX_ILL unconditionally. */

int
fpsim_fnaddd( const double* a, const double* b, double* c,
			uint64 d, uint64 e )
{
	return FPX_ILL;
}

int
fpsim_fnadds( const float* a, const float* b, float* c,
			uint64 d, uint64 e )
{
	return FPX_ILL;
}

int
fpsim_fnmuld( const double* a, const double* b, double* c,
			uint64 d, uint64 e )
{
	return FPX_ILL;
}

int
fpsim_fnmuls( const float* a, const float* b, float* c,
			uint64 d, uint64 e )
{
	return FPX_ILL;
}

int
fpsim_fnsmuld( const float* a, const float* b, double* c,
			uint64 d, uint64 e )
{
	return FPX_ILL;
}

int
fpsim_fhaddd( const double* a, const double* b, double* c,
			uint64 d, uint64 e, fpsim_fha_subtype f )
{
	return FPX_ILL;
}

int
fpsim_fhadds( const float* a, const float* b, float* c,
			uint64 d, uint64 e, fpsim_fha_subtype f )
{
	return FPX_ILL;
}

int
fpsim_fmaddd( const double* a, const double* b, const double* c, double* d,
			uint64 e, uint64 f, fpsim_fma_subtype g )
{
	return FPX_ILL;
}

int
fpsim_fmadds( const float* a, const float* b, const float* c, float* d,
			uint64 e, uint64 f, fpsim_fma_subtype g )
{
	return FPX_ILL;
}

int
fpsim_fumaddd( const double* a, const double* b, const double* c, double* d,
			uint64 e, uint64 f, fpsim_fma_subtype g )
{
	return FPX_ILL;
}

int
fpsim_fumadds( const float* a, const float* b, const float* c, float* d,
			uint64 e, uint64 f, fpsim_fma_subtype g )
{
	return FPX_ILL;
}


/*=======================================================================*/

// Function pointer structure

/* Table of this model's entry points.  The initializer is positional, so
   the order of the names below must match the member order of
   struct fpsim_functions (declared outside this file, presumably in
   fpsim.h) exactly; do not reorder without checking that declaration. */
struct fpsim_functions  fpsim_funclist =
{
	// FSR/GSR support
	fpsim_update_fsr, fpsim_gsr_mask,

	// arithmetic: double ops, single ops, then single*single->double
	fpsim_faddd, fpsim_fsubd, fpsim_fmuld, fpsim_fdivd, fpsim_fsqrtd,
	fpsim_fadds, fpsim_fsubs, fpsim_fmuls, fpsim_fdivs, fpsim_fsqrts, 
	fpsim_fsmuld,

	// format conversions
	fpsim_fstod, fpsim_fdtos, fpsim_fstox, fpsim_fdtox, fpsim_fstoi,  
	fpsim_fdtoi, fpsim_fxtos, fpsim_fxtod, fpsim_fitos, fpsim_fitod,
	
	// ops that this model rejects with FPX_ILL
	fpsim_fnaddd, fpsim_fnadds, fpsim_fnmuld, fpsim_fnmuls, fpsim_fnsmuld,
	fpsim_fhaddd, fpsim_fhadds,
	fpsim_fmaddd, fpsim_fmadds, fpsim_fumaddd, fpsim_fumadds
};

/* Model identification string; the compiler appends the build date via
   __DATE__ string concatenation. */
char fpsim_fp_model[] = "FPSIM Niagara1 (BW) " __DATE__ ;
Commit	Line	Data
920dae64 AT	1	/*
	2	* ========== Copyright Header Begin ==========================================
	3	*
	4	* OpenSPARC T2 Processor File: fpsim_bw.c
	5	* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
	6	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
	7	*
	8	* The above named program is free software; you can redistribute it and/or
	9	* modify it under the terms of the GNU General Public
	10	* License version 2 as published by the Free Software Foundation.
	11	*
	12	* The above named program is distributed in the hope that it will be
	13	* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	15	* General Public License for more details.
	16	*
	17	* You should have received a copy of the GNU General Public
	18	* License along with this work; if not, write to the Free Software
	19	* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
	20	*
	21	* ========== Copyright Header End ============================================
	22	*/
	23	/****************************************************************************
	24	* fpsim_bw.c -- Floating-point Simulation Library for SPARC (Niagara-1)
	25	*
	26	* Author --
	27	* Robert Rethemeyer - Sun Microsystems, Inc.
	28	*
	29	* Date --
	30	* Mar 2, 2006
	31	*
	32	* Design -- dynamically loaded shared object; compile with C or C++.
	33	* Models FP instruction behavior according to:
	34	* - UltraSPARC Architecture 2005
	35	* - Niagara PRM Appendix I
	36	* The general strategy is to use the SPARC FP instructions
	37	* with special handling for the cases where Niagara requires exceptions.
	38	*
	39	* (c) COPYRIGHT 2005-2006 Sun Microsystems, Inc.
	40	* Sun Confidential: Sun SSG Only
	41	***************************************************************************/
	42
	43	/*********************************************************************
	44	* ATTENTION: This code is part of a library shared by multiple
	45	* projects. DO NOT MAKE CHANGES TO THIS CODE WITHIN YOUR PROJECT.
	46	* Instead, contact the owner/maintainer of the library, currently:
	47	* Robert.Rethemeyer@Sun.COM +1-408-616-5717 (x45717)
	48	* Systems Group: TVT: FrontEnd Technologies
	49	* The CVS source code repository for the library is at:
	50	* /import/ftap-blimp1/cvs/fpsim
	51	* DO NOT COMMIT CHANGES TO THAT REPOSITORY: contact the maintainer.
	52	********************************************************************/
	53
	54	/tab length=4/
	55
	56	static const char cvsid[] =
	57	"$Id: fpsim_bw.c,v 1.6 2006/12/07 20:52:04 bobsmail Exp $";
	58
	59
	60	#include "fpsim_support.h"
	61	#include "fpsim.h"
	62
	63
	64	#ifdef __cplusplus
65	extern "C" {
66	#endif
67	static int dissect_double( double fpnum, fpdouble* data, int std, int* exc );
68	static int dissect_single( float fpnum, fpsingle* data, int std, int* exc );
69	#ifdef __cplusplus
70	}
71	#endif
72
73
74	#define FSR_UFM 0x2000000 /FSR.TEM underflow mask/
75	#define GSR_IM 0x08000000 /GSR interval mode bit/
76	#define STDONLY 1 /Standard mode only/
77	#define RNDMODE(FSR,GSR) (int)(((GSR & GSR_IM) ? (GSR>>25) : (FSR>>30)) & 3)
78
79	/===========================================================================/
80
81
82
83	//-------------------------------------------------------------------
84	// FADDD
85	//-------------------------------------------------------------------
86	int
87	fpsim_faddd( const double* p_op1, const double* p_op2, double* p_res,
88	uint64 p_fsr, uint64 p_gsr )
89	{
90	fpdouble res;
91	int scr[2];
92	double result;
93
94	int rnd = RNDMODE(p_fsr,p_gsr);
95	int exc = asm_faddd(p_op1, p_op2, &result, rnd, scr);
96
97	int rtype = dissect_double(result, &res, STDONLY, NULL);
98	if(0==exc && fp_subnormal==rtype) //untrapped underflow?
99	{
100	if(p_fsr & FSR_UFM) exc = FPX_UF;
101	}
102	*p_res = res.fp.num;
103	return exc;
104	}
105
106
107
108	//-------------------------------------------------------------------
109	// FSUBD
110	//-------------------------------------------------------------------
111	int
112	fpsim_fsubd( const double* p_op1, const double* p_op2, double* p_res,
113	uint64 p_fsr, uint64 p_gsr )
114	{
115	fpdouble res;
116	int scr[2];
117	double result;
118
119	int rnd = RNDMODE(p_fsr,p_gsr);
120	int exc = asm_fsubd(p_op1, p_op2, &result, rnd, scr);
121
122	int rtype = dissect_double(result, &res, STDONLY, NULL);
123	if(0==exc && fp_subnormal==rtype) //untrapped underflow?
124	{
125	if(p_fsr & FSR_UFM) exc = FPX_UF;
126	}
127	*p_res = res.fp.num;
128	return exc;
129	}
130
131
132
133	//-------------------------------------------------------------------
134	// FMULD
135	//-------------------------------------------------------------------
136	int
137	fpsim_fmuld( const double* p_op1, const double* p_op2, double* p_res,
138	uint64 p_fsr, uint64 p_gsr )
139	{
140	fpdouble res;
141	int scr[2];
142	double result;
143
144	int rnd = RNDMODE(p_fsr,p_gsr);
145	int exc= asm_fmuld(p_op1, p_op2, &result, rnd, scr);
146
147	int rtype = dissect_double(result, &res, STDONLY, NULL);
148	if(0==exc && fp_subnormal==rtype) //untrapped underflow?
149	{
150	if(p_fsr & FSR_UFM) exc = FPX_UF;
151	}
152	*p_res = res.fp.num;
153	return exc;
154	}
155
156
157
158	//-------------------------------------------------------------------
159	// FDIVD
160	//-------------------------------------------------------------------
161	int
162	fpsim_fdivd( const double* p_op1, const double* p_op2, double* p_res,
163	uint64 p_fsr, uint64 p_gsr )
164	{
165	fpdouble res;
166	int scr[2];
167	double result;
168
169	int rnd = RNDMODE(p_fsr,p_gsr);
170	int exc = asm_fdivd(p_op1, p_op2, &result, rnd, scr);
171
172	int rtype = dissect_double(result, &res, STDONLY, NULL);
173	if(0==exc && fp_subnormal==rtype) //untrapped underflow?
174	{
175	if(p_fsr & FSR_UFM) exc = FPX_UF;
176	}
177	*p_res = res.fp.num;
178	return exc;
179	}
180
181
182
183	//-------------------------------------------------------------------
184	// FADDS
185	//-------------------------------------------------------------------
186	int
187	fpsim_fadds( const float* p_op1, const float* p_op2, float* p_res,
188	uint64 p_fsr, uint64 p_gsr )
189	{
190	fpsingle res;
191	int scr[2];
192	float result;
193
194	int rnd = RNDMODE(p_fsr,p_gsr);
195	int exc = asm_fadds(p_op1, p_op2, &result, rnd, scr);
196
197	int rtype = dissect_single(result, &res, STDONLY, NULL);
198	if(0==exc && fp_subnormal==rtype) //untrapped underflow?
199	{
200	if(p_fsr & FSR_UFM) exc = FPX_UF;
201	}
202	*p_res = res.fp.num;
203	return exc;
204	}
205
206
207
208	//-------------------------------------------------------------------
209	// FSUBS
210	//-------------------------------------------------------------------
211	int
212	fpsim_fsubs( const float* p_op1, const float* p_op2, float* p_res,
213	uint64 p_fsr, uint64 p_gsr )
214	{
215	fpsingle res;
216	int scr[2];
217	float result;
218
219	int rnd = RNDMODE(p_fsr,p_gsr);
220	int exc = asm_fsubs(p_op1, p_op2, &result, rnd, scr);
221
222	int rtype = dissect_single(result, &res, STDONLY, NULL);
223	if(0==exc && fp_subnormal==rtype) //untrapped underflow?
224	{
225	if(p_fsr & FSR_UFM) exc = FPX_UF;
226	}
227	*p_res = res.fp.num;
228	return exc;
229	}
230
231
232
233	//-------------------------------------------------------------------
234	// FMULS
235	//-------------------------------------------------------------------
236	int
237	fpsim_fmuls( const float* p_op1, const float* p_op2, float* p_res,
238	uint64 p_fsr, uint64 p_gsr )
239	{
240	fpsingle res;
241	int scr[2];
242	float result;
243
244	int rnd = RNDMODE(p_fsr,p_gsr);
245	int exc = asm_fmuls(p_op1, p_op2, &result, rnd, scr);
246
247	int rtype = dissect_single(result, &res, STDONLY, NULL);
248	if(0==exc && fp_subnormal==rtype) //untrapped underflow?
249	{
250	if(p_fsr & FSR_UFM) exc = FPX_UF;
251	}
252	*p_res = res.fp.num;
253	return exc;
254	}
255
256
257
258	//-------------------------------------------------------------------
259	// FSMULD
260	//-------------------------------------------------------------------
261	int
262	fpsim_fsmuld( const float* p_op1, const float* p_op2, double* p_res,
263	uint64 p_fsr, uint64 p_gsr )
264	{
265	int scr[2];
266	int exc = asm_fsmuld(p_op1, p_op2, p_res, scr);
267	return exc;
268	}
269
270
271
272	//-------------------------------------------------------------------
273	// FDIVS
274	//-------------------------------------------------------------------
275	int
276	fpsim_fdivs( const float* p_op1, const float* p_op2, float* p_res,
277	uint64 p_fsr, uint64 p_gsr )
278	{
279	fpsingle res;
280	int scr[2];
281	float result;
282
283	int rnd = RNDMODE(p_fsr,p_gsr);
284	int exc = asm_fdivs(p_op1, p_op2, &result, rnd, scr);
285
286	int rtype = dissect_single(result, &res, STDONLY, NULL);
287	if(0==exc && fp_subnormal==rtype) //untrapped underflow?
288	{
289	if(p_fsr & FSR_UFM) exc = FPX_UF;
290	}
291	*p_res = res.fp.num;
292	return exc;
293	}
294
295
296
297	//-------------------------------------------------------------------
298	// FSTOD
299	//-------------------------------------------------------------------
300	int
301	fpsim_fstod( const float* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr )
302	{
303	int scr[2];
304	int rnd = RNDMODE(p_fsr,p_gsr);
305	int exc = asm_fstod(p_op2, p_res, rnd, scr);
306	return exc;
307	}
308
309
310
311	//-------------------------------------------------------------------
312	// FDTOS
313	//-------------------------------------------------------------------
314	int
315	fpsim_fdtos( const double* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
316	{
317	fpsingle res;
318	int scr[2];
319	float result;
320
321	int rnd = RNDMODE(p_fsr,p_gsr);
322	int exc = asm_fdtos(p_op2, &result, rnd, scr);
323
324	int rtype = dissect_single(result, &res, STDONLY, NULL);
325	if(0==exc && fp_subnormal==rtype) //untrapped underflow?
326	{
327	if(p_fsr & FSR_UFM) exc = FPX_UF;
328	}
329	*p_res = res.fp.num;
330	return exc;
331	}
332
333
334
335	//-------------------------------------------------------------------
336	// FSTOX
337	//-------------------------------------------------------------------
338	int
339	fpsim_fstox( const float* p_op2, uint64* p_res, uint64 p_fsr, uint64 p_gsr )
340	{
341	int scr[2];
342	int rnd = RNDMODE(p_fsr,p_gsr);
343	int exc = asm_fstox(p_op2, p_res, rnd, scr);
344	return exc;
345	}
346
347
348
349	//-------------------------------------------------------------------
350	// FDTOX
351	//-------------------------------------------------------------------
352	int
353	fpsim_fdtox( const double* p_op2, uint64* p_res, uint64 p_fsr, uint64 p_gsr )
354	{
355	int scr[2];
356	int rnd = RNDMODE(p_fsr,p_gsr);
357	int exc = asm_fdtox(p_op2, p_res, rnd, scr);
358	return exc;
359	}
360
361
362
363	//-------------------------------------------------------------------
364	// FSTOI
365	//-------------------------------------------------------------------
366	int
367	fpsim_fstoi( const float* p_op2, uint* p_res, uint64 p_fsr, uint64 p_gsr )
368	{
369	int scr[2];
370	int rnd = RNDMODE(p_fsr,p_gsr);
371	int exc = asm_fstoi(p_op2, p_res, rnd, scr);
372	return exc;
373	}
374
375
376
377	//-------------------------------------------------------------------
378	// FDTOI
379	//-------------------------------------------------------------------
380	int
381	fpsim_fdtoi( const double* p_op2, uint* p_res, uint64 p_fsr, uint64 p_gsr )
382	{
383	int scr[2];
384	int rnd = RNDMODE(p_fsr,p_gsr);
385	int exc = asm_fdtoi(p_op2, p_res, rnd, scr);
386	return exc;
387	}
388
389
390
391	//-------------------------------------------------------------------
392	// FXTOS
393	//-------------------------------------------------------------------
394	int
395	fpsim_fxtos( const uint64* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
396	{
397	int scr[2];
398	int rnd = RNDMODE(p_fsr,p_gsr);
399	int exc = asm_fxtos(p_op2, p_res, rnd, scr);
400	return exc;
401	}
402
403
404
405	//-------------------------------------------------------------------
406	// FXTOD
407	//-------------------------------------------------------------------
408	int
409	fpsim_fxtod( const uint64* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr )
410	{
411	int scr[2];
412	int rnd = RNDMODE(p_fsr,p_gsr);
413	int exc = asm_fxtod(p_op2, p_res, rnd, scr);
414	return exc;
415	}
416
417
418
419	//-------------------------------------------------------------------
420	// FITOS
421	//-------------------------------------------------------------------
422	int
423	fpsim_fitos( const uint* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
424	{
425	int scr[2];
426	int rnd = RNDMODE(p_fsr,p_gsr);
427	int exc = asm_fitos(p_op2, p_res, rnd, scr);
428	return exc;
429	}
430
431
432
433	//-------------------------------------------------------------------
434	// FITOD
435	//-------------------------------------------------------------------
436	int
437	fpsim_fitod( const uint* p_op2, double* p_res )
438	{
439	asm_fitod(p_op2, p_res);
440	return 0;
441	}
442
443
444
445	//-------------------------------------------------------------------
446	// GSR_MASK
447	//-------------------------------------------------------------------
448
449	uint64 fpsim_gsr_mask(void) { return 0xFFFFFFFF0E0000FF; }
450
451
452
453	//-------------------------------------------------------------------
454	// UPDATE_FSR
455	//-------------------------------------------------------------------
456
457	/* Merges a FP exception returned by one of the instruction sim routines
458	into the caller's FSR, and indicates whether the caller should
459	post a trap (nonzero return value is trap-type code).
460	*/
461
462	int
463	fpsim_update_fsr( int p_exc, uint64* p_fsr )
464	{
465	int trap;
466	uint64 fsr = *p_fsr;
467
468	// non-IEEE trap? (unfinished or illegal)
469	if(p_exc & FPX_TRAP)
470	{
471	trap = p_exc & 0xFF; // trap reason in bits 7:0
472	if(0x10 == trap) return trap; // illegal_instr: no fsr update
473
474	// FP_other trap: ftt in reason code
475	fsr = (fsr & ~0x1C000) \| (trap << 14);
476	trap = 0x22; // fp_exception_other
477	}
478	else // IEEE trap or completion
479	{
480	int taken = p_exc & (fsr>>23) & 0x1F; // mask with fsr.tem
481
482	fsr &= ~0x1C01F; // clear old fsr.cexc and fsr.ftt
483
484	// To Trap or Not To Trap?
485	if(taken != 0) // trap taken?
486	{
487	// cancel NX bit for OF/UF trap
488	if(taken & (FPX_OF\|FPX_UF))
489	{
490	taken &= ~FPX_NX;
491	}
492	fsr \|= (taken \| 0x04000); // set fsr.cexc only; fsr.ftt=1
493	trap = 0x21; // IEEE_754_exception
494	}
495	else // no trap
496	{
497	// set both cexc,aexc identically
498	fsr \|= (p_exc \| (p_exc<<5));
499	trap = 0;
500	}
501	}
502	*p_fsr = fsr; // update caller's FSR
503	return trap;
504	}
505
506
507
508
/*=======================================================================*/
510
511	/* Dissection routines: picks apart the FP number into sign,exp,fraction
512	Returns FP number class: 0=zero 1=subnorm 2=normal 3=inf 4=qnan 5=snan
513	*/
514
515	static int
516	dissect_double( double p_fpnum, fpdouble* p_data, int p_std, int* p_rc )
517	{
518	p_data->fp.num = p_fpnum;
519	p_data->sign = p_data->fp.inte >> 63;
520	p_data->frac = p_data->fp.inte & 0x000FFFFFFFFFFFFF;
521	p_data->exp = (p_data->fp.inte >> 52) & 0x7FF;
522
523	// classify the number
524	int fpclass = fp_normal;
525	if(0 == p_data->exp) // exponent zero?
526	{
527	fpclass = (0 == p_data->frac)
528	? fp_zero //true
529	: fp_subnormal; //false
530	}
531	else if(0x7FF == p_data->exp) // exponent all ones?
532	{
533	if(0 == p_data->frac) fpclass = fp_infinity;
534	else
535	{
536	fpclass = (p_data->frac & 0x0008000000000000) //frac.msb==1?
537	? fp_quiet //true
538	: fp_signaling; //false
539	}
540	}
541	return fpclass;
542	}
543
544	static int
545	dissect_single( float p_fpnum, fpsingle* p_data, int p_std, int* p_rc )
546	{
547	p_data->fp.num = p_fpnum;
548	p_data->sign = p_data->fp.inte >> 31;
549	p_data->frac = p_data->fp.inte & 0x007FFFFF;
550	p_data->exp = (p_data->fp.inte >> 23) & 0xFF;
551
552	// classify the number
553	int fpclass = fp_normal;
554	if(0 == p_data->exp) // exponent zero?
555	{
556	fpclass = (0 == p_data->frac)
557	? fp_zero //true
558	: fp_subnormal; //false
559	}
560	else if(0xFF == p_data->exp) // exponent all ones?
561	{
562	if(0 == p_data->frac) fpclass = fp_infinity;
563	else
564	{
565	fpclass = (p_data->frac & 0x00400000) //frac.msb==1?
566	? fp_quiet //true
567	: fp_signaling; //false
568	}
569	}
570	return fpclass;
571	}
572
573
574	//-------------------------------------------------------------------
575	// un-implemented ops return fp_exc_other/unimplemented:
576	//-------------------------------------------------------------------
577
578	int fpsim_fsqrtd(const double* a,double* b,uint64 c,uint64 d)
579	{return FPX_UIMP;}
580	int fpsim_fsqrts(const float* a,float* b,uint64 c,uint64 d)
581	{return FPX_UIMP;}
582
583	//-------------------------------------------------------------------
584	// newer ops return illegal_instruction trap if called:
585	//-------------------------------------------------------------------
586
587	int fpsim_fnaddd(const double* a,const double* b,double* c,uint64 d,uint64 e)
588	{return FPX_ILL;}
589	int fpsim_fnadds(const float* a,const float* b,float* c,uint64 d,uint64 e)
590	{return FPX_ILL;}
591	int fpsim_fnmuld(const double* a,const double* b,double* c,uint64 d,uint64 e)
592	{return FPX_ILL;}
593	int fpsim_fnmuls(const float* a,const float* b,float* c,uint64 d,uint64 e)
594	{return FPX_ILL;}
595	int fpsim_fnsmuld(const float* a,const float* b,double* c,uint64 d,uint64 e)
596	{return FPX_ILL;}
597	int fpsim_fhaddd(const double* a,const double* b,double* c,uint64 d,uint64 e,
598	fpsim_fha_subtype f) {return FPX_ILL;}
599	int fpsim_fhadds(const float* a,const float* b,float* c,uint64 d,uint64 e,
600	fpsim_fha_subtype f) {return FPX_ILL;}
601	int fpsim_fmaddd(const double* a,const double* b,const double* c,double* d,
602	uint64 e,uint64 f,fpsim_fma_subtype g) {return FPX_ILL;}
603	int fpsim_fmadds(const float* a,const float* b,const float* c,float* d,
604	uint64 e,uint64 f,fpsim_fma_subtype g) {return FPX_ILL;}
605	int fpsim_fumaddd(const double* a,const double* b,const double* c,double* d,
606	uint64 e,uint64 f,fpsim_fma_subtype g) {return FPX_ILL;}
607	int fpsim_fumadds(const float* a,const float* b,const float* c,float* d,
608	uint64 e,uint64 f,fpsim_fma_subtype g) {return FPX_ILL;}
609
610
/*=======================================================================*/
612
613	// Function pointer structure
614
615	struct fpsim_functions fpsim_funclist =
616	{
617	fpsim_update_fsr, fpsim_gsr_mask,
618
619	fpsim_faddd, fpsim_fsubd, fpsim_fmuld, fpsim_fdivd, fpsim_fsqrtd,
620	fpsim_fadds, fpsim_fsubs, fpsim_fmuls, fpsim_fdivs, fpsim_fsqrts,
621	fpsim_fsmuld,
622
623	fpsim_fstod, fpsim_fdtos, fpsim_fstox, fpsim_fdtox, fpsim_fstoi,
624	fpsim_fdtoi, fpsim_fxtos, fpsim_fxtod, fpsim_fitos, fpsim_fitod,
625
626	fpsim_fnaddd, fpsim_fnadds, fpsim_fnmuld, fpsim_fnmuls, fpsim_fnsmuld,
627	fpsim_fhaddd, fpsim_fhadds,
628	fpsim_fmaddd, fpsim_fmadds, fpsim_fumaddd, fpsim_fumadds
629	};
630
// Model identification string: the fixed model name followed by the
// compile date supplied by the preprocessor.
char fpsim_fp_model[] =
    "FPSIM Niagara1 (BW) "
    __DATE__;