/* legion/src/procs/sunsparc/libniagara/fpsim_bw.c */

/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: fpsim_bw.c
* Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
/****************************************************************************
 *  fpsim_bw.c -- Floating-point Simulation Library for SPARC (Niagara-1)
 *
 *  Author --
 *  Robert Rethemeyer - Sun Microsystems, Inc.
 *
 *  Date --
 *  Mar 2, 2006
 *
 *  Design -- dynamically loaded shared object; compile with C or C++.
 *   Models FP instruction behavior according to:
 *    - UltraSPARC Architecture 2005
 *    - Niagara PRM Appendix I
 *       The general strategy is to use the SPARC FP instructions
 *   with special handling for the cases where Niagara requires exceptions.
 *
 *  (c) COPYRIGHT 2005-2006 Sun Microsystems, Inc.
 *  Sun Confidential: Sun SSG Only
 ***************************************************************************/

/*********************************************************************
 * ATTENTION: This code is part of a library shared by multiple
 * projects.  DO NOT MAKE CHANGES TO THIS CODE WITHIN YOUR PROJECT.
 * Instead, contact the owner/maintainer of the library, currently:
 *    Robert.Rethemeyer@Sun.COM   +1-408-616-5717  (x45717)
 *    Systems Group: TVT: FrontEnd Technologies
 * The CVS source code repository for the library is at:
 *    /import/ftap-blimp1/cvs/fpsim
 * DO NOT COMMIT CHANGES TO THAT REPOSITORY: contact the maintainer.
 ********************************************************************/

/*tab length=4*/

/* CVS "$Id$" keyword string for this source file. */
static const char cvsid[] = "$Id: fpsim_bw.c,v 1.6 2006/12/07 20:52:04 bobsmail Exp $";


#include "fpsim_support.h"
#include "fpsim.h"


#ifdef __cplusplus
extern "C" {
#endif
/* Prototypes for the file-local number classifiers defined further down
   in this file; parameter names mirror those definitions. */
static int dissect_double( double p_fpnum, fpdouble* p_data,
                           int p_std, int* p_rc );
static int dissect_single( float p_fpnum, fpsingle* p_data,
                           int p_std, int* p_rc );
#ifdef __cplusplus
}
#endif


#define FSR_UFM 0x2000000  /*FSR.TEM underflow mask (FSR bit 25)*/
#define GSR_IM 0x08000000  /*GSR interval mode bit (GSR bit 27)*/
#define STDONLY 1          /*Standard mode only*/

/* RNDMODE(FSR,GSR): 2-bit rounding mode, as an int in the range 0..3.
 *   GSR_IM set in GSR   -> GSR bits 26:25
 *   GSR_IM clear in GSR -> FSR bits 31:30
 * Every argument and the whole expansion are parenthesized so that an
 * expression argument (e.g. "a | b") is evaluated as a unit.
 * GSR is still evaluated twice: do not pass arguments with side effects.
 */
#define RNDMODE(FSR,GSR) \
        ((int)((((GSR) & GSR_IM) ? ((GSR)>>25) : ((FSR)>>30)) & 3))

/*===========================================================================*/


//-------------------------------------------------------------------
// FADDD
//-------------------------------------------------------------------
/* Simulates FADDD.  asm_faddd operates on *p_op1 and *p_op2 using the
 * rounding mode RNDMODE selects from p_fsr/p_gsr; the value it produces
 * is stored to *p_res.
 * Returns the exception code from asm_faddd, except that a subnormal
 * result delivered with no exception is reported as FPX_UF when the
 * FSR.TEM underflow mask (FSR_UFM) is set in p_fsr.
 */
int
fpsim_faddd( const double* p_op1, const double* p_op2, double* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        int      scratch[2];   /* work area handed to the asm routine */
        double   raw;
        fpdouble parts;

        int status = asm_faddd(p_op1, p_op2, &raw,
                               RNDMODE(p_fsr,p_gsr), scratch);
        int cls    = dissect_double(raw, &parts, STDONLY, NULL);

        /* no exception reported, yet the result is subnormal and the
           underflow trap is enabled: report underflow */
        if(0 == status && fp_subnormal == cls && (p_fsr & FSR_UFM))
        {
                status = FPX_UF;
        }

        *p_res = parts.fp.num;
        return status;
}


//-------------------------------------------------------------------
// FSUBD
//-------------------------------------------------------------------
/* Simulates FSUBD.  asm_fsubd operates on *p_op1 and *p_op2 using the
 * rounding mode RNDMODE selects from p_fsr/p_gsr; the value it produces
 * is stored to *p_res.
 * Returns the exception code from asm_fsubd, except that a subnormal
 * result delivered with no exception is reported as FPX_UF when the
 * FSR.TEM underflow mask (FSR_UFM) is set in p_fsr.
 */
int
fpsim_fsubd( const double* p_op1, const double* p_op2, double* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        int      scratch[2];   /* work area handed to the asm routine */
        double   raw;
        fpdouble parts;

        int status = asm_fsubd(p_op1, p_op2, &raw,
                               RNDMODE(p_fsr,p_gsr), scratch);
        int cls    = dissect_double(raw, &parts, STDONLY, NULL);

        /* no exception reported, yet the result is subnormal and the
           underflow trap is enabled: report underflow */
        if(0 == status && fp_subnormal == cls && (p_fsr & FSR_UFM))
        {
                status = FPX_UF;
        }

        *p_res = parts.fp.num;
        return status;
}


//-------------------------------------------------------------------
// FMULD
//-------------------------------------------------------------------
/* Simulates FMULD.  asm_fmuld operates on *p_op1 and *p_op2 using the
 * rounding mode RNDMODE selects from p_fsr/p_gsr; the value it produces
 * is stored to *p_res.
 * Returns the exception code from asm_fmuld, except that a subnormal
 * result delivered with no exception is reported as FPX_UF when the
 * FSR.TEM underflow mask (FSR_UFM) is set in p_fsr.
 */
int
fpsim_fmuld( const double* p_op1, const double* p_op2, double* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        int      scratch[2];   /* work area handed to the asm routine */
        double   raw;
        fpdouble parts;

        int status = asm_fmuld(p_op1, p_op2, &raw,
                               RNDMODE(p_fsr,p_gsr), scratch);
        int cls    = dissect_double(raw, &parts, STDONLY, NULL);

        /* no exception reported, yet the result is subnormal and the
           underflow trap is enabled: report underflow */
        if(0 == status && fp_subnormal == cls && (p_fsr & FSR_UFM))
        {
                status = FPX_UF;
        }

        *p_res = parts.fp.num;
        return status;
}


//-------------------------------------------------------------------
// FDIVD
//-------------------------------------------------------------------
/* Simulates FDIVD.  asm_fdivd operates on *p_op1 and *p_op2 using the
 * rounding mode RNDMODE selects from p_fsr/p_gsr; the value it produces
 * is stored to *p_res.
 * Returns the exception code from asm_fdivd, except that a subnormal
 * result delivered with no exception is reported as FPX_UF when the
 * FSR.TEM underflow mask (FSR_UFM) is set in p_fsr.
 */
int
fpsim_fdivd( const double* p_op1, const double* p_op2, double* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        int      scratch[2];   /* work area handed to the asm routine */
        double   raw;
        fpdouble parts;

        int status = asm_fdivd(p_op1, p_op2, &raw,
                               RNDMODE(p_fsr,p_gsr), scratch);
        int cls    = dissect_double(raw, &parts, STDONLY, NULL);

        /* no exception reported, yet the result is subnormal and the
           underflow trap is enabled: report underflow */
        if(0 == status && fp_subnormal == cls && (p_fsr & FSR_UFM))
        {
                status = FPX_UF;
        }

        *p_res = parts.fp.num;
        return status;
}


//-------------------------------------------------------------------
// FADDS
//-------------------------------------------------------------------
/* Simulates FADDS.  asm_fadds operates on *p_op1 and *p_op2 using the
 * rounding mode RNDMODE selects from p_fsr/p_gsr; the value it produces
 * is stored to *p_res.
 * Returns the exception code from asm_fadds, except that a subnormal
 * result delivered with no exception is reported as FPX_UF when the
 * FSR.TEM underflow mask (FSR_UFM) is set in p_fsr.
 */
int
fpsim_fadds( const float* p_op1, const float* p_op2, float* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        int      scratch[2];   /* work area handed to the asm routine */
        float    raw;
        fpsingle parts;

        int status = asm_fadds(p_op1, p_op2, &raw,
                               RNDMODE(p_fsr,p_gsr), scratch);
        int cls    = dissect_single(raw, &parts, STDONLY, NULL);

        /* no exception reported, yet the result is subnormal and the
           underflow trap is enabled: report underflow */
        if(0 == status && fp_subnormal == cls && (p_fsr & FSR_UFM))
        {
                status = FPX_UF;
        }

        *p_res = parts.fp.num;
        return status;
}


//-------------------------------------------------------------------
// FSUBS
//-------------------------------------------------------------------
/* Simulates FSUBS.  asm_fsubs operates on *p_op1 and *p_op2 using the
 * rounding mode RNDMODE selects from p_fsr/p_gsr; the value it produces
 * is stored to *p_res.
 * Returns the exception code from asm_fsubs, except that a subnormal
 * result delivered with no exception is reported as FPX_UF when the
 * FSR.TEM underflow mask (FSR_UFM) is set in p_fsr.
 */
int
fpsim_fsubs( const float* p_op1, const float* p_op2, float* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        int      scratch[2];   /* work area handed to the asm routine */
        float    raw;
        fpsingle parts;

        int status = asm_fsubs(p_op1, p_op2, &raw,
                               RNDMODE(p_fsr,p_gsr), scratch);
        int cls    = dissect_single(raw, &parts, STDONLY, NULL);

        /* no exception reported, yet the result is subnormal and the
           underflow trap is enabled: report underflow */
        if(0 == status && fp_subnormal == cls && (p_fsr & FSR_UFM))
        {
                status = FPX_UF;
        }

        *p_res = parts.fp.num;
        return status;
}


//-------------------------------------------------------------------
// FMULS
//-------------------------------------------------------------------
/* Simulates FMULS.  asm_fmuls operates on *p_op1 and *p_op2 using the
 * rounding mode RNDMODE selects from p_fsr/p_gsr; the value it produces
 * is stored to *p_res.
 * Returns the exception code from asm_fmuls, except that a subnormal
 * result delivered with no exception is reported as FPX_UF when the
 * FSR.TEM underflow mask (FSR_UFM) is set in p_fsr.
 */
int
fpsim_fmuls( const float* p_op1, const float* p_op2, float* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        int      scratch[2];   /* work area handed to the asm routine */
        float    raw;
        fpsingle parts;

        int status = asm_fmuls(p_op1, p_op2, &raw,
                               RNDMODE(p_fsr,p_gsr), scratch);
        int cls    = dissect_single(raw, &parts, STDONLY, NULL);

        /* no exception reported, yet the result is subnormal and the
           underflow trap is enabled: report underflow */
        if(0 == status && fp_subnormal == cls && (p_fsr & FSR_UFM))
        {
                status = FPX_UF;
        }

        *p_res = parts.fp.num;
        return status;
}


//-------------------------------------------------------------------
// FSMULD
//-------------------------------------------------------------------
/* Simulates FSMULD.  asm_fsmuld takes the two single-precision operands
 * and writes the double-precision result straight to *p_res.
 * p_fsr and p_gsr are accepted but not used by this routine.
 * Returns the exception code from asm_fsmuld unchanged.
 */
int
fpsim_fsmuld( const float* p_op1, const float* p_op2, double* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        int scratch[2];   /* work area handed to the asm routine */

        (void)p_fsr;
        (void)p_gsr;
        return asm_fsmuld(p_op1, p_op2, p_res, scratch);
}


//-------------------------------------------------------------------
// FDIVS
//-------------------------------------------------------------------
/* Simulates FDIVS.  asm_fdivs operates on *p_op1 and *p_op2 using the
 * rounding mode RNDMODE selects from p_fsr/p_gsr; the value it produces
 * is stored to *p_res.
 * Returns the exception code from asm_fdivs, except that a subnormal
 * result delivered with no exception is reported as FPX_UF when the
 * FSR.TEM underflow mask (FSR_UFM) is set in p_fsr.
 */
int
fpsim_fdivs( const float* p_op1, const float* p_op2, float* p_res,
                        uint64 p_fsr, uint64 p_gsr )
{
        int      scratch[2];   /* work area handed to the asm routine */
        float    raw;
        fpsingle parts;

        int status = asm_fdivs(p_op1, p_op2, &raw,
                               RNDMODE(p_fsr,p_gsr), scratch);
        int cls    = dissect_single(raw, &parts, STDONLY, NULL);

        /* no exception reported, yet the result is subnormal and the
           underflow trap is enabled: report underflow */
        if(0 == status && fp_subnormal == cls && (p_fsr & FSR_UFM))
        {
                status = FPX_UF;
        }

        *p_res = parts.fp.num;
        return status;
}


//-------------------------------------------------------------------
// FSTOD
//-------------------------------------------------------------------
/* Simulates FSTOD.  asm_fstod converts *p_op2 and writes *p_res directly,
 * using the rounding mode RNDMODE selects from p_fsr/p_gsr.
 * Returns the exception code from asm_fstod unchanged.
 */
int
fpsim_fstod( const float* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr )
{
        int scratch[2];   /* work area handed to the asm routine */

        return asm_fstod(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FDTOS
//-------------------------------------------------------------------
/* Simulates FDTOS.  asm_fdtos converts *p_op2 using the rounding mode
 * RNDMODE selects from p_fsr/p_gsr; the value it produces is stored
 * to *p_res.
 * Returns the exception code from asm_fdtos, except that a subnormal
 * result delivered with no exception is reported as FPX_UF when the
 * FSR.TEM underflow mask (FSR_UFM) is set in p_fsr.
 */
int
fpsim_fdtos( const double* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
{
        int      scratch[2];   /* work area handed to the asm routine */
        float    raw;
        fpsingle parts;

        int status = asm_fdtos(p_op2, &raw, RNDMODE(p_fsr,p_gsr), scratch);
        int cls    = dissect_single(raw, &parts, STDONLY, NULL);

        /* no exception reported, yet the result is subnormal and the
           underflow trap is enabled: report underflow */
        if(0 == status && fp_subnormal == cls && (p_fsr & FSR_UFM))
        {
                status = FPX_UF;
        }

        *p_res = parts.fp.num;
        return status;
}


//-------------------------------------------------------------------
// FSTOX
//-------------------------------------------------------------------
/* Simulates FSTOX.  asm_fstox converts *p_op2 and writes *p_res directly,
 * using the rounding mode RNDMODE selects from p_fsr/p_gsr.
 * Returns the exception code from asm_fstox unchanged.
 */
int
fpsim_fstox( const float* p_op2, uint64* p_res, uint64 p_fsr, uint64 p_gsr )
{
        int scratch[2];   /* work area handed to the asm routine */

        return asm_fstox(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FDTOX
//-------------------------------------------------------------------
/* Simulates FDTOX.  asm_fdtox converts *p_op2 and writes *p_res directly,
 * using the rounding mode RNDMODE selects from p_fsr/p_gsr.
 * Returns the exception code from asm_fdtox unchanged.
 */
int
fpsim_fdtox( const double* p_op2, uint64* p_res, uint64 p_fsr, uint64 p_gsr )
{
        int scratch[2];   /* work area handed to the asm routine */

        return asm_fdtox(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FSTOI
//-------------------------------------------------------------------
/* Simulates FSTOI.  asm_fstoi converts *p_op2 and writes *p_res directly,
 * using the rounding mode RNDMODE selects from p_fsr/p_gsr.
 * Returns the exception code from asm_fstoi unchanged.
 */
int
fpsim_fstoi( const float* p_op2, uint* p_res, uint64 p_fsr, uint64 p_gsr )
{
        int scratch[2];   /* work area handed to the asm routine */

        return asm_fstoi(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FDTOI
//-------------------------------------------------------------------
/* Simulates FDTOI.  asm_fdtoi converts *p_op2 and writes *p_res directly,
 * using the rounding mode RNDMODE selects from p_fsr/p_gsr.
 * Returns the exception code from asm_fdtoi unchanged.
 */
int
fpsim_fdtoi( const double* p_op2, uint* p_res, uint64 p_fsr, uint64 p_gsr )
{
        int scratch[2];   /* work area handed to the asm routine */

        return asm_fdtoi(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FXTOS
//-------------------------------------------------------------------
/* Simulates FXTOS.  asm_fxtos converts *p_op2 and writes *p_res directly,
 * using the rounding mode RNDMODE selects from p_fsr/p_gsr.
 * Returns the exception code from asm_fxtos unchanged.
 */
int
fpsim_fxtos( const uint64* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
{
        int scratch[2];   /* work area handed to the asm routine */

        return asm_fxtos(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FXTOD
//-------------------------------------------------------------------
/* Simulates FXTOD.  asm_fxtod converts *p_op2 and writes *p_res directly,
 * using the rounding mode RNDMODE selects from p_fsr/p_gsr.
 * Returns the exception code from asm_fxtod unchanged.
 */
int
fpsim_fxtod( const uint64* p_op2, double* p_res, uint64 p_fsr, uint64 p_gsr )
{
        int scratch[2];   /* work area handed to the asm routine */

        return asm_fxtod(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FITOS
//-------------------------------------------------------------------
/* Simulates FITOS.  asm_fitos converts *p_op2 and writes *p_res directly,
 * using the rounding mode RNDMODE selects from p_fsr/p_gsr.
 * Returns the exception code from asm_fitos unchanged.
 */
int
fpsim_fitos( const uint* p_op2, float* p_res, uint64 p_fsr, uint64 p_gsr )
{
        int scratch[2];   /* work area handed to the asm routine */

        return asm_fitos(p_op2, p_res, RNDMODE(p_fsr,p_gsr), scratch);
}


//-------------------------------------------------------------------
// FITOD
//-------------------------------------------------------------------
/* Simulates FITOD.  asm_fitod converts *p_op2 and writes *p_res directly.
 * Takes no FSR/GSR and always returns 0 (no exception).
 */
int
fpsim_fitod( const uint* p_op2, double* p_res )
{
        const int no_exception = 0;

        asm_fitod(p_op2, p_res);
        return no_exception;
}


//-------------------------------------------------------------------
// GSR_MASK
//-------------------------------------------------------------------

/* Returns the fixed GSR mask for this model: bits 63:32, 27:25 and 7:0
 * are set, all other bits are clear.
 * NOTE(review): presumably these are the GSR bits the model implements;
 * confirm against the caller.
 */
uint64
fpsim_gsr_mask( void )
{
        const uint64 mask = 0xFFFFFFFF0E0000FF;

        return mask;
}


//-------------------------------------------------------------------
// UPDATE_FSR
//-------------------------------------------------------------------

/* Merges a FP exception returned by one of the instruction sim routines
   into the caller's FSR, and indicates whether the caller should
   post a trap (nonzero return value is trap-type code).
*/

/* Folds exception code p_exc into the caller's FSR (*p_fsr) and returns
 * the trap type the caller should post (0 = no trap):
 *   0x10  p_exc has FPX_TRAP with reason 0x10 (illegal instruction);
 *         *p_fsr is left untouched
 *   0x22  any other FPX_TRAP reason (fp_exception_other); the reason
 *         code is written to fsr.ftt (bits 16:14)
 *   0x21  an exception in p_exc is enabled in fsr.tem (bits 27:23);
 *         fsr.cexc (bits 4:0) receives the trapping bits, fsr.ftt = 1
 *   0     nothing enabled; p_exc becomes fsr.cexc and is also ORed
 *         into fsr.aexc (bits 9:5), fsr.ftt = 0
 */
int
fpsim_update_fsr( int p_exc, uint64* p_fsr )
{
        uint64 new_fsr = *p_fsr;
        int trap_type;

        if(0 == (p_exc & FPX_TRAP))
        {
                // IEEE exception(s) or clean completion
                int enabled = p_exc & (new_fsr>>23) & 0x1F; // gate by fsr.tem

                new_fsr &= ~0x1C01F; // wipe previous fsr.cexc and fsr.ftt

                if(0 == enabled)
                {
                        // nothing traps: record in cexc, accumulate in aexc
                        new_fsr |= (p_exc | (p_exc<<5));
                        trap_type = 0;
                }
                else
                {
                        // an OF/UF trap suppresses the accompanying NX bit
                        if(enabled & (FPX_OF|FPX_UF))
                        {
                                enabled &= ~FPX_NX;
                        }
                        new_fsr |= (enabled | 0x04000); // cexc only; ftt=1
                        trap_type = 0x21;  // IEEE_754_exception
                }
        }
        else
        {
                // non-IEEE trap (unfinished or illegal)
                int reason = p_exc & 0xFF; // trap reason in bits 7:0

                // illegal_instr: hand back without touching the FSR
                if(0x10 == reason) return reason;

                // FP_other trap: the reason code becomes fsr.ftt
                new_fsr = (new_fsr & ~0x1C000) | (reason << 14);
                trap_type = 0x22;   // fp_exception_other
        }

        *p_fsr = new_fsr; // write back to the caller's FSR
        return trap_type;
}


/*=======================================================================*/

/* Dissection routines:  pick apart the FP number into sign, exponent and
        fraction fields.
        Return the FP number class:  0=zero 1=subnorm 2=normal 3=inf 4=qnan 5=snan
*/

/* Splits a double into its raw fields and classifies it.
 *   p_fpnum  value to examine
 *   p_data   receives the value (fp.num) plus sign (bit 63),
 *            exp (bits 62:52) and frac (bits 51:0) of its bit image
 *   p_std, p_rc  accepted but not used by this routine
 * Returns fp_zero, fp_subnormal, fp_normal, fp_infinity, fp_quiet or
 * fp_signaling.
 */
static int
dissect_double( double p_fpnum, fpdouble* p_data, int p_std, int* p_rc )
{
        (void)p_std;
        (void)p_rc;

        p_data->fp.num = p_fpnum;
        p_data->sign = p_data->fp.inte >> 63;
        p_data->frac = p_data->fp.inte & 0x000FFFFFFFFFFFFF;
        p_data->exp  = (p_data->fp.inte >> 52) & 0x7FF;

        // exponent all zeros: zero or subnormal
        if(0 == p_data->exp)
        {
                if(0 == p_data->frac) return fp_zero;
                return fp_subnormal;
        }

        // anything but an all-ones exponent is an ordinary number
        if(0x7FF != p_data->exp) return fp_normal;

        // exponent all ones: infinity or NaN
        if(0 == p_data->frac) return fp_infinity;

        // NaN: the fraction's top bit tells quiet from signaling
        if(p_data->frac & 0x0008000000000000) return fp_quiet;
        return fp_signaling;
}

/* Splits a float into its raw fields and classifies it.
 *   p_fpnum  value to examine
 *   p_data   receives the value (fp.num) plus sign (bit 31),
 *            exp (bits 30:23) and frac (bits 22:0) of its bit image
 *   p_std, p_rc  accepted but not used by this routine
 * Returns fp_zero, fp_subnormal, fp_normal, fp_infinity, fp_quiet or
 * fp_signaling.
 */
static int
dissect_single( float p_fpnum, fpsingle* p_data, int p_std, int* p_rc )
{
        (void)p_std;
        (void)p_rc;

        p_data->fp.num = p_fpnum;
        p_data->sign = p_data->fp.inte >> 31;
        p_data->frac = p_data->fp.inte & 0x007FFFFF;
        p_data->exp  = (p_data->fp.inte >> 23) & 0xFF;

        // exponent all zeros: zero or subnormal
        if(0 == p_data->exp)
        {
                if(0 == p_data->frac) return fp_zero;
                return fp_subnormal;
        }

        // anything but an all-ones exponent is an ordinary number
        if(0xFF != p_data->exp) return fp_normal;

        // exponent all ones: infinity or NaN
        if(0 == p_data->frac) return fp_infinity;

        // NaN: the fraction's top bit tells quiet from signaling
        if(p_data->frac & 0x00400000) return fp_quiet;
        return fp_signaling;
}


//-------------------------------------------------------------------
// un-implemented ops return fp_exc_other/unimplemented:
//-------------------------------------------------------------------

/* FSQRTD is not implemented by this model: every argument is ignored
   and FPX_UIMP is returned. */
int
fpsim_fsqrtd( const double* a, double* b, uint64 c, uint64 d )
{
        (void)a; (void)b; (void)c; (void)d;
        return FPX_UIMP;
}
/* FSQRTS is not implemented by this model: every argument is ignored
   and FPX_UIMP is returned. */
int
fpsim_fsqrts( const float* a, float* b, uint64 c, uint64 d )
{
        (void)a; (void)b; (void)c; (void)d;
        return FPX_UIMP;
}

//-------------------------------------------------------------------
// newer ops return illegal_instruction trap if called:
//-------------------------------------------------------------------

/* FNADDD does not exist in this model: every argument is ignored and
   FPX_ILL is returned. */
int
fpsim_fnaddd( const double* a, const double* b, double* c,
                        uint64 d, uint64 e )
{
        (void)a; (void)b; (void)c; (void)d; (void)e;
        return FPX_ILL;
}
/* FNADDS does not exist in this model: every argument is ignored and
   FPX_ILL is returned. */
int
fpsim_fnadds( const float* a, const float* b, float* c,
                        uint64 d, uint64 e )
{
        (void)a; (void)b; (void)c; (void)d; (void)e;
        return FPX_ILL;
}
/* FNMULD does not exist in this model: every argument is ignored and
   FPX_ILL is returned. */
int
fpsim_fnmuld( const double* a, const double* b, double* c,
                        uint64 d, uint64 e )
{
        (void)a; (void)b; (void)c; (void)d; (void)e;
        return FPX_ILL;
}
/* FNMULS does not exist in this model: every argument is ignored and
   FPX_ILL is returned. */
int
fpsim_fnmuls( const float* a, const float* b, float* c,
                        uint64 d, uint64 e )
{
        (void)a; (void)b; (void)c; (void)d; (void)e;
        return FPX_ILL;
}
/* FNSMULD does not exist in this model: every argument is ignored and
   FPX_ILL is returned. */
int
fpsim_fnsmuld( const float* a, const float* b, double* c,
                        uint64 d, uint64 e )
{
        (void)a; (void)b; (void)c; (void)d; (void)e;
        return FPX_ILL;
}
/* FHADDD does not exist in this model: every argument is ignored and
   FPX_ILL is returned. */
int
fpsim_fhaddd( const double* a, const double* b, double* c,
                        uint64 d, uint64 e, fpsim_fha_subtype f )
{
        (void)a; (void)b; (void)c; (void)d; (void)e; (void)f;
        return FPX_ILL;
}
/* FHADDS does not exist in this model: every argument is ignored and
   FPX_ILL is returned. */
int
fpsim_fhadds( const float* a, const float* b, float* c,
                        uint64 d, uint64 e, fpsim_fha_subtype f )
{
        (void)a; (void)b; (void)c; (void)d; (void)e; (void)f;
        return FPX_ILL;
}
/* FMADDD does not exist in this model: every argument is ignored and
   FPX_ILL is returned. */
int
fpsim_fmaddd( const double* a, const double* b, const double* c, double* d,
                        uint64 e, uint64 f, fpsim_fma_subtype g )
{
        (void)a; (void)b; (void)c; (void)d; (void)e; (void)f; (void)g;
        return FPX_ILL;
}
/* FMADDS does not exist in this model: every argument is ignored and
   FPX_ILL is returned. */
int
fpsim_fmadds( const float* a, const float* b, const float* c, float* d,
                        uint64 e, uint64 f, fpsim_fma_subtype g )
{
        (void)a; (void)b; (void)c; (void)d; (void)e; (void)f; (void)g;
        return FPX_ILL;
}
/* FUMADDD does not exist in this model: every argument is ignored and
   FPX_ILL is returned. */
int
fpsim_fumaddd( const double* a, const double* b, const double* c, double* d,
                        uint64 e, uint64 f, fpsim_fma_subtype g )
{
        (void)a; (void)b; (void)c; (void)d; (void)e; (void)f; (void)g;
        return FPX_ILL;
}
/* FUMADDS does not exist in this model: every argument is ignored and
   FPX_ILL is returned. */
int
fpsim_fumadds( const float* a, const float* b, const float* c, float* d,
                        uint64 e, uint64 f, fpsim_fma_subtype g )
{
        (void)a; (void)b; (void)c; (void)d; (void)e; (void)f; (void)g;
        return FPX_ILL;
}


/*=======================================================================*/

// Function pointer structure

struct fpsim_functions  fpsim_funclist =
{
        fpsim_update_fsr, fpsim_gsr_mask,

        fpsim_faddd, fpsim_fsubd, fpsim_fmuld, fpsim_fdivd, fpsim_fsqrtd,
        fpsim_fadds, fpsim_fsubs, fpsim_fmuls, fpsim_fdivs, fpsim_fsqrts,
        fpsim_fsmuld,

        fpsim_fstod, fpsim_fdtos, fpsim_fstox, fpsim_fdtox, fpsim_fstoi,
        fpsim_fdtoi, fpsim_fxtos, fpsim_fxtod, fpsim_fitos, fpsim_fitod,

        fpsim_fnaddd, fpsim_fnadds, fpsim_fnmuld, fpsim_fnmuls, fpsim_fnsmuld,
        fpsim_fhaddd, fpsim_fhadds,
        fpsim_fmaddd, fpsim_fmadds, fpsim_fumaddd, fpsim_fumadds
};

/* Model identification string: a fixed model name followed by the
   compile date (__DATE__). */
char fpsim_fp_model[] =
        "FPSIM Niagara1 (BW) "
        __DATE__;