[unix-history] / sys / gnu / fpemul / reg_round.s

	.file "reg_round.S"
/*
 *  reg_round.S
 *
 * Rounding/truncation/etc for FPU basic arithmetic functions.
 *
 * This code has four possible entry points.
 * The following must be entered by a jmp instruction:
 *   FPU_round, FPU_round_sqrt, and FPU_Arith_exit.
 *
 * The _round_reg entry point is intended to be used by C code.
 * From C, call as:
 * void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w)
 *
 *
 * Copyright (C) 1992,1993,1994
 *                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,
 *                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au
 * All rights reserved.
 *
 * This copyright notice covers the redistribution and use of the
 * FPU emulator developed by W. Metzenthen. It covers only its use
 * in the 386BSD, FreeBSD and NetBSD operating systems. Any other
 * use is not permitted under this copyright.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must include information specifying
 *    that source code for the emulator is freely available and include
 *    either:
 *      a) an offer to provide the source code for a nominal distribution
 *         fee, or
 *      b) list at least two alternative methods whereby the source
 *         can be obtained, e.g. a publically accessible bulletin board
 *         and an anonymous ftp site from which the software can be
 *         downloaded.
 * 3. All advertising materials specifically mentioning features or use of
 *    this emulator must acknowledge that it was developed by W. Metzenthen.
 * 4. The name of W. Metzenthen may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
 * W. METZENTHEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * The purpose of this copyright, based upon the Berkeley copyright, is to
 * ensure that the covered software remains freely available to everyone.
 *
 * The software (with necessary differences) is also available, but under
 * the terms of the GNU copyleft, for the Linux operating system and for
 * the djgpp ms-dos extender.
 *
 * W. Metzenthen   June 1994.
 *
 *
 *     $Id: reg_round.s,v 1.2 1994/04/29 21:30:23 gclarkii Exp $
 *
 */


/*---------------------------------------------------------------------------+
 | Four entry points.                                                        |
 |                                                                           |
 | Needed by both the FPU_round and FPU_round_sqrt entry points:             |
 |  %eax:%ebx  64 bit significand                                            |
 |  %edx       32 bit extension of the significand                           |
 |  %edi       pointer to an FPU_REG for the result to be stored             |
 |  stack      calling function must have set up a C stack frame and         |
 |             pushed %esi, %edi, and %ebx                                   |
 |                                                                           |
 | Needed just for the FPU_round_sqrt entry point:                           |
 |  %cx  A control word in the same format as the FPU control word.          |
 | Otherwise, PARAM4 must give such a value.                                 |
 |                                                                           |
 |                                                                           |
 | The significand and its extension are assumed to be exact in the          |
 | following sense:                                                          |
 |   If the significand by itself is the exact result then the significand   |
 |   extension (%edx) must contain 0, otherwise the significand extension    |
 |   must be non-zero.                                                       |
 |   If the significand extension is non-zero then the significand is        |
 |   smaller than the magnitude of the correct exact result by an amount     |
 |   greater than zero and less than one ls bit of the significand.          |
 |   The significand extension is only required to have three possible       |
 |   non-zero values:                                                        |
 |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
 |                                 smaller than the magnitude of the true    |
 |                                 exact result.                             |
 |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  The code in this module has become quite complex, but it should handle   |
 |  all of the FPU flags which are set at this stage of the basic arithmetic |
 |  computations.                                                            |
 |  There are a few rare cases where the results are not set identically to  |
 |  a real FPU. These require a bit more thought because at this stage the   |
 |  results of the code here appear to be more consistent...                 |
 |  This may be changed in a future version.                                 |
 +---------------------------------------------------------------------------*/


#include "fpu_asm.h"
#include "exception.h"
#include "control_w.h"

/* Values stored in FPU_bits_lost: the direction in which the rounding
   step discarded precision (0 means nothing has been discarded). */
#define	LOST_DOWN	$1
#define	LOST_UP		$2
/* Values stored in FPU_denormal: how an exponent at or below EXP_UNDER
   was dealt with on entry (0 means the exponent was in range). */
#define	DENORMAL	$1
#define	UNMASKED_UNDERFLOW $2

.data
	.align 2,0
/* Working state shared by all of the paths in this file.
   NOTE(review): these are single static bytes, so the code presumably
   must not be re-entered while a rounding is in progress -- confirm. */
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0

.text
	/* Padding is filled with 144 (0x90, the one byte nop). */
	.align 2,144
/* The four entry points described in the header comments. */
.globl FPU_round
.globl FPU_round_sqrt
.globl FPU_Arith_exit
.globl _round_reg

/*
 * _round_reg -- the entry point that is reached by a call from C.
 *
 * Builds the stack frame that the shared exit path (FPU_Arith_exit)
 * unwinds, loads the registers the rounding code expects and joins it
 * at FPU_round_sqrt:
 *   %edi      <- PARAM1, pointer to the FPU_REG
 *   %eax:%ebx <- significand read from that FPU_REG (SIGH:SIGL)
 *   %edx      <- PARAM2, the significand extension
 *   %ecx      <- PARAM3, the control word
 */
_round_reg:
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	/* The loads are independent of each other, apart from the two
	   significand words needing %edi to be set up first. */
	movl	PARAM3,%ecx		/* control word */
	movl	PARAM2,%edx		/* extension */
	movl	PARAM1,%edi		/* FPU_REG pointer */
	movl	SIGL(%edi),%ebx		/* low significand word */
	movl	SIGH(%edi),%eax		/* high significand word */
	jmp	FPU_round_sqrt

/*
 * FPU_round      -- jmp target; the control word is fetched from PARAM4
 *                   of the frame which the caller has already built.
 * FPU_round_sqrt -- jmp target; the control word is already in %ecx.
 *
 * On both paths %eax:%ebx holds the significand, %edx its extension and
 * %edi points at the FPU_REG which receives the result.
 */
FPU_round:
	movl	PARAM4,%ecx

FPU_round_sqrt:

#ifdef PARANOID
/* Disabled entry check, kept for reference (cannot be used here yet): */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	/* An exponent at or below EXP_UNDER takes the de-normal path,
	   which rejoins at xDenorm_done. */
	cmpl	EXP_UNDER,EXP(%edi)
	jle	xMake_denorm

	movb	$0,FPU_denormal		/* 0 -> exponent is in range */

xDenorm_done:
	movl	%ecx,%esi		/* keep the full control word for the
					   rounding-mode test made later */
	movb	$0,FPU_bits_lost	/* nothing has been discarded yet */

	/* Dispatch on the precision control field of the control word. */
	andl	CW_PC,%ecx
	cmpl	PR_64_BITS,%ecx
	jz	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	jz	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	jz	LRound_To_24

	/* Any other value: reported when PARANOID is defined, otherwise
	   execution falls into the 24 bit code which follows. */
#ifdef PARANOID
	jmp	L_bugged	/* Not an emulator bug, just a bad control word */
#endif /* PARANOID */


/*
 * 24 bit precision.
 * The result keeps the upper 24 bits of %eax.  The low 8 bits of %eax,
 * the whole of %ebx and the extension %edx are the bits being discarded.
 */
LRound_To_24:
	/* Pick the handler for the rounding control field of the control
	   word which was saved in %esi. */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	jz	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	jz	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	jz	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	jz	LDown_24

#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

/* Towards +infinity: positive values have their magnitude rounded up,
   negative values are simply truncated. */
LUp_24:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_24_round_up
	jmp	LCheck_truncate_24

/* Towards -infinity: positive values are simply truncated, negative
   values have their magnitude rounded up. */
LDown_24:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_24

/* Round the magnitude up, but only if some discarded bit is set. */
LCheck_24_round_up:
	movzbl	%al,%ecx		/* low 8 bits of the high word */
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	LRe_normalise		/* already exact at 24 bits */
	jmp	LDo_24_round_up

/* Round to nearest, ties going to the even 24 bit value. */
LRound_nearest_24:
	movzbl	%al,%ecx
	cmpl	$0x80,%ecx
	jb	LCheck_truncate_24	/* below half: no increment */
	ja	LGreater_Half_24	/* above half: increment */

	/* The top discarded byte is exactly 0x80, so any lower bit which
	   is set pushes the value above half. */
	testl	%ebx,%ebx
	jnz	LGreater_Half_24

	testl	%edx,%edx
	jnz	LGreater_Half_24

	/* Exactly half: increment only when the 24th bit is 1. */
	testl	$0x100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:
LDo_24_round_up:
	movb	LOST_UP,FPU_bits_lost
	xorl	%ebx,%ebx
	andl	$0xffffff00,%eax	/* drop the discarded bits ... */
	addl	$0x100,%eax		/* ... and add one at the 24th bit; the
					   carry is tested at the jump target */
	jmp	LCheck_Round_Overflow

/* Truncate, recording a loss only when a discarded bit was set. */
LCheck_truncate_24:
	movzbl	%al,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	LRe_normalise		/* nothing to discard */

LDo_truncate_24:
	movb	LOST_DOWN,FPU_bits_lost
	xorl	%ebx,%ebx
	andl	$0xffffff00,%eax	/* keep the upper 24 bits only */
	jmp	LRe_normalise


/*
 * 53 bit precision.
 * The result keeps all of %eax and the upper 21 bits of %ebx.  The low
 * 11 bits of %ebx and the extension %edx are the bits being discarded.
 */
LRound_To_53:
	/* Pick the handler for the rounding control field of the control
	   word which was saved in %esi. */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	jz	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	jz	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	jz	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	jz	LDown_53

#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

/* Towards +infinity: positive values have their magnitude rounded up,
   negative values are simply truncated. */
LUp_53:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_53_round_up
	jmp	LCheck_truncate_53

/* Towards -infinity: positive values are simply truncated, negative
   values have their magnitude rounded up. */
LDown_53:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_53

/* Round the magnitude up, but only if some discarded bit is set. */
LCheck_53_round_up:
	movl	%ebx,%ecx
	andl	$0x7ff,%ecx		/* the 11 discarded bits of %ebx */
	orl	%edx,%ecx
	jz	LRe_normalise		/* already exact at 53 bits */
	jmp	LDo_53_round_up

/* Round to nearest, ties going to the even 53 bit value. */
LRound_nearest_53:
	movl	%ebx,%ecx
	andl	$0x7ff,%ecx
	cmpl	$0x400,%ecx
	jb	LCheck_truncate_53	/* below half: no increment */
	ja	LGreater_Half_53	/* above half: increment */

	/* The discarded bits of %ebx are exactly 0x400, so a non-zero
	   extension pushes the value above half. */
	testl	%edx,%edx
	jnz	LGreater_Half_53

	/* Exactly half: increment only when the 53rd bit is 1. */
	testl	$0x800,%ebx
	jz	LTruncate_53

LGreater_Half_53:
LDo_53_round_up:
	andl	$0xfffff800,%ebx	/* drop the discarded bits ... */
	movb	LOST_UP,FPU_bits_lost
	addl	$0x800,%ebx		/* ... and add one at the 53rd bit */
	adcl	$0,%eax			/* the carry out of %eax is tested at
					   the jump target */
	jmp	LCheck_Round_Overflow

/* Truncate, recording a loss only when a discarded bit was set. */
LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x7ff,%ecx
	orl	%edx,%ecx
	jz	LRe_normalise		/* nothing to discard */

LTruncate_53:
	andl	$0xfffff800,%ebx	/* keep the upper 53 bits only */
	movb	LOST_DOWN,FPU_bits_lost
	jmp	LRe_normalise


/*
 * 64 bit precision.
 * The whole of %eax:%ebx is kept; only the extension %edx is discarded.
 */
LRound_To_64:
	/* Pick the handler for the rounding control field of the control
	   word which was saved in %esi. */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	jz	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	jz	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	jz	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	jz	LDown_64

#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

/* Towards +infinity: negative values are simply truncated; positive
   values are incremented when the extension is non-zero. */
LUp_64:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_64

	testl	%edx,%edx
	jz	LRe_normalise		/* exact, nothing to do */
	jmp	LDo_64_round_up

/* Towards -infinity: positive values are simply truncated; negative
   values are incremented when the extension is non-zero. */
LDown_64:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_64

	testl	%edx,%edx
	jz	LRe_normalise		/* exact, nothing to do */
	jmp	LDo_64_round_up

/*
 * Round to nearest at 64 bit precision, ties going to the even value.
 * Only the extension %edx decides the outcome:
 *   below 0x80000000   -> truncate
 *   above 0x80000000   -> increment
 *   exactly 0x80000000 -> increment only if the significand is odd
 * Falls through into LDo_64_round_up when an increment is needed.
 */
LRound_nearest_64:
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64	/* below half */

	jne	LDo_64_round_up		/* above half */

	/* Now test for round-to-even */
	/* A byte test needs the byte register: only bit 0 of the low
	   significand word matters, and it lives in %bl. */
	testb	$1,%bl
	jz	LCheck_truncate_64

/* Add one in the last place of the 64 bit significand %eax:%ebx and
   record that the result was rounded up. */
LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

/* Common tail of every round-up path; the 24 and 53 bit code jumps here
   as well.  The test below consumes the carry flag left by the add/adc
   which precedes it, so nothing that alters the flags may be placed
   between that arithmetic and this label. */
LCheck_Round_Overflow:
	jnc	LRe_normalise		/* Rounding done, no overflow */

	/* Overflow, adjust the result (to 1.0) */
	/* The carry is rotated back in as the top bit of %eax:%ebx and the
	   exponent is incremented to make up for the one bit right shift. */
	rcrl	$1,%eax
	rcrl	$1,%ebx
	incl	EXP(%edi)
	jmp	LRe_normalise

/* Truncation at 64 bits leaves %eax:%ebx alone; all that has to be done
   is to note a loss when the extension is non-zero.  Execution then runs
   on into LRe_normalise. */
LCheck_truncate_64:
	testl	%edx,%edx
	jz	LRe_normalise		/* exact result */

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost

/* Every rounding path ends up here.  If the value was de-normalised (or
   an unmasked underflow was noted) on entry, that is dealt with first. */
LRe_normalise:
	cmpb	$0,FPU_denormal
	jne	xNormalise_result

/* Report precision loss, if there was any.  FPU_bits_lost holds at most
   one of LOST_DOWN / LOST_UP, so the order of the tests is immaterial. */
xL_Normalised:
	cmpb	LOST_DOWN,FPU_bits_lost
	je	xL_precision_lost_down

	cmpb	LOST_UP,FPU_bits_lost
	je	xL_precision_lost_up

xL_no_precision_loss:
	cmpl	EXP_OVER,EXP(%edi)
	jge	L_overflow		/* exponent too large for a valid result */

	movb	TW_Valid,TAG(%edi)	/* tag the result as valid */

/* Write the significand back to the FPU_REG. */
xL_Store_significand:
	movl	%ebx,SIGL(%edi)
	movl	%eax,SIGH(%edi)

/* Shared epilogue: undo the frame described in the header comments
   (%ebx, %edi and %esi pushed on top of a C stack frame). */
FPU_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	%ebp,%esp		/* these two instructions are what */
	popl	%ebp			/* a single "leave" performs */
	ret


/* Precision was lost and the result was rounded up: have
   _set_precision_flag_up record it, then carry on with the overflow
   check.  %eax (high significand word) is kept safe across the call. */
xL_precision_lost_up:
	pushl	%eax
	call	_set_precision_flag_up
	popl	%eax
	jmp	xL_no_precision_loss

/* Precision was lost and the result was truncated: have
   _set_precision_flag_down record it, then carry on with the overflow
   check.  %eax (high significand word) is kept safe across the call. */
xL_precision_lost_down:
	pushl	%eax
	call	_set_precision_flag_down
	popl	%eax
	jmp	xL_no_precision_loss


/* The number is a denormal (which might get rounded up to a normal).
   Shift the number right the required number of bits, which will
   have to be undone later (see xNormalise_result).

   Reached from the entry code when the exponent is at or below
   EXP_UNDER.  On entry %ecx holds the control word, %eax:%ebx the
   significand, %edx the extension and %edi the FPU_REG pointer.  Every
   path ends with a jump to xDenorm_done with %ecx holding the control
   word again. */
xMake_denorm:
	/* The action to be taken depends upon whether the underflow
	   exception is masked */
	testb	CW_Underflow,%cl		/* Underflow mask.*/
	jz	xUnmasked_underflow		/* Do not make a denormal.*/

	movb	DENORMAL,FPU_denormal

	pushl	%ecx		/* Save the control word*/
	/* %ecx = (EXP_UNDER+1) - exponent, the right shift required */
	movl	EXP(%edi),%ecx
	subl	EXP_UNDER+1,%ecx
	negl	%ecx

	cmpl	$64,%ecx	/* 64 or more: handled separately */
	jnc	xDenorm_shift_more_than_63

	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
	jnc	xDenorm_shift_more_than_32

/* We got here without jumps by assuming that the most common requirement
     is for a small de-normalising shift.
   Shift by [1..31] bits */
	addl	%ecx,EXP(%edi)	/* exponent becomes EXP_UNDER+1 */
	orl	%edx,%edx	/* extension*/
	setne	%ch		/* %ch = 1 if the old extension was non-zero;
				   the shifts below only use %cl */
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx	/* bits leaving %ebx become the new extension */
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl		/* keep the old extension as a sticky bit */
	popl	%ecx		/* Restore the control word */
	jmp	xDenorm_done

/* Shift by [32..63] bits
   %ecx holds the shift count and the saved control word is on the stack.
   The shift is carried out as a shift by (count - 32) bits followed by a
   move of whole 32 bit words one place to the right. */
xDenorm_shift_more_than_32:
	addl	%ecx,EXP(%edi)	/* exponent becomes EXP_UNDER+1 */
	subb	$32,%cl		/* %cl = count - 32, in the range 0..31 */
	orl	%edx,%edx
	setne	%ch		/* %ch = 1 if the old extension was non-zero */
	orb	%ch,%bl
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx	/* %edx collects the bits leaving %ebx */
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		/*test these 32 bits*/
	setne	%cl		/* %cl = 1 if any of those bits was set */
	orb	%ch,%bl		/* fold both sticky indications into the */
	orb	%cl,%bl		/* low bit of what becomes the extension */
	movl	%ebx,%edx	/* word move: extension <- low word */
	movl	%eax,%ebx	/*            low word  <- high word */
	xorl	%eax,%eax	/*            high word <- 0 */
	popl	%ecx		/* Restore the control word */
	jmp	xDenorm_done

/* Shift by [64..) bits
   %ecx holds the shift count and the saved control word is on the stack. */
xDenorm_shift_more_than_63:
	cmpl	$64,%ecx
	jne	xDenorm_shift_more_than_64

/* Exactly 64 bit shift
   The old high word becomes the extension and the significand becomes
   zero; anything set in the old low word or the old extension is kept
   as a sticky bit in the new extension. */
	addl	%ecx,EXP(%edi)	/* exponent becomes EXP_UNDER+1 */
	xorl	%ecx,%ecx
	orl	%edx,%edx
	setne	%cl		/* old extension non-zero? */
	orl	%ebx,%ebx
	setne	%ch		/* old low word non-zero? */
	orb	%ch,%cl
	orb	%cl,%al		/* sticky bit into the old high word ... */
	movl	%eax,%edx	/* ... which is the new extension */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx		/* Restore the control word */
	jmp	xDenorm_done

/* Shift by more than 64 bits: every significand bit is shifted out, so
   the exponent is set directly and only a non-zero extension is left. */
xDenorm_shift_more_than_64:
	movl	EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so..*/
/* NOTE(review): this relies on the caller supplying a significand whose
   high word is non-zero -- confirm against the callers. */
	movl	$1,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx		/* Restore the control word */
	jmp	xDenorm_done


/* Underflow with the underflow exception unmasked: the value is not
   de-normalised.  The condition is noted in FPU_denormal (it is
   signalled later, from xNormalise_result) and the exponent is raised
   by the magic number 3*(1<<13) = 24576. */
xUnmasked_underflow:
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	addl	$24576,EXP(%edi)
	jmp	xDenorm_done


/*
 * xNormalise_result -- reached from LRe_normalise when FPU_denormal is
 * non-zero.  An unmasked underflow is only signalled; otherwise the
 * right shift applied by xMake_denorm is undone by shifting %eax:%ebx
 * left until the top bit of %eax is set, lowering the exponent to match.
 */
xNormalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	xSignal_underflow

/* Only the de-normal case is left. */
#ifdef PARANOID
	/* The exponent should still be the one set by xMake_denorm. */
	cmpl	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

	testl	%eax,%eax		/* anything in the high word? */
	jnz	LNormalise_shift_up_to_31	/* yes: shift left 0 - 31 bits */

	testl	%ebx,%ebx
	jz	L_underflow_to_zero	/* the significand is zero */

	/* Shift left 32 - 63 bits: begin with a move of a whole word. */
	subl	$32,EXP(%edi)
	movl	%ebx,%eax
	xorl	%ebx,%ebx

LNormalise_shift_up_to_31:
	bsrl	%eax,%ecx	/* index of the highest set bit ... */
	subl	$31,%ecx
	negl	%ecx		/* ... so %ecx = 31 - index = shift needed */
	subl	%ecx,EXP(%edi)
	shld	%cl,%ebx,%eax
	shl	%cl,%ebx

LNormalise_shift_done:
	cmpb	$0,FPU_bits_lost	/* bits lost == underflow */
	je	xL_Normalised

	/* Bits were lost, so raise the (masked) underflow exception;
	   %eax is kept safe across the call. */
	pushl	%eax
	pushl	EX_Underflow
	call	_exception
	popl	%eax		/* remove the argument */
	popl	%eax		/* get the high significand word back */
	jmp	xL_Normalised


/* Masked response to a result which is too small to represent: the
   significand has become zero.  Precision loss (downwards) and underflow
   are both reported, then the result is tagged as zero and stored. */
L_underflow_to_zero:
	pushl	%eax			/* keep %eax safe across the call */
	call	_set_precision_flag_down
	popl	%eax

	pushl	%eax			/* keep %eax safe across the call */
	pushl	EX_Underflow
	call	_exception
	popl	%eax			/* remove the argument */
	popl	%eax			/* get the saved %eax back */

	movb	TW_Zero,TAG(%edi)
	jmp	xL_Store_significand


/* The result is too large to represent (exponent at or above EXP_OVER).
   The FPU_REG pointer is handed to _arith_overflow and nothing is stored
   here; control goes straight to the shared epilogue. */
L_overflow:
	pushl	%edi			/* argument: the result register */
	call	_arith_overflow
	popl	%edi
	jmp	FPU_Arith_exit


/* An unmasked underflow was noted on entry: raise EX_Underflow and then
   finish off as for a normal result.  %eax is kept safe across the call. */
xSignal_underflow:
	pushl	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* remove the argument */
	popl	%eax			/* get the high significand word back */
	jmp	xL_Normalised


#ifdef PARANOID
/* Internal consistency failures.  Each entry pushes its own EX_INTERNAL
   code; the reporting tail is shared.  If we ever get here then we have
   problems! */
L_bugged:
	pushl	EX_INTERNAL|0x201
	jmp	L_report_internal

L_norm_bugged:
	pushl	EX_INTERNAL|0x216
	jmp	L_report_internal

L_entry_bugged:
	pushl	EX_INTERNAL|0x217

L_report_internal:
	call	EXCEPTION
	popl	%ebx			/* remove the argument; %ebx is reloaded
					   by the epilogue anyway */
	jmp	FPU_Arith_exit
#endif /* PARANOID */
Commit	Line	Data
7c650d4e GCI	1	.file "reg_round.S"
	2	/*
	3	* reg_round.S
	4	*
	5	* Rounding/truncation/etc for FPU basic arithmetic functions.
	6	*
	7	* This code has four possible entry points.
	8	* The following must be entered by a jmp intruction:
	9	* FPU_round, FPU_round_sqrt, and FPU_Arith_exit.
	10	*
	11	* The _round_reg entry point is intended to be used by C code.
	12	* From C, call as:
	13	* void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w)
	14	*
	15	*
fb0297e9 PR	16	* Copyright (C) 1992,1993,1994
	17	* W. Metzenthen, 22 Parker St, Ormond, Vic 3163,
	18	* Australia. E-mail billm@vaxc.cc.monash.edu.au
7c650d4e GCI	19	* All rights reserved.
	20	*
	21	* This copyright notice covers the redistribution and use of the
	22	* FPU emulator developed by W. Metzenthen. It covers only its use
fb0297e9 PR	23	* in the 386BSD, FreeBSD and NetBSD operating systems. Any other
fb0297e9 PR	24	* use is not permitted under this copyright.
7c650d4e GCI	25	*
	26	* Redistribution and use in source and binary forms, with or without
	27	* modification, are permitted provided that the following conditions
	28	* are met:
	29	* 1. Redistributions of source code must retain the above copyright
	30	* notice, this list of conditions and the following disclaimer.
	31	* 2. Redistributions in binary form must include information specifying
	32	* that source code for the emulator is freely available and include
	33	* either:
	34	* a) an offer to provide the source code for a nominal distribution
	35	* fee, or
	36	* b) list at least two alternative methods whereby the source
	37	* can be obtained, e.g. a publically accessible bulletin board
	38	* and an anonymous ftp site from which the software can be
	39	* downloaded.
	40	* 3. All advertising materials specifically mentioning features or use of
	41	* this emulator must acknowledge that it was developed by W. Metzenthen.
	42	* 4. The name of W. Metzenthen may not be used to endorse or promote
	43	* products derived from this software without specific prior written
	44	* permission.
	45	*
	46	* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
	47	* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
	48	* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
	49	* W. METZENTHEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	50	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	51	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	52	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	53	* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	54	* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	55	* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	56	*
f0c19449	57	*
fb0297e9 PR	58	* The purpose of this copyright, based upon the Berkeley copyright, is to
	59	* ensure that the covered software remains freely available to everyone.
	60	*
	61	* The software (with necessary differences) is also available, but under
	62	* the terms of the GNU copyleft, for the Linux operating system and for
	63	* the djgpp ms-dos extender.
	64	*
	65	* W. Metzenthen June 1994.
	66	*
	67	*
	68	* $Id: reg_round.s,v 1.2 1994/04/29 21:30:23 gclarkii Exp $
f0c19449	69	*
7c650d4e GCI	70	*/
	71
	72
	73	/*---------------------------------------------------------------------------+
	74	\| Four entry points. \|
	75	\| \|
	76	\| Needed by both the FPU_round and FPU_round_sqrt entry points: \|
	77	\| %eax:%ebx 64 bit significand \|
	78	\| %edx 32 bit extension of the significand \|
	79	\| %edi pointer to an FPU_REG for the result to be stored \|
	80	\| stack calling function must have set up a C stack frame and \|
	81	\| pushed %esi, %edi, and %ebx \|
	82	\| \|
	83	\| Needed just for the FPU_round_sqrt entry point: \|
	84	\| %cx A control word in the same format as the FPU control word. \|
	85	\| Otherwise, PARAM4 must give such a value. \|
	86	\| \|
	87	\| \|
	88	\| The significand and its extension are assumed to be exact in the \|
	89	\| following sense: \|
	90	\| If the significand by itself is the exact result then the significand \|
	91	\| extension (%edx) must contain 0, otherwise the significand extension \|
	92	\| must be non-zero. \|
	93	\| If the significand extension is non-zero then the significand is \|
	94	\| smaller than the magnitude of the correct exact result by an amount \|
	95	\| greater than zero and less than one ls bit of the significand. \|
	96	\| The significand extension is only required to have three possible \|
	97	\| non-zero values: \|
	98	\| less than 0x80000000 <=> the significand is less than 1/2 an ls \|
	99	\| bit smaller than the magnitude of the \|
	100	\| true exact result. \|
	101	\| exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit \|
	102	\| smaller than the magnitude of the true \|
	103	\| exact result. \|
	104	\| greater than 0x80000000 <=> the significand is more than 1/2 an ls \|
	105	\| bit smaller than the magnitude of the \|
	106	\| true exact result. \|
	107	\| \|
	108	+---------------------------------------------------------------------------*/
	109
	110	/*---------------------------------------------------------------------------+
	111	\| The code in this module has become quite complex, but it should handle \|
	112	\| all of the FPU flags which are set at this stage of the basic arithmetic \|
	113	\| computations. \|
	114	\| There are a few rare cases where the results are not set identically to \|
	115	\| a real FPU. These require a bit more thought because at this stage the \|
	116	\| results of the code here appear to be more consistent... \|
	117	\| This may be changed in a future version. \|
	118	+---------------------------------------------------------------------------*/
	119
	120
	121	#include "fpu_asm.h"
	122	#include "exception.h"
	123	#include "control_w.h"
	124
	125	#define LOST_DOWN $1
	126	#define LOST_UP $2
	127	#define DENORMAL $1
	128	#define UNMASKED_UNDERFLOW $2
	129
	130	.data
	131	.align 2,0
	132	FPU_bits_lost:
	133	.byte 0
134	FPU_denormal:
135	.byte 0
136
137	.text
138	.align 2,144
139	.globl FPU_round
140	.globl FPU_round_sqrt
141	.globl FPU_Arith_exit
142	.globl _round_reg
143
144	/* Entry point when called from C */
145	_round_reg:
146	pushl %ebp
147	movl %esp,%ebp
148	pushl %esi
149	pushl %edi
150	pushl %ebx
151
152	movl PARAM1,%edi
153	movl SIGH(%edi),%eax
154	movl SIGL(%edi),%ebx
155	movl PARAM2,%edx
156	movl PARAM3,%ecx
157	jmp FPU_round_sqrt
158
159	FPU_round: /* Normal entry point */
160	movl PARAM4,%ecx
161
162	FPU_round_sqrt: /* Entry point from wm_sqrt.S */
163
164	#ifdef PARANOID
165	/* Cannot use this here yet */
166	/* orl %eax,%eax */
167	/* jns L_entry_bugged */
168	#endif PARANOID
169
170	cmpl EXP_UNDER,EXP(%edi)
171	jle xMake_denorm /* The number is a de-normal*/
172
173	movb $0,FPU_denormal /* 0 -> not a de-normal*/
174
175	xDenorm_done:
176	movb $0,FPU_bits_lost /No bits yet lost in rounding/
177
178	movl %ecx,%esi
179	andl CW_PC,%ecx
180	cmpl PR_64_BITS,%ecx
181	je LRound_To_64
182
183	cmpl PR_53_BITS,%ecx
184	je LRound_To_53
185
186	cmpl PR_24_BITS,%ecx
187	je LRound_To_24
188
189	#ifdef PARANOID
190	jmp L_bugged /* There is no bug, just a bad control word */
191	#endif PARANOID
192
193
194	/* Round etc to 24 bit precision */
195	LRound_To_24:
196	movl %esi,%ecx
197	andl CW_RC,%ecx
198	cmpl RC_RND,%ecx
199	je LRound_nearest_24
200
201	cmpl RC_CHOP,%ecx
202	je LCheck_truncate_24
203
204	cmpl RC_UP,%ecx /* Towards +infinity */
205	je LUp_24
206
207	cmpl RC_DOWN,%ecx /* Towards -infinity */
208	je LDown_24
209
210	#ifdef PARANOID
211	jmp L_bugged
212	#endif PARANOID
213
214	LUp_24:
215	cmpb SIGN_POS,SIGN(%edi)
216	jne LCheck_truncate_24 /* If negative then up==truncate */
217
218	jmp LCheck_24_round_up
219
220	LDown_24:
221	cmpb SIGN_POS,SIGN(%edi)
222	je LCheck_truncate_24 /* If positive then down==truncate */
223
224	LCheck_24_round_up:
225	movl %eax,%ecx
226	andl $0x000000ff,%ecx
227	orl %ebx,%ecx
228	orl %edx,%ecx
229	jnz LDo_24_round_up
230	jmp LRe_normalise
231
232	LRound_nearest_24:
233	/* Do rounding of the 24th bit if needed (nearest or even) */
234	movl %eax,%ecx
235	andl $0x000000ff,%ecx
236	cmpl $0x00000080,%ecx
237	jc LCheck_truncate_24 /less than half, no increment needed/
238
239	jne LGreater_Half_24 /* greater than half, increment needed*/
240
241	/* Possibly half, we need to check the ls bits */
242	orl %ebx,%ebx
243	jnz LGreater_Half_24 /* greater than half, increment needed*/
244
245	orl %edx,%edx
246	jnz LGreater_Half_24 /* greater than half, increment needed*/
247
248	/* Exactly half, increment only if 24th bit is 1 (round to even)*/
249	testl $0x00000100,%eax
250	jz LDo_truncate_24
251
252	LGreater_Half_24: /Rounding: increment at the 24th bit/
253	LDo_24_round_up:
254	andl $0xffffff00,%eax /Truncate to 24 bits/
255	xorl %ebx,%ebx
256	movb LOST_UP,FPU_bits_lost
257	addl $0x00000100,%eax
258	jmp LCheck_Round_Overflow
259
260	LCheck_truncate_24:
261	movl %eax,%ecx
262	andl $0x000000ff,%ecx
263	orl %ebx,%ecx
264	orl %edx,%ecx
265	jz LRe_normalise /* No truncation needed*/
266
267	LDo_truncate_24:
268	andl $0xffffff00,%eax /* Truncate to 24 bits*/
269	xorl %ebx,%ebx
270	movb LOST_DOWN,FPU_bits_lost
271	jmp LRe_normalise
272
273
274	/* Round etc to 53 bit precision */
275	LRound_To_53:
276	movl %esi,%ecx
277	andl CW_RC,%ecx
278	cmpl RC_RND,%ecx
279	je LRound_nearest_53
280
281	cmpl RC_CHOP,%ecx
282	je LCheck_truncate_53
283
284	cmpl RC_UP,%ecx /* Towards +infinity*/
285	je LUp_53
286
287	cmpl RC_DOWN,%ecx /* Towards -infinity*/
288	je LDown_53
289
290	#ifdef PARANOID
291	jmp L_bugged
292	#endif PARANOID
293
294	LUp_53:
295	cmpb SIGN_POS,SIGN(%edi)
296	jne LCheck_truncate_53 /* If negative then up==truncate*/
297
298	jmp LCheck_53_round_up
299
300	LDown_53:
301	cmpb SIGN_POS,SIGN(%edi)
302	je LCheck_truncate_53 /* If positive then down==truncate*/
303
304	LCheck_53_round_up:
305	movl %ebx,%ecx
306	andl $0x000007ff,%ecx
307	orl %edx,%ecx
308	jnz LDo_53_round_up
309	jmp LRe_normalise
310
311	LRound_nearest_53:
312	/Do rounding of the 53rd bit if needed (nearest or even)/
313	movl %ebx,%ecx
314	andl $0x000007ff,%ecx
315	cmpl $0x00000400,%ecx
316	jc LCheck_truncate_53 /* less than half, no increment needed*/
317
318	jnz LGreater_Half_53 /* greater than half, increment needed*/
319
320	/Possibly half, we need to check the ls bits/
321	orl %edx,%edx
322	jnz LGreater_Half_53 /* greater than half, increment needed*/
323
324	/* Exactly half, increment only if 53rd bit is 1 (round to even)*/
325	testl $0x00000800,%ebx
326	jz LTruncate_53
327
328	LGreater_Half_53: /Rounding: increment at the 53rd bit/
329	LDo_53_round_up:
330	movb LOST_UP,FPU_bits_lost
331	andl $0xfffff800,%ebx /* Truncate to 53 bits*/
332	addl $0x00000800,%ebx
333	adcl $0,%eax
334	jmp LCheck_Round_Overflow
335
336	LCheck_truncate_53:
337	movl %ebx,%ecx
338	andl $0x000007ff,%ecx
339	orl %edx,%ecx
340	jz LRe_normalise
341
342	LTruncate_53:
343	movb LOST_DOWN,FPU_bits_lost
344	andl $0xfffff800,%ebx /* Truncate to 53 bits*/
345	jmp LRe_normalise
346
347
348	/* Round etc to 64 bit precision*/
349	LRound_To_64:
350	movl %esi,%ecx
351	andl CW_RC,%ecx
352	cmpl RC_RND,%ecx
353	je LRound_nearest_64
354
355	cmpl RC_CHOP,%ecx
356	je LCheck_truncate_64
357
358	cmpl RC_UP,%ecx /* Towards +infinity*/
359	je LUp_64
360
361	cmpl RC_DOWN,%ecx /* Towards -infinity*/
362	je LDown_64
363
364	#ifdef PARANOID
365	jmp L_bugged
366	#endif PARANOID
367
368	LUp_64:
369	cmpb SIGN_POS,SIGN(%edi)
370	jne LCheck_truncate_64 /* If negative then up==truncate*/
371
372	orl %edx,%edx
373	jnz LDo_64_round_up
374	jmp LRe_normalise
375
376	LDown_64:
377	cmpb SIGN_POS,SIGN(%edi)
378	je LCheck_truncate_64 /If positive then down==truncate/
379
380	orl %edx,%edx
381	jnz LDo_64_round_up
382	jmp LRe_normalise
383
384	LRound_nearest_64:
385	cmpl $0x80000000,%edx
386	jc LCheck_truncate_64
387
388	jne LDo_64_round_up
389
390	/* Now test for round-to-even */
391	testb $1,%ebx
392	jz LCheck_truncate_64
393
394	LDo_64_round_up:
395	movb LOST_UP,FPU_bits_lost
396	addl $1,%ebx
397	adcl $0,%eax
398
399	LCheck_Round_Overflow:
400	jnc LRe_normalise /* Rounding done, no overflow */
401
402	/* Overflow, adjust the result (to 1.0) */
403	rcrl $1,%eax
404	rcrl $1,%ebx
405	incl EXP(%edi)
406	jmp LRe_normalise
407
408	LCheck_truncate_64:
409	orl %edx,%edx
410	jz LRe_normalise
411
412	LTruncate_64:
413	movb LOST_DOWN,FPU_bits_lost
414
415	LRe_normalise:
416	testb $0xff,FPU_denormal
417	jnz xNormalise_result
418
419	xL_Normalised:
420	cmpb LOST_UP,FPU_bits_lost
421	je xL_precision_lost_up
422
423	cmpb LOST_DOWN,FPU_bits_lost
424	je xL_precision_lost_down
425
426	xL_no_precision_loss:
427	cmpl EXP_OVER,EXP(%edi)
428	jge L_overflow
429
430	/* store the result */
431	movb TW_Valid,TAG(%edi)
432
433	xL_Store_significand:
434	movl %eax,SIGH(%edi)
435	movl %ebx,SIGL(%edi)
436
437	FPU_Arith_exit:
438	popl %ebx
439	popl %edi
440	popl %esi
441	leave
442	ret
443
444
445	/* Set the FPU status flags to represent precision loss due to*/
446	/* round-up.*/
447	xL_precision_lost_up:
448	push %eax
449	call _set_precision_flag_up
450	popl %eax
451	jmp xL_no_precision_loss
452
453	/* Set the FPU status flags to represent precision loss due to*/
454	/* truncation.*/
455	xL_precision_lost_down:
456	push %eax
457	call _set_precision_flag_down
458	popl %eax
459	jmp xL_no_precision_loss
460
461
462	/* The number is a denormal (which might get rounded up to a normal)
463	// Shift the number right the required number of bits, which will
464	// have to be undone later...*/
465	xMake_denorm:
466	/* The action to be taken depends upon whether the underflow
467	// exception is masked*/
468	testb CW_Underflow,%cl /* Underflow mask.*/
469	jz xUnmasked_underflow /* Do not make a denormal.*/
470
471	movb DENORMAL,FPU_denormal
472
473	pushl %ecx /* Save*/
474	movl EXP(%edi),%ecx
475	subl EXP_UNDER+1,%ecx
476	negl %ecx
477
478	cmpl $64,%ecx /* shrd only works for 0..31 bits */
479	jnc xDenorm_shift_more_than_63
480
481	cmpl $32,%ecx /* shrd only works for 0..31 bits */
482	jnc xDenorm_shift_more_than_32
483
484	/* We got here without jumps by assuming that the most common requirement
485	// is for a small de-normalising shift.
486	// Shift by [1..31] bits */
487	addl %ecx,EXP(%edi)
488	orl %edx,%edx /* extension*/
489	setne %ch
490	xorl %edx,%edx
491	shrd %cl,%ebx,%edx
492	shrd %cl,%eax,%ebx
493	shr %cl,%eax
494	orb %ch,%dl
495	popl %ecx
496	jmp xDenorm_done
497
498	/* Shift by [32..63] bits*/
499	xDenorm_shift_more_than_32:
500	addl %ecx,EXP(%edi)
501	subb $32,%cl
502	orl %edx,%edx
503	setne %ch
504	orb %ch,%bl
505	xorl %edx,%edx
506	shrd %cl,%ebx,%edx
507	shrd %cl,%eax,%ebx
508	shr %cl,%eax
509	orl %edx,%edx /test these 32 bits/
510	setne %cl
511	orb %ch,%bl
512	orb %cl,%bl
513	movl %ebx,%edx
514	movl %eax,%ebx
515	xorl %eax,%eax
516	popl %ecx
517	jmp xDenorm_done
518
519	/* Shift by [64..) bits*/
520	xDenorm_shift_more_than_63:
521	cmpl $64,%ecx
522	jne xDenorm_shift_more_than_64
523
524	/* Exactly 64 bit shift*/
525	addl %ecx,EXP(%edi)
526	xorl %ecx,%ecx
527	orl %edx,%edx
528	setne %cl
529	orl %ebx,%ebx
530	setne %ch
531	orb %ch,%cl
532	orb %cl,%al
533	movl %eax,%edx
534	xorl %eax,%eax
535	xorl %ebx,%ebx
536	popl %ecx
537	jmp xDenorm_done
538
539	xDenorm_shift_more_than_64:
540	movl EXP_UNDER+1,EXP(%edi)
541	/* This is easy, %eax must be non-zero, so..*/
542	movl $1,%edx
543	xorl %eax,%eax
544	xorl %ebx,%ebx
545	popl %ecx
546	jmp xDenorm_done
547
548
549	xUnmasked_underflow:
550	/* Increase the exponent by the magic number*/
551	addl $(3*(1<<13)),EXP(%edi)
552	movb UNMASKED_UNDERFLOW,FPU_denormal
553	jmp xDenorm_done
554
555
556	/* Undo the de-normalisation.*/
557	xNormalise_result:
558	cmpb UNMASKED_UNDERFLOW,FPU_denormal
559	je xSignal_underflow
560
561	/* The number must be a denormal if we got here.*/
562	#ifdef PARANOID
563	/* But check it... just in case.*/
564	cmpl EXP_UNDER+1,EXP(%edi)
565	jne L_norm_bugged
566	#endif PARANOID
567
568	orl %eax,%eax /* ms bits*/
569	jnz LNormalise_shift_up_to_31 /* Shift left 0 - 31 bits*/
570
571	orl %ebx,%ebx
572	jz L_underflow_to_zero /* The contents are zero*/
573
574	/* Shift left 32 - 63 bits*/
575	movl %ebx,%eax
576	xorl %ebx,%ebx
577	subl $32,EXP(%edi)
578
579	LNormalise_shift_up_to_31:
580	bsrl %eax,%ecx /* get the required shift in %ecx */
581	subl $31,%ecx
582	negl %ecx
583	shld %cl,%ebx,%eax
584	shl %cl,%ebx
585	subl %ecx,EXP(%edi)
586
587	LNormalise_shift_done:
588	testb $0xff,FPU_bits_lost /* bits lost == underflow*/
589	jz xL_Normalised
590
591	/* There must be a masked underflow*/
592	push %eax
593	pushl EX_Underflow
594	call _exception
595	popl %eax
596	popl %eax
597	jmp xL_Normalised
598
599
600	/* The operations resulted in a number too small to represent.
601	// Masked response.*/
602	L_underflow_to_zero:
603	push %eax
604	call _set_precision_flag_down
605	popl %eax
606
607	push %eax
608	pushl EX_Underflow
609	call _exception
610	popl %eax
611	popl %eax
612
613	movb TW_Zero,TAG(%edi)
614	jmp xL_Store_significand
615
616
617	/* The operations resulted in a number too large to represent.*/
618	L_overflow:
619	push %edi
620	call _arith_overflow
621	pop %edi
622	jmp FPU_Arith_exit
623
624
625	xSignal_underflow:
626	push %eax
627	pushl EX_Underflow
628	call EXCEPTION
629	popl %eax
630	popl %eax
631	jmp xL_Normalised
632
633
634	#ifdef PARANOID
635	/* If we ever get here then we have problems! */
636	L_bugged:
637	pushl EX_INTERNAL\|0x201
638	call EXCEPTION
639	popl %ebx
640	jmp FPU_Arith_exit
641
642	L_norm_bugged:
643	pushl EX_INTERNAL\|0x216
644	call EXCEPTION
645	popl %ebx
646	jmp FPU_Arith_exit
647
648	L_entry_bugged:
649	pushl EX_INTERNAL\|0x217
650	call EXCEPTION
651	popl %ebx
652	jmp FPU_Arith_exit
653	#endif PARANOID