Copyright changes per the author.
[unix-history] / sys / gnu / fpemul / reg_round.s
CommitLineData
7c650d4e
GCI
1 .file "reg_round.S"
2/*
3 * reg_round.S
4 *
5 * Rounding/truncation/etc for FPU basic arithmetic functions.
6 *
7 * This code has four possible entry points.
8 * The following must be entered by a jmp instruction:
9 * FPU_round, FPU_round_sqrt, and FPU_Arith_exit.
10 *
11 * The _round_reg entry point is intended to be used by C code.
12 * From C, call as:
13 * void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w)
14 *
15 *
fb0297e9
PR
16 * Copyright (C) 1992,1993,1994
17 * W. Metzenthen, 22 Parker St, Ormond, Vic 3163,
18 * Australia. E-mail billm@vaxc.cc.monash.edu.au
7c650d4e
GCI
19 * All rights reserved.
20 *
21 * This copyright notice covers the redistribution and use of the
22 * FPU emulator developed by W. Metzenthen. It covers only its use
fb0297e9
PR
23 * in the 386BSD, FreeBSD and NetBSD operating systems. Any other
24 * use is not permitted under this copyright.
7c650d4e
GCI
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
28 * are met:
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must include information specifying
32 * that source code for the emulator is freely available and include
33 * either:
34 * a) an offer to provide the source code for a nominal distribution
35 * fee, or
36 * b) list at least two alternative methods whereby the source
37 * can be obtained, e.g. a publically accessible bulletin board
38 * and an anonymous ftp site from which the software can be
39 * downloaded.
40 * 3. All advertising materials specifically mentioning features or use of
41 * this emulator must acknowledge that it was developed by W. Metzenthen.
42 * 4. The name of W. Metzenthen may not be used to endorse or promote
43 * products derived from this software without specific prior written
44 * permission.
45 *
46 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
47 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
48 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
49 * W. METZENTHEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
50 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
51 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
52 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
53 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
54 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
55 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
56 *
f0c19449 57 *
fb0297e9
PR
58 * The purpose of this copyright, based upon the Berkeley copyright, is to
59 * ensure that the covered software remains freely available to everyone.
60 *
61 * The software (with necessary differences) is also available, but under
62 * the terms of the GNU copyleft, for the Linux operating system and for
63 * the djgpp ms-dos extender.
64 *
65 * W. Metzenthen June 1994.
66 *
67 *
68 * $Id: reg_round.s,v 1.2 1994/04/29 21:30:23 gclarkii Exp $
f0c19449 69 *
7c650d4e
GCI
70 */
71
72
73/*---------------------------------------------------------------------------+
74 | Four entry points. |
75 | |
76 | Needed by both the FPU_round and FPU_round_sqrt entry points: |
77 | %eax:%ebx 64 bit significand |
78 | %edx 32 bit extension of the significand |
79 | %edi pointer to an FPU_REG for the result to be stored |
80 | stack calling function must have set up a C stack frame and |
81 | pushed %esi, %edi, and %ebx |
82 | |
83 | Needed just for the FPU_round_sqrt entry point: |
84 | %cx A control word in the same format as the FPU control word. |
85 | Otherwise, PARAM4 must give such a value. |
86 | |
87 | |
88 | The significand and its extension are assumed to be exact in the |
89 | following sense: |
90 | If the significand by itself is the exact result then the significand |
91 | extension (%edx) must contain 0, otherwise the significand extension |
92 | must be non-zero. |
93 | If the significand extension is non-zero then the significand is |
94 | smaller than the magnitude of the correct exact result by an amount |
95 | greater than zero and less than one ls bit of the significand. |
96 | The significand extension is only required to have three possible |
97 | non-zero values: |
98 | less than 0x80000000 <=> the significand is less than 1/2 an ls |
99 | bit smaller than the magnitude of the |
100 | true exact result. |
101 | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
102 | smaller than the magnitude of the true |
103 | exact result. |
104 | greater than 0x80000000 <=> the significand is more than 1/2 an ls |
105 | bit smaller than the magnitude of the |
106 | true exact result. |
107 | |
108 +---------------------------------------------------------------------------*/
109
110/*---------------------------------------------------------------------------+
111 | The code in this module has become quite complex, but it should handle |
112 | all of the FPU flags which are set at this stage of the basic arithmetic |
113 | computations. |
114 | There are a few rare cases where the results are not set identically to |
115 | a real FPU. These require a bit more thought because at this stage the |
116 | results of the code here appear to be more consistent... |
117 | This may be changed in a future version. |
118 +---------------------------------------------------------------------------*/
119
120
121#include "fpu_asm.h"
122#include "exception.h"
123#include "control_w.h"
124
/* Values written to FPU_bits_lost by the rounding code: they record that
   bits were discarded and whether the kept significand was left as is
   (LOST_DOWN) or incremented (LOST_UP). Zero means the result is exact. */
125#define LOST_DOWN $1
126#define LOST_UP $2
/* Values written to FPU_denormal on entry when the exponent is at or below
   EXP_UNDER: DENORMAL if the significand was shifted right to form a
   denormal, UNMASKED_UNDERFLOW if the exponent was biased instead.
   Zero means neither happened. */
127#define DENORMAL $1
128#define UNMASKED_UNDERFLOW $2
129
/* Per-call state lives in these two file-scope bytes rather than on the
   stack; both are rewritten on every pass through FPU_round_sqrt. */
130.data
131 .align 2,0
132FPU_bits_lost: /* 0, LOST_DOWN or LOST_UP */
133 .byte 0
134FPU_denormal: /* 0, DENORMAL or UNMASKED_UNDERFLOW */
135 .byte 0
136
137.text
138 .align 2,144 /* fill byte 144 == 0x90, the one-byte nop */
139.globl FPU_round
140.globl FPU_round_sqrt
141.globl FPU_Arith_exit
142.globl _round_reg
143
144/* Entry point when called from C */
/* void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w)
   Builds the same frame that the jmp-entered entry points expect from
   their callers (%ebp frame plus saved %esi, %edi, %ebx), loads the
   registers FPU_round_sqrt consumes, then joins the common code.
   The frame is torn down at FPU_Arith_exit. */
145_round_reg:
146 pushl %ebp
147 movl %esp,%ebp
148 pushl %esi /* push order mirrors the pops at FPU_Arith_exit */
149 pushl %edi
150 pushl %ebx
151
152 movl PARAM1,%edi /* %edi = arg: source and destination FPU_REG */
153 movl SIGH(%edi),%eax /* %eax:%ebx = 64 bit significand of *arg */
154 movl SIGL(%edi),%ebx
155 movl PARAM2,%edx /* %edx = extent: the significand extension */
156 movl PARAM3,%ecx /* %ecx = control_w */
157 jmp FPU_round_sqrt
158
/* Common entry: classify the exponent, reset the per-call flags and
   dispatch on the precision-control field of the control word.
   In:  %eax:%ebx significand, %edx extension, %edi result FPU_REG.
        FPU_round takes the control word from PARAM4; FPU_round_sqrt
        expects it already in %ecx.
   Out: %esi holds the unmodified control word for the rounding code. */
159FPU_round: /* Normal entry point */
160 movl PARAM4,%ecx
161
162FPU_round_sqrt: /* Entry point from wm_sqrt.S */
163
164#ifdef PARANOID
165/* Cannot use this here yet */
166/* orl %eax,%eax */
167/* jns L_entry_bugged */
168#endif PARANOID
169
170 cmpl EXP_UNDER,EXP(%edi)
171 jle xMake_denorm /* The number is a de-normal*/
172
173 movb $0,FPU_denormal /* 0 -> not a de-normal*/
174
/* xMake_denorm and xUnmasked_underflow rejoin here with FPU_denormal set
   and %ecx still holding the control word. */
175xDenorm_done:
176 movb $0,FPU_bits_lost /*No bits yet lost in rounding*/
177
178 movl %ecx,%esi /* keep the full control word; %ecx becomes scratch */
179 andl CW_PC,%ecx /* isolate the precision-control field */
180 cmpl PR_64_BITS,%ecx
181 je LRound_To_64
182
183 cmpl PR_53_BITS,%ecx
184 je LRound_To_53
185
186 cmpl PR_24_BITS,%ecx
187 je LRound_To_24
188
189#ifdef PARANOID
190 jmp L_bugged /* There is no bug, just a bad control word */
191#endif PARANOID
/* Without PARANOID an unrecognised precision field falls through into
   LRound_To_24, which follows directly. */
192
193
194/* Round etc to 24 bit precision */
/* In: %eax:%ebx significand, %edx extension, %esi control word, %edi result.
   The kept part is the upper 24 bits of %eax. The low 8 bits of %eax plus
   all of %ebx and %edx are the part being rounded away.
   Every path leaves via LRe_normalise or LCheck_Round_Overflow. */
195LRound_To_24:
196 movl %esi,%ecx
197 andl CW_RC,%ecx /* isolate the rounding-control field */
198 cmpl RC_RND,%ecx
199 je LRound_nearest_24
200
201 cmpl RC_CHOP,%ecx
202 je LCheck_truncate_24
203
204 cmpl RC_UP,%ecx /* Towards +infinity */
205 je LUp_24
206
207 cmpl RC_DOWN,%ecx /* Towards -infinity */
208 je LDown_24
209
210#ifdef PARANOID
211 jmp L_bugged
212#endif PARANOID
213
214LUp_24:
215 cmpb SIGN_POS,SIGN(%edi)
216 jne LCheck_truncate_24 /* If negative then up==truncate */
217
218 jmp LCheck_24_round_up
219
220LDown_24:
221 cmpb SIGN_POS,SIGN(%edi)
222 je LCheck_truncate_24 /* If positive then down==truncate */
223
/* Directed rounding away from zero: increment if any discarded bit is set. */
224LCheck_24_round_up:
225 movl %eax,%ecx
226 andl $0x000000ff,%ecx
227 orl %ebx,%ecx
228 orl %edx,%ecx
229 jnz LDo_24_round_up
230 jmp LRe_normalise /* nothing discarded: result is exact */
231
232LRound_nearest_24:
233 /* Do rounding of the 24th bit if needed (nearest or even) */
234 movl %eax,%ecx
235 andl $0x000000ff,%ecx
236 cmpl $0x00000080,%ecx /* 0x80 is half of the last kept bit */
237 jc LCheck_truncate_24 /*less than half, no increment needed*/
238
239 jne LGreater_Half_24 /* greater than half, increment needed*/
240
241 /* Possibly half, we need to check the ls bits */
242 orl %ebx,%ebx
243 jnz LGreater_Half_24 /* greater than half, increment needed*/
244
245 orl %edx,%edx
246 jnz LGreater_Half_24 /* greater than half, increment needed*/
247
248 /* Exactly half, increment only if 24th bit is 1 (round to even)*/
249 testl $0x00000100,%eax
250 jz LDo_truncate_24
251
252LGreater_Half_24: /*Rounding: increment at the 24th bit*/
253LDo_24_round_up:
254 andl $0xffffff00,%eax /*Truncate to 24 bits*/
255 xorl %ebx,%ebx
256 movb LOST_UP,FPU_bits_lost
257 addl $0x00000100,%eax /* must come last: its carry is tested at LCheck_Round_Overflow */
258 jmp LCheck_Round_Overflow
259
/* Truncating modes: only record a loss if something is actually discarded. */
260LCheck_truncate_24:
261 movl %eax,%ecx
262 andl $0x000000ff,%ecx
263 orl %ebx,%ecx
264 orl %edx,%ecx
265 jz LRe_normalise /* No truncation needed*/
266
267LDo_truncate_24:
268 andl $0xffffff00,%eax /* Truncate to 24 bits*/
269 xorl %ebx,%ebx
270 movb LOST_DOWN,FPU_bits_lost
271 jmp LRe_normalise
272
273
274/* Round etc to 53 bit precision */
/* In: %eax:%ebx significand, %edx extension, %esi control word, %edi result.
   The kept part is %eax plus the upper 21 bits of %ebx. The low 11 bits
   of %ebx plus all of %edx are the part being rounded away.
   Every path leaves via LRe_normalise or LCheck_Round_Overflow. */
275LRound_To_53:
276 movl %esi,%ecx
277 andl CW_RC,%ecx /* isolate the rounding-control field */
278 cmpl RC_RND,%ecx
279 je LRound_nearest_53
280
281 cmpl RC_CHOP,%ecx
282 je LCheck_truncate_53
283
284 cmpl RC_UP,%ecx /* Towards +infinity*/
285 je LUp_53
286
287 cmpl RC_DOWN,%ecx /* Towards -infinity*/
288 je LDown_53
289
290#ifdef PARANOID
291 jmp L_bugged
292#endif PARANOID
293
294LUp_53:
295 cmpb SIGN_POS,SIGN(%edi)
296 jne LCheck_truncate_53 /* If negative then up==truncate*/
297
298 jmp LCheck_53_round_up
299
300LDown_53:
301 cmpb SIGN_POS,SIGN(%edi)
302 je LCheck_truncate_53 /* If positive then down==truncate*/
303
/* Directed rounding away from zero: increment if any discarded bit is set. */
304LCheck_53_round_up:
305 movl %ebx,%ecx
306 andl $0x000007ff,%ecx
307 orl %edx,%ecx
308 jnz LDo_53_round_up
309 jmp LRe_normalise /* nothing discarded: result is exact */
310
311LRound_nearest_53:
312 /*Do rounding of the 53rd bit if needed (nearest or even)*/
313 movl %ebx,%ecx
314 andl $0x000007ff,%ecx
315 cmpl $0x00000400,%ecx /* 0x400 is half of the last kept bit */
316 jc LCheck_truncate_53 /* less than half, no increment needed*/
317
318 jnz LGreater_Half_53 /* greater than half, increment needed*/
319
320 /*Possibly half, we need to check the ls bits*/
321 orl %edx,%edx
322 jnz LGreater_Half_53 /* greater than half, increment needed*/
323
324 /* Exactly half, increment only if 53rd bit is 1 (round to even)*/
325 testl $0x00000800,%ebx
326 jz LTruncate_53
327
328LGreater_Half_53: /*Rounding: increment at the 53rd bit*/
329LDo_53_round_up:
330 movb LOST_UP,FPU_bits_lost
331 andl $0xfffff800,%ebx /* Truncate to 53 bits*/
332 addl $0x00000800,%ebx
333 adcl $0,%eax /* carry out of %eax is tested at LCheck_Round_Overflow */
334 jmp LCheck_Round_Overflow
335
/* Truncating modes: only record a loss if something is actually discarded. */
336LCheck_truncate_53:
337 movl %ebx,%ecx
338 andl $0x000007ff,%ecx
339 orl %edx,%ecx
340 jz LRe_normalise
341
342LTruncate_53:
343 movb LOST_DOWN,FPU_bits_lost
344 andl $0xfffff800,%ebx /* Truncate to 53 bits*/
345 jmp LRe_normalise
346
347
348/* Round etc to 64 bit precision*/
/* In: %eax:%ebx significand, %edx extension, %esi control word, %edi result.
   All 64 bits of %eax:%ebx are kept; only the extension in %edx is
   rounded away, so 0x80000000 in %edx is exactly half of the last kept bit.
   Falls through into LRe_normalise at the end of this section.
   LCheck_Round_Overflow is also the join point for the 24 and 53 bit
   round-up paths, which arrive with CF holding the carry out of %eax. */
349LRound_To_64:
350 movl %esi,%ecx
351 andl CW_RC,%ecx /* isolate the rounding-control field */
352 cmpl RC_RND,%ecx
353 je LRound_nearest_64
354
355 cmpl RC_CHOP,%ecx
356 je LCheck_truncate_64
357
358 cmpl RC_UP,%ecx /* Towards +infinity*/
359 je LUp_64
360
361 cmpl RC_DOWN,%ecx /* Towards -infinity*/
362 je LDown_64
363
364#ifdef PARANOID
365 jmp L_bugged
366#endif PARANOID
367
368LUp_64:
369 cmpb SIGN_POS,SIGN(%edi)
370 jne LCheck_truncate_64 /* If negative then up==truncate*/
371
372 orl %edx,%edx
373 jnz LDo_64_round_up /* any discarded bit forces an increment */
374 jmp LRe_normalise
375
376LDown_64:
377 cmpb SIGN_POS,SIGN(%edi)
378 je LCheck_truncate_64 /*If positive then down==truncate*/
379
380 orl %edx,%edx
381 jnz LDo_64_round_up /* any discarded bit forces an increment */
382 jmp LRe_normalise
383
384LRound_nearest_64:
385 cmpl $0x80000000,%edx
386 jc LCheck_truncate_64 /* less than half, no increment needed */
387
388 jne LDo_64_round_up /* greater than half, increment needed */
389
390 /* Now test for round-to-even */
 /* A byte-sized test must name the byte register: bit 0 of the
    significand lives in %bl. Pairing testb with %ebx is an operand
    size mismatch that current GNU as rejects. */
391 testb $1,%bl
392 jz LCheck_truncate_64
393
394LDo_64_round_up:
395 movb LOST_UP,FPU_bits_lost
396 addl $1,%ebx
397 adcl $0,%eax /* carry out of %eax is tested just below */
398
399LCheck_Round_Overflow:
400 jnc LRe_normalise /* Rounding done, no overflow */
401
402 /* Overflow, adjust the result (to 1.0) */
403 rcrl $1,%eax /* CF is set here, so it rotates into bit 31 of %eax */
404 rcrl $1,%ebx
405 incl EXP(%edi)
406 jmp LRe_normalise
407
408LCheck_truncate_64:
409 orl %edx,%edx
410 jz LRe_normalise /* extension is zero: result is exact */
411
412LTruncate_64:
413 movb LOST_DOWN,FPU_bits_lost
414
/* Common tail for all rounding paths.
   In: %eax:%ebx rounded significand, %edi result FPU_REG,
       FPU_bits_lost / FPU_denormal as set earlier in this call.
   Reports precision loss, checks for exponent overflow, stores the
   significand and tag, then unwinds the frame built by the caller
   (or by _round_reg). */
415LRe_normalise:
416 testb $0xff,FPU_denormal
417 jnz xNormalise_result /* undo the entry-time denormal handling first */
418
419xL_Normalised:
420 cmpb LOST_UP,FPU_bits_lost
421 je xL_precision_lost_up
422
423 cmpb LOST_DOWN,FPU_bits_lost
424 je xL_precision_lost_down
425
/* The two precision-lost helpers jump back to this label. */
426xL_no_precision_loss:
427 cmpl EXP_OVER,EXP(%edi)
428 jge L_overflow
429
430 /* store the result */
431 movb TW_Valid,TAG(%edi)
432
/* L_underflow_to_zero joins here after setting the tag itself. */
433xL_Store_significand:
434 movl %eax,SIGH(%edi)
435 movl %ebx,SIGL(%edi)
436
/* Shared exit, also entered by jmp from other code. Pops the three saved
   registers in reverse of the push order and drops the %ebp frame. */
437FPU_Arith_exit:
438 popl %ebx
439 popl %edi
440 popl %esi
441 leave
442 ret
443
444
445/* Set the FPU status flags to represent precision loss due to*/
446/* round-up.*/
/* %eax (high half of the significand) is saved around the call; %ebx and
   %edi are not saved here. Control returns to xL_no_precision_loss. */
447xL_precision_lost_up:
448 push %eax
449 call _set_precision_flag_up
450 popl %eax
451 jmp xL_no_precision_loss
452
453/* Set the FPU status flags to represent precision loss due to*/
454/* truncation.*/
/* Same register handling as xL_precision_lost_up. */
455xL_precision_lost_down:
456 push %eax
457 call _set_precision_flag_down
458 popl %eax
459 jmp xL_no_precision_loss
460
461
462/* The number is a denormal (which might get rounded up to a normal)
463// Shift the number right the required number of bits, which will
464// have to be undone later...*/
/* In:  %eax:%ebx significand, %edx extension, %ecx control word,
        %edi result FPU_REG with EXP(%edi) <= EXP_UNDER.
   Out: significand and extension shifted right so that the exponent
        becomes EXP_UNDER+1, with any bits shifted out of the extension
        folded back in as a non-zero ("sticky") low bit; FPU_denormal set;
        %ecx restored to the control word. Rejoins at xDenorm_done. */
465xMake_denorm:
466 /* The action to be taken depends upon whether the underflow
467 // exception is masked*/
468 testb CW_Underflow,%cl /* Underflow mask.*/
469 jz xUnmasked_underflow /* Do not make a denormal.*/
470
471 movb DENORMAL,FPU_denormal
472
473 pushl %ecx /* Save*/
474 movl EXP(%edi),%ecx
475 subl EXP_UNDER+1,%ecx
476 negl %ecx /* %ecx = (EXP_UNDER+1) - exponent = right shift needed */
477
478 cmpl $64,%ecx /* shrd only works for 0..31 bits */
479 jnc xDenorm_shift_more_than_63
480
481 cmpl $32,%ecx /* shrd only works for 0..31 bits */
482 jnc xDenorm_shift_more_than_32
483
484/* We got here without jumps by assuming that the most common requirement
485// is for a small de-normalising shift.
486// Shift by [1..31] bits */
487 addl %ecx,EXP(%edi)
488 orl %edx,%edx /* extension*/
489 setne %ch /* %ch = 1 if the old extension was non-zero; %cl keeps the count */
490 xorl %edx,%edx
491 shrd %cl,%ebx,%edx /* bits leaving %ebx become the new extension */
492 shrd %cl,%eax,%ebx
493 shr %cl,%eax
494 orb %ch,%dl /* keep the new extension non-zero if the old one was */
495 popl %ecx
496 jmp xDenorm_done
497
498/* Shift by [32..63] bits*/
/* Shift by (count - 32) as above, then move every register down one
   position: %eax -> %ebx -> %edx, with %eax becoming zero. */
499xDenorm_shift_more_than_32:
500 addl %ecx,EXP(%edi)
501 subb $32,%cl
502 orl %edx,%edx
503 setne %ch
504 orb %ch,%bl
505 xorl %edx,%edx
506 shrd %cl,%ebx,%edx
507 shrd %cl,%eax,%ebx
508 shr %cl,%eax
509 orl %edx,%edx /*test these 32 bits*/
510 setne %cl
511 orb %ch,%bl
512 orb %cl,%bl
513 movl %ebx,%edx
514 movl %eax,%ebx
515 xorl %eax,%eax
516 popl %ecx
517 jmp xDenorm_done
518
519/* Shift by [64..) bits*/
520xDenorm_shift_more_than_63:
521 cmpl $64,%ecx
522 jne xDenorm_shift_more_than_64
523
524/* Exactly 64 bit shift*/
/* The old high word becomes the extension; %ebx and the old extension
   survive only as a sticky bit ORed into it. */
525 addl %ecx,EXP(%edi)
526 xorl %ecx,%ecx
527 orl %edx,%edx
528 setne %cl
529 orl %ebx,%ebx
530 setne %ch
531 orb %ch,%cl
532 orb %cl,%al
533 movl %eax,%edx
534 xorl %eax,%eax
535 xorl %ebx,%ebx
536 popl %ecx
537 jmp xDenorm_done
538
/* More than 64 bits: the whole significand is shifted out, leaving a zero
   significand with a small non-zero extension. */
539xDenorm_shift_more_than_64:
540 movl EXP_UNDER+1,EXP(%edi)
541/* This is easy, %eax must be non-zero, so..*/
542 movl $1,%edx
543 xorl %eax,%eax
544 xorl %ebx,%ebx
545 popl %ecx
546 jmp xDenorm_done
547
548
/* Underflow exception unmasked: the significand is left unshifted and the
   exponent is raised instead. %ecx was never pushed on this path. */
549xUnmasked_underflow:
550 /* Increase the exponent by the magic number*/
551 addl $(3*(1<<13)),EXP(%edi) /* 3*(1<<13) == 24576 */
552 movb UNMASKED_UNDERFLOW,FPU_denormal
553 jmp xDenorm_done
554
555
556/* Undo the de-normalisation.*/
/* Reached from LRe_normalise when FPU_denormal is non-zero.
   In:  %eax:%ebx rounded significand, %edi result FPU_REG.
   Out: significand shifted left until bit 31 of %eax is set, exponent
        reduced by the same amount; an underflow exception is raised if
        rounding lost bits. Continues at xL_Normalised, or leaves via
        xSignal_underflow / L_underflow_to_zero. */
557xNormalise_result:
558 cmpb UNMASKED_UNDERFLOW,FPU_denormal
559 je xSignal_underflow /* nothing was shifted on that path */
560
561/* The number must be a denormal if we got here.*/
562#ifdef PARANOID
563 /* But check it... just in case.*/
564 cmpl EXP_UNDER+1,EXP(%edi)
565 jne L_norm_bugged
566#endif PARANOID
567
568 orl %eax,%eax /* ms bits*/
569 jnz LNormalise_shift_up_to_31 /* Shift left 0 - 31 bits*/
570
571 orl %ebx,%ebx
572 jz L_underflow_to_zero /* The contents are zero*/
573
574/* Shift left 32 - 63 bits*/
/* Do the first 32 bits by moving the low word up, then fall through for
   the remaining 0 - 31 bits. */
575 movl %ebx,%eax
576 xorl %ebx,%ebx
577 subl $32,EXP(%edi)
578
579LNormalise_shift_up_to_31:
580 bsrl %eax,%ecx /* get the required shift in %ecx */
581 subl $31,%ecx
582 negl %ecx /* %ecx = 31 - (index of highest set bit) */
583 shld %cl,%ebx,%eax
584 shl %cl,%ebx
585 subl %ecx,EXP(%edi)
586
587LNormalise_shift_done:
588 testb $0xff,FPU_bits_lost /* bits lost == underflow*/
589 jz xL_Normalised
590
591 /* There must be a masked underflow*/
592 push %eax /* preserve the high significand word across the call */
593 pushl EX_Underflow /* the stack argument to _exception */
594 call _exception
595 popl %eax /* discard the argument */
596 popl %eax /* restore the saved %eax */
597 jmp xL_Normalised
598
599
600/* The operations resulted in a number too small to represent.
601// Masked response.*/
/* Reached from xNormalise_result only when both %eax and %ebx are zero, so
   the significand stored at xL_Store_significand is zero. Flags precision
   loss, raises underflow, and tags the result as zero. */
602L_underflow_to_zero:
603 push %eax
604 call _set_precision_flag_down
605 popl %eax
606
607 push %eax /* preserve %eax across the call */
608 pushl EX_Underflow /* the stack argument to _exception */
609 call _exception
610 popl %eax /* discard the argument */
611 popl %eax /* restore the saved %eax */
612
613 movb TW_Zero,TAG(%edi)
614 jmp xL_Store_significand
615
616
617/* The operations resulted in a number too large to represent.*/
/* Reached from xL_no_precision_loss when EXP(%edi) >= EXP_OVER. The rounded
   significand in %eax:%ebx is not stored by this code on this path.
   NOTE(review): %edi is presumably the single stack argument of
   _arith_overflow - confirm against its definition. */
618L_overflow:
619 push %edi
620 call _arith_overflow
621 pop %edi
622 jmp FPU_Arith_exit
623
624
/* Unmasked-underflow path from xNormalise_result: raise the underflow
   exception and carry on with the exponent-adjusted result.
   NOTE(review): EXCEPTION is not defined in this file; it presumably comes
   from one of the included headers - confirm. */
625xSignal_underflow:
626 push %eax /* preserve %eax across the call */
627 pushl EX_Underflow
628 call EXCEPTION
629 popl %eax /* discard the argument */
630 popl %eax /* restore the saved %eax */
631 jmp xL_Normalised
632
633
634#ifdef PARANOID
635/* If we ever get here then we have problems! */
/* Internal-error exits, built only with PARANOID. Each pushes EX_INTERNAL
   ORed with a code identifying the failed check, calls EXCEPTION, discards
   the argument into %ebx (which FPU_Arith_exit reloads from the stack
   anyway) and leaves without storing a result. */
/* Bad precision or rounding field in the control word. */
636L_bugged:
637 pushl EX_INTERNAL|0x201
638 call EXCEPTION
639 popl %ebx
640 jmp FPU_Arith_exit
641
/* xNormalise_result found an exponent other than EXP_UNDER+1. */
642L_norm_bugged:
643 pushl EX_INTERNAL|0x216
644 call EXCEPTION
645 popl %ebx
646 jmp FPU_Arith_exit
647
/* Only referenced from the commented-out entry check in FPU_round_sqrt. */
648L_entry_bugged:
649 pushl EX_INTERNAL|0x217
650 call EXCEPTION
651 popl %ebx
652 jmp FPU_Arith_exit
653#endif PARANOID