From 42d414703b3ab6f26b923db2b6357664eadf5b94 Mon Sep 17 00:00:00 2001 From: Bruce Evans Date: Sun, 23 May 1993 00:00:00 +0000 Subject: [PATCH] Major rewrite of npx code (npx-0.5 + 2 patches to it) This is version 0.5 of Bruce Evans' wonderful npx code. He supplied 2 more patches to me after shipping me the initial 0.5 release; they have been applied to this code. I have also added a small change to the npx.c file that causes the probe information to report the correct number of registers. There were also some small changes to the original diffs by Bruce to make this install on top of a 0.2.3 patch kit source tree. It includes fixes that now allow this code to work on 386+387 machines! Date: Wed Apr 21 06:41:47 PDT 1993 AUTHOR: Bruce Evans (???) 386BSD-Patchkit: patch00154 --- usr/src/sys.386bsd/i386/i386/genassym.c | 10 + usr/src/sys.386bsd/i386/i386/locore.s | 155 ++++-- usr/src/sys.386bsd/i386/i386/machdep.c | 5 +- usr/src/sys.386bsd/i386/i386/vm_machdep.c | 21 +- usr/src/sys.386bsd/i386/include/npx.h | 59 +- usr/src/sys.386bsd/i386/include/pcb.h | 10 + usr/src/sys.386bsd/i386/include/specialreg.h | 34 +- usr/src/sys.386bsd/i386/isa/icu.s | 8 +- usr/src/sys.386bsd/i386/isa/npx.c | 558 +++++++++++++++---- 9 files changed, 688 insertions(+), 172 deletions(-) diff --git a/usr/src/sys.386bsd/i386/i386/genassym.c b/usr/src/sys.386bsd/i386/i386/genassym.c index 2f27c3d0bf..18ec37b289 100644 --- a/usr/src/sys.386bsd/i386/i386/genassym.c +++ b/usr/src/sys.386bsd/i386/i386/genassym.c @@ -34,6 +34,14 @@ * SUCH DAMAGE. 
* * @(#)genassym.c 5.11 (Berkeley) 5/10/91 + * + * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE + * -------------------- ----- ---------------------- + * CURRENT PATCH LEVEL: 1 00154 + * -------------------- ----- ---------------------- + * + * 24 Apr 93 Bruce Evans/Dave Rivers Npx-0.5 support + * */ static char rcsid[] = "$Header: /usr/bill/working/sys/i386/i386/RCS/genassym.c,v 1.2 92/01/21 14:22:02 william Exp $"; @@ -146,9 +154,11 @@ main() printf("#define\tRU_MINFLT %d\n", &rup->ru_minflt); printf("#define\tPCB_FLAGS %d\n", &pcb->pcb_flags); printf("#define\tPCB_SAVEFPU %d\n", &pcb->pcb_savefpu); +#ifdef notused printf("#define\tFP_WASUSED %d\n", FP_WASUSED); printf("#define\tFP_NEEDSSAVE %d\n", FP_NEEDSSAVE); printf("#define\tFP_NEEDSRESTORE %d\n", FP_NEEDSRESTORE); +#endif printf("#define\tFP_USESEMC %d\n", FP_USESEMC); printf("#define\tPCB_SAVEEMC %d\n", &pcb->pcb_saveemc); printf("#define\tPCB_CMAP2 %d\n", &pcb->pcb_cmap2); diff --git a/usr/src/sys.386bsd/i386/i386/locore.s b/usr/src/sys.386bsd/i386/i386/locore.s index b6a159b00a..e875760469 100644 --- a/usr/src/sys.386bsd/i386/i386/locore.s +++ b/usr/src/sys.386bsd/i386/i386/locore.s @@ -37,13 +37,14 @@ * * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE * -------------------- ----- ---------------------- - * CURRENT PATCH LEVEL: 3 00117 + * CURRENT PATCH LEVEL: 4 00154 * -------------------- ----- ---------------------- * * 06 Aug 92 Pace Willisson Allow VGA memory to be mapped * 28 Nov 92 Frank MacLachlan Aligned addresses and data * on 32bit boundaries. * 25 Mar 93 Kevin Lahey Add syscall counter for vmstat + * 20 Apr 93 Bruce Evans New npx-0.5 code */ @@ -61,6 +62,10 @@ #include "machine/trap.h" +#include "machine/specialreg.h" + +#define KDSEL 0x10 + /* * Note: This version greatly munged to avoid various assembler errors * that may be fixed in newer versions of gas. 
Perhaps newer versions @@ -1204,20 +1209,16 @@ ENTRY(swtch) movl %edi, PCB_EDI(%ecx) #ifdef NPX - movb PCB_FLAGS(%ecx),%al /* have we used fp, and need a save? */ - andb $ FP_WASUSED|FP_NEEDSSAVE,%al - cmpb $ FP_WASUSED|FP_NEEDSSAVE,%al + mov _curproc,%eax + cmp %eax,_npxproc jne 1f - movl %cr0,%eax /* insure fp is enabled */ - andb $0xfb,%al - movl %eax,%cr0 - fnsave PCB_SAVEFPU(%ecx) - orb $4,%al /* disable it */ - movl %eax,%cr0 - movb PCB_FLAGS(%ecx),%al - xorb $ FP_NEEDSSAVE,%al /* save processed */ - movb %al,PCB_FLAGS(%ecx) + pushl %ecx /* h/w bugs make saving complicated */ + leal PCB_SAVEFPU(%ecx),%eax + pushl %eax + call _npxsave /* do it in a big C function */ + popl %eax + popl %ecx 1: #endif @@ -1288,15 +1289,6 @@ swfnd: movl PCB_EIP(%edx), %eax movl %eax, (%esp) -#ifdef NPX - movb PCB_FLAGS(%edx),%al - /* if fp could be used, a dna trap will do a restore */ - testb $ FP_WASUSED,%al - je 1f - orb $ FP_NEEDSRESTORE,PCB_FLAGS(%edx) -1: -#endif - movl PCB_CMAP2(%edx),%eax # get temporary map movl %eax,_CMAP2 # reload temporary map PTE @@ -1350,18 +1342,45 @@ ENTRY(savectx) movl %ebp, PCB_EBP(%ecx) movl %esi, PCB_ESI(%ecx) movl %edi, PCB_EDI(%ecx) + #ifdef NPX - /* have we ever used fp, and need to save? */ - testb $ FP_WASUSED, PCB_FLAGS(%ecx) + /* + * If npxproc == NULL, then the npx h/w state is irrelevant and the + * state had better already be in the pcb. This is true for forks + * but not for dumps (the old book-keeping with FP flags in the pcb + * always lost for dumps because the dump pcb has 0 flags). + * + * If npxproc != NULL, then we have to save the npx h/w state to + * npxproc's pcb and copy it to the requested pcb, or save to the + * requested pcb and reload. Copying is easier because we would + * have to handle h/w bugs for reloading. We used to lose the + * parent's npx state for forks by forgetting to reload. 
+ */ + mov _npxproc,%eax + testl %eax,%eax je 1f - movl %cr0, %edx - andb $0xfb, %dl - movl %edx, %cr0 - fnsave PCB_SAVEFPU(%ecx) - orb $4, %edx - movl %edx, %cr0 + + pushl %ecx + movl P_ADDR(%eax),%eax + leal PCB_SAVEFPU(%eax),%eax + pushl %eax + pushl %eax + call _npxsave + popl %eax + popl %eax + popl %ecx + + pushl %ecx + pushl $108+8*2 /* XXX h/w state size + padding */ + leal PCB_SAVEFPU(%ecx),%ecx + pushl %ecx + pushl %eax + call _bcopy + addl $12,%esp + popl %ecx 1: #endif + movl _CMAP2, %edx # save temporary map PTE movl %edx, PCB_CMAP2(%ecx) # in our context @@ -1496,7 +1515,31 @@ IDTVEC(page) IDTVEC(rsvd) pushl $0; TRAP(T_RESERVED) IDTVEC(fpu) +#ifdef NPX + /* + * Handle like an interrupt so that we can call npxintr to clear the + * error. It would be better to handle npx interrupts as traps but + * this is difficult for nested interrupts. + */ + pushl $0 /* dummy error code */ + pushl $T_ASTFLT + pushal + nop /* silly, the bug is for popal and it only + * bites when the next instruction has a + * complicated address mode */ + pushl %ds + pushl %es /* now the stack frame is a trap frame */ + movl $KDSEL,%eax + movl %ax,%ds + movl %ax,%es + pushl _cpl + pushl $0 /* dummy unit to finish building intr frame */ + incl _cnt+V_TRAP + call _npxintr + jmp doreti +#else pushl $0; TRAP(T_ARITHTRAP) +#endif /* 17 - 31 reserved for future exp */ IDTVEC(rsvd0) pushl $0; TRAP(17) @@ -1542,13 +1585,14 @@ alltraps: calltrap: incl _cnt+V_TRAP call _trap - call _spl0 - pop %es - pop %ds - popal - nop - addl $8,%esp # pop type, code - iret + /* + * Return through doreti to handle ASTs. Have to change trap frame + * to interrupt frame. 
+ */ + movl $T_ASTFLT,4+4+32(%esp) /* new trap type (err code not used) */ + pushl _cpl + pushl $0 /* dummy unit */ + jmp doreti #ifdef KGDB /* @@ -1581,20 +1625,37 @@ IDTVEC(syscall) pushfl # only for stupid carry bit and more stupid wait3 cc kludge pushal # only need eax,ecx,edx - trap resaves others nop - # movw $KDSEL,%ax - movw $0x10,%ax # switch to kernel segments - movw %ax,%ds - movw %ax,%es + movl $KDSEL,%eax # switch to kernel segments + movl %ax,%ds + movl %ax,%es incl _cnt+V_SYSCALL # kml 3/25/93 call _syscall - call _spl0 - movw __udatasel,%ax # switch back to user segments - movw %ax,%ds - movw %ax,%es + /* + * Return through doreti to handle ASTs. Have to change syscall frame + * to interrupt frame. + * + * XXX - we should have set up the frame earlier to avoid the + * following popal/pushal (not much can be done to avoid shuffling + * the flags). Consistent frames would simplify things all over. + */ + movl 32+0(%esp),%eax /* old flags, shuffle to above cs:eip */ + movl 32+4(%esp),%ebx /* `int' frame should have been ef, eip, cs */ + movl 32+8(%esp),%ecx + movl %ebx,32+0(%esp) + movl %ecx,32+4(%esp) + movl %eax,32+8(%esp) popal nop - popfl - lret + pushl $0 /* dummy error code */ + pushl $T_ASTFLT + pushal + nop + movl __udatasel,%eax /* switch back to user segments */ + push %eax /* XXX - better to preserve originals? 
*/ + push %eax + pushl _cpl + pushl $0 + jmp doreti ALIGN32 ENTRY(htonl) diff --git a/usr/src/sys.386bsd/i386/i386/machdep.c b/usr/src/sys.386bsd/i386/i386/machdep.c index 6abd6b2879..26e989b1f0 100644 --- a/usr/src/sys.386bsd/i386/i386/machdep.c +++ b/usr/src/sys.386bsd/i386/i386/machdep.c @@ -38,7 +38,7 @@ * * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE * -------------------- ----- ---------------------- - * CURRENT PATCH LEVEL: 3 00113 + * CURRENT PATCH LEVEL: 4 00154 * -------------------- ----- ---------------------- * * 15 Aug 92 William Jolitz Large memory bug @@ -46,6 +46,7 @@ * 25 Mar 93 Sean Eric Fagan Added #ifdef HZ around microtime for * the new microtime.s routine * 08 Apr 93 Andrew Herbert Fixes for kmem_alloc panics + * 20 Apr 93 Bruce Evans New npx-0.5 code */ static char rcsid[] = "$Header: /usr/src/sys.386bsd/i386/i386/RCS/machdep.c,v 1.2 92/01/21 14:22:09 william Exp Locker: root $"; @@ -630,7 +631,7 @@ setregs(p, entry) p->p_regs[sEIP] = entry; p->p_addr->u_pcb.pcb_flags = 0; /* no fp at all */ - load_cr0(rcr0() | CR0_EM); /* start emulating */ + load_cr0(rcr0() | CR0_TS); /* start emulating */ #ifdef NPX npxinit(__INITIAL_NPXCW__); #endif diff --git a/usr/src/sys.386bsd/i386/i386/vm_machdep.c b/usr/src/sys.386bsd/i386/i386/vm_machdep.c index 8322733306..27ef912e85 100644 --- a/usr/src/sys.386bsd/i386/i386/vm_machdep.c +++ b/usr/src/sys.386bsd/i386/i386/vm_machdep.c @@ -36,6 +36,14 @@ * SUCH DAMAGE. * * @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 + * + * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE + * -------------------- ----- ---------------------- + * CURRENT PATCH LEVEL: 1 00154 + * -------------------- ----- ---------------------- + * + * 20 Apr 93 Bruce Evans New npx-0.5 code + * */ /* @@ -115,8 +123,6 @@ cpu_fork(p1, p2) return (0); } -extern struct proc *npxproc; - #ifdef notyet /* * cpu_exit is called as the last action during exit. 
@@ -138,8 +144,9 @@ cpu_exit(p) { static struct pcb nullpcb; /* pcb to overwrite on last swtch */ - /* free cporcessor (if we have it) */ - if( p == npxproc) npxproc =0; +#ifdef NPX + npxexit(p); +#endif /* move to inactive space and stack, passing arg accross */ p = swtch_to_inactive(p); @@ -158,9 +165,9 @@ cpu_exit(p) register struct proc *p; { - /* free coprocessor (if we have it) */ - if( p == npxproc) npxproc =0; - +#ifdef NPX + npxexit(p); +#endif splclock(); swtch(); } diff --git a/usr/src/sys.386bsd/i386/include/npx.h b/usr/src/sys.386bsd/i386/include/npx.h index ad710fc9cf..134e0c12ae 100644 --- a/usr/src/sys.386bsd/i386/include/npx.h +++ b/usr/src/sys.386bsd/i386/include/npx.h @@ -34,6 +34,14 @@ * SUCH DAMAGE. * * @(#)npx.h 5.3 (Berkeley) 1/18/91 + * + * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE + * -------------------- ----- ---------------------- + * CURRENT PATCH LEVEL: 1 00154 + * -------------------- ----- ---------------------- + * + * 20 Apr 93 Bruce Evans New npx-0.5 code + * */ /* @@ -58,16 +66,25 @@ struct env87 { /* Contents of each floating point accumulator */ struct fpacc87 { +#ifdef dontdef /* too unportable */ u_long fp_mantlo; /* mantissa low (31:0) */ u_long fp_manthi; /* mantissa high (63:32) */ int fp_exp:15; /* exponent */ int fp_sgn:1; /* mantissa sign */ +#else + u_char fp_bytes[10]; +#endif }; /* Floating point context */ struct save87 { struct env87 sv_env; /* floating point control/status */ struct fpacc87 sv_ac[8]; /* accumulator contents, 0-7 */ +#ifndef dontdef + u_long sv_ex_sw; /* status word for last exception (was pad) */ + u_long sv_ex_tw; /* tag word for last exception (was pad) */ + u_char sv_pad[8 * 2 - 2 * 4]; /* bogus historical padding */ +#endif }; /* Cyrix EMC memory - mapped coprocessor context switch information */ @@ -77,15 +94,53 @@ struct emcsts { long em_dl; /* memory mapped D low register when swtched */ }; -/* Intel prefer's long real (53 bit) precision */ +/* Intel prefers long real (53 bit) precision */ 
#define __iBCS_NPXCW__ 0x262 -/* wfj prefer's temporary real (64 bit) precision */ +/* wfj prefers temporary real (64 bit) precision */ #define __386BSD_NPXCW__ 0x362 +/* + * bde prefers 53 bit precision and all exceptions masked. + * + * The standard control word from finit is 0x37F, giving: + * + * round to nearest + * 64-bit precision + * all exceptions masked. + * + * Now I want: + * + * affine mode for 287's (if they work at all) (1 in bitfield 1<<12) + * 53-bit precision (2 in bitfield 3<<8) + * overflow exception unmasked (0 in bitfield 1<<3) + * zero divide exception unmasked (0 in bitfield 1<<2) + * invalid-operand exception unmasked (0 in bitfield 1<<0). + * + * 64-bit precision often gives bad results with high level languages + * because it makes the results of calculations depend on whether + * intermediate values are stored in memory or in FPU registers. + * + * The "Intel" and wfj control words have: + * + * underflow exception unmasked (0 in bitfield 1<<4) + * + * but that causes an unexpected exception in the test program 'paranoia' + * and makes denormals useless (DBL_MIN / 2 underflows). It doesn't make + * a lot of sense to trap underflow without trapping denormals. + * + * Later I will want the IEEE default of all exceptions masked. See the + * 0.0 math manpage for why this is better. The 0.1 math manpage is empty. + */ +#define __BDE_NPXCW__ 0x1272 +#define __BETTER_BDE_NPXCW__ 0x127f +#ifdef __BROKEN_NPXCW__ #ifdef __386BSD__ #define __INITIAL_NPXCW__ __386BSD_NPXCW__ #else #define __INITIAL_NPXCW__ __iBCS_NPXCW__ #endif +#else +#define __INITIAL_NPXCW__ __BDE_NPXCW__ +#endif #endif ___NPX87___ diff --git a/usr/src/sys.386bsd/i386/include/pcb.h b/usr/src/sys.386bsd/i386/include/pcb.h index 29fa36de5f..92bd810ca0 100644 --- a/usr/src/sys.386bsd/i386/include/pcb.h +++ b/usr/src/sys.386bsd/i386/include/pcb.h @@ -34,6 +34,14 @@ * SUCH DAMAGE. 
* * @(#)pcb.h 5.10 (Berkeley) 5/12/91 + * + * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE + * -------------------- ----- ---------------------- + * CURRENT PATCH LEVEL: 1 00154 + * -------------------- ----- ---------------------- + * + * 20 Apr 93 Bruce Evans New npx-0.5 code + * */ /* @@ -60,9 +68,11 @@ struct pcb { * Software pcb (extension) */ int pcb_flags; +#ifdef notused #define FP_WASUSED 0x01 /* process has used fltng pnt hardware */ #define FP_NEEDSSAVE 0x02 /* ... that needs save on next context switch */ #define FP_NEEDSRESTORE 0x04 /* ... that needs restore on next DNA fault */ +#endif #define FP_USESEMC 0x08 /* process uses EMC memory-mapped mode */ #define FM_TRAP 0x10 /* process entered kernel on a trap frame */ #define FP_SOFTFP 0x20 /* process using software fltng pnt emulator */ diff --git a/usr/src/sys.386bsd/i386/include/specialreg.h b/usr/src/sys.386bsd/i386/include/specialreg.h index eb1a5b48c5..d1908c9371 100644 --- a/usr/src/sys.386bsd/i386/include/specialreg.h +++ b/usr/src/sys.386bsd/i386/include/specialreg.h @@ -31,15 +31,37 @@ * SUCH DAMAGE. * * @(#)specialreg.h 7.1 (Berkeley) 5/9/91 + * + * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE + * -------------------- ----- ---------------------- + * CURRENT PATCH LEVEL: 1 00154 + * -------------------- ----- ---------------------- + * + * 20 Apr 93 Bruce Evans New npx-0.5 code + * */ /* - * 386 Special registers: + * Bits in 386 special registers: */ #define CR0_PE 0x00000001 /* Protected mode Enable */ -#define CR0_MP 0x00000002 /* "Math" Present (e.g. npx), wait for it */ -#define CR0_EM 0x00000004 /* EMulate NPX, e.g. trap, don't execute code */ -#define CR0_TS 0x00000008 /* Process has done Task Switch, do NPX save */ -#define CR0_ET 0x00000010 /* 32 bit (if set) vs 16 bit (387 vs 287) */ -#define CR0_PG 0x80000000 /* Paging Enable */ +#define CR0_MP 0x00000002 /* "Math" Present (NPX or NPX emulator) */ +#ifdef notused +#define CR0_EM 0x00000004 /* EMulate non-NPX coproc. 
(trap ESC only) */ +#endif +#define CR0_TS 0x00000008 /* Task Switched (if MP, trap ESC and WAIT) */ +#ifdef notused +#define CR0_ET 0x00000010 /* Extension Type (387 (if set) vs 287) */ +#endif +#define CR0_PG 0x80000000 /* PaGing enable */ + +/* + * Bits in 486 special registers: + */ + +#define CR0_NE 0x00000020 /* Numeric Error enable (EX16 vs IRQ13) */ +#define CR0_WP 0x00010000 /* Write Protect (honor ~PG_W in all modes) */ +#ifdef notyet +#define CR0_AM 0x00040000 /* Alignment Mask (set to enable AC flag) */ +#endif diff --git a/usr/src/sys.386bsd/i386/isa/icu.s b/usr/src/sys.386bsd/i386/isa/icu.s index 0a96854bfb..12f93b4a56 100644 --- a/usr/src/sys.386bsd/i386/isa/icu.s +++ b/usr/src/sys.386bsd/i386/isa/icu.s @@ -38,13 +38,14 @@ * * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE * -------------------- ----- ---------------------- - * CURRENT PATCH LEVEL: 2 00117 + * CURRENT PATCH LEVEL: 3 00154 * -------------------- ----- ---------------------- * * 28 Nov 92 Frank MacLachlan Aligned addresses and data * on 32bit boundaries. * 24 Mar 93 Rodney W. Grimes Added interrupt counters for vmstat * also stray and false intr counters added + * 20 Apr 93 Bruce Evans New npx-0.5 code */ /* @@ -119,7 +120,7 @@ doreti: ALIGN32 1: cmpl $0,_netisr # check for softint s/traps jne 1f - cmpl $0,_want_resched + cmpl $0,_astpending jne 1f pop %es # none, going back to base pri @@ -181,8 +182,9 @@ doreti: 1: cmpw $0x1f,13*4(%esp) # to user? jne 2f # nope, leave - cmpl $0,_want_resched + cmpl $0,_astpending je 2f + movl $0,_astpending call _trap 2: pop %es diff --git a/usr/src/sys.386bsd/i386/isa/npx.c b/usr/src/sys.386bsd/i386/isa/npx.c index 9023383acd..73392fabfc 100644 --- a/usr/src/sys.386bsd/i386/isa/npx.c +++ b/usr/src/sys.386bsd/i386/isa/npx.c @@ -32,8 +32,21 @@ * SUCH DAMAGE. 
* * @(#)npx.c 7.2 (Berkeley) 5/12/91 + * + * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE + * -------------------- ----- ---------------------- + * CURRENT PATCH LEVEL: 1 00154 + * -------------------- ----- ---------------------- + * + * 20 Apr 93 Bruce Evans New npx-0.5 code + * 23 May 93 Rodney W. Grimes Return a special value of -1 from + * the probe code to keep isa_config from + * printing out the I/O address when we + * are using trap 16 handling. + * */ static char rcsid[] = "$Header: /usr/bill/working/sys/i386/isa/RCS/npx.c,v 1.2 92/01/21 14:34:27 william Exp $"; + #include "npx.h" #if NNPX > 0 @@ -47,170 +60,505 @@ static char rcsid[] = "$Header: /usr/bill/working/sys/i386/isa/RCS/npx.c,v 1.2 9 #include "machine/trap.h" #include "ioctl.h" #include "machine/specialreg.h" +#include "i386/isa/icu.h" #include "i386/isa/isa_device.h" -#include "icu.h" +#include "i386/isa/isa.h" + /* * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. */ -int npxprobe(), npxattach(), npxintr(); +#ifdef __GNUC__ + +#define disable_intr() __asm("cli") +#define enable_intr() __asm("sti") +#define fldcw(addr) __asm("fldcw %0" : : "m" (*addr)) +#define fnclex() __asm("fnclex") +#define fninit() __asm("fninit") +#define fnsave(addr) __asm("fnsave %0" : "=m" (*addr) : "0" (*addr)) +#define fnstcw(addr) __asm("fnstcw %0" : "=m" (*addr) : "0" (*addr)) +#define fnstsw(addr) __asm("fnstsw %0" : "=m" (*addr) : "0" (*addr)) +#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fwait") +#define frstor(addr) __asm("frstor %0" : : "m" (*addr)) +#define fwait() __asm("fwait") +#define read_eflags() ({u_long ef; \ + __asm("pushf; popl %0" : "=a" (ef)); \ + ef; }) +#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ + : : "n" (CR0_TS) : "ax") +#define stop_emulating() __asm("clts") +#define write_eflags(ef) __asm("pushl %0; popf" : : "a" ((u_long) ef)) + +#else /* not __GNUC__ */ + +void disable_intr __P((void)); +void enable_intr __P((void)); +void fldcw 
__P((caddr_t addr)); +void fnclex __P((void)); +void fninit __P((void)); +void fnsave __P((caddr_t addr)); +void fnstcw __P((caddr_t addr)); +void fnstsw __P((caddr_t addr)); +void fp_divide_by_0 __P((void)); +void frstor __P((caddr_t addr)); +void fwait __P((void)); +u_long read_eflags __P((void)); +void start_emulating __P((void)); +void stop_emulating __P((void)); +void write_eflags __P((u_long ef)); + +#endif /* __GNUC__ */ + +typedef u_char bool_t; + +extern struct gate_descriptor idt[]; + +int npxdna __P((void)); +void npxexit __P((struct proc *p)); +void npxinit __P((u_int control)); +void npxintr __P((struct intrframe frame)); +void npxsave __P((struct save87 *addr)); +static int npxattach __P((struct isa_device *dvp)); +static int npxprobe __P((struct isa_device *dvp)); +static int npxprobe1 __P((struct isa_device *dvp)); + struct isa_driver npxdriver = { npxprobe, npxattach, "npx", }; -struct proc *npxproc; /* process who owns device, otherwise zero */ -struct pcb *npxpcb; /* owners context structure */ -int npxexists; -extern long npx0mask; +u_int npx0mask; +struct proc *npxproc; + +static bool_t npx_ex16; +static bool_t npx_exists; +static struct gate_descriptor npx_idt_probeintr; +static int npx_intrno; +static volatile u_int npx_intrs_while_probing; +static bool_t npx_irq13; +static volatile u_int npx_traps_while_probing; + +/* + * Special interrupt handlers. Someday intr0-intr15 will be used to count + * interrupts. We'll still need a special exception 16 handler. The busy + * latch stuff in probeintr() can be moved to npxprobe(). 
+ */ +void probeintr(void); +asm +(" + .text +_probeintr: + ss + incl _npx_intrs_while_probing + pushl %eax + movb $0x20,%al /* EOI (asm in strings loses cpp features) */ + outb %al,$0xa0 /* IO_ICU2 */ + outb %al,$0x20 /* IO_ICU1 */ + movb $0,%al + outb %al,$0xf0 /* clear BUSY# latch */ + popl %eax + iret +"); + +void probetrap(void); +asm +(" + .text +_probetrap: + ss + incl _npx_traps_while_probing + fnclex + iret +"); /* - * Probe routine - look device, otherwise set emulator bit + * Probe routine. Initialize cr0 to give correct behaviour for [f]wait + * whether the device exists or not (XXX should be elsewhere). Set flags + * to tell npxattach() what to do. Modify device struct if npx doesn't + * need to use interrupts. Return 1 if device exists. */ +static int npxprobe(dvp) struct isa_device *dvp; -{ static status, control; +{ + int result; + u_long save_eflags; + u_char save_icu1_mask; + u_char save_icu2_mask; + struct gate_descriptor save_idt_npxintr; + struct gate_descriptor save_idt_npxtrap; + /* + * This routine is now just a wrapper for npxprobe1(), to install + * special npx interrupt and trap handlers, to enable npx interrupts + * and to disable other interrupts. Someday isa_configure() will + * install suitable handlers and run with interrupts enabled so we + * won't need to do so much here. 
+ */ + npx_intrno = NRSVIDT + ffs(dvp->id_irq) - 1; + save_eflags = read_eflags(); + disable_intr(); + save_icu1_mask = inb(IO_ICU1 + 1); + save_icu2_mask = inb(IO_ICU2 + 1); + save_idt_npxintr = idt[npx_intrno]; + save_idt_npxtrap = idt[16]; + outb(IO_ICU1 + 1, ~(IRQ_SLAVE | dvp->id_irq)); + outb(IO_ICU2 + 1, ~(dvp->id_irq >> 8)); + setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL); + setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL); + npx_idt_probeintr = idt[npx_intrno]; + enable_intr(); + result = npxprobe1(dvp); + disable_intr(); + outb(IO_ICU1 + 1, save_icu1_mask); + outb(IO_ICU2 + 1, save_icu2_mask); + idt[npx_intrno] = save_idt_npxintr; + idt[16] = save_idt_npxtrap; + write_eflags(save_eflags); + return (result); +} +static int +npxprobe1(dvp) + struct isa_device *dvp; +{ + int control; + int status; #ifdef lint npxintr(); #endif - - /* insure EM bit off */ - load_cr0(rcr0() & ~CR0_EM); /* stop emulating */ - asm(" fninit "); /* put device in known state */ - - /* check for a proper status of zero */ - status = 0x5a5a; - asm (" fnstsw %0 " : "=m" (status) : "m" (status) ); - - if ((status&0xff) == 0) { - - /* good, now check for a proper control word */ - asm (" fnstcw %0 " : "=m" (status) : "m" (status)); - - if ((status&0x103f) == 0x3f) { - /* then we have a numeric coprocessor */ - /* XXX should force an exception here to generate an intr */ - return (1); + /* + * Partially reset the coprocessor, if any. Some BIOS's don't reset + * it after a warm boot. + */ + outb(0xf1, 0); /* full reset on some systems, NOP on others */ + outb(0xf0, 0); /* clear BUSY# latch */ + /* + * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT + * instructions. We must set the CR0_MP bit and use the CR0_TS + * bit to control the trap, because setting the CR0_EM bit does + * not cause WAIT instructions to trap. 
It's important to trap + * WAIT instructions - otherwise the "wait" variants of no-wait + * control instructions would degenerate to the "no-wait" variants + * after FP context switches but work correctly otherwise. It's + * particularly important to trap WAITs when there is no NPX - + * otherwise the "wait" variants would always degenerate. + * + * Try setting CR0_NE to get correct error reporting on 486DX's. + * Setting it should fail or do nothing on lesser processors. + */ + load_cr0(rcr0() | CR0_MP | CR0_NE); + /* + * But don't trap while we're probing. + */ + stop_emulating(); + /* + * Finish resetting the coprocessor, if any. If there is an error + * pending, then we may get a bogus IRQ13, but probeintr() will handle + * it OK. Bogus halts have never been observed, but we enabled + * IRQ13 and cleared the BUSY# latch early to handle them anyway. + */ + fninit(); + DELAY(1000); /* wait for any IRQ13 (fwait might hang) */ +#ifdef DIAGNOSTIC + if (npx_intrs_while_probing != 0) + printf("fninit caused %u bogus npx interrupt(s)\n", + npx_intrs_while_probing); + if (npx_traps_while_probing != 0) + printf("fninit caused %u bogus npx trap(s)\n", + npx_traps_while_probing); +#endif + /* + * Check for a status of mostly zero. + */ + status = 0x5a5a; + fnstsw(&status); + if ((status & 0xb8ff) == 0) { + /* + * Good, now check for a proper control word. + */ + control = 0x5a5a; + fnstcw(&control); + if ((control & 0x1f3f) == 0x033f) { + npx_exists = 1; + /* + * We have an npx, now divide by 0 to see if exception + * 16 works. + */ + control &= ~(1 << 2); /* enable divide by 0 trap */ + fldcw(&control); + npx_traps_while_probing = npx_intrs_while_probing = 0; + fp_divide_by_0(); + if (npx_traps_while_probing != 0) { + /* + * Good, exception 16 works. 
+ */ + npx_ex16 = 1; + dvp->id_irq = 0; /* zap the interrupt */ + /* + * special return value to flag that we do not + * actually use any I/O registers + */ + return (-1); + } + if (npx_intrs_while_probing != 0) { + /* + * Bad, we are stuck with IRQ13. + */ + npx_irq13 = 1; + npx0mask = dvp->id_irq; /* npxattach too late */ + return (IO_NPXSIZE); + } + /* + * Worse, even IRQ13 is broken. Use emulator. + */ } } - - /* insure EM bit on */ - load_cr0(rcr0() | CR0_EM); /* start emulating */ - return (0); + /* + * Probe failed, but we want to get to npxattach to initialize the + * emulator and say that it has been installed. XXX handle devices + * that aren't really devices better. + */ + dvp->id_irq = 0; + return (IO_NPXSIZE); } /* * Attach routine - announce which it is, and wire into system */ +int npxattach(dvp) struct isa_device *dvp; { - + if (npx_ex16) + printf(" "); + else if (npx_irq13) + printf(" "); + else if (npx_exists) + printf(" "); + else + printf(" <387 Emulator>"); npxinit(__INITIAL_NPXCW__); - npxexists++; - npx0mask = dvp->id_irq; + return (1); /* XXX unused */ } /* * Initialize floating point unit. */ -npxinit(control) { - static short wd; - - if (npxexists == 0) return; - - - wd = control; - wd = 0x272; - load_cr0(rcr0() & ~CR0_EM); /* stop emulating */ - asm (" fninit"); - asm(" fldcw %0" : : "g" (wd)); - if (curpcb) { - asm(" fnsave %0 " : : "g" (curpcb->pcb_savefpu) ); - curpcb->pcb_flags |= FP_NEEDSRESTORE; - } - load_cr0(rcr0() | CR0_EM); /* start emulating */ - outb(0xb1,0); /* reset processor */ -} - -/* - * Load floating point context and record ownership to suite - */ -npxload() { +void +npxinit(control) + u_int control; +{ + struct save87 dummy; - if (npxproc) panic ("npxload"); - npxproc = curproc; - npxpcb = curpcb; - asm(" frstor %0 " : : "g" (curpcb->pcb_savefpu) ); + if (!npx_exists) + return; + /* + * fninit has the same h/w bugs as fnsave. Use the detoxified + * fnsave to throw away any junk in the fpu. 
fnsave initializes + * the fpu and sets npxproc = NULL as important side effects. + */ + npxsave(&dummy); + stop_emulating(); + fldcw(&control); + if (curpcb != NULL) + fnsave(&curpcb->pcb_savefpu); + start_emulating(); } /* - * Unload floating point context and relinquish ownership + * Free coprocessor (if we have it). */ -npxunload() { +void +npxexit(p) + struct proc *p; +{ - if (npxproc == 0) panic ("npxunload"); - asm(" fsave %0 " : : "g" (npxpcb->pcb_savefpu) ); - npxproc = 0 ; + if (p == npxproc) { + start_emulating(); + npxproc = NULL; + } } /* - * Record information needed in processing an exception and clear status word + * Record the FPU state and reinitialize it all except for the control word. + * Then generate a SIGFPE. + * + * Reinitializing the state allows naive SIGFPE handlers to longjmp without + * doing any fixups. + * + * XXX there is currently no way to pass the full error state to signal + * handlers, and if this is a nested interrupt there is no way to pass even + * a status code! So there is no way to have a non-naive SIGFPE handler. At + * best a handler could do an fninit followed by an fldcw of a static value. + * fnclex would be of little use because it would leave junk on the FPU stack. + * Returning from the handler would be even less safe than usual because + * IRQ13 exception handling makes exceptions even less precise than usual. */ -npxintr(frame) struct intrframe frame; { +void +npxintr(frame) + struct intrframe frame; +{ int code; -static status; - - outb(0xf0,0); /* reset processor */ -/*pg("npxintr");*/ - asm (" fnstsw %0 " : "=m" (status) : "m" (status) ); - /* sync state in process context structure, in advance of debugger/process looking for it */ - if (npxproc == 0 || npxexists == 0) panic ("npxintr"); - asm (" fnsave %0 " : : "g" (npxpcb->pcb_savefpu) ); + if (npxproc == NULL || !npx_exists) { + /* XXX no %p in stand/printf.c. Cast to quiet gcc -Wall. 
*/ + printf("npxintr: npxproc = %lx, curproc = %lx, npx_exists = %d\n", + (u_long) npxproc, (u_long) curproc, npx_exists); + panic("npxintr from nowhere"); + } + if (npxproc != curproc) { + printf("npxintr: npxproc = %lx, curproc = %lx, npx_exists = %d\n", + (u_long) npxproc, (u_long) curproc, npx_exists); + panic("npxintr from non-current process"); + } + /* + * Save state. This does an implied fninit. It had better not halt + * the cpu or we'll hang. + */ + outb(0xf0, 0); + fnsave(&curpcb->pcb_savefpu); + fwait(); + /* + * Restore control word (was clobbered by fnsave). + */ + fldcw(&curpcb->pcb_savefpu.sv_env.en_cw); + fwait(); + /* + * Remember the exception status word and tag word. The current + * (almost fninit'ed) fpu state is in the fpu and the exception + * state just saved will soon be junk. However, the implied fninit + * doesn't change the error pointers or register contents, and we + * preserved the control word and will copy the status and tag + * words, so the complete exception state can be recovered. + */ + curpcb->pcb_savefpu.sv_ex_sw = curpcb->pcb_savefpu.sv_env.en_sw; + curpcb->pcb_savefpu.sv_ex_tw = curpcb->pcb_savefpu.sv_env.en_tw; + /* + * Pass exception to process. + */ + if (ISPL(frame.if_cs) == SEL_UPL) { + /* + * Interrupt is essentially a trap, so we can afford to call + * the SIGFPE handler (if any) as soon as the interrupt + * returns. + * + * XXX little or nothing is gained from this, and plenty is + * lost - the interrupt frame has to contain the trap frame + * (this is otherwise only necessary for the rescheduling trap + * in doreti, and the frame for that could easily be set up + * just before it is used). + */ + curproc->p_regs = (int *)&frame.if_es; + curpcb->pcb_flags |= FM_TRAP; /* used by sendsig */ #ifdef notyet - /* encode the appropriate code for detailed information on this exception */ - code = ???; + /* + * Encode the appropriate code for detailed information on + * this exception. 
+ */ + code = XXX_ENCODE(curpcb->pcb_savefpu.sv_ex_sw); #else - code = 0; /* XXX */ + code = 0; /* XXX */ #endif - -/*if((pg("status %x", status) & 0x7f) == 't') {*/ - /* pass exception to process, which may not be the current one */ - if (npxproc == curproc) { - /* Q: what if in an interrupt, or in trap processing? */ - if (ISPL(frame.if_cs) == SEL_UPL) { - curproc->p_regs = (int *)&frame.if_es; - curpcb->pcb_flags |= FM_TRAP; /* used by sendsig */ - } /* else printf("*");*/ trapsignal(curproc, SIGFPE, code); - curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ + curpcb->pcb_flags &= ~FM_TRAP; } else { - /* printf("P");*/ + /* + * Nested interrupt. These losers occur when: + * o an IRQ13 is bogusly generated at a bogus time, e.g.: + * o immediately after an fnsave or frstor of an + * error state. + * o a couple of 386 instructions after + * "fstpl _memvar" causes a stack overflow. + * These are especially nasty when combined with a + * trace trap. + * o an IRQ13 occurs at the same time as another higher- + * priority interrupt. + * + * Treat them like a true async interrupt. + */ psignal(npxproc, SIGFPE); } -/*}*/ - - /* clear the exception so we can catch others like it */ - asm (" fnclex"); } /* * Implement device not available (DNA) exception + * + * It would be better to switch FP context here (only). This would require + * saving the state in the proc table instead of in the pcb. */ -npxdna() { -/*pg("npxdna");*/ - - - if (npxexists == 0) return(0); - load_cr0(rcr0() & ~CR0_EM); /* stop emulating */ - if (curpcb->pcb_flags & FP_NEEDSRESTORE) - asm(" frstor %0 " : : "g" (curpcb->pcb_savefpu)); - curpcb->pcb_flags |= FP_WASUSED | FP_NEEDSSAVE; - curpcb->pcb_flags &= ~FP_NEEDSRESTORE; +int +npxdna() +{ + if (!npx_exists) + return (0); + if (npxproc != NULL) { + printf("npxdna: npxproc = %lx, curproc = %lx\n", + (u_long) npxproc, (u_long) curproc); + panic("npxdna"); + } + stop_emulating(); + /* + * Record new context early in case frstor causes an IRQ13. 
+ */ npxproc = curproc; - npxpcb = curpcb; + /* + * The following frstor may cause an IRQ13 when the state being + * restored has a pending error. The error will appear to have been + * triggered by the current (npx) user instruction even when that + * instruction is a no-wait instruction that should not trigger an + * error (e.g., fnclex). On at least one 486 system all of the + * no-wait instructions are broken the same as frstor, so our + * treatment does not amplify the breakage. On at least one + * 386/Cyrix 387 system, fnclex works correctly while frstor and + * fnsave are broken, so our treatment breaks fnclex if it is the + * first FPU instruction after a context switch. + */ + frstor(&curpcb->pcb_savefpu); + return (1); } -#endif + +/* + * Wrapper for fnsave instruction to handle h/w bugs. If there is an error + * pending, then fnsave generates a bogus IRQ13 on some systems. Force + * any IRQ13 to be handled immediately, and then ignore it. This routine is + * often called at splhigh so it must not use many system services. In + * particular, it's much easier to install a special handler than to + * guarantee that it's safe to use npxintr() and its supporting code. 
+ */ +void +npxsave(addr) + struct save87 *addr; +{ + /* old_icu*_mask hold the ICU mask registers as read on entry; icu*_mask hold them as re-read after the save. */ u_char icu1_mask; + u_char icu2_mask; + u_char old_icu1_mask; + u_char old_icu2_mask; + struct gate_descriptor save_idt_npxintr; + + /* With interrupts disabled, record both ICU mask registers and the IDT entry for npx_intrno, then write the masks back with the npx0mask bits (and IRQ_SLAVE on ICU1) cleared and point the IDT entry at npx_idt_probeintr. NOTE(review): clearing a mask bit presumably unmasks that IRQ, so that a bogus IRQ13 raised by fnsave is taken at once by the temporary handler; confirm against the ICU documentation. */ disable_intr(); + old_icu1_mask = inb(IO_ICU1 + 1); + old_icu2_mask = inb(IO_ICU2 + 1); + save_idt_npxintr = idt[npx_intrno]; + outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0mask)); + outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0mask >> 8)); + idt[npx_intrno] = npx_idt_probeintr; + /* Interrupts are on again while the FPU state is stored at addr; the fwait() follows the fnsave before start_emulating() is called. */ enable_intr(); + stop_emulating(); + fnsave(addr); + fwait(); + start_emulating(); + /* No process owns the coprocessor once its state has been saved. */ npxproc = NULL; + /* Undo the temporary setup with interrupts disabled: re-read the masks, take only the npx0mask bits from the values saved on entry and keep every other bit as currently read, then put the saved IDT entry back. NOTE(review): IRQ_SLAVE was cleared on ICU1 above but is not restored from old_icu1_mask here; confirm that this is intended. */ disable_intr(); + icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */ + icu2_mask = inb(IO_ICU2 + 1); + outb(IO_ICU1 + 1, + (icu1_mask & ~npx0mask) | (old_icu1_mask & npx0mask)); + outb(IO_ICU2 + 1, + (icu2_mask & ~(npx0mask >> 8)) + | (old_icu2_mask & (npx0mask >> 8))); + idt[npx_intrno] = save_idt_npxintr; + enable_intr(); /* back to usual state */ +} + +#endif /* NNPX > 0 */ -- 2.20.1