Badblock reporting and timeout fixes for wd.c, interim hacks.
authorBrett Lymn <blymn@mulga.awadi.com.AU>
Mon, 17 May 1993 00:00:00 +0000 (00:00 +0000)
committerBrett Lymn <blymn@mulga.awadi.com.AU>
Mon, 17 May 1993 00:00:00 +0000 (00:00 +0000)
Right now, if the user executes dd if=/dev/rwd0c of=/dev/null
bs=conv,noerror, the kernel will not report any bad blocks it encounters.
The version of bad144 by Jim Bevier will not work without this patch.

Looking at the affected code, it seems that the wd driver should only
be turning off kernel error messages on partition d (WDRAW is 3), but that is
not the case.  My guess is that the kernel is calling wdopen on partition d
even if one is trying to dd from wd0a.  Actually, can you think of any valid
cases where the DKFL_QUIET flag should be set?  It seems to me that the user
would always want to know when an unmapped bad block is encountered.  Because
checking the DKFL_QUIET flag does take some cycles, we might as well remove it
completely from wd.c if we think we would always want error messages enabled.  If
you think that is a good idea, then I can produce another patch to do that.

Terry
Date: Tue, 6 Apr 1993 13:05:03 -0500

The Problem:
        On my 486/25 with a DTC controller and Seagate Wren ESDI disk
I would sometimes experience a system lockup where the computer would
just sit there with the disk controller led on all the time.  This
lock up would occur usually when I was doing some disk intensive
activity like making /usr/src or committing things to CVS.

The Fix:
        In wd.c there is a function called wdstart that has three
while loops that loop waiting for the controller/disk to come ready.
In my case the disk sometimes would never become ready so the system
would hang in wdstart.  I have inserted timeout code into all the
while loops in wdstart so that if the controller or disk are busy for
too long then the controller is reset and the command is restarted.
The code is written so that you can override the default timeout by
putting the WDCTIMEOUT option in the kernel config file with the
appropriate number.

Brett Lymn                              | "Hey, you can't do that here"
Date: Sat, 10 Apr 1993 16:35:34 +0930 (CST)

Additional fixes:
There were still several spin wait loops that used a hard-coded
constant of 1000000; these have been replaced with WDCTIMEOUT.
I also increased WDCTIMEOUT to 10000000 per Bruce Evans as now that
the spl code is so fast it seems to be needed.

Rod Grimes
Date: Mon May 17 10:01:50 PDT 1993

AUTHOR: Terry Lee <terry@uivlsisd.csl.uiuc.edu>
AUTHOR: Brett Lymn (blymn@mulga.awadi.com.AU)
AUTHOR: Rodney W. Grimes (rgrimes@agora.rain.com)
386BSD-Patchkit: patch00155

usr/src/sys.386bsd/i386/isa/wd.c

index b63e4ea..bbec45b 100644 (file)
@@ -37,7 +37,7 @@
  *
  * PATCHES MAGIC                LEVEL   PATCH THAT GOT US HERE
  * --------------------         -----   ----------------------
  *
  * PATCHES MAGIC                LEVEL   PATCH THAT GOT US HERE
  * --------------------         -----   ----------------------
- * CURRENT PATCH LEVEL:         5       00115
+ * CURRENT PATCH LEVEL:         6       00155
  * --------------------         -----   ----------------------
  *
  * 17 Sep 92   Frank Maclachlan        Fixed I/O error reporting on raw device
  * --------------------         -----   ----------------------
  *
  * 17 Sep 92   Frank Maclachlan        Fixed I/O error reporting on raw device
  *                                     driver initialization, and cylinder
  *                                     boundary conditions.
  * 28 Mar 93   Charles Hannum          Add missing splx calls.
  *                                     driver initialization, and cylinder
  *                                     boundary conditions.
  * 28 Mar 93   Charles Hannum          Add missing splx calls.
+ * 20 Apr 93   Terry Lee               Always report disk errors
+ * 20 Apr 93   Brett Lymn              Change infinite while loops to
+ *                                     timeouts
+ * 17 May 93   Rodney W. Grimes        Fixed all 1000000 to use WDCTIMEOUT,
+ *                                     and increased to 1000000*10 for new
+ *                                     intr-0.1 code.
  */
 
 /* TODO:peel out buffer at low ipl, speed improvement */
  */
 
 /* TODO:peel out buffer at low ipl, speed improvement */
 
 #define _NWD  (NWD - 1)       /* One is for the controller XXX 31 Jul 92*/
 
 
 #define _NWD  (NWD - 1)       /* One is for the controller XXX 31 Jul 92*/
 
+#ifndef WDCTIMEOUT
+#define WDCTIMEOUT     10000000  /* arbitrary timeout for drive ready waits */
+#endif
+
 #define        RETRIES         5       /* number of retries before giving up */
 #define        MAXTRANSFER     32      /* max size of transfer in page clusters */
 
 #define        RETRIES         5       /* number of retries before giving up */
 #define        MAXTRANSFER     32      /* max size of transfer in page clusters */
 
@@ -356,7 +366,7 @@ wdstart()
        struct buf *dp;
        register struct bt_bad *bt_ptr;
        long    blknum, pagcnt, cylin, head, sector;
        struct buf *dp;
        register struct bt_bad *bt_ptr;
        long    blknum, pagcnt, cylin, head, sector;
-       long    secpertrk, secpercyl, addr, i;
+       long    secpertrk, secpercyl, addr, i, timeout;
        int     unit, s, wdc;
 
 loop:
        int     unit, s, wdc;
 
 loop:
@@ -446,14 +456,29 @@ loop:
        wdtab.b_active = 1;             /* mark controller active */
        wdc = du->dk_port;
 
        wdtab.b_active = 1;             /* mark controller active */
        wdc = du->dk_port;
 
+RETRY:
        /* if starting a multisector transfer, or doing single transfers */
        if (du->dk_skip == 0 || (du->dk_flags & DKFL_SINGLE)) {
                if (wdtab.b_errcnt && (bp->b_flags & B_READ) == 0)
                        du->dk_bc += DEV_BSIZE;
 
                /* controller idle? */
        /* if starting a multisector transfer, or doing single transfers */
        if (du->dk_skip == 0 || (du->dk_flags & DKFL_SINGLE)) {
                if (wdtab.b_errcnt && (bp->b_flags & B_READ) == 0)
                        du->dk_bc += DEV_BSIZE;
 
                /* controller idle? */
+               timeout = 0;
                while (inb(wdc+wd_status) & WDCS_BUSY)
                while (inb(wdc+wd_status) & WDCS_BUSY)
-                       ;
+               {
+                       if (++timeout > WDCTIMEOUT)
+                       {
+                               printf("wd.c: Controller busy too long!\n");
+                               /* reset the device */
+                               outb(wdc+wd_ctlr, (WDCTL_RST|WDCTL_IDS));
+                               DELAY(1000);
+                               outb(wdc+wd_ctlr, WDCTL_IDS);
+                               DELAY(1000);
+                               (void) inb(wdc+wd_error);       /* XXX! */
+                               outb(wdc+wd_ctlr, WDCTL_4BIT);
+                               break;
+                       }
+               }
 
                /* stuff the task file */
                outb(wdc+wd_precomp, lp->d_precompcyl / 4);
 
                /* stuff the task file */
                outb(wdc+wd_precomp, lp->d_precompcyl / 4);
@@ -480,8 +505,22 @@ loop:
                outb(wdc+wd_sdh, WDSD_IBM | (unit<<4) | (head & 0xf));
 
                /* wait for drive to become ready */
                outb(wdc+wd_sdh, WDSD_IBM | (unit<<4) | (head & 0xf));
 
                /* wait for drive to become ready */
+               timeout = 0;
                while ((inb(wdc+wd_status) & WDCS_READY) == 0)
                while ((inb(wdc+wd_status) & WDCS_READY) == 0)
-                       ;
+               {
+                       if (++timeout > WDCTIMEOUT)
+                       {
+                               printf("wd.c: Drive busy too long!\n");
+                               /* reset the device */
+                               outb(wdc+wd_ctlr, (WDCTL_RST|WDCTL_IDS));
+                               DELAY(1000);
+                               outb(wdc+wd_ctlr, WDCTL_IDS);
+                               DELAY(1000);
+                               (void) inb(wdc+wd_error);       /* XXX! */
+                               outb(wdc+wd_ctlr, WDCTL_4BIT);
+                               goto RETRY;
+                       }
+               }
 
                /* initiate command! */
 #ifdef B_FORMAT
 
                /* initiate command! */
 #ifdef B_FORMAT
@@ -501,8 +540,22 @@ loop:
        if (bp->b_flags & B_READ) return;
 
        /* ready to send data?  */
        if (bp->b_flags & B_READ) return;
 
        /* ready to send data?  */
+       timeout = 0;
        while ((inb(wdc+wd_status) & WDCS_DRQ) == 0)
        while ((inb(wdc+wd_status) & WDCS_DRQ) == 0)
-               ;
+       {
+               if (++timeout > WDCTIMEOUT)
+               {
+                       printf("wd.c: Drive not ready for too long!\n");
+                       /* reset the device */
+                       outb(wdc+wd_ctlr, (WDCTL_RST|WDCTL_IDS));
+                       DELAY(1000);
+                       outb(wdc+wd_ctlr, WDCTL_IDS);
+                       DELAY(1000);
+                       (void) inb(wdc+wd_error);       /* XXX! */
+                       outb(wdc+wd_ctlr, WDCTL_4BIT);
+                       goto RETRY;
+               }
+       }
 
        /* then send it! */
        outsw (wdc+wd_data, addr+du->dk_skip * DEV_BSIZE,
 
        /* then send it! */
        outsw (wdc+wd_data, addr+du->dk_skip * DEV_BSIZE,
@@ -699,10 +752,7 @@ wdopen(dev_t dev, int flags, int fmt, struct proc *p)
                du->dk_dd.d_secpercyl = 17*8;
                du->dk_state = WANTOPEN;
                du->dk_unit = unit;
                du->dk_dd.d_secpercyl = 17*8;
                du->dk_state = WANTOPEN;
                du->dk_unit = unit;
-               if (part == WDRAW)
-                       du->dk_flags |= DKFL_QUIET;
-               else
-                       du->dk_flags &= ~DKFL_QUIET;
+               du->dk_flags &= ~DKFL_QUIET;
 
                /* read label using "c" partition */
                if (msg = readdisklabel(makewddev(major(dev), wdunit(dev), WDRAW),
 
                /* read label using "c" partition */
                if (msg = readdisklabel(makewddev(major(dev), wdunit(dev), WDRAW),
@@ -808,7 +858,7 @@ wdcontrol(register struct buf *bp)
                wdtab.b_active = 1;
 
                /* wait for drive and controller to become ready */
                wdtab.b_active = 1;
 
                /* wait for drive and controller to become ready */
-               for (i = 1000000; (inb(wdc+wd_status) & (WDCS_READY|WDCS_BUSY))
+               for (i = WDCTIMEOUT; (inb(wdc+wd_status) & (WDCS_READY|WDCS_BUSY))
                                  != WDCS_READY && i-- != 0; )
                        ;
                outb(wdc+wd_command, WDCC_RESTORE | WD_STEP);
                                  != WDCS_READY && i-- != 0; )
                        ;
                outb(wdc+wd_command, WDCC_RESTORE | WD_STEP);
@@ -864,7 +914,7 @@ badopen:
  */
 static int
 wdcommand(struct disk *du, int cmd) {
  */
 static int
 wdcommand(struct disk *du, int cmd) {
-       int timeout = 1000000, stat, wdc;
+       int timeout = WDCTIMEOUT, stat, wdc;
 
        /* controller ready for command? */
        wdc = du->dk_port;
 
        /* controller ready for command? */
        wdc = du->dk_port;
@@ -1281,7 +1331,7 @@ wddump(dev_t dev)                 /* dump core after a system crash */
                if (inb(wdc+wd_status) & WDCS_DRQ) return(EIO) ;
 
                /* wait for completion */
                if (inb(wdc+wd_status) & WDCS_DRQ) return(EIO) ;
 
                /* wait for completion */
-               for ( i = 1000000 ; inb(wdc+wd_status) & WDCS_BUSY ; i--) {
+               for ( i = WDCTIMEOUT ; inb(wdc+wd_status) & WDCS_BUSY ; i--) {
                                if (i < 0) return (EIO) ;
                }
                /* error check the xfer */
                                if (i < 0) return (EIO) ;
                }
                /* error check the xfer */