Return to BSD News archive
Newsgroups: comp.os.386bsd.bugs Path: sserve!newshost.anu.edu.au!munnari.oz.au!constellation!news.uoknor.edu!ns1.nodak.edu!netnews.nwnet.net!news.clark.edu!spool.mu.edu!howland.reston.ans.net!pipex!sunic!trane.uninett.no!news.eunet.no!nuug!dkuug!login.dkuug.dk!sos From: sos@login.dkuug.dk (S|ren Schmidt) Subject: IDE-disk hangs - solution/patches NetBSD/FreeBSD Message-ID: <sos.753438746@login.dkuug.dk> Summary: fixes for lost interrupts with IDE disks Keywords: hanging-disk, IDE-disk, lost-interrupt Sender: news@diana.dknet.dk Nntp-Posting-Host: login.dkuug.dk Organization: DKnet Date: Tue, 16 Nov 1993 08:32:26 GMT Lines: 767 Due to "popular" demand I'm posting these patches to NetBSD/FreeBSD instead of mailing them around the world :-) As many have found out there is a problem when using IDE disks on both FreeBSD/NetBSD. Following are two patches (one for NetBSD-current and one for FreeBSD) that fix the problem with lost interrupts. Both fixes are based on a patch posted here some months ago by Stefan Behrens?? (sorry I've lost the original article). But anyway it works (for me :-). Basically it does a timeout on lost interrupts, starting the operation again and logging an error message on the console. The NetBSD version additionally fixes some of the (never-ending) while loops (that made it work with two IDE disks !) The FreeBSD version additionally makes the already present while loop timeouts independent of CPU speed, and adds minor numbers for easy access to DOS partitions. (This was actually sent to freebsd-wd@freefall in early August) Enjoy !! 
Soeren Schmidt sos@login.dkuug.dk ===8<======8<======8<======8<======8<======8<======8<======8<=== NetBSD-current version: *** wd.c.orig Wed Aug 25 12:52:08 1993 --- wd.c Wed Nov 3 19:35:35 1993 *************** *** 141,155 **** short dkc_port; }; - void bad144intern(struct disk *); - void wddisksort(); - struct board wdcontroller[NWDC]; struct disk *wddrives[NWD]; /* table of units */ struct buf wdtab[NWDC]; /* various per-controller info */ struct buf wdutab[NWD]; /* head of queue per drive */ struct buf rwdbuf[NWD]; /* buffers for raw IO */ long wdxfer[NWD]; /* count of transfers */ int wdprobe(), wdattach(); --- 141,153 ---- short dkc_port; }; struct board wdcontroller[NWDC]; struct disk *wddrives[NWD]; /* table of units */ struct buf wdtab[NWDC]; /* various per-controller info */ struct buf wdutab[NWD]; /* head of queue per drive */ struct buf rwdbuf[NWD]; /* buffers for raw IO */ long wdxfer[NWD]; /* count of transfers */ + int wdtimeoutstatus[NWD]; /* timeout counters */ int wdprobe(), wdattach(); *************** *** 157,168 **** wdprobe, wdattach, "wdc", }; ! void wdustart(struct disk *); ! void wdstart(int); ! int wdcommand(struct disk *, int); ! int wdcontrol(struct buf *); ! int wdsetctlr(dev_t, struct disk *); ! int wdgetctlr(int, struct disk *); /* * Probe for controller. --- 155,170 ---- wdprobe, wdattach, "wdc", }; ! static void wdustart(struct disk *); ! static void wdstart(int); ! static int wdcommand(struct disk *, int); ! static int wdcontrol(struct buf *); ! static int wdsetctlr(dev_t, struct disk *); ! static int wdgetctlr(int, struct disk *); ! static void bad144intern(struct disk *); ! static void wddisksort(); ! static int wdreset(int, int, int); ! static int wdtimeout(caddr_t); /* * Probe for controller. *************** *** 238,244 **** bzero(&wdutab[lunit], sizeof(struct buf)); bzero(&rwdbuf[lunit], sizeof(struct buf)); wdxfer[lunit] = 0; ! 
du->dk_ctrlr = dvp->id_masunit; du->dk_unit = unit; du->dk_lunit = lunit; --- 240,247 ---- bzero(&wdutab[lunit], sizeof(struct buf)); bzero(&rwdbuf[lunit], sizeof(struct buf)); wdxfer[lunit] = 0; ! wdtimeoutstatus[lunit] = 0; ! wdtimeout(lunit); du->dk_ctrlr = dvp->id_masunit; du->dk_unit = unit; du->dk_lunit = lunit; *************** *** 595,602 **** } /* if this is a read operation, just go away until it's done. */ ! if (bp->b_flags & B_READ) return; /* ready to send data? */ for (timeout=0; (inb(wdc+wd_altsts) & WDCS_DRQ) == 0; ) { --- 598,607 ---- } /* if this is a read operation, just go away until it's done. */ ! if (bp->b_flags & B_READ) { ! wdtimeoutstatus[lunit] = 2; return; + } /* ready to send data? */ for (timeout=0; (inb(wdc+wd_altsts) & WDCS_DRQ) == 0; ) { *************** *** 617,622 **** --- 622,628 ---- DEV_BSIZE/sizeof(short)); du->dk_bc -= DEV_BSIZE; du->dk_bct -= DEV_BSIZE; + wdtimeoutstatus[lunit] = 2; } /* Interrupt routine for the controller. Acknowledge the interrupt, check for *************** *** 629,635 **** { register struct disk *du; register struct buf *bp, *dp; ! int status, wdc, ctrlr; ctrlr = wdif.if_vec; --- 635,641 ---- { register struct disk *du; register struct buf *bp, *dp; ! int status, wdc, ctrlr, timeout; ctrlr = wdif.if_vec; *************** *** 642,654 **** bp = dp->b_actf; du = wddrives[wdunit(bp->b_dev)]; wdc = du->dk_port; #ifdef WDDEBUG printf("I%d ", ctrlr); #endif ! while ((status = inb(wdc+wd_status)) & WDCS_BUSY) ! ; /* is it not a transfer, but a control operation? */ if (du->dk_state < OPEN) { --- 648,668 ---- bp = dp->b_actf; du = wddrives[wdunit(bp->b_dev)]; wdc = du->dk_port; + wdtimeoutstatus[wdunit(bp->b_dev)] = 0; #ifdef WDDEBUG printf("I%d ", ctrlr); #endif ! for (timeout=0; ((status=inb(wdc+wd_status)) & WDCS_BUSY); ) { ! DELAY(WDCDELAY); ! if (++timeout < WDCNDELAY/20) ! continue; ! wdstart(ctrlr); ! /* #ifdef WDDEBUG */ ! printf("wdc%d: timeout in wdintr WDCS_BUSY\n", ctrlr); ! /* #endif */ ! 
} /* is it not a transfer, but a control operation? */ if (du->dk_state < OPEN) { *************** *** 708,716 **** chk = min(DEV_BSIZE / sizeof(short), du->dk_bc / sizeof(short)); /* ready to receive data? */ ! while ((inb(wdc+wd_status) & WDCS_DRQ) == 0) ! ; ! /* suck in data */ insw (wdc+wd_data, (int)bp->b_un.b_addr + du->dk_skip * DEV_BSIZE, chk); --- 722,738 ---- chk = min(DEV_BSIZE / sizeof(short), du->dk_bc / sizeof(short)); /* ready to receive data? */ ! for (timeout=0; (inb(wdc+wd_status) & WDCS_DRQ) == 0; ) { ! DELAY(WDCDELAY); ! if (++timeout < WDCNDELAY/20) ! continue; ! wdstart(ctrlr); ! /* #ifdef WDDEBUG */ ! printf("wdc%d: timeout in wdintr WDCS_DRQ\n", ctrlr); ! /* #endif */ ! break; ! } ! /* suck in data */ insw (wdc+wd_data, (int)bp->b_un.b_addr + du->dk_skip * DEV_BSIZE, chk); *************** *** 1690,1697 **** dp->b_actl = bp; } ! wdreset(ctrlr, wdc, err) ! int ctrlr; { int stat, timeout; --- 1712,1719 ---- dp->b_actl = bp; } ! static int ! wdreset(int ctrlr, int wdc, int err) { int stat, timeout; *************** *** 1714,1717 **** --- 1736,1770 ---- if(timeout>WDCNDELAY_DEBUG) printf("wdc%d: timeout took %dus\n", ctrlr, WDCDELAY * timeout); #endif + } + + + static int + wdtimeout(caddr_t arg) + { + int x = splbio(); + register int unit = (int) arg; + + if (wdtimeoutstatus[unit]) { + if (--wdtimeoutstatus[unit] == 0) { + struct disk *du = wddrives[unit]; + int wdc = du->dk_port; + /* #ifdef WDDEBUG */ + printf("wd%d: lost interrupt - status %x, error %x\n", + unit, inb(wdc+wd_status), inb(wdc+wd_error)); + /* #endif */ + outb(wdc+wd_ctlr, (WDCTL_RST|WDCTL_IDS)); + DELAY(1000); + outb(wdc+wd_ctlr, WDCTL_IDS); + DELAY(1000); + (void) inb(wdc+wd_error); + outb(wdc+wd_ctlr, WDCTL_4BIT); + du->dk_skip = 0; + du->dk_flags |= DKFL_SINGLE; + wdstart(du->dk_ctrlr); /* start controller */ + } + } + timeout((timeout_t)wdtimeout, (caddr_t)unit, 50); + splx(x); + return (0); } ===8<======8<======8<======8<======8<======8<======8<======8<=== FreeBSD version: 
*** wd.c.orig Sun Aug 8 09:03:07 1993 --- wd.c Wed Sep 8 11:31:42 1993 *************** *** 82,91 **** #include "syslog.h" #include "vm/vm.h" ! #define _NWD (NWD - 1) /* One is for the controller XXX 31 Jul 92*/ #ifndef WDCTIMEOUT ! #define WDCTIMEOUT 10000000 /* arbitrary timeout for drive ready waits */ #endif #define RETRIES 5 /* number of retries before giving up */ --- 94,103 ---- #include "syslog.h" #include "vm/vm.h" ! #define _NWD (NWD - 1) /* One is for the controller XXX */ #ifndef WDCTIMEOUT ! #define WDCTIMEOUT 300000 /* timeout for waits in units of 10 usecs */ #endif #define RETRIES 5 /* number of retries before giving up */ *************** *** 146,151 **** --- 158,164 ---- struct buf wdutab[_NWD]; /* head of queue per drive */ struct buf rwdbuf[_NWD]; /* buffers for raw IO */ long wdxfer[_NWD]; /* count of transfers */ + int wdtimeout_status[_NWD]; /* timeout status */ #ifdef WDDEBUG int wddebug; #endif *************** *** 160,165 **** --- 173,180 ---- static int wdcontrol(struct buf *); static int wdsetctlr(dev_t, struct disk *); static int wdgetctlr(int, struct disk *); + static int wdtimeout(int); + static int wdreset(int); /* * Probe for controller. *************** *** 171,183 **** struct disk *du; int wdc; ! if (unit >= _NWD) /* 31 Jul 92*/ return(0); if ((du = wddrives[unit]) == 0) { du = wddrives[unit] = (struct disk *) malloc (sizeof(struct disk), M_TEMP, M_NOWAIT); ! bzero (du, sizeof(struct disk)); /* 31 Jul 92*/ du->dk_unit = unit; } --- 186,198 ---- struct disk *du; int wdc; ! if (unit >= _NWD) return(0); if ((du = wddrives[unit]) == 0) { du = wddrives[unit] = (struct disk *) malloc (sizeof(struct disk), M_TEMP, M_NOWAIT); ! 
bzero (du, sizeof(struct disk)); du->dk_unit = unit; } *************** *** 226,231 **** --- 241,249 ---- du->dk_unit = unit; du->dk_port = dvp->id_iobase; } + /* initialize timeout */ + wdtimeout_status[unit] = 0; + wdtimeout(unit); /* print out description of drive, suppressing multiple blanks*/ if(wdgetctlr(unit, du) == 0) { *************** *** 286,292 **** } /* have partitions and want to use them? */ ! if ((du->dk_flags & DKFL_BSDLABEL) != 0 && wdpart(bp->b_dev) != WDRAW) { /* * do bounds checking, adjust transfer. if error, process. --- 304,312 ---- } /* have partitions and want to use them? */ ! if ((du->dk_flags & DKFL_BSDLABEL) != 0 ! && wdpart(bp->b_dev) != WDRAW ! && wddospart(bp->b_dev) == 0) { /* * do bounds checking, adjust transfer. if error, process. *************** *** 359,364 **** --- 379,385 ---- register struct disk *du; /* disk unit for IO */ register struct buf *bp; struct disklabel *lp; + struct dos_partition *dosp; struct buf *dp; register struct bt_bad *bt_ptr; long blknum, pagcnt, cylin, head, sector; *************** *** 405,413 **** du->dk_bc = bp->b_bcount; lp = &du->dk_dd; secpertrk = lp->d_nsectors; secpercyl = lp->d_secpercyl; ! if ((du->dk_flags & DKFL_BSDLABEL) != 0 && wdpart(bp->b_dev) != WDRAW) blknum += lp->d_partitions[wdpart(bp->b_dev)].p_offset; cylin = blknum / secpercyl; head = (blknum % secpercyl) / secpertrk; --- 426,439 ---- du->dk_bc = bp->b_bcount; lp = &du->dk_dd; + dosp = du->dk_dospartitions; + if (wddospart(bp->b_dev)) + blknum += dosp[wdpart(bp->b_dev)].dp_start; secpertrk = lp->d_nsectors; secpercyl = lp->d_secpercyl; ! if ((du->dk_flags & DKFL_BSDLABEL) != 0 ! && wdpart(bp->b_dev) != WDRAW ! && wddospart(bp->b_dev) == 0) blknum += lp->d_partitions[wdpart(bp->b_dev)].p_offset; cylin = blknum / secpercyl; head = (blknum % secpercyl) / secpertrk; *************** *** 466,486 **** du->dk_bc += DEV_BSIZE; /* controller idle? */ ! timeout = 0; ! while (inb(wdc+wd_status) & WDCS_BUSY) ! { ! if (++timeout > WDCTIMEOUT) ! 
{ ! printf("wd.c: Controller busy too long!\n"); ! /* reset the device */ ! outb(wdc+wd_ctlr, (WDCTL_RST|WDCTL_IDS)); ! DELAY(1000); ! outb(wdc+wd_ctlr, WDCTL_IDS); ! DELAY(1000); ! (void) inb(wdc+wd_error); /* XXX! */ ! outb(wdc+wd_ctlr, WDCTL_4BIT); ! break; ! } } /* stuff the task file */ --- 492,503 ---- du->dk_bc += DEV_BSIZE; /* controller idle? */ ! timeout = WDCTIMEOUT; ! while ((inb(wdc+wd_status) & WDCS_BUSY) && timeout--) ! DELAY(10); ! if (!timeout) { ! printf("wd: busy timeout\n"); ! wdreset(wdc); } /* stuff the task file */ *************** *** 508,528 **** outb(wdc+wd_sdh, WDSD_IBM | (unit<<4) | (head & 0xf)); /* wait for drive to become ready */ ! timeout = 0; ! while ((inb(wdc+wd_status) & WDCS_READY) == 0) ! { ! if (++timeout > WDCTIMEOUT) ! { ! printf("wd.c: Drive busy too long!\n"); ! /* reset the device */ ! outb(wdc+wd_ctlr, (WDCTL_RST|WDCTL_IDS)); ! DELAY(1000); ! outb(wdc+wd_ctlr, WDCTL_IDS); ! DELAY(1000); ! (void) inb(wdc+wd_error); /* XXX! */ ! outb(wdc+wd_ctlr, WDCTL_4BIT); ! goto RETRY; ! } } /* initiate command! */ --- 525,537 ---- outb(wdc+wd_sdh, WDSD_IBM | (unit<<4) | (head & 0xf)); /* wait for drive to become ready */ ! timeout = WDCTIMEOUT; ! while ((inb(wdc+wd_status) & WDCS_READY) == 0 && timeout--) ! DELAY(10); ! if (!timeout) { ! printf("wd: ready timeout\n"); ! wdreset(wdc); ! goto RETRY; } /* initiate command! */ *************** *** 540,569 **** } /* if this is a read operation, just go away until it's done. */ ! if (bp->b_flags & B_READ) return; /* ready to send data? */ ! timeout = 0; ! while ((inb(wdc+wd_status) & WDCS_DRQ) == 0) ! { ! if (++timeout > WDCTIMEOUT) ! { ! printf("wd.c: Drive not ready for too long!\n"); ! /* reset the device */ ! outb(wdc+wd_ctlr, (WDCTL_RST|WDCTL_IDS)); ! DELAY(1000); ! outb(wdc+wd_ctlr, WDCTL_IDS); ! DELAY(1000); ! (void) inb(wdc+wd_error); /* XXX! */ ! outb(wdc+wd_ctlr, WDCTL_4BIT); ! goto RETRY; ! } } /* then send it! 
*/ outsw (wdc+wd_data, addr+du->dk_skip * DEV_BSIZE, DEV_BSIZE/sizeof(short)); du->dk_bc -= DEV_BSIZE; } /* Interrupt routine for the controller. Acknowledge the interrupt, check for --- 549,574 ---- } /* if this is a read operation, just go away until it's done. */ ! if (bp->b_flags & B_READ) { ! wdtimeout_status[unit] = 2; ! return; ! } /* ready to send data? */ ! timeout = WDCTIMEOUT; ! while ((inb(wdc+wd_status) & WDCS_DRQ) == 0 && timeout--) ! DELAY(10); ! if (!timeout) { ! printf("wd: drq timeout\n"); ! wdreset(wdc); ! goto RETRY; } /* then send it! */ outsw (wdc+wd_data, addr+du->dk_skip * DEV_BSIZE, DEV_BSIZE/sizeof(short)); du->dk_bc -= DEV_BSIZE; + wdtimeout_status[unit] = 2; } /* Interrupt routine for the controller. Acknowledge the interrupt, check for *************** *** 590,595 **** --- 595,601 ---- bp = dp->b_actf; du = wddrives[wdunit(bp->b_dev)]; wdc = du->dk_port; + wdtimeout_status[wdunit(bp->b_dev)] = 0; #ifdef WDDEBUG printf("I "); *************** *** 781,787 **** * that overlaps another partition which is open * unless one is the "raw" partition (whole disk). */ ! if ((du->dk_openpart & mask) == 0 /*&& part != RAWPART*/ && part != WDRAW) { int start, end; pp = &du->dk_dd.d_partitions[part]; --- 787,794 ---- * that overlaps another partition which is open * unless one is the "raw" partition (whole disk). */ ! if ((du->dk_openpart & mask) == 0 /*&& part != RAWPART*/ ! && part != WDRAW && wddospart(dev) == 0) { int start, end; pp = &du->dk_dd.d_partitions[part]; *************** *** 805,811 **** pp - du->dk_dd.d_partitions + 'a'); } } ! if (part >= du->dk_dd.d_npartitions && part != WDRAW) return (ENXIO); /* insure only one open at a time */ --- 812,819 ---- pp - du->dk_dd.d_partitions + 'a'); } } ! if (part >= du->dk_dd.d_npartitions ! && part != WDRAW && wddospart(dev) == 0) return (ENXIO); /* insure only one open at a time */ *************** *** 856,862 **** wdtab.b_active = 1; /* wait for drive and controller to become ready */ ! 
for (i = WDCTIMEOUT; (inb(wdc+wd_status) & (WDCS_READY|WDCS_BUSY)) != WDCS_READY && i-- != 0; ) ; outb(wdc+wd_command, WDCC_RESTORE | WD_STEP); --- 864,870 ---- wdtab.b_active = 1; /* wait for drive and controller to become ready */ ! for (i=WDCTIMEOUT; (inb(wdc+wd_status) & (WDCS_READY|WDCS_BUSY)) != WDCS_READY && i-- != 0; ) ; outb(wdc+wd_command, WDCC_RESTORE | WD_STEP); *************** *** 908,938 **** */ static int wdcommand(struct disk *du, int cmd) { ! int timeout = WDCTIMEOUT, stat, wdc; /* controller ready for command? */ wdc = du->dk_port; ! while (((stat = inb(wdc + wd_status)) & WDCS_BUSY) && timeout > 0) ! timeout--; if (timeout <= 0) return(-1); /* send command, await results */ outb(wdc+wd_command, cmd); ! while (((stat = inb(wdc+wd_status)) & WDCS_BUSY) && timeout > 0) ! timeout--; if (timeout <= 0) return(-1); if (cmd != WDCC_READP) return (stat); /* is controller ready to return data? */ ! while (((stat = inb(wdc+wd_status)) & (WDCS_ERR|WDCS_DRQ)) == 0 && ! timeout > 0) ! timeout--; if (timeout <= 0) return(-1); - return (stat); } --- 916,948 ---- */ static int wdcommand(struct disk *du, int cmd) { ! int timeout, stat, wdc; /* controller ready for command? */ wdc = du->dk_port; ! timeout = WDCTIMEOUT; ! while (((stat = inb(wdc + wd_status)) & WDCS_BUSY) && timeout--) ! DELAY(10); if (timeout <= 0) return(-1); /* send command, await results */ outb(wdc+wd_command, cmd); ! timeout = WDCTIMEOUT; ! while (((stat = inb(wdc+wd_status)) & WDCS_BUSY) && timeout--) ! DELAY(10); if (timeout <= 0) return(-1); if (cmd != WDCC_READP) return (stat); /* is controller ready to return data? */ ! timeout = WDCTIMEOUT; ! while (((stat=inb(wdc+wd_status)) & (WDCS_ERR|WDCS_DRQ)) == 0 ! && timeout--) ! DELAY(10); if (timeout <= 0) return(-1); return (stat); } *************** *** 1190,1196 **** int unit = wdunit(dev), part = wdpart(dev), val; struct disk *du; ! 
if (unit >= _NWD) /* 31 Jul 92*/ return(-1); du = wddrives[unit]; --- 1200,1206 ---- int unit = wdunit(dev), part = wdpart(dev), val; struct disk *du; ! if (unit >= _NWD) return(-1); du = wddrives[unit]; *************** *** 1202,1207 **** --- 1212,1250 ---- return((int)du->dk_dd.d_partitions[part].p_size); } + static int + wdreset(int wdc) + { + outb(wdc+wd_ctlr, (WDCTL_RST|WDCTL_IDS)); + DELAY(1000); + outb(wdc+wd_ctlr, WDCTL_IDS); + DELAY(1000); + (void) inb(wdc+wd_error); /* XXX! */ + outb(wdc+wd_ctlr, WDCTL_4BIT); + } + + static int + wdtimeout(int unit) + { + int x = splbio(); + + if (wdtimeout_status[unit]) { + if (--wdtimeout_status[unit] == 0) { + struct disk *du = wddrives[unit]; + int wdc = du->dk_port; + + printf("wd: interupt timeout\n"); + wdreset(wdc); + du->dk_skip = 0; + du->dk_flags |= DKFL_SINGLE; + wdstart(); + } + } + timeout(wdtimeout, unit, 100); + splx(x); + return (0); + } + extern char *vmmap; /* poor name! */ int *************** *** 1230,1236 **** unit = wdunit(dev); /* eventually support floppies? */ part = wdpart(dev); /* file system */ /* check for acceptable drive number */ ! if (unit >= _NWD) return(ENXIO); /* 31 Jul 92*/ du = wddrives[unit]; if (du == 0) return(ENXIO); --- 1273,1279 ---- unit = wdunit(dev); /* eventually support floppies? */ part = wdpart(dev); /* file system */ /* check for acceptable drive number */ ! if (unit >= _NWD) return(ENXIO); du = wddrives[unit]; if (du == 0) return(ENXIO); ===8<======8<======8<======8<======8<======8<======8<======8<=== end.