X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/f7287e4bda9b6faf24bebb281190054084c856ac..9cf42d55e0bbe77d51dbb0b1775653bb22439d15:/usr/src/sys/ufs/ffs/ffs_alloc.c diff --git a/usr/src/sys/ufs/ffs/ffs_alloc.c b/usr/src/sys/ufs/ffs/ffs_alloc.c index d102310809..416461f279 100644 --- a/usr/src/sys/ufs/ffs/ffs_alloc.c +++ b/usr/src/sys/ufs/ffs/ffs_alloc.c @@ -1,17 +1,23 @@ -/* Copyright (c) 1981 Regents of the University of California */ - -static char vers[] = "@(#)ffs_alloc.c 1.19 %G%"; - -/* alloc.c 4.8 81/03/08 */ +/* + * Copyright (c) 1982, 1986, 1989 Regents of the University of California. + * All rights reserved. + * + * %sccs.include.redist.c% + * + * @(#)ffs_alloc.c 7.21 (Berkeley) %G% + */ -#include "../h/param.h" -#include "../h/systm.h" -#include "../h/mount.h" -#include "../h/fs.h" -#include "../h/conf.h" -#include "../h/buf.h" -#include "../h/inode.h" -#include "../h/user.h" +#include "param.h" +#include "systm.h" +#include "buf.h" +#include "user.h" +#include "vnode.h" +#include "kernel.h" +#include "syslog.h" +#include "cmap.h" +#include "../ufs/quota.h" +#include "../ufs/inode.h" +#include "../ufs/fs.h" extern u_long hashalloc(); extern ino_t ialloccg(); @@ -42,43 +48,51 @@ extern unsigned char *fragtbl[]; * 2) quadradically rehash into other cylinder groups, until an * available block is located. 
*/ -struct buf * -alloc(ip, bpref, size) +alloc(ip, lbn, bpref, size, bnp) register struct inode *ip; - daddr_t bpref; + daddr_t lbn, bpref; int size; + daddr_t *bnp; { daddr_t bno; register struct fs *fs; register struct buf *bp; - int cg; + int cg, error; + struct ucred *cred = u.u_cred; /* XXX */ + *bnp = 0; fs = ip->i_fs; - if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) + if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) { + printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", + ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); panic("alloc: bad size"); + } if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) goto nospace; - if (u.u_uid != 0 && - fs->fs_cstotal.cs_nbfree * fs->fs_frag + fs->fs_cstotal.cs_nffree < - fs->fs_dsize * fs->fs_minfree / 100) + if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0) goto nospace; +#ifdef QUOTA + if (error = chkdq(ip, (long)btodb(size), cred, 0)) + return (error); +#endif if (bpref >= fs->fs_size) bpref = 0; if (bpref == 0) cg = itog(fs, ip->i_number); else cg = dtog(fs, bpref); - bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size, alloccg); - if (bno == 0) - goto nospace; - bp = getblk(ip->i_dev, fsbtodb(fs, bno), size); - clrbuf(bp); - return (bp); + bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size, + (u_long (*)())alloccg); + if (bno > 0) { + ip->i_blocks += btodb(size); + ip->i_flag |= IUPD|ICHG; + *bnp = bno; + return (0); + } nospace: - fserr(fs, "file system full"); + fserr(fs, cred->cr_uid, "file system full"); uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); - u.u_error = ENOSPC; - return (NULL); + return (ENOSPC); } /* @@ -89,71 +103,141 @@ nospace: * the original block. Failing that, the regular block allocator is * invoked to get an appropriate block. 
*/ -struct buf * -realloccg(ip, bprev, bpref, osize, nsize) +realloccg(ip, lbprev, bpref, osize, nsize, bpp) register struct inode *ip; - daddr_t bprev, bpref; + off_t lbprev; + daddr_t bpref; int osize, nsize; + struct buf **bpp; { - daddr_t bno; register struct fs *fs; - register struct buf *bp, *obp; - caddr_t cp; - int cg; + struct buf *bp, *obp; + int cg, request; + daddr_t bprev, bno, bn; + int i, error, count; + struct ucred *cred = u.u_cred; /* XXX */ + *bpp = 0; fs = ip->i_fs; if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || - (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) + (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { + printf("dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n", + ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt); panic("realloccg: bad size"); - if (u.u_uid != 0 && - fs->fs_cstotal.cs_nbfree * fs->fs_frag + fs->fs_cstotal.cs_nffree < - fs->fs_dsize * fs->fs_minfree / 100) + } + if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0) goto nospace; - if (bprev != 0) - cg = dtog(fs, bprev); - else + if ((bprev = ip->i_db[lbprev]) == 0) { + printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n", + ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt); panic("realloccg: bad bprev"); - bno = fragextend(ip, cg, (long)bprev, osize, nsize); - if (bno != 0) { - bp = bread(ip->i_dev, fsbtodb(fs, bno), osize); - if (bp->b_flags & B_ERROR) { - brelse(bp); - return 0; - } - bp->b_bcount = nsize; - blkclr(bp->b_un.b_addr + osize, nsize - osize); - return (bp); } +#ifdef QUOTA + if (error = chkdq(ip, (long)btodb(nsize - osize), cred, 0)) + return (error); +#endif + /* + * Allocate the extra space in the buffer. + */ + if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) { + brelse(bp); + return (error); + } + allocbuf(bp, nsize); + bp->b_flags |= B_DONE; + bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize); + /* + * Check for extension in the existing location. 
+ */ + cg = dtog(fs, bprev); + if (bno = fragextend(ip, cg, (long)bprev, osize, nsize)) { + if (bp->b_blkno != fsbtodb(fs, bno)) + panic("bad blockno"); + ip->i_blocks += btodb(nsize - osize); + ip->i_flag |= IUPD|ICHG; + *bpp = bp; + return (0); + } + /* + * Allocate a new disk location. + */ if (bpref >= fs->fs_size) bpref = 0; - bno = (daddr_t)hashalloc(ip, cg, (long)bpref, nsize, alloccg); - if (bno != 0) { + switch ((int)fs->fs_optim) { + case FS_OPTSPACE: /* - * make a new copy + * Allocate an exact sized fragment. Although this makes + * best use of space, we will waste time relocating it if + * the file continues to grow. If the fragmentation is + * less than half of the minimum free reserve, we choose + * to begin optimizing for time. */ - obp = bread(ip->i_dev, fsbtodb(fs, bprev), osize); - if (obp->b_flags & B_ERROR) { - brelse(obp); - return 0; - } - bp = getblk(ip->i_dev, fsbtodb(fs, bno), nsize); - cp = bp->b_un.b_addr; - bp->b_un.b_addr = obp->b_un.b_addr; - obp->b_un.b_addr = cp; - obp->b_flags |= B_INVAL; - brelse(obp); - fre(ip, bprev, (off_t)osize); - blkclr(bp->b_un.b_addr + osize, nsize - osize); - return(bp); + request = nsize; + if (fs->fs_minfree < 5 || + fs->fs_cstotal.cs_nffree > + fs->fs_dsize * fs->fs_minfree / (2 * 100)) + break; + log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", + fs->fs_fsmnt); + fs->fs_optim = FS_OPTTIME; + break; + case FS_OPTTIME: + /* + * At this point we have discovered a file that is trying + * to grow a small fragment to a larger fragment. To save + * time, we allocate a full sized block, then free the + * unused portion. If the file continues to grow, the + * `fragextend' call above will be able to grow it in place + * without further copying. If aberrant programs cause + * disk fragmentation to grow within 2% of the free reserve, + * we choose to begin optimizing for space. 
+ */ + request = fs->fs_bsize; + if (fs->fs_cstotal.cs_nffree < + fs->fs_dsize * (fs->fs_minfree - 2) / 100) + break; + log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", + fs->fs_fsmnt); + fs->fs_optim = FS_OPTSPACE; + break; + default: + printf("dev = 0x%x, optim = %d, fs = %s\n", + ip->i_dev, fs->fs_optim, fs->fs_fsmnt); + panic("realloccg: bad optim"); + /* NOTREACHED */ + } + bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request, + (u_long (*)())alloccg); + if (bno > 0) { +#ifdef SECSIZE + obp = bread(ip->i_dev, fsbtodb(fs, bprev), osize, + fs->fs_dbsize); +#else SECSIZE + bp->b_blkno = bn = fsbtodb(fs, bno); + count = howmany(osize, CLBYTES); + for (i = 0; i < count; i++) +#ifdef SECSIZE + munhash(ip->i_dev, bn + i * CLBYTES / fs->fs_dbsize); +#else SECSIZE + munhash(ip->i_dev, bn + i * CLBYTES / DEV_BSIZE); +#endif SECSIZE + blkfree(ip, bprev, (off_t)osize); + if (nsize < request) + blkfree(ip, bno + numfrags(fs, nsize), + (off_t)(request - nsize)); + ip->i_blocks += btodb(nsize - osize); + ip->i_flag |= IUPD|ICHG; + *bpp = bp; + return (0); } + brelse(bp); nospace: /* * no space available */ - fserr(fs, "file system full"); + fserr(fs, cred->cr_uid, "file system full"); uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); - u.u_error = ENOSPC; - return (NULL); + return (ENOSPC); } /* @@ -171,17 +255,19 @@ nospace: * 2) quadradically rehash into other cylinder groups, until an * available inode is located. 
*/ -struct inode * -ialloc(pip, ipref, mode) +ialloc(pip, ipref, mode, cred, ipp) register struct inode *pip; ino_t ipref; int mode; + struct ucred *cred; + struct inode **ipp; { ino_t ino; register struct fs *fs; register struct inode *ip; - int cg; + int cg, error; + *ipp = 0; fs = pip->i_fs; if (fs->fs_cstotal.cs_nifree == 0) goto noinodes; @@ -191,19 +277,34 @@ ialloc(pip, ipref, mode) ino = (ino_t)hashalloc(pip, cg, (long)ipref, mode, ialloccg); if (ino == 0) goto noinodes; - ip = iget(pip->i_dev, pip->i_fs, ino); - if (ip == NULL) { - ifree(ip, ino, 0); - return (NULL); + error = iget(pip, ino, ipp); + if (error) { + ifree(pip, ino, mode); + return (error); } - if (ip->i_mode) + ip = *ipp; + if (ip->i_mode) { + printf("mode = 0%o, inum = %d, fs = %s\n", + ip->i_mode, ip->i_number, fs->fs_fsmnt); panic("ialloc: dup alloc"); - return (ip); + } + if (ip->i_blocks) { /* XXX */ + printf("free inode %s/%d had %d blocks\n", + fs->fs_fsmnt, ino, ip->i_blocks); + ip->i_blocks = 0; + } + ip->i_flags = 0; + /* + * Set up a new generation number for this inode. + */ + if (++nextgennumber < (u_long)time.tv_sec) + nextgennumber = time.tv_sec; + ip->i_gen = nextgennumber; + return (0); noinodes: - fserr(fs, "out of inodes"); - uprintf("\n%s: create failed, no inodes free\n", fs->fs_fsmnt); - u.u_error = ENOSPC; - return (NULL); + fserr(fs, cred->cr_uid, "out of inodes"); + uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); + return (ENOSPC); } /* @@ -213,6 +314,7 @@ noinodes: * among those cylinder groups with above the average number of * free inodes, the one with the smallest number of directories. */ +ino_t dirpref(fs) register struct fs *fs; { @@ -227,35 +329,96 @@ dirpref(fs) mincg = cg; minndir = fs->fs_cs(fs, cg).cs_ndir; } - return (fs->fs_ipg * mincg); + return ((ino_t)(fs->fs_ipg * mincg)); } /* - * Select a cylinder to place a large block of data. 
- * - * The policy implemented by this algorithm is to maintain a - * rotor that sweeps the cylinder groups. When a block is - * needed, the rotor is advanced until a cylinder group with - * greater than the average number of free blocks is found. + * Select the desired position for the next block in a file. The file is + * logically divided into sections. The first section is composed of the + * direct blocks. Each additional section contains fs_maxbpg blocks. + * + * If no blocks have been allocated in the first section, the policy is to + * request a block in the same cylinder group as the inode that describes + * the file. If no blocks have been allocated in any other section, the + * policy is to place the section in a cylinder group with a greater than + * average number of free blocks. An appropriate cylinder group is found + * by using a rotor that sweeps the cylinder groups. When a new group of + * blocks is needed, the sweep begins in the cylinder group following the + * cylinder group from which the previous allocation was made. The sweep + * continues until a cylinder group with greater than the average number + * of free blocks is found. If the allocation is for the first block in an + * indirect block, the information on the previous allocation is unavailable; + * here a best guess is made based upon the logical block number being + * allocated. + * + * If a section is already partially allocated, the policy is to + * contiguously allocate fs_maxcontig blocks. The end of one of these + * contiguous blocks and the beginning of the next is physically separated + * so that the disk head will be in transit between them for at least + * fs_rotdelay milliseconds. This is to allow time for the processor to + * schedule another I/O transfer. 
*/ daddr_t -blkpref(fs) - register struct fs *fs; +blkpref(ip, lbn, indx, bap) + struct inode *ip; + daddr_t lbn; + int indx; + daddr_t *bap; { - int cg, avgbfree; + register struct fs *fs; + register int cg; + int avgbfree, startcg; + daddr_t nextblk; - avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; - for (cg = fs->fs_cgrotor + 1; cg < fs->fs_ncg; cg++) - if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - fs->fs_cgrotor = cg; - return (fs->fs_fpg * cg + fs->fs_frag); - } - for (cg = 0; cg <= fs->fs_cgrotor; cg++) - if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - fs->fs_cgrotor = cg; + fs = ip->i_fs; + if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { + if (lbn < NDADDR) { + cg = itog(fs, ip->i_number); return (fs->fs_fpg * cg + fs->fs_frag); } - return (0); + /* + * Find a cylinder with greater than average number of + * unused data blocks. + */ + if (indx == 0 || bap[indx - 1] == 0) + startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg; + else + startcg = dtog(fs, bap[indx - 1]) + 1; + startcg %= fs->fs_ncg; + avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; + for (cg = startcg; cg < fs->fs_ncg; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (fs->fs_fpg * cg + fs->fs_frag); + } + for (cg = 0; cg <= startcg; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (fs->fs_fpg * cg + fs->fs_frag); + } + return (NULL); + } + /* + * One or more previous blocks have been laid out. If less + * than fs_maxcontig previous blocks are contiguous, the + * next block is requested contiguously, otherwise it is + * requested rotationally delayed by fs_rotdelay milliseconds. 
+ */ + nextblk = bap[indx - 1] + fs->fs_frag; + if (indx > fs->fs_maxcontig && + bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig) + != nextblk) + return (nextblk); + if (fs->fs_rotdelay != 0) + /* + * Here we convert ms of delay to frags as: + * (frags) = (ms) * (rev/sec) * (sect/rev) / + * ((sect/frag) * (ms/sec)) + * then round up to the next block. + */ + nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / + (NSPF(fs) * 1000), fs->fs_frag); + return (nextblk); } /* @@ -299,9 +462,11 @@ hashalloc(ip, cg, pref, size, allocator) } /* * 3: brute force search + * Note that we start at i == 2, since 0 was checked initially, + * and 1 is always checked in the quadratic rehash. */ - cg = icg; - for (i = 0; i < fs->fs_ncg; i++) { + cg = (icg + 2) % fs->fs_ncg; + for (i = 2; i < fs->fs_ncg; i++) { result = (*allocator)(ip, cg, 0, size); if (result) return (result); @@ -309,7 +474,7 @@ hashalloc(ip, cg, pref, size, allocator) if (cg == fs->fs_ncg) cg = 0; } - return (0); + return (NULL); } /* @@ -326,30 +491,43 @@ fragextend(ip, cg, bprev, osize, nsize) int osize, nsize; { register struct fs *fs; - register struct buf *bp; register struct cg *cgp; + struct buf *bp; long bno; int frags, bbase; - int i; + int i, error; fs = ip->i_fs; + if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) + return (NULL); frags = numfrags(fs, nsize); - bbase = fragoff(fs, bprev); - if (bbase > (bprev + frags - 1) % fs->fs_frag) { - /* cannot extend across a block boundry */ - return (0); + bbase = fragnum(fs, bprev); + if (bbase > fragnum(fs, (bprev + frags - 1))) { + /* cannot extend across a block boundary */ + return (NULL); } - bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), fs->fs_bsize); - if (bp->b_flags & B_ERROR) { +#ifdef SECSIZE + bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, + fs->fs_dbsize); +#else SECSIZE + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { 
brelse(bp); - return 0; + return (NULL); } +#endif SECSIZE cgp = bp->b_un.b_cg; + if (!cg_chkmagic(cgp)) { + brelse(bp); + return (NULL); + } + cgp->cg_time = time.tv_sec; bno = dtogd(fs, bprev); for (i = numfrags(fs, osize); i < frags; i++) - if (isclr(cgp->cg_free, bno + i)) { + if (isclr(cg_blksfree(cgp), bno + i)) { brelse(bp); - return (0); + return (NULL); } /* * the current fragment can be extended @@ -358,13 +536,13 @@ fragextend(ip, cg, bprev, osize, nsize) * allocate the extended piece */ for (i = frags; i < fs->fs_frag - bbase; i++) - if (isclr(cgp->cg_free, bno + i)) + if (isclr(cg_blksfree(cgp), bno + i)) break; cgp->cg_frsum[i - numfrags(fs, osize)]--; if (i != frags) cgp->cg_frsum[i - frags]++; for (i = numfrags(fs, osize); i < frags; i++) { - clrbit(cgp->cg_free, bno + i); + clrbit(cg_blksfree(cgp), bno + i); cgp->cg_cs.cs_nffree--; fs->fs_cstotal.cs_nffree--; fs->fs_cs(fs, cg).cs_nffree--; @@ -388,21 +566,32 @@ alloccg(ip, cg, bpref, size) int size; { register struct fs *fs; - register struct buf *bp; register struct cg *cgp; - int bno, frags; - int allocsiz; + struct buf *bp; register int i; + int error, bno, frags, allocsiz; fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) - return (0); - bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), fs->fs_bsize); - if (bp->b_flags & B_ERROR) { + return (NULL); +#ifdef SECSIZE + bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, + fs->fs_dbsize); +#else SECSIZE + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { brelse(bp); - return 0; + return (NULL); } +#endif SECSIZE cgp = bp->b_un.b_cg; + if (!cg_chkmagic(cgp) || + (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { + brelse(bp); + return (NULL); + } + cgp->cg_time = time.tv_sec; if (size == fs->fs_bsize) { bno = alloccgblk(fs, cgp, bpref); bdwrite(bp); @@ -424,28 +613,32 @@ alloccg(ip, cg, bpref, size) */ if (cgp->cg_cs.cs_nbfree == 0) { brelse(bp); 
- return (0); + return (NULL); } bno = alloccgblk(fs, cgp, bpref); bpref = dtogd(fs, bno); for (i = frags; i < fs->fs_frag; i++) - setbit(cgp->cg_free, bpref + i); + setbit(cg_blksfree(cgp), bpref + i); i = fs->fs_frag - frags; cgp->cg_cs.cs_nffree += i; fs->fs_cstotal.cs_nffree += i; fs->fs_cs(fs, cg).cs_nffree += i; + fs->fs_fmod++; cgp->cg_frsum[i]++; bdwrite(bp); return (bno); } bno = mapsearch(fs, cgp, bpref, allocsiz); - if (bno == 0) - return (0); + if (bno < 0) { + brelse(bp); + return (NULL); + } for (i = 0; i < frags; i++) - clrbit(cgp->cg_free, bno + i); + clrbit(cg_blksfree(cgp), bno + i); cgp->cg_cs.cs_nffree -= frags; fs->fs_cstotal.cs_nffree -= frags; fs->fs_cs(fs, cg).cs_nffree -= frags; + fs->fs_fmod++; cgp->cg_frsum[allocsiz]--; if (frags != allocsiz) cgp->cg_frsum[allocsiz - frags]++; @@ -471,7 +664,7 @@ alloccgblk(fs, cgp, bpref) daddr_t bpref; { daddr_t bno; - int cylno, pos; + int cylno, pos, delta; short *cylbp; register int i; @@ -479,12 +672,12 @@ alloccgblk(fs, cgp, bpref) bpref = cgp->cg_rotor; goto norot; } - bpref &= ~(fs->fs_frag - 1); + bpref = blknum(fs, bpref); bpref = dtogd(fs, bpref); /* * if the requested block is available, use it */ - if (isblock(fs, cgp->cg_free, bpref/fs->fs_frag)) { + if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) { bno = bpref; goto gotit; } @@ -492,7 +685,7 @@ alloccgblk(fs, cgp, bpref) * check for a block available on the same cylinder */ cylno = cbtocylno(fs, bpref); - if (cgp->cg_btot[cylno] == 0) + if (cg_blktot(cgp)[cylno] == 0) goto norot; if (fs->fs_cpc == 0) { /* @@ -502,33 +695,17 @@ alloccgblk(fs, cgp, bpref) bpref = howmany(fs->fs_spc * cylno, NSPF(fs)); goto norot; } - /* - * find a block that is rotationally optimal - */ - cylbp = cgp->cg_b[cylno]; - if (fs->fs_rotdelay == 0) { - pos = cbtorpos(fs, bpref); - } else { - /* - * here we convert ms of delay to frags as: - * (frags) = (ms) * (rev/sec) * (sect/rev) / - * ((sect/frag) * (ms/sec)) - * then round up to the next rotational 
position - */ - bpref += fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / - (NSPF(fs) * 1000); - pos = cbtorpos(fs, bpref); - pos = (pos + 1) % NRPOS; - } /* * check the summary information to see if a block is * available in the requested cylinder starting at the - * optimal rotational position and proceeding around. + * requested rotational position and proceeding around. */ - for (i = pos; i < NRPOS; i++) + cylbp = cg_blks(fs, cgp, cylno); + pos = cbtorpos(fs, bpref); + for (i = pos; i < fs->fs_nrpos; i++) if (cylbp[i] > 0) break; - if (i == NRPOS) + if (i == fs->fs_nrpos) for (i = 0; i < pos; i++) if (cylbp[i] > 0) break; @@ -539,16 +716,23 @@ alloccgblk(fs, cgp, bpref) */ pos = cylno % fs->fs_cpc; bno = (cylno - pos) * fs->fs_spc / NSPB(fs); - if (fs->fs_postbl[pos][i] == -1) + if (fs_postbl(fs, pos)[i] == -1) { + printf("pos = %d, i = %d, fs = %s\n", + pos, i, fs->fs_fsmnt); panic("alloccgblk: cyl groups corrupted"); - for (i = fs->fs_postbl[pos][i]; ; i += fs->fs_rotbl[i]) { - if (isblock(fs, cgp->cg_free, bno + i)) { - bno = (bno + i) * fs->fs_frag; + } + for (i = fs_postbl(fs, pos)[i];; ) { + if (isblock(fs, cg_blksfree(cgp), bno + i)) { + bno = blkstofrags(fs, (bno + i)); goto gotit; } - if (fs->fs_rotbl[i] == 0) + delta = fs_rotbl(fs)[i]; + if (delta <= 0 || + delta + i > fragstoblks(fs, fs->fs_fpg)) break; + i += delta; } + printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); panic("alloccgblk: can't find blk in cyl"); } norot: @@ -556,22 +740,22 @@ norot: * no blocks in the requested cylinder, so take next * available one in this cylinder group. 
*/ - bno = mapsearch(fs, cgp, bpref, fs->fs_frag); - if (bno == 0) - return (0); + bno = mapsearch(fs, cgp, bpref, (int)fs->fs_frag); + if (bno < 0) + return (NULL); cgp->cg_rotor = bno; gotit: - clrblock(fs, cgp->cg_free, bno/fs->fs_frag); + clrblock(fs, cg_blksfree(cgp), (long)fragstoblks(fs, bno)); cgp->cg_cs.cs_nbfree--; fs->fs_cstotal.cs_nbfree--; fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; cylno = cbtocylno(fs, bno); - cgp->cg_b[cylno][cbtorpos(fs, bno)]--; - cgp->cg_btot[cylno]--; + cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; + cg_blktot(cgp)[cylno]--; fs->fs_fmod++; return (cgp->cg_cgx * fs->fs_fpg + bno); } - + /* * Determine whether an inode can be allocated. * @@ -589,38 +773,63 @@ ialloccg(ip, cg, ipref, mode) int mode; { register struct fs *fs; - register struct buf *bp; register struct cg *cgp; - int i; + struct buf *bp; + int error, start, len, loc, map, i; fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nifree == 0) - return (0); - bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), fs->fs_bsize); - if (bp->b_flags & B_ERROR) { + return (NULL); +#ifdef SECSIZE + bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, + fs->fs_dbsize); +#else SECSIZE + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { brelse(bp); - return 0; + return (NULL); } +#endif SECSIZE cgp = bp->b_un.b_cg; + if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { + brelse(bp); + return (NULL); + } + cgp->cg_time = time.tv_sec; if (ipref) { ipref %= fs->fs_ipg; - if (isclr(cgp->cg_iused, ipref)) + if (isclr(cg_inosused(cgp), ipref)) goto gotit; - } else - ipref = cgp->cg_irotor; - for (i = 0; i < fs->fs_ipg; i++) { - ipref++; - if (ipref >= fs->fs_ipg) - ipref = 0; - if (isclr(cgp->cg_iused, ipref)) { + } + start = cgp->cg_irotor / NBBY; + len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); + loc = skpc(0xff, len, &cg_inosused(cgp)[start]); + if (loc == 0) { + len = start + 1; + start = 0; + loc = skpc(0xff, len, 
&cg_inosused(cgp)[0]); + if (loc == 0) { + printf("cg = %s, irotor = %d, fs = %s\n", + cg, cgp->cg_irotor, fs->fs_fsmnt); + panic("ialloccg: map corrupted"); + /* NOTREACHED */ + } + } + i = start + len - loc; + map = cg_inosused(cgp)[i]; + ipref = i * NBBY; + for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { + if ((map & i) == 0) { cgp->cg_irotor = ipref; goto gotit; } } - brelse(bp); - return (0); + printf("fs = %s\n", fs->fs_fsmnt); + panic("ialloccg: block not in map"); + /* NOTREACHED */ gotit: - setbit(cgp->cg_iused, ipref); + setbit(cg_inosused(cgp), ipref); cgp->cg_cs.cs_nifree--; fs->fs_cstotal.cs_nifree--; fs->fs_cs(fs, cg).cs_nifree--; @@ -641,70 +850,93 @@ gotit: * free map. If a fragment is deallocated, a possible * block reassembly is checked. */ -fre(ip, bno, size) +blkfree(ip, bno, size) register struct inode *ip; daddr_t bno; off_t size; { register struct fs *fs; register struct cg *cgp; - register struct buf *bp; - int cg, blk, frags, bbase; + struct buf *bp; + int error, cg, blk, frags, bbase; register int i; + struct ucred *cred = u.u_cred; /* XXX */ fs = ip->i_fs; - if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) - panic("free: bad size"); + if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) { + printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", + ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); + panic("blkfree: bad size"); + } cg = dtog(fs, bno); - if (badblock(fs, bno)) + if ((unsigned)bno >= fs->fs_size) { + printf("bad block %d, ino %d\n", bno, ip->i_number); + fserr(fs, cred->cr_uid, "bad block"); return; - bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), fs->fs_bsize); - if (bp->b_flags & B_ERROR) { + } +#ifdef SECSIZE + bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, + fs->fs_dbsize); +#else SECSIZE + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { brelse(bp); return; } +#endif SECSIZE cgp = bp->b_un.b_cg; + if (!cg_chkmagic(cgp)) { + 
brelse(bp); + return; + } + cgp->cg_time = time.tv_sec; bno = dtogd(fs, bno); if (size == fs->fs_bsize) { - if (isblock(fs, cgp->cg_free, bno/fs->fs_frag)) - panic("free: freeing free block"); - setblock(fs, cgp->cg_free, bno/fs->fs_frag); + if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno))) { + printf("dev = 0x%x, block = %d, fs = %s\n", + ip->i_dev, bno, fs->fs_fsmnt); + panic("blkfree: freeing free block"); + } + setblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno)); cgp->cg_cs.cs_nbfree++; fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; i = cbtocylno(fs, bno); - cgp->cg_b[i][cbtorpos(fs, bno)]++; - cgp->cg_btot[i]++; + cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++; + cg_blktot(cgp)[i]++; } else { - bbase = bno - (bno % fs->fs_frag); + bbase = bno - fragnum(fs, bno); /* * decrement the counts associated with the old frags */ - blk = ((cgp->cg_free[bbase / NBBY] >> (bbase % NBBY)) & - (0xff >> (NBBY - fs->fs_frag))); + blk = blkmap(fs, cg_blksfree(cgp), bbase); fragacct(fs, blk, cgp->cg_frsum, -1); /* * deallocate the fragment */ frags = numfrags(fs, size); for (i = 0; i < frags; i++) { - if (isset(cgp->cg_free, bno + i)) - panic("free: freeing free frag"); - setbit(cgp->cg_free, bno + i); - cgp->cg_cs.cs_nffree++; - fs->fs_cstotal.cs_nffree++; - fs->fs_cs(fs, cg).cs_nffree++; + if (isset(cg_blksfree(cgp), bno + i)) { + printf("dev = 0x%x, block = %d, fs = %s\n", + ip->i_dev, bno + i, fs->fs_fsmnt); + panic("blkfree: freeing free frag"); + } + setbit(cg_blksfree(cgp), bno + i); } + cgp->cg_cs.cs_nffree += i; + fs->fs_cstotal.cs_nffree += i; + fs->fs_cs(fs, cg).cs_nffree += i; /* * add back in counts associated with the new frags */ - blk = ((cgp->cg_free[bbase / NBBY] >> (bbase % NBBY)) & - (0xff >> (NBBY - fs->fs_frag))); + blk = blkmap(fs, cg_blksfree(cgp), bbase); fragacct(fs, blk, cgp->cg_frsum, 1); /* * if a complete block has been reassembled, account for it */ - if (isblock(fs, cgp->cg_free, bbase / fs->fs_frag)) { + if (isblock(fs, 
cg_blksfree(cgp), + (daddr_t)fragstoblks(fs, bbase))) { cgp->cg_cs.cs_nffree -= fs->fs_frag; fs->fs_cstotal.cs_nffree -= fs->fs_frag; fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; @@ -712,8 +944,8 @@ fre(ip, bno, size) fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; i = cbtocylno(fs, bbase); - cgp->cg_b[i][cbtorpos(fs, bbase)]++; - cgp->cg_btot[i]++; + cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; + cg_blktot(cgp)[i]++; } } fs->fs_fmod++; @@ -732,23 +964,42 @@ ifree(ip, ino, mode) { register struct fs *fs; register struct cg *cgp; - register struct buf *bp; - int cg; + struct buf *bp; + int error, cg; fs = ip->i_fs; - if ((unsigned)ino >= fs->fs_ipg*fs->fs_ncg) + if ((unsigned)ino >= fs->fs_ipg*fs->fs_ncg) { + printf("dev = 0x%x, ino = %d, fs = %s\n", + ip->i_dev, ino, fs->fs_fsmnt); panic("ifree: range"); + } cg = itog(fs, ino); - bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), fs->fs_bsize); - if (bp->b_flags & B_ERROR) { +#ifdef SECSIZE + bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, + fs->fs_dbsize); +#else SECSIZE + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { brelse(bp); return; } +#endif SECSIZE cgp = bp->b_un.b_cg; + if (!cg_chkmagic(cgp)) { + brelse(bp); + return; + } + cgp->cg_time = time.tv_sec; ino %= fs->fs_ipg; - if (isclr(cgp->cg_iused, ino)) + if (isclr(cg_inosused(cgp), ino)) { + printf("dev = 0x%x, ino = %d, fs = %s\n", + ip->i_dev, ino, fs->fs_fsmnt); panic("ifree: freeing free inode"); - clrbit(cgp->cg_iused, ino); + } + clrbit(cg_inosused(cgp), ino); + if (ino < cgp->cg_irotor) + cgp->cg_irotor = ino; cgp->cg_cs.cs_nifree++; fs->fs_cstotal.cs_nifree++; fs->fs_cs(fs, cg).cs_nifree++; @@ -787,17 +1038,20 @@ mapsearch(fs, cgp, bpref, allocsiz) else start = cgp->cg_frotor / NBBY; len = howmany(fs->fs_fpg, NBBY) - start; - loc = scanc(len, &cgp->cg_free[start], fragtbl[fs->fs_frag], - 1 << (allocsiz - 1)); + loc = scanc((unsigned)len, (u_char 
*)&cg_blksfree(cgp)[start], + (u_char *)fragtbl[fs->fs_frag], + (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); if (loc == 0) { - loc = fs->fs_dblkno / NBBY; - len = start - loc + 1; - start = loc; - loc = scanc(len, &cgp->cg_free[start], fragtbl[fs->fs_frag], - 1 << (allocsiz - 1)); + len = start + 1; + start = 0; + loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[0], + (u_char *)fragtbl[fs->fs_frag], + (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); if (loc == 0) { + printf("start = %d, len = %d, fs = %s\n", + start, len, fs->fs_fsmnt); panic("alloccg: map corrupted"); - return (0); + /* NOTREACHED */ } } bno = (start + len - loc) * NBBY; @@ -806,98 +1060,21 @@ mapsearch(fs, cgp, bpref, allocsiz) * found the byte in the map * sift through the bits to find the selected frag */ - for (i = 0; i < NBBY; i += fs->fs_frag) { - blk = (cgp->cg_free[bno / NBBY] >> i) & - (0xff >> NBBY - fs->fs_frag); + for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { + blk = blkmap(fs, cg_blksfree(cgp), bno); blk <<= 1; field = around[allocsiz]; subfield = inside[allocsiz]; for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { - if ((blk & field) == subfield) { - return (bno + i + pos); - } + if ((blk & field) == subfield) + return (bno + pos); field <<= 1; subfield <<= 1; } } + printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt); panic("alloccg: block not in map"); - return (0); -} - -/* - * Update the frsum fields to reflect addition or deletion - * of some frags. 
- */ -fragacct(fs, fragmap, fraglist, cnt) - struct fs *fs; - int fragmap; - long fraglist[]; - int cnt; -{ - int inblk; - register int field, subfield; - register int siz, pos; - - inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1; - fragmap <<= 1; - for (siz = 1; siz < fs->fs_frag; siz++) { - if (((1 << siz) & inblk) == 0) - continue; - field = around[siz]; - subfield = inside[siz]; - for (pos = siz; pos <= fs->fs_frag; pos++) { - if ((fragmap & field) == subfield) { - fraglist[siz] += cnt; - pos += siz; - field <<= siz; - subfield <<= siz; - } - field <<= 1; - subfield <<= 1; - } - } -} - -/* - * Check that a specified block number is in range. - */ -badblock(fs, bn) - register struct fs *fs; - daddr_t bn; -{ - - if ((unsigned)bn >= fs->fs_size || bn < cgdmin(fs, dtog(fs, bn))) { - fserr(fs, "bad block"); - return (1); - } - return (0); -} - -/* - * Getfs maps a device number into a pointer to the incore super block. - * - * The algorithm is a linear search through the mount table. A - * consistency check of the super block magic number is performed. - * - * panic: no fs -- the device is not mounted. - * this "cannot happen" - */ -struct fs * -getfs(dev) - dev_t dev; -{ - register struct mount *mp; - register struct fs *fs; - - for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++) - if (mp->m_bufp != NULL && mp->m_dev == dev) { - fs = mp->m_bufp->b_un.b_fs; - if (fs->fs_magic != FS_MAGIC) - panic("getfs: bad magic"); - return (fs); - } - panic("getfs: no fs"); - return (NULL); + return (-1); } /* @@ -906,182 +1083,11 @@ getfs(dev) * The form of the error message is: * fs: error message */ -fserr(fs, cp) +fserr(fs, uid, cp) struct fs *fs; + uid_t uid; char *cp; { - printf("%s: %s\n", fs->fs_fsmnt, cp); -} - -/* - * Getfsx returns the index in the file system - * table of the specified device. The swap device - * is also assigned a pseudo-index. 
The index may - * be used as a compressed indication of the location - * of a block, recording - * <getfsx(dev),blkno> - * rather than - * <dev, blkno> - * provided the information need remain valid only - * as long as the file system is mounted. - */ -getfsx(dev) - dev_t dev; -{ - register struct mount *mp; - - if (dev == swapdev) - return (MSWAPX); - for(mp = &mount[0]; mp < &mount[NMOUNT]; mp++) - if (mp->m_dev == dev) - return (mp - &mount[0]); - return (-1); -} - -/* - * Update is the internal name of 'sync'. It goes through the disk - * queues to initiate sandbagged IO; goes through the inodes to write - * modified nodes; and it goes through the mount table to initiate - * the writing of the modified super blocks. - */ -update() -{ - register struct inode *ip; - register struct mount *mp; - register struct buf *bp; - struct fs *fs; - time_t tim; - int i, blks; - - if (updlock) - return; - updlock++; - /* - * Write back modified superblocks. - * Consistency check that the superblock - * of each file system is still in the buffer cache. - */ - for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++) - if (mp->m_bufp != NULL) { - fs = mp->m_bufp->b_un.b_fs; - if (fs->fs_fmod == 0) - continue; - if (fs->fs_ronly != 0) - panic("update: rofs mod"); - bp = getblk(mp->m_dev, SBLOCK, SBSIZE); - fs->fs_fmod = 0; - fs->fs_time = TIME; - if (bp->b_un.b_fs != fs) - panic("update: bad b_fs"); - bwrite(bp); - blks = howmany(fs->fs_cssize, fs->fs_bsize); - for (i = 0; i < blks; i++) { - bp = getblk(mp->m_dev, - fsbtodb(fs, fs->fs_csaddr + (i * fs->fs_frag)), - fs->fs_bsize); - bwrite(bp); - } - } - /* - * Write back each (modified) inode. - */ - for (ip = inode; ip < inodeNINODE; ip++) - if((ip->i_flag&ILOCK)==0 && ip->i_count) { - ip->i_flag |= ILOCK; - ip->i_count++; - tim = TIME; - iupdat(ip, &tim, &tim, 0); - iput(ip); - } - updlock = 0; - /* - * Force stale buffer cache information to be flushed, - * for all devices.
- */ - bflush(NODEV); -} - -/* - * block operations - * - * check if a block is available - */ -isblock(fs, cp, h) - struct fs *fs; - unsigned char *cp; - int h; -{ - unsigned char mask; - - switch (fs->fs_frag) { - case 8: - return (cp[h] == 0xff); - case 4: - mask = 0x0f << ((h & 0x1) << 2); - return ((cp[h >> 1] & mask) == mask); - case 2: - mask = 0x03 << ((h & 0x3) << 1); - return ((cp[h >> 2] & mask) == mask); - case 1: - mask = 0x01 << (h & 0x7); - return ((cp[h >> 3] & mask) == mask); - default: - panic("isblock bad fs_frag"); - return; - } -} - -/* - * take a block out of the map - */ -clrblock(fs, cp, h) - struct fs *fs; - unsigned char *cp; - int h; -{ - switch ((fs)->fs_frag) { - case 8: - cp[h] = 0; - return; - case 4: - cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); - return; - case 2: - cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); - return; - case 1: - cp[h >> 3] &= ~(0x01 << (h & 0x7)); - return; - default: - panic("clrblock bad fs_frag"); - return; - } -} - -/* - * put a block into the map - */ -setblock(fs, cp, h) - struct fs *fs; - unsigned char *cp; - int h; -{ - switch (fs->fs_frag) { - case 8: - cp[h] = 0xff; - return; - case 4: - cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); - return; - case 2: - cp[h >> 2] |= (0x03 << ((h & 0x3) << 1)); - return; - case 1: - cp[h >> 3] |= (0x01 << (h & 0x7)); - return; - default: - panic("setblock bad fs_frag"); - return; - } + log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp); }