X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/995c1535d930a56facf9dc57062c65005e4470f5..7188ac27426632d20adaac4191443dfefebf5789:/usr/src/sys/ufs/lfs/lfs_alloc.c diff --git a/usr/src/sys/ufs/lfs/lfs_alloc.c b/usr/src/sys/ufs/lfs/lfs_alloc.c index 14a3aab448..5d2399b34e 100644 --- a/usr/src/sys/ufs/lfs/lfs_alloc.c +++ b/usr/src/sys/ufs/lfs/lfs_alloc.c @@ -1,14 +1,34 @@ -/* lfs_alloc.c 2.8 82/07/15 */ +/* + * Copyright (c) 1982, 1986, 1989 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * + * @(#)lfs_alloc.c 7.9 (Berkeley) %G% + */ -#include "../h/param.h" -#include "../h/systm.h" -#include "../h/mount.h" -#include "../h/fs.h" -#include "../h/conf.h" -#include "../h/buf.h" -#include "../h/inode.h" -#include "../h/dir.h" -#include "../h/user.h" +#include "param.h" +#include "systm.h" +#include "mount.h" +#include "buf.h" +#include "user.h" +#include "vnode.h" +#include "kernel.h" +#include "syslog.h" +#include "cmap.h" +#include "../ufs/quota.h" +#include "../ufs/inode.h" +#include "../ufs/fs.h" extern u_long hashalloc(); extern ino_t ialloccg(); @@ -39,17 +59,19 @@ extern unsigned char *fragtbl[]; * 2) quadradically rehash into other cylinder groups, until an * available block is located. */ -struct buf * -alloc(ip, bpref, size) +alloc(ip, bpref, size, bpp, flags) register struct inode *ip; daddr_t bpref; int size; + struct buf **bpp; + int flags; { daddr_t bno; register struct fs *fs; register struct buf *bp; - int cg; + int cg, error; + *bpp = 0; fs = ip->i_fs; if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) { printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", @@ -58,27 +80,36 @@ alloc(ip, bpref, size) } if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) goto nospace; - if (u.u_uid != 0 && - fs->fs_cstotal.cs_nbfree * fs->fs_frag + fs->fs_cstotal.cs_nffree < - fs->fs_dsize * fs->fs_minfree / 100) + if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0) goto nospace; +#ifdef QUOTA + if (error = chkdq(ip, (long)btodb(size), 0)) + return (error); +#endif if (bpref >= fs->fs_size) bpref = 0; if (bpref == 0) cg = itog(fs, ip->i_number); else cg = dtog(fs, bpref); - bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size, alloccg); + bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size, + (u_long (*)())alloccg); if (bno <= 0) goto nospace; - bp = getblk(ip->i_dev, fsbtodb(fs, bno), size); - clrbuf(bp); - return (bp); + ip->i_blocks += btodb(size); + ip->i_flag |= IUPD|ICHG; +#ifdef SECSIZE + bp = getblk(ip->i_dev, fsbtodb(fs, bno), size, fs->fs_dbsize); +#else SECSIZE + bp = getblk(ip->i_devvp, fsbtodb(fs, bno), size); + if (flags & B_CLRBUF) + clrbuf(bp); + *bpp = bp; + return (0); nospace: fserr(fs, "file system full"); uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); - u.u_error = ENOSPC; - return (NULL); + return (ENOSPC); } /* @@ -89,17 +120,19 @@ nospace: * the original block. Failing that, the regular block allocator is * invoked to get an appropriate block. */ -struct buf * -realloccg(ip, bprev, bpref, osize, nsize) +realloccg(ip, bprev, bpref, osize, nsize, bpp) register struct inode *ip; daddr_t bprev, bpref; int osize, nsize; + struct buf **bpp; { - daddr_t bno; register struct fs *fs; - register struct buf *bp, *obp; - int cg; + struct buf *bp, *obp; + int cg, request; + daddr_t bno, bn; + int i, error, count; + *bpp = 0; fs = ip->i_fs; if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { @@ -107,44 +140,124 @@ realloccg(ip, bprev, bpref, osize, nsize) ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt); panic("realloccg: bad size"); } - if (u.u_uid != 0 && - fs->fs_cstotal.cs_nbfree * fs->fs_frag + fs->fs_cstotal.cs_nffree < - fs->fs_dsize * fs->fs_minfree / 100) + if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0) goto nospace; if (bprev == 0) { printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n", ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt); panic("realloccg: bad bprev"); } +#ifdef QUOTA + if (error = chkdq(ip, (long)btodb(nsize - osize), 0)) + return (error); +#endif cg = dtog(fs, bprev); bno = fragextend(ip, cg, (long)bprev, osize, nsize); if (bno != 0) { do { - bp = bread(ip->i_dev, fsbtodb(fs, bno), osize); - if (bp->b_flags & B_ERROR) { +#ifdef SECSIZE + bp = bread(ip->i_dev, fsbtodb(fs, bno), osize, + fs->fs_dbsize); +#else SECSIZE + error = bread(ip->i_devvp, fsbtodb(fs, bno), + osize, &bp); + if (error) { brelse(bp); - return (NULL); + return (error); } } while (brealloc(bp, nsize) == 0); bp->b_flags |= B_DONE; - bzero(bp->b_un.b_addr + osize, nsize - osize); - return (bp); + bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize); + ip->i_blocks += btodb(nsize - osize); + ip->i_flag |= IUPD|ICHG; + *bpp = bp; + return (0); } if (bpref >= fs->fs_size) bpref = 0; - bno = (daddr_t)hashalloc(ip, cg, (long)bpref, nsize, alloccg); + switch ((int)fs->fs_optim) { + case FS_OPTSPACE: + /* + * Allocate an exact sized fragment. Although this makes + * best use of space, we will waste time relocating it if + * the file continues to grow. If the fragmentation is + * less than half of the minimum free reserve, we choose + * to begin optimizing for time. + */ + request = nsize; + if (fs->fs_minfree < 5 || + fs->fs_cstotal.cs_nffree > + fs->fs_dsize * fs->fs_minfree / (2 * 100)) + break; + log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", + fs->fs_fsmnt); + fs->fs_optim = FS_OPTTIME; + break; + case FS_OPTTIME: + /* + * At this point we have discovered a file that is trying + * to grow a small fragment to a larger fragment. To save + * time, we allocate a full sized block, then free the + * unused portion. If the file continues to grow, the + * `fragextend' call above will be able to grow it in place + * without further copying. If aberrant programs cause + * disk fragmentation to grow within 2% of the free reserve, + * we choose to begin optimizing for space. + */ + request = fs->fs_bsize; + if (fs->fs_cstotal.cs_nffree < + fs->fs_dsize * (fs->fs_minfree - 2) / 100) + break; + log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", + fs->fs_fsmnt); + fs->fs_optim = FS_OPTSPACE; + break; + default: + printf("dev = 0x%x, optim = %d, fs = %s\n", + ip->i_dev, fs->fs_optim, fs->fs_fsmnt); + panic("realloccg: bad optim"); + /* NOTREACHED */ + } + bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request, + (u_long (*)())alloccg); if (bno > 0) { - obp = bread(ip->i_dev, fsbtodb(fs, bprev), osize); - if (obp->b_flags & B_ERROR) { +#ifdef SECSIZE + obp = bread(ip->i_dev, fsbtodb(fs, bprev), osize, + fs->fs_dbsize); +#else SECSIZE + error = bread(ip->i_devvp, fsbtodb(fs, bprev), osize, &obp); + if (error) { brelse(obp); - return (NULL); + return (error); + } + bn = fsbtodb(fs, bno); +#ifdef SECSIZE + bp = getblk(ip->i_dev, bn, nsize, fs->fs_dbsize); +#else SECSIZE + bp = getblk(ip->i_devvp, bn, nsize); +#endif SECSIZE + bcopy(obp->b_un.b_addr, bp->b_un.b_addr, (u_int)osize); + count = howmany(osize, CLBYTES); + for (i = 0; i < count; i++) +#ifdef SECSIZE + munhash(ip->i_dev, bn + i * CLBYTES / fs->fs_dbsize); +#else SECSIZE + munhash(ip->i_dev, bn + i * CLBYTES / DEV_BSIZE); +#endif SECSIZE + bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize); + if (obp->b_flags & B_DELWRI) { + obp->b_flags &= ~B_DELWRI; + u.u_ru.ru_oublock--; /* delete charge */ } - bp = getblk(ip->i_dev, fsbtodb(fs, bno), nsize); - bcopy(obp->b_un.b_addr, bp->b_un.b_addr, osize); - bzero(bp->b_un.b_addr + osize, nsize - osize); brelse(obp); - fre(ip, bprev, (off_t)osize); - return (bp); + blkfree(ip, bprev, (off_t)osize); + if (nsize < request) + blkfree(ip, bno + numfrags(fs, nsize), + (off_t)(request - nsize)); + ip->i_blocks += btodb(nsize - osize); + ip->i_flag |= IUPD|ICHG; + *bpp = bp; + return (0); } nospace: /* @@ -152,8 +265,7 @@ nospace: */ fserr(fs, "file system full"); uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); - u.u_error = ENOSPC; - return (NULL); + return (ENOSPC); } /* @@ -171,42 +283,52 @@ nospace: * 2) quadradically rehash into other cylinder groups, until an * available inode is located. */ -struct inode * -ialloc(pip, ipref, mode) +ialloc(pip, ipref, mode, ipp) register struct inode *pip; ino_t ipref; int mode; + struct inode **ipp; { ino_t ino; register struct fs *fs; register struct inode *ip; - int cg; + int cg, error; + *ipp = 0; fs = pip->i_fs; if (fs->fs_cstotal.cs_nifree == 0) goto noinodes; +#ifdef QUOTA + if (error = chkiq(pip->i_dev, (struct inode *)NULL, u.u_uid, 0)) + return (error); +#endif if (ipref >= fs->fs_ncg * fs->fs_ipg) ipref = 0; cg = itog(fs, ipref); ino = (ino_t)hashalloc(pip, cg, (long)ipref, mode, ialloccg); if (ino == 0) goto noinodes; - ip = iget(pip->i_dev, pip->i_fs, ino); - if (ip == NULL) { - ifree(ip, ino, 0); - return (NULL); + error = iget(pip, ino, ipp); + ip = *ipp; + if (error) { + ifree(pip, ino, 0); + return (error); } if (ip->i_mode) { printf("mode = 0%o, inum = %d, fs = %s\n", ip->i_mode, ip->i_number, fs->fs_fsmnt); panic("ialloc: dup alloc"); } - return (ip); + if (ip->i_blocks) { /* XXX */ + printf("free inode %s/%d had %d blocks\n", + fs->fs_fsmnt, ino, ip->i_blocks); + ip->i_blocks = 0; + } + return (0); noinodes: fserr(fs, "out of inodes"); uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); - u.u_error = ENOSPC; - return (NULL); + return (ENOSPC); } /* @@ -216,6 +338,7 @@ noinodes: * among those cylinder groups with above the average number of * free inodes, the one with the smallest number of directories. */ +ino_t dirpref(fs) register struct fs *fs; { @@ -230,35 +353,96 @@ dirpref(fs) mincg = cg; minndir = fs->fs_cs(fs, cg).cs_ndir; } - return (fs->fs_ipg * mincg); + return ((ino_t)(fs->fs_ipg * mincg)); } /* - * Select a cylinder to place a large block of data. - * - * The policy implemented by this algorithm is to maintain a - * rotor that sweeps the cylinder groups. When a block is - * needed, the rotor is advanced until a cylinder group with - * greater than the average number of free blocks is found. + * Select the desired position for the next block in a file. The file is + * logically divided into sections. The first section is composed of the + * direct blocks. Each additional section contains fs_maxbpg blocks. + * + * If no blocks have been allocated in the first section, the policy is to + * request a block in the same cylinder group as the inode that describes + * the file. If no blocks have been allocated in any other section, the + * policy is to place the section in a cylinder group with a greater than + * average number of free blocks. An appropriate cylinder group is found + * by using a rotor that sweeps the cylinder groups. When a new group of + * blocks is needed, the sweep begins in the cylinder group following the + * cylinder group from which the previous allocation was made. The sweep + * continues until a cylinder group with greater than the average number + * of free blocks is found. If the allocation is for the first block in an + * indirect block, the information on the previous allocation is unavailable; + * here a best guess is made based upon the logical block number being + * allocated. + * + * If a section is already partially allocated, the policy is to + * contiguously allocate fs_maxcontig blocks. The end of one of these + * contiguous blocks and the beginning of the next is physically separated + * so that the disk head will be in transit between them for at least + * fs_rotdelay milliseconds. This is to allow time for the processor to + * schedule another I/O transfer. */ daddr_t -blkpref(fs) - register struct fs *fs; +blkpref(ip, lbn, indx, bap) + struct inode *ip; + daddr_t lbn; + int indx; + daddr_t *bap; { - int cg, avgbfree; + register struct fs *fs; + register int cg; + int avgbfree, startcg; + daddr_t nextblk; - avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; - for (cg = fs->fs_cgrotor + 1; cg < fs->fs_ncg; cg++) - if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - fs->fs_cgrotor = cg; - return (fs->fs_fpg * cg + fs->fs_frag); - } - for (cg = 0; cg <= fs->fs_cgrotor; cg++) - if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - fs->fs_cgrotor = cg; + fs = ip->i_fs; + if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { + if (lbn < NDADDR) { + cg = itog(fs, ip->i_number); return (fs->fs_fpg * cg + fs->fs_frag); } - return (NULL); + /* + * Find a cylinder with greater than average number of + * unused data blocks. + */ + if (indx == 0 || bap[indx - 1] == 0) + startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg; + else + startcg = dtog(fs, bap[indx - 1]) + 1; + startcg %= fs->fs_ncg; + avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; + for (cg = startcg; cg < fs->fs_ncg; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (fs->fs_fpg * cg + fs->fs_frag); + } + for (cg = 0; cg <= startcg; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (fs->fs_fpg * cg + fs->fs_frag); + } + return (NULL); + } + /* + * One or more previous blocks have been laid out. If less + * than fs_maxcontig previous blocks are contiguous, the + * next block is requested contiguously, otherwise it is + * requested rotationally delayed by fs_rotdelay milliseconds. + */ + nextblk = bap[indx - 1] + fs->fs_frag; + if (indx > fs->fs_maxcontig && + bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig) + != nextblk) + return (nextblk); + if (fs->fs_rotdelay != 0) + /* + * Here we convert ms of delay to frags as: + * (frags) = (ms) * (rev/sec) * (sect/rev) / + * ((sect/frag) * (ms/sec)) + * then round up to the next block. + */ + nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / + (NSPF(fs) * 1000), fs->fs_frag); + return (nextblk); } /* @@ -302,9 +486,11 @@ hashalloc(ip, cg, pref, size, allocator) } /* * 3: brute force search + * Note that we start at i == 2, since 0 was checked initially, + * and 1 is always checked in the quadratic rehash. */ - cg = icg; - for (i = 0; i < fs->fs_ncg; i++) { + cg = (icg + 2) % fs->fs_ncg; + for (i = 2; i < fs->fs_ncg; i++) { result = (*allocator)(ip, cg, 0, size); if (result) return (result); @@ -329,31 +515,41 @@ fragextend(ip, cg, bprev, osize, nsize) int osize, nsize; { register struct fs *fs; - register struct buf *bp; register struct cg *cgp; + struct buf *bp; long bno; int frags, bbase; - int i; + int i, error; fs = ip->i_fs; - if (fs->fs_cs(fs, cg).cs_nffree < nsize - osize) + if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) return (NULL); frags = numfrags(fs, nsize); - bbase = fragoff(fs, bprev); - if (bbase > (bprev + frags - 1) % fs->fs_frag) { - /* cannot extend across a block boundry */ + bbase = fragnum(fs, bprev); + if (bbase > fragnum(fs, (bprev + frags - 1))) { + /* cannot extend across a block boundary */ + return (NULL); + } +#ifdef SECSIZE + bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, + fs->fs_dbsize); +#else SECSIZE + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, &bp); + if (error) { + brelse(bp); return (NULL); } - bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), fs->fs_bsize); +#endif SECSIZE cgp = bp->b_un.b_cg; - if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC) { + if (!cg_chkmagic(cgp)) { brelse(bp); return (NULL); } - cgp->cg_time = time; + cgp->cg_time = time.tv_sec; bno = dtogd(fs, bprev); for (i = numfrags(fs, osize); i < frags; i++) - if (isclr(cgp->cg_free, bno + i)) { + if (isclr(cg_blksfree(cgp), bno + i)) { brelse(bp); return (NULL); } @@ -364,13 +560,13 @@ fragextend(ip, cg, bprev, osize, nsize) * allocate the extended piece */ for (i = frags; i < fs->fs_frag - bbase; i++) - if (isclr(cgp->cg_free, bno + i)) + if (isclr(cg_blksfree(cgp), bno + i)) break; cgp->cg_frsum[i - numfrags(fs, osize)]--; if (i != frags) cgp->cg_frsum[i - frags]++; for (i = numfrags(fs, osize); i < frags; i++) { - clrbit(cgp->cg_free, bno + i); + clrbit(cg_blksfree(cgp), bno + i); cgp->cg_cs.cs_nffree--; fs->fs_cstotal.cs_nffree--; fs->fs_cs(fs, cg).cs_nffree--; @@ -394,22 +590,32 @@ alloccg(ip, cg, bpref, size) int size; { register struct fs *fs; - register struct buf *bp; register struct cg *cgp; - int bno, frags; - int allocsiz; + struct buf *bp; register int i; + int error, bno, frags, allocsiz; fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) return (NULL); - bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), fs->fs_bsize); +#ifdef SECSIZE + bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, + fs->fs_dbsize); +#else SECSIZE + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, &bp); + if (error) { + brelse(bp); + return (NULL); + } +#endif SECSIZE cgp = bp->b_un.b_cg; - if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC) { + if (!cg_chkmagic(cgp) || + (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { brelse(bp); return (NULL); } - cgp->cg_time = time; + cgp->cg_time = time.tv_sec; if (size == fs->fs_bsize) { bno = alloccgblk(fs, cgp, bpref); bdwrite(bp); @@ -436,23 +642,27 @@ alloccg(ip, cg, bpref, size) bno = alloccgblk(fs, cgp, bpref); bpref = dtogd(fs, bno); for (i = frags; i < fs->fs_frag; i++) - setbit(cgp->cg_free, bpref + i); + setbit(cg_blksfree(cgp), bpref + i); i = fs->fs_frag - frags; cgp->cg_cs.cs_nffree += i; fs->fs_cstotal.cs_nffree += i; fs->fs_cs(fs, cg).cs_nffree += i; + fs->fs_fmod++; cgp->cg_frsum[i]++; bdwrite(bp); return (bno); } bno = mapsearch(fs, cgp, bpref, allocsiz); - if (bno < 0) + if (bno < 0) { + brelse(bp); return (NULL); + } for (i = 0; i < frags; i++) - clrbit(cgp->cg_free, bno + i); + clrbit(cg_blksfree(cgp), bno + i); cgp->cg_cs.cs_nffree -= frags; fs->fs_cstotal.cs_nffree -= frags; fs->fs_cs(fs, cg).cs_nffree -= frags; + fs->fs_fmod++; cgp->cg_frsum[allocsiz]--; if (frags != allocsiz) cgp->cg_frsum[allocsiz - frags]++; @@ -486,24 +696,20 @@ alloccgblk(fs, cgp, bpref) bpref = cgp->cg_rotor; goto norot; } - bpref &= ~(fs->fs_frag - 1); + bpref = blknum(fs, bpref); bpref = dtogd(fs, bpref); /* * if the requested block is available, use it */ -/* - * disallow sequential layout. - * - if (isblock(fs, cgp->cg_free, bpref/fs->fs_frag)) { + if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) { bno = bpref; goto gotit; } - */ /* * check for a block available on the same cylinder */ cylno = cbtocylno(fs, bpref); - if (cgp->cg_btot[cylno] == 0) + if (cg_blktot(cgp)[cylno] == 0) goto norot; if (fs->fs_cpc == 0) { /* @@ -513,33 +719,17 @@ alloccgblk(fs, cgp, bpref) bpref = howmany(fs->fs_spc * cylno, NSPF(fs)); goto norot; } - /* - * find a block that is rotationally optimal - */ - cylbp = cgp->cg_b[cylno]; - if (fs->fs_rotdelay == 0) { - pos = cbtorpos(fs, bpref); - } else { - /* - * here we convert ms of delay to frags as: - * (frags) = (ms) * (rev/sec) * (sect/rev) / - * ((sect/frag) * (ms/sec)) - * then round up to the next rotational position - */ - bpref += fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / - (NSPF(fs) * 1000); - pos = cbtorpos(fs, bpref); - pos = (pos + 1) % NRPOS; - } /* * check the summary information to see if a block is * available in the requested cylinder starting at the - * optimal rotational position and proceeding around. + * requested rotational position and proceeding around. */ - for (i = pos; i < NRPOS; i++) + cylbp = cg_blks(fs, cgp, cylno); + pos = cbtorpos(fs, bpref); + for (i = pos; i < fs->fs_nrpos; i++) if (cylbp[i] > 0) break; - if (i == NRPOS) + if (i == fs->fs_nrpos) for (i = 0; i < pos; i++) if (cylbp[i] > 0) break; @@ -550,18 +740,19 @@ alloccgblk(fs, cgp, bpref) */ pos = cylno % fs->fs_cpc; bno = (cylno - pos) * fs->fs_spc / NSPB(fs); - if (fs->fs_postbl[pos][i] == -1) { + if (fs_postbl(fs, pos)[i] == -1) { printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); panic("alloccgblk: cyl groups corrupted"); } - for (i = fs->fs_postbl[pos][i];; ) { - if (isblock(fs, cgp->cg_free, bno + i)) { - bno = (bno + i) * fs->fs_frag; + for (i = fs_postbl(fs, pos)[i];; ) { + if (isblock(fs, cg_blksfree(cgp), bno + i)) { + bno = blkstofrags(fs, (bno + i)); goto gotit; } - delta = fs->fs_rotbl[i]; - if (delta <= 0 || delta > MAXBPC - i) + delta = fs_rotbl(fs)[i]; + if (delta <= 0 || + delta + i > fragstoblks(fs, fs->fs_fpg)) break; i += delta; } @@ -573,22 +764,22 @@ norot: * no blocks in the requested cylinder, so take next * available one in this cylinder group. */ - bno = mapsearch(fs, cgp, bpref, fs->fs_frag); + bno = mapsearch(fs, cgp, bpref, (int)fs->fs_frag); if (bno < 0) return (NULL); cgp->cg_rotor = bno; gotit: - clrblock(fs, cgp->cg_free, bno/fs->fs_frag); + clrblock(fs, cg_blksfree(cgp), (long)fragstoblks(fs, bno)); cgp->cg_cs.cs_nbfree--; fs->fs_cstotal.cs_nbfree--; fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; cylno = cbtocylno(fs, bno); - cgp->cg_b[cylno][cbtorpos(fs, bno)]--; - cgp->cg_btot[cylno]--; + cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; + cg_blktot(cgp)[cylno]--; fs->fs_fmod++; return (cgp->cg_cgx * fs->fs_fpg + bno); } - + /* * Determine whether an inode can be allocated. * @@ -606,39 +797,63 @@ ialloccg(ip, cg, ipref, mode) int mode; { register struct fs *fs; - register struct buf *bp; register struct cg *cgp; - int i; + struct buf *bp; + int error, start, len, loc, map, i; fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nifree == 0) return (NULL); - bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), fs->fs_bsize); +#ifdef SECSIZE + bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, + fs->fs_dbsize); +#else SECSIZE + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, &bp); + if (error) { + brelse(bp); + return (NULL); + } +#endif SECSIZE cgp = bp->b_un.b_cg; - if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC) { + if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { brelse(bp); return (NULL); } - cgp->cg_time = time; + cgp->cg_time = time.tv_sec; if (ipref) { ipref %= fs->fs_ipg; - if (isclr(cgp->cg_iused, ipref)) + if (isclr(cg_inosused(cgp), ipref)) goto gotit; - } else - ipref = cgp->cg_irotor; - for (i = 0; i < fs->fs_ipg; i++) { - ipref++; - if (ipref >= fs->fs_ipg) - ipref = 0; - if (isclr(cgp->cg_iused, ipref)) { + } + start = cgp->cg_irotor / NBBY; + len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); + loc = skpc(0xff, len, &cg_inosused(cgp)[start]); + if (loc == 0) { + len = start + 1; + start = 0; + loc = skpc(0xff, len, &cg_inosused(cgp)[0]); + if (loc == 0) { + printf("cg = %s, irotor = %d, fs = %s\n", + cg, cgp->cg_irotor, fs->fs_fsmnt); + panic("ialloccg: map corrupted"); + /* NOTREACHED */ + } + } + i = start + len - loc; + map = cg_inosused(cgp)[i]; + ipref = i * NBBY; + for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { + if ((map & i) == 0) { cgp->cg_irotor = ipref; goto gotit; } } - brelse(bp); - return (NULL); + printf("fs = %s\n", fs->fs_fsmnt); + panic("ialloccg: block not in map"); + /* NOTREACHED */ gotit: - setbit(cgp->cg_iused, ipref); + setbit(cg_inosused(cgp), ipref); cgp->cg_cs.cs_nifree--; fs->fs_cstotal.cs_nifree--; fs->fs_cs(fs, cg).cs_nifree--; @@ -659,67 +874,77 @@ gotit: * free map. If a fragment is deallocated, a possible * block reassembly is checked. */ -fre(ip, bno, size) +blkfree(ip, bno, size) register struct inode *ip; daddr_t bno; off_t size; { register struct fs *fs; register struct cg *cgp; - register struct buf *bp; - int cg, blk, frags, bbase; + struct buf *bp; + int error, cg, blk, frags, bbase; register int i; fs = ip->i_fs; if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) { printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); - panic("free: bad size"); + panic("blkfree: bad size"); } cg = dtog(fs, bno); if (badblock(fs, bno)) { printf("bad block %d, ino %d\n", bno, ip->i_number); return; } - bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), fs->fs_bsize); +#ifdef SECSIZE + bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, + fs->fs_dbsize); +#else SECSIZE + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, &bp); + if (error) { + brelse(bp); + return; + } +#endif SECSIZE cgp = bp->b_un.b_cg; - if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC) { + if (!cg_chkmagic(cgp)) { brelse(bp); return; } - cgp->cg_time = time; + cgp->cg_time = time.tv_sec; bno = dtogd(fs, bno); if (size == fs->fs_bsize) { - if (isblock(fs, cgp->cg_free, bno/fs->fs_frag)) { + if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno))) { printf("dev = 0x%x, block = %d, fs = %s\n", ip->i_dev, bno, fs->fs_fsmnt); - panic("free: freeing free block"); + panic("blkfree: freeing free block"); } - setblock(fs, cgp->cg_free, bno/fs->fs_frag); + setblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno)); cgp->cg_cs.cs_nbfree++; fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; i = cbtocylno(fs, bno); - cgp->cg_b[i][cbtorpos(fs, bno)]++; - cgp->cg_btot[i]++; + cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++; + cg_blktot(cgp)[i]++; } else { - bbase = bno - (bno % fs->fs_frag); + bbase = bno - fragnum(fs, bno); /* * decrement the counts associated with the old frags */ - blk = blkmap(fs, cgp->cg_free, bbase); + blk = blkmap(fs, cg_blksfree(cgp), bbase); fragacct(fs, blk, cgp->cg_frsum, -1); /* * deallocate the fragment */ frags = numfrags(fs, size); for (i = 0; i < frags; i++) { - if (isset(cgp->cg_free, bno + i)) { + if (isset(cg_blksfree(cgp), bno + i)) { printf("dev = 0x%x, block = %d, fs = %s\n", ip->i_dev, bno + i, fs->fs_fsmnt); - panic("free: freeing free frag"); + panic("blkfree: freeing free frag"); } - setbit(cgp->cg_free, bno + i); + setbit(cg_blksfree(cgp), bno + i); } cgp->cg_cs.cs_nffree += i; fs->fs_cstotal.cs_nffree += i; @@ -727,12 +952,13 @@ fre(ip, bno, size) /* * add back in counts associated with the new frags */ - blk = blkmap(fs, cgp->cg_free, bbase); + blk = blkmap(fs, cg_blksfree(cgp), bbase); fragacct(fs, blk, cgp->cg_frsum, 1); /* * if a complete block has been reassembled, account for it */ - if (isblock(fs, cgp->cg_free, bbase / fs->fs_frag)) { + if (isblock(fs, cg_blksfree(cgp), + (daddr_t)fragstoblks(fs, bbase))) { cgp->cg_cs.cs_nffree -= fs->fs_frag; fs->fs_cstotal.cs_nffree -= fs->fs_frag; fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; @@ -740,8 +966,8 @@ fre(ip, bno, size) fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; i = cbtocylno(fs, bbase); - cgp->cg_b[i][cbtorpos(fs, bbase)]++; - cgp->cg_btot[i]++; + cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; + cg_blktot(cgp)[i]++; } } fs->fs_fmod++; @@ -760,8 +986,8 @@ ifree(ip, ino, mode) { register struct fs *fs; register struct cg *cgp; - register struct buf *bp; - int cg; + struct buf *bp; + int error, cg; fs = ip->i_fs; if ((unsigned)ino >= fs->fs_ipg*fs->fs_ncg) { @@ -770,20 +996,32 @@ ifree(ip, ino, mode) panic("ifree: range"); } cg = itog(fs, ino); - bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), fs->fs_bsize); +#ifdef SECSIZE + bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, + fs->fs_dbsize); +#else SECSIZE + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, &bp); + if (error) { + brelse(bp); + return; + } +#endif SECSIZE cgp = bp->b_un.b_cg; - if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC) { + if (!cg_chkmagic(cgp)) { brelse(bp); return; } - cgp->cg_time = time; + cgp->cg_time = time.tv_sec; ino %= fs->fs_ipg; - if (isclr(cgp->cg_iused, ino)) { + if (isclr(cg_inosused(cgp), ino)) { printf("dev = 0x%x, ino = %d, fs = %s\n", ip->i_dev, ino, fs->fs_fsmnt); panic("ifree: freeing free inode"); } - clrbit(cgp->cg_iused, ino); + clrbit(cg_inosused(cgp), ino); + if (ino < cgp->cg_irotor) + cgp->cg_irotor = ino; cgp->cg_cs.cs_nifree++; fs->fs_cstotal.cs_nifree++; fs->fs_cs(fs, cg).cs_nifree++; @@ -822,18 +1060,20 @@ mapsearch(fs, cgp, bpref, allocsiz) else start = cgp->cg_frotor / NBBY; len = howmany(fs->fs_fpg, NBBY) - start; - loc = scanc(len, &cgp->cg_free[start], fragtbl[fs->fs_frag], - 1 << (allocsiz - 1 + (fs->fs_frag % NBBY))); + loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[start], + (u_char *)fragtbl[fs->fs_frag], + (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); if (loc == 0) { len = start + 1; start = 0; - loc = scanc(len, &cgp->cg_free[start], fragtbl[fs->fs_frag], - 1 << (allocsiz - 1 + (fs->fs_frag % NBBY))); + loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[0], + (u_char *)fragtbl[fs->fs_frag], + (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); if (loc == 0) { printf("start = %d, len = %d, fs = %s\n", start, len, fs->fs_fsmnt); panic("alloccg: map corrupted"); - return (-1); + /* NOTREACHED */ } } bno = (start + len - loc) * NBBY; @@ -843,7 +1083,7 @@ mapsearch(fs, cgp, bpref, allocsiz) * sift through the bits to find the selected frag */ for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { - blk = blkmap(fs, cgp->cg_free, bno); + blk = blkmap(fs, cg_blksfree(cgp), bno); blk <<= 1; field = around[allocsiz]; subfield = inside[allocsiz]; @@ -859,87 +1099,6 @@ mapsearch(fs, cgp, bpref, allocsiz) return (-1); } -/* - * Update the frsum fields to reflect addition or deletion - * of some frags. - */ -fragacct(fs, fragmap, fraglist, cnt) - struct fs *fs; - int fragmap; - long fraglist[]; - int cnt; -{ - int inblk; - register int field, subfield; - register int siz, pos; - - inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1; - fragmap <<= 1; - for (siz = 1; siz < fs->fs_frag; siz++) { - if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0) - continue; - field = around[siz]; - subfield = inside[siz]; - for (pos = siz; pos <= fs->fs_frag; pos++) { - if ((fragmap & field) == subfield) { - fraglist[siz] += cnt; - pos += siz; - field <<= siz; - subfield <<= siz; - } - field <<= 1; - subfield <<= 1; - } - } -} - -/* - * Check that a specified block number is in range. - */ -badblock(fs, bn) - register struct fs *fs; - daddr_t bn; -{ - - if ((unsigned)bn >= fs->fs_size) { - printf("bad block %d, ", bn); - fserr(fs, "bad block"); - return (1); - } - return (0); -} - -/* - * Getfs maps a device number into a pointer to the incore super block. - * - * The algorithm is a linear search through the mount table. A - * consistency check of the super block magic number is performed. - * - * panic: no fs -- the device is not mounted. - * this "cannot happen" - */ -struct fs * -getfs(dev) - dev_t dev; -{ - register struct mount *mp; - register struct fs *fs; - - for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++) { - if (mp->m_bufp == NULL || mp->m_dev != dev) - continue; - fs = mp->m_bufp->b_un.b_fs; - if (fs->fs_magic != FS_MAGIC) { - printf("dev = 0x%x, fs = %s\n", dev, fs->fs_fsmnt); - panic("getfs: bad magic"); - } - return (fs); - } - printf("dev = 0x%x\n", dev); - panic("getfs: no fs"); - return (NULL); -} - /* * Fserr prints the name of a file system with an error diagnostic. * @@ -951,170 +1110,5 @@ fserr(fs, cp) char *cp; { - printf("%s: %s\n", fs->fs_fsmnt, cp); -} - -/* - * Getfsx returns the index in the file system - * table of the specified device. The swap device - * is also assigned a pseudo-index. The index may - * be used as a compressed indication of the location - * of a block, recording - * - * rather than - * - * provided the information need remain valid only - * as long as the file system is mounted. - */ -getfsx(dev) - dev_t dev; -{ - register struct mount *mp; - - if (dev == swapdev) - return (MSWAPX); - for(mp = &mount[0]; mp < &mount[NMOUNT]; mp++) - if (mp->m_dev == dev) - return (mp - &mount[0]); - return (-1); -} - -/* - * Update is the internal name of 'sync'. It goes through the disk - * queues to initiate sandbagged IO; goes through the inodes to write - * modified nodes; and it goes through the mount table to initiate - * the writing of the modified super blocks. - */ -update(flag) - int flag; -{ - register struct inode *ip; - register struct mount *mp; - register struct buf *bp; - struct fs *fs; - int i, blks; - - if (updlock) - return; - updlock++; - /* - * Write back modified superblocks. - * Consistency check that the superblock - * of each file system is still in the buffer cache. - */ - for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++) { - if (mp->m_bufp == NULL) - continue; - fs = mp->m_bufp->b_un.b_fs; - if (fs->fs_fmod == 0) - continue; - if (fs->fs_ronly != 0) { /* ### */ - printf("fs = %s\n", fs->fs_fsmnt); - panic("update: rofs mod"); - } - fs->fs_fmod = 0; - fs->fs_time = time; - sbupdate(mp); - } - /* - * Write back each (modified) inode. - */ - for (ip = inode; ip < inodeNINODE; ip++) { - if ((ip->i_flag & ILOCK) != 0 || ip->i_count == 0) - continue; - ip->i_flag |= ILOCK; - ip->i_count++; - iupdat(ip, &time, &time, 0); - iput(ip); - } - updlock = 0; - /* - * Force stale buffer cache information to be flushed, - * for all devices. - */ - bflush(NODEV); -} - -/* - * block operations - * - * check if a block is available - */ -isblock(fs, cp, h) - struct fs *fs; - unsigned char *cp; - int h; -{ - unsigned char mask; - - switch (fs->fs_frag) { - case 8: - return (cp[h] == 0xff); - case 4: - mask = 0x0f << ((h & 0x1) << 2); - return ((cp[h >> 1] & mask) == mask); - case 2: - mask = 0x03 << ((h & 0x3) << 1); - return ((cp[h >> 2] & mask) == mask); - case 1: - mask = 0x01 << (h & 0x7); - return ((cp[h >> 3] & mask) == mask); - default: - panic("isblock"); - return (NULL); - } -} - -/* - * take a block out of the map - */ -clrblock(fs, cp, h) - struct fs *fs; - unsigned char *cp; - int h; -{ - switch ((fs)->fs_frag) { - case 8: - cp[h] = 0; - return; - case 4: - cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); - return; - case 2: - cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); - return; - case 1: - cp[h >> 3] &= ~(0x01 << (h & 0x7)); - return; - default: - panic("clrblock"); - return; - } -} - -/* - * put a block into the map - */ -setblock(fs, cp, h) - struct fs *fs; - unsigned char *cp; - int h; -{ - switch (fs->fs_frag) { - case 8: - cp[h] = 0xff; - return; - case 4: - cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); - return; - case 2: - cp[h >> 2] |= (0x03 << ((h & 0x3) << 1)); - return; - case 1: - cp[h >> 3] |= (0x01 << (h & 0x7)); - return; - default: - panic("setblock"); - return; - } + log(LOG_ERR, "%s: %s\n", fs->fs_fsmnt, cp); }