UFS/FFS split for LFS version 1
[unix-history] / usr / src / sys / ufs / ffs / ffs_alloc.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3 * All rights reserved.
4 *
5 * %sccs.include.redist.c%
6 *
7 * @(#)ffs_alloc.c 7.28 (Berkeley) %G%
8 */
9
10#include <sys/param.h>
11#include <sys/systm.h>
12#include <sys/buf.h>
13#include <sys/proc.h>
14#include <sys/vnode.h>
15#include <sys/kernel.h>
16#include <sys/syslog.h>
17
18#include <ufs/ufs/quota.h>
19#include <ufs/ufs/inode.h>
20
21#include <ufs/ffs/fs.h>
22#include <ufs/ffs/ffs_extern.h>
23
24extern u_long nextgennumber;
25
26static daddr_t ffs_alloccg __P((struct inode *, int, daddr_t, int));
27static daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, daddr_t));
28static ino_t ffs_dirpref __P((struct fs *));
29static daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
30static void ffs_fserr __P((struct fs *, u_int, char *));
31static u_long ffs_hashalloc
32 __P((struct inode *, int, long, int, u_long (*)()));
33static ino_t ffs_ialloccg __P((struct inode *, int, daddr_t, int));
34static daddr_t ffs_mapsearch __P((struct fs *, struct cg *, daddr_t, int));
35
36/*
37 * Allocate a block in the file system.
38 *
39 * The size of the requested block is given, which must be some
40 * multiple of fs_fsize and <= fs_bsize.
41 * A preference may be optionally specified. If a preference is given
42 * the following hierarchy is used to allocate a block:
43 * 1) allocate the requested block.
44 * 2) allocate a rotationally optimal block in the same cylinder.
45 * 3) allocate a block in the same cylinder group.
46 * 4) quadradically rehash into other cylinder groups, until an
47 * available block is located.
48 * If no block preference is given the following heirarchy is used
49 * to allocate a block:
50 * 1) allocate a block in the cylinder group that contains the
51 * inode for the file.
52 * 2) quadradically rehash into other cylinder groups, until an
53 * available block is located.
54 */
55ffs_alloc(ip, lbn, bpref, size, bnp)
56 register struct inode *ip;
57 daddr_t lbn, bpref;
58 int size;
59 daddr_t *bnp;
60{
61 daddr_t bno;
62 register struct fs *fs;
63 register struct buf *bp;
64 int cg, error;
65 struct ucred *cred = curproc->p_ucred; /* XXX */
66
67 *bnp = 0;
68 fs = ip->i_fs;
69 if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
70 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
71 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
72 panic("ffs_alloc: bad size");
73 }
74 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
75 goto nospace;
76 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
77 goto nospace;
78#ifdef QUOTA
79 if (error = chkdq(ip, (long)btodb(size), cred, 0))
80 return (error);
81#endif
82 if (bpref >= fs->fs_size)
83 bpref = 0;
84 if (bpref == 0)
85 cg = itog(fs, ip->i_number);
86 else
87 cg = dtog(fs, bpref);
88 bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size,
89 (u_long (*)())ffs_alloccg);
90 if (bno > 0) {
91 ip->i_blocks += btodb(size);
92 ip->i_flag |= IUPD|ICHG;
93 *bnp = bno;
94 return (0);
95 }
96#ifdef QUOTA
97 /*
98 * Restore user's disk quota because allocation failed.
99 */
100 (void) chkdq(ip, (long)-btodb(size), cred, FORCE);
101#endif
102nospace:
103 ffs_fserr(fs, cred->cr_uid, "file system full");
104 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
105 return (ENOSPC);
106}
107
108/*
109 * Reallocate a fragment to a bigger size
110 *
111 * The number and size of the old block is given, and a preference
112 * and new size is also specified. The allocator attempts to extend
113 * the original block. Failing that, the regular block allocator is
114 * invoked to get an appropriate block.
115 */
116ffs_realloccg(ip, lbprev, bpref, osize, nsize, bpp)
117 register struct inode *ip;
118 off_t lbprev;
119 daddr_t bpref;
120 int osize, nsize;
121 struct buf **bpp;
122{
123 register struct fs *fs;
124 struct buf *bp, *obp;
125 int cg, request, error;
126 daddr_t bprev, bno;
127 struct ucred *cred = curproc->p_ucred; /* XXX */
128
129 *bpp = 0;
130 fs = ip->i_fs;
131 if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
132 (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
133 printf(
134 "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
135 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
136 panic("ffs_realloccg: bad size");
137 }
138 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
139 goto nospace;
140 if ((bprev = ip->i_db[lbprev]) == 0) {
141 printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
142 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
143 panic("ffs_realloccg: bad bprev");
144 }
145 /*
146 * Allocate the extra space in the buffer.
147 */
148 if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) {
149 brelse(bp);
150 return (error);
151 }
152#ifdef QUOTA
153 if (error = chkdq(ip, (long)btodb(nsize - osize), cred, 0)) {
154 brelse(bp);
155 return (error);
156 }
157#endif
158 /*
159 * Check for extension in the existing location.
160 */
161 cg = dtog(fs, bprev);
162 if (bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) {
163 if (bp->b_blkno != fsbtodb(fs, bno))
164 panic("bad blockno");
165 ip->i_blocks += btodb(nsize - osize);
166 ip->i_flag |= IUPD|ICHG;
167 allocbuf(bp, nsize);
168 bp->b_flags |= B_DONE;
169 bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
170 *bpp = bp;
171 return (0);
172 }
173 /*
174 * Allocate a new disk location.
175 */
176 if (bpref >= fs->fs_size)
177 bpref = 0;
178 switch ((int)fs->fs_optim) {
179 case FS_OPTSPACE:
180 /*
181 * Allocate an exact sized fragment. Although this makes
182 * best use of space, we will waste time relocating it if
183 * the file continues to grow. If the fragmentation is
184 * less than half of the minimum free reserve, we choose
185 * to begin optimizing for time.
186 */
187 request = nsize;
188 if (fs->fs_minfree < 5 ||
189 fs->fs_cstotal.cs_nffree >
190 fs->fs_dsize * fs->fs_minfree / (2 * 100))
191 break;
192 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
193 fs->fs_fsmnt);
194 fs->fs_optim = FS_OPTTIME;
195 break;
196 case FS_OPTTIME:
197 /*
198 * At this point we have discovered a file that is trying to
199 * grow a small fragment to a larger fragment. To save time,
200 * we allocate a full sized block, then free the unused portion.
201 * If the file continues to grow, the `ffs_fragextend' call
202 * above will be able to grow it in place without further
203 * copying. If aberrant programs cause disk fragmentation to
204 * grow within 2% of the free reserve, we choose to begin
205 * optimizing for space.
206 */
207 request = fs->fs_bsize;
208 if (fs->fs_cstotal.cs_nffree <
209 fs->fs_dsize * (fs->fs_minfree - 2) / 100)
210 break;
211 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
212 fs->fs_fsmnt);
213 fs->fs_optim = FS_OPTSPACE;
214 break;
215 default:
216 printf("dev = 0x%x, optim = %d, fs = %s\n",
217 ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
218 panic("ffs_realloccg: bad optim");
219 /* NOTREACHED */
220 }
221 bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
222 (u_long (*)())ffs_alloccg);
223 if (bno > 0) {
224#ifdef SECSIZE
225 obp = bread(ip->i_dev, fsbtodb(fs, bprev), osize,
226 fs->fs_dbsize);
227#else SECSIZE
228 count = howmany(osize, CLBYTES);
229 for (i = 0; i < count; i++)
230#ifdef SECSIZE
231 munhash(ip->i_dev, bn + i * CLBYTES / fs->fs_dbsize);
232#else SECSIZE
233 munhash(ip->i_dev, bn + i * CLBYTES / DEV_BSIZE);
234#endif SECSIZE
235 ffs_blkfree(ip, bprev, (off_t)osize);
236 if (nsize < request)
237 ffs_blkfree(ip, bno + numfrags(fs, nsize),
238 (off_t)(request - nsize));
239 ip->i_blocks += btodb(nsize - osize);
240 ip->i_flag |= IUPD|ICHG;
241 allocbuf(bp, nsize);
242 bp->b_flags |= B_DONE;
243 bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
244 *bpp = bp;
245 return (0);
246 }
247#ifdef QUOTA
248 /*
249 * Restore user's disk quota because allocation failed.
250 */
251 (void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE);
252#endif
253 brelse(bp);
254nospace:
255 /*
256 * no space available
257 */
258 ffs_fserr(fs, cred->cr_uid, "file system full");
259 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
260 return (ENOSPC);
261}
262
263/*
264 * Allocate an inode in the file system.
265 *
266 * If allocating a directory, use ffs_dirpref to select the inode.
267 * If allocating in a directory, the following hierarchy is followed:
268 * 1) allocate the preferred inode.
269 * 2) allocate an inode in the same cylinder group.
270 * 3) quadradically rehash into other cylinder groups, until an
271 * available inode is located.
272 * If no inode preference is given the following heirarchy is used
273 * to allocate an inode:
274 * 1) allocate an inode in cylinder group 0.
275 * 2) quadradically rehash into other cylinder groups, until an
276 * available inode is located.
277 */
278ffs_ialloc(pip, mode, cred, ipp)
279 register struct inode *pip;
280 int mode;
281 struct ucred *cred;
282 struct inode **ipp;
283{
284 register struct fs *fs;
285 register struct inode *ip;
286 ino_t ino, ipref;
287 int cg, error;
288
289 *ipp = NULL;
290 fs = pip->i_fs;
291 if (fs->fs_cstotal.cs_nifree == 0)
292 goto noinodes;
293
294 if ((mode & IFMT) == IFDIR)
295 ipref = ffs_dirpref(pip->i_fs);
296 else
297 ipref = pip->i_number;
298 if (ipref >= fs->fs_ncg * fs->fs_ipg)
299 ipref = 0;
300 cg = itog(fs, ipref);
301 ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_ialloccg);
302 if (ino == 0)
303 goto noinodes;
304 error = ffs_iget(pip, ino, ipp);
305 if (error) {
306 ffs_ifree(pip, ino, mode); /* XXX already freed? */
307 return (error);
308 }
309 ip = *ipp;
310 if (ip->i_mode) {
311 printf("mode = 0%o, inum = %d, fs = %s\n",
312 ip->i_mode, ip->i_number, fs->fs_fsmnt);
313 panic("ffs_ialloc: dup alloc");
314 }
315 if (ip->i_blocks) { /* XXX */
316 printf("free inode %s/%d had %d blocks\n",
317 fs->fs_fsmnt, ino, ip->i_blocks);
318 ip->i_blocks = 0;
319 }
320 ip->i_flags = 0;
321 /*
322 * Set up a new generation number for this inode.
323 */
324 if (++nextgennumber < (u_long)time.tv_sec)
325 nextgennumber = time.tv_sec;
326 ip->i_gen = nextgennumber;
327 return (0);
328noinodes:
329 ffs_fserr(fs, cred->cr_uid, "out of inodes");
330 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
331 return (ENOSPC);
332}
333
334/*
335 * Find a cylinder to place a directory.
336 *
337 * The policy implemented by this algorithm is to select from
338 * among those cylinder groups with above the average number of
339 * free inodes, the one with the smallest number of directories.
340 */
341static ino_t
342ffs_dirpref(fs)
343 register struct fs *fs;
344{
345 int cg, minndir, mincg, avgifree;
346
347 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
348 minndir = fs->fs_ipg;
349 mincg = 0;
350 for (cg = 0; cg < fs->fs_ncg; cg++)
351 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
352 fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
353 mincg = cg;
354 minndir = fs->fs_cs(fs, cg).cs_ndir;
355 }
356 return ((ino_t)(fs->fs_ipg * mincg));
357}
358
359/*
360 * Select the desired position for the next block in a file. The file is
361 * logically divided into sections. The first section is composed of the
362 * direct blocks. Each additional section contains fs_maxbpg blocks.
363 *
364 * If no blocks have been allocated in the first section, the policy is to
365 * request a block in the same cylinder group as the inode that describes
366 * the file. If no blocks have been allocated in any other section, the
367 * policy is to place the section in a cylinder group with a greater than
368 * average number of free blocks. An appropriate cylinder group is found
369 * by using a rotor that sweeps the cylinder groups. When a new group of
370 * blocks is needed, the sweep begins in the cylinder group following the
371 * cylinder group from which the previous allocation was made. The sweep
372 * continues until a cylinder group with greater than the average number
373 * of free blocks is found. If the allocation is for the first block in an
374 * indirect block, the information on the previous allocation is unavailable;
375 * here a best guess is made based upon the logical block number being
376 * allocated.
377 *
378 * If a section is already partially allocated, the policy is to
379 * contiguously allocate fs_maxcontig blocks. The end of one of these
380 * contiguous blocks and the beginning of the next is physically separated
381 * so that the disk head will be in transit between them for at least
382 * fs_rotdelay milliseconds. This is to allow time for the processor to
383 * schedule another I/O transfer.
384 */
385daddr_t
386ffs_blkpref(ip, lbn, indx, bap)
387 struct inode *ip;
388 daddr_t lbn;
389 int indx;
390 daddr_t *bap;
391{
392 register struct fs *fs;
393 register int cg;
394 int avgbfree, startcg;
395 daddr_t nextblk;
396
397 fs = ip->i_fs;
398 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
399 if (lbn < NDADDR) {
400 cg = itog(fs, ip->i_number);
401 return (fs->fs_fpg * cg + fs->fs_frag);
402 }
403 /*
404 * Find a cylinder with greater than average number of
405 * unused data blocks.
406 */
407 if (indx == 0 || bap[indx - 1] == 0)
408 startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
409 else
410 startcg = dtog(fs, bap[indx - 1]) + 1;
411 startcg %= fs->fs_ncg;
412 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
413 for (cg = startcg; cg < fs->fs_ncg; cg++)
414 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
415 fs->fs_cgrotor = cg;
416 return (fs->fs_fpg * cg + fs->fs_frag);
417 }
418 for (cg = 0; cg <= startcg; cg++)
419 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
420 fs->fs_cgrotor = cg;
421 return (fs->fs_fpg * cg + fs->fs_frag);
422 }
423 return (NULL);
424 }
425 /*
426 * One or more previous blocks have been laid out. If less
427 * than fs_maxcontig previous blocks are contiguous, the
428 * next block is requested contiguously, otherwise it is
429 * requested rotationally delayed by fs_rotdelay milliseconds.
430 */
431 nextblk = bap[indx - 1] + fs->fs_frag;
432 if (indx > fs->fs_maxcontig &&
433 bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig)
434 != nextblk)
435 return (nextblk);
436 if (fs->fs_rotdelay != 0)
437 /*
438 * Here we convert ms of delay to frags as:
439 * (frags) = (ms) * (rev/sec) * (sect/rev) /
440 * ((sect/frag) * (ms/sec))
441 * then round up to the next block.
442 */
443 nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
444 (NSPF(fs) * 1000), fs->fs_frag);
445 return (nextblk);
446}
447
448/*
449 * Implement the cylinder overflow algorithm.
450 *
451 * The policy implemented by this algorithm is:
452 * 1) allocate the block in its requested cylinder group.
453 * 2) quadradically rehash on the cylinder group number.
454 * 3) brute force search for a free block.
455 */
456/*VARARGS5*/
457static u_long
458ffs_hashalloc(ip, cg, pref, size, allocator)
459 struct inode *ip;
460 int cg;
461 long pref;
462 int size; /* size for data blocks, mode for inodes */
463 u_long (*allocator)();
464{
465 register struct fs *fs;
466 long result;
467 int i, icg = cg;
468
469 fs = ip->i_fs;
470 /*
471 * 1: preferred cylinder group
472 */
473 result = (*allocator)(ip, cg, pref, size);
474 if (result)
475 return (result);
476 /*
477 * 2: quadratic rehash
478 */
479 for (i = 1; i < fs->fs_ncg; i *= 2) {
480 cg += i;
481 if (cg >= fs->fs_ncg)
482 cg -= fs->fs_ncg;
483 result = (*allocator)(ip, cg, 0, size);
484 if (result)
485 return (result);
486 }
487 /*
488 * 3: brute force search
489 * Note that we start at i == 2, since 0 was checked initially,
490 * and 1 is always checked in the quadratic rehash.
491 */
492 cg = (icg + 2) % fs->fs_ncg;
493 for (i = 2; i < fs->fs_ncg; i++) {
494 result = (*allocator)(ip, cg, 0, size);
495 if (result)
496 return (result);
497 cg++;
498 if (cg == fs->fs_ncg)
499 cg = 0;
500 }
501 return (NULL);
502}
503
504/*
505 * Determine whether a fragment can be extended.
506 *
507 * Check to see if the necessary fragments are available, and
508 * if they are, allocate them.
509 */
510static daddr_t
511ffs_fragextend(ip, cg, bprev, osize, nsize)
512 struct inode *ip;
513 int cg;
514 long bprev;
515 int osize, nsize;
516{
517 register struct fs *fs;
518 register struct cg *cgp;
519 struct buf *bp;
520 long bno;
521 int frags, bbase;
522 int i, error;
523
524 fs = ip->i_fs;
525 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
526 return (NULL);
527 frags = numfrags(fs, nsize);
528 bbase = fragnum(fs, bprev);
529 if (bbase > fragnum(fs, (bprev + frags - 1))) {
530 /* cannot extend across a block boundary */
531 return (NULL);
532 }
533#ifdef SECSIZE
534 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
535 fs->fs_dbsize);
536#else SECSIZE
537 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
538 (int)fs->fs_cgsize, NOCRED, &bp);
539 if (error) {
540 brelse(bp);
541 return (NULL);
542 }
543#endif SECSIZE
544 cgp = bp->b_un.b_cg;
545 if (!cg_chkmagic(cgp)) {
546 brelse(bp);
547 return (NULL);
548 }
549 cgp->cg_time = time.tv_sec;
550 bno = dtogd(fs, bprev);
551 for (i = numfrags(fs, osize); i < frags; i++)
552 if (isclr(cg_blksfree(cgp), bno + i)) {
553 brelse(bp);
554 return (NULL);
555 }
556 /*
557 * the current fragment can be extended
558 * deduct the count on fragment being extended into
559 * increase the count on the remaining fragment (if any)
560 * allocate the extended piece
561 */
562 for (i = frags; i < fs->fs_frag - bbase; i++)
563 if (isclr(cg_blksfree(cgp), bno + i))
564 break;
565 cgp->cg_frsum[i - numfrags(fs, osize)]--;
566 if (i != frags)
567 cgp->cg_frsum[i - frags]++;
568 for (i = numfrags(fs, osize); i < frags; i++) {
569 clrbit(cg_blksfree(cgp), bno + i);
570 cgp->cg_cs.cs_nffree--;
571 fs->fs_cstotal.cs_nffree--;
572 fs->fs_cs(fs, cg).cs_nffree--;
573 }
574 fs->fs_fmod = 1;
575 bdwrite(bp);
576 return (bprev);
577}
578
579/*
580 * Determine whether a block can be allocated.
581 *
582 * Check to see if a block of the apprpriate size is available,
583 * and if it is, allocate it.
584 */
585daddr_t
586ffs_alloccg(ip, cg, bpref, size)
587 struct inode *ip;
588 int cg;
589 daddr_t bpref;
590 int size;
591{
592 register struct fs *fs;
593 register struct cg *cgp;
594 struct buf *bp;
595 register int i;
596 int error, bno, frags, allocsiz;
597
598 fs = ip->i_fs;
599 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
600 return (NULL);
601#ifdef SECSIZE
602 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
603 fs->fs_dbsize);
604#else SECSIZE
605 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
606 (int)fs->fs_cgsize, NOCRED, &bp);
607 if (error) {
608 brelse(bp);
609 return (NULL);
610 }
611#endif SECSIZE
612 cgp = bp->b_un.b_cg;
613 if (!cg_chkmagic(cgp) ||
614 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
615 brelse(bp);
616 return (NULL);
617 }
618 cgp->cg_time = time.tv_sec;
619 if (size == fs->fs_bsize) {
620 bno = ffs_alloccgblk(fs, cgp, bpref);
621 bdwrite(bp);
622 return (bno);
623 }
624 /*
625 * check to see if any fragments are already available
626 * allocsiz is the size which will be allocated, hacking
627 * it down to a smaller size if necessary
628 */
629 frags = numfrags(fs, size);
630 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
631 if (cgp->cg_frsum[allocsiz] != 0)
632 break;
633 if (allocsiz == fs->fs_frag) {
634 /*
635 * no fragments were available, so a block will be
636 * allocated, and hacked up
637 */
638 if (cgp->cg_cs.cs_nbfree == 0) {
639 brelse(bp);
640 return (NULL);
641 }
642 bno = ffs_alloccgblk(fs, cgp, bpref);
643 bpref = dtogd(fs, bno);
644 for (i = frags; i < fs->fs_frag; i++)
645 setbit(cg_blksfree(cgp), bpref + i);
646 i = fs->fs_frag - frags;
647 cgp->cg_cs.cs_nffree += i;
648 fs->fs_cstotal.cs_nffree += i;
649 fs->fs_cs(fs, cg).cs_nffree += i;
650 fs->fs_fmod = 1;
651 cgp->cg_frsum[i]++;
652 bdwrite(bp);
653 return (bno);
654 }
655 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
656 if (bno < 0) {
657 brelse(bp);
658 return (NULL);
659 }
660 for (i = 0; i < frags; i++)
661 clrbit(cg_blksfree(cgp), bno + i);
662 cgp->cg_cs.cs_nffree -= frags;
663 fs->fs_cstotal.cs_nffree -= frags;
664 fs->fs_cs(fs, cg).cs_nffree -= frags;
665 fs->fs_fmod = 1;
666 cgp->cg_frsum[allocsiz]--;
667 if (frags != allocsiz)
668 cgp->cg_frsum[allocsiz - frags]++;
669 bdwrite(bp);
670 return (cg * fs->fs_fpg + bno);
671}
672
673/*
674 * Allocate a block in a cylinder group.
675 *
676 * This algorithm implements the following policy:
677 * 1) allocate the requested block.
678 * 2) allocate a rotationally optimal block in the same cylinder.
679 * 3) allocate the next available block on the block rotor for the
680 * specified cylinder group.
681 * Note that this routine only allocates fs_bsize blocks; these
682 * blocks may be fragmented by the routine that allocates them.
683 */
684static daddr_t
685ffs_alloccgblk(fs, cgp, bpref)
686 register struct fs *fs;
687 register struct cg *cgp;
688 daddr_t bpref;
689{
690 daddr_t bno;
691 int cylno, pos, delta;
692 short *cylbp;
693 register int i;
694
695 if (bpref == 0) {
696 bpref = cgp->cg_rotor;
697 goto norot;
698 }
699 bpref = blknum(fs, bpref);
700 bpref = dtogd(fs, bpref);
701 /*
702 * if the requested block is available, use it
703 */
704 if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
705 bno = bpref;
706 goto gotit;
707 }
708 /*
709 * check for a block available on the same cylinder
710 */
711 cylno = cbtocylno(fs, bpref);
712 if (cg_blktot(cgp)[cylno] == 0)
713 goto norot;
714 if (fs->fs_cpc == 0) {
715 /*
716 * block layout info is not available, so just have
717 * to take any block in this cylinder.
718 */
719 bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
720 goto norot;
721 }
722 /*
723 * check the summary information to see if a block is
724 * available in the requested cylinder starting at the
725 * requested rotational position and proceeding around.
726 */
727 cylbp = cg_blks(fs, cgp, cylno);
728 pos = cbtorpos(fs, bpref);
729 for (i = pos; i < fs->fs_nrpos; i++)
730 if (cylbp[i] > 0)
731 break;
732 if (i == fs->fs_nrpos)
733 for (i = 0; i < pos; i++)
734 if (cylbp[i] > 0)
735 break;
736 if (cylbp[i] > 0) {
737 /*
738 * found a rotational position, now find the actual
739 * block. A panic if none is actually there.
740 */
741 pos = cylno % fs->fs_cpc;
742 bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
743 if (fs_postbl(fs, pos)[i] == -1) {
744 printf("pos = %d, i = %d, fs = %s\n",
745 pos, i, fs->fs_fsmnt);
746 panic("ffs_alloccgblk: cyl groups corrupted");
747 }
748 for (i = fs_postbl(fs, pos)[i];; ) {
749 if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) {
750 bno = blkstofrags(fs, (bno + i));
751 goto gotit;
752 }
753 delta = fs_rotbl(fs)[i];
754 if (delta <= 0 ||
755 delta + i > fragstoblks(fs, fs->fs_fpg))
756 break;
757 i += delta;
758 }
759 printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
760 panic("ffs_alloccgblk: can't find blk in cyl");
761 }
762norot:
763 /*
764 * no blocks in the requested cylinder, so take next
765 * available one in this cylinder group.
766 */
767 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
768 if (bno < 0)
769 return (NULL);
770 cgp->cg_rotor = bno;
771gotit:
772 ffs_clrblock(fs, cg_blksfree(cgp), (long)fragstoblks(fs, bno));
773 cgp->cg_cs.cs_nbfree--;
774 fs->fs_cstotal.cs_nbfree--;
775 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
776 cylno = cbtocylno(fs, bno);
777 cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
778 cg_blktot(cgp)[cylno]--;
779 fs->fs_fmod = 1;
780 return (cgp->cg_cgx * fs->fs_fpg + bno);
781}
782
783/*
784 * Determine whether an inode can be allocated.
785 *
786 * Check to see if an inode is available, and if it is,
787 * allocate it using the following policy:
788 * 1) allocate the requested inode.
789 * 2) allocate the next available inode after the requested
790 * inode in the specified cylinder group.
791 */
792static ino_t
793ffs_ialloccg(ip, cg, ipref, mode)
794 struct inode *ip;
795 int cg;
796 daddr_t ipref;
797 int mode;
798{
799 register struct fs *fs;
800 register struct cg *cgp;
801 struct buf *bp;
802 int error, start, len, loc, map, i;
803
804 fs = ip->i_fs;
805 if (fs->fs_cs(fs, cg).cs_nifree == 0)
806 return (NULL);
807#ifdef SECSIZE
808 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
809 fs->fs_dbsize);
810#else SECSIZE
811 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
812 (int)fs->fs_cgsize, NOCRED, &bp);
813 if (error) {
814 brelse(bp);
815 return (NULL);
816 }
817#endif SECSIZE
818 cgp = bp->b_un.b_cg;
819 if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
820 brelse(bp);
821 return (NULL);
822 }
823 cgp->cg_time = time.tv_sec;
824 if (ipref) {
825 ipref %= fs->fs_ipg;
826 if (isclr(cg_inosused(cgp), ipref))
827 goto gotit;
828 }
829 start = cgp->cg_irotor / NBBY;
830 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
831 loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
832 if (loc == 0) {
833 len = start + 1;
834 start = 0;
835 loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
836 if (loc == 0) {
837 printf("cg = %s, irotor = %d, fs = %s\n",
838 cg, cgp->cg_irotor, fs->fs_fsmnt);
839 panic("ffs_ialloccg: map corrupted");
840 /* NOTREACHED */
841 }
842 }
843 i = start + len - loc;
844 map = cg_inosused(cgp)[i];
845 ipref = i * NBBY;
846 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
847 if ((map & i) == 0) {
848 cgp->cg_irotor = ipref;
849 goto gotit;
850 }
851 }
852 printf("fs = %s\n", fs->fs_fsmnt);
853 panic("ffs_ialloccg: block not in map");
854 /* NOTREACHED */
855gotit:
856 setbit(cg_inosused(cgp), ipref);
857 cgp->cg_cs.cs_nifree--;
858 fs->fs_cstotal.cs_nifree--;
859 fs->fs_cs(fs, cg).cs_nifree--;
860 fs->fs_fmod = 1;
861 if ((mode & IFMT) == IFDIR) {
862 cgp->cg_cs.cs_ndir++;
863 fs->fs_cstotal.cs_ndir++;
864 fs->fs_cs(fs, cg).cs_ndir++;
865 }
866 bdwrite(bp);
867 return (cg * fs->fs_ipg + ipref);
868}
869
870/*
871 * Free a block or fragment.
872 *
873 * The specified block or fragment is placed back in the
874 * free map. If a fragment is deallocated, a possible
875 * block reassembly is checked.
876 */
877ffs_blkfree(ip, bno, size)
878 register struct inode *ip;
879 daddr_t bno;
880 off_t size;
881{
882 register struct fs *fs;
883 register struct cg *cgp;
884 struct buf *bp;
885 int error, cg, blk, frags, bbase;
886 register int i;
887 struct ucred *cred = curproc->p_ucred; /* XXX */
888
889 fs = ip->i_fs;
890 if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
891 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
892 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
893 panic("blkfree: bad size");
894 }
895 cg = dtog(fs, bno);
896 if ((unsigned)bno >= fs->fs_size) {
897 printf("bad block %d, ino %d\n", bno, ip->i_number);
898 ffs_fserr(fs, cred->cr_uid, "bad block");
899 return;
900 }
901#ifdef SECSIZE
902 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
903 fs->fs_dbsize);
904#else SECSIZE
905 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
906 (int)fs->fs_cgsize, NOCRED, &bp);
907 if (error) {
908 brelse(bp);
909 return;
910 }
911#endif SECSIZE
912 cgp = bp->b_un.b_cg;
913 if (!cg_chkmagic(cgp)) {
914 brelse(bp);
915 return;
916 }
917 cgp->cg_time = time.tv_sec;
918 bno = dtogd(fs, bno);
919 if (size == fs->fs_bsize) {
920 if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno))) {
921 printf("dev = 0x%x, block = %d, fs = %s\n",
922 ip->i_dev, bno, fs->fs_fsmnt);
923 panic("blkfree: freeing free block");
924 }
925 ffs_setblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
926 cgp->cg_cs.cs_nbfree++;
927 fs->fs_cstotal.cs_nbfree++;
928 fs->fs_cs(fs, cg).cs_nbfree++;
929 i = cbtocylno(fs, bno);
930 cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
931 cg_blktot(cgp)[i]++;
932 } else {
933 bbase = bno - fragnum(fs, bno);
934 /*
935 * decrement the counts associated with the old frags
936 */
937 blk = blkmap(fs, cg_blksfree(cgp), bbase);
938 ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
939 /*
940 * deallocate the fragment
941 */
942 frags = numfrags(fs, size);
943 for (i = 0; i < frags; i++) {
944 if (isset(cg_blksfree(cgp), bno + i)) {
945 printf("dev = 0x%x, block = %d, fs = %s\n",
946 ip->i_dev, bno + i, fs->fs_fsmnt);
947 panic("blkfree: freeing free frag");
948 }
949 setbit(cg_blksfree(cgp), bno + i);
950 }
951 cgp->cg_cs.cs_nffree += i;
952 fs->fs_cstotal.cs_nffree += i;
953 fs->fs_cs(fs, cg).cs_nffree += i;
954 /*
955 * add back in counts associated with the new frags
956 */
957 blk = blkmap(fs, cg_blksfree(cgp), bbase);
958 ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
959 /*
960 * if a complete block has been reassembled, account for it
961 */
962 if (ffs_isblock(fs, cg_blksfree(cgp),
963 (daddr_t)fragstoblks(fs, bbase))) {
964 cgp->cg_cs.cs_nffree -= fs->fs_frag;
965 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
966 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
967 cgp->cg_cs.cs_nbfree++;
968 fs->fs_cstotal.cs_nbfree++;
969 fs->fs_cs(fs, cg).cs_nbfree++;
970 i = cbtocylno(fs, bbase);
971 cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
972 cg_blktot(cgp)[i]++;
973 }
974 }
975 fs->fs_fmod = 1;
976 bdwrite(bp);
977}
978
979/*
980 * Free an inode.
981 *
982 * The specified inode is placed back in the free map.
983 */
984void
985ffs_ifree(pip, ino, mode)
986 struct inode *pip;
987 ino_t ino;
988 int mode;
989{
990 register struct fs *fs;
991 register struct cg *cgp;
992 struct buf *bp;
993 int error, cg;
994
995 fs = pip->i_fs;
996 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
997 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n",
998 pip->i_dev, ino, fs->fs_fsmnt);
999 cg = itog(fs, ino);
1000#ifdef SECSIZE
1001 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1002 fs->fs_dbsize);
1003#else SECSIZE
1004 error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1005 (int)fs->fs_cgsize, NOCRED, &bp);
1006 if (error) {
1007 brelse(bp);
1008 return;
1009 }
1010#endif SECSIZE
1011 cgp = bp->b_un.b_cg;
1012 if (!cg_chkmagic(cgp)) {
1013 brelse(bp);
1014 return;
1015 }
1016 cgp->cg_time = time.tv_sec;
1017 ino %= fs->fs_ipg;
1018 if (isclr(cg_inosused(cgp), ino)) {
1019 printf("dev = 0x%x, ino = %d, fs = %s\n",
1020 pip->i_dev, ino, fs->fs_fsmnt);
1021 if (fs->fs_ronly == 0)
1022 panic("ifree: freeing free inode");
1023 }
1024 clrbit(cg_inosused(cgp), ino);
1025 if (ino < cgp->cg_irotor)
1026 cgp->cg_irotor = ino;
1027 cgp->cg_cs.cs_nifree++;
1028 fs->fs_cstotal.cs_nifree++;
1029 fs->fs_cs(fs, cg).cs_nifree++;
1030 if ((mode & IFMT) == IFDIR) {
1031 cgp->cg_cs.cs_ndir--;
1032 fs->fs_cstotal.cs_ndir--;
1033 fs->fs_cs(fs, cg).cs_ndir--;
1034 }
1035 fs->fs_fmod = 1;
1036 bdwrite(bp);
1037}
1038
1039/*
1040 * Find a block of the specified size in the specified cylinder group.
1041 *
1042 * It is a panic if a request is made to find a block if none are
1043 * available.
1044 */
1045static daddr_t
1046ffs_mapsearch(fs, cgp, bpref, allocsiz)
1047 register struct fs *fs;
1048 register struct cg *cgp;
1049 daddr_t bpref;
1050 int allocsiz;
1051{
1052 daddr_t bno;
1053 int start, len, loc, i;
1054 int blk, field, subfield, pos;
1055
1056 /*
1057 * find the fragment by searching through the free block
1058 * map for an appropriate bit pattern
1059 */
1060 if (bpref)
1061 start = dtogd(fs, bpref) / NBBY;
1062 else
1063 start = cgp->cg_frotor / NBBY;
1064 len = howmany(fs->fs_fpg, NBBY) - start;
1065 loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[start],
1066 (u_char *)fragtbl[fs->fs_frag],
1067 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1068 if (loc == 0) {
1069 len = start + 1;
1070 start = 0;
1071 loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[0],
1072 (u_char *)fragtbl[fs->fs_frag],
1073 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1074 if (loc == 0) {
1075 printf("start = %d, len = %d, fs = %s\n",
1076 start, len, fs->fs_fsmnt);
1077 panic("ffs_alloccg: map corrupted");
1078 /* NOTREACHED */
1079 }
1080 }
1081 bno = (start + len - loc) * NBBY;
1082 cgp->cg_frotor = bno;
1083 /*
1084 * found the byte in the map
1085 * sift through the bits to find the selected frag
1086 */
1087 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1088 blk = blkmap(fs, cg_blksfree(cgp), bno);
1089 blk <<= 1;
1090 field = around[allocsiz];
1091 subfield = inside[allocsiz];
1092 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
1093 if ((blk & field) == subfield)
1094 return (bno + pos);
1095 field <<= 1;
1096 subfield <<= 1;
1097 }
1098 }
1099 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
1100 panic("ffs_alloccg: block not in map");
1101 return (-1);
1102}
1103
1104/*
1105 * Fserr prints the name of a file system with an error diagnostic.
1106 *
1107 * The form of the error message is:
1108 * fs: error message
1109 */
1110static void
1111ffs_fserr(fs, uid, cp)
1112 struct fs *fs;
1113 u_int uid;
1114 char *cp;
1115{
1116
1117 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
1118}