macroize cylinder group array references to allow dynamic tables
[unix-history] / usr / src / sys / ufs / lfs / lfs_alloc.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 1982, 1986 Regents of the University of California.
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 *
6 * @(#)lfs_alloc.c 7.5 (Berkeley) %G%
7 */
8
9#include "param.h"
10#include "systm.h"
11#include "mount.h"
12#include "fs.h"
13#include "buf.h"
14#include "inode.h"
15#include "dir.h"
16#include "user.h"
17#include "quota.h"
18#include "kernel.h"
19#include "syslog.h"
20#include "cmap.h"
21
22extern u_long hashalloc();
23extern ino_t ialloccg();
24extern daddr_t alloccg();
25extern daddr_t alloccgblk();
26extern daddr_t fragextend();
27extern daddr_t blkpref();
28extern daddr_t mapsearch();
29extern int inside[], around[];
30extern unsigned char *fragtbl[];
31
32/*
33 * Allocate a block in the file system.
34 *
35 * The size of the requested block is given, which must be some
36 * multiple of fs_fsize and <= fs_bsize.
37 * A preference may be optionally specified. If a preference is given
38 * the following hierarchy is used to allocate a block:
39 * 1) allocate the requested block.
40 * 2) allocate a rotationally optimal block in the same cylinder.
41 * 3) allocate a block in the same cylinder group.
42 * 4) quadradically rehash into other cylinder groups, until an
43 * available block is located.
44 * If no block preference is given the following heirarchy is used
45 * to allocate a block:
46 * 1) allocate a block in the cylinder group that contains the
47 * inode for the file.
48 * 2) quadradically rehash into other cylinder groups, until an
49 * available block is located.
50 */
51struct buf *
52alloc(ip, bpref, size)
53 register struct inode *ip;
54 daddr_t bpref;
55 int size;
56{
57 daddr_t bno;
58 register struct fs *fs;
59 register struct buf *bp;
60 int cg;
61
62 fs = ip->i_fs;
63 if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
64 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
65 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
66 panic("alloc: bad size");
67 }
68 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
69 goto nospace;
70 if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
71 goto nospace;
72#ifdef QUOTA
73 u.u_error = chkdq(ip, (long)btodb(size), 0);
74 if (u.u_error)
75 return (NULL);
76#endif
77 if (bpref >= fs->fs_size)
78 bpref = 0;
79 if (bpref == 0)
80 cg = itog(fs, ip->i_number);
81 else
82 cg = dtog(fs, bpref);
83 bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
84 (u_long (*)())alloccg);
85 if (bno <= 0)
86 goto nospace;
87 ip->i_blocks += btodb(size);
88 ip->i_flag |= IUPD|ICHG;
89#ifdef SECSIZE
90 bp = getblk(ip->i_dev, fsbtodb(fs, bno), size, fs->fs_dbsize);
91#else SECSIZE
92 bp = getblk(ip->i_dev, fsbtodb(fs, bno), size);
93#endif SECSIZE
94 clrbuf(bp);
95 return (bp);
96nospace:
97 fserr(fs, "file system full");
98 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
99 u.u_error = ENOSPC;
100 return (NULL);
101}
102
103/*
104 * Reallocate a fragment to a bigger size
105 *
106 * The number and size of the old block is given, and a preference
107 * and new size is also specified. The allocator attempts to extend
108 * the original block. Failing that, the regular block allocator is
109 * invoked to get an appropriate block.
110 */
111struct buf *
112realloccg(ip, bprev, bpref, osize, nsize)
113 register struct inode *ip;
114 daddr_t bprev, bpref;
115 int osize, nsize;
116{
117 register struct fs *fs;
118 register struct buf *bp, *obp;
119 int cg, request;
120 daddr_t bno, bn;
121 int i, count;
122
123 fs = ip->i_fs;
124 if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
125 (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
126 printf("dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
127 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
128 panic("realloccg: bad size");
129 }
130 if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
131 goto nospace;
132 if (bprev == 0) {
133 printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
134 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
135 panic("realloccg: bad bprev");
136 }
137#ifdef QUOTA
138 u.u_error = chkdq(ip, (long)btodb(nsize - osize), 0);
139 if (u.u_error)
140 return (NULL);
141#endif
142 cg = dtog(fs, bprev);
143 bno = fragextend(ip, cg, (long)bprev, osize, nsize);
144 if (bno != 0) {
145 do {
146#ifdef SECSIZE
147 bp = bread(ip->i_dev, fsbtodb(fs, bno), osize,
148 fs->fs_dbsize);
149#else SECSIZE
150 bp = bread(ip->i_dev, fsbtodb(fs, bno), osize);
151#endif SECSIZE
152 if (bp->b_flags & B_ERROR) {
153 brelse(bp);
154 return (NULL);
155 }
156 } while (brealloc(bp, nsize) == 0);
157 bp->b_flags |= B_DONE;
158 bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
159 ip->i_blocks += btodb(nsize - osize);
160 ip->i_flag |= IUPD|ICHG;
161 return (bp);
162 }
163 if (bpref >= fs->fs_size)
164 bpref = 0;
165 switch ((int)fs->fs_optim) {
166 case FS_OPTSPACE:
167 /*
168 * Allocate an exact sized fragment. Although this makes
169 * best use of space, we will waste time relocating it if
170 * the file continues to grow. If the fragmentation is
171 * less than half of the minimum free reserve, we choose
172 * to begin optimizing for time.
173 */
174 request = nsize;
175 if (fs->fs_minfree < 5 ||
176 fs->fs_cstotal.cs_nffree >
177 fs->fs_dsize * fs->fs_minfree / (2 * 100))
178 break;
179 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
180 fs->fs_fsmnt);
181 fs->fs_optim = FS_OPTTIME;
182 break;
183 case FS_OPTTIME:
184 /*
185 * At this point we have discovered a file that is trying
186 * to grow a small fragment to a larger fragment. To save
187 * time, we allocate a full sized block, then free the
188 * unused portion. If the file continues to grow, the
189 * `fragextend' call above will be able to grow it in place
190 * without further copying. If aberrant programs cause
191 * disk fragmentation to grow within 2% of the free reserve,
192 * we choose to begin optimizing for space.
193 */
194 request = fs->fs_bsize;
195 if (fs->fs_cstotal.cs_nffree <
196 fs->fs_dsize * (fs->fs_minfree - 2) / 100)
197 break;
198 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
199 fs->fs_fsmnt);
200 fs->fs_optim = FS_OPTSPACE;
201 break;
202 default:
203 printf("dev = 0x%x, optim = %d, fs = %s\n",
204 ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
205 panic("realloccg: bad optim");
206 /* NOTREACHED */
207 }
208 bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
209 (u_long (*)())alloccg);
210 if (bno > 0) {
211#ifdef SECSIZE
212 obp = bread(ip->i_dev, fsbtodb(fs, bprev), osize,
213 fs->fs_dbsize);
214#else SECSIZE
215 obp = bread(ip->i_dev, fsbtodb(fs, bprev), osize);
216#endif SECSIZE
217 if (obp->b_flags & B_ERROR) {
218 brelse(obp);
219 return (NULL);
220 }
221 bn = fsbtodb(fs, bno);
222#ifdef SECSIZE
223 bp = getblk(ip->i_dev, bn, nsize, fs->fs_dbsize);
224#else SECSIZE
225 bp = getblk(ip->i_dev, bn, nsize);
226#endif SECSIZE
227 bcopy(obp->b_un.b_addr, bp->b_un.b_addr, (u_int)osize);
228 count = howmany(osize, CLBYTES);
229 for (i = 0; i < count; i++)
230#ifdef SECSIZE
231 munhash(ip->i_dev, bn + i * CLBYTES / fs->fs_dbsize);
232#else SECSIZE
233 munhash(ip->i_dev, bn + i * CLBYTES / DEV_BSIZE);
234#endif SECSIZE
235 bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
236 if (obp->b_flags & B_DELWRI) {
237 obp->b_flags &= ~B_DELWRI;
238 u.u_ru.ru_oublock--; /* delete charge */
239 }
240 brelse(obp);
241 blkfree(ip, bprev, (off_t)osize);
242 if (nsize < request)
243 blkfree(ip, bno + numfrags(fs, nsize),
244 (off_t)(request - nsize));
245 ip->i_blocks += btodb(nsize - osize);
246 ip->i_flag |= IUPD|ICHG;
247 return (bp);
248 }
249nospace:
250 /*
251 * no space available
252 */
253 fserr(fs, "file system full");
254 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
255 u.u_error = ENOSPC;
256 return (NULL);
257}
258
259/*
260 * Allocate an inode in the file system.
261 *
262 * A preference may be optionally specified. If a preference is given
263 * the following hierarchy is used to allocate an inode:
264 * 1) allocate the requested inode.
265 * 2) allocate an inode in the same cylinder group.
266 * 3) quadradically rehash into other cylinder groups, until an
267 * available inode is located.
268 * If no inode preference is given the following heirarchy is used
269 * to allocate an inode:
270 * 1) allocate an inode in cylinder group 0.
271 * 2) quadradically rehash into other cylinder groups, until an
272 * available inode is located.
273 */
274struct inode *
275ialloc(pip, ipref, mode)
276 register struct inode *pip;
277 ino_t ipref;
278 int mode;
279{
280 ino_t ino;
281 register struct fs *fs;
282 register struct inode *ip;
283 int cg;
284
285 fs = pip->i_fs;
286 if (fs->fs_cstotal.cs_nifree == 0)
287 goto noinodes;
288#ifdef QUOTA
289 u.u_error = chkiq(pip->i_dev, (struct inode *)NULL, u.u_uid, 0);
290 if (u.u_error)
291 return (NULL);
292#endif
293 if (ipref >= fs->fs_ncg * fs->fs_ipg)
294 ipref = 0;
295 cg = itog(fs, ipref);
296 ino = (ino_t)hashalloc(pip, cg, (long)ipref, mode, ialloccg);
297 if (ino == 0)
298 goto noinodes;
299 ip = iget(pip->i_dev, pip->i_fs, ino);
300 if (ip == NULL) {
301 ifree(pip, ino, 0);
302 return (NULL);
303 }
304 if (ip->i_mode) {
305 printf("mode = 0%o, inum = %d, fs = %s\n",
306 ip->i_mode, ip->i_number, fs->fs_fsmnt);
307 panic("ialloc: dup alloc");
308 }
309 if (ip->i_blocks) { /* XXX */
310 printf("free inode %s/%d had %d blocks\n",
311 fs->fs_fsmnt, ino, ip->i_blocks);
312 ip->i_blocks = 0;
313 }
314 return (ip);
315noinodes:
316 fserr(fs, "out of inodes");
317 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
318 u.u_error = ENOSPC;
319 return (NULL);
320}
321
322/*
323 * Find a cylinder to place a directory.
324 *
325 * The policy implemented by this algorithm is to select from
326 * among those cylinder groups with above the average number of
327 * free inodes, the one with the smallest number of directories.
328 */
329ino_t
330dirpref(fs)
331 register struct fs *fs;
332{
333 int cg, minndir, mincg, avgifree;
334
335 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
336 minndir = fs->fs_ipg;
337 mincg = 0;
338 for (cg = 0; cg < fs->fs_ncg; cg++)
339 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
340 fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
341 mincg = cg;
342 minndir = fs->fs_cs(fs, cg).cs_ndir;
343 }
344 return ((ino_t)(fs->fs_ipg * mincg));
345}
346
347/*
348 * Select the desired position for the next block in a file. The file is
349 * logically divided into sections. The first section is composed of the
350 * direct blocks. Each additional section contains fs_maxbpg blocks.
351 *
352 * If no blocks have been allocated in the first section, the policy is to
353 * request a block in the same cylinder group as the inode that describes
354 * the file. If no blocks have been allocated in any other section, the
355 * policy is to place the section in a cylinder group with a greater than
356 * average number of free blocks. An appropriate cylinder group is found
357 * by using a rotor that sweeps the cylinder groups. When a new group of
358 * blocks is needed, the sweep begins in the cylinder group following the
359 * cylinder group from which the previous allocation was made. The sweep
360 * continues until a cylinder group with greater than the average number
361 * of free blocks is found. If the allocation is for the first block in an
362 * indirect block, the information on the previous allocation is unavailable;
363 * here a best guess is made based upon the logical block number being
364 * allocated.
365 *
366 * If a section is already partially allocated, the policy is to
367 * contiguously allocate fs_maxcontig blocks. The end of one of these
368 * contiguous blocks and the beginning of the next is physically separated
369 * so that the disk head will be in transit between them for at least
370 * fs_rotdelay milliseconds. This is to allow time for the processor to
371 * schedule another I/O transfer.
372 */
373daddr_t
374blkpref(ip, lbn, indx, bap)
375 struct inode *ip;
376 daddr_t lbn;
377 int indx;
378 daddr_t *bap;
379{
380 register struct fs *fs;
381 register int cg;
382 int avgbfree, startcg;
383 daddr_t nextblk;
384
385 fs = ip->i_fs;
386 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
387 if (lbn < NDADDR) {
388 cg = itog(fs, ip->i_number);
389 return (fs->fs_fpg * cg + fs->fs_frag);
390 }
391 /*
392 * Find a cylinder with greater than average number of
393 * unused data blocks.
394 */
395 if (indx == 0 || bap[indx - 1] == 0)
396 startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
397 else
398 startcg = dtog(fs, bap[indx - 1]) + 1;
399 startcg %= fs->fs_ncg;
400 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
401 for (cg = startcg; cg < fs->fs_ncg; cg++)
402 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
403 fs->fs_cgrotor = cg;
404 return (fs->fs_fpg * cg + fs->fs_frag);
405 }
406 for (cg = 0; cg <= startcg; cg++)
407 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
408 fs->fs_cgrotor = cg;
409 return (fs->fs_fpg * cg + fs->fs_frag);
410 }
411 return (NULL);
412 }
413 /*
414 * One or more previous blocks have been laid out. If less
415 * than fs_maxcontig previous blocks are contiguous, the
416 * next block is requested contiguously, otherwise it is
417 * requested rotationally delayed by fs_rotdelay milliseconds.
418 */
419 nextblk = bap[indx - 1] + fs->fs_frag;
420 if (indx > fs->fs_maxcontig &&
421 bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig)
422 != nextblk)
423 return (nextblk);
424 if (fs->fs_rotdelay != 0)
425 /*
426 * Here we convert ms of delay to frags as:
427 * (frags) = (ms) * (rev/sec) * (sect/rev) /
428 * ((sect/frag) * (ms/sec))
429 * then round up to the next block.
430 */
431 nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
432 (NSPF(fs) * 1000), fs->fs_frag);
433 return (nextblk);
434}
435
436/*
437 * Implement the cylinder overflow algorithm.
438 *
439 * The policy implemented by this algorithm is:
440 * 1) allocate the block in its requested cylinder group.
441 * 2) quadradically rehash on the cylinder group number.
442 * 3) brute force search for a free block.
443 */
444/*VARARGS5*/
445u_long
446hashalloc(ip, cg, pref, size, allocator)
447 struct inode *ip;
448 int cg;
449 long pref;
450 int size; /* size for data blocks, mode for inodes */
451 u_long (*allocator)();
452{
453 register struct fs *fs;
454 long result;
455 int i, icg = cg;
456
457 fs = ip->i_fs;
458 /*
459 * 1: preferred cylinder group
460 */
461 result = (*allocator)(ip, cg, pref, size);
462 if (result)
463 return (result);
464 /*
465 * 2: quadratic rehash
466 */
467 for (i = 1; i < fs->fs_ncg; i *= 2) {
468 cg += i;
469 if (cg >= fs->fs_ncg)
470 cg -= fs->fs_ncg;
471 result = (*allocator)(ip, cg, 0, size);
472 if (result)
473 return (result);
474 }
475 /*
476 * 3: brute force search
477 * Note that we start at i == 2, since 0 was checked initially,
478 * and 1 is always checked in the quadratic rehash.
479 */
480 cg = (icg + 2) % fs->fs_ncg;
481 for (i = 2; i < fs->fs_ncg; i++) {
482 result = (*allocator)(ip, cg, 0, size);
483 if (result)
484 return (result);
485 cg++;
486 if (cg == fs->fs_ncg)
487 cg = 0;
488 }
489 return (NULL);
490}
491
492/*
493 * Determine whether a fragment can be extended.
494 *
495 * Check to see if the necessary fragments are available, and
496 * if they are, allocate them.
497 */
498daddr_t
499fragextend(ip, cg, bprev, osize, nsize)
500 struct inode *ip;
501 int cg;
502 long bprev;
503 int osize, nsize;
504{
505 register struct fs *fs;
506 register struct buf *bp;
507 register struct cg *cgp;
508 long bno;
509 int frags, bbase;
510 int i;
511
512 fs = ip->i_fs;
513 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
514 return (NULL);
515 frags = numfrags(fs, nsize);
516 bbase = fragnum(fs, bprev);
517 if (bbase > fragnum(fs, (bprev + frags - 1))) {
518 /* cannot extend across a block boundary */
519 return (NULL);
520 }
521#ifdef SECSIZE
522 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
523 fs->fs_dbsize);
524#else SECSIZE
525 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
526#endif SECSIZE
527 cgp = bp->b_un.b_cg;
528 if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
529 brelse(bp);
530 return (NULL);
531 }
532 cgp->cg_time = time.tv_sec;
533 bno = dtogd(fs, bprev);
534 for (i = numfrags(fs, osize); i < frags; i++)
535 if (isclr(cg_blksfree(cgp), bno + i)) {
536 brelse(bp);
537 return (NULL);
538 }
539 /*
540 * the current fragment can be extended
541 * deduct the count on fragment being extended into
542 * increase the count on the remaining fragment (if any)
543 * allocate the extended piece
544 */
545 for (i = frags; i < fs->fs_frag - bbase; i++)
546 if (isclr(cg_blksfree(cgp), bno + i))
547 break;
548 cgp->cg_frsum[i - numfrags(fs, osize)]--;
549 if (i != frags)
550 cgp->cg_frsum[i - frags]++;
551 for (i = numfrags(fs, osize); i < frags; i++) {
552 clrbit(cg_blksfree(cgp), bno + i);
553 cgp->cg_cs.cs_nffree--;
554 fs->fs_cstotal.cs_nffree--;
555 fs->fs_cs(fs, cg).cs_nffree--;
556 }
557 fs->fs_fmod++;
558 bdwrite(bp);
559 return (bprev);
560}
561
562/*
563 * Determine whether a block can be allocated.
564 *
565 * Check to see if a block of the apprpriate size is available,
566 * and if it is, allocate it.
567 */
568daddr_t
569alloccg(ip, cg, bpref, size)
570 struct inode *ip;
571 int cg;
572 daddr_t bpref;
573 int size;
574{
575 register struct fs *fs;
576 register struct buf *bp;
577 register struct cg *cgp;
578 int bno, frags;
579 int allocsiz;
580 register int i;
581
582 fs = ip->i_fs;
583 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
584 return (NULL);
585#ifdef SECSIZE
586 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
587 fs->fs_dbsize);
588#else SECSIZE
589 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
590#endif SECSIZE
591 cgp = bp->b_un.b_cg;
592 if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) ||
593 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
594 brelse(bp);
595 return (NULL);
596 }
597 cgp->cg_time = time.tv_sec;
598 if (size == fs->fs_bsize) {
599 bno = alloccgblk(fs, cgp, bpref);
600 bdwrite(bp);
601 return (bno);
602 }
603 /*
604 * check to see if any fragments are already available
605 * allocsiz is the size which will be allocated, hacking
606 * it down to a smaller size if necessary
607 */
608 frags = numfrags(fs, size);
609 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
610 if (cgp->cg_frsum[allocsiz] != 0)
611 break;
612 if (allocsiz == fs->fs_frag) {
613 /*
614 * no fragments were available, so a block will be
615 * allocated, and hacked up
616 */
617 if (cgp->cg_cs.cs_nbfree == 0) {
618 brelse(bp);
619 return (NULL);
620 }
621 bno = alloccgblk(fs, cgp, bpref);
622 bpref = dtogd(fs, bno);
623 for (i = frags; i < fs->fs_frag; i++)
624 setbit(cg_blksfree(cgp), bpref + i);
625 i = fs->fs_frag - frags;
626 cgp->cg_cs.cs_nffree += i;
627 fs->fs_cstotal.cs_nffree += i;
628 fs->fs_cs(fs, cg).cs_nffree += i;
629 fs->fs_fmod++;
630 cgp->cg_frsum[i]++;
631 bdwrite(bp);
632 return (bno);
633 }
634 bno = mapsearch(fs, cgp, bpref, allocsiz);
635 if (bno < 0) {
636 brelse(bp);
637 return (NULL);
638 }
639 for (i = 0; i < frags; i++)
640 clrbit(cg_blksfree(cgp), bno + i);
641 cgp->cg_cs.cs_nffree -= frags;
642 fs->fs_cstotal.cs_nffree -= frags;
643 fs->fs_cs(fs, cg).cs_nffree -= frags;
644 fs->fs_fmod++;
645 cgp->cg_frsum[allocsiz]--;
646 if (frags != allocsiz)
647 cgp->cg_frsum[allocsiz - frags]++;
648 bdwrite(bp);
649 return (cg * fs->fs_fpg + bno);
650}
651
652/*
653 * Allocate a block in a cylinder group.
654 *
655 * This algorithm implements the following policy:
656 * 1) allocate the requested block.
657 * 2) allocate a rotationally optimal block in the same cylinder.
658 * 3) allocate the next available block on the block rotor for the
659 * specified cylinder group.
660 * Note that this routine only allocates fs_bsize blocks; these
661 * blocks may be fragmented by the routine that allocates them.
662 */
663daddr_t
664alloccgblk(fs, cgp, bpref)
665 register struct fs *fs;
666 register struct cg *cgp;
667 daddr_t bpref;
668{
669 daddr_t bno;
670 int cylno, pos, delta;
671 short *cylbp;
672 register int i;
673
674 if (bpref == 0) {
675 bpref = cgp->cg_rotor;
676 goto norot;
677 }
678 bpref = blknum(fs, bpref);
679 bpref = dtogd(fs, bpref);
680 /*
681 * if the requested block is available, use it
682 */
683 if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
684 bno = bpref;
685 goto gotit;
686 }
687 /*
688 * check for a block available on the same cylinder
689 */
690 cylno = cbtocylno(fs, bpref);
691 if (cg_blktot(cgp)[cylno] == 0)
692 goto norot;
693 if (fs->fs_cpc == 0) {
694 /*
695 * block layout info is not available, so just have
696 * to take any block in this cylinder.
697 */
698 bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
699 goto norot;
700 }
701 /*
702 * check the summary information to see if a block is
703 * available in the requested cylinder starting at the
704 * requested rotational position and proceeding around.
705 */
706 cylbp = cg_blks(fs, cgp, cylno);
707 pos = cbtorpos(fs, bpref);
708 for (i = pos; i < fs->fs_nrpos; i++)
709 if (cylbp[i] > 0)
710 break;
711 if (i == fs->fs_nrpos)
712 for (i = 0; i < pos; i++)
713 if (cylbp[i] > 0)
714 break;
715 if (cylbp[i] > 0) {
716 /*
717 * found a rotational position, now find the actual
718 * block. A panic if none is actually there.
719 */
720 pos = cylno % fs->fs_cpc;
721 bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
722 if (fs_postbl(fs, pos)[i] == -1) {
723 printf("pos = %d, i = %d, fs = %s\n",
724 pos, i, fs->fs_fsmnt);
725 panic("alloccgblk: cyl groups corrupted");
726 }
727 for (i = fs_postbl(fs, pos)[i];; ) {
728 if (isblock(fs, cg_blksfree(cgp), bno + i)) {
729 bno = blkstofrags(fs, (bno + i));
730 goto gotit;
731 }
732 delta = fs_rotbl(fs)[i];
733 if (delta <= 0 ||
734 delta + i > fragstoblks(fs, fs->fs_fpg))
735 break;
736 i += delta;
737 }
738 printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
739 panic("alloccgblk: can't find blk in cyl");
740 }
741norot:
742 /*
743 * no blocks in the requested cylinder, so take next
744 * available one in this cylinder group.
745 */
746 bno = mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
747 if (bno < 0)
748 return (NULL);
749 cgp->cg_rotor = bno;
750gotit:
751 clrblock(fs, cg_blksfree(cgp), (long)fragstoblks(fs, bno));
752 cgp->cg_cs.cs_nbfree--;
753 fs->fs_cstotal.cs_nbfree--;
754 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
755 cylno = cbtocylno(fs, bno);
756 cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
757 cg_blktot(cgp)[cylno]--;
758 fs->fs_fmod++;
759 return (cgp->cg_cgx * fs->fs_fpg + bno);
760}
761
762/*
763 * Determine whether an inode can be allocated.
764 *
765 * Check to see if an inode is available, and if it is,
766 * allocate it using the following policy:
767 * 1) allocate the requested inode.
768 * 2) allocate the next available inode after the requested
769 * inode in the specified cylinder group.
770 */
771ino_t
772ialloccg(ip, cg, ipref, mode)
773 struct inode *ip;
774 int cg;
775 daddr_t ipref;
776 int mode;
777{
778 register struct fs *fs;
779 register struct cg *cgp;
780 struct buf *bp;
781 int start, len, loc, map, i;
782
783 fs = ip->i_fs;
784 if (fs->fs_cs(fs, cg).cs_nifree == 0)
785 return (NULL);
786#ifdef SECSIZE
787 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
788 fs->fs_dbsize);
789#else SECSIZE
790 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
791#endif SECSIZE
792 cgp = bp->b_un.b_cg;
793 if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) ||
794 cgp->cg_cs.cs_nifree == 0) {
795 brelse(bp);
796 return (NULL);
797 }
798 cgp->cg_time = time.tv_sec;
799 if (ipref) {
800 ipref %= fs->fs_ipg;
801 if (isclr(cg_inosused(cgp), ipref))
802 goto gotit;
803 }
804 start = cgp->cg_irotor / NBBY;
805 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
806 loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
807 if (loc == 0) {
808 len = start + 1;
809 start = 0;
810 loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
811 if (loc == 0) {
812 printf("cg = %s, irotor = %d, fs = %s\n",
813 cg, cgp->cg_irotor, fs->fs_fsmnt);
814 panic("ialloccg: map corrupted");
815 /* NOTREACHED */
816 }
817 }
818 i = start + len - loc;
819 map = cg_inosused(cgp)[i];
820 ipref = i * NBBY;
821 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
822 if ((map & i) == 0) {
823 cgp->cg_irotor = ipref;
824 goto gotit;
825 }
826 }
827 printf("fs = %s\n", fs->fs_fsmnt);
828 panic("ialloccg: block not in map");
829 /* NOTREACHED */
830gotit:
831 setbit(cg_inosused(cgp), ipref);
832 cgp->cg_cs.cs_nifree--;
833 fs->fs_cstotal.cs_nifree--;
834 fs->fs_cs(fs, cg).cs_nifree--;
835 fs->fs_fmod++;
836 if ((mode & IFMT) == IFDIR) {
837 cgp->cg_cs.cs_ndir++;
838 fs->fs_cstotal.cs_ndir++;
839 fs->fs_cs(fs, cg).cs_ndir++;
840 }
841 bdwrite(bp);
842 return (cg * fs->fs_ipg + ipref);
843}
844
845/*
846 * Free a block or fragment.
847 *
848 * The specified block or fragment is placed back in the
849 * free map. If a fragment is deallocated, a possible
850 * block reassembly is checked.
851 */
852blkfree(ip, bno, size)
853 register struct inode *ip;
854 daddr_t bno;
855 off_t size;
856{
857 register struct fs *fs;
858 register struct cg *cgp;
859 register struct buf *bp;
860 int cg, blk, frags, bbase;
861 register int i;
862
863 fs = ip->i_fs;
864 if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
865 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
866 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
867 panic("blkfree: bad size");
868 }
869 cg = dtog(fs, bno);
870 if (badblock(fs, bno)) {
871 printf("bad block %d, ino %d\n", bno, ip->i_number);
872 return;
873 }
874#ifdef SECSIZE
875 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
876 fs->fs_dbsize);
877#else SECSIZE
878 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
879#endif SECSIZE
880 cgp = bp->b_un.b_cg;
881 if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
882 brelse(bp);
883 return;
884 }
885 cgp->cg_time = time.tv_sec;
886 bno = dtogd(fs, bno);
887 if (size == fs->fs_bsize) {
888 if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno))) {
889 printf("dev = 0x%x, block = %d, fs = %s\n",
890 ip->i_dev, bno, fs->fs_fsmnt);
891 panic("blkfree: freeing free block");
892 }
893 setblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
894 cgp->cg_cs.cs_nbfree++;
895 fs->fs_cstotal.cs_nbfree++;
896 fs->fs_cs(fs, cg).cs_nbfree++;
897 i = cbtocylno(fs, bno);
898 cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
899 cg_blktot(cgp)[i]++;
900 } else {
901 bbase = bno - fragnum(fs, bno);
902 /*
903 * decrement the counts associated with the old frags
904 */
905 blk = blkmap(fs, cg_blksfree(cgp), bbase);
906 fragacct(fs, blk, cgp->cg_frsum, -1);
907 /*
908 * deallocate the fragment
909 */
910 frags = numfrags(fs, size);
911 for (i = 0; i < frags; i++) {
912 if (isset(cg_blksfree(cgp), bno + i)) {
913 printf("dev = 0x%x, block = %d, fs = %s\n",
914 ip->i_dev, bno + i, fs->fs_fsmnt);
915 panic("blkfree: freeing free frag");
916 }
917 setbit(cg_blksfree(cgp), bno + i);
918 }
919 cgp->cg_cs.cs_nffree += i;
920 fs->fs_cstotal.cs_nffree += i;
921 fs->fs_cs(fs, cg).cs_nffree += i;
922 /*
923 * add back in counts associated with the new frags
924 */
925 blk = blkmap(fs, cg_blksfree(cgp), bbase);
926 fragacct(fs, blk, cgp->cg_frsum, 1);
927 /*
928 * if a complete block has been reassembled, account for it
929 */
930 if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bbase))) {
931 cgp->cg_cs.cs_nffree -= fs->fs_frag;
932 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
933 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
934 cgp->cg_cs.cs_nbfree++;
935 fs->fs_cstotal.cs_nbfree++;
936 fs->fs_cs(fs, cg).cs_nbfree++;
937 i = cbtocylno(fs, bbase);
938 cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
939 cg_blktot(cgp)[i]++;
940 }
941 }
942 fs->fs_fmod++;
943 bdwrite(bp);
944}
945
946/*
947 * Free an inode.
948 *
949 * The specified inode is placed back in the free map.
950 */
951ifree(ip, ino, mode)
952 struct inode *ip;
953 ino_t ino;
954 int mode;
955{
956 register struct fs *fs;
957 register struct cg *cgp;
958 register struct buf *bp;
959 int cg;
960
961 fs = ip->i_fs;
962 if ((unsigned)ino >= fs->fs_ipg*fs->fs_ncg) {
963 printf("dev = 0x%x, ino = %d, fs = %s\n",
964 ip->i_dev, ino, fs->fs_fsmnt);
965 panic("ifree: range");
966 }
967 cg = itog(fs, ino);
968#ifdef SECSIZE
969 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
970 fs->fs_dbsize);
971#else SECSIZE
972 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
973#endif SECSIZE
974 cgp = bp->b_un.b_cg;
975 if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
976 brelse(bp);
977 return;
978 }
979 cgp->cg_time = time.tv_sec;
980 ino %= fs->fs_ipg;
981 if (isclr(cg_inosused(cgp), ino)) {
982 printf("dev = 0x%x, ino = %d, fs = %s\n",
983 ip->i_dev, ino, fs->fs_fsmnt);
984 panic("ifree: freeing free inode");
985 }
986 clrbit(cg_inosused(cgp), ino);
987 if (ino < cgp->cg_irotor)
988 cgp->cg_irotor = ino;
989 cgp->cg_cs.cs_nifree++;
990 fs->fs_cstotal.cs_nifree++;
991 fs->fs_cs(fs, cg).cs_nifree++;
992 if ((mode & IFMT) == IFDIR) {
993 cgp->cg_cs.cs_ndir--;
994 fs->fs_cstotal.cs_ndir--;
995 fs->fs_cs(fs, cg).cs_ndir--;
996 }
997 fs->fs_fmod++;
998 bdwrite(bp);
999}
1000
1001/*
1002 * Find a block of the specified size in the specified cylinder group.
1003 *
1004 * It is a panic if a request is made to find a block if none are
1005 * available.
1006 */
1007daddr_t
1008mapsearch(fs, cgp, bpref, allocsiz)
1009 register struct fs *fs;
1010 register struct cg *cgp;
1011 daddr_t bpref;
1012 int allocsiz;
1013{
1014 daddr_t bno;
1015 int start, len, loc, i;
1016 int blk, field, subfield, pos;
1017
1018 /*
1019 * find the fragment by searching through the free block
1020 * map for an appropriate bit pattern
1021 */
1022 if (bpref)
1023 start = dtogd(fs, bpref) / NBBY;
1024 else
1025 start = cgp->cg_frotor / NBBY;
1026 len = howmany(fs->fs_fpg, NBBY) - start;
1027 loc = scanc((unsigned)len, (caddr_t)&cg_blksfree(cgp)[start],
1028 (caddr_t)fragtbl[fs->fs_frag],
1029 (int)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1030 if (loc == 0) {
1031 len = start + 1;
1032 start = 0;
1033 loc = scanc((unsigned)len, (caddr_t)&cg_blksfree(cgp)[0],
1034 (caddr_t)fragtbl[fs->fs_frag],
1035 (int)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1036 if (loc == 0) {
1037 printf("start = %d, len = %d, fs = %s\n",
1038 start, len, fs->fs_fsmnt);
1039 panic("alloccg: map corrupted");
1040 /* NOTREACHED */
1041 }
1042 }
1043 bno = (start + len - loc) * NBBY;
1044 cgp->cg_frotor = bno;
1045 /*
1046 * found the byte in the map
1047 * sift through the bits to find the selected frag
1048 */
1049 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1050 blk = blkmap(fs, cg_blksfree(cgp), bno);
1051 blk <<= 1;
1052 field = around[allocsiz];
1053 subfield = inside[allocsiz];
1054 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
1055 if ((blk & field) == subfield)
1056 return (bno + pos);
1057 field <<= 1;
1058 subfield <<= 1;
1059 }
1060 }
1061 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
1062 panic("alloccg: block not in map");
1063 return (-1);
1064}
1065
1066/*
1067 * Fserr prints the name of a file system with an error diagnostic.
1068 *
1069 * The form of the error message is:
1070 * fs: error message
1071 */
1072fserr(fs, cp)
1073 struct fs *fs;
1074 char *cp;
1075{
1076
1077 log(LOG_ERR, "%s: %s\n", fs->fs_fsmnt, cp);
1078}