allow users to specify optimization preference
[unix-history] / usr / src / sys / ufs / lfs / lfs_alloc.c
CommitLineData
da7c5cc6
KM
1/*
2 * Copyright (c) 1982 Regents of the University of California.
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 *
f7fa0c54 6 * @(#)lfs_alloc.c 6.15 (Berkeley) %G%
da7c5cc6 7 */
e3fe2d69 8
94368568
JB
9#include "param.h"
10#include "systm.h"
11#include "mount.h"
12#include "fs.h"
94368568
JB
13#include "buf.h"
14#include "inode.h"
15#include "dir.h"
16#include "user.h"
17#include "quota.h"
18#include "kernel.h"
7580177b 19#include "syslog.h"
e3fe2d69 20
daaf7bee 21extern u_long hashalloc();
4f083fd7
SL
22extern ino_t ialloccg();
23extern daddr_t alloccg();
743f1ef7
KM
24extern daddr_t alloccgblk();
25extern daddr_t fragextend();
26extern daddr_t blkpref();
27extern daddr_t mapsearch();
1d7a08c5 28extern int inside[], around[];
b6407c9d 29extern unsigned char *fragtbl[];
e3fe2d69 30
502770a3
KM
31/*
32 * Allocate a block in the file system.
33 *
34 * The size of the requested block is given, which must be some
35 * multiple of fs_fsize and <= fs_bsize.
36 * A preference may be optionally specified. If a preference is given
37 * the following hierarchy is used to allocate a block:
38 * 1) allocate the requested block.
39 * 2) allocate a rotationally optimal block in the same cylinder.
40 * 3) allocate a block in the same cylinder group.
41 * 4) quadradically rehash into other cylinder groups, until an
42 * available block is located.
43 * If no block preference is given the following heirarchy is used
44 * to allocate a block:
45 * 1) allocate a block in the cylinder group that contains the
46 * inode for the file.
47 * 2) quadradically rehash into other cylinder groups, until an
48 * available block is located.
49 */
e3fe2d69 50struct buf *
f7287e4b 51alloc(ip, bpref, size)
f3c028b7 52 register struct inode *ip;
e3fe2d69
KM
53 daddr_t bpref;
54 int size;
55{
56 daddr_t bno;
57 register struct fs *fs;
f3c028b7 58 register struct buf *bp;
e3fe2d69
KM
59 int cg;
60
f7287e4b 61 fs = ip->i_fs;
ffd90e52
KM
62 if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
63 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
64 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
b6407c9d 65 panic("alloc: bad size");
ffd90e52 66 }
b6407c9d 67 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
0947395d 68 goto nospace;
240a4664 69 if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
e3fe2d69 70 goto nospace;
b4567e9c 71#ifdef QUOTA
2523b389
SL
72 u.u_error = chkdq(ip, (long)btodb(size), 0);
73 if (u.u_error)
74 return (NULL);
ca90a6bf 75#endif
260e5e3c
KM
76 if (bpref >= fs->fs_size)
77 bpref = 0;
e3fe2d69 78 if (bpref == 0)
6994bf5d 79 cg = itog(fs, ip->i_number);
e3fe2d69 80 else
6994bf5d 81 cg = dtog(fs, bpref);
4f083fd7
SL
82 bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
83 (u_long (*)())alloccg);
6459ebe0 84 if (bno <= 0)
e3fe2d69 85 goto nospace;
2523b389
SL
86 ip->i_blocks += btodb(size);
87 ip->i_flag |= IUPD|ICHG;
f7287e4b 88 bp = getblk(ip->i_dev, fsbtodb(fs, bno), size);
e3fe2d69
KM
89 clrbuf(bp);
90 return (bp);
91nospace:
92 fserr(fs, "file system full");
93 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
94 u.u_error = ENOSPC;
95 return (NULL);
96}
97
502770a3
KM
98/*
99 * Reallocate a fragment to a bigger size
100 *
101 * The number and size of the old block is given, and a preference
102 * and new size is also specified. The allocator attempts to extend
103 * the original block. Failing that, the regular block allocator is
104 * invoked to get an appropriate block.
105 */
07670f7d 106struct buf *
f7287e4b
KM
107realloccg(ip, bprev, bpref, osize, nsize)
108 register struct inode *ip;
743f1ef7 109 daddr_t bprev, bpref;
07670f7d
KM
110 int osize, nsize;
111{
112 daddr_t bno;
113 register struct fs *fs;
f3c028b7 114 register struct buf *bp, *obp;
f7fa0c54 115 int cg, request;
07670f7d 116
f7287e4b 117 fs = ip->i_fs;
d995d89d 118 if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
ffd90e52
KM
119 (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
120 printf("dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
121 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
b6407c9d 122 panic("realloccg: bad size");
ffd90e52 123 }
240a4664 124 if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
0947395d 125 goto nospace;
ffd90e52
KM
126 if (bprev == 0) {
127 printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
128 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
502770a3 129 panic("realloccg: bad bprev");
ffd90e52 130 }
b4567e9c 131#ifdef QUOTA
2523b389
SL
132 u.u_error = chkdq(ip, (long)btodb(nsize - osize), 0);
133 if (u.u_error)
134 return (NULL);
ca90a6bf 135#endif
ae851115 136 cg = dtog(fs, bprev);
f7287e4b 137 bno = fragextend(ip, cg, (long)bprev, osize, nsize);
f3c028b7 138 if (bno != 0) {
9d6d37ce
BJ
139 do {
140 bp = bread(ip->i_dev, fsbtodb(fs, bno), osize);
141 if (bp->b_flags & B_ERROR) {
142 brelse(bp);
143 return (NULL);
144 }
145 } while (brealloc(bp, nsize) == 0);
146 bp->b_flags |= B_DONE;
4f083fd7 147 bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
2523b389
SL
148 ip->i_blocks += btodb(nsize - osize);
149 ip->i_flag |= IUPD|ICHG;
f3c028b7
KM
150 return (bp);
151 }
260e5e3c
KM
152 if (bpref >= fs->fs_size)
153 bpref = 0;
f7fa0c54
KM
154 if (fs->fs_optim == FS_OPTSPACE)
155 request = nsize;
156 else /* if (fs->fs_optim == FS_OPTTIME) */
157 request = fs->fs_bsize;
158 bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
4f083fd7 159 (u_long (*)())alloccg);
6459ebe0 160 if (bno > 0) {
f7287e4b 161 obp = bread(ip->i_dev, fsbtodb(fs, bprev), osize);
d995d89d
KM
162 if (obp->b_flags & B_ERROR) {
163 brelse(obp);
ae851115 164 return (NULL);
d995d89d 165 }
260ede8e
KM
166 bp = getblk(ip->i_dev, fsbtodb(fs, bno), nsize);
167 bcopy(obp->b_un.b_addr, bp->b_un.b_addr, (u_int)osize);
168 bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
169 if (obp->b_flags & B_DELWRI) {
170 obp->b_flags &= ~B_DELWRI;
171 u.u_ru.ru_oublock--; /* delete charge */
172 }
f3c028b7 173 brelse(obp);
4f083fd7 174 free(ip, bprev, (off_t)osize);
f7fa0c54 175 if (nsize < request)
e8727f9b 176 free(ip, bno + numfrags(fs, nsize),
f7fa0c54 177 (off_t)(request - nsize));
2523b389
SL
178 ip->i_blocks += btodb(nsize - osize);
179 ip->i_flag |= IUPD|ICHG;
ae851115 180 return (bp);
f3c028b7 181 }
0947395d 182nospace:
f3c028b7
KM
183 /*
184 * no space available
185 */
07670f7d
KM
186 fserr(fs, "file system full");
187 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
188 u.u_error = ENOSPC;
189 return (NULL);
190}
191
502770a3
KM
192/*
193 * Allocate an inode in the file system.
194 *
195 * A preference may be optionally specified. If a preference is given
196 * the following hierarchy is used to allocate an inode:
197 * 1) allocate the requested inode.
198 * 2) allocate an inode in the same cylinder group.
199 * 3) quadradically rehash into other cylinder groups, until an
200 * available inode is located.
201 * If no inode preference is given the following heirarchy is used
202 * to allocate an inode:
203 * 1) allocate an inode in cylinder group 0.
204 * 2) quadradically rehash into other cylinder groups, until an
205 * available inode is located.
206 */
e3fe2d69 207struct inode *
f7287e4b
KM
208ialloc(pip, ipref, mode)
209 register struct inode *pip;
e3fe2d69
KM
210 ino_t ipref;
211 int mode;
212{
daaf7bee 213 ino_t ino;
e3fe2d69
KM
214 register struct fs *fs;
215 register struct inode *ip;
216 int cg;
217
f7287e4b 218 fs = pip->i_fs;
0947395d 219 if (fs->fs_cstotal.cs_nifree == 0)
e3fe2d69 220 goto noinodes;
b4567e9c 221#ifdef QUOTA
2523b389
SL
222 u.u_error = chkiq(pip->i_dev, (struct inode *)NULL, u.u_uid, 0);
223 if (u.u_error)
224 return (NULL);
ca90a6bf 225#endif
260e5e3c
KM
226 if (ipref >= fs->fs_ncg * fs->fs_ipg)
227 ipref = 0;
6994bf5d 228 cg = itog(fs, ipref);
f7287e4b 229 ino = (ino_t)hashalloc(pip, cg, (long)ipref, mode, ialloccg);
e3fe2d69
KM
230 if (ino == 0)
231 goto noinodes;
f7287e4b 232 ip = iget(pip->i_dev, pip->i_fs, ino);
e3fe2d69 233 if (ip == NULL) {
bcd8515e 234 ifree(pip, ino, 0);
e3fe2d69
KM
235 return (NULL);
236 }
ffd90e52
KM
237 if (ip->i_mode) {
238 printf("mode = 0%o, inum = %d, fs = %s\n",
239 ip->i_mode, ip->i_number, fs->fs_fsmnt);
e3fe2d69 240 panic("ialloc: dup alloc");
ffd90e52 241 }
2523b389
SL
242 if (ip->i_blocks) { /* XXX */
243 printf("free inode %s/%d had %d blocks\n",
244 fs->fs_fsmnt, ino, ip->i_blocks);
245 ip->i_blocks = 0;
246 }
e3fe2d69
KM
247 return (ip);
248noinodes:
249 fserr(fs, "out of inodes");
ae851115 250 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
e3fe2d69
KM
251 u.u_error = ENOSPC;
252 return (NULL);
253}
254
743f1ef7 255/*
502770a3
KM
256 * Find a cylinder to place a directory.
257 *
258 * The policy implemented by this algorithm is to select from
259 * among those cylinder groups with above the average number of
260 * free inodes, the one with the smallest number of directories.
743f1ef7 261 */
4f083fd7 262ino_t
f7287e4b 263dirpref(fs)
e3fe2d69 264 register struct fs *fs;
f7287e4b 265{
743f1ef7 266 int cg, minndir, mincg, avgifree;
e3fe2d69 267
0947395d 268 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
743f1ef7 269 minndir = fs->fs_ipg;
e3fe2d69 270 mincg = 0;
743f1ef7 271 for (cg = 0; cg < fs->fs_ncg; cg++)
b6407c9d
KM
272 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
273 fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
e3fe2d69 274 mincg = cg;
b6407c9d 275 minndir = fs->fs_cs(fs, cg).cs_ndir;
e3fe2d69 276 }
4f083fd7 277 return ((ino_t)(fs->fs_ipg * mincg));
e3fe2d69
KM
278}
279
743f1ef7 280/*
4f083fd7
SL
281 * Select the desired position for the next block in a file. The file is
282 * logically divided into sections. The first section is composed of the
283 * direct blocks. Each additional section contains fs_maxbpg blocks.
284 *
285 * If no blocks have been allocated in the first section, the policy is to
286 * request a block in the same cylinder group as the inode that describes
287 * the file. If no blocks have been allocated in any other section, the
288 * policy is to place the section in a cylinder group with a greater than
289 * average number of free blocks. An appropriate cylinder group is found
16e7863f
KM
290 * by using a rotor that sweeps the cylinder groups. When a new group of
291 * blocks is needed, the sweep begins in the cylinder group following the
292 * cylinder group from which the previous allocation was made. The sweep
293 * continues until a cylinder group with greater than the average number
294 * of free blocks is found. If the allocation is for the first block in an
295 * indirect block, the information on the previous allocation is unavailable;
296 * here a best guess is made based upon the logical block number being
297 * allocated.
4f083fd7
SL
298 *
299 * If a section is already partially allocated, the policy is to
300 * contiguously allocate fs_maxcontig blocks. The end of one of these
301 * contiguous blocks and the beginning of the next is physically separated
302 * so that the disk head will be in transit between them for at least
303 * fs_rotdelay milliseconds. This is to allow time for the processor to
304 * schedule another I/O transfer.
743f1ef7 305 */
daaf7bee 306daddr_t
4f083fd7
SL
307blkpref(ip, lbn, indx, bap)
308 struct inode *ip;
309 daddr_t lbn;
310 int indx;
311 daddr_t *bap;
f7287e4b 312{
4f083fd7 313 register struct fs *fs;
16e7863f
KM
314 register int cg;
315 int avgbfree, startcg;
4f083fd7 316 daddr_t nextblk;
743f1ef7 317
4f083fd7
SL
318 fs = ip->i_fs;
319 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
320 if (lbn < NDADDR) {
321 cg = itog(fs, ip->i_number);
b6407c9d 322 return (fs->fs_fpg * cg + fs->fs_frag);
743f1ef7 323 }
4f083fd7
SL
324 /*
325 * Find a cylinder with greater than average number of
326 * unused data blocks.
327 */
16e7863f
KM
328 if (indx == 0 || bap[indx - 1] == 0)
329 startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
330 else
331 startcg = dtog(fs, bap[indx - 1]) + 1;
332 startcg %= fs->fs_ncg;
4f083fd7 333 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
16e7863f 334 for (cg = startcg; cg < fs->fs_ncg; cg++)
4f083fd7
SL
335 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
336 fs->fs_cgrotor = cg;
337 return (fs->fs_fpg * cg + fs->fs_frag);
338 }
16e7863f 339 for (cg = 0; cg <= startcg; cg++)
4f083fd7
SL
340 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
341 fs->fs_cgrotor = cg;
342 return (fs->fs_fpg * cg + fs->fs_frag);
343 }
344 return (NULL);
345 }
346 /*
347 * One or more previous blocks have been laid out. If less
348 * than fs_maxcontig previous blocks are contiguous, the
349 * next block is requested contiguously, otherwise it is
350 * requested rotationally delayed by fs_rotdelay milliseconds.
351 */
352 nextblk = bap[indx - 1] + fs->fs_frag;
353 if (indx > fs->fs_maxcontig &&
240a4664 354 bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig)
4f083fd7
SL
355 != nextblk)
356 return (nextblk);
357 if (fs->fs_rotdelay != 0)
358 /*
359 * Here we convert ms of delay to frags as:
360 * (frags) = (ms) * (rev/sec) * (sect/rev) /
361 * ((sect/frag) * (ms/sec))
362 * then round up to the next block.
363 */
364 nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
365 (NSPF(fs) * 1000), fs->fs_frag);
366 return (nextblk);
743f1ef7
KM
367}
368
502770a3
KM
369/*
370 * Implement the cylinder overflow algorithm.
371 *
372 * The policy implemented by this algorithm is:
373 * 1) allocate the block in its requested cylinder group.
374 * 2) quadradically rehash on the cylinder group number.
375 * 3) brute force search for a free block.
376 */
daaf7bee
KM
377/*VARARGS5*/
378u_long
f7287e4b
KM
379hashalloc(ip, cg, pref, size, allocator)
380 struct inode *ip;
e3fe2d69
KM
381 int cg;
382 long pref;
383 int size; /* size for data blocks, mode for inodes */
daaf7bee 384 u_long (*allocator)();
e3fe2d69 385{
f7287e4b 386 register struct fs *fs;
e3fe2d69
KM
387 long result;
388 int i, icg = cg;
389
f7287e4b 390 fs = ip->i_fs;
e3fe2d69
KM
391 /*
392 * 1: preferred cylinder group
393 */
f7287e4b 394 result = (*allocator)(ip, cg, pref, size);
e3fe2d69
KM
395 if (result)
396 return (result);
397 /*
398 * 2: quadratic rehash
399 */
400 for (i = 1; i < fs->fs_ncg; i *= 2) {
401 cg += i;
402 if (cg >= fs->fs_ncg)
403 cg -= fs->fs_ncg;
f7287e4b 404 result = (*allocator)(ip, cg, 0, size);
e3fe2d69
KM
405 if (result)
406 return (result);
407 }
408 /*
409 * 3: brute force search
620b3290
SL
410 * Note that we start at i == 2, since 0 was checked initially,
411 * and 1 is always checked in the quadratic rehash.
e3fe2d69 412 */
2136305e 413 cg = (icg + 2) % fs->fs_ncg;
620b3290 414 for (i = 2; i < fs->fs_ncg; i++) {
f7287e4b 415 result = (*allocator)(ip, cg, 0, size);
e3fe2d69
KM
416 if (result)
417 return (result);
418 cg++;
419 if (cg == fs->fs_ncg)
420 cg = 0;
421 }
ae851115 422 return (NULL);
e3fe2d69
KM
423}
424
502770a3
KM
425/*
426 * Determine whether a fragment can be extended.
427 *
428 * Check to see if the necessary fragments are available, and
429 * if they are, allocate them.
430 */
07670f7d 431daddr_t
f7287e4b
KM
432fragextend(ip, cg, bprev, osize, nsize)
433 struct inode *ip;
07670f7d 434 int cg;
f3c028b7 435 long bprev;
07670f7d
KM
436 int osize, nsize;
437{
f7287e4b 438 register struct fs *fs;
f3c028b7
KM
439 register struct buf *bp;
440 register struct cg *cgp;
441 long bno;
442 int frags, bbase;
07670f7d
KM
443 int i;
444
f7287e4b 445 fs = ip->i_fs;
a8580723 446 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
e5476900 447 return (NULL);
d995d89d 448 frags = numfrags(fs, nsize);
a8580723
KM
449 bbase = fragnum(fs, bprev);
450 if (bbase > fragnum(fs, (bprev + frags - 1))) {
f3c028b7 451 /* cannot extend across a block boundry */
ae851115 452 return (NULL);
f3c028b7 453 }
d65bd829 454 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
e5476900
KM
455 cgp = bp->b_un.b_cg;
456 if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC) {
d995d89d 457 brelse(bp);
ae851115 458 return (NULL);
d995d89d 459 }
ad9250ee 460 cgp->cg_time = time.tv_sec;
6994bf5d 461 bno = dtogd(fs, bprev);
d995d89d 462 for (i = numfrags(fs, osize); i < frags; i++)
aca50d72
KM
463 if (isclr(cgp->cg_free, bno + i)) {
464 brelse(bp);
ae851115 465 return (NULL);
aca50d72
KM
466 }
467 /*
468 * the current fragment can be extended
469 * deduct the count on fragment being extended into
470 * increase the count on the remaining fragment (if any)
471 * allocate the extended piece
472 */
473 for (i = frags; i < fs->fs_frag - bbase; i++)
f3c028b7
KM
474 if (isclr(cgp->cg_free, bno + i))
475 break;
d995d89d 476 cgp->cg_frsum[i - numfrags(fs, osize)]--;
aca50d72
KM
477 if (i != frags)
478 cgp->cg_frsum[i - frags]++;
d995d89d 479 for (i = numfrags(fs, osize); i < frags; i++) {
aca50d72
KM
480 clrbit(cgp->cg_free, bno + i);
481 cgp->cg_cs.cs_nffree--;
482 fs->fs_cstotal.cs_nffree--;
483 fs->fs_cs(fs, cg).cs_nffree--;
f3c028b7 484 }
aca50d72
KM
485 fs->fs_fmod++;
486 bdwrite(bp);
487 return (bprev);
07670f7d
KM
488}
489
502770a3
KM
490/*
491 * Determine whether a block can be allocated.
492 *
493 * Check to see if a block of the apprpriate size is available,
494 * and if it is, allocate it.
495 */
4f083fd7 496daddr_t
f7287e4b
KM
497alloccg(ip, cg, bpref, size)
498 struct inode *ip;
e3fe2d69
KM
499 int cg;
500 daddr_t bpref;
501 int size;
502{
f7287e4b 503 register struct fs *fs;
f3c028b7
KM
504 register struct buf *bp;
505 register struct cg *cgp;
506 int bno, frags;
507 int allocsiz;
f3c028b7 508 register int i;
e3fe2d69 509
f7287e4b 510 fs = ip->i_fs;
b6407c9d 511 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
ae851115 512 return (NULL);
d65bd829 513 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
e5476900 514 cgp = bp->b_un.b_cg;
0f538882
KM
515 if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC ||
516 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
d995d89d 517 brelse(bp);
ae851115 518 return (NULL);
d995d89d 519 }
ad9250ee 520 cgp->cg_time = time.tv_sec;
b6407c9d 521 if (size == fs->fs_bsize) {
daaf7bee 522 bno = alloccgblk(fs, cgp, bpref);
f3c028b7
KM
523 bdwrite(bp);
524 return (bno);
525 }
526 /*
527 * check to see if any fragments are already available
528 * allocsiz is the size which will be allocated, hacking
529 * it down to a smaller size if necessary
530 */
d995d89d 531 frags = numfrags(fs, size);
b6407c9d 532 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
f3c028b7
KM
533 if (cgp->cg_frsum[allocsiz] != 0)
534 break;
b6407c9d 535 if (allocsiz == fs->fs_frag) {
f3c028b7
KM
536 /*
537 * no fragments were available, so a block will be
538 * allocated, and hacked up
539 */
0947395d 540 if (cgp->cg_cs.cs_nbfree == 0) {
f3c028b7 541 brelse(bp);
ae851115 542 return (NULL);
f3c028b7 543 }
daaf7bee 544 bno = alloccgblk(fs, cgp, bpref);
6994bf5d 545 bpref = dtogd(fs, bno);
b6407c9d 546 for (i = frags; i < fs->fs_frag; i++)
f3c028b7 547 setbit(cgp->cg_free, bpref + i);
b6407c9d 548 i = fs->fs_frag - frags;
0947395d
KM
549 cgp->cg_cs.cs_nffree += i;
550 fs->fs_cstotal.cs_nffree += i;
b6407c9d 551 fs->fs_cs(fs, cg).cs_nffree += i;
961945a8 552 fs->fs_fmod++;
f3c028b7
KM
553 cgp->cg_frsum[i]++;
554 bdwrite(bp);
555 return (bno);
556 }
743f1ef7 557 bno = mapsearch(fs, cgp, bpref, allocsiz);
0f538882
KM
558 if (bno < 0) {
559 brelse(bp);
ae851115 560 return (NULL);
0f538882 561 }
f3c028b7
KM
562 for (i = 0; i < frags; i++)
563 clrbit(cgp->cg_free, bno + i);
0947395d
KM
564 cgp->cg_cs.cs_nffree -= frags;
565 fs->fs_cstotal.cs_nffree -= frags;
b6407c9d 566 fs->fs_cs(fs, cg).cs_nffree -= frags;
961945a8 567 fs->fs_fmod++;
f3c028b7
KM
568 cgp->cg_frsum[allocsiz]--;
569 if (frags != allocsiz)
570 cgp->cg_frsum[allocsiz - frags]++;
571 bdwrite(bp);
572 return (cg * fs->fs_fpg + bno);
573}
574
502770a3
KM
575/*
576 * Allocate a block in a cylinder group.
577 *
578 * This algorithm implements the following policy:
579 * 1) allocate the requested block.
580 * 2) allocate a rotationally optimal block in the same cylinder.
581 * 3) allocate the next available block on the block rotor for the
582 * specified cylinder group.
583 * Note that this routine only allocates fs_bsize blocks; these
584 * blocks may be fragmented by the routine that allocates them.
585 */
f3c028b7 586daddr_t
daaf7bee 587alloccgblk(fs, cgp, bpref)
f7287e4b 588 register struct fs *fs;
f3c028b7
KM
589 register struct cg *cgp;
590 daddr_t bpref;
591{
743f1ef7 592 daddr_t bno;
ae851115 593 int cylno, pos, delta;
743f1ef7 594 short *cylbp;
aca50d72 595 register int i;
f3c028b7 596
743f1ef7
KM
597 if (bpref == 0) {
598 bpref = cgp->cg_rotor;
aca50d72
KM
599 goto norot;
600 }
a8580723 601 bpref = blknum(fs, bpref);
6994bf5d 602 bpref = dtogd(fs, bpref);
aca50d72
KM
603 /*
604 * if the requested block is available, use it
605 */
240a4664 606 if (isblock(fs, cgp->cg_free, fragstoblks(fs, bpref))) {
aca50d72
KM
607 bno = bpref;
608 goto gotit;
609 }
aca50d72
KM
610 /*
611 * check for a block available on the same cylinder
aca50d72
KM
612 */
613 cylno = cbtocylno(fs, bpref);
502770a3
KM
614 if (cgp->cg_btot[cylno] == 0)
615 goto norot;
616 if (fs->fs_cpc == 0) {
617 /*
618 * block layout info is not available, so just have
619 * to take any block in this cylinder.
620 */
621 bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
622 goto norot;
623 }
aca50d72
KM
624 /*
625 * check the summary information to see if a block is
626 * available in the requested cylinder starting at the
4f083fd7 627 * requested rotational position and proceeding around.
aca50d72 628 */
4f083fd7
SL
629 cylbp = cgp->cg_b[cylno];
630 pos = cbtorpos(fs, bpref);
aca50d72
KM
631 for (i = pos; i < NRPOS; i++)
632 if (cylbp[i] > 0)
633 break;
634 if (i == NRPOS)
635 for (i = 0; i < pos; i++)
743f1ef7
KM
636 if (cylbp[i] > 0)
637 break;
aca50d72
KM
638 if (cylbp[i] > 0) {
639 /*
640 * found a rotational position, now find the actual
641 * block. A panic if none is actually there.
642 */
643 pos = cylno % fs->fs_cpc;
644 bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
ffd90e52
KM
645 if (fs->fs_postbl[pos][i] == -1) {
646 printf("pos = %d, i = %d, fs = %s\n",
647 pos, i, fs->fs_fsmnt);
aca50d72 648 panic("alloccgblk: cyl groups corrupted");
ffd90e52 649 }
ae851115 650 for (i = fs->fs_postbl[pos][i];; ) {
aca50d72 651 if (isblock(fs, cgp->cg_free, bno + i)) {
240a4664 652 bno = blkstofrags(fs, (bno + i));
aca50d72 653 goto gotit;
743f1ef7 654 }
ae851115
KM
655 delta = fs->fs_rotbl[i];
656 if (delta <= 0 || delta > MAXBPC - i)
aca50d72 657 break;
ae851115 658 i += delta;
743f1ef7 659 }
ffd90e52 660 printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
aca50d72 661 panic("alloccgblk: can't find blk in cyl");
e3fe2d69 662 }
aca50d72
KM
663norot:
664 /*
665 * no blocks in the requested cylinder, so take next
666 * available one in this cylinder group.
667 */
b32450f4 668 bno = mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
6459ebe0 669 if (bno < 0)
ae851115 670 return (NULL);
743f1ef7 671 cgp->cg_rotor = bno;
e3fe2d69 672gotit:
240a4664 673 clrblock(fs, cgp->cg_free, (long)fragstoblks(fs, bno));
0947395d
KM
674 cgp->cg_cs.cs_nbfree--;
675 fs->fs_cstotal.cs_nbfree--;
b6407c9d 676 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
502770a3
KM
677 cylno = cbtocylno(fs, bno);
678 cgp->cg_b[cylno][cbtorpos(fs, bno)]--;
679 cgp->cg_btot[cylno]--;
e3fe2d69 680 fs->fs_fmod++;
743f1ef7 681 return (cgp->cg_cgx * fs->fs_fpg + bno);
e3fe2d69
KM
682}
683
502770a3
KM
684/*
685 * Determine whether an inode can be allocated.
686 *
687 * Check to see if an inode is available, and if it is,
688 * allocate it using the following policy:
689 * 1) allocate the requested inode.
690 * 2) allocate the next available inode after the requested
691 * inode in the specified cylinder group.
692 */
4f083fd7 693ino_t
f7287e4b
KM
694ialloccg(ip, cg, ipref, mode)
695 struct inode *ip;
e3fe2d69
KM
696 int cg;
697 daddr_t ipref;
698 int mode;
699{
f7287e4b 700 register struct fs *fs;
f3c028b7 701 register struct cg *cgp;
4e0c7b8a
KM
702 struct buf *bp;
703 int start, len, loc, map, i;
e3fe2d69 704
f7287e4b 705 fs = ip->i_fs;
b6407c9d 706 if (fs->fs_cs(fs, cg).cs_nifree == 0)
ae851115 707 return (NULL);
d65bd829 708 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
e5476900 709 cgp = bp->b_un.b_cg;
0f538882
KM
710 if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC ||
711 cgp->cg_cs.cs_nifree == 0) {
d995d89d 712 brelse(bp);
ae851115 713 return (NULL);
d995d89d 714 }
ad9250ee 715 cgp->cg_time = time.tv_sec;
e3fe2d69
KM
716 if (ipref) {
717 ipref %= fs->fs_ipg;
718 if (isclr(cgp->cg_iused, ipref))
719 goto gotit;
4e0c7b8a
KM
720 }
721 start = cgp->cg_irotor / NBBY;
722 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
723 loc = skpc(0xff, len, &cgp->cg_iused[start]);
724 if (loc == 0) {
e5889092
KM
725 len = start + 1;
726 start = 0;
727 loc = skpc(0xff, len, &cgp->cg_iused[0]);
728 if (loc == 0) {
729 printf("cg = %s, irotor = %d, fs = %s\n",
730 cg, cgp->cg_irotor, fs->fs_fsmnt);
731 panic("ialloccg: map corrupted");
732 /* NOTREACHED */
733 }
4e0c7b8a
KM
734 }
735 i = start + len - loc;
736 map = cgp->cg_iused[i];
737 ipref = i * NBBY;
738 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
739 if ((map & i) == 0) {
e3fe2d69
KM
740 cgp->cg_irotor = ipref;
741 goto gotit;
742 }
743 }
4e0c7b8a
KM
744 printf("fs = %s\n", fs->fs_fsmnt);
745 panic("ialloccg: block not in map");
746 /* NOTREACHED */
e3fe2d69
KM
747gotit:
748 setbit(cgp->cg_iused, ipref);
0947395d
KM
749 cgp->cg_cs.cs_nifree--;
750 fs->fs_cstotal.cs_nifree--;
b6407c9d 751 fs->fs_cs(fs, cg).cs_nifree--;
e3fe2d69
KM
752 fs->fs_fmod++;
753 if ((mode & IFMT) == IFDIR) {
0947395d
KM
754 cgp->cg_cs.cs_ndir++;
755 fs->fs_cstotal.cs_ndir++;
b6407c9d 756 fs->fs_cs(fs, cg).cs_ndir++;
e3fe2d69
KM
757 }
758 bdwrite(bp);
759 return (cg * fs->fs_ipg + ipref);
760}
761
502770a3
KM
762/*
763 * Free a block or fragment.
764 *
765 * The specified block or fragment is placed back in the
766 * free map. If a fragment is deallocated, a possible
767 * block reassembly is checked.
768 */
4f083fd7 769free(ip, bno, size)
f7287e4b 770 register struct inode *ip;
e3fe2d69 771 daddr_t bno;
daaf7bee 772 off_t size;
e3fe2d69
KM
773{
774 register struct fs *fs;
775 register struct cg *cgp;
776 register struct buf *bp;
f3c028b7
KM
777 int cg, blk, frags, bbase;
778 register int i;
e3fe2d69 779
f7287e4b 780 fs = ip->i_fs;
ffd90e52
KM
781 if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
782 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
783 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
b6407c9d 784 panic("free: bad size");
ffd90e52 785 }
6994bf5d 786 cg = dtog(fs, bno);
6459ebe0
KM
787 if (badblock(fs, bno)) {
788 printf("bad block %d, ino %d\n", bno, ip->i_number);
e3fe2d69 789 return;
6459ebe0 790 }
d65bd829 791 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
e5476900
KM
792 cgp = bp->b_un.b_cg;
793 if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC) {
d995d89d 794 brelse(bp);
e3fe2d69 795 return;
d995d89d 796 }
ad9250ee 797 cgp->cg_time = time.tv_sec;
6994bf5d 798 bno = dtogd(fs, bno);
b6407c9d 799 if (size == fs->fs_bsize) {
240a4664 800 if (isblock(fs, cgp->cg_free, fragstoblks(fs, bno))) {
ffd90e52
KM
801 printf("dev = 0x%x, block = %d, fs = %s\n",
802 ip->i_dev, bno, fs->fs_fsmnt);
07670f7d 803 panic("free: freeing free block");
6459ebe0 804 }
240a4664 805 setblock(fs, cgp->cg_free, fragstoblks(fs, bno));
0947395d
KM
806 cgp->cg_cs.cs_nbfree++;
807 fs->fs_cstotal.cs_nbfree++;
b6407c9d 808 fs->fs_cs(fs, cg).cs_nbfree++;
502770a3
KM
809 i = cbtocylno(fs, bno);
810 cgp->cg_b[i][cbtorpos(fs, bno)]++;
811 cgp->cg_btot[i]++;
07670f7d 812 } else {
a8580723 813 bbase = bno - fragnum(fs, bno);
f3c028b7
KM
814 /*
815 * decrement the counts associated with the old frags
816 */
ae851115 817 blk = blkmap(fs, cgp->cg_free, bbase);
b6407c9d 818 fragacct(fs, blk, cgp->cg_frsum, -1);
f3c028b7
KM
819 /*
820 * deallocate the fragment
821 */
d995d89d 822 frags = numfrags(fs, size);
f3c028b7 823 for (i = 0; i < frags; i++) {
ffd90e52
KM
824 if (isset(cgp->cg_free, bno + i)) {
825 printf("dev = 0x%x, block = %d, fs = %s\n",
826 ip->i_dev, bno + i, fs->fs_fsmnt);
07670f7d 827 panic("free: freeing free frag");
ffd90e52 828 }
07670f7d 829 setbit(cgp->cg_free, bno + i);
07670f7d 830 }
ae851115
KM
831 cgp->cg_cs.cs_nffree += i;
832 fs->fs_cstotal.cs_nffree += i;
833 fs->fs_cs(fs, cg).cs_nffree += i;
f3c028b7
KM
834 /*
835 * add back in counts associated with the new frags
836 */
ae851115 837 blk = blkmap(fs, cgp->cg_free, bbase);
b6407c9d 838 fragacct(fs, blk, cgp->cg_frsum, 1);
f3c028b7
KM
839 /*
840 * if a complete block has been reassembled, account for it
841 */
240a4664 842 if (isblock(fs, cgp->cg_free, fragstoblks(fs, bbase))) {
b6407c9d
KM
843 cgp->cg_cs.cs_nffree -= fs->fs_frag;
844 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
845 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
0947395d
KM
846 cgp->cg_cs.cs_nbfree++;
847 fs->fs_cstotal.cs_nbfree++;
b6407c9d 848 fs->fs_cs(fs, cg).cs_nbfree++;
502770a3
KM
849 i = cbtocylno(fs, bbase);
850 cgp->cg_b[i][cbtorpos(fs, bbase)]++;
851 cgp->cg_btot[i]++;
07670f7d
KM
852 }
853 }
e3fe2d69 854 fs->fs_fmod++;
e3fe2d69
KM
855 bdwrite(bp);
856}
857
502770a3
KM
858/*
859 * Free an inode.
860 *
861 * The specified inode is placed back in the free map.
862 */
f7287e4b
KM
863ifree(ip, ino, mode)
864 struct inode *ip;
e3fe2d69
KM
865 ino_t ino;
866 int mode;
867{
868 register struct fs *fs;
869 register struct cg *cgp;
870 register struct buf *bp;
e3fe2d69
KM
871 int cg;
872
f7287e4b 873 fs = ip->i_fs;
ffd90e52
KM
874 if ((unsigned)ino >= fs->fs_ipg*fs->fs_ncg) {
875 printf("dev = 0x%x, ino = %d, fs = %s\n",
876 ip->i_dev, ino, fs->fs_fsmnt);
e3fe2d69 877 panic("ifree: range");
ffd90e52 878 }
6994bf5d 879 cg = itog(fs, ino);
d65bd829 880 bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
e5476900
KM
881 cgp = bp->b_un.b_cg;
882 if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC) {
d995d89d 883 brelse(bp);
e3fe2d69 884 return;
d995d89d 885 }
ad9250ee 886 cgp->cg_time = time.tv_sec;
e3fe2d69 887 ino %= fs->fs_ipg;
ffd90e52
KM
888 if (isclr(cgp->cg_iused, ino)) {
889 printf("dev = 0x%x, ino = %d, fs = %s\n",
890 ip->i_dev, ino, fs->fs_fsmnt);
e3fe2d69 891 panic("ifree: freeing free inode");
ffd90e52 892 }
e3fe2d69 893 clrbit(cgp->cg_iused, ino);
4e0c7b8a
KM
894 if (ino < cgp->cg_irotor)
895 cgp->cg_irotor = ino;
0947395d
KM
896 cgp->cg_cs.cs_nifree++;
897 fs->fs_cstotal.cs_nifree++;
b6407c9d 898 fs->fs_cs(fs, cg).cs_nifree++;
e3fe2d69 899 if ((mode & IFMT) == IFDIR) {
0947395d
KM
900 cgp->cg_cs.cs_ndir--;
901 fs->fs_cstotal.cs_ndir--;
b6407c9d 902 fs->fs_cs(fs, cg).cs_ndir--;
e3fe2d69
KM
903 }
904 fs->fs_fmod++;
905 bdwrite(bp);
906}
907
743f1ef7 908/*
502770a3
KM
909 * Find a block of the specified size in the specified cylinder group.
910 *
743f1ef7
KM
911 * It is a panic if a request is made to find a block if none are
912 * available.
913 */
914daddr_t
915mapsearch(fs, cgp, bpref, allocsiz)
916 register struct fs *fs;
917 register struct cg *cgp;
918 daddr_t bpref;
919 int allocsiz;
920{
921 daddr_t bno;
922 int start, len, loc, i;
923 int blk, field, subfield, pos;
924
925 /*
926 * find the fragment by searching through the free block
927 * map for an appropriate bit pattern
928 */
929 if (bpref)
6994bf5d 930 start = dtogd(fs, bpref) / NBBY;
743f1ef7
KM
931 else
932 start = cgp->cg_frotor / NBBY;
942bd18b 933 len = howmany(fs->fs_fpg, NBBY) - start;
88a7a62a
SL
934 loc = scanc((unsigned)len, (caddr_t)&cgp->cg_free[start],
935 (caddr_t)fragtbl[fs->fs_frag],
936 (int)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
743f1ef7 937 if (loc == 0) {
e5476900
KM
938 len = start + 1;
939 start = 0;
e5889092 940 loc = scanc((unsigned)len, (caddr_t)&cgp->cg_free[0],
88a7a62a
SL
941 (caddr_t)fragtbl[fs->fs_frag],
942 (int)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
4e0c7b8a
KM
943 if (loc == 0) {
944 printf("start = %d, len = %d, fs = %s\n",
945 start, len, fs->fs_fsmnt);
946 panic("alloccg: map corrupted");
e5889092 947 /* NOTREACHED */
4e0c7b8a 948 }
743f1ef7
KM
949 }
950 bno = (start + len - loc) * NBBY;
951 cgp->cg_frotor = bno;
952 /*
953 * found the byte in the map
954 * sift through the bits to find the selected frag
955 */
ae851115
KM
956 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
957 blk = blkmap(fs, cgp->cg_free, bno);
743f1ef7
KM
958 blk <<= 1;
959 field = around[allocsiz];
960 subfield = inside[allocsiz];
b6407c9d 961 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
ae851115
KM
962 if ((blk & field) == subfield)
963 return (bno + pos);
743f1ef7
KM
964 field <<= 1;
965 subfield <<= 1;
966 }
967 }
ffd90e52 968 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
743f1ef7 969 panic("alloccg: block not in map");
e5476900 970 return (-1);
743f1ef7
KM
971}
972
e3fe2d69 973/*
502770a3
KM
974 * Fserr prints the name of a file system with an error diagnostic.
975 *
976 * The form of the error message is:
e3fe2d69
KM
977 * fs: error message
978 */
979fserr(fs, cp)
980 struct fs *fs;
981 char *cp;
982{
983
7580177b 984 log(KERN_FAIL, "%s: %s\n", fs->fs_fsmnt, cp);
e3fe2d69 985}