[unix-history] / sys / ufs / ufs_alloc.c
/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_alloc.c	7.26 (Berkeley) 5/2/91
 *
 * PATCHES MAGIC                LEVEL   PATCH THAT GOT US HERE
 * --------------------         -----   ----------------------
 * CURRENT PATCH LEVEL:         1       00106
 * --------------------         -----   ----------------------
 *
 * 28 Mar 93	Chris Torek	Fix contiguous block allocation.
 *
 */

#include "param.h"
#include "systm.h"
#include "buf.h"
#include "proc.h"
#include "vnode.h"
#include "kernel.h"
#include "syslog.h"

#include "quota.h"
#include "inode.h"
#include "fs.h"

extern u_long hashalloc();
extern ino_t ialloccg();
extern daddr_t alloccg();
extern daddr_t alloccgblk();
extern daddr_t fragextend();
extern daddr_t blkpref();
extern daddr_t mapsearch();
extern int inside[], around[];
extern unsigned char *fragtbl[];

/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 * 1) allocate the requested block.
 * 2) allocate a rotationally optimal block in the same cylinder.
 * 3) allocate a block in the same cylinder group.
 * 4) quadratically rehash into other cylinder groups, until an
 *    available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 * 1) allocate a block in the cylinder group that contains the
 *    inode for the file.
 * 2) quadratically rehash into other cylinder groups, until an
 *    available block is located.
 */
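/*
 * For example (hypothetical geometry): on a file system with
 * fs_bsize = 4096 and fs_fsize = 512, the legal values of "size"
 * are 512, 1024, 1536, ..., 4096; a request such as 1000 bytes
 * fails the fragoff() check below and panics.
 */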
alloc(ip, lbn, bpref, size, bnp)
	register struct inode *ip;
	daddr_t lbn, bpref;
	int size;
	daddr_t *bnp;
{
	daddr_t bno;
	register struct fs *fs;
	register struct buf *bp;
	int cg, error;
	struct ucred *cred = curproc->p_ucred;		/* XXX */

	*bnp = 0;
	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("alloc: bad size");
	}
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
#ifdef QUOTA
	if (error = chkdq(ip, (long)btodb(size), cred, 0))
		return (error);
#endif
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = itog(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
	    (u_long (*)())alloccg);
	if (bno > 0) {
		ip->i_blocks += btodb(size);
		ip->i_flag |= IUPD|ICHG;
		*bnp = bno;
		return (0);
	}
#ifdef QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, (long)-btodb(size), cred, FORCE);
#endif
nospace:
	fserr(fs, cred->cr_uid, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Reallocate a fragment to a bigger size.
 *
 * The number and size of the old block are given, and a preference
 * and new size are also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
realloccg(ip, lbprev, bpref, osize, nsize, bpp)
	register struct inode *ip;
	off_t lbprev;
	daddr_t bpref;
	int osize, nsize;
	struct buf **bpp;
{
	register struct fs *fs;
	struct buf *bp, *obp;
	int cg, request, error;
	daddr_t bprev, bno;
	struct ucred *cred = curproc->p_ucred;		/* XXX */

	*bpp = 0;
	fs = ip->i_fs;
	if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf("dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
		panic("realloccg: bad size");
	}
	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
	if ((bprev = ip->i_db[lbprev]) == 0) {
		printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
		panic("realloccg: bad bprev");
	}
	/*
	 * Allocate the extra space in the buffer.
	 */
	if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) {
		brelse(bp);
		return (error);
	}
#ifdef QUOTA
	if (error = chkdq(ip, (long)btodb(nsize - osize), cred, 0)) {
		brelse(bp);
		return (error);
	}
#endif
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	if (bno = fragextend(ip, cg, (long)bprev, osize, nsize)) {
		if (bp->b_blkno != fsbtodb(fs, bno))
			panic("bad blockno");
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
		*bpp = bp;
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree < 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying
		 * to grow a small fragment to a larger fragment. To save
		 * time, we allocate a full sized block, then free the
		 * unused portion. If the file continues to grow, the
		 * `fragextend' call above will be able to grow it in place
		 * without further copying. If aberrant programs cause
		 * disk fragmentation to grow within 2% of the free reserve,
		 * we choose to begin optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = 0x%x, optim = %d, fs = %s\n",
		    ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
		panic("realloccg: bad optim");
		/* NOTREACHED */
	}
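	/*
	 * Worked example of the hysteresis above (hypothetical numbers):
	 * with fs_minfree = 8 and fs_dsize = 100000 frags, SPACE switches
	 * to TIME once cs_nffree drops to 4000 frags or fewer
	 * (dsize * minfree / 200), and TIME switches back to SPACE once
	 * cs_nffree reaches 6000 frags (dsize * (minfree - 2) / 100).
	 * The gap between the two thresholds keeps the mode from
	 * oscillating on every allocation.
	 */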
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
	    (u_long (*)())alloccg);
	if (bno > 0) {
		bp->b_blkno = fsbtodb(fs, bno);
		(void) vnode_pager_uncache(ITOV(ip));
		blkfree(ip, bprev, (off_t)osize);
		if (nsize < request)
			blkfree(ip, bno + numfrags(fs, nsize),
			    (off_t)(request - nsize));
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		allocbuf(bp, nsize);
		bp->b_flags |= B_DONE;
		bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
		*bpp = bp;
		return (0);
	}
#ifdef QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE);
#endif
	brelse(bp);
nospace:
	/*
	 * no space available
	 */
	fserr(fs, cred->cr_uid, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Allocate an inode in the file system.
 *
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate an inode:
 * 1) allocate the requested inode.
 * 2) allocate an inode in the same cylinder group.
 * 3) quadratically rehash into other cylinder groups, until an
 *    available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 * 1) allocate an inode in cylinder group 0.
 * 2) quadratically rehash into other cylinder groups, until an
 *    available inode is located.
 */
ialloc(pip, ipref, mode, cred, ipp)
	register struct inode *pip;
	ino_t ipref;
	int mode;
	struct ucred *cred;
	struct inode **ipp;
{
	ino_t ino;
	register struct fs *fs;
	register struct inode *ip;
	int cg, error;

	*ipp = 0;
	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = itog(fs, ipref);
	ino = (ino_t)hashalloc(pip, cg, (long)ipref, mode, ialloccg);
	if (ino == 0)
		goto noinodes;
	error = iget(pip, ino, ipp);
	if (error) {
		ifree(pip, ino, mode);
		return (error);
	}
	ip = *ipp;
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %d, fs = %s\n",
		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
		panic("ialloc: dup alloc");
	}
	if (ip->i_blocks) {				/* XXX */
		printf("free inode %s/%d had %d blocks\n",
		    fs->fs_fsmnt, ino, ip->i_blocks);
		ip->i_blocks = 0;
	}
	ip->i_flags = 0;
	/*
	 * Set up a new generation number for this inode.
	 */
	if (++nextgennumber < (u_long)time.tv_sec)
		nextgennumber = time.tv_sec;
	ip->i_gen = nextgennumber;
	return (0);
noinodes:
	fserr(fs, cred->cr_uid, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Find a cylinder to place a directory.
 *
 * The policy implemented by this algorithm is to select from
 * among those cylinder groups with above the average number of
 * free inodes, the one with the smallest number of directories.
 */
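/*
 * For example (hypothetical numbers): with four cylinder groups whose
 * free inode counts are 10, 50, 40 and 60, avgifree is 40, so groups
 * 1, 2 and 3 are eligible; if their directory counts are 9, 2 and 7,
 * group 2 wins and the first inode number of that group is returned
 * as the preference.
 */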
ino_t
dirpref(fs)
	register struct fs *fs;
{
	int cg, minndir, mincg, avgifree;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	minndir = fs->fs_ipg;
	mincg = 0;
	for (cg = 0; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
			mincg = cg;
			minndir = fs->fs_cs(fs, cg).cs_ndir;
		}
	return ((ino_t)(fs->fs_ipg * mincg));
}

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks.  An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks.  The end of one of these
 * contiguous blocks and the beginning of the next is physically separated
 * so that the disk head will be in transit between them for at least
 * fs_rotdelay milliseconds.  This is to allow time for the processor to
 * schedule another I/O transfer.
 */
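/*
 * Illustration (with the usual NDADDR = 12 direct blocks and, say,
 * fs_maxbpg = 2048): logical blocks 0-11 form the first section, and
 * each indirect block's entries are carved into runs of 2048; a new
 * cylinder group is sought whenever indx is a multiple of fs_maxbpg
 * or the previous entry in the block map is still unallocated.
 */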
daddr_t
blkpref(ip, lbn, indx, bap)
	struct inode *ip;
	daddr_t lbn;
	int indx;
	daddr_t *bap;
{
	register struct fs *fs;
	register int cg;
	int avgbfree, startcg;
	daddr_t nextblk;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR) {
			cg = itog(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (NULL);
	}
	/*
	 * One or more previous blocks have been laid out. If less
	 * than fs_maxcontig previous blocks are contiguous, the
	 * next block is requested contiguously, otherwise it is
	 * requested rotationally delayed by fs_rotdelay milliseconds.
	 */
	nextblk = bap[indx - 1] + fs->fs_frag;
	if (indx < fs->fs_maxcontig || bap[indx - fs->fs_maxcontig] +
	    blkstofrags(fs, fs->fs_maxcontig) != nextblk)
		return (nextblk);
	if (fs->fs_rotdelay != 0)
		/*
		 * Here we convert ms of delay to frags as:
		 *	(frags) = (ms) * (rev/sec) * (sect/rev) /
		 *		  ((sect/frag) * (ms/sec))
		 * then round up to the next block.
		 */
		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
		    (NSPF(fs) * 1000), fs->fs_frag);
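	/*
	 * Worked example (hypothetical geometry): fs_rotdelay = 4 ms,
	 * fs_rps = 60, fs_nsect = 32 and NSPF(fs) = 2 give
	 * 4 * 60 * 32 / (2 * 1000) = 3 frags (integer division), which
	 * roundup() raises to 8 when fs_frag = 8, i.e. the next request
	 * skips one full block.
	 */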
	return (nextblk);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 * 1) allocate the block in its requested cylinder group.
 * 2) quadratically rehash on the cylinder group number.
 * 3) brute force search for a free block.
 */
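/*
 * Probe order example (hypothetical numbers): with fs_ncg = 16 and a
 * preferred group of 5, the rehash visits groups 6, 8, 12 and 4
 * (offsets 1, 2, 4, 8 mod 16), after which the brute force pass walks
 * 7, 8, 9, ..., 15, 0, 1, 2, 3, 4.  A few groups may be probed twice;
 * the search stops as soon as any allocator call succeeds.
 */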
/*VARARGS5*/
u_long
hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	long pref;
	int size;	/* size for data blocks, mode for inodes */
	u_long (*allocator)();
{
	register struct fs *fs;
	long result;
	int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (NULL);
}

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
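/*
 * For example (hypothetical geometry): on a 4096/1024 file system,
 * extending a 2048-byte fragment to 3072 bytes succeeds only if the
 * third 1024-byte fragment of the same block is free, and fails
 * outright if the extension would spill past the block boundary.
 */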
daddr_t
fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	long bprev;
	int osize, nsize;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	long bno;
	int frags, bbase;
	int i, error;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (NULL);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (NULL);
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bprev);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(cg_blksfree(cgp), bno + i)) {
			brelse(bp);
			return (NULL);
		}
	/*
	 * The current fragment can be extended:
	 * deduct the count on the fragment being extended into,
	 * increase the count on the remaining fragment (if any),
	 * and allocate the extended piece.
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(cg_blksfree(cgp), bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(cg_blksfree(cgp), bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
	return (bprev);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
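/*
 * For example (hypothetical geometry): a 1024-byte request on a
 * 4096/1024 file system needs one frag; if the smallest free piece in
 * this group is a 3-frag run, that run is used: cg_frsum[3] is
 * decremented, one frag is taken, and the 2-frag remainder is
 * credited to cg_frsum[2].  With no partial runs at all, a whole
 * block is split instead.
 */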
daddr_t
alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	daddr_t bpref;
	int size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	register int i;
	int error, bno, frags, allocsiz;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (size == fs->fs_bsize) {
		bno = alloccgblk(fs, cgp, bpref);
		bdwrite(bp);
		return (bno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp);
			return (NULL);
		}
		bno = alloccgblk(fs, cgp, bpref);
		bpref = dtogd(fs, bno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(cg_blksfree(cgp), bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod++;
		cgp->cg_frsum[i]++;
		bdwrite(bp);
		return (bno);
	}
	bno = mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
		brelse(bp);
		return (NULL);
	}
	for (i = 0; i < frags; i++)
		clrbit(cg_blksfree(cgp), bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod++;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	bdwrite(bp);
	return (cg * fs->fs_fpg + bno);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 * 1) allocate the requested block.
 * 2) allocate a rotationally optimal block in the same cylinder.
 * 3) allocate the next available block on the block rotor for the
 *    specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
daddr_t
alloccgblk(fs, cgp, bpref)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
{
	daddr_t bno;
	int cylno, pos, delta;
	short *cylbp;
	register int i;

	if (bpref == 0) {
		bpref = cgp->cg_rotor;
		goto norot;
	}
	bpref = blknum(fs, bpref);
	bpref = dtogd(fs, bpref);
	/*
	 * if the requested block is available, use it
	 */
	if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
		bno = bpref;
		goto gotit;
	}
	/*
	 * check for a block available on the same cylinder
	 */
	cylno = cbtocylno(fs, bpref);
	if (cg_blktot(cgp)[cylno] == 0)
		goto norot;
	if (fs->fs_cpc == 0) {
		/*
		 * block layout info is not available, so just have
		 * to take any block in this cylinder.
		 */
		bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
		goto norot;
	}
	/*
	 * check the summary information to see if a block is
	 * available in the requested cylinder starting at the
	 * requested rotational position and proceeding around.
	 */
	cylbp = cg_blks(fs, cgp, cylno);
	pos = cbtorpos(fs, bpref);
	for (i = pos; i < fs->fs_nrpos; i++)
		if (cylbp[i] > 0)
			break;
	if (i == fs->fs_nrpos)
		for (i = 0; i < pos; i++)
			if (cylbp[i] > 0)
				break;
	if (cylbp[i] > 0) {
		/*
		 * found a rotational position, now find the actual
		 * block.  A panic if none is actually there.
		 */
		pos = cylno % fs->fs_cpc;
		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
		if (fs_postbl(fs, pos)[i] == -1) {
			printf("pos = %d, i = %d, fs = %s\n",
			    pos, i, fs->fs_fsmnt);
			panic("alloccgblk: cyl groups corrupted");
		}
		for (i = fs_postbl(fs, pos)[i];; ) {
			if (isblock(fs, cg_blksfree(cgp), bno + i)) {
				bno = blkstofrags(fs, (bno + i));
				goto gotit;
			}
			delta = fs_rotbl(fs)[i];
			if (delta <= 0 ||
			    delta + i > fragstoblks(fs, fs->fs_fpg))
				break;
			i += delta;
		}
		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
		panic("alloccgblk: can't find blk in cyl");
	}
norot:
	/*
	 * no blocks in the requested cylinder, so take next
	 * available one in this cylinder group.
	 */
	bno = mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (NULL);
	cgp->cg_rotor = bno;
gotit:
	clrblock(fs, cg_blksfree(cgp), (long)fragstoblks(fs, bno));
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	cylno = cbtocylno(fs, bno);
	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
	cg_blktot(cgp)[cylno]--;
	fs->fs_fmod++;
	return (cgp->cg_cgx * fs->fs_fpg + bno);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 * 1) allocate the requested inode.
 * 2) allocate the next available inode after the requested
 *    inode in the specified cylinder group.
 */
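/*
 * Rotor scan example (hypothetical map contents): if the bytes of
 * cg_inosused() from the irotor byte onward are 0xff, 0xff, 0xe7, ...,
 * skpc() skips the two all-allocated bytes, the scan lands on 0xe7
 * (binary 11100111), and the first clear bit, bit 3, yields
 * ipref = byte index * NBBY + 3.
 */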
ino_t
ialloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	daddr_t ipref;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, start, len, loc, map, i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(cg_inosused(cgp), ipref))
			goto gotit;
	}
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %d, fs = %s\n",
			    cg, cgp->cg_irotor, fs->fs_fsmnt);
			panic("ialloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = cg_inosused(cgp)[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ialloccg: block not in map");
	/* NOTREACHED */
gotit:
	setbit(cg_inosused(cgp), ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	bdwrite(bp);
	return (cg * fs->fs_ipg + ipref);
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 */
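/*
 * Reassembly example (hypothetical geometry, fs_frag = 8): if frags
 * 0-4 of a block are already free and the 3-frag piece at 5-7 is
 * freed, the block's map becomes all ones; the code below then backs
 * the 8 frags out of the nffree counts and credits one full block to
 * nbfree instead.
 */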
blkfree(ip, bno, size)
	register struct inode *ip;
	daddr_t bno;
	off_t size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, cg, blk, frags, bbase;
	register int i;
	struct ucred *cred = curproc->p_ucred;	/* XXX */

	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("blkfree: bad size");
	}
	cg = dtog(fs, bno);
	if ((unsigned)bno >= fs->fs_size) {
		printf("bad block %d, ino %d\n", bno, ip->i_number);
		fserr(fs, cred->cr_uid, "bad block");
		return;
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno))) {
			printf("dev = 0x%x, block = %d, fs = %s\n",
			    ip->i_dev, bno, fs->fs_fsmnt);
			panic("blkfree: freeing free block");
		}
		setblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
		i = cbtocylno(fs, bno);
		cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
		cg_blktot(cgp)[i]++;
	} else {
		bbase = bno - fragnum(fs, bno);
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(cg_blksfree(cgp), bno + i)) {
				printf("dev = 0x%x, block = %d, fs = %s\n",
				    ip->i_dev, bno + i, fs->fs_fsmnt);
				panic("blkfree: freeing free frag");
			}
			setbit(cg_blksfree(cgp), bno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * if a complete block has been reassembled, account for it
		 */
		if (isblock(fs, cg_blksfree(cgp),
		    (daddr_t)fragstoblks(fs, bbase))) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
			i = cbtocylno(fs, bbase);
			cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
			cg_blktot(cgp)[i]++;
		}
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Free an inode.
 *
 * The specified inode is placed back in the free map.
 */
ifree(ip, ino, mode)
	struct inode *ip;
	ino_t ino;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, cg;

	fs = ip->i_fs;
	if ((unsigned)ino >= fs->fs_ipg*fs->fs_ncg) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: range");
	}
	cg = itog(fs, ino);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	ino %= fs->fs_ipg;
	if (isclr(cg_inosused(cgp), ino)) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		if (fs->fs_ronly == 0)
			panic("ifree: freeing free inode");
	}
	clrbit(cg_inosused(cgp), ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none are
 * available.
 */
daddr_t
mapsearch(fs, cgp, bpref, allocsiz)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
	int allocsiz;
{
	daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[start],
	    (u_char *)fragtbl[fs->fs_frag],
	    (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[0],
		    (u_char *)fragtbl[fs->fs_frag],
		    (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, cg_blksfree(cgp), bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
	panic("alloccg: block not in map");
	return (-1);
}

/*
 * Fserr prints the name of a file system with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
fserr(fs, uid, cp)
	struct fs *fs;
	uid_t uid;
	char *cp;
{

	log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
}