/*
 * 386BSD 0.1 development — usr/src/sys.386bsd/ufs/ufs_inode.c
 * (history-repo commit b688fc87, WJ)
 */
1/*
2 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)ufs_inode.c 7.40 (Berkeley) 5/8/91
34 */
35
36#include "param.h"
37#include "systm.h"
38#include "mount.h"
39#include "proc.h"
40#include "file.h"
41#include "buf.h"
42#include "vnode.h"
43#include "kernel.h"
44#include "malloc.h"
45
46#include "quota.h"
47#include "inode.h"
48#include "fs.h"
49#include "ufsmount.h"
50
/*
 * Size of the in-core inode hash table.  When INOHSZ is a power of
 * two the hash reduces to a mask; otherwise a modulus is used.
 */
#define INOHSZ 512
#if ((INOHSZ&(INOHSZ-1)) == 0)
#define INOHASH(dev,ino) (((dev)+(ino))&(INOHSZ-1))
#else
#define INOHASH(dev,ino) (((unsigned)((dev)+(ino)))%INOHSZ)
#endif

/*
 * A hash bucket header.  It overlays the forward/back link words at
 * the front of struct inode so that an empty bucket can point at
 * itself and insque()/remque() can treat the header as a list node
 * (see the chain walk in iget() and removal in ufs_reclaim()).
 */
union ihead {
	union ihead *ih_head[2];
	struct inode *ih_chain[2];
} ihead[INOHSZ];

int prtactive;	/* 1 => print out reclaim of active vnodes */
64
65/*
66 * Initialize hash links for inodes.
67 */
68ufs_init()
69{
70 register int i;
71 register union ihead *ih = ihead;
72
73#ifndef lint
74 if (VN_MAXPRIVATE < sizeof(struct inode))
75 panic("ihinit: too small");
76#endif /* not lint */
77 for (i = INOHSZ; --i >= 0; ih++) {
78 ih->ih_head[0] = ih;
79 ih->ih_head[1] = ih;
80 }
81#ifdef QUOTA
82 dqinit();
83#endif /* QUOTA */
84}
85
/*
 * Look up a UFS dinode number to find its incore vnode.
 * If it is not in core, read it in from the specified device.
 * If it is in core, wait for the lock bit to clear, then
 * return the inode locked. Detection and handling of mount
 * points must be done by the calling routine.
 */
iget(xp, ino, ipp)
	struct inode *xp;	/* an inode on the same filesystem/device */
	ino_t ino;		/* inode number to look up */
	struct inode **ipp;	/* out: locked, referenced inode (or 0 on error) */
{
	dev_t dev = xp->i_dev;
	struct mount *mntp = ITOV(xp)->v_mount;
	register struct fs *fs = VFSTOUFS(mntp)->um_fs;
	extern struct vnodeops ufs_vnodeops, spec_inodeops;
	register struct inode *ip, *iq;
	register struct vnode *vp;
	struct vnode *nvp;
	struct buf *bp;
	struct dinode *dp;
	union ihead *ih;
	int i, error;

	ih = &ihead[INOHASH(dev, ino)];
loop:
	/* Scan the hash chain for an incore inode matching (dev, ino). */
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
		if (ino != ip->i_number || dev != ip->i_dev)
			continue;
		/*
		 * Found but locked: sleep until released, then rescan from
		 * the top — the inode may have been purged while we slept.
		 */
		if ((ip->i_flag&ILOCKED) != 0) {
			ip->i_flag |= IWANT;
			sleep((caddr_t)ip, PINOD);
			goto loop;
		}
		/* A failed vget means the vnode is being reclaimed; retry. */
		if (vget(ITOV(ip)))
			goto loop;
		*ipp = ip;
		return(0);
	}
	/*
	 * Allocate a new inode.
	 */
	if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) {
		*ipp = 0;
		return (error);
	}
	ip = VTOI(nvp);
	ip->i_vnode = nvp;
	ip->i_flag = 0;
	ip->i_devvp = 0;
	ip->i_mode = 0;
	ip->i_diroff = 0;
	ip->i_lockf = 0;
#ifdef QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		ip->i_dquot[i] = NODQUOT;
#endif
	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip->i_dev = dev;
	ip->i_number = ino;
	insque(ip, ih);
	ILOCK(ip);
	/*
	 * Read in the disk contents for the inode.
	 */
	if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain.
		 * Iput() will take care of putting it back on the free list.
		 */
		remque(ip);
		ip->i_forw = ip;
		ip->i_back = ip;
		/*
		 * Unlock and discard unneeded inode.
		 */
		iput(ip);
		brelse(bp);
		*ipp = 0;
		return (error);
	}
	/* Copy this inode's dinode out of the filesystem block just read. */
	dp = bp->b_un.b_dino;
	dp += itoo(fs, ino);
	ip->i_din = *dp;
	brelse(bp);
	/*
	 * Initialize the associated vnode
	 */
	vp = ITOV(ip);
	vp->v_type = IFTOVT(ip->i_mode);
	if (vp->v_type == VFIFO) {
#ifdef FIFO
		extern struct vnodeops fifo_inodeops;
		vp->v_op = &fifo_inodeops;
#else
		/* FIFOs unsupported in this configuration. */
		iput(ip);
		*ipp = 0;
		return (EOPNOTSUPP);
#endif /* FIFO */
	}
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		vp->v_op = &spec_inodeops;
		/*
		 * checkalias returns a pre-existing vnode aliasing the same
		 * device; if so, migrate our identity onto it and drop ours.
		 */
		if (nvp = checkalias(vp, ip->i_rdev, mntp)) {
			/*
			 * Reinitialize aliased inode.
			 */
			vp = nvp;
			iq = VTOI(vp);
			iq->i_vnode = vp;
			iq->i_flag = 0;
			ILOCK(iq);
			iq->i_din = ip->i_din;
			iq->i_dev = dev;
			iq->i_number = ino;
			insque(iq, ih);
			/*
			 * Discard unneeded vnode
			 */
			ip->i_mode = 0;
			iput(ip);
			ip = iq;
		}
	}
	if (ino == ROOTINO)
		vp->v_flag |= VROOT;
	/*
	 * Finish inode initialization.
	 */
	ip->i_fs = fs;
	ip->i_devvp = VFSTOUFS(mntp)->um_devvp;
	VREF(ip->i_devvp);
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)time.tv_sec)
			nextgennumber = time.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IMOD;
	}
	*ipp = ip;
	return (0);
}
238
239/*
240 * Unlock and decrement the reference count of an inode structure.
241 */
242iput(ip)
243 register struct inode *ip;
244{
245
246 if ((ip->i_flag & ILOCKED) == 0)
247 panic("iput");
248 IUNLOCK(ip);
249 vrele(ITOV(ip));
250}
251
/*
 * Last reference to an inode, write the inode out and if necessary,
 * truncate and deallocate the file.
 */
ufs_inactive(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	register struct inode *ip = VTOI(vp);
	int mode, error = 0;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_inactive: pushing active", vp);
	/*
	 * Get rid of inodes related to stale file handles.
	 */
	if (ip->i_mode == 0) {
		if ((vp->v_flag & VXLOCK) == 0)
			vgone(vp);
		return (0);
	}
	ILOCK(ip);
	/*
	 * Link count zero on a writable filesystem: the file has been
	 * removed, so release its data blocks and free the on-disk inode.
	 */
	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
#ifdef QUOTA
		if (!getinoquota(ip))
			(void) chkiq(ip, -1, NOCRED, 0);
#endif
		error = itrunc(ip, (u_long)0, 0);
		mode = ip->i_mode;
		ip->i_mode = 0;
		ip->i_rdev = 0;
		ip->i_flag |= IUPD|ICHG;
		ifree(ip, ip->i_number, mode);
	}
	/* Push any pending time/flag updates to disk (delayed write). */
	IUPDAT(ip, &time, &time, 0);
	IUNLOCK(ip);
	ip->i_flag = 0;
	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	if (vp->v_usecount == 0 && ip->i_mode == 0)
		vgone(vp);
	return (error);
}
297
/*
 * Reclaim an inode so that it can be used for other purposes.
 * Detaches the inode from the hash chain, the name cache, the device
 * vnode, and (if QUOTA) any quota structures.
 */
ufs_reclaim(vp)
	register struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);
	int i;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_reclaim: pushing active", vp);
	/*
	 * Remove the inode from its hash chain.
	 */
	remque(ip);
	/* Leave the links self-referencing so a stray remque is harmless. */
	ip->i_forw = ip;
	ip->i_back = ip;
	/*
	 * Purge old data structures associated with the inode.
	 */
	cache_purge(vp);
	if (ip->i_devvp) {
		vrele(ip->i_devvp);
		ip->i_devvp = 0;
	}
#ifdef QUOTA
	for (i = 0; i < MAXQUOTAS; i++) {
		if (ip->i_dquot[i] != NODQUOT) {
			dqrele(vp, ip->i_dquot[i]);
			ip->i_dquot[i] = NODQUOT;
		}
	}
#endif
	ip->i_flag = 0;
	return (0);
}
334
/*
 * Update the access, modified, and inode change times as specified
 * by the IACC, IMOD, and ICHG flags respectively. The IUPD flag
 * is used to specify that the inode needs to be updated but that
 * the times have already been set. The access and modified times
 * are taken from the second and third parameters; the inode change
 * time is always taken from the current time. If waitfor is set,
 * then wait for the disk write of the inode to complete.
 */
iupdat(ip, ta, tm, waitfor)
	register struct inode *ip;
	struct timeval *ta, *tm;
	int waitfor;
{
	struct buf *bp;
	struct vnode *vp = ITOV(ip);
	struct dinode *dp;
	register struct fs *fs;
	int error;

	fs = ip->i_fs;
	/* Nothing pending: avoid the disk read/write entirely. */
	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
		return (0);
	/* Never push updates to a read-only filesystem. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (0);
	/* Read-modify-write the filesystem block holding this dinode. */
	error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
		(int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	if (ip->i_flag&IACC)
		ip->i_atime = ta->tv_sec;
	if (ip->i_flag&IUPD)
		ip->i_mtime = tm->tv_sec;
	if (ip->i_flag&ICHG)
		ip->i_ctime = time.tv_sec;
	ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
	/* Copy the incore dinode into its slot within the buffer. */
	dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
	*dp = ip->i_din;
	if (waitfor) {
		return (bwrite(bp));	/* synchronous: wait for the write */
	} else {
		bdwrite(bp);		/* delayed write */
		return (0);
	}
}
382
#define SINGLE 0 /* index of single indirect block */
#define DOUBLE 1 /* index of double indirect block */
#define TRIPLE 2 /* index of triple indirect block */
/*
 * Truncate the inode ip to at most length size. Free affected disk
 * blocks -- the blocks of the file are removed in reverse order.
 *
 * The inode is written out (with truncated block pointers) BEFORE any
 * blocks are freed, so a crash mid-truncate leaks no referenced blocks;
 * the freed blocks are recovered by fsck.
 *
 * NB: triple indirect blocks are untested.
 */
itrunc(oip, length, flags)
	register struct inode *oip;
	u_long length;		/* new size; must be <= current size to shrink */
	int flags;		/* IO_SYNC forces synchronous partial-block write */
{
	register daddr_t lastblock;
	daddr_t bn, lbn, lastiblock[NIADDR];
	register struct fs *fs;
	register struct inode *ip;
	struct buf *bp;
	int offset, osize, size, level;
	long count, nblocks, blocksreleased = 0;
	register int i;
	int aflags, error, allerror;
	struct inode tip;	/* scratch copy used while freeing old blocks */

	vnode_pager_setsize(ITOV(oip), length);
	/* Not shrinking: just mark times changed and flush the inode. */
	if (oip->i_size <= length) {
		oip->i_flag |= ICHG|IUPD;
		error = iupdat(oip, &time, &time, 1);
		return (error);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep. Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	fs = oip->i_fs;
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Update the size of the file. If the file is not being
	 * truncated to a block boundry, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever become accessable again because
	 * of subsequent file growth.
	 */
	osize = oip->i_size;
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
#ifdef QUOTA
		if (error = getinoquota(oip))
			return (error);
#endif
		/* Fetch (or allocate) the partial block so it can be zeroed. */
		if (error = balloc(oip, lbn, offset, &bp, aflags))
			return (error);
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
		(void) vnode_pager_uncache(ITOV(oip));
		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
		allocbuf(bp, size);
		if (flags & IO_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);
	}
	/*
	 * Update file and block pointers
	 * on disk before we start freeing blocks.
	 * If we crash before free'ing blocks below,
	 * the blocks will be returned to the free list.
	 * lastiblock values are also normalized to -1
	 * for calls to indirtrunc below.
	 */
	tip = *oip;
	tip.i_size = osize;
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= ICHG|IUPD;
	vinvalbuf(ITOV(oip), (length > 0));
	allerror = iupdat(oip, &time, &time, MNT_WAIT);

	/*
	 * Indirect blocks first.
	 * Work from the old copy (tip) so the pointers being freed
	 * are still visible even though oip has been zeroed above.
	 */
	ip = &tip;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ip->i_ib[level];
		if (bn != 0) {
			error = indirtrunc(ip, bn, lastiblock[level], level,
				&count);
			if (error)
				allerror = error;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				ip->i_ib[level] = 0;
				blkfree(ip, bn, (off_t)fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		/* This level is partially kept: nothing shallower goes away. */
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		register off_t bsize;

		bn = ip->i_db[i];
		if (bn == 0)
			continue;
		ip->i_db[i] = 0;
		bsize = (off_t)blksize(fs, ip, i);
		blkfree(ip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ip->i_db[lastblock];
	if (bn != 0) {
		off_t oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			blkfree(ip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
/* BEGIN PARANOIA */
	/* The scratch copy and the real inode must agree on kept pointers. */
	for (level = SINGLE; level <= TRIPLE; level++)
		if (ip->i_ib[level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (ip->i_db[i] != oip->i_db[i])
			panic("itrunc2");
/* END PARANOIA */
	oip->i_blocks -= blocksreleased;
	if (oip->i_blocks < 0) /* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= ICHG;
#ifdef QUOTA
	if (!getinoquota(oip))
		(void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	return (allerror);
}
565
/*
 * Release blocks associated with the inode ip and
 * stored in the indirect block bn. Blocks are free'd
 * in LIFO order up to (but not including) lastbn. If
 * level is greater than SINGLE, the block is an indirect
 * block and recursive calls to indirtrunc must be used to
 * cleanse other indirect blocks.
 *
 * Returns the number of device blocks released via *countp.
 *
 * NB: triple indirect blocks are untested.
 */
indirtrunc(ip, bn, lastbn, level, countp)
	register struct inode *ip;
	daddr_t bn, lastbn;	/* lastbn < 0 frees the whole block */
	int level;
	long *countp;
{
	register int i;
	struct buf *bp;
	register struct fs *fs = ip->i_fs;
	register daddr_t *bap;
	daddr_t *copy, nb, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept. -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those
	 * entries corresponding to blocks to be free'd,
	 * and update on disk copy first.
	 */
	error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize,
		NOCRED, &bp);
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}
	bap = bp->b_un.b_daddr;
	/*
	 * Keep a private copy of the pointers: the on-disk block is
	 * zeroed and written out before the blocks it names are freed,
	 * preserving crash consistency (see itrunc()).
	 */
	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
	bzero((caddr_t)&bap[last + 1],
		(u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
	if (last == -1)
		bp->b_flags |= B_INVAL;
	error = bwrite(bp);
	if (error)
		allerror = error;
	bap = copy;

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1; i > last; i--) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = indirtrunc(ip, nb, (daddr_t)-1, level - 1,
				&blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		blkfree(ip, nb, (off_t)fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 * (On loop exit above, i == last: bap[i] is the boundary block.)
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = indirtrunc(ip, nb, last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	FREE(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}
662
663/*
664 * Lock an inode. If its already locked, set the WANT bit and sleep.
665 */
666ilock(ip)
667 register struct inode *ip;
668{
669
670 while (ip->i_flag & ILOCKED) {
671 ip->i_flag |= IWANT;
672 if (ip->i_spare0 == curproc->p_pid)
673 panic("locking against myself");
674 ip->i_spare1 = curproc->p_pid;
675 (void) sleep((caddr_t)ip, PINOD);
676 }
677 ip->i_spare1 = 0;
678 ip->i_spare0 = curproc->p_pid;
679 ip->i_flag |= ILOCKED;
680}
681
682/*
683 * Unlock an inode. If WANT bit is on, wakeup.
684 */
685iunlock(ip)
686 register struct inode *ip;
687{
688
689 if ((ip->i_flag & ILOCKED) == 0)
690 vprint("iunlock: unlocked inode", ITOV(ip));
691 ip->i_spare0 = 0;
692 ip->i_flag &= ~ILOCKED;
693 if (ip->i_flag&IWANT) {
694 ip->i_flag &= ~IWANT;
695 wakeup((caddr_t)ip);
696 }
697}