don't nlist /vmunix unless file changes (from dpk@brl)
[unix-history] / usr / src / sys / ufs / ffs / ufs_inode.c
... / ...
CommitLineData
1/* ufs_inode.c 6.6 84/07/02 */
2
3#include "../h/param.h"
4#include "../h/systm.h"
5#include "../h/mount.h"
6#include "../h/dir.h"
7#include "../h/user.h"
8#include "../h/inode.h"
9#include "../h/fs.h"
10#include "../h/conf.h"
11#include "../h/buf.h"
12#ifdef QUOTA
13#include "../h/quota.h"
14#endif
15#include "../h/kernel.h"
16
17#define INOHSZ 64
18#if ((INOHSZ&(INOHSZ-1)) == 0)
19#define INOHASH(dev,ino) (((dev)+(ino))&(INOHSZ-1))
20#else
21#define INOHASH(dev,ino) (((unsigned)((dev)+(ino)))%INOHSZ)
22#endif
23
24union ihead { /* inode LRU cache, Chris Maltby */
25 union ihead *ih_head[2];
26 struct inode *ih_chain[2];
27} ihead[INOHSZ];
28
29struct inode *ifreeh, **ifreet;
30
31/*
32 * Initialize hash links for inodes
33 * and build inode free list.
34 */
35ihinit()
36{
37 register int i;
38 register struct inode *ip = inode;
39 register union ihead *ih = ihead;
40
41 for (i = INOHSZ; --i >= 0; ih++) {
42 ih->ih_head[0] = ih;
43 ih->ih_head[1] = ih;
44 }
45 ifreeh = ip;
46 ifreet = &ip->i_freef;
47 ip->i_freeb = &ifreeh;
48 ip->i_forw = ip;
49 ip->i_back = ip;
50 for (i = ninode; --i > 0; ) {
51 ++ip;
52 ip->i_forw = ip;
53 ip->i_back = ip;
54 *ifreet = ip;
55 ip->i_freeb = ifreet;
56 ifreet = &ip->i_freef;
57 }
58 ip->i_freef = NULL;
59}
60
61#ifdef notdef
62/*
63 * Find an inode if it is incore.
64 * This is the equivalent, for inodes,
65 * of ``incore'' in bio.c or ``pfind'' in subr.c.
66 */
67struct inode *
68ifind(dev, ino)
69 dev_t dev;
70 ino_t ino;
71{
72 register struct inode *ip;
73 register union ihead *ih;
74
75 ih = &ihead[INOHASH(dev, ino)];
76 for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
77 if (ino==ip->i_number && dev==ip->i_dev)
78 return (ip);
79 return ((struct inode *)0);
80}
81#endif notdef
82
83/*
84 * Look up an inode by device,inumber.
85 * If it is in core (in the inode structure),
86 * honor the locking protocol.
87 * If it is not in core, read it in from the
88 * specified device.
89 * If the inode is mounted on, perform
90 * the indicated indirection.
91 * In all cases, a pointer to a locked
92 * inode structure is returned.
93 *
94 * panic: no imt -- if the mounted file
95 * system is not in the mount table.
96 * "cannot happen"
97 */
98struct inode *
99iget(dev, fs, ino)
100 dev_t dev;
101 register struct fs *fs;
102 ino_t ino;
103{
104 register struct inode *ip;
105 register union ihead *ih;
106 register struct mount *mp;
107 register struct buf *bp;
108 register struct dinode *dp;
109 register struct inode *iq;
110 struct inode *xp;
111
112
113loop:
114 ih = &ihead[INOHASH(dev, ino)];
115 for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
116 if (ino == ip->i_number && dev == ip->i_dev) {
117 /*
118 * Following is essentially an inline expanded
119 * copy of igrab(), expanded inline for speed,
120 * and so that the test for a mounted on inode
121 * can be deferred until after we are sure that
122 * the inode isn't busy.
123 */
124 if ((ip->i_flag&ILOCKED) != 0) {
125 ip->i_flag |= IWANT;
126 sleep((caddr_t)ip, PINOD);
127 goto loop;
128 }
129 if ((ip->i_flag&IMOUNT) != 0) {
130 for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++)
131 if(mp->m_inodp == ip) {
132 dev = mp->m_dev;
133 fs = mp->m_bufp->b_un.b_fs;
134 ino = ROOTINO;
135 goto loop;
136 }
137 panic("no imt");
138 }
139 if (ip->i_count == 0) { /* ino on free list */
140 if (iq = ip->i_freef)
141 iq->i_freeb = ip->i_freeb;
142 else
143 ifreet = ip->i_freeb;
144 *ip->i_freeb = iq;
145 ip->i_freef = NULL;
146 ip->i_freeb = NULL;
147 }
148 ip->i_count++;
149 ip->i_flag |= ILOCKED;
150 return(ip);
151 }
152
153 if ((ip = ifreeh) == NULL) {
154 tablefull("inode");
155 u.u_error = ENFILE;
156 return(NULL);
157 }
158 if (iq = ip->i_freef)
159 iq->i_freeb = &ifreeh;
160 ifreeh = iq;
161 ip->i_freef = NULL;
162 ip->i_freeb = NULL;
163 /*
164 * Now to take inode off the hash chain it was on
165 * (initially, or after an iflush, it is on a "hash chain"
166 * consisting entirely of itself, and pointed to by no-one,
167 * but that doesn't matter), and put it on the chain for
168 * its new (ino, dev) pair
169 */
170 remque(ip);
171 insque(ip, ih);
172#ifdef QUOTA
173 dqrele(ip->i_dquot);
174#endif
175 ip->i_dev = dev;
176 ip->i_fs = fs;
177 ip->i_number = ino;
178 ip->i_id = ++nextinodeid; /* also used in rename */
179 /*
180 * At an absurd rate of 100 calls/second,
181 * this should occur once every 8 months.
182 */
183 if (nextinodeid < 0)
184 for (nextinodeid = 0, xp = inode; xp < inodeNINODE; xp++)
185 xp->i_id = 0;
186 ip->i_flag = ILOCKED;
187 ip->i_count++;
188 ip->i_lastr = 0;
189 bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize);
190 /*
191 * Check I/O errors
192 */
193 if ((bp->b_flags&B_ERROR) != 0) {
194 brelse(bp);
195 /*
196 * the inode doesn't contain anything useful, so it would
197 * be misleading to leave it on its hash chain.
198 * 'iput' will take care of putting it back on the free list.
199 */
200 remque(ip);
201 ip->i_forw = ip;
202 ip->i_back = ip;
203 /*
204 * we also loose its inumber, just in case (as iput
205 * doesn't do that any more) - but as it isn't on its
206 * hash chain, I doubt if this is really necessary .. kre
207 * (probably the two methods are interchangable)
208 */
209 ip->i_number = 0;
210#ifdef QUOTA
211 ip->i_dquot = NODQUOT;
212#endif
213 iput(ip);
214 return(NULL);
215 }
216 dp = bp->b_un.b_dino;
217 dp += itoo(fs, ino);
218 ip->i_ic = dp->di_ic;
219 brelse(bp);
220#ifdef QUOTA
221 if (ip->i_mode == 0)
222 ip->i_dquot = NODQUOT;
223 else
224 ip->i_dquot = inoquota(ip);
225#endif
226 return (ip);
227}
228
229/*
230 * Convert a pointer to an inode into a reference to an inode.
231 *
232 * This is basically the internal piece of iget (after the
233 * inode pointer is located) but without the test for mounted
234 * filesystems. It is caller's responsibility to check that
235 * the inode pointer is valid.
236 */
237igrab(ip)
238 register struct inode *ip;
239{
240 while ((ip->i_flag&ILOCKED) != 0) {
241 ip->i_flag |= IWANT;
242 sleep((caddr_t)ip, PINOD);
243 }
244 if (ip->i_count == 0) { /* ino on free list */
245 register struct inode *iq;
246
247 if (iq = ip->i_freef)
248 iq->i_freeb = ip->i_freeb;
249 else
250 ifreet = ip->i_freeb;
251 *ip->i_freeb = iq;
252 ip->i_freef = NULL;
253 ip->i_freeb = NULL;
254 }
255 ip->i_count++;
256 ip->i_flag |= ILOCKED;
257}
258
259/*
260 * Decrement reference count of
261 * an inode structure.
262 * On the last reference,
263 * write the inode out and if necessary,
264 * truncate and deallocate the file.
265 */
266iput(ip)
267 register struct inode *ip;
268{
269
270 if ((ip->i_flag & ILOCKED) == 0)
271 panic("iput");
272 iunlock(ip);
273 irele(ip);
274}
275
276irele(ip)
277 register struct inode *ip;
278{
279 int mode;
280
281 if (ip->i_count == 1) {
282 ip->i_flag |= ILOCKED;
283 if (ip->i_nlink <= 0) {
284 itrunc(ip, (u_long)0);
285 mode = ip->i_mode;
286 ip->i_mode = 0;
287 ip->i_rdev = 0;
288 ip->i_flag |= IUPD|ICHG;
289 ifree(ip, ip->i_number, mode);
290#ifdef QUOTA
291 (void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
292 dqrele(ip->i_dquot);
293 ip->i_dquot = NODQUOT;
294#endif
295 }
296 IUPDAT(ip, &time, &time, 0);
297 iunlock(ip);
298 ip->i_flag = 0;
299 /*
300 * Put the inode on the end of the free list.
301 * Possibly in some cases it would be better to
302 * put the inode at the head of the free list,
303 * (eg: where i_mode == 0 || i_number == 0)
304 * but I will think about that later .. kre
305 * (i_number is rarely 0 - only after an i/o error in iget,
306 * where i_mode == 0, the inode will probably be wanted
307 * again soon for an ialloc, so possibly we should keep it)
308 */
309 if (ifreeh) {
310 *ifreet = ip;
311 ip->i_freeb = ifreet;
312 } else {
313 ifreeh = ip;
314 ip->i_freeb = &ifreeh;
315 }
316 ip->i_freef = NULL;
317 ifreet = &ip->i_freef;
318 } else if (!(ip->i_flag & ILOCKED))
319 ITIMES(ip, &time, &time);
320 ip->i_count--;
321}
322
323/*
324 * Check accessed and update flags on
325 * an inode structure.
326 * If any is on, update the inode
327 * with the current time.
328 * If waitfor is given, then must insure
329 * i/o order so wait for write to complete.
330 */
331iupdat(ip, ta, tm, waitfor)
332 register struct inode *ip;
333 struct timeval *ta, *tm;
334 int waitfor;
335{
336 register struct buf *bp;
337 struct dinode *dp;
338 register struct fs *fp;
339
340 fp = ip->i_fs;
341 if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) {
342 if (fp->fs_ronly)
343 return;
344 bp = bread(ip->i_dev, fsbtodb(fp, itod(fp, ip->i_number)),
345 (int)fp->fs_bsize);
346 if (bp->b_flags & B_ERROR) {
347 brelse(bp);
348 return;
349 }
350 if (ip->i_flag&IACC)
351 ip->i_atime = ta->tv_sec;
352 if (ip->i_flag&IUPD)
353 ip->i_mtime = tm->tv_sec;
354 if (ip->i_flag&ICHG)
355 ip->i_ctime = time.tv_sec;
356 ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
357 dp = bp->b_un.b_dino + itoo(fp, ip->i_number);
358 dp->di_ic = ip->i_ic;
359 if (waitfor)
360 bwrite(bp);
361 else
362 bdwrite(bp);
363 }
364}
365
366#define SINGLE 0 /* index of single indirect block */
367#define DOUBLE 1 /* index of double indirect block */
368#define TRIPLE 2 /* index of triple indirect block */
369/*
370 * Truncate the inode ip to at most
371 * length size. Free affected disk
372 * blocks -- the blocks of the file
373 * are removed in reverse order.
374 *
375 * NB: triple indirect blocks are untested.
376 */
377itrunc(oip, length)
378 struct inode *oip;
379 u_long length;
380{
381 register i;
382 register daddr_t lastblock;
383 daddr_t bn, lastiblock[NIADDR];
384 register struct fs *fs;
385 register struct inode *ip;
386 struct inode tip;
387 long blocksreleased = 0, nblocks;
388 long indirtrunc();
389 int level;
390
391 if (oip->i_size <= length) {
392 oip->i_flag |= ICHG|IUPD;
393 iupdat(oip, &time, &time, 1);
394 return;
395 }
396 /*
397 * Calculate index into inode's block list of
398 * last direct and indirect blocks (if any)
399 * which we want to keep. Lastblock is -1 when
400 * the file is truncated to 0.
401 */
402 fs = oip->i_fs;
403 lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
404 lastiblock[SINGLE] = lastblock - NDADDR;
405 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
406 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
407 nblocks = btodb(fs->fs_bsize);
408 /*
409 * Update size of file and block pointers
410 * on disk before we start freeing blocks.
411 * If we crash before free'ing blocks below,
412 * the blocks will be returned to the free list.
413 * lastiblock values are also normalized to -1
414 * for calls to indirtrunc below.
415 * (? fsck doesn't check validity of pointers in indirect blocks)
416 */
417 tip = *oip;
418 for (level = TRIPLE; level >= SINGLE; level--)
419 if (lastiblock[level] < 0) {
420 oip->i_ib[level] = 0;
421 lastiblock[level] = -1;
422 }
423 for (i = NDADDR - 1; i > lastblock; i--)
424 oip->i_db[i] = 0;
425 oip->i_size = length;
426 oip->i_flag |= ICHG|IUPD;
427 iupdat(oip, &time, &time, 1);
428 ip = &tip;
429
430 /*
431 * Indirect blocks first.
432 */
433 for (level = TRIPLE; level >= SINGLE; level--) {
434 bn = ip->i_ib[level];
435 if (bn != 0) {
436 blocksreleased +=
437 indirtrunc(ip, bn, lastiblock[level], level);
438 if (lastiblock[level] < 0) {
439 ip->i_ib[level] = 0;
440 free(ip, bn, (off_t)fs->fs_bsize);
441 blocksreleased += nblocks;
442 }
443 }
444 if (lastiblock[level] >= 0)
445 goto done;
446 }
447
448 /*
449 * All whole direct blocks or frags.
450 */
451 for (i = NDADDR - 1; i > lastblock; i--) {
452 register int size;
453
454 bn = ip->i_db[i];
455 if (bn == 0)
456 continue;
457 ip->i_db[i] = 0;
458 size = (off_t)blksize(fs, ip, i);
459 free(ip, bn, size);
460 blocksreleased += btodb(size);
461 }
462 if (lastblock < 0)
463 goto done;
464
465 /*
466 * Finally, look for a change in size of the
467 * last direct block; release any frags.
468 */
469 bn = ip->i_db[lastblock];
470 if (bn != 0) {
471 int oldspace, newspace;
472
473 /*
474 * Calculate amount of space we're giving
475 * back as old block size minus new block size.
476 */
477 oldspace = blksize(fs, ip, lastblock);
478 ip->i_size = length;
479 newspace = blksize(fs, ip, lastblock);
480 if (newspace == 0)
481 panic("itrunc: newspace");
482 if (oldspace - newspace > 0) {
483 /*
484 * Block number of space to be free'd is
485 * the old block # plus the number of frags
486 * required for the storage we're keeping.
487 */
488 bn += numfrags(fs, newspace);
489 free(ip, bn, oldspace - newspace);
490 blocksreleased += btodb(oldspace - newspace);
491 }
492 }
493done:
494/* BEGIN PARANOIA */
495 for (level = SINGLE; level <= TRIPLE; level++)
496 if (ip->i_ib[level] != oip->i_ib[level])
497 panic("itrunc1");
498 for (i = 0; i < NDADDR; i++)
499 if (ip->i_db[i] != oip->i_db[i])
500 panic("itrunc2");
501/* END PARANOIA */
502 oip->i_blocks -= blocksreleased;
503 if (oip->i_blocks < 0) /* sanity */
504 oip->i_blocks = 0;
505 oip->i_flag |= ICHG;
506#ifdef QUOTA
507 (void) chkdq(oip, -blocksreleased, 0);
508#endif
509}
510
511/*
512 * Release blocks associated with the inode ip and
513 * stored in the indirect block bn. Blocks are free'd
514 * in LIFO order up to (but not including) lastbn. If
515 * level is greater than SINGLE, the block is an indirect
516 * block and recursive calls to indirtrunc must be used to
517 * cleanse other indirect blocks.
518 *
519 * NB: triple indirect blocks are untested.
520 */
521long
522indirtrunc(ip, bn, lastbn, level)
523 register struct inode *ip;
524 daddr_t bn, lastbn;
525 int level;
526{
527 register int i;
528 struct buf *bp, *copy;
529 register daddr_t *bap;
530 register struct fs *fs = ip->i_fs;
531 daddr_t nb, last;
532 long factor;
533 int blocksreleased = 0, nblocks;
534
535 /*
536 * Calculate index in current block of last
537 * block to be kept. -1 indicates the entire
538 * block so we need not calculate the index.
539 */
540 factor = 1;
541 for (i = SINGLE; i < level; i++)
542 factor *= NINDIR(fs);
543 last = lastbn;
544 if (lastbn > 0)
545 last /= factor;
546 nblocks = btodb(fs->fs_bsize);
547 /*
548 * Get buffer of block pointers, zero those
549 * entries corresponding to blocks to be free'd,
550 * and update on disk copy first.
551 */
552 copy = geteblk((int)fs->fs_bsize);
553 bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize);
554 if (bp->b_flags&B_ERROR) {
555 brelse(copy);
556 brelse(bp);
557 return (0);
558 }
559 bap = bp->b_un.b_daddr;
560 bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize);
561 bzero((caddr_t)&bap[last + 1],
562 (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
563 bwrite(bp);
564 bp = copy, bap = bp->b_un.b_daddr;
565
566 /*
567 * Recursively free totally unused blocks.
568 */
569 for (i = NINDIR(fs) - 1; i > last; i--) {
570 nb = bap[i];
571 if (nb == 0)
572 continue;
573 if (level > SINGLE)
574 blocksreleased +=
575 indirtrunc(ip, nb, (daddr_t)-1, level - 1);
576 free(ip, nb, (int)fs->fs_bsize);
577 blocksreleased += nblocks;
578 }
579
580 /*
581 * Recursively free last partial block.
582 */
583 if (level > SINGLE && lastbn >= 0) {
584 last = lastbn % factor;
585 nb = bap[i];
586 if (nb != 0)
587 blocksreleased += indirtrunc(ip, nb, last, level - 1);
588 }
589 brelse(bp);
590 return (blocksreleased);
591}
592
593/*
594 * remove any inodes in the inode cache belonging to dev
595 *
596 * There should not be any active ones, return error if any are found
597 * (nb: this is a user error, not a system err)
598 *
599 * Also, count the references to dev by block devices - this really
600 * has nothing to do with the object of the procedure, but as we have
601 * to scan the inode table here anyway, we might as well get the
602 * extra benefit.
603 *
604 * this is called from sumount()/sys3.c when dev is being unmounted
605 */
606#ifdef QUOTA
607iflush(dev, iq)
608 dev_t dev;
609 struct inode *iq;
610#else
611iflush(dev)
612 dev_t dev;
613#endif
614{
615 register struct inode *ip;
616 register open = 0;
617
618 for (ip = inode; ip < inodeNINODE; ip++) {
619#ifdef QUOTA
620 if (ip != iq && ip->i_dev == dev)
621#else
622 if (ip->i_dev == dev)
623#endif
624 if (ip->i_count)
625 return(-1);
626 else {
627 remque(ip);
628 ip->i_forw = ip;
629 ip->i_back = ip;
630 /*
631 * as i_count == 0, the inode was on the free
632 * list already, just leave it there, it will
633 * fall off the bottom eventually. We could
634 * perhaps move it to the head of the free
635 * list, but as umounts are done so
636 * infrequently, we would gain very little,
637 * while making the code bigger.
638 */
639#ifdef QUOTA
640 dqrele(ip->i_dquot);
641 ip->i_dquot = NODQUOT;
642#endif
643 }
644 else if (ip->i_count && (ip->i_mode&IFMT)==IFBLK &&
645 ip->i_rdev == dev)
646 open++;
647 }
648 return (open);
649}
650
651/*
652 * Lock an inode. If its already locked, set the WANT bit and sleep.
653 */
654ilock(ip)
655 register struct inode *ip;
656{
657
658 ILOCK(ip);
659}
660
661/*
662 * Unlock an inode. If WANT bit is on, wakeup.
663 */
664iunlock(ip)
665 register struct inode *ip;
666{
667
668 IUNLOCK(ip);
669}