/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	8.12 (Berkeley) %G%
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
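
/*
 * These two tables presumably back the IFTOVT() and VTTOIF()
 * conversion macros in <sys/vnode.h>: iftovt_tab is indexed by the
 * file-format bits of an inode mode shifted down 12 bits, vttoif_tab
 * by an enum vtype. Illustrative use (sketch, not from this file):
 *
 *	vp->v_type = IFTOVT(ip->i_mode);	S_IFREG maps to VREG
 *	mode |= VTTOIF(vp->v_type);		VREG maps back to S_IFREG
 */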

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {	\
	LIST_REMOVE(bp, b_vnbufs); \
	(bp)->b_vnbufs.le_next = NOLIST; \
}

TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */

/*
 * Initialize the vnode management data structures.
 */
vntblinit()
{

	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		sleep((caddr_t)mp, PVFS);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}
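
/*
 * MNT_MLOCK/MNT_MWAIT above form the classic sleep/wakeup handshake:
 * a contender notes its interest with MNT_MWAIT before sleeping, and
 * the holder issues a wakeup only when that note is present. A caller
 * brackets mount/unmount critical sections with (sketch only):
 *
 *	vfs_lock(mp);
 *	... modify the mount point ...
 *	vfs_unlock(mp);
 */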

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		sleep((caddr_t)&mp->mnt_flag, PVFS);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}
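
/*
 * Unlike vfs_lock(), vfs_busy() can fail: it returns 1 when the mount
 * is already being unmounted, so callers must check the result and
 * skip that mount point. sysctl_vnode() below shows the canonical
 * vfs_busy()/vfs_unbusy() pairing while walking mountlist.
 */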

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	}
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;

	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.tqh_first != NULL) {
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}
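
/*
 * The fsid is synthesized rather than stored on disk: val[0] is a
 * fake device number, makedev(nblkdev + mtype, mntid), chosen past
 * the real block devices so it cannot clash with one, and val[1]
 * records the filesystem type. For example (assuming nblkdev == 8
 * and mtype == 1), the first such mount gets val[0] = makedev(9, 1),
 * and val[0] is simply incremented until getvfs() reports no
 * existing mount with that id.
 */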

/*
 * Set vnode attributes to VNOVAL
 */
void vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;
extern struct vattr va_null;
int newnodes = 0;
int printcnt = 0;

/*
 * Return the next vnode from the free list.
 */
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	register struct vnode *vp;
	int s;

	newnodes++;
	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
		vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
		numvnodes++;
		vp->v_spare[0] = numvnodes;
	} else {
		if ((vp = vnode_free_list.tqh_first) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf ||
		    vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)
			panic("getnewvnode: not on queue");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	if (printcnt-- > 0) vprint("getnewvnode got", vp);
	return (0);
}
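
/*
 * A typical caller is a filesystem's own vget routine; sketch only,
 * loosely modeled on ffs_vget() (names assumed, not from this file):
 *
 *	if (error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp))
 *		return (error);
 *	... allocate and attach the inode to vp->v_data ...
 *
 * The vnode comes back with v_usecount == 1, v_type VNON, and no
 * v_data; the caller fills those in.
 */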

/*
 * Move a vnode from one mount queue to another.
 */
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		if (vp->v_mntvnodes.le_next == (struct vnode *)0xdeadf ||
		    vp->v_mntvnodes.le_prev == (struct vnode **)0xdeadb)
			panic("insmntque: not on queue");
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
		vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
	}
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	if (vp->v_mntvnodes.le_next != (struct vnode *)0xdeadf ||
	    vp->v_mntvnodes.le_prev != (struct vnode **)0xdeadb)
		panic("insmntque: already on queue");
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}
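
/*
 * The flags select what survives: V_SAVE fsyncs first so dirty data
 * reaches disk before the buffers are invalidated, and V_SAVEMETA
 * spares indirect blocks, recognized by their negative logical block
 * numbers (b_lblkno < 0). A truncation path might keep only the
 * metadata with (sketch only, assuming process p and no credentials):
 *
 *	error = vinvalbuf(vp, V_SAVEMETA, NOCRED, p, 0, 0);
 */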

/*
 * Associate a buffer with a vnode.
 */
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
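
/*
 * At boot the kernel uses this to wrap well-known devices in vnodes,
 * e.g. (sketch, assuming the usual rootdev/rootvp globals):
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("cannot set up root vnode");
 *
 * The checkalias() call above merges the fresh vnode with any
 * existing vnode for the same device, so repeated bdevvp() calls for
 * one device share a vnode rather than creating duplicates.
 */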

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp, 1))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return (1);
	}
	if (vp->v_usecount == 0) {
		if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf ||
		    vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)
			panic("vget: not on queue");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
	}
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	if (printcnt-- > 0) vprint("vget got", vp);
	return (0);
}

int bug_refs = 0;

/*
 * Vnode reference, just increment the count
 */
void vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	if (vp->v_freelist.tqe_next != (struct vnode *)0xdeadf ||
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
		panic("vref: not free");
	vp->v_usecount++;
	if (printcnt-- > 0) vprint("vref get", vp);
	if (vp->v_type != VBLK && curproc)
		curproc->p_spare[0]++;
	if (bug_refs)
		vprint("vref: ", vp);
}

/*
 * vput(), just unlock and vrele()
 */
void vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (printcnt-- > 0) vprint("vrele put", vp);
	if (vp->v_type != VBLK && curproc)
		curproc->p_spare[0]--;
	if (bug_refs)
		vprint("vrele: ", vp);
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	if (vp->v_freelist.tqe_next != (struct vnode *)0xdeadf ||
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
		panic("vrele: not free");
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 */
void vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}
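
/*
 * Three counts guard a vnode: v_usecount, the active reference count
 * managed by vget/vref/vput/vrele; v_holdcnt, the passive count of
 * buffer and page references managed by vhold/holdrele (the VHOLD
 * and HOLDRELE macros used by bgetvp/brelvp above); and
 * v_writecount, the open-for-writing count that vflush() consults
 * for WRITECLOSE below. Only v_usecount reaching zero returns the
 * vnode to the free list.
 */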

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first
	 * wait for VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			sleep((caddr_t)vp, PINOD);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		if (vp->v_mntvnodes.le_next == (struct vnode *)0xdeadf ||
		    vp->v_mntvnodes.le_prev == (struct vnode **)0xdeadb)
			panic("vgone: not on queue");
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
		vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf)
			panic("vgone: use 0, not free");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("num %d ", vp->v_spare[0]);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
	}
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp, *nmp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
		nmp = mp->mnt_list.tqe_next;
		if (vfs_busy(mp))
			continue;
		savebp = bp;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
}
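
/*
 * Consumers follow the usual two-step sysctl protocol: a first call
 * with where == NULL only reports a size estimate, padded by
 * KINFO_VNODESLOP entries to absorb vnodes created before the second
 * call, which then fills the buffer with (vnode address, vnode)
 * pairs; ENOMEM signals that the snapshot outgrew the buffer anyway.
 */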

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}
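
/*
 * Each export entry thus lives in a per-address-family radix tree
 * keyed by (address, mask), the same structure the routing code
 * uses. vfs_export_lookup() below runs rnh_matchaddr over a client's
 * sockaddr to recover the matching netcred, falling back to
 * ne_defexported when the filesystem was exported with no address.
 */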

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if (rnh = nep->ne_rtable[i]) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
			    (caddr_t)rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (error = vfs_hang_addrlist(mp, nep, argp))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
				    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}