The test for rootfs (now mountlist) is to avoid panicking in sync().
usr/src/sys/kern/vfs_subr.c
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	8.7 (Berkeley) %G%
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
        VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
        0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
        S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define bufinsvn(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define bufremvn(bp) { \
        LIST_REMOVE(bp, b_vnbufs); \
        (bp)->b_vnbufs.le_next = NOLIST; \
}

TAILQ_HEAD(freelst, vnode) vnode_free_list;     /* vnode free list */
struct mntlist mountlist;                       /* mounted filesystem list */

/*
 * Initialize the vnode management data structures.
 */
vntblinit()
{

        TAILQ_INIT(&vnode_free_list);
        TAILQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
vfs_lock(mp)
        register struct mount *mp;
{

        while (mp->mnt_flag & MNT_MLOCK) {
                mp->mnt_flag |= MNT_MWAIT;
                sleep((caddr_t)mp, PVFS);
        }
        mp->mnt_flag |= MNT_MLOCK;
        return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
        register struct mount *mp;
{

        if ((mp->mnt_flag & MNT_MLOCK) == 0)
                panic("vfs_unlock: not locked");
        mp->mnt_flag &= ~MNT_MLOCK;
        if (mp->mnt_flag & MNT_MWAIT) {
                mp->mnt_flag &= ~MNT_MWAIT;
                wakeup((caddr_t)mp);
        }
}
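
/*
 * A minimal usage sketch (hypothetical caller): vfs_lock()/vfs_unlock()
 * bracket a critical section over the mount point, sleeping until any
 * concurrent holder has dropped MNT_MLOCK.
 *
 *	if (vfs_lock(mp) == 0) {
 *		... update mount-point state ...
 *		vfs_unlock(mp);
 *	}
 */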

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
vfs_busy(mp)
        register struct mount *mp;
{

        while (mp->mnt_flag & MNT_MPBUSY) {
                mp->mnt_flag |= MNT_MPWANT;
                sleep((caddr_t)&mp->mnt_flag, PVFS);
        }
        if (mp->mnt_flag & MNT_UNMOUNT)
                return (1);
        mp->mnt_flag |= MNT_MPBUSY;
        return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
vfs_unbusy(mp)
        register struct mount *mp;
{

        if ((mp->mnt_flag & MNT_MPBUSY) == 0)
                panic("vfs_unbusy: not busy");
        mp->mnt_flag &= ~MNT_MPBUSY;
        if (mp->mnt_flag & MNT_MPWANT) {
                mp->mnt_flag &= ~MNT_MPWANT;
                wakeup((caddr_t)&mp->mnt_flag);
        }
}
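
/*
 * A minimal sketch of the busy/unbusy pattern (hypothetical caller,
 * modeled on sysctl_vnode() below): hold the mount point busy while
 * walking its vnode list so it cannot be unmounted underneath us. A
 * nonzero return from vfs_busy() means an unmount is in progress.
 *
 *	if (vfs_busy(mp) == 0) {
 *		for (vp = mp->mnt_vnodelist.lh_first; vp != NULL;
 *		    vp = vp->v_mntvnodes.le_next)
 *			... examine vp ...
 *		vfs_unbusy(mp);
 *	}
 */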

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
        fsid_t *fsid;
{
        register struct mount *mp;

        for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
                if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
                    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
                        return (mp);
        }
        return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
getnewfsid(mp, mtype)
        struct mount *mp;
        int mtype;
{
        static u_short xxxfs_mntid;
        fsid_t tfsid;

        mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
        mp->mnt_stat.f_fsid.val[1] = mtype;
        if (xxxfs_mntid == 0)
                ++xxxfs_mntid;
        tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
        tfsid.val[1] = mtype;
        if (mountlist.tqh_first != NULL) {
                while (getvfs(&tfsid)) {
                        tfsid.val[0]++;
                        xxxfs_mntid++;
                }
        }
        mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL
 */
void vattr_null(vap)
        register struct vattr *vap;
{

        vap->va_type = VNON;
        vap->va_size = vap->va_bytes = VNOVAL;
        vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
                vap->va_fsid = vap->va_fileid =
                vap->va_blocksize = vap->va_rdev =
                vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
                vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
                vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
                vap->va_flags = vap->va_gen = VNOVAL;
        vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;
extern struct vattr va_null;
int newnodes = 0;
int printcnt = 0;

/*
 * Return the next vnode from the free list.
 */
getnewvnode(tag, mp, vops, vpp)
        enum vtagtype tag;
        struct mount *mp;
        int (**vops)();
        struct vnode **vpp;
{
        register struct vnode *vp;
        int s;

        newnodes++;
        if ((vnode_free_list.tqh_first == NULL &&
             numvnodes < 2 * desiredvnodes) ||
            numvnodes < desiredvnodes) {
                vp = (struct vnode *)malloc((u_long)sizeof *vp,
                    M_VNODE, M_WAITOK);
                bzero((char *)vp, sizeof *vp);
                vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
                vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
                vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
                vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
                numvnodes++;
                vp->v_spare[0] = numvnodes;
        } else {
                if ((vp = vnode_free_list.tqh_first) == NULL) {
                        tablefull("vnode");
                        *vpp = 0;
                        return (ENFILE);
                }
                if (vp->v_usecount)
                        panic("free vnode isn't");
                if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf ||
                    vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)
                        panic("getnewvnode: not on queue");
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
                /* see comment on why 0xdeadb is set at end of vgone (below) */
                vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
                vp->v_lease = NULL;
                if (vp->v_type != VBAD)
                        vgone(vp);
#ifdef DIAGNOSTIC
                if (vp->v_data)
                        panic("cleaned vnode isn't");
                s = splbio();
                if (vp->v_numoutput)
                        panic("Clean vnode has pending I/O's");
                splx(s);
#endif
                vp->v_flag = 0;
                vp->v_lastr = 0;
                vp->v_lastw = 0;
                vp->v_lasta = 0;
                vp->v_cstart = 0;
                vp->v_clen = 0;
                vp->v_socket = 0;
        }
        vp->v_ralen = 1;
        vp->v_type = VNON;
        cache_purge(vp);
        vp->v_tag = tag;
        vp->v_op = vops;
        insmntque(vp, mp);
        *vpp = vp;
        vp->v_usecount = 1;
        vp->v_data = 0;
        if (printcnt-- > 0)
                vprint("getnewvnode got", vp);
        return (0);
}
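
/*
 * A minimal allocation sketch (hypothetical filesystem code; VT_UFS,
 * ufs_vnodeop_p, and ip are illustrative names, not defined in this
 * file): a filesystem obtains a fresh vnode, then hangs its private
 * per-file data off v_data.
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	if (error = getnewvnode(VT_UFS, mp, ufs_vnodeop_p, &vp))
 *		return (error);
 *	vp->v_data = ip;	(point v_data at the per-fs node)
 */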

/*
 * Move a vnode from one mount queue to another.
 */
insmntque(vp, mp)
        register struct vnode *vp;
        register struct mount *mp;
{

        /*
         * Delete from old mount point vnode list, if on one.
         */
        if (vp->v_mount != NULL) {
                if (vp->v_mntvnodes.le_next == (struct vnode *)0xdeadf ||
                    vp->v_mntvnodes.le_prev == (struct vnode **)0xdeadb)
                        panic("insmntque: not on queue");
                LIST_REMOVE(vp, v_mntvnodes);
                vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
                vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
        }
        /*
         * Insert into list of vnodes for the new mount point, if available.
         */
        if ((vp->v_mount = mp) == NULL)
                return;
        if (vp->v_mntvnodes.le_next != (struct vnode *)0xdeadf ||
            vp->v_mntvnodes.le_prev != (struct vnode **)0xdeadb)
                panic("insmntque: already on queue");
        LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
vwakeup(bp)
        register struct buf *bp;
{
        register struct vnode *vp;

        bp->b_flags &= ~B_WRITEINPROG;
        if (vp = bp->b_vp) {
                vp->v_numoutput--;
                if (vp->v_numoutput < 0)
                        panic("vwakeup: neg numoutput");
                if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
                        vp->v_flag &= ~VBWAIT;
                        wakeup((caddr_t)&vp->v_numoutput);
                }
        }
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
        register struct vnode *vp;
        int flags;
        struct ucred *cred;
        struct proc *p;
        int slpflag, slptimeo;
{
        register struct buf *bp;
        struct buf *nbp, *blist;
        int s, error;

        if (flags & V_SAVE) {
                if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
                        return (error);
                if (vp->v_dirtyblkhd.lh_first != NULL)
                        panic("vinvalbuf: dirty bufs");
        }
        for (;;) {
                if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
                        while (blist && blist->b_lblkno < 0)
                                blist = blist->b_vnbufs.le_next;
                if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
                    (flags & V_SAVEMETA))
                        while (blist && blist->b_lblkno < 0)
                                blist = blist->b_vnbufs.le_next;
                if (!blist)
                        break;

                for (bp = blist; bp; bp = nbp) {
                        nbp = bp->b_vnbufs.le_next;
                        if (flags & V_SAVEMETA && bp->b_lblkno < 0)
                                continue;
                        s = splbio();
                        if (bp->b_flags & B_BUSY) {
                                bp->b_flags |= B_WANTED;
                                error = tsleep((caddr_t)bp,
                                    slpflag | (PRIBIO + 1), "vinvalbuf",
                                    slptimeo);
                                splx(s);
                                if (error)
                                        return (error);
                                break;
                        }
                        bremfree(bp);
                        bp->b_flags |= B_BUSY;
                        splx(s);
                        /*
                         * XXX Since there are no node locks for NFS, I believe
                         * there is a slight chance that a delayed write will
                         * occur while sleeping just above, so check for it.
                         */
                        if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
                                (void) VOP_BWRITE(bp);
                                break;
                        }
                        bp->b_flags |= B_INVAL;
                        brelse(bp);
                }
        }
        if (!(flags & V_SAVEMETA) &&
            (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
                panic("vinvalbuf: flush failed");
        return (0);
}

/*
 * Associate a buffer with a vnode.
 */
bgetvp(vp, bp)
        register struct vnode *vp;
        register struct buf *bp;
{

        if (bp->b_vp)
                panic("bgetvp: not free");
        VHOLD(vp);
        bp->b_vp = vp;
        if (vp->v_type == VBLK || vp->v_type == VCHR)
                bp->b_dev = vp->v_rdev;
        else
                bp->b_dev = NODEV;
        /*
         * Insert onto list for new vnode.
         */
        bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
brelvp(bp)
        register struct buf *bp;
{
        struct vnode *vp;

        if (bp->b_vp == (struct vnode *) 0)
                panic("brelvp: NULL");
        /*
         * Delete from old vnode list, if on one.
         */
        if (bp->b_vnbufs.le_next != NOLIST)
                bufremvn(bp);
        vp = bp->b_vp;
        bp->b_vp = (struct vnode *) 0;
        HOLDRELE(vp);
}
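
/*
 * A minimal lifecycle sketch (hypothetical buffer-cache caller): a
 * buffer is tied to a vnode for the duration of an I/O and released
 * afterwards; the VHOLD/HOLDRELE pair keeps the vnode from being
 * recycled while buffers still reference it.
 *
 *	bgetvp(vp, bp);		(buffer goes on vp's clean list)
 *	... do I/O; reassignbuf(bp, vp) if the buffer turns dirty ...
 *	brelvp(bp);		(drop the association and the hold)
 */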

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
reassignbuf(bp, newvp)
        register struct buf *bp;
        register struct vnode *newvp;
{
        register struct buflists *listheadp;

        if (newvp == NULL) {
                printf("reassignbuf: NULL");
                return;
        }
        /*
         * Delete from old vnode list, if on one.
         */
        if (bp->b_vnbufs.le_next != NOLIST)
                bufremvn(bp);
        /*
         * If dirty, put on list of dirty buffers;
         * otherwise insert onto list of clean buffers.
         */
        if (bp->b_flags & B_DELWRI)
                listheadp = &newvp->v_dirtyblkhd;
        else
                listheadp = &newvp->v_cleanblkhd;
        bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
bdevvp(dev, vpp)
        dev_t dev;
        struct vnode **vpp;
{
        register struct vnode *vp;
        struct vnode *nvp;
        int error;

        if (dev == NODEV)
                return (0);
        error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
        if (error) {
                *vpp = 0;
                return (error);
        }
        vp = nvp;
        vp->v_type = VBLK;
        if (nvp = checkalias(vp, dev, (struct mount *)0)) {
                vput(vp);
                vp = nvp;
        }
        *vpp = vp;
        return (0);
}
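
/*
 * A minimal usage sketch (modeled on how the kernel brings up the root
 * device; the actual call site lives in the filesystem mountroot code):
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("cannot set up root device vnode");
 */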

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
        register struct vnode *nvp;
        dev_t nvp_rdev;
        struct mount *mp;
{
        register struct vnode *vp;
        struct vnode **vpp;

        if (nvp->v_type != VBLK && nvp->v_type != VCHR)
                return (NULLVP);

        vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
        for (vp = *vpp; vp; vp = vp->v_specnext) {
                if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
                        continue;
                /*
                 * Alias, but not in use, so flush it out.
                 */
                if (vp->v_usecount == 0) {
                        vgone(vp);
                        goto loop;
                }
                if (vget(vp, 1))
                        goto loop;
                break;
        }
        if (vp == NULL || vp->v_tag != VT_NON) {
                MALLOC(nvp->v_specinfo, struct specinfo *,
                    sizeof(struct specinfo), M_VNODE, M_WAITOK);
                nvp->v_rdev = nvp_rdev;
                nvp->v_hashchain = vpp;
                nvp->v_specnext = *vpp;
                nvp->v_specflags = 0;
                *vpp = nvp;
                if (vp != NULL) {
                        nvp->v_flag |= VALIASED;
                        vp->v_flag |= VALIASED;
                        vput(vp);
                }
                return (NULLVP);
        }
        VOP_UNLOCK(vp);
        vclean(vp, 0);
        vp->v_op = nvp->v_op;
        vp->v_tag = nvp->v_tag;
        nvp->v_type = VNON;
        insmntque(vp, mp);
        return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. In that case, the process
 * is awakened when the transition is completed, and an error
 * returned to indicate that the vnode is no longer usable
 * (possibly having been changed to a new file system type).
 */
vget(vp, lockflag)
        register struct vnode *vp;
        int lockflag;
{

        if (vp->v_flag & VXLOCK) {
                vp->v_flag |= VXWANT;
                sleep((caddr_t)vp, PINOD);
                return (1);
        }
        if (vp->v_usecount == 0) {
                if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf ||
                    vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)
                        panic("vget: not on queue");
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
                vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
        }
        vp->v_usecount++;
        if (lockflag)
                VOP_LOCK(vp);
        if (printcnt-- > 0)
                vprint("vget got", vp);
        return (0);
}
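
/*
 * A minimal retry sketch (hypothetical caller): a nonzero return from
 * vget() means the vnode was being torn down while we slept, so the
 * lookup that produced it must be repeated rather than the stale
 * pointer reused.
 *
 *	again:
 *		vp = ... look the vnode up ...;
 *		if (vget(vp, 1))
 *			goto again;
 *		... use vp, then vput(vp) ...
 */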

int bug_refs = 0;

/*
 * Vnode reference, just increment the count
 */
void vref(vp)
        struct vnode *vp;
{

        if (vp->v_usecount <= 0)
                panic("vref used where vget required");
        if (vp->v_freelist.tqe_next != (struct vnode *)0xdeadf ||
            vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
                panic("vref: not free");
        vp->v_usecount++;
        if (printcnt-- > 0)
                vprint("vref get", vp);
        if (vp->v_type != VBLK && curproc)
                curproc->p_spare[0]++;
        if (bug_refs)
                vprint("vref: ", vp);
}

/*
 * vput(), just unlock and vrele()
 */
void vput(vp)
        register struct vnode *vp;
{

        VOP_UNLOCK(vp);
        vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void vrele(vp)
        register struct vnode *vp;
{

#ifdef DIAGNOSTIC
        if (vp == NULL)
                panic("vrele: null vp");
#endif
        vp->v_usecount--;
        if (printcnt-- > 0)
                vprint("vrele put", vp);
        if (vp->v_type != VBLK && curproc)
                curproc->p_spare[0]--;
        if (bug_refs)
                vprint("vrele: ", vp);
        if (vp->v_usecount > 0)
                return;
#ifdef DIAGNOSTIC
        if (vp->v_usecount != 0 || vp->v_writecount != 0) {
                vprint("vrele: bad ref count", vp);
                panic("vrele: ref cnt");
        }
#endif
        /*
         * insert at tail of LRU list
         */
        if (vp->v_freelist.tqe_next != (struct vnode *)0xdeadf ||
            vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
                panic("vrele: not free");
        TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
        VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 */
void vhold(vp)
        register struct vnode *vp;
{

        vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void holdrele(vp)
        register struct vnode *vp;
{

        if (vp->v_holdcnt <= 0)
                panic("holdrele: holdcnt");
        vp->v_holdcnt--;
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;        /* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

vflush(mp, skipvp, flags)
        struct mount *mp;
        struct vnode *skipvp;
        int flags;
{
        register struct vnode *vp, *nvp;
        int busy = 0;

        if ((mp->mnt_flag & MNT_MPBUSY) == 0)
                panic("vflush: not busy");
loop:
        for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
                if (vp->v_mount != mp)
                        goto loop;
                nvp = vp->v_mntvnodes.le_next;
                /*
                 * Skip over a selected vnode.
                 */
                if (vp == skipvp)
                        continue;
                /*
                 * Skip over vnodes marked VSYSTEM.
                 */
                if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
                        continue;
                /*
                 * If WRITECLOSE is set, only flush out regular file
                 * vnodes open for writing.
                 */
                if ((flags & WRITECLOSE) &&
                    (vp->v_writecount == 0 || vp->v_type != VREG))
                        continue;
                /*
                 * With v_usecount == 0, all we need to do is clear
                 * out the vnode data structures and we are done.
                 */
                if (vp->v_usecount == 0) {
                        vgone(vp);
                        continue;
                }
                /*
                 * If FORCECLOSE is set, forcibly close the vnode.
                 * For block or character devices, revert to an
                 * anonymous device. For all other files, just kill them.
                 */
                if (flags & FORCECLOSE) {
                        if (vp->v_type != VBLK && vp->v_type != VCHR) {
                                vgone(vp);
                        } else {
                                vclean(vp, 0);
                                vp->v_op = spec_vnodeop_p;
                                insmntque(vp, (struct mount *)0);
                        }
                        continue;
                }
#ifdef DIAGNOSTIC
                if (busyprt)
                        vprint("vflush: busy vnode", vp);
#endif
                busy++;
        }
        if (busy)
                return (EBUSY);
        return (0);
}
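
/*
 * A minimal unmount-path sketch (hypothetical filesystem code): flush
 * every vnode on the mount, forcibly when a forced unmount was
 * requested; a device vnode to spare can be passed as skipvp.
 *
 *	flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
 *	if (error = vflush(mp, NULLVP, flags))
 *		return (error);
 */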

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
        register struct vnode *vp;
        int flags;
{
        int active;

        /*
         * Check to see if the vnode is in use.
         * If so we have to reference it before we clean it out
         * so that its count cannot fall to zero and generate a
         * race against ourselves to recycle it.
         */
        if (active = vp->v_usecount)
                VREF(vp);
        /*
         * Even if the count is zero, the VOP_INACTIVE routine may still
         * have the object locked while it cleans it out. The VOP_LOCK
         * ensures that the VOP_INACTIVE routine is done with its work.
         * For active vnodes, it ensures that no other activity can
         * occur while the underlying object is being cleaned out.
         */
        VOP_LOCK(vp);
        /*
         * Prevent the vnode from being recycled or
         * brought into use while we clean it out.
         */
        if (vp->v_flag & VXLOCK)
                panic("vclean: deadlock");
        vp->v_flag |= VXLOCK;
        /*
         * Clean out any buffers associated with the vnode.
         */
        if (flags & DOCLOSE)
                vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
        /*
         * Any other processes trying to obtain this lock must first
         * wait for VXLOCK to clear, then call the new lock operation.
         */
        VOP_UNLOCK(vp);
        /*
         * If purging an active vnode, it must be closed and
         * deactivated before being reclaimed.
         */
        if (active) {
                if (flags & DOCLOSE)
                        VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
                VOP_INACTIVE(vp);
        }
        /*
         * Reclaim the vnode.
         */
        if (VOP_RECLAIM(vp))
                panic("vclean: cannot reclaim");
        if (active)
                vrele(vp);

        /*
         * Done with purge, notify sleepers of the grim news.
         */
        vp->v_op = dead_vnodeop_p;
        vp->v_tag = VT_NON;
        vp->v_flag &= ~VXLOCK;
        if (vp->v_flag & VXWANT) {
                vp->v_flag &= ~VXWANT;
                wakeup((caddr_t)vp);
        }
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void vgoneall(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;

        if (vp->v_flag & VALIASED) {
                /*
                 * If a vgone (or vclean) is already in progress,
                 * wait until it is done and return.
                 */
                if (vp->v_flag & VXLOCK) {
                        vp->v_flag |= VXWANT;
                        sleep((caddr_t)vp, PINOD);
                        return;
                }
                /*
                 * Ensure that vp will not be vgone'd while we
                 * are eliminating its aliases.
                 */
                vp->v_flag |= VXLOCK;
                while (vp->v_flag & VALIASED) {
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_rdev != vp->v_rdev ||
                                    vq->v_type != vp->v_type || vp == vq)
                                        continue;
                                vgone(vq);
                                break;
                        }
                }
                /*
                 * Remove the lock so that vgone below will
                 * really eliminate the vnode after which time
                 * vgone will awaken any sleepers.
                 */
                vp->v_flag &= ~VXLOCK;
        }
        vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void vgone(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;
        struct vnode *vx;

        /*
         * If a vgone (or vclean) is already in progress,
         * wait until it is done and return.
         */
        if (vp->v_flag & VXLOCK) {
                vp->v_flag |= VXWANT;
                sleep((caddr_t)vp, PINOD);
                return;
        }
        /*
         * Clean out the filesystem specific data.
         */
        vclean(vp, DOCLOSE);
        /*
         * Delete from old mount point vnode list, if on one.
         */
        if (vp->v_mount != NULL) {
                if (vp->v_mntvnodes.le_next == (struct vnode *)0xdeadf ||
                    vp->v_mntvnodes.le_prev == (struct vnode **)0xdeadb)
                        panic("vgone: not on queue");
                LIST_REMOVE(vp, v_mntvnodes);
                vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
                vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
                vp->v_mount = NULL;
        }
        /*
         * If special device, remove it from special device alias list.
         */
        if (vp->v_type == VBLK || vp->v_type == VCHR) {
                if (*vp->v_hashchain == vp) {
                        *vp->v_hashchain = vp->v_specnext;
                } else {
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_specnext != vp)
                                        continue;
                                vq->v_specnext = vp->v_specnext;
                                break;
                        }
                        if (vq == NULL)
                                panic("missing bdev");
                }
                if (vp->v_flag & VALIASED) {
                        vx = NULL;
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_rdev != vp->v_rdev ||
                                    vq->v_type != vp->v_type)
                                        continue;
                                if (vx)
                                        break;
                                vx = vq;
                        }
                        if (vx == NULL)
                                panic("missing alias");
                        if (vq == NULL)
                                vx->v_flag &= ~VALIASED;
                        vp->v_flag &= ~VALIASED;
                }
                FREE(vp->v_specinfo, M_VNODE);
                vp->v_specinfo = NULL;
        }
        /*
         * If it is on the freelist and not already at the head,
         * move it to the head of the list. The test of the back
         * pointer and the reference count of zero is because
         * it will be removed from the free list by getnewvnode,
         * but will not have its reference count incremented until
         * after calling vgone. If the reference count were
         * incremented first, vgone would (incorrectly) try to
         * close the previous instance of the underlying object.
         * So, the back pointer is explicitly set to `0xdeadb' in
         * getnewvnode after removing it from the freelist to ensure
         * that we do not try to move it here.
         */
        if (vp->v_usecount == 0 &&
            vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
            vnode_free_list.tqh_first != vp) {
                if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf)
                        panic("vgone: use 0, not free");
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
        }
        vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
vfinddev(dev, type, vpp)
        dev_t dev;
        enum vtype type;
        struct vnode **vpp;
{
        register struct vnode *vp;

        for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
                if (dev != vp->v_rdev || type != vp->v_type)
                        continue;
                *vpp = vp;
                return (1);
        }
        return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
vcount(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;
        int count;

        if ((vp->v_flag & VALIASED) == 0)
                return (vp->v_usecount);
loop:
        for (count = 0, vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
                        continue;
                /*
                 * Alias, but not in use, so flush it out.
                 */
                if (vq->v_usecount == 0) {
                        vgone(vq);
                        goto loop;
                }
                count += vq->v_usecount;
        }
        return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

vprint(label, vp)
        char *label;
        register struct vnode *vp;
{
        char buf[64];

        if (label != NULL)
                printf("%s: ", label);
        printf("num %d ", vp->v_spare[0]);
        printf("type %s, usecount %d, writecount %d, refcount %d,",
            typename[vp->v_type], vp->v_usecount, vp->v_writecount,
            vp->v_holdcnt);
        buf[0] = '\0';
        if (vp->v_flag & VROOT)
                strcat(buf, "|VROOT");
        if (vp->v_flag & VTEXT)
                strcat(buf, "|VTEXT");
        if (vp->v_flag & VSYSTEM)
                strcat(buf, "|VSYSTEM");
        if (vp->v_flag & VXLOCK)
                strcat(buf, "|VXLOCK");
        if (vp->v_flag & VXWANT)
                strcat(buf, "|VXWANT");
        if (vp->v_flag & VBWAIT)
                strcat(buf, "|VBWAIT");
        if (vp->v_flag & VALIASED)
                strcat(buf, "|VALIASED");
        if (buf[0] != '\0')
                printf(" flags (%s)", &buf[1]);
        if (vp->v_data == NULL) {
                printf("\n");
        } else {
                printf("\n\t");
                VOP_PRINT(vp);
        }
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
printlockedvnodes()
{
        register struct mount *mp;
        register struct vnode *vp;

        printf("Locked vnodes\n");
        for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
                for (vp = mp->mnt_vnodelist.lh_first;
                     vp != NULL;
                     vp = vp->v_mntvnodes.le_next)
                        if (VOP_ISLOCKED(vp))
                                vprint((char *)0, vp);
        }
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP 10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
sysctl_vnode(where, sizep)
        char *where;
        size_t *sizep;
{
        register struct mount *mp, *nmp;
        struct vnode *vp;
        register char *bp = where, *savebp;
        char *ewhere;
        int error;

#define VPTRSZ  sizeof (struct vnode *)
#define VNODESZ sizeof (struct vnode)
        if (where == NULL) {
                *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
                return (0);
        }
        ewhere = where + *sizep;

        for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
                nmp = mp->mnt_list.tqe_next;
                if (vfs_busy(mp))
                        continue;
                savebp = bp;
again:
                for (vp = mp->mnt_vnodelist.lh_first;
                     vp != NULL;
                     vp = vp->v_mntvnodes.le_next) {
                        /*
                         * Check that the vp is still associated with
                         * this filesystem.  RACE: could have been
                         * recycled onto the same filesystem.
                         */
                        if (vp->v_mount != mp) {
                                if (kinfo_vdebug)
                                        printf("kinfo: vp changed\n");
                                bp = savebp;
                                goto again;
                        }
                        if (bp + VPTRSZ + VNODESZ > ewhere) {
                                *sizep = bp - where;
                                return (ENOMEM);
                        }
                        if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
                            (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
                                return (error);
                        bp += VPTRSZ + VNODESZ;
                }
                vfs_unbusy(mp);
        }

        *sizep = bp - where;
        return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;

        if (vp->v_specflags & SI_MOUNTEDON)
                return (EBUSY);
        if (vp->v_flag & VALIASED) {
                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                        if (vq->v_rdev != vp->v_rdev ||
                            vq->v_type != vp->v_type)
                                continue;
                        if (vq->v_specflags & SI_MOUNTEDON)
                                return (EBUSY);
                }
        }
        return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
        struct mount *mp;
        struct netexport *nep;
        struct export_args *argp;
{
        register struct netcred *np;
        register struct radix_node_head *rnh;
        register int i;
        struct radix_node *rn;
        struct sockaddr *saddr, *smask = 0;
        struct domain *dom;
        int error;

        if (argp->ex_addrlen == 0) {
                if (mp->mnt_flag & MNT_DEFEXPORTED)
                        return (EPERM);
                np = &nep->ne_defexported;
                np->netc_exflags = argp->ex_flags;
                np->netc_anon = argp->ex_anon;
                np->netc_anon.cr_ref = 1;
                mp->mnt_flag |= MNT_DEFEXPORTED;
                return (0);
        }
        i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
        np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
        bzero((caddr_t)np, i);
        saddr = (struct sockaddr *)(np + 1);
        if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
                goto out;
        if (saddr->sa_len > argp->ex_addrlen)
                saddr->sa_len = argp->ex_addrlen;
        if (argp->ex_masklen) {
                smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
                error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
                if (error)
                        goto out;
                if (smask->sa_len > argp->ex_masklen)
                        smask->sa_len = argp->ex_masklen;
        }
        i = saddr->sa_family;
        if ((rnh = nep->ne_rtable[i]) == 0) {
                /*
                 * It seems silly to initialize every address family
                 * when most are never used; do so on demand here.
                 */
                for (dom = domains; dom; dom = dom->dom_next)
                        if (dom->dom_family == i && dom->dom_rtattach) {
                                dom->dom_rtattach((void **)&nep->ne_rtable[i],
                                    dom->dom_rtoffset);
                                break;
                        }
                if ((rnh = nep->ne_rtable[i]) == 0) {
                        error = ENOBUFS;
                        goto out;
                }
        }
        rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
            np->netc_rnodes);
        if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
                error = EPERM;
                goto out;
        }
        np->netc_exflags = argp->ex_flags;
        np->netc_anon = argp->ex_anon;
        np->netc_anon.cr_ref = 1;
        return (0);
out:
        free(np, M_NETADDR);
        return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
        struct radix_node *rn;
        caddr_t w;
{
        register struct radix_node_head *rnh = (struct radix_node_head *)w;

        (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
        free((caddr_t)rn, M_NETADDR);
        return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
        struct netexport *nep;
{
        register int i;
        register struct radix_node_head *rnh;

        for (i = 0; i <= AF_MAX; i++)
                if (rnh = nep->ne_rtable[i]) {
                        (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
                            (caddr_t)rnh);
                        free((caddr_t)rnh, M_RTABLE);
                        nep->ne_rtable[i] = 0;
                }
}

int
vfs_export(mp, nep, argp)
        struct mount *mp;
        struct netexport *nep;
        struct export_args *argp;
{
        int error;

        if (argp->ex_flags & MNT_DELEXPORT) {
                vfs_free_addrlist(nep);
                mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
        }
        if (argp->ex_flags & MNT_EXPORTED) {
                if (error = vfs_hang_addrlist(mp, nep, argp))
                        return (error);
                mp->mnt_flag |= MNT_EXPORTED;
        }
        return (0);
}
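
/*
 * A minimal mount-time sketch (hypothetical filesystem mount code,
 * assuming an export_args structure arriving from the mount(2) call):
 *
 *	if (args.fspec == 0)
 *		return (vfs_export(mp, &ump->um_export, &args.export));
 */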

/*
 * Look up the export credentials for an address on an exported
 * filesystem, falling back to the default export if none match.
 */
struct netcred *
vfs_export_lookup(mp, nep, nam)
        register struct mount *mp;
        struct netexport *nep;
        struct mbuf *nam;
{
        register struct netcred *np;
        register struct radix_node_head *rnh;
        struct sockaddr *saddr;

        np = NULL;
        if (mp->mnt_flag & MNT_EXPORTED) {
                /*
                 * Lookup in the export list first.
                 */
                if (nam != NULL) {
                        saddr = mtod(nam, struct sockaddr *);
                        rnh = nep->ne_rtable[saddr->sa_family];
                        if (rnh != NULL) {
                                np = (struct netcred *)
                                    (*rnh->rnh_matchaddr)((caddr_t)saddr,
                                    rnh);
                                if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
                                        np = NULL;
                        }
                }
                /*
                 * If no address match, use the default if it exists.
                 */
                if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
                        np = &nep->ne_defexported;
        }
        return (np);
}
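
/*
 * A minimal server-side sketch (hypothetical caller; the NFS server
 * does a lookup like this while validating a client's file handle):
 *
 *	np = vfs_export_lookup(mp, nep, nam);
 *	if (np == NULL)
 *		return (EACCES);
 *	exflags = np->netc_exflags;
 *	credanon = &np->netc_anon;
 */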