expand vput inline so that locking is managed properly
usr/src/sys/kern/vfs_subr.c
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	8.25 (Berkeley) %G%
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {						\
	LIST_REMOVE(bp, b_vnbufs);				\
	(bp)->b_vnbufs.le_next = NOLIST;			\
}
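/*
 * b_vnbufs.le_next is set to the NOLIST sentinel when a buffer is on
 * no vnode list, so brelvp() and reassignbuf() can tell whether a
 * removal is needed before reinserting.
 */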
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
static struct simplelock spechash_slock;
static struct simplelock vnode_free_list_slock;

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
int
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		tsleep((caddr_t)mp, PVFS, "vfslock", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
int
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*mountroot)(void);
	int error;

	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	}
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

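	/*
	 * The fsid is built from a fabricated device number: the major
	 * is nblkdev + the filesystem type, so it cannot collide with
	 * any real block device, and the minor (xxxfs_mntid) is advanced
	 * below until vfs_getvfs() finds no mount already using the id.
	 */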
	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
static void vclean __P((struct vnode *vp, int flag, struct proc *p));
extern void vgonel __P((struct vnode *vp, struct proc *p));
long numvnodes;
extern struct vattr va_null;
int newnodes = 0;
int printcnt = 0;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	int s;
	int cnt;

top:
	simple_lock(&vnode_free_list_slock);
	newnodes++;
	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
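		/*
		 * Poison the free-list and mount-list links with the
		 * 0xdeadf/0xdeadb sentinels; they mark a vnode that is
		 * on neither list, so the "not on queue" panics below
		 * can catch stray list operations.
		 */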
		vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
		vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
		numvnodes++;
		vp->v_spare[0] = numvnodes;
	} else {
		for (vp = vnode_free_list.tqh_first;
		     vp != NULLVP; vp = vp->v_freelist.tqe_next) {
			if (simple_lock_try(&vp->v_interlock))
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf ||
		    vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)
			panic("getnewvnode: not on queue");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	if (printcnt-- > 0) vprint("getnewvnode got", vp);
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		if (vp->v_mntvnodes.le_next == (struct vnode *)0xdeadf ||
		    vp->v_mntvnodes.le_prev == (struct vnode **)0xdeadb)
			panic("insmntque: not on queue");
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
		vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
	}
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput 2");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
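		/*
		 * With V_SAVEMETA, leading metadata buffers (b_lblkno < 0,
		 * i.e. indirect blocks) are skipped when choosing the
		 * list of buffers to flush.
		 */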
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (ENODEV);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
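	/*
	 * If no alias was found, or the alias is owned by a filesystem
	 * (v_tag != VT_NON), give the new vnode its own specinfo and
	 * link it onto the hash chain. Otherwise take over the bare
	 * device vnode (e.g. one created by bdevvp()) found above.
	 */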
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set,
 * the vnode is being eliminated in vgone. In that case, the
 * process is awakened when the transition is completed, and an
 * error returned to indicate that the vnode is no longer usable
 * (possibly having been changed to a new file system type).
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf ||
		    vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)
			panic("vget: not on queue");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
	}
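	/*
	 * The interlock is still held here; it is either handed to
	 * vn_lock() via LK_INTERLOCK or released explicitly below.
	 */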
	vp->v_usecount++;
	if (flags & LK_TYPE_MASK)
		return (vn_lock(vp, flags | LK_INTERLOCK, p));
	simple_unlock(&vp->v_interlock);
	if (printcnt-- > 0) vprint("vget got", vp);
	return (0);
}

int bug_refs = 0;

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 *
 * Getting a lock just clears the interlock if necessary.
 */
int
vop_nolock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Unlock has nothing to do.
 */
int
vop_nounlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{

	return (0);
}

/*
 * Nothing is ever locked.
 */
int
vop_noislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	if (vp->v_freelist.tqe_next != (struct vnode *)0xdeadf ||
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
		panic("vref: not free");
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
	if (printcnt-- > 0) vprint("vref get", vp);
	if (vp->v_type != VBLK && curproc)
		curproc->p_spare[0]++;
	if (bug_refs)
		vprint("vref: ");
}

/*
 * vput(), just unlock and vrele()
 */
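/*
 * The vnode is locked on entry, and the caller's vnode lock is
 * always consumed: it is released directly when references remain,
 * or passed to VOP_INACTIVE, which unlocks the vnode, when the
 * last reference is dropped.
 */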
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0, p);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (printcnt-- > 0) vprint("vrele put", vp);
	if (vp->v_type != VBLK && curproc)
		curproc->p_spare[0]--;
	if (bug_refs)
		vprint("vref: ");
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_freelist.tqe_next != (struct vnode *)0xdeadf ||
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
		panic("vrele: not free");
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
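	/*
	 * Unlike vput(), the vnode is not locked on entry, so the lock
	 * must be acquired here (consuming the interlock) before
	 * VOP_INACTIVE, which unlocks it again.
	 */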
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

#ifdef DIAGNOSTIC
	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
#endif

	simple_lock(&mntvnode_slock);
loop:
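	/*
	 * A vnode can be reclaimed and moved to another mount point
	 * while mntvnode_slock is dropped below; if that happens,
	 * restart the scan from the head of the list.
	 */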
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		vp->v_usecount++;
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("num %d ", vp->v_spare[0]);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
	}
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct ctldebug *cdp;
	struct vfsconf *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
		    sizeof(struct vfsconf)));
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_next;
		if (vfs_busy(mp))
			continue;
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;

	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		(void) dounmount(mp, MNT_FORCE, &proc0);
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
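	/*
	 * The address and mask sockaddrs live in the same allocation,
	 * directly after the netcred: saddr at np + 1 and smask at
	 * saddr + ex_addrlen. The radix tree nodes are embedded in
	 * np->netc_rnodes, so a tree match points back at the netcred.
	 */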
	saddr = (struct sockaddr *)(np + 1);
	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0) {
		/*
		 * One of the reasons that rnh_addaddr may fail is that
		 * the entry already exists. To check for this case, we
		 * look up the entry to see if it is there. If so, we
		 * do not need to make a new entry but do return success.
		 */
		free(np, M_NETADDR);
		rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
		if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
		    ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
		    !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
		    (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
			return (0);
		return (EPERM);
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if (rnh = nep->ne_rtable[i]) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
			    (caddr_t)rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (error = vfs_hang_addrlist(mp, nep, argp))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
				    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}