add information about CTL_VFS
[unix-history] / usr / src / sys / kern / vfs_subr.c
CommitLineData
3c4390e8 1/*
ec54f0cc
KB
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
adb35f79
KB
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
3c4390e8 9 *
dbf0c423 10 * %sccs.include.redist.c%
3c4390e8 11 *
b398c713 12 * @(#)vfs_subr.c 8.20 (Berkeley) %G%
3c4390e8
KM
13 */
14
15/*
16 * External virtual filesystem routines
17 */
18
cb796a23 19#include <sys/param.h>
917dc539 20#include <sys/systm.h>
cb796a23
KB
21#include <sys/proc.h>
22#include <sys/mount.h>
23#include <sys/time.h>
24#include <sys/vnode.h>
807cc430 25#include <sys/stat.h>
cb796a23
KB
26#include <sys/namei.h>
27#include <sys/ucred.h>
28#include <sys/buf.h>
29#include <sys/errno.h>
30#include <sys/malloc.h>
8981e258
MH
31#include <sys/domain.h>
32#include <sys/mbuf.h>
3c4390e8 33
bb4964fd
KM
34#include <vm/vm.h>
35#include <sys/sysctl.h>
36
021de758
JSP
37#include <miscfs/specfs/specdev.h>
38
807cc430
KM
/*
 * Conversion tables between the file-type bits of a stat(2) st_mode
 * (IFMT field shifted down) and the vnode type enumeration.
 * iftovt_tab maps mode-type nibble -> enum vtype; vttoif_tab maps
 * enum vtype -> S_IF* mode bits.
 */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
47
e3249ec0
KM
/*
 * Insq/Remq for the vnode usage lists.
 */
#define bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
/*
 * Remove a buffer from its vnode's buffer list and mark the link
 * NOLIST so brelvp()/reassignbuf() can tell it is off-list.
 */
#define bufremvn(bp) {  \
	LIST_REMOVE(bp, b_vnbufs); \
	(bp)->b_vnbufs.le_next = NOLIST; \
}

TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */
e3249ec0 58
/*
 * Initialize the vnode management data structures:
 * the global vnode free list and the list of mounted filesystems.
 * Called once at boot before any vnodes are created.
 */
void
vntblinit()
{

	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&mountlist);
}
69
/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 * Sleeps (interruptibly at PVFS priority) until the lock is free,
 * then acquires it.  Always returns 0.
 */
int
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		/* Record that someone is waiting so vfs_unlock() wakes us. */
		mp->mnt_flag |= MNT_MWAIT;
		tsleep((caddr_t)mp, PVFS, "vfslock", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}
86
/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 * Wakes any sleepers that set MNT_MWAIT in vfs_lock().
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}
104
/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 * Returns 1 (without acquiring busy state) if the filesystem is being
 * unmounted, 0 on success.  Note the sleep channel is &mnt_flag,
 * distinct from the vfs_lock() channel (mp itself).
 */
int
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0);
	}
	/* Do not grant busy status on a filesystem being torn down. */
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}
123
124/*
125 * Free a busy filesystem.
126 * Panic if filesystem is not busy.
127 */
05560902 128void
36ef03ec
KM
129vfs_unbusy(mp)
130 register struct mount *mp;
131{
132
54fb9dc2 133 if ((mp->mnt_flag & MNT_MPBUSY) == 0)
36ef03ec 134 panic("vfs_unbusy: not busy");
54fb9dc2
KM
135 mp->mnt_flag &= ~MNT_MPBUSY;
136 if (mp->mnt_flag & MNT_MPWANT) {
137 mp->mnt_flag &= ~MNT_MPWANT;
138 wakeup((caddr_t)&mp->mnt_flag);
36ef03ec
KM
139 }
140}
141
3c4390e8
KM
142/*
143 * Lookup a mount point by filesystem identifier.
144 */
145struct mount *
597259be 146vfs_getvfs(fsid)
3c4390e8
KM
147 fsid_t *fsid;
148{
149 register struct mount *mp;
150
3fc2ac18 151 for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
54fb9dc2 152 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
3fc2ac18 153 mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
d713f801 154 return (mp);
3fc2ac18 155 }
d713f801 156 return ((struct mount *)0);
3c4390e8
KM
157}
158
/*
 * Get a new unique fsid for a mount point.
 * val[0] is built from a pseudo device number (nblkdev + filesystem
 * type) plus a per-type minor counter; val[1] is the filesystem type.
 * The candidate id is bumped until vfs_getvfs() no longer finds a
 * collision with an already-mounted filesystem.
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
static u_short xxxfs_mntid;	/* last minor number handed out */

	fsid_t tfsid;
	int mtype;

	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	/* Minor 0 is reserved for the default id set above. */
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.tqh_first != NULL) {
		/* Probe until the candidate id is not already in use. */
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}
186
3c4390e8
KM
187/*
188 * Set vnode attributes to VNOVAL
189 */
05560902
CD
190void
191vattr_null(vap)
3c4390e8
KM
192 register struct vattr *vap;
193{
194
195 vap->va_type = VNON;
83504fd5 196 vap->va_size = vap->va_bytes = VNOVAL;
3c4390e8 197 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
83504fd5
KM
198 vap->va_fsid = vap->va_fileid =
199 vap->va_blocksize = vap->va_rdev =
ecf75a7d
KM
200 vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
201 vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
202 vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
8cf4d4fb 203 vap->va_flags = vap->va_gen = VNOVAL;
fcba749b 204 vap->va_vaflags = 0;
3c4390e8 205}
c60798ca 206
/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;			/* current count of allocated vnodes */
extern struct vattr va_null;

/* Debugging aids: total vnodes ever created, and a vprint() throttle. */
int newnodes = 0;
int printcnt = 0;
216
/*
 * Return the next vnode from the free list.
 *
 * Allocates a fresh vnode while below desiredvnodes (or below twice
 * that when the free list is empty); otherwise recycles the head of
 * the free list, revoking its previous identity.  On success *vpp is
 * the new vnode with a single reference and 0 is returned; returns
 * ENFILE when the table is full and nothing can be recycled.
 *
 * The values 0xdeadf/0xdeadb poison the freelist/mount-list links of a
 * vnode that is not on a list, so list corruption is caught early.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	register struct vnode *vp;
	int s;

newnodes++;
	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		/* Poison list links: this vnode is on no list yet. */
		vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
		vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
		numvnodes++;
		vp->v_spare[0] = numvnodes;	/* debug: creation ordinal */
	} else {
		if ((vp = vnode_free_list.tqh_first) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf ||
		    vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)
			panic("getnewvnode: not on queue");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_lease = NULL;
		/* Dissociate the vnode from its old file before reuse. */
		if (vp->v_type != VBAD)
			VOP_REVOKE(vp, 0);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		/* Reset read-ahead/clustering state for the new identity. */
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	if (printcnt-- > 0) vprint("getnewvnode got", vp);
	return (0);
}
8981e258 290
/*
 * Move a vnode from one mount queue to another.
 * Passing mp == NULL simply removes the vnode from its current mount
 * list.  The 0xdeadf/0xdeadb poison values assert that a vnode is
 * on a mount list exactly when v_mount is non-nil.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		if (vp->v_mntvnodes.le_next == (struct vnode *)0xdeadf ||
		    vp->v_mntvnodes.le_prev == (struct vnode **)0xdeadb)
			panic("insmntque: not on queue");
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
		vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
	}
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	if (vp->v_mntvnodes.le_next != (struct vnode *)0xdeadf ||
	    vp->v_mntvnodes.le_prev != (struct vnode **)0xdeadb)
		panic("insmntque: already on queue");
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}
321
/*
 * Update outstanding I/O count and do wakeup if requested.
 * Called at the completion of a write on bp's vnode: decrements
 * v_numoutput and, if a thread is waiting in VBWAIT for all writes
 * to drain, wakes it.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput 2");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}
343
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 *
 * flags: V_SAVE forces dirty data to disk first (via VOP_FSYNC);
 * V_SAVEMETA preserves indirect-block buffers (b_lblkno < 0).
 * slpflag/slptimeo are passed to tsleep when waiting on busy buffers.
 * Returns 0, an error from VOP_FSYNC, or a tsleep error.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		/*
		 * Pick a starting point: the clean list, then the dirty
		 * list; with V_SAVEMETA skip leading metadata buffers.
		 */
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				/* Wait for the buffer, then rescan. */
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}
413
/*
 * Associate a buffer with a vnode.
 * Takes a hold reference on the vnode and places the buffer on the
 * vnode's clean buffer list.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	/* Device vnodes record the device for the driver strategy. */
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}
436
/*
 * Disassociate a buffer from a vnode.
 * Removes the buffer from the vnode's buffer list (if on one) and
 * drops the hold reference taken by bgetvp().
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}
457
/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 * NOTE(review): a NULL newvp is only logged (message lacks a
 * newline) and the buffer is left unlisted — best-effort behavior.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}
489
/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 * Returns 0 with *vpp set on success (NODEV is silently accepted
 * and leaves *vpp untouched); otherwise the getnewvnode error.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	/* If the device already has a vnode, use the existing alias. */
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
520
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 *
 * Returns NULLVP when nvp was simply entered on the hash chain
 * (no usable alias), or the reclaimed alias vnode.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;	/* list may have changed; rescan */
		}
		if (vget(vp, 1))
			goto loop;	/* lost a race; rescan */
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		/* No reusable alias: link nvp onto the hash chain. */
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/*
	 * Reclaim the anonymous (VT_NON) alias: strip its old state and
	 * give it nvp's identity; the caller fills in the contents.
	 */
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
580
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined either by checking
	 * that the VXLOCK flag is set, or that the use count is
	 * zero with the back pointer set to show that it has been
	 * removed from the free list by getnewvnode. The VXLOCK
	 * flag may not have been set yet because vclean is blocked in
	 * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
	 */
	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
	     vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (1);
	}
	if (vp->v_usecount == 0) {
		/* Unreferenced vnodes live on the free list; unhook it. */
		if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf ||
		    vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)
			panic("vget: not on queue");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
	}
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	if (printcnt-- > 0) vprint("vget got", vp);
	return (0);
}
626
int bug_refs = 0;	/* debug: when set, vprint on every ref change */

/*
 * Vnode reference, just increment the count.
 * Only legal on a vnode that already has a reference (use vget for
 * vnodes that may be on the free list).
 */
void
vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	if (vp->v_freelist.tqe_next != (struct vnode *)0xdeadf ||
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
		panic("vref: not free");
	vp->v_usecount++;
	if (printcnt-- > 0) vprint("vref get", vp);
	/* Debug accounting of non-device references per process. */
	if (vp->v_type != VBLK && curproc)
		curproc->p_spare[0]++;
	if (bug_refs)
		vprint("vref: ");	/* NOTE(review): vp argument missing */
}
649
/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}
661
/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (printcnt-- > 0) vprint("vrele put", vp);
	/* Debug accounting; mirrors the increment in vref(). */
	if (vp->v_type != VBLK && curproc)
		curproc->p_spare[0]--;
	if (bug_refs)
		vprint("vref: ");
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	/* Catch a count that went negative or writers outliving refs. */
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	if (vp->v_freelist.tqe_next != (struct vnode *)0xdeadf ||
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
		panic("vrele: not free");
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	VOP_INACTIVE(vp);
}
698
/*
 * Page or buffer structure gets a reference.
 * A hold keeps the vnode's identity but, unlike v_usecount,
 * does not keep it off the free list.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}
709
/*
 * Page or buffer structure frees a reference.
 * Panics on underflow.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}
722
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/* The list can change while we sleep; restart if so. */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			VOP_REVOKE(vp, 0);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				VOP_REVOKE(vp, 0);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}
802
/*
 * Disassociate the underlying file system from a vnode.
 * flags: DOCLOSE also flushes buffers and issues VOP_CLOSE.
 * On return the vnode uses the dead filesystem operations vector.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first
	 * wait for VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}
875
/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 * With REVOKEALL, every alias on the device hash chain is vgone'd
 * before the vnode itself.  Always returns 0.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	register struct vnode *vp, *vq;

	vp = ap->a_vp;
	if ((ap->a_flags & REVOKEALL) && (vp->v_flag & VALIASED)) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
	return (0);
}
924
/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 * Cleans the vnode, unhooks it from its mount list and (for devices)
 * the special-device hash chain, and moves an unreferenced vnode to
 * the head of the free list so it is recycled first.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		if (vp->v_mntvnodes.le_next == (struct vnode *)0xdeadf ||
		    vp->v_mntvnodes.le_prev == (struct vnode **)0xdeadb)
			panic("vgone: not on queue");
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
		vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * If exactly one other alias remains, clear its
			 * VALIASED flag too (vx = sole remaining alias).
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf)
			panic("vgone: use 0, not free");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}
ef62830d 1020
2bcd6066
KM
1021/*
1022 * Lookup a vnode by device number.
1023 */
05560902 1024int
2bcd6066
KM
1025vfinddev(dev, type, vpp)
1026 dev_t dev;
1027 enum vtype type;
1028 struct vnode **vpp;
1029{
1030 register struct vnode *vp;
1031
1032 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1033 if (dev != vp->v_rdev || type != vp->v_type)
1034 continue;
1035 *vpp = vp;
05378ee4 1036 return (1);
2bcd6066 1037 }
05378ee4 1038 return (0);
2bcd6066
KM
1039}
1040
/*
 * Calculate the total number of references to a special device.
 * Sums v_usecount over every alias of the device; unused aliases
 * encountered along the way are flushed with vgone(), which forces
 * a rescan of the (possibly changed) hash chain.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}
0bf84b18
KM
1069
1070/*
1071 * Print out a description of a vnode.
1072 */
1073static char *typename[] =
61f846a8 1074 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
0bf84b18 1075
5d263ebe 1076void
0bf84b18
KM
1077vprint(label, vp)
1078 char *label;
1079 register struct vnode *vp;
1080{
f2f730c6 1081 char buf[64];
0bf84b18
KM
1082
1083 if (label != NULL)
1084 printf("%s: ", label);
3e787e54 1085 printf("num %d ", vp->v_spare[0]);
65c3b3a8
KM
1086 printf("type %s, usecount %d, writecount %d, refcount %d,",
1087 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1088 vp->v_holdcnt);
f2f730c6
KM
1089 buf[0] = '\0';
1090 if (vp->v_flag & VROOT)
1091 strcat(buf, "|VROOT");
1092 if (vp->v_flag & VTEXT)
1093 strcat(buf, "|VTEXT");
36ef03ec
KM
1094 if (vp->v_flag & VSYSTEM)
1095 strcat(buf, "|VSYSTEM");
36ef03ec
KM
1096 if (vp->v_flag & VXLOCK)
1097 strcat(buf, "|VXLOCK");
1098 if (vp->v_flag & VXWANT)
1099 strcat(buf, "|VXWANT");
f2f730c6
KM
1100 if (vp->v_flag & VBWAIT)
1101 strcat(buf, "|VBWAIT");
36ef03ec
KM
1102 if (vp->v_flag & VALIASED)
1103 strcat(buf, "|VALIASED");
f2f730c6
KM
1104 if (buf[0] != '\0')
1105 printf(" flags (%s)", &buf[1]);
3fc2ac18
KM
1106 if (vp->v_data == NULL) {
1107 printf("\n");
1108 } else {
1109 printf("\n\t");
1110 VOP_PRINT(vp);
1111 }
0bf84b18 1112}
985cbdd5 1113
34c62e18
KM
#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	mp = mountlist.tqh_first;
	while (mp != NULL) {
		for (vp = mp->mnt_vnodelist.lh_first; vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		mp = mp->mnt_list.tqe_next;
	}
}
#endif
1135
597259be
KM
1136/*
1137 * Top level filesystem related information gathering.
1138 */
1139int
1140vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1141 int *name;
1142 u_int namelen;
1143 void *oldp;
1144 size_t *oldlenp;
1145 void *newp;
1146 size_t newlen;
1147 struct proc *p;
1148{
1149 struct ctldebug *cdp;
1150 struct vfsconf *vfsp;
1151
1152 /* all sysctl names at this level are at least name and field */
1153 if (namelen < 2)
1154 return (ENOTDIR); /* overloaded */
1155 if (name[0] != VFS_GENERIC) {
1156 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1157 if (vfsp->vfc_typenum == name[0])
1158 break;
1159 if (vfsp == NULL)
1160 return (EOPNOTSUPP);
1161 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1162 oldp, oldlenp, newp, newlen, p));
1163 }
1164 switch (name[1]) {
1165 case VFS_MAXTYPENUM:
1166 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
1167 case VFS_CONF:
1168 if (namelen < 3)
1169 return (ENOTDIR); /* overloaded */
1170 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1171 if (vfsp->vfc_typenum == name[2])
1172 break;
1173 if (vfsp == NULL)
1174 return (EOPNOTSUPP);
1175 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
1176 sizeof(struct vfsconf)));
1177 }
1178 return (EOPNOTSUPP);
1179}
1180
985cbdd5
MT
1181int kinfo_vdebug = 1;
1182int kinfo_vgetfailed;
1183#define KINFO_VNODESLOP 10
1184/*
786fb484 1185 * Dump vnode list (via sysctl).
985cbdd5
MT
1186 * Copyout address of vnode followed by vnode.
1187 */
aacc1bff 1188/* ARGSUSED */
05560902 1189int
786fb484 1190sysctl_vnode(where, sizep)
985cbdd5 1191 char *where;
c1909da4 1192 size_t *sizep;
985cbdd5 1193{
3fc2ac18 1194 register struct mount *mp, *nmp;
985cbdd5 1195 struct vnode *vp;
985cbdd5 1196 register char *bp = where, *savebp;
5bf57294 1197 char *ewhere;
985cbdd5
MT
1198 int error;
1199
1200#define VPTRSZ sizeof (struct vnode *)
1201#define VNODESZ sizeof (struct vnode)
1202 if (where == NULL) {
786fb484 1203 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
985cbdd5
MT
1204 return (0);
1205 }
786fb484 1206 ewhere = where + *sizep;
985cbdd5 1207
3fc2ac18
KM
1208 for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
1209 nmp = mp->mnt_list.tqe_next;
1210 if (vfs_busy(mp))
36ef03ec 1211 continue;
985cbdd5
MT
1212 savebp = bp;
1213again:
3fc2ac18
KM
1214 for (vp = mp->mnt_vnodelist.lh_first;
1215 vp != NULL;
1216 vp = vp->v_mntvnodes.le_next) {
41185b3b
KM
1217 /*
1218 * Check that the vp is still associated with
1219 * this filesystem. RACE: could have been
1220 * recycled onto the same filesystem.
1221 */
4597dd33
KM
1222 if (vp->v_mount != mp) {
1223 if (kinfo_vdebug)
1224 printf("kinfo: vp changed\n");
1225 bp = savebp;
1226 goto again;
1227 }
786fb484
KM
1228 if (bp + VPTRSZ + VNODESZ > ewhere) {
1229 *sizep = bp - where;
1230 return (ENOMEM);
1231 }
1232 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
1233 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
985cbdd5 1234 return (error);
985cbdd5 1235 bp += VPTRSZ + VNODESZ;
985cbdd5 1236 }
3fc2ac18
KM
1237 vfs_unbusy(mp);
1238 }
985cbdd5 1239
786fb484 1240 *sizep = bp - where;
985cbdd5
MT
1241 return (0);
1242}
8981e258
MH
1243
1244/*
1245 * Check to see if a filesystem is mounted on a block device.
1246 */
1247int
1248vfs_mountedon(vp)
1249 register struct vnode *vp;
1250{
1251 register struct vnode *vq;
1252
1253 if (vp->v_specflags & SI_MOUNTEDON)
1254 return (EBUSY);
1255 if (vp->v_flag & VALIASED) {
1256 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1257 if (vq->v_rdev != vp->v_rdev ||
1258 vq->v_type != vp->v_type)
1259 continue;
1260 if (vq->v_specflags & SI_MOUNTEDON)
1261 return (EBUSY);
1262 }
1263 }
1264 return (0);
1265}
1266
1267/*
1268 * Build hash lists of net addresses and hang them off the mount point.
1269 * Called by ufs_mount() to set up the lists of export addresses.
1270 */
1271static int
1272vfs_hang_addrlist(mp, nep, argp)
1273 struct mount *mp;
1274 struct netexport *nep;
1275 struct export_args *argp;
1276{
1277 register struct netcred *np;
1278 register struct radix_node_head *rnh;
1279 register int i;
1280 struct radix_node *rn;
1281 struct sockaddr *saddr, *smask = 0;
1282 struct domain *dom;
1283 int error;
1284
1285 if (argp->ex_addrlen == 0) {
1286 if (mp->mnt_flag & MNT_DEFEXPORTED)
1287 return (EPERM);
1288 np = &nep->ne_defexported;
1289 np->netc_exflags = argp->ex_flags;
1290 np->netc_anon = argp->ex_anon;
1291 np->netc_anon.cr_ref = 1;
1292 mp->mnt_flag |= MNT_DEFEXPORTED;
1293 return (0);
1294 }
1295 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1296 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1297 bzero((caddr_t)np, i);
1298 saddr = (struct sockaddr *)(np + 1);
1299 if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
1300 goto out;
1301 if (saddr->sa_len > argp->ex_addrlen)
1302 saddr->sa_len = argp->ex_addrlen;
1303 if (argp->ex_masklen) {
1304 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1305 error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
1306 if (error)
1307 goto out;
1308 if (smask->sa_len > argp->ex_masklen)
1309 smask->sa_len = argp->ex_masklen;
1310 }
1311 i = saddr->sa_family;
1312 if ((rnh = nep->ne_rtable[i]) == 0) {
1313 /*
1314 * Seems silly to initialize every AF when most are not
1315 * used, do so on demand here
1316 */
1317 for (dom = domains; dom; dom = dom->dom_next)
1318 if (dom->dom_family == i && dom->dom_rtattach) {
1319 dom->dom_rtattach((void **)&nep->ne_rtable[i],
1320 dom->dom_rtoffset);
1321 break;
1322 }
1323 if ((rnh = nep->ne_rtable[i]) == 0) {
1324 error = ENOBUFS;
1325 goto out;
1326 }
1327 }
1328 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
1329 np->netc_rnodes);
b398c713
KM
1330 if (rn == 0) {
1331 /*
1332 * One of the reasons that rnh_addaddr may fail is that
1333 * the entry already exists. To check for this case, we
1334 * look up the entry to see if it is there. If so, we
1335 * do not need to make a new entry but do return success.
1336 */
1337 free(np, M_NETADDR);
1338 rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
1339 if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
1340 ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
1341 !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
1342 (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
1343 return (0);
1344 return (EPERM);
8981e258
MH
1345 }
1346 np->netc_exflags = argp->ex_flags;
1347 np->netc_anon = argp->ex_anon;
1348 np->netc_anon.cr_ref = 1;
1349 return (0);
1350out:
1351 free(np, M_NETADDR);
1352 return (error);
1353}
1354
1355/* ARGSUSED */
1356static int
1357vfs_free_netcred(rn, w)
1358 struct radix_node *rn;
1359 caddr_t w;
1360{
1361 register struct radix_node_head *rnh = (struct radix_node_head *)w;
1362
1363 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
1364 free((caddr_t)rn, M_NETADDR);
1365 return (0);
1366}
05560902 1367
8981e258
MH
1368/*
1369 * Free the net address hash lists that are hanging off the mount points.
1370 */
1371static void
1372vfs_free_addrlist(nep)
1373 struct netexport *nep;
1374{
1375 register int i;
1376 register struct radix_node_head *rnh;
1377
1378 for (i = 0; i <= AF_MAX; i++)
1379 if (rnh = nep->ne_rtable[i]) {
1380 (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
1381 (caddr_t)rnh);
1382 free((caddr_t)rnh, M_RTABLE);
1383 nep->ne_rtable[i] = 0;
1384 }
1385}
1386
1387int
1388vfs_export(mp, nep, argp)
1389 struct mount *mp;
1390 struct netexport *nep;
1391 struct export_args *argp;
1392{
1393 int error;
1394
1395 if (argp->ex_flags & MNT_DELEXPORT) {
1396 vfs_free_addrlist(nep);
1397 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
1398 }
1399 if (argp->ex_flags & MNT_EXPORTED) {
1400 if (error = vfs_hang_addrlist(mp, nep, argp))
1401 return (error);
1402 mp->mnt_flag |= MNT_EXPORTED;
1403 }
1404 return (0);
1405}
1406
1407struct netcred *
1408vfs_export_lookup(mp, nep, nam)
1409 register struct mount *mp;
1410 struct netexport *nep;
1411 struct mbuf *nam;
1412{
1413 register struct netcred *np;
1414 register struct radix_node_head *rnh;
1415 struct sockaddr *saddr;
1416
1417 np = NULL;
1418 if (mp->mnt_flag & MNT_EXPORTED) {
1419 /*
1420 * Lookup in the export list first.
1421 */
1422 if (nam != NULL) {
1423 saddr = mtod(nam, struct sockaddr *);
1424 rnh = nep->ne_rtable[saddr->sa_family];
1425 if (rnh != NULL) {
1426 np = (struct netcred *)
1427 (*rnh->rnh_matchaddr)((caddr_t)saddr,
1428 rnh);
1429 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
1430 np = NULL;
1431 }
1432 }
1433 /*
1434 * If no address match, use the default if it exists.
1435 */
1436 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
1437 np = &nep->ne_defexported;
1438 }
1439 return (np);
1440}