expand vput inline so that locking is managed properly
[unix-history] usr/src/sys/kern/vfs_subr.c
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	8.25 (Berkeley) %G%
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {						\
	LIST_REMOVE(bp, b_vnbufs);				\
	(bp)->b_vnbufs.le_next = NOLIST;			\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
static struct simplelock spechash_slock;
static struct simplelock vnode_free_list_slock;

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
int
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		tsleep((caddr_t)mp, PVFS, "vfslock", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
int
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}

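/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * code that walks a mount point's vnode list brackets the walk with
 * vfs_busy()/vfs_unbusy() so an unmount cannot tear the list down
 * underneath it.  vfs_busy() returning 1 means an unmount is already
 * in progress and the mount point should simply be skipped:
 *
 *	if (vfs_busy(mp))
 *		return (0);
 *	... iterate over mp->mnt_vnodelist ...
 *	vfs_unbusy(mp);
 */
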
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

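/*
 * Usage sketch (hypothetical, for illustration): a filesystem's
 * mountroot routine typically allocates its mount structure with
 * vfs_rootmountalloc() and then performs its own root mount:
 *
 *	struct mount *mp;
 *	int error;
 *
 *	if (error = vfs_rootmountalloc("ufs", "root_device", &mp))
 *		return (error);
 *	... read the superblock, fill in mp, attach to mountlist ...
 */
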
/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*mountroot)(void);
	int error;

	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	}
	return ((struct mount *)0);
}

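/*
 * Sketch of a typical caller (illustrative, not from this file): the
 * NFS server maps an incoming file handle to its mount point by fsid;
 * a stale or unmounted filesystem shows up as a failed lookup:
 *
 *	struct mount *mp;
 *
 *	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
 *		return (ESTALE);
 */
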
/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

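/*
 * Illustrative fragment: a caller changing one attribute builds a
 * vattr in which every field is VNOVAL except the one being set, so
 * the filesystem's VOP_SETATTR acts only on va_size here:
 *
 *	struct vattr vattr;
 *
 *	vattr_null(&vattr);
 *	vattr.va_size = 0;
 *	error = VOP_SETATTR(vp, &vattr, cred, p);
 */
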
/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
static void vclean __P((struct vnode *vp, int flag, struct proc *p));
extern void vgonel __P((struct vnode *vp, struct proc *p));
long numvnodes;
extern struct vattr va_null;
int newnodes = 0;
int printcnt = 0;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	int s;
	int cnt;

top:
	simple_lock(&vnode_free_list_slock);
newnodes++;
	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
		vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
		numvnodes++;
		vp->v_spare[0] = numvnodes;
	} else {
		for (vp = vnode_free_list.tqh_first;
		     vp != NULLVP; vp = vp->v_freelist.tqe_next) {
			if (simple_lock_try(&vp->v_interlock))
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf ||
		    vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)
			panic("getnewvnode: not on queue");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	if (printcnt-- > 0) vprint("getnewvnode got", vp);
	return (0);
}

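/*
 * Usage sketch (hypothetical fragment): a filesystem allocates a fresh
 * vnode for a newly read in-core inode roughly as follows; the vnode
 * is returned with v_usecount already set to 1:
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	if (error = getnewvnode(VT_UFS, mp, ufs_vnodeop_p, &vp))
 *		return (error);
 *	vp->v_data = (caddr_t)ip;	(filesystem private data)
 */
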
/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		if (vp->v_mntvnodes.le_next == (struct vnode *)0xdeadf ||
		    vp->v_mntvnodes.le_prev == (struct vnode **)0xdeadb)
			panic("insmntque: not on queue");
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
		vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
	}
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput 2");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

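/*
 * Illustrative fragments: a truncate to zero length discards all of a
 * file's buffers, while a revoke must write the dirty ones first; the
 * V_SAVE flag selects between the two behaviors:
 *
 *	vinvalbuf(vp, 0, cred, p, 0, 0);
 *	vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
 */
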
/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (ENODEV);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

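/*
 * Usage sketch (hypothetical fragment): early in boot, the machine
 * dependent code creates the special vnodes for the root and swap
 * devices along these lines, with rootdev/swapdev set by configuration:
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("cannot setup bdevvp for root");
 *	if (bdevvp(swapdev, &swapdev_vp))
 *		panic("cannot setup bdevvp for swap");
 */
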
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf ||
		    vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)
			panic("vget: not on queue");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
	if (flags & LK_TYPE_MASK)
		return (vn_lock(vp, flags | LK_INTERLOCK, p));
	simple_unlock(&vp->v_interlock);
	if (printcnt-- > 0) vprint("vget got", vp);
	return (0);
}

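/*
 * Usage sketch (hypothetical fragment): a filesystem hash lookup
 * revives a cached but inactive vnode with vget(); a nonzero return
 * means the vnode was being recycled and the lookup must be retried:
 *
 * loop:
 *	for (ip = ...hash chain...; ip; ip = ...next...) {
 *		if (ip->i_number == ino) {
 *			if (vget(ITOV(ip), LK_EXCLUSIVE, p))
 *				goto loop;
 *			return (ITOV(ip));
 *		}
 *	}
 */
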
int bug_refs = 0;

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 *
 * Getting a lock just clears the interlock if necessary.
 */
int
vop_nolock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Unlock has nothing to do.
 */
int
vop_nounlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{

	return (0);
}

/*
 * Nothing is ever locked.
 */
int
vop_noislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	if (vp->v_freelist.tqe_next != (struct vnode *)0xdeadf ||
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
		panic("vref: not free");
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
	if (printcnt-- > 0) vprint("vref get", vp);
	if (vp->v_type != VBLK && curproc)
		curproc->p_spare[0]++;
	if (bug_refs)
		vprint("vref: ");
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0, p);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (printcnt-- > 0) vprint("vrele put", vp);
	if (vp->v_type != VBLK && curproc)
		curproc->p_spare[0]--;
	if (bug_refs)
		vprint("vref: ");
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_freelist.tqe_next != (struct vnode *)0xdeadf ||
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
		panic("vrele: not free");
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
		VOP_INACTIVE(vp, p);
}

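/*
 * Note on pairing the reference interfaces above (illustrative): vref()
 * adds a reference to an already referenced vnode, vrele() drops a
 * reference on an unlocked vnode, and vput() drops a reference on a
 * locked vnode after unlocking it.  A hypothetical open/close cycle:
 *
 *	... namei() returns a locked, referenced vnode in vp ...
 *	error = VOP_OPEN(vp, mode, cred, p);
 *	VOP_UNLOCK(vp, 0, p);		keep the reference, drop the lock
 *	...
 *	vrele(vp);			last release may call VOP_INACTIVE
 */
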
#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

#ifdef DIAGNOSTIC
	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
#endif

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

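/*
 * Usage sketch (hypothetical fragment): a filesystem's unmount routine
 * flushes the vnodes belonging to its mount point with vflush(); a
 * forced unmount adds FORCECLOSE so active vnodes are killed too:
 *
 *	int flags = 0;
 *
 *	if (mntflags & MNT_FORCE)
 *		flags |= FORCECLOSE;
 *	if (error = vflush(mp, NULLVP, flags))
 *		return (error);
 */
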
/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		vp->v_usecount++;
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

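/*
 * Usage sketch (hypothetical fragment): a filesystem's inactive routine
 * can use vrecycle() to discard a vnode whose underlying object is
 * gone, such as an inode whose link count has dropped to zero:
 *
 *	if (ip->i_mode == 0)
 *		(void) vrecycle(vp, (struct simplelock *)0, p);
 */
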
/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

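/*
 * Usage sketch (hypothetical fragment): a device close routine uses
 * vcount() to decide whether this is the last close across all aliases
 * of the device before releasing driver state:
 *
 *	if (vcount(vp) > 1)
 *		return (0);
 *	... perform last-close processing ...
 */
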
/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("num %d ", vp->v_spare[0]);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
	}
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct ctldebug *cdp;
	struct vfsconf *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
		    sizeof(struct vfsconf)));
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_next;
		if (vfs_busy(mp))
			continue;
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;

	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		(void) dounmount(mp, MNT_FORCE, &proc0);
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0) {
		/*
		 * One of the reasons that rnh_addaddr may fail is that
		 * the entry already exists. To check for this case, we
		 * look up the entry to see if it is there. If so, we
		 * do not need to make a new entry but do return success.
		 */
		free(np, M_NETADDR);
		rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
		if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
		    ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
		    !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
			(caddr_t)&argp->ex_anon, sizeof(struct ucred)))
			return (0);
		return (EPERM);
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if (rnh = nep->ne_rtable[i]) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
			    (caddr_t)rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (error = vfs_hang_addrlist(mp, nep, argp))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
					rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}