drain, deallocate and unlock map in vm_map_deallocate
[unix-history] / usr / src / sys / kern / vfs_subr.c
CommitLineData
3c4390e8 1/*
ec54f0cc
KB
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
adb35f79
KB
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
3c4390e8 9 *
dbf0c423 10 * %sccs.include.redist.c%
3c4390e8 11 *
662d4423 12 * @(#)vfs_subr.c 8.24 (Berkeley) %G%
3c4390e8
KM
13 */
14
15/*
16 * External virtual filesystem routines
17 */
18
cb796a23 19#include <sys/param.h>
917dc539 20#include <sys/systm.h>
cb796a23
KB
21#include <sys/proc.h>
22#include <sys/mount.h>
23#include <sys/time.h>
24#include <sys/vnode.h>
807cc430 25#include <sys/stat.h>
cb796a23
KB
26#include <sys/namei.h>
27#include <sys/ucred.h>
28#include <sys/buf.h>
29#include <sys/errno.h>
30#include <sys/malloc.h>
8981e258
MH
31#include <sys/domain.h>
32#include <sys/mbuf.h>
3c4390e8 33
bb4964fd
KM
34#include <vm/vm.h>
35#include <sys/sysctl.h>
36
021de758
JSP
37#include <miscfs/specfs/specdev.h>
38
807cc430
KM
39enum vtype iftovt_tab[16] = {
40 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
41 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
42};
43int vttoif_tab[9] = {
44 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
45 S_IFSOCK, S_IFIFO, S_IFMT,
46};
47
e3249ec0
KM
48/*
49 * Insq/Remq for the vnode usage lists.
50 */
3fc2ac18 51#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
05560902
CD
52#define bufremvn(bp) { \
53 LIST_REMOVE(bp, b_vnbufs); \
54 (bp)->b_vnbufs.le_next = NOLIST; \
3fc2ac18 55}
3fc2ac18
KM
56TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
57struct mntlist mountlist; /* mounted filesystem list */
662d4423
KM
58static struct simplelock mntid_slock;
59struct simplelock mntvnode_slock;
60static struct simplelock spechash_slock;
61static struct simplelock vnode_free_list_slock;
e3249ec0 62
3c4390e8 63/*
3fc2ac18 64 * Initialize the vnode management data structures.
3c4390e8 65 */
05560902 66void
3fc2ac18 67vntblinit()
3c4390e8
KM
68{
69
662d4423
KM
70 simple_lock_init(&mntvnode_slock);
71 simple_lock_init(&mntid_slock);
72 simple_lock_init(&spechash_slock);
3fc2ac18 73 TAILQ_INIT(&vnode_free_list);
662d4423 74 simple_lock_init(&vnode_free_list_slock);
29330550 75 CIRCLEQ_INIT(&mountlist);
3c4390e8
KM
76}
77
78/*
79 * Lock a filesystem.
80 * Used to prevent access to it while mounting and unmounting.
81 */
05560902 82int
3c4390e8
KM
83vfs_lock(mp)
84 register struct mount *mp;
85{
86
05560902 87 while (mp->mnt_flag & MNT_MLOCK) {
54fb9dc2 88 mp->mnt_flag |= MNT_MWAIT;
05560902 89 tsleep((caddr_t)mp, PVFS, "vfslock", 0);
594501df 90 }
54fb9dc2 91 mp->mnt_flag |= MNT_MLOCK;
3c4390e8
KM
92 return (0);
93}
94
95/*
96 * Unlock a locked filesystem.
97 * Panic if filesystem is not locked.
98 */
99void
100vfs_unlock(mp)
101 register struct mount *mp;
102{
103
54fb9dc2 104 if ((mp->mnt_flag & MNT_MLOCK) == 0)
36ef03ec 105 panic("vfs_unlock: not locked");
54fb9dc2
KM
106 mp->mnt_flag &= ~MNT_MLOCK;
107 if (mp->mnt_flag & MNT_MWAIT) {
108 mp->mnt_flag &= ~MNT_MWAIT;
3c4390e8
KM
109 wakeup((caddr_t)mp);
110 }
111}
112
36ef03ec
KM
113/*
114 * Mark a mount point as busy.
115 * Used to synchronize access and to delay unmounting.
116 */
05560902 117int
36ef03ec
KM
118vfs_busy(mp)
119 register struct mount *mp;
120{
121
05560902 122 while (mp->mnt_flag & MNT_MPBUSY) {
54fb9dc2 123 mp->mnt_flag |= MNT_MPWANT;
05560902 124 tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0);
36ef03ec 125 }
d8b63609
KM
126 if (mp->mnt_flag & MNT_UNMOUNT)
127 return (1);
54fb9dc2 128 mp->mnt_flag |= MNT_MPBUSY;
36ef03ec
KM
129 return (0);
130}
131
132/*
133 * Free a busy filesystem.
134 * Panic if filesystem is not busy.
135 */
05560902 136void
36ef03ec
KM
137vfs_unbusy(mp)
138 register struct mount *mp;
139{
140
54fb9dc2 141 if ((mp->mnt_flag & MNT_MPBUSY) == 0)
36ef03ec 142 panic("vfs_unbusy: not busy");
54fb9dc2
KM
143 mp->mnt_flag &= ~MNT_MPBUSY;
144 if (mp->mnt_flag & MNT_MPWANT) {
145 mp->mnt_flag &= ~MNT_MPWANT;
146 wakeup((caddr_t)&mp->mnt_flag);
36ef03ec
KM
147 }
148}
149
83b7e676
KM
150/*
151 * Lookup a filesystem type, and if found allocate and initialize
152 * a mount structure for it.
153 *
154 * Devname is usually updated by mount(8) after booting.
155 */
156int
157vfs_rootmountalloc(fstypename, devname, mpp)
158 char *fstypename;
159 char *devname;
160 struct mount **mpp;
161{
162 struct vfsconf *vfsp;
163 struct mount *mp;
164
165 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
166 if (!strcmp(vfsp->vfc_name, fstypename))
167 break;
168 if (vfsp == NULL)
169 return (ENODEV);
170 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
171 bzero((char *)mp, (u_long)sizeof(struct mount));
172 LIST_INIT(&mp->mnt_vnodelist);
173 mp->mnt_vfc = vfsp;
174 mp->mnt_op = vfsp->vfc_vfsops;
175 mp->mnt_flag = MNT_RDONLY;
176 mp->mnt_vnodecovered = NULLVP;
177 vfsp->vfc_refcount++;
178 mp->mnt_stat.f_type = vfsp->vfc_typenum;
179 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
180 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
181 mp->mnt_stat.f_mntonname[0] = '/';
182 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
183 *mpp = mp;
184 return (0);
185}
186
187/*
188 * Find an appropriate filesystem to use for the root. If a filesystem
189 * has not been preselected, walk through the list of known filesystems
190 * trying those that have mountroot routines, and try them until one
191 * works or we have tried them all.
192 */
193int
194vfs_mountroot()
195{
196 struct vfsconf *vfsp;
197 extern int (*mountroot)(void);
198 int error;
199
200 if (mountroot != NULL)
201 return ((*vfsp->vfc_mountroot)());
202 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
203 if (vfsp->vfc_mountroot == NULL)
204 continue;
205 if ((error = (*vfsp->vfc_mountroot)()) == 0)
206 return (0);
207 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
208 }
209 return (ENODEV);
210}
211
3c4390e8
KM
212/*
213 * Lookup a mount point by filesystem identifier.
214 */
215struct mount *
597259be 216vfs_getvfs(fsid)
3c4390e8
KM
217 fsid_t *fsid;
218{
219 register struct mount *mp;
220
29330550
KM
221 for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
222 mp = mp->mnt_list.cqe_next) {
54fb9dc2 223 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
3fc2ac18 224 mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
d713f801 225 return (mp);
3fc2ac18 226 }
d713f801 227 return ((struct mount *)0);
3c4390e8
KM
228}
229
917dc539
JSP
230/*
231 * Get a new unique fsid
232 */
233void
597259be 234vfs_getnewfsid(mp)
917dc539 235 struct mount *mp;
917dc539
JSP
236{
237static u_short xxxfs_mntid;
238
239 fsid_t tfsid;
597259be 240 int mtype;
917dc539 241
662d4423 242 simple_lock(&mntid_slock);
597259be 243 mtype = mp->mnt_vfc->vfc_typenum;
1209b9a4 244 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
917dc539
JSP
245 mp->mnt_stat.f_fsid.val[1] = mtype;
246 if (xxxfs_mntid == 0)
247 ++xxxfs_mntid;
1209b9a4 248 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
917dc539 249 tfsid.val[1] = mtype;
29330550 250 if (mountlist.cqh_first != (void *)&mountlist) {
597259be 251 while (vfs_getvfs(&tfsid)) {
17fd1cc7
JSP
252 tfsid.val[0]++;
253 xxxfs_mntid++;
254 }
917dc539
JSP
255 }
256 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
662d4423 257 simple_unlock(&mntid_slock);
917dc539
JSP
258}
259
3c4390e8
KM
260/*
261 * Set vnode attributes to VNOVAL
262 */
05560902
CD
263void
264vattr_null(vap)
3c4390e8
KM
265 register struct vattr *vap;
266{
267
268 vap->va_type = VNON;
83504fd5 269 vap->va_size = vap->va_bytes = VNOVAL;
3c4390e8 270 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
83504fd5
KM
271 vap->va_fsid = vap->va_fileid =
272 vap->va_blocksize = vap->va_rdev =
ecf75a7d
KM
273 vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
274 vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
275 vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
8cf4d4fb 276 vap->va_flags = vap->va_gen = VNOVAL;
fcba749b 277 vap->va_vaflags = 0;
3c4390e8 278}
c60798ca 279
36d09cb1
KM
280/*
281 * Routines having to do with the management of the vnode table.
282 */
9342689a 283extern int (**dead_vnodeop_p)();
662d4423
KM
284static void vclean __P((struct vnode *vp, int flag, struct proc *p));
285extern void vgonel __P((struct vnode *vp, struct proc *p));
1a80f56e 286long numvnodes;
e781da98 287extern struct vattr va_null;
3e787e54
KM
288int newnodes = 0;
289int printcnt = 0;
36d09cb1
KM
290
291/*
292 * Return the next vnode from the free list.
293 */
05560902 294int
36d09cb1
KM
295getnewvnode(tag, mp, vops, vpp)
296 enum vtagtype tag;
297 struct mount *mp;
cf74dd57 298 int (**vops)();
36d09cb1
KM
299 struct vnode **vpp;
300{
662d4423
KM
301 struct proc *p = curproc; /* XXX */
302 struct vnode *vp;
1f9d2249 303 int s;
662d4423 304 int cnt;
36d09cb1 305
662d4423
KM
306top:
307 simple_lock(&vnode_free_list_slock);
3e787e54 308newnodes++;
3fc2ac18
KM
309 if ((vnode_free_list.tqh_first == NULL &&
310 numvnodes < 2 * desiredvnodes) ||
ecf75a7d 311 numvnodes < desiredvnodes) {
662d4423 312 simple_unlock(&vnode_free_list_slock);
aacc1bff
KM
313 vp = (struct vnode *)malloc((u_long)sizeof *vp,
314 M_VNODE, M_WAITOK);
1a80f56e 315 bzero((char *)vp, sizeof *vp);
3e787e54
KM
316 vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
317 vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
318 vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
319 vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
1a80f56e 320 numvnodes++;
3e787e54 321 vp->v_spare[0] = numvnodes;
1a80f56e 322 } else {
662d4423
KM
323 for (vp = vnode_free_list.tqh_first;
324 vp != NULLVP; vp = vp->v_freelist.tqe_next) {
325 if (simple_lock_try(&vp->v_interlock))
326 break;
327 }
328 /*
329 * Unless this is a bad time of the month, at most
330 * the first NCPUS items on the free list are
331 * locked, so this is close enough to being empty.
332 */
333 if (vp == NULLVP) {
334 simple_unlock(&vnode_free_list_slock);
1a80f56e
KM
335 tablefull("vnode");
336 *vpp = 0;
337 return (ENFILE);
338 }
339 if (vp->v_usecount)
340 panic("free vnode isn't");
3e787e54
KM
341 if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf ||
342 vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)
343 panic("getnewvnode: not on queue");
3fc2ac18 344 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
3e787e54 345 vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
0bf9bb76
KM
346 /* see comment on why 0xdeadb is set at end of vgone (below) */
347 vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
662d4423 348 simple_unlock(&vnode_free_list_slock);
39b99eb6 349 vp->v_lease = NULL;
1a80f56e 350 if (vp->v_type != VBAD)
662d4423
KM
351 vgonel(vp, p);
352 else
353 simple_unlock(&vp->v_interlock);
1f9d2249 354#ifdef DIAGNOSTIC
2345b093
KM
355 if (vp->v_data)
356 panic("cleaned vnode isn't");
1f9d2249
MS
357 s = splbio();
358 if (vp->v_numoutput)
359 panic("Clean vnode has pending I/O's");
360 splx(s);
361#endif
1a80f56e 362 vp->v_flag = 0;
1a80f56e 363 vp->v_lastr = 0;
2b5ada11
MH
364 vp->v_ralen = 0;
365 vp->v_maxra = 0;
1f9d2249
MS
366 vp->v_lastw = 0;
367 vp->v_lasta = 0;
368 vp->v_cstart = 0;
369 vp->v_clen = 0;
1a80f56e 370 vp->v_socket = 0;
36d09cb1 371 }
b027498b 372 vp->v_type = VNON;
36d09cb1
KM
373 cache_purge(vp);
374 vp->v_tag = tag;
ef24f6dd 375 vp->v_op = vops;
36d09cb1 376 insmntque(vp, mp);
36d09cb1 377 *vpp = vp;
0bf9bb76 378 vp->v_usecount = 1;
3fc2ac18 379 vp->v_data = 0;
3e787e54 380 if (printcnt-- > 0) vprint("getnewvnode got", vp);
36d09cb1
KM
381 return (0);
382}
8981e258 383
36d09cb1
KM
384/*
385 * Move a vnode from one mount queue to another.
386 */
05560902 387void
36d09cb1 388insmntque(vp, mp)
662d4423
KM
389 struct vnode *vp;
390 struct mount *mp;
36d09cb1 391{
36d09cb1 392
662d4423 393 simple_lock(&mntvnode_slock);
36d09cb1
KM
394 /*
395 * Delete from old mount point vnode list, if on one.
396 */
3e787e54
KM
397 if (vp->v_mount != NULL) {
398 if (vp->v_mntvnodes.le_next == (struct vnode *)0xdeadf ||
399 vp->v_mntvnodes.le_prev == (struct vnode **)0xdeadb)
400 panic("insmntque: not on queue");
3fc2ac18 401 LIST_REMOVE(vp, v_mntvnodes);
3e787e54
KM
402 vp->v_mntvnodes.le_next = (struct vnode *)0xdeadf;
403 vp->v_mntvnodes.le_prev = (struct vnode **)0xdeadb;
404 }
36d09cb1
KM
405 /*
406 * Insert into list of vnodes for the new mount point, if available.
407 */
662d4423
KM
408 if ((vp->v_mount = mp) != NULL)
409 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
410 simple_unlock(&mntvnode_slock);
36d09cb1
KM
411}
412
76429560
KM
413/*
414 * Update outstanding I/O count and do wakeup if requested.
415 */
05560902 416void
76429560
KM
417vwakeup(bp)
418 register struct buf *bp;
419{
420 register struct vnode *vp;
421
a9338fad 422 bp->b_flags &= ~B_WRITEINPROG;
76429560 423 if (vp = bp->b_vp) {
05560902 424 if (--vp->v_numoutput < 0)
1f9d2249 425 panic("vwakeup: neg numoutput");
76429560
KM
426 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
427 if (vp->v_numoutput < 0)
05560902 428 panic("vwakeup: neg numoutput 2");
76429560
KM
429 vp->v_flag &= ~VBWAIT;
430 wakeup((caddr_t)&vp->v_numoutput);
431 }
432 }
433}
434
76429560
KM
435/*
436 * Flush out and invalidate all buffers associated with a vnode.
437 * Called with the underlying object locked.
438 */
d024c2ce 439int
c33e9e8b 440vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
76429560 441 register struct vnode *vp;
12079a9d 442 int flags;
d024c2ce
KM
443 struct ucred *cred;
444 struct proc *p;
c33e9e8b 445 int slpflag, slptimeo;
76429560
KM
446{
447 register struct buf *bp;
448 struct buf *nbp, *blist;
d024c2ce 449 int s, error;
76429560 450
12079a9d 451 if (flags & V_SAVE) {
d024c2ce
KM
452 if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
453 return (error);
3fc2ac18 454 if (vp->v_dirtyblkhd.lh_first != NULL)
d024c2ce
KM
455 panic("vinvalbuf: dirty bufs");
456 }
76429560 457 for (;;) {
3fc2ac18 458 if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
12079a9d 459 while (blist && blist->b_lblkno < 0)
3fc2ac18 460 blist = blist->b_vnbufs.le_next;
05560902 461 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
e3249ec0 462 (flags & V_SAVEMETA))
12079a9d 463 while (blist && blist->b_lblkno < 0)
3fc2ac18 464 blist = blist->b_vnbufs.le_next;
12079a9d 465 if (!blist)
76429560 466 break;
12079a9d 467
76429560 468 for (bp = blist; bp; bp = nbp) {
3fc2ac18 469 nbp = bp->b_vnbufs.le_next;
12079a9d
MS
470 if (flags & V_SAVEMETA && bp->b_lblkno < 0)
471 continue;
76429560
KM
472 s = splbio();
473 if (bp->b_flags & B_BUSY) {
474 bp->b_flags |= B_WANTED;
c33e9e8b
KM
475 error = tsleep((caddr_t)bp,
476 slpflag | (PRIBIO + 1), "vinvalbuf",
477 slptimeo);
76429560 478 splx(s);
c33e9e8b
KM
479 if (error)
480 return (error);
76429560
KM
481 break;
482 }
483 bremfree(bp);
484 bp->b_flags |= B_BUSY;
485 splx(s);
c33e9e8b
KM
486 /*
487 * XXX Since there are no node locks for NFS, I believe
488 * there is a slight chance that a delayed write will
489 * occur while sleeping just above, so check for it.
490 */
491 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
492 (void) VOP_BWRITE(bp);
493 break;
494 }
12079a9d 495 bp->b_flags |= B_INVAL;
76429560
KM
496 brelse(bp);
497 }
498 }
e3249ec0 499 if (!(flags & V_SAVEMETA) &&
3fc2ac18 500 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
76429560 501 panic("vinvalbuf: flush failed");
d024c2ce 502 return (0);
76429560
KM
503}
504
505/*
506 * Associate a buffer with a vnode.
507 */
05560902 508void
76429560
KM
509bgetvp(vp, bp)
510 register struct vnode *vp;
511 register struct buf *bp;
512{
513
514 if (bp->b_vp)
515 panic("bgetvp: not free");
516 VHOLD(vp);
517 bp->b_vp = vp;
518 if (vp->v_type == VBLK || vp->v_type == VCHR)
519 bp->b_dev = vp->v_rdev;
520 else
521 bp->b_dev = NODEV;
522 /*
523 * Insert onto list for new vnode.
524 */
e3249ec0 525 bufinsvn(bp, &vp->v_cleanblkhd);
76429560
KM
526}
527
528/*
529 * Disassociate a buffer from a vnode.
530 */
05560902 531void
76429560
KM
532brelvp(bp)
533 register struct buf *bp;
534{
76429560
KM
535 struct vnode *vp;
536
537 if (bp->b_vp == (struct vnode *) 0)
538 panic("brelvp: NULL");
539 /*
540 * Delete from old vnode list, if on one.
541 */
3fc2ac18 542 if (bp->b_vnbufs.le_next != NOLIST)
e3249ec0 543 bufremvn(bp);
76429560
KM
544 vp = bp->b_vp;
545 bp->b_vp = (struct vnode *) 0;
546 HOLDRELE(vp);
547}
548
549/*
550 * Reassign a buffer from one vnode to another.
551 * Used to assign file specific control information
552 * (indirect blocks) to the vnode to which they belong.
553 */
05560902 554void
76429560
KM
555reassignbuf(bp, newvp)
556 register struct buf *bp;
557 register struct vnode *newvp;
558{
3fc2ac18 559 register struct buflists *listheadp;
76429560 560
e5c3f16e
KM
561 if (newvp == NULL) {
562 printf("reassignbuf: NULL");
563 return;
564 }
76429560
KM
565 /*
566 * Delete from old vnode list, if on one.
567 */
3fc2ac18 568 if (bp->b_vnbufs.le_next != NOLIST)
e3249ec0 569 bufremvn(bp);
76429560
KM
570 /*
571 * If dirty, put on list of dirty buffers;
572 * otherwise insert onto list of clean buffers.
573 */
574 if (bp->b_flags & B_DELWRI)
575 listheadp = &newvp->v_dirtyblkhd;
576 else
577 listheadp = &newvp->v_cleanblkhd;
e3249ec0 578 bufinsvn(bp, listheadp);
76429560
KM
579}
580
36d09cb1 581/*
ef24f6dd
KM
582 * Create a vnode for a block device.
583 * Used for root filesystem, argdev, and swap areas.
584 * Also used for memory file system special devices.
585 */
05560902 586int
ef24f6dd
KM
587bdevvp(dev, vpp)
588 dev_t dev;
589 struct vnode **vpp;
590{
ef24f6dd
KM
591 register struct vnode *vp;
592 struct vnode *nvp;
593 int error;
594
83b7e676
KM
595 if (dev == NODEV) {
596 *vpp = NULLVP;
597 return (ENODEV);
598 }
9342689a 599 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
ef24f6dd 600 if (error) {
05560902 601 *vpp = NULLVP;
ef24f6dd
KM
602 return (error);
603 }
604 vp = nvp;
605 vp->v_type = VBLK;
c0de8792 606 if (nvp = checkalias(vp, dev, (struct mount *)0)) {
ef24f6dd
KM
607 vput(vp);
608 vp = nvp;
609 }
610 *vpp = vp;
611 return (0);
612}
613
614/*
615 * Check to see if the new vnode represents a special device
616 * for which we already have a vnode (either because of
617 * bdevvp() or because of a different vnode representing
618 * the same block device). If such an alias exists, deallocate
f0556f86 619 * the existing contents and return the aliased vnode. The
ef24f6dd
KM
620 * caller is responsible for filling it with its new contents.
621 */
622struct vnode *
c0de8792 623checkalias(nvp, nvp_rdev, mp)
ef24f6dd 624 register struct vnode *nvp;
c0de8792 625 dev_t nvp_rdev;
ef24f6dd
KM
626 struct mount *mp;
627{
662d4423
KM
628 struct proc *p = curproc; /* XXX */
629 struct vnode *vp;
c0de8792 630 struct vnode **vpp;
ef24f6dd
KM
631
632 if (nvp->v_type != VBLK && nvp->v_type != VCHR)
54fb9dc2 633 return (NULLVP);
c0de8792
KM
634
635 vpp = &speclisth[SPECHASH(nvp_rdev)];
ef24f6dd 636loop:
662d4423 637 simple_lock(&spechash_slock);
c0de8792
KM
638 for (vp = *vpp; vp; vp = vp->v_specnext) {
639 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
ef24f6dd 640 continue;
c0de8792
KM
641 /*
642 * Alias, but not in use, so flush it out.
643 */
662d4423 644 simple_lock(&vp->v_interlock);
7f7b7d89 645 if (vp->v_usecount == 0) {
662d4423
KM
646 simple_unlock(&spechash_slock);
647 vgonel(vp, p);
c0de8792
KM
648 goto loop;
649 }
662d4423
KM
650 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
651 simple_unlock(&spechash_slock);
ef62830d 652 goto loop;
662d4423 653 }
ef24f6dd
KM
654 break;
655 }
c0de8792 656 if (vp == NULL || vp->v_tag != VT_NON) {
c0de8792
KM
657 MALLOC(nvp->v_specinfo, struct specinfo *,
658 sizeof(struct specinfo), M_VNODE, M_WAITOK);
659 nvp->v_rdev = nvp_rdev;
7f7b7d89 660 nvp->v_hashchain = vpp;
c0de8792 661 nvp->v_specnext = *vpp;
2c957a90 662 nvp->v_specflags = 0;
662d4423 663 simple_unlock(&spechash_slock);
c0de8792 664 *vpp = nvp;
662d4423 665 if (vp != NULLVP) {
40452d5e
KM
666 nvp->v_flag |= VALIASED;
667 vp->v_flag |= VALIASED;
668 vput(vp);
669 }
54fb9dc2 670 return (NULLVP);
ef24f6dd 671 }
662d4423
KM
672 simple_unlock(&spechash_slock);
673 VOP_UNLOCK(vp, 0, p);
674 simple_lock(&vp->v_interlock);
675 vclean(vp, 0, p);
ef24f6dd
KM
676 vp->v_op = nvp->v_op;
677 vp->v_tag = nvp->v_tag;
678 nvp->v_type = VNON;
679 insmntque(vp, mp);
680 return (vp);
681}
682
683/*
684 * Grab a particular vnode from the free list, increment its
685 * reference count and lock it. The vnode lock bit is set the
686 * vnode is being eliminated in vgone. The process is awakened
687 * when the transition is completed, and an error returned to
688 * indicate that the vnode is no longer usable (possibly having
689 * been changed to a new file system type).
36d09cb1 690 */
05560902 691int
662d4423
KM
692vget(vp, flags, p)
693 struct vnode *vp;
694 int flags;
695 struct proc *p;
36d09cb1 696{
36d09cb1 697
9130defb
KM
698 /*
699 * If the vnode is in the process of being cleaned out for
700 * another use, we wait for the cleaning to finish and then
662d4423
KM
701 * return failure. Cleaning is determined by checking that
702 * the VXLOCK flag is set.
9130defb 703 */
662d4423
KM
704 if ((flags & LK_INTERLOCK) == 0)
705 simple_lock(&vp->v_interlock);
706 if (vp->v_flag & VXLOCK) {
ef24f6dd 707 vp->v_flag |= VXWANT;
662d4423 708 simple_unlock(&vp->v_interlock);
05560902 709 tsleep((caddr_t)vp, PINOD, "vget", 0);
662d4423 710 return (ENOENT);
ef24f6dd 711 }
3e787e54
KM
712 if (vp->v_usecount == 0) {
713 if (vp->v_freelist.tqe_next == (struct vnode *)0xdeadf ||
714 vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)
715 panic("vget: not on queue");
3fc2ac18 716 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
662d4423
KM
717 simple_unlock(&vnode_free_list_slock);
718 }
3e787e54
KM
719 vp->v_freelist.tqe_next = (struct vnode *)0xdeadf;
720 vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
721 }
ec04fc59 722 vp->v_usecount++;
662d4423
KM
723 if (flags & LK_TYPE_MASK)
724 return (vn_lock(vp, flags | LK_INTERLOCK, p));
725 simple_unlock(&vp->v_interlock);
3e787e54 726 if (printcnt-- > 0) vprint("vget got", vp);
ef24f6dd 727 return (0);
36d09cb1
KM
728}
729
d32390ea
KM
int bug_refs = 0;	/* debug: when set, vref/vrele vprint each ref change */
731
36d09cb1 732/*
662d4423
KM
733 * Stubs to use when there is no locking to be done on the underlying object.
734 *
735 * Getting a lock just clears the interlock if necessary.
736 */
737int
738vop_nolock(ap)
739 struct vop_lock_args /* {
740 struct vnode *a_vp;
741 int a_flags;
742 struct proc *a_p;
743 } */ *ap;
744{
745 struct vnode *vp = ap->a_vp;
746
747 /*
748 * Since we are not using the lock manager, we must clear
749 * the interlock here.
750 */
751 if (ap->a_flags & LK_INTERLOCK)
752 simple_unlock(&vp->v_interlock);
753 return (0);
754}
755
/*
 * Unlock stub: with no underlying lock, there is nothing to release.
 */
int
vop_nounlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{

	return (0);
}
770
/*
 * Islocked stub: nothing is ever locked, so always report unlocked.
 */
int
vop_noislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}
783
784/*
785 * Vnode reference.
36d09cb1 786 */
05560902
CD
787void
788vref(vp)
36d09cb1
KM
789 struct vnode *vp;
790{
791
662d4423 792 simple_lock(&vp->v_interlock);
ec04fc59
KM
793 if (vp->v_usecount <= 0)
794 panic("vref used where vget required");
3e787e54
KM
795 if (vp->v_freelist.tqe_next != (struct vnode *)0xdeadf ||
796 vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
797 panic("vref: not free");
7f7b7d89 798 vp->v_usecount++;
662d4423 799 simple_unlock(&vp->v_interlock);
3e787e54 800 if (printcnt-- > 0) vprint("vref get", vp);
d32390ea
KM
801 if (vp->v_type != VBLK && curproc)
802 curproc->p_spare[0]++;
803 if (bug_refs)
804 vprint("vref: ");
36d09cb1
KM
805}
806
807/*
808 * vput(), just unlock and vrele()
809 */
05560902
CD
810void
811vput(vp)
662d4423 812 struct vnode *vp;
36d09cb1 813{
662d4423 814 struct proc *p = curproc; /* XXX */
4d1ee2eb 815
662d4423 816 VOP_UNLOCK(vp, 0, p);
36d09cb1
KM
817 vrele(vp);
818}
819
820/*
821 * Vnode release.
822 * If count drops to zero, call inactive routine and return to freelist.
823 */
05560902
CD
824void
825vrele(vp)
662d4423 826 struct vnode *vp;
36d09cb1 827{
662d4423 828 struct proc *p = curproc; /* XXX */
36d09cb1 829
65c3b3a8 830#ifdef DIAGNOSTIC
36d09cb1 831 if (vp == NULL)
ef24f6dd 832 panic("vrele: null vp");
65c3b3a8 833#endif
662d4423 834 simple_lock(&vp->v_interlock);
7f7b7d89 835 vp->v_usecount--;
3e787e54 836 if (printcnt-- > 0) vprint("vrele put", vp);
d32390ea
KM
837 if (vp->v_type != VBLK && curproc)
838 curproc->p_spare[0]--;
839 if (bug_refs)
840 vprint("vref: ");
662d4423
KM
841 if (vp->v_usecount > 0) {
842 simple_unlock(&vp->v_interlock);
36d09cb1 843 return;
662d4423 844 }
65c3b3a8 845#ifdef DIAGNOSTIC
662d4423 846 if (vp->v_usecount < 0 || vp->v_writecount != 0) {
65c3b3a8
KM
847 vprint("vrele: bad ref count", vp);
848 panic("vrele: ref cnt");
849 }
850#endif
dc998e72
KM
851 /*
852 * insert at tail of LRU list
853 */
662d4423 854 simple_lock(&vnode_free_list_slock);
3e787e54
KM
855 if (vp->v_freelist.tqe_next != (struct vnode *)0xdeadf ||
856 vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
857 panic("vrele: not free");
3fc2ac18 858 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
662d4423
KM
859 simple_unlock(&vnode_free_list_slock);
860 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
861 VOP_INACTIVE(vp, p);
ef24f6dd
KM
862}
863
#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference (diagnostic versions;
 * the VHOLD/HOLDRELE macros are used otherwise).
 */
void
vhold(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 * Panics on holdcnt underflow.
 */
void
holdrele(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */
7f7b7d89 893
f0556f86
KM
894/*
895 * Remove any vnodes in the vnode table belonging to mount point mp.
896 *
897 * If MNT_NOFORCE is specified, there should not be any active ones,
898 * return error if any are found (nb: this is a user error, not a
899 * system error). If MNT_FORCE is specified, detach any active vnodes
900 * that are found.
901 */
8981e258 902#ifdef DIAGNOSTIC
bb4964fd
KM
903int busyprt = 0; /* print out busy vnodes */
904struct ctldebug debug1 = { "busyprt", &busyprt };
8981e258 905#endif
f0556f86 906
05560902 907int
f0556f86
KM
908vflush(mp, skipvp, flags)
909 struct mount *mp;
910 struct vnode *skipvp;
911 int flags;
912{
662d4423
KM
913 struct proc *p = curproc; /* XXX */
914 struct vnode *vp, *nvp;
f0556f86
KM
915 int busy = 0;
916
662d4423 917#ifdef DIAGNOSTIC
54fb9dc2 918 if ((mp->mnt_flag & MNT_MPBUSY) == 0)
36ef03ec 919 panic("vflush: not busy");
662d4423
KM
920#endif
921
922 simple_lock(&mntvnode_slock);
4597dd33 923loop:
3fc2ac18 924 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
4597dd33
KM
925 if (vp->v_mount != mp)
926 goto loop;
3fc2ac18 927 nvp = vp->v_mntvnodes.le_next;
f0556f86
KM
928 /*
929 * Skip over a selected vnode.
f0556f86
KM
930 */
931 if (vp == skipvp)
932 continue;
662d4423
KM
933
934 simple_lock(&vp->v_interlock);
36ef03ec
KM
935 /*
936 * Skip over a vnodes marked VSYSTEM.
937 */
662d4423
KM
938 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
939 simple_unlock(&vp->v_interlock);
36ef03ec 940 continue;
662d4423 941 }
da374605
KM
942 /*
943 * If WRITECLOSE is set, only flush out regular file
944 * vnodes open for writing.
945 */
946 if ((flags & WRITECLOSE) &&
662d4423
KM
947 (vp->v_writecount == 0 || vp->v_type != VREG)) {
948 simple_unlock(&vp->v_interlock);
da374605 949 continue;
662d4423 950 }
f0556f86 951 /*
7f7b7d89 952 * With v_usecount == 0, all we need to do is clear
f0556f86
KM
953 * out the vnode data structures and we are done.
954 */
7f7b7d89 955 if (vp->v_usecount == 0) {
662d4423
KM
956 simple_unlock(&mntvnode_slock);
957 vgonel(vp, p);
958 simple_lock(&mntvnode_slock);
f0556f86
KM
959 continue;
960 }
961 /*
da374605 962 * If FORCECLOSE is set, forcibly close the vnode.
f0556f86
KM
963 * For block or character devices, revert to an
964 * anonymous device. For all other files, just kill them.
965 */
36ef03ec 966 if (flags & FORCECLOSE) {
662d4423 967 simple_unlock(&mntvnode_slock);
f0556f86 968 if (vp->v_type != VBLK && vp->v_type != VCHR) {
662d4423 969 vgonel(vp, p);
f0556f86 970 } else {
662d4423 971 vclean(vp, 0, p);
9342689a 972 vp->v_op = spec_vnodeop_p;
f0556f86
KM
973 insmntque(vp, (struct mount *)0);
974 }
662d4423 975 simple_lock(&mntvnode_slock);
f0556f86
KM
976 continue;
977 }
8981e258 978#ifdef DIAGNOSTIC
f0556f86 979 if (busyprt)
0bf84b18 980 vprint("vflush: busy vnode", vp);
8981e258 981#endif
662d4423 982 simple_unlock(&vp->v_interlock);
f0556f86
KM
983 busy++;
984 }
662d4423 985 simple_unlock(&mntvnode_slock);
f0556f86
KM
986 if (busy)
987 return (EBUSY);
988 return (0);
989}
990
ef24f6dd
KM
991/*
992 * Disassociate the underlying file system from a vnode.
662d4423 993 * The vnode interlock is held on entry.
ef24f6dd 994 */
662d4423
KM
995static void
996vclean(vp, flags, p)
997 struct vnode *vp;
aacc1bff 998 int flags;
662d4423 999 struct proc *p;
ef24f6dd 1000{
2bae1875 1001 int active;
ef24f6dd 1002
2bae1875
KM
1003 /*
1004 * Check to see if the vnode is in use.
0bf84b18
KM
1005 * If so we have to reference it before we clean it out
1006 * so that its count cannot fall to zero and generate a
1007 * race against ourselves to recycle it.
2bae1875 1008 */
7f7b7d89 1009 if (active = vp->v_usecount)
662d4423 1010 vp->v_usecount++;
2bae1875
KM
1011 /*
1012 * Prevent the vnode from being recycled or
1013 * brought into use while we clean it out.
1014 */
0bf84b18
KM
1015 if (vp->v_flag & VXLOCK)
1016 panic("vclean: deadlock");
ef24f6dd 1017 vp->v_flag |= VXLOCK;
662d4423
KM
1018 /*
1019 * Even if the count is zero, the VOP_INACTIVE routine may still
1020 * have the object locked while it cleans it out. The VOP_LOCK
1021 * ensures that the VOP_INACTIVE routine is done with its work.
1022 * For active vnodes, it ensures that no other activity can
1023 * occur while the underlying object is being cleaned out.
1024 */
1025 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
0bf84b18 1026 /*
669df1aa 1027 * Clean out any buffers associated with the vnode.
0bf84b18 1028 */
36ef03ec 1029 if (flags & DOCLOSE)
c33e9e8b 1030 vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
ef24f6dd 1031 /*
669df1aa 1032 * If purging an active vnode, it must be closed and
662d4423
KM
1033 * deactivated before being reclaimed. Note that the
1034 * VOP_INACTIVE will unlock the vnode.
ef24f6dd 1035 */
2bae1875 1036 if (active) {
669df1aa 1037 if (flags & DOCLOSE)
662d4423
KM
1038 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1039 VOP_INACTIVE(vp, p);
1040 } else {
1041 /*
1042 * Any other processes trying to obtain this lock must first
1043 * wait for VXLOCK to clear, then call the new lock operation.
1044 */
1045 VOP_UNLOCK(vp, 0, p);
ef24f6dd
KM
1046 }
1047 /*
1048 * Reclaim the vnode.
1049 */
662d4423 1050 if (VOP_RECLAIM(vp, p))
ef24f6dd 1051 panic("vclean: cannot reclaim");
2bae1875
KM
1052 if (active)
1053 vrele(vp);
7d9a1fe8 1054 cache_purge(vp);
38c46eee 1055
ef24f6dd 1056 /*
669df1aa 1057 * Done with purge, notify sleepers of the grim news.
ef24f6dd 1058 */
669df1aa
KM
1059 vp->v_op = dead_vnodeop_p;
1060 vp->v_tag = VT_NON;
ef24f6dd
KM
1061 vp->v_flag &= ~VXLOCK;
1062 if (vp->v_flag & VXWANT) {
1063 vp->v_flag &= ~VXWANT;
1064 wakeup((caddr_t)vp);
1065 }
1066}
1067
ef62830d
KM
1068/*
1069 * Eliminate all activity associated with the requested vnode
1070 * and with all vnodes aliased to the requested vnode.
1071 */
7cd62fbc
KM
1072int
1073vop_revoke(ap)
1074 struct vop_revoke_args /* {
1075 struct vnode *a_vp;
1076 int a_flags;
1077 } */ *ap;
ef62830d 1078{
662d4423
KM
1079 struct vnode *vp, *vq;
1080 struct proc *p = curproc; /* XXX */
1081
1082#ifdef DIAGNOSTIC
1083 if ((ap->a_flags & REVOKEALL) == 0)
1084 panic("vop_revoke");
1085#endif
ef62830d 1086
7cd62fbc 1087 vp = ap->a_vp;
662d4423
KM
1088 simple_lock(&vp->v_interlock);
1089
1090 if (vp->v_flag & VALIASED) {
7a7b3a95
KM
1091 /*
1092 * If a vgone (or vclean) is already in progress,
1093 * wait until it is done and return.
1094 */
1095 if (vp->v_flag & VXLOCK) {
1096 vp->v_flag |= VXWANT;
662d4423 1097 simple_unlock(&vp->v_interlock);
7cd62fbc
KM
1098 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1099 return (0);
7a7b3a95
KM
1100 }
1101 /*
1102 * Ensure that vp will not be vgone'd while we
1103 * are eliminating its aliases.
1104 */
1105 vp->v_flag |= VXLOCK;
662d4423 1106 simple_unlock(&vp->v_interlock);
7a7b3a95 1107 while (vp->v_flag & VALIASED) {
662d4423 1108 simple_lock(&spechash_slock);
7a7b3a95
KM
1109 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1110 if (vq->v_rdev != vp->v_rdev ||
1111 vq->v_type != vp->v_type || vp == vq)
1112 continue;
662d4423 1113 simple_unlock(&spechash_slock);
7a7b3a95
KM
1114 vgone(vq);
1115 break;
1116 }
662d4423
KM
1117 if (vq == NULLVP)
1118 simple_unlock(&spechash_slock);
ef62830d 1119 }
7a7b3a95
KM
1120 /*
1121 * Remove the lock so that vgone below will
1122 * really eliminate the vnode after which time
1123 * vgone will awaken any sleepers.
1124 */
662d4423 1125 simple_lock(&vp->v_interlock);
7a7b3a95 1126 vp->v_flag &= ~VXLOCK;
ef62830d 1127 }
662d4423
KM
1128 vgonel(vp, p);
1129 return (0);
1130}
1131
1132/*
1133 * Recycle an unused vnode to the front of the free list.
1134 * Release the passed interlock if the vnode will be recycled.
1135 */
1136int
1137vrecycle(vp, inter_lkp, p)
1138 struct vnode *vp;
1139 struct simplelock *inter_lkp;
1140 struct proc *p;
1141{
1142
1143 simple_lock(&vp->v_interlock);
1144 if (vp->v_usecount == 0) {
1145 if (inter_lkp)
1146 simple_unlock(inter_lkp);
1147 vgonel(vp, p);
1148 return (1);
1149 }
1150 simple_unlock(&vp->v_interlock);
7cd62fbc 1151 return (0);
ef62830d
KM
1152}
1153
ef24f6dd
KM
1154/*
1155 * Eliminate all activity associated with a vnode
1156 * in preparation for reuse.
1157 */
05560902
CD
1158void
1159vgone(vp)
662d4423
KM
1160 struct vnode *vp;
1161{
1162 struct proc *p = curproc; /* XXX */
1163
1164 simple_lock(&vp->v_interlock);
1165 vgonel(vp, p);
1166}
1167
1168/*
1169 * vgone, with the vp interlock held.
1170 */
1171void
1172vgonel(vp, p)
1173 struct vnode *vp;
1174 struct proc *p;
ef24f6dd 1175{
662d4423 1176 struct vnode *vq;
c0de8792 1177 struct vnode *vx;
ef24f6dd 1178
4f55e3ec
KM
1179 /*
1180 * If a vgone (or vclean) is already in progress,
1181 * wait until it is done and return.
1182 */
1183 if (vp->v_flag & VXLOCK) {
1184 vp->v_flag |= VXWANT;
662d4423 1185 simple_unlock(&vp->v_interlock);
05560902 1186 tsleep((caddr_t)vp, PINOD, "vgone", 0);
4f55e3ec
KM
1187 return;
1188 }
ef24f6dd
KM
1189 /*
1190 * Clean out the filesystem specific data.
1191 */
662d4423 1192 vclean(vp, DOCLOSE, p);
ef24f6dd
KM
1193 /*
1194 * Delete from old mount point vnode list, if on one.
1195 */
662d4423
KM
1196 if (vp->v_mount != NULL)
1197 insmntque(vp, (struct mount *)0);
ef24f6dd 1198 /*
5d0d19f1
KM
1199 * If special device, remove it from special device alias list
1200 * if it is on one.
ef24f6dd 1201 */
5d0d19f1 1202 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
662d4423 1203 simple_lock(&spechash_slock);
7f7b7d89
KM
1204 if (*vp->v_hashchain == vp) {
1205 *vp->v_hashchain = vp->v_specnext;
ef24f6dd 1206 } else {
7f7b7d89 1207 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
c0de8792 1208 if (vq->v_specnext != vp)
ef24f6dd 1209 continue;
c0de8792 1210 vq->v_specnext = vp->v_specnext;
ef24f6dd
KM
1211 break;
1212 }
c0de8792 1213 if (vq == NULL)
ef24f6dd
KM
1214 panic("missing bdev");
1215 }
c0de8792 1216 if (vp->v_flag & VALIASED) {
4d1ee2eb 1217 vx = NULL;
7f7b7d89 1218 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
de81e10c
KM
1219 if (vq->v_rdev != vp->v_rdev ||
1220 vq->v_type != vp->v_type)
c0de8792 1221 continue;
4d1ee2eb
CT
1222 if (vx)
1223 break;
c0de8792
KM
1224 vx = vq;
1225 }
4d1ee2eb 1226 if (vx == NULL)
c0de8792 1227 panic("missing alias");
4d1ee2eb 1228 if (vq == NULL)
c0de8792
KM
1229 vx->v_flag &= ~VALIASED;
1230 vp->v_flag &= ~VALIASED;
1231 }
662d4423 1232 simple_unlock(&spechash_slock);
c0de8792
KM
1233 FREE(vp->v_specinfo, M_VNODE);
1234 vp->v_specinfo = NULL;
ef24f6dd
KM
1235 }
1236 /*
3387ef89 1237 * If it is on the freelist and not already at the head,
0bf9bb76
KM
1238 * move it to the head of the list. The test of the back
1239 * pointer and the reference count of zero is because
1240 * it will be removed from the free list by getnewvnode,
1241 * but will not have its reference count incremented until
1242 * after calling vgone. If the reference count were
1243 * incremented first, vgone would (incorrectly) try to
1244 * close the previous instance of the underlying object.
1245 * So, the back pointer is explicitly set to `0xdeadb' in
1246 * getnewvnode after removing it from the freelist to ensure
1247 * that we do not try to move it here.
ef24f6dd 1248 */
662d4423
KM
1249 if (vp->v_usecount == 0) {
1250 simple_lock(&vnode_free_list_slock);
1251 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1252 vnode_free_list.tqh_first != vp) {
1253 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1254 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1255 }
1256 simple_unlock(&vnode_free_list_slock);
ef24f6dd 1257 }
2bae1875 1258 vp->v_type = VBAD;
36d09cb1 1259}
ef62830d 1260
2bcd6066
KM
1261/*
1262 * Lookup a vnode by device number.
1263 */
05560902 1264int
2bcd6066
KM
1265vfinddev(dev, type, vpp)
1266 dev_t dev;
1267 enum vtype type;
1268 struct vnode **vpp;
1269{
662d4423
KM
1270 struct vnode *vp;
1271 int rc = 0;
2bcd6066 1272
662d4423 1273 simple_lock(&spechash_slock);
2bcd6066
KM
1274 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1275 if (dev != vp->v_rdev || type != vp->v_type)
1276 continue;
1277 *vpp = vp;
662d4423
KM
1278 rc = 1;
1279 break;
2bcd6066 1280 }
662d4423
KM
1281 simple_unlock(&spechash_slock);
1282 return (rc);
2bcd6066
KM
1283}
1284
ef62830d
KM
1285/*
1286 * Calculate the total number of references to a special device.
1287 */
05560902 1288int
ef62830d 1289vcount(vp)
662d4423 1290 struct vnode *vp;
ef62830d 1291{
662d4423 1292 struct vnode *vq, *vnext;
ef62830d
KM
1293 int count;
1294
1d2d7c6d 1295loop:
ef62830d 1296 if ((vp->v_flag & VALIASED) == 0)
7f7b7d89 1297 return (vp->v_usecount);
662d4423 1298 simple_lock(&spechash_slock);
1d2d7c6d
KM
1299 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1300 vnext = vq->v_specnext;
de81e10c 1301 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
ef62830d
KM
1302 continue;
1303 /*
1304 * Alias, but not in use, so flush it out.
1305 */
1d2d7c6d 1306 if (vq->v_usecount == 0 && vq != vp) {
662d4423 1307 simple_unlock(&spechash_slock);
ef62830d
KM
1308 vgone(vq);
1309 goto loop;
1310 }
7f7b7d89 1311 count += vq->v_usecount;
ef62830d 1312 }
662d4423 1313 simple_unlock(&spechash_slock);
ef62830d
KM
1314 return (count);
1315}
0bf84b18
KM
1316
1317/*
1318 * Print out a description of a vnode.
1319 */
1320static char *typename[] =
61f846a8 1321 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
0bf84b18 1322
5d263ebe 1323void
0bf84b18
KM
1324vprint(label, vp)
1325 char *label;
1326 register struct vnode *vp;
1327{
f2f730c6 1328 char buf[64];
0bf84b18
KM
1329
1330 if (label != NULL)
1331 printf("%s: ", label);
3e787e54 1332 printf("num %d ", vp->v_spare[0]);
65c3b3a8
KM
1333 printf("type %s, usecount %d, writecount %d, refcount %d,",
1334 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1335 vp->v_holdcnt);
f2f730c6
KM
1336 buf[0] = '\0';
1337 if (vp->v_flag & VROOT)
1338 strcat(buf, "|VROOT");
1339 if (vp->v_flag & VTEXT)
1340 strcat(buf, "|VTEXT");
36ef03ec
KM
1341 if (vp->v_flag & VSYSTEM)
1342 strcat(buf, "|VSYSTEM");
36ef03ec
KM
1343 if (vp->v_flag & VXLOCK)
1344 strcat(buf, "|VXLOCK");
1345 if (vp->v_flag & VXWANT)
1346 strcat(buf, "|VXWANT");
f2f730c6
KM
1347 if (vp->v_flag & VBWAIT)
1348 strcat(buf, "|VBWAIT");
36ef03ec
KM
1349 if (vp->v_flag & VALIASED)
1350 strcat(buf, "|VALIASED");
f2f730c6
KM
1351 if (buf[0] != '\0')
1352 printf(" flags (%s)", &buf[1]);
3fc2ac18
KM
1353 if (vp->v_data == NULL) {
1354 printf("\n");
1355 } else {
1356 printf("\n\t");
1357 VOP_PRINT(vp);
1358 }
0bf84b18 1359}
985cbdd5 1360
34c62e18
KM
#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
	}
}
#endif
1384
597259be
KM
1385/*
1386 * Top level filesystem related information gathering.
1387 */
1388int
1389vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1390 int *name;
1391 u_int namelen;
1392 void *oldp;
1393 size_t *oldlenp;
1394 void *newp;
1395 size_t newlen;
1396 struct proc *p;
1397{
1398 struct ctldebug *cdp;
1399 struct vfsconf *vfsp;
1400
1401 /* all sysctl names at this level are at least name and field */
1402 if (namelen < 2)
1403 return (ENOTDIR); /* overloaded */
1404 if (name[0] != VFS_GENERIC) {
1405 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1406 if (vfsp->vfc_typenum == name[0])
1407 break;
1408 if (vfsp == NULL)
1409 return (EOPNOTSUPP);
1410 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1411 oldp, oldlenp, newp, newlen, p));
1412 }
1413 switch (name[1]) {
1414 case VFS_MAXTYPENUM:
1415 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
1416 case VFS_CONF:
1417 if (namelen < 3)
1418 return (ENOTDIR); /* overloaded */
1419 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1420 if (vfsp->vfc_typenum == name[2])
1421 break;
1422 if (vfsp == NULL)
1423 return (EOPNOTSUPP);
1424 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
1425 sizeof(struct vfsconf)));
1426 }
1427 return (EOPNOTSUPP);
1428}
1429
985cbdd5
MT
1430int kinfo_vdebug = 1;
1431int kinfo_vgetfailed;
1432#define KINFO_VNODESLOP 10
1433/*
786fb484 1434 * Dump vnode list (via sysctl).
985cbdd5
MT
1435 * Copyout address of vnode followed by vnode.
1436 */
aacc1bff 1437/* ARGSUSED */
05560902 1438int
786fb484 1439sysctl_vnode(where, sizep)
985cbdd5 1440 char *where;
c1909da4 1441 size_t *sizep;
985cbdd5 1442{
3fc2ac18 1443 register struct mount *mp, *nmp;
662d4423 1444 struct vnode *nvp, *vp;
985cbdd5 1445 register char *bp = where, *savebp;
5bf57294 1446 char *ewhere;
985cbdd5
MT
1447 int error;
1448
1449#define VPTRSZ sizeof (struct vnode *)
1450#define VNODESZ sizeof (struct vnode)
1451 if (where == NULL) {
786fb484 1452 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
985cbdd5
MT
1453 return (0);
1454 }
786fb484 1455 ewhere = where + *sizep;
985cbdd5 1456
29330550
KM
1457 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1458 nmp = mp->mnt_list.cqe_next;
3fc2ac18 1459 if (vfs_busy(mp))
36ef03ec 1460 continue;
985cbdd5
MT
1461 savebp = bp;
1462again:
662d4423 1463 simple_lock(&mntvnode_slock);
3fc2ac18
KM
1464 for (vp = mp->mnt_vnodelist.lh_first;
1465 vp != NULL;
662d4423 1466 vp = nvp) {
41185b3b
KM
1467 /*
1468 * Check that the vp is still associated with
1469 * this filesystem. RACE: could have been
1470 * recycled onto the same filesystem.
1471 */
4597dd33 1472 if (vp->v_mount != mp) {
662d4423 1473 simple_unlock(&mntvnode_slock);
4597dd33
KM
1474 if (kinfo_vdebug)
1475 printf("kinfo: vp changed\n");
1476 bp = savebp;
1477 goto again;
1478 }
662d4423 1479 nvp = vp->v_mntvnodes.le_next;
786fb484 1480 if (bp + VPTRSZ + VNODESZ > ewhere) {
662d4423 1481 simple_unlock(&mntvnode_slock);
786fb484
KM
1482 *sizep = bp - where;
1483 return (ENOMEM);
1484 }
662d4423 1485 simple_unlock(&mntvnode_slock);
786fb484
KM
1486 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
1487 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
985cbdd5 1488 return (error);
985cbdd5 1489 bp += VPTRSZ + VNODESZ;
662d4423 1490 simple_lock(&mntvnode_slock);
985cbdd5 1491 }
662d4423 1492 simple_unlock(&mntvnode_slock);
3fc2ac18
KM
1493 vfs_unbusy(mp);
1494 }
985cbdd5 1495
786fb484 1496 *sizep = bp - where;
985cbdd5
MT
1497 return (0);
1498}
8981e258
MH
1499
1500/*
1501 * Check to see if a filesystem is mounted on a block device.
1502 */
1503int
1504vfs_mountedon(vp)
662d4423 1505 struct vnode *vp;
8981e258 1506{
662d4423
KM
1507 struct vnode *vq;
1508 int error = 0;
8981e258
MH
1509
1510 if (vp->v_specflags & SI_MOUNTEDON)
1511 return (EBUSY);
1512 if (vp->v_flag & VALIASED) {
662d4423 1513 simple_lock(&spechash_slock);
8981e258
MH
1514 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1515 if (vq->v_rdev != vp->v_rdev ||
1516 vq->v_type != vp->v_type)
1517 continue;
662d4423
KM
1518 if (vq->v_specflags & SI_MOUNTEDON) {
1519 error = EBUSY;
1520 break;
1521 }
8981e258 1522 }
662d4423 1523 simple_unlock(&spechash_slock);
8981e258 1524 }
662d4423 1525 return (error);
8981e258
MH
1526}
1527
29330550
KM
1528/*
1529 * Unmount all filesystems. The list is traversed in reverse order
1530 * of mounting to avoid dependencies.
1531 */
1532void
1533vfs_unmountall()
1534{
1535 struct mount *mp, *nmp;
1536
1537 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
1538 nmp = mp->mnt_list.cqe_prev;
1539 (void) dounmount(mp, MNT_FORCE, &proc0);
1540 }
1541}
1542
8981e258
MH
1543/*
1544 * Build hash lists of net addresses and hang them off the mount point.
1545 * Called by ufs_mount() to set up the lists of export addresses.
1546 */
1547static int
1548vfs_hang_addrlist(mp, nep, argp)
1549 struct mount *mp;
1550 struct netexport *nep;
1551 struct export_args *argp;
1552{
1553 register struct netcred *np;
1554 register struct radix_node_head *rnh;
1555 register int i;
1556 struct radix_node *rn;
1557 struct sockaddr *saddr, *smask = 0;
1558 struct domain *dom;
1559 int error;
1560
1561 if (argp->ex_addrlen == 0) {
1562 if (mp->mnt_flag & MNT_DEFEXPORTED)
1563 return (EPERM);
1564 np = &nep->ne_defexported;
1565 np->netc_exflags = argp->ex_flags;
1566 np->netc_anon = argp->ex_anon;
1567 np->netc_anon.cr_ref = 1;
1568 mp->mnt_flag |= MNT_DEFEXPORTED;
1569 return (0);
1570 }
1571 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1572 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1573 bzero((caddr_t)np, i);
1574 saddr = (struct sockaddr *)(np + 1);
1575 if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
1576 goto out;
1577 if (saddr->sa_len > argp->ex_addrlen)
1578 saddr->sa_len = argp->ex_addrlen;
1579 if (argp->ex_masklen) {
1580 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1581 error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
1582 if (error)
1583 goto out;
1584 if (smask->sa_len > argp->ex_masklen)
1585 smask->sa_len = argp->ex_masklen;
1586 }
1587 i = saddr->sa_family;
1588 if ((rnh = nep->ne_rtable[i]) == 0) {
1589 /*
1590 * Seems silly to initialize every AF when most are not
1591 * used, do so on demand here
1592 */
1593 for (dom = domains; dom; dom = dom->dom_next)
1594 if (dom->dom_family == i && dom->dom_rtattach) {
1595 dom->dom_rtattach((void **)&nep->ne_rtable[i],
1596 dom->dom_rtoffset);
1597 break;
1598 }
1599 if ((rnh = nep->ne_rtable[i]) == 0) {
1600 error = ENOBUFS;
1601 goto out;
1602 }
1603 }
1604 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
1605 np->netc_rnodes);
b398c713
KM
1606 if (rn == 0) {
1607 /*
1608 * One of the reasons that rnh_addaddr may fail is that
1609 * the entry already exists. To check for this case, we
1610 * look up the entry to see if it is there. If so, we
1611 * do not need to make a new entry but do return success.
1612 */
1613 free(np, M_NETADDR);
1614 rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
1615 if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
1616 ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
1617 !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
1618 (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
1619 return (0);
1620 return (EPERM);
8981e258
MH
1621 }
1622 np->netc_exflags = argp->ex_flags;
1623 np->netc_anon = argp->ex_anon;
1624 np->netc_anon.cr_ref = 1;
1625 return (0);
1626out:
1627 free(np, M_NETADDR);
1628 return (error);
1629}
1630
1631/* ARGSUSED */
1632static int
1633vfs_free_netcred(rn, w)
1634 struct radix_node *rn;
1635 caddr_t w;
1636{
1637 register struct radix_node_head *rnh = (struct radix_node_head *)w;
1638
1639 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
1640 free((caddr_t)rn, M_NETADDR);
1641 return (0);
1642}
05560902 1643
8981e258
MH
1644/*
1645 * Free the net address hash lists that are hanging off the mount points.
1646 */
1647static void
1648vfs_free_addrlist(nep)
1649 struct netexport *nep;
1650{
1651 register int i;
1652 register struct radix_node_head *rnh;
1653
1654 for (i = 0; i <= AF_MAX; i++)
1655 if (rnh = nep->ne_rtable[i]) {
1656 (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
1657 (caddr_t)rnh);
1658 free((caddr_t)rnh, M_RTABLE);
1659 nep->ne_rtable[i] = 0;
1660 }
1661}
1662
1663int
1664vfs_export(mp, nep, argp)
1665 struct mount *mp;
1666 struct netexport *nep;
1667 struct export_args *argp;
1668{
1669 int error;
1670
1671 if (argp->ex_flags & MNT_DELEXPORT) {
1672 vfs_free_addrlist(nep);
1673 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
1674 }
1675 if (argp->ex_flags & MNT_EXPORTED) {
1676 if (error = vfs_hang_addrlist(mp, nep, argp))
1677 return (error);
1678 mp->mnt_flag |= MNT_EXPORTED;
1679 }
1680 return (0);
1681}
1682
1683struct netcred *
1684vfs_export_lookup(mp, nep, nam)
1685 register struct mount *mp;
1686 struct netexport *nep;
1687 struct mbuf *nam;
1688{
1689 register struct netcred *np;
1690 register struct radix_node_head *rnh;
1691 struct sockaddr *saddr;
1692
1693 np = NULL;
1694 if (mp->mnt_flag & MNT_EXPORTED) {
1695 /*
1696 * Lookup in the export list first.
1697 */
1698 if (nam != NULL) {
1699 saddr = mtod(nam, struct sockaddr *);
1700 rnh = nep->ne_rtable[saddr->sa_family];
1701 if (rnh != NULL) {
1702 np = (struct netcred *)
1703 (*rnh->rnh_matchaddr)((caddr_t)saddr,
1704 rnh);
1705 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
1706 np = NULL;
1707 }
1708 }
1709 /*
1710 * If no address match, use the default if it exists.
1711 */
1712 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
1713 np = &nep->ne_defexported;
1714 }
1715 return (np);
1716}