/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 */

/*
 * External virtual filesystem routines
 */
#include <miscfs/specfs/specdev.h>
/*
 * Conversion tables between the S_IFMT file-type bits of a mode_t and
 * the vnode type enumeration.
 *
 * iftovt_tab is indexed by (mode & S_IFMT) >> 12: the entries line up
 * with the classic S_IF* encodings (0x1000 FIFO, 0x2000 char, 0x4000
 * dir, 0x6000 block, 0x8000 regular, 0xA000 symlink, 0xC000 socket);
 * unassigned nibbles map to VNON and 0xF000 maps to VBAD.
 *
 * vttoif_tab is the inverse: indexed by enum vtype, it yields the
 * corresponding S_IF* bits (VNON -> 0, VBAD -> S_IFMT).
 *
 * NOTE(review): the closing brace of iftovt_tab and the declaration
 * head of vttoif_tab were missing from the damaged source and have
 * been reconstructed; all initializer values are as found.
 */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
/*
 * Insq/Remq for the vnode usage lists.
 *
 * bufinsvn puts a buffer at the head of a vnode's clean or dirty list;
 * bufremvn unlinks it and marks the link pointer NOLIST so later code
 * can tell the buffer is on no list (see the NOLIST checks in bgetvp
 * and reassignbuf).
 *
 * NOTE(review): the "#define bufremvn(bp) {" head and closing brace
 * were missing from the damaged source and have been reconstructed
 * around the two surviving body lines.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/*
 * Global vnode/mount bookkeeping.  The simple locks below guard the
 * structures their names suggest, as established by the lock/unlock
 * pairs around each structure's uses later in this file.
 */
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list (LRU) */
struct mntlist mountlist;			/* mounted filesystem list */
struct simplelock mountlist_slock;		/* guards mountlist */
static struct simplelock mntid_slock;		/* guards fsid generation */
struct simplelock mntvnode_slock;		/* guards per-mount vnode lists */
struct simplelock vnode_free_list_slock;	/* guards vnode_free_list */
static struct simplelock spechash_slock;	/* guards special-device hash */
* Initialize the vnode management data structures.
simple_lock_init(&mntvnode_slock
);
simple_lock_init(&mntid_slock
);
simple_lock_init(&spechash_slock
);
TAILQ_INIT(&vnode_free_list
);
simple_lock_init(&vnode_free_list_slock
);
CIRCLEQ_INIT(&mountlist
);
* Mark a mount point as busy. Used to synchronize access and to delay
* unmounting. Interlock is not released on failure.
vfs_busy(mp
, flags
, interlkp
, p
)
struct simplelock
*interlkp
;
if (mp
->mnt_flag
& MNT_UNMOUNT
) {
mp
->mnt_flag
|= MNT_MWAIT
;
* Since all busy locks are shared except the exclusive
* lock granted when unmounting, the only place that a
* wakeup needs to be done is at the release of the
* exclusive lock at the end of dounmount.
sleep((caddr_t
)mp
, PVFS
);
if (lockmgr(&mp
->mnt_lock
, lkflags
, interlkp
, p
))
panic("vfs_busy: unexpected lock failure");
* Free a busy filesystem.
lockmgr(&mp
->mnt_lock
, LK_RELEASE
, NULL
, p
);
* Lookup a filesystem type, and if found allocate and initialize
* a mount structure for it.
* Devname is usually updated by mount(8) after booting.
vfs_rootmountalloc(fstypename
, devname
, mpp
)
struct proc
*p
= curproc
; /* XXX */
for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
if (!strcmp(vfsp
->vfc_name
, fstypename
))
mp
= malloc((u_long
)sizeof(struct mount
), M_MOUNT
, M_WAITOK
);
bzero((char *)mp
, (u_long
)sizeof(struct mount
));
lockinit(&mp
->mnt_lock
, PVFS
, "vfslock", 0, 0);
(void)vfs_busy(mp
, LK_NOWAIT
, 0, p
);
LIST_INIT(&mp
->mnt_vnodelist
);
mp
->mnt_op
= vfsp
->vfc_vfsops
;
mp
->mnt_flag
= MNT_RDONLY
;
mp
->mnt_vnodecovered
= NULLVP
;
mp
->mnt_stat
.f_type
= vfsp
->vfc_typenum
;
mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
strncpy(mp
->mnt_stat
.f_fstypename
, vfsp
->vfc_name
, MFSNAMELEN
);
mp
->mnt_stat
.f_mntonname
[0] = '/';
(void) copystr(devname
, mp
->mnt_stat
.f_mntfromname
, MNAMELEN
- 1, 0);
* Find an appropriate filesystem to use for the root. If a filesystem
* has not been preselected, walk through the list of known filesystems
* trying those that have mountroot routines, and try them until one
* works or we have tried them all.
extern int (*mountroot
)(void);
for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
) {
if (vfsp
->vfc_mountroot
== NULL
)
if ((error
= (*vfsp
->vfc_mountroot
)()) == 0)
printf("%s_mountroot failed: %d\n", vfsp
->vfc_name
, error
);
* Lookup a mount point by filesystem identifier.
register struct mount
*mp
;
simple_lock(&mountlist_slock
);
for (mp
= mountlist
.cqh_first
; mp
!= (void *)&mountlist
;
mp
= mp
->mnt_list
.cqe_next
) {
if (mp
->mnt_stat
.f_fsid
.val
[0] == fsid
->val
[0] &&
mp
->mnt_stat
.f_fsid
.val
[1] == fsid
->val
[1]) {
simple_unlock(&mountlist_slock
);
simple_unlock(&mountlist_slock
);
return ((struct mount
*)0);
static u_short xxxfs_mntid
;
simple_lock(&mntid_slock
);
mtype
= mp
->mnt_vfc
->vfc_typenum
;
mp
->mnt_stat
.f_fsid
.val
[0] = makedev(nblkdev
+ mtype
, 0);
mp
->mnt_stat
.f_fsid
.val
[1] = mtype
;
tfsid
.val
[0] = makedev(nblkdev
+ mtype
, xxxfs_mntid
);
if (mountlist
.cqh_first
!= (void *)&mountlist
) {
while (vfs_getvfs(&tfsid
)) {
mp
->mnt_stat
.f_fsid
.val
[0] = tfsid
.val
[0];
simple_unlock(&mntid_slock
);
* Set vnode attributes to VNOVAL
register struct vattr
*vap
;
vap
->va_size
= vap
->va_bytes
= VNOVAL
;
vap
->va_mode
= vap
->va_nlink
= vap
->va_uid
= vap
->va_gid
=
vap
->va_fsid
= vap
->va_fileid
=
vap
->va_blocksize
= vap
->va_rdev
=
vap
->va_atime
.ts_sec
= vap
->va_atime
.ts_nsec
=
vap
->va_mtime
.ts_sec
= vap
->va_mtime
.ts_nsec
=
vap
->va_ctime
.ts_sec
= vap
->va_ctime
.ts_nsec
=
vap
->va_flags
= vap
->va_gen
= VNOVAL
;
* Routines having to do with the management of the vnode table.
extern int (**dead_vnodeop_p
)();
static void vclean
__P((struct vnode
*vp
, int flag
, struct proc
*p
));
extern void vgonel
__P((struct vnode
*vp
, struct proc
*p
));
extern struct vattr va_null
;
* Return the next vnode from the free list.
getnewvnode(tag
, mp
, vops
, vpp
)
struct proc
*p
= curproc
; /* XXX */
simple_lock(&vnode_free_list_slock
);
if ((vnode_free_list
.tqh_first
== NULL
&&
numvnodes
< 2 * desiredvnodes
) ||
numvnodes
< desiredvnodes
) {
simple_unlock(&vnode_free_list_slock
);
vp
= (struct vnode
*)malloc((u_long
)sizeof *vp
,
bzero((char *)vp
, sizeof *vp
);
for (vp
= vnode_free_list
.tqh_first
;
vp
!= NULLVP
; vp
= vp
->v_freelist
.tqe_next
) {
if (simple_lock_try(&vp
->v_interlock
))
* Unless this is a bad time of the month, at most
* the first NCPUS items on the free list are
* locked, so this is close enough to being empty.
simple_unlock(&vnode_free_list_slock
);
panic("free vnode isn't");
TAILQ_REMOVE(&vnode_free_list
, vp
, v_freelist
);
/* see comment on why 0xdeadb is set at end of vgone (below) */
vp
->v_freelist
.tqe_prev
= (struct vnode
**)0xdeadb;
simple_unlock(&vnode_free_list_slock
);
simple_unlock(&vp
->v_interlock
);
panic("cleaned vnode isn't");
panic("Clean vnode has pending I/O's");
* Move a vnode from one mount queue to another.
simple_lock(&mntvnode_slock
);
* Delete from old mount point vnode list, if on one.
LIST_REMOVE(vp
, v_mntvnodes
);
* Insert into list of vnodes for the new mount point, if available.
if ((vp
->v_mount
= mp
) != NULL
)
LIST_INSERT_HEAD(&mp
->mnt_vnodelist
, vp
, v_mntvnodes
);
simple_unlock(&mntvnode_slock
);
* Update outstanding I/O count and do wakeup if requested.
register struct vnode
*vp
;
bp
->b_flags
&= ~B_WRITEINPROG
;
if (--vp
->v_numoutput
< 0)
panic("vwakeup: neg numoutput");
if ((vp
->v_flag
& VBWAIT
) && vp
->v_numoutput
<= 0) {
panic("vwakeup: neg numoutput 2");
wakeup((caddr_t
)&vp
->v_numoutput
);
* Flush out and invalidate all buffers associated with a vnode.
* Called with the underlying object locked.
vinvalbuf(vp
, flags
, cred
, p
, slpflag
, slptimeo
)
register struct vnode
*vp
;
if (error
= VOP_FSYNC(vp
, cred
, MNT_WAIT
, p
))
if (vp
->v_dirtyblkhd
.lh_first
!= NULL
)
panic("vinvalbuf: dirty bufs");
if ((blist
= vp
->v_cleanblkhd
.lh_first
) && flags
& V_SAVEMETA
)
while (blist
&& blist
->b_lblkno
< 0)
blist
= blist
->b_vnbufs
.le_next
;
if (!blist
&& (blist
= vp
->v_dirtyblkhd
.lh_first
) &&
while (blist
&& blist
->b_lblkno
< 0)
blist
= blist
->b_vnbufs
.le_next
;
for (bp
= blist
; bp
; bp
= nbp
) {
nbp
= bp
->b_vnbufs
.le_next
;
if (flags
& V_SAVEMETA
&& bp
->b_lblkno
< 0)
if (bp
->b_flags
& B_BUSY
) {
error
= tsleep((caddr_t
)bp
,
slpflag
| (PRIBIO
+ 1), "vinvalbuf",
* XXX Since there are no node locks for NFS, I believe
* there is a slight chance that a delayed write will
* occur while sleeping just above, so check for it.
if ((bp
->b_flags
& B_DELWRI
) && (flags
& V_SAVE
)) {
if (!(flags
& V_SAVEMETA
) &&
(vp
->v_dirtyblkhd
.lh_first
|| vp
->v_cleanblkhd
.lh_first
))
panic("vinvalbuf: flush failed");
* Associate a buffer with a vnode.
register struct vnode
*vp
;
panic("bgetvp: not free");
if (vp
->v_type
== VBLK
|| vp
->v_type
== VCHR
)
* Insert onto list for new vnode.
bufinsvn(bp
, &vp
->v_cleanblkhd
);
* Disassociate a buffer from a vnode.
if (bp
->b_vp
== (struct vnode
*) 0)
* Delete from old vnode list, if on one.
if (bp
->b_vnbufs
.le_next
!= NOLIST
)
bp
->b_vp
= (struct vnode
*) 0;
* Reassign a buffer from one vnode to another.
* Used to assign file specific control information
* (indirect blocks) to the vnode to which they belong.
register struct vnode
*newvp
;
register struct buflists
*listheadp
;
printf("reassignbuf: NULL");
* Delete from old vnode list, if on one.
if (bp
->b_vnbufs
.le_next
!= NOLIST
)
* If dirty, put on list of dirty buffers;
* otherwise insert onto list of clean buffers.
if (bp
->b_flags
& B_DELWRI
)
listheadp
= &newvp
->v_dirtyblkhd
;
listheadp
= &newvp
->v_cleanblkhd
;
* Create a vnode for a block device.
* Used for root filesystem, argdev, and swap areas.
* Also used for memory file system special devices.
register struct vnode
*vp
;
error
= getnewvnode(VT_NON
, (struct mount
*)0, spec_vnodeop_p
, &nvp
);
if (nvp
= checkalias(vp
, dev
, (struct mount
*)0)) {
* Check to see if the new vnode represents a special device
* for which we already have a vnode (either because of
* bdevvp() or because of a different vnode representing
* the same block device). If such an alias exists, deallocate
* the existing contents and return the aliased vnode. The
* caller is responsible for filling it with its new contents.
checkalias(nvp
, nvp_rdev
, mp
)
register struct vnode
*nvp
;
struct proc
*p
= curproc
; /* XXX */
if (nvp
->v_type
!= VBLK
&& nvp
->v_type
!= VCHR
)
vpp
= &speclisth
[SPECHASH(nvp_rdev
)];
simple_lock(&spechash_slock
);
for (vp
= *vpp
; vp
; vp
= vp
->v_specnext
) {
if (nvp_rdev
!= vp
->v_rdev
|| nvp
->v_type
!= vp
->v_type
)
* Alias, but not in use, so flush it out.
simple_lock(&vp
->v_interlock
);
if (vp
->v_usecount
== 0) {
simple_unlock(&spechash_slock
);
if (vget(vp
, LK_EXCLUSIVE
| LK_INTERLOCK
, p
)) {
simple_unlock(&spechash_slock
);
if (vp
== NULL
|| vp
->v_tag
!= VT_NON
) {
MALLOC(nvp
->v_specinfo
, struct specinfo
*,
sizeof(struct specinfo
), M_VNODE
, M_WAITOK
);
simple_unlock(&spechash_slock
);
simple_unlock(&spechash_slock
);
simple_lock(&vp
->v_interlock
);
* Grab a particular vnode from the free list, increment its
* reference count and lock it. The vnode lock bit is set the
* vnode is being eliminated in vgone. The process is awakened
* when the transition is completed, and an error returned to
* indicate that the vnode is no longer usable (possibly having
* been changed to a new file system type).
* If the vnode is in the process of being cleaned out for
* another use, we wait for the cleaning to finish and then
* return failure. Cleaning is determined by checking that
* the VXLOCK flag is set.
if ((flags
& LK_INTERLOCK
) == 0)
simple_lock(&vp
->v_interlock
);
if (vp
->v_flag
& VXLOCK
) {
simple_unlock(&vp
->v_interlock
);
tsleep((caddr_t
)vp
, PINOD
, "vget", 0);
if (vp
->v_usecount
== 0) {
simple_lock(&vnode_free_list_slock
);
TAILQ_REMOVE(&vnode_free_list
, vp
, v_freelist
);
simple_unlock(&vnode_free_list_slock
);
if (flags
& LK_TYPE_MASK
) {
if (error
= vn_lock(vp
, flags
| LK_INTERLOCK
, p
))
simple_unlock(&vp
->v_interlock
);
* Stubs to use when there is no locking to be done on the underlying object.
* A minimal shared lock is necessary to ensure that the underlying object
* is not revoked while an operation is in progress. So, an active shared
* count is maintained in an auxillary vnode lock structure.
struct vop_lock_args
/* {
* This code cannot be used until all the non-locking filesystems
* (notably NFS) are converted to properly lock and release nodes.
* Also, certain vnode operations change the locking state within
* the operation (create, mknod, remove, link, rename, mkdir, rmdir,
* and symlink). Ideally these operations should not change the
* lock state, but should be changed to let the caller of the
* function unlock them. Otherwise all intermediate vnode layers
* (such as union, umapfs, etc) must catch these functions to do
* the necessary locking at their layer. Note that the inactive
* and lookup operations also change their lock state, but this
* cannot be avoided, so these two operations will always need
* to be handled in intermediate layers.
struct vnode
*vp
= ap
->a_vp
;
int vnflags
, flags
= ap
->a_flags
;
if (vp
->v_vnlock
== NULL
) {
if ((flags
& LK_TYPE_MASK
) == LK_DRAIN
)
MALLOC(vp
->v_vnlock
, struct lock
*, sizeof(struct lock
),
lockinit(vp
->v_vnlock
, PVFS
, "vnlock", 0, 0);
switch (flags
& LK_TYPE_MASK
) {
panic("vop_nolock: bad operation %d", flags
& LK_TYPE_MASK
);
if (flags
& LK_INTERLOCK
)
return(lockmgr(vp
->v_vnlock
, vnflags
, &vp
->v_interlock
, ap
->a_p
));
* Since we are not using the lock manager, we must clear
if (ap
->a_flags
& LK_INTERLOCK
)
simple_unlock(&ap
->a_vp
->v_interlock
);
* Decrement the active use count.
struct vop_unlock_args
/* {
struct vnode
*vp
= ap
->a_vp
;
if (vp
->v_vnlock
== NULL
)
return (lockmgr(vp
->v_vnlock
, LK_RELEASE
, NULL
, ap
->a_p
));
* Return whether or not the node is in use.
struct vop_islocked_args
/* {
struct vnode
*vp
= ap
->a_vp
;
if (vp
->v_vnlock
== NULL
)
return (lockstatus(vp
->v_vnlock
));
simple_lock(&vp
->v_interlock
);
panic("vref used where vget required");
simple_unlock(&vp
->v_interlock
);
* vput(), just unlock and vrele()
struct proc
*p
= curproc
; /* XXX */
simple_lock(&vp
->v_interlock
);
if (vp
->v_usecount
> 0) {
simple_unlock(&vp
->v_interlock
);
if (vp
->v_usecount
< 0 || vp
->v_writecount
!= 0) {
vprint("vput: bad ref count", vp
);
* insert at tail of LRU list
simple_lock(&vnode_free_list_slock
);
TAILQ_INSERT_TAIL(&vnode_free_list
, vp
, v_freelist
);
simple_unlock(&vnode_free_list_slock
);
simple_unlock(&vp
->v_interlock
);
* If count drops to zero, call inactive routine and return to freelist.
struct proc
*p
= curproc
; /* XXX */
simple_lock(&vp
->v_interlock
);
if (vp
->v_usecount
> 0) {
simple_unlock(&vp
->v_interlock
);
if (vp
->v_usecount
< 0 || vp
->v_writecount
!= 0) {
vprint("vrele: bad ref count", vp
);
* insert at tail of LRU list
simple_lock(&vnode_free_list_slock
);
TAILQ_INSERT_TAIL(&vnode_free_list
, vp
, v_freelist
);
simple_unlock(&vnode_free_list_slock
);
if (vn_lock(vp
, LK_EXCLUSIVE
| LK_INTERLOCK
, p
) == 0)
* Page or buffer structure gets a reference.
register struct vnode
*vp
;
simple_lock(&vp
->v_interlock
);
simple_unlock(&vp
->v_interlock
);
* Page or buffer structure frees a reference.
register struct vnode
*vp
;
simple_lock(&vp
->v_interlock
);
panic("holdrele: holdcnt");
simple_unlock(&vp
->v_interlock
);
* Remove any vnodes in the vnode table belonging to mount point mp.
* If MNT_NOFORCE is specified, there should not be any active ones,
* return error if any are found (nb: this is a user error, not a
* system error). If MNT_FORCE is specified, detach any active vnodes
int busyprt
= 0; /* print out busy vnodes */
struct ctldebug debug1
= { "busyprt", &busyprt
};
vflush(mp
, skipvp
, flags
)
struct proc
*p
= curproc
; /* XXX */
simple_lock(&mntvnode_slock
);
for (vp
= mp
->mnt_vnodelist
.lh_first
; vp
; vp
= nvp
) {
nvp
= vp
->v_mntvnodes
.le_next
;
* Skip over a selected vnode.
simple_lock(&vp
->v_interlock
);
* Skip over a vnodes marked VSYSTEM.
if ((flags
& SKIPSYSTEM
) && (vp
->v_flag
& VSYSTEM
)) {
simple_unlock(&vp
->v_interlock
);
* If WRITECLOSE is set, only flush out regular file
* vnodes open for writing.
if ((flags
& WRITECLOSE
) &&
(vp
->v_writecount
== 0 || vp
->v_type
!= VREG
)) {
simple_unlock(&vp
->v_interlock
);
* With v_usecount == 0, all we need to do is clear
* out the vnode data structures and we are done.
if (vp
->v_usecount
== 0) {
simple_unlock(&mntvnode_slock
);
simple_lock(&mntvnode_slock
);
* If FORCECLOSE is set, forcibly close the vnode.
* For block or character devices, revert to an
* anonymous device. For all other files, just kill them.
if (flags
& FORCECLOSE
) {
simple_unlock(&mntvnode_slock
);
if (vp
->v_type
!= VBLK
&& vp
->v_type
!= VCHR
) {
vp
->v_op
= spec_vnodeop_p
;
insmntque(vp
, (struct mount
*)0);
simple_lock(&mntvnode_slock
);
vprint("vflush: busy vnode", vp
);
simple_unlock(&vp
->v_interlock
);
simple_unlock(&mntvnode_slock
);
* Disassociate the underlying file system from a vnode.
* The vnode interlock is held on entry.
* Check to see if the vnode is in use.
* If so we have to reference it before we clean it out
* so that its count cannot fall to zero and generate a
* race against ourselves to recycle it.
if (active
= vp
->v_usecount
)
* Prevent the vnode from being recycled or
* brought into use while we clean it out.
panic("vclean: deadlock");
* Even if the count is zero, the VOP_INACTIVE routine may still
* have the object locked while it cleans it out. The VOP_LOCK
* ensures that the VOP_INACTIVE routine is done with its work.
* For active vnodes, it ensures that no other activity can
* occur while the underlying object is being cleaned out.
VOP_LOCK(vp
, LK_DRAIN
| LK_INTERLOCK
, p
);
* Clean out any buffers associated with the vnode.
vinvalbuf(vp
, V_SAVE
, NOCRED
, p
, 0, 0);
* If purging an active vnode, it must be closed and
* deactivated before being reclaimed. Note that the
* VOP_INACTIVE will unlock the vnode.
VOP_CLOSE(vp
, IO_NDELAY
, NOCRED
, p
);
* Any other processes trying to obtain this lock must first
* wait for VXLOCK to clear, then call the new lock operation.
panic("vclean: cannot reclaim");
if ((vp
->v_vnlock
->lk_flags
& LK_DRAINED
) == 0)
vprint("vclean: lock not drained", vp
);
FREE(vp
->v_vnlock
, M_VNODE
);
* Done with purge, notify sleepers of the grim news.
vp
->v_op
= dead_vnodeop_p
;
if (vp
->v_flag
& VXWANT
) {
* Eliminate all activity associated with the requested vnode
* and with all vnodes aliased to the requested vnode.
struct vop_revoke_args
/* {
struct proc
*p
= curproc
; /* XXX */
if ((ap
->a_flags
& REVOKEALL
) == 0)
simple_lock(&vp
->v_interlock
);
if (vp
->v_flag
& VALIASED
) {
* If a vgone (or vclean) is already in progress,
* wait until it is done and return.
if (vp
->v_flag
& VXLOCK
) {
simple_unlock(&vp
->v_interlock
);
tsleep((caddr_t
)vp
, PINOD
, "vop_revokeall", 0);
* Ensure that vp will not be vgone'd while we
* are eliminating its aliases.
simple_unlock(&vp
->v_interlock
);
while (vp
->v_flag
& VALIASED
) {
simple_lock(&spechash_slock
);
for (vq
= *vp
->v_hashchain
; vq
; vq
= vq
->v_specnext
) {
if (vq
->v_rdev
!= vp
->v_rdev
||
vq
->v_type
!= vp
->v_type
|| vp
== vq
)
simple_unlock(&spechash_slock
);
simple_unlock(&spechash_slock
);
* Remove the lock so that vgone below will
* really eliminate the vnode after which time
* vgone will awaken any sleepers.
simple_lock(&vp
->v_interlock
);
* Recycle an unused vnode to the front of the free list.
* Release the passed interlock if the vnode will be recycled.
vrecycle(vp
, inter_lkp
, p
)
struct simplelock
*inter_lkp
;
simple_lock(&vp
->v_interlock
);
if (vp
->v_usecount
== 0) {
simple_unlock(inter_lkp
);
simple_unlock(&vp
->v_interlock
);
* Eliminate all activity associated with a vnode
* in preparation for reuse.
struct proc
*p
= curproc
; /* XXX */
simple_lock(&vp
->v_interlock
);
* vgone, with the vp interlock held.
* If a vgone (or vclean) is already in progress,
* wait until it is done and return.
if (vp
->v_flag
& VXLOCK
) {
simple_unlock(&vp
->v_interlock
);
tsleep((caddr_t
)vp
, PINOD
, "vgone", 0);
* Clean out the filesystem specific data.
* Delete from old mount point vnode list, if on one.
insmntque(vp
, (struct mount
*)0);
* If special device, remove it from special device alias list
if ((vp
->v_type
== VBLK
|| vp
->v_type
== VCHR
) && vp
->v_specinfo
!= 0) {
simple_lock(&spechash_slock
);
if (*vp
->v_hashchain
== vp
) {
*vp
->v_hashchain
= vp
->v_specnext
;
for (vq
= *vp
->v_hashchain
; vq
; vq
= vq
->v_specnext
) {
if (vq
->v_specnext
!= vp
)
vq
->v_specnext
= vp
->v_specnext
;
if (vp
->v_flag
& VALIASED
) {
for (vq
= *vp
->v_hashchain
; vq
; vq
= vq
->v_specnext
) {
if (vq
->v_rdev
!= vp
->v_rdev
||
vq
->v_type
!= vp
->v_type
)
simple_unlock(&spechash_slock
);
FREE(vp
->v_specinfo
, M_VNODE
);
* If it is on the freelist and not already at the head,
* move it to the head of the list. The test of the back
* pointer and the reference count of zero is because
* it will be removed from the free list by getnewvnode,
* but will not have its reference count incremented until
* after calling vgone. If the reference count were
* incremented first, vgone would (incorrectly) try to
* close the previous instance of the underlying object.
* So, the back pointer is explicitly set to `0xdeadb' in
* getnewvnode after removing it from the freelist to ensure
* that we do not try to move it here.
if (vp
->v_usecount
== 0) {
simple_lock(&vnode_free_list_slock
);
if ((vp
->v_freelist
.tqe_prev
!= (struct vnode
**)0xdeadb) &&
vnode_free_list
.tqh_first
!= vp
) {
TAILQ_REMOVE(&vnode_free_list
, vp
, v_freelist
);
TAILQ_INSERT_HEAD(&vnode_free_list
, vp
, v_freelist
);
simple_unlock(&vnode_free_list_slock
);
* Lookup a vnode by device number.
simple_lock(&spechash_slock
);
for (vp
= speclisth
[SPECHASH(dev
)]; vp
; vp
= vp
->v_specnext
) {
if (dev
!= vp
->v_rdev
|| type
!= vp
->v_type
)
simple_unlock(&spechash_slock
);
* Calculate the total number of references to a special device.
struct vnode
*vq
, *vnext
;
if ((vp
->v_flag
& VALIASED
) == 0)
simple_lock(&spechash_slock
);
for (count
= 0, vq
= *vp
->v_hashchain
; vq
; vq
= vnext
) {
if (vq
->v_rdev
!= vp
->v_rdev
|| vq
->v_type
!= vp
->v_type
)
* Alias, but not in use, so flush it out.
if (vq
->v_usecount
== 0 && vq
!= vp
) {
simple_unlock(&spechash_slock
);
simple_unlock(&spechash_slock
);
* Print out a description of a vnode.
static char *typename
[] =
{ "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
register struct vnode
*vp
;
printf("type %s, usecount %d, writecount %d, refcount %d,",
typename
[vp
->v_type
], vp
->v_usecount
, vp
->v_writecount
,
if (vp
->v_flag
& VSYSTEM
)
if (vp
->v_flag
& VALIASED
)
strcat(buf
, "|VALIASED");
printf(" flags (%s)", &buf
[1]);
if (vp
->v_data
== NULL
) {
* List all of the locked vnodes in the system.
* Called when debugging the kernel.
struct proc
*p
= curproc
; /* XXX */
printf("Locked vnodes\n");
simple_lock(&mountlist_slock
);
for (mp
= mountlist
.cqh_first
; mp
!= (void *)&mountlist
; mp
= nmp
) {
if (vfs_busy(mp
, LK_NOWAIT
, &mountlist_slock
, p
)) {
nmp
= mp
->mnt_list
.cqe_next
;
for (vp
= mp
->mnt_vnodelist
.lh_first
;
vp
= vp
->v_mntvnodes
.le_next
) {
simple_lock(&mountlist_slock
);
nmp
= mp
->mnt_list
.cqe_next
;
simple_unlock(&mountlist_slock
);
* Top level filesystem related information gathering.
vfs_sysctl(name
, namelen
, oldp
, oldlenp
, newp
, newlen
, p
)
/* all sysctl names at this level are at least name and field */
return (ENOTDIR
); /* overloaded */
if (name
[0] != VFS_GENERIC
) {
for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
if (vfsp
->vfc_typenum
== name
[0])
return ((*vfsp
->vfc_vfsops
->vfs_sysctl
)(&name
[1], namelen
- 1,
oldp
, oldlenp
, newp
, newlen
, p
));
return (sysctl_rdint(oldp
, oldlenp
, newp
, maxvfsconf
));
return (ENOTDIR
); /* overloaded */
for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
if (vfsp
->vfc_typenum
== name
[2])
return (sysctl_rdstruct(oldp
, oldlenp
, newp
, vfsp
,
sizeof(struct vfsconf
)));
#define KINFO_VNODESLOP 10
* Dump vnode list (via sysctl).
* Copyout address of vnode followed by vnode.
sysctl_vnode(where
, sizep
, p
)
char *bp
= where
, *savebp
;
#define VPTRSZ sizeof (struct vnode *)
#define VNODESZ sizeof (struct vnode)
*sizep
= (numvnodes
+ KINFO_VNODESLOP
) * (VPTRSZ
+ VNODESZ
);
simple_lock(&mountlist_slock
);
for (mp
= mountlist
.cqh_first
; mp
!= (void *)&mountlist
; mp
= nmp
) {
if (vfs_busy(mp
, LK_NOWAIT
, &mountlist_slock
, p
)) {
nmp
= mp
->mnt_list
.cqe_next
;
simple_lock(&mntvnode_slock
);
for (vp
= mp
->mnt_vnodelist
.lh_first
;
* Check that the vp is still associated with
* this filesystem. RACE: could have been
* recycled onto the same filesystem.
simple_unlock(&mntvnode_slock
);
printf("kinfo: vp changed\n");
nvp
= vp
->v_mntvnodes
.le_next
;
if (bp
+ VPTRSZ
+ VNODESZ
> ewhere
) {
simple_unlock(&mntvnode_slock
);
simple_unlock(&mntvnode_slock
);
if ((error
= copyout((caddr_t
)&vp
, bp
, VPTRSZ
)) ||
(error
= copyout((caddr_t
)vp
, bp
+ VPTRSZ
, VNODESZ
)))
simple_lock(&mntvnode_slock
);
simple_unlock(&mntvnode_slock
);
simple_lock(&mountlist_slock
);
nmp
= mp
->mnt_list
.cqe_next
;
simple_unlock(&mountlist_slock
);
* Check to see if a filesystem is mounted on a block device.
if (vp
->v_specflags
& SI_MOUNTEDON
)
if (vp
->v_flag
& VALIASED
) {
simple_lock(&spechash_slock
);
for (vq
= *vp
->v_hashchain
; vq
; vq
= vq
->v_specnext
) {
if (vq
->v_rdev
!= vp
->v_rdev
||
vq
->v_type
!= vp
->v_type
)
if (vq
->v_specflags
& SI_MOUNTEDON
) {
simple_unlock(&spechash_slock
);
* Unmount all filesystems. The list is traversed in reverse order
* of mounting to avoid dependencies.
struct proc
*p
= curproc
; /* XXX */
* Since this only runs when rebooting, it is not interlocked.
for (mp
= mountlist
.cqh_last
; mp
!= (void *)&mountlist
; mp
= nmp
) {
nmp
= mp
->mnt_list
.cqe_prev
;
(void) dounmount(mp
, MNT_FORCE
, p
);
* Build hash lists of net addresses and hang them off the mount point.
* Called by ufs_mount() to set up the lists of export addresses.
vfs_hang_addrlist(mp
, nep
, argp
)
struct export_args
*argp
;
register struct netcred
*np
;
register struct radix_node_head
*rnh
;
struct sockaddr
*saddr
, *smask
= 0;
if (argp
->ex_addrlen
== 0) {
if (mp
->mnt_flag
& MNT_DEFEXPORTED
)
np
= &nep
->ne_defexported
;
np
->netc_exflags
= argp
->ex_flags
;
np
->netc_anon
= argp
->ex_anon
;
np
->netc_anon
.cr_ref
= 1;
mp
->mnt_flag
|= MNT_DEFEXPORTED
;
i
= sizeof(struct netcred
) + argp
->ex_addrlen
+ argp
->ex_masklen
;
np
= (struct netcred
*)malloc(i
, M_NETADDR
, M_WAITOK
);
saddr
= (struct sockaddr
*)(np
+ 1);
if (error
= copyin(argp
->ex_addr
, (caddr_t
)saddr
, argp
->ex_addrlen
))
if (saddr
->sa_len
> argp
->ex_addrlen
)
saddr
->sa_len
= argp
->ex_addrlen
;
smask
= (struct sockaddr
*)((caddr_t
)saddr
+ argp
->ex_addrlen
);
error
= copyin(argp
->ex_addr
, (caddr_t
)smask
, argp
->ex_masklen
);
if (smask
->sa_len
> argp
->ex_masklen
)
smask
->sa_len
= argp
->ex_masklen
;
if ((rnh
= nep
->ne_rtable
[i
]) == 0) {
* Seems silly to initialize every AF when most are not
* used, do so on demand here
for (dom
= domains
; dom
; dom
= dom
->dom_next
)
if (dom
->dom_family
== i
&& dom
->dom_rtattach
) {
dom
->dom_rtattach((void **)&nep
->ne_rtable
[i
],
if ((rnh
= nep
->ne_rtable
[i
]) == 0) {
rn
= (*rnh
->rnh_addaddr
)((caddr_t
)saddr
, (caddr_t
)smask
, rnh
,
* One of the reasons that rnh_addaddr may fail is that
* the entry already exists. To check for this case, we
* look up the entry to see if it is there. If so, we
* do not need to make a new entry but do return success.
rn
= (*rnh
->rnh_matchaddr
)((caddr_t
)saddr
, rnh
);
if (rn
!= 0 && (rn
->rn_flags
& RNF_ROOT
) == 0 &&
((struct netcred
*)rn
)->netc_exflags
== argp
->ex_flags
&&
!bcmp((caddr_t
)&((struct netcred
*)rn
)->netc_anon
,
(caddr_t
)&argp
->ex_anon
, sizeof(struct ucred
)))
np
->netc_exflags
= argp
->ex_flags
;
np
->netc_anon
= argp
->ex_anon
;
np
->netc_anon
.cr_ref
= 1;
register struct radix_node_head
*rnh
= (struct radix_node_head
*)w
;
(*rnh
->rnh_deladdr
)(rn
->rn_key
, rn
->rn_mask
, rnh
);
free((caddr_t
)rn
, M_NETADDR
);
* Free the net address hash lists that are hanging off the mount points.
register struct radix_node_head
*rnh
;
for (i
= 0; i
<= AF_MAX
; i
++)
if (rnh
= nep
->ne_rtable
[i
]) {
(*rnh
->rnh_walktree
)(rnh
, vfs_free_netcred
,
free((caddr_t
)rnh
, M_RTABLE
);
vfs_export(mp
, nep
, argp
)
struct export_args
*argp
;
if (argp
->ex_flags
& MNT_DELEXPORT
) {
mp
->mnt_flag
&= ~(MNT_EXPORTED
| MNT_DEFEXPORTED
);
if (argp
->ex_flags
& MNT_EXPORTED
) {
if (error
= vfs_hang_addrlist(mp
, nep
, argp
))
mp
->mnt_flag
|= MNT_EXPORTED
;
vfs_export_lookup(mp
, nep
, nam
)
register struct mount
*mp
;
register struct netcred
*np
;
register struct radix_node_head
*rnh
;
if (mp
->mnt_flag
& MNT_EXPORTED
) {
* Lookup in the export list first.
saddr
= mtod(nam
, struct sockaddr
*);
rnh
= nep
->ne_rtable
[saddr
->sa_family
];
(*rnh
->rnh_matchaddr
)((caddr_t
)saddr
,
if (np
&& np
->netc_rnodes
->rn_flags
& RNF_ROOT
)
* If no address match, use the default if it exists.
if (np
== NULL
&& mp
->mnt_flag
& MNT_DEFEXPORTED
)
np
= &nep
->ne_defexported
;