* Copyright (c) 1989 The Regents of the University of California.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* from: @(#)vfs_subr.c 7.60 (Berkeley) 6/21/91
* $Id: vfs_subr.c,v 1.6 1993/11/07 21:44:50 wollman Exp $
* External virtual filesystem routines
/*
 * File-local helper that places a vnode on a mount point's vnode list.
 * vflush() calls it with a null mount pointer.
 */
static void insmntque(struct vnode *vp, struct mount *mp);
/*
 * Device special file vnode hash table; a chain is selected by
 * applying SPECHASH() to the device number.
 */
struct vnode *speclisth[SPECHSZ];
* Remove a mount point from the list of mounted filesystems.
* Unmount of the root is illegal.
register struct mount
*mp
;
panic("vfs_remove: unmounting root");
mp
->mnt_prev
->mnt_next
= mp
->mnt_next
;
mp
->mnt_next
->mnt_prev
= mp
->mnt_prev
;
mp
->mnt_vnodecovered
->v_mountedhere
= (struct mount
*)0;
* Lock a filesystem.
* Used to prevent access to it while mounting and unmounting.
register struct mount
*mp
;
while(mp
->mnt_flag
& MNT_MLOCK
) {
mp
->mnt_flag
|= MNT_MWAIT
;
tsleep((caddr_t
)mp
, PVFS
, "vfslock", 0);
mp
->mnt_flag
|= MNT_MLOCK
;
* Unlock a locked filesystem.
* Panic if filesystem is not locked.
register struct mount
*mp
;
if ((mp
->mnt_flag
& MNT_MLOCK
) == 0)
panic("vfs_unlock: not locked");
mp
->mnt_flag
&= ~MNT_MLOCK
;
if (mp
->mnt_flag
& MNT_MWAIT
) {
mp
->mnt_flag
&= ~MNT_MWAIT
;
* Mark a mount point as busy.
* Used to synchronize access and to delay unmounting.
register struct mount
*mp
;
while(mp
->mnt_flag
& MNT_MPBUSY
) {
mp
->mnt_flag
|= MNT_MPWANT
;
tsleep((caddr_t
)&mp
->mnt_flag
, PVFS
, "vfsbusy", 0);
if (mp
->mnt_flag
& MNT_UNMOUNT
)
mp
->mnt_flag
|= MNT_MPBUSY
;
* Free a busy filesystem.
* Panic if filesystem is not busy.
register struct mount
*mp
;
if ((mp
->mnt_flag
& MNT_MPBUSY
) == 0)
panic("vfs_unbusy: not busy");
mp
->mnt_flag
&= ~MNT_MPBUSY
;
if (mp
->mnt_flag
& MNT_MPWANT
) {
mp
->mnt_flag
&= ~MNT_MPWANT
;
wakeup((caddr_t
)&mp
->mnt_flag
);
* Lookup a mount point by filesystem identifier.
register struct mount
*mp
;
if (mp
->mnt_stat
.f_fsid
.val
[0] == fsid
->val
[0] &&
mp
->mnt_stat
.f_fsid
.val
[1] == fsid
->val
[1]) {
return ((struct mount
*)0);
* Set vnode attributes to VNOVAL
register struct vattr
*vap
;
vap
->va_mode
= vap
->va_nlink
= vap
->va_uid
= vap
->va_gid
=
vap
->va_fsid
= vap
->va_fileid
= vap
->va_size
=
vap
->va_size_rsv
= vap
->va_blocksize
= vap
->va_rdev
=
vap
->va_bytes
= vap
->va_bytes_rsv
=
vap
->va_atime
.tv_sec
= vap
->va_atime
.tv_usec
=
vap
->va_mtime
.tv_sec
= vap
->va_mtime
.tv_usec
=
vap
->va_ctime
.tv_sec
= vap
->va_ctime
.tv_usec
=
vap
->va_flags
= vap
->va_gen
= VNOVAL
;
* Routines having to do with the management of the vnode table.
/*
 * Vnode free list.  vfreeh is the head of the list; getnewvnode() takes
 * the vnode found there when it does not allocate a fresh one.
 * NOTE(review): vfreet presumably tracks the tail link -- its use is not
 * visible in this part of the file; confirm.
 */
struct vnode *vfreeh;
struct vnode **vfreet;
/*
 * Operation vectors defined elsewhere:
 *  - dead_vnodeops is installed by vclean() so that no further operations
 *    are passed through to the old file system;
 *  - spec_vnodeops is used for device vnodes (bdevvp(), and vflush() when
 *    a device vnode reverts to an anonymous device).
 */
extern struct vnodeops dead_vnodeops;
extern struct vnodeops spec_vnodeops;
* Initialize the vnode structures and initialize each file system type.
* Initialize the vnode name cache
* Initialize each file system type.
for (vfsp
= &vfssw
[0]; vfsp
<= &vfssw
[MOUNT_MAXTYPE
]; vfsp
++) {
* Return the next vnode from the free list.
getnewvnode(tag
, mp
, vops
, vpp
)
register struct vnode
*vp
, *vq
;
if (numvnodes
< desiredvnodes
) {
vp
= (struct vnode
*)malloc((u_long
)sizeof *vp
,
bzero((char *)vp
, sizeof *vp
);
if ((vp
= vfreeh
) == NULL
) {
panic("free vnode isn't");
* Move a vnode from one mount queue to another.
register struct vnode
*vp
;
register struct mount
*mp
;
register struct vnode
*vq
;
* Delete from old mount point vnode list, if on one.
vq
->v_mountb
= vp
->v_mountb
;
* Insert into list of vnodes for the new mount point, if available.
vq
->v_mountb
= &vp
->v_mountf
;
vp
->v_mountb
= &mp
->mnt_mounth
;
* Make sure all write-behind blocks associated
* with mount point are flushed out (from sync).
mntflushbuf(mountp
, flags
)
register struct vnode
*vp
;
if ((mountp
->mnt_flag
& MNT_MPBUSY
) == 0)
panic("mntflushbuf: not busy");
for (vp
= mountp
->mnt_mounth
; vp
; vp
= vp
->v_mountf
) {
if (vp
->v_mount
!= mountp
)
* Flush all dirty buffers associated with a vnode.
register struct vnode
*vp
;
for (bp
= vp
->v_dirtyblkhd
; bp
; bp
= nbp
) {
if ((bp
->b_flags
& B_BUSY
))
if ((bp
->b_flags
& B_DELWRI
) == 0)
panic("vflushbuf: not dirty");
* Wait for I/O associated with indirect blocks to complete,
* since there is no way to quickly wait for them below.
* NB: This is really specific to ufs, but is done here
* as it is easier and quicker.
if (bp
->b_vp
== vp
|| (flags
& B_SYNC
) == 0)
if ((flags
& B_SYNC
) == 0)
while (vp
->v_numoutput
) {
tsleep((caddr_t
)&vp
->v_numoutput
, PRIBIO
+ 1, "vflushbf", 0);
vprint("vflushbuf: dirty", vp
);
* Update outstanding I/O count and do wakeup if requested.
register struct vnode
*vp
;
bp
->b_dirtyoff
= bp
->b_dirtyend
= 0;
if ((vp
->v_flag
& VBWAIT
) && vp
->v_numoutput
<= 0) {
panic("vwakeup: neg numoutput");
wakeup((caddr_t
)&vp
->v_numoutput
);
* Invalidate in core blocks belonging to closed or umounted filesystem
* Go through the list of vnodes associated with the file system;
* for each vnode invalidate any buffers that it holds. Normally
* this routine is preceded by a bflush call, so that on a quiescent
* filesystem there will be no dirty buffers when we are done. Mntinvalbuf
* returns the count of dirty buffers when it is finished.
register struct vnode
*vp
;
if ((mountp
->mnt_flag
& MNT_MPBUSY
) == 0)
panic("mntinvalbuf: not busy");
for (vp
= mountp
->mnt_mounth
; vp
; vp
= vp
->v_mountf
) {
dirty
+= vinvalbuf(vp
, 1);
if (vp
->v_mount
!= mountp
)
* Flush out and invalidate all buffers associated with a vnode.
* Called with the underlying object locked.
register struct vnode
*vp
;
if (blist
= vp
->v_dirtyblkhd
)
else if (blist
= vp
->v_cleanblkhd
)
for (bp
= blist
; bp
; bp
= nbp
) {
if (bp
->b_flags
& B_BUSY
) {
tsleep((caddr_t
)bp
, PRIBIO
+ 1, "vinvalbf", 0);
if (save
&& (bp
->b_flags
& B_DELWRI
)) {
reassignbuf(bp
, bp
->b_vp
);
if (vp
->v_dirtyblkhd
|| vp
->v_cleanblkhd
)
panic("vinvalbuf: flush failed");
* Associate a buffer with a vnode.
register struct vnode
*vp
;
register struct vnode
*vq
;
panic("bgetvp: not free");
if (vp
->v_type
== VBLK
|| vp
->v_type
== VCHR
)
* Insert onto list for new vnode.
if (bq
= vp
->v_cleanblkhd
)
bq
->b_blockb
= &bp
->b_blockf
;
bp
->b_blockb
= &vp
->v_cleanblkhd
;
* Disassociate a buffer from a vnode.
if (bp
->b_vp
== (struct vnode
*) 0)
* Delete from old vnode list, if on one.
bq
->b_blockb
= bp
->b_blockb
;
bp
->b_vp
= (struct vnode
*) 0;
* Reassign a buffer from one vnode to another.
* Used to assign file specific control information
* (indirect blocks) to the vnode to which they belong.
register struct vnode
*newvp
;
register struct buf
*bq
, **listheadp
;
panic("reassignbuf: NULL");
* Delete from old vnode list, if on one.
bq
->b_blockb
= bp
->b_blockb
;
* If dirty, put on list of dirty buffers;
* otherwise insert onto list of clean buffers.
if (bp
->b_flags
& B_DELWRI
)
listheadp
= &newvp
->v_dirtyblkhd
;
listheadp
= &newvp
->v_cleanblkhd
;
bq
->b_blockb
= &bp
->b_blockf
;
bp
->b_blockb
= listheadp
;
* Create a vnode for a block device.
* Used for root filesystem, argdev, and swap areas.
* Also used for memory file system special devices.
register struct vnode
*vp
;
error
= getnewvnode(VT_NON
, (struct mount
*)0, &spec_vnodeops
, &nvp
);
if (nvp
= checkalias(vp
, dev
, (struct mount
*)0)) {
* Check to see if the new vnode represents a special device
* for which we already have a vnode (either because of
* bdevvp() or because of a different vnode representing
* the same block device). If such an alias exists, deallocate
* the existing contents and return the aliased vnode. The
* caller is responsible for filling it with its new contents.
checkalias(nvp
, nvp_rdev
, mp
)
register struct vnode
*nvp
;
register struct vnode
*vp
;
if (nvp
->v_type
!= VBLK
&& nvp
->v_type
!= VCHR
)
vpp
= &speclisth
[SPECHASH(nvp_rdev
)];
for (vp
= *vpp
; vp
; vp
= vp
->v_specnext
) {
if (nvp_rdev
!= vp
->v_rdev
|| nvp
->v_type
!= vp
->v_type
)
* Alias, but not in use, so flush it out.
if (vp
->v_usecount
== 0) {
if (vp
== NULL
|| vp
->v_tag
!= VT_NON
) {
MALLOC(nvp
->v_specinfo
, struct specinfo
*,
sizeof(struct specinfo
), M_VNODE
, M_WAITOK
);
* Grab a particular vnode from the free list, increment its
* reference count and lock it. The vnode lock bit is set if the
* vnode is being eliminated in vgone. The process is awakened
* when the transition is completed, and an error returned to
* indicate that the vnode is no longer usable (possibly having
* been changed to a new file system type).
register struct vnode
*vp
;
register struct vnode
*vq
;
if (vp
->v_flag
& VXLOCK
) {
tsleep((caddr_t
)vp
, PINOD
, "vget", 0);
if (vp
->v_usecount
== 0) {
vq
->v_freeb
= vp
->v_freeb
;
* Vnode reference, just increment the count
* vput(), just unlock and vrele()
register struct vnode
*vp
;
* If count drops to zero, call inactive routine and return to freelist.
register struct vnode
*vp
;
struct proc
*p
= curproc
; /* XXX */
if (vp
->v_usecount
!= 0 || vp
->v_writecount
!= 0) {
vprint("vrele: bad ref count", vp
);
* Page or buffer structure gets a reference.
register struct vnode
*vp
;
* Page or buffer structure frees a reference.
register struct vnode
*vp
;
panic("holdrele: holdcnt");
* Remove any vnodes in the vnode table belonging to mount point mp.
* If MNT_NOFORCE is specified, there should not be any active ones,
* return error if any are found (nb: this is a user error, not a
* system error). If MNT_FORCE is specified, detach any active vnodes
* that are found.
/* Debugging knob: patch to a non-zero value to print out busy vnodes. */
int busyprt = 0;
vflush(mp
, skipvp
, flags
)
register struct vnode
*vp
, *nvp
;
if ((mp
->mnt_flag
& MNT_MPBUSY
) == 0)
panic("vflush: not busy");
for (vp
= mp
->mnt_mounth
; vp
; vp
= nvp
) {
* Skip over a selected vnode.
* Skip over any vnodes marked VSYSTEM.
if ((flags
& SKIPSYSTEM
) && (vp
->v_flag
& VSYSTEM
))
* With v_usecount == 0, all we need to do is clear
* out the vnode data structures and we are done.
if (vp
->v_usecount
== 0) {
* For block or character devices, revert to an
* anonymous device. For all other files, just kill them.
if (flags
& FORCECLOSE
) {
if (vp
->v_type
!= VBLK
&& vp
->v_type
!= VCHR
) {
vp
->v_op
= &spec_vnodeops
;
insmntque(vp
, (struct mount
*)0);
vprint("vflush: busy vnode", vp
);
* Disassociate the underlying file system from a vnode.
register struct vnode
*vp
;
struct vnodeops
*origops
;
struct proc
*p
= curproc
; /* XXX */
* Check to see if the vnode is in use.
* If so we have to reference it before we clean it out
* so that its count cannot fall to zero and generate a
* race against ourselves to recycle it.
if (active
= vp
->v_usecount
)
* Prevent the vnode from being recycled or
* brought into use while we clean it out.
panic("vclean: deadlock");
* Even if the count is zero, the VOP_INACTIVE routine may still
* have the object locked while it cleans it out. The VOP_LOCK
* ensures that the VOP_INACTIVE routine is done with its work.
* For active vnodes, it ensures that no other activity can
* occur while the buffer list is being cleaned out.
* Prevent any further operations on the vnode from
* being passed through to the old file system.
vp
->v_op
= &dead_vnodeops
;
* If purging an active vnode, it must be unlocked, closed,
* and deactivated before being reclaimed.
(*(origops
->vop_unlock
))(vp
);
(*(origops
->vop_close
))(vp
, IO_NDELAY
, NOCRED
, p
);
(*(origops
->vop_inactive
))(vp
, p
);
if ((*(origops
->vop_reclaim
))(vp
))
panic("vclean: cannot reclaim");
* Done with purge, notify sleepers in vget of the grim news.
if (vp
->v_flag
& VXWANT
) {
* Eliminate all activity associated with the requested vnode
* and with all vnodes aliased to the requested vnode.
register struct vnode
*vp
;
register struct vnode
*vq
;
if (vp
->v_flag
& VALIASED
) {
* If a vgone (or vclean) is already in progress,
* wait until it is done and return.
if (vp
->v_flag
& VXLOCK
) {
tsleep((caddr_t
)vp
, PINOD
, "vgoneall", 0);
* Ensure that vp will not be vgone'd while we
* are eliminating its aliases.
while (vp
->v_flag
& VALIASED
) {
for (vq
= *vp
->v_hashchain
; vq
; vq
= vq
->v_specnext
) {
if (vq
->v_rdev
!= vp
->v_rdev
||
vq
->v_type
!= vp
->v_type
|| vp
== vq
)
* Remove the lock so that vgone below will
* really eliminate the vnode after which time
* vgone will awaken any sleepers.
* Eliminate all activity associated with a vnode
* in preparation for reuse.
register struct vnode
*vp
;
register struct vnode
*vq
;
* If a vgone (or vclean) is already in progress,
* wait until it is done and return.
if (vp
->v_flag
& VXLOCK
) {
tsleep((caddr_t
)vp
, PINOD
, "vgone", 0);
* Clean out the filesystem specific data.
* Delete from old mount point vnode list, if on one.
vq
->v_mountb
= vp
->v_mountb
;
* If special device, remove it from special device alias list.
if (vp
->v_type
== VBLK
|| vp
->v_type
== VCHR
) {
if (*vp
->v_hashchain
== vp
) {
*vp
->v_hashchain
= vp
->v_specnext
;
for (vq
= *vp
->v_hashchain
; vq
; vq
= vq
->v_specnext
) {
if (vq
->v_specnext
!= vp
)
vq
->v_specnext
= vp
->v_specnext
;
if (vp
->v_flag
& VALIASED
) {
for (vq
= *vp
->v_hashchain
; vq
; vq
= vq
->v_specnext
) {
if (vq
->v_rdev
!= vp
->v_rdev
||
vq
->v_type
!= vp
->v_type
)
FREE(vp
->v_specinfo
, M_VNODE
);
* If it is on the freelist, move it to the head of the list.
vq
->v_freeb
= vp
->v_freeb
;
vfreeh
->v_freeb
= &vp
->v_freef
;
* Lookup a vnode by device number.
register struct vnode
*vp
;
for (vp
= speclisth
[SPECHASH(dev
)]; vp
; vp
= vp
->v_specnext
) {
if (dev
!= vp
->v_rdev
|| type
!= vp
->v_type
)
* Calculate the total number of references to a special device.
register struct vnode
*vp
;
register struct vnode
*vq
;
if ((vp
->v_flag
& VALIASED
) == 0)
for (count
= 0, vq
= *vp
->v_hashchain
; vq
; vq
= vq
->v_specnext
) {
if (vq
->v_rdev
!= vp
->v_rdev
|| vq
->v_type
!= vp
->v_type
)
* Alias, but not in use, so flush it out.
if (vq
->v_usecount
== 0) {
* Print out a description of a vnode.
/*
 * Printable names for the vnode types; vprint() indexes this table
 * with vp->v_type.
 */
static char *typename[] = {
	"VNON",
	"VREG",
	"VDIR",
	"VBLK",
	"VCHR",
	"VLNK",
	"VSOCK",
	"VFIFO",
	"VBAD"
};
register struct vnode
*vp
;
printf("type %s, usecount %d, writecount %d, refcount %d,",
typename
[vp
->v_type
], vp
->v_usecount
, vp
->v_writecount
,
if (vp
->v_flag
& VSYSTEM
)
if (vp
->v_flag
& VALIASED
)
strcat(buf
, "|VALIASED");
printf(" flags (%s)", &buf
[1]);
* List all of the locked vnodes in the system.
* Called when debugging the kernel.
register struct mount
*mp
;
register struct vnode
*vp
;
printf("Locked vnodes\n");
for (vp
= mp
->mnt_mounth
; vp
; vp
= vp
->v_mountf
)
/*
 * Number of extra vnode slots added to numvnodes when kinfo_vnode()
 * computes the buffer size it reports through *aneeded.
 */
#define KINFO_VNODESLOP 10
* Dump vnode list (via kinfo).
* Copyout address of vnode followed by vnode.
kinfo_vnode(op
, where
, acopysize
, arg
, aneeded
)
int *acopysize
, arg
, *aneeded
;
register struct mount
*mp
= rootfs
;
register char *bp
= where
, *savebp
;
char *ewhere
= where
+ *acopysize
;
#define VPTRSZ sizeof (struct vnode *)
#define VNODESZ sizeof (struct vnode)
*aneeded
= (numvnodes
+ KINFO_VNODESLOP
) * (VPTRSZ
+ VNODESZ
);
for (vp
= mp
->mnt_mounth
; vp
; vp
= vp
->v_mountf
) {
* Check that the vp is still associated with
* this filesystem. RACE: could have been
* recycled onto the same filesystem.
printf("kinfo: vp changed\n");
if ((bp
+ VPTRSZ
+ VNODESZ
<= ewhere
) &&
((error
= copyout((caddr_t
)&vp
, bp
, VPTRSZ
)) ||
(error
= copyout((caddr_t
)vp
, bp
+ VPTRSZ
,
*acopysize
= ewhere
- where
;