/*
 * Copyright (c) 1992, 1993, 1994 The Regents of the University of California.
 * Copyright (c) 1992, 1993, 1994 Jan-Simon Pendry.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry and by John Heidemann of the UCLA Ficus project.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)union_vnops.c	1.1 (Berkeley) %G%
 */
#include <sys/filedesc.h>
int union_bug_bypass = 0;	/* for debugging: enables bypass printf'ing */
* This is the 10-Apr-92 bypass routine.
* This version has been optimized for speed, throwing away some
* safety checks. It should still always work, but it's not as
* robust to programmer errors.
* Define SAFETY to include some error checking code.
* In general, we map all vnodes going down and unmap them on the way back.
* As an exception to this, vnodes can be marked "unmapped" by setting
* the Nth bit in operation's vdesc_flags.
* Also, some BSD vnode operations have the side effect of vrele'ing
* their arguments. With stacking, the reference counts are held
* by the upper node, not the lower one, so we must handle these
* side-effects here. This is not of concern in Sun-derived systems
* since there are no such side-effects.
* This makes the following assumptions:
* - only one returned vpp
* - no INOUT vpp's (Sun's vop_open has one of these)
* - the vnode operation vector of the first vnode should be used
* to determine what implementation of the op should be invoked
* - all mapped vnodes are of our vnode-type (NEEDSWORK:
* problems on rmdir'ing mount points and renaming?)
struct vop_generic_args
/* {
struct vnodeop_desc *a_desc;
<other random data follows, presumably>
struct vnode
**this_vp_p
;
struct vnode
*old_vps
[VDESC_MAX_VPS
];
struct vnode
**vps_p
[VDESC_MAX_VPS
];
struct vnodeop_desc
*descp
= ap
->a_desc
;
printf ("union_bypass: %s\n", descp
->vdesc_name
);
* We require at least one vp.
if (descp
->vdesc_vp_offsets
== NULL
||
descp
->vdesc_vp_offsets
[0] == VDESC_NO_OFFSET
)
panic ("union_bypass: no vp's in map.\n");
* Map the vnodes going in.
* Later, we'll invoke the operation based on
* the first mapped vnode's operation vector.
reles
= descp
->vdesc_flags
;
for (i
= 0; i
< VDESC_MAX_VPS
; reles
>>= 1, i
++) {
if (descp
->vdesc_vp_offsets
[i
] == VDESC_NO_OFFSET
)
break; /* bail out at end of list */
VOPARG_OFFSETTO(struct vnode
**, descp
->vdesc_vp_offsets
[i
],ap
);
* We're not guaranteed that any but the first vnode
* are of our type. Check for and don't map any
* that aren't. (We must always map first vp or vclean fails.)
if (i
&& (*this_vp_p
)->v_op
!= union_vnodeop_p
) {
*(vps_p
[i
]) = OTHERVP(*this_vp_p
);
* XXX - Several operations have the side effect
* of vrele'ing their vp's. We must account for
* that. (This should go away in the future.)
* Call the operation on the lower layer
* with the modified argument structure.
error
= VCALL(*(vps_p
[0]), descp
->vdesc_offset
, ap
);
* Maintain the illusion of call-by-value
* by restoring vnodes in the argument structure
* to their original value.
reles
= descp
->vdesc_flags
;
for (i
= 0; i
< VDESC_MAX_VPS
; reles
>>= 1, i
++) {
if (descp
->vdesc_vp_offsets
[i
] == VDESC_NO_OFFSET
)
break; /* bail out at end of list */
*(vps_p
[i
]) = old_vps
[i
];
* Map the possible out-going vpp
* (Assumes that the lower layer always returns
* a VREF'ed vpp unless it gets an error.)
if (descp
->vdesc_vpp_offset
!= VDESC_NO_OFFSET
&&
!(descp
->vdesc_flags
& VDESC_NOMAP_VPP
) &&
* XXX - even though some ops have vpp returned vp's,
* several ops actually vrele this before returning.
* We must avoid these ops.
* (This should go away when these ops are regularized.)
if (descp
->vdesc_flags
& VDESC_VPP_WILLRELE
)
vppp
= VOPARG_OFFSETTO(struct vnode
***,
descp
->vdesc_vpp_offset
,ap
);
panic("union: failed to handled returned vnode");
error
= union_allocvp(0, 0, 0, 0, 0, 0);
* Check access permission on the union vnode.
* The access check being enforced is to check
* against both the underlying vnode, and any
* copied vnode. This ensures that no additional
* file permissions are given away simply because
* the user caused an implicit file copy.
struct vop_access_args
/* {
struct vnodeop_desc *a_desc;
struct union_node
*un
= VTOUNION(ap
->a_vp
);
if (vp
= un
->un_lowervp
) {
error
= VOP_ACCESS(vp
, ap
->a_mode
, ap
->a_cred
, ap
->a_p
);
return (VOP_ACCESS(vp
, ap
->a_mode
, ap
->a_cred
, ap
->a_p
));
union_mkshadow(dvp
, cnp
, vpp
)
struct componentname
*cnp
;
struct proc
*p
= cnp
->cn_proc
;
int lockparent
= (cnp
->cn_flags
& LOCKPARENT
);
* policy: when creating the shadow directory in the
* upper layer, create it owned by the current user,
* group from parent directory, and mode 777 modified
* by umask (ie mostly identical to the mkdir syscall).
* TODO: create the directory owned by the user who
* did the mount (um->um_cred).
va
.va_mode
= UN_DIRMODE
&~ p
->p_fd
->fd_cmask
;
LEASE_CHECK(dvp
, p
, p
->p_ucred
, LEASE_WRITE
);
error
= VOP_MKDIR(dvp
, vpp
, cnp
, &va
);
union_lookup1(dvp
, vpp
, cnp
)
struct componentname
*cnp
;
if (cnp
->cn_flags
& ISDOTDOT
) {
if ((dvp
->v_flag
& VROOT
) == 0 ||
(cnp
->cn_flags
& NOCROSSMOUNT
))
dvp
= dvp
->v_mount
->mnt_vnodecovered
;
error
= VOP_LOOKUP(dvp
, &tdvp
, cnp
);
while ((dvp
->v_type
== VDIR
) && (mp
= dvp
->v_mountedhere
) &&
(cnp
->cn_flags
& NOCROSSMOUNT
) == 0) {
if (mp
->mnt_flag
& MNT_MLOCK
) {
mp
->mnt_flag
|= MNT_MWAIT
;
sleep((caddr_t
) mp
, PVFS
);
if (error
= VFS_ROOT(mp
, &tdvp
)) {
struct vop_lookup_args
/* {
struct vnodeop_desc *a_desc;
struct componentname *a_cnp;
struct vnode
*uppervp
, *lowervp
;
struct vnode
*upperdvp
, *lowerdvp
;
struct vnode
*dvp
= ap
->a_dvp
;
struct union_node
*dun
= VTOUNION(ap
->a_dvp
);
struct componentname
*cnp
= ap
->a_cnp
;
int lockparent
= cnp
->cn_flags
& LOCKPARENT
;
upperdvp
= dun
->un_uppervp
;
lowerdvp
= dun
->un_lowervp
;
* do the lookup in the upper level.
* if that level comsumes additional pathnames,
* then assume that something special is going
* on and just return that vnode.
uerror
= union_lookup1(upperdvp
, &uppervp
, cnp
);
if (cnp
->cn_consume
!= 0) {
* in a similar way to the upper layer, do the lookup
* in the lower layer. this time, if there is some
* component magic going on, then vput whatever we got
* back from the upper layer and return the lower vnode
lerror
= union_lookup1(lowerdvp
, &lowervp
, cnp
);
if (cnp
->cn_consume
!= 0) {
* at this point, we have uerror and lerror indicating
* possible errors with the lookups in the upper and lower
* layers. additionally, uppervp and lowervp are (locked)
* references to existing vnodes in the upper and lower layers.
* there are now three cases to consider.
* 1. if both layers returned an error, then return whatever
* error the upper layer generated.
* 2. if the top layer failed and the bottom layer succeeded
* then two subcases occur.
* a. the bottom vnode is not a directory, in which
* case just return a new union vnode referencing
* an empty top layer and the existing bottom layer.
* b. the bottom vnode is a directory, in which case
* create a new directory in the top-level and
* 3. if the top layer succeeded then return a new union
* vnode referencing whatever the new top layer and
* whatever the bottom layer returned.
if ((uerror
!= 0) && (lerror
!= 0)) {
if (uerror
!= 0 /* && (lerror == 0) */ ) {
if (lowervp
->v_type
== VDIR
) { /* case 2b. */
uerror
= union_mkshadow(upperdvp
, cnp
, &uppervp
);
return (union_allocvp(ap
->a_vpp
, dvp
->v_mount
, dvp
, cnp
,
* copyfile. copy the vnode (fvp) to the vnode (tvp)
* using a sequence of reads and writes.
union_copyfile(p
, cred
, fvp
, tvp
)
* allocate a buffer of size MAXBSIZE.
* loop doing reads and writes, keeping track
* of the current uio offset.
* give up at the first sign of trouble.
uio
.uio_segflg
= UIO_SYSSPACE
;
VOP_UNLOCK(fvp
); /* XXX */
LEASE_CHECK(fvp
, p
, cred
, LEASE_READ
);
VOP_UNLOCK(tvp
); /* XXX */
LEASE_CHECK(tvp
, p
, cred
, LEASE_WRITE
);
buf
= malloc(MAXBSIZE
, M_TEMP
, M_WAITOK
);
uio
.uio_resid
= iov
.iov_len
;
error
= VOP_READ(fvp
, &uio
, 0, cred
);
iov
.iov_len
= MAXBSIZE
- uio
.uio_resid
;
uio
.uio_resid
= iov
.iov_len
;
error
= VOP_WRITE(tvp
, &uio
, 0, cred
);
} while (error
== 0 && uio
.uio_resid
> 0);
} while ((uio
.uio_resid
== 0) && (error
== 0));
struct vop_open_args
/* {
struct vnodeop_desc *a_desc;
struct union_node
*un
= VTOUNION(ap
->a_vp
);
struct ucred
*cred
= ap
->a_cred
;
struct proc
*p
= ap
->a_p
;
* If there is an existing upper vp then simply open that.
return (VOP_OPEN(un
->un_uppervp
, mode
, cred
, p
));
* If the lower vnode is being opened for writing, then
* copy the file contents to the upper vnode and open that,
* otherwise can simply open the lower vnode.
if ((ap
->a_mode
& FWRITE
) && (un
->un_lowervp
->v_type
== VREG
)) {
struct filedesc
*fdp
= p
->p_fd
;
* Open the named file in the upper layer. Note that
* the file may have come into existence *since* the lookup
* was done, since the upper layer may really be a
* loopback mount of some other filesystem... so open
* the file with exclusive create and barf if it already
* XXX - perhaps shoudl re-lookup the node (once more with
* feeling) and simply open that. Who knows.
NDINIT(&nd
, CREATE
, 0, UIO_SYSSPACE
, un
->un_path
, p
);
fmode
= (O_CREAT
|O_TRUNC
|O_EXCL
);
cmode
= UN_FILEMODE
& ~fdp
->fd_cmask
;
error
= vn_open(&nd
, fmode
, cmode
);
un
->un_uppervp
= nd
.ni_vp
;
* Now, if the file is being opened with truncation, then
* the (new) upper vnode is ready to fly, otherwise the
* data from the lower vnode must be copied to the upper
* layer first. This only works for regular files (check
if ((mode
& O_TRUNC
) == 0) {
/* XXX - should not ignore errors from VOP_CLOSE */
error
= VOP_OPEN(un
->un_lowervp
, FREAD
, cred
, p
);
error
= union_copyfile(p
, cred
,
un
->un_lowervp
, un
->un_uppervp
);
(void) VOP_CLOSE(un
->un_lowervp
, FREAD
);
(void) VOP_CLOSE(un
->un_uppervp
, FWRITE
);
error
= VOP_OPEN(un
->un_uppervp
, FREAD
, cred
, p
);
return (VOP_OPEN(un
->un_lowervp
, mode
, cred
, p
));
* We handle getattr only to change the fsid.
struct vop_getattr_args
/* {
if (error
= union_bypass(ap
))
/* Requires that arguments be restored. */
ap
->a_vap
->va_fsid
= ap
->a_vp
->v_mount
->mnt_stat
.f_fsid
.val
[0];
* union_readdir works in concert with getdirentries and
* readdir(3) to provide a list of entries in the unioned
* directories. getdirentries is responsible for walking
* down the union stack. readdir(3) is responsible for
* eliminating duplicate names from the returned data stream.
struct vop_readdir_args
/* {
struct vnodeop_desc *a_desc;
struct union_node
*un
= VTOUNION(ap
->a_vp
);
return (union_bypass(ap
));
int
union_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */
	return (0);
}
struct vop_reclaim_args
/* {
struct vnode
*vp
= ap
->a_vp
;
struct union_node
*un
= VTOUNION(vp
);
struct vnode
*uppervp
= un
->un_uppervp
;
struct vnode
*lowervp
= un
->un_lowervp
;
struct vnode
*dirvp
= un
->un_dirvp
;
char *path
= un
->un_path
;
* Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
* so we can't call VOPs on ourself.
/* After this assignment, this node will not be re-used. */
struct vop_print_args
/* {
struct vnode
*vp
= ap
->a_vp
;
printf("\ttag VT_UNION, vp=%x, uppervp=%x, lowervp=%x\n",
vp
, UPPERVP(vp
), LOWERVP(vp
));
* XXX - vop_strategy must be hand coded because it has no
* vnode in its arguments.
* This goes away with a merged VM/buffer cache.
struct vop_strategy_args
/* {
struct buf
*bp
= ap
->a_bp
;
bp
->b_vp
= OTHERVP(bp
->b_vp
);
panic("union_strategy: nil vp");
if (((bp
->b_flags
& B_READ
) == 0) &&
(bp
->b_vp
== LOWERVP(savedvp
)))
panic("union_strategy: writing to lowervp");
error
= VOP_STRATEGY(bp
);
* XXX - like vop_strategy, vop_bwrite must be hand coded because it has no
* vnode in its arguments.
* This goes away with a merged VM/buffer cache.
struct vop_bwrite_args
/* {
struct buf
*bp
= ap
->a_bp
;
bp
->b_vp
= UPPERVP(bp
->b_vp
);
panic("union_bwrite: no upper vp");
struct vop_lock_args
*ap
;
struct union_node
*un
= VTOUNION(ap
->a_vp
);
if (un
->un_pid
== curproc
->p_pid
)
panic("union: locking agsinst myself");
while (un
->un_flags
& UN_LOCKED
) {
sleep((caddr_t
) &un
->un_flags
, PINOD
);
un
->un_flags
|= UN_LOCKED
;
un
->un_pid
= curproc
->p_pid
;
if (un
->un_lowervp
&& !VOP_ISLOCKED(un
->un_lowervp
))
VOP_LOCK(un
->un_lowervp
);
if (un
->un_uppervp
&& !VOP_ISLOCKED(un
->un_uppervp
))
VOP_LOCK(un
->un_uppervp
);
struct vop_lock_args
*ap
;
struct union_node
*un
= VTOUNION(ap
->a_vp
);
if (un
->un_pid
!= curproc
->p_pid
)
panic("union: unlocking other process's union node");
if ((un
->un_flags
& UN_LOCKED
) == 0)
panic("union: unlock unlocked node");
if (un
->un_uppervp
&& VOP_ISLOCKED(un
->un_uppervp
))
VOP_UNLOCK(un
->un_uppervp
);
if (un
->un_lowervp
&& VOP_ISLOCKED(un
->un_lowervp
))
VOP_UNLOCK(un
->un_lowervp
);
un
->un_flags
&= ~UN_LOCKED
;
if (un
->un_flags
& UN_WANT
) {
un
->un_flags
&= ~UN_WANT
;
wakeup((caddr_t
) &un
->un_flags
);
* Global vfs data structures
int (**union_vnodeop_p
)();
struct vnodeopv_entry_desc union_vnodeop_entries
[] = {
{ &vop_default_desc
, union_bypass
},
{ &vop_getattr_desc
, union_getattr
},
{ &vop_inactive_desc
, union_inactive
},
{ &vop_reclaim_desc
, union_reclaim
},
{ &vop_print_desc
, union_print
},
{ &vop_strategy_desc
, union_strategy
},
{ &vop_bwrite_desc
, union_bwrite
},
{ &vop_lock_desc
, union_lock
},
{ &vop_unlock_desc
, union_unlock
},
{ (struct vnodeop_desc
*)NULL
, (int(*)())NULL
}
struct vnodeopv_desc union_vnodeop_opv_desc
=
{ &union_vnodeop_p
, union_vnodeop_entries
};