* Copyright (c) 1994 Jan-Simon Pendry
* The Regents of the University of California. All rights reserved.
* This code is derived from software contributed to Berkeley by
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* @(#)union_subr.c 8.20 (Berkeley) 5/20/95
#include <sys/filedesc.h>
#include <vm/vm.h> /* for vnode_pager_setsize */
#include <miscfs/union/union.h>
/* must be power of two, otherwise change UNION_HASH() */
/*
 * UNION_HASH(u, l): map an (upper vnode, lower vnode) pair to a bucket
 * index in the range [0, NHASH).  Both arguments are converted to
 * unsigned long, summed, shifted right by 8 bits and masked with
 * (NHASH-1), so NHASH has to be a power of two.
 * Both parameters are fully parenthesized so that expression arguments
 * are converted as a whole rather than only their first operand.
 */
#define UNION_HASH(u, l) \
	(((((unsigned long) (u)) + ((unsigned long) (l))) >> 8) & (NHASH-1))
static LIST_HEAD(unhead
, union_node
) unhead
[NHASH
];
static int unvplock
[NHASH
];
for (i
= 0; i
< NHASH
; i
++)
bzero((caddr_t
) unvplock
, sizeof(unvplock
));
if (unvplock
[ix
] & UN_LOCKED
) {
sleep((caddr_t
) &unvplock
[ix
], PINOD
);
unvplock
[ix
] |= UN_LOCKED
;
unvplock
[ix
] &= ~UN_LOCKED
;
if (unvplock
[ix
] & UN_WANT
) {
unvplock
[ix
] &= ~UN_WANT
;
wakeup((caddr_t
) &unvplock
[ix
]);
union_updatevp(un
, uppervp
, lowervp
)
int ohash
= UNION_HASH(un
->un_uppervp
, un
->un_lowervp
);
int nhash
= UNION_HASH(uppervp
, lowervp
);
int docache
= (lowervp
!= NULLVP
|| uppervp
!= NULLVP
);
* Ensure locking is ordered from lower to higher
while (union_list_lock(lhash
))
while (union_list_lock(uhash
))
if (ohash
!= nhash
|| !docache
) {
if (un
->un_flags
& UN_CACHED
) {
un
->un_flags
&= ~UN_CACHED
;
LIST_REMOVE(un
, un_cache
);
union_list_unlock(ohash
);
if (un
->un_lowervp
!= lowervp
) {
free(un
->un_path
, M_TEMP
);
un
->un_lowervp
= lowervp
;
if (un
->un_uppervp
!= uppervp
) {
un
->un_uppervp
= uppervp
;
if (docache
&& (ohash
!= nhash
)) {
LIST_INSERT_HEAD(&unhead
[nhash
], un
, un_cache
);
un
->un_flags
|= UN_CACHED
;
union_list_unlock(nhash
);
union_newlower(un
, lowervp
)
union_updatevp(un
, un
->un_uppervp
, lowervp
);
union_newupper(un
, uppervp
)
union_updatevp(un
, uppervp
, un
->un_lowervp
);
* Keep track of size changes in the underlying vnodes.
* If the size changes, then callback to the vm layer
* giving priority to the upper layer size.
union_newsize(vp
, uppersz
, lowersz
)
/* only interested in regular files */
if ((uppersz
!= VNOVAL
) && (un
->un_uppersz
!= uppersz
)) {
un
->un_uppersz
= uppersz
;
if ((lowersz
!= VNOVAL
) && (un
->un_lowersz
!= lowersz
)) {
un
->un_lowersz
= lowersz
;
printf("union: %s size now %ld\n",
uppersz
!= VNOVAL
? "upper" : "lower", (long) sz
);
vnode_pager_setsize(vp
, sz
);
* allocate a union_node/vnode pair. the vnode is
* referenced and locked. the new vnode is returned
* via (vpp). (mp) is the mountpoint of the union filesystem,
* (dvp) is the parent directory where the upper layer object
* should exist (but doesn't) and (cnp) is the componentname
* information which is partially copied to allow the upper
* layer object to be created at a later time. (uppervp)
* and (lowervp) reference the upper and lower layer objects
* being mapped. either, but not both, can be nil.
* if supplied, (uppervp) is locked.
* the reference is either maintained in the new union_node
* object which is allocated, or they are vrele'd.
* all union_nodes are maintained on hashed lists (unhead[],
* indexed by UNION_HASH). new nodes are only allocated when they
* cannot be found on the relevant list. entries on a list are
* removed when the vfs reclaim entry is called.
* a lock is kept for each hash list (unvplock[]). this is
* needed because the getnewvnode() function can block
* waiting for a vnode to become free, in which case there
* may be more than one process trying to get the same
* vnode. this lock is only taken if we are going to
* call getnewvnode, since the kernel itself is single-threaded.
* if an entry is found on the list, then call vget() to
* take a reference. this is done because there may be
* zero references to it and so it needs to be removed from
union_allocvp(vpp
, mp
, undvp
, dvp
, cnp
, uppervp
, lowervp
, docache
)
struct vnode
*undvp
; /* parent union vnode */
struct vnode
*dvp
; /* may be null */
struct componentname
*cnp
; /* may be null */
struct vnode
*uppervp
; /* may be null */
struct vnode
*lowervp
; /* may be null */
struct vnode
*xlowervp
= NULLVP
;
struct union_mount
*um
= MOUNTTOUNIONMOUNT(mp
);
if (uppervp
== NULLVP
&& lowervp
== NULLVP
)
panic("union: unidentifiable allocation");
if (uppervp
&& lowervp
&& (uppervp
->v_type
!= lowervp
->v_type
)) {
/* detect the root vnode (and aliases) */
if ((uppervp
== um
->um_uppervp
) &&
((lowervp
== NULLVP
) || lowervp
== um
->um_lowervp
)) {
lowervp
= um
->um_lowervp
;
} else for (try = 0; try < 3; try++) {
hash
= UNION_HASH(uppervp
, lowervp
);
hash
= UNION_HASH(uppervp
, NULLVP
);
hash
= UNION_HASH(NULLVP
, lowervp
);
while (union_list_lock(hash
))
for (un
= unhead
[hash
].lh_first
; un
!= 0;
un
= un
->un_cache
.le_next
) {
if ((un
->un_lowervp
== lowervp
||
un
->un_lowervp
== NULLVP
) &&
(un
->un_uppervp
== uppervp
||
un
->un_uppervp
== NULLVP
) &&
(UNIONTOV(un
)->v_mount
== mp
)) {
if (vget(UNIONTOV(un
), 0,
cnp
? cnp
->cn_proc
: NULL
)) {
* Obtain a lock on the union_node.
* uppervp is locked, though un->un_uppervp
* may not be. this doesn't break the locking
* hierarchy since in the case that un->un_uppervp
* is not yet locked it will be vrele'd and replaced
if ((dvp
!= NULLVP
) && (uppervp
== dvp
)) {
* Access ``.'', so (un) will already
* be locked. Since this process has
* the lock on (uppervp) no other
* process can hold the lock on (un).
if ((un
->un_flags
& UN_LOCKED
) == 0)
panic("union: . not locked");
else if (curproc
&& un
->un_pid
!= curproc
->p_pid
&&
un
->un_pid
> -1 && curproc
->p_pid
> -1)
panic("union: allocvp not lock owner");
if (un
->un_flags
& UN_LOCKED
) {
sleep((caddr_t
) &un
->un_flags
, PINOD
);
un
->un_flags
|= UN_LOCKED
;
un
->un_pid
= curproc
->p_pid
;
* At this point, the union_node is locked,
* un->un_uppervp may not be locked, and uppervp
* Save information about the upper layer.
if (uppervp
!= un
->un_uppervp
) {
union_newupper(un
, uppervp
);
un
->un_flags
|= UN_ULOCK
;
un
->un_flags
&= ~UN_KLOCK
;
* Save information about the lower layer.
* This needs to keep track of pathname
* and directory information which union_vn_create
if (lowervp
!= un
->un_lowervp
) {
union_newlower(un
, lowervp
);
if (cnp
&& (lowervp
!= NULLVP
)) {
un
->un_hash
= cnp
->cn_hash
;
un
->un_path
= malloc(cnp
->cn_namelen
+1,
bcopy(cnp
->cn_nameptr
, un
->un_path
,
un
->un_path
[cnp
->cn_namelen
] = '\0';
* otherwise lock the vp list while we call getnewvnode
hash
= UNION_HASH(uppervp
, lowervp
);
if (union_list_lock(hash
))
error
= getnewvnode(VT_UNION
, mp
, union_vnodeop_p
, vpp
);
MALLOC((*vpp
)->v_data
, void *, sizeof(struct union_node
),
(*vpp
)->v_type
= uppervp
->v_type
;
(*vpp
)->v_type
= lowervp
->v_type
;
un
->un_uppervp
= uppervp
;
un
->un_lowervp
= lowervp
;
un
->un_flags
= UN_LOCKED
;
un
->un_flags
|= UN_ULOCK
;
un
->un_pid
= curproc
->p_pid
;
if (cnp
&& (lowervp
!= NULLVP
)) {
un
->un_hash
= cnp
->cn_hash
;
un
->un_path
= malloc(cnp
->cn_namelen
+1, M_TEMP
, M_WAITOK
);
bcopy(cnp
->cn_nameptr
, un
->un_path
, cnp
->cn_namelen
);
un
->un_path
[cnp
->cn_namelen
] = '\0';
LIST_INSERT_HEAD(&unhead
[hash
], un
, un_cache
);
un
->un_flags
|= UN_CACHED
;
struct union_node
*un
= VTOUNION(vp
);
if (un
->un_flags
& UN_CACHED
) {
un
->un_flags
&= ~UN_CACHED
;
LIST_REMOVE(un
, un_cache
);
if (un
->un_pvp
!= NULLVP
)
if (un
->un_uppervp
!= NULLVP
)
if (un
->un_lowervp
!= NULLVP
)
if (un
->un_dirvp
!= NULLVP
)
free(un
->un_path
, M_TEMP
);
FREE(vp
->v_data
, M_TEMP
);
* copyfile. copy the vnode (fvp) to the vnode (tvp)
* using a sequence of reads and writes. both (fvp)
* and (tvp) are locked on entry and exit.
union_copyfile(fvp
, tvp
, cred
, p
)
* allocate a buffer of size MAXBSIZE.
* loop doing reads and writes, keeping track
* of the current uio offset.
* give up at the first sign of trouble.
uio
.uio_segflg
= UIO_SYSSPACE
;
VOP_UNLOCK(fvp
, 0, p
); /* XXX */
VOP_LEASE(fvp
, p
, cred
, LEASE_READ
);
vn_lock(fvp
, LK_EXCLUSIVE
| LK_RETRY
, p
); /* XXX */
VOP_UNLOCK(tvp
, 0, p
); /* XXX */
VOP_LEASE(tvp
, p
, cred
, LEASE_WRITE
);
vn_lock(tvp
, LK_EXCLUSIVE
| LK_RETRY
, p
); /* XXX */
buf
= malloc(MAXBSIZE
, M_TEMP
, M_WAITOK
);
/* ugly loop follows... */
off_t offset
= uio
.uio_offset
;
uio
.uio_resid
= iov
.iov_len
;
error
= VOP_READ(fvp
, &uio
, 0, cred
);
iov
.iov_len
= MAXBSIZE
- uio
.uio_resid
;
uio
.uio_resid
= iov
.iov_len
;
error
= VOP_WRITE(tvp
, &uio
, 0, cred
);
} while ((uio
.uio_resid
> 0) && (error
== 0));
* (un) is assumed to be locked on entry and remains
union_copyup(un
, docopy
, cred
, p
)
error
= union_vn_create(&uvp
, un
, p
);
/* at this point, uppervp is locked */
un
->un_flags
|= UN_ULOCK
;
* XX - should not ignore errors
vn_lock(lvp
, LK_EXCLUSIVE
| LK_RETRY
, p
);
error
= VOP_OPEN(lvp
, FREAD
, cred
, p
);
error
= union_copyfile(lvp
, uvp
, cred
, p
);
(void) VOP_CLOSE(lvp
, FREAD
, cred
, p
);
uprintf("union: copied up %s\n", un
->un_path
);
un
->un_flags
&= ~UN_ULOCK
;
union_vn_close(uvp
, FWRITE
, cred
, p
);
vn_lock(uvp
, LK_EXCLUSIVE
| LK_RETRY
, p
);
un
->un_flags
|= UN_ULOCK
;
* Subsequent IOs will go to the top layer, so
* call close on the lower vnode and open on the
* upper vnode to ensure that the filesystem keeps
* its reference counts right. This doesn't do
* the right thing with (cred) and (FREAD) though.
* Ignoring error returns is not right, either.
for (i
= 0; i
< un
->un_openl
; i
++) {
(void) VOP_CLOSE(lvp
, FREAD
, cred
, p
);
(void) VOP_OPEN(uvp
, FREAD
, cred
, p
);
union_relookup(um
, dvp
, vpp
, cnp
, cn
, path
, pathlen
)
struct componentname
*cnp
;
struct componentname
*cn
;
* A new componentname structure must be faked up because
* there is no way to know where the upper level cnp came
* from or what it is being used for. This must duplicate
* some of the work done by NDINIT, some of the work done
* by namei, some of the work done by lookup and some of
* the work done by VOP_LOOKUP when given a CREATE flag.
* The pathname buffer will be FREEed by VOP_MKDIR.
cn
->cn_namelen
= pathlen
;
cn
->cn_pnbuf
= malloc(cn
->cn_namelen
+1, M_NAMEI
, M_WAITOK
);
bcopy(path
, cn
->cn_pnbuf
, cn
->cn_namelen
);
cn
->cn_pnbuf
[cn
->cn_namelen
] = '\0';
cn
->cn_flags
= (LOCKPARENT
|HASBUF
|SAVENAME
|SAVESTART
|ISLASTCN
);
cn
->cn_proc
= cnp
->cn_proc
;
if (um
->um_op
== UNMNT_ABOVE
)
cn
->cn_cred
= cnp
->cn_cred
;
cn
->cn_cred
= um
->um_cred
;
cn
->cn_nameptr
= cn
->cn_pnbuf
;
cn
->cn_hash
= cnp
->cn_hash
;
cn
->cn_consume
= cnp
->cn_consume
;
error
= relookup(dvp
, vpp
, cn
);
* Create a shadow directory in the upper layer.
* The new vnode is returned locked.
* (um) points to the union mount structure for access to the
* mounting process's credentials.
* (dvp) is the directory in which to create the shadow directory.
* it is unlocked on entry and exit.
* (cnp) is the componentname to be created.
* (vpp) is the returned newly created shadow directory, which
union_mkshadow(um
, dvp
, cnp
, vpp
)
struct componentname
*cnp
;
struct proc
*p
= cnp
->cn_proc
;
error
= union_relookup(um
, dvp
, vpp
, cnp
, &cn
,
cnp
->cn_nameptr
, cnp
->cn_namelen
);
* policy: when creating the shadow directory in the
* upper layer, create it owned by the user who did
* the mount, group from parent directory, and mode
* 777 modified by umask (ie mostly identical to the
* mkdir syscall). (jsp, kb)
va
.va_mode
= um
->um_cmode
;
/* VOP_LEASE: dvp is locked */
VOP_LEASE(dvp
, p
, cn
.cn_cred
, LEASE_WRITE
);
error
= VOP_MKDIR(dvp
, vpp
, &cn
, &va
);
* Create a whiteout entry in the upper layer.
* (um) points to the union mount structure for access to the
* mounting process's credentials.
* (dvp) is the directory in which to create the whiteout.
* it is locked on entry and exit.
* (cnp) is the componentname to be created.
union_mkwhiteout(um
, dvp
, cnp
, path
)
struct componentname
*cnp
;
struct proc
*p
= cnp
->cn_proc
;
error
= union_relookup(um
, dvp
, &wvp
, cnp
, &cn
, path
, strlen(path
));
vn_lock(dvp
, LK_EXCLUSIVE
| LK_RETRY
, p
);
/* VOP_LEASE: dvp is locked */
VOP_LEASE(dvp
, p
, p
->p_ucred
, LEASE_WRITE
);
error
= VOP_WHITEOUT(dvp
, &cn
, CREATE
);
* union_vn_create: creates and opens a new shadow file
* on the upper union layer. this function is similar
* in spirit to calling vn_open but it avoids calling namei().
* the problem with calling namei is that a) it locks too many
* things, and b) it doesn't start at the "right" directory,
* whereas relookup is told where to start.
union_vn_create(vpp
, un
, p
)
struct ucred
*cred
= p
->p_ucred
;
struct vattr
*vap
= &vat
;
int fmode
= FFLAGS(O_WRONLY
|O_CREAT
|O_TRUNC
|O_EXCL
);
int cmode
= UN_FILEMODE
& ~p
->p_fd
->fd_cmask
;
* Build a new componentname structure (for the same
* reasons outlined in union_mkshadow).
* The difference here is that the file is owned by
* the current user, rather than by the person who
* did the mount, since the current user needs to be
* able to write the file (that's why it is being
* copied in the first place).
/*
 * Duplicate the saved path name into a fresh name buffer.
 * The bcopy() below copies cn_namelen + 1 bytes (the name plus its
 * terminating NUL), so the buffer must be allocated with room for
 * that extra byte; allocating only cn_namelen bytes overruns the
 * allocation by one.
 */
cn.cn_namelen = strlen(un->un_path);
cn.cn_pnbuf = (caddr_t) malloc(cn.cn_namelen + 1, M_NAMEI, M_WAITOK);
bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
cn
.cn_flags
= (LOCKPARENT
|HASBUF
|SAVENAME
|SAVESTART
|ISLASTCN
);
cn
.cn_nameptr
= cn
.cn_pnbuf
;
cn
.cn_hash
= un
->un_hash
;
if (error
= relookup(un
->un_dirvp
, &vp
, &cn
))
VOP_ABORTOP(un
->un_dirvp
, &cn
);
* Good - there was no race to create the file
* so go ahead and create it. The permissions
* on the file will be 0666 modified by the
* current user's umask. Access to the file, while
* it is unioned, will require access to the top *and*
* bottom files. Access when not unioned will simply
* require access to the top-level file.
* TODO: confirm choice of access permissions.
VOP_LEASE(un
->un_dirvp
, p
, cred
, LEASE_WRITE
);
if (error
= VOP_CREATE(un
->un_dirvp
, &vp
, &cn
, vap
))
if (error
= VOP_OPEN(vp
, fmode
, cred
, p
)) {
union_vn_close(vp
, fmode
, cred
, p
)
return (VOP_CLOSE(vp
, fmode
, cred
, p
));
struct proc
*p
= curproc
; /* XXX */
union_newupper(un
, NULLVP
);
if (un
->un_flags
& UN_CACHED
) {
un
->un_flags
&= ~UN_CACHED
;
LIST_REMOVE(un
, un_cache
);
if (un
->un_flags
& UN_ULOCK
) {
un
->un_flags
&= ~UN_ULOCK
;
VOP_UNLOCK(un
->un_uppervp
, 0, p
);
struct union_node
*un
= VTOUNION(vp
);
if ((un
->un_lowervp
!= NULLVP
) &&
(vp
->v_type
== un
->un_lowervp
->v_type
)) {
if (vget(un
->un_lowervp
, 0) == 0)
* determine whether a whiteout is needed
* during a remove/rmdir operation.
union_dowhiteout(un
, cred
, p
)
if (un
->un_lowervp
!= NULLVP
)
if (VOP_GETATTR(un
->un_uppervp
, &va
, cred
, p
) == 0 &&
union_dircache_r(vp
, vppp
, cntp
)
if (vp
->v_op
!= union_vnodeop_p
) {
panic("union: dircache table too small");
if (un
->un_uppervp
!= NULLVP
)
union_dircache_r(un
->un_uppervp
, vppp
, cntp
);
if (un
->un_lowervp
!= NULLVP
)
union_dircache_r(un
->un_lowervp
, vppp
, cntp
);
vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
, p
);
dircache
= VTOUNION(vp
)->un_dircache
;
union_dircache_r(vp
, 0, &cnt
);
dircache
= (struct vnode
**)
malloc(cnt
* sizeof(struct vnode
*),
union_dircache_r(vp
, &vpp
, &cnt
);
if (*vpp
++ == VTOUNION(vp
)->un_uppervp
)
} while (*vpp
!= NULLVP
);
vn_lock(*vpp
, LK_EXCLUSIVE
| LK_RETRY
, p
);
error
= union_allocvp(&nvp
, vp
->v_mount
, NULLVP
, NULLVP
, 0, *vpp
, NULLVP
, 0);
VTOUNION(vp
)->un_dircache
= 0;
un
->un_dircache
= dircache
;