* Copyright (c) 1989 The Regents of the University of California.
* %sccs.include.redist.c%
* @(#)ufs_lookup.c 7.36 (Berkeley) %G%
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
struct nchstats nchstats
;
* Convert a component of a pathname into a pointer to a locked inode.
* This is a very central and rather complicated routine.
* If the file system is not maintained in a strict tree hierarchy,
* this can result in a deadlock situation (see comments in code below).
* The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
* whether the name is to be looked up, created, renamed, or deleted.
* When CREATE, RENAME, or DELETE is specified, information usable in
* creating, renaming, or deleting a directory entry may be calculated.
* If flag has LOCKPARENT or'ed into it and the target of the pathname
* exists, lookup returns both the target and its parent directory locked.
* When creating or renaming and LOCKPARENT is specified, the target may
* not be ".". When deleting and LOCKPARENT is specified, the target may
* be ".", but the caller must check to ensure it does a vrele and iput
* Overall outline of ufs_lookup:
* check accessibility of directory
* look for name in cache, if found, then if at end of path
* and deleting or creating, drop it, else return name
* search for name in directory, to found or notfound
* if creating, return locked directory, leaving info on available slots
* if at end of path and deleting, return information to allow delete
* if at end of path and rewriting (RENAME and LOCKPARENT), lock target
* inode and return info to allow rewrite
* if not at end, add name to cache; if at end and neither creating
* nor deleting, add name to cache
* NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
register struct vnode
*vdp
;
register struct nameidata
*ndp
;
register struct inode
*dp
; /* the directory we are searching */
struct buf
*bp
; /* a buffer of directory entries */
register struct direct
*ep
; /* the current directory entry */
int entryoffsetinblock
; /* offset of ep in bp's buffer */
enum {NONE
, COMPACT
, FOUND
} slotstatus
;
int slotoffset
; /* offset of area with free space */
int slotsize
; /* size of area at slotoffset */
int slotfreespace
; /* amount of space free in slot */
int slotneeded
; /* size of the entry we're seeking */
int numdirpasses
; /* strategy for directory search */
int endsearch
; /* offset to end directory search */
int prevoff
; /* prev entry ndp->ni_ufs.ufs_offset */
struct inode
*pdp
; /* saved dp during symlink work */
struct vnode
*tdp
; /* returned by VOP_VGET */
off_t enduseful
; /* pointer past last used dir slot */
u_long bmask
; /* block offset mask */
int flag
; /* LOOKUP, CREATE, RENAME, or DELETE */
int lockparent
; /* 1 => lockparent flag is set */
int wantparent
; /* 1 => wantparent or lockparent flag */
lockparent
= ndp
->ni_nameiop
& LOCKPARENT
;
flag
= ndp
->ni_nameiop
& OPMASK
;
wantparent
= ndp
->ni_nameiop
& (LOCKPARENT
|WANTPARENT
);
* Check accessibility of directory.
if ((dp
->i_mode
&IFMT
) != IFDIR
)
if (error
= ufs_access(vdp
, VEXEC
, ndp
->ni_cred
, p
))
* We now have a segment name to search for, and a directory to search.
* Before tediously performing a linear scan of the directory,
* check the name cache to see if the directory/name pair
* we are looking for is known already.
if (error
= cache_lookup(ndp
)) {
int vpid
; /* capability number of vnode */
if (vdp
== ndp
->ni_rdir
&& ndp
->ni_isdotdot
)
panic("ufs_lookup: .. through root");
* Get the next vnode in the path.
* See comment below starting `Step through' for
* an explanation of the locking protocol.
} else if (ndp
->ni_isdotdot
) {
if (!error
&& lockparent
&& *ndp
->ni_next
== '\0')
if (!lockparent
|| error
|| *ndp
->ni_next
!= '\0')
* Check that the capability number did not change
* while we were waiting for the lock.
if (lockparent
&& pdp
!= dp
&& *ndp
->ni_next
== '\0')
* Suppress search for slots unless creating
* file and at end of pathname, in which case
* we watch for a place to put the new file in
* case it doesn't already exist.
if ((flag
== CREATE
|| flag
== RENAME
) && *ndp
->ni_next
== 0) {
slotneeded
= ((sizeof (struct direct
) - (MAXNAMLEN
+ 1)) +
((ndp
->ni_namelen
+ 1 + 3) &~ 3));
* If there is cached information on a previous search of
* this directory, pick up where we last left off.
* We cache only lookups as these are the most common
* and have the greatest payoff. Caching CREATE has little
* benefit as it usually must search the entire directory
* to determine that the entry does not exist. Caching the
* location of the last DELETE or RENAME has not reduced
* profiling time and hence has been removed in the interest
bmask
= VFSTOUFS(vdp
->v_mount
)->um_mountp
->mnt_stat
.f_iosize
- 1;
if (flag
!= LOOKUP
|| dp
->i_diroff
== 0 || dp
->i_diroff
> dp
->i_size
) {
ndp
->ni_ufs
.ufs_offset
= 0;
ndp
->ni_ufs
.ufs_offset
= dp
->i_diroff
;
if ((entryoffsetinblock
= ndp
->ni_ufs
.ufs_offset
& bmask
) &&
(error
= VOP_BLKATOFF(vdp
, ndp
->ni_ufs
.ufs_offset
, NULL
,
endsearch
= roundup(dp
->i_size
, DIRBLKSIZ
);
while (ndp
->ni_ufs
.ufs_offset
< endsearch
) {
* If offset is on a block boundary, read the next directory
* block. Release previous if it exists.
if ((ndp
->ni_ufs
.ufs_offset
& bmask
) == 0) {
if (error
= VOP_BLKATOFF(vdp
, ndp
->ni_ufs
.ufs_offset
,
* If still looking for a slot, and at a DIRBLKSIZ
* boundary, have to start looking for free space again.
if (slotstatus
== NONE
&&
(entryoffsetinblock
& (DIRBLKSIZ
- 1)) == 0) {
* Get pointer to next entry.
* Full validation checks are slow, so we only check
* enough to ensure forward progress through the
* directory. Complete checks can be run by patching
ep
= (struct direct
*)(bp
->b_un
.b_addr
+ entryoffsetinblock
);
dirchk
&& ufs_dirbadentry(ep
, entryoffsetinblock
)) {
ufs_dirbad(dp
, ndp
->ni_ufs
.ufs_offset
, "mangled entry");
i
= DIRBLKSIZ
- (entryoffsetinblock
& (DIRBLKSIZ
- 1));
ndp
->ni_ufs
.ufs_offset
+= i
;
* If an appropriate sized slot has not yet been found,
* check to see if one is available. Also accumulate space
* in the current block so that we can determine if
if (slotstatus
!= FOUND
) {
if (size
>= slotneeded
) {
slotoffset
= ndp
->ni_ufs
.ufs_offset
;
} else if (slotstatus
== NONE
) {
if (slotfreespace
>= slotneeded
) {
ep
->d_reclen
- slotoffset
;
* Check for a name match.
if (ep
->d_namlen
== ndp
->ni_namelen
&&
!bcmp(ndp
->ni_ptr
, ep
->d_name
,
(unsigned)ep
->d_namlen
)) {
* Save directory entry's inode number and
* reclen in ndp->ni_ufs area, and release
ndp
->ni_ufs
.ufs_ino
= ep
->d_ino
;
ndp
->ni_ufs
.ufs_reclen
= ep
->d_reclen
;
prevoff
= ndp
->ni_ufs
.ufs_offset
;
ndp
->ni_ufs
.ufs_offset
+= ep
->d_reclen
;
entryoffsetinblock
+= ep
->d_reclen
;
enduseful
= ndp
->ni_ufs
.ufs_offset
;
* If we started in the middle of the directory and failed
* to find our target, we must check the beginning as well.
ndp
->ni_ufs
.ufs_offset
= 0;
endsearch
= dp
->i_diroff
;
* If creating, and at end of pathname and current
* directory has not been removed, then can consider
* allowing file to be created.
if ((flag
== CREATE
|| flag
== RENAME
) &&
*ndp
->ni_next
== 0 && dp
->i_nlink
!= 0) {
* Access for write is interpreted as allowing
* creation of files in the directory.
if (error
= ufs_access(vdp
, VWRITE
, ndp
->ni_cred
, p
))
* Return an indication of where the new directory
* entry should be put. If we didn't find a slot,
* then set ndp->ni_ufs.ufs_count to 0 indicating
* that the new slot belongs at the end of the
* directory. If we found a slot, then the new entry
* can be put in the range from ndp->ni_ufs.ufs_offset
* to ndp->ni_ufs.ufs_offset + ndp->ni_ufs.ufs_count.
if (slotstatus
== NONE
) {
ndp
->ni_ufs
.ufs_offset
= roundup(dp
->i_size
, DIRBLKSIZ
);
ndp
->ni_ufs
.ufs_count
= 0;
enduseful
= ndp
->ni_ufs
.ufs_offset
;
ndp
->ni_ufs
.ufs_offset
= slotoffset
;
ndp
->ni_ufs
.ufs_count
= slotsize
;
if (enduseful
< slotoffset
+ slotsize
)
enduseful
= slotoffset
+ slotsize
;
ndp
->ni_ufs
.ufs_endoff
= roundup(enduseful
, DIRBLKSIZ
);
* We return with the directory locked, so that
* the parameters we set up above will still be
* valid if we actually decide to do a direnter().
* We return ni_vp == NULL to indicate that the entry
* does not currently exist; we leave a pointer to
* the (locked) directory inode in ndp->ni_dvp.
* The pathname buffer is saved so that the name
* NB - if the directory is unlocked, then this
* information cannot be used.
ndp
->ni_nameiop
|= SAVENAME
;
* Insert name into cache (as non-existent) if appropriate.
if (ndp
->ni_makeentry
&& flag
!= CREATE
)
* Check that directory length properly reflects presence
if (entryoffsetinblock
+ DIRSIZ(ep
) > dp
->i_size
) {
ufs_dirbad(dp
, ndp
->ni_ufs
.ufs_offset
, "i_size too small");
dp
->i_size
= entryoffsetinblock
+ DIRSIZ(ep
);
* Found component in pathname.
* If the final component of path name, save information
* in the cache as to where the entry was found.
if (*ndp
->ni_next
== '\0' && flag
== LOOKUP
)
dp
->i_diroff
= ndp
->ni_ufs
.ufs_offset
&~ (DIRBLKSIZ
- 1);
* If deleting, and at end of pathname, return
* parameters which can be used to remove file.
* If the wantparent flag isn't set, we return only
* the directory (in ndp->ni_dvp), otherwise we go
* on and lock the inode, being careful with ".".
if (flag
== DELETE
&& *ndp
->ni_next
== 0) {
* Write access to directory required to delete files.
if (error
= ufs_access(vdp
, VWRITE
, ndp
->ni_cred
, p
))
* Return pointer to current entry in ndp->ni_ufs.ufs_offset,
* and distance past previous entry (if there
* is a previous entry in this block) in ndp->ni_ufs.ufs_count.
* Save directory inode pointer in ndp->ni_dvp for dirremove().
if ((ndp
->ni_ufs
.ufs_offset
&(DIRBLKSIZ
-1)) == 0)
ndp
->ni_ufs
.ufs_count
= 0;
ndp
->ni_ufs
.ufs_offset
- prevoff
;
if (dp
->i_number
== ndp
->ni_ufs
.ufs_ino
) {
if (error
= VOP_VGET(vdp
, ndp
->ni_ufs
.ufs_ino
, &tdp
))
* If directory is "sticky", then user must own
* the directory, or the file in it, else she
* may not delete it (unless she's root). This
* implements append-only directories.
if ((dp
->i_mode
& ISVTX
) &&
ndp
->ni_cred
->cr_uid
!= 0 &&
ndp
->ni_cred
->cr_uid
!= dp
->i_uid
&&
VTOI(tdp
)->i_uid
!= ndp
->ni_cred
->cr_uid
) {
* If rewriting (RENAME), return the inode and the
* information required to rewrite the present directory
* Must get inode of directory entry to verify it's a
* regular file, or empty directory.
if (flag
== RENAME
&& wantparent
&& *ndp
->ni_next
== 0) {
if (error
= ufs_access(vdp
, VWRITE
, ndp
->ni_cred
, p
))
* Careful about locking second inode.
* This can only occur if the target is ".".
if (dp
->i_number
== ndp
->ni_ufs
.ufs_ino
)
if (error
= VOP_VGET(vdp
, ndp
->ni_ufs
.ufs_ino
, &tdp
))
ndp
->ni_nameiop
|= SAVENAME
;
* Step through the translation in the name. We do not `iput' the
* directory because we may need it again if a symbolic link
* is relative to the current directory. Instead we save it
* unlocked as "pdp". We must get the target inode before unlocking
* the directory to ensure that the inode will not be removed
* before we get it. We prevent deadlock by always fetching
* inodes from the root, moving down the directory tree. Thus
* when following backward pointers ".." we must unlock the
* parent directory before getting the requested directory.
* There is a potential race condition here if both the current
* and parent directories are removed before the `iget' for the
* inode associated with ".." returns. We hope that this occurs
* infrequently since we cannot avoid this race condition without
* implementing a sophisticated deadlock detection algorithm.
* Note also that this simple deadlock detection scheme will not
* work if the file system has any hard links other than ".."
* that point backwards in the directory structure.
IUNLOCK(pdp
); /* race to get the inode */
if (error
= VOP_VGET(vdp
, ndp
->ni_ufs
.ufs_ino
, &tdp
)) {
if (lockparent
&& *ndp
->ni_next
== '\0')
} else if (dp
->i_number
== ndp
->ni_ufs
.ufs_ino
) {
VREF(vdp
); /* we want ourself, ie "." */
if (error
= VOP_VGET(vdp
, ndp
->ni_ufs
.ufs_ino
, &tdp
))
if (!lockparent
|| *ndp
->ni_next
!= '\0')
* Insert name into cache if appropriate.
/*
 * ufs_dirbad: report a damaged directory.
 *
 * Prints one line naming the mount point (mp->mnt_stat.f_mntonname), the
 * directory's inode number (ip->i_number), the offset at which the damage
 * was noticed, and the caller-supplied description `how`.
 *
 * NOTE(review): the parameter declarations, the assignment of `mp`, and the
 * statement controlled by the MNT_RDONLY test are not visible in this
 * excerpt -- confirm against the full file before relying on this summary.
 */
ufs_dirbad(ip
, offset
, how
)
(void)printf("%s: bad dir ino %d at offset %d: %s\n",
mp
->mnt_stat
.f_mntonname
, ip
->i_number
, offset
, how
);
/* Taken only when the file system is NOT mounted read-only. */
if ((mp
->mnt_stat
.f_flags
& MNT_RDONLY
) == 0)
* Do consistency checking on a directory entry:
* record length must be multiple of 4
* entry must fit in rest of its DIRBLKSIZ block
* record must be large enough to contain entry
* name is not longer than MAXNAMLEN
* name must be as long as advertised, and null terminated
/*
 * ufs_dirbadentry: sanity-check one directory entry.
 *
 *	ep			entry to examine
 *	entryoffsetinblock	offset of ep within its buffer; only the
 *				low bits (mod DIRBLKSIZ) are used here
 *
 * NOTE(review): the return statements and any labels are missing from this
 * excerpt, so the exact result for each failing check cannot be confirmed
 * from here.
 */
ufs_dirbadentry(ep
, entryoffsetinblock
)
register struct direct
*ep
;
/*
 * Structural checks on the record header:
 *   - d_reclen must be a multiple of 4;
 *   - d_reclen must not run past the end of the current DIRBLKSIZ block;
 *   - d_reclen must be at least DIRSIZ(ep), the space the entry needs;
 *   - d_namlen must not exceed MAXNAMLEN.
 */
if ((ep
->d_reclen
& 0x3) != 0 ||
ep
->d_reclen
> DIRBLKSIZ
- (entryoffsetinblock
& (DIRBLKSIZ
- 1)) ||
ep
->d_reclen
< DIRSIZ(ep
) || ep
->d_namlen
> MAXNAMLEN
) {
/* Scan the advertised name length for an embedded NUL byte. */
for (i
= 0; i
< ep
->d_namlen
; i
++)
if (ep
->d_name
[i
] == '\0') {
/*
 * Diagnostic dump of the entry's fields.
 * NOTE(review): which path(s) reach this printf is not visible here.
 */
printf("ufs_dirbadentry: jumping out: reclen: %d namlen %d ino %d name %s\n",
ep
->d_reclen
, ep
->d_namlen
, ep
->d_ino
, ep
->d_name
);
* Write a directory entry after a call to namei, using the parameters
* that it left in nameidata. The argument ip is the inode which the new
* directory entry will refer to. The nameidata field ndp->ni_dvp is a
* pointer to the directory to be written, which was left locked by namei.
* Remaining parameters (ndp->ni_ufs.ufs_offset, ndp->ni_ufs.ufs_count)
* indicate how the space for the new entry is to be obtained.
register struct nameidata
*ndp
;
register struct direct
*ep
, *nep
;
register struct inode
*dp
;
register struct vnode
*dvp
;
int error
, loc
, newentrysize
, spacefree
;
if ((ndp
->ni_nameiop
& SAVENAME
) == 0)
panic("direnter: missing name");
newdir
.d_ino
= ip
->i_number
;
newdir
.d_namlen
= ndp
->ni_namelen
;
bcopy(ndp
->ni_ptr
, newdir
.d_name
, (unsigned)ndp
->ni_namelen
+ 1);
newentrysize
= DIRSIZ(&newdir
);
if (ndp
->ni_ufs
.ufs_count
== 0) {
* If ndp->ni_ufs.ufs_count is 0, then namei could find no
* space in the directory. Here, ndp->ni_ufs.ufs_offset will
* be on a directory block boundary and we will write the
* new entry into a fresh block.
if (ndp
->ni_ufs
.ufs_offset
& (DIRBLKSIZ
- 1))
auio
.uio_offset
= ndp
->ni_ufs
.ufs_offset
;
newdir
.d_reclen
= DIRBLKSIZ
;
auio
.uio_resid
= newentrysize
;
aiov
.iov_len
= newentrysize
;
aiov
.iov_base
= (caddr_t
)&newdir
;
auio
.uio_segflg
= UIO_SYSSPACE
;
auio
.uio_procp
= (struct proc
*)0;
error
= VOP_WRITE(dvp
, &auio
, IO_SYNC
, ndp
->ni_cred
);
VFSTOUFS(dvp
->v_mount
)->um_mountp
->mnt_stat
.f_bsize
)
/* XXX should grow with balloc() */
panic("ufs_direnter: frag size");
dp
->i_size
= roundup(dp
->i_size
, DIRBLKSIZ
);
* If ndp->ni_ufs.ufs_count is non-zero, then namei found space
* for the new entry in the range ndp->ni_ufs.ufs_offset to
* ndp->ni_ufs.ufs_offset + ndp->ni_ufs.ufs_count in the directory.
* To use this space, we may have to compact the entries located
* there, by copying them together towards the beginning of the
* block, leaving the free space in one usable chunk at the end.
* Increase size of directory if entry eats into new space.
* This should never push the size past a new multiple of
* N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
if (ndp
->ni_ufs
.ufs_offset
+ ndp
->ni_ufs
.ufs_count
> dp
->i_size
)
dp
->i_size
= ndp
->ni_ufs
.ufs_offset
+ ndp
->ni_ufs
.ufs_count
;
* Get the block containing the space for the new directory entry.
if (error
= VOP_BLKATOFF(dvp
, ndp
->ni_ufs
.ufs_offset
, &dirbuf
, &bp
))
* Find space for the new entry. In the simple case, the entry at
* offset base will have the space. If it does not, then namei
* arranged that compacting the region ndp->ni_ufs.ufs_offset to
* ndp->ni_ufs.ufs_offset + ndp->ni_ufs.ufs_count would yield the
ep
= (struct direct
*)dirbuf
;
spacefree
= ep
->d_reclen
- dsize
;
for (loc
= ep
->d_reclen
; loc
< ndp
->ni_ufs
.ufs_count
; ) {
nep
= (struct direct
*)(dirbuf
+ loc
);
/* trim the existing slot */
ep
= (struct direct
*)((char *)ep
+ dsize
);
/* overwrite; nothing there; header is ours */
spacefree
+= nep
->d_reclen
- dsize
;
bcopy((caddr_t
)nep
, (caddr_t
)ep
, dsize
);
* Update the pointer fields in the previous entry (if any),
* copy in the new entry, and write out the block.
if (spacefree
+ dsize
< newentrysize
)
newdir
.d_reclen
= spacefree
+ dsize
;
if (spacefree
< newentrysize
)
newdir
.d_reclen
= spacefree
;
ep
= (struct direct
*)((char *)ep
+ dsize
);
bcopy((caddr_t
)&newdir
, (caddr_t
)ep
, (u_int
)newentrysize
);
if (!error
&& ndp
->ni_ufs
.ufs_endoff
&&
ndp
->ni_ufs
.ufs_endoff
< dp
->i_size
)
error
= VOP_TRUNCATE(dvp
, (u_long
)ndp
->ni_ufs
.ufs_endoff
,
* Remove a directory entry after a call to namei, using
* the parameters which it left in nameidata. The entry
* ni_ufs.ufs_offset contains the offset into the directory of the
* entry to be eliminated. The ni_ufs.ufs_count field contains the
* size of the previous record in the directory. If this
* is 0, the first entry is being deleted, so we need only
* zero the inode number to mark the entry as free. If the
* entry is not the first in the directory, we must reclaim
* the space of the now empty record by adding the record size
* to the size of the previous entry.
register struct nameidata
*ndp
;
register struct inode
*dp
;
if (ndp
->ni_ufs
.ufs_count
== 0) {
* First entry in block: set d_ino to zero.
if (error
= VOP_BLKATOFF(ndp
->ni_dvp
, ndp
->ni_ufs
.ufs_offset
,
* Collapse new free space into previous entry.
if (error
= VOP_BLKATOFF(ndp
->ni_dvp
,
ndp
->ni_ufs
.ufs_offset
- ndp
->ni_ufs
.ufs_count
, (char **)&ep
, &bp
))
ep
->d_reclen
+= ndp
->ni_ufs
.ufs_reclen
;
* Rewrite an existing directory entry to point at the inode
* supplied. The parameters describing the directory entry are
* set up by a call to namei.
/*
 * ufs_dirrewrite: repoint an existing directory entry at inode `ip`.
 *
 *	dp	directory inode containing the entry
 *	ip	inode the entry should refer to afterwards
 *	ndp	nameidata; ndp->ni_ufs.ufs_offset locates the entry
 *
 * NOTE(review): the remaining VOP_BLKATOFF arguments, the error return and
 * the write-back of the buffer are not visible in this excerpt.
 */
ufs_dirrewrite(dp
, ip
, ndp
)
/* Fetch the directory block that holds the entry at ufs_offset. */
if (error
= VOP_BLKATOFF(ITOV(dp
), ndp
->ni_ufs
.ufs_offset
,
/* Overwrite only the inode number; the rest of the entry is untouched here. */
ep
->d_ino
= ip
->i_number
;
* Check if a directory is empty or not.
* Inode supplied must be locked.
* Using a struct dirtemplate here is not precisely
* what we want, but better than using a struct direct.
* NB: does not handle corrupted directories.
/*
 * ufs_dirempty: scan directory `ip` looking only at entry headers.
 *
 *	ip		directory inode to scan
 *	parentino	inode number compared against the ".." entry
 *	cred		credentials passed to vn_rdwr
 *
 * NOTE(review): the return statements and several of the per-entry tests
 * are missing from this excerpt; the comments below describe only what the
 * visible lines do.
 */
ufs_dirempty(ip
, parentino
, cred
)
register struct inode
*ip
;
/* `dp` is a struct direct view of the local buffer `dbuf`. */
register struct direct
*dp
= (struct direct
*)&dbuf
;
/* Bytes read per entry: half of a struct dirtemplate. */
#define MINDIRSIZ (sizeof (struct dirtemplate) / 2)
/* Walk the directory entry by entry, stepping by each record's d_reclen. */
for (off
= 0; off
< ip
->i_size
; off
+= dp
->d_reclen
) {
/* Read MINDIRSIZ bytes at `off` into dp; the residual is stored in `count`. */
error
= vn_rdwr(UIO_READ
, ITOV(ip
), (caddr_t
)dp
, MINDIRSIZ
, off
,
UIO_SYSSPACE
, IO_NODELOCKED
, cred
, &count
, (struct proc
*)0);
* Since we read MINDIRSIZ, residual must
* be 0 unless we're at end of file.
/* avoid infinite loops */
/* accept only "." and ".." */
/* A name that does not start with '.' is neither "." nor "..". */
if (dp
->d_name
[0] != '.')
* At this point d_namlen must be 1 or 2.
* 1 implies ".", 2 implies ".." if second
/* ".." is matched only when its inode number equals parentino. */
if (dp
->d_name
[1] == '.' && dp
->d_ino
== parentino
)
* Check if source directory is in the path of the target directory.
* Target is supplied locked, source is unlocked.
* The target is always iput before returning.
/*
 * ufs_checkpath: follow ".." entries upward, comparing each inode number
 * against `source` and against the root inode number.
 *
 *	source	inode whose number is searched for on the way up
 *	target	inode supplied by the caller
 *	cred	credentials passed to vn_rdwr
 *
 * NOTE(review): the loop construct, the initialisation of ip/vp/rootino,
 * the error codes and the release of the vnode are not visible in this
 * excerpt; presumably the walk starts at `target` -- confirm against the
 * full file.
 */
ufs_checkpath(source
, target
, cred
)
struct inode
*source
, *target
;
/* Holds the leading "." and ".." entries read from each directory. */
struct dirtemplate dirbuf
;
register struct inode
*ip
;
/* The inode currently examined is the source itself. */
if (ip
->i_number
== source
->i_number
) {
/* The inode currently examined is the root. */
if (ip
->i_number
== rootino
)
/* Each inode examined must be a directory. */
if ((ip
->i_mode
&IFMT
) != IFDIR
) {
/* Read a struct dirtemplate from offset 0 of the current directory. */
error
= vn_rdwr(UIO_READ
, vp
, (caddr_t
)&dirbuf
,
sizeof (struct dirtemplate
), (off_t
)0, UIO_SYSSPACE
,
IO_NODELOCKED
, cred
, (int *)0, (struct proc
*)0);
/* The second template entry must be named exactly "..". */
if (dirbuf
.dotdot_namlen
!= 2 ||
dirbuf
.dotdot_name
[0] != '.' ||
dirbuf
.dotdot_name
[1] != '.') {
/* The parent is the source directory. */
if (dirbuf
.dotdot_ino
== source
->i_number
) {
/* The parent is the root. */
if (dirbuf
.dotdot_ino
== rootino
)
/* Move up: fetch the parent's vnode, replacing vp. */
if (error
= VOP_VGET(vp
, dirbuf
.dotdot_ino
, &vp
))
printf("checkpath: .. not a directory\n");