add IRENAME to prevent reentrant rename for same directory
[unix-history] / usr / src / sys / ufs / ffs / ufs_lookup.c
... / ...
CommitLineData
1/* ufs_lookup.c 6.13 84/07/08 */
2
3#include "../h/param.h"
4#include "../h/systm.h"
5#include "../h/inode.h"
6#include "../h/fs.h"
7#include "../h/mount.h"
8#include "../h/dir.h"
9#include "../h/user.h"
10#include "../h/buf.h"
11#include "../h/conf.h"
12#include "../h/uio.h"
13#include "../h/kernel.h"
14
15struct buf *blkatoff();
16struct buf *freenamebuf;
17int dirchk = 0;
18
19/*
20 * Structures associated with name cacheing.
21 */
22#define NCHHASH 32 /* size of hash table */
23
24#if ((NCHHASH)&((NCHHASH)-1)) != 0
25#define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH))
26#else
27#define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1))
28#endif
29
30union nchash {
31 union nchash *nch_head[2];
32 struct nch *nch_chain[2];
33} nchash[NCHHASH];
34#define nch_forw nch_chain[0]
35#define nch_back nch_chain[1]
36
37struct nch *nchhead, **nchtail; /* LRU chain pointers */
38struct nchstats nchstats; /* cache effectiveness statistics */
39
40/*
41 * Convert a pathname into a pointer to a locked inode,
42 * with side effects usable in creating and removing files.
43 * This is a very central and rather complicated routine.
44 *
45 * The segflg defines whether the name is to be copied from user
46 * space or kernel space.
47 *
48 * The flag argument is (LOOKUP, CREATE, DELETE) depending on whether
49 * the name is to be (looked up, created, deleted). If flag has
50 * LOCKPARENT or'ed into it and the target of the pathname exists,
51 * namei returns both the target and its parent directory locked.
52 * If the file system is not maintained in a strict tree hierarchy,
53 * this can result in a deadlock situation. When creating and
54 * LOCKPARENT is specified, the target may not be ".". When deleting
55 * and LOCKPARENT is specified, the target may be ".", but the caller
56 * must check to insure it does an irele and iput instead of two iputs.
57 *
58 * The FOLLOW flag is set when symbolic links are to be followed
59 * when they occur at the end of the name translation process.
60 *
61 * Name caching works as follows:
62 *
63 * names found by directory scans are retained in a cache
64 * for future reference. It is managed LRU, so frequently
65 * used names will hang around. Cache is indexed by hash value
66 * obtained from (ino,dev,name) where ino & dev refer to the
67 * directory containing name.
68 *
69 * For simplicity (and economy of storage), names longer than
70 * some (small) maximum length are not cached, they occur
71 * infrequently in any case, and are almost never of interest.
72 *
73 * Upon reaching the last segment of a path, if the reference
74 * is for DELETE, or NOCACHE is set (rewrite), and the
75 * name is located in the cache, it will be dropped.
76 *
77 * We must be sure never to enter the name ".." into the cache
78 * because of the extremely kludgey way that rename() alters
79 * ".." in a situation like
80 * mv a/x b/x
81 * where x is a directory, and x/.. is the ".." in question.
82 *
83 * Overall outline of namei:
84 *
85 * copy in name
86 * get starting directory
87 * dirloop:
88 * check accessibility of directory
89 * dirloop2:
90 * copy next component of name to ndp->ni_dent
91 * handle degenerate case where name is null string
92 * look for name in cache, if found, then if at end of path
93 * and deleting or creating, drop it, else to haveino
94 * search for name in directory, to found or notfound
95 * notfound:
96 * if creating, return locked directory, leaving info on avail. slots
97 * else return error
98 * found:
99 * if at end of path and deleting, return information to allow delete
100 * if at end of path and rewriting (create and LOCKPARENT), lock target
101 * inode and return info to allow rewrite
102 * if .. and on mounted filesys, look in mount table for parent
103 * if not at end, if neither creating nor deleting, add name to cache
104 * haveino:
105 * if symbolic link, massage name in buffer and continue at dirloop
106 * if more components of name, do next level at dirloop
107 * return the answer as locked inode
108 *
109 * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode,
110 * but unlocked.
111 */
112struct inode *
113namei(ndp)
114 register struct nameidata *ndp;
115{
116 register char *cp; /* pointer into pathname argument */
117/* these variables refer to things which must be freed or unlocked */
118 register struct inode *dp = 0; /* the directory we are searching */
119 register struct nch *ncp; /* cache slot for entry */
120 register struct fs *fs; /* file system that directory is in */
121 register struct buf *bp = 0; /* a buffer of directory entries */
122 register struct direct *ep; /* the current directory entry */
123 int entryoffsetinblock; /* offset of ep in bp's buffer */
124 register struct buf *nbp; /* buffer storing path name argument */
125/* these variables hold information about the search for a slot */
126 enum {NONE, COMPACT, FOUND} slotstatus;
127 int slotoffset = -1; /* offset of area with free space */
128 int slotsize; /* size of area at slotoffset */
129 int slotfreespace; /* amount of space free in slot */
130 int slotneeded; /* size of the entry we're seeking */
131/* */
132 int numdirpasses; /* strategy for directory search */
133 int endsearch; /* offset to end directory search */
134 int prevoff; /* ndp->ni_offset of previous entry */
135 int nlink = 0; /* number of symbolic links taken */
136 struct inode *pdp; /* saved dp during symlink work */
137 int error, i;
138 int lockparent;
139 int docache;
140 unsigned hash; /* value of name hash for entry */
141 union nchash *nhp; /* cache chain head for entry */
142 int isdotdot; /* != 0 if current name is ".." */
143 int flag; /* op ie, LOOKUP, CREATE, or DELETE */
144
145 lockparent = ndp->ni_nameiop & LOCKPARENT;
146 docache = (ndp->ni_nameiop & NOCACHE) ^ NOCACHE;
147 flag = ndp->ni_nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW);
148 if (flag == DELETE)
149 docache = 0;
150 /*
151 * Get a buffer for the name to be translated, and copy the
152 * name into the buffer.
153 */
154 nbp = freenamebuf;
155 if (nbp == NULL)
156 nbp = geteblk(MAXPATHLEN);
157 else
158 freenamebuf = nbp->av_forw;
159 if (ndp->ni_segflg == UIO_SYSSPACE)
160 error = copystr(ndp->ni_dirp, nbp->b_un.b_addr, MAXPATHLEN,
161 (u_int *)0);
162 else
163 error = copyinstr(ndp->ni_dirp, nbp->b_un.b_addr, MAXPATHLEN,
164 (u_int *)0);
165 if (error) {
166 u.u_error = error;
167 goto bad;
168 }
169
170 /*
171 * Get starting directory.
172 */
173 cp = nbp->b_un.b_addr;
174 if (*cp == '/') {
175 while (*cp == '/')
176 cp++;
177 if ((dp = u.u_rdir) == NULL)
178 dp = rootdir;
179 } else
180 dp = u.u_cdir;
181 fs = dp->i_fs;
182 ILOCK(dp);
183 dp->i_count++;
184 ndp->ni_pdir = (struct inode *)0xc0000000; /* illegal */
185
186 /*
187 * We come to dirloop to search a new directory.
188 * The directory must be locked so that it can be
189 * iput, and fs must be already set to dp->i_fs.
190 */
191dirloop:
192 /*
193 * Check accessiblity of directory.
194 */
195 if ((dp->i_mode&IFMT) != IFDIR) {
196 u.u_error = ENOTDIR;
197 goto bad;
198 }
199 if (access(dp, IEXEC))
200 goto bad;
201
202dirloop2:
203 /*
204 * Copy next component of name to ndp->ni_dent.
205 */
206 hash = 0;
207 for (i = 0; *cp != 0 && *cp != '/'; cp++) {
208 if (i >= MAXNAMLEN) {
209 u.u_error = ENOENT;
210 goto bad;
211 }
212 if ((*cp&0377) == ('/'|0200) || (*cp&0200) && flag != DELETE) {
213 u.u_error = EPERM;
214 goto bad;
215 }
216 ndp->ni_dent.d_name[i++] = *cp;
217 hash += (unsigned char)*cp * i;
218 }
219 ndp->ni_dent.d_namlen = i;
220 ndp->ni_dent.d_name[i] = '\0';
221 isdotdot = (i == 2 &&
222 ndp->ni_dent.d_name[0] == '.' && ndp->ni_dent.d_name[1] == '.');
223
224 /*
225 * Check for degenerate name (e.g. / or "")
226 * which is a way of talking about a directory,
227 * e.g. like "/." or ".".
228 */
229 if (ndp->ni_dent.d_name[0] == '\0') {
230 if (flag != LOOKUP || lockparent) {
231 u.u_error = EISDIR;
232 goto bad;
233 }
234 nbp->av_forw = freenamebuf;
235 freenamebuf = nbp;
236 return (dp);
237 }
238
239 /*
240 * We now have a segment name to search for, and a directory to search.
241 *
242 * Before tediously performing a linear scan of the directory,
243 * check the name cache to see if the directory/name pair
244 * we are looking for is known already. We don't do this
245 * if the segment name is long, simply so the cache can avoid
246 * holding long names (which would either waste space, or
247 * add greatly to the complexity).
248 */
249 if (ndp->ni_dent.d_namlen > NCHNAMLEN) {
250 nchstats.ncs_long++;
251 docache = 0;
252 } else {
253 nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)];
254 for (ncp = nhp->nch_forw; ncp != (struct nch *)nhp;
255 ncp = ncp->nc_forw) {
256 if (ncp->nc_ino == dp->i_number &&
257 ncp->nc_dev == dp->i_dev &&
258 ncp->nc_nlen == ndp->ni_dent.d_namlen &&
259 !bcmp(ncp->nc_name, ndp->ni_dent.d_name,
260 ncp->nc_nlen))
261 break;
262 }
263
264 if (ncp == (struct nch *)nhp) {
265 nchstats.ncs_miss++;
266 ncp = NULL;
267 } else {
268 if (ncp->nc_id != ncp->nc_ip->i_id) {
269 nchstats.ncs_falsehits++;
270 } else if (*cp == '\0' && !docache) {
271 nchstats.ncs_badhits++;
272 } else {
273
274 /*
275 * move this slot to end of LRU
276 * chain, if not already there
277 */
278 if (ncp->nc_nxt) {
279 /* remove from LRU chain */
280 *ncp->nc_prev = ncp->nc_nxt;
281 ncp->nc_nxt->nc_prev = ncp->nc_prev;
282
283 /* and replace at end of it */
284 ncp->nc_nxt = NULL;
285 ncp->nc_prev = nchtail;
286 *nchtail = ncp;
287 nchtail = &ncp->nc_nxt;
288 }
289
290 /*
291 * Get the next inode in the path.
292 * See comment above other `IUNLOCK' code for
293 * an explaination of the locking protocol.
294 */
295 pdp = dp;
296 dp = ncp->nc_ip;
297 if (dp == NULL)
298 panic("nami: null cache ino");
299 if (pdp == dp)
300 dp->i_count++;
301 else {
302 if (isdotdot) {
303 IUNLOCK(pdp);
304 igrab(dp);
305 } else {
306 igrab(dp);
307 IUNLOCK(pdp);
308 }
309 }
310
311 /*
312 * Verify that the inode that we got
313 * did not change while we were waiting
314 * for it to be locked.
315 */
316 if (ncp->nc_id != ncp->nc_ip->i_id) {
317 iput(dp);
318 ILOCK(pdp);
319 dp = pdp;
320 nchstats.ncs_falsehits++;
321 } else {
322 ndp->ni_dent.d_ino = dp->i_number;
323 /* ni_dent.d_reclen is garbage ... */
324 nchstats.ncs_goodhits++;
325 goto haveino;
326 }
327 }
328
329 /*
330 * Last component and we are renaming or deleting,
331 * the cache entry is invalid, or otherwise don't
332 * want cache entry to exist.
333 */
334
335 /* remove from LRU chain */
336 *ncp->nc_prev = ncp->nc_nxt;
337 if (ncp->nc_nxt)
338 ncp->nc_nxt->nc_prev = ncp->nc_prev;
339 else
340 nchtail = ncp->nc_prev;
341
342 /* remove from hash chain */
343 remque(ncp);
344
345 /* insert at head of LRU list (first to grab) */
346 ncp->nc_nxt = nchhead;
347 ncp->nc_prev = &nchhead;
348 nchhead->nc_prev = &ncp->nc_nxt;
349 nchhead = ncp;
350
351 /* and make a dummy hash chain */
352 ncp->nc_forw = ncp;
353 ncp->nc_back = ncp;
354
355 ncp = NULL;
356 }
357 }
358
359 /*
360 * Suppress search for slots unless creating
361 * file and at end of pathname, in which case
362 * we watch for a place to put the new file in
363 * case it doesn't already exist.
364 */
365 slotstatus = FOUND;
366 if (flag == CREATE && *cp == 0) {
367 slotstatus = NONE;
368 slotfreespace = 0;
369 slotneeded = DIRSIZ(&ndp->ni_dent);
370 }
371 /*
372 * If this is the same directory that this process
373 * previously searched, pick up where we last left off.
374 * We cache only lookups as these are the most common
375 * and have the greatest payoff. Caching CREATE has little
376 * benefit as it usually must search the entire directory
377 * to determine that the entry does not exist. Caching the
378 * location of the last DELETE has not reduced profiling time
379 * and hence has been removed in the interest of simplicity.
380 */
381 if (flag != LOOKUP || dp->i_number != u.u_ncache.nc_inumber ||
382 dp->i_dev != u.u_ncache.nc_dev) {
383 ndp->ni_offset = 0;
384 numdirpasses = 1;
385 } else {
386 if ((dp->i_flag & ICHG) || dp->i_ctime >= u.u_ncache.nc_time) {
387 u.u_ncache.nc_prevoffset &= ~(DIRBLKSIZ - 1);
388 u.u_ncache.nc_time = time.tv_sec;
389 }
390 ndp->ni_offset = u.u_ncache.nc_prevoffset;
391 entryoffsetinblock = blkoff(fs, ndp->ni_offset);
392 if (entryoffsetinblock != 0) {
393 bp = blkatoff(dp, ndp->ni_offset, (char **)0);
394 if (bp == 0)
395 goto bad;
396 }
397 numdirpasses = 2;
398 nchstats.ncs_2passes++;
399 }
400 endsearch = roundup(dp->i_size, DIRBLKSIZ);
401
402searchloop:
403 while (ndp->ni_offset < endsearch) {
404 /*
405 * If offset is on a block boundary,
406 * read the next directory block.
407 * Release previous if it exists.
408 */
409 if (blkoff(fs, ndp->ni_offset) == 0) {
410 if (bp != NULL)
411 brelse(bp);
412 bp = blkatoff(dp, ndp->ni_offset, (char **)0);
413 if (bp == 0)
414 goto bad;
415 entryoffsetinblock = 0;
416 }
417
418 /*
419 * If still looking for a slot, and at a DIRBLKSIZE
420 * boundary, have to start looking for free space again.
421 */
422 if (slotstatus == NONE &&
423 (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) {
424 slotoffset = -1;
425 slotfreespace = 0;
426 }
427
428 /*
429 * Get pointer to next entry.
430 * Full validation checks are slow, so we only check
431 * enough to insure forward progress through the
432 * directory. Complete checks can be run by patching
433 * "dirchk" to be true.
434 */
435 ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock);
436 if (ep->d_reclen <= 0 ||
437 dirchk && dirbadentry(ep, entryoffsetinblock)) {
438 dirbad(dp, ndp->ni_offset, "mangled entry");
439 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
440 ndp->ni_offset += i;
441 entryoffsetinblock += i;
442 continue;
443 }
444
445 /*
446 * If an appropriate sized slot has not yet been found,
447 * check to see if one is available. Also accumulate space
448 * in the current block so that we can determine if
449 * compaction is viable.
450 */
451 if (slotstatus != FOUND) {
452 int size = ep->d_reclen;
453
454 if (ep->d_ino != 0)
455 size -= DIRSIZ(ep);
456 if (size > 0) {
457 if (size >= slotneeded) {
458 slotstatus = FOUND;
459 slotoffset = ndp->ni_offset;
460 slotsize = ep->d_reclen;
461 } else if (slotstatus == NONE) {
462 slotfreespace += size;
463 if (slotoffset == -1)
464 slotoffset = ndp->ni_offset;
465 if (slotfreespace >= slotneeded) {
466 slotstatus = COMPACT;
467 slotsize = ndp->ni_offset +
468 ep->d_reclen - slotoffset;
469 }
470 }
471 }
472 }
473
474 /*
475 * Check for a name match.
476 */
477 if (ep->d_ino) {
478 if (ep->d_namlen == ndp->ni_dent.d_namlen &&
479 !bcmp(ndp->ni_dent.d_name, ep->d_name,
480 ep->d_namlen))
481 goto found;
482 }
483 prevoff = ndp->ni_offset;
484 ndp->ni_offset += ep->d_reclen;
485 entryoffsetinblock += ep->d_reclen;
486 }
487/* notfound: */
488 /*
489 * If we started in the middle of the directory and failed
490 * to find our target, we must check the beginning as well.
491 */
492 if (numdirpasses == 2) {
493 numdirpasses--;
494 ndp->ni_offset = 0;
495 endsearch = u.u_ncache.nc_prevoffset;
496 goto searchloop;
497 }
498 /*
499 * If creating, and at end of pathname and current
500 * directory has not been removed, then can consider
501 * allowing file to be created.
502 */
503 if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) {
504 /*
505 * Access for write is interpreted as allowing
506 * creation of files in the directory.
507 */
508 if (access(dp, IWRITE))
509 goto bad;
510 /*
511 * Return an indication of where the new directory
512 * entry should be put. If we didn't find a slot,
513 * then set ndp->ni_count to 0 indicating that the new
514 * slot belongs at the end of the directory. If we found
515 * a slot, then the new entry can be put in the range
516 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count)
517 */
518 if (slotstatus == NONE) {
519 ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ);
520 ndp->ni_count = 0;
521 } else {
522 ndp->ni_offset = slotoffset;
523 ndp->ni_count = slotsize;
524 }
525 dp->i_flag |= IUPD|ICHG;
526 if (bp)
527 brelse(bp);
528 nbp->av_forw = freenamebuf;
529 freenamebuf = nbp;
530 /*
531 * We return with the directory locked, so that
532 * the parameters we set up above will still be
533 * valid if we actually decide to do a direnter().
534 * We return NULL to indicate that the entry doesn't
535 * currently exist, leaving a pointer to the (locked)
536 * directory inode in ndp->ni_pdir.
537 */
538 ndp->ni_pdir = dp;
539 return (NULL);
540 }
541 u.u_error = ENOENT;
542 goto bad;
543found:
544 if (numdirpasses == 2)
545 nchstats.ncs_pass2++;
546 /*
547 * Check that directory length properly reflects presence
548 * of this entry.
549 */
550 if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) {
551 dirbad(dp, ndp->ni_offset, "i_size too small");
552 dp->i_size = entryoffsetinblock + DIRSIZ(ep);
553 dp->i_flag |= IUPD|ICHG;
554 }
555
556 /*
557 * Found component in pathname.
558 * If the final component of path name, save information
559 * in the cache as to where the entry was found.
560 */
561 if (*cp == '\0' && flag == LOOKUP) {
562 u.u_ncache.nc_prevoffset = ndp->ni_offset;
563 u.u_ncache.nc_inumber = dp->i_number;
564 u.u_ncache.nc_dev = dp->i_dev;
565 u.u_ncache.nc_time = time.tv_sec;
566 }
567 /*
568 * Save directory entry in ndp->ni_dent,
569 * and release directory buffer.
570 */
571 bcopy((caddr_t)ep, (caddr_t)&ndp->ni_dent, (u_int)DIRSIZ(ep));
572 brelse(bp);
573 bp = NULL;
574
575 /*
576 * If deleting, and at end of pathname, return
577 * parameters which can be used to remove file.
578 * If the lockparent flag isn't set, we return only
579 * the directory (in ndp->ni_pdir), otherwise we go
580 * on and lock the inode, being careful with ".".
581 */
582 if (flag == DELETE && *cp == 0) {
583 /*
584 * Write access to directory required to delete files.
585 */
586 if (access(dp, IWRITE))
587 goto bad;
588 ndp->ni_pdir = dp; /* for dirremove() */
589 /*
590 * Return pointer to current entry in ndp->ni_offset,
591 * and distance past previous entry (if there
592 * is a previous entry in this block) in ndp->ni_count.
593 * Save directory inode pointer in ndp->ni_pdir for dirremove().
594 */
595 if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0)
596 ndp->ni_count = 0;
597 else
598 ndp->ni_count = ndp->ni_offset - prevoff;
599 if (lockparent) {
600 if (dp->i_number == ndp->ni_dent.d_ino)
601 dp->i_count++;
602 else {
603 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
604 if (dp == NULL) {
605 iput(ndp->ni_pdir);
606 goto bad;
607 }
608 /*
609 * If directory is "sticky", then user must own
610 * the directory, or the file in it, else he
611 * may not delete it (unless he's root). This
612 * implements append-only directories.
613 */
614 if ((ndp->ni_pdir->i_mode & ISVTX) &&
615 u.u_uid != 0 &&
616 u.u_uid != ndp->ni_pdir->i_uid &&
617 dp->i_uid != u.u_uid) {
618 iput(ndp->ni_pdir);
619 u.u_error = EPERM;
620 goto bad;
621 }
622 }
623 }
624 nbp->av_forw = freenamebuf;
625 freenamebuf = nbp;
626 return (dp);
627 }
628
629 /*
630 * Special handling for ".." allowing chdir out of mounted
631 * file system: indirect .. in root inode to reevaluate
632 * in directory file system was mounted on.
633 */
634 if (isdotdot) {
635 if (dp == u.u_rdir)
636 ndp->ni_dent.d_ino = dp->i_number;
637 else if (ndp->ni_dent.d_ino == ROOTINO &&
638 dp->i_number == ROOTINO) {
639 for (i = 1; i < NMOUNT; i++)
640 if (mount[i].m_bufp != NULL &&
641 mount[i].m_dev == dp->i_dev) {
642 iput(dp);
643 dp = mount[i].m_inodp;
644 ILOCK(dp);
645 dp->i_count++;
646 fs = dp->i_fs;
647 cp -= 2; /* back over .. */
648 goto dirloop2;
649 }
650 }
651 }
652
653 /*
654 * If rewriting (rename), return the inode and the
655 * information required to rewrite the present directory
656 * Must get inode of directory entry to verify it's a
657 * regular file, or empty directory.
658 */
659 if ((flag == CREATE && lockparent) && *cp == 0) {
660 if (access(dp, IWRITE))
661 goto bad;
662 ndp->ni_pdir = dp; /* for dirrewrite() */
663 /*
664 * Careful about locking second inode.
665 * This can only occur if the target is ".".
666 */
667 if (dp->i_number == ndp->ni_dent.d_ino) {
668 u.u_error = EISDIR; /* XXX */
669 goto bad;
670 }
671 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
672 if (dp == NULL) {
673 iput(ndp->ni_pdir);
674 goto bad;
675 }
676 nbp->av_forw = freenamebuf;
677 freenamebuf = nbp;
678 return (dp);
679 }
680
681 /*
682 * Check for symbolic link, which may require us to massage the
683 * name before we continue translation. We do not `iput' the
684 * directory because we may need it again if the symbolic link
685 * is relative to the current directory. Instead we save it
686 * unlocked as "pdp". We must get the target inode before unlocking
687 * the directory to insure that the inode will not be removed
688 * before we get it. We prevent deadlock by always fetching
689 * inodes from the root, moving down the directory tree. Thus
690 * when following backward pointers ".." we must unlock the
691 * parent directory before getting the requested directory.
692 * There is a potential race condition here if both the current
693 * and parent directories are removed before the `iget' for the
694 * inode associated with ".." returns. We hope that this occurs
695 * infrequently since we cannot avoid this race condition without
696 * implementing a sophisticated deadlock detection algorithm.
697 * Note also that this simple deadlock detection scheme will not
698 * work if the file system has any hard links other than ".."
699 * that point backwards in the directory structure.
700 */
701 pdp = dp;
702 if (isdotdot) {
703 IUNLOCK(pdp); /* race to get the inode */
704 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
705 if (dp == NULL)
706 goto bad2;
707 } else if (dp->i_number == ndp->ni_dent.d_ino) {
708 dp->i_count++; /* we want ourself, ie "." */
709 } else {
710 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
711 IUNLOCK(pdp);
712 if (dp == NULL)
713 goto bad2;
714 }
715
716 /*
717 * insert name into cache (if we want it, and it isn't "." or "..")
718 *
719 * all other cases where making a cache entry would be wrong
720 * have already departed from the code sequence somewhere above.
721 */
722 if (docache) {
723 if (ncp != NULL)
724 panic("nami: duplicating cache");
725
726 /*
727 * free the cache slot at head of lru chain
728 */
729 if (ncp = nchhead) {
730 /* remove from lru chain */
731 *ncp->nc_prev = ncp->nc_nxt;
732 if (ncp->nc_nxt)
733 ncp->nc_nxt->nc_prev = ncp->nc_prev;
734 else
735 nchtail = ncp->nc_prev;
736
737 /* remove from old hash chain */
738 remque(ncp);
739
740 /* grab the inode we just found */
741 ncp->nc_ip = dp;
742
743 /* fill in cache info */
744 ncp->nc_ino = pdp->i_number; /* parents inum */
745 ncp->nc_dev = pdp->i_dev; /* & device */
746 ncp->nc_idev = dp->i_dev; /* our device */
747 ncp->nc_id = dp->i_id; /* identifier */
748 ncp->nc_nlen = ndp->ni_dent.d_namlen;
749 bcopy(ndp->ni_dent.d_name, ncp->nc_name, ncp->nc_nlen);
750
751 /* link at end of lru chain */
752 ncp->nc_nxt = NULL;
753 ncp->nc_prev = nchtail;
754 *nchtail = ncp;
755 nchtail = &ncp->nc_nxt;
756
757 /* and insert on hash chain */
758 insque(ncp, nhp);
759 }
760 }
761
762haveino:
763 fs = dp->i_fs;
764
765 /*
766 * Check for symbolic link
767 */
768 if ((dp->i_mode & IFMT) == IFLNK &&
769 ((ndp->ni_nameiop & FOLLOW) || *cp == '/')) {
770 u_int pathlen = strlen(cp) + 1;
771
772 if (dp->i_size + pathlen >= MAXPATHLEN - 1 ||
773 ++nlink > MAXSYMLINKS) {
774 u.u_error = ELOOP;
775 goto bad2;
776 }
777 ovbcopy(cp, nbp->b_un.b_addr + dp->i_size, pathlen);
778 u.u_error =
779 rdwri(UIO_READ, dp, nbp->b_un.b_addr, (int)dp->i_size,
780 0, 1, (int *)0);
781 if (u.u_error)
782 goto bad2;
783 cp = nbp->b_un.b_addr;
784 iput(dp);
785 if (*cp == '/') {
786 irele(pdp);
787 while (*cp == '/')
788 cp++;
789 if ((dp = u.u_rdir) == NULL)
790 dp = rootdir;
791 ILOCK(dp);
792 dp->i_count++;
793 } else {
794 dp = pdp;
795 ILOCK(dp);
796 }
797 fs = dp->i_fs;
798 goto dirloop;
799 }
800
801 /*
802 * Not a symbolic link. If more pathname,
803 * continue at next component, else return.
804 */
805 if (*cp == '/') {
806 while (*cp == '/')
807 cp++;
808 irele(pdp);
809 goto dirloop;
810 }
811 nbp->av_forw = freenamebuf;
812 freenamebuf = nbp;
813 if (lockparent)
814 ndp->ni_pdir = pdp;
815 else
816 irele(pdp);
817 return (dp);
818bad2:
819 irele(pdp);
820bad:
821 if (bp)
822 brelse(bp);
823 if (dp)
824 iput(dp);
825 nbp->av_forw = freenamebuf;
826 freenamebuf = nbp;
827 return (NULL);
828}
829
830
831dirbad(ip, offset, how)
832 struct inode *ip;
833 off_t offset;
834 char *how;
835{
836
837 printf("%s: bad dir ino %d at offset %d: %s\n",
838 ip->i_fs->fs_fsmnt, ip->i_number, offset, how);
839}
840
841/*
842 * Do consistency checking on a directory entry:
843 * record length must be multiple of 4
844 * record length must not be non-negative
845 * entry must fit in rest of its DIRBLKSIZ block
846 * record must be large enough to contain entry
847 * name is not longer than MAXNAMLEN
848 * name must be as long as advertised, and null terminated
849 */
850dirbadentry(ep, entryoffsetinblock)
851 register struct direct *ep;
852 int entryoffsetinblock;
853{
854 register int i;
855
856 if ((ep->d_reclen & 0x3) != 0 || ep->d_reclen <= 0 ||
857 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
858 ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN)
859 return (1);
860 for (i = 0; i < ep->d_namlen; i++)
861 if (ep->d_name[i] == '\0')
862 return (1);
863 return (ep->d_name[i]);
864}
865
866/*
867 * Write a directory entry after a call to namei, using the parameters
868 * which it left in the u. area. The argument ip is the inode which
869 * the new directory entry will refer to. The u. area field ndp->ni_pdir is
870 * a pointer to the directory to be written, which was left locked by
871 * namei. Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate
872 * how the space for the new entry is to be gotten.
873 */
874direnter(ip, ndp)
875 struct inode *ip;
876 register struct nameidata *ndp;
877{
878 register struct direct *ep, *nep;
879 struct buf *bp;
880 int loc, spacefree, error = 0;
881 u_int dsize;
882 int newentrysize;
883 char *dirbuf;
884
885 ndp->ni_dent.d_ino = ip->i_number;
886 newentrysize = DIRSIZ(&ndp->ni_dent);
887 if (ndp->ni_count == 0) {
888 /*
889 * If ndp->ni_count is 0, then namei could find no space in the
890 * directory. In this case ndp->ni_offset will be on a directory
891 * block boundary and we will write the new entry into a fresh
892 * block.
893 */
894 if (ndp->ni_offset&(DIRBLKSIZ-1))
895 panic("wdir: newblk");
896 ndp->ni_dent.d_reclen = DIRBLKSIZ;
897 error = rdwri(UIO_WRITE, ndp->ni_pdir, (caddr_t)&ndp->ni_dent,
898 newentrysize, ndp->ni_offset, 1, (int *)0);
899 iput(ndp->ni_pdir);
900 return (error);
901 }
902
903 /*
904 * If ndp->ni_count is non-zero, then namei found space for the new
905 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count.
906 * in the directory. To use this space, we may have to compact
907 * the entries located there, by copying them together towards
908 * the beginning of the block, leaving the free space in
909 * one usable chunk at the end.
910 */
911
912 /*
913 * Increase size of directory if entry eats into new space.
914 * This should never push the size past a new multiple of
915 * DIRBLKSIZE.
916 */
917 if (ndp->ni_offset + ndp->ni_count > ndp->ni_pdir->i_size)
918 ndp->ni_pdir->i_size = ndp->ni_offset + ndp->ni_count;
919
920 /*
921 * Get the block containing the space for the new directory
922 * entry. Should return error by result instead of u.u_error.
923 */
924 bp = blkatoff(ndp->ni_pdir, ndp->ni_offset, (char **)&dirbuf);
925 if (bp == 0) {
926 iput(ndp->ni_pdir);
927 return (u.u_error);
928 }
929
930 /*
931 * Find space for the new entry. In the simple case, the
932 * entry at offset base will have the space. If it does
933 * not, then namei arranged that compacting the region
934 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space.
935 */
936 ep = (struct direct *)dirbuf;
937 dsize = DIRSIZ(ep);
938 spacefree = ep->d_reclen - dsize;
939 for (loc = ep->d_reclen; loc < ndp->ni_count; ) {
940 nep = (struct direct *)(dirbuf + loc);
941 if (ep->d_ino) {
942 /* trim the existing slot */
943 ep->d_reclen = dsize;
944 ep = (struct direct *)((char *)ep + dsize);
945 } else {
946 /* overwrite; nothing there; header is ours */
947 spacefree += dsize;
948 }
949 dsize = DIRSIZ(nep);
950 spacefree += nep->d_reclen - dsize;
951 loc += nep->d_reclen;
952 bcopy((caddr_t)nep, (caddr_t)ep, dsize);
953 }
954 /*
955 * Update the pointer fields in the previous entry (if any),
956 * copy in the new entry, and write out the block.
957 */
958 if (ep->d_ino == 0) {
959 if (spacefree + dsize < newentrysize)
960 panic("wdir: compact1");
961 ndp->ni_dent.d_reclen = spacefree + dsize;
962 } else {
963 if (spacefree < newentrysize)
964 panic("wdir: compact2");
965 ndp->ni_dent.d_reclen = spacefree;
966 ep->d_reclen = dsize;
967 ep = (struct direct *)((char *)ep + dsize);
968 }
969 bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize);
970 bwrite(bp);
971 ndp->ni_pdir->i_flag |= IUPD|ICHG;
972 iput(ndp->ni_pdir);
973 return (error);
974}
975
976/*
977 * Remove a directory entry after a call to namei, using the
978 * parameters which it left in the u. area. The u. entry
979 * ni_offset contains the offset into the directory of the
980 * entry to be eliminated. The ni_count field contains the
981 * size of the previous record in the directory. If this
982 * is 0, the first entry is being deleted, so we need only
983 * zero the inode number to mark the entry as free. If the
984 * entry isn't the first in the directory, we must reclaim
985 * the space of the now empty record by adding the record size
986 * to the size of the previous entry.
987 */
988dirremove(ndp)
989 register struct nameidata *ndp;
990{
991 register struct inode *dp = ndp->ni_pdir;
992 register struct buf *bp;
993 struct direct *ep;
994
995 if (ndp->ni_count == 0) {
996 /*
997 * First entry in block: set d_ino to zero.
998 */
999 ndp->ni_dent.d_ino = 0;
1000 (void) rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
1001 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0);
1002 } else {
1003 /*
1004 * Collapse new free space into previous entry.
1005 */
1006 bp = blkatoff(dp, (int)(ndp->ni_offset - ndp->ni_count),
1007 (char **)&ep);
1008 if (bp == 0)
1009 return (0);
1010 ep->d_reclen += ndp->ni_dent.d_reclen;
1011 bwrite(bp);
1012 dp->i_flag |= IUPD|ICHG;
1013 }
1014 return (1);
1015}
1016
1017/*
1018 * Rewrite an existing directory entry to point at the inode
1019 * supplied. The parameters describing the directory entry are
1020 * set up by a call to namei.
1021 */
1022dirrewrite(dp, ip, ndp)
1023 struct inode *dp, *ip;
1024 struct nameidata *ndp;
1025{
1026
1027 ndp->ni_dent.d_ino = ip->i_number;
1028 u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
1029 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0);
1030 iput(dp);
1031}
1032
1033/*
1034 * Return buffer with contents of block "offset"
1035 * from the beginning of directory "ip". If "res"
1036 * is non-zero, fill it in with a pointer to the
1037 * remaining space in the directory.
1038 */
1039struct buf *
1040blkatoff(ip, offset, res)
1041 struct inode *ip;
1042 off_t offset;
1043 char **res;
1044{
1045 register struct fs *fs = ip->i_fs;
1046 daddr_t lbn = lblkno(fs, offset);
1047 int base = blkoff(fs, offset);
1048 int bsize = blksize(fs, ip, lbn);
1049 daddr_t bn = fsbtodb(fs, bmap(ip, lbn, B_WRITE, base, bsize));
1050 register struct buf *bp;
1051
1052 if (u.u_error)
1053 return (0);
1054 bp = bread(ip->i_dev, bn, bsize);
1055 if (bp->b_flags & B_ERROR) {
1056 brelse(bp);
1057 return (0);
1058 }
1059 if (res)
1060 *res = bp->b_un.b_addr + base;
1061 return (bp);
1062}
1063
1064/*
1065 * Check if a directory is empty or not.
1066 * Inode supplied must be locked.
1067 *
1068 * Using a struct dirtemplate here is not precisely
1069 * what we want, but better than using a struct direct.
1070 *
1071 * NB: does not handle corrupted directories.
1072 */
1073dirempty(ip)
1074 register struct inode *ip;
1075{
1076 register off_t off;
1077 struct dirtemplate dbuf;
1078 register struct direct *dp = (struct direct *)&dbuf;
1079 int error, count;
1080#define MINDIRSIZ (sizeof (struct dirtemplate) / 2)
1081
1082 for (off = 0; off < ip->i_size; off += dp->d_reclen) {
1083 error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ,
1084 off, 1, &count);
1085 /*
1086 * Since we read MINDIRSIZ, residual must
1087 * be 0 unless we're at end of file.
1088 */
1089 if (error || count != 0)
1090 return (0);
1091 /* skip empty entries */
1092 if (dp->d_ino == 0)
1093 continue;
1094 /* accept only "." and ".." */
1095 if (dp->d_namlen > 2)
1096 return (0);
1097 if (dp->d_name[0] != '.')
1098 return (0);
1099 /*
1100 * At this point d_namlen must be 1 or 2.
1101 * 1 implies ".", 2 implies ".." if second
1102 * char is also "."
1103 */
1104 if (dp->d_namlen == 1 || dp->d_name[1] == '.')
1105 continue;
1106 return (0);
1107 }
1108 return (1);
1109}
1110
1111/*
1112 * Check if source directory is in the path of the target directory.
1113 * Target is supplied locked, source is unlocked.
1114 * The target is always iput() before returning.
1115 */
1116checkpath(source, target)
1117 struct inode *source, *target;
1118{
1119 struct dirtemplate dirbuf;
1120 register struct inode *ip;
1121 int error = 0;
1122
1123 ip = target;
1124 if (ip->i_number == source->i_number) {
1125 error = EEXIST;
1126 goto out;
1127 }
1128 if (ip->i_number == ROOTINO)
1129 goto out;
1130
1131 for (;;) {
1132 if ((ip->i_mode&IFMT) != IFDIR) {
1133 error = ENOTDIR;
1134 break;
1135 }
1136 error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf,
1137 sizeof (struct dirtemplate), (off_t)0, 1, (int *)0);
1138 if (error != 0)
1139 break;
1140 if (dirbuf.dotdot_namlen != 2 ||
1141 dirbuf.dotdot_name[0] != '.' ||
1142 dirbuf.dotdot_name[1] != '.') {
1143 error = ENOTDIR;
1144 break;
1145 }
1146 if (dirbuf.dotdot_ino == source->i_number) {
1147 error = EINVAL;
1148 break;
1149 }
1150 if (dirbuf.dotdot_ino == ROOTINO)
1151 break;
1152 iput(ip);
1153 ip = iget(ip->i_dev, ip->i_fs, dirbuf.dotdot_ino);
1154 if (ip == NULL) {
1155 error = u.u_error;
1156 break;
1157 }
1158 }
1159
1160out:
1161 if (error == ENOTDIR)
1162 printf("checkpath: .. not a directory\n");
1163 if (ip != NULL)
1164 iput(ip);
1165 return (error);
1166}
1167
1168/*
1169 * Name cache initialization, from main() when we are booting
1170 */
1171nchinit()
1172{
1173 register union nchash *nchp;
1174 register struct nch *ncp;
1175
1176 nchhead = 0;
1177 nchtail = &nchhead;
1178
1179 for (ncp = nch; ncp < &nch[nchsize]; ncp++) {
1180 ncp->nc_forw = ncp; /* hash chain */
1181 ncp->nc_back = ncp;
1182
1183 ncp->nc_nxt = NULL; /* lru chain */
1184 *nchtail = ncp;
1185 ncp->nc_prev = nchtail;
1186 nchtail = &ncp->nc_nxt;
1187
1188 /* all else is zero already */
1189 }
1190
1191 for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) {
1192 nchp->nch_head[0] = nchp;
1193 nchp->nch_head[1] = nchp;
1194 }
1195}
1196
1197/*
1198 * Cache flush, called when filesys is umounted to
1199 * remove entries that would now be invalid
1200 *
1201 * The line "nxtcp = nchhead" near the end is to avoid potential problems
1202 * if the cache lru chain is modified while we are dumping the
1203 * inode. This makes the algorithm O(n^2), but do you think I care?
1204 */
1205nchinval(dev)
1206 register dev_t dev;
1207{
1208 register struct nch *ncp, *nxtcp;
1209
1210 for (ncp = nchhead; ncp; ncp = nxtcp) {
1211 nxtcp = ncp->nc_nxt;
1212
1213 if (ncp->nc_ip == NULL ||
1214 (ncp->nc_idev != dev && ncp->nc_dev != dev))
1215 continue;
1216
1217 /* free the resources we had */
1218 ncp->nc_idev = NODEV;
1219 ncp->nc_dev = NODEV;
1220 ncp->nc_id = NULL;
1221 ncp->nc_ino = 0;
1222 ncp->nc_ip = NULL;
1223
1224
1225 /* remove the entry from its hash chain */
1226 remque(ncp);
1227 /* and make a dummy one */
1228 ncp->nc_forw = ncp;
1229 ncp->nc_back = ncp;
1230
1231 /* delete this entry from LRU chain */
1232 *ncp->nc_prev = nxtcp;
1233 if (nxtcp)
1234 nxtcp->nc_prev = ncp->nc_prev;
1235 else
1236 nchtail = ncp->nc_prev;
1237
1238 /* cause rescan of list, it may have altered */
1239 nxtcp = nchhead;
1240 /* put the now-free entry at head of LRU */
1241 ncp->nc_nxt = nxtcp;
1242 ncp->nc_prev = &nchhead;
1243 nxtcp->nc_prev = &ncp->nc_nxt;
1244 nchhead = ncp;
1245 }
1246}