insure that directories are always a multiple of DIRBLKSIZ in size
[unix-history] / usr / src / sys / ufs / ffs / ufs_lookup.c
... / ...
CommitLineData
1/* ufs_lookup.c 6.20 85/02/22 */
2
3#include "param.h"
4#include "systm.h"
5#include "inode.h"
6#include "fs.h"
7#include "mount.h"
8#include "dir.h"
9#include "user.h"
10#include "buf.h"
11#include "conf.h"
12#include "uio.h"
13#include "kernel.h"
14
15struct buf *blkatoff();
16struct buf *freenamebuf;
17int dirchk = 0;
18
19/*
20 * Structures associated with name cacheing.
21 */
22#define NCHHASH 32 /* size of hash table */
23
24#if ((NCHHASH)&((NCHHASH)-1)) != 0
25#define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH))
26#else
27#define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1))
28#endif
29
30union nchash {
31 union nchash *nch_head[2];
32 struct nch *nch_chain[2];
33} nchash[NCHHASH];
34#define nch_forw nch_chain[0]
35#define nch_back nch_chain[1]
36
37struct nch *nchhead, **nchtail; /* LRU chain pointers */
38struct nchstats nchstats; /* cache effectiveness statistics */
39
40/*
41 * Convert a pathname into a pointer to a locked inode,
42 * with side effects usable in creating and removing files.
43 * This is a very central and rather complicated routine.
44 *
45 * The segflg defines whether the name is to be copied from user
46 * space or kernel space.
47 *
48 * The flag argument is (LOOKUP, CREATE, DELETE) depending on whether
49 * the name is to be (looked up, created, deleted). If flag has
50 * LOCKPARENT or'ed into it and the target of the pathname exists,
51 * namei returns both the target and its parent directory locked.
52 * If the file system is not maintained in a strict tree hierarchy,
53 * this can result in a deadlock situation. When creating and
54 * LOCKPARENT is specified, the target may not be ".". When deleting
55 * and LOCKPARENT is specified, the target may be ".", but the caller
56 * must check to insure it does an irele and iput instead of two iputs.
57 *
58 * The FOLLOW flag is set when symbolic links are to be followed
59 * when they occur at the end of the name translation process.
60 *
61 * Name caching works as follows:
62 *
63 * names found by directory scans are retained in a cache
64 * for future reference. It is managed LRU, so frequently
65 * used names will hang around. Cache is indexed by hash value
66 * obtained from (ino,dev,name) where ino & dev refer to the
67 * directory containing name.
68 *
69 * For simplicity (and economy of storage), names longer than
70 * some (small) maximum length are not cached, they occur
71 * infrequently in any case, and are almost never of interest.
72 *
73 * Upon reaching the last segment of a path, if the reference
74 * is for DELETE, or NOCACHE is set (rewrite), and the
75 * name is located in the cache, it will be dropped.
76 *
77 * We must be sure never to enter the name ".." into the cache
78 * because of the extremely kludgey way that rename() alters
79 * ".." in a situation like
80 * mv a/x b/x
81 * where x is a directory, and x/.. is the ".." in question.
82 *
83 * Overall outline of namei:
84 *
85 * copy in name
86 * get starting directory
87 * dirloop:
88 * check accessibility of directory
89 * dirloop2:
90 * copy next component of name to ndp->ni_dent
91 * handle degenerate case where name is null string
92 * look for name in cache, if found, then if at end of path
93 * and deleting or creating, drop it, else to haveino
94 * search for name in directory, to found or notfound
95 * notfound:
96 * if creating, return locked directory, leaving info on avail. slots
97 * else return error
98 * found:
99 * if at end of path and deleting, return information to allow delete
100 * if at end of path and rewriting (create and LOCKPARENT), lock target
101 * inode and return info to allow rewrite
102 * if .. and on mounted filesys, look in mount table for parent
103 * if not at end, if neither creating nor deleting, add name to cache
104 * haveino:
105 * if symbolic link, massage name in buffer and continue at dirloop
106 * if more components of name, do next level at dirloop
107 * return the answer as locked inode
108 *
109 * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode,
110 * but unlocked.
111 */
112struct inode *
113namei(ndp)
114 register struct nameidata *ndp;
115{
116 register char *cp; /* pointer into pathname argument */
117/* these variables refer to things which must be freed or unlocked */
118 register struct inode *dp = 0; /* the directory we are searching */
119 register struct nch *ncp; /* cache slot for entry */
120 register struct fs *fs; /* file system that directory is in */
121 register struct buf *bp = 0; /* a buffer of directory entries */
122 register struct direct *ep; /* the current directory entry */
123 int entryoffsetinblock; /* offset of ep in bp's buffer */
124 register struct buf *nbp; /* buffer storing path name argument */
125/* these variables hold information about the search for a slot */
126 enum {NONE, COMPACT, FOUND} slotstatus;
127 int slotoffset = -1; /* offset of area with free space */
128 int slotsize; /* size of area at slotoffset */
129 int slotfreespace; /* amount of space free in slot */
130 int slotneeded; /* size of the entry we're seeking */
131/* */
132 int numdirpasses; /* strategy for directory search */
133 int endsearch; /* offset to end directory search */
134 int prevoff; /* ndp->ni_offset of previous entry */
135 int nlink = 0; /* number of symbolic links taken */
136 struct inode *pdp; /* saved dp during symlink work */
137 int error, i;
138 int lockparent;
139 int docache;
140 unsigned hash; /* value of name hash for entry */
141 union nchash *nhp; /* cache chain head for entry */
142 int isdotdot; /* != 0 if current name is ".." */
143 int flag; /* op ie, LOOKUP, CREATE, or DELETE */
144 off_t enduseful; /* pointer past last used dir slot */
145
146 lockparent = ndp->ni_nameiop & LOCKPARENT;
147 docache = (ndp->ni_nameiop & NOCACHE) ^ NOCACHE;
148 flag = ndp->ni_nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW);
149 if (flag == DELETE)
150 docache = 0;
151 /*
152 * Get a buffer for the name to be translated, and copy the
153 * name into the buffer.
154 */
155 nbp = freenamebuf;
156 if (nbp == NULL)
157 nbp = geteblk(MAXPATHLEN);
158 else
159 freenamebuf = nbp->av_forw;
160 if (ndp->ni_segflg == UIO_SYSSPACE)
161 error = copystr(ndp->ni_dirp, nbp->b_un.b_addr, MAXPATHLEN,
162 (u_int *)0);
163 else
164 error = copyinstr(ndp->ni_dirp, nbp->b_un.b_addr, MAXPATHLEN,
165 (u_int *)0);
166 if (error) {
167 u.u_error = error;
168 goto bad;
169 }
170
171 /*
172 * Get starting directory.
173 */
174 cp = nbp->b_un.b_addr;
175 if (*cp == '/') {
176 while (*cp == '/')
177 cp++;
178 if ((dp = u.u_rdir) == NULL)
179 dp = rootdir;
180 } else
181 dp = u.u_cdir;
182 fs = dp->i_fs;
183 ILOCK(dp);
184 dp->i_count++;
185 ndp->ni_pdir = (struct inode *)0xc0000000; /* illegal */
186 ndp->ni_endoff = 0;
187
188 /*
189 * We come to dirloop to search a new directory.
190 * The directory must be locked so that it can be
191 * iput, and fs must be already set to dp->i_fs.
192 */
193dirloop:
194 /*
195 * Check accessiblity of directory.
196 */
197 if ((dp->i_mode&IFMT) != IFDIR) {
198 u.u_error = ENOTDIR;
199 goto bad;
200 }
201 if (access(dp, IEXEC))
202 goto bad;
203
204dirloop2:
205 /*
206 * Copy next component of name to ndp->ni_dent.
207 */
208 hash = 0;
209 for (i = 0; *cp != 0 && *cp != '/'; cp++) {
210 if (i >= MAXNAMLEN) {
211 u.u_error = ENOENT;
212 goto bad;
213 }
214 if ((*cp&0377) == ('/'|0200) || (*cp&0200) && flag != DELETE) {
215 u.u_error = EPERM;
216 goto bad;
217 }
218 ndp->ni_dent.d_name[i++] = *cp;
219 hash += (unsigned char)*cp * i;
220 }
221 ndp->ni_dent.d_namlen = i;
222 ndp->ni_dent.d_name[i] = '\0';
223 isdotdot = (i == 2 &&
224 ndp->ni_dent.d_name[0] == '.' && ndp->ni_dent.d_name[1] == '.');
225
226 /*
227 * Check for degenerate name (e.g. / or "")
228 * which is a way of talking about a directory,
229 * e.g. like "/." or ".".
230 */
231 if (ndp->ni_dent.d_name[0] == '\0') {
232 if (flag != LOOKUP || lockparent) {
233 u.u_error = EISDIR;
234 goto bad;
235 }
236 nbp->av_forw = freenamebuf;
237 freenamebuf = nbp;
238 return (dp);
239 }
240
241 /*
242 * We now have a segment name to search for, and a directory to search.
243 *
244 * Before tediously performing a linear scan of the directory,
245 * check the name cache to see if the directory/name pair
246 * we are looking for is known already. We don't do this
247 * if the segment name is long, simply so the cache can avoid
248 * holding long names (which would either waste space, or
249 * add greatly to the complexity).
250 */
251 if (ndp->ni_dent.d_namlen > NCHNAMLEN) {
252 nchstats.ncs_long++;
253 docache = 0;
254 } else {
255 nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)];
256 for (ncp = nhp->nch_forw; ncp != (struct nch *)nhp;
257 ncp = ncp->nc_forw) {
258 if (ncp->nc_ino == dp->i_number &&
259 ncp->nc_dev == dp->i_dev &&
260 ncp->nc_nlen == ndp->ni_dent.d_namlen &&
261 !bcmp(ncp->nc_name, ndp->ni_dent.d_name,
262 ncp->nc_nlen))
263 break;
264 }
265
266 if (ncp == (struct nch *)nhp) {
267 nchstats.ncs_miss++;
268 ncp = NULL;
269 } else {
270 if (ncp->nc_id != ncp->nc_ip->i_id) {
271 nchstats.ncs_falsehits++;
272 } else if (*cp == '\0' && !docache) {
273 nchstats.ncs_badhits++;
274 } else {
275
276 /*
277 * move this slot to end of LRU
278 * chain, if not already there
279 */
280 if (ncp->nc_nxt) {
281 /* remove from LRU chain */
282 *ncp->nc_prev = ncp->nc_nxt;
283 ncp->nc_nxt->nc_prev = ncp->nc_prev;
284
285 /* and replace at end of it */
286 ncp->nc_nxt = NULL;
287 ncp->nc_prev = nchtail;
288 *nchtail = ncp;
289 nchtail = &ncp->nc_nxt;
290 }
291
292 /*
293 * Get the next inode in the path.
294 * See comment above other `IUNLOCK' code for
295 * an explaination of the locking protocol.
296 */
297 pdp = dp;
298 dp = ncp->nc_ip;
299 if (dp == NULL)
300 panic("nami: null cache ino");
301 if (pdp == dp)
302 dp->i_count++;
303 else {
304 if (isdotdot) {
305 IUNLOCK(pdp);
306 igrab(dp);
307 } else {
308 igrab(dp);
309 IUNLOCK(pdp);
310 }
311 }
312
313 /*
314 * Verify that the inode that we got
315 * did not change while we were waiting
316 * for it to be locked.
317 */
318 if (ncp->nc_id != ncp->nc_ip->i_id) {
319 iput(dp);
320 ILOCK(pdp);
321 dp = pdp;
322 nchstats.ncs_falsehits++;
323 } else {
324 ndp->ni_dent.d_ino = dp->i_number;
325 /* ni_dent.d_reclen is garbage ... */
326 nchstats.ncs_goodhits++;
327 goto haveino;
328 }
329 }
330
331 /*
332 * Last component and we are renaming or deleting,
333 * the cache entry is invalid, or otherwise don't
334 * want cache entry to exist.
335 */
336
337 /* remove from LRU chain */
338 *ncp->nc_prev = ncp->nc_nxt;
339 if (ncp->nc_nxt)
340 ncp->nc_nxt->nc_prev = ncp->nc_prev;
341 else
342 nchtail = ncp->nc_prev;
343
344 /* remove from hash chain */
345 remque(ncp);
346
347 /* insert at head of LRU list (first to grab) */
348 ncp->nc_nxt = nchhead;
349 ncp->nc_prev = &nchhead;
350 nchhead->nc_prev = &ncp->nc_nxt;
351 nchhead = ncp;
352
353 /* and make a dummy hash chain */
354 ncp->nc_forw = ncp;
355 ncp->nc_back = ncp;
356
357 ncp = NULL;
358 }
359 }
360
361 /*
362 * Suppress search for slots unless creating
363 * file and at end of pathname, in which case
364 * we watch for a place to put the new file in
365 * case it doesn't already exist.
366 */
367 slotstatus = FOUND;
368 if (flag == CREATE && *cp == 0) {
369 slotstatus = NONE;
370 slotfreespace = 0;
371 slotneeded = DIRSIZ(&ndp->ni_dent);
372 }
373 /*
374 * If this is the same directory that this process
375 * previously searched, pick up where we last left off.
376 * We cache only lookups as these are the most common
377 * and have the greatest payoff. Caching CREATE has little
378 * benefit as it usually must search the entire directory
379 * to determine that the entry does not exist. Caching the
380 * location of the last DELETE has not reduced profiling time
381 * and hence has been removed in the interest of simplicity.
382 */
383 if (flag != LOOKUP || dp->i_number != u.u_ncache.nc_inumber ||
384 dp->i_dev != u.u_ncache.nc_dev) {
385 ndp->ni_offset = 0;
386 numdirpasses = 1;
387 } else {
388 if ((dp->i_flag & ICHG) || dp->i_ctime >= u.u_ncache.nc_time) {
389 if (u.u_ncache.nc_prevoffset > dp->i_size)
390 u.u_ncache.nc_prevoffset = 0;
391 else
392 u.u_ncache.nc_prevoffset &= ~(DIRBLKSIZ - 1);
393 u.u_ncache.nc_time = time.tv_sec;
394 }
395 ndp->ni_offset = u.u_ncache.nc_prevoffset;
396 entryoffsetinblock = blkoff(fs, ndp->ni_offset);
397 if (entryoffsetinblock != 0) {
398 bp = blkatoff(dp, ndp->ni_offset, (char **)0);
399 if (bp == 0)
400 goto bad;
401 }
402 numdirpasses = 2;
403 nchstats.ncs_2passes++;
404 }
405 endsearch = roundup(dp->i_size, DIRBLKSIZ);
406 enduseful = 0;
407
408searchloop:
409 while (ndp->ni_offset < endsearch) {
410 /*
411 * If offset is on a block boundary,
412 * read the next directory block.
413 * Release previous if it exists.
414 */
415 if (blkoff(fs, ndp->ni_offset) == 0) {
416 if (bp != NULL)
417 brelse(bp);
418 bp = blkatoff(dp, ndp->ni_offset, (char **)0);
419 if (bp == 0)
420 goto bad;
421 entryoffsetinblock = 0;
422 }
423
424 /*
425 * If still looking for a slot, and at a DIRBLKSIZE
426 * boundary, have to start looking for free space again.
427 */
428 if (slotstatus == NONE &&
429 (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) {
430 slotoffset = -1;
431 slotfreespace = 0;
432 }
433
434 /*
435 * Get pointer to next entry.
436 * Full validation checks are slow, so we only check
437 * enough to insure forward progress through the
438 * directory. Complete checks can be run by patching
439 * "dirchk" to be true.
440 */
441 ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock);
442 if (ep->d_reclen <= 0 ||
443 dirchk && dirbadentry(ep, entryoffsetinblock)) {
444 dirbad(dp, ndp->ni_offset, "mangled entry");
445 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
446 ndp->ni_offset += i;
447 entryoffsetinblock += i;
448 continue;
449 }
450
451 /*
452 * If an appropriate sized slot has not yet been found,
453 * check to see if one is available. Also accumulate space
454 * in the current block so that we can determine if
455 * compaction is viable.
456 */
457 if (slotstatus != FOUND) {
458 int size = ep->d_reclen;
459
460 if (ep->d_ino != 0)
461 size -= DIRSIZ(ep);
462 if (size > 0) {
463 if (size >= slotneeded) {
464 slotstatus = FOUND;
465 slotoffset = ndp->ni_offset;
466 slotsize = ep->d_reclen;
467 } else if (slotstatus == NONE) {
468 slotfreespace += size;
469 if (slotoffset == -1)
470 slotoffset = ndp->ni_offset;
471 if (slotfreespace >= slotneeded) {
472 slotstatus = COMPACT;
473 slotsize = ndp->ni_offset +
474 ep->d_reclen - slotoffset;
475 }
476 }
477 }
478 }
479
480 /*
481 * Check for a name match.
482 */
483 if (ep->d_ino) {
484 if (ep->d_namlen == ndp->ni_dent.d_namlen &&
485 !bcmp(ndp->ni_dent.d_name, ep->d_name,
486 ep->d_namlen))
487 goto found;
488 }
489 prevoff = ndp->ni_offset;
490 ndp->ni_offset += ep->d_reclen;
491 entryoffsetinblock += ep->d_reclen;
492 if (ep->d_ino)
493 enduseful = ndp->ni_offset;
494 }
495/* notfound: */
496 /*
497 * If we started in the middle of the directory and failed
498 * to find our target, we must check the beginning as well.
499 */
500 if (numdirpasses == 2) {
501 numdirpasses--;
502 ndp->ni_offset = 0;
503 endsearch = u.u_ncache.nc_prevoffset;
504 goto searchloop;
505 }
506 /*
507 * If creating, and at end of pathname and current
508 * directory has not been removed, then can consider
509 * allowing file to be created.
510 */
511 if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) {
512 /*
513 * Access for write is interpreted as allowing
514 * creation of files in the directory.
515 */
516 if (access(dp, IWRITE))
517 goto bad;
518 /*
519 * Return an indication of where the new directory
520 * entry should be put. If we didn't find a slot,
521 * then set ndp->ni_count to 0 indicating that the new
522 * slot belongs at the end of the directory. If we found
523 * a slot, then the new entry can be put in the range
524 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count)
525 */
526 if (slotstatus == NONE) {
527 ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ);
528 ndp->ni_count = 0;
529 enduseful = ndp->ni_offset;
530 } else {
531 ndp->ni_offset = slotoffset;
532 ndp->ni_count = slotsize;
533 if (enduseful < slotoffset + slotsize)
534 enduseful = slotoffset + slotsize;
535 }
536 ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ);
537 dp->i_flag |= IUPD|ICHG;
538 if (bp)
539 brelse(bp);
540 nbp->av_forw = freenamebuf;
541 freenamebuf = nbp;
542 /*
543 * We return with the directory locked, so that
544 * the parameters we set up above will still be
545 * valid if we actually decide to do a direnter().
546 * We return NULL to indicate that the entry doesn't
547 * currently exist, leaving a pointer to the (locked)
548 * directory inode in ndp->ni_pdir.
549 */
550 ndp->ni_pdir = dp;
551 return (NULL);
552 }
553 u.u_error = ENOENT;
554 goto bad;
555found:
556 if (numdirpasses == 2)
557 nchstats.ncs_pass2++;
558 /*
559 * Check that directory length properly reflects presence
560 * of this entry.
561 */
562 if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) {
563 dirbad(dp, ndp->ni_offset, "i_size too small");
564 dp->i_size = entryoffsetinblock + DIRSIZ(ep);
565 dp->i_flag |= IUPD|ICHG;
566 }
567
568 /*
569 * Found component in pathname.
570 * If the final component of path name, save information
571 * in the cache as to where the entry was found.
572 */
573 if (*cp == '\0' && flag == LOOKUP) {
574 u.u_ncache.nc_prevoffset = ndp->ni_offset;
575 u.u_ncache.nc_inumber = dp->i_number;
576 u.u_ncache.nc_dev = dp->i_dev;
577 u.u_ncache.nc_time = time.tv_sec;
578 }
579 /*
580 * Save directory entry in ndp->ni_dent,
581 * and release directory buffer.
582 */
583 bcopy((caddr_t)ep, (caddr_t)&ndp->ni_dent, (u_int)DIRSIZ(ep));
584 brelse(bp);
585 bp = NULL;
586
587 /*
588 * If deleting, and at end of pathname, return
589 * parameters which can be used to remove file.
590 * If the lockparent flag isn't set, we return only
591 * the directory (in ndp->ni_pdir), otherwise we go
592 * on and lock the inode, being careful with ".".
593 */
594 if (flag == DELETE && *cp == 0) {
595 /*
596 * Write access to directory required to delete files.
597 */
598 if (access(dp, IWRITE))
599 goto bad;
600 ndp->ni_pdir = dp; /* for dirremove() */
601 /*
602 * Return pointer to current entry in ndp->ni_offset,
603 * and distance past previous entry (if there
604 * is a previous entry in this block) in ndp->ni_count.
605 * Save directory inode pointer in ndp->ni_pdir for dirremove().
606 */
607 if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0)
608 ndp->ni_count = 0;
609 else
610 ndp->ni_count = ndp->ni_offset - prevoff;
611 if (lockparent) {
612 if (dp->i_number == ndp->ni_dent.d_ino)
613 dp->i_count++;
614 else {
615 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
616 if (dp == NULL) {
617 iput(ndp->ni_pdir);
618 goto bad;
619 }
620 /*
621 * If directory is "sticky", then user must own
622 * the directory, or the file in it, else he
623 * may not delete it (unless he's root). This
624 * implements append-only directories.
625 */
626 if ((ndp->ni_pdir->i_mode & ISVTX) &&
627 u.u_uid != 0 &&
628 u.u_uid != ndp->ni_pdir->i_uid &&
629 dp->i_uid != u.u_uid) {
630 iput(ndp->ni_pdir);
631 u.u_error = EPERM;
632 goto bad;
633 }
634 }
635 }
636 nbp->av_forw = freenamebuf;
637 freenamebuf = nbp;
638 return (dp);
639 }
640
641 /*
642 * Special handling for ".." allowing chdir out of mounted
643 * file system: indirect .. in root inode to reevaluate
644 * in directory file system was mounted on.
645 */
646 if (isdotdot) {
647 if (dp == u.u_rdir)
648 ndp->ni_dent.d_ino = dp->i_number;
649 else if (ndp->ni_dent.d_ino == ROOTINO &&
650 dp->i_number == ROOTINO) {
651 for (i = 1; i < NMOUNT; i++)
652 if (mount[i].m_bufp != NULL &&
653 mount[i].m_dev == dp->i_dev) {
654 iput(dp);
655 dp = mount[i].m_inodp;
656 ILOCK(dp);
657 dp->i_count++;
658 fs = dp->i_fs;
659 cp -= 2; /* back over .. */
660 goto dirloop2;
661 }
662 }
663 }
664
665 /*
666 * If rewriting (rename), return the inode and the
667 * information required to rewrite the present directory
668 * Must get inode of directory entry to verify it's a
669 * regular file, or empty directory.
670 */
671 if ((flag == CREATE && lockparent) && *cp == 0) {
672 if (access(dp, IWRITE))
673 goto bad;
674 ndp->ni_pdir = dp; /* for dirrewrite() */
675 /*
676 * Careful about locking second inode.
677 * This can only occur if the target is ".".
678 */
679 if (dp->i_number == ndp->ni_dent.d_ino) {
680 u.u_error = EISDIR; /* XXX */
681 goto bad;
682 }
683 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
684 if (dp == NULL) {
685 iput(ndp->ni_pdir);
686 goto bad;
687 }
688 nbp->av_forw = freenamebuf;
689 freenamebuf = nbp;
690 return (dp);
691 }
692
693 /*
694 * Check for symbolic link, which may require us to massage the
695 * name before we continue translation. We do not `iput' the
696 * directory because we may need it again if the symbolic link
697 * is relative to the current directory. Instead we save it
698 * unlocked as "pdp". We must get the target inode before unlocking
699 * the directory to insure that the inode will not be removed
700 * before we get it. We prevent deadlock by always fetching
701 * inodes from the root, moving down the directory tree. Thus
702 * when following backward pointers ".." we must unlock the
703 * parent directory before getting the requested directory.
704 * There is a potential race condition here if both the current
705 * and parent directories are removed before the `iget' for the
706 * inode associated with ".." returns. We hope that this occurs
707 * infrequently since we cannot avoid this race condition without
708 * implementing a sophisticated deadlock detection algorithm.
709 * Note also that this simple deadlock detection scheme will not
710 * work if the file system has any hard links other than ".."
711 * that point backwards in the directory structure.
712 */
713 pdp = dp;
714 if (isdotdot) {
715 IUNLOCK(pdp); /* race to get the inode */
716 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
717 if (dp == NULL)
718 goto bad2;
719 } else if (dp->i_number == ndp->ni_dent.d_ino) {
720 dp->i_count++; /* we want ourself, ie "." */
721 } else {
722 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
723 IUNLOCK(pdp);
724 if (dp == NULL)
725 goto bad2;
726 }
727
728 /*
729 * insert name into cache (if we want it, and it isn't "." or "..")
730 *
731 * all other cases where making a cache entry would be wrong
732 * have already departed from the code sequence somewhere above.
733 */
734 if (docache) {
735 if (ncp != NULL)
736 panic("nami: duplicating cache");
737
738 /*
739 * free the cache slot at head of lru chain
740 */
741 if (ncp = nchhead) {
742 /* remove from lru chain */
743 *ncp->nc_prev = ncp->nc_nxt;
744 if (ncp->nc_nxt)
745 ncp->nc_nxt->nc_prev = ncp->nc_prev;
746 else
747 nchtail = ncp->nc_prev;
748
749 /* remove from old hash chain */
750 remque(ncp);
751
752 /* grab the inode we just found */
753 ncp->nc_ip = dp;
754
755 /* fill in cache info */
756 ncp->nc_ino = pdp->i_number; /* parents inum */
757 ncp->nc_dev = pdp->i_dev; /* & device */
758 ncp->nc_idev = dp->i_dev; /* our device */
759 ncp->nc_id = dp->i_id; /* identifier */
760 ncp->nc_nlen = ndp->ni_dent.d_namlen;
761 bcopy(ndp->ni_dent.d_name, ncp->nc_name, ncp->nc_nlen);
762
763 /* link at end of lru chain */
764 ncp->nc_nxt = NULL;
765 ncp->nc_prev = nchtail;
766 *nchtail = ncp;
767 nchtail = &ncp->nc_nxt;
768
769 /* and insert on hash chain */
770 insque(ncp, nhp);
771 }
772 }
773
774haveino:
775 fs = dp->i_fs;
776
777 /*
778 * Check for symbolic link
779 */
780 if ((dp->i_mode & IFMT) == IFLNK &&
781 ((ndp->ni_nameiop & FOLLOW) || *cp == '/')) {
782 u_int pathlen = strlen(cp) + 1;
783
784 if (dp->i_size + pathlen >= MAXPATHLEN - 1 ||
785 ++nlink > MAXSYMLINKS) {
786 u.u_error = ELOOP;
787 goto bad2;
788 }
789 ovbcopy(cp, nbp->b_un.b_addr + dp->i_size, pathlen);
790 u.u_error =
791 rdwri(UIO_READ, dp, nbp->b_un.b_addr, (int)dp->i_size,
792 0, 1, (int *)0);
793 if (u.u_error)
794 goto bad2;
795 cp = nbp->b_un.b_addr;
796 iput(dp);
797 if (*cp == '/') {
798 irele(pdp);
799 while (*cp == '/')
800 cp++;
801 if ((dp = u.u_rdir) == NULL)
802 dp = rootdir;
803 ILOCK(dp);
804 dp->i_count++;
805 } else {
806 dp = pdp;
807 ILOCK(dp);
808 }
809 fs = dp->i_fs;
810 goto dirloop;
811 }
812
813 /*
814 * Not a symbolic link. If more pathname,
815 * continue at next component, else return.
816 */
817 if (*cp == '/') {
818 while (*cp == '/')
819 cp++;
820 irele(pdp);
821 goto dirloop;
822 }
823 nbp->av_forw = freenamebuf;
824 freenamebuf = nbp;
825 if (lockparent)
826 ndp->ni_pdir = pdp;
827 else
828 irele(pdp);
829 return (dp);
830bad2:
831 irele(pdp);
832bad:
833 if (bp)
834 brelse(bp);
835 if (dp)
836 iput(dp);
837 nbp->av_forw = freenamebuf;
838 freenamebuf = nbp;
839 return (NULL);
840}
841
842
843dirbad(ip, offset, how)
844 struct inode *ip;
845 off_t offset;
846 char *how;
847{
848
849 printf("%s: bad dir ino %d at offset %d: %s\n",
850 ip->i_fs->fs_fsmnt, ip->i_number, offset, how);
851}
852
853/*
854 * Do consistency checking on a directory entry:
855 * record length must be multiple of 4
856 * record length must not be non-negative
857 * entry must fit in rest of its DIRBLKSIZ block
858 * record must be large enough to contain entry
859 * name is not longer than MAXNAMLEN
860 * name must be as long as advertised, and null terminated
861 */
862dirbadentry(ep, entryoffsetinblock)
863 register struct direct *ep;
864 int entryoffsetinblock;
865{
866 register int i;
867
868 if ((ep->d_reclen & 0x3) != 0 || ep->d_reclen <= 0 ||
869 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
870 ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN)
871 return (1);
872 for (i = 0; i < ep->d_namlen; i++)
873 if (ep->d_name[i] == '\0')
874 return (1);
875 return (ep->d_name[i]);
876}
877
878/*
879 * Write a directory entry after a call to namei, using the parameters
880 * which it left in the u. area. The argument ip is the inode which
881 * the new directory entry will refer to. The u. area field ndp->ni_pdir is
882 * a pointer to the directory to be written, which was left locked by
883 * namei. Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate
884 * how the space for the new entry is to be gotten.
885 */
886direnter(ip, ndp)
887 struct inode *ip;
888 register struct nameidata *ndp;
889{
890 register struct direct *ep, *nep;
891 register struct inode *dp = ndp->ni_pdir;
892 struct buf *bp;
893 int loc, spacefree, error = 0;
894 u_int dsize;
895 int newentrysize;
896 char *dirbuf;
897
898 ndp->ni_dent.d_ino = ip->i_number;
899 newentrysize = DIRSIZ(&ndp->ni_dent);
900 if (ndp->ni_count == 0) {
901 /*
902 * If ndp->ni_count is 0, then namei could find no space in the
903 * directory. In this case ndp->ni_offset will be on a directory
904 * block boundary and we will write the new entry into a fresh
905 * block.
906 */
907 if (ndp->ni_offset&(DIRBLKSIZ-1))
908 panic("wdir: newblk");
909 ndp->ni_dent.d_reclen = DIRBLKSIZ;
910 error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
911 newentrysize, ndp->ni_offset, 1, (int *)0);
912 if (DIRBLKSIZ > dp->i_fs->fs_fsize)
913 panic("wdir: blksize"); /* XXX - should grow w/bmap() */
914 else
915 dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
916 iput(dp);
917 return (error);
918 }
919
920 /*
921 * If ndp->ni_count is non-zero, then namei found space for the new
922 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count.
923 * in the directory. To use this space, we may have to compact
924 * the entries located there, by copying them together towards
925 * the beginning of the block, leaving the free space in
926 * one usable chunk at the end.
927 */
928
929 /*
930 * Increase size of directory if entry eats into new space.
931 * This should never push the size past a new multiple of
932 * DIRBLKSIZE.
933 *
934 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
935 */
936 if (ndp->ni_offset + ndp->ni_count > dp->i_size)
937 dp->i_size = ndp->ni_offset + ndp->ni_count;
938
939 /*
940 * Get the block containing the space for the new directory
941 * entry. Should return error by result instead of u.u_error.
942 */
943 bp = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf);
944 if (bp == 0) {
945 iput(dp);
946 return (u.u_error);
947 }
948
949 /*
950 * Find space for the new entry. In the simple case, the
951 * entry at offset base will have the space. If it does
952 * not, then namei arranged that compacting the region
953 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space.
954 */
955 ep = (struct direct *)dirbuf;
956 dsize = DIRSIZ(ep);
957 spacefree = ep->d_reclen - dsize;
958 for (loc = ep->d_reclen; loc < ndp->ni_count; ) {
959 nep = (struct direct *)(dirbuf + loc);
960 if (ep->d_ino) {
961 /* trim the existing slot */
962 ep->d_reclen = dsize;
963 ep = (struct direct *)((char *)ep + dsize);
964 } else {
965 /* overwrite; nothing there; header is ours */
966 spacefree += dsize;
967 }
968 dsize = DIRSIZ(nep);
969 spacefree += nep->d_reclen - dsize;
970 loc += nep->d_reclen;
971 bcopy((caddr_t)nep, (caddr_t)ep, dsize);
972 }
973 /*
974 * Update the pointer fields in the previous entry (if any),
975 * copy in the new entry, and write out the block.
976 */
977 if (ep->d_ino == 0) {
978 if (spacefree + dsize < newentrysize)
979 panic("wdir: compact1");
980 ndp->ni_dent.d_reclen = spacefree + dsize;
981 } else {
982 if (spacefree < newentrysize)
983 panic("wdir: compact2");
984 ndp->ni_dent.d_reclen = spacefree;
985 ep->d_reclen = dsize;
986 ep = (struct direct *)((char *)ep + dsize);
987 }
988 bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize);
989 bwrite(bp);
990 dp->i_flag |= IUPD|ICHG;
991 if (ndp->ni_endoff && ndp->ni_endoff < dp->i_size)
992 itrunc(dp, ndp->ni_endoff);
993 iput(dp);
994 return (error);
995}
996
997/*
998 * Remove a directory entry after a call to namei, using the
999 * parameters which it left in the u. area. The u. entry
1000 * ni_offset contains the offset into the directory of the
1001 * entry to be eliminated. The ni_count field contains the
1002 * size of the previous record in the directory. If this
1003 * is 0, the first entry is being deleted, so we need only
1004 * zero the inode number to mark the entry as free. If the
1005 * entry isn't the first in the directory, we must reclaim
1006 * the space of the now empty record by adding the record size
1007 * to the size of the previous entry.
1008 */
1009dirremove(ndp)
1010 register struct nameidata *ndp;
1011{
1012 register struct inode *dp = ndp->ni_pdir;
1013 register struct buf *bp;
1014 struct direct *ep;
1015
1016 if (ndp->ni_count == 0) {
1017 /*
1018 * First entry in block: set d_ino to zero.
1019 */
1020 ndp->ni_dent.d_ino = 0;
1021 (void) rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
1022 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0);
1023 } else {
1024 /*
1025 * Collapse new free space into previous entry.
1026 */
1027 bp = blkatoff(dp, (int)(ndp->ni_offset - ndp->ni_count),
1028 (char **)&ep);
1029 if (bp == 0)
1030 return (0);
1031 ep->d_reclen += ndp->ni_dent.d_reclen;
1032 bwrite(bp);
1033 dp->i_flag |= IUPD|ICHG;
1034 }
1035 return (1);
1036}
1037
1038/*
1039 * Rewrite an existing directory entry to point at the inode
1040 * supplied. The parameters describing the directory entry are
1041 * set up by a call to namei.
1042 */
1043dirrewrite(dp, ip, ndp)
1044 struct inode *dp, *ip;
1045 struct nameidata *ndp;
1046{
1047
1048 ndp->ni_dent.d_ino = ip->i_number;
1049 u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
1050 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0);
1051 iput(dp);
1052}
1053
1054/*
1055 * Return buffer with contents of block "offset"
1056 * from the beginning of directory "ip". If "res"
1057 * is non-zero, fill it in with a pointer to the
1058 * remaining space in the directory.
1059 */
1060struct buf *
1061blkatoff(ip, offset, res)
1062 struct inode *ip;
1063 off_t offset;
1064 char **res;
1065{
1066 register struct fs *fs = ip->i_fs;
1067 daddr_t lbn = lblkno(fs, offset);
1068 int base = blkoff(fs, offset);
1069 int bsize = blksize(fs, ip, lbn);
1070 daddr_t bn = fsbtodb(fs, bmap(ip, lbn, B_WRITE, base, bsize));
1071 register struct buf *bp;
1072
1073 if (u.u_error)
1074 return (0);
1075 bp = bread(ip->i_dev, bn, bsize);
1076 if (bp->b_flags & B_ERROR) {
1077 brelse(bp);
1078 return (0);
1079 }
1080 if (res)
1081 *res = bp->b_un.b_addr + base;
1082 return (bp);
1083}
1084
1085/*
1086 * Check if a directory is empty or not.
1087 * Inode supplied must be locked.
1088 *
1089 * Using a struct dirtemplate here is not precisely
1090 * what we want, but better than using a struct direct.
1091 *
1092 * NB: does not handle corrupted directories.
1093 */
1094dirempty(ip, parentino)
1095 register struct inode *ip;
1096 ino_t parentino;
1097{
1098 register off_t off;
1099 struct dirtemplate dbuf;
1100 register struct direct *dp = (struct direct *)&dbuf;
1101 int error, count;
1102#define MINDIRSIZ (sizeof (struct dirtemplate) / 2)
1103
1104 for (off = 0; off < ip->i_size; off += dp->d_reclen) {
1105 if (dp->d_reclen <= 0)
1106 return (0);
1107 error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ,
1108 off, 1, &count);
1109 /*
1110 * Since we read MINDIRSIZ, residual must
1111 * be 0 unless we're at end of file.
1112 */
1113 if (error || count != 0)
1114 return (0);
1115 /* skip empty entries */
1116 if (dp->d_ino == 0)
1117 continue;
1118 /* accept only "." and ".." */
1119 if (dp->d_namlen > 2)
1120 return (0);
1121 if (dp->d_name[0] != '.')
1122 return (0);
1123 /*
1124 * At this point d_namlen must be 1 or 2.
1125 * 1 implies ".", 2 implies ".." if second
1126 * char is also "."
1127 */
1128 if (dp->d_namlen == 1)
1129 continue;
1130 if (dp->d_name[1] == '.' && dp->d_ino == parentino)
1131 continue;
1132 return (0);
1133 }
1134 return (1);
1135}
1136
1137/*
1138 * Check if source directory is in the path of the target directory.
1139 * Target is supplied locked, source is unlocked.
1140 * The target is always iput() before returning.
1141 */
1142checkpath(source, target)
1143 struct inode *source, *target;
1144{
1145 struct dirtemplate dirbuf;
1146 register struct inode *ip;
1147 int error = 0;
1148
1149 ip = target;
1150 if (ip->i_number == source->i_number) {
1151 error = EEXIST;
1152 goto out;
1153 }
1154 if (ip->i_number == ROOTINO)
1155 goto out;
1156
1157 for (;;) {
1158 if ((ip->i_mode&IFMT) != IFDIR) {
1159 error = ENOTDIR;
1160 break;
1161 }
1162 error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf,
1163 sizeof (struct dirtemplate), (off_t)0, 1, (int *)0);
1164 if (error != 0)
1165 break;
1166 if (dirbuf.dotdot_namlen != 2 ||
1167 dirbuf.dotdot_name[0] != '.' ||
1168 dirbuf.dotdot_name[1] != '.') {
1169 error = ENOTDIR;
1170 break;
1171 }
1172 if (dirbuf.dotdot_ino == source->i_number) {
1173 error = EINVAL;
1174 break;
1175 }
1176 if (dirbuf.dotdot_ino == ROOTINO)
1177 break;
1178 iput(ip);
1179 ip = iget(ip->i_dev, ip->i_fs, dirbuf.dotdot_ino);
1180 if (ip == NULL) {
1181 error = u.u_error;
1182 break;
1183 }
1184 }
1185
1186out:
1187 if (error == ENOTDIR)
1188 printf("checkpath: .. not a directory\n");
1189 if (ip != NULL)
1190 iput(ip);
1191 return (error);
1192}
1193
1194/*
1195 * Name cache initialization, from main() when we are booting
1196 */
1197nchinit()
1198{
1199 register union nchash *nchp;
1200 register struct nch *ncp;
1201
1202 nchhead = 0;
1203 nchtail = &nchhead;
1204
1205 for (ncp = nch; ncp < &nch[nchsize]; ncp++) {
1206 ncp->nc_forw = ncp; /* hash chain */
1207 ncp->nc_back = ncp;
1208
1209 ncp->nc_nxt = NULL; /* lru chain */
1210 *nchtail = ncp;
1211 ncp->nc_prev = nchtail;
1212 nchtail = &ncp->nc_nxt;
1213
1214 /* all else is zero already */
1215 }
1216
1217 for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) {
1218 nchp->nch_head[0] = nchp;
1219 nchp->nch_head[1] = nchp;
1220 }
1221}
1222
1223/*
1224 * Cache flush, called when filesys is umounted to
1225 * remove entries that would now be invalid
1226 *
1227 * The line "nxtcp = nchhead" near the end is to avoid potential problems
1228 * if the cache lru chain is modified while we are dumping the
1229 * inode. This makes the algorithm O(n^2), but do you think I care?
1230 */
1231nchinval(dev)
1232 register dev_t dev;
1233{
1234 register struct nch *ncp, *nxtcp;
1235
1236 for (ncp = nchhead; ncp; ncp = nxtcp) {
1237 nxtcp = ncp->nc_nxt;
1238
1239 if (ncp->nc_ip == NULL ||
1240 (ncp->nc_idev != dev && ncp->nc_dev != dev))
1241 continue;
1242
1243 /* free the resources we had */
1244 ncp->nc_idev = NODEV;
1245 ncp->nc_dev = NODEV;
1246 ncp->nc_id = NULL;
1247 ncp->nc_ino = 0;
1248 ncp->nc_ip = NULL;
1249
1250
1251 /* remove the entry from its hash chain */
1252 remque(ncp);
1253 /* and make a dummy one */
1254 ncp->nc_forw = ncp;
1255 ncp->nc_back = ncp;
1256
1257 /* delete this entry from LRU chain */
1258 *ncp->nc_prev = nxtcp;
1259 if (nxtcp)
1260 nxtcp->nc_prev = ncp->nc_prev;
1261 else
1262 nchtail = ncp->nc_prev;
1263
1264 /* cause rescan of list, it may have altered */
1265 nxtcp = nchhead;
1266 /* put the now-free entry at head of LRU */
1267 ncp->nc_nxt = nxtcp;
1268 ncp->nc_prev = &nchhead;
1269 nxtcp->nc_prev = &ncp->nc_nxt;
1270 nchhead = ncp;
1271 }
1272}
1273
1274/*
1275 * Name cache invalidation of all entries.
1276 */
1277cacheinvalall()
1278{
1279 register struct nch *ncp;
1280
1281 for (ncp = nch; ncp < &nch[nchsize]; ncp++) {
1282 ncp->nc_id = 0;
1283 }
1284}