minor fix in ^X where rocount != rawq.c_cc
[unix-history] / usr / src / sys / kern / vfs_lookup.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 1982, 1986 Regents of the University of California.
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 *
6 * @(#)vfs_lookup.c 7.4 (Berkeley) %G%
7 */
8
9#include "param.h"
10#include "systm.h"
11#include "inode.h"
12#include "fs.h"
13#include "mount.h"
14#include "dir.h"
15#include "user.h"
16#include "buf.h"
17#include "conf.h"
18#include "uio.h"
19#include "kernel.h"
20#include "malloc.h"
21
22struct buf *blkatoff();
23int dirchk = 0;
24
25/*
26 * Structures associated with name cacheing.
27 */
28#define NCHHASH 32 /* size of hash table */
29
30#if ((NCHHASH)&((NCHHASH)-1)) != 0
31#define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH))
32#else
33#define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1))
34#endif
35
36union nchash {
37 union nchash *nch_head[2];
38 struct namecache *nch_chain[2];
39} nchash[NCHHASH];
40#define nch_forw nch_chain[0]
41#define nch_back nch_chain[1]
42
43struct namecache *nchhead, **nchtail; /* LRU chain pointers */
44struct nchstats nchstats; /* cache effectiveness statistics */
45
46/*
47 * Convert a pathname into a pointer to a locked inode.
48 * This is a very central and rather complicated routine.
49 * If the file system is not maintained in a strict tree hierarchy,
50 * this can result in a deadlock situation (see comments in code below).
51 *
52 * The flag argument is LOOKUP, CREATE, or DELETE depending on whether
53 * the name is to be looked up, created, or deleted. When CREATE or
54 * DELETE is specified, information usable in creating or deleteing a
55 * directory entry is also calculated. If flag has LOCKPARENT or'ed
56 * into it and the target of the pathname exists, namei returns both
57 * the target and its parent directory locked. When creating and
58 * LOCKPARENT is specified, the target may not be ".". When deleting
59 * and LOCKPARENT is specified, the target may be ".", but the caller
60 * must check to insure it does an irele and iput instead of two iputs.
61 *
62 * The FOLLOW flag is set when symbolic links are to be followed
63 * when they occur at the end of the name translation process.
64 * Symbolic links are always followed for all other pathname
65 * components other than the last.
66 *
67 * The segflg defines whether the name is to be copied from user
68 * space or kernel space.
69 *
70 * Name caching works as follows:
71 *
72 * Names found by directory scans are retained in a cache
73 * for future reference. It is managed LRU, so frequently
74 * used names will hang around. Cache is indexed by hash value
75 * obtained from (ino,dev,name) where ino & dev refer to the
76 * directory containing name.
77 *
78 * For simplicity (and economy of storage), names longer than
79 * a maximum length of NCHNAMLEN are not cached; they occur
80 * infrequently in any case, and are almost never of interest.
81 *
82 * Upon reaching the last segment of a path, if the reference
83 * is for DELETE, or NOCACHE is set (rewrite), and the
84 * name is located in the cache, it will be dropped.
85 *
86 * Overall outline of namei:
87 *
88 * copy in name
89 * get starting directory
90 * dirloop:
91 * check accessibility of directory
92 * dirloop2:
93 * copy next component of name to ndp->ni_dent
94 * handle degenerate case where name is null string
95 * look for name in cache, if found, then if at end of path
96 * and deleting or creating, drop it, else to haveino
97 * search for name in directory, to found or notfound
98 * notfound:
99 * if creating, return locked directory, leaving info on avail. slots
100 * else return error
101 * found:
102 * if at end of path and deleting, return information to allow delete
103 * if at end of path and rewriting (CREATE and LOCKPARENT), lock target
104 * inode and return info to allow rewrite
105 * if .. and on mounted filesys, look in mount table for parent
106 * if not at end, add name to cache; if at end and neither creating
107 * nor deleting, add name to cache
108 * haveino:
109 * if symbolic link, massage name in buffer and continue at dirloop
110 * if more components of name, do next level at dirloop
111 * return the answer as locked inode
112 *
113 * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode,
114 * but unlocked.
115 */
116struct inode *
117namei(ndp)
118 register struct nameidata *ndp;
119{
120 register char *cp; /* pointer into pathname argument */
121/* these variables refer to things which must be freed or unlocked */
122 register struct inode *dp = 0; /* the directory we are searching */
123 register struct namecache *ncp; /* cache slot for entry */
124 register struct fs *fs; /* file system that directory is in */
125 register struct buf *bp = 0; /* a buffer of directory entries */
126 register struct direct *ep; /* the current directory entry */
127 int entryoffsetinblock; /* offset of ep in bp's buffer */
128 register caddr_t nbp; /* buffer storing path name argument */
129/* these variables hold information about the search for a slot */
130 enum {NONE, COMPACT, FOUND} slotstatus;
131 int slotoffset = -1; /* offset of area with free space */
132 int slotsize; /* size of area at slotoffset */
133 int slotfreespace; /* amount of space free in slot */
134 int slotneeded; /* size of the entry we're seeking */
135/* */
136 int numdirpasses; /* strategy for directory search */
137 int endsearch; /* offset to end directory search */
138 int prevoff; /* ndp->ni_offset of previous entry */
139 int nlink = 0; /* number of symbolic links taken */
140 struct inode *pdp; /* saved dp during symlink work */
141 int error, i;
142 int lockparent;
143 int docache; /* == 0 do not cache last component */
144 int makeentry; /* != 0 if name to be added to cache */
145 unsigned hash; /* value of name hash for entry */
146 union nchash *nhp; /* cache chain head for entry */
147 int isdotdot; /* != 0 if current name is ".." */
148 int flag; /* op ie, LOOKUP, CREATE, or DELETE */
149 off_t enduseful; /* pointer past last used dir slot */
150
151 lockparent = ndp->ni_nameiop & LOCKPARENT;
152 docache = (ndp->ni_nameiop & NOCACHE) ^ NOCACHE;
153 flag = ndp->ni_nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW);
154 if (flag == DELETE || lockparent)
155 docache = 0;
156 /*
157 * Get a buffer for the name to be translated, and copy the
158 * name into the buffer.
159 */
160 MALLOC(nbp, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
161 if (ndp->ni_segflg == UIO_SYSSPACE)
162 error = copystr(ndp->ni_dirp, nbp, MAXPATHLEN, (u_int *)0);
163 else
164 error = copyinstr(ndp->ni_dirp, nbp, MAXPATHLEN, (u_int *)0);
165 if (error) {
166 u.u_error = error;
167 goto bad;
168 }
169
170 /*
171 * Get starting directory.
172 */
173 cp = nbp;
174 if (*cp == '/') {
175 while (*cp == '/')
176 cp++;
177 if ((dp = u.u_rdir) == NULL)
178 dp = rootdir;
179 } else
180 dp = u.u_cdir;
181 fs = dp->i_fs;
182 ILOCK(dp);
183 dp->i_count++;
184 ndp->ni_pdir = (struct inode *)0xc0000000; /* illegal */
185 ndp->ni_endoff = 0;
186
187 /*
188 * We come to dirloop to search a new directory.
189 * The directory must be locked so that it can be
190 * iput, and fs must be already set to dp->i_fs.
191 */
192dirloop:
193 /*
194 * Check accessiblity of directory.
195 */
196 if ((dp->i_mode&IFMT) != IFDIR) {
197 u.u_error = ENOTDIR;
198 goto bad;
199 }
200 if (access(dp, IEXEC))
201 goto bad;
202
203dirloop2:
204 /*
205 * Copy next component of name to ndp->ni_dent.
206 */
207 hash = 0;
208 for (i = 0; *cp != 0 && *cp != '/'; cp++) {
209 if (i >= MAXNAMLEN) {
210 u.u_error = ENAMETOOLONG;
211 goto bad;
212 }
213 if (*cp & 0200)
214 if ((*cp&0377) == ('/'|0200) || flag != DELETE) {
215 u.u_error = EINVAL;
216 goto bad;
217 }
218 ndp->ni_dent.d_name[i++] = *cp;
219 hash += (unsigned char)*cp * i;
220 }
221 ndp->ni_dent.d_namlen = i;
222 ndp->ni_dent.d_name[i] = '\0';
223 isdotdot = (i == 2 &&
224 ndp->ni_dent.d_name[0] == '.' && ndp->ni_dent.d_name[1] == '.');
225 makeentry = 1;
226 if (*cp == '\0' && docache == 0)
227 makeentry = 0;
228
229 /*
230 * Check for degenerate name (e.g. / or "")
231 * which is a way of talking about a directory,
232 * e.g. like "/." or ".".
233 */
234 if (ndp->ni_dent.d_name[0] == '\0') {
235 if (flag != LOOKUP || lockparent) {
236 u.u_error = EISDIR;
237 goto bad;
238 }
239 FREE(nbp, M_NAMEI);
240 return (dp);
241 }
242
243 /*
244 * We now have a segment name to search for, and a directory to search.
245 *
246 * Before tediously performing a linear scan of the directory,
247 * check the name cache to see if the directory/name pair
248 * we are looking for is known already. We don't do this
249 * if the segment name is long, simply so the cache can avoid
250 * holding long names (which would either waste space, or
251 * add greatly to the complexity).
252 */
253 if (ndp->ni_dent.d_namlen > NCHNAMLEN) {
254 nchstats.ncs_long++;
255 makeentry = 0;
256 } else {
257 nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)];
258 for (ncp = nhp->nch_forw; ncp != (struct namecache *)nhp;
259 ncp = ncp->nc_forw) {
260 if (ncp->nc_ino == dp->i_number &&
261 ncp->nc_dev == dp->i_dev &&
262 ncp->nc_nlen == ndp->ni_dent.d_namlen &&
263 !bcmp(ncp->nc_name, ndp->ni_dent.d_name,
264 (unsigned)ncp->nc_nlen))
265 break;
266 }
267 if (ncp == (struct namecache *)nhp) {
268 nchstats.ncs_miss++;
269 ncp = NULL;
270 } else {
271 if (ncp->nc_id != ncp->nc_ip->i_id)
272 nchstats.ncs_falsehits++;
273 else if (!makeentry)
274 nchstats.ncs_badhits++;
275 else {
276 /*
277 * move this slot to end of LRU
278 * chain, if not already there
279 */
280 if (ncp->nc_nxt) {
281 /* remove from LRU chain */
282 *ncp->nc_prev = ncp->nc_nxt;
283 ncp->nc_nxt->nc_prev = ncp->nc_prev;
284
285 /* and replace at end of it */
286 ncp->nc_nxt = NULL;
287 ncp->nc_prev = nchtail;
288 *nchtail = ncp;
289 nchtail = &ncp->nc_nxt;
290 }
291
292 /*
293 * Get the next inode in the path.
294 * See comment above other `IUNLOCK' code for
295 * an explaination of the locking protocol.
296 */
297 pdp = dp;
298 if (!isdotdot || dp != u.u_rdir)
299 dp = ncp->nc_ip;
300 if (dp == NULL)
301 panic("namei: null cache ino");
302 if (pdp == dp)
303 dp->i_count++;
304 else if (isdotdot) {
305 IUNLOCK(pdp);
306 igrab(dp);
307 } else {
308 igrab(dp);
309 IUNLOCK(pdp);
310 }
311
312 /*
313 * Verify that the inode that we got
314 * did not change while we were waiting
315 * for it to be locked.
316 */
317 if (ncp->nc_id != ncp->nc_ip->i_id) {
318 iput(dp);
319 ILOCK(pdp);
320 dp = pdp;
321 nchstats.ncs_falsehits++;
322 } else {
323 ndp->ni_dent.d_ino = dp->i_number;
324 /* ni_dent.d_reclen is garbage ... */
325 nchstats.ncs_goodhits++;
326 goto haveino;
327 }
328 }
329
330 /*
331 * Last component and we are renaming or deleting,
332 * the cache entry is invalid, or otherwise don't
333 * want cache entry to exist.
334 */
335 /* remove from LRU chain */
336 *ncp->nc_prev = ncp->nc_nxt;
337 if (ncp->nc_nxt)
338 ncp->nc_nxt->nc_prev = ncp->nc_prev;
339 else
340 nchtail = ncp->nc_prev;
341 remque(ncp); /* remove from hash chain */
342 /* insert at head of LRU list (first to grab) */
343 ncp->nc_nxt = nchhead;
344 ncp->nc_prev = &nchhead;
345 nchhead->nc_prev = &ncp->nc_nxt;
346 nchhead = ncp;
347 /* and make a dummy hash chain */
348 ncp->nc_forw = ncp;
349 ncp->nc_back = ncp;
350 ncp = NULL;
351 }
352 }
353
354 /*
355 * Suppress search for slots unless creating
356 * file and at end of pathname, in which case
357 * we watch for a place to put the new file in
358 * case it doesn't already exist.
359 */
360 slotstatus = FOUND;
361 if (flag == CREATE && *cp == 0) {
362 slotstatus = NONE;
363 slotfreespace = 0;
364 slotneeded = DIRSIZ(&ndp->ni_dent);
365 }
366 /*
367 * If this is the same directory that this process
368 * previously searched, pick up where we last left off.
369 * We cache only lookups as these are the most common
370 * and have the greatest payoff. Caching CREATE has little
371 * benefit as it usually must search the entire directory
372 * to determine that the entry does not exist. Caching the
373 * location of the last DELETE has not reduced profiling time
374 * and hence has been removed in the interest of simplicity.
375 */
376 if (flag != LOOKUP || dp->i_number != u.u_ncache.nc_inumber ||
377 dp->i_dev != u.u_ncache.nc_dev) {
378 ndp->ni_offset = 0;
379 numdirpasses = 1;
380 } else {
381 if (u.u_ncache.nc_prevoffset > dp->i_size)
382 u.u_ncache.nc_prevoffset = 0;
383 ndp->ni_offset = u.u_ncache.nc_prevoffset;
384 entryoffsetinblock = blkoff(fs, ndp->ni_offset);
385 if (entryoffsetinblock != 0) {
386 bp = blkatoff(dp, ndp->ni_offset, (char **)0);
387 if (bp == 0)
388 goto bad;
389 }
390 numdirpasses = 2;
391 nchstats.ncs_2passes++;
392 }
393 endsearch = roundup(dp->i_size, DIRBLKSIZ);
394 enduseful = 0;
395
396searchloop:
397 while (ndp->ni_offset < endsearch) {
398 /*
399 * If offset is on a block boundary,
400 * read the next directory block.
401 * Release previous if it exists.
402 */
403 if (blkoff(fs, ndp->ni_offset) == 0) {
404 if (bp != NULL)
405 brelse(bp);
406 bp = blkatoff(dp, ndp->ni_offset, (char **)0);
407 if (bp == 0)
408 goto bad;
409 entryoffsetinblock = 0;
410 }
411 /*
412 * If still looking for a slot, and at a DIRBLKSIZE
413 * boundary, have to start looking for free space again.
414 */
415 if (slotstatus == NONE &&
416 (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) {
417 slotoffset = -1;
418 slotfreespace = 0;
419 }
420 /*
421 * Get pointer to next entry.
422 * Full validation checks are slow, so we only check
423 * enough to insure forward progress through the
424 * directory. Complete checks can be run by patching
425 * "dirchk" to be true.
426 */
427 ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock);
428 if (ep->d_reclen == 0 ||
429 dirchk && dirbadentry(ep, entryoffsetinblock)) {
430 dirbad(dp, ndp->ni_offset, "mangled entry");
431 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
432 ndp->ni_offset += i;
433 entryoffsetinblock += i;
434 continue;
435 }
436
437 /*
438 * If an appropriate sized slot has not yet been found,
439 * check to see if one is available. Also accumulate space
440 * in the current block so that we can determine if
441 * compaction is viable.
442 */
443 if (slotstatus != FOUND) {
444 int size = ep->d_reclen;
445
446 if (ep->d_ino != 0)
447 size -= DIRSIZ(ep);
448 if (size > 0) {
449 if (size >= slotneeded) {
450 slotstatus = FOUND;
451 slotoffset = ndp->ni_offset;
452 slotsize = ep->d_reclen;
453 } else if (slotstatus == NONE) {
454 slotfreespace += size;
455 if (slotoffset == -1)
456 slotoffset = ndp->ni_offset;
457 if (slotfreespace >= slotneeded) {
458 slotstatus = COMPACT;
459 slotsize = ndp->ni_offset +
460 ep->d_reclen - slotoffset;
461 }
462 }
463 }
464 }
465
466 /*
467 * Check for a name match.
468 */
469 if (ep->d_ino) {
470 if (ep->d_namlen == ndp->ni_dent.d_namlen &&
471 !bcmp(ndp->ni_dent.d_name, ep->d_name,
472 (unsigned)ep->d_namlen))
473 goto found;
474 }
475 prevoff = ndp->ni_offset;
476 ndp->ni_offset += ep->d_reclen;
477 entryoffsetinblock += ep->d_reclen;
478 if (ep->d_ino)
479 enduseful = ndp->ni_offset;
480 }
481/* notfound: */
482 /*
483 * If we started in the middle of the directory and failed
484 * to find our target, we must check the beginning as well.
485 */
486 if (numdirpasses == 2) {
487 numdirpasses--;
488 ndp->ni_offset = 0;
489 endsearch = u.u_ncache.nc_prevoffset;
490 goto searchloop;
491 }
492 /*
493 * If creating, and at end of pathname and current
494 * directory has not been removed, then can consider
495 * allowing file to be created.
496 */
497 if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) {
498 /*
499 * Access for write is interpreted as allowing
500 * creation of files in the directory.
501 */
502 if (access(dp, IWRITE))
503 goto bad;
504 /*
505 * Return an indication of where the new directory
506 * entry should be put. If we didn't find a slot,
507 * then set ndp->ni_count to 0 indicating that the new
508 * slot belongs at the end of the directory. If we found
509 * a slot, then the new entry can be put in the range
510 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count)
511 */
512 if (slotstatus == NONE) {
513 ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ);
514 ndp->ni_count = 0;
515 enduseful = ndp->ni_offset;
516 } else {
517 ndp->ni_offset = slotoffset;
518 ndp->ni_count = slotsize;
519 if (enduseful < slotoffset + slotsize)
520 enduseful = slotoffset + slotsize;
521 }
522 ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ);
523 dp->i_flag |= IUPD|ICHG;
524 if (bp)
525 brelse(bp);
526 FREE(nbp, M_NAMEI);
527 /*
528 * We return with the directory locked, so that
529 * the parameters we set up above will still be
530 * valid if we actually decide to do a direnter().
531 * We return NULL to indicate that the entry doesn't
532 * currently exist, leaving a pointer to the (locked)
533 * directory inode in ndp->ni_pdir.
534 */
535 ndp->ni_pdir = dp;
536 return (NULL);
537 }
538 u.u_error = ENOENT;
539 goto bad;
540found:
541 if (numdirpasses == 2)
542 nchstats.ncs_pass2++;
543 /*
544 * Check that directory length properly reflects presence
545 * of this entry.
546 */
547 if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) {
548 dirbad(dp, ndp->ni_offset, "i_size too small");
549 dp->i_size = entryoffsetinblock + DIRSIZ(ep);
550 dp->i_flag |= IUPD|ICHG;
551 }
552
553 /*
554 * Found component in pathname.
555 * If the final component of path name, save information
556 * in the cache as to where the entry was found.
557 */
558 if (*cp == '\0' && flag == LOOKUP) {
559 u.u_ncache.nc_prevoffset = ndp->ni_offset &~ (DIRBLKSIZ - 1);
560 u.u_ncache.nc_inumber = dp->i_number;
561 u.u_ncache.nc_dev = dp->i_dev;
562 }
563 /*
564 * Save directory entry's inode number and reclen in ndp->ni_dent,
565 * and release directory buffer.
566 */
567 ndp->ni_dent.d_ino = ep->d_ino;
568 ndp->ni_dent.d_reclen = ep->d_reclen;
569 brelse(bp);
570 bp = NULL;
571
572 /*
573 * If deleting, and at end of pathname, return
574 * parameters which can be used to remove file.
575 * If the lockparent flag isn't set, we return only
576 * the directory (in ndp->ni_pdir), otherwise we go
577 * on and lock the inode, being careful with ".".
578 */
579 if (flag == DELETE && *cp == 0) {
580 /*
581 * Write access to directory required to delete files.
582 */
583 if (access(dp, IWRITE))
584 goto bad;
585 ndp->ni_pdir = dp; /* for dirremove() */
586 /*
587 * Return pointer to current entry in ndp->ni_offset,
588 * and distance past previous entry (if there
589 * is a previous entry in this block) in ndp->ni_count.
590 * Save directory inode pointer in ndp->ni_pdir for dirremove().
591 */
592 if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0)
593 ndp->ni_count = 0;
594 else
595 ndp->ni_count = ndp->ni_offset - prevoff;
596 if (lockparent) {
597 if (dp->i_number == ndp->ni_dent.d_ino)
598 dp->i_count++;
599 else {
600 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
601 if (dp == NULL) {
602 iput(ndp->ni_pdir);
603 goto bad;
604 }
605 /*
606 * If directory is "sticky", then user must own
607 * the directory, or the file in it, else he
608 * may not delete it (unless he's root). This
609 * implements append-only directories.
610 */
611 if ((ndp->ni_pdir->i_mode & ISVTX) &&
612 u.u_uid != 0 &&
613 u.u_uid != ndp->ni_pdir->i_uid &&
614 dp->i_uid != u.u_uid) {
615 iput(ndp->ni_pdir);
616 u.u_error = EPERM;
617 goto bad;
618 }
619 }
620 }
621 FREE(nbp, M_NAMEI);
622 return (dp);
623 }
624
625 /*
626 * Special handling for ".." allowing chdir out of mounted
627 * file system: indirect .. in root inode to reevaluate
628 * in directory file system was mounted on.
629 */
630 if (isdotdot) {
631 if (dp == u.u_rdir) {
632 ndp->ni_dent.d_ino = dp->i_number;
633 makeentry = 0;
634 } else if (ndp->ni_dent.d_ino == ROOTINO &&
635 dp->i_number == ROOTINO) {
636 for (i = 1; i < NMOUNT; i++)
637 if (mount[i].m_fs != NULL &&
638 mount[i].m_dev == dp->i_dev) {
639 iput(dp);
640 dp = mount[i].m_inodp;
641 ILOCK(dp);
642 dp->i_count++;
643 fs = dp->i_fs;
644 cp -= 2; /* back over .. */
645 goto dirloop2;
646 }
647 }
648 }
649
650 /*
651 * If rewriting (rename), return the inode and the
652 * information required to rewrite the present directory
653 * Must get inode of directory entry to verify it's a
654 * regular file, or empty directory.
655 */
656 if ((flag == CREATE && lockparent) && *cp == 0) {
657 if (access(dp, IWRITE))
658 goto bad;
659 ndp->ni_pdir = dp; /* for dirrewrite() */
660 /*
661 * Careful about locking second inode.
662 * This can only occur if the target is ".".
663 */
664 if (dp->i_number == ndp->ni_dent.d_ino) {
665 u.u_error = EISDIR; /* XXX */
666 goto bad;
667 }
668 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
669 if (dp == NULL) {
670 iput(ndp->ni_pdir);
671 goto bad;
672 }
673 FREE(nbp, M_NAMEI);
674 return (dp);
675 }
676
677 /*
678 * Check for symbolic link, which may require us to massage the
679 * name before we continue translation. We do not `iput' the
680 * directory because we may need it again if the symbolic link
681 * is relative to the current directory. Instead we save it
682 * unlocked as "pdp". We must get the target inode before unlocking
683 * the directory to insure that the inode will not be removed
684 * before we get it. We prevent deadlock by always fetching
685 * inodes from the root, moving down the directory tree. Thus
686 * when following backward pointers ".." we must unlock the
687 * parent directory before getting the requested directory.
688 * There is a potential race condition here if both the current
689 * and parent directories are removed before the `iget' for the
690 * inode associated with ".." returns. We hope that this occurs
691 * infrequently since we cannot avoid this race condition without
692 * implementing a sophisticated deadlock detection algorithm.
693 * Note also that this simple deadlock detection scheme will not
694 * work if the file system has any hard links other than ".."
695 * that point backwards in the directory structure.
696 */
697 pdp = dp;
698 if (isdotdot) {
699 IUNLOCK(pdp); /* race to get the inode */
700 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
701 if (dp == NULL)
702 goto bad2;
703 } else if (dp->i_number == ndp->ni_dent.d_ino) {
704 dp->i_count++; /* we want ourself, ie "." */
705 } else {
706 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
707 IUNLOCK(pdp);
708 if (dp == NULL)
709 goto bad2;
710 }
711
712 /*
713 * Insert name into cache if appropriate.
714 */
715 if (makeentry) {
716 if (ncp != NULL)
717 panic("namei: duplicating cache");
718 /*
719 * Free the cache slot at head of lru chain.
720 */
721 if (ncp = nchhead) {
722 /* remove from lru chain */
723 *ncp->nc_prev = ncp->nc_nxt;
724 if (ncp->nc_nxt)
725 ncp->nc_nxt->nc_prev = ncp->nc_prev;
726 else
727 nchtail = ncp->nc_prev;
728 remque(ncp); /* remove from old hash chain */
729 /* grab the inode we just found */
730 ncp->nc_ip = dp;
731 /* fill in cache info */
732 ncp->nc_ino = pdp->i_number; /* parents inum */
733 ncp->nc_dev = pdp->i_dev; /* & device */
734 ncp->nc_idev = dp->i_dev; /* our device */
735 ncp->nc_id = dp->i_id; /* identifier */
736 ncp->nc_nlen = ndp->ni_dent.d_namlen;
737 bcopy(ndp->ni_dent.d_name, ncp->nc_name,
738 (unsigned)ncp->nc_nlen);
739 /* link at end of lru chain */
740 ncp->nc_nxt = NULL;
741 ncp->nc_prev = nchtail;
742 *nchtail = ncp;
743 nchtail = &ncp->nc_nxt;
744 /* and insert on hash chain */
745 insque(ncp, nhp);
746 }
747 }
748
749haveino:
750 fs = dp->i_fs;
751
752 /*
753 * Check for symbolic link
754 */
755 if ((dp->i_mode & IFMT) == IFLNK &&
756 ((ndp->ni_nameiop & FOLLOW) || *cp == '/')) {
757 u_int pathlen = strlen(cp) + 1;
758
759 if (dp->i_size + pathlen >= MAXPATHLEN - 1) {
760 u.u_error = ENAMETOOLONG;
761 goto bad2;
762 }
763 if (++nlink > MAXSYMLINKS) {
764 u.u_error = ELOOP;
765 goto bad2;
766 }
767 ovbcopy(cp, nbp + dp->i_size, pathlen);
768 u.u_error =
769 rdwri(UIO_READ, dp, nbp, (int)dp->i_size,
770 (off_t)0, 1, (int *)0);
771 if (u.u_error)
772 goto bad2;
773 cp = nbp;
774 iput(dp);
775 if (*cp == '/') {
776 irele(pdp);
777 while (*cp == '/')
778 cp++;
779 if ((dp = u.u_rdir) == NULL)
780 dp = rootdir;
781 ILOCK(dp);
782 dp->i_count++;
783 } else {
784 dp = pdp;
785 ILOCK(dp);
786 }
787 fs = dp->i_fs;
788 goto dirloop;
789 }
790
791 /*
792 * Not a symbolic link. If more pathname,
793 * continue at next component, else return.
794 */
795 if (*cp == '/') {
796 while (*cp == '/')
797 cp++;
798 irele(pdp);
799 goto dirloop;
800 }
801 FREE(nbp, M_NAMEI);
802 if (lockparent)
803 ndp->ni_pdir = pdp;
804 else
805 irele(pdp);
806 return (dp);
807bad2:
808 irele(pdp);
809bad:
810 if (bp)
811 brelse(bp);
812 if (dp)
813 iput(dp);
814 FREE(nbp, M_NAMEI);
815 return (NULL);
816}
817
818
819dirbad(ip, offset, how)
820 struct inode *ip;
821 off_t offset;
822 char *how;
823{
824
825 printf("%s: bad dir ino %d at offset %d: %s\n",
826 ip->i_fs->fs_fsmnt, ip->i_number, offset, how);
827}
828
829/*
830 * Do consistency checking on a directory entry:
831 * record length must be multiple of 4
832 * entry must fit in rest of its DIRBLKSIZ block
833 * record must be large enough to contain entry
834 * name is not longer than MAXNAMLEN
835 * name must be as long as advertised, and null terminated
836 */
837dirbadentry(ep, entryoffsetinblock)
838 register struct direct *ep;
839 int entryoffsetinblock;
840{
841 register int i;
842
843 if ((ep->d_reclen & 0x3) != 0 ||
844 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
845 ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN)
846 return (1);
847 for (i = 0; i < ep->d_namlen; i++)
848 if (ep->d_name[i] == '\0')
849 return (1);
850 return (ep->d_name[i]);
851}
852
853/*
854 * Write a directory entry after a call to namei, using the parameters
855 * which it left in the u. area. The argument ip is the inode which
856 * the new directory entry will refer to. The u. area field ndp->ni_pdir is
857 * a pointer to the directory to be written, which was left locked by
858 * namei. Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate
859 * how the space for the new entry is to be gotten.
860 */
861direnter(ip, ndp)
862 struct inode *ip;
863 register struct nameidata *ndp;
864{
865 register struct direct *ep, *nep;
866 register struct inode *dp = ndp->ni_pdir;
867 struct buf *bp;
868 int loc, spacefree, error = 0;
869 u_int dsize;
870 int newentrysize;
871 char *dirbuf;
872
873 ndp->ni_dent.d_ino = ip->i_number;
874 newentrysize = DIRSIZ(&ndp->ni_dent);
875 if (ndp->ni_count == 0) {
876 /*
877 * If ndp->ni_count is 0, then namei could find no space in the
878 * directory. In this case ndp->ni_offset will be on a directory
879 * block boundary and we will write the new entry into a fresh
880 * block.
881 */
882 if (ndp->ni_offset&(DIRBLKSIZ-1))
883 panic("wdir: newblk");
884 ndp->ni_dent.d_reclen = DIRBLKSIZ;
885 error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
886 newentrysize, ndp->ni_offset, 1, (int *)0);
887 if (DIRBLKSIZ > dp->i_fs->fs_fsize)
888 panic("wdir: blksize"); /* XXX - should grow w/bmap() */
889 else
890 dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
891 iput(dp);
892 return (error);
893 }
894
895 /*
896 * If ndp->ni_count is non-zero, then namei found space for the new
897 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count.
898 * in the directory. To use this space, we may have to compact
899 * the entries located there, by copying them together towards
900 * the beginning of the block, leaving the free space in
901 * one usable chunk at the end.
902 */
903
904 /*
905 * Increase size of directory if entry eats into new space.
906 * This should never push the size past a new multiple of
907 * DIRBLKSIZE.
908 *
909 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
910 */
911 if (ndp->ni_offset + ndp->ni_count > dp->i_size)
912 dp->i_size = ndp->ni_offset + ndp->ni_count;
913 /*
914 * Get the block containing the space for the new directory
915 * entry. Should return error by result instead of u.u_error.
916 */
917 bp = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf);
918 if (bp == 0) {
919 iput(dp);
920 return (u.u_error);
921 }
922 /*
923 * Find space for the new entry. In the simple case, the
924 * entry at offset base will have the space. If it does
925 * not, then namei arranged that compacting the region
926 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space.
927 */
928 ep = (struct direct *)dirbuf;
929 dsize = DIRSIZ(ep);
930 spacefree = ep->d_reclen - dsize;
931 for (loc = ep->d_reclen; loc < ndp->ni_count; ) {
932 nep = (struct direct *)(dirbuf + loc);
933 if (ep->d_ino) {
934 /* trim the existing slot */
935 ep->d_reclen = dsize;
936 ep = (struct direct *)((char *)ep + dsize);
937 } else {
938 /* overwrite; nothing there; header is ours */
939 spacefree += dsize;
940 }
941 dsize = DIRSIZ(nep);
942 spacefree += nep->d_reclen - dsize;
943 loc += nep->d_reclen;
944 bcopy((caddr_t)nep, (caddr_t)ep, dsize);
945 }
946 /*
947 * Update the pointer fields in the previous entry (if any),
948 * copy in the new entry, and write out the block.
949 */
950 if (ep->d_ino == 0) {
951 if (spacefree + dsize < newentrysize)
952 panic("wdir: compact1");
953 ndp->ni_dent.d_reclen = spacefree + dsize;
954 } else {
955 if (spacefree < newentrysize)
956 panic("wdir: compact2");
957 ndp->ni_dent.d_reclen = spacefree;
958 ep->d_reclen = dsize;
959 ep = (struct direct *)((char *)ep + dsize);
960 }
961 bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize);
962 bwrite(bp);
963 dp->i_flag |= IUPD|ICHG;
964 if (ndp->ni_endoff && ndp->ni_endoff < dp->i_size)
965 itrunc(dp, (u_long)ndp->ni_endoff);
966 iput(dp);
967 return (error);
968}
969
970/*
971 * Remove a directory entry after a call to namei, using the
972 * parameters which it left in the u. area. The u. entry
973 * ni_offset contains the offset into the directory of the
974 * entry to be eliminated. The ni_count field contains the
975 * size of the previous record in the directory. If this
976 * is 0, the first entry is being deleted, so we need only
977 * zero the inode number to mark the entry as free. If the
978 * entry isn't the first in the directory, we must reclaim
979 * the space of the now empty record by adding the record size
980 * to the size of the previous entry.
981 */
982dirremove(ndp)
983 register struct nameidata *ndp;
984{
985 register struct inode *dp = ndp->ni_pdir;
986 register struct buf *bp;
987 struct direct *ep;
988
989 if (ndp->ni_count == 0) {
990 /*
991 * First entry in block: set d_ino to zero.
992 */
993 ndp->ni_dent.d_ino = 0;
994 (void) rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
995 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0);
996 } else {
997 /*
998 * Collapse new free space into previous entry.
999 */
1000 bp = blkatoff(dp, ndp->ni_offset - ndp->ni_count, (char **)&ep);
1001 if (bp == 0)
1002 return (0);
1003 ep->d_reclen += ndp->ni_dent.d_reclen;
1004 bwrite(bp);
1005 dp->i_flag |= IUPD|ICHG;
1006 }
1007 return (1);
1008}
1009
1010/*
1011 * Rewrite an existing directory entry to point at the inode
1012 * supplied. The parameters describing the directory entry are
1013 * set up by a call to namei.
1014 */
1015dirrewrite(dp, ip, ndp)
1016 struct inode *dp, *ip;
1017 struct nameidata *ndp;
1018{
1019
1020 ndp->ni_dent.d_ino = ip->i_number;
1021 u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
1022 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0);
1023 iput(dp);
1024}
1025
1026/*
1027 * Return buffer with contents of block "offset"
1028 * from the beginning of directory "ip". If "res"
1029 * is non-zero, fill it in with a pointer to the
1030 * remaining space in the directory.
1031 */
1032struct buf *
1033blkatoff(ip, offset, res)
1034 struct inode *ip;
1035 off_t offset;
1036 char **res;
1037{
1038 register struct fs *fs = ip->i_fs;
1039 daddr_t lbn = lblkno(fs, offset);
1040 int bsize = blksize(fs, ip, lbn);
1041 register struct buf *bp;
1042 daddr_t bn;
1043
1044 bn = bmap(ip, lbn, B_READ, bsize);
1045 if (u.u_error)
1046 return (0);
1047 if (bn == (daddr_t)-1) {
1048 dirbad(ip, offset, "hole in dir");
1049 return (0);
1050 }
1051#ifdef SECSIZE
1052 bp = bread(ip->i_dev, fsbtodb(fs, bn), bsize, fs->fs_dbsize);
1053#else SECSIZE
1054 bp = bread(ip->i_dev, fsbtodb(fs, bn), bsize);
1055#endif SECSIZE
1056 if (bp->b_flags & B_ERROR) {
1057 brelse(bp);
1058 return (0);
1059 }
1060 if (res)
1061 *res = bp->b_un.b_addr + blkoff(fs, offset);
1062 return (bp);
1063}
1064
1065/*
1066 * Check if a directory is empty or not.
1067 * Inode supplied must be locked.
1068 *
1069 * Using a struct dirtemplate here is not precisely
1070 * what we want, but better than using a struct direct.
1071 *
1072 * NB: does not handle corrupted directories.
1073 */
1074dirempty(ip, parentino)
1075 register struct inode *ip;
1076 ino_t parentino;
1077{
1078 register off_t off;
1079 struct dirtemplate dbuf;
1080 register struct direct *dp = (struct direct *)&dbuf;
1081 int error, count;
1082#define MINDIRSIZ (sizeof (struct dirtemplate) / 2)
1083
1084 for (off = 0; off < ip->i_size; off += dp->d_reclen) {
1085 error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ,
1086 off, 1, &count);
1087 /*
1088 * Since we read MINDIRSIZ, residual must
1089 * be 0 unless we're at end of file.
1090 */
1091 if (error || count != 0)
1092 return (0);
1093 /* avoid infinite loops */
1094 if (dp->d_reclen == 0)
1095 return (0);
1096 /* skip empty entries */
1097 if (dp->d_ino == 0)
1098 continue;
1099 /* accept only "." and ".." */
1100 if (dp->d_namlen > 2)
1101 return (0);
1102 if (dp->d_name[0] != '.')
1103 return (0);
1104 /*
1105 * At this point d_namlen must be 1 or 2.
1106 * 1 implies ".", 2 implies ".." if second
1107 * char is also "."
1108 */
1109 if (dp->d_namlen == 1)
1110 continue;
1111 if (dp->d_name[1] == '.' && dp->d_ino == parentino)
1112 continue;
1113 return (0);
1114 }
1115 return (1);
1116}
1117
1118/*
1119 * Check if source directory is in the path of the target directory.
1120 * Target is supplied locked, source is unlocked.
1121 * The target is always iput() before returning.
1122 */
1123checkpath(source, target)
1124 struct inode *source, *target;
1125{
1126 struct dirtemplate dirbuf;
1127 register struct inode *ip;
1128 int error = 0;
1129
1130 ip = target;
1131 if (ip->i_number == source->i_number) {
1132 error = EEXIST;
1133 goto out;
1134 }
1135 if (ip->i_number == ROOTINO)
1136 goto out;
1137
1138 for (;;) {
1139 if ((ip->i_mode&IFMT) != IFDIR) {
1140 error = ENOTDIR;
1141 break;
1142 }
1143 error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf,
1144 sizeof (struct dirtemplate), (off_t)0, 1, (int *)0);
1145 if (error != 0)
1146 break;
1147 if (dirbuf.dotdot_namlen != 2 ||
1148 dirbuf.dotdot_name[0] != '.' ||
1149 dirbuf.dotdot_name[1] != '.') {
1150 error = ENOTDIR;
1151 break;
1152 }
1153 if (dirbuf.dotdot_ino == source->i_number) {
1154 error = EINVAL;
1155 break;
1156 }
1157 if (dirbuf.dotdot_ino == ROOTINO)
1158 break;
1159 iput(ip);
1160 ip = iget(ip->i_dev, ip->i_fs, dirbuf.dotdot_ino);
1161 if (ip == NULL) {
1162 error = u.u_error;
1163 break;
1164 }
1165 }
1166
1167out:
1168 if (error == ENOTDIR)
1169 printf("checkpath: .. not a directory\n");
1170 if (ip != NULL)
1171 iput(ip);
1172 return (error);
1173}
1174
1175/*
1176 * Name cache initialization, from main() when we are booting
1177 */
1178nchinit()
1179{
1180 register union nchash *nchp;
1181 register struct namecache *ncp;
1182
1183 nchhead = 0;
1184 nchtail = &nchhead;
1185 for (ncp = namecache; ncp < &namecache[nchsize]; ncp++) {
1186 ncp->nc_forw = ncp; /* hash chain */
1187 ncp->nc_back = ncp;
1188 ncp->nc_nxt = NULL; /* lru chain */
1189 *nchtail = ncp;
1190 ncp->nc_prev = nchtail;
1191 nchtail = &ncp->nc_nxt;
1192 /* all else is zero already */
1193 }
1194 for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) {
1195 nchp->nch_head[0] = nchp;
1196 nchp->nch_head[1] = nchp;
1197 }
1198}
1199
1200/*
1201 * Cache flush, called when filesys is umounted to
1202 * remove entries that would now be invalid
1203 *
1204 * The line "nxtcp = nchhead" near the end is to avoid potential problems
1205 * if the cache lru chain is modified while we are dumping the
1206 * inode. This makes the algorithm O(n^2), but do you think I care?
1207 */
1208nchinval(dev)
1209 register dev_t dev;
1210{
1211 register struct namecache *ncp, *nxtcp;
1212
1213 for (ncp = nchhead; ncp; ncp = nxtcp) {
1214 nxtcp = ncp->nc_nxt;
1215 if (ncp->nc_ip == NULL ||
1216 (ncp->nc_idev != dev && ncp->nc_dev != dev))
1217 continue;
1218 /* free the resources we had */
1219 ncp->nc_idev = NODEV;
1220 ncp->nc_dev = NODEV;
1221 ncp->nc_id = NULL;
1222 ncp->nc_ino = 0;
1223 ncp->nc_ip = NULL;
1224 remque(ncp); /* remove entry from its hash chain */
1225 ncp->nc_forw = ncp; /* and make a dummy one */
1226 ncp->nc_back = ncp;
1227 /* delete this entry from LRU chain */
1228 *ncp->nc_prev = nxtcp;
1229 if (nxtcp)
1230 nxtcp->nc_prev = ncp->nc_prev;
1231 else
1232 nchtail = ncp->nc_prev;
1233 /* cause rescan of list, it may have altered */
1234 nxtcp = nchhead;
1235 /* put the now-free entry at head of LRU */
1236 ncp->nc_nxt = nxtcp;
1237 ncp->nc_prev = &nchhead;
1238 nxtcp->nc_prev = &ncp->nc_nxt;
1239 nchhead = ncp;
1240 }
1241}
1242
1243/*
1244 * Name cache invalidation of all entries.
1245 */
1246cacheinvalall()
1247{
1248 register struct namecache *ncp;
1249
1250 for (ncp = namecache; ncp < &namecache[nchsize]; ncp++)
1251 ncp->nc_id = 0;
1252}