X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/68791ee2c22f1fd8b0fa121f804b8d9e036bac06..f1d6a550c8b18c1ff8b21c5facf75e227c20258b:/usr/src/usr.bin/find/find.c diff --git a/usr/src/usr.bin/find/find.c b/usr/src/usr.bin/find/find.c index 8e035f0f95..31b5632f18 100644 --- a/usr/src/usr.bin/find/find.c +++ b/usr/src/usr.bin/find/find.c @@ -1,7 +1,9 @@ -static char *sccsid = "@(#)find.c 4.2 (Berkeley) %G%"; +static char *sccsid = "@(#)find.c 4.8 (Berkeley) %G%"; + /* find COMPILE: cc -o find -s -O -i find.c -lS */ + #include -#include +#include #include #include #define A_DAY 86400L /* a day full of seconds */ @@ -41,13 +43,48 @@ char Home[128]; long Blocks; char *rindex(); char *sbrk(); -main(argc, argv) char *argv[]; + +/* + * SEE ALSO: find.squeeze, find.bigram.c, find.code.c + * Usenix ;login:, February/March, 1983, p. 8. + * + * REVISIONS: James A. Woods, Informatics General Corporation, + * NASA Ames Research Center, 6/81. + * + * The second form searches a pre-computed filelist + * (constructed nightly by /usr/lib/crontab) which is + * compressed by find.squeeze (v.i.z.) The effect of + * find + * is similar to + * find / +0 -name "**" -print + * but much faster. + * + * 8/82 faster yet + incorporation of bigram coding -- jaw + * + * 1/83 incorporate glob-style matching -- jaw + */ +#define AMES 1 + +main(argc, argv) + int argc; + char *argv[]; { struct anode *exlist; int paths; register char *cp, *sp = 0; FILE *pwd, *popen(); +#ifdef AMES + if (argc < 2) { + fprintf(stderr, + "Usage: find name, or find path-list predicate-list\n"); + exit(1); + } + if (argc == 2) { + fastfind(argv[1]); + exit(0); + } +#endif time(&Now); pwd = popen("pwd", "r"); fgets(Home, 128, pwd); @@ -181,7 +218,7 @@ struct anode *e3() { /* parse parens and predicates */ return(mk(ino, (struct anode *)atoi(b), (struct anode *)s)); else if(EQ(a, "-group")) { if((i=getunum("/etc/group", b)) == -1) { - if(gmatch(b, "[0-9][0-9]*")) + if(gmatch(b, "[0-9]*")) return mk(group, (struct anode *)atoi(b), (struct anode *)s); fprintf(stderr, "find: cannot find -group name\n"); exit(1); @@ -203,7 +240,8 @@ struct anode *e3() { /* parse parens and predicates */ i = s=='d' ? S_IFDIR : s=='b' ? S_IFBLK : s=='c' ? S_IFCHR : - s=='f' ? 0100000 : + s=='f' ? S_IFREG : + s=='l' ? S_IFLNK : 0; return(mk(type, (struct anode *)i, (struct anode *)0)); } @@ -456,8 +494,14 @@ doex(com) } nargv[np] = 0; if (np==0) return(9); - if(fork()) /*parent*/ wait(&ccode); - else { /*child*/ + if(fork()) /*parent*/ { +#include + int (*old)() = signal(SIGINT, SIG_IGN); + int (*oldq)() = signal(SIGQUIT, SIG_IGN); + wait(&ccode); + signal(SIGINT, old); + signal(SIGQUIT, oldq); + } else { /*child*/ chdir(Home); execvp(nargv[0], nargv, np); exit(1); @@ -498,22 +542,16 @@ getunum(f, s) char *f, *s; { /* find user/group name and return number */ } descend(name, fname, exlist) -struct anode *exlist; -char *name, *fname; + struct anode *exlist; + char *name, *fname; { - int dir = 0, /* open directory */ - offset, - dsize, - entries, - dirsize; - struct direct dentry[BUFSIZ / sizeof (struct direct)]; + DIR *dir = NULL; register struct direct *dp; - register char *c1, *c2; - int i; + register char *c1; int rv = 0; char *endofname; - if(stat(fname, &Statb)<0) { + if (lstat(fname, &Statb)<0) { fprintf(stderr, "find: bad status < %s >\n", name); return(0); } @@ -521,74 +559,39 @@ char *name, *fname; if((Statb.st_mode&S_IFMT)!=S_IFDIR) return(1); - for(c1 = name; *c1; ++c1); - if(*(c1-1) == '/') + for (c1 = name; *c1; ++c1); + if (*(c1-1) == '/') --c1; endofname = c1; - dirsize = Statb.st_size; - if(chdir(fname) == -1) + if (chdir(fname) == -1) return(0); - for(offset=0 ; offset < dirsize ; offset += BUFSIZ) { /* each block */ - dsize = BUFSIZ<(dirsize-offset)? BUFSIZ: (dirsize-offset); - if(!dir) { - if((dir=open(".", 0))<0) { - fprintf(stderr, "find: cannot open < %s >\n", - name); - rv = 0; - goto ret; - } - if(offset) lseek(dir, (long)offset, 0); - if(read(dir, (char *)dentry, dsize)<0) { - fprintf(stderr, "find: cannot read < %s >\n", - name); - rv = 0; - goto ret; - } - if(dir > 10) { - close(dir); - dir = 0; - } - } else - if(read(dir, (char *)dentry, dsize)<0) { - fprintf(stderr, "find: cannot read < %s >\n", - name); - rv = 0; - goto ret; - } - for(dp=dentry, entries=dsize>>4; entries; --entries, ++dp) { /* each directory entry */ - if(dp->d_ino==0 - || (dp->d_name[0]=='.' && dp->d_name[1]=='\0') - || (dp->d_name[0]=='.' && dp->d_name[1]=='.' && dp->d_name[2]=='\0')) - continue; - c1 = endofname; - *c1++ = '/'; - c2 = dp->d_name; - for(i=0; i<14; ++i) - if(*c2) - *c1++ = *c2++; - else - break; - *c1 = '\0'; - if(c1 == endofname) { /* ?? */ - rv = 0; - goto ret; - } - Fname = endofname+1; - if(!descend(name, Fname, exlist)) { - *endofname = '\0'; - chdir(Home); - if(chdir(Pathname) == -1) { - fprintf(stderr, "find: bad directory tree\n"); - exit(1); - } + if ((dir = opendir(".")) == NULL) { + fprintf(stderr, "find: cannot open < %s >\n", name); + rv = 0; + goto ret; + } + for (dp = readdir(dir); dp != NULL; dp = readdir(dir)) { + if ((dp->d_name[0]=='.' && dp->d_name[1]=='\0') || + (dp->d_name[0]=='.' && dp->d_name[1]=='.' && dp->d_name[2]=='\0')) + continue; + c1 = endofname; + *c1++ = '/'; + strcpy(c1, dp->d_name); + Fname = endofname+1; + if(!descend(name, Fname, exlist)) { + *endofname = '\0'; + chdir(Home); + if(chdir(Pathname) == -1) { + fprintf(stderr, "find: bad directory tree\n"); + exit(1); } } } rv = 1; ret: if(dir) - close(dir); + closedir(dir); if(chdir("..") == -1) { *endofname = '\0'; fprintf(stderr, "find: bad directory <%s>\n", name); @@ -707,3 +710,119 @@ again: } return f; } + +#ifdef AMES +/* + * 'fastfind' scans a file list for the full pathname of a file + * given only a piece of the name. The list has been processed with + * with "front-compression" and bigram coding. Front compression reduces + * space by a factor of 4-5, bigram coding by a further 20-25%. + * The codes are: + * + * 0-28 likeliest differential counts + offset to make nonnegative + * 30 escape code for out-of-range count to follow in next word + * 128-255 bigram codes, (128 most common, as determined by 'find.squeeze') + * 32-127 single character (printable) ascii residue + * + * A novel two-tiered string search technique is employed: + * + * First, a metacharacter-free subpattern and partial pathname is + * matched BACKWARDS to avoid full expansion of the pathname list. + * The time savings is 40-50% over forward matching, which cannot efficiently + * handle overlapped search patterns and compressed path residue. + * + * Then, the actual shell glob-style regular expression (if in this form) + * is matched against the candidate pathnames using the slower routines + * provided in the standard 'find'. + */ + +#define FCODES "/etc/find.codes" +#define YES 1 +#define NO 0 +#define OFFSET 14 +#define ESCCODE 30 + +fastfind ( pathpart ) + char pathpart[]; +{ + register char *p, *s; + register int c; + char *q, *index(), *patprep(); + int i, count, globflag; + FILE *fp, *fopen(); + char *patend, *cutoff; + char path[1024]; + char bigram1[128], bigram2[128]; + int found = NO; + + if ( (fp = fopen ( FCODES, "r" )) == NULL ) { + fprintf ( "find: can't open %s\n", FCODES ); + exit ( 1 ); + } + for ( i = 0; i < 128; i++ ) + bigram1[i] = getc ( fp ), bigram2[i] = getc ( fp ); + + if ( index ( pathpart, '*' ) || index ( pathpart, '?' ) || index ( pathpart, '[' ) ) + globflag = YES; + patend = patprep ( pathpart ); + + c = getc ( fp ); + for ( ; ; ) { + + count += ( (c == ESCCODE) ? getw ( fp ) : c ) - OFFSET; + + for ( p = path + count; (c = getc ( fp )) > ESCCODE; ) /* overlay old path */ + if ( c < 0200 ) + *p++ = c; + else /* bigrams are parity-marked */ + *p++ = bigram1[c & 0177], *p++ = bigram2[c & 0177]; + if ( c == EOF ) + break; + *p-- = NULL; + cutoff = ( found ? path : path + count); + + for ( found = NO, s = p; s >= cutoff; s-- ) + if ( *s == *patend ) { /* fast first char check */ + for ( p = patend - 1, q = s - 1; *p != NULL; p--, q-- ) + if ( *q != *p ) + break; + if ( *p == NULL ) { /* success on fast match */ + found = YES; + if ( globflag == NO || amatch ( path, pathpart ) ) + puts ( path ); + break; + } + } + } +} + +/* + extract first glob-free subpattern for fast pre-match; + prepend NULL for backwards match; return end of pattern +*/ +static char globfree[100]; + +char * +patprep ( p ) + register char *p; +{ + register char *subp = globfree; + + while ( *p == '*' || *p == '?' ) + p++; + if ( *p == '[' ) { + while ( *p != ']' && *p != NULL ) + p++; + p++; + } + *subp++ = NULL; + if ( *p != NULL ) /* copy first noglob string */ + while ( *p != '*' && *p != '?' && *p != '[' && *p != NULL && + subp < (globfree + sizeof(globfree)) ) + *subp++ = *p++; + else /* pattern has noglob chars only */ + *subp++ = '/'; /* ... check every path */ + *subp = NULL; + return ( --subp ); +} +#endif