-static char *sccsid = "@(#)find.c 4.2 (Berkeley) %G%";
+static char *sccsid = "@(#)find.c 4.8 (Berkeley) %G%";
+
/* find COMPILE: cc -o find -s -O -i find.c -lS */
+
#include <stdio.h>
-#include <sys/types.h>
+#include <sys/param.h>
#include <sys/dir.h>
#include <sys/stat.h>
#define A_DAY 86400L /* a day full of seconds */
long Blocks;
char *rindex();
char *sbrk();
-main(argc, argv) char *argv[];
+
+/*
+ * SEE ALSO: find.squeeze, find.bigram.c, find.code.c
+ * Usenix ;login:, February/March, 1983, p. 8.
+ *
+ * REVISIONS: James A. Woods, Informatics General Corporation,
+ * NASA Ames Research Center, 6/81.
+ *
+ * The second form searches a pre-computed filelist
+ * (constructed nightly by /usr/lib/crontab) which is
+ * compressed by find.squeeze (v.i.z.) The effect of
+ * find <name>
+ * is similar to
+ * find / +0 -name "*<name>*" -print
+ * but much faster.
+ *
+ * 8/82 faster yet + incorporation of bigram coding -- jaw
+ *
+ * 1/83 incorporate glob-style matching -- jaw
+ */
+#define AMES 1
+
+main(argc, argv)
+ int argc;
+ char *argv[];
{
struct anode *exlist;
int paths;
register char *cp, *sp = 0;
FILE *pwd, *popen();
+#ifdef AMES
+ if (argc < 2) {
+ fprintf(stderr,
+ "Usage: find name, or find path-list predicate-list\n");
+ exit(1);
+ }
+ if (argc == 2) {
+ fastfind(argv[1]);
+ exit(0);
+ }
+#endif
time(&Now);
pwd = popen("pwd", "r");
fgets(Home, 128, pwd);
return(mk(ino, (struct anode *)atoi(b), (struct anode *)s));
else if(EQ(a, "-group")) {
if((i=getunum("/etc/group", b)) == -1) {
- if(gmatch(b, "[0-9][0-9]*"))
+ if(gmatch(b, "[0-9]*"))
return mk(group, (struct anode *)atoi(b), (struct anode *)s);
fprintf(stderr, "find: cannot find -group name\n");
exit(1);
i = s=='d' ? S_IFDIR :
s=='b' ? S_IFBLK :
s=='c' ? S_IFCHR :
- s=='f' ? 0100000 :
+ s=='f' ? S_IFREG :
+ s=='l' ? S_IFLNK :
0;
return(mk(type, (struct anode *)i, (struct anode *)0));
}
}
nargv[np] = 0;
if (np==0) return(9);
- if(fork()) /*parent*/ wait(&ccode);
- else { /*child*/
+ if(fork()) /*parent*/ {
+#include <signal.h>
+ int (*old)() = signal(SIGINT, SIG_IGN);
+ int (*oldq)() = signal(SIGQUIT, SIG_IGN);
+ wait(&ccode);
+ signal(SIGINT, old);
+ signal(SIGQUIT, oldq);
+ } else { /*child*/
chdir(Home);
execvp(nargv[0], nargv, np);
exit(1);
}
descend(name, fname, exlist)
-struct anode *exlist;
-char *name, *fname;
+ struct anode *exlist;
+ char *name, *fname;
{
- int dir = 0, /* open directory */
- offset,
- dsize,
- entries,
- dirsize;
- struct direct dentry[BUFSIZ / sizeof (struct direct)];
+ DIR *dir = NULL;
register struct direct *dp;
- register char *c1, *c2;
- int i;
+ register char *c1;
int rv = 0;
char *endofname;
- if(stat(fname, &Statb)<0) {
+ if (lstat(fname, &Statb)<0) {
fprintf(stderr, "find: bad status < %s >\n", name);
return(0);
}
if((Statb.st_mode&S_IFMT)!=S_IFDIR)
return(1);
- for(c1 = name; *c1; ++c1);
- if(*(c1-1) == '/')
+ for (c1 = name; *c1; ++c1);
+ if (*(c1-1) == '/')
--c1;
endofname = c1;
- dirsize = Statb.st_size;
- if(chdir(fname) == -1)
+ if (chdir(fname) == -1)
return(0);
- for(offset=0 ; offset < dirsize ; offset += BUFSIZ) { /* each block */
- dsize = BUFSIZ<(dirsize-offset)? BUFSIZ: (dirsize-offset);
- if(!dir) {
- if((dir=open(".", 0))<0) {
- fprintf(stderr, "find: cannot open < %s >\n",
- name);
- rv = 0;
- goto ret;
- }
- if(offset) lseek(dir, (long)offset, 0);
- if(read(dir, (char *)dentry, dsize)<0) {
- fprintf(stderr, "find: cannot read < %s >\n",
- name);
- rv = 0;
- goto ret;
- }
- if(dir > 10) {
- close(dir);
- dir = 0;
- }
- } else
- if(read(dir, (char *)dentry, dsize)<0) {
- fprintf(stderr, "find: cannot read < %s >\n",
- name);
- rv = 0;
- goto ret;
- }
- for(dp=dentry, entries=dsize>>4; entries; --entries, ++dp) { /* each directory entry */
- if(dp->d_ino==0
- || (dp->d_name[0]=='.' && dp->d_name[1]=='\0')
- || (dp->d_name[0]=='.' && dp->d_name[1]=='.' && dp->d_name[2]=='\0'))
- continue;
- c1 = endofname;
- *c1++ = '/';
- c2 = dp->d_name;
- for(i=0; i<14; ++i)
- if(*c2)
- *c1++ = *c2++;
- else
- break;
- *c1 = '\0';
- if(c1 == endofname) { /* ?? */
- rv = 0;
- goto ret;
- }
- Fname = endofname+1;
- if(!descend(name, Fname, exlist)) {
- *endofname = '\0';
- chdir(Home);
- if(chdir(Pathname) == -1) {
- fprintf(stderr, "find: bad directory tree\n");
- exit(1);
- }
+ if ((dir = opendir(".")) == NULL) {
+ fprintf(stderr, "find: cannot open < %s >\n", name);
+ rv = 0;
+ goto ret;
+ }
+ for (dp = readdir(dir); dp != NULL; dp = readdir(dir)) {
+ if ((dp->d_name[0]=='.' && dp->d_name[1]=='\0') ||
+ (dp->d_name[0]=='.' && dp->d_name[1]=='.' && dp->d_name[2]=='\0'))
+ continue;
+ c1 = endofname;
+ *c1++ = '/';
+ strcpy(c1, dp->d_name);
+ Fname = endofname+1;
+ if(!descend(name, Fname, exlist)) {
+ *endofname = '\0';
+ chdir(Home);
+ if(chdir(Pathname) == -1) {
+ fprintf(stderr, "find: bad directory tree\n");
+ exit(1);
}
}
}
rv = 1;
ret:
if(dir)
- close(dir);
+ closedir(dir);
if(chdir("..") == -1) {
*endofname = '\0';
fprintf(stderr, "find: bad directory <%s>\n", name);
}
return f;
}
+
+#ifdef AMES
+/*
+ * 'fastfind' scans a file list for the full pathname of a file
+ * given only a piece of the name. The list has been processed with
+ * with "front-compression" and bigram coding. Front compression reduces
+ * space by a factor of 4-5, bigram coding by a further 20-25%.
+ * The codes are:
+ *
+ * 0-28 likeliest differential counts + offset to make nonnegative
+ * 30 escape code for out-of-range count to follow in next word
+ * 128-255 bigram codes, (128 most common, as determined by 'find.squeeze')
+ * 32-127 single character (printable) ascii residue
+ *
+ * A novel two-tiered string search technique is employed:
+ *
+ * First, a metacharacter-free subpattern and partial pathname is
+ * matched BACKWARDS to avoid full expansion of the pathname list.
+ * The time savings is 40-50% over forward matching, which cannot efficiently
+ * handle overlapped search patterns and compressed path residue.
+ *
+ * Then, the actual shell glob-style regular expression (if in this form)
+ * is matched against the candidate pathnames using the slower routines
+ * provided in the standard 'find'.
+ */
+
+#define FCODES "/etc/find.codes"
+#define YES 1
+#define NO 0
+#define OFFSET 14
+#define ESCCODE 30
+
+fastfind ( pathpart )
+ char pathpart[];
+{
+ register char *p, *s;
+ register int c;
+ char *q, *index(), *patprep();
+ int i, count, globflag;
+ FILE *fp, *fopen();
+ char *patend, *cutoff;
+ char path[1024];
+ char bigram1[128], bigram2[128];
+ int found = NO;
+
+ if ( (fp = fopen ( FCODES, "r" )) == NULL ) {
+ fprintf ( "find: can't open %s\n", FCODES );
+ exit ( 1 );
+ }
+ for ( i = 0; i < 128; i++ )
+ bigram1[i] = getc ( fp ), bigram2[i] = getc ( fp );
+
+ if ( index ( pathpart, '*' ) || index ( pathpart, '?' ) || index ( pathpart, '[' ) )
+ globflag = YES;
+ patend = patprep ( pathpart );
+
+ c = getc ( fp );
+ for ( ; ; ) {
+
+ count += ( (c == ESCCODE) ? getw ( fp ) : c ) - OFFSET;
+
+ for ( p = path + count; (c = getc ( fp )) > ESCCODE; ) /* overlay old path */
+ if ( c < 0200 )
+ *p++ = c;
+ else /* bigrams are parity-marked */
+ *p++ = bigram1[c & 0177], *p++ = bigram2[c & 0177];
+ if ( c == EOF )
+ break;
+ *p-- = NULL;
+ cutoff = ( found ? path : path + count);
+
+ for ( found = NO, s = p; s >= cutoff; s-- )
+ if ( *s == *patend ) { /* fast first char check */
+ for ( p = patend - 1, q = s - 1; *p != NULL; p--, q-- )
+ if ( *q != *p )
+ break;
+ if ( *p == NULL ) { /* success on fast match */
+ found = YES;
+ if ( globflag == NO || amatch ( path, pathpart ) )
+ puts ( path );
+ break;
+ }
+ }
+ }
+}
+
+/*
+ extract first glob-free subpattern for fast pre-match;
+ prepend NULL for backwards match; return end of pattern
+*/
+static char globfree[100];
+
+char *
+patprep ( p )
+ register char *p;
+{
+ register char *subp = globfree;
+
+ while ( *p == '*' || *p == '?' )
+ p++;
+ if ( *p == '[' ) {
+ while ( *p != ']' && *p != NULL )
+ p++;
+ p++;
+ }
+ *subp++ = NULL;
+ if ( *p != NULL ) /* copy first noglob string */
+ while ( *p != '*' && *p != '?' && *p != '[' && *p != NULL &&
+ subp < (globfree + sizeof(globfree)) )
+ *subp++ = *p++;
+ else /* pattern has noglob chars only */
+ *subp++ = '/'; /* ... check every path */
+ *subp = NULL;
+ return ( --subp );
+}
+#endif