no logout time for reboot; minor format change
[unix-history] / usr / src / usr.bin / find / find.c
index 8e035f0..c3de45f 100644 (file)
@@ -1,7 +1,7 @@
-static char *sccsid = "@(#)find.c      4.2 (Berkeley) %G%";
-/*     find    COMPILE:        cc -o find -s -O -i find.c -lS  */
+static char *sccsid = "@(#)find.c      4.9 (Berkeley) %G%";
+
 #include <stdio.h>
 #include <stdio.h>
-#include <sys/types.h>
+#include <sys/param.h>
 #include <sys/dir.h>
 #include <sys/stat.h>
 #define A_DAY  86400L /* a day full of seconds */
 #include <sys/dir.h>
 #include <sys/stat.h>
 #define A_DAY  86400L /* a day full of seconds */
@@ -41,13 +41,48 @@ char        Home[128];
 long   Blocks;
 char *rindex();
 char *sbrk();
 long   Blocks;
 char *rindex();
 char *sbrk();
-main(argc, argv) char *argv[];
+
+/*
+ * SEE ALSO:   updatedb, bigram.c, code.c
+ *             Usenix ;login:, February/March, 1983, p. 8.
+ *
+ * REVISIONS:  James A. Woods, Informatics General Corporation,
+ *             NASA Ames Research Center, 6/81.
+ *
+ *             The second form searches a pre-computed filelist
+ *             (constructed nightly by /usr/lib/crontab) which is
+ *             compressed by updatedb (v.i.z.)  The effect of
+ *                     find <name>
+ *             is similar to
+ *                     find / +0 -name "*<name>*" -print
+ *             but much faster.
+ *
+ *             8/82 faster yet + incorporation of bigram coding -- jaw
+ *
+ *             1/83 incorporate glob-style matching -- jaw
+ */
+#define        AMES    1
+
+main(argc, argv)
+       int argc;
+       char *argv[];
 {
        struct anode *exlist;
        int paths;
        register char *cp, *sp = 0;
        FILE *pwd, *popen();
 
 {
        struct anode *exlist;
        int paths;
        register char *cp, *sp = 0;
        FILE *pwd, *popen();
 
+#ifdef  AMES
+       if (argc < 2) {
+               fprintf(stderr,
+                       "Usage: find name, or find path-list predicate-list\n");
+               exit(1);
+       }
+       if (argc == 2) {
+               fastfind(argv[1]);
+               exit(0);
+       }
+#endif
        time(&Now);
        pwd = popen("pwd", "r");
        fgets(Home, 128, pwd);
        time(&Now);
        pwd = popen("pwd", "r");
        fgets(Home, 128, pwd);
@@ -181,7 +216,7 @@ struct anode *e3() { /* parse parens and predicates */
                return(mk(ino, (struct anode *)atoi(b), (struct anode *)s));
        else if(EQ(a, "-group")) {
                if((i=getunum("/etc/group", b)) == -1) {
                return(mk(ino, (struct anode *)atoi(b), (struct anode *)s));
        else if(EQ(a, "-group")) {
                if((i=getunum("/etc/group", b)) == -1) {
-                       if(gmatch(b, "[0-9][0-9]*"))
+                       if(gmatch(b, "[0-9]*"))
                                return mk(group, (struct anode *)atoi(b), (struct anode *)s);
                        fprintf(stderr, "find: cannot find -group name\n");
                        exit(1);
                                return mk(group, (struct anode *)atoi(b), (struct anode *)s);
                        fprintf(stderr, "find: cannot find -group name\n");
                        exit(1);
@@ -203,7 +238,8 @@ struct anode *e3() { /* parse parens and predicates */
                i = s=='d' ? S_IFDIR :
                    s=='b' ? S_IFBLK :
                    s=='c' ? S_IFCHR :
                i = s=='d' ? S_IFDIR :
                    s=='b' ? S_IFBLK :
                    s=='c' ? S_IFCHR :
-                   s=='f' ? 0100000 :
+                   s=='f' ? S_IFREG :
+                   s=='l' ? S_IFLNK :
                    0;
                return(mk(type, (struct anode *)i, (struct anode *)0));
        }
                    0;
                return(mk(type, (struct anode *)i, (struct anode *)0));
        }
@@ -456,8 +492,14 @@ doex(com)
        }
        nargv[np] = 0;
        if (np==0) return(9);
        }
        nargv[np] = 0;
        if (np==0) return(9);
-       if(fork()) /*parent*/ wait(&ccode);
-       else { /*child*/
+       if(fork()) /*parent*/ {
+#include <signal.h>
+               int (*old)() = signal(SIGINT, SIG_IGN);
+               int (*oldq)() = signal(SIGQUIT, SIG_IGN);
+               wait(&ccode);
+               signal(SIGINT, old);
+               signal(SIGQUIT, oldq);
+       } else { /*child*/
                chdir(Home);
                execvp(nargv[0], nargv, np);
                exit(1);
                chdir(Home);
                execvp(nargv[0], nargv, np);
                exit(1);
@@ -498,22 +540,16 @@ getunum(f, s) char *f, *s; { /* find user/group name and return number */
 }
 
 descend(name, fname, exlist)
 }
 
 descend(name, fname, exlist)
-struct anode *exlist;
-char *name, *fname;
+       struct anode *exlist;
+       char *name, *fname;
 {
 {
-       int     dir = 0, /* open directory */
-               offset,
-               dsize,
-               entries,
-               dirsize;
-       struct direct dentry[BUFSIZ / sizeof (struct direct)];
+       DIR     *dir = NULL;
        register struct direct  *dp;
        register struct direct  *dp;
-       register char *c1, *c2;
-       int i;
+       register char *c1;
        int rv = 0;
        char *endofname;
 
        int rv = 0;
        char *endofname;
 
-       if(stat(fname, &Statb)<0) {
+       if (lstat(fname, &Statb)<0) {
                fprintf(stderr, "find: bad status < %s >\n", name);
                return(0);
        }
                fprintf(stderr, "find: bad status < %s >\n", name);
                return(0);
        }
@@ -521,74 +557,39 @@ char *name, *fname;
        if((Statb.st_mode&S_IFMT)!=S_IFDIR)
                return(1);
 
        if((Statb.st_mode&S_IFMT)!=S_IFDIR)
                return(1);
 
-       for(c1 = name; *c1; ++c1);
-       if(*(c1-1) == '/')
+       for (c1 = name; *c1; ++c1);
+       if (*(c1-1) == '/')
                --c1;
        endofname = c1;
                --c1;
        endofname = c1;
-       dirsize = Statb.st_size;
 
 
-       if(chdir(fname) == -1)
+       if (chdir(fname) == -1)
                return(0);
                return(0);
-       for(offset=0 ; offset < dirsize ; offset += BUFSIZ) { /* each block */
-               dsize = BUFSIZ<(dirsize-offset)? BUFSIZ: (dirsize-offset);
-               if(!dir) {
-                       if((dir=open(".", 0))<0) {
-                               fprintf(stderr, "find: cannot open < %s >\n",
-                                       name);
-                               rv = 0;
-                               goto ret;
-                       }
-                       if(offset) lseek(dir, (long)offset, 0);
-                       if(read(dir, (char *)dentry, dsize)<0) {
-                               fprintf(stderr, "find: cannot read < %s >\n",
-                                       name);
-                               rv = 0;
-                               goto ret;
-                       }
-                       if(dir > 10) {
-                               close(dir);
-                               dir = 0;
-                       }
-               } else 
-                       if(read(dir, (char *)dentry, dsize)<0) {
-                               fprintf(stderr, "find: cannot read < %s >\n",
-                                       name);
-                               rv = 0;
-                               goto ret;
-                       }
-               for(dp=dentry, entries=dsize>>4; entries; --entries, ++dp) { /* each directory entry */
-                       if(dp->d_ino==0
-                       || (dp->d_name[0]=='.' && dp->d_name[1]=='\0')
-                       || (dp->d_name[0]=='.' && dp->d_name[1]=='.' && dp->d_name[2]=='\0'))
-                               continue;
-                       c1 = endofname;
-                       *c1++ = '/';
-                       c2 = dp->d_name;
-                       for(i=0; i<14; ++i)
-                               if(*c2)
-                                       *c1++ = *c2++;
-                               else
-                                       break;
-                       *c1 = '\0';
-                       if(c1 == endofname) { /* ?? */
-                               rv = 0;
-                               goto ret;
-                       }
-                       Fname = endofname+1;
-                       if(!descend(name, Fname, exlist)) {
-                               *endofname = '\0';
-                               chdir(Home);
-                               if(chdir(Pathname) == -1) {
-                                       fprintf(stderr, "find: bad directory tree\n");
-                                       exit(1);
-                               }
+       if ((dir = opendir(".")) == NULL) {
+               fprintf(stderr, "find: cannot open < %s >\n", name);
+               rv = 0;
+               goto ret;
+       }
+       for (dp = readdir(dir); dp != NULL; dp = readdir(dir)) {
+               if ((dp->d_name[0]=='.' && dp->d_name[1]=='\0') ||
+                   (dp->d_name[0]=='.' && dp->d_name[1]=='.' && dp->d_name[2]=='\0'))
+                       continue;
+               c1 = endofname;
+               *c1++ = '/';
+               strcpy(c1, dp->d_name);
+               Fname = endofname+1;
+               if(!descend(name, Fname, exlist)) {
+                       *endofname = '\0';
+                       chdir(Home);
+                       if(chdir(Pathname) == -1) {
+                               fprintf(stderr, "find: bad directory tree\n");
+                               exit(1);
                        }
                }
        }
        rv = 1;
 ret:
        if(dir)
                        }
                }
        }
        rv = 1;
 ret:
        if(dir)
-               close(dir);
+               closedir(dir);
        if(chdir("..") == -1) {
                *endofname = '\0';
                fprintf(stderr, "find: bad directory <%s>\n", name);
        if(chdir("..") == -1) {
                *endofname = '\0';
                fprintf(stderr, "find: bad directory <%s>\n", name);
@@ -707,3 +708,119 @@ again:
        }
        return f;
 }
        }
        return f;
 }
+
+#ifdef AMES
+/*
+ * 'fastfind' scans a file list for the full pathname of a file
+ * given only a piece of the name.  The list has been processed with
+ * with "front-compression" and bigram coding.  Front compression reduces
+ * space by a factor of 4-5, bigram coding by a further 20-25%.
+ * The codes are:
+ *
+ *     0-28    likeliest differential counts + offset to make nonnegative 
+ *     30      escape code for out-of-range count to follow in next word
+ *     128-255 bigram codes, (128 most common, as determined by 'updatedb')
+ *     32-127  single character (printable) ascii residue
+ *
+ * A novel two-tiered string search technique is employed: 
+ *
+ * First, a metacharacter-free subpattern and partial pathname is
+ * matched BACKWARDS to avoid full expansion of the pathname list.
+ * The time savings is 40-50% over forward matching, which cannot efficiently
+ * handle overlapped search patterns and compressed path residue.
+ *
+ * Then, the actual shell glob-style regular expression (if in this form)
+ * is matched against the candidate pathnames using the slower routines
+ * provided in the standard 'find'.
+ */
+
+#define        FCODES  "/usr/lib/find/find.codes"
+#define        YES     1
+#define        NO      0
+#define        OFFSET  14
+#define        ESCCODE 30
+
+fastfind ( pathpart )  
+       char pathpart[];
+{
+       register char *p, *s;
+       register int c; 
+       char *q, *index(), *patprep();
+       int i, count, globflag;
+       FILE *fp, *fopen();
+       char *patend, *cutoff;
+       char path[1024];
+       char bigram1[128], bigram2[128];
+       int found = NO;
+
+       if ( (fp = fopen ( FCODES, "r" )) == NULL ) {
+               fprintf ( "find: can't open %s\n", FCODES );
+               exit ( 1 );
+       }
+       for ( i = 0; i < 128; i++ ) 
+               bigram1[i] = getc ( fp ),  bigram2[i] = getc ( fp );
+       
+       if ( index ( pathpart, '*' ) || index ( pathpart, '?' ) || index ( pathpart, '[' ) )
+               globflag = YES;
+       patend = patprep ( pathpart );
+
+       c = getc ( fp );
+       for ( ; ; ) {
+
+               count += ( (c == ESCCODE) ? getw ( fp ) : c ) - OFFSET;
+
+               for ( p = path + count; (c = getc ( fp )) > ESCCODE; )  /* overlay old path */
+                       if ( c < 0200 ) 
+                               *p++ = c;
+                       else            /* bigrams are parity-marked */
+                               *p++ = bigram1[c & 0177],  *p++ = bigram2[c & 0177];
+               if ( c == EOF )
+                       break;
+               *p-- = NULL;
+               cutoff = ( found ? path : path + count);
+
+               for ( found = NO, s = p; s >= cutoff; s-- ) 
+                       if ( *s == *patend ) {          /* fast first char check */
+                               for ( p = patend - 1, q = s - 1; *p != NULL; p--, q-- )
+                                       if ( *q != *p )
+                                               break;
+                               if ( *p == NULL ) {     /* success on fast match */
+                                       found = YES;
+                                       if ( globflag == NO || amatch ( path, pathpart ) )
+                                               puts ( path );
+                                       break;
+                               }
+                       }
+       }
+}
+
+/*
+    extract first glob-free subpattern for fast pre-match;
+    prepend NULL for backwards match; return end of pattern
+*/
+static char globfree[100];
+
+char *
+patprep ( p )
+       register char *p;
+{
+       register char *subp = globfree;
+
+       while ( *p == '*' || *p == '?' )
+               p++;
+       if ( *p == '[' ) {
+               while ( *p != ']' && *p != NULL )
+                       p++;
+               p++;
+       }
+       *subp++ = NULL;
+       if ( *p != NULL )               /* copy first noglob string */
+               while ( *p != '*' && *p != '?' && *p != '[' && *p != NULL &&
+                     subp < (globfree + sizeof(globfree)) )
+                       *subp++ = *p++;
+       else                            /* pattern has noglob chars only */
+               *subp++ = '/';          /* ... check every path */
+       *subp = NULL;
+       return ( --subp );
+}
+#endif