X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/68791ee2c22f1fd8b0fa121f804b8d9e036bac06..f1d6a550c8b18c1ff8b21c5facf75e227c20258b:/usr/src/usr.bin/find/find.c

diff --git a/usr/src/usr.bin/find/find.c b/usr/src/usr.bin/find/find.c
index 8e035f0f95..31b5632f18 100644
--- a/usr/src/usr.bin/find/find.c
+++ b/usr/src/usr.bin/find/find.c
@@ -1,7 +1,9 @@
-static char *sccsid = "@(#)find.c	4.2 (Berkeley) %G%";
+static char *sccsid = "@(#)find.c	4.8 (Berkeley) %G%";
+
 /*	find	COMPILE:	cc -o find -s -O -i find.c -lS	*/
+
 #include <stdio.h>
-#include <sys/types.h>
+#include <sys/param.h>
 #include <sys/dir.h>
 #include <sys/stat.h>
 #define A_DAY	86400L /* a day full of seconds */
@@ -41,13 +43,48 @@ char	Home[128];
 long	Blocks;
 char *rindex();
 char *sbrk();
-main(argc, argv) char *argv[];
+
+/*
+ * SEE ALSO:	find.squeeze, find.bigram.c, find.code.c
+ *		Usenix ;login:, February/March, 1983, p. 8.
+ *
+ * REVISIONS: 	James A. Woods, Informatics General Corporation,
+ *		NASA Ames Research Center, 6/81.
+ *
+ *		The second form searches a pre-computed filelist
+ *		(constructed nightly by /usr/lib/crontab) which is
+ *		compressed by find.squeeze (v.i.z.)  The effect of
+ *			find <name>
+ *		is similar to
+ *			find / +0 -name "*<name>*" -print
+ *		but much faster.
+ *
+ *		8/82 faster yet + incorporation of bigram coding -- jaw
+ *
+ *		1/83 incorporate glob-style matching -- jaw
+ */
+#define	AMES	1
+
+main(argc, argv)
+	int argc;
+	char *argv[];
 {
 	struct anode *exlist;
 	int paths;
 	register char *cp, *sp = 0;
 	FILE *pwd, *popen();
 
+#ifdef  AMES
+	if (argc < 2) {
+		fprintf(stderr,
+			"Usage: find name, or find path-list predicate-list\n");
+		exit(1);
+	}
+	if (argc == 2) {
+		fastfind(argv[1]);
+		exit(0);
+	}
+#endif
 	time(&Now);
 	pwd = popen("pwd", "r");
 	fgets(Home, 128, pwd);
@@ -181,7 +218,7 @@ struct anode *e3() { /* parse parens and predicates */
 		return(mk(ino, (struct anode *)atoi(b), (struct anode *)s));
 	else if(EQ(a, "-group")) {
 		if((i=getunum("/etc/group", b)) == -1) {
-			if(gmatch(b, "[0-9][0-9]*"))
+			if(gmatch(b, "[0-9]*"))
 				return mk(group, (struct anode *)atoi(b), (struct anode *)s);
 			fprintf(stderr, "find: cannot find -group name\n");
 			exit(1);
@@ -203,7 +240,8 @@ struct anode *e3() { /* parse parens and predicates */
 		i = s=='d' ? S_IFDIR :
 		    s=='b' ? S_IFBLK :
 		    s=='c' ? S_IFCHR :
-		    s=='f' ? 0100000 :
+		    s=='f' ? S_IFREG :
+		    s=='l' ? S_IFLNK :
 		    0;
 		return(mk(type, (struct anode *)i, (struct anode *)0));
 	}
@@ -456,8 +494,14 @@ doex(com)
 	}
 	nargv[np] = 0;
 	if (np==0) return(9);
-	if(fork()) /*parent*/ wait(&ccode);
-	else { /*child*/
+	if(fork()) /*parent*/ {
+#include <signal.h>
+		int (*old)() = signal(SIGINT, SIG_IGN);
+		int (*oldq)() = signal(SIGQUIT, SIG_IGN);
+		wait(&ccode);
+		signal(SIGINT, old);
+		signal(SIGQUIT, oldq);
+	} else { /*child*/
 		chdir(Home);
 		execvp(nargv[0], nargv, np);
 		exit(1);
@@ -498,22 +542,16 @@ getunum(f, s) char *f, *s; { /* find user/group name and return number */
 }
 
 descend(name, fname, exlist)
-struct anode *exlist;
-char *name, *fname;
+	struct anode *exlist;
+	char *name, *fname;
 {
-	int	dir = 0, /* open directory */
-		offset,
-		dsize,
-		entries,
-		dirsize;
-	struct direct dentry[BUFSIZ / sizeof (struct direct)];
+	DIR	*dir = NULL;
 	register struct direct	*dp;
-	register char *c1, *c2;
-	int i;
+	register char *c1;
 	int rv = 0;
 	char *endofname;
 
-	if(stat(fname, &Statb)<0) {
+	if (lstat(fname, &Statb)<0) {
 		fprintf(stderr, "find: bad status < %s >\n", name);
 		return(0);
 	}
@@ -521,74 +559,39 @@ char *name, *fname;
 	if((Statb.st_mode&S_IFMT)!=S_IFDIR)
 		return(1);
 
-	for(c1 = name; *c1; ++c1);
-	if(*(c1-1) == '/')
+	for (c1 = name; *c1; ++c1);
+	if (*(c1-1) == '/')
 		--c1;
 	endofname = c1;
-	dirsize = Statb.st_size;
 
-	if(chdir(fname) == -1)
+	if (chdir(fname) == -1)
 		return(0);
-	for(offset=0 ; offset < dirsize ; offset += BUFSIZ) { /* each block */
-		dsize = BUFSIZ<(dirsize-offset)? BUFSIZ: (dirsize-offset);
-		if(!dir) {
-			if((dir=open(".", 0))<0) {
-				fprintf(stderr, "find: cannot open < %s >\n",
-					name);
-				rv = 0;
-				goto ret;
-			}
-			if(offset) lseek(dir, (long)offset, 0);
-			if(read(dir, (char *)dentry, dsize)<0) {
-				fprintf(stderr, "find: cannot read < %s >\n",
-					name);
-				rv = 0;
-				goto ret;
-			}
-			if(dir > 10) {
-				close(dir);
-				dir = 0;
-			}
-		} else 
-			if(read(dir, (char *)dentry, dsize)<0) {
-				fprintf(stderr, "find: cannot read < %s >\n",
-					name);
-				rv = 0;
-				goto ret;
-			}
-		for(dp=dentry, entries=dsize>>4; entries; --entries, ++dp) { /* each directory entry */
-			if(dp->d_ino==0
-			|| (dp->d_name[0]=='.' && dp->d_name[1]=='\0')
-			|| (dp->d_name[0]=='.' && dp->d_name[1]=='.' && dp->d_name[2]=='\0'))
-				continue;
-			c1 = endofname;
-			*c1++ = '/';
-			c2 = dp->d_name;
-			for(i=0; i<14; ++i)
-				if(*c2)
-					*c1++ = *c2++;
-				else
-					break;
-			*c1 = '\0';
-			if(c1 == endofname) { /* ?? */
-				rv = 0;
-				goto ret;
-			}
-			Fname = endofname+1;
-			if(!descend(name, Fname, exlist)) {
-				*endofname = '\0';
-				chdir(Home);
-				if(chdir(Pathname) == -1) {
-					fprintf(stderr, "find: bad directory tree\n");
-					exit(1);
-				}
+	if ((dir = opendir(".")) == NULL) {
+		fprintf(stderr, "find: cannot open < %s >\n", name);
+		rv = 0;
+		goto ret;
+	}
+	for (dp = readdir(dir); dp != NULL; dp = readdir(dir)) {
+		if ((dp->d_name[0]=='.' && dp->d_name[1]=='\0') ||
+		    (dp->d_name[0]=='.' && dp->d_name[1]=='.' && dp->d_name[2]=='\0'))
+			continue;
+		c1 = endofname;
+		*c1++ = '/';
+		strcpy(c1, dp->d_name);
+		Fname = endofname+1;
+		if(!descend(name, Fname, exlist)) {
+			*endofname = '\0';
+			chdir(Home);
+			if(chdir(Pathname) == -1) {
+				fprintf(stderr, "find: bad directory tree\n");
+				exit(1);
 			}
 		}
 	}
 	rv = 1;
 ret:
 	if(dir)
-		close(dir);
+		closedir(dir);
 	if(chdir("..") == -1) {
 		*endofname = '\0';
 		fprintf(stderr, "find: bad directory <%s>\n", name);
@@ -707,3 +710,119 @@ again:
 	}
 	return f;
 }
+
+#ifdef	AMES
+/*
+ * 'fastfind' scans a file list for the full pathname of a file
+ * given only a piece of the name.  The list has been processed with
+ * with "front-compression" and bigram coding.  Front compression reduces
+ * space by a factor of 4-5, bigram coding by a further 20-25%.
+ * The codes are:
+ *
+ *	0-28	likeliest differential counts + offset to make nonnegative 
+ *	30	escape code for out-of-range count to follow in next word
+ *	128-255 bigram codes, (128 most common, as determined by 'find.squeeze')
+ *	32-127  single character (printable) ascii residue
+ *
+ * A novel two-tiered string search technique is employed: 
+ *
+ * First, a metacharacter-free subpattern and partial pathname is
+ * matched BACKWARDS to avoid full expansion of the pathname list.
+ * The time savings is 40-50% over forward matching, which cannot efficiently
+ * handle overlapped search patterns and compressed path residue.
+ *
+ * Then, the actual shell glob-style regular expression (if in this form)
+ * is matched against the candidate pathnames using the slower routines
+ * provided in the standard 'find'.
+ */
+
+#define	FCODES 	"/etc/find.codes"
+#define	YES	1
+#define	NO	0
+#define	OFFSET	14
+#define	ESCCODE	30
+
+fastfind ( pathpart )	
+	char pathpart[];
+{
+	register char *p, *s;
+	register int c; 
+	char *q, *index(), *patprep();
+	int i, count, globflag;
+	FILE *fp, *fopen();
+	char *patend, *cutoff;
+	char path[1024];
+	char bigram1[128], bigram2[128];
+	int found = NO;
+
+	if ( (fp = fopen ( FCODES, "r" )) == NULL ) {
+		fprintf ( "find: can't open %s\n", FCODES );
+		exit ( 1 );
+	}
+	for ( i = 0; i < 128; i++ ) 
+		bigram1[i] = getc ( fp ),  bigram2[i] = getc ( fp );
+	
+	if ( index ( pathpart, '*' ) || index ( pathpart, '?' ) || index ( pathpart, '[' ) )
+		globflag = YES;
+	patend = patprep ( pathpart );
+
+	c = getc ( fp );
+	for ( ; ; ) {
+
+		count += ( (c == ESCCODE) ? getw ( fp ) : c ) - OFFSET;
+
+		for ( p = path + count; (c = getc ( fp )) > ESCCODE; )	/* overlay old path */
+			if ( c < 0200 )	
+				*p++ = c;
+			else		/* bigrams are parity-marked */
+				*p++ = bigram1[c & 0177],  *p++ = bigram2[c & 0177];
+		if ( c == EOF )
+			break;
+		*p-- = NULL;
+		cutoff = ( found ? path : path + count);
+
+		for ( found = NO, s = p; s >= cutoff; s-- ) 
+			if ( *s == *patend ) {		/* fast first char check */
+				for ( p = patend - 1, q = s - 1; *p != NULL; p--, q-- )
+					if ( *q != *p )
+						break;
+				if ( *p == NULL ) {	/* success on fast match */
+					found = YES;
+					if ( globflag == NO || amatch ( path, pathpart ) )
+						puts ( path );
+					break;
+				}
+			}
+	}
+}
+
+/*
+    extract first glob-free subpattern for fast pre-match;
+    prepend NULL for backwards match; return end of pattern
+*/
+static char globfree[100];
+
+char *
+patprep ( p )
+	register char *p;
+{
+	register char *subp = globfree;
+
+	while ( *p == '*' || *p == '?' )
+		p++;
+	if ( *p == '[' ) {
+		while ( *p != ']' && *p != NULL )
+			p++;
+		p++;
+	}
+	*subp++ = NULL;
+	if ( *p != NULL )		/* copy first noglob string */
+		while ( *p != '*' && *p != '?' && *p != '[' && *p != NULL &&
+		      subp < (globfree + sizeof(globfree)) )
+			*subp++ = *p++;
+	else				/* pattern has noglob chars only */
+		*subp++ = '/';		/* ... check every path */
+	*subp = NULL;
+	return ( --subp );
+}
+#endif