Added new 4.4 assert that Chris Torek posted to the net.
[unix-history] / lib / libc / gen / glob.c
CommitLineData
15637ed4
RG
1/*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Guido van Rossum.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#if defined(LIBC_SCCS) && !defined(lint)
38static char sccsid[] = "@(#)glob.c 5.12 (Berkeley) 6/24/91";
39#endif /* LIBC_SCCS and not lint */
40
41/*
42 * glob(3) -- a superset of the one defined in POSIX 1003.2.
43 *
44 * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
45 *
46 * Optional extra services, controlled by flags not defined by POSIX:
47 *
48 * GLOB_QUOTE:
49 * Escaping convention: \ inhibits any special meaning the following
50 * character might have (except \ at end of string is retained).
51 * GLOB_MAGCHAR:
52 * Set in gl_flags if pattern contained a globbing character.
53 * gl_matchc:
54 * Number of matches in the current invocation of glob.
55 */
56
57#include <sys/cdefs.h>
58#include <sys/param.h>
59#include <sys/stat.h>
60#include <dirent.h>
61#include <glob.h>
62#include <ctype.h>
63#include <errno.h>
64#include <string.h>
65#include <stdio.h>
66#include <stdlib.h>
67
68#define DOLLAR '$'
69#define DOT '.'
70#define EOS '\0'
71#define LBRACKET '['
72#define NOT '!'
73#define QUESTION '?'
74#define QUOTE '\\'
75#define RANGE '-'
76#define RBRACKET ']'
77#define SEP '/'
78#define STAR '*'
79#define TILDE '~'
80#define UNDERSCORE '_'
81
82#define M_QUOTE 0x8000
83#define M_PROTECT 0x4000
84#define M_MASK 0xffff
85#define M_ASCII 0x00ff
86
87#define CHAR(c) ((c)&M_ASCII)
88#define META(c) ((c)|M_QUOTE)
89#define M_ALL META('*')
90#define M_END META(']')
91#define M_NOT META('!')
92#define M_ONE META('?')
93#define M_RNG META('-')
94#define M_SET META('[')
95#define ismeta(c) (((c)&M_QUOTE) != 0)
96
97typedef u_short Char;
98
99static int compare __P((const void *, const void *));
100static void g_Ctoc __P((Char *, char *));
101static int g_lstat __P((Char *, struct stat *));
102static DIR *g_opendir __P((Char *));
103static Char *g_strchr __P((Char *, int));
104static int g_stat __P((Char *, struct stat *));
105static int glob1 __P((Char *, glob_t *));
106static int glob2 __P((Char *, Char *, Char *, glob_t *));
107static int glob3 __P((Char *, Char *, Char *, Char *, glob_t *));
108static int globextend __P((Char *, glob_t *));
109static int match __P((Char *, Char *, Char *));
110#ifdef DEBUG
111static void qprintf __P((Char *));
112#endif
113
114/*
115 * The main glob() routine: compiles the pattern (optionally processing
116 * quotes), calls glob1() to do the real pattern matching, and finally
117 * sorts the list (unless unsorted operation is requested). Returns 0
118 * if things went well, nonzero if errors occurred. It is not an error
119 * to find no matches.
120 */
121glob(pattern, flags, errfunc, pglob)
122 const char *pattern;
123 int flags, (*errfunc) __P((char *, int));
124 glob_t *pglob;
125{
126 const u_char *compilepat, *patnext;
127 int c, err, oldpathc;
128 Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1];
129
130 patnext = (u_char *) pattern;
131 if (!(flags & GLOB_APPEND)) {
132 pglob->gl_pathc = 0;
133 pglob->gl_pathv = NULL;
134 if (!(flags & GLOB_DOOFFS))
135 pglob->gl_offs = 0;
136 }
137 pglob->gl_flags = flags & ~GLOB_MAGCHAR;
138 pglob->gl_errfunc = errfunc;
139 oldpathc = pglob->gl_pathc;
140 pglob->gl_matchc = 0;
141
142 bufnext = patbuf;
143 bufend = bufnext + MAXPATHLEN;
144 compilebuf = bufnext;
145 compilepat = patnext;
146 if (flags & GLOB_QUOTE) {
147 /* Protect the quoted characters. */
148 while (bufnext < bufend && (c = *patnext++) != EOS)
149 if (c == QUOTE) {
150 if ((c = *patnext++) == EOS) {
151 c = QUOTE;
152 --patnext;
153 }
154 *bufnext++ = c | M_PROTECT;
155 }
156 else
157 *bufnext++ = c;
158 }
159 else
160 while (bufnext < bufend && (c = *patnext++) != EOS)
161 *bufnext++ = c;
162 *bufnext = EOS;
163
164 bufnext = patbuf;
165 qpatnext = patbuf;
166 /* We don't need to check for buffer overflow any more. */
167 while ((c = *qpatnext++) != EOS) {
168 switch (c) {
169 case LBRACKET:
170 pglob->gl_flags |= GLOB_MAGCHAR;
171 c = *qpatnext;
172 if (c == NOT)
173 ++qpatnext;
174 if (*qpatnext == EOS ||
175 g_strchr(qpatnext+1, RBRACKET) == NULL) {
176 *bufnext++ = LBRACKET;
177 if (c == NOT)
178 --qpatnext;
179 break;
180 }
181 *bufnext++ = M_SET;
182 if (c == NOT)
183 *bufnext++ = M_NOT;
184 c = *qpatnext++;
185 do {
186 *bufnext++ = CHAR(c);
187 if (*qpatnext == RANGE &&
188 (c = qpatnext[1]) != RBRACKET) {
189 *bufnext++ = M_RNG;
190 *bufnext++ = CHAR(c);
191 qpatnext += 2;
192 }
193 } while ((c = *qpatnext++) != RBRACKET);
194 *bufnext++ = M_END;
195 break;
196 case QUESTION:
197 pglob->gl_flags |= GLOB_MAGCHAR;
198 *bufnext++ = M_ONE;
199 break;
200 case STAR:
201 pglob->gl_flags |= GLOB_MAGCHAR;
202 *bufnext++ = M_ALL;
203 break;
204 default:
205 *bufnext++ = CHAR(c);
206 break;
207 }
208 }
209 *bufnext = EOS;
210#ifdef DEBUG
211 qprintf(patbuf);
212#endif
213
214 if ((err = glob1(patbuf, pglob)) != 0)
215 return(err);
216
217 if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
218 if (!(flags & GLOB_QUOTE)) {
219 Char *dp = compilebuf;
220 const u_char *sp = compilepat;
221 while (*dp++ = *sp++);
222 }
223 else {
224 /*
225 * Copy pattern, interpreting quotes; this is slightly
226 * different than the interpretation of quotes above
227 * -- which should prevail?
228 */
229 while (*compilepat != EOS) {
230 if (*compilepat == QUOTE) {
231 if (*++compilepat == EOS)
232 --compilepat;
233 }
234 *compilebuf++ = (u_char)*compilepat++;
235 }
236 *compilebuf = EOS;
237 }
238 return(globextend(patbuf, pglob));
239 } else if (!(flags & GLOB_NOSORT))
240 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
241 pglob->gl_pathc - oldpathc, sizeof(char *), compare);
242 return(0);
243}
244
245static int
246compare(p, q)
247 const void *p, *q;
248{
249 return(strcmp(*(char **)p, *(char **)q));
250}
251
252static
253glob1(pattern, pglob)
254 Char *pattern;
255 glob_t *pglob;
256{
257 Char pathbuf[MAXPATHLEN+1];
258
259 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
260 if (*pattern == EOS)
261 return(0);
262 return(glob2(pathbuf, pathbuf, pattern, pglob));
263}
264
265/*
266 * The functions glob2 and glob3 are mutually recursive; there is one level
267 * of recursion for each segment in the pattern that contains one or more
268 * meta characters.
269 */
270static
271glob2(pathbuf, pathend, pattern, pglob)
272 Char *pathbuf, *pathend, *pattern;
273 glob_t *pglob;
274{
275 struct stat sb;
276 Char *p, *q;
277 int anymeta;
278
279 /*
280 * Loop over pattern segments until end of pattern or until
281 * segment with meta character found.
282 */
283 for (anymeta = 0;;) {
284 if (*pattern == EOS) { /* End of pattern? */
285 *pathend = EOS;
286 if (g_stat(pathbuf, &sb))
287 return(0);
288
289 if (((pglob->gl_flags & GLOB_MARK) &&
290 pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
291 || (S_ISLNK(sb.st_mode) &&
292 (g_stat(pathbuf, &sb) == 0) &&
293 S_ISDIR(sb.st_mode)))) {
294 *pathend++ = SEP;
295 *pathend = EOS;
296 }
297 ++pglob->gl_matchc;
298 return(globextend(pathbuf, pglob));
299 }
300
301 /* Find end of next segment, copy tentatively to pathend. */
302 q = pathend;
303 p = pattern;
304 while (*p != EOS && *p != SEP) {
305 if (ismeta(*p))
306 anymeta = 1;
307 *q++ = *p++;
308 }
309
310 if (!anymeta) { /* No expansion, do next segment. */
311 pathend = q;
312 pattern = p;
313 while (*pattern == SEP)
314 *pathend++ = *pattern++;
315 } else /* Need expansion, recurse. */
316 return(glob3(pathbuf, pathend, pattern, p, pglob));
317 }
318 /* NOTREACHED */
319}
320
321static
322glob3(pathbuf, pathend, pattern, restpattern, pglob)
323 Char *pathbuf, *pathend, *pattern, *restpattern;
324 glob_t *pglob;
325{
326 register struct dirent *dp;
327 DIR *dirp;
328 int len, err;
329
330 *pathend = EOS;
331 errno = 0;
332
333 if (!(dirp = g_opendir(pathbuf)))
334 /* TODO: don't call for ENOENT or ENOTDIR? */
335 if (pglob->gl_errfunc &&
336 (*pglob->gl_errfunc)(pathbuf, errno) ||
337 (pglob->gl_flags & GLOB_ERR))
338 return(GLOB_ABEND);
339 else
340 return(0);
341
342 err = 0;
343
344 /* Search directory for matching names. */
345 while ((dp = readdir(dirp))) {
346 register u_char *sc;
347 register Char *dc;
348
349 /* Initial DOT must be matched literally. */
350 if (dp->d_name[0] == DOT && *pattern != DOT)
351 continue;
352 for (sc = (u_char *) dp->d_name, dc = pathend;
353 *dc++ = *sc++;);
354 if (!match(pathend, pattern, restpattern)) {
355 *pathend = EOS;
356 continue;
357 }
358 err = glob2(pathbuf, --dc, restpattern, pglob);
359 if (err)
360 break;
361 }
362
363 /* TODO: check error from readdir? */
364 (void)closedir(dirp);
365 return(err);
366}
367
368
369/*
370 * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
371 * add the new item, and update gl_pathc.
372 *
373 * This assumes the BSD realloc, which only copies the block when its size
374 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
375 * behavior.
376 *
377 * Return 0 if new item added, error code if memory couldn't be allocated.
378 *
379 * Invariant of the glob_t structure:
380 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
381 * gl_pathv points to (gl_offs + gl_pathc + 1) items.
382 */
383static int
384globextend(path, pglob)
385 Char *path;
386 glob_t *pglob;
387{
388 register char **pathv;
389 register int i;
390 u_int newsize;
391 char *copy;
392 Char *p;
393
394 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
395 pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
396 if (pathv == NULL)
397 return(GLOB_NOSPACE);
398
399 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
400 /* first time around -- clear initial gl_offs items */
401 pathv += pglob->gl_offs;
402 for (i = pglob->gl_offs; --i >= 0; )
403 *--pathv = NULL;
404 }
405 pglob->gl_pathv = pathv;
406
407 for (p = path; *p++;);
408 if ((copy = malloc(p - path)) != NULL) {
409 g_Ctoc(path, copy);
410 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
411 }
412 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
413 return(copy == NULL ? GLOB_NOSPACE : 0);
414}
415
416
417/*
418 * pattern matching function for filenames. Each occurrence of the *
419 * pattern causes a recursion level.
420 */
421static
422match(name, pat, patend)
423 register Char *name, *pat, *patend;
424{
425 int ok, negate_range;
426 Char c, k;
427
428 while (pat < patend) {
429 c = *pat++;
430 switch (c & M_MASK) {
431 case M_ALL:
432 if (pat == patend)
433 return(1);
434 for (; *name != EOS; ++name)
435 if (match(name, pat, patend))
436 return(1);
437 return(0);
438 case M_ONE:
439 if (*name++ == EOS)
440 return(0);
441 break;
442 case M_SET:
443 ok = 0;
444 k = *name++;
445 if (negate_range = ((*pat & M_MASK) == M_NOT))
446 ++pat;
447 while (((c = *pat++) & M_MASK) != M_END)
448 if ((*pat & M_MASK) == M_RNG) {
449 if (c <= k && k <= pat[1])
450 ok = 1;
451 pat += 2;
452 } else if (c == k)
453 ok = 1;
454 if (ok == negate_range)
455 return(0);
456 break;
457 default:
458 if (*name++ != c)
459 return(0);
460 break;
461 }
462 }
463 return(*name == EOS);
464}
465
466/* Free allocated data belonging to a glob_t structure. */
467void
468globfree(pglob)
469 glob_t *pglob;
470{
471 register int i;
472 register char **pp;
473
474 if (pglob->gl_pathv != NULL) {
475 pp = pglob->gl_pathv + pglob->gl_offs;
476 for (i = pglob->gl_pathc; i--; ++pp)
477 if (*pp)
478 free(*pp);
479 free(pglob->gl_pathv);
480 }
481}
482
483static DIR *
484g_opendir(str)
485 register Char *str;
486{
487 char buf[MAXPATHLEN];
488
489 if (!*str)
490 return(opendir("."));
491 g_Ctoc(str, buf);
492 return(opendir(buf));
493}
494
495static int
496g_lstat(fn, sb)
497 register Char *fn;
498 struct stat *sb;
499{
500 char buf[MAXPATHLEN];
501
502 g_Ctoc(fn, buf);
503 return(lstat(buf, sb));
504}
505
506static int
507g_stat(fn, sb)
508 register Char *fn;
509 struct stat *sb;
510{
511 char buf[MAXPATHLEN];
512
513 g_Ctoc(fn, buf);
514 return(stat(buf, sb));
515}
516
517static Char *
518g_strchr(str, ch)
519 Char *str;
520 int ch;
521{
522 do {
523 if (*str == ch)
524 return (str);
525 } while (*str++);
526 return (NULL);
527}
528
529static void
530g_Ctoc(str, buf)
531 register Char *str;
532 char *buf;
533{
534 register char *dc;
535
536 for (dc = buf; *dc++ = *str++;);
537}
538
539#ifdef DEBUG
540static void
541qprintf(s)
542 register Char *s;
543{
544 register Char *p;
545
546 for (p = s; *p; p++)
547 (void)printf("%c", *p & 0xff);
548 (void)printf("\n");
549 for (p = s; *p; p++)
550 (void)printf("%c", *p & M_PROTECT ? '"' : ' ');
551 (void)printf("\n");
552 for (p = s; *p; p++)
553 (void)printf("%c", *p & M_META ? '_' : ' ');
554 (void)printf("\n");
555}
556#endif