Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /* |
2 | * Copyright (c) 1989 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Guido van Rossum. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * 3. All advertising materials mentioning features or use of this software | |
17 | * must display the following acknowledgement: | |
18 | * This product includes software developed by the University of | |
19 | * California, Berkeley and its contributors. | |
20 | * 4. Neither the name of the University nor the names of its contributors | |
21 | * may be used to endorse or promote products derived from this software | |
22 | * without specific prior written permission. | |
23 | * | |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
34 | * SUCH DAMAGE. | |
35 | */ | |
36 | ||
37 | #if defined(LIBC_SCCS) && !defined(lint) | |
38 | static char sccsid[] = "@(#)glob.c 5.12 (Berkeley) 6/24/91"; | |
39 | #endif /* LIBC_SCCS and not lint */ | |
40 | ||
41 | /* | |
42 | * glob(3) -- a superset of the one defined in POSIX 1003.2. | |
43 | * | |
44 | * The [!...] convention to negate a range is supported (SysV, Posix, ksh). | |
45 | * | |
46 | * Optional extra services, controlled by flags not defined by POSIX: | |
47 | * | |
48 | * GLOB_QUOTE: | |
49 | * Escaping convention: \ inhibits any special meaning the following | |
50 | * character might have (except \ at end of string is retained). | |
51 | * GLOB_MAGCHAR: | |
52 | * Set in gl_flags if pattern contained a globbing character. | |
53 | * gl_matchc: | |
54 | * Number of matches in the current invocation of glob. | |
55 | */ | |
56 | ||
57 | #include <sys/cdefs.h> | |
58 | #include <sys/param.h> | |
59 | #include <sys/stat.h> | |
60 | #include <dirent.h> | |
61 | #include <glob.h> | |
62 | #include <ctype.h> | |
63 | #include <errno.h> | |
64 | #include <string.h> | |
65 | #include <stdio.h> | |
66 | #include <stdlib.h> | |
67 | ||
68 | #define DOLLAR '$' | |
69 | #define DOT '.' | |
70 | #define EOS '\0' | |
71 | #define LBRACKET '[' | |
72 | #define NOT '!' | |
73 | #define QUESTION '?' | |
74 | #define QUOTE '\\' | |
75 | #define RANGE '-' | |
76 | #define RBRACKET ']' | |
77 | #define SEP '/' | |
78 | #define STAR '*' | |
79 | #define TILDE '~' | |
80 | #define UNDERSCORE '_' | |
81 | ||
82 | #define M_QUOTE 0x8000 | |
83 | #define M_PROTECT 0x4000 | |
84 | #define M_MASK 0xffff | |
85 | #define M_ASCII 0x00ff | |
86 | ||
87 | #define CHAR(c) ((c)&M_ASCII) | |
88 | #define META(c) ((c)|M_QUOTE) | |
89 | #define M_ALL META('*') | |
90 | #define M_END META(']') | |
91 | #define M_NOT META('!') | |
92 | #define M_ONE META('?') | |
93 | #define M_RNG META('-') | |
94 | #define M_SET META('[') | |
95 | #define ismeta(c) (((c)&M_QUOTE) != 0) | |
96 | ||
97 | typedef u_short Char; | |
98 | ||
99 | static int compare __P((const void *, const void *)); | |
100 | static void g_Ctoc __P((Char *, char *)); | |
101 | static int g_lstat __P((Char *, struct stat *)); | |
102 | static DIR *g_opendir __P((Char *)); | |
103 | static Char *g_strchr __P((Char *, int)); | |
104 | static int g_stat __P((Char *, struct stat *)); | |
105 | static int glob1 __P((Char *, glob_t *)); | |
106 | static int glob2 __P((Char *, Char *, Char *, glob_t *)); | |
107 | static int glob3 __P((Char *, Char *, Char *, Char *, glob_t *)); | |
108 | static int globextend __P((Char *, glob_t *)); | |
109 | static int match __P((Char *, Char *, Char *)); | |
110 | #ifdef DEBUG | |
111 | static void qprintf __P((Char *)); | |
112 | #endif | |
113 | ||
114 | /* | |
115 | * The main glob() routine: compiles the pattern (optionally processing | |
116 | * quotes), calls glob1() to do the real pattern matching, and finally | |
117 | * sorts the list (unless unsorted operation is requested). Returns 0 | |
118 | * if things went well, nonzero if errors occurred. It is not an error | |
119 | * to find no matches. | |
120 | */ | |
121 | glob(pattern, flags, errfunc, pglob) | |
122 | const char *pattern; | |
123 | int flags, (*errfunc) __P((char *, int)); | |
124 | glob_t *pglob; | |
125 | { | |
126 | const u_char *compilepat, *patnext; | |
127 | int c, err, oldpathc; | |
128 | Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1]; | |
129 | ||
130 | patnext = (u_char *) pattern; | |
131 | if (!(flags & GLOB_APPEND)) { | |
132 | pglob->gl_pathc = 0; | |
133 | pglob->gl_pathv = NULL; | |
134 | if (!(flags & GLOB_DOOFFS)) | |
135 | pglob->gl_offs = 0; | |
136 | } | |
137 | pglob->gl_flags = flags & ~GLOB_MAGCHAR; | |
138 | pglob->gl_errfunc = errfunc; | |
139 | oldpathc = pglob->gl_pathc; | |
140 | pglob->gl_matchc = 0; | |
141 | ||
142 | bufnext = patbuf; | |
143 | bufend = bufnext + MAXPATHLEN; | |
144 | compilebuf = bufnext; | |
145 | compilepat = patnext; | |
146 | if (flags & GLOB_QUOTE) { | |
147 | /* Protect the quoted characters. */ | |
148 | while (bufnext < bufend && (c = *patnext++) != EOS) | |
149 | if (c == QUOTE) { | |
150 | if ((c = *patnext++) == EOS) { | |
151 | c = QUOTE; | |
152 | --patnext; | |
153 | } | |
154 | *bufnext++ = c | M_PROTECT; | |
155 | } | |
156 | else | |
157 | *bufnext++ = c; | |
158 | } | |
159 | else | |
160 | while (bufnext < bufend && (c = *patnext++) != EOS) | |
161 | *bufnext++ = c; | |
162 | *bufnext = EOS; | |
163 | ||
164 | bufnext = patbuf; | |
165 | qpatnext = patbuf; | |
166 | /* We don't need to check for buffer overflow any more. */ | |
167 | while ((c = *qpatnext++) != EOS) { | |
168 | switch (c) { | |
169 | case LBRACKET: | |
170 | pglob->gl_flags |= GLOB_MAGCHAR; | |
171 | c = *qpatnext; | |
172 | if (c == NOT) | |
173 | ++qpatnext; | |
174 | if (*qpatnext == EOS || | |
175 | g_strchr(qpatnext+1, RBRACKET) == NULL) { | |
176 | *bufnext++ = LBRACKET; | |
177 | if (c == NOT) | |
178 | --qpatnext; | |
179 | break; | |
180 | } | |
181 | *bufnext++ = M_SET; | |
182 | if (c == NOT) | |
183 | *bufnext++ = M_NOT; | |
184 | c = *qpatnext++; | |
185 | do { | |
186 | *bufnext++ = CHAR(c); | |
187 | if (*qpatnext == RANGE && | |
188 | (c = qpatnext[1]) != RBRACKET) { | |
189 | *bufnext++ = M_RNG; | |
190 | *bufnext++ = CHAR(c); | |
191 | qpatnext += 2; | |
192 | } | |
193 | } while ((c = *qpatnext++) != RBRACKET); | |
194 | *bufnext++ = M_END; | |
195 | break; | |
196 | case QUESTION: | |
197 | pglob->gl_flags |= GLOB_MAGCHAR; | |
198 | *bufnext++ = M_ONE; | |
199 | break; | |
200 | case STAR: | |
201 | pglob->gl_flags |= GLOB_MAGCHAR; | |
202 | *bufnext++ = M_ALL; | |
203 | break; | |
204 | default: | |
205 | *bufnext++ = CHAR(c); | |
206 | break; | |
207 | } | |
208 | } | |
209 | *bufnext = EOS; | |
210 | #ifdef DEBUG | |
211 | qprintf(patbuf); | |
212 | #endif | |
213 | ||
214 | if ((err = glob1(patbuf, pglob)) != 0) | |
215 | return(err); | |
216 | ||
217 | if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) { | |
218 | if (!(flags & GLOB_QUOTE)) { | |
219 | Char *dp = compilebuf; | |
220 | const u_char *sp = compilepat; | |
221 | while (*dp++ = *sp++); | |
222 | } | |
223 | else { | |
224 | /* | |
225 | * Copy pattern, interpreting quotes; this is slightly | |
226 | * different than the interpretation of quotes above | |
227 | * -- which should prevail? | |
228 | */ | |
229 | while (*compilepat != EOS) { | |
230 | if (*compilepat == QUOTE) { | |
231 | if (*++compilepat == EOS) | |
232 | --compilepat; | |
233 | } | |
234 | *compilebuf++ = (u_char)*compilepat++; | |
235 | } | |
236 | *compilebuf = EOS; | |
237 | } | |
238 | return(globextend(patbuf, pglob)); | |
239 | } else if (!(flags & GLOB_NOSORT)) | |
240 | qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, | |
241 | pglob->gl_pathc - oldpathc, sizeof(char *), compare); | |
242 | return(0); | |
243 | } | |
244 | ||
245 | static int | |
246 | compare(p, q) | |
247 | const void *p, *q; | |
248 | { | |
249 | return(strcmp(*(char **)p, *(char **)q)); | |
250 | } | |
251 | ||
252 | static | |
253 | glob1(pattern, pglob) | |
254 | Char *pattern; | |
255 | glob_t *pglob; | |
256 | { | |
257 | Char pathbuf[MAXPATHLEN+1]; | |
258 | ||
259 | /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ | |
260 | if (*pattern == EOS) | |
261 | return(0); | |
262 | return(glob2(pathbuf, pathbuf, pattern, pglob)); | |
263 | } | |
264 | ||
265 | /* | |
266 | * The functions glob2 and glob3 are mutually recursive; there is one level | |
267 | * of recursion for each segment in the pattern that contains one or more | |
268 | * meta characters. | |
269 | */ | |
270 | static | |
271 | glob2(pathbuf, pathend, pattern, pglob) | |
272 | Char *pathbuf, *pathend, *pattern; | |
273 | glob_t *pglob; | |
274 | { | |
275 | struct stat sb; | |
276 | Char *p, *q; | |
277 | int anymeta; | |
278 | ||
279 | /* | |
280 | * Loop over pattern segments until end of pattern or until | |
281 | * segment with meta character found. | |
282 | */ | |
283 | for (anymeta = 0;;) { | |
284 | if (*pattern == EOS) { /* End of pattern? */ | |
285 | *pathend = EOS; | |
286 | if (g_stat(pathbuf, &sb)) | |
287 | return(0); | |
288 | ||
289 | if (((pglob->gl_flags & GLOB_MARK) && | |
290 | pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) | |
291 | || (S_ISLNK(sb.st_mode) && | |
292 | (g_stat(pathbuf, &sb) == 0) && | |
293 | S_ISDIR(sb.st_mode)))) { | |
294 | *pathend++ = SEP; | |
295 | *pathend = EOS; | |
296 | } | |
297 | ++pglob->gl_matchc; | |
298 | return(globextend(pathbuf, pglob)); | |
299 | } | |
300 | ||
301 | /* Find end of next segment, copy tentatively to pathend. */ | |
302 | q = pathend; | |
303 | p = pattern; | |
304 | while (*p != EOS && *p != SEP) { | |
305 | if (ismeta(*p)) | |
306 | anymeta = 1; | |
307 | *q++ = *p++; | |
308 | } | |
309 | ||
310 | if (!anymeta) { /* No expansion, do next segment. */ | |
311 | pathend = q; | |
312 | pattern = p; | |
313 | while (*pattern == SEP) | |
314 | *pathend++ = *pattern++; | |
315 | } else /* Need expansion, recurse. */ | |
316 | return(glob3(pathbuf, pathend, pattern, p, pglob)); | |
317 | } | |
318 | /* NOTREACHED */ | |
319 | } | |
320 | ||
321 | static | |
322 | glob3(pathbuf, pathend, pattern, restpattern, pglob) | |
323 | Char *pathbuf, *pathend, *pattern, *restpattern; | |
324 | glob_t *pglob; | |
325 | { | |
326 | register struct dirent *dp; | |
327 | DIR *dirp; | |
328 | int len, err; | |
329 | ||
330 | *pathend = EOS; | |
331 | errno = 0; | |
332 | ||
333 | if (!(dirp = g_opendir(pathbuf))) | |
334 | /* TODO: don't call for ENOENT or ENOTDIR? */ | |
335 | if (pglob->gl_errfunc && | |
336 | (*pglob->gl_errfunc)(pathbuf, errno) || | |
337 | (pglob->gl_flags & GLOB_ERR)) | |
338 | return(GLOB_ABEND); | |
339 | else | |
340 | return(0); | |
341 | ||
342 | err = 0; | |
343 | ||
344 | /* Search directory for matching names. */ | |
345 | while ((dp = readdir(dirp))) { | |
346 | register u_char *sc; | |
347 | register Char *dc; | |
348 | ||
349 | /* Initial DOT must be matched literally. */ | |
350 | if (dp->d_name[0] == DOT && *pattern != DOT) | |
351 | continue; | |
352 | for (sc = (u_char *) dp->d_name, dc = pathend; | |
353 | *dc++ = *sc++;); | |
354 | if (!match(pathend, pattern, restpattern)) { | |
355 | *pathend = EOS; | |
356 | continue; | |
357 | } | |
358 | err = glob2(pathbuf, --dc, restpattern, pglob); | |
359 | if (err) | |
360 | break; | |
361 | } | |
362 | ||
363 | /* TODO: check error from readdir? */ | |
364 | (void)closedir(dirp); | |
365 | return(err); | |
366 | } | |
367 | ||
368 | ||
369 | /* | |
370 | * Extend the gl_pathv member of a glob_t structure to accomodate a new item, | |
371 | * add the new item, and update gl_pathc. | |
372 | * | |
373 | * This assumes the BSD realloc, which only copies the block when its size | |
374 | * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic | |
375 | * behavior. | |
376 | * | |
377 | * Return 0 if new item added, error code if memory couldn't be allocated. | |
378 | * | |
379 | * Invariant of the glob_t structure: | |
380 | * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and | |
381 | * gl_pathv points to (gl_offs + gl_pathc + 1) items. | |
382 | */ | |
383 | static int | |
384 | globextend(path, pglob) | |
385 | Char *path; | |
386 | glob_t *pglob; | |
387 | { | |
388 | register char **pathv; | |
389 | register int i; | |
390 | u_int newsize; | |
391 | char *copy; | |
392 | Char *p; | |
393 | ||
394 | newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); | |
395 | pathv = (char **)realloc((char *)pglob->gl_pathv, newsize); | |
396 | if (pathv == NULL) | |
397 | return(GLOB_NOSPACE); | |
398 | ||
399 | if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { | |
400 | /* first time around -- clear initial gl_offs items */ | |
401 | pathv += pglob->gl_offs; | |
402 | for (i = pglob->gl_offs; --i >= 0; ) | |
403 | *--pathv = NULL; | |
404 | } | |
405 | pglob->gl_pathv = pathv; | |
406 | ||
407 | for (p = path; *p++;); | |
408 | if ((copy = malloc(p - path)) != NULL) { | |
409 | g_Ctoc(path, copy); | |
410 | pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; | |
411 | } | |
412 | pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; | |
413 | return(copy == NULL ? GLOB_NOSPACE : 0); | |
414 | } | |
415 | ||
416 | ||
417 | /* | |
418 | * pattern matching function for filenames. Each occurrence of the * | |
419 | * pattern causes a recursion level. | |
420 | */ | |
421 | static | |
422 | match(name, pat, patend) | |
423 | register Char *name, *pat, *patend; | |
424 | { | |
425 | int ok, negate_range; | |
426 | Char c, k; | |
427 | ||
428 | while (pat < patend) { | |
429 | c = *pat++; | |
430 | switch (c & M_MASK) { | |
431 | case M_ALL: | |
432 | if (pat == patend) | |
433 | return(1); | |
434 | for (; *name != EOS; ++name) | |
435 | if (match(name, pat, patend)) | |
436 | return(1); | |
437 | return(0); | |
438 | case M_ONE: | |
439 | if (*name++ == EOS) | |
440 | return(0); | |
441 | break; | |
442 | case M_SET: | |
443 | ok = 0; | |
444 | k = *name++; | |
445 | if (negate_range = ((*pat & M_MASK) == M_NOT)) | |
446 | ++pat; | |
447 | while (((c = *pat++) & M_MASK) != M_END) | |
448 | if ((*pat & M_MASK) == M_RNG) { | |
449 | if (c <= k && k <= pat[1]) | |
450 | ok = 1; | |
451 | pat += 2; | |
452 | } else if (c == k) | |
453 | ok = 1; | |
454 | if (ok == negate_range) | |
455 | return(0); | |
456 | break; | |
457 | default: | |
458 | if (*name++ != c) | |
459 | return(0); | |
460 | break; | |
461 | } | |
462 | } | |
463 | return(*name == EOS); | |
464 | } | |
465 | ||
466 | /* Free allocated data belonging to a glob_t structure. */ | |
467 | void | |
468 | globfree(pglob) | |
469 | glob_t *pglob; | |
470 | { | |
471 | register int i; | |
472 | register char **pp; | |
473 | ||
474 | if (pglob->gl_pathv != NULL) { | |
475 | pp = pglob->gl_pathv + pglob->gl_offs; | |
476 | for (i = pglob->gl_pathc; i--; ++pp) | |
477 | if (*pp) | |
478 | free(*pp); | |
479 | free(pglob->gl_pathv); | |
480 | } | |
481 | } | |
482 | ||
483 | static DIR * | |
484 | g_opendir(str) | |
485 | register Char *str; | |
486 | { | |
487 | char buf[MAXPATHLEN]; | |
488 | ||
489 | if (!*str) | |
490 | return(opendir(".")); | |
491 | g_Ctoc(str, buf); | |
492 | return(opendir(buf)); | |
493 | } | |
494 | ||
495 | static int | |
496 | g_lstat(fn, sb) | |
497 | register Char *fn; | |
498 | struct stat *sb; | |
499 | { | |
500 | char buf[MAXPATHLEN]; | |
501 | ||
502 | g_Ctoc(fn, buf); | |
503 | return(lstat(buf, sb)); | |
504 | } | |
505 | ||
506 | static int | |
507 | g_stat(fn, sb) | |
508 | register Char *fn; | |
509 | struct stat *sb; | |
510 | { | |
511 | char buf[MAXPATHLEN]; | |
512 | ||
513 | g_Ctoc(fn, buf); | |
514 | return(stat(buf, sb)); | |
515 | } | |
516 | ||
517 | static Char * | |
518 | g_strchr(str, ch) | |
519 | Char *str; | |
520 | int ch; | |
521 | { | |
522 | do { | |
523 | if (*str == ch) | |
524 | return (str); | |
525 | } while (*str++); | |
526 | return (NULL); | |
527 | } | |
528 | ||
529 | static void | |
530 | g_Ctoc(str, buf) | |
531 | register Char *str; | |
532 | char *buf; | |
533 | { | |
534 | register char *dc; | |
535 | ||
536 | for (dc = buf; *dc++ = *str++;); | |
537 | } | |
538 | ||
539 | #ifdef DEBUG | |
540 | static void | |
541 | qprintf(s) | |
542 | register Char *s; | |
543 | { | |
544 | register Char *p; | |
545 | ||
546 | for (p = s; *p; p++) | |
547 | (void)printf("%c", *p & 0xff); | |
548 | (void)printf("\n"); | |
549 | for (p = s; *p; p++) | |
550 | (void)printf("%c", *p & M_PROTECT ? '"' : ' '); | |
551 | (void)printf("\n"); | |
552 | for (p = s; *p; p++) | |
553 | (void)printf("%c", *p & M_META ? '_' : ' '); | |
554 | (void)printf("\n"); | |
555 | } | |
556 | #endif |