Commit | Line | Data |
---|---|---|
bde95929 KB |
1 | /* |
2 | * Copyright (c) 1989 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Guido van Rossum. | |
7 | * | |
269a7923 | 8 | * %sccs.include.redist.c% |
bde95929 KB |
9 | */ |
10 | ||
11 | #if defined(LIBC_SCCS) && !defined(lint) | |
baed3ae0 | 12 | static char sccsid[] = "@(#)glob.c 5.6 (Berkeley) %G%"; |
bde95929 KB |
13 | #endif /* LIBC_SCCS and not lint */ |
14 | ||
15 | /* | |
16 | * Glob: the interface is a superset of the one defined in POSIX 1003.2, | |
17 | * draft 9. | |
18 | * | |
19 | * The [!...] convention to negate a range is supported (SysV, Posix, ksh). | |
20 | * | |
21 | * Optional extra services, controlled by flags not defined by POSIX: | |
f6ec3c7f KB |
22 | * |
23 | * GLOB_QUOTE: | |
24 | * Escaping convention: \ inhibits any special meaning the following | |
25 | * character might have (except \ at end of string is retained). | |
26 | * GLOB_MAGCHAR: | |
27 | * Set in gl_flags is pattern contained a globbing character. | |
28 | * gl_matchc: | |
29 | * Number of matches in the current invocation of glob. | |
bde95929 KB |
30 | */ |
31 | ||
32 | #include <sys/param.h> | |
33 | #include <sys/stat.h> | |
34 | #include <dirent.h> | |
35 | #include <glob.h> | |
36 | #include <ctype.h> | |
37 | #include <errno.h> | |
38 | #include <string.h> | |
39 | #include <stdio.h> | |
c5980113 | 40 | #include <stdlib.h> |
bde95929 KB |
41 | |
42 | typedef int bool_t; | |
43 | ||
c5980113 DS |
44 | static glob1(); |
45 | static glob2(); | |
46 | static glob3(); | |
47 | static globextend(); | |
48 | static bool_t match(); | |
49 | ||
bde95929 KB |
50 | #define DOLLAR '$' |
51 | #define DOT '.' | |
52 | #define EOS '\0' | |
53 | #define LBRACKET '[' | |
54 | #define NOT '!' | |
55 | #define QUESTION '?' | |
56 | #define QUOTE '\\' | |
57 | #define RANGE '-' | |
58 | #define RBRACKET ']' | |
59 | #define SEP '/' | |
60 | #define STAR '*' | |
61 | #define TILDE '~' | |
62 | #define UNDERSCORE '_' | |
63 | ||
64 | #define METABIT 0x80 | |
65 | #define META(c) ((c)|METABIT) | |
66 | #define M_ALL META('*') | |
67 | #define M_END META(']') | |
68 | #define M_NOT META('!') | |
69 | #define M_ONE META('?') | |
70 | #define M_RNG META('-') | |
71 | #define M_SET META('[') | |
72 | #define ismeta(c) (((c)&METABIT) != 0) | |
73 | ||
74 | static | |
75 | compare(p, q) | |
5587df80 | 76 | void **p, **q; |
bde95929 | 77 | { |
5587df80 | 78 | return(strcmp(*(char **)p, *(char **)q)); |
bde95929 KB |
79 | } |
80 | ||
81 | ||
82 | /* | |
83 | * The main glob() routine: compiles the pattern (optionally processing | |
84 | * quotes), calls glob1() to do the real pattern matching, and finally | |
85 | * sorts the list (unless unsorted operation is requested). Returns 0 | |
86 | * if things went well, nonzero if errors occurred. It is not an error | |
87 | * to find no matches. | |
88 | */ | |
89 | glob(pattern, flags, errfunc, pglob) | |
c5980113 DS |
90 | const char *pattern; |
91 | int flags; | |
92 | int (*errfunc) __P((char *, int)); | |
bde95929 KB |
93 | glob_t *pglob; |
94 | { | |
95 | int err, oldpathc; | |
c5980113 DS |
96 | char *bufnext, *bufend, *compilebuf; |
97 | const char *compilepat, *patnext; | |
bde95929 KB |
98 | char c, patbuf[MAXPATHLEN+1]; |
99 | ||
100 | patnext = pattern; | |
101 | if (!(flags & GLOB_APPEND)) { | |
102 | pglob->gl_pathc = 0; | |
103 | pglob->gl_pathv = NULL; | |
104 | if (!(flags & GLOB_DOOFFS)) | |
105 | pglob->gl_offs = 0; | |
106 | } | |
f6ec3c7f | 107 | pglob->gl_flags = flags & ~GLOB_MAGCHAR; |
bde95929 KB |
108 | pglob->gl_errfunc = errfunc; |
109 | oldpathc = pglob->gl_pathc; | |
f6ec3c7f | 110 | pglob->gl_matchc = 0; |
bde95929 KB |
111 | |
112 | bufnext = patbuf; | |
113 | bufend = bufnext+MAXPATHLEN; | |
114 | ||
115 | compilebuf = bufnext; | |
116 | compilepat = patnext; | |
117 | while (bufnext < bufend && (c = *patnext++) != EOS) { | |
118 | switch (c) { | |
119 | case LBRACKET: | |
f6ec3c7f | 120 | pglob->gl_flags |= GLOB_MAGCHAR; |
bde95929 KB |
121 | c = *patnext; |
122 | if (c == NOT) | |
123 | ++patnext; | |
124 | if (*patnext == EOS || | |
125 | strchr(patnext+1, RBRACKET) == NULL) { | |
126 | *bufnext++ = LBRACKET; | |
127 | if (c == NOT) | |
128 | --patnext; | |
129 | break; | |
130 | } | |
131 | *bufnext++ = M_SET; | |
132 | if (c == NOT) | |
133 | *bufnext++ = M_NOT; | |
134 | c = *patnext++; | |
135 | do { | |
136 | /* todo: quoting */ | |
137 | *bufnext++ = c; | |
138 | if (*patnext == RANGE && | |
139 | (c = patnext[1]) != RBRACKET) { | |
140 | *bufnext++ = M_RNG; | |
141 | *bufnext++ = c; | |
142 | patnext += 2; | |
143 | } | |
144 | } while ((c = *patnext++) != RBRACKET); | |
145 | *bufnext++ = M_END; | |
146 | break; | |
147 | case QUESTION: | |
f6ec3c7f | 148 | pglob->gl_flags |= GLOB_MAGCHAR; |
bde95929 KB |
149 | *bufnext++ = M_ONE; |
150 | break; | |
151 | case QUOTE: | |
152 | if (!(flags & GLOB_QUOTE)) | |
153 | *bufnext++ = QUOTE; | |
154 | else { | |
155 | if ((c = *patnext++) == EOS) { | |
156 | c = QUOTE; | |
157 | --patnext; | |
158 | } | |
159 | *bufnext++ = c; | |
160 | } | |
161 | break; | |
162 | case STAR: | |
f6ec3c7f | 163 | pglob->gl_flags |= GLOB_MAGCHAR; |
bde95929 KB |
164 | *bufnext++ = M_ALL; |
165 | break; | |
166 | default: | |
167 | *bufnext++ = c; | |
168 | break; | |
169 | } | |
170 | } | |
171 | *bufnext = EOS; | |
172 | ||
173 | if ((err = glob1(patbuf, pglob)) != 0) | |
174 | return(err); | |
175 | ||
176 | if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) { | |
177 | if (!(flags & GLOB_QUOTE)) | |
178 | (void)strcpy(compilebuf, compilepat); | |
179 | else { | |
180 | /* | |
181 | * copy pattern, interpreting quotes; this is slightly | |
182 | * different than the interpretation of quotes above | |
183 | * -- which should prevail? | |
184 | */ | |
185 | while (*compilepat != EOS) { | |
186 | if (*compilepat == QUOTE) { | |
187 | if (*++compilepat == EOS) | |
188 | --compilepat; | |
189 | } | |
190 | *compilebuf++ = *compilepat++; | |
191 | } | |
192 | *compilebuf = EOS; | |
193 | } | |
194 | return(globextend(patbuf, pglob)); | |
195 | } else if (!(flags & GLOB_NOSORT)) | |
196 | qsort((char*) (pglob->gl_pathv + pglob->gl_offs + oldpathc), | |
197 | pglob->gl_pathc - oldpathc, sizeof(char*), compare); | |
198 | return(0); | |
199 | } | |
200 | ||
201 | static | |
202 | glob1(pattern, pglob) | |
203 | char *pattern; | |
204 | glob_t *pglob; | |
205 | { | |
206 | char pathbuf[MAXPATHLEN+1]; | |
207 | ||
208 | /* | |
209 | * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ | |
210 | if (*pattern == EOS) | |
211 | return(0); | |
212 | return(glob2(pathbuf, pathbuf, pattern, pglob)); | |
213 | } | |
214 | ||
215 | /* | |
216 | * functions glob2 and glob3 are mutually recursive; there is one level | |
217 | * of recursion for each segment in the pattern that contains one or | |
218 | * more meta characters. | |
219 | */ | |
220 | static | |
221 | glob2(pathbuf, pathend, pattern, pglob) | |
222 | char *pathbuf, *pathend, *pattern; | |
223 | glob_t *pglob; | |
224 | { | |
225 | struct stat sbuf; | |
226 | bool_t anymeta = 0; | |
227 | char *p, *q; | |
228 | ||
229 | /* | |
230 | * loop over pattern segments until end of pattern or until | |
231 | * segment with meta character found. | |
232 | */ | |
233 | for (;;) { | |
234 | if (*pattern == EOS) { /* end of pattern? */ | |
235 | *pathend = EOS; | |
236 | if (stat(pathbuf, &sbuf) != 0) | |
237 | return(0); /* need error call here? */ | |
238 | if ((pglob->gl_flags & GLOB_MARK) && | |
239 | pathend[-1] != SEP && S_ISDIR(sbuf.st_mode)) { | |
240 | *pathend++ = SEP; | |
241 | *pathend = EOS; | |
242 | } | |
f6ec3c7f | 243 | ++pglob->gl_matchc; |
bde95929 KB |
244 | return(globextend(pathbuf, pglob)); |
245 | } | |
246 | ||
247 | /* find end of next segment, copy tentatively to pathend */ | |
248 | q = pathend; | |
249 | p = pattern; | |
250 | while (*p != EOS && *p != SEP) { | |
251 | if (ismeta(*p)) | |
252 | anymeta = 1; | |
253 | *q++ = *p++; | |
254 | } | |
255 | ||
256 | if (!anymeta) { /* no expansion, do next segment */ | |
257 | pathend = q; | |
258 | pattern = p; | |
259 | while (*pattern == SEP) | |
260 | *pathend++ = *pattern++; | |
261 | } else /* need expansion, recurse */ | |
262 | return(glob3(pathbuf, pathend, pattern, p, pglob)); | |
263 | } | |
264 | /* NOTREACHED */ | |
265 | } | |
266 | ||
267 | static | |
268 | glob3(pathbuf, pathend, pattern, restpattern, pglob) | |
269 | char *pathbuf, *pathend, *pattern, *restpattern; | |
270 | glob_t *pglob; | |
271 | { | |
272 | extern int errno; | |
273 | DIR *dirp; | |
274 | struct dirent *dp; | |
275 | int len, err; | |
276 | ||
277 | *pathend = EOS; | |
278 | errno = 0; | |
279 | if (!(dirp = opendir(pathbuf))) | |
280 | /* todo: don't call for ENOENT or ENOTDIR? */ | |
281 | if (pglob->gl_errfunc && | |
282 | (*pglob->gl_errfunc)(pathbuf, errno) || | |
283 | (pglob->gl_flags & GLOB_ERR)) | |
284 | return(GLOB_ABEND); | |
285 | else | |
286 | return(0); | |
287 | ||
288 | err = 0; | |
289 | ||
290 | /* search directory for matching names */ | |
291 | while ((dp = readdir(dirp))) { | |
292 | /* initial DOT must be matched literally */ | |
293 | if (dp->d_name[0] == DOT && *pattern != DOT) | |
294 | continue; | |
295 | if (!match(dp->d_name, pattern, restpattern)) | |
296 | continue; | |
297 | len = dp->d_namlen; | |
298 | (void)strcpy(pathend, dp->d_name); | |
299 | err = glob2(pathbuf, pathend+len, restpattern, pglob); | |
300 | if (err) | |
301 | break; | |
302 | } | |
303 | /* todo: check error from readdir? */ | |
304 | (void)closedir(dirp); | |
305 | return(err); | |
306 | } | |
307 | ||
308 | ||
309 | /* | |
310 | * Extend the gl_pathv member of a glob_t structure to accomodate a new item, | |
311 | * add the new item, and update gl_pathc. | |
312 | * | |
313 | * This assumes the BSD realloc, which only copies the block when its size | |
314 | * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic | |
315 | * behavior. | |
316 | * | |
317 | * Return 0 if new item added, error code if memory couldn't be allocated. | |
318 | * | |
319 | * Invariant of the glob_t structure: | |
320 | * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and | |
321 | * gl_pathv points to (gl_offs + gl_pathc + 1) items. | |
322 | */ | |
323 | static | |
324 | globextend(path, pglob) | |
325 | char *path; | |
326 | glob_t *pglob; | |
327 | { | |
328 | register char **pathv; | |
329 | register int i; | |
330 | u_int copysize, newsize; | |
331 | char *copy; | |
332 | ||
333 | newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); | |
334 | pathv = (char **)realloc((char *)(pathv = pglob->gl_pathv), newsize); | |
335 | if (pathv == NULL) | |
336 | return(GLOB_NOSPACE); | |
337 | ||
338 | if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { | |
339 | /* first time around -- clear initial gl_offs items */ | |
340 | pathv += pglob->gl_offs; | |
341 | for (i = pglob->gl_offs; --i >= 0; ) | |
342 | *--pathv = NULL; | |
343 | } | |
344 | pglob->gl_pathv = pathv; | |
345 | ||
346 | copysize = strlen(path) + 1; | |
347 | if ((copy = malloc(copysize)) != NULL) { | |
348 | (void)strcpy(copy, path); | |
349 | pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; | |
350 | } | |
351 | pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; | |
352 | return((copy == NULL) ? GLOB_NOSPACE : 0); | |
353 | } | |
354 | ||
355 | ||
356 | /* | |
357 | * pattern matching function for filenames. Each occurrence of the * | |
358 | * pattern causes a recursion level. | |
359 | */ | |
360 | static bool_t | |
361 | match(name, pat, patend) | |
362 | register char *name, *pat, *patend; | |
363 | { | |
364 | bool_t ok, negate_range; | |
365 | char c, k; | |
366 | ||
367 | while (pat < patend) { | |
368 | c = *pat++; | |
369 | switch (c & 0xff) { | |
370 | case M_ALL: | |
371 | if (pat == patend) | |
372 | return(1); | |
373 | for (; *name != EOS; ++name) { | |
374 | if (match(name, pat, patend)) | |
375 | return(1); | |
376 | } | |
377 | return(0); | |
378 | case M_ONE: | |
379 | if (*name++ == EOS) | |
380 | return(0); | |
381 | break; | |
382 | case M_SET: | |
383 | ok = 0; | |
384 | k = *name++; | |
385 | if (negate_range = (*pat & 0xff) == M_NOT) | |
386 | ++pat; | |
387 | while (((c = *pat++) & 0xff) != M_END) { | |
388 | if ((*pat & 0xff) == M_RNG) { | |
389 | if (c <= k && k <= pat[1]) | |
390 | ok = 1; | |
391 | pat += 2; | |
392 | } | |
393 | else if (c == k) | |
394 | ok = 1; | |
395 | } | |
396 | if (ok == negate_range) | |
397 | return(0); | |
398 | break; | |
399 | default: | |
400 | if (*name++ != c) | |
401 | return(0); | |
402 | break; | |
403 | } | |
404 | } | |
405 | return(*name == EOS); | |
406 | } | |
407 | ||
408 | /* free allocated data belonging to a glob_t structure */ | |
409 | void | |
410 | globfree(pglob) | |
411 | glob_t *pglob; | |
412 | { | |
413 | register int i; | |
414 | register char **pp; | |
415 | ||
416 | if (pglob->gl_pathv != NULL) { | |
417 | pp = pglob->gl_pathv + pglob->gl_offs; | |
418 | for (i = pglob->gl_pathc; i--; ++pp) | |
419 | if (*pp) | |
baed3ae0 KB |
420 | free((void *)*pp); |
421 | free((void *)pglob->gl_pathv); | |
bde95929 KB |
422 | } |
423 | } |