Commit | Line | Data |
---|---|---|
ae6495a7 NW |
1 | /*- |
2 | * Copyright (c) 1991 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * 2. Redistributions in binary form must reproduce the above copyright | |
11 | * notice, this list of conditions and the following disclaimer in the | |
12 | * documentation and/or other materials provided with the distribution. | |
13 | * 3. All advertising materials mentioning features or use of this software | |
14 | * must display the following acknowledgement: | |
15 | * This product includes software developed by the University of | |
16 | * California, Berkeley and its contributors. | |
17 | * 4. Neither the name of the University nor the names of its contributors | |
18 | * may be used to endorse or promote products derived from this software | |
19 | * without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
31 | * SUCH DAMAGE. | |
32 | */ | |
33 | ||
34 | #ifndef lint | |
35 | static char sccsid[] = "@(#)str.c 5.9 (Berkeley) 3/4/93"; | |
36 | #endif /* not lint */ | |
37 | ||
38 | #include <sys/cdefs.h> | |
39 | #include <sys/types.h> | |
40 | ||
41 | #include <errno.h> | |
42 | #include <stddef.h> | |
43 | #include <stdio.h> | |
44 | #include <stdlib.h> | |
45 | #include <string.h> | |
46 | ||
47 | #include "extern.h" | |
48 | ||
49 | static int backslash __P((STR *)); | |
50 | static int bracket __P((STR *)); | |
51 | static int c_class __P((const void *, const void *)); | |
52 | static void genclass __P((STR *)); | |
53 | static void genequiv __P((STR *)); | |
54 | static int genrange __P((STR *)); | |
55 | static void genseq __P((STR *)); | |
56 | ||
57 | int | |
58 | next(s) | |
59 | register STR *s; | |
60 | { | |
61 | register int ch; | |
62 | ||
63 | switch (s->state) { | |
64 | case EOS: | |
65 | return (0); | |
66 | case INFINITE: | |
67 | return (1); | |
68 | case NORMAL: | |
69 | switch (ch = *s->str) { | |
70 | case '\0': | |
71 | s->state = EOS; | |
72 | return (0); | |
73 | case '\\': | |
74 | s->lastch = backslash(s); | |
75 | break; | |
76 | case '[': | |
77 | if (bracket(s)) | |
78 | return (next(s)); | |
79 | /* FALLTHROUGH */ | |
80 | default: | |
81 | ++s->str; | |
82 | s->lastch = ch; | |
83 | break; | |
84 | } | |
85 | ||
86 | /* We can start a range at any time. */ | |
87 | if (s->str[0] == '-' && genrange(s)) | |
88 | return (next(s)); | |
89 | return (1); | |
90 | case RANGE: | |
91 | if (s->cnt-- == 0) { | |
92 | s->state = NORMAL; | |
93 | return (next(s)); | |
94 | } | |
95 | ++s->lastch; | |
96 | return (1); | |
97 | case SEQUENCE: | |
98 | if (s->cnt-- == 0) { | |
99 | s->state = NORMAL; | |
100 | return (next(s)); | |
101 | } | |
102 | return (1); | |
103 | case SET: | |
104 | if ((s->lastch = s->set[s->cnt++]) == OOBCH) { | |
105 | s->state = NORMAL; | |
106 | return (next(s)); | |
107 | } | |
108 | return (1); | |
109 | } | |
110 | /* NOTREACHED */ | |
111 | } | |
112 | ||
113 | static int | |
114 | bracket(s) | |
115 | register STR *s; | |
116 | { | |
117 | register char *p; | |
118 | ||
119 | switch (s->str[1]) { | |
120 | case ':': /* "[:class:]" */ | |
121 | if ((p = strstr(s->str + 2, ":]")) == NULL) | |
122 | return (0); | |
123 | *p = '\0'; | |
124 | s->str += 2; | |
125 | genclass(s); | |
126 | s->str = p + 2; | |
127 | return (1); | |
128 | case '=': /* "[=equiv=]" */ | |
129 | if ((p = strstr(s->str + 2, "=]")) == NULL) | |
130 | return (0); | |
131 | s->str += 2; | |
132 | genequiv(s); | |
133 | return (1); | |
134 | default: /* "[\###*n]" or "[#*n]" */ | |
135 | if ((p = strpbrk(s->str + 2, "*]")) == NULL) | |
136 | return (0); | |
137 | if (p[0] != '*' || index(p, ']') == NULL) | |
138 | return (0); | |
139 | s->str += 1; | |
140 | genseq(s); | |
141 | return (1); | |
142 | } | |
143 | /* NOTREACHED */ | |
144 | } | |
145 | ||
/*
 * Declare the character classification routines as real functions so that
 * their addresses can be stored in the class table.
 */
int	isalnum __P((int));
int	isalpha __P((int));
/* int	isblank __P((int));	until 4.4 */
int	isspace __P((int));
int	iscntrl __P((int));
int	isdigit __P((int));
int	isgraph __P((int));
int	islower __P((int));
int	isprint __P((int));
int	ispunct __P((int));
int	isupper __P((int));
int	isxdigit __P((int));
158 | ||
ef9a6ca4 NW |
159 | |
/*
 * isblank --
 *	Local stand-in (until 4.4): returns 1 for a space or a tab and 0 for
 *	anything else.
 */
static int
isblank(x)
	int x;
{
	switch (x) {
	case ' ':
	case '\t':
		return 1;
	default:
		return 0;
	}
}
166 | ||
167 | ||
ae6495a7 NW |
/*
 * One entry per "[:class:]" name.
 */
typedef struct {
	const char *name;		/* class name, the bsearch() key */
	int (*func) __P((int));		/* membership test for the class */
	int *set;			/* member list, filled in by genclass() */
} CLASS;

/*
 * Table of known character classes.  genclass() looks names up with
 * bsearch(), so the entries MUST stay sorted by name in strcmp() order.
 */
static CLASS classes[] = {
	{ "alnum",  isalnum,  NULL },
	{ "alpha",  isalpha,  NULL },
	{ "blank",  isblank,  NULL },
	{ "cntrl",  iscntrl,  NULL },
	{ "digit",  isdigit,  NULL },
	{ "graph",  isgraph,  NULL },
	{ "lower",  islower,  NULL },
	{ "print",  isprint,  NULL },	/* was wrongly mapped to isupper */
	{ "punct",  ispunct,  NULL },
	{ "space",  isspace,  NULL },
	{ "upper",  isupper,  NULL },
	{ "xdigit", isxdigit, NULL },
};
188 | ||
189 | static void | |
190 | genclass(s) | |
191 | STR *s; | |
192 | { | |
193 | register int cnt, (*func) __P((int)); | |
194 | CLASS *cp, tmp; | |
195 | int *p; | |
196 | ||
197 | tmp.name = s->str; | |
198 | if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) / | |
199 | sizeof(CLASS), sizeof(CLASS), c_class)) == NULL) | |
200 | err("unknown class %s", s->str); | |
201 | ||
202 | if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL) | |
203 | err("%s", strerror(errno)); | |
204 | bzero(p, (NCHARS + 1) * sizeof(int)); | |
205 | for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt) | |
206 | if ((func)(cnt)) | |
207 | *p++ = cnt; | |
208 | *p = OOBCH; | |
209 | ||
210 | s->cnt = 0; | |
211 | s->state = SET; | |
212 | s->set = cp->set; | |
213 | } | |
214 | ||
215 | static int | |
216 | c_class(a, b) | |
217 | const void *a, *b; | |
218 | { | |
219 | return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name)); | |
220 | } | |
221 | ||
222 | /* | |
223 | * English doesn't have any equivalence classes, so for now | |
224 | * we just syntax check and grab the character. | |
225 | */ | |
226 | static void | |
227 | genequiv(s) | |
228 | STR *s; | |
229 | { | |
230 | if (*s->str == '\\') { | |
231 | s->equiv[0] = backslash(s); | |
232 | if (*s->str != '=') | |
233 | err("misplaced equivalence equals sign"); | |
234 | } else { | |
235 | s->equiv[0] = s->str[0]; | |
236 | if (s->str[1] != '=') | |
237 | err("misplaced equivalence equals sign"); | |
238 | } | |
239 | s->str += 2; | |
240 | s->cnt = 0; | |
241 | s->state = SET; | |
242 | s->set = s->equiv; | |
243 | } | |
244 | ||
245 | static int | |
246 | genrange(s) | |
247 | STR *s; | |
248 | { | |
249 | int stopval; | |
250 | char *savestart; | |
251 | ||
252 | savestart = s->str; | |
cec1dca5 | 253 | stopval = *++s->str == '\\' ? backslash(s) : *s->str++; |
ae6495a7 NW |
254 | if (stopval < s->lastch) { |
255 | s->str = savestart; | |
256 | return (0); | |
257 | } | |
258 | s->cnt = stopval - s->lastch + 1; | |
259 | s->state = RANGE; | |
260 | --s->lastch; | |
261 | return (1); | |
262 | } | |
263 | ||
264 | static void | |
265 | genseq(s) | |
266 | STR *s; | |
267 | { | |
268 | char *ep; | |
269 | ||
270 | if (s->which == STRING1) | |
271 | err("sequences only valid in string2"); | |
272 | ||
273 | if (*s->str == '\\') | |
274 | s->lastch = backslash(s); | |
275 | else | |
276 | s->lastch = *s->str++; | |
277 | if (*s->str != '*') | |
278 | err("misplaced sequence asterisk"); | |
279 | ||
280 | switch (*++s->str) { | |
281 | case '\\': | |
282 | s->cnt = backslash(s); | |
283 | break; | |
284 | case ']': | |
285 | s->cnt = 0; | |
286 | ++s->str; | |
287 | break; | |
288 | default: | |
289 | if (isdigit(*s->str)) { | |
290 | s->cnt = strtol(s->str, &ep, 0); | |
291 | if (*ep == ']') { | |
292 | s->str = ep + 1; | |
293 | break; | |
294 | } | |
295 | } | |
296 | err("illegal sequence count"); | |
297 | /* NOTREACHED */ | |
298 | } | |
299 | ||
300 | s->state = s->cnt ? SEQUENCE : INFINITE; | |
301 | } | |
302 | ||
303 | /* Use the #defines isXXX() here, DON'T use them above. */ | |
304 | #include <ctype.h> | |
305 | ||
306 | /* | |
307 | * Translate \??? into a character. Up to 3 octal digits, if no digits either | |
308 | * an escape code or a literal character. | |
309 | */ | |
310 | static int | |
311 | backslash(s) | |
312 | register STR *s; | |
313 | { | |
314 | register int ch, cnt, val; | |
315 | ||
316 | for (cnt = val = 0;;) { | |
317 | ch = *++s->str; | |
318 | if (!isascii(ch) || !isdigit(ch)) | |
319 | break; | |
320 | val = val * 8 + ch - '0'; | |
321 | if (++cnt == 3) { | |
322 | ++s->str; | |
323 | break; | |
324 | } | |
325 | } | |
326 | if (cnt) | |
327 | return (val); | |
328 | if (ch != '\0') | |
329 | ++s->str; | |
330 | switch (ch) { | |
331 | case 'a': /* escape characters */ | |
332 | return ('\7'); | |
333 | case 'b': | |
334 | return ('\b'); | |
335 | case 'f': | |
336 | return ('\f'); | |
337 | case 'n': | |
338 | return ('\n'); | |
339 | case 'r': | |
340 | return ('\r'); | |
341 | case 't': | |
342 | return ('\t'); | |
343 | case 'v': | |
344 | return ('\13'); | |
345 | case '\0': /* \" -> \ */ | |
346 | s->state = EOS; | |
347 | return ('\\'); | |
348 | default: /* \x" -> x */ | |
349 | return (ch); | |
350 | } | |
351 | } |