Commit | Line | Data |
---|---|---|
304f87e5 | 1 | /*- |
2bda8e6c KB |
2 | * Copyright (c) 1991, 1993 |
3 | * The Regents of the University of California. All rights reserved. | |
304f87e5 KB |
4 | * |
5 | * %sccs.include.redist.c% | |
6 | */ | |
7 | ||
8 | #ifndef lint | |
2bda8e6c | 9 | static char sccsid[] = "@(#)str.c 8.1 (Berkeley) %G%"; |
304f87e5 KB |
10 | #endif /* not lint */ |
11 | ||
12 | #include <sys/cdefs.h> | |
13 | #include <sys/types.h> | |
f3bfa023 | 14 | |
304f87e5 | 15 | #include <errno.h> |
304f87e5 | 16 | #include <stddef.h> |
f3bfa023 | 17 | #include <stdio.h> |
304f87e5 KB |
18 | #include <stdlib.h> |
19 | #include <string.h> | |
f3bfa023 | 20 | |
304f87e5 KB |
21 | #include "extern.h" |
22 | ||
23 | static int backslash __P((STR *)); | |
24 | static int bracket __P((STR *)); | |
25 | static int c_class __P((const void *, const void *)); | |
26 | static void genclass __P((STR *)); | |
27 | static void genequiv __P((STR *)); | |
28 | static int genrange __P((STR *)); | |
29 | static void genseq __P((STR *)); | |
30 | ||
31 | int | |
32 | next(s) | |
33 | register STR *s; | |
34 | { | |
35 | register int ch; | |
36 | ||
37 | switch (s->state) { | |
38 | case EOS: | |
39 | return (0); | |
40 | case INFINITE: | |
41 | return (1); | |
42 | case NORMAL: | |
34897cd7 | 43 | switch (ch = *s->str) { |
304f87e5 | 44 | case '\0': |
304f87e5 KB |
45 | s->state = EOS; |
46 | return (0); | |
47 | case '\\': | |
48 | s->lastch = backslash(s); | |
49 | break; | |
50 | case '[': | |
51 | if (bracket(s)) | |
52 | return (next(s)); | |
53 | /* FALLTHROUGH */ | |
54 | default: | |
34897cd7 | 55 | ++s->str; |
304f87e5 KB |
56 | s->lastch = ch; |
57 | break; | |
58 | } | |
59 | ||
60 | /* We can start a range at any time. */ | |
61 | if (s->str[0] == '-' && genrange(s)) | |
62 | return (next(s)); | |
63 | return (1); | |
64 | case RANGE: | |
65 | if (s->cnt-- == 0) { | |
66 | s->state = NORMAL; | |
67 | return (next(s)); | |
68 | } | |
69 | ++s->lastch; | |
70 | return (1); | |
71 | case SEQUENCE: | |
72 | if (s->cnt-- == 0) { | |
73 | s->state = NORMAL; | |
74 | return (next(s)); | |
75 | } | |
76 | return (1); | |
77 | case SET: | |
304f87e5 KB |
78 | if ((s->lastch = s->set[s->cnt++]) == OOBCH) { |
79 | s->state = NORMAL; | |
80 | return (next(s)); | |
81 | } | |
82 | return (1); | |
83 | } | |
84 | /* NOTREACHED */ | |
85 | } | |
86 | ||
87 | static int | |
88 | bracket(s) | |
89 | register STR *s; | |
90 | { | |
91 | register char *p; | |
92 | ||
9048bb69 | 93 | switch (s->str[1]) { |
304f87e5 | 94 | case ':': /* "[:class:]" */ |
9048bb69 | 95 | if ((p = strstr(s->str + 2, ":]")) == NULL) |
304f87e5 KB |
96 | return (0); |
97 | *p = '\0'; | |
9048bb69 | 98 | s->str += 2; |
304f87e5 KB |
99 | genclass(s); |
100 | s->str = p + 2; | |
101 | return (1); | |
102 | case '=': /* "[=equiv=]" */ | |
9048bb69 | 103 | if ((p = strstr(s->str + 2, "=]")) == NULL) |
304f87e5 | 104 | return (0); |
9048bb69 | 105 | s->str += 2; |
304f87e5 KB |
106 | genequiv(s); |
107 | return (1); | |
9048bb69 KB |
108 | default: /* "[\###*n]" or "[#*n]" */ |
109 | if ((p = strpbrk(s->str + 2, "*]")) == NULL) | |
304f87e5 KB |
110 | return (0); |
111 | if (p[0] != '*' || index(p, ']') == NULL) | |
112 | return (0); | |
9048bb69 | 113 | s->str += 1; |
304f87e5 KB |
114 | genseq(s); |
115 | return (1); | |
116 | } | |
117 | /* NOTREACHED */ | |
118 | } | |
119 | ||
/*
 * Declare the character-class predicates as real functions so that their
 * addresses can be stored in the table below.
 */
int	isalnum __P((int)),
	isalpha __P((int)),
	isblank __P((int)),
	isspace __P((int)),
	iscntrl __P((int)),
	isdigit __P((int)),
	isgraph __P((int)),
	islower __P((int)),
	isprint __P((int)),
	ispunct __P((int)),
	isupper __P((int)),
	isxdigit __P((int));

/* One "[:name:]" character class. */
typedef struct {
	char *name;			/* class name as written by the user */
	int (*func) __P((int));		/* membership predicate */
	int *set;			/* most recently built member list */
} CLASS;

/*
 * Table of supported classes.  It is searched with bsearch(3), so entries
 * must stay sorted by name in strcmp(3) order.
 */
static CLASS classes[] = {
	{ "alnum",  isalnum,  },
	{ "alpha",  isalpha,  },
	{ "blank",  isblank,  },
	{ "cntrl",  iscntrl,  },
	{ "digit",  isdigit,  },
	{ "graph",  isgraph,  },
	{ "lower",  islower,  },
	{ "print",  isprint,  },
	{ "punct",  ispunct,  },
	{ "space",  isspace,  },
	{ "upper",  isupper,  },
	{ "xdigit", isxdigit, },
};
153 | ||
154 | static void | |
155 | genclass(s) | |
156 | STR *s; | |
157 | { | |
158 | register int cnt, (*func) __P((int)); | |
159 | CLASS *cp, tmp; | |
160 | int *p; | |
161 | ||
162 | tmp.name = s->str; | |
163 | if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) / | |
164 | sizeof(CLASS), sizeof(CLASS), c_class)) == NULL) | |
165 | err("unknown class %s", s->str); | |
304f87e5 KB |
166 | |
167 | if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL) | |
168 | err("%s", strerror(errno)); | |
6d5d747e | 169 | bzero(p, (NCHARS + 1) * sizeof(int)); |
304f87e5 KB |
170 | for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt) |
171 | if ((func)(cnt)) | |
172 | *p++ = cnt; | |
173 | *p = OOBCH; | |
174 | ||
175 | s->cnt = 0; | |
6d5d747e | 176 | s->state = SET; |
304f87e5 KB |
177 | s->set = cp->set; |
178 | } | |
179 | ||
180 | static int | |
181 | c_class(a, b) | |
182 | const void *a, *b; | |
183 | { | |
184 | return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name)); | |
185 | } | |
186 | ||
187 | /* | |
188 | * English doesn't have any equivalence classes, so for now | |
189 | * we just syntax check and grab the character. | |
190 | */ | |
191 | static void | |
192 | genequiv(s) | |
193 | STR *s; | |
194 | { | |
9048bb69 | 195 | if (*s->str == '\\') { |
6d5d747e | 196 | s->equiv[0] = backslash(s); |
304f87e5 KB |
197 | if (*s->str != '=') |
198 | err("misplaced equivalence equals sign"); | |
199 | } else { | |
6d5d747e | 200 | s->equiv[0] = s->str[0]; |
304f87e5 KB |
201 | if (s->str[1] != '=') |
202 | err("misplaced equivalence equals sign"); | |
203 | } | |
204 | s->str += 2; | |
205 | s->cnt = 0; | |
206 | s->state = SET; | |
6d5d747e | 207 | s->set = s->equiv; |
304f87e5 KB |
208 | } |
209 | ||
210 | static int | |
211 | genrange(s) | |
212 | STR *s; | |
213 | { | |
214 | int stopval; | |
215 | char *savestart; | |
216 | ||
217 | savestart = s->str; | |
218 | stopval = *++s->str == '\\' ? backslash(s) : *s->str; | |
219 | if (stopval < s->lastch) { | |
220 | s->str = savestart; | |
221 | return (0); | |
222 | } | |
223 | s->cnt = stopval - s->lastch + 1; | |
224 | s->state = RANGE; | |
225 | --s->lastch; | |
226 | return (1); | |
227 | } | |
228 | ||
229 | static void | |
230 | genseq(s) | |
231 | STR *s; | |
232 | { | |
233 | char *ep; | |
234 | ||
6d5d747e KB |
235 | if (s->which == STRING1) |
236 | err("sequences only valid in string2"); | |
304f87e5 KB |
237 | |
238 | if (*s->str == '\\') | |
239 | s->lastch = backslash(s); | |
240 | else | |
241 | s->lastch = *s->str++; | |
242 | if (*s->str != '*') | |
243 | err("misplaced sequence asterisk"); | |
244 | ||
245 | switch (*++s->str) { | |
246 | case '\\': | |
247 | s->cnt = backslash(s); | |
248 | break; | |
249 | case ']': | |
250 | s->cnt = 0; | |
251 | ++s->str; | |
252 | break; | |
253 | default: | |
254 | if (isdigit(*s->str)) { | |
255 | s->cnt = strtol(s->str, &ep, 0); | |
256 | if (*ep == ']') { | |
257 | s->str = ep + 1; | |
258 | break; | |
259 | } | |
260 | } | |
261 | err("illegal sequence count"); | |
262 | /* NOTREACHED */ | |
263 | } | |
264 | ||
265 | s->state = s->cnt ? SEQUENCE : INFINITE; | |
266 | } | |
267 | ||
6d5d747e | 268 | /* Use the #defines isXXX() here, DON'T use them above. */ |
304f87e5 KB |
269 | #include <ctype.h> |
270 | ||
271 | /* | |
272 | * Translate \??? into a character. Up to 3 octal digits, if no digits either | |
273 | * an escape code or a literal character. | |
274 | */ | |
275 | static int | |
276 | backslash(s) | |
277 | register STR *s; | |
278 | { | |
279 | register int ch, cnt, val; | |
280 | ||
281 | for (cnt = val = 0;;) { | |
282 | ch = *++s->str; | |
283 | if (!isascii(ch) || !isdigit(ch)) | |
284 | break; | |
285 | val = val * 8 + ch - '0'; | |
f3bfa023 KB |
286 | if (++cnt == 3) { |
287 | ++s->str; | |
304f87e5 | 288 | break; |
f3bfa023 | 289 | } |
304f87e5 KB |
290 | } |
291 | if (cnt) | |
292 | return (val); | |
ec0cc493 KB |
293 | if (ch != '\0') |
294 | ++s->str; | |
304f87e5 KB |
295 | switch (ch) { |
296 | case 'a': /* escape characters */ | |
297 | return ('\7'); | |
298 | case 'b': | |
299 | return ('\b'); | |
300 | case 'f': | |
301 | return ('\f'); | |
302 | case 'n': | |
303 | return ('\n'); | |
304 | case 'r': | |
305 | return ('\r'); | |
306 | case 't': | |
307 | return ('\t'); | |
308 | case 'v': | |
309 | return ('\13'); | |
310 | case '\0': /* \" -> \ */ | |
311 | s->state = EOS; | |
312 | return ('\\'); | |
313 | default: /* \x" -> x */ | |
314 | return (ch); | |
315 | } | |
316 | } |