Commit | Line | Data |
---|---|---|
7ece99ba C |
1 | #include <ctype.h> |
2 | ||
3 | typedef int boolean; | |
4 | #define TRUE 1 | |
5 | #define FALSE 0 | |
6 | #define NIL 0 | |
7 | ||
8 | boolean l_onecase; /* true if upper and lower equivalent */ | |
9 | ||
10 | #define makelower(c) (isupper((c)) ? tolower((c)) : (c)) | |
11 | ||
12 | /* STRNCMP - like strncmp except that we convert the | |
13 | * first string to lower case before comparing | |
14 | * if l_onecase is set. | |
15 | */ | |
16 | ||
17 | STRNCMP(s1, s2, len) | |
18 | register char *s1,*s2; | |
19 | register int len; | |
20 | { | |
21 | if (l_onecase) { | |
22 | do | |
23 | if (*s2 - makelower(*s1)) | |
24 | return (*s2 - makelower(*s1)); | |
25 | else { | |
26 | s2++; | |
27 | s1++; | |
28 | } | |
29 | while (--len); | |
30 | } else { | |
31 | do | |
32 | if (*s2 - *s1) | |
33 | return (*s2 - *s1); | |
34 | else { | |
35 | s2++; | |
36 | s1++; | |
37 | } | |
38 | while (--len); | |
39 | } | |
40 | return(0); | |
41 | } | |
42 | ||
43 | /* The following routine converts an irregular expression to | |
44 | * internal format. | |
45 | * | |
46 | * Either meta symbols (\a \d or \p) or character strings or | |
47 | * operations ( alternation or perenthesizing ) can be | |
48 | * specified. Each starts with a descriptor byte. The descriptor | |
49 | * byte has STR set for strings, META set for meta symbols | |
50 | * and OPER set for operations. | |
51 | * The descriptor byte can also have the OPT bit set if the object | |
52 | * defined is optional. Also ALT can be set to indicate an alternation. | |
53 | * | |
54 | * For metasymbols the byte following the descriptor byte identities | |
55 | * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For | |
56 | * strings the byte after the descriptor is a character count for | |
57 | * the string: | |
58 | * | |
59 | * meta symbols := descriptor | |
60 | * symbol | |
61 | * | |
62 | * strings := descriptor | |
63 | * character count | |
64 | * the string | |
65 | * | |
66 | * operatins := descriptor | |
67 | * symbol | |
68 | * character count | |
69 | */ | |
70 | ||
71 | /* | |
72 | * handy macros for accessing parts of match blocks | |
73 | */ | |
74 | #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */ | |
75 | #define MNEXT(A) (A+2) /* character following a metasymbol block */ | |
76 | ||
77 | #define OSYM(A) (*(A+1)) /* symbol in an operation block */ | |
78 | #define OCNT(A) (*(A+2)) /* character count */ | |
79 | #define ONEXT(A) (A+3) /* next character after the operation */ | |
80 | #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */ | |
81 | ||
82 | #define SCNT(A) (*(A+1)) /* byte count of a string */ | |
83 | #define SSTR(A) (A+2) /* address of the string */ | |
84 | #define SNEXT(A) (A+2+*(A+1)) /* character following the string */ | |
85 | ||
86 | /* | |
87 | * bit flags in the descriptor | |
88 | */ | |
89 | #define OPT 1 | |
90 | #define STR 2 | |
91 | #define META 4 | |
92 | #define ALT 8 | |
93 | #define OPER 16 | |
94 | ||
95 | char *ure; /* pointer current position in unconverted exp */ | |
96 | char *ccre; /* pointer to current position in converted exp*/ | |
97 | char *malloc(); | |
98 | ||
99 | char * | |
100 | convexp(re) | |
101 | char *re; /* unconverted irregular expression */ | |
102 | { | |
103 | register char *cre; /* pointer to converted regular expression */ | |
104 | ||
105 | /* allocate room for the converted expression */ | |
106 | if (re == NIL) | |
107 | return (NIL); | |
108 | if (*re == '\0') | |
109 | return (NIL); | |
110 | cre = malloc (4 * strlen(re) + 3); | |
111 | ccre = cre; | |
112 | ure = re; | |
113 | ||
114 | /* start the conversion with a \a */ | |
115 | *cre = META | OPT; | |
116 | MSYM(cre) = 'a'; | |
117 | ccre = MNEXT(cre); | |
118 | ||
119 | /* start the conversion (its recursive) */ | |
120 | expconv (); | |
121 | *ccre = 0; | |
122 | return (cre); | |
123 | } | |
124 | ||
125 | expconv() | |
126 | { | |
127 | register char *cs; /* pointer to current symbol in converted exp */ | |
128 | register char c; /* character being processed */ | |
129 | register char *acs; /* pinter to last alternate */ | |
130 | register int temp; | |
131 | ||
132 | /* let the conversion begin */ | |
133 | acs = NIL; | |
134 | while (*ure != NIL) { | |
135 | switch (c = *ure++) { | |
136 | ||
137 | case '\\': | |
138 | switch (c = *ure++) { | |
139 | ||
140 | /* escaped characters are just characters */ | |
141 | default: | |
142 | if ((*cs & STR) == 0) { | |
143 | cs = ccre; | |
144 | *cs = STR; | |
145 | SCNT(cs) = 1; | |
146 | ccre += 2; | |
147 | } else | |
148 | SCNT(cs)++; | |
149 | *ccre++ = c; | |
150 | break; | |
151 | ||
152 | /* normal(?) metacharacters */ | |
153 | case 'a': | |
154 | case 'd': | |
155 | case 'e': | |
156 | case 'p': | |
157 | if (acs != NIL && acs != cs) { | |
158 | do { | |
159 | temp = OCNT(acs); | |
160 | OCNT(acs) = ccre - acs; | |
161 | acs -= temp; | |
162 | } while (temp != 0); | |
163 | acs = NIL; | |
164 | } | |
165 | cs = ccre; | |
166 | *cs = META; | |
167 | MSYM(cs) = c; | |
168 | ccre = MNEXT(cs); | |
169 | break; | |
170 | } | |
171 | break; | |
172 | ||
173 | /* just put the symbol in */ | |
174 | case '^': | |
175 | case '$': | |
176 | if (acs != NIL && acs != cs) { | |
177 | do { | |
178 | temp = OCNT(acs); | |
179 | OCNT(acs) = ccre - acs; | |
180 | acs -= temp; | |
181 | } while (temp != 0); | |
182 | acs = NIL; | |
183 | } | |
184 | cs = ccre; | |
185 | *cs = META; | |
186 | MSYM(cs) = c; | |
187 | ccre = MNEXT(cs); | |
188 | break; | |
189 | ||
190 | /* mark the last match sequence as optional */ | |
191 | case '?': | |
192 | *cs = *cs | OPT; | |
193 | break; | |
194 | ||
195 | /* recurse and define a subexpression */ | |
196 | case '(': | |
197 | if (acs != NIL && acs != cs) { | |
198 | do { | |
199 | temp = OCNT(acs); | |
200 | OCNT(acs) = ccre - acs; | |
201 | acs -= temp; | |
202 | } while (temp != 0); | |
203 | acs = NIL; | |
204 | } | |
205 | cs = ccre; | |
206 | *cs = OPER; | |
207 | OSYM(cs) = '('; | |
208 | ccre = ONEXT(cs); | |
209 | expconv (); | |
210 | OCNT(cs) = ccre - cs; /* offset to next symbol */ | |
211 | break; | |
212 | ||
213 | /* return from a recursion */ | |
214 | case ')': | |
215 | if (acs != NIL) { | |
216 | do { | |
217 | temp = OCNT(acs); | |
218 | OCNT(acs) = ccre - acs; | |
219 | acs -= temp; | |
220 | } while (temp != 0); | |
221 | acs = NIL; | |
222 | } | |
223 | cs = ccre; | |
224 | *cs = META; | |
225 | MSYM(cs) = c; | |
226 | ccre = MNEXT(cs); | |
227 | return; | |
228 | ||
229 | /* mark the last match sequence as having an alternate */ | |
230 | /* the third byte will contain an offset to jump over the */ | |
231 | /* alternate match in case the first did not fail */ | |
232 | case '|': | |
233 | if (acs != NIL && acs != cs) | |
234 | OCNT(ccre) = ccre - acs; /* make a back pointer */ | |
235 | else | |
236 | OCNT(ccre) = 0; | |
237 | *cs |= ALT; | |
238 | cs = ccre; | |
239 | *cs = OPER; | |
240 | OSYM(cs) = '|'; | |
241 | ccre = ONEXT(cs); | |
242 | acs = cs; /* remember that the pointer is to be filles */ | |
243 | break; | |
244 | ||
245 | /* if its not a metasymbol just build a scharacter string */ | |
246 | default: | |
247 | if ((*cs & STR) == 0) { | |
248 | cs = ccre; | |
249 | *cs = STR; | |
250 | SCNT(cs) = 1; | |
251 | ccre = SSTR(cs); | |
252 | } else | |
253 | SCNT(cs)++; | |
254 | *ccre++ = c; | |
255 | break; | |
256 | } | |
257 | } | |
258 | if (acs != NIL) { | |
259 | do { | |
260 | temp = OCNT(acs); | |
261 | OCNT(acs) = ccre - acs; | |
262 | acs -= temp; | |
263 | } while (temp != 0); | |
264 | acs = NIL; | |
265 | } | |
266 | return; | |
267 | } | |
268 | /* end of convertre */ | |
269 | ||
270 | ||
271 | /* | |
272 | * The following routine recognises an irregular expresion | |
273 | * with the following special characters: | |
274 | * | |
275 | * \? - means last match was optional | |
276 | * \a - matches any number of characters | |
277 | * \d - matches any number of spaces and tabs | |
278 | * \p - matches any number of alphanumeric | |
279 | * characters. The | |
280 | * characters matched will be copied into | |
281 | * the area pointed to by 'name'. | |
282 | * \| - alternation | |
283 | * \( \) - grouping used mostly for alternation and | |
284 | * optionality | |
285 | * | |
286 | * The irregular expression must be translated to internal form | |
287 | * prior to calling this routine | |
288 | * | |
289 | * The value returned is the pointer to the first non \a | |
290 | * character matched. | |
291 | */ | |
292 | ||
293 | boolean _escaped; /* true if we are currently _escaped */ | |
294 | char *_start; /* start of string */ | |
295 | ||
296 | char * | |
297 | expmatch (s, re, mstring) | |
298 | register char *s; /* string to check for a match in */ | |
299 | register char *re; /* a converted irregular expression */ | |
300 | register char *mstring; /* where to put whatever matches a \p */ | |
301 | { | |
302 | register char *cs; /* the current symbol */ | |
303 | register char *ptr,*s1; /* temporary pointer */ | |
304 | boolean matched; /* a temporary boolean */ | |
305 | ||
306 | /* initial conditions */ | |
307 | if (re == NIL) | |
308 | return (NIL); | |
309 | cs = re; | |
310 | matched = FALSE; | |
311 | ||
312 | /* loop till expression string is exhausted (or at least pretty tired) */ | |
313 | while (*cs) { | |
314 | switch (*cs & (OPER | STR | META)) { | |
315 | ||
316 | /* try to match a string */ | |
317 | case STR: | |
318 | matched = !STRNCMP (s, SSTR(cs), SCNT(cs)); | |
319 | if (matched) { | |
320 | ||
321 | /* hoorah it matches */ | |
322 | s += SCNT(cs); | |
323 | cs = SNEXT(cs); | |
324 | } else if (*cs & ALT) { | |
325 | ||
326 | /* alternation, skip to next expression */ | |
327 | cs = SNEXT(cs); | |
328 | } else if (*cs & OPT) { | |
329 | ||
330 | /* the match is optional */ | |
331 | cs = SNEXT(cs); | |
332 | matched = 1; /* indicate a successful match */ | |
333 | } else { | |
334 | ||
335 | /* no match, error return */ | |
336 | return (NIL); | |
337 | } | |
338 | break; | |
339 | ||
340 | /* an operator, do something fancy */ | |
341 | case OPER: | |
342 | switch (OSYM(cs)) { | |
343 | ||
344 | /* this is an alternation */ | |
345 | case '|': | |
346 | if (matched) | |
347 | ||
348 | /* last thing in the alternation was a match, skip ahead */ | |
349 | cs = OPTR(cs); | |
350 | else | |
351 | ||
352 | /* no match, keep trying */ | |
353 | cs = ONEXT(cs); | |
354 | break; | |
355 | ||
356 | /* this is a grouping, recurse */ | |
357 | case '(': | |
358 | ptr = expmatch (s, ONEXT(cs), mstring); | |
359 | if (ptr != NIL) { | |
360 | ||
361 | /* the subexpression matched */ | |
362 | matched = 1; | |
363 | s = ptr; | |
364 | } else if (*cs & ALT) { | |
365 | ||
366 | /* alternation, skip to next expression */ | |
367 | matched = 0; | |
368 | } else if (*cs & OPT) { | |
369 | ||
370 | /* the match is optional */ | |
371 | matched = 1; /* indicate a successful match */ | |
372 | } else { | |
373 | ||
374 | /* no match, error return */ | |
375 | return (NIL); | |
376 | } | |
377 | cs = OPTR(cs); | |
378 | break; | |
379 | } | |
380 | break; | |
381 | ||
382 | /* try to match a metasymbol */ | |
383 | case META: | |
384 | switch (MSYM(cs)) { | |
385 | ||
386 | /* try to match anything and remember what was matched */ | |
387 | case 'p': | |
388 | /* | |
389 | * This is really the same as trying the match the | |
390 | * remaining parts of the expression to any subset | |
391 | * of the string. | |
392 | */ | |
393 | s1 = s; | |
394 | do { | |
395 | ptr = expmatch (s1, MNEXT(cs), mstring); | |
396 | if (ptr != NIL && s1 != s) { | |
397 | ||
398 | /* we have a match, remember the match */ | |
399 | strncpy (mstring, s, s1 - s); | |
400 | mstring[s1 - s] = '\0'; | |
401 | return (ptr); | |
402 | } else if (ptr != NIL && (*cs & OPT)) { | |
403 | ||
404 | /* it was aoptional so no match is ok */ | |
405 | return (ptr); | |
406 | } else if (ptr != NIL) { | |
407 | ||
408 | /* not optional and we still matched */ | |
409 | return (NIL); | |
410 | } | |
411 | if (!isalnum(*s1) && *s1 != '_') | |
412 | return (NIL); | |
413 | if (*s1 == '\\') | |
414 | _escaped = _escaped ? FALSE : TRUE; | |
415 | else | |
416 | _escaped = FALSE; | |
417 | } while (*s1++); | |
418 | return (NIL); | |
419 | ||
420 | /* try to match anything */ | |
421 | case 'a': | |
422 | /* | |
423 | * This is really the same as trying the match the | |
424 | * remaining parts of the expression to any subset | |
425 | * of the string. | |
426 | */ | |
427 | s1 = s; | |
428 | do { | |
429 | ptr = expmatch (s1, MNEXT(cs), mstring); | |
430 | if (ptr != NIL && s1 != s) { | |
431 | ||
432 | /* we have a match */ | |
433 | return (ptr); | |
434 | } else if (ptr != NIL && (*cs & OPT)) { | |
435 | ||
436 | /* it was aoptional so no match is ok */ | |
437 | return (ptr); | |
438 | } else if (ptr != NIL) { | |
439 | ||
440 | /* not optional and we still matched */ | |
441 | return (NIL); | |
442 | } | |
443 | if (*s1 == '\\') | |
444 | _escaped = _escaped ? FALSE : TRUE; | |
445 | else | |
446 | _escaped = FALSE; | |
447 | } while (*s1++); | |
448 | return (NIL); | |
449 | ||
450 | /* fail if we are currently _escaped */ | |
451 | case 'e': | |
452 | if (_escaped) | |
453 | return(NIL); | |
454 | cs = MNEXT(cs); | |
455 | break; | |
456 | ||
457 | /* match any number of tabs and spaces */ | |
458 | case 'd': | |
459 | ptr = s; | |
460 | while (*s == ' ' || *s == '\t') | |
461 | s++; | |
462 | if (s != ptr || s == _start) { | |
463 | ||
464 | /* match, be happy */ | |
465 | matched = 1; | |
466 | cs = MNEXT(cs); | |
467 | } else if (*s == '\n' || *s == '\0') { | |
468 | ||
469 | /* match, be happy */ | |
470 | matched = 1; | |
471 | cs = MNEXT(cs); | |
472 | } else if (*cs & ALT) { | |
473 | ||
474 | /* try the next part */ | |
475 | matched = 0; | |
476 | cs = MNEXT(cs); | |
477 | } else if (*cs & OPT) { | |
478 | ||
479 | /* doesn't matter */ | |
480 | matched = 1; | |
481 | cs = MNEXT(cs); | |
482 | } else | |
483 | ||
484 | /* no match, error return */ | |
485 | return (NIL); | |
486 | break; | |
487 | ||
488 | /* check for end of line */ | |
489 | case '$': | |
490 | if (*s == '\0' || *s == '\n') { | |
491 | ||
492 | /* match, be happy */ | |
493 | s++; | |
494 | matched = 1; | |
495 | cs = MNEXT(cs); | |
496 | } else if (*cs & ALT) { | |
497 | ||
498 | /* try the next part */ | |
499 | matched = 0; | |
500 | cs = MNEXT(cs); | |
501 | } else if (*cs & OPT) { | |
502 | ||
503 | /* doesn't matter */ | |
504 | matched = 1; | |
505 | cs = MNEXT(cs); | |
506 | } else | |
507 | ||
508 | /* no match, error return */ | |
509 | return (NIL); | |
510 | break; | |
511 | ||
512 | /* check for start of line */ | |
513 | case '^': | |
514 | if (s == _start) { | |
515 | ||
516 | /* match, be happy */ | |
517 | matched = 1; | |
518 | cs = MNEXT(cs); | |
519 | } else if (*cs & ALT) { | |
520 | ||
521 | /* try the next part */ | |
522 | matched = 0; | |
523 | cs = MNEXT(cs); | |
524 | } else if (*cs & OPT) { | |
525 | ||
526 | /* doesn't matter */ | |
527 | matched = 1; | |
528 | cs = MNEXT(cs); | |
529 | } else | |
530 | ||
531 | /* no match, error return */ | |
532 | return (NIL); | |
533 | break; | |
534 | ||
535 | /* end of a subexpression, return success */ | |
536 | case ')': | |
537 | return (s); | |
538 | } | |
539 | break; | |
540 | } | |
541 | } | |
542 | return (s); | |
543 | } |