Removed KLUDGELINEMODE option.
[unix-history] / usr.bin / elvis / regexp.c
CommitLineData
15637ed4
RG
1/* regexp.c */
2
3/* This file contains the code that compiles regular expressions and executes
4 * them. It supports the same syntax and features as vi's regular expression
5 * code. Specifically, the meta characters are:
6 * ^ matches the beginning of a line
7 * $ matches the end of a line
8 * \< matches the beginning of a word
9 * \> matches the end of a word
10 * . matches any single character
11 * [] matches any character in a character class
12 * \( delimits the start of a subexpression
13 * \) delimits the end of a subexpression
14 * * repeats the preceding 0 or more times
15 * NOTE: You cannot follow a \) with a *.
16 *
17 * The physical structure of a compiled RE is as follows:
18 * - First, there is a one-byte value that says how many character classes
19 * are used in this regular expression
20 * - Next, each character class is stored as a bitmap that is 256 bits
21 * (32 bytes) long.
22 * - A mixture of literal characters and compiled meta characters follows.
23 * This begins with M_BEGIN(0) and ends with M_END(0). All meta chars
24 * are stored as a \n followed by a one-byte code, so they take up two
25 * bytes apiece. Literal characters take up one byte apiece. \n can't
26 * be used as a literal character.
27 *
28 * If NO_MAGIC is defined, then a different set of functions is used instead.
29 * That right, this file contains TWO versions of the code.
30 */
31
32#include <setjmp.h>
33#include "config.h"
34#include "ctype.h"
35#include "vi.h"
6e657cf2
AM
36#ifdef REGEX
37# include <regex.h>
38#else
39# include "regexp.h"
40#endif
41
15637ed4 42
6e657cf2
AM
43#ifdef REGEX
44extern int patlock; /* from cmd_substitute() module */
15637ed4 45
6e657cf2 46static regex_t *previous = NULL; /* the previous regexp, used when null regexp is given */
15637ed4 47
6e657cf2
AM
48regex_t *
49optpat(s)
50 char *s;
51{
99668b43
AM
52 char *neuter();
53 char *expand_tilde();
0b18c9d7 54
6e657cf2
AM
55 int n;
56 if (*s == '\0') {
99668b43 57 if (!previous) regerr("no previous pattern");
6e657cf2
AM
58 return previous;
59 } else if (previous && !patlock)
60 regfree(previous);
61 else if ((previous = (regex_t *) malloc(sizeof(regex_t))) == NULL) {
99668b43 62 regerr("out of memory");
6e657cf2
AM
63 return previous;
64 }
65 patlock = 0;
99668b43
AM
66 if ((s = *o_magic ? expand_tilde(s) : neuter(s)) == NULL) {
67 free(previous);
68 return previous = NULL;
69 } else if (n = regcomp(previous, s, *o_ignorecase ? REG_ICASE : 0)) {
70 regerr("%d", n);
6e657cf2
AM
71 free(previous);
72 return previous = NULL;
73 }
74 return previous;
75}
0b18c9d7 76
99668b43
AM
77extern char *last_repl; /* replacement text from previous substitute */
78
79/* expand_tilde: expand ~'s in a BRE */
80char *
81expand_tilde(s)
82 char *s;
83{
84 char *literalize();
85 static char *hd = NULL;
86
87 char *t, *repl;
88 int size;
89 int offset;
90 int m;
91
92 free(hd);
93 hd = t = malloc(size = strlen(s) + 1);
94 while (*s)
95 if (*s == '\\' && *(s + 1) == '~') {
96 *t++ = *s++;
97 *t++ = *s++;
98 } else if (*s != '~')
99 *t++ = *s++;
100 else {
101 if (!last_repl) {
102 regerr("no previous replacement");
103 return NULL;
104 } else if ((repl = literalize(last_repl)) == NULL)
105 return NULL;
106 m = strlen(repl);
107 offset = t - hd;
108 if ((hd = realloc(hd, size += m)) == NULL) {
109 regerr("out of memory");
110 return NULL;
111 }
112 t = hd + offset;
113 strcpy(t, repl);
114 t += m;
115 s++;
116 }
117 *t = '\0';
118 return hd;
119}
120
121
122/* escape BRE meta-characters and expand ~'s in a string */
123char *
0b18c9d7
AM
124neuter(s)
125 char *s;
126{
99668b43 127 char *literalize();
0b18c9d7
AM
128 static char *hd = NULL;
129
99668b43
AM
130 char *t, *repl;
131 int size;
132 int offset;
133 int m;
0b18c9d7
AM
134
135 free(hd);
99668b43
AM
136 if ((hd = t = (char *) malloc(size = 2 * strlen(s) + 1)) == NULL) {
137 regerr("out of memory");
0b18c9d7 138 return NULL;
99668b43 139 }
0b18c9d7
AM
140 if (*s == '^')
141 *t++ = *s++;
142 while (*s) {
99668b43
AM
143 if (*s == '\\' && (*(s + 1) == '.' || *(s + 1) == '*' ||
144 *(s + 1) == '[')) {
145 s++;
146 *t++ = *s++;
147 continue;
148 } else if (*s == '\\' && *(s + 1) == '~') {
149 if (!last_repl) {
150 regerr("no previous replacement");
151 return NULL;
152 } else if ((repl = literalize(last_repl)) == NULL)
153 return NULL;
154 m = strlen(repl);
155 offset = t - hd;
156 if ((hd = realloc(hd, size += m)) == NULL) {
157 regerr("out of memory");
158 return NULL;
159 }
160 t = hd + (offset);
161 strcpy(t, repl);
162 t += m;
163 s += 2;
164 continue;
165 } else if (*s == '.' || *s == '\\' || *s == '[' || *s == '*')
0b18c9d7
AM
166 *t++ = '\\';
167 *t++ = *s++;
168 }
169 *t = '\0';
170 return hd;
171}
172
99668b43
AM
173
174/* escape BRE meta-characters in a string */
175char *
176literalize(s)
177 char *s;
178{
179 static char *hd = NULL;
180
181 char *t;
182 int size;
183 int offset;
184 int m;
185
186 free(hd);
187 if ((hd = t = (char *) malloc(size = 2 * strlen(s) + 1)) == NULL) {
188 regerr("out of memory");
189 return NULL;
190 }
191 if (*o_magic)
192 while (*s) {
193 if (*s == '~' || *s == '&') {
194 regerr("can't use ~ or & in pattern");
195 return NULL;
196 } else if (*s == '\\') {
197 s++;
198 if (isdigit(*s)) {
199 regerr("can't use \\d in pattern");
200 return NULL;
201 } else if (*s == '&' || *s == '~') {
202 *t++ = '\\';
203 *t++ = *s++;
204 }
205 } else if (*s == '^' || *s == '$' || *s == '.' ||
206 *s == '[' || *s == '*') {
207 *t++ = '\\';
208 *t++ = *s++;
209 } else
210 *t++ = *s++;
211 }
212 else
213 while (*s) {
214 if (*s == '\\') {
215 s++;
216 if (*s == '&' || *s == '~') {
217 regerr("can't use \\~ or \\& in pattern");
218 return NULL;
219 } else if (isdigit(*s)) {
220 regerr("can't use \\d in pattern");
221 return NULL;
222 }
223 } else
224 *t++ = *s++;
225 }
226 *t = '\0';
227 return hd;
228}
229
6e657cf2 230#else
15637ed4
RG
231static char *previous; /* the previous regexp, used when null regexp is given */
232
233
234#ifndef NO_MAGIC
235/* THE REAL REGEXP PACKAGE IS USED UNLESS "NO_MAGIC" IS DEFINED */
236
237/* These are used to classify or recognize meta-characters */
238#define META '\0'
239#define BASE_META(m) ((m) - 256)
240#define INT_META(c) ((c) + 256)
241#define IS_META(m) ((m) >= 256)
242#define IS_CLASS(m) ((m) >= M_CLASS(0) && (m) <= M_CLASS(9))
243#define IS_START(m) ((m) >= M_START(0) && (m) <= M_START(9))
244#define IS_END(m) ((m) >= M_END(0) && (m) <= M_END(9))
245#define IS_CLOSURE(m) ((m) >= M_SPLAT && (m) <= M_RANGE)
246#define ADD_META(s,m) (*(s)++ = META, *(s)++ = BASE_META(m))
247#define GET_META(s) (*(s) == META ? INT_META(*++(s)) : *s)
248
249/* These are the internal codes used for each type of meta-character */
250#define M_BEGLINE 256 /* internal code for ^ */
251#define M_ENDLINE 257 /* internal code for $ */
252#define M_BEGWORD 258 /* internal code for \< */
253#define M_ENDWORD 259 /* internal code for \> */
254#define M_ANY 260 /* internal code for . */
255#define M_SPLAT 261 /* internal code for * */
256#define M_PLUS 262 /* internal code for \+ */
257#define M_QMARK 263 /* internal code for \? */
258#define M_RANGE 264 /* internal code for \{ */
259#define M_CLASS(n) (265+(n)) /* internal code for [] */
260#define M_START(n) (275+(n)) /* internal code for \( */
261#define M_END(n) (285+(n)) /* internal code for \) */
262
263/* These are used during compilation */
264static int class_cnt; /* used to assign class IDs */
265static int start_cnt; /* used to assign start IDs */
266static int end_stk[NSUBEXP];/* used to assign end IDs */
267static int end_sp;
268static char *retext; /* points to the text being compiled */
269
270/* error-handling stuff */
271jmp_buf errorhandler;
6e657cf2 272#define FAIL(why) regerr(why); longjmp(errorhandler, 1)
15637ed4
RG
273
274
275
276
277
278/* This function builds a bitmap for a particular class */
279static char *makeclass(text, bmap)
280 REG char *text; /* start of the class */
281 REG char *bmap; /* the bitmap */
282{
283 REG int i;
284 int complement = 0;
285
286
08746e8b
AM
287 checkmem();
288
15637ed4
RG
289 /* zero the bitmap */
290 for (i = 0; bmap && i < 32; i++)
291 {
292 bmap[i] = 0;
293 }
294
295 /* see if we're going to complement this class */
296 if (*text == '^')
297 {
298 text++;
299 complement = 1;
300 }
301
302 /* add in the characters */
303 while (*text && *text != ']')
304 {
305 /* is this a span of characters? */
306 if (text[1] == '-' && text[2])
307 {
308 /* spans can't be backwards */
309 if (text[0] > text[2])
310 {
311 FAIL("Backwards span in []");
312 }
313
314 /* add each character in the span to the bitmap */
08746e8b 315 for (i = UCHAR(text[0]); bmap && (unsigned)i <= UCHAR(text[2]); i++)
15637ed4
RG
316 {
317 bmap[i >> 3] |= (1 << (i & 7));
318 }
319
320 /* move past this span */
321 text += 3;
322 }
323 else
324 {
325 /* add this single character to the span */
326 i = *text++;
327 if (bmap)
328 {
08746e8b 329 bmap[UCHAR(i) >> 3] |= (1 << (UCHAR(i) & 7));
15637ed4
RG
330 }
331 }
332 }
333
334 /* make sure the closing ] is missing */
335 if (*text++ != ']')
336 {
337 FAIL("] missing");
338 }
339
340 /* if we're supposed to complement this class, then do so */
341 if (complement && bmap)
342 {
343 for (i = 0; i < 32; i++)
344 {
345 bmap[i] = ~bmap[i];
346 }
347 }
348
08746e8b
AM
349 checkmem();
350
15637ed4
RG
351 return text;
352}
353
354
355
356
357/* This function gets the next character or meta character from a string.
358 * The pointer is incremented by 1, or by 2 for \-quoted characters. For [],
359 * a bitmap is generated via makeclass() (if re is given), and the
360 * character-class text is skipped.
361 */
362static int gettoken(sptr, re)
363 char **sptr;
364 regexp *re;
365{
366 int c;
367
368 c = **sptr;
08746e8b
AM
369 if (!c)
370 {
371 return c;
372 }
15637ed4
RG
373 ++*sptr;
374 if (c == '\\')
375 {
376 c = **sptr;
377 ++*sptr;
378 switch (c)
379 {
380 case '<':
381 return M_BEGWORD;
382
383 case '>':
384 return M_ENDWORD;
385
386 case '(':
387 if (start_cnt >= NSUBEXP)
388 {
389 FAIL("Too many \\(s");
390 }
391 end_stk[end_sp++] = start_cnt;
392 return M_START(start_cnt++);
393
394 case ')':
395 if (end_sp <= 0)
396 {
397 FAIL("Mismatched \\)");
398 }
399 return M_END(end_stk[--end_sp]);
400
401 case '*':
402 return (*o_magic ? c : M_SPLAT);
403
404 case '.':
405 return (*o_magic ? c : M_ANY);
406
407 case '+':
408 return M_PLUS;
409
410 case '?':
411 return M_QMARK;
412#ifndef CRUNCH
413 case '{':
414 return M_RANGE;
415#endif
416 default:
417 return c;
418 }
419 }
420 else if (*o_magic)
421 {
422 switch (c)
423 {
424 case '^':
425 if (*sptr == retext + 1)
426 {
427 return M_BEGLINE;
428 }
429 return c;
430
431 case '$':
432 if (!**sptr)
433 {
434 return M_ENDLINE;
435 }
436 return c;
437
438 case '.':
439 return M_ANY;
440
441 case '*':
442 return M_SPLAT;
443
444 case '[':
445 /* make sure we don't have too many classes */
446 if (class_cnt >= 10)
447 {
448 FAIL("Too many []s");
449 }
450
451 /* process the character list for this class */
452 if (re)
453 {
454 /* generate the bitmap for this class */
455 *sptr = makeclass(*sptr, re->program + 1 + 32 * class_cnt);
456 }
457 else
458 {
459 /* skip to end of the class */
460 *sptr = makeclass(*sptr, (char *)0);
461 }
462 return M_CLASS(class_cnt++);
463
464 default:
465 return c;
466 }
467 }
468 else /* unquoted nomagic */
469 {
470 switch (c)
471 {
472 case '^':
473 if (*sptr == retext + 1)
474 {
475 return M_BEGLINE;
476 }
477 return c;
478
479 case '$':
480 if (!**sptr)
481 {
482 return M_ENDLINE;
483 }
484 return c;
485
486 default:
487 return c;
488 }
489 }
490 /*NOTREACHED*/
491}
492
493
494
495
496/* This function calculates the number of bytes that will be needed for a
497 * compiled RE. Its argument is the uncompiled version. It is not clever
498 * about catching syntax errors; that is done in a later pass.
499 */
500static unsigned calcsize(text)
501 char *text;
502{
503 unsigned size;
504 int token;
505
506 retext = text;
507 class_cnt = 0;
508 start_cnt = 1;
509 end_sp = 0;
510 size = 5;
511 while ((token = gettoken(&text, (regexp *)0)) != 0)
512 {
513 if (IS_CLASS(token))
514 {
515 size += 34;
516 }
517#ifndef CRUNCH
518 else if (token == M_RANGE)
519 {
520 size += 4;
521 while ((token = gettoken(&text, (regexp *)0)) != 0
522 && token != '}')
523 {
524 }
525 if (!token)
526 {
527 return size;
528 }
529 }
530#endif
531 else if (IS_META(token))
532 {
533 size += 2;
534 }
535 else
536 {
537 size++;
538 }
539 }
540
541 return size;
542}
543
544
545
546/* This function compiles a regexp. */
547regexp *regcomp(exp)
548 char *exp;
549{
550 int needfirst;
551 unsigned size;
552 int token;
553 int peek;
554 char *build;
08746e8b
AM
555#if __STDC__
556 volatile
557#endif
15637ed4
RG
558 regexp *re;
559#ifndef CRUNCH
560 int from;
561 int to;
562 int digit;
563#endif
08746e8b
AM
564#ifdef DEBUG
565 int calced;
566#endif
567
15637ed4 568
08746e8b 569 checkmem();
15637ed4
RG
570
571 /* prepare for error handling */
572 re = (regexp *)0;
573 if (setjmp(errorhandler))
574 {
08746e8b 575 checkmem();
15637ed4
RG
576 if (re)
577 {
08746e8b 578 _free_(re);
15637ed4
RG
579 }
580 return (regexp *)0;
581 }
582
583 /* if an empty regexp string was given, use the previous one */
584 if (*exp == 0)
585 {
586 if (!previous)
587 {
588 FAIL("No previous RE");
589 }
590 exp = previous;
591 }
592 else /* non-empty regexp given, so remember it */
593 {
594 if (previous)
08746e8b 595 _free_(previous);
15637ed4
RG
596 previous = (char *)malloc((unsigned)(strlen(exp) + 1));
597 if (previous)
598 strcpy(previous, exp);
599 }
600
601 /* allocate memory */
08746e8b 602 checkmem();
15637ed4
RG
603 class_cnt = 0;
604 start_cnt = 1;
605 end_sp = 0;
606 retext = exp;
08746e8b
AM
607#ifdef DEBUG
608 calced = calcsize(exp);
609 size = calced + sizeof(regexp);
610#else
15637ed4 611 size = calcsize(exp) + sizeof(regexp) + 10; /* !!! 10 bytes for slop */
08746e8b 612#endif
15637ed4 613#ifdef lint
08746e8b 614 re = (regexp *)0;
15637ed4
RG
615#else
616 re = (regexp *)malloc((unsigned)size);
617#endif
618 if (!re)
619 {
620 FAIL("Not enough memory for this RE");
621 }
08746e8b 622 checkmem();
15637ed4
RG
623
624 /* compile it */
625 build = &re->program[1 + 32 * class_cnt];
626 re->program[0] = class_cnt;
627 for (token = 0; token < NSUBEXP; token++)
628 {
629 re->startp[token] = re->endp[token] = (char *)0;
630 }
631 re->first = 0;
632 re->bol = 0;
633 re->minlen = 0;
634 needfirst = 1;
635 class_cnt = 0;
636 start_cnt = 1;
637 end_sp = 0;
638 retext = exp;
639 for (token = M_START(0), peek = gettoken(&exp, re);
640 token;
641 token = peek, peek = gettoken(&exp, re))
642 {
643 /* special processing for the closure operator */
644 if (IS_CLOSURE(peek))
645 {
646 /* detect misuse of closure operator */
647 if (IS_START(token))
648 {
649 FAIL("Closure operator follows nothing");
650 }
651 else if (IS_META(token) && token != M_ANY && !IS_CLASS(token))
652 {
653 FAIL("Closure operators can only follow a normal character or . or []");
654 }
655
656#ifndef CRUNCH
657 /* if \{ \} then read the range */
658 if (peek == M_RANGE)
659 {
660 from = 0;
661 for (digit = gettoken(&exp, re);
662 !IS_META(digit) && isdigit(digit);
663 digit = gettoken(&exp, re))
664 {
665 from = from * 10 + digit - '0';
666 }
667 if (digit == '}')
668 {
669 to = from;
670 }
671 else if (digit == ',')
672 {
673 to = 0;
674 for (digit = gettoken(&exp, re);
675 !IS_META(digit) && isdigit(digit);
676 digit = gettoken(&exp, re))
677 {
678 to = to * 10 + digit - '0';
679 }
680 if (to == 0)
681 {
682 to = 255;
683 }
684 }
685 if (digit != '}')
686 {
687 FAIL("Bad characters after \\{");
688 }
689 else if (to < from || to == 0 || from >= 255)
690 {
691 FAIL("Invalid range for \\{ \\}");
692 }
693 re->minlen += from;
694 }
695 else
696#endif
697 if (peek != M_SPLAT)
698 {
699 re->minlen++;
700 }
701
702 /* it is okay -- make it prefix instead of postfix */
703 ADD_META(build, peek);
704#ifndef CRUNCH
705 if (peek == M_RANGE)
706 {
707 *build++ = from;
708 *build++ = (to < 255 ? to : 255);
709 }
710#endif
711
712
713 /* take care of "needfirst" - is this the first char? */
714 if (needfirst && peek == M_PLUS && !IS_META(token))
715 {
716 re->first = token;
717 }
718 needfirst = 0;
719
720 /* we used "peek" -- need to refill it */
721 peek = gettoken(&exp, re);
722 if (IS_CLOSURE(peek))
723 {
724 FAIL("* or \\+ or \\? doubled up");
725 }
726 }
727 else if (!IS_META(token))
728 {
729 /* normal char is NOT argument of closure */
730 if (needfirst)
731 {
732 re->first = token;
733 needfirst = 0;
734 }
735 re->minlen++;
736 }
737 else if (token == M_ANY || IS_CLASS(token))
738 {
739 /* . or [] is NOT argument of closure */
740 needfirst = 0;
741 re->minlen++;
742 }
743
744 /* the "token" character is not closure -- process it normally */
745 if (token == M_BEGLINE)
746 {
747 /* set the BOL flag instead of storing M_BEGLINE */
748 re->bol = 1;
749 }
750 else if (IS_META(token))
751 {
752 ADD_META(build, token);
753 }
754 else
755 {
756 *build++ = token;
757 }
758 }
08746e8b 759 checkmem();
15637ed4
RG
760
761 /* end it with a \) which MUST MATCH the opening \( */
762 ADD_META(build, M_END(0));
763 if (end_sp > 0)
764 {
765 FAIL("Not enough \\)s");
766 }
767
08746e8b
AM
768#ifdef DEBUG
769 if ((int)(build - re->program) != calced)
770 {
771 msg("regcomp error: calced=%d, actual=%d", calced, (int)(build - re->program));
772 getkey(0);
773 }
774#endif
775
776 checkmem();
15637ed4
RG
777 return re;
778}
779
780
781
782/*---------------------------------------------------------------------------*/
783
784
785/* This function checks for a match between a character and a token which is
786 * known to represent a single character. It returns 0 if they match, or
787 * 1 if they don't.
788 */
789int match1(re, ch, token)
790 regexp *re;
791 REG char ch;
792 REG int token;
793{
794 if (!ch)
795 {
796 /* the end of a line can't match any RE of width 1 */
797 return 1;
798 }
799 if (token == M_ANY)
800 {
801 return 0;
802 }
803 else if (IS_CLASS(token))
804 {
08746e8b 805 if (re->program[1 + 32 * (token - M_CLASS(0)) + (UCHAR(ch) >> 3)] & (1 << (UCHAR(ch) & 7)))
15637ed4
RG
806 return 0;
807 }
808 else if (ch == token || *o_ignorecase && tolower(ch) == tolower(token))
809 {
810 return 0;
811 }
812 return 1;
813}
814
815
816
817/* This function checks characters up to and including the next closure, at
818 * which point it does a recursive call to check the rest of it. This function
819 * returns 0 if everything matches, or 1 if something doesn't match.
820 */
821int match(re, str, prog, here)
822 regexp *re; /* the regular expression */
823 char *str; /* the string */
824 REG char *prog; /* a portion of re->program, an compiled RE */
825 REG char *here; /* a portion of str, the string to compare it to */
826{
827 REG int token; /* the roken pointed to by prog */
828 REG int nmatched;/* counter, used during closure matching */
829 REG int closure;/* the token denoting the type of closure */
830 int from; /* minimum number of matches in closure */
831 int to; /* maximum number of matches in closure */
832
833 for (token = GET_META(prog); !IS_CLOSURE(token); prog++, token = GET_META(prog))
834 {
835 switch (token)
836 {
837 /*case M_BEGLINE: can't happen; re->bol is used instead */
838 case M_ENDLINE:
839 if (*here)
840 return 1;
841 break;
842
843 case M_BEGWORD:
844 if (here != str &&
845 (here[-1] == '_' || isalnum(here[-1])))
846 return 1;
847 break;
848
849 case M_ENDWORD:
850 if (here[0] == '_' || isalnum(here[0]))
851 return 1;
852 break;
853
854 case M_START(0):
855 case M_START(1):
856 case M_START(2):
857 case M_START(3):
858 case M_START(4):
859 case M_START(5):
860 case M_START(6):
861 case M_START(7):
862 case M_START(8):
863 case M_START(9):
864 re->startp[token - M_START(0)] = (char *)here;
865 break;
866
867 case M_END(0):
868 case M_END(1):
869 case M_END(2):
870 case M_END(3):
871 case M_END(4):
872 case M_END(5):
873 case M_END(6):
874 case M_END(7):
875 case M_END(8):
876 case M_END(9):
877 re->endp[token - M_END(0)] = (char *)here;
878 if (token == M_END(0))
879 {
880 return 0;
881 }
882 break;
883
884 default: /* literal, M_CLASS(n), or M_ANY */
885 if (match1(re, *here, token) != 0)
886 return 1;
887 here++;
888 }
889 }
890
891 /* C L O S U R E */
892
893 /* step 1: see what we have to match against, and move "prog" to point
894 * to the remainder of the compiled RE.
895 */
896 closure = token;
897 prog++;
898 switch (closure)
899 {
900 case M_SPLAT:
901 from = 0;
902 to = strlen(str); /* infinity */
903 break;
904
905 case M_PLUS:
906 from = 1;
907 to = strlen(str); /* infinity */
908 break;
909
910 case M_QMARK:
911 from = 0;
912 to = 1;
913 break;
914
915#ifndef CRUNCH
916 case M_RANGE:
917 from = UCHAR(*prog++);
918 to = UCHAR(*prog++);
919 if (to == 255)
920 {
921 to = strlen(str); /* infinity */
922 }
923 break;
924#endif
925 }
926 token = GET_META(prog);
927 prog++;
928
929 /* step 2: see how many times we can match that token against the string */
930 for (nmatched = 0;
931 nmatched < to && *here && match1(re, *here, token) == 0;
932 nmatched++, here++)
933 {
934 }
935
936 /* step 3: try to match the remainder, and back off if it doesn't */
937 while (nmatched >= from && match(re, str, prog, here) != 0)
938 {
939 nmatched--;
940 here--;
941 }
942
943 /* so how did it work out? */
944 if (nmatched >= from)
945 return 0;
946 return 1;
947}
948
949
950
951/* This function searches through a string for text that matches an RE. */
952int regexec(re, str, bol)
953 regexp *re; /* the compiled regexp to search for */
954 char *str; /* the string to search through */
955 int bol; /* boolean: does str start at the beginning of a line? */
956{
957 char *prog; /* the entry point of re->program */
958 int len; /* length of the string */
959 REG char *here;
960
08746e8b
AM
961 checkmem();
962
15637ed4
RG
963 /* if must start at the beginning of a line, and this isn't, then fail */
964 if (re->bol && !bol)
965 {
966 return 0;
967 }
968
969 len = strlen(str);
970 prog = re->program + 1 + 32 * re->program[0];
971
972 /* search for the RE in the string */
973 if (re->bol)
974 {
975 /* must occur at BOL */
976 if ((re->first
977 && match1(re, *(char *)str, re->first))/* wrong first letter? */
978 || len < re->minlen /* not long enough? */
979 || match(re, (char *)str, prog, str)) /* doesn't match? */
980 return 0; /* THEN FAIL! */
981 }
982#ifndef CRUNCH
983 else if (!*o_ignorecase)
984 {
985 /* can occur anywhere in the line, noignorecase */
986 for (here = (char *)str;
987 (re->first && re->first != *here)
988 || match(re, (char *)str, prog, here);
989 here++, len--)
990 {
991 if (len < re->minlen)
992 return 0;
993 }
994 }
995#endif
996 else
997 {
998 /* can occur anywhere in the line, ignorecase */
999 for (here = (char *)str;
1000 (re->first && match1(re, *here, (int)re->first))
1001 || match(re, (char *)str, prog, here);
1002 here++, len--)
1003 {
1004 if (len < re->minlen)
1005 return 0;
1006 }
1007 }
1008
1009 /* if we didn't fail, then we must have succeeded */
08746e8b 1010 checkmem();
15637ed4
RG
1011 return 1;
1012}
1013
1014/*============================================================================*/
1015#else /* NO_MAGIC */
1016
1017regexp *regcomp(exp)
1018 char *exp;
1019{
1020 char *src;
1021 char *dest;
1022 regexp *re;
1023 int i;
1024
1025 /* allocate a big enough regexp structure */
1026#ifdef lint
1027 re = (regexp *)0;
1028#else
1029 re = (regexp *)malloc((unsigned)(strlen(exp) + 1 + sizeof(struct regexp)));
1030#endif
1031 if (!re)
1032 {
6e657cf2 1033 regerr("Could not malloc a regexp structure");
15637ed4
RG
1034 return (regexp *)0;
1035 }
1036
1037 /* initialize all fields of the structure */
1038 for (i = 0; i < NSUBEXP; i++)
1039 {
1040 re->startp[i] = re->endp[i] = (char *)0;
1041 }
1042 re->minlen = 0;
1043 re->first = 0;
1044 re->bol = 0;
1045
1046 /* copy the string into it, translating ^ and $ as needed */
1047 for (src = exp, dest = re->program + 1; *src; src++)
1048 {
1049 switch (*src)
1050 {
1051 case '^':
1052 if (src == exp)
1053 {
1054 re->bol += 1;
1055 }
1056 else
1057 {
1058 *dest++ = '^';
1059 re->minlen++;
1060 }
1061 break;
1062
1063 case '$':
1064 if (!src[1])
1065 {
1066 re->bol += 2;
1067 }
1068 else
1069 {
1070 *dest++ = '$';
1071 re->minlen++;
1072 }
1073 break;
1074
1075 case '\\':
1076 if (src[1])
1077 {
1078 *dest++ = *++src;
1079 re->minlen++;
1080 }
1081 else
1082 {
6e657cf2 1083 regerr("extra \\ at end of regular expression");
15637ed4
RG
1084 }
1085 break;
1086
1087 default:
1088 *dest++ = *src;
1089 re->minlen++;
1090 }
1091 }
1092 *dest = '\0';
1093
1094 return re;
1095}
1096
1097
1098/* This "helper" function checks for a match at a given location. It returns
1099 * 1 if it matches, 0 if it doesn't match here but might match later on in the
1100 * string, or -1 if it could not possibly match
1101 */
1102static int reghelp(prog, string, bolflag)
1103 struct regexp *prog;
1104 char *string;
1105 int bolflag;
1106{
1107 char *scan;
1108 char *str;
1109
1110 /* if ^, then require bolflag */
1111 if ((prog->bol & 1) && !bolflag)
1112 {
1113 return -1;
1114 }
1115
1116 /* if it matches, then it will start here */
1117 prog->startp[0] = string;
1118
1119 /* compare, possibly ignoring case */
1120 if (*o_ignorecase)
1121 {
1122 for (scan = &prog->program[1]; *scan; scan++, string++)
1123 if (tolower(*scan) != tolower(*string))
1124 return *string ? 0 : -1;
1125 }
1126 else
1127 {
1128 for (scan = &prog->program[1]; *scan; scan++, string++)
1129 if (*scan != *string)
1130 return *string ? 0 : -1;
1131 }
1132
1133 /* if $, then require string to end here, too */
1134 if ((prog->bol & 2) && *string)
1135 {
1136 return 0;
1137 }
1138
1139 /* if we get to here, it matches */
1140 prog->endp[0] = string;
1141 return 1;
1142}
1143
1144
1145
1146int regexec(prog, string, bolflag)
1147 struct regexp *prog;
1148 char *string;
1149 int bolflag;
1150{
1151 int rc;
1152
1153 /* keep trying to match it */
1154 for (rc = reghelp(prog, string, bolflag); rc == 0; rc = reghelp(prog, string, 0))
1155 {
1156 string++;
1157 }
1158
1159 /* did we match? */
1160 return rc == 1;
1161}
1162#endif
6e657cf2 1163#endif /* !REGEX */