This commit was generated by cvs2svn to track changes on a CVS vendor
[unix-history] / gnu / usr.bin / grep / regex.c
CommitLineData
d35a7625
NW
1/* Extended regular expression matching and search library.
2 Copyright (C) 1985, 1989 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
18
19 In other words, you are welcome to use, share and improve this program.
20 You are forbidden to forbid anyone else to use, share and improve
21 what you give them. Help stamp out software-hoarding! */
22
23
24/* To test, compile with -Dtest.
25 This Dtestable feature turns this into a self-contained program
26 which reads a pattern, describes how it compiles,
27 then reads a string and searches for it. */
28
29/* AIX requires this to be the first thing in the file. */
30#ifdef __GNUC__
31#undef alloca
32#define alloca __builtin_alloca
33#else /* not __GNUC__ */
34#if defined(sparc) && !defined(USG) && !defined(SVR4) && !defined(__svr4__)
35#include <alloca.h>
36#else
37#ifdef _AIX
38 #pragma alloca
39#else
40char *alloca ();
41#endif
42#endif /* sparc */
43#endif /* not __GNUC__ */
44
45
46#ifdef emacs
47
48/* The `emacs' switch turns on certain special matching commands
49 that make sense only in emacs. */
50
51#include "config.h"
52#include "lisp.h"
53#include "buffer.h"
54#include "syntax.h"
55
56#else /* not emacs */
57
58#if defined(USG) || defined(STDC_HEADERS)
59#include <string.h>
60#ifndef bcopy
61#define bcopy(s,d,n) memcpy((d),(s),(n))
62#endif
63#ifndef bcmp
64#define bcmp(s1,s2,n) memcmp((s1),(s2),(n))
65#endif
66#ifndef bzero
67#define bzero(s,n) memset((s),0,(n))
68#endif
69#endif
70
71#ifdef STDC_HEADERS
72#include <stdlib.h>
73#else
74char *malloc ();
75#endif
76
77/*
78 * Define the syntax stuff, so we can do the \<...\> things.
79 */
80
81#ifndef Sword /* must be non-zero in some of the tests below... */
82#define Sword 1
83#endif
84
85#define SYNTAX(c) re_syntax_table[c]
86
87#ifdef SYNTAX_TABLE
88
89char *re_syntax_table;
90
91#else
92
93static char re_syntax_table[256];
94
95static void
96init_syntax_once ()
97{
98 register int c;
99 static int done = 0;
100
101 if (done)
102 return;
103
104 bzero (re_syntax_table, sizeof re_syntax_table);
105
106 for (c = 'a'; c <= 'z'; c++)
107 re_syntax_table[c] = Sword;
108
109 for (c = 'A'; c <= 'Z'; c++)
110 re_syntax_table[c] = Sword;
111
112 for (c = '0'; c <= '9'; c++)
113 re_syntax_table[c] = Sword;
114
115 done = 1;
116}
117
118#endif /* SYNTAX_TABLE */
119#endif /* not emacs */
120
121#include "regex.h"
122
123/* Number of failure points to allocate space for initially,
124 when matching. If this number is exceeded, more space is allocated,
125 so it is not a hard limit. */
126
127#ifndef NFAILURES
128#define NFAILURES 80
129#endif /* NFAILURES */
130
131/* width of a byte in bits */
132
133#define BYTEWIDTH 8
134
135#ifndef SIGN_EXTEND_CHAR
136#ifdef __CHAR_UNSIGNED__
137#define SIGN_EXTEND_CHAR(c) ((c)>(char)127?(c)-256:(c))
138#else
139#define SIGN_EXTEND_CHAR(x) (x)
140#endif
141#endif
142\f
143static int obscure_syntax = 0;
144
145/* Specify the precise syntax of regexp for compilation.
146 This provides for compatibility for various utilities
147 which historically have different, incompatible syntaxes.
148
149 The argument SYNTAX is a bit-mask containing the two bits
150 RE_NO_BK_PARENS and RE_NO_BK_VBAR. */
151
152int
153re_set_syntax (syntax)
154 int syntax;
155{
156 int ret;
157
158 ret = obscure_syntax;
159 obscure_syntax = syntax;
160 return ret;
161}
162\f
163/* re_compile_pattern takes a regular-expression string
164 and converts it into a buffer full of byte commands for matching.
165
166 PATTERN is the address of the pattern string
167 SIZE is the length of it.
168 BUFP is a struct re_pattern_buffer * which points to the info
169 on where to store the byte commands.
170 This structure contains a char * which points to the
171 actual space, which should have been obtained with malloc.
172 re_compile_pattern may use realloc to grow the buffer space.
173
174 The number of bytes of commands can be found out by looking in
175 the struct re_pattern_buffer that bufp pointed to,
176 after re_compile_pattern returns.
177*/
178
179#define PATPUSH(ch) (*b++ = (char) (ch))
180
181#define PATFETCH(c) \
182 {if (p == pend) goto end_of_pattern; \
183 c = * (unsigned char *) p++; \
184 if (translate) c = translate[c]; }
185
186#define PATFETCH_RAW(c) \
187 {if (p == pend) goto end_of_pattern; \
188 c = * (unsigned char *) p++; }
189
190#define PATUNFETCH p--
191
192#define EXTEND_BUFFER \
193 { char *old_buffer = bufp->buffer; \
194 if (bufp->allocated == (1<<16)) goto too_big; \
195 bufp->allocated *= 2; \
196 if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
197 if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
198 goto memory_exhausted; \
199 c = bufp->buffer - old_buffer; \
200 b += c; \
201 if (fixup_jump) \
202 fixup_jump += c; \
203 if (laststart) \
204 laststart += c; \
205 begalt += c; \
206 if (pending_exact) \
207 pending_exact += c; \
208 }
209
210static void store_jump (), insert_jump ();
211
212char *
213re_compile_pattern (pattern, size, bufp)
214 char *pattern;
215 int size;
216 struct re_pattern_buffer *bufp;
217{
218 register char *b = bufp->buffer;
219 register char *p = pattern;
220 char *pend = pattern + size;
221 register unsigned c, c1;
222 char *p1;
223 unsigned char *translate = (unsigned char *) bufp->translate;
224
225 /* address of the count-byte of the most recently inserted "exactn" command.
226 This makes it possible to tell whether a new exact-match character
227 can be added to that command or requires a new "exactn" command. */
228
229 char *pending_exact = 0;
230
231 /* address of the place where a forward-jump should go
232 to the end of the containing expression.
233 Each alternative of an "or", except the last, ends with a forward-jump
234 of this sort. */
235
236 char *fixup_jump = 0;
237
238 /* address of start of the most recently finished expression.
239 This tells postfix * where to find the start of its operand. */
240
241 char *laststart = 0;
242
243 /* In processing a repeat, 1 means zero matches is allowed */
244
245 char zero_times_ok;
246
247 /* In processing a repeat, 1 means many matches is allowed */
248
249 char many_times_ok;
250
251 /* address of beginning of regexp, or inside of last \( */
252
253 char *begalt = b;
254
255 /* Stack of information saved by \( and restored by \).
256 Four stack elements are pushed by each \(:
257 First, the value of b.
258 Second, the value of fixup_jump.
259 Third, the value of regnum.
260 Fourth, the value of begalt. */
261
262 int stackb[40];
263 int *stackp = stackb;
264 int *stacke = stackb + 40;
265 int *stackt;
266
267 /* Counts \('s as they are encountered. Remembered for the matching \),
268 where it becomes the "register number" to put in the stop_memory command */
269
270 int regnum = 1;
271
272 bufp->fastmap_accurate = 0;
273
274#ifndef emacs
275#ifndef SYNTAX_TABLE
276 /*
277 * Initialize the syntax table.
278 */
279 init_syntax_once();
280#endif
281#endif
282
283 if (bufp->allocated == 0)
284 {
285 bufp->allocated = 28;
286 if (bufp->buffer)
287 /* EXTEND_BUFFER loses when bufp->allocated is 0 */
288 bufp->buffer = (char *) realloc (bufp->buffer, 28);
289 else
290 /* Caller did not allocate a buffer. Do it for him */
291 bufp->buffer = (char *) malloc (28);
292 if (!bufp->buffer) goto memory_exhausted;
293 begalt = b = bufp->buffer;
294 }
295
296 while (p != pend)
297 {
298 if (b - bufp->buffer > bufp->allocated - 10)
299 /* Note that EXTEND_BUFFER clobbers c */
300 EXTEND_BUFFER;
301
302 PATFETCH (c);
303
304 switch (c)
305 {
306 case '$':
307 if (obscure_syntax & RE_TIGHT_VBAR)
308 {
309 if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
310 goto normal_char;
311 /* Make operand of last vbar end before this `$'. */
312 if (fixup_jump)
313 store_jump (fixup_jump, jump, b);
314 fixup_jump = 0;
315 PATPUSH (endline);
316 break;
317 }
318
319 /* $ means succeed if at end of line, but only in special contexts.
320 If randomly in the middle of a pattern, it is a normal character. */
321 if (p == pend || *p == '\n'
322 || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
323 || (obscure_syntax & RE_NO_BK_PARENS
324 ? *p == ')'
325 : *p == '\\' && p[1] == ')')
326 || (obscure_syntax & RE_NO_BK_VBAR
327 ? *p == '|'
328 : *p == '\\' && p[1] == '|'))
329 {
330 PATPUSH (endline);
331 break;
332 }
333 goto normal_char;
334
335 case '^':
336 /* ^ means succeed if at beg of line, but only if no preceding pattern. */
337
338 if (laststart && p[-2] != '\n'
339 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
340 goto normal_char;
341 if (obscure_syntax & RE_TIGHT_VBAR)
342 {
343 if (p != pattern + 1
344 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
345 goto normal_char;
346 PATPUSH (begline);
347 begalt = b;
348 }
349 else
350 PATPUSH (begline);
351 break;
352
353 case '+':
354 case '?':
355 if (obscure_syntax & RE_BK_PLUS_QM)
356 goto normal_char;
357 handle_plus:
358 case '*':
359 /* If there is no previous pattern, char not special. */
360 if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
361 goto normal_char;
362 /* If there is a sequence of repetition chars,
363 collapse it down to equivalent to just one. */
364 zero_times_ok = 0;
365 many_times_ok = 0;
366 while (1)
367 {
368 zero_times_ok |= c != '+';
369 many_times_ok |= c != '?';
370 if (p == pend)
371 break;
372 PATFETCH (c);
373 if (c == '*')
374 ;
375 else if (!(obscure_syntax & RE_BK_PLUS_QM)
376 && (c == '+' || c == '?'))
377 ;
378 else if ((obscure_syntax & RE_BK_PLUS_QM)
379 && c == '\\')
380 {
381 int c1;
382 PATFETCH (c1);
383 if (!(c1 == '+' || c1 == '?'))
384 {
385 PATUNFETCH;
386 PATUNFETCH;
387 break;
388 }
389 c = c1;
390 }
391 else
392 {
393 PATUNFETCH;
394 break;
395 }
396 }
397
398 /* Star, etc. applied to an empty pattern is equivalent
399 to an empty pattern. */
400 if (!laststart)
401 break;
402
403 /* Now we know whether 0 matches is allowed,
404 and whether 2 or more matches is allowed. */
405 if (many_times_ok)
406 {
407 /* If more than one repetition is allowed,
408 put in a backward jump at the end. */
409 store_jump (b, maybe_finalize_jump, laststart - 3);
410 b += 3;
411 }
412 insert_jump (on_failure_jump, laststart, b + 3, b);
413 pending_exact = 0;
414 b += 3;
415 if (!zero_times_ok)
416 {
417 /* At least one repetition required: insert before the loop
418 a skip over the initial on-failure-jump instruction */
419 insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
420 b += 3;
421 }
422 break;
423
424 case '.':
425 laststart = b;
426 PATPUSH (anychar);
427 break;
428
429 case '[':
430 while (b - bufp->buffer
431 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
432 /* Note that EXTEND_BUFFER clobbers c */
433 EXTEND_BUFFER;
434
435 laststart = b;
436 if (*p == '^')
437 PATPUSH (charset_not), p++;
438 else
439 PATPUSH (charset);
440 p1 = p;
441
442 PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
443 /* Clear the whole map */
444 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
445 /* Read in characters and ranges, setting map bits */
446 while (1)
447 {
448 PATFETCH (c);
449 if (c == ']' && p != p1 + 1) break;
450 if (*p == '-' && p[1] != ']')
451 {
452 PATFETCH (c1);
453 PATFETCH (c1);
454 while (c <= c1)
455 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
456 }
457 else
458 {
459 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
460 }
461 }
462 /* Discard any bitmap bytes that are all 0 at the end of the map.
463 Decrement the map-length byte too. */
464 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
465 b[-1]--;
466 b += b[-1];
467 break;
468
469 case '(':
470 if (! (obscure_syntax & RE_NO_BK_PARENS))
471 goto normal_char;
472 else
473 goto handle_open;
474
475 case ')':
476 if (! (obscure_syntax & RE_NO_BK_PARENS))
477 goto normal_char;
478 else
479 goto handle_close;
480
481 case '\n':
482 if (! (obscure_syntax & RE_NEWLINE_OR))
483 goto normal_char;
484 else
485 goto handle_bar;
486
487 case '|':
488 if (! (obscure_syntax & RE_NO_BK_VBAR))
489 goto normal_char;
490 else
491 goto handle_bar;
492
493 case '\\':
494 if (p == pend) goto invalid_pattern;
495 PATFETCH_RAW (c);
496 switch (c)
497 {
498 case '(':
499 if (obscure_syntax & RE_NO_BK_PARENS)
500 goto normal_backsl;
501 handle_open:
502 if (stackp == stacke) goto nesting_too_deep;
503 if (regnum < RE_NREGS)
504 {
505 PATPUSH (start_memory);
506 PATPUSH (regnum);
507 }
508 *stackp++ = b - bufp->buffer;
509 *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
510 *stackp++ = regnum++;
511 *stackp++ = begalt - bufp->buffer;
512 fixup_jump = 0;
513 laststart = 0;
514 begalt = b;
515 break;
516
517 case ')':
518 if (obscure_syntax & RE_NO_BK_PARENS)
519 goto normal_backsl;
520 handle_close:
521 if (stackp == stackb) goto unmatched_close;
522 begalt = *--stackp + bufp->buffer;
523 if (fixup_jump)
524 store_jump (fixup_jump, jump, b);
525 if (stackp[-1] < RE_NREGS)
526 {
527 PATPUSH (stop_memory);
528 PATPUSH (stackp[-1]);
529 }
530 stackp -= 2;
531 fixup_jump = 0;
532 if (*stackp)
533 fixup_jump = *stackp + bufp->buffer - 1;
534 laststart = *--stackp + bufp->buffer;
535 break;
536
537 case '|':
538 if (obscure_syntax & RE_NO_BK_VBAR)
539 goto normal_backsl;
540 handle_bar:
541 insert_jump (on_failure_jump, begalt, b + 6, b);
542 pending_exact = 0;
543 b += 3;
544 if (fixup_jump)
545 store_jump (fixup_jump, jump, b);
546 fixup_jump = b;
547 b += 3;
548 laststart = 0;
549 begalt = b;
550 break;
551
552#ifdef emacs
553 case '=':
554 PATPUSH (at_dot);
555 break;
556
557 case 's':
558 laststart = b;
559 PATPUSH (syntaxspec);
560 PATFETCH (c);
561 PATPUSH (syntax_spec_code[c]);
562 break;
563
564 case 'S':
565 laststart = b;
566 PATPUSH (notsyntaxspec);
567 PATFETCH (c);
568 PATPUSH (syntax_spec_code[c]);
569 break;
570#endif /* emacs */
571
572 case 'w':
573 laststart = b;
574 PATPUSH (wordchar);
575 break;
576
577 case 'W':
578 laststart = b;
579 PATPUSH (notwordchar);
580 break;
581
582 case '<':
583 PATPUSH (wordbeg);
584 break;
585
586 case '>':
587 PATPUSH (wordend);
588 break;
589
590 case 'b':
591 PATPUSH (wordbound);
592 break;
593
594 case 'B':
595 PATPUSH (notwordbound);
596 break;
597
598 case '`':
599 PATPUSH (begbuf);
600 break;
601
602 case '\'':
603 PATPUSH (endbuf);
604 break;
605
606 case '1':
607 case '2':
608 case '3':
609 case '4':
610 case '5':
611 case '6':
612 case '7':
613 case '8':
614 case '9':
615 c1 = c - '0';
616 if (c1 >= regnum)
617 goto normal_char;
618 for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
619 if (*stackt == c1)
620 goto normal_char;
621 laststart = b;
622 PATPUSH (duplicate);
623 PATPUSH (c1);
624 break;
625
626 case '+':
627 case '?':
628 if (obscure_syntax & RE_BK_PLUS_QM)
629 goto handle_plus;
630
631 default:
632 normal_backsl:
633 /* You might think it would be useful for \ to mean
634 not to translate; but if we don't translate it
635 it will never match anything. */
636 if (translate) c = translate[c];
637 goto normal_char;
638 }
639 break;
640
641 default:
642 normal_char:
643 if (!pending_exact || pending_exact + *pending_exact + 1 != b
644 || *pending_exact == 0177 || *p == '*' || *p == '^'
645 || ((obscure_syntax & RE_BK_PLUS_QM)
646 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
647 : (*p == '+' || *p == '?')))
648 {
649 laststart = b;
650 PATPUSH (exactn);
651 pending_exact = b;
652 PATPUSH (0);
653 }
654 PATPUSH (c);
655 (*pending_exact)++;
656 }
657 }
658
659 if (fixup_jump)
660 store_jump (fixup_jump, jump, b);
661
662 if (stackp != stackb) goto unmatched_open;
663
664 bufp->used = b - bufp->buffer;
665 return 0;
666
667 invalid_pattern:
668 return "Invalid regular expression";
669
670 unmatched_open:
671 return "Unmatched \\(";
672
673 unmatched_close:
674 return "Unmatched \\)";
675
676 end_of_pattern:
677 return "Premature end of regular expression";
678
679 nesting_too_deep:
680 return "Nesting too deep";
681
682 too_big:
683 return "Regular expression too big";
684
685 memory_exhausted:
686 return "Memory exhausted";
687}
688
689/* Store where `from' points a jump operation to jump to where `to' points.
690 `opcode' is the opcode to store. */
691
692static void
693store_jump (from, opcode, to)
694 char *from, *to;
695 char opcode;
696{
697 from[0] = opcode;
698 from[1] = (to - (from + 3)) & 0377;
699 from[2] = (to - (from + 3)) >> 8;
700}
701
702/* Open up space at char FROM, and insert there a jump to TO.
703 CURRENT_END gives te end of the storage no in use,
704 so we know how much data to copy up.
705 OP is the opcode of the jump to insert.
706
707 If you call this function, you must zero out pending_exact. */
708
709static void
710insert_jump (op, from, to, current_end)
711 char op;
712 char *from, *to, *current_end;
713{
714 register char *pto = current_end + 3;
715 register char *pfrom = current_end;
716 while (pfrom != from)
717 *--pto = *--pfrom;
718 store_jump (from, op, to);
719}
720\f
721/* Given a pattern, compute a fastmap from it.
722 The fastmap records which of the (1 << BYTEWIDTH) possible characters
723 can start a string that matches the pattern.
724 This fastmap is used by re_search to skip quickly over totally implausible text.
725
726 The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
727 as bufp->fastmap.
728 The other components of bufp describe the pattern to be used. */
729
730void
731re_compile_fastmap (bufp)
732 struct re_pattern_buffer *bufp;
733{
734 unsigned char *pattern = (unsigned char *) bufp->buffer;
735 int size = bufp->used;
736 register char *fastmap = bufp->fastmap;
737 register unsigned char *p = pattern;
738 register unsigned char *pend = pattern + size;
739 register int j, k;
740 unsigned char *translate = (unsigned char *) bufp->translate;
741
742 unsigned char *stackb[NFAILURES];
743 unsigned char **stackp = stackb;
744
745 bzero (fastmap, (1 << BYTEWIDTH));
746 bufp->fastmap_accurate = 1;
747 bufp->can_be_null = 0;
748
749 while (p)
750 {
751 if (p == pend)
752 {
753 bufp->can_be_null = 1;
754 break;
755 }
756#ifdef SWITCH_ENUM_BUG
757 switch ((int) ((enum regexpcode) *p++))
758#else
759 switch ((enum regexpcode) *p++)
760#endif
761 {
762 case exactn:
763 if (translate)
764 fastmap[translate[p[1]]] = 1;
765 else
766 fastmap[p[1]] = 1;
767 break;
768
769 case begline:
770 case before_dot:
771 case at_dot:
772 case after_dot:
773 case begbuf:
774 case endbuf:
775 case wordbound:
776 case notwordbound:
777 case wordbeg:
778 case wordend:
779 continue;
780
781 case endline:
782 if (translate)
783 fastmap[translate['\n']] = 1;
784 else
785 fastmap['\n'] = 1;
786 if (bufp->can_be_null != 1)
787 bufp->can_be_null = 2;
788 break;
789
790 case finalize_jump:
791 case maybe_finalize_jump:
792 case jump:
793 case dummy_failure_jump:
794 bufp->can_be_null = 1;
795 j = *p++ & 0377;
796 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
797 p += j + 1; /* The 1 compensates for missing ++ above */
798 if (j > 0)
799 continue;
800 /* Jump backward reached implies we just went through
801 the body of a loop and matched nothing.
802 Opcode jumped to should be an on_failure_jump.
803 Just treat it like an ordinary jump.
804 For a * loop, it has pushed its failure point already;
805 if so, discard that as redundant. */
806 if ((enum regexpcode) *p != on_failure_jump)
807 continue;
808 p++;
809 j = *p++ & 0377;
810 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
811 p += j + 1; /* The 1 compensates for missing ++ above */
812 if (stackp != stackb && *stackp == p)
813 stackp--;
814 continue;
815
816 case on_failure_jump:
817 j = *p++ & 0377;
818 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
819 p++;
820 *++stackp = p + j;
821 continue;
822
823 case start_memory:
824 case stop_memory:
825 p++;
826 continue;
827
828 case duplicate:
829 bufp->can_be_null = 1;
830 fastmap['\n'] = 1;
831 case anychar:
832 for (j = 0; j < (1 << BYTEWIDTH); j++)
833 if (j != '\n')
834 fastmap[j] = 1;
835 if (bufp->can_be_null)
836 return;
837 /* Don't return; check the alternative paths
838 so we can set can_be_null if appropriate. */
839 break;
840
841 case wordchar:
842 for (j = 0; j < (1 << BYTEWIDTH); j++)
843 if (SYNTAX (j) == Sword)
844 fastmap[j] = 1;
845 break;
846
847 case notwordchar:
848 for (j = 0; j < (1 << BYTEWIDTH); j++)
849 if (SYNTAX (j) != Sword)
850 fastmap[j] = 1;
851 break;
852
853#ifdef emacs
854 case syntaxspec:
855 k = *p++;
856 for (j = 0; j < (1 << BYTEWIDTH); j++)
857 if (SYNTAX (j) == (enum syntaxcode) k)
858 fastmap[j] = 1;
859 break;
860
861 case notsyntaxspec:
862 k = *p++;
863 for (j = 0; j < (1 << BYTEWIDTH); j++)
864 if (SYNTAX (j) != (enum syntaxcode) k)
865 fastmap[j] = 1;
866 break;
867#endif /* emacs */
868
869 case charset:
870 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
871 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
872 {
873 if (translate)
874 fastmap[translate[j]] = 1;
875 else
876 fastmap[j] = 1;
877 }
878 break;
879
880 case charset_not:
881 /* Chars beyond end of map must be allowed */
882 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
883 if (translate)
884 fastmap[translate[j]] = 1;
885 else
886 fastmap[j] = 1;
887
888 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
889 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
890 {
891 if (translate)
892 fastmap[translate[j]] = 1;
893 else
894 fastmap[j] = 1;
895 }
896 break;
897
898 default:
899 break;
900 }
901
902 /* Get here means we have successfully found the possible starting characters
903 of one path of the pattern. We need not follow this path any farther.
904 Instead, look at the next alternative remembered in the stack. */
905 if (stackp != stackb)
906 p = *stackp--;
907 else
908 break;
909 }
910}
911\f
912/* Like re_search_2, below, but only one string is specified. */
913
914int
915re_search (pbufp, string, size, startpos, range, regs)
916 struct re_pattern_buffer *pbufp;
917 char *string;
918 int size, startpos, range;
919 struct re_registers *regs;
920{
921 return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
922}
923
924/* Like re_match_2 but tries first a match starting at index STARTPOS,
925 then at STARTPOS + 1, and so on.
926 RANGE is the number of places to try before giving up.
927 If RANGE is negative, the starting positions tried are
928 STARTPOS, STARTPOS - 1, etc.
929 It is up to the caller to make sure that range is not so large
930 as to take the starting position outside of the input strings.
931
932The value returned is the position at which the match was found,
933 or -1 if no match was found,
934 or -2 if error (such as failure stack overflow). */
935
936int
937re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
938 struct re_pattern_buffer *pbufp;
939 char *string1, *string2;
940 int size1, size2;
941 int startpos;
942 register int range;
943 struct re_registers *regs;
944 int mstop;
945{
946 register char *fastmap = pbufp->fastmap;
947 register unsigned char *translate = (unsigned char *) pbufp->translate;
948 int total = size1 + size2;
949 int val;
950
951 /* Update the fastmap now if not correct already */
952 if (fastmap && !pbufp->fastmap_accurate)
953 re_compile_fastmap (pbufp);
954
955 /* Don't waste time in a long search for a pattern
956 that says it is anchored. */
957 if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
958 && range > 0)
959 {
960 if (startpos > 0)
961 return -1;
962 else
963 range = 1;
964 }
965
966 while (1)
967 {
968 /* If a fastmap is supplied, skip quickly over characters
969 that cannot possibly be the start of a match.
970 Note, however, that if the pattern can possibly match
971 the null string, we must test it at each starting point
972 so that we take the first null string we get. */
973
974 if (fastmap && startpos < total && pbufp->can_be_null != 1)
975 {
976 if (range > 0)
977 {
978 register int lim = 0;
979 register unsigned char *p;
980 int irange = range;
981 if (startpos < size1 && startpos + range >= size1)
982 lim = range - (size1 - startpos);
983
984 p = ((unsigned char *)
985 &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
986
987 if (translate)
988 {
989 while (range > lim && !fastmap[translate[*p++]])
990 range--;
991 }
992 else
993 {
994 while (range > lim && !fastmap[*p++])
995 range--;
996 }
997 startpos += irange - range;
998 }
999 else
1000 {
1001 register unsigned char c;
1002 if (startpos >= size1)
1003 c = string2[startpos - size1];
1004 else
1005 c = string1[startpos];
1006 c &= 0xff;
1007 if (translate ? !fastmap[translate[c]] : !fastmap[c])
1008 goto advance;
1009 }
1010 }
1011
1012 if (range >= 0 && startpos == total
1013 && fastmap && pbufp->can_be_null == 0)
1014 return -1;
1015
1016 val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop);
1017 if (0 <= val)
1018 {
1019 if (val == -2)
1020 return -2;
1021 return startpos;
1022 }
1023
1024#ifdef C_ALLOCA
1025 alloca (0);
1026#endif /* C_ALLOCA */
1027
1028 advance:
1029 if (!range) break;
1030 if (range > 0) range--, startpos++; else range++, startpos--;
1031 }
1032 return -1;
1033}
1034\f
1035#ifndef emacs /* emacs never uses this */
1036int
1037re_match (pbufp, string, size, pos, regs)
1038 struct re_pattern_buffer *pbufp;
1039 char *string;
1040 int size, pos;
1041 struct re_registers *regs;
1042{
1043 return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
1044}
1045#endif /* emacs */
1046
1047/* Maximum size of failure stack. Beyond this, overflow is an error. */
1048
1049int re_max_failures = 2000;
1050
1051static int bcmp_translate();
1052/* Match the pattern described by PBUFP
1053 against data which is the virtual concatenation of STRING1 and STRING2.
1054 SIZE1 and SIZE2 are the sizes of the two data strings.
1055 Start the match at position POS.
1056 Do not consider matching past the position MSTOP.
1057
1058 If pbufp->fastmap is nonzero, then it had better be up to date.
1059
1060 The reason that the data to match are specified as two components
1061 which are to be regarded as concatenated
1062 is so this function can be used directly on the contents of an Emacs buffer.
1063
1064 -1 is returned if there is no match. -2 is returned if there is
1065 an error (such as match stack overflow). Otherwise the value is the length
1066 of the substring which was matched. */
1067
1068int
1069re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
1070 struct re_pattern_buffer *pbufp;
1071 unsigned char *string1, *string2;
1072 int size1, size2;
1073 int pos;
1074 struct re_registers *regs;
1075 int mstop;
1076{
1077 register unsigned char *p = (unsigned char *) pbufp->buffer;
1078 register unsigned char *pend = p + pbufp->used;
1079 /* End of first string */
1080 unsigned char *end1;
1081 /* End of second string */
1082 unsigned char *end2;
1083 /* Pointer just past last char to consider matching */
1084 unsigned char *end_match_1, *end_match_2;
1085 register unsigned char *d, *dend;
1086 register int mcnt;
1087 unsigned char *translate = (unsigned char *) pbufp->translate;
1088
1089 /* Failure point stack. Each place that can handle a failure further down the line
1090 pushes a failure point on this stack. It consists of two char *'s.
1091 The first one pushed is where to resume scanning the pattern;
1092 the second pushed is where to resume scanning the strings.
1093 If the latter is zero, the failure point is a "dummy".
1094 If a failure happens and the innermost failure point is dormant,
1095 it discards that failure point and tries the next one. */
1096
1097 unsigned char *initial_stack[2 * NFAILURES];
1098 unsigned char **stackb = initial_stack;
1099 unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
1100
1101 /* Information on the "contents" of registers.
1102 These are pointers into the input strings; they record
1103 just what was matched (on this attempt) by some part of the pattern.
1104 The start_memory command stores the start of a register's contents
1105 and the stop_memory command stores the end.
1106
1107 At that point, regstart[regnum] points to the first character in the register,
1108 regend[regnum] points to the first character beyond the end of the register,
1109 regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
1110 and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
1111
1112 unsigned char *regstart[RE_NREGS];
1113 unsigned char *regend[RE_NREGS];
1114 unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
1115
1116 /* Set up pointers to ends of strings.
1117 Don't allow the second string to be empty unless both are empty. */
1118 if (!size2)
1119 {
1120 string2 = string1;
1121 size2 = size1;
1122 string1 = 0;
1123 size1 = 0;
1124 }
1125 end1 = string1 + size1;
1126 end2 = string2 + size2;
1127
1128 /* Compute where to stop matching, within the two strings */
1129 if (mstop <= size1)
1130 {
1131 end_match_1 = string1 + mstop;
1132 end_match_2 = string2;
1133 }
1134 else
1135 {
1136 end_match_1 = end1;
1137 end_match_2 = string2 + mstop - size1;
1138 }
1139
1140 /* Initialize \) text positions to -1
1141 to mark ones that no \( or \) has been seen for. */
1142
1143 for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
1144 regend[mcnt] = (unsigned char *) -1;
1145
1146 /* `p' scans through the pattern as `d' scans through the data.
1147 `dend' is the end of the input string that `d' points within.
1148 `d' is advanced into the following input string whenever necessary,
1149 but this happens before fetching;
1150 therefore, at the beginning of the loop,
1151 `d' can be pointing at the end of a string,
1152 but it cannot equal string2. */
1153
1154 if (pos <= size1)
1155 d = string1 + pos, dend = end_match_1;
1156 else
1157 d = string2 + pos - size1, dend = end_match_2;
1158
1159/* Write PREFETCH; just before fetching a character with *d. */
1160#define PREFETCH \
1161 while (d == dend) \
1162 { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
1163 d = string2; /* end of string1 => advance to string2. */ \
1164 dend = end_match_2; }
1165
1166 /* This loop loops over pattern commands.
1167 It exits by returning from the function if match is complete,
1168 or it drops through if match fails at this starting point in the input data. */
1169
1170 while (1)
1171 {
1172 if (p == pend)
1173 /* End of pattern means we have succeeded! */
1174 {
1175 /* If caller wants register contents data back, convert it to indices */
1176 if (regs)
1177 {
1178 regs->start[0] = pos;
1179 if (dend == end_match_1)
1180 regs->end[0] = d - string1;
1181 else
1182 regs->end[0] = d - string2 + size1;
1183 for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
1184 {
1185 if (regend[mcnt] == (unsigned char *) -1)
1186 {
1187 regs->start[mcnt] = -1;
1188 regs->end[mcnt] = -1;
1189 continue;
1190 }
1191 if (regstart_seg1[mcnt])
1192 regs->start[mcnt] = regstart[mcnt] - string1;
1193 else
1194 regs->start[mcnt] = regstart[mcnt] - string2 + size1;
1195 if (regend_seg1[mcnt])
1196 regs->end[mcnt] = regend[mcnt] - string1;
1197 else
1198 regs->end[mcnt] = regend[mcnt] - string2 + size1;
1199 }
1200 }
1201 if (dend == end_match_1)
1202 return (d - string1 - pos);
1203 else
1204 return d - string2 + size1 - pos;
1205 }
1206
1207 /* Otherwise match next pattern command */
1208#ifdef SWITCH_ENUM_BUG
1209 switch ((int) ((enum regexpcode) *p++))
1210#else
1211 switch ((enum regexpcode) *p++)
1212#endif
1213 {
1214
1215 /* \( is represented by a start_memory, \) by a stop_memory.
1216 Both of those commands contain a "register number" argument.
1217 The text matched within the \( and \) is recorded under that number.
1218 Then, \<digit> turns into a `duplicate' command which
1219 is followed by the numeric value of <digit> as the register number. */
1220
1221 case start_memory:
1222 regstart[*p] = d;
1223 regstart_seg1[*p++] = (dend == end_match_1);
1224 break;
1225
1226 case stop_memory:
1227 regend[*p] = d;
1228 regend_seg1[*p++] = (dend == end_match_1);
1229 break;
1230
1231 case duplicate:
1232 {
1233 int regno = *p++; /* Get which register to match against */
1234 register unsigned char *d2, *dend2;
1235
1236 d2 = regstart[regno];
1237 dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
1238 ? regend[regno] : end_match_1);
1239 while (1)
1240 {
1241 /* Advance to next segment in register contents, if necessary */
1242 while (d2 == dend2)
1243 {
1244 if (dend2 == end_match_2) break;
1245 if (dend2 == regend[regno]) break;
1246 d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
1247 }
1248 /* At end of register contents => success */
1249 if (d2 == dend2) break;
1250
1251 /* Advance to next segment in data being matched, if necessary */
1252 PREFETCH;
1253
1254 /* mcnt gets # consecutive chars to compare */
1255 mcnt = dend - d;
1256 if (mcnt > dend2 - d2)
1257 mcnt = dend2 - d2;
1258 /* Compare that many; failure if mismatch, else skip them. */
1259 if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
1260 goto fail;
1261 d += mcnt, d2 += mcnt;
1262 }
1263 }
1264 break;
1265
1266 case anychar:
1267 /* fetch a data character */
1268 PREFETCH;
1269 /* Match anything but a newline. */
1270 if ((translate ? translate[*d++] : *d++) == '\n')
1271 goto fail;
1272 break;
1273
1274 case charset:
1275 case charset_not:
1276 {
1277 /* Nonzero for charset_not */
1278 int not = 0;
1279 register int c;
1280 if (*(p - 1) == (unsigned char) charset_not)
1281 not = 1;
1282
1283 /* fetch a data character */
1284 PREFETCH;
1285
1286 if (translate)
1287 c = translate [*d];
1288 else
1289 c = *d;
1290
1291 if (c < *p * BYTEWIDTH
1292 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1293 not = !not;
1294
1295 p += 1 + *p;
1296
1297 if (!not) goto fail;
1298 d++;
1299 break;
1300 }
1301
1302 case begline:
1303 if (d == string1 || d[-1] == '\n')
1304 break;
1305 goto fail;
1306
1307 case endline:
1308 if (d == end2
1309 || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
1310 break;
1311 goto fail;
1312
1313 /* "or" constructs ("|") are handled by starting each alternative
1314 with an on_failure_jump that points to the start of the next alternative.
1315 Each alternative except the last ends with a jump to the joining point.
1316 (Actually, each jump except for the last one really jumps
1317 to the following jump, because tensioning the jumps is a hassle.) */
1318
1319 /* The start of a stupid repeat has an on_failure_jump that points
1320 past the end of the repeat text.
1321 This makes a failure point so that, on failure to match a repetition,
1322 matching restarts past as many repetitions have been found
1323 with no way to fail and look for another one. */
1324
1325 /* A smart repeat is similar but loops back to the on_failure_jump
1326 so that each repetition makes another failure point. */
1327
1328 case on_failure_jump:
1329 if (stackp == stacke)
1330 {
1331 unsigned char **stackx;
1332 if (stacke - stackb > re_max_failures * 2)
1333 return -2;
1334 stackx = (unsigned char **) alloca (2 * (stacke - stackb)
1335 * sizeof (char *));
1336 bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1337 stackp = stackx + (stackp - stackb);
1338 stacke = stackx + 2 * (stacke - stackb);
1339 stackb = stackx;
1340 }
1341 mcnt = *p++ & 0377;
1342 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1343 p++;
1344 *stackp++ = mcnt + p;
1345 *stackp++ = d;
1346 break;
1347
1348 /* The end of a smart repeat has an maybe_finalize_jump back.
1349 Change it either to a finalize_jump or an ordinary jump. */
1350
1351 case maybe_finalize_jump:
1352 mcnt = *p++ & 0377;
1353 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1354 p++;
1355 {
1356 register unsigned char *p2 = p;
1357 /* Compare what follows with the begining of the repeat.
1358 If we can establish that there is nothing that they would
1359 both match, we can change to finalize_jump */
1360 while (p2 != pend
1361 && (*p2 == (unsigned char) stop_memory
1362 || *p2 == (unsigned char) start_memory))
1363 p2++;
1364 if (p2 == pend)
1365 p[-3] = (unsigned char) finalize_jump;
1366 else if (*p2 == (unsigned char) exactn
1367 || *p2 == (unsigned char) endline)
1368 {
1369 register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
1370 register unsigned char *p1 = p + mcnt;
1371 /* p1[0] ... p1[2] are an on_failure_jump.
1372 Examine what follows that */
1373 if (p1[3] == (unsigned char) exactn && p1[5] != c)
1374 p[-3] = (unsigned char) finalize_jump;
1375 else if (p1[3] == (unsigned char) charset
1376 || p1[3] == (unsigned char) charset_not)
1377 {
1378 int not = p1[3] == (unsigned char) charset_not;
1379 if (c < p1[4] * BYTEWIDTH
1380 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1381 not = !not;
1382 /* not is 1 if c would match */
1383 /* That means it is not safe to finalize */
1384 if (!not)
1385 p[-3] = (unsigned char) finalize_jump;
1386 }
1387 }
1388 }
1389 p -= 2;
1390 if (p[-1] != (unsigned char) finalize_jump)
1391 {
1392 p[-1] = (unsigned char) jump;
1393 goto nofinalize;
1394 }
1395
1396 /* The end of a stupid repeat has a finalize-jump
1397 back to the start, where another failure point will be made
1398 which will point after all the repetitions found so far. */
1399
1400 case finalize_jump:
1401 stackp -= 2;
1402
1403 case jump:
1404 nofinalize:
1405 mcnt = *p++ & 0377;
1406 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1407 p += mcnt + 1; /* The 1 compensates for missing ++ above */
1408 break;
1409
1410 case dummy_failure_jump:
1411 if (stackp == stacke)
1412 {
1413 unsigned char **stackx
1414 = (unsigned char **) alloca (2 * (stacke - stackb)
1415 * sizeof (char *));
1416 bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1417 stackp = stackx + (stackp - stackb);
1418 stacke = stackx + 2 * (stacke - stackb);
1419 stackb = stackx;
1420 }
1421 *stackp++ = 0;
1422 *stackp++ = 0;
1423 goto nofinalize;
1424
1425 case wordbound:
1426 if (d == string1 /* Points to first char */
1427 || d == end2 /* Points to end */
1428 || (d == end1 && size2 == 0)) /* Points to end */
1429 break;
1430 if ((SYNTAX (d[-1]) == Sword)
1431 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1432 break;
1433 goto fail;
1434
1435 case notwordbound:
1436 if (d == string1 /* Points to first char */
1437 || d == end2 /* Points to end */
1438 || (d == end1 && size2 == 0)) /* Points to end */
1439 goto fail;
1440 if ((SYNTAX (d[-1]) == Sword)
1441 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1442 goto fail;
1443 break;
1444
1445 case wordbeg:
1446 if (d == end2 /* Points to end */
1447 || (d == end1 && size2 == 0) /* Points to end */
1448 || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
1449 goto fail;
1450 if (d == string1 /* Points to first char */
1451 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1452 break;
1453 goto fail;
1454
1455 case wordend:
1456 if (d == string1 /* Points to first char */
1457 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1458 goto fail;
1459 if (d == end2 /* Points to end */
1460 || (d == end1 && size2 == 0) /* Points to end */
1461 || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
1462 break;
1463 goto fail;
1464
1465#ifdef emacs
1466 case before_dot:
1467 if (((d - string2 <= (unsigned) size2)
1468 ? d - bf_p2 : d - bf_p1)
1469 <= point)
1470 goto fail;
1471 break;
1472
1473 case at_dot:
1474 if (((d - string2 <= (unsigned) size2)
1475 ? d - bf_p2 : d - bf_p1)
1476 == point)
1477 goto fail;
1478 break;
1479
1480 case after_dot:
1481 if (((d - string2 <= (unsigned) size2)
1482 ? d - bf_p2 : d - bf_p1)
1483 >= point)
1484 goto fail;
1485 break;
1486
1487 case wordchar:
1488 mcnt = (int) Sword;
1489 goto matchsyntax;
1490
1491 case syntaxspec:
1492 mcnt = *p++;
1493 matchsyntax:
1494 PREFETCH;
1495 if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
1496 break;
1497
1498 case notwordchar:
1499 mcnt = (int) Sword;
1500 goto matchnotsyntax;
1501
1502 case notsyntaxspec:
1503 mcnt = *p++;
1504 matchnotsyntax:
1505 PREFETCH;
1506 if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
1507 break;
1508#else
1509 case wordchar:
1510 PREFETCH;
1511 if (SYNTAX (*d++) == 0) goto fail;
1512 break;
1513
1514 case notwordchar:
1515 PREFETCH;
1516 if (SYNTAX (*d++) != 0) goto fail;
1517 break;
1518#endif /* not emacs */
1519
1520 case begbuf:
1521 if (d == string1) /* Note, d cannot equal string2 */
1522 break; /* unless string1 == string2. */
1523 goto fail;
1524
1525 case endbuf:
1526 if (d == end2 || (d == end1 && size2 == 0))
1527 break;
1528 goto fail;
1529
1530 case exactn:
1531 /* Match the next few pattern characters exactly.
1532 mcnt is how many characters to match. */
1533 mcnt = *p++;
1534 if (translate)
1535 {
1536 do
1537 {
1538 PREFETCH;
1539 if (translate[*d++] != *p++) goto fail;
1540 }
1541 while (--mcnt);
1542 }
1543 else
1544 {
1545 do
1546 {
1547 PREFETCH;
1548 if (*d++ != *p++) goto fail;
1549 }
1550 while (--mcnt);
1551 }
1552 break;
1553
1554 default:
1555 break;
1556 }
1557 continue; /* Successfully matched one pattern command; keep matching */
1558
1559 /* Jump here if any matching operation fails. */
1560 fail:
1561 if (stackp != stackb)
1562 /* A restart point is known. Restart there and pop it. */
1563 {
1564 if (!stackp[-2])
1565 { /* If innermost failure point is dormant, flush it and keep looking */
1566 stackp -= 2;
1567 goto fail;
1568 }
1569 d = *--stackp;
1570 p = *--stackp;
1571 if (d >= string1 && d <= end1)
1572 dend = end_match_1;
1573 }
1574 else break; /* Matching at this starting point really fails! */
1575 }
1576 return -1; /* Failure to match */
1577}
1578
1579static int
1580bcmp_translate (s1, s2, len, translate)
1581 unsigned char *s1, *s2;
1582 register int len;
1583 unsigned char *translate;
1584{
1585 register unsigned char *p1 = s1, *p2 = s2;
1586 while (len)
1587 {
1588 if (translate [*p1++] != translate [*p2++]) return 1;
1589 len--;
1590 }
1591 return 0;
1592}
1593\f
1594/* Entry points compatible with bsd4.2 regex library */
1595
1596#ifndef emacs
1597
1598static struct re_pattern_buffer re_comp_buf;
1599
1600char *
1601re_comp (s)
1602 char *s;
1603{
1604 if (!s)
1605 {
1606 if (!re_comp_buf.buffer)
1607 return "No previous regular expression";
1608 return 0;
1609 }
1610
1611 if (!re_comp_buf.buffer)
1612 {
1613 if (!(re_comp_buf.buffer = (char *) malloc (200)))
1614 return "Memory exhausted";
1615 re_comp_buf.allocated = 200;
1616 if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
1617 return "Memory exhausted";
1618 }
1619 return re_compile_pattern (s, strlen (s), &re_comp_buf);
1620}
1621
1622int
1623re_exec (s)
1624 char *s;
1625{
1626 int len = strlen (s);
1627 return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
1628}
1629
1630#endif /* emacs */
1631\f
1632#ifdef test
1633
1634#include <stdio.h>
1635
1636/* Indexed by a character, gives the upper case equivalent of the character */
1637
1638static char upcase[0400] =
1639 { 000, 001, 002, 003, 004, 005, 006, 007,
1640 010, 011, 012, 013, 014, 015, 016, 017,
1641 020, 021, 022, 023, 024, 025, 026, 027,
1642 030, 031, 032, 033, 034, 035, 036, 037,
1643 040, 041, 042, 043, 044, 045, 046, 047,
1644 050, 051, 052, 053, 054, 055, 056, 057,
1645 060, 061, 062, 063, 064, 065, 066, 067,
1646 070, 071, 072, 073, 074, 075, 076, 077,
1647 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1648 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1649 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1650 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
1651 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1652 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1653 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1654 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
1655 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
1656 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
1657 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
1658 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
1659 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
1660 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1661 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
1662 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
1663 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
1664 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
1665 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
1666 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1667 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
1668 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
1669 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
1670 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
1671 };
1672
1673main (argc, argv)
1674 int argc;
1675 char **argv;
1676{
1677 char pat[80];
1678 struct re_pattern_buffer buf;
1679 int i;
1680 char c;
1681 char fastmap[(1 << BYTEWIDTH)];
1682
1683 /* Allow a command argument to specify the style of syntax. */
1684 if (argc > 1)
1685 obscure_syntax = atoi (argv[1]);
1686
1687 buf.allocated = 40;
1688 buf.buffer = (char *) malloc (buf.allocated);
1689 buf.fastmap = fastmap;
1690 buf.translate = upcase;
1691
1692 while (1)
1693 {
1694 gets (pat);
1695
1696 if (*pat)
1697 {
1698 re_compile_pattern (pat, strlen(pat), &buf);
1699
1700 for (i = 0; i < buf.used; i++)
1701 printchar (buf.buffer[i]);
1702
1703 putchar ('\n');
1704
1705 printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
1706
1707 re_compile_fastmap (&buf);
1708 printf ("Allowed by fastmap: ");
1709 for (i = 0; i < (1 << BYTEWIDTH); i++)
1710 if (fastmap[i]) printchar (i);
1711 putchar ('\n');
1712 }
1713
1714 gets (pat); /* Now read the string to match against */
1715
1716 i = re_match (&buf, pat, strlen (pat), 0, 0);
1717 printf ("Match value %d.\n", i);
1718 }
1719}
1720
1721#ifdef NOTDEF
1722print_buf (bufp)
1723 struct re_pattern_buffer *bufp;
1724{
1725 int i;
1726
1727 printf ("buf is :\n----------------\n");
1728 for (i = 0; i < bufp->used; i++)
1729 printchar (bufp->buffer[i]);
1730
1731 printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
1732
1733 printf ("Allowed by fastmap: ");
1734 for (i = 0; i < (1 << BYTEWIDTH); i++)
1735 if (bufp->fastmap[i])
1736 printchar (i);
1737 printf ("\nAllowed by translate: ");
1738 if (bufp->translate)
1739 for (i = 0; i < (1 << BYTEWIDTH); i++)
1740 if (bufp->translate[i])
1741 printchar (i);
1742 printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
1743 printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
1744}
1745#endif
1746
1747printchar (c)
1748 char c;
1749{
1750 if (c < 041 || c >= 0177)
1751 {
1752 putchar ('\\');
1753 putchar (((c >> 6) & 3) + '0');
1754 putchar (((c >> 3) & 7) + '0');
1755 putchar ((c & 7) + '0');
1756 }
1757 else
1758 putchar (c);
1759}
1760
1761error (string)
1762 char *string;
1763{
1764 puts (string);
1765 exit (1);
1766}
1767
1768#endif /* test */