This commit was manufactured by cvs2svn to create tag 'FreeBSD-release/1.0'.
[unix-history] / gnu / lib / libg++ / libg++ / regex.cc
CommitLineData
78ed81a3 1/* Extended regular expression matching and search library.
2 Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
17
18
19// This is a translation into C++ of regex.c, the GNU regexp package.
20
21/* To test, compile with -Dtest. This Dtestable feature turns this into
22 a self-contained program which reads a pattern, describes how it
23 compiles, then reads a string and searches for it.
24
25 On the other hand, if you compile with both -Dtest and -Dcanned you
26 can run some tests we've already thought of. */
27
28/* AIX requires the alloca decl to be the first thing in the file. */
29#ifdef __GNUC__
30#define alloca __builtin_alloca
31#else
32#ifdef sparc
33#include <alloca.h>
34extern "C" void *__builtin_alloca(...);
35#else
36#ifdef _AIX
37#pragma alloca
38#else
39char *alloca ();
40#endif
41#endif
42#endif
15637ed4
RG
43
44#ifdef emacs
45
46/* The `emacs' switch turns on certain special matching commands
78ed81a3 47 that make sense only in emacs. */
15637ed4 48
15637ed4
RG
49#include "config.h"
50#include "lisp.h"
51#include "buffer.h"
52#include "syntax.h"
53
54#else /* not emacs */
55
78ed81a3 56#include <_G_config.h>
57#include <string.h>
58#include <stdlib.h>
15637ed4 59
78ed81a3 60/* Define the syntax stuff, so we can do the \<, \>, etc. */
15637ed4 61
78ed81a3 62/* This must be nonzero for the wordchar and notwordchar pattern
63 commands in re_match_2. */
64#ifndef Sword
15637ed4
RG
65#define Sword 1
66#endif
67
68#define SYNTAX(c) re_syntax_table[c]
69
78ed81a3 70
15637ed4
RG
71#ifdef SYNTAX_TABLE
72
73char *re_syntax_table;
74
78ed81a3 75#else /* not SYNTAX_TABLE */
15637ed4
RG
76
77static char re_syntax_table[256];
78
78ed81a3 79
15637ed4 80static void
78ed81a3 81init_syntax_once ()
15637ed4
RG
82{
83 register int c;
84 static int done = 0;
85
86 if (done)
87 return;
88
78ed81a3 89 memset (re_syntax_table, 0, sizeof re_syntax_table);
15637ed4
RG
90
91 for (c = 'a'; c <= 'z'; c++)
92 re_syntax_table[c] = Sword;
93
94 for (c = 'A'; c <= 'Z'; c++)
95 re_syntax_table[c] = Sword;
96
97 for (c = '0'; c <= '9'; c++)
98 re_syntax_table[c] = Sword;
99
100 done = 1;
101}
102
103#endif /* SYNTAX_TABLE */
78ed81a3 104#endif /* emacs */
105
106/* We write fatal error messages on standard error. */
107#include <stdio.h>
15637ed4 108
78ed81a3 109/* isalpha(3) etc. are used for the character classes. */
110#include <ctype.h>
111/* Sequents are missing isgraph. */
112#ifndef isgraph
113#define isgraph(c) (isprint((c)) && !isspace((c)))
114#endif
115
116/* Get the interface, including the syntax bits. */
15637ed4
RG
117#include "regex.h"
118
78ed81a3 119
120/* These are the command codes that appear in compiled regular
121 expressions, one per byte. Some command codes are followed by
122 argument bytes. A command code can specify any interpretation
123 whatsoever for its arguments. Zero-bytes may appear in the compiled
124 regular expression.
125
126 The value of `exactn' is needed in search.c (search_buffer) in emacs.
127 So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
128 `exactn' we use here must also be 1. */
129
130enum regexpcode
131 {
132 unused=0,
133 exactn=1, /* Followed by one byte giving n, then by n literal bytes. */
134 begline, /* Fail unless at beginning of line. */
135 endline, /* Fail unless at end of line. */
136 jump, /* Followed by two bytes giving relative address to jump to. */
137 on_failure_jump, /* Followed by two bytes giving relative address of
138 place to resume at in case of failure. */
139 finalize_jump, /* Throw away latest failure point and then jump to
140 address. */
141 maybe_finalize_jump, /* Like jump but finalize if safe to do so.
142 This is used to jump back to the beginning
143 of a repeat. If the command that follows
144 this jump is clearly incompatible with the
145 one at the beginning of the repeat, such that
146 we can be sure that there is no use backtracking
147 out of repetitions already completed,
148 then we finalize. */
149 dummy_failure_jump, /* Jump, and push a dummy failure point. This
150 failure point will be thrown away if an attempt
151 is made to use it for a failure. A + construct
152 makes this before the first repeat. Also
153 use it as an intermediary kind of jump when
154 compiling an or construct. */
155 succeed_n, /* Used like on_failure_jump except has to succeed n times;
156 then gets turned into an on_failure_jump. The relative
157 address following it is useless until then. The
158 address is followed by two bytes containing n. */
159 jump_n, /* Similar to jump, but jump n times only; also the relative
160 address following is in turn followed by yet two more bytes
161 containing n. */
162 set_number_at, /* Set the following relative location to the
163 subsequent number. */
164 anychar, /* Matches any (more or less) one character. */
165 charset, /* Matches any one char belonging to specified set.
166 First following byte is number of bitmap bytes.
167 Then come bytes for a bitmap saying which chars are in.
168 Bits in each byte are ordered low-bit-first.
169 A character is in the set if its bit is 1.
170 A character too large to have a bit in the map
171 is automatically not in the set. */
172 charset_not, /* Same parameters as charset, but match any character
173 that is not one of those specified. */
174 start_memory, /* Start remembering the text that is matched, for
175 storing in a memory register. Followed by one
176 byte containing the register number. Register numbers
177 must be in the range 0 through RE_NREGS. */
178 stop_memory, /* Stop remembering the text that is matched
179 and store it in a memory register. Followed by
180 one byte containing the register number. Register
181 numbers must be in the range 0 through RE_NREGS. */
182 duplicate, /* Match a duplicate of something remembered.
183 Followed by one byte containing the index of the memory
184 register. */
185#ifdef emacs
186 before_dot, /* Succeeds if before point. */
187 at_dot, /* Succeeds if at point. */
188 after_dot, /* Succeeds if after point. */
189#endif
190 begbuf, /* Succeeds if at beginning of buffer. */
191 endbuf, /* Succeeds if at end of buffer. */
192 wordchar, /* Matches any word-constituent character. */
193 notwordchar, /* Matches any char that is not a word-constituent. */
194 wordbeg, /* Succeeds if at word beginning. */
195 wordend, /* Succeeds if at word end. */
196 wordbound, /* Succeeds if at a word boundary. */
197 notwordbound,/* Succeeds if not at a word boundary. */
198#ifdef emacs
199 syntaxspec, /* Matches any character whose syntax is specified.
200 followed by a byte which contains a syntax code,
201 e.g., Sword. */
202 notsyntaxspec /* Matches any character whose syntax differs from
203 that specified. */
204#endif
205 };
206
207
15637ed4 208/* Number of failure points to allocate space for initially,
78ed81a3 209 when matching. If this number is exceeded, more space is allocated,
210 so it is not a hard limit. */
15637ed4
RG
211
212#ifndef NFAILURES
213#define NFAILURES 80
78ed81a3 214#endif
15637ed4 215
15637ed4
RG
216
217#ifndef SIGN_EXTEND_CHAR
78ed81a3 218#ifdef __STDC__
219#define SIGN_EXTEND_CHAR(c) ((signed char)(c))
220#else
221#define SIGN_EXTEND_CHAR(c) (((c)^128) - 128) /* As in Harbison and Steele. */
15637ed4 222#endif
78ed81a3 223#endif /* not SIGN_EXTEND_CHAR */
15637ed4 224
78ed81a3 225/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
226#define STORE_NUMBER(destination, number) \
227 { (destination)[0] = (char)((number) & 0377); \
228 (destination)[1] = (number) >> 8; }
229
230/* Same as STORE_NUMBER, except increment the destination pointer to
231 the byte after where the number is stored. Watch out that values for
232 DESTINATION such as p + 1 won't work, whereas p will. */
233#define STORE_NUMBER_AND_INCR(destination, number) \
234 { STORE_NUMBER(destination, number); \
235 (destination) += 2; }
236
237
238/* Put into DESTINATION a number stored in two contingous bytes starting
239 at SOURCE. */
240#define EXTRACT_NUMBER(destination, source) \
241 { (destination) = *(source) & 0377; \
242 (destination) += SIGN_EXTEND_CHAR (*(char *)((source) + 1)) << 8; }
243
244/* Same as EXTRACT_NUMBER, except increment the pointer for source to
245 point to second byte of SOURCE. Note that SOURCE has to be a value
246 such as p, not, e.g., p + 1. */
247#define EXTRACT_NUMBER_AND_INCR(destination, source) \
248 { EXTRACT_NUMBER (destination, source); \
249 (source) += 2; }
250
251
252/* Specify the precise syntax of regexps for compilation. This provides
253 for compatibility for various utilities which historically have
254 different, incompatible syntaxes.
255
256 The argument SYNTAX is a bit-mask comprised of the various bits
257 defined in regex.h. */
15637ed4
RG
258
259int
260re_set_syntax (int syntax)
261{
262 int ret;
263
264 ret = obscure_syntax;
265 obscure_syntax = syntax;
266 return ret;
267}
15637ed4 268
78ed81a3 269/* Set by re_set_syntax to the current regexp syntax to recognize. */
270int obscure_syntax = 0;
15637ed4 271
15637ed4 272
78ed81a3 273\f
274/* Macros for re_compile_pattern, which is found below these definitions. */
275
276#define CHAR_CLASS_MAX_LENGTH 6
15637ed4 277
78ed81a3 278/* Fetch the next character in the uncompiled pattern, translating it if
279 necessary. */
280#define PATFETCH(c) \
281 {if (p == pend) goto end_of_pattern; \
282 c = * (const unsigned char *) p++; \
15637ed4
RG
283 if (translate) c = translate[c]; }
284
78ed81a3 285/* Fetch the next character in the uncompiled pattern, with no
286 translation. */
287#define PATFETCH_RAW(c) \
288 {if (p == pend) goto end_of_pattern; \
289 c = * (const unsigned char *) p++; }
15637ed4
RG
290
291#define PATUNFETCH p--
292
78ed81a3 293
294/* If the buffer isn't allocated when it comes in, use this. */
295#define INIT_BUF_SIZE 28
296
297/* Make sure we have at least N more bytes of space in buffer. */
298#define GET_BUFFER_SPACE(n) \
299 { \
300 while (b - bufp->buffer + (n) >= bufp->allocated) \
301 EXTEND_BUFFER; \
15637ed4
RG
302 }
303
78ed81a3 304/* Make sure we have one more byte of buffer space and then add CH to it. */
305#define BUFPUSH(ch) \
306 { \
307 GET_BUFFER_SPACE (1); \
308 *b++ = (char) (ch); \
309 }
310
311/* Extend the buffer by twice its current size via reallociation and
312 reset the pointers that pointed into the old allocation to point to
313 the correct places in the new allocation. If extending the buffer
314 results in it being larger than 1 << 16, then flag memory exhausted. */
315#define EXTEND_BUFFER \
316 { char *old_buffer = bufp->buffer; \
317 if (bufp->allocated == (1L<<16)) goto too_big; \
318 bufp->allocated *= 2; \
319 if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16); \
320 bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated); \
321 if (bufp->buffer == 0) \
322 goto memory_exhausted; \
323 b = (b - old_buffer) + bufp->buffer; \
324 if (fixup_jump) \
325 fixup_jump = (fixup_jump - old_buffer) + bufp->buffer; \
326 if (laststart) \
327 laststart = (laststart - old_buffer) + bufp->buffer; \
328 begalt = (begalt - old_buffer) + bufp->buffer; \
329 if (pending_exact) \
330 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
331 }
332
333/* Set the bit for character C in a character set list. */
334#define SET_LIST_BIT(c) (b[(c) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
335
336/* Get the next unsigned number in the uncompiled pattern. */
337#define GET_UNSIGNED_NUMBER(num) \
338 { if (p != pend) \
339 { \
340 PATFETCH (c); \
341 while (isdigit (c)) \
342 { \
343 if (num < 0) \
344 num = 0; \
345 num = num * 10 + c - '0'; \
346 if (p == pend) \
347 break; \
348 PATFETCH (c); \
349 } \
350 } \
351 }
352
353/* Subroutines for re_compile_pattern. */
354static void store_jump (char *from, char opcode, char *to);
355static void insert_jump (char op, char *from, char *to, char *current_end);
356static void store_jump_n (char *from, char opcode, char *to, unsigned n);
357static void insert_jump_n (char, char *, char *, char *, unsigned);
358static void insert_op_2 (char, char *, char *_end, int, int);
359
360
361/* re_compile_pattern takes a regular-expression string
362 and converts it into a buffer full of byte commands for matching.
363
364 PATTERN is the address of the pattern string
365 SIZE is the length of it.
366 BUFP is a struct re_pattern_buffer * which points to the info
367 on where to store the byte commands.
368 This structure contains a char * which points to the
369 actual space, which should have been obtained with malloc.
370 re_compile_pattern may use realloc to grow the buffer space.
371
372 The number of bytes of commands can be found out by looking in
373 the `struct re_pattern_buffer' that bufp pointed to, after
374 re_compile_pattern returns. */
15637ed4
RG
375
376char *
78ed81a3 377re_compile_pattern (const char *pattern, int size, struct re_pattern_buffer *bufp)
15637ed4
RG
378{
379 register char *b = bufp->buffer;
78ed81a3 380 register const char *p = pattern;
381 const char *pend = pattern + size;
15637ed4 382 register unsigned c, c1;
78ed81a3 383 const char *p1;
15637ed4
RG
384 unsigned char *translate = (unsigned char *) bufp->translate;
385
78ed81a3 386 /* Address of the count-byte of the most recently inserted `exactn'
387 command. This makes it possible to tell whether a new exact-match
388 character can be added to that command or requires a new `exactn'
389 command. */
15637ed4
RG
390
391 char *pending_exact = 0;
392
78ed81a3 393 /* Address of the place where a forward-jump should go to the end of
394 the containing expression. Each alternative of an `or', except the
395 last, ends with a forward-jump of this sort. */
15637ed4
RG
396
397 char *fixup_jump = 0;
398
78ed81a3 399 /* Address of start of the most recently finished expression.
400 This tells postfix * where to find the start of its operand. */
15637ed4
RG
401
402 char *laststart = 0;
403
78ed81a3 404 /* In processing a repeat, 1 means zero matches is allowed. */
15637ed4
RG
405
406 char zero_times_ok;
407
78ed81a3 408 /* In processing a repeat, 1 means many matches is allowed. */
15637ed4
RG
409
410 char many_times_ok;
411
78ed81a3 412 /* Address of beginning of regexp, or inside of last \(. */
15637ed4
RG
413
414 char *begalt = b;
415
78ed81a3 416 /* In processing an interval, at least this many matches must be made. */
417 int lower_bound;
418
419 /* In processing an interval, at most this many matches can be made. */
420 int upper_bound;
421
422 /* Place in pattern (i.e., the {) to which to go back if the interval
423 is invalid. */
424 const char *beg_interval = 0;
425
15637ed4
RG
426 /* Stack of information saved by \( and restored by \).
427 Four stack elements are pushed by each \(:
428 First, the value of b.
429 Second, the value of fixup_jump.
430 Third, the value of regnum.
431 Fourth, the value of begalt. */
432
433 int stackb[40];
434 int *stackp = stackb;
435 int *stacke = stackb + 40;
436 int *stackt;
437
438 /* Counts \('s as they are encountered. Remembered for the matching \),
78ed81a3 439 where it becomes the register number to put in the stop_memory
440 command. */
15637ed4 441
78ed81a3 442 unsigned regnum = 1;
15637ed4
RG
443
444 bufp->fastmap_accurate = 0;
445
446#ifndef emacs
447#ifndef SYNTAX_TABLE
78ed81a3 448 /* Initialize the syntax table. */
15637ed4
RG
449 init_syntax_once();
450#endif
451#endif
452
453 if (bufp->allocated == 0)
454 {
78ed81a3 455 bufp->allocated = INIT_BUF_SIZE;
15637ed4 456 if (bufp->buffer)
78ed81a3 457 /* EXTEND_BUFFER loses when bufp->allocated is 0. */
458 bufp->buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE);
15637ed4 459 else
78ed81a3 460 /* Caller did not allocate a buffer. Do it for them. */
461 bufp->buffer = (char *) malloc (INIT_BUF_SIZE);
15637ed4
RG
462 if (!bufp->buffer) goto memory_exhausted;
463 begalt = b = bufp->buffer;
464 }
465
466 while (p != pend)
467 {
15637ed4
RG
468 PATFETCH (c);
469
470 switch (c)
471 {
472 case '$':
78ed81a3 473 {
474 const char *p1 = p;
475 /* When testing what follows the $,
476 look past the \-constructs that don't consume anything. */
477 if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
478 while (p1 != pend)
479 {
480 if (*p1 == '\\' && p1 + 1 != pend
481 && (p1[1] == '<' || p1[1] == '>'
482 || p1[1] == '`' || p1[1] == '\''
483#ifdef emacs
484 || p1[1] == '='
485#endif
486 || p1[1] == 'b' || p1[1] == 'B'))
487 p1 += 2;
488 else
489 break;
490 }
491 if (obscure_syntax & RE_TIGHT_VBAR)
492 {
493 if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend)
494 goto normal_char;
495 /* Make operand of last vbar end before this `$'. */
496 if (fixup_jump)
497 store_jump (fixup_jump, jump, b);
498 fixup_jump = 0;
499 BUFPUSH (endline);
500 break;
501 }
502 /* $ means succeed if at end of line, but only in special contexts.
503 If validly in the middle of a pattern, it is a normal character. */
504
505 if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && p1 != pend)
506 goto invalid_pattern;
507 if (p1 == pend || *p1 == '\n'
508 || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
509 || (obscure_syntax & RE_NO_BK_PARENS
510 ? *p1 == ')'
511 : *p1 == '\\' && p1[1] == ')')
512 || (obscure_syntax & RE_NO_BK_VBAR
513 ? *p1 == '|'
514 : *p1 == '\\' && p1[1] == '|'))
515 {
516 BUFPUSH (endline);
517 break;
518 }
519 goto normal_char;
520 }
15637ed4 521 case '^':
78ed81a3 522 /* ^ means succeed if at beg of line, but only if no preceding
523 pattern. */
524
525 if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && laststart)
526 goto invalid_pattern;
527 if (laststart && p - 2 >= pattern && p[-2] != '\n'
528 && !(obscure_syntax & RE_CONTEXT_INDEP_OPS))
15637ed4
RG
529 goto normal_char;
530 if (obscure_syntax & RE_TIGHT_VBAR)
531 {
532 if (p != pattern + 1
533 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
534 goto normal_char;
78ed81a3 535 BUFPUSH (begline);
15637ed4
RG
536 begalt = b;
537 }
538 else
78ed81a3 539 BUFPUSH (begline);
15637ed4
RG
540 break;
541
542 case '+':
543 case '?':
78ed81a3 544 if ((obscure_syntax & RE_BK_PLUS_QM)
545 || (obscure_syntax & RE_LIMITED_OPS))
15637ed4
RG
546 goto normal_char;
547 handle_plus:
548 case '*':
549 /* If there is no previous pattern, char not special. */
78ed81a3 550 if (!laststart)
551 {
552 if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
553 goto invalid_pattern;
554 else if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
555 goto normal_char;
556 }
15637ed4 557 /* If there is a sequence of repetition chars,
78ed81a3 558 collapse it down to just one. */
15637ed4
RG
559 zero_times_ok = 0;
560 many_times_ok = 0;
561 while (1)
562 {
563 zero_times_ok |= c != '+';
564 many_times_ok |= c != '?';
565 if (p == pend)
566 break;
567 PATFETCH (c);
568 if (c == '*')
569 ;
570 else if (!(obscure_syntax & RE_BK_PLUS_QM)
571 && (c == '+' || c == '?'))
572 ;
573 else if ((obscure_syntax & RE_BK_PLUS_QM)
574 && c == '\\')
575 {
576 int c1;
577 PATFETCH (c1);
578 if (!(c1 == '+' || c1 == '?'))
579 {
580 PATUNFETCH;
581 PATUNFETCH;
582 break;
583 }
584 c = c1;
585 }
586 else
587 {
588 PATUNFETCH;
589 break;
590 }
591 }
592
593 /* Star, etc. applied to an empty pattern is equivalent
594 to an empty pattern. */
78ed81a3 595 if (!laststart)
15637ed4
RG
596 break;
597
78ed81a3 598 /* Now we know whether or not zero matches is allowed
599 and also whether or not two or more matches is allowed. */
15637ed4
RG
600 if (many_times_ok)
601 {
78ed81a3 602 /* If more than one repetition is allowed, put in at the
603 end a backward relative jump from b to before the next
604 jump we're going to put in below (which jumps from
605 laststart to after this jump). */
606 GET_BUFFER_SPACE (3);
15637ed4 607 store_jump (b, maybe_finalize_jump, laststart - 3);
78ed81a3 608 b += 3; /* Because store_jump put stuff here. */
15637ed4 609 }
78ed81a3 610 /* On failure, jump from laststart to b + 3, which will be the
611 end of the buffer after this jump is inserted. */
612 GET_BUFFER_SPACE (3);
15637ed4
RG
613 insert_jump (on_failure_jump, laststart, b + 3, b);
614 pending_exact = 0;
615 b += 3;
616 if (!zero_times_ok)
617 {
78ed81a3 618 /* At least one repetition is required, so insert a
619 dummy-failure before the initial on-failure-jump
620 instruction of the loop. This effects a skip over that
621 instruction the first time we hit that loop. */
622 GET_BUFFER_SPACE (6);
623 insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
15637ed4
RG
624 b += 3;
625 }
626 break;
627
628 case '.':
629 laststart = b;
78ed81a3 630 BUFPUSH (anychar);
15637ed4
RG
631 break;
632
78ed81a3 633 case '[':
634 if (p == pend)
635 goto invalid_pattern;
15637ed4
RG
636 while (b - bufp->buffer
637 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
15637ed4
RG
638 EXTEND_BUFFER;
639
640 laststart = b;
641 if (*p == '^')
78ed81a3 642 {
643 BUFPUSH (charset_not);
644 p++;
645 }
15637ed4 646 else
78ed81a3 647 BUFPUSH (charset);
15637ed4
RG
648 p1 = p;
649
78ed81a3 650 BUFPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
15637ed4 651 /* Clear the whole map */
78ed81a3 652 memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
653
654 if ((obscure_syntax & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not)
655 SET_LIST_BIT ('\n');
656
657
658 /* Read in characters and ranges, setting map bits. */
15637ed4
RG
659 while (1)
660 {
78ed81a3 661 /* Don't translate while fetching, in case it's a range bound.
662 When we set the bit for the character, we translate it. */
663 PATFETCH_RAW (c);
664
665 /* If set, \ escapes characters when inside [...]. */
666 if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\')
667 {
668 PATFETCH(c1);
669 SET_LIST_BIT (c1);
670 continue;
671 }
672 if (c == ']')
673 {
674 if (p == p1 + 1)
675 {
676 /* If this is an empty bracket expression. */
677 if ((obscure_syntax & RE_NO_EMPTY_BRACKETS)
678 && p == pend)
679 goto invalid_pattern;
680 }
681 else
682 /* Stop if this isn't merely a ] inside a bracket
683 expression, but rather the end of a bracket
684 expression. */
685 break;
686 }
687 /* Get a range. */
688 if (p[0] == '-' && p[1] != ']')
15637ed4 689 {
78ed81a3 690 PATFETCH (c1);
691 /* Don't translate the range bounds while fetching them. */
692 PATFETCH_RAW (c1);
693
694 if ((obscure_syntax & RE_NO_EMPTY_RANGES) && c > c1)
695 goto invalid_pattern;
696
697 if ((obscure_syntax & RE_NO_HYPHEN_RANGE_END)
698 && c1 == '-' && *p != ']')
699 goto invalid_pattern;
700
701 while (c <= c1)
702 {
703 /* Translate each char that's in the range. */
704 if (translate)
705 SET_LIST_BIT (translate[c]);
706 else
707 SET_LIST_BIT (c);
708 c++;
709 }
710 }
711 else if ((obscure_syntax & RE_CHAR_CLASSES)
712 && c == '[' && p[0] == ':')
713 {
714 /* Longest valid character class word has six characters. */
715 char str[CHAR_CLASS_MAX_LENGTH];
716 PATFETCH (c);
717 c1 = 0;
718 /* If no ] at end. */
719 if (p == pend)
720 goto invalid_pattern;
721 while (1)
722 {
723 /* Don't translate the ``character class'' characters. */
724 PATFETCH_RAW (c);
725 if (c == ':' || c == ']' || p == pend
726 || c1 == CHAR_CLASS_MAX_LENGTH)
727 break;
728 str[c1++] = c;
729 }
730 str[c1] = '\0';
731 if (p == pend
732 || c == ']' /* End of the bracket expression. */
733 || p[0] != ']'
734 || p + 1 == pend
735 || (strcmp (str, "alpha") != 0
736 && strcmp (str, "upper") != 0
737 && strcmp (str, "lower") != 0
738 && strcmp (str, "digit") != 0
739 && strcmp (str, "alnum") != 0
740 && strcmp (str, "xdigit") != 0
741 && strcmp (str, "space") != 0
742 && strcmp (str, "print") != 0
743 && strcmp (str, "punct") != 0
744 && strcmp (str, "graph") != 0
745 && strcmp (str, "cntrl") != 0))
746 {
747 /* Undo the ending character, the letters, and leave
748 the leading : and [ (but set bits for them). */
749 c1++;
750 while (c1--)
751 PATUNFETCH;
752 SET_LIST_BIT ('[');
753 SET_LIST_BIT (':');
754 }
755 else
756 {
757 /* The ] at the end of the character class. */
758 PATFETCH (c);
759 if (c != ']')
760 goto invalid_pattern;
761 for (c = 0; c < (1 << BYTEWIDTH); c++)
762 {
763 if ((strcmp (str, "alpha") == 0 && isalpha (c))
764 || (strcmp (str, "upper") == 0 && isupper (c))
765 || (strcmp (str, "lower") == 0 && islower (c))
766 || (strcmp (str, "digit") == 0 && isdigit (c))
767 || (strcmp (str, "alnum") == 0 && isalnum (c))
768 || (strcmp (str, "xdigit") == 0 && isxdigit (c))
769 || (strcmp (str, "space") == 0 && isspace (c))
770 || (strcmp (str, "print") == 0 && isprint (c))
771 || (strcmp (str, "punct") == 0 && ispunct (c))
772 || (strcmp (str, "graph") == 0 && isgraph (c))
773 || (strcmp (str, "cntrl") == 0 && iscntrl (c)))
774 SET_LIST_BIT (c);
775 }
776 }
777 }
778 else if (translate)
779 SET_LIST_BIT (translate[c]);
15637ed4 780 else
78ed81a3 781 SET_LIST_BIT (c);
15637ed4 782 }
78ed81a3 783
784 /* Discard any character set/class bitmap bytes that are all
785 0 at the end of the map. Decrement the map-length byte too. */
786 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
787 b[-1]--;
788 b += b[-1];
789 break;
15637ed4
RG
790
791 case '(':
792 if (! (obscure_syntax & RE_NO_BK_PARENS))
793 goto normal_char;
794 else
795 goto handle_open;
796
797 case ')':
798 if (! (obscure_syntax & RE_NO_BK_PARENS))
799 goto normal_char;
800 else
801 goto handle_close;
802
78ed81a3 803 case '\n':
15637ed4
RG
804 if (! (obscure_syntax & RE_NEWLINE_OR))
805 goto normal_char;
806 else
807 goto handle_bar;
808
809 case '|':
78ed81a3 810 if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
811 && (! laststart || p == pend))
812 goto invalid_pattern;
813 else if (! (obscure_syntax & RE_NO_BK_VBAR))
15637ed4
RG
814 goto normal_char;
815 else
816 goto handle_bar;
817
78ed81a3 818 case '{':
819 if (! ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
820 && (obscure_syntax & RE_INTERVALS)))
821 goto normal_char;
822 else
823 goto handle_interval;
824
15637ed4
RG
825 case '\\':
826 if (p == pend) goto invalid_pattern;
827 PATFETCH_RAW (c);
828 switch (c)
829 {
830 case '(':
831 if (obscure_syntax & RE_NO_BK_PARENS)
832 goto normal_backsl;
833 handle_open:
834 if (stackp == stacke) goto nesting_too_deep;
78ed81a3 835
836 /* Laststart should point to the start_memory that we are about
837 to push (unless the pattern has RE_NREGS or more ('s). */
838 *stackp++ = b - bufp->buffer;
15637ed4
RG
839 if (regnum < RE_NREGS)
840 {
78ed81a3 841 BUFPUSH (start_memory);
842 BUFPUSH (regnum);
15637ed4 843 }
15637ed4
RG
844 *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
845 *stackp++ = regnum++;
846 *stackp++ = begalt - bufp->buffer;
847 fixup_jump = 0;
848 laststart = 0;
849 begalt = b;
850 break;
851
852 case ')':
853 if (obscure_syntax & RE_NO_BK_PARENS)
854 goto normal_backsl;
855 handle_close:
856 if (stackp == stackb) goto unmatched_close;
857 begalt = *--stackp + bufp->buffer;
858 if (fixup_jump)
859 store_jump (fixup_jump, jump, b);
860 if (stackp[-1] < RE_NREGS)
861 {
78ed81a3 862 BUFPUSH (stop_memory);
863 BUFPUSH (stackp[-1]);
15637ed4
RG
864 }
865 stackp -= 2;
78ed81a3 866 fixup_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
867 laststart = *--stackp + bufp->buffer;
15637ed4
RG
868 break;
869
870 case '|':
78ed81a3 871 if ((obscure_syntax & RE_LIMITED_OPS)
872 || (obscure_syntax & RE_NO_BK_VBAR))
15637ed4
RG
873 goto normal_backsl;
874 handle_bar:
78ed81a3 875 if (obscure_syntax & RE_LIMITED_OPS)
876 goto normal_char;
877 /* Insert before the previous alternative a jump which
878 jumps to this alternative if the former fails. */
879 GET_BUFFER_SPACE (6);
880 insert_jump (on_failure_jump, begalt, b + 6, b);
15637ed4
RG
881 pending_exact = 0;
882 b += 3;
78ed81a3 883 /* The alternative before the previous alternative has a
884 jump after it which gets executed if it gets matched.
885 Adjust that jump so it will jump to the previous
886 alternative's analogous jump (put in below, which in
887 turn will jump to the next (if any) alternative's such
888 jump, etc.). The last such jump jumps to the correct
889 final destination. */
890 if (fixup_jump)
15637ed4 891 store_jump (fixup_jump, jump, b);
78ed81a3 892
893 /* Leave space for a jump after previous alternative---to be
894 filled in later. */
895 fixup_jump = b;
896 b += 3;
897
898 laststart = 0;
15637ed4
RG
899 begalt = b;
900 break;
901
78ed81a3 902 case '{':
903 if (! (obscure_syntax & RE_INTERVALS)
904 /* Let \{ be a literal. */
905 || ((obscure_syntax & RE_INTERVALS)
906 && (obscure_syntax & RE_NO_BK_CURLY_BRACES))
907 /* If it's the string "\{". */
908 || (p - 2 == pattern && p == pend))
909 goto normal_backsl;
910 handle_interval:
911 beg_interval = p - 1; /* The {. */
912 /* If there is no previous pattern, this isn't an interval. */
913 if (!laststart)
914 {
915 if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
916 goto invalid_pattern;
917 else
918 goto normal_backsl;
919 }
920 /* It also isn't an interval if not preceded by an re
921 matching a single character or subexpression, or if
922 the current type of intervals can't handle back
923 references and the previous thing is a back reference. */
924 if (! (*laststart == anychar
925 || *laststart == charset
926 || *laststart == charset_not
927 || *laststart == start_memory
928 || (*laststart == exactn && laststart[1] == 1)
929 || (! (obscure_syntax & RE_NO_BK_REFS)
930 && *laststart == duplicate)))
931 {
932 if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
933 goto normal_char;
934
935 /* Posix extended syntax is handled in previous
936 statement; this is for Posix basic syntax. */
937 if (obscure_syntax & RE_INTERVALS)
938 goto invalid_pattern;
939
940 goto normal_backsl;
941 }
942 lower_bound = -1; /* So can see if are set. */
943 upper_bound = -1;
944 GET_UNSIGNED_NUMBER (lower_bound);
945 if (c == ',')
946 {
947 GET_UNSIGNED_NUMBER (upper_bound);
948 if (upper_bound < 0)
949 upper_bound = RE_DUP_MAX;
950 }
951 if (upper_bound < 0)
952 upper_bound = lower_bound;
953 if (! (obscure_syntax & RE_NO_BK_CURLY_BRACES))
954 {
955 if (c != '\\')
956 goto invalid_pattern;
957 PATFETCH (c);
958 }
959 if (c != '}' || lower_bound < 0 || upper_bound > RE_DUP_MAX
960 || lower_bound > upper_bound
961 || ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
962 && p != pend && *p == '{'))
963 {
964 if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
965 goto unfetch_interval;
966 else
967 goto invalid_pattern;
968 }
969
970 /* If upper_bound is zero, don't want to succeed at all;
971 jump from laststart to b + 3, which will be the end of
972 the buffer after this jump is inserted. */
973
974 if (upper_bound == 0)
975 {
976 GET_BUFFER_SPACE (3);
977 insert_jump (jump, laststart, b + 3, b);
978 b += 3;
979 }
980
981 /* Otherwise, after lower_bound number of succeeds, jump
982 to after the jump_n which will be inserted at the end
983 of the buffer, and insert that jump_n. */
984 else
985 { /* Set to 5 if only one repetition is allowed and
986 hence no jump_n is inserted at the current end of
987 the buffer; then only space for the succeed_n is
988 needed. Otherwise, need space for both the
989 succeed_n and the jump_n. */
990
991 unsigned slots_needed = upper_bound == 1 ? 5 : 10;
992
993 GET_BUFFER_SPACE ((int) slots_needed);
994 /* Initialize the succeed_n to n, even though it will
995 be set by its attendant set_number_at, because
996 re_compile_fastmap will need to know it. Jump to
997 what the end of buffer will be after inserting
998 this succeed_n and possibly appending a jump_n. */
999 insert_jump_n (succeed_n, laststart, b + slots_needed,
1000 b, lower_bound);
1001 b += 5; /* Just increment for the succeed_n here. */
1002
1003 /* More than one repetition is allowed, so put in at
1004 the end of the buffer a backward jump from b to the
1005 succeed_n we put in above. By the time we've gotten
1006 to this jump when matching, we'll have matched once
1007 already, so jump back only upper_bound - 1 times. */
1008
1009 if (upper_bound > 1)
1010 {
1011 store_jump_n (b, jump_n, laststart, upper_bound - 1);
1012 b += 5;
1013 /* When hit this when matching, reset the
1014 preceding jump_n's n to upper_bound - 1. */
1015 BUFPUSH (set_number_at);
1016 GET_BUFFER_SPACE (2);
1017 STORE_NUMBER_AND_INCR (b, -5);
1018 STORE_NUMBER_AND_INCR (b, upper_bound - 1);
1019 }
1020 /* When hit this when matching, set the succeed_n's n. */
1021 GET_BUFFER_SPACE (5);
1022 insert_op_2 (set_number_at, laststart, b, 5, lower_bound);
1023 b += 5;
1024 }
1025 pending_exact = 0;
1026 beg_interval = 0;
1027 break;
1028
1029
1030 unfetch_interval:
1031 /* If an invalid interval, match the characters as literals. */
1032 if (beg_interval)
1033 p = beg_interval;
1034 else
1035 {
1036 fprintf (stderr,
1037 "regex: no interval beginning to which to backtrack.\n");
1038 exit (1);
1039 }
1040
1041 beg_interval = 0;
1042 PATFETCH (c); /* normal_char expects char in `c'. */
1043 goto normal_char;
1044 break;
1045
15637ed4
RG
1046#ifdef emacs
1047 case '=':
78ed81a3 1048 BUFPUSH (at_dot);
15637ed4
RG
1049 break;
1050
1051 case 's':
1052 laststart = b;
78ed81a3 1053 BUFPUSH (syntaxspec);
15637ed4 1054 PATFETCH (c);
78ed81a3 1055 BUFPUSH (syntax_spec_code[c]);
15637ed4
RG
1056 break;
1057
1058 case 'S':
1059 laststart = b;
78ed81a3 1060 BUFPUSH (notsyntaxspec);
15637ed4 1061 PATFETCH (c);
78ed81a3 1062 BUFPUSH (syntax_spec_code[c]);
15637ed4
RG
1063 break;
1064#endif /* emacs */
1065
1066 case 'w':
1067 laststart = b;
78ed81a3 1068 BUFPUSH (wordchar);
15637ed4
RG
1069 break;
1070
1071 case 'W':
1072 laststart = b;
78ed81a3 1073 BUFPUSH (notwordchar);
15637ed4
RG
1074 break;
1075
1076 case '<':
78ed81a3 1077 BUFPUSH (wordbeg);
15637ed4
RG
1078 break;
1079
1080 case '>':
78ed81a3 1081 BUFPUSH (wordend);
15637ed4
RG
1082 break;
1083
1084 case 'b':
78ed81a3 1085 BUFPUSH (wordbound);
15637ed4
RG
1086 break;
1087
1088 case 'B':
78ed81a3 1089 BUFPUSH (notwordbound);
15637ed4
RG
1090 break;
1091
1092 case '`':
78ed81a3 1093 BUFPUSH (begbuf);
15637ed4
RG
1094 break;
1095
1096 case '\'':
78ed81a3 1097 BUFPUSH (endbuf);
15637ed4
RG
1098 break;
1099
1100 case '1':
1101 case '2':
1102 case '3':
1103 case '4':
1104 case '5':
1105 case '6':
1106 case '7':
1107 case '8':
1108 case '9':
78ed81a3 1109 if (obscure_syntax & RE_NO_BK_REFS)
1110 goto normal_char;
1111 c1 = c - '0';
15637ed4 1112 if (c1 >= regnum)
78ed81a3 1113 {
1114 if (obscure_syntax & RE_NO_EMPTY_BK_REF)
1115 goto invalid_pattern;
1116 else
1117 goto normal_char;
1118 }
1119 /* Can't back reference to a subexpression if inside of it. */
1120 for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
15637ed4
RG
1121 if (*stackt == c1)
1122 goto normal_char;
1123 laststart = b;
78ed81a3 1124 BUFPUSH (duplicate);
1125 BUFPUSH (c1);
15637ed4
RG
1126 break;
1127
1128 case '+':
1129 case '?':
1130 if (obscure_syntax & RE_BK_PLUS_QM)
1131 goto handle_plus;
78ed81a3 1132 else
1133 goto normal_backsl;
1134 break;
15637ed4 1135
78ed81a3 1136 default:
15637ed4
RG
1137 normal_backsl:
1138 /* You might think it would be useful for \ to mean
1139 not to translate; but if we don't translate it
1140 it will never match anything. */
1141 if (translate) c = translate[c];
1142 goto normal_char;
1143 }
1144 break;
1145
1146 default:
78ed81a3 1147 normal_char: /* Expects the character in `c'. */
15637ed4
RG
1148 if (!pending_exact || pending_exact + *pending_exact + 1 != b
1149 || *pending_exact == 0177 || *p == '*' || *p == '^'
1150 || ((obscure_syntax & RE_BK_PLUS_QM)
1151 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
78ed81a3 1152 : (*p == '+' || *p == '?'))
1153 || ((obscure_syntax & RE_INTERVALS)
1154 && ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
1155 ? *p == '{'
1156 : (p[0] == '\\' && p[1] == '{'))))
15637ed4
RG
1157 {
1158 laststart = b;
78ed81a3 1159 BUFPUSH (exactn);
15637ed4 1160 pending_exact = b;
78ed81a3 1161 BUFPUSH (0);
15637ed4 1162 }
78ed81a3 1163 BUFPUSH (c);
15637ed4
RG
1164 (*pending_exact)++;
1165 }
1166 }
1167
1168 if (fixup_jump)
1169 store_jump (fixup_jump, jump, b);
1170
1171 if (stackp != stackb) goto unmatched_open;
1172
1173 bufp->used = b - bufp->buffer;
1174 return 0;
1175
1176 invalid_pattern:
1177 return "Invalid regular expression";
1178
1179 unmatched_open:
1180 return "Unmatched \\(";
1181
1182 unmatched_close:
1183 return "Unmatched \\)";
1184
1185 end_of_pattern:
1186 return "Premature end of regular expression";
1187
1188 nesting_too_deep:
1189 return "Nesting too deep";
1190
1191 too_big:
1192 return "Regular expression too big";
1193
1194 memory_exhausted:
1195 return "Memory exhausted";
1196}
1197
78ed81a3 1198
1199/* Store a jump of the form <OPCODE> <relative address>.
1200 Store in the location FROM a jump operation to jump to relative
1201 address FROM - TO. OPCODE is the opcode to store. */
15637ed4
RG
1202
1203static void
1204store_jump (char *from, char opcode, char *to)
1205{
1206 from[0] = opcode;
78ed81a3 1207 STORE_NUMBER(from + 1, to - (from + 3));
15637ed4
RG
1208}
1209
78ed81a3 1210
1211/* Open up space before char FROM, and insert there a jump to TO.
1212 CURRENT_END gives the end of the storage not in use, so we know
1213 how much data to copy up. OP is the opcode of the jump to insert.
15637ed4
RG
1214
1215 If you call this function, you must zero out pending_exact. */
1216
1217static void
1218insert_jump (char op, char *from, char *to, char *current_end)
1219{
78ed81a3 1220 register char *pfrom = current_end; /* Copy from here... */
1221 register char *pto = current_end + 3; /* ...to here. */
1222
1223 while (pfrom != from)
15637ed4
RG
1224 *--pto = *--pfrom;
1225 store_jump (from, op, to);
1226}
78ed81a3 1227
1228
1229/* Store a jump of the form <opcode> <relative address> <n> .
1230
1231 Store in the location FROM a jump operation to jump to relative
1232 address FROM - TO. OPCODE is the opcode to store, N is a number the
1233 jump uses, say, to decide how many times to jump.
1234
1235 If you call this function, you must zero out pending_exact. */
1236
1237static void
1238store_jump_n (char *from, char opcode, char *to, unsigned n)
1239{
1240 from[0] = opcode;
1241 STORE_NUMBER (from + 1, to - (from + 3));
1242 STORE_NUMBER (from + 3, n);
1243}
1244
1245
1246/* Similar to insert_jump, but handles a jump which needs an extra
1247 number to handle minimum and maximum cases. Open up space at
1248 location FROM, and insert there a jump to TO. CURRENT_END gives the
1249 end of the storage in use, so we know how much data to copy up. OP is
1250 the opcode of the jump to insert.
1251
1252 If you call this function, you must zero out pending_exact. */
1253
1254static void
1255insert_jump_n (char op, char *from, char *to, char *current_end, unsigned n)
1256{
1257 register char *pfrom = current_end; /* Copy from here... */
1258 register char *pto = current_end + 5; /* ...to here. */
1259
1260 while (pfrom != from)
1261 *--pto = *--pfrom;
1262 store_jump_n (from, op, to, n);
1263}
1264
1265
1266/* Open up space at location THERE, and insert operation OP followed by
1267 NUM_1 and NUM_2. CURRENT_END gives the end of the storage in use, so
1268 we know how much data to copy up.
1269
1270 If you call this function, you must zero out pending_exact. */
1271
1272static void
1273insert_op_2 (char op, char *there, char *current_end, int num_1, int num_2)
1274{
1275 register char *pfrom = current_end; /* Copy from here... */
1276 register char *pto = current_end + 5; /* ...to here. */
1277
1278 while (pfrom != there)
1279 *--pto = *--pfrom;
1280
1281 there[0] = op;
1282 STORE_NUMBER (there + 1, num_1);
1283 STORE_NUMBER (there + 3, num_2);
1284}
1285
1286
15637ed4 1287\f
78ed81a3 1288/* Given a pattern, compute a fastmap from it. The fastmap records
1289 which of the (1 << BYTEWIDTH) possible characters can start a string
1290 that matches the pattern. This fastmap is used by re_search to skip
1291 quickly over totally implausible text.
15637ed4 1292
78ed81a3 1293 The caller must supply the address of a (1 << BYTEWIDTH)-byte data
1294 area as bufp->fastmap.
1295 The other components of bufp describe the pattern to be used. */
15637ed4
RG
1296
1297void
1298re_compile_fastmap (struct re_pattern_buffer *bufp)
1299{
1300 unsigned char *pattern = (unsigned char *) bufp->buffer;
1301 int size = bufp->used;
1302 register char *fastmap = bufp->fastmap;
1303 register unsigned char *p = pattern;
1304 register unsigned char *pend = pattern + size;
78ed81a3 1305 register int j, k;
15637ed4
RG
1306 unsigned char *translate = (unsigned char *) bufp->translate;
1307
1308 unsigned char *stackb[NFAILURES];
1309 unsigned char **stackp = stackb;
1310
78ed81a3 1311 unsigned is_a_succeed_n;
1312
1313 memset (fastmap, 0, (1 << BYTEWIDTH));
15637ed4
RG
1314 bufp->fastmap_accurate = 1;
1315 bufp->can_be_null = 0;
1316
1317 while (p)
1318 {
78ed81a3 1319 is_a_succeed_n = 0;
15637ed4
RG
1320 if (p == pend)
1321 {
1322 bufp->can_be_null = 1;
1323 break;
1324 }
1325#ifdef SWITCH_ENUM_BUG
1326 switch ((int) ((enum regexpcode) *p++))
1327#else
1328 switch ((enum regexpcode) *p++)
1329#endif
1330 {
1331 case exactn:
1332 if (translate)
1333 fastmap[translate[p[1]]] = 1;
1334 else
1335 fastmap[p[1]] = 1;
1336 break;
1337
78ed81a3 1338 case unused:
15637ed4 1339 case begline:
78ed81a3 1340#ifdef emacs
15637ed4
RG
1341 case before_dot:
1342 case at_dot:
1343 case after_dot:
78ed81a3 1344#endif
15637ed4
RG
1345 case begbuf:
1346 case endbuf:
1347 case wordbound:
1348 case notwordbound:
1349 case wordbeg:
1350 case wordend:
78ed81a3 1351 continue;
15637ed4
RG
1352
1353 case endline:
1354 if (translate)
1355 fastmap[translate['\n']] = 1;
1356 else
1357 fastmap['\n'] = 1;
78ed81a3 1358
15637ed4
RG
1359 if (bufp->can_be_null != 1)
1360 bufp->can_be_null = 2;
1361 break;
1362
78ed81a3 1363 case jump_n:
1364 case finalize_jump:
15637ed4
RG
1365 case maybe_finalize_jump:
1366 case jump:
1367 case dummy_failure_jump:
78ed81a3 1368 EXTRACT_NUMBER_AND_INCR (j, p);
1369 p += j;
15637ed4
RG
1370 if (j > 0)
1371 continue;
78ed81a3 1372 /* Jump backward reached implies we just went through
15637ed4
RG
1373 the body of a loop and matched nothing.
1374 Opcode jumped to should be an on_failure_jump.
1375 Just treat it like an ordinary jump.
1376 For a * loop, it has pushed its failure point already;
78ed81a3 1377 If so, discard that as redundant. */
1378
1379 if ((enum regexpcode) *p != on_failure_jump
1380 && (enum regexpcode) *p != succeed_n)
15637ed4 1381 continue;
78ed81a3 1382 p++;
1383 EXTRACT_NUMBER_AND_INCR (j, p);
1384 p += j;
1385 if (stackp != stackb && *stackp == p)
1386 stackp--;
1387 continue;
15637ed4 1388
78ed81a3 1389 case on_failure_jump:
1390 handle_on_failure_jump:
1391 EXTRACT_NUMBER_AND_INCR (j, p);
1392 *++stackp = p + j;
1393 if (is_a_succeed_n)
1394 EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
15637ed4
RG
1395 continue;
1396
78ed81a3 1397 case succeed_n:
1398 is_a_succeed_n = 1;
1399 /* Get to the number of times to succeed. */
1400 p += 2;
1401 /* Increment p past the n for when k != 0. */
1402 EXTRACT_NUMBER_AND_INCR (k, p);
1403 if (k == 0)
1404 {
1405 p -= 4;
1406 goto handle_on_failure_jump;
1407 }
1408 continue;
1409
1410 case set_number_at:
1411 p += 4;
1412 continue;
1413
1414 case start_memory:
15637ed4
RG
1415 case stop_memory:
1416 p++;
1417 continue;
1418
1419 case duplicate:
1420 bufp->can_be_null = 1;
1421 fastmap['\n'] = 1;
1422 case anychar:
1423 for (j = 0; j < (1 << BYTEWIDTH); j++)
1424 if (j != '\n')
1425 fastmap[j] = 1;
1426 if (bufp->can_be_null)
1427 return;
1428 /* Don't return; check the alternative paths
1429 so we can set can_be_null if appropriate. */
1430 break;
1431
1432 case wordchar:
1433 for (j = 0; j < (1 << BYTEWIDTH); j++)
1434 if (SYNTAX (j) == Sword)
1435 fastmap[j] = 1;
1436 break;
1437
1438 case notwordchar:
1439 for (j = 0; j < (1 << BYTEWIDTH); j++)
1440 if (SYNTAX (j) != Sword)
1441 fastmap[j] = 1;
1442 break;
1443
1444#ifdef emacs
1445 case syntaxspec:
1446 k = *p++;
1447 for (j = 0; j < (1 << BYTEWIDTH); j++)
1448 if (SYNTAX (j) == (enum syntaxcode) k)
1449 fastmap[j] = 1;
1450 break;
1451
1452 case notsyntaxspec:
1453 k = *p++;
1454 for (j = 0; j < (1 << BYTEWIDTH); j++)
1455 if (SYNTAX (j) != (enum syntaxcode) k)
1456 fastmap[j] = 1;
1457 break;
78ed81a3 1458#endif /* not emacs */
15637ed4
RG
1459
1460 case charset:
1461 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
1462 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
1463 {
1464 if (translate)
1465 fastmap[translate[j]] = 1;
1466 else
1467 fastmap[j] = 1;
1468 }
1469 break;
1470
1471 case charset_not:
1472 /* Chars beyond end of map must be allowed */
1473 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
1474 if (translate)
1475 fastmap[translate[j]] = 1;
1476 else
1477 fastmap[j] = 1;
1478
1479 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
1480 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
1481 {
1482 if (translate)
1483 fastmap[translate[j]] = 1;
1484 else
1485 fastmap[j] = 1;
1486 }
1487 break;
1488 }
1489
78ed81a3 1490 /* Get here means we have successfully found the possible starting
1491 characters of one path of the pattern. We need not follow this
1492 path any farther. Instead, look at the next alternative
1493 remembered in the stack. */
1494 if (stackp != stackb)
15637ed4
RG
1495 p = *stackp--;
1496 else
1497 break;
1498 }
1499}
78ed81a3 1500
1501
15637ed4 1502\f
78ed81a3 1503/* Like re_search_2, below, but only one string is specified, and
1504 doesn't let you say where to stop matching. */
15637ed4
RG
1505
1506int
78ed81a3 1507re_search (struct re_pattern_buffer *pbufp,
1508 char *string,
1509 int size,
1510 int startpos,
1511 int range,
1512 struct re_registers *regs)
15637ed4 1513{
78ed81a3 1514 return re_search_2 (pbufp, (char *) 0, 0, string, size, startpos, range,
1515 regs, size);
15637ed4
RG
1516}
1517
15637ed4 1518
78ed81a3 1519/* Using the compiled pattern in PBUFP->buffer, first tries to match the
1520 virtual concatenation of STRING1 and STRING2, starting first at index
1521 STARTPOS, then at STARTPOS + 1, and so on. RANGE is the number of
1522 places to try before giving up. If RANGE is negative, it searches
1523 backwards, i.e., the starting positions tried are STARTPOS, STARTPOS
1524 - 1, etc. STRING1 and STRING2 are of SIZE1 and SIZE2, respectively.
1525 In REGS, return the indices of the virtual concatenation of STRING1
1526 and STRING2 that matched the entire PBUFP->buffer and its contained
1527 subexpressions. Do not consider matching one past the index MSTOP in
1528 the virtual concatenation of STRING1 and STRING2.
1529
1530 The value returned is the position in the strings at which the match
1531 was found, or -1 if no match was found, or -2 if error (such as
1532 failure stack overflow). */
15637ed4
RG
1533
1534int
78ed81a3 1535re_search_2 (struct re_pattern_buffer *pbufp,
1536 char *string1, int size1,
1537 char *string2, int size2,
1538 int startpos,
1539 register int range,
1540 struct re_registers *regs,
1541 int mstop)
15637ed4
RG
1542{
1543 register char *fastmap = pbufp->fastmap;
1544 register unsigned char *translate = (unsigned char *) pbufp->translate;
78ed81a3 1545 int total_size = size1 + size2;
1546 int endpos = startpos + range;
15637ed4
RG
1547 int val;
1548
78ed81a3 1549 /* Check for out-of-range starting position. */
1550 if (startpos < 0 || startpos > total_size)
1551 return -1;
1552
1553 /* Fix up range if it would eventually take startpos outside of the
1554 virtual concatenation of string1 and string2. */
1555 if (endpos < -1)
1556 range = -1 - startpos;
1557 else if (endpos > total_size)
1558 range = total_size - startpos;
1559
1560 /* Update the fastmap now if not correct already. */
15637ed4
RG
1561 if (fastmap && !pbufp->fastmap_accurate)
1562 re_compile_fastmap (pbufp);
1563
78ed81a3 1564 /* If the search isn't to be a backwards one, don't waste time in a
1565 long search for a pattern that says it is anchored. */
15637ed4
RG
1566 if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
1567 && range > 0)
1568 {
1569 if (startpos > 0)
1570 return -1;
1571 else
1572 range = 1;
1573 }
1574
1575 while (1)
78ed81a3 1576 {
1577 /* If a fastmap is supplied, skip quickly over characters that
1578 cannot possibly be the start of a match. Note, however, that
1579 if the pattern can possibly match the null string, we must
1580 test it at each starting point so that we take the first null
1581 string we get. */
1582
1583 if (fastmap && startpos < total_size && pbufp->can_be_null != 1)
15637ed4 1584 {
78ed81a3 1585 if (range > 0) /* Searching forwards. */
15637ed4
RG
1586 {
1587 register int lim = 0;
1588 register unsigned char *p;
1589 int irange = range;
1590 if (startpos < size1 && startpos + range >= size1)
1591 lim = range - (size1 - startpos);
1592
1593 p = ((unsigned char *)
1594 &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
1595
78ed81a3 1596 while (range > lim && !fastmap[translate
1597 ? translate[*p++]
1598 : *p++])
15637ed4 1599 range--;
15637ed4
RG
1600 startpos += irange - range;
1601 }
78ed81a3 1602 else /* Searching backwards. */
15637ed4
RG
1603 {
1604 register unsigned char c;
78ed81a3 1605
1606 if (string1 == 0 || startpos >= size1)
15637ed4 1607 c = string2[startpos - size1];
78ed81a3 1608 else
15637ed4 1609 c = string1[startpos];
78ed81a3 1610
1611 c &= 0xff;
15637ed4
RG
1612 if (translate ? !fastmap[translate[c]] : !fastmap[c])
1613 goto advance;
1614 }
1615 }
1616
78ed81a3 1617 if (range >= 0 && startpos == total_size
15637ed4
RG
1618 && fastmap && pbufp->can_be_null == 0)
1619 return -1;
1620
78ed81a3 1621 val = re_match_2 (pbufp, string1, size1, string2, size2, startpos,
1622 regs, mstop);
1623 if (val >= 0)
1624 return startpos;
1625 if (val == -2)
1626 return -2;
15637ed4
RG
1627
1628#ifdef C_ALLOCA
1629 alloca (0);
1630#endif /* C_ALLOCA */
1631
1632 advance:
78ed81a3 1633 if (!range)
1634 break;
1635 else if (range > 0)
1636 {
1637 range--;
1638 startpos++;
1639 }
1640 else
1641 {
1642 range++;
1643 startpos--;
1644 }
15637ed4
RG
1645 }
1646 return -1;
1647}
78ed81a3 1648
1649
15637ed4 1650\f
78ed81a3 1651#ifndef emacs /* emacs never uses this. */
15637ed4 1652int
78ed81a3 1653re_match (struct re_pattern_buffer *pbufp,
1654 char *string,
1655 int size,
1656 int pos,
1657 struct re_registers *regs)
15637ed4 1658{
78ed81a3 1659 return re_match_2 (pbufp, (char *) 0, 0, string, size, pos, regs, size);
15637ed4 1660}
78ed81a3 1661#endif /* not emacs */
1662
15637ed4 1663
78ed81a3 1664/* The following are used for re_match_2, defined below: */
15637ed4 1665
78ed81a3 1666/* Roughly the maximum number of failure points on the stack. Would be
1667 exactly that if always pushed MAX_NUM_FAILURE_ITEMS each time we failed. */
1668
15637ed4
RG
1669int re_max_failures = 2000;
1670
78ed81a3 1671/* Routine used by re_match_2. */
1672static int bcmp_translate (char *, char *, int, unsigned char *);
1673
1674
1675/* Structure and accessing macros used in re_match_2: */
1676
1677struct register_info
1678{
1679 unsigned is_active : 1;
1680 unsigned matched_something : 1;
1681};
1682
1683#define IS_ACTIVE(R) ((R).is_active)
1684#define MATCHED_SOMETHING(R) ((R).matched_something)
1685
1686
1687/* Macros used by re_match_2: */
1688
1689
1690/* I.e., regstart, regend, and reg_info. */
1691
1692#define NUM_REG_ITEMS 3
1693
1694/* We push at most this many things on the stack whenever we
1695 fail. The `+ 2' refers to PATTERN_PLACE and STRING_PLACE, which are
1696 arguments to the PUSH_FAILURE_POINT macro. */
1697
1698#define MAX_NUM_FAILURE_ITEMS (RE_NREGS * NUM_REG_ITEMS + 2)
1699
1700
1701/* We push this many things on the stack whenever we fail. */
1702
1703#define NUM_FAILURE_ITEMS (last_used_reg * NUM_REG_ITEMS + 2)
1704
1705
1706/* This pushes most of the information about the current state we will want
1707 if we ever fail back to it. */
1708
1709#define PUSH_FAILURE_POINT(pattern_place, string_place) \
1710 { \
1711 short last_used_reg, this_reg; \
1712 \
1713 /* Find out how many registers are active or have been matched. \
1714 (Aside from register zero, which is only set at the end.) */ \
1715 for (last_used_reg = RE_NREGS - 1; last_used_reg > 0; last_used_reg--)\
1716 if (regstart[last_used_reg] != (unsigned char *) -1) \
1717 break; \
1718 \
1719 if (stacke - stackp < NUM_FAILURE_ITEMS) \
1720 { \
1721 unsigned char **stackx; \
1722 int len = stacke - stackb; \
1723 if (len > re_max_failures * MAX_NUM_FAILURE_ITEMS) \
1724 return -2; \
1725 \
1726 /* Roughly double the size of the stack. */ \
1727 stackx = (unsigned char **) alloca (2 * len \
1728 * sizeof (unsigned char *));\
1729 /* Only copy what is in use. */ \
1730 memcpy (stackx, stackb, len * sizeof (char *)); \
1731 stackp = stackx + (stackp - stackb); \
1732 stackb = stackx; \
1733 stacke = stackb + 2 * len; \
1734 } \
1735 \
1736 /* Now push the info for each of those registers. */ \
1737 for (this_reg = 1; this_reg <= last_used_reg; this_reg++) \
1738 { \
1739 *stackp++ = regstart[this_reg]; \
1740 *stackp++ = regend[this_reg]; \
1741 *stackp++ = (unsigned char *) &reg_info[this_reg]; \
1742 } \
1743 \
1744 /* Push how many registers we saved. */ \
1745 *stackp++ = (unsigned char *) last_used_reg; \
1746 \
1747 *stackp++ = pattern_place; \
1748 *stackp++ = string_place; \
1749 }
1750
1751
1752/* This pops what PUSH_FAILURE_POINT pushes. */
1753
1754#define POP_FAILURE_POINT() \
1755 { \
1756 int temp; \
1757 stackp -= 2; /* Remove failure points. */ \
1758 temp = (int) *--stackp; /* How many regs pushed. */ \
1759 temp *= NUM_REG_ITEMS; /* How much to take off the stack. */ \
1760 stackp -= temp; /* Remove the register info. */ \
1761 }
15637ed4 1762
78ed81a3 1763
1764#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
1765
1766/* Is true if there is a first string and if PTR is pointing anywhere
1767 inside it or just past the end. */
1768
1769#define IS_IN_FIRST_STRING(ptr) \
1770 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
1771
1772/* Call before fetching a character with *d. This switches over to
1773 string2 if necessary. */
1774
1775#define PREFETCH \
1776 while (d == dend) \
1777 { \
1778 /* end of string2 => fail. */ \
1779 if (dend == end_match_2) \
1780 goto fail; \
1781 /* end of string1 => advance to string2. */ \
1782 d = string2; \
1783 dend = end_match_2; \
1784 }
1785
1786
1787/* Call this when have matched something; it sets `matched' flags for the
1788 registers corresponding to the subexpressions of which we currently
1789 are inside. */
1790#define SET_REGS_MATCHED \
1791 { unsigned this_reg; \
1792 for (this_reg = 0; this_reg < RE_NREGS; this_reg++) \
1793 { \
1794 if (IS_ACTIVE(reg_info[this_reg])) \
1795 MATCHED_SOMETHING(reg_info[this_reg]) = 1; \
1796 else \
1797 MATCHED_SOMETHING(reg_info[this_reg]) = 0; \
1798 } \
1799 }
1800
1801/* Test if at very beginning or at very end of the virtual concatenation
1802 of string1 and string2. If there is only one string, we've put it in
1803 string2. */
1804
1805#define AT_STRINGS_BEG (d == (size1 ? string1 : string2) || !size2)
1806#define AT_STRINGS_END (d == end2)
1807
1808#define AT_WORD_BOUNDARY \
1809 (AT_STRINGS_BEG || AT_STRINGS_END || IS_A_LETTER (d - 1) != IS_A_LETTER (d))
1810
1811/* We have two special cases to check for:
1812 1) if we're past the end of string1, we have to look at the first
1813 character in string2;
1814 2) if we're before the beginning of string2, we have to look at the
1815 last character in string1; we assume there is a string1, so use
1816 this in conjunction with AT_STRINGS_BEG. */
1817#define IS_A_LETTER(d) \
1818 (SYNTAX ((d) == end1 ? *string2 : (d) == string2 - 1 ? *(end1 - 1) : *(d))\
1819 == Sword)
1820
1821
1822/* Match the pattern described by PBUFP against the virtual
1823 concatenation of STRING1 and STRING2, which are of SIZE1 and SIZE2,
1824 respectively. Start the match at index POS in the virtual
1825 concatenation of STRING1 and STRING2. In REGS, return the indices of
1826 the virtual concatenation of STRING1 and STRING2 that matched the
1827 entire PBUFP->buffer and its contained subexpressions. Do not
1828 consider matching one past the index MSTOP in the virtual
1829 concatenation of STRING1 and STRING2.
15637ed4
RG
1830
1831 If pbufp->fastmap is nonzero, then it had better be up to date.
1832
1833 The reason that the data to match are specified as two components
78ed81a3 1834 which are to be regarded as concatenated is so this function can be
1835 used directly on the contents of an Emacs buffer.
15637ed4 1836
78ed81a3 1837 -1 is returned if there is no match. -2 is returned if there is an
1838 error (such as match stack overflow). Otherwise the value is the
1839 length of the substring which was matched. */
15637ed4
RG
1840
1841int
78ed81a3 1842re_match_2 (struct re_pattern_buffer *pbufp,
1843 char *string1_arg, int size1,
1844 char *string2_arg, int size2,
1845 int pos,
1846 struct re_registers *regs,
1847 int mstop)
15637ed4
RG
1848{
1849 register unsigned char *p = (unsigned char *) pbufp->buffer;
78ed81a3 1850
1851 /* Pointer to beyond end of buffer. */
15637ed4 1852 register unsigned char *pend = p + pbufp->used;
78ed81a3 1853
1854 unsigned char *string1 = (unsigned char *) string1_arg;
1855 unsigned char *string2 = (unsigned char *) string2_arg;
1856 unsigned char *end1; /* Just past end of first string. */
1857 unsigned char *end2; /* Just past end of second string. */
1858
1859 /* Pointers into string1 and string2, just past the last characters in
1860 each to consider matching. */
15637ed4 1861 unsigned char *end_match_1, *end_match_2;
78ed81a3 1862
15637ed4 1863 register unsigned char *d, *dend;
78ed81a3 1864 register int mcnt; /* Multipurpose. */
15637ed4 1865 unsigned char *translate = (unsigned char *) pbufp->translate;
78ed81a3 1866 unsigned is_a_jump_n = 0;
1867
1868 /* Failure point stack. Each place that can handle a failure further
1869 down the line pushes a failure point on this stack. It consists of
1870 restart, regend, and reg_info for all registers corresponding to the
1871 subexpressions we're currently inside, plus the number of such
1872 registers, and, finally, two char *'s. The first char * is where to
1873 resume scanning the pattern; the second one is where to resume
1874 scanning the strings. If the latter is zero, the failure point is a
1875 ``dummy''; if a failure happens and the failure point is a dummy, it
1876 gets discarded and the next next one is tried. */
1877
1878 unsigned char *initial_stack[MAX_NUM_FAILURE_ITEMS * NFAILURES];
15637ed4 1879 unsigned char **stackb = initial_stack;
78ed81a3 1880 unsigned char **stackp = stackb;
1881 unsigned char **stacke = &stackb[MAX_NUM_FAILURE_ITEMS * NFAILURES];
15637ed4 1882
15637ed4 1883
78ed81a3 1884 /* Information on the contents of registers. These are pointers into
1885 the input strings; they record just what was matched (on this
1886 attempt) by a subexpression part of the pattern, that is, the
1887 regnum-th regstart pointer points to where in the pattern we began
1888 matching and the regnum-th regend points to right after where we
1889 stopped matching the regnum-th subexpression. (The zeroth register
1890 keeps track of what the whole pattern matches.) */
1891
15637ed4
RG
1892 unsigned char *regstart[RE_NREGS];
1893 unsigned char *regend[RE_NREGS];
78ed81a3 1894
1895 /* The is_active field of reg_info helps us keep track of which (possibly
1896 nested) subexpressions we are currently in. The matched_something
1897 field of reg_info[reg_num] helps us tell whether or not we have
1898 matched any of the pattern so far this time through the reg_num-th
1899 subexpression. These two fields get reset each time through any
1900 loop their register is in. */
1901
1902 struct register_info reg_info[RE_NREGS];
1903
1904
1905 /* The following record the register info as found in the above
1906 variables when we find a match better than any we've seen before.
1907 This happens as we backtrack through the failure points, which in
1908 turn happens only if we have not yet matched the entire string. */
1909
1910 unsigned best_regs_set = 0;
1911 unsigned char *best_regstart[RE_NREGS];
1912 unsigned char *best_regend[RE_NREGS];
1913
1914 /* Initialize subexpression text positions to -1 to mark ones that no
1915 \( or ( and \) or ) has been seen for. Also set all registers to
1916 inactive and mark them as not having matched anything or ever
1917 failed. */
1918 for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
1919 {
1920 regstart[mcnt] = regend[mcnt] = (unsigned char *) -1;
1921 IS_ACTIVE (reg_info[mcnt]) = 0;
1922 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
1923 }
1924
1925 if (regs)
1926 for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
1927 regs->start[mcnt] = regs->end[mcnt] = -1;
15637ed4
RG
1928
1929 /* Set up pointers to ends of strings.
1930 Don't allow the second string to be empty unless both are empty. */
78ed81a3 1931 if (size2 == 0)
15637ed4
RG
1932 {
1933 string2 = string1;
1934 size2 = size1;
1935 string1 = 0;
1936 size1 = 0;
1937 }
1938 end1 = string1 + size1;
1939 end2 = string2 + size2;
1940
78ed81a3 1941 /* Compute where to stop matching, within the two strings. */
15637ed4
RG
1942 if (mstop <= size1)
1943 {
1944 end_match_1 = string1 + mstop;
1945 end_match_2 = string2;
1946 }
1947 else
1948 {
1949 end_match_1 = end1;
1950 end_match_2 = string2 + mstop - size1;
1951 }
1952
78ed81a3 1953 /* `p' scans through the pattern as `d' scans through the data. `dend'
1954 is the end of the input string that `d' points within. `d' is
1955 advanced into the following input string whenever necessary, but
1956 this happens before fetching; therefore, at the beginning of the
1957 loop, `d' can be pointing at the end of a string, but it cannot
1958 equal string2. */
15637ed4 1959
78ed81a3 1960 if (size1 != 0 && pos <= size1)
15637ed4
RG
1961 d = string1 + pos, dend = end_match_1;
1962 else
1963 d = string2 + pos - size1, dend = end_match_2;
1964
15637ed4 1965
78ed81a3 1966 /* This loops over pattern commands. It exits by returning from the
1967 function if match is complete, or it drops through if match fails
1968 at this starting point in the input data. */
15637ed4
RG
1969
1970 while (1)
1971 {
78ed81a3 1972 is_a_jump_n = 0;
1973 /* End of pattern means we might have succeeded. */
15637ed4 1974 if (p == pend)
15637ed4 1975 {
78ed81a3 1976 /* If not end of string, try backtracking. Otherwise done. */
1977 if (d != end_match_2)
1978 {
1979 if (stackp != stackb)
1980 {
1981 /* More failure points to try. */
1982
1983 unsigned in_same_string =
1984 IS_IN_FIRST_STRING (best_regend[0])
1985 == MATCHING_IN_FIRST_STRING;
1986
1987 /* If exceeds best match so far, save it. */
1988 if (! best_regs_set
1989 || (in_same_string && d > best_regend[0])
1990 || (! in_same_string && ! MATCHING_IN_FIRST_STRING))
1991 {
1992 best_regs_set = 1;
1993 best_regend[0] = d; /* Never use regstart[0]. */
1994
1995 for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
1996 {
1997 best_regstart[mcnt] = regstart[mcnt];
1998 best_regend[mcnt] = regend[mcnt];
1999 }
2000 }
2001 goto fail;
2002 }
2003 /* If no failure points, don't restore garbage. */
2004 else if (best_regs_set)
2005 {
2006 restore_best_regs:
2007 /* Restore best match. */
2008 d = best_regend[0];
2009
2010 for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
2011 {
2012 regstart[mcnt] = best_regstart[mcnt];
2013 regend[mcnt] = best_regend[mcnt];
2014 }
2015 }
2016 }
2017
2018 /* If caller wants register contents data back, convert it
2019 to indices. */
15637ed4
RG
2020 if (regs)
2021 {
78ed81a3 2022 regs->start[0] = pos;
2023 if (MATCHING_IN_FIRST_STRING)
2024 regs->end[0] = d - string1;
2025 else
2026 regs->end[0] = d - string2 + size1;
2027 for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
15637ed4
RG
2028 {
2029 if (regend[mcnt] == (unsigned char *) -1)
2030 {
2031 regs->start[mcnt] = -1;
2032 regs->end[mcnt] = -1;
2033 continue;
2034 }
78ed81a3 2035 if (IS_IN_FIRST_STRING (regstart[mcnt]))
15637ed4
RG
2036 regs->start[mcnt] = regstart[mcnt] - string1;
2037 else
2038 regs->start[mcnt] = regstart[mcnt] - string2 + size1;
78ed81a3 2039
2040 if (IS_IN_FIRST_STRING (regend[mcnt]))
15637ed4
RG
2041 regs->end[mcnt] = regend[mcnt] - string1;
2042 else
2043 regs->end[mcnt] = regend[mcnt] - string2 + size1;
2044 }
2045 }
78ed81a3 2046 return d - pos - (MATCHING_IN_FIRST_STRING
2047 ? string1
2048 : string2 - size1);
2049 }
15637ed4 2050
78ed81a3 2051 /* Otherwise match next pattern command. */
15637ed4
RG
2052#ifdef SWITCH_ENUM_BUG
2053 switch ((int) ((enum regexpcode) *p++))
2054#else
2055 switch ((enum regexpcode) *p++)
2056#endif
2057 {
2058
78ed81a3 2059 /* \( [or `(', as appropriate] is represented by start_memory,
2060 \) by stop_memory. Both of those commands are followed by
2061 a register number in the next byte. The text matched
2062 within the \( and \) is recorded under that number. */
15637ed4 2063 case start_memory:
78ed81a3 2064 regstart[*p] = d;
2065 IS_ACTIVE (reg_info[*p]) = 1;
2066 MATCHED_SOMETHING (reg_info[*p]) = 0;
2067 p++;
2068 break;
15637ed4
RG
2069
2070 case stop_memory:
78ed81a3 2071 regend[*p] = d;
2072 IS_ACTIVE (reg_info[*p]) = 0;
2073
2074 /* If just failed to match something this time around with a sub-
2075 expression that's in a loop, try to force exit from the loop. */
2076 if ((! MATCHED_SOMETHING (reg_info[*p])
2077 || (enum regexpcode) p[-3] == start_memory)
2078 && (p + 1) != pend)
2079 {
2080 register unsigned char *p2 = p + 1;
2081 mcnt = 0;
2082 switch (*p2++)
2083 {
2084 case jump_n:
2085 is_a_jump_n = 1;
2086 case finalize_jump:
2087 case maybe_finalize_jump:
2088 case jump:
2089 case dummy_failure_jump:
2090 EXTRACT_NUMBER_AND_INCR (mcnt, p2);
2091 if (is_a_jump_n)
2092 p2 += 2;
2093 break;
2094 }
2095 p2 += mcnt;
2096
2097 /* If the next operation is a jump backwards in the pattern
2098 to an on_failure_jump, exit from the loop by forcing a
2099 failure after pushing on the stack the on_failure_jump's
2100 jump in the pattern, and d. */
2101 if (mcnt < 0 && (enum regexpcode) *p2++ == on_failure_jump)
2102 {
2103 EXTRACT_NUMBER_AND_INCR (mcnt, p2);
2104 PUSH_FAILURE_POINT (p2 + mcnt, d);
2105 goto fail;
2106 }
2107 }
2108 p++;
2109 break;
2110
2111 /* \<digit> has been turned into a `duplicate' command which is
2112 followed by the numeric value of <digit> as the register number. */
2113 case duplicate:
15637ed4
RG
2114 {
2115 int regno = *p++; /* Get which register to match against */
2116 register unsigned char *d2, *dend2;
2117
78ed81a3 2118 /* Where in input to try to start matching. */
2119 d2 = regstart[regno];
2120
2121 /* Where to stop matching; if both the place to start and
2122 the place to stop matching are in the same string, then
2123 set to the place to stop, otherwise, for now have to use
2124 the end of the first string. */
2125
2126 dend2 = ((IS_IN_FIRST_STRING (regstart[regno])
2127 == IS_IN_FIRST_STRING (regend[regno]))
15637ed4
RG
2128 ? regend[regno] : end_match_1);
2129 while (1)
2130 {
78ed81a3 2131 /* If necessary, advance to next segment in register
2132 contents. */
15637ed4
RG
2133 while (d2 == dend2)
2134 {
2135 if (dend2 == end_match_2) break;
2136 if (dend2 == regend[regno]) break;
2137 d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
2138 }
2139 /* At end of register contents => success */
2140 if (d2 == dend2) break;
2141
78ed81a3 2142 /* If necessary, advance to next segment in data. */
15637ed4
RG
2143 PREFETCH;
2144
78ed81a3 2145 /* How many characters left in this segment to match. */
15637ed4 2146 mcnt = dend - d;
78ed81a3 2147
2148 /* Want how many consecutive characters we can match in
2149 one shot, so, if necessary, adjust the count. */
2150 if (mcnt > dend2 - d2)
15637ed4 2151 mcnt = dend2 - d2;
78ed81a3 2152
2153 /* Compare that many; failure if mismatch, else move
2154 past them. */
2155 if (translate
2156 ? bcmp_translate ((char*)d, (char*)d2, mcnt, translate)
2157 : memcmp (d, d2, mcnt))
15637ed4
RG
2158 goto fail;
2159 d += mcnt, d2 += mcnt;
2160 }
2161 }
2162 break;
2163
2164 case anychar:
78ed81a3 2165 PREFETCH; /* Fetch a data character. */
2166 /* Match anything but a newline, maybe even a null. */
2167 if ((translate ? translate[*d] : *d) == '\n'
2168 || ((obscure_syntax & RE_DOT_NOT_NULL)
2169 && (translate ? translate[*d] : *d) == '\000'))
15637ed4 2170 goto fail;
78ed81a3 2171 SET_REGS_MATCHED;
2172 d++;
15637ed4
RG
2173 break;
2174
2175 case charset:
2176 case charset_not:
2177 {
78ed81a3 2178 int not = 0; /* Nonzero for charset_not. */
15637ed4
RG
2179 register int c;
2180 if (*(p - 1) == (unsigned char) charset_not)
2181 not = 1;
2182
78ed81a3 2183 PREFETCH; /* Fetch a data character. */
15637ed4
RG
2184
2185 if (translate)
78ed81a3 2186 c = translate[*d];
15637ed4
RG
2187 else
2188 c = *d;
2189
2190 if (c < *p * BYTEWIDTH
2191 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
2192 not = !not;
2193
2194 p += 1 + *p;
2195
2196 if (!not) goto fail;
78ed81a3 2197 SET_REGS_MATCHED;
2198 d++;
15637ed4
RG
2199 break;
2200 }
2201
2202 case begline:
78ed81a3 2203 if ((size1 != 0 && d == string1)
2204 || (size1 == 0 && size2 != 0 && d == string2)
2205 || (d && d[-1] == '\n')
2206 || (size1 == 0 && size2 == 0))
2207 break;
2208 else
2209 goto fail;
2210
15637ed4
RG
2211 case endline:
2212 if (d == end2
2213 || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
2214 break;
2215 goto fail;
2216
78ed81a3 2217 /* `or' constructs are handled by starting each alternative with
2218 an on_failure_jump that points to the start of the next
2219 alternative. Each alternative except the last ends with a
2220 jump to the joining point. (Actually, each jump except for
2221 the last one really jumps to the following jump, because
2222 tensioning the jumps is a hassle.) */
15637ed4
RG
2223
2224 /* The start of a stupid repeat has an on_failure_jump that points
78ed81a3 2225 past the end of the repeat text. This makes a failure point so
2226 that on failure to match a repetition, matching restarts past
2227 as many repetitions have been found with no way to fail and
2228 look for another one. */
15637ed4
RG
2229
2230 /* A smart repeat is similar but loops back to the on_failure_jump
78ed81a3 2231 so that each repetition makes another failure point. */
15637ed4
RG
2232
2233 case on_failure_jump:
78ed81a3 2234 on_failure:
2235 EXTRACT_NUMBER_AND_INCR (mcnt, p);
2236 PUSH_FAILURE_POINT (p + mcnt, d);
2237 break;
15637ed4 2238
78ed81a3 2239 /* The end of a smart repeat has a maybe_finalize_jump back.
2240 Change it either to a finalize_jump or an ordinary jump. */
15637ed4 2241 case maybe_finalize_jump:
78ed81a3 2242 EXTRACT_NUMBER_AND_INCR (mcnt, p);
15637ed4
RG
2243 {
2244 register unsigned char *p2 = p;
78ed81a3 2245 /* Compare what follows with the beginning of the repeat.
15637ed4 2246 If we can establish that there is nothing that they would
78ed81a3 2247 both match, we can change to finalize_jump. */
2248 while (p2 + 1 != pend
15637ed4
RG
2249 && (*p2 == (unsigned char) stop_memory
2250 || *p2 == (unsigned char) start_memory))
78ed81a3 2251 p2 += 2; /* Skip over reg number. */
15637ed4
RG
2252 if (p2 == pend)
2253 p[-3] = (unsigned char) finalize_jump;
2254 else if (*p2 == (unsigned char) exactn
2255 || *p2 == (unsigned char) endline)
2256 {
2257 register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
2258 register unsigned char *p1 = p + mcnt;
2259 /* p1[0] ... p1[2] are an on_failure_jump.
78ed81a3 2260 Examine what follows that. */
15637ed4
RG
2261 if (p1[3] == (unsigned char) exactn && p1[5] != c)
2262 p[-3] = (unsigned char) finalize_jump;
2263 else if (p1[3] == (unsigned char) charset
2264 || p1[3] == (unsigned char) charset_not)
2265 {
2266 int not = p1[3] == (unsigned char) charset_not;
2267 if (c < p1[4] * BYTEWIDTH
2268 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
2269 not = !not;
78ed81a3 2270 /* `not' is 1 if c would match. */
2271 /* That means it is not safe to finalize. */
15637ed4
RG
2272 if (!not)
2273 p[-3] = (unsigned char) finalize_jump;
2274 }
2275 }
2276 }
78ed81a3 2277 p -= 2; /* Point at relative address again. */
15637ed4
RG
2278 if (p[-1] != (unsigned char) finalize_jump)
2279 {
78ed81a3 2280 p[-1] = (unsigned char) jump;
15637ed4
RG
2281 goto nofinalize;
2282 }
78ed81a3 2283 /* Note fall through. */
2284
2285 /* The end of a stupid repeat has a finalize_jump back to the
2286 start, where another failure point will be made which will
2287 point to after all the repetitions found so far. */
2288
2289 /* Take off failure points put on by matching on_failure_jump
2290 because didn't fail. Also remove the register information
2291 put on by the on_failure_jump. */
2292 case finalize_jump:
2293 POP_FAILURE_POINT ();
2294 /* Note fall through. */
2295
2296 /* Jump without taking off any failure points. */
2297 case jump:
15637ed4 2298 nofinalize:
78ed81a3 2299 EXTRACT_NUMBER_AND_INCR (mcnt, p);
2300 p += mcnt;
15637ed4
RG
2301 break;
2302
78ed81a3 2303 case dummy_failure_jump:
2304 /* Normally, the on_failure_jump pushes a failure point, which
2305 then gets popped at finalize_jump. We will end up at
2306 finalize_jump, also, and with a pattern of, say, `a+', we
2307 are skipping over the on_failure_jump, so we have to push
2308 something meaningless for finalize_jump to pop. */
2309 PUSH_FAILURE_POINT (0, 0);
2310 goto nofinalize;
2311
2312
2313 /* Have to succeed matching what follows at least n times. Then
2314 just handle like an on_failure_jump. */
2315 case succeed_n:
2316 EXTRACT_NUMBER (mcnt, p + 2);
2317 /* Originally, this is how many times we HAVE to succeed. */
2318 if (mcnt)
2319 {
2320 mcnt--;
2321 p += 2;
2322 STORE_NUMBER_AND_INCR (p, mcnt);
2323 }
2324 else if (mcnt == 0)
2325 {
2326 p[2] = unused;
2327 p[3] = unused;
2328 goto on_failure;
2329 }
2330 else
2331 {
2332 fprintf (stderr, "regex: the succeed_n's n is not set.\n");
2333 exit (1);
15637ed4 2334 }
78ed81a3 2335 break;
2336
2337 case jump_n:
2338 EXTRACT_NUMBER (mcnt, p + 2);
2339 /* Originally, this is how many times we CAN jump. */
2340 if (mcnt)
2341 {
2342 mcnt--;
2343 STORE_NUMBER(p + 2, mcnt);
2344 goto nofinalize; /* Do the jump without taking off
2345 any failure points. */
2346 }
2347 /* If don't have to jump any more, skip over the rest of command. */
2348 else
2349 p += 4;
2350 break;
2351
2352 case set_number_at:
2353 {
2354 register unsigned char *p1;
2355
2356 EXTRACT_NUMBER_AND_INCR (mcnt, p);
2357 p1 = p + mcnt;
2358 EXTRACT_NUMBER_AND_INCR (mcnt, p);
2359 STORE_NUMBER (p1, mcnt);
2360 break;
2361 }
2362
2363 /* Ignore these. Used to ignore the n of succeed_n's which
2364 currently have n == 0. */
2365 case unused:
2366 break;
2367
2368 case wordbound:
2369 if (AT_WORD_BOUNDARY)
15637ed4
RG
2370 break;
2371 goto fail;
2372
2373 case notwordbound:
78ed81a3 2374 if (AT_WORD_BOUNDARY)
15637ed4
RG
2375 goto fail;
2376 break;
2377
2378 case wordbeg:
78ed81a3 2379 /* Have to check if AT_STRINGS_BEG before looking at d - 1. */
2380 if (IS_A_LETTER (d) && (AT_STRINGS_BEG || !IS_A_LETTER (d - 1)))
15637ed4
RG
2381 break;
2382 goto fail;
2383
2384 case wordend:
78ed81a3 2385 /* Have to check if AT_STRINGS_BEG before looking at d - 1. */
2386 if (!AT_STRINGS_BEG && IS_A_LETTER (d - 1)
2387 && (!IS_A_LETTER (d) || AT_STRINGS_END))
15637ed4
RG
2388 break;
2389 goto fail;
2390
2391#ifdef emacs
2392 case before_dot:
78ed81a3 2393 if (PTR_CHAR_POS (d) >= point)
15637ed4
RG
2394 goto fail;
2395 break;
2396
2397 case at_dot:
78ed81a3 2398 if (PTR_CHAR_POS (d) != point)
15637ed4
RG
2399 goto fail;
2400 break;
2401
2402 case after_dot:
78ed81a3 2403 if (PTR_CHAR_POS (d) <= point)
15637ed4
RG
2404 goto fail;
2405 break;
2406
2407 case wordchar:
2408 mcnt = (int) Sword;
2409 goto matchsyntax;
2410
2411 case syntaxspec:
2412 mcnt = *p++;
2413 matchsyntax:
2414 PREFETCH;
2415 if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
78ed81a3 2416 SET_REGS_MATCHED;
15637ed4
RG
2417 break;
2418
2419 case notwordchar:
2420 mcnt = (int) Sword;
2421 goto matchnotsyntax;
2422
2423 case notsyntaxspec:
2424 mcnt = *p++;
2425 matchnotsyntax:
2426 PREFETCH;
2427 if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
78ed81a3 2428 SET_REGS_MATCHED;
2429 break;
2430
2431#else /* not emacs */
2432
15637ed4
RG
2433 case wordchar:
2434 PREFETCH;
78ed81a3 2435 if (!IS_A_LETTER (d))
2436 goto fail;
2437 SET_REGS_MATCHED;
15637ed4
RG
2438 break;
2439
2440 case notwordchar:
2441 PREFETCH;
78ed81a3 2442 if (IS_A_LETTER (d))
2443 goto fail;
2444 SET_REGS_MATCHED;
15637ed4 2445 break;
78ed81a3 2446
15637ed4
RG
2447#endif /* not emacs */
2448
2449 case begbuf:
78ed81a3 2450 if (AT_STRINGS_BEG)
2451 break;
2452 goto fail;
15637ed4 2453
78ed81a3 2454 case endbuf:
2455 if (AT_STRINGS_END)
15637ed4
RG
2456 break;
2457 goto fail;
2458
2459 case exactn:
2460 /* Match the next few pattern characters exactly.
78ed81a3 2461 mcnt is how many characters to match. */
15637ed4 2462 mcnt = *p++;
78ed81a3 2463 /* This is written out as an if-else so we don't waste time
2464 testing `translate' inside the loop. */
2465 if (translate)
15637ed4
RG
2466 {
2467 do
2468 {
2469 PREFETCH;
2470 if (translate[*d++] != *p++) goto fail;
2471 }
2472 while (--mcnt);
2473 }
2474 else
2475 {
2476 do
2477 {
2478 PREFETCH;
2479 if (*d++ != *p++) goto fail;
2480 }
2481 while (--mcnt);
2482 }
78ed81a3 2483 SET_REGS_MATCHED;
2484 break;
15637ed4 2485 }
78ed81a3 2486 continue; /* Successfully executed one pattern command; keep going. */
15637ed4 2487
78ed81a3 2488 /* Jump here if any matching operation fails. */
15637ed4
RG
2489 fail:
2490 if (stackp != stackb)
2491 /* A restart point is known. Restart there and pop it. */
2492 {
78ed81a3 2493 short last_used_reg, this_reg;
2494
2495 /* If this failure point is from a dummy_failure_point, just
2496 skip it. */
15637ed4 2497 if (!stackp[-2])
78ed81a3 2498 {
2499 POP_FAILURE_POINT ();
2500 goto fail;
2501 }
2502
2503 d = *--stackp;
15637ed4 2504 p = *--stackp;
78ed81a3 2505 if (d >= string1 && d <= end1)
15637ed4 2506 dend = end_match_1;
78ed81a3 2507 /* Restore register info. */
2508 last_used_reg = (short) (int) *--stackp;
2509
2510 /* Make the ones that weren't saved -1 or 0 again. */
2511 for (this_reg = RE_NREGS - 1; this_reg > last_used_reg; this_reg--)
2512 {
2513 regend[this_reg] = (unsigned char *) -1;
2514 regstart[this_reg] = (unsigned char *) -1;
2515 IS_ACTIVE (reg_info[this_reg]) = 0;
2516 MATCHED_SOMETHING (reg_info[this_reg]) = 0;
2517 }
2518
2519 /* And restore the rest from the stack. */
2520 for ( ; this_reg > 0; this_reg--)
2521 {
2522 reg_info[this_reg] = *(struct register_info *) *--stackp;
2523 regend[this_reg] = *--stackp;
2524 regstart[this_reg] = *--stackp;
2525 }
15637ed4 2526 }
78ed81a3 2527 else
2528 break; /* Matching at this starting point really fails. */
15637ed4 2529 }
78ed81a3 2530
2531 if (best_regs_set)
2532 goto restore_best_regs;
2533 return -1; /* Failure to match. */
15637ed4
RG
2534}
2535
78ed81a3 2536
15637ed4 2537static int
78ed81a3 2538bcmp_translate (char *s1, char *s2, int len, unsigned char *translate)
15637ed4 2539{
78ed81a3 2540 register unsigned char *p1 = (unsigned char*)s1;
2541 register unsigned char *p2 = (unsigned char*)s2;
15637ed4
RG
2542 while (len)
2543 {
2544 if (translate [*p1++] != translate [*p2++]) return 1;
2545 len--;
2546 }
2547 return 0;
2548}
78ed81a3 2549
2550
15637ed4 2551\f
78ed81a3 2552/* Entry points compatible with 4.2 BSD regex library. */
15637ed4 2553
78ed81a3 2554#if 0
15637ed4
RG
2555
2556static struct re_pattern_buffer re_comp_buf;
2557
2558char *
2559re_comp (char *s)
2560{
2561 if (!s)
2562 {
2563 if (!re_comp_buf.buffer)
2564 return "No previous regular expression";
2565 return 0;
2566 }
2567
2568 if (!re_comp_buf.buffer)
2569 {
78ed81a3 2570 if (!(re_comp_buf.buffer = (char *) malloc (200)))
15637ed4
RG
2571 return "Memory exhausted";
2572 re_comp_buf.allocated = 200;
78ed81a3 2573 if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
15637ed4
RG
2574 return "Memory exhausted";
2575 }
2576 return re_compile_pattern (s, strlen (s), &re_comp_buf);
2577}
2578
2579int
2580re_exec (char *s)
2581{
2582 int len = strlen (s);
78ed81a3 2583 return 0 <= re_search (&re_comp_buf, s, len, 0, len,
2584 (struct re_registers *) 0);
15637ed4 2585}
78ed81a3 2586#endif /* not emacs */
2587
15637ed4 2588
15637ed4
RG
2589\f
2590#ifdef test
2591
2592#include <stdio.h>
2593
78ed81a3 2594/* Indexed by a character, gives the upper case equivalent of the
2595 character. */
15637ed4 2596
78ed81a3 2597char upcase[0400] =
15637ed4
RG
2598 { 000, 001, 002, 003, 004, 005, 006, 007,
2599 010, 011, 012, 013, 014, 015, 016, 017,
2600 020, 021, 022, 023, 024, 025, 026, 027,
2601 030, 031, 032, 033, 034, 035, 036, 037,
2602 040, 041, 042, 043, 044, 045, 046, 047,
2603 050, 051, 052, 053, 054, 055, 056, 057,
2604 060, 061, 062, 063, 064, 065, 066, 067,
2605 070, 071, 072, 073, 074, 075, 076, 077,
2606 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
2607 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
2608 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
2609 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
2610 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
2611 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
2612 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
2613 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
2614 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
2615 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
2616 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
2617 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
2618 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
2619 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
2620 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
2621 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
2622 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
2623 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
2624 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
2625 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
2626 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
2627 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
2628 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
2629 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
2630 };
2631
78ed81a3 2632#ifdef canned
2633
2634#include "tests.h"
2635
2636typedef enum { extended_test, basic_test } test_type;
2637
2638/* Use this to run the tests we've thought of. */
2639
2640void
2641main ()
2642{
2643 test_type t = extended_test;
2644
2645 if (t == basic_test)
2646 {
2647 printf ("Running basic tests:\n\n");
2648 test_posix_basic ();
2649 }
2650 else if (t == extended_test)
2651 {
2652 printf ("Running extended tests:\n\n");
2653 test_posix_extended ();
2654 }
2655}
2656
2657#else /* not canned */
2658
2659/* Use this to run interactive tests. */
2660
2661void
15637ed4
RG
2662main (int argc, char **argv)
2663{
2664 char pat[80];
2665 struct re_pattern_buffer buf;
2666 int i;
2667 char c;
2668 char fastmap[(1 << BYTEWIDTH)];
2669
2670 /* Allow a command argument to specify the style of syntax. */
2671 if (argc > 1)
2672 obscure_syntax = atoi (argv[1]);
2673
2674 buf.allocated = 40;
78ed81a3 2675 buf.buffer = (char *) malloc (buf.allocated);
15637ed4
RG
2676 buf.fastmap = fastmap;
2677 buf.translate = upcase;
2678
2679 while (1)
2680 {
2681 gets (pat);
2682
2683 if (*pat)
2684 {
2685 re_compile_pattern (pat, strlen(pat), &buf);
2686
2687 for (i = 0; i < buf.used; i++)
2688 printchar (buf.buffer[i]);
2689
2690 putchar ('\n');
2691
2692 printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
2693
2694 re_compile_fastmap (&buf);
2695 printf ("Allowed by fastmap: ");
2696 for (i = 0; i < (1 << BYTEWIDTH); i++)
2697 if (fastmap[i]) printchar (i);
2698 putchar ('\n');
2699 }
2700
2701 gets (pat); /* Now read the string to match against */
2702
2703 i = re_match (&buf, pat, strlen (pat), 0, 0);
2704 printf ("Match value %d.\n", i);
2705 }
2706}
2707
78ed81a3 2708#endif
2709
2710
15637ed4 2711#ifdef NOTDEF
78ed81a3 2712void
2713print_buf (struct re_pattern_buffer *bufpbufp)
15637ed4
RG
2714{
2715 int i;
2716
2717 printf ("buf is :\n----------------\n");
2718 for (i = 0; i < bufp->used; i++)
2719 printchar (bufp->buffer[i]);
2720
2721 printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
2722
2723 printf ("Allowed by fastmap: ");
2724 for (i = 0; i < (1 << BYTEWIDTH); i++)
2725 if (bufp->fastmap[i])
2726 printchar (i);
2727 printf ("\nAllowed by translate: ");
2728 if (bufp->translate)
2729 for (i = 0; i < (1 << BYTEWIDTH); i++)
2730 if (bufp->translate[i])
2731 printchar (i);
2732 printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
2733 printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
2734}
78ed81a3 2735#endif /* NOTDEF */
15637ed4 2736
78ed81a3 2737void
15637ed4
RG
2738printchar (char c)
2739{
78ed81a3 2740 if (c < 040 || c >= 0177)
15637ed4
RG
2741 {
2742 putchar ('\\');
2743 putchar (((c >> 6) & 3) + '0');
2744 putchar (((c >> 3) & 7) + '0');
2745 putchar ((c & 7) + '0');
2746 }
2747 else
2748 putchar (c);
2749}
2750
78ed81a3 2751void
15637ed4
RG
2752error (char *string)
2753{
2754 puts (string);
2755 exit (1);
2756}
15637ed4 2757#endif /* test */