* Copyright (c) 1980 Regents of the University of California.
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid
[] = "@(#)regex.c 5.2 (Berkeley) %G%";
#endif LIBC_SCCS and not lint
* routines to do regular expression matching
* ... returns 0 if the string s was compiled successfully,
* a pointer to an error message otherwise.
* If passed 0 or a null string returns without changing
* the currently compiled re (see note 11 below).
* ... returns 1 if the string s matches the last compiled regular
* 0 if the string s failed to match the last compiled
* regular expression, and
* -1 if the compiled regular expression was invalid
* (indicating an internal error).
* The strings passed to both re_comp and re_exec may have trailing or
* embedded newline characters; they are terminated by nulls.
* The identity of the author of these routines is lost in antiquity;
* this is essentially the same as the re code in the original V6 ed.
* The regular expressions recognized are described below. This description
* is essentially the same as that for ed.
* A regular expression specifies a set of strings of characters.
* A member of this set of strings is said to be matched by
* the regular expression. In the following specification for
* regular expressions the word `character' means any character but NUL.
* 1. Any character except a special character matches itself.
* Special characters are the regular expression delimiter plus
* \ [ . and sometimes ^ * $.
* 2. A . matches any character.
* 3. A \ followed by any character except a digit or ( )
* matches that character.
* 4. A nonempty string s bracketed [s] (or [^s]) matches any
* character in (or not in) s. In s, \ has no special meaning,
* and ] may only appear as the first letter. A substring
* a-b, with a and b in ascending ASCII order, stands for
* the inclusive range of ASCII characters.
* 5. A regular expression of form 1-4 followed by * matches a
* sequence of 0 or more matches of the regular expression.
* 6. A regular expression, x, of form 1-8, bracketed \(x\)
* matches what x matches.
* 7. A \ followed by a digit n matches a copy of the string that the
* bracketed regular expression beginning with the nth \( matched.
* 8. A regular expression of form 1-8, x, followed by a regular
* expression of form 1-7, y matches a match for x followed by
* a match for y, with the x match being as long as possible
* while still permitting a y match.
* 9. A regular expression of form 1-8 preceded by ^ (or followed
* by $), is constrained to matches that begin at the left
* (or end at the right) end of a line.
* 10. A regular expression of form 1-9 picks out the longest among
* the leftmost matches in a line.
* 11. An empty regular expression stands for a copy of the last
* regular expression encountered.
static char expbuf
[ESIZE
], *braslist
[NBRA
], *braelist
[NBRA
];
* compile the regular expression argument into a dfa
register char *ep
= expbuf
;
char *bracketp
= &bracket
[0];
static char *retoolong
= "Regular expression too long";
#define comerr(msg) {expbuf[0] = 0; numbra = 0; return(msg); }
if (sp
== 0 || *sp
== '\0') {
return("No previous regular expression");
if (ep
>= &expbuf
[ESIZE
])
if ((c
= *sp
++) == '\0') {
if (lastep
== 0 || *lastep
== CBRA
|| *lastep
== CKET
)
if ((c
= *sp
++) == '^') {
if (c
== '-' && ep
[-1] != 0) {
if ((c
= *sp
++) == ']') {
if (ep
>= &expbuf
[ESIZE
])
if (ep
>= &expbuf
[ESIZE
])
} while ((c
= *sp
++) != ']');
if ((c
= *sp
++) == '(') {
comerr("too many \\(\\) pairs");
if (c
>= '1' && c
< ('1' + NBRA
)) {
* match the argument string against the compiled re
register char *p2
= expbuf
;
for (c
= 0; c
< NBRA
; c
++) {
return((advance(p1
, p2
)));
* fast check for first character
if (rv
= advance(p1
, p2
))
if (rv
= advance(p1
, p2
))
* try to match the next thing in the dfa
if (cclass(ep
, *lp
++, 1)) {
if (cclass(ep
, *lp
++, 0)) {
if (braelist
[i
= *ep
++] == 0)
lp
+= braelist
[i
] - braslist
[i
];
if (braelist
[i
= *ep
++] == 0)
ct
= braelist
[i
] - braslist
[i
];
if (rv
= advance(lp
, ep
))
while (cclass(ep
, *lp
++, ep
[-1] == (CCL
|CSTAR
)))
if (rv
= advance(lp
, ep
))