static char sccsid
[] = "@(#)regexp.c 4.1 (Berkeley) %G%";
boolean l_onecase
; /* true if upper and lower equivalent */
/*
 * makelower - yield the lower-case form of an upper-case character;
 * any other character is yielded unchanged.
 *
 * The argument is converted to unsigned char before it is handed to
 * isupper()/tolower(): passing a negative plain char to the <ctype.h>
 * functions is undefined behaviour.
 *
 * NOTE: the argument is evaluated more than once, so it must not have
 * side effects.
 */
#define makelower(c) (isupper((unsigned char)(c)) ? tolower((unsigned char)(c)) : (c))
/* STRNCMP - like strncmp except that we convert the
* first string to lower case before comparing
if (*s2
- makelower(*s1
))
return (*s2
- makelower(*s1
));
/* The following routine converts an irregular expression to
* Either meta symbols (\a \d or \p) or character strings or
* operations ( alternation or parenthesizing ) can be
* specified. Each starts with a descriptor byte. The descriptor
* byte has STR set for strings, META set for meta symbols
* and OPER set for operations.
* The descriptor byte can also have the OPT bit set if the object
* defined is optional. Also ALT can be set to indicate an alternation.
* For metasymbols the byte following the descriptor byte identifies
* the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For
* strings the byte after the descriptor is a character count for
* meta symbols := descriptor
* operations := descriptor
* handy macros for accessing parts of match blocks
/*
 * Accessors for the parts of a block in a converted expression.
 * A is a char pointer to the block's descriptor byte.  Every use of
 * the argument is parenthesized so that an expression argument
 * (for example p + n, or a ?: b) expands as intended.  The
 * dereferencing forms remain lvalues and may be assigned through.
 */
#define MSYM(A) (*((A) + 1)) /* symbol in a meta symbol block */
#define MNEXT(A) ((A) + 2) /* character following a metasymbol block */
#define OSYM(A) (*((A) + 1)) /* symbol in an operation block */
#define OCNT(A) (*((A) + 2)) /* character count */
#define ONEXT(A) ((A) + 3) /* next character after the operation */
#define OPTR(A) ((A) + *((A) + 2)) /* place pointed to by the operator */
#define SCNT(A) (*((A) + 1)) /* byte count of a string */
#define SSTR(A) ((A) + 2) /* address of the string */
#define SNEXT(A) ((A) + 2 + *((A) + 1)) /* character following the string */
* bit flags in the descriptor
char *ure
; /* pointer current position in unconverted exp */
char *ccre
; /* pointer to current position in converted exp*/
char *re
; /* unconverted irregular expression */
register char *cre
; /* pointer to converted regular expression */
/* allocate room for the converted expression */
cre
= malloc (4 * strlen(re
) + 3);
/* start the conversion with a \a */
/* start the conversion (it's recursive) */
register char *cs
; /* pointer to current symbol in converted exp */
register char c
; /* character being processed */
register char *acs
; /* pointer to last alternate */
/* let the conversion begin */
/* escaped characters are just characters */
/* normal(?) metacharacters */
if (acs
!= NIL
&& acs
!= cs
) {
/* just put the symbol in */
if (acs
!= NIL
&& acs
!= cs
) {
/* mark the last match sequence as optional */
/* recurse and define a subexpression */
if (acs
!= NIL
&& acs
!= cs
) {
OCNT(cs
) = ccre
- cs
; /* offset to next symbol */
/* return from a recursion */
/* mark the last match sequence as having an alternate */
/* the third byte will contain an offset to jump over the */
/* alternate match in case the first did not fail */
if (acs
!= NIL
&& acs
!= cs
)
OCNT(ccre
) = ccre
- acs
; /* make a back pointer */
acs
= cs
; /* remember that the pointer is to be filled */
/* if it's not a metasymbol, just build a character string */
* The following routine recognises an irregular expression
* with the following special characters:
* \? - means last match was optional
* \a - matches any number of characters
* \d - matches any number of spaces and tabs
* \p - matches any number of alphanumeric
* characters matched will be copied into
* the area pointed to by 'name'.
* \( \) - grouping used mostly for alternation and
* The irregular expression must be translated to internal form
* prior to calling this routine
* The value returned is the pointer to the first non \a
boolean _escaped
; /* true if we are currently _escaped */
char *_start
; /* start of string */
expmatch (s
, re
, mstring
)
register char *s
; /* string to check for a match in */
register char *re
; /* a converted irregular expression */
register char *mstring
; /* where to put whatever matches a \p */
register char *cs
; /* the current symbol */
register char *ptr
,*s1
; /* temporary pointer */
boolean matched
; /* a temporary boolean */
/* loop till expression string is exhausted (or at least pretty tired) */
switch (*cs
& (OPER
| STR
| META
)) {
/* try to match a string */
matched
= !STRNCMP (s
, SSTR(cs
), SCNT(cs
));
/* alternation, skip to next expression */
/* the match is optional */
matched
= 1; /* indicate a successful match */
/* no match, error return */
/* an operator, do something fancy */
/* this is an alternation */
/* last thing in the alternation was a match, skip ahead */
/* no match, keep trying */
/* this is a grouping, recurse */
ptr
= expmatch (s
, ONEXT(cs
), mstring
);
/* the subexpression matched */
/* alternation, skip to next expression */
/* the match is optional */
matched
= 1; /* indicate a successful match */
/* no match, error return */
/* try to match a metasymbol */
/* try to match anything and remember what was matched */
* This is really the same as trying to match the
* remaining parts of the expression to any subset
ptr
= expmatch (s1
, MNEXT(cs
), mstring
);
if (ptr
!= NIL
&& s1
!= s
) {
/* we have a match, remember the match */
strncpy (mstring
, s
, s1
- s
);
} else if (ptr
!= NIL
&& (*cs
& OPT
)) {
/* it was optional so no match is ok */
/* not optional and we still matched */
if (!isalnum(*s1
) && *s1
!= '_')
_escaped
= _escaped
? FALSE
: TRUE
;
/* try to match anything */
* This is really the same as trying to match the
* remaining parts of the expression to any subset
ptr
= expmatch (s1
, MNEXT(cs
), mstring
);
if (ptr
!= NIL
&& s1
!= s
) {
} else if (ptr
!= NIL
&& (*cs
& OPT
)) {
/* it was optional so no match is ok */
/* not optional and we still matched */
_escaped
= _escaped
? FALSE
: TRUE
;
/* fail if we are currently _escaped */
/* match any number of tabs and spaces */
while (*s
== ' ' || *s
== '\t')
if (s
!= ptr
|| s
== _start
) {
} else if (*s
== '\n' || *s
== '\0') {
/* no match, error return */
/* check for end of line */
if (*s
== '\0' || *s
== '\n') {
/* no match, error return */
/* check for start of line */
/* no match, error return */
/* end of a subexpression, return success */