/* $Header: search.c,v 4.3 85/05/01 11:50:16 lwall Exp $
* Revision 4.3 85/05/01 11:50:16 lwall
* Baseline for release with 4.3bsd.
/* string search routines */
/* Copyright (c) 1981,1980 James Gosling */
/* Modified Aug. 12, 1981 by Tom London to include regular expressions
as in ed. RE stuff hacked over by jag to correct a few major problems,
mainly dealing with searching within the buffer rather than copying
each line to a separate array. Newlines can now appear in RE's */
/* Ripped to shreds and glued back together to make a search package,
* July 6, 1984, by Larry Wall. (If it doesn't work, it's probably my fault.)
* Buffer, window, and mlisp stuff gone.
* Translation tables reduced to 1 table.
* Expression buffer is now dynamically allocated.
* Character classes now implemented with a bitmap.
#define BMAPSIZ (127 / BITSPERBYTE + 1)
/* meta characters in the "compiled" form of a regular expression */
#define CBRA 2 /* \( -- begin bracket */
#define CCHR 4 /* a vanilla character */
#define CDOT 6 /* . -- match anything except a newline */
#define CCL 8 /* [...] -- character class */
#define NCCL 10 /* [^...] -- negated character class */
#define CDOL 12 /* $ -- matches the end of a line */
#define CEND 14 /* The end of the pattern */
#define CKET 16 /* \) -- close bracket */
#define CBACK 18 /* \N -- backreference to the Nth bracketed
#define CIRC 20 /* ^ matches the beginning of a line */
#define WORD 32 /* matches word character \w */
#define NWORD 34 /* matches non-word characer \W */
#define WBOUND 36 /* matches word boundary \b */
#define NWBOUND 38 /* matches non-(word boundary) \B */
#define STAR 01 /* * -- Kleene star, repeats the previous
REas many times as possible; the value
ORs with the other operator types */
typedef char TRANSTABLE
[ASCSIZ
];
static TRANSTABLE trans
= {
0000,0001,0002,0003,0004,0005,0006,0007,
0010,0011,0012,0013,0014,0015,0016,0017,
0020,0021,0022,0023,0024,0025,0026,0027,
0030,0031,0032,0033,0034,0035,0036,0037,
0040,0041,0042,0043,0044,0045,0046,0047,
0050,0051,0052,0053,0054,0055,0056,0057,
0060,0061,0062,0063,0064,0065,0066,0067,
0070,0071,0072,0073,0074,0075,0076,0077,
0100,0101,0102,0103,0104,0105,0106,0107,
0110,0111,0112,0113,0114,0115,0116,0117,
0120,0121,0122,0123,0124,0125,0126,0127,
0130,0131,0132,0133,0134,0135,0136,0137,
0140,0141,0142,0143,0144,0145,0146,0147,
0150,0151,0152,0153,0154,0155,0156,0157,
0160,0161,0162,0163,0164,0165,0166,0167,
0170,0171,0172,0173,0174,0175,0176,0177,
static bool folding
= FALSE
;
static char *FirstCharacter
;
for (i
= 0; i
< ASCSIZ
; i
++)
/* the following must start off zeroed */
static char *gbr_str
= Nullch
;
int length
= compex
->braelist
[n
] - compex
->braslist
[n
];
if (!compex
->nbra
|| n
> compex
->nbra
|| !compex
->braelist
[n
] || length
<0)
growstr(&gbr_str
, &gbr_siz
, length
+1);
safecpy(gbr_str
, compex
->braslist
[n
], length
+1);
for (i
= 'A'; i
<= 'Z'; i
++)
for (i
= 'A'; i
<= 'Z'; i
++)
/* Compile the given regular expression into a [secret] internal format */
compile (compex
, strp
, RE
, fold
)
char **alt
= compex
->alternatives
;
char *retmes
= "Badly formed search string";
case_fold(compex
->do_folding
= fold
);
compex
->expbuf
= safemalloc(84);
ep
= compex
->expbuf
; /* point at expression buffer */
*alt
++ = ep
; /* first alternative starts here */
bracketp
= bracket
; /* first bracket goes here */
if (*strp
== 0) { /* nothing to compile? */
if (*ep
== 0) /* nothing there yet? */
return "Null search string";
return Nullch
; /* just keep old expression */
compex
->nbra
= 0; /* no brackets yet */
if (ep
- compex
->expbuf
>= compex
->eblen
)
c
= *strp
++; /* fetch next char of pattern */
if (c
== 0) { /* end of pattern? */
if (bracketp
!= bracket
) { /* balanced brackets? */
retmes
= "Unbalanced parens";
*ep
++ = CEND
; /* terminate expression */
*alt
++ = 0; /* terminal alternative list */
compex->eblen = ep - compex->expbuf + 1;
compex->expbuf = saferealloc(compex->expbuf,compex->eblen+4); */
return Nullch
; /* return success */
if (!RE
) { /* just a normal search string? */
*ep
++ = CCHR
; /* everything is a normal char */
else /* it is a regular expression */
case '\\': /* meta something */
if (compex
->nbra
>= NBRA
) {
retmes
= "Too many parens";
*bracketp
++ = ++compex
->nbra
;
retmes
= "No \\| in parens"; /* Alas! */
if (bracketp
<= bracket
) {
retmes
= "Unmatched right paren";
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
if (lastep
== 0 || *lastep
== CBRA
|| *lastep
== CKET
|| (*lastep
&STAR
)|| *lastep
>NWORD
)
if (ep
!= compex
->expbuf
&& ep
[-1] != CEND
)
if (*strp
!= 0 && (*strp
!= '\\' || strp
[1] != '|'))
case '[': { /* character class */
if (ep
- compex
->expbuf
>= compex
->eblen
- BMAPSIZ
)
grow_eb(compex
); /* reserve bitmap */
for (i
= BMAPSIZ
; i
; --i
)
if ((c
= *strp
++) == '^') {
*ep
++ = NCCL
; /* negated */
*ep
++ = CCL
; /* normal */
i
= 0; /* remember oldchar */
if (*strp
== '-' && *(++strp
))
ep
[c
/ BITSPERBYTE
] |= 1 << (c
% BITSPERBYTE
);
ep
[(c
^ 32) / BITSPERBYTE
] |=
1 << ((c
^ 32) % BITSPERBYTE
);
/* set the other bit too */
} while ((c
= *strp
++) != ']');
compex
->expbuf
= saferealloc(compex
->expbuf
, (MEM_SIZE
)compex
->eblen
+ 4);
register char *p1
= addr
;
register char *trt
= trans
;
if (compex
->nbra
) { /* any brackets? */
for (c
= 0; c
<= compex
->nbra
; c
++)
compex
->braslist
[c
] = compex
->braelist
[c
] = Nullch
;
compex
->brastr
= savestr(p1
); /* in case p1 is not static */
p1
= compex
->brastr
; /* ! */
case_fold(compex
->do_folding
); /* make sure table is correct */
FirstCharacter
= p1
; /* for ^ tests */
if (compex
->expbuf
[0] == CCHR
&& !compex
->alternatives
[1]) {
c
= trt
[compex
->expbuf
[1]]; /* fast check for first character */
if (trt
[*p1
] == c
&& advance (compex
, p1
, compex
->expbuf
))
else { /* regular algorithm */
register char **alt
= compex
->alternatives
;
if (advance (compex
, p1
, *alt
++))
/* advance the match of the regular expression starting at ep along the
string lp, simulates an NDFSA */
register char *trt
= trans
;
while ((*ep
& STAR
) || *lp
|| *ep
== CIRC
|| *ep
== CKET
)
if (trt
[*ep
++] != trt
[*lp
]) return FALSE
;
if (*lp
== '\n') return FALSE
;
if (lp
== FirstCharacter
|| lp
[-1]=='\n')
if ((lp
== FirstCharacter
|| !isalnum(lp
[-1])) !=
(!*lp
|| !isalnum(*lp
)) )
if ((lp
== FirstCharacter
|| !isalnum(lp
[-1])) ==
if (cclass (ep
, *lp
, 1)) {
if (cclass (ep
, *lp
, 0)) {
compex
->braslist
[*ep
++] = lp
;
compex
->braelist
[i
] = lp
;
compex
->braelist
[0] = lp
;
compex
->braslist
[0] = compex
->braslist
[i
];
if (compex
->braelist
[i
= *ep
++] == 0) {
fputs("bad braces\n",stdout
) FLUSH
;
if (backref (compex
, i
, lp
)) {
lp
+= compex
->braelist
[i
] - compex
->braslist
[i
];
if (compex
->braelist
[i
= *ep
++] == 0) {
fputs("bad braces\n",stdout
) FLUSH
;
while (backref (compex
, i
, lp
)) {
lp
+= compex
->braelist
[i
] - compex
->braslist
[i
];
if (advance (compex
, lp
, ep
))
lp
-= compex
->braelist
[i
] - compex
->braslist
[i
];
while (*lp
++ && lp
[-1] != '\n');
while (*lp
++ && isalnum(lp
[-1]));
while (*lp
++ && !isalnum(lp
[-1]));
while (*lp
++ && trt
[lp
[-1]] == trt
[*ep
]);
while (*lp
++ && cclass (ep
, lp
[-1], ep
[-1] == (CCL
| STAR
)));
if (advance (compex
, lp
, ep
))
fputs("Badly compiled pattern\n",stdout
) FLUSH
;
if (*ep
== CEND
|| *ep
== CDOL
) {
bp
= compex
->braslist
[i
];
while (*lp
&& *bp
== *lp
) {
if (bp
>= compex
->braelist
[i
])
if (set
[c
>> 3] & 1 << (c
& 7))
if (set
[c
/ BITSPERBYTE
] & 1 << (c
% BITSPERBYTE
))