protect against systems that use negative numbers for CHAR_MIN
[unix-history] / usr / src / lib / libc / regex / engine.c
CommitLineData
f83322e8
KB
1/*-
2 * Copyright (c) 1992 Henry Spencer.
3 * Copyright (c) 1992 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Henry Spencer of the University of Toronto.
8 *
9 * %sccs.include.redist.c%
10 *
6175ca7c 11 * @(#)engine.c 5.5 (Berkeley) %G%
f83322e8
KB
12 */
13
14/*
15 * The matching engine and friends. This file is #included by regexec.c
16 * after suitable #defines of a variety of macros used herein, so that
17 * different state representations can be used without duplicating masses
18 * of code.
19 */
20
21#ifdef SNAMES
22#define matcher smatcher
23#define fast sfast
24#define slow sslow
25#define dissect sdissect
26#define backref sbackref
f83322e8
KB
27#define step sstep
28#define print sprint
4d4cb2b0 29#define at sat
f83322e8
KB
30#define match smat
31#endif
32#ifdef LNAMES
33#define matcher lmatcher
34#define fast lfast
35#define slow lslow
36#define dissect ldissect
37#define backref lbackref
f83322e8
KB
38#define step lstep
39#define print lprint
4d4cb2b0 40#define at lat
f83322e8
KB
41#define match lmat
42#endif
43
44/* another structure passed up and down to avoid zillions of parameters */
45struct match {
46 struct re_guts *g;
47 int eflags;
48 regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
6175ca7c
KB
49 char *offp; /* offsets work from here */
50 char *beginp; /* start of string -- virtual NUL precedes */
51 char *endp; /* end of string -- virtual NUL here */
52 char *coldp; /* can be no match starting before here */
53 char **lastpos; /* [nplus+1] */
f83322e8
KB
54 STATEVARS;
55 states st; /* current states */
56 states fresh; /* states for a fresh start */
57 states tmp; /* temporary */
58 states empty; /* empty set of states */
59};
60
6175ca7c
KB
61static int matcher(/*register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags*/);
62static char *dissect(/*register struct match *m, char *start, char *stop, sopno startst, sopno stopst*/);
63static char *backref(/*register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev*/);
64static char *fast(/*register struct match *m, char *start, char *stop, sopno startst, sopno stopst*/);
65static char *slow(/*register struct match *m, char *start, char *stop, sopno startst, sopno stopst*/);
66static states step(/*register struct re_guts *g, int start, int stop, register states bef, int ch, register states aft*/);
67#define BOL (OUT+1)
68#define EOL (BOL+1)
69#define BOLEOL (BOL+2)
70#define NOTHING (BOL+3)
71#define BOW (BOL+4)
72#define EOW (BOL+5)
73#define CODEMAX (BOL+5) /* highest code used */
74#define NONCHAR(c) ((c) > CHAR_MAX)
75#define NNONCHAR (CODEMAX-CHAR_MAX)
76#ifdef REDEBUG
77static void print(/*struct match *m, char *caption, states st, int ch, FILE *d*/);
78#endif
79#ifdef REDEBUG
80static void at(/*struct match *m, char *title, char *start, char *stop, sopno startst, stopno stopst*/);
81#endif
82#ifdef REDEBUG
83static char *pchar(/*int ch*/);
84#endif
85
4d4cb2b0
KB
86#ifdef REDEBUG
87#define SP(t, s, c) print(m, t, s, c, stdout)
88#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
89#define NOTE(str) { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
f83322e8
KB
90#else
91#define SP(t, s, c) /* nothing */
4d4cb2b0
KB
92#define AT(t, p1, p2, s1, s2) /* nothing */
93#define NOTE(s) /* nothing */
f83322e8
KB
94#endif
95
f83322e8
KB
96/*
97 - matcher - the actual matching engine
6175ca7c
KB
98 == static int matcher(register struct re_guts *g, char *string, \
99 == size_t nmatch, regmatch_t pmatch[], int eflags);
f83322e8
KB
100 */
101static int /* 0 success, REG_NOMATCH failure */
102matcher(g, string, nmatch, pmatch, eflags)
103register struct re_guts *g;
6175ca7c 104char *string;
f83322e8
KB
105size_t nmatch;
106regmatch_t pmatch[];
107int eflags;
108{
6175ca7c 109 register char *endp;
f83322e8
KB
110 register int i;
111 struct match mv;
112 register struct match *m = &mv;
6175ca7c 113 register char *dp;
f83322e8
KB
114 const register sopno gf = g->firststate+1; /* +1 for OEND */
115 const register sopno gl = g->laststate;
6175ca7c
KB
116 char *start;
117 char *stop;
f83322e8 118
4d4cb2b0 119 /* simplify the situation where possible */
f83322e8 120 if (g->cflags&REG_NOSUB)
4d4cb2b0 121 nmatch = 0;
f83322e8
KB
122 if (eflags&REG_STARTEND) {
123 start = string + pmatch[0].rm_so;
124 stop = string + pmatch[0].rm_eo;
125 } else {
126 start = string;
6175ca7c 127 stop = start + strlen(start);
f83322e8 128 }
6175ca7c
KB
129 if (stop < start)
130 return(REG_INVARG);
f83322e8 131
945a2176
KB
132 /* prescreening; this does wonders for this rather slow code */
133 if (g->must != NULL) {
134 for (dp = start; dp < stop; dp++)
6175ca7c
KB
135 if (*dp == g->must[0] && stop - dp >= g->mlen &&
136 memcmp(dp, g->must, (size_t)g->mlen) == 0)
945a2176
KB
137 break;
138 if (dp == stop) /* we didn't find g->must */
139 return(REG_NOMATCH);
140 }
141
f83322e8
KB
142 /* match struct setup */
143 m->g = g;
144 m->eflags = eflags;
145 m->pmatch = NULL;
146 m->lastpos = NULL;
147 m->offp = string;
148 m->beginp = start;
149 m->endp = stop;
150 STATESETUP(m, 4);
151 SETUP(m->st);
152 SETUP(m->fresh);
153 SETUP(m->tmp);
154 SETUP(m->empty);
155 CLEAR(m->empty);
156
157 /* this loop does only one repetition except for backrefs */
158 for (;;) {
159 endp = fast(m, start, stop, gf, gl);
160 if (endp == NULL) { /* a miss */
161 STATETEARDOWN(m);
162 return(REG_NOMATCH);
163 }
164 if (nmatch == 0 && !g->backrefs)
165 break; /* no further info needed */
166
167 /* where? */
168 assert(m->coldp != NULL);
169 for (;;) {
4d4cb2b0 170 NOTE("finding start");
f83322e8
KB
171 endp = slow(m, m->coldp, stop, gf, gl);
172 if (endp != NULL)
173 break;
4d4cb2b0 174 assert(m->coldp < m->endp);
f83322e8
KB
175 m->coldp++;
176 }
177 if (nmatch == 1 && !g->backrefs)
178 break; /* no further info needed */
179
180 /* oh my, he wants the subexpressions... */
181 if (m->pmatch == NULL)
182 m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
183 sizeof(regmatch_t));
184 if (m->pmatch == NULL) {
185 STATETEARDOWN(m);
186 return(REG_ESPACE);
187 }
4d4cb2b0
KB
188 for (i = 1; i <= m->g->nsub; i++)
189 m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
190 if (!g->backrefs && !(m->eflags&REG_BACKR)) {
191 NOTE("dissecting");
f83322e8 192 dp = dissect(m, m->coldp, endp, gf, gl);
4d4cb2b0 193 } else {
f83322e8 194 if (g->nplus > 0 && m->lastpos == NULL)
6175ca7c
KB
195 m->lastpos = (char **)malloc((g->nplus+1) *
196 sizeof(char *));
f83322e8
KB
197 if (g->nplus > 0 && m->lastpos == NULL) {
198 free(m->pmatch);
199 STATETEARDOWN(m);
200 return(REG_ESPACE);
201 }
4d4cb2b0 202 NOTE("backref dissect");
f83322e8
KB
203 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
204 }
205 if (dp != NULL)
206 break;
207
208 /* uh-oh... we couldn't find a subexpression-level match */
209 assert(g->backrefs); /* must be back references doing it */
210 assert(g->nplus == 0 || m->lastpos != NULL);
4d4cb2b0
KB
211 for (;;) {
212 if (dp != NULL || endp <= m->coldp)
213 break; /* defeat */
214 NOTE("backoff");
215 endp = slow(m, m->coldp, endp-1, gf, gl);
216 if (endp == NULL)
217 break; /* defeat */
f83322e8 218 /* try it on a shorter possibility */
4d4cb2b0 219#ifndef NDEBUG
f83322e8 220 for (i = 1; i <= m->g->nsub; i++) {
4d4cb2b0
KB
221 assert(m->pmatch[i].rm_so == -1);
222 assert(m->pmatch[i].rm_eo == -1);
f83322e8 223 }
4d4cb2b0
KB
224#endif
225 NOTE("backoff dissect");
f83322e8
KB
226 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
227 }
228 assert(dp == NULL || dp == endp);
229 if (dp != NULL) /* found a shorter one */
230 break;
231
232 /* despite initial appearances, there is no match here */
4d4cb2b0 233 NOTE("false alarm");
f83322e8
KB
234 start = m->coldp + 1; /* recycle starting later */
235 assert(start <= stop);
236 }
237
238 /* fill in the details if requested */
239 if (nmatch > 0) {
240 pmatch[0].rm_so = m->coldp - m->offp;
241 pmatch[0].rm_eo = endp - m->offp;
242 }
243 if (nmatch > 1) {
244 assert(m->pmatch != NULL);
245 for (i = 1; i < nmatch; i++)
246 if (i <= m->g->nsub)
247 pmatch[i] = m->pmatch[i];
248 else {
249 pmatch[i].rm_so = -1;
250 pmatch[i].rm_eo = -1;
251 }
252 }
253
254 if (m->pmatch != NULL)
255 free((char *)m->pmatch);
256 if (m->lastpos != NULL)
257 free((char *)m->lastpos);
258 STATETEARDOWN(m);
259 return(0);
260}
261
262/*
263 - dissect - figure out what matched what, no back references
6175ca7c
KB
264 == static char *dissect(register struct match *m, char *start, \
265 == char *stop, sopno startst, sopno stopst);
f83322e8 266 */
6175ca7c 267static char * /* == stop (success) always */
f83322e8
KB
268dissect(m, start, stop, startst, stopst)
269register struct match *m;
6175ca7c
KB
270char *start;
271char *stop;
f83322e8
KB
272sopno startst;
273sopno stopst;
274{
275 register int i;
276 register sopno ss; /* start sop of current subRE */
277 register sopno es; /* end sop of current subRE */
6175ca7c
KB
278 register char *sp; /* start of string matched by it */
279 register char *stp; /* string matched by it cannot pass here */
280 register char *rest; /* start of rest of string */
281 register char *tail; /* string unmatched by rest of RE */
f83322e8
KB
282 register sopno ssub; /* start sop of subsubRE */
283 register sopno esub; /* end sop of subsubRE */
6175ca7c
KB
284 register char *ssp; /* start of string matched by subsubRE */
285 register char *sep; /* end of string matched by subsubRE */
286 register char *oldssp; /* previous ssp */
287 register char *dp;
f83322e8 288
4d4cb2b0 289 AT("diss", start, stop, startst, stopst);
f83322e8
KB
290 sp = start;
291 for (ss = startst; ss < stopst; ss = es) {
292 /* identify end of subRE */
293 es = ss;
294 switch (OP(m->g->strip[es])) {
295 case OPLUS_:
296 case OQUEST_:
297 es += OPND(m->g->strip[es]);
298 break;
299 case OCH_:
300 while (OP(m->g->strip[es]) != O_CH)
301 es += OPND(m->g->strip[es]);
302 break;
303 }
304 es++;
305
306 /* figure out what it matched */
307 switch (OP(m->g->strip[ss])) {
308 case OEND:
6175ca7c 309 assert(nope);
f83322e8
KB
310 break;
311 case OCHAR:
312 sp++;
313 break;
314 case OBOL:
315 case OEOL:
6175ca7c
KB
316 case OBOW:
317 case OEOW:
f83322e8
KB
318 break;
319 case OANY:
320 case OANYOF:
321 sp++;
322 break;
323 case OBACK_:
324 case O_BACK:
6175ca7c 325 assert(nope);
f83322e8
KB
326 break;
327 /* cases where length of match is hard to find */
328 case OQUEST_:
329 stp = stop;
330 for (;;) {
331 /* how long could this one be? */
332 rest = slow(m, sp, stp, ss, es);
333 assert(rest != NULL); /* it did match */
334 /* could the rest match the rest? */
335 tail = slow(m, rest, stop, es, stopst);
336 if (tail == stop)
337 break; /* yes! */
338 /* no -- try a shorter match for this one */
339 stp = rest - 1;
340 assert(stp >= sp); /* it did work */
341 }
342 ssub = ss + 1;
343 esub = es - 1;
344 /* did innards match? */
345 if (slow(m, sp, rest, ssub, esub) != NULL) {
346 dp = dissect(m, sp, rest, ssub, esub);
347 assert(dp == rest);
348 } else /* no */
349 assert(sp == rest);
350 sp = rest;
351 break;
352 case OPLUS_:
353 stp = stop;
354 for (;;) {
355 /* how long could this one be? */
356 rest = slow(m, sp, stp, ss, es);
357 assert(rest != NULL); /* it did match */
358 /* could the rest match the rest? */
359 tail = slow(m, rest, stop, es, stopst);
360 if (tail == stop)
361 break; /* yes! */
362 /* no -- try a shorter match for this one */
363 stp = rest - 1;
364 assert(stp >= sp); /* it did work */
365 }
366 ssub = ss + 1;
367 esub = es - 1;
368 ssp = sp;
369 oldssp = ssp;
370 for (;;) { /* find last match of innards */
371 sep = slow(m, ssp, rest, ssub, esub);
372 if (sep == NULL || sep == ssp)
373 break; /* failed or matched null */
374 oldssp = ssp; /* on to next try */
375 ssp = sep;
376 }
377 if (sep == NULL) {
378 /* last successful match */
379 sep = ssp;
380 ssp = oldssp;
381 }
382 assert(sep == rest); /* must exhaust substring */
383 assert(slow(m, ssp, sep, ssub, esub) == rest);
384 dp = dissect(m, ssp, sep, ssub, esub);
385 assert(dp == sep);
386 sp = rest;
387 break;
388 case OCH_:
389 stp = stop;
390 for (;;) {
391 /* how long could this one be? */
392 rest = slow(m, sp, stp, ss, es);
393 assert(rest != NULL); /* it did match */
394 /* could the rest match the rest? */
395 tail = slow(m, rest, stop, es, stopst);
396 if (tail == stop)
397 break; /* yes! */
398 /* no -- try a shorter match for this one */
399 stp = rest - 1;
400 assert(stp >= sp); /* it did work */
401 }
402 ssub = ss + 1;
403 esub = ss + OPND(m->g->strip[ss]) - 1;
404 assert(OP(m->g->strip[esub]) == OOR1);
405 for (;;) { /* find first matching branch */
406 if (slow(m, sp, rest, ssub, esub) == rest)
407 break; /* it matched all of it */
408 /* that one missed, try next one */
409 assert(OP(m->g->strip[esub]) == OOR1);
410 esub++;
411 assert(OP(m->g->strip[esub]) == OOR2);
412 ssub = esub + 1;
413 esub += OPND(m->g->strip[esub]);
414 if (OP(m->g->strip[esub]) == OOR2)
415 esub--;
416 else
417 assert(OP(m->g->strip[esub]) == O_CH);
418 }
419 dp = dissect(m, sp, rest, ssub, esub);
420 assert(dp == rest);
421 sp = rest;
422 break;
423 case O_PLUS:
424 case O_QUEST:
425 case OOR1:
426 case OOR2:
427 case O_CH:
6175ca7c 428 assert(nope);
f83322e8
KB
429 break;
430 case OLPAREN:
431 i = OPND(m->g->strip[ss]);
432 m->pmatch[i].rm_so = sp - m->offp;
433 break;
434 case ORPAREN:
435 i = OPND(m->g->strip[ss]);
436 m->pmatch[i].rm_eo = sp - m->offp;
437 break;
438 default: /* uh oh */
6175ca7c 439 assert(nope);
f83322e8
KB
440 break;
441 }
442 }
443
444 assert(sp == stop);
445 return(sp);
446}
447
448/*
449 - backref - figure out what matched what, figuring in back references
6175ca7c
KB
450 == static char *backref(register struct match *m, char *start, \
451 == char *stop, sopno startst, sopno stopst, sopno lev);
f83322e8 452 */
6175ca7c 453static char * /* == stop (success) or NULL (failure) */
f83322e8
KB
454backref(m, start, stop, startst, stopst, lev)
455register struct match *m;
6175ca7c
KB
456char *start;
457char *stop;
f83322e8
KB
458sopno startst;
459sopno stopst;
460sopno lev; /* PLUS nesting level */
461{
462 register int i;
463 register sopno ss; /* start sop of current subRE */
6175ca7c 464 register char *sp; /* start of string matched by it */
f83322e8
KB
465 register sopno ssub; /* start sop of subsubRE */
466 register sopno esub; /* end sop of subsubRE */
6175ca7c
KB
467 register char *ssp; /* start of string matched by subsubRE */
468 register char *dp;
f83322e8
KB
469 register size_t len;
470 register int hard;
471 register sop s;
472 register regoff_t offsave;
473 register cset *cs;
474
4d4cb2b0 475 AT("back", start, stop, startst, stopst);
f83322e8
KB
476 sp = start;
477
478 /* get as far as we can with easy stuff */
479 hard = 0;
480 for (ss = startst; !hard && ss < stopst; ss++)
481 switch (OP(s = m->g->strip[ss])) {
482 case OCHAR:
6175ca7c 483 if (sp == stop || *sp++ != (char)OPND(s))
f83322e8
KB
484 return(NULL);
485 break;
486 case OANY:
487 if (sp == stop)
488 return(NULL);
489 sp++;
490 break;
491 case OANYOF:
492 cs = &m->g->sets[OPND(s)];
493 if (sp == stop || !CHIN(cs, *sp++))
494 return(NULL);
495 break;
496 case OBOL:
497 if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
498 (sp < m->endp && *(sp-1) == '\n' &&
499 (m->g->cflags&REG_NEWLINE)) )
500 { /* yes */ }
501 else
502 return(NULL);
503 break;
504 case OEOL:
505 if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
506 (sp < m->endp && *sp == '\n' &&
507 (m->g->cflags&REG_NEWLINE)) )
508 { /* yes */ }
509 else
510 return(NULL);
511 break;
6175ca7c
KB
512 case OBOW:
513 if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
514 (sp < m->endp && *(sp-1) == '\n' &&
515 (m->g->cflags&REG_NEWLINE)) ||
516 (sp > m->beginp &&
517 !isalnum(*(sp-1))) ) &&
518 (sp < m->endp && isalnum(*sp)) )
519 { /* yes */ }
520 else
521 return(NULL);
522 break;
523 case OEOW:
524 if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
525 (sp < m->endp && *sp == '\n' &&
526 (m->g->cflags&REG_NEWLINE)) ||
527 (sp < m->endp && !isalnum(*sp)) ) &&
528 (sp > m->beginp && isalnum(*(sp-1))) )
529 { /* yes */ }
530 else
531 return(NULL);
532 break;
f83322e8
KB
533 case O_QUEST:
534 break;
535 case OOR1: /* matches null but needs to skip */
536 ss++;
537 s = m->g->strip[ss];
538 do {
539 assert(OP(s) == OOR2);
540 ss += OPND(s);
541 } while (OP(s = m->g->strip[ss]) != O_CH);
542 /* note that the ss++ gets us past the O_CH */
543 break;
544 default: /* have to make a choice */
545 hard = 1;
546 break;
547 }
548 if (!hard) { /* that was it! */
549 if (sp != stop)
550 return(NULL);
551 return(sp);
552 }
553 ss--; /* adjust for the for's final increment */
554
555 /* the hard stuff */
4d4cb2b0 556 AT("hard", sp, stop, ss, stopst);
f83322e8
KB
557 s = m->g->strip[ss];
558 switch (OP(s)) {
559 case OBACK_: /* the vilest depths */
560 i = OPND(s);
561 if (m->pmatch[i].rm_eo == -1)
562 return(NULL);
563 assert(m->pmatch[i].rm_so != -1);
564 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
565 assert(stop - m->beginp >= len);
566 if (sp > stop - len)
567 return(NULL); /* not enough left to match */
568 ssp = m->offp + m->pmatch[i].rm_so;
6175ca7c 569 if (memcmp(sp, ssp, len) != 0)
f83322e8
KB
570 return(NULL);
571 while (m->g->strip[ss] != SOP(O_BACK, i))
572 ss++;
573 return(backref(m, sp+len, stop, ss+1, stopst, lev));
574 break;
575 case OQUEST_: /* to null or not */
576 dp = backref(m, sp, stop, ss+1, stopst, lev);
577 if (dp != NULL)
578 return(dp); /* not */
579 return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
580 break;
581 case OPLUS_:
582 assert(m->lastpos != NULL);
583 assert(lev+1 <= m->g->nplus);
584 m->lastpos[lev+1] = sp;
585 return(backref(m, sp, stop, ss+1, stopst, lev+1));
586 break;
587 case O_PLUS:
588 if (sp == m->lastpos[lev]) /* last pass matched null */
589 return(backref(m, sp, stop, ss+1, stopst, lev-1));
590 /* try another pass */
591 m->lastpos[lev] = sp;
592 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
593 if (dp == NULL)
594 return(backref(m, sp, stop, ss+1, stopst, lev-1));
595 else
596 return(dp);
597 break;
598 case OCH_: /* find the right one, if any */
599 ssub = ss + 1;
600 esub = ss + OPND(s) - 1;
601 assert(OP(m->g->strip[esub]) == OOR1);
602 for (;;) { /* find first matching branch */
603 dp = backref(m, sp, stop, ssub, esub, lev);
604 if (dp != NULL)
605 return(dp);
606 /* that one missed, try next one */
607 if (OP(m->g->strip[esub]) == O_CH)
608 return(NULL); /* there is none */
609 esub++;
610 assert(OP(m->g->strip[esub]) == OOR2);
611 ssub = esub + 1;
612 esub += OPND(m->g->strip[esub]);
613 if (OP(m->g->strip[esub]) == OOR2)
614 esub--;
615 else
616 assert(OP(m->g->strip[esub]) == O_CH);
617 }
618 break;
619 case OLPAREN: /* must undo assignment if rest fails */
620 i = OPND(s);
621 offsave = m->pmatch[i].rm_so;
622 m->pmatch[i].rm_so = sp - m->offp;
623 dp = backref(m, sp, stop, ss+1, stopst, lev);
624 if (dp != NULL)
625 return(dp);
626 m->pmatch[i].rm_so = offsave;
627 return(NULL);
628 break;
629 case ORPAREN: /* must undo assignment if rest fails */
630 i = OPND(s);
631 offsave = m->pmatch[i].rm_eo;
632 m->pmatch[i].rm_eo = sp - m->offp;
633 dp = backref(m, sp, stop, ss+1, stopst, lev);
634 if (dp != NULL)
635 return(dp);
636 m->pmatch[i].rm_eo = offsave;
637 return(NULL);
638 break;
639 default: /* uh oh */
6175ca7c 640 assert(nope);
f83322e8
KB
641 break;
642 }
643
644 /* "can't happen" */
6175ca7c 645 assert(nope);
f83322e8
KB
646 /* NOTREACHED */
647}
648
649/*
650 - fast - step through the string at top speed
6175ca7c
KB
651 == static char *fast(register struct match *m, char *start, \
652 == char *stop, sopno startst, sopno stopst);
f83322e8 653 */
6175ca7c 654static char * /* where tentative match ended, or NULL */
f83322e8
KB
655fast(m, start, stop, startst, stopst)
656register struct match *m;
6175ca7c
KB
657char *start;
658char *stop;
f83322e8
KB
659sopno startst;
660sopno stopst;
661{
662 register states st = m->st;
663 register states fresh = m->fresh;
664 register states tmp = m->tmp;
6175ca7c
KB
665 register char *p = start;
666 register int c = (start == m->beginp) ? OUT : *(start-1);
667 register int lastc; /* previous c */
668 register int flagch;
669 register int i;
670 register char *coldp; /* last p after which no match was underway */
f83322e8
KB
671
672 CLEAR(st);
673 SET1(st, startst);
6175ca7c 674 st = step(m->g, startst, stopst, st, NOTHING, st);
f83322e8
KB
675 ASSIGN(fresh, st);
676 SP("start", st, *p);
f83322e8
KB
677 coldp = NULL;
678 for (;;) {
679 /* next character */
680 lastc = c;
6175ca7c 681 c = (p == m->endp) ? OUT : *p;
f83322e8
KB
682 if (EQ(st, fresh))
683 coldp = p;
684
685 /* is there an EOL and/or BOL between lastc and c? */
6175ca7c
KB
686 flagch = '\0';
687 i = 0;
688 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
689 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
690 flagch = BOL;
691 i = m->g->nbol;
692 }
693 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
694 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
695 flagch = (flagch == BOL) ? BOLEOL : EOL;
696 i += m->g->neol;
697 }
698 if (i != 0) {
699 for (; i > 0; i--)
700 st = step(m->g, startst, stopst, st, flagch, st);
701 SP("boleol", st, c);
702 }
703
704 /* how about a word boundary? */
705 if ( (flagch == BOL || (lastc != OUT && !isalnum(lastc))) &&
706 (c != OUT && isalnum(c)) ) {
707 flagch = BOW;
708 }
709 if ( (lastc != OUT && isalnum(lastc)) &&
710 (flagch == EOL || (c != OUT && !isalnum(c))) ) {
711 flagch = EOW;
712 }
713 if (flagch == BOW || flagch == EOW) {
714 st = step(m->g, startst, stopst, st, flagch, st);
715 SP("boweow", st, c);
f83322e8
KB
716 }
717
718 /* are we done? */
719 if (ISSET(st, stopst) || p == stop)
720 break; /* NOTE BREAK OUT */
721
722 /* no, we must deal with this character */
723 ASSIGN(tmp, st);
724 ASSIGN(st, fresh);
6175ca7c 725 assert(c != OUT);
f83322e8
KB
726 st = step(m->g, startst, stopst, tmp, c, st);
727 SP("aft", st, c);
6175ca7c 728 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
f83322e8
KB
729 p++;
730 }
731
732 assert(coldp != NULL);
733 m->coldp = coldp;
734 if (ISSET(st, stopst))
735 return(p+1);
736 else
737 return(NULL);
738}
739
740/*
741 - slow - step through the string more deliberately
6175ca7c
KB
742 == static char *slow(register struct match *m, char *start, \
743 == char *stop, sopno startst, sopno stopst);
f83322e8 744 */
6175ca7c 745static char * /* where it ended */
f83322e8
KB
746slow(m, start, stop, startst, stopst)
747register struct match *m;
6175ca7c
KB
748char *start;
749char *stop;
f83322e8
KB
750sopno startst;
751sopno stopst;
752{
753 register states st = m->st;
754 register states empty = m->empty;
755 register states tmp = m->tmp;
6175ca7c
KB
756 register char *p = start;
757 register int c = (start == m->beginp) ? OUT : *(start-1);
758 register int lastc; /* previous c */
759 register int flagch;
760 register int i;
761 register char *matchp; /* last p at which a match ended */
f83322e8 762
4d4cb2b0 763 AT("slow", start, stop, startst, stopst);
f83322e8
KB
764 CLEAR(st);
765 SET1(st, startst);
766 SP("sstart", st, *p);
6175ca7c 767 st = step(m->g, startst, stopst, st, NOTHING, st);
f83322e8
KB
768 matchp = NULL;
769 for (;;) {
770 /* next character */
771 lastc = c;
6175ca7c 772 c = (p == m->endp) ? OUT : *p;
f83322e8
KB
773
774 /* is there an EOL and/or BOL between lastc and c? */
6175ca7c
KB
775 flagch = '\0';
776 i = 0;
777 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
778 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
779 flagch = BOL;
780 i = m->g->nbol;
781 }
782 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
783 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
784 flagch = (flagch == BOL) ? BOLEOL : EOL;
785 i += m->g->neol;
786 }
787 if (i != 0) {
788 for (; i > 0; i--)
789 st = step(m->g, startst, stopst, st, flagch, st);
790 SP("sboleol", st, c);
791 }
792
793 /* how about a word boundary? */
794 if ( (flagch == BOL || (lastc != OUT && !isalnum(lastc))) &&
795 (c != OUT && isalnum(c)) ) {
796 flagch = BOW;
797 }
798 if ( (lastc != OUT && isalnum(lastc)) &&
799 (flagch == EOL || (c != OUT && !isalnum(c))) ) {
800 flagch = EOW;
801 }
802 if (flagch == BOW || flagch == EOW) {
803 st = step(m->g, startst, stopst, st, flagch, st);
804 SP("sboweow", st, c);
f83322e8
KB
805 }
806
807 /* are we done? */
808 if (ISSET(st, stopst))
809 matchp = p;
810 if (EQ(st, empty) || p == stop)
811 break; /* NOTE BREAK OUT */
812
813 /* no, we must deal with this character */
814 ASSIGN(tmp, st);
815 ASSIGN(st, empty);
6175ca7c 816 assert(c != OUT);
f83322e8
KB
817 st = step(m->g, startst, stopst, tmp, c, st);
818 SP("saft", st, c);
6175ca7c 819 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
f83322e8
KB
820 p++;
821 }
822
823 return(matchp);
824}
825
826
f83322e8
KB
827/*
828 - step - map set of states reachable before char to set reachable after
6175ca7c
KB
829 == static states step(register struct re_guts *g, int start, int stop, \
830 == register states bef, int ch, register states aft);
831 == #define BOL (OUT+1)
832 == #define EOL (BOL+1)
833 == #define BOLEOL (BOL+2)
834 == #define NOTHING (BOL+3)
835 == #define BOW (BOL+4)
836 == #define EOW (BOL+5)
837 == #define CODEMAX (BOL+5) // highest code used
838 == #define NONCHAR(c) ((c) > CHAR_MAX)
839 == #define NNONCHAR (CODEMAX-CHAR_MAX)
f83322e8
KB
840 */
841static states
842step(g, start, stop, bef, ch, aft)
843register struct re_guts *g;
844int start; /* start state within strip */
845int stop; /* state after stop state within strip */
846register states bef; /* states reachable before */
6175ca7c 847int ch; /* character or NONCHAR code */
f83322e8
KB
848register states aft; /* states already known reachable after */
849{
850 register cset *cs;
851 register sop s;
852 register sopno pc;
853 register onestate here; /* note, macros know this name */
854 register sopno look;
855 register int i;
856
857 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
858 s = g->strip[pc];
859 switch (OP(s)) {
860 case OEND:
861 assert(pc == stop-1);
862 break;
863 case OCHAR:
6175ca7c
KB
864 /* only characters can match */
865 assert(!NONCHAR(ch) || ch != (char)OPND(s));
866 if (ch == (char)OPND(s))
867 FWD(aft, bef, 1);
868 break;
869 case OBOL:
870 if (ch == BOL || ch == BOLEOL)
f83322e8
KB
871 FWD(aft, bef, 1);
872 break;
f83322e8 873 case OEOL:
6175ca7c
KB
874 if (ch == EOL || ch == BOLEOL)
875 FWD(aft, bef, 1);
876 break;
877 case OBOW:
878 if (ch == BOW)
879 FWD(aft, bef, 1);
880 break;
881 case OEOW:
882 if (ch == EOW)
883 FWD(aft, bef, 1);
f83322e8
KB
884 break;
885 case OANY:
6175ca7c
KB
886 if (!NONCHAR(ch))
887 FWD(aft, bef, 1);
f83322e8
KB
888 break;
889 case OANYOF:
890 cs = &g->sets[OPND(s)];
6175ca7c 891 if (!NONCHAR(ch) && CHIN(cs, ch))
f83322e8
KB
892 FWD(aft, bef, 1);
893 break;
894 case OBACK_: /* ignored here */
895 case O_BACK:
896 FWD(aft, aft, 1);
897 break;
898 case OPLUS_: /* forward, this is just an empty */
899 FWD(aft, aft, 1);
900 break;
901 case O_PLUS: /* both forward and back */
902 FWD(aft, aft, 1);
903 i = ISSETBACK(aft, OPND(s));
904 BACK(aft, aft, OPND(s));
905 if (!i && ISSETBACK(aft, OPND(s))) {
906 /* oho, must reconsider loop body */
907 pc -= OPND(s) + 1;
908 INIT(here, pc);
909 }
910 break;
911 case OQUEST_: /* two branches, both forward */
912 FWD(aft, aft, 1);
913 FWD(aft, aft, OPND(s));
914 break;
915 case O_QUEST: /* just an empty */
916 FWD(aft, aft, 1);
917 break;
918 case OLPAREN: /* not significant here */
919 case ORPAREN:
920 FWD(aft, aft, 1);
921 break;
922 case OCH_: /* mark the first two branches */
923 FWD(aft, aft, 1);
924 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
925 FWD(aft, aft, OPND(s));
926 break;
927 case OOR1: /* done a branch, find the O_CH */
928 if (ISSTATEIN(aft, here)) {
929 for (look = 1;
930 OP(s = g->strip[pc+look]) != O_CH;
931 look += OPND(s))
932 assert(OP(s) == OOR2);
933 FWD(aft, aft, look);
934 }
935 break;
936 case OOR2: /* propagate OCH_'s marking */
937 FWD(aft, aft, 1);
938 if (OP(g->strip[pc+OPND(s)]) != O_CH) {
939 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
940 FWD(aft, aft, OPND(s));
941 }
942 break;
943 case O_CH: /* just empty */
944 FWD(aft, aft, 1);
945 break;
946 default: /* ooooops... */
6175ca7c 947 assert(nope);
f83322e8
KB
948 break;
949 }
950 }
951
952 return(aft);
953}
954
4d4cb2b0 955#ifdef REDEBUG
f83322e8
KB
956/*
957 - print - print a set of states
6175ca7c
KB
958 == #ifdef REDEBUG
959 == static void print(struct match *m, char *caption, states st, \
960 == int ch, FILE *d);
961 == #endif
f83322e8
KB
962 */
963static void
4d4cb2b0
KB
964print(m, caption, st, ch, d)
965struct match *m;
f83322e8
KB
966char *caption;
967states st;
6175ca7c 968int ch;
f83322e8
KB
969FILE *d;
970{
4d4cb2b0 971 register struct re_guts *g = m->g;
f83322e8
KB
972 register int i;
973 register int first = 1;
974
4d4cb2b0
KB
975 if (!(m->eflags&REG_TRACE))
976 return;
977
f83322e8
KB
978 fprintf(d, "%s", caption);
979 if (ch != '\0')
4d4cb2b0 980 fprintf(d, " %s", pchar(ch));
f83322e8
KB
981 for (i = 0; i < g->nstates; i++)
982 if (ISSET(st, i)) {
983 fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
984 first = 0;
985 }
986 fprintf(d, "\n");
987}
4d4cb2b0
KB
988
989/*
990 - at - print current situation
6175ca7c
KB
991 == #ifdef REDEBUG
992 == static void at(struct match *m, char *title, char *start, char *stop, \
993 == sopno startst, stopno stopst);
994 == #endif
4d4cb2b0
KB
995 */
996static void
997at(m, title, start, stop, startst, stopst)
998struct match *m;
999char *title;
6175ca7c
KB
1000char *start;
1001char *stop;
4d4cb2b0
KB
1002sopno startst;
1003sopno stopst;
1004{
1005 if (!(m->eflags&REG_TRACE))
1006 return;
1007
1008 printf("%s %s-", title, pchar(*start));
1009 printf("%s ", pchar(*stop));
1010 printf("%ld-%ld\n", (long)startst, (long)stopst);
1011}
1012
1013#ifndef PCHARDONE
1014#define PCHARDONE /* never again */
1015/*
1016 - pchar - make a character printable
6175ca7c
KB
1017 == #ifdef REDEBUG
1018 == static char *pchar(int ch);
1019 == #endif
4d4cb2b0
KB
1020 *
1021 * Is this identical to regchar() over in debug.c? Well, yes. But a
1022 * duplicate here avoids having a debugging-capable regexec.o tied to
1023 * a matching debug.o, and this is convenient. It all disappears in
1024 * the non-debug compilation anyway, so it doesn't matter much.
1025 */
1026static char * /* -> representation */
1027pchar(ch)
1028int ch;
1029{
1030 static char pbuf[10];
1031
1032 if (isprint(ch) || ch == ' ')
1033 sprintf(pbuf, "%c", ch);
1034 else
1035 sprintf(pbuf, "\\%o", ch);
1036 return(pbuf);
1037}
1038#endif
f83322e8
KB
1039#endif
1040
1041#undef matcher
1042#undef fast
1043#undef slow
1044#undef dissect
1045#undef backref
f83322e8
KB
1046#undef step
1047#undef print
4d4cb2b0 1048#undef at
f83322e8 1049#undef match