Commit | Line | Data |
---|---|---|
299f2784 | 1 | /* Copyright (c) 1981 Regents of the University of California */ |
427286eb | 2 | static char *sccsid = "@(#)ex_re.c 7.2 %G%"; |
22316d4f MH |
3 | #include "ex.h" |
4 | #include "ex_re.h" | |
5 | ||
6 | /* | |
7 | * Global, substitute and regular expressions. | |
8 | * Very similar to ed, with some re extensions and | |
9 | * confirmed substitute. | |
10 | */ | |
11 | global(k) | |
12 | bool k; | |
13 | { | |
14 | register char *gp; | |
15 | register int c; | |
16 | register line *a1; | |
17 | char globuf[GBSIZE], *Cwas; | |
18 | int lines = lineDOL(); | |
19 | int oinglobal = inglobal; | |
20 | char *oglobp = globp; | |
21 | ||
22 | Cwas = Command; | |
23 | /* | |
24 | * States of inglobal: | |
25 | * 0: ordinary - not in a global command. | |
26 | * 1: text coming from some buffer, not tty. | |
27 | * 2: like 1, but the source of the buffer is a global command. | |
28 | * Hence you're only in a global command if inglobal==2. This | |
29 | * strange sounding convention is historically derived from | |
30 | * everybody simulating a global command. | |
31 | */ | |
32 | if (inglobal==2) | |
33 | error("Global within global@not allowed"); | |
34 | markDOT(); | |
35 | setall(); | |
36 | nonzero(); | |
37 | if (skipend()) | |
38 | error("Global needs re|Missing regular expression for global"); | |
39 | c = getchar(); | |
40 | ignore(compile(c, 1)); | |
41 | savere(scanre); | |
42 | gp = globuf; | |
43 | while ((c = getchar()) != '\n') { | |
44 | switch (c) { | |
45 | ||
46 | case EOF: | |
47 | c = '\n'; | |
48 | goto brkwh; | |
49 | ||
50 | case '\\': | |
51 | c = getchar(); | |
52 | switch (c) { | |
53 | ||
54 | case '\\': | |
55 | ungetchar(c); | |
56 | break; | |
57 | ||
58 | case '\n': | |
59 | break; | |
60 | ||
61 | default: | |
62 | *gp++ = '\\'; | |
63 | break; | |
64 | } | |
65 | break; | |
66 | } | |
67 | *gp++ = c; | |
68 | if (gp >= &globuf[GBSIZE - 2]) | |
69 | error("Global command too long"); | |
70 | } | |
71 | brkwh: | |
72 | ungetchar(c); | |
73 | out: | |
74 | newline(); | |
75 | *gp++ = c; | |
76 | *gp++ = 0; | |
887e3e0d | 77 | saveall(); |
22316d4f MH |
78 | inglobal = 2; |
79 | for (a1 = one; a1 <= dol; a1++) { | |
80 | *a1 &= ~01; | |
81 | if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) | |
82 | *a1 |= 01; | |
83 | } | |
f0f2d980 MH |
84 | #ifdef notdef |
85 | /* | |
86 | * This code is commented out for now. The problem is that we don't | |
87 | * fix up the undo area the way we should. Basically, I think what has | |
88 | * to be done is to copy the undo area down (since we shrunk everything) | |
89 | * and move the various pointers into it down too. I will do this later | |
90 | * when I have time. (Mark, 10-20-80) | |
91 | */ | |
04379bab MH |
92 | /* |
93 | * Special case: g/.../d (avoid n^2 algorithm) | |
94 | */ | |
95 | if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { | |
96 | gdelete(); | |
97 | return; | |
98 | } | |
f0f2d980 | 99 | #endif |
22316d4f MH |
100 | if (inopen) |
101 | inopen = -1; | |
04379bab MH |
102 | /* |
103 | * Now for each marked line, set dot there and do the commands. | |
104 | * Note the n^2 behavior here for lots of lines matching. | |
105 | * This is really needed: in some cases you could delete lines, | |
106 | * causing a marked line to be moved before a1 and missed if | |
107 | * we didn't restart at zero each time. | |
108 | */ | |
22316d4f MH |
109 | for (a1 = one; a1 <= dol; a1++) { |
110 | if (*a1 & 01) { | |
111 | *a1 &= ~01; | |
112 | dot = a1; | |
113 | globp = globuf; | |
114 | commands(1, 1); | |
115 | a1 = zero; | |
116 | } | |
117 | } | |
118 | globp = oglobp; | |
119 | inglobal = oinglobal; | |
120 | endline = 1; | |
121 | Command = Cwas; | |
122 | netchHAD(lines); | |
123 | setlastchar(EOF); | |
124 | if (inopen) { | |
125 | ungetchar(EOF); | |
126 | inopen = 1; | |
127 | } | |
128 | } | |
129 | ||
04379bab MH |
130 | /* |
131 | * gdelete: delete inside a global command. Handles the | |
132 | * special case g/r.e./d. All lines to be deleted have | |
133 | * already been marked. Squeeze the remaining lines together. | |
134 | * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/, | |
135 | * and g/r.e./.,/r.e.2/d are not treated specially. There is no | |
136 | * good reason for this except the question: where to you draw the line? | |
137 | */ | |
138 | gdelete() | |
139 | { | |
140 | register line *a1, *a2, *a3; | |
141 | ||
142 | a3 = dol; | |
143 | /* find first marked line. can skip all before it */ | |
144 | for (a1=zero; (*a1&01)==0; a1++) | |
145 | if (a1>=a3) | |
146 | return; | |
147 | /* copy down unmarked lines, compacting as we go. */ | |
148 | for (a2=a1+1; a2<=a3;) { | |
149 | if (*a2&01) { | |
150 | a2++; /* line is marked, skip it */ | |
151 | dot = a1; /* dot left after line deletion */ | |
152 | } else | |
153 | *a1++ = *a2++; /* unmarked, copy it */ | |
154 | } | |
155 | dol = a1-1; | |
156 | if (dot>dol) | |
157 | dot = dol; | |
158 | change(); | |
159 | } | |
160 | ||
d266c416 | 161 | bool cflag; |
22316d4f MH |
162 | int scount, slines, stotal; |
163 | ||
164 | substitute(c) | |
165 | int c; | |
166 | { | |
167 | register line *addr; | |
168 | register int n; | |
427286eb | 169 | int gsubf, hopcount; |
22316d4f MH |
170 | |
171 | gsubf = compsub(c); | |
887e3e0d | 172 | if(FIXUNDO) |
22316d4f MH |
173 | save12(), undkind = UNDCHANGE; |
174 | stotal = 0; | |
175 | slines = 0; | |
176 | for (addr = addr1; addr <= addr2; addr++) { | |
427286eb | 177 | scount = hopcount = 0; |
22316d4f MH |
178 | if (dosubcon(0, addr) == 0) |
179 | continue; | |
180 | if (gsubf) { | |
22316d4f | 181 | /* |
04379bab MH |
182 | * The loop can happen from s/\</&/g |
183 | * but we don't want to break other, reasonable cases. | |
22316d4f | 184 | */ |
04379bab MH |
185 | while (*loc2) { |
186 | if (++hopcount > sizeof linebuf) | |
187 | error("substitution loop"); | |
22316d4f MH |
188 | if (dosubcon(1, addr) == 0) |
189 | break; | |
04379bab | 190 | } |
22316d4f MH |
191 | } |
192 | if (scount) { | |
193 | stotal += scount; | |
194 | slines++; | |
195 | putmark(addr); | |
196 | n = append(getsub, addr); | |
197 | addr += n; | |
198 | addr2 += n; | |
199 | } | |
200 | } | |
d266c416 | 201 | if (stotal == 0 && !inglobal && !cflag) |
22316d4f MH |
202 | error("Fail|Substitute pattern match failed"); |
203 | snote(stotal, slines); | |
204 | return (stotal); | |
205 | } | |
206 | ||
207 | compsub(ch) | |
208 | { | |
209 | register int seof, c, uselastre; | |
210 | static int gsubf; | |
211 | ||
212 | if (!value(EDCOMPATIBLE)) | |
d266c416 | 213 | gsubf = cflag = 0; |
22316d4f MH |
214 | uselastre = 0; |
215 | switch (ch) { | |
216 | ||
217 | case 's': | |
218 | ignore(skipwh()); | |
219 | seof = getchar(); | |
220 | if (endcmd(seof) || any(seof, "gcr")) { | |
221 | ungetchar(seof); | |
222 | goto redo; | |
223 | } | |
224 | if (isalpha(seof) || isdigit(seof)) | |
225 | error("Substitute needs re|Missing regular expression for substitute"); | |
226 | seof = compile(seof, 1); | |
227 | uselastre = 1; | |
228 | comprhs(seof); | |
229 | gsubf = 0; | |
d266c416 | 230 | cflag = 0; |
22316d4f MH |
231 | break; |
232 | ||
233 | case '~': | |
234 | uselastre = 1; | |
235 | /* fall into ... */ | |
236 | case '&': | |
237 | redo: | |
238 | if (re.Expbuf[0] == 0) | |
239 | error("No previous re|No previous regular expression"); | |
d266c416 MH |
240 | if (subre.Expbuf[0] == 0) |
241 | error("No previous substitute re|No previous substitute to repeat"); | |
22316d4f MH |
242 | break; |
243 | } | |
244 | for (;;) { | |
245 | c = getchar(); | |
246 | switch (c) { | |
247 | ||
248 | case 'g': | |
249 | gsubf = !gsubf; | |
250 | continue; | |
251 | ||
252 | case 'c': | |
d266c416 | 253 | cflag = !cflag; |
22316d4f MH |
254 | continue; |
255 | ||
256 | case 'r': | |
257 | uselastre = 1; | |
258 | continue; | |
259 | ||
260 | default: | |
261 | ungetchar(c); | |
262 | setcount(); | |
263 | newline(); | |
264 | if (uselastre) | |
265 | savere(subre); | |
266 | else | |
267 | resre(subre); | |
268 | return (gsubf); | |
269 | } | |
270 | } | |
271 | } | |
272 | ||
273 | comprhs(seof) | |
274 | int seof; | |
275 | { | |
276 | register char *rp, *orp; | |
277 | register int c; | |
04379bab | 278 | char orhsbuf[RHSSIZE]; |
22316d4f MH |
279 | |
280 | rp = rhsbuf; | |
281 | CP(orhsbuf, rp); | |
282 | for (;;) { | |
283 | c = getchar(); | |
284 | if (c == seof) | |
285 | break; | |
286 | switch (c) { | |
287 | ||
288 | case '\\': | |
289 | c = getchar(); | |
290 | if (c == EOF) { | |
291 | ungetchar(c); | |
292 | break; | |
293 | } | |
294 | if (value(MAGIC)) { | |
295 | /* | |
296 | * When "magic", \& turns into a plain &, | |
297 | * and all other chars work fine quoted. | |
298 | */ | |
299 | if (c != '&') | |
300 | c |= QUOTE; | |
301 | break; | |
302 | } | |
303 | magic: | |
304 | if (c == '~') { | |
305 | for (orp = orhsbuf; *orp; *rp++ = *orp++) | |
04379bab | 306 | if (rp >= &rhsbuf[RHSSIZE - 1]) |
22316d4f MH |
307 | goto toobig; |
308 | continue; | |
309 | } | |
310 | c |= QUOTE; | |
311 | break; | |
312 | ||
313 | case '\n': | |
314 | case EOF: | |
887e3e0d MH |
315 | if (!(globp && globp[0])) { |
316 | ungetchar(c); | |
317 | goto endrhs; | |
318 | } | |
22316d4f MH |
319 | |
320 | case '~': | |
321 | case '&': | |
322 | if (value(MAGIC)) | |
323 | goto magic; | |
324 | break; | |
325 | } | |
04379bab | 326 | if (rp >= &rhsbuf[RHSSIZE - 1]) { |
22316d4f | 327 | toobig: |
04379bab | 328 | *rp = 0; |
22316d4f | 329 | error("Replacement pattern too long@- limit 256 characters"); |
04379bab | 330 | } |
22316d4f MH |
331 | *rp++ = c; |
332 | } | |
333 | endrhs: | |
334 | *rp++ = 0; | |
335 | } | |
336 | ||
337 | getsub() | |
338 | { | |
339 | register char *p; | |
340 | ||
341 | if ((p = linebp) == 0) | |
342 | return (EOF); | |
343 | strcLIN(p); | |
344 | linebp = 0; | |
345 | return (0); | |
346 | } | |
347 | ||
348 | dosubcon(f, a) | |
349 | bool f; | |
350 | line *a; | |
351 | { | |
352 | ||
353 | if (execute(f, a) == 0) | |
354 | return (0); | |
355 | if (confirmed(a)) { | |
356 | dosub(); | |
357 | scount++; | |
358 | } | |
359 | return (1); | |
360 | } | |
361 | ||
362 | confirmed(a) | |
363 | line *a; | |
364 | { | |
365 | register int c, ch; | |
366 | ||
d266c416 | 367 | if (cflag == 0) |
22316d4f MH |
368 | return (1); |
369 | pofix(); | |
370 | pline(lineno(a)); | |
371 | if (inopen) | |
372 | putchar('\n' | QUOTE); | |
373 | c = column(loc1 - 1); | |
374 | ugo(c - 1 + (inopen ? 1 : 0), ' '); | |
375 | ugo(column(loc2 - 1) - c, '^'); | |
376 | flush(); | |
377 | ch = c = getkey(); | |
378 | again: | |
379 | if (c == '\r') | |
380 | c = '\n'; | |
381 | if (inopen) | |
382 | putchar(c), flush(); | |
383 | if (c != '\n' && c != EOF) { | |
384 | c = getkey(); | |
385 | goto again; | |
386 | } | |
387 | noteinp(); | |
388 | return (ch == 'y'); | |
389 | } | |
390 | ||
391 | getch() | |
392 | { | |
393 | char c; | |
394 | ||
395 | if (read(2, &c, 1) != 1) | |
396 | return (EOF); | |
397 | return (c & TRIM); | |
398 | } | |
399 | ||
/*
 * ugo: write the character `with' cnt times; a count of zero or
 * less writes nothing.
 */
ugo(cnt, with)
	int with;
	int cnt;
{

	for (; cnt > 0; cnt--)
		putchar(with);
}
410 | ||
411 | int casecnt; | |
412 | bool destuc; | |
413 | ||
414 | dosub() | |
415 | { | |
416 | register char *lp, *sp, *rp; | |
417 | int c; | |
418 | ||
419 | lp = linebuf; | |
420 | sp = genbuf; | |
421 | rp = rhsbuf; | |
422 | while (lp < loc1) | |
423 | *sp++ = *lp++; | |
424 | casecnt = 0; | |
425 | while (c = *rp++) { | |
299f2784 MH |
426 | /* ^V <return> from vi to split lines */ |
427 | if (c == '\r') | |
428 | c = '\n'; | |
429 | ||
22316d4f MH |
430 | if (c & QUOTE) |
431 | switch (c & TRIM) { | |
432 | ||
433 | case '&': | |
434 | sp = place(sp, loc1, loc2); | |
435 | if (sp == 0) | |
436 | goto ovflo; | |
437 | continue; | |
438 | ||
439 | case 'l': | |
440 | casecnt = 1; | |
441 | destuc = 0; | |
442 | continue; | |
443 | ||
444 | case 'L': | |
445 | casecnt = LBSIZE; | |
446 | destuc = 0; | |
447 | continue; | |
448 | ||
449 | case 'u': | |
450 | casecnt = 1; | |
451 | destuc = 1; | |
452 | continue; | |
453 | ||
454 | case 'U': | |
455 | casecnt = LBSIZE; | |
456 | destuc = 1; | |
457 | continue; | |
458 | ||
459 | case 'E': | |
460 | case 'e': | |
461 | casecnt = 0; | |
462 | continue; | |
463 | } | |
464 | if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') { | |
465 | sp = place(sp, braslist[c - '1'], braelist[c - '1']); | |
466 | if (sp == 0) | |
467 | goto ovflo; | |
468 | continue; | |
469 | } | |
470 | if (casecnt) | |
471 | *sp++ = fixcase(c & TRIM); | |
472 | else | |
473 | *sp++ = c & TRIM; | |
474 | if (sp >= &genbuf[LBSIZE]) | |
475 | ovflo: | |
44232d5b | 476 | error("Line overflow@in substitute"); |
22316d4f MH |
477 | } |
478 | lp = loc2; | |
479 | loc2 = sp + (linebuf - genbuf); | |
480 | while (*sp++ = *lp++) | |
481 | if (sp >= &genbuf[LBSIZE]) | |
482 | goto ovflo; | |
483 | strcLIN(genbuf); | |
484 | } | |
485 | ||
486 | fixcase(c) | |
487 | register int c; | |
488 | { | |
489 | ||
490 | if (casecnt == 0) | |
491 | return (c); | |
492 | casecnt--; | |
493 | if (destuc) { | |
494 | if (islower(c)) | |
495 | c = toupper(c); | |
496 | } else | |
497 | if (isupper(c)) | |
498 | c = tolower(c); | |
499 | return (c); | |
500 | } | |
501 | ||
502 | char * | |
503 | place(sp, l1, l2) | |
504 | register char *sp, *l1, *l2; | |
505 | { | |
506 | ||
507 | while (l1 < l2) { | |
508 | *sp++ = fixcase(*l1++); | |
509 | if (sp >= &genbuf[LBSIZE]) | |
510 | return (0); | |
511 | } | |
512 | return (sp); | |
513 | } | |
514 | ||
/*
 * snote: report the number of substitutions made, provided
 * notable(total) says the count is worth mentioning.  The line
 * count is added unless it is 1 or equal to the total.
 */
snote(total, lines)
	register int total, lines;
{

	if (notable(total)) {
		printf(mesg("%d subs|%d substitutions"), total);
		if (lines != 1 && lines != total)
			printf(" on %d lines", lines);
		noonl();
		flush();
	}
}
527 | ||
528 | compile(eof, oknl) | |
529 | int eof; | |
530 | int oknl; | |
531 | { | |
532 | register int c; | |
533 | register char *ep; | |
534 | char *lastep; | |
535 | char bracket[NBRA], *bracketp, *rhsp; | |
536 | int cclcnt; | |
537 | ||
538 | if (isalpha(eof) || isdigit(eof)) | |
539 | error("Regular expressions cannot be delimited by letters or digits"); | |
540 | ep = expbuf; | |
541 | c = getchar(); | |
542 | if (eof == '\\') | |
543 | switch (c) { | |
544 | ||
545 | case '/': | |
546 | case '?': | |
547 | if (scanre.Expbuf[0] == 0) | |
548 | error("No previous scan re|No previous scanning regular expression"); | |
549 | resre(scanre); | |
550 | return (c); | |
551 | ||
552 | case '&': | |
553 | if (subre.Expbuf[0] == 0) | |
554 | error("No previous substitute re|No previous substitute regular expression"); | |
555 | resre(subre); | |
556 | return (c); | |
557 | ||
558 | default: | |
559 | error("Badly formed re|Regular expression \\ must be followed by / or ?"); | |
560 | } | |
561 | if (c == eof || c == '\n' || c == EOF) { | |
562 | if (*ep == 0) | |
563 | error("No previous re|No previous regular expression"); | |
564 | if (c == '\n' && oknl == 0) | |
565 | error("Missing closing delimiter@for regular expression"); | |
566 | if (c != eof) | |
567 | ungetchar(c); | |
568 | return (eof); | |
569 | } | |
570 | bracketp = bracket; | |
571 | nbra = 0; | |
572 | circfl = 0; | |
573 | if (c == '^') { | |
574 | c = getchar(); | |
575 | circfl++; | |
576 | } | |
577 | ungetchar(c); | |
578 | for (;;) { | |
579 | if (ep >= &expbuf[ESIZE - 2]) | |
580 | complex: | |
581 | cerror("Re too complex|Regular expression too complicated"); | |
582 | c = getchar(); | |
583 | if (c == eof || c == EOF) { | |
584 | if (bracketp != bracket) | |
585 | cerror("Unmatched \\(|More \\('s than \\)'s in regular expression"); | |
d266c416 | 586 | *ep++ = CEOFC; |
22316d4f MH |
587 | if (c == EOF) |
588 | ungetchar(c); | |
589 | return (eof); | |
590 | } | |
591 | if (value(MAGIC)) { | |
592 | if (c != '*' || ep == expbuf) | |
593 | lastep = ep; | |
594 | } else | |
595 | if (c != '\\' || peekchar() != '*' || ep == expbuf) | |
596 | lastep = ep; | |
597 | switch (c) { | |
598 | ||
599 | case '\\': | |
600 | c = getchar(); | |
601 | switch (c) { | |
602 | ||
603 | case '(': | |
604 | if (nbra >= NBRA) | |
605 | cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression"); | |
606 | *bracketp++ = nbra; | |
607 | *ep++ = CBRA; | |
608 | *ep++ = nbra++; | |
609 | continue; | |
610 | ||
611 | case ')': | |
612 | if (bracketp <= bracket) | |
613 | cerror("Extra \\)|More \\)'s than \\('s in regular expression"); | |
614 | *ep++ = CKET; | |
615 | *ep++ = *--bracketp; | |
616 | continue; | |
617 | ||
618 | case '<': | |
619 | *ep++ = CBRC; | |
620 | continue; | |
621 | ||
622 | case '>': | |
623 | *ep++ = CLET; | |
624 | continue; | |
625 | } | |
626 | if (value(MAGIC) == 0) | |
627 | magic: | |
628 | switch (c) { | |
629 | ||
630 | case '.': | |
631 | *ep++ = CDOT; | |
632 | continue; | |
633 | ||
634 | case '~': | |
635 | rhsp = rhsbuf; | |
636 | while (*rhsp) { | |
637 | if (*rhsp & QUOTE) { | |
638 | c = *rhsp & TRIM; | |
639 | if (c == '&') | |
640 | error("Replacement pattern contains &@- cannot use in re"); | |
641 | if (c >= '1' && c <= '9') | |
642 | error("Replacement pattern contains \\d@- cannot use in re"); | |
643 | } | |
644 | if (ep >= &expbuf[ESIZE-2]) | |
645 | goto complex; | |
646 | *ep++ = CCHR; | |
647 | *ep++ = *rhsp++ & TRIM; | |
648 | } | |
649 | continue; | |
650 | ||
651 | case '*': | |
652 | if (ep == expbuf) | |
653 | break; | |
654 | if (*lastep == CBRA || *lastep == CKET) | |
655 | cerror("Illegal *|Can't * a \\( ... \\) in regular expression"); | |
656 | if (*lastep == CCHR && (lastep[1] & QUOTE)) | |
657 | cerror("Illegal *|Can't * a \\n in regular expression"); | |
658 | *lastep |= STAR; | |
659 | continue; | |
660 | ||
661 | case '[': | |
662 | *ep++ = CCL; | |
663 | *ep++ = 0; | |
664 | cclcnt = 1; | |
665 | c = getchar(); | |
666 | if (c == '^') { | |
667 | c = getchar(); | |
668 | ep[-2] = NCCL; | |
669 | } | |
670 | if (c == ']') | |
671 | cerror("Bad character class|Empty character class '[]' or '[^]' cannot match"); | |
672 | while (c != ']') { | |
673 | if (c == '\\' && any(peekchar(), "]-^\\")) | |
674 | c = getchar() | QUOTE; | |
675 | if (c == '\n' || c == EOF) | |
676 | cerror("Missing ]"); | |
677 | *ep++ = c; | |
678 | cclcnt++; | |
679 | if (ep >= &expbuf[ESIZE]) | |
680 | goto complex; | |
681 | c = getchar(); | |
682 | } | |
683 | lastep[1] = cclcnt; | |
684 | continue; | |
685 | } | |
686 | if (c == EOF) { | |
687 | ungetchar(EOF); | |
688 | c = '\\'; | |
689 | goto defchar; | |
690 | } | |
691 | *ep++ = CCHR; | |
692 | if (c == '\n') | |
693 | cerror("No newlines in re's|Can't escape newlines into regular expressions"); | |
694 | /* | |
695 | if (c < '1' || c > NBRA + '1') { | |
696 | */ | |
697 | *ep++ = c; | |
698 | continue; | |
699 | /* | |
700 | } | |
701 | c -= '1'; | |
702 | if (c >= nbra) | |
703 | cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s"); | |
704 | *ep++ = c | QUOTE; | |
705 | continue; | |
706 | */ | |
707 | ||
708 | case '\n': | |
709 | if (oknl) { | |
710 | ungetchar(c); | |
d266c416 | 711 | *ep++ = CEOFC; |
22316d4f MH |
712 | return (eof); |
713 | } | |
714 | cerror("Badly formed re|Missing closing delimiter for regular expression"); | |
715 | ||
716 | case '$': | |
717 | if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') { | |
718 | *ep++ = CDOL; | |
719 | continue; | |
720 | } | |
721 | goto defchar; | |
722 | ||
723 | case '.': | |
724 | case '~': | |
725 | case '*': | |
726 | case '[': | |
727 | if (value(MAGIC)) | |
728 | goto magic; | |
729 | defchar: | |
730 | default: | |
731 | *ep++ = CCHR; | |
732 | *ep++ = c; | |
733 | continue; | |
734 | } | |
735 | } | |
736 | } | |
737 | ||
738 | cerror(s) | |
739 | char *s; | |
740 | { | |
741 | ||
742 | expbuf[0] = 0; | |
743 | error(s); | |
744 | } | |
745 | ||
746 | same(a, b) | |
747 | register int a, b; | |
748 | { | |
749 | ||
750 | return (a == b || value(IGNORECASE) && | |
751 | ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a))); | |
752 | } | |
753 | ||
754 | char *locs; | |
755 | ||
756 | execute(gf, addr) | |
757 | line *addr; | |
758 | { | |
759 | register char *p1, *p2; | |
760 | register int c; | |
761 | ||
762 | if (gf) { | |
763 | if (circfl) | |
764 | return (0); | |
22316d4f MH |
765 | locs = p1 = loc2; |
766 | } else { | |
767 | if (addr == zero) | |
768 | return (0); | |
769 | p1 = linebuf; | |
770 | getline(*addr); | |
771 | locs = 0; | |
772 | } | |
773 | p2 = expbuf; | |
774 | if (circfl) { | |
775 | loc1 = p1; | |
776 | return (advance(p1, p2)); | |
777 | } | |
778 | /* fast check for first character */ | |
779 | if (*p2 == CCHR) { | |
780 | c = p2[1]; | |
781 | do { | |
782 | if (c != *p1 && (!value(IGNORECASE) || | |
783 | !((islower(c) && toupper(c) == *p1) || | |
784 | (islower(*p1) && toupper(*p1) == c)))) | |
785 | continue; | |
786 | if (advance(p1, p2)) { | |
787 | loc1 = p1; | |
788 | return (1); | |
789 | } | |
790 | } while (*p1++); | |
791 | return (0); | |
792 | } | |
793 | /* regular algorithm */ | |
794 | do { | |
795 | if (advance(p1, p2)) { | |
796 | loc1 = p1; | |
797 | return (1); | |
798 | } | |
799 | } while (*p1++); | |
800 | return (0); | |
801 | } | |
802 | ||
803 | #define uletter(c) (isalpha(c) || c == '_') | |
804 | ||
805 | advance(lp, ep) | |
806 | register char *lp, *ep; | |
807 | { | |
808 | register char *curlp; | |
809 | char *sp, *sp1; | |
810 | int c; | |
811 | ||
812 | for (;;) switch (*ep++) { | |
813 | ||
814 | case CCHR: | |
815 | /* useless | |
816 | if (*ep & QUOTE) { | |
817 | c = *ep++ & TRIM; | |
818 | sp = braslist[c]; | |
819 | sp1 = braelist[c]; | |
820 | while (sp < sp1) { | |
821 | if (!same(*sp, *lp)) | |
822 | return (0); | |
823 | sp++, lp++; | |
824 | } | |
825 | continue; | |
826 | } | |
827 | */ | |
828 | if (!same(*ep, *lp)) | |
829 | return (0); | |
830 | ep++, lp++; | |
831 | continue; | |
832 | ||
833 | case CDOT: | |
834 | if (*lp++) | |
835 | continue; | |
836 | return (0); | |
837 | ||
838 | case CDOL: | |
839 | if (*lp == 0) | |
840 | continue; | |
841 | return (0); | |
842 | ||
d266c416 | 843 | case CEOFC: |
22316d4f MH |
844 | loc2 = lp; |
845 | return (1); | |
846 | ||
847 | case CCL: | |
848 | if (cclass(ep, *lp++, 1)) { | |
849 | ep += *ep; | |
850 | continue; | |
851 | } | |
852 | return (0); | |
853 | ||
854 | case NCCL: | |
855 | if (cclass(ep, *lp++, 0)) { | |
856 | ep += *ep; | |
857 | continue; | |
858 | } | |
859 | return (0); | |
860 | ||
861 | case CBRA: | |
862 | braslist[*ep++] = lp; | |
863 | continue; | |
864 | ||
865 | case CKET: | |
866 | braelist[*ep++] = lp; | |
867 | continue; | |
868 | ||
869 | case CDOT|STAR: | |
870 | curlp = lp; | |
871 | while (*lp++) | |
872 | continue; | |
873 | goto star; | |
874 | ||
875 | case CCHR|STAR: | |
876 | curlp = lp; | |
877 | while (same(*lp, *ep)) | |
878 | lp++; | |
879 | lp++; | |
880 | ep++; | |
881 | goto star; | |
882 | ||
883 | case CCL|STAR: | |
884 | case NCCL|STAR: | |
885 | curlp = lp; | |
886 | while (cclass(ep, *lp++, ep[-1] == (CCL|STAR))) | |
887 | continue; | |
888 | ep += *ep; | |
889 | goto star; | |
890 | star: | |
891 | do { | |
892 | lp--; | |
893 | if (lp == locs) | |
894 | break; | |
895 | if (advance(lp, ep)) | |
896 | return (1); | |
897 | } while (lp > curlp); | |
898 | return (0); | |
899 | ||
900 | case CBRC: | |
901 | if (lp == expbuf) | |
902 | continue; | |
903 | if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1])) | |
904 | continue; | |
905 | return (0); | |
906 | ||
907 | case CLET: | |
908 | if (!uletter(*lp) && !isdigit(*lp)) | |
909 | continue; | |
910 | return (0); | |
911 | ||
912 | default: | |
913 | error("Re internal error"); | |
914 | } | |
915 | } | |
916 | ||
917 | cclass(set, c, af) | |
918 | register char *set; | |
919 | register int c; | |
920 | int af; | |
921 | { | |
922 | register int n; | |
923 | ||
924 | if (c == 0) | |
925 | return (0); | |
926 | if (value(IGNORECASE) && isupper(c)) | |
927 | c = tolower(c); | |
928 | n = *set++; | |
929 | while (--n) | |
930 | if (n > 2 && set[1] == '-') { | |
931 | if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM)) | |
932 | return (af); | |
933 | set += 3; | |
934 | n -= 2; | |
935 | } else | |
936 | if ((*set++ & TRIM) == c) | |
937 | return (af); | |
938 | return (!af); | |
939 | } |