/*
 * Start development on BSD 2
 * [unix-history] / .ref-BSD-1 / ex-1.1 / ex_re.c
 * Commit  Line  Data
 * d43bb1bf  BJ
 */
1#include "ex.h"
2#include "ex_re.h"
/*
 * Ex - a text editor
 * Bill Joy UCB June/September 1977
 */

8compile(eof, oknl)
9 int eof;
10 char oknl;
11{
12 register c;
13 register char *ep;
14 char *lastep;
15 char bracket[NBRA], *bracketp, *rhsp;
16 int cclcnt;
17
18 if (letter(eof) || digit(eof))
19 error("Re delimiter must not be letter or digit|Regular expressions cannot be delimited by letters or digits");
20 ep = expbuf;
21 c = getchar();
22 if (eof == '\\')
23 switch (c) {
24
25 case '/':
26 case '?':
27 if (scanre.sexpbuf[0] == 0)
28noscanre:
29 error("No previous scan re|No previous scanning regular expression");
30 resre(&scanre);
31 return (c);
32
33 case '&':
34 if (subre.sexpbuf[0] == 0)
35nosubre:
36 error("No previous substitute re|No previous substitute regular expression");
37 resre(&subre);
38 return (c);
39
40 default:
41 error("Badly formed re|Regular expression \\ must be followed by /, ?, or &");
42 }
43 if (c == eof || c == '\n' || c == EOF) {
44 if (*ep == 0)
45 error("No previous re|No previous regular expression");
46 if (c == '\n' && oknl == 0)
47 error("Missing closing delimiter@for regular expression");
48 if (c == '\n')
49 ungetchar(c);
50 return (eof);
51 }
52 bracketp = bracket;
53 nbra = 0;
54 circfl = 0;
55 if (c == '^') {
56 c = getchar();
57 circfl++;
58 }
59 ungetchar(c);
60 for (;;) {
61 if (ep >= &expbuf[ESIZE - 2])
62complex:
63 cerror("Re too complex|Regular expression too complicated");
64 c = getchar();
65 if (c == eof || c == EOF) {
66 if (bracketp != bracket)
67 cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
68 *ep++ = CEOF;
69 return (eof);
70 }
71 if (value(MAGIC)) {
72 if (c != '*' || ep == expbuf)
73 lastep = ep;
74 } else
75 if (c != '\\' || peekchar() != '*' || ep == expbuf)
76 lastep = ep;
77 switch (c) {
78
79 case '\\':
80 c = getchar();
81 switch (c) {
82
83 case '(':
84 if (nbra >= NBRA)
85 cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
86 *bracketp++ = nbra;
87 *ep++ = CBRA;
88 *ep++ = nbra++;
89 continue;
90
91 case ')':
92 if (bracketp <= bracket)
93 cerror("Extra \\)|More \\)'s than \\('s in regular expression");
94 *ep++ = CKET;
95 *ep++ = *--bracketp;
96 continue;
97
98 case '<':
99 *ep++ = CBRC;
100 continue;
101
102 case '>':
103 *ep++ = CLET;
104 continue;
105 }
106 if (value(MAGIC) == 0)
107magic:
108 switch (c) {
109
110 case '.':
111 *ep++ = CDOT;
112 continue;
113
114 case '~':
115 rhsp = rhsbuf;
116 while (*rhsp) {
117 if (*rhsp & QUOTE) {
118 c = *rhsp & 0177;
119 if (c == '&')
120 error("Replacement pattern contains &@- cannot use in re");
121 if (c >= '1' && c <= '9')
122 error("Replacement pattern contains \\d@- cannot use in re");
123 }
124 if (ep >= &expbuf[ESIZE-2])
125 goto complex;
126 *ep++ = CCHR;
127 *ep++ = *rhsp++ & 0177;
128 }
129 continue;
130
131 case '*':
132 if (ep == expbuf)
133 break;
134 if (*lastep == CBRA || *lastep == CKET)
135 cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
136 if (*lastep == CCHR && (lastep[1] & QUOTE))
137 cerror("Illegal *|Can't * a \\n in regular expression");
138 *lastep =| STAR;
139 continue;
140
141 case '[':
142 *ep++ = CCL;
143 *ep++ = 0;
144 cclcnt = 1;
145 c = getchar();
146 if (c == '^') {
147 c = getchar();
148 ep[-2] = NCCL;
149 }
150 if (c == ']')
151 cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
152 while (c != ']') {
153 if (c == '\\' && any(peekchar(), "]-^\\"))
154 c = getchar() | QUOTE;
155 if (c == '\n' || c == EOF)
156 cerror("Missing ]");
157 *ep++ = c;
158 cclcnt++;
159 if (ep >= &expbuf[ESIZE])
160 goto complex;
161 c = getchar();
162 }
163 lastep[1] = cclcnt;
164 continue;
165 }
166 if (c == EOF) {
167 ungetchar(EOF);
168 c = '\\';
169 goto defchar;
170 }
171 *ep++ = CCHR;
172 if (c == '\n')
173 cerror("No newlines in re's|Can't escape newlines into regular expressions");
174 if (c < '1' || c > NBRA + '1') {
175 *ep++ = c;
176 continue;
177 }
178 c =- '1';
179 if (c >= nbra)
180 cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
181 *ep++ = c | QUOTE;
182 continue;
183
184 case '\n':
185 if (oknl) {
186 ungetchar(c);
187 *ep++ = CEOF;
188 return (eof);
189 }
190 cerror("Badly formed re|Missing closing delimiter for regular expression");
191
192 case '$':
193 if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
194 *ep++ = CDOL;
195 continue;
196 }
197 goto defchar;
198
199 case '.':
200 case '~':
201 case '*':
202 case '[':
203 if (value(MAGIC))
204 goto magic;
205defchar:
206 default:
207 *ep++ = CCHR;
208 *ep++ = c;
209 continue;
210 }
211 }
212}
213
214cerror(s)
215 char *s;
216{
217
218 expbuf[0] = 0;
219 error(s);
220}
221
222same(a, b)
223 register int a, b;
224{
225
226 return (a == b || value(IGNORECASE) && (a ^ b) == ' ' && letter(a) == letter(b));
227}
228
229execute(gf, addr)
230 int *addr;
231{
232 register char *p1, *p2;
233 register c;
234
235 if (gf) {
236 if (circfl)
237 return (0);
238 locs = p1 = loc2;
239 } else {
240 if (addr == zero)
241 return (0);
242 p1 = getline(*addr);
243 locs = 0;
244 }
245 p2 = expbuf;
246 if (circfl) {
247 loc1 = p1;
248 return (advance(p1, p2));
249 }
250 /* fast check for first character */
251 if (*p2 == CCHR) {
252 c = p2[1];
253 do {
254 if (c != *p1 && (!value(IGNORECASE) ||
255 (c ^ *p1) != ' ' || letter(c) != letter(*p1)))
256 continue;
257 if (advance(p1, p2)) {
258 loc1 = p1;
259 return (1);
260 }
261 } while (*p1++);
262 return (0);
263 }
264 /* regular algorithm */
265 do {
266 if (advance(p1, p2)) {
267 loc1 = p1;
268 return (1);
269 }
270 } while (*p1++);
271 return (0);
272}
273
/*
 * True when c is a letter according to letter() or is an underscore.
 * The argument is evaluated twice; do not pass one with side effects.
 */
#define	uletter(c)	(letter(c) || (c) == '_')

276advance(lp, ep)
277 register char *lp, *ep;
278{
279 register char *curlp;
280 char *nextep, *sp, *sp1, c;
281
282 for (;;) switch (*ep++) {
283
284 case CCHR:
285 if (*ep & QUOTE) {
286 c = *ep++ & 0177;
287 sp = braslist[c];
288 sp1 = braelist[c];
289 while (sp < sp1) {
290 if (!same(*sp, *lp))
291 return (0);
292 sp++, lp++;
293 }
294 continue;
295 }
296 if (!same(*ep, *lp))
297 return (0);
298 ep++, lp++;
299 continue;
300
301 case CDOT:
302 if (*lp++)
303 continue;
304 return (0);
305
306 case CDOL:
307 if (*lp == 0)
308 continue;
309 return (0);
310
311 case CEOF:
312 loc2 = lp;
313 return (1);
314
315 case CCL:
316 if (cclass(ep, *lp++, 1)) {
317 ep =+ *ep;
318 continue;
319 }
320 return (0);
321
322 case NCCL:
323 if (cclass(ep, *lp++, 0)) {
324 ep =+ *ep;
325 continue;
326 }
327 return (0);
328
329 case CBRA:
330 braslist[*ep++] = lp;
331 continue;
332
333 case CKET:
334 braelist[*ep++] = lp;
335 continue;
336
337 case CDOT|STAR:
338 curlp = lp;
339 while (*lp++)
340 continue;
341 goto star;
342
343 case CCHR|STAR:
344 curlp = lp;
345 while (same(*lp, *ep))
346 lp++;
347 lp++;
348 ep++;
349 goto star;
350
351 case CCL|STAR:
352 case NCCL|STAR:
353 curlp = lp;
354 while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
355 continue;
356 ep =+ *ep;
357 goto star;
358star:
359 do {
360 lp--;
361 if (lp == locs)
362 break;
363 if (advance(lp, ep))
364 return (1);
365 } while (lp > curlp);
366 return (0);
367
368 case CBRC:
369 if (lp == expbuf)
370 continue;
371 if (uletter(*lp) && !uletter(lp[-1]) && !digit(lp[-1]))
372 continue;
373 return (0);
374
375 case CLET:
376 if (!uletter(*lp) && !digit(*lp))
377 continue;
378 return (0);
379
380 default:
381 error("Re internal error@- if possible remember what you did and tell system staff");
382 }
383}
384
385cclass(set, c, af)
386 register char *set;
387 register c;
388 int af;
389{
390 register n;
391
392 if (c == 0)
393 return (0);
394 if (value(IGNORECASE) && ucletter(c))
395 c = letter(c);
396 n = *set++;
397 while (--n)
398 if (n > 2 && set[1] == '-') {
399 if (c >= (set[0] & 0177) && c <= (set[2] & 0177))
400 return (af);
401 set =+ 3;
402 n =- 2;
403 } else
404 if ((*set++ & 0177) == c)
405 return (af);
406 return (!af);
407}
408
/*
 * Copy size bytes from from to to, lowest address first.
 * Nothing is copied when size is zero or negative.  No meaningful
 * value is returned.
 */
int
copy(to, from, size)
	register char *from, *to;
	register int size;
{

	for (; size > 0; size--)
		*to++ = *from++;
}