Commit | Line | Data |
---|---|---|
d43bb1bf BJ |
1 | #include "ex.h" |
2 | #include "ex_re.h" | |
3 | /* | |
4 | * Ex - a text editor | |
5 | * Bill Joy UCB June/September 1977 | |
6 | */ | |
7 | ||
8 | compile(eof, oknl) | |
9 | int eof; | |
10 | char oknl; | |
11 | { | |
12 | register c; | |
13 | register char *ep; | |
14 | char *lastep; | |
15 | char bracket[NBRA], *bracketp, *rhsp; | |
16 | int cclcnt; | |
17 | ||
18 | if (letter(eof) || digit(eof)) | |
19 | error("Re delimiter must not be letter or digit|Regular expressions cannot be delimited by letters or digits"); | |
20 | ep = expbuf; | |
21 | c = getchar(); | |
22 | if (eof == '\\') | |
23 | switch (c) { | |
24 | ||
25 | case '/': | |
26 | case '?': | |
27 | if (scanre.sexpbuf[0] == 0) | |
28 | noscanre: | |
29 | error("No previous scan re|No previous scanning regular expression"); | |
30 | resre(&scanre); | |
31 | return (c); | |
32 | ||
33 | case '&': | |
34 | if (subre.sexpbuf[0] == 0) | |
35 | nosubre: | |
36 | error("No previous substitute re|No previous substitute regular expression"); | |
37 | resre(&subre); | |
38 | return (c); | |
39 | ||
40 | default: | |
41 | error("Badly formed re|Regular expression \\ must be followed by /, ?, or &"); | |
42 | } | |
43 | if (c == eof || c == '\n' || c == EOF) { | |
44 | if (*ep == 0) | |
45 | error("No previous re|No previous regular expression"); | |
46 | if (c == '\n' && oknl == 0) | |
47 | error("Missing closing delimiter@for regular expression"); | |
48 | if (c == '\n') | |
49 | ungetchar(c); | |
50 | return (eof); | |
51 | } | |
52 | bracketp = bracket; | |
53 | nbra = 0; | |
54 | circfl = 0; | |
55 | if (c == '^') { | |
56 | c = getchar(); | |
57 | circfl++; | |
58 | } | |
59 | ungetchar(c); | |
60 | for (;;) { | |
61 | if (ep >= &expbuf[ESIZE - 2]) | |
62 | complex: | |
63 | cerror("Re too complex|Regular expression too complicated"); | |
64 | c = getchar(); | |
65 | if (c == eof || c == EOF) { | |
66 | if (bracketp != bracket) | |
67 | cerror("Unmatched \\(|More \\('s than \\)'s in regular expression"); | |
68 | *ep++ = CEOF; | |
69 | return (eof); | |
70 | } | |
71 | if (value(MAGIC)) { | |
72 | if (c != '*' || ep == expbuf) | |
73 | lastep = ep; | |
74 | } else | |
75 | if (c != '\\' || peekchar() != '*' || ep == expbuf) | |
76 | lastep = ep; | |
77 | switch (c) { | |
78 | ||
79 | case '\\': | |
80 | c = getchar(); | |
81 | switch (c) { | |
82 | ||
83 | case '(': | |
84 | if (nbra >= NBRA) | |
85 | cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression"); | |
86 | *bracketp++ = nbra; | |
87 | *ep++ = CBRA; | |
88 | *ep++ = nbra++; | |
89 | continue; | |
90 | ||
91 | case ')': | |
92 | if (bracketp <= bracket) | |
93 | cerror("Extra \\)|More \\)'s than \\('s in regular expression"); | |
94 | *ep++ = CKET; | |
95 | *ep++ = *--bracketp; | |
96 | continue; | |
97 | ||
98 | case '<': | |
99 | *ep++ = CBRC; | |
100 | continue; | |
101 | ||
102 | case '>': | |
103 | *ep++ = CLET; | |
104 | continue; | |
105 | } | |
106 | if (value(MAGIC) == 0) | |
107 | magic: | |
108 | switch (c) { | |
109 | ||
110 | case '.': | |
111 | *ep++ = CDOT; | |
112 | continue; | |
113 | ||
114 | case '~': | |
115 | rhsp = rhsbuf; | |
116 | while (*rhsp) { | |
117 | if (*rhsp & QUOTE) { | |
118 | c = *rhsp & 0177; | |
119 | if (c == '&') | |
120 | error("Replacement pattern contains &@- cannot use in re"); | |
121 | if (c >= '1' && c <= '9') | |
122 | error("Replacement pattern contains \\d@- cannot use in re"); | |
123 | } | |
124 | if (ep >= &expbuf[ESIZE-2]) | |
125 | goto complex; | |
126 | *ep++ = CCHR; | |
127 | *ep++ = *rhsp++ & 0177; | |
128 | } | |
129 | continue; | |
130 | ||
131 | case '*': | |
132 | if (ep == expbuf) | |
133 | break; | |
134 | if (*lastep == CBRA || *lastep == CKET) | |
135 | cerror("Illegal *|Can't * a \\( ... \\) in regular expression"); | |
136 | if (*lastep == CCHR && (lastep[1] & QUOTE)) | |
137 | cerror("Illegal *|Can't * a \\n in regular expression"); | |
138 | *lastep =| STAR; | |
139 | continue; | |
140 | ||
141 | case '[': | |
142 | *ep++ = CCL; | |
143 | *ep++ = 0; | |
144 | cclcnt = 1; | |
145 | c = getchar(); | |
146 | if (c == '^') { | |
147 | c = getchar(); | |
148 | ep[-2] = NCCL; | |
149 | } | |
150 | if (c == ']') | |
151 | cerror("Bad character class|Empty character class '[]' or '[^]' cannot match"); | |
152 | while (c != ']') { | |
153 | if (c == '\\' && any(peekchar(), "]-^\\")) | |
154 | c = getchar() | QUOTE; | |
155 | if (c == '\n' || c == EOF) | |
156 | cerror("Missing ]"); | |
157 | *ep++ = c; | |
158 | cclcnt++; | |
159 | if (ep >= &expbuf[ESIZE]) | |
160 | goto complex; | |
161 | c = getchar(); | |
162 | } | |
163 | lastep[1] = cclcnt; | |
164 | continue; | |
165 | } | |
166 | if (c == EOF) { | |
167 | ungetchar(EOF); | |
168 | c = '\\'; | |
169 | goto defchar; | |
170 | } | |
171 | *ep++ = CCHR; | |
172 | if (c == '\n') | |
173 | cerror("No newlines in re's|Can't escape newlines into regular expressions"); | |
174 | if (c < '1' || c > NBRA + '1') { | |
175 | *ep++ = c; | |
176 | continue; | |
177 | } | |
178 | c =- '1'; | |
179 | if (c >= nbra) | |
180 | cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s"); | |
181 | *ep++ = c | QUOTE; | |
182 | continue; | |
183 | ||
184 | case '\n': | |
185 | if (oknl) { | |
186 | ungetchar(c); | |
187 | *ep++ = CEOF; | |
188 | return (eof); | |
189 | } | |
190 | cerror("Badly formed re|Missing closing delimiter for regular expression"); | |
191 | ||
192 | case '$': | |
193 | if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') { | |
194 | *ep++ = CDOL; | |
195 | continue; | |
196 | } | |
197 | goto defchar; | |
198 | ||
199 | case '.': | |
200 | case '~': | |
201 | case '*': | |
202 | case '[': | |
203 | if (value(MAGIC)) | |
204 | goto magic; | |
205 | defchar: | |
206 | default: | |
207 | *ep++ = CCHR; | |
208 | *ep++ = c; | |
209 | continue; | |
210 | } | |
211 | } | |
212 | } | |
213 | ||
214 | cerror(s) | |
215 | char *s; | |
216 | { | |
217 | ||
218 | expbuf[0] = 0; | |
219 | error(s); | |
220 | } | |
221 | ||
222 | same(a, b) | |
223 | register int a, b; | |
224 | { | |
225 | ||
226 | return (a == b || value(IGNORECASE) && (a ^ b) == ' ' && letter(a) == letter(b)); | |
227 | } | |
228 | ||
229 | execute(gf, addr) | |
230 | int *addr; | |
231 | { | |
232 | register char *p1, *p2; | |
233 | register c; | |
234 | ||
235 | if (gf) { | |
236 | if (circfl) | |
237 | return (0); | |
238 | locs = p1 = loc2; | |
239 | } else { | |
240 | if (addr == zero) | |
241 | return (0); | |
242 | p1 = getline(*addr); | |
243 | locs = 0; | |
244 | } | |
245 | p2 = expbuf; | |
246 | if (circfl) { | |
247 | loc1 = p1; | |
248 | return (advance(p1, p2)); | |
249 | } | |
250 | /* fast check for first character */ | |
251 | if (*p2 == CCHR) { | |
252 | c = p2[1]; | |
253 | do { | |
254 | if (c != *p1 && (!value(IGNORECASE) || | |
255 | (c ^ *p1) != ' ' || letter(c) != letter(*p1))) | |
256 | continue; | |
257 | if (advance(p1, p2)) { | |
258 | loc1 = p1; | |
259 | return (1); | |
260 | } | |
261 | } while (*p1++); | |
262 | return (0); | |
263 | } | |
264 | /* regular algorithm */ | |
265 | do { | |
266 | if (advance(p1, p2)) { | |
267 | loc1 = p1; | |
268 | return (1); | |
269 | } | |
270 | } while (*p1++); | |
271 | return (0); | |
272 | } | |
273 | ||
/*
 * True for a character accepted by letter() or for an underscore.
 * The argument is parenthesized so that an expression argument such
 * as "x & 0177" is compared with '_' as a whole.
 */
#define	uletter(c)	(letter(c) || (c) == '_')
275 | ||
276 | advance(lp, ep) | |
277 | register char *lp, *ep; | |
278 | { | |
279 | register char *curlp; | |
280 | char *nextep, *sp, *sp1, c; | |
281 | ||
282 | for (;;) switch (*ep++) { | |
283 | ||
284 | case CCHR: | |
285 | if (*ep & QUOTE) { | |
286 | c = *ep++ & 0177; | |
287 | sp = braslist[c]; | |
288 | sp1 = braelist[c]; | |
289 | while (sp < sp1) { | |
290 | if (!same(*sp, *lp)) | |
291 | return (0); | |
292 | sp++, lp++; | |
293 | } | |
294 | continue; | |
295 | } | |
296 | if (!same(*ep, *lp)) | |
297 | return (0); | |
298 | ep++, lp++; | |
299 | continue; | |
300 | ||
301 | case CDOT: | |
302 | if (*lp++) | |
303 | continue; | |
304 | return (0); | |
305 | ||
306 | case CDOL: | |
307 | if (*lp == 0) | |
308 | continue; | |
309 | return (0); | |
310 | ||
311 | case CEOF: | |
312 | loc2 = lp; | |
313 | return (1); | |
314 | ||
315 | case CCL: | |
316 | if (cclass(ep, *lp++, 1)) { | |
317 | ep =+ *ep; | |
318 | continue; | |
319 | } | |
320 | return (0); | |
321 | ||
322 | case NCCL: | |
323 | if (cclass(ep, *lp++, 0)) { | |
324 | ep =+ *ep; | |
325 | continue; | |
326 | } | |
327 | return (0); | |
328 | ||
329 | case CBRA: | |
330 | braslist[*ep++] = lp; | |
331 | continue; | |
332 | ||
333 | case CKET: | |
334 | braelist[*ep++] = lp; | |
335 | continue; | |
336 | ||
337 | case CDOT|STAR: | |
338 | curlp = lp; | |
339 | while (*lp++) | |
340 | continue; | |
341 | goto star; | |
342 | ||
343 | case CCHR|STAR: | |
344 | curlp = lp; | |
345 | while (same(*lp, *ep)) | |
346 | lp++; | |
347 | lp++; | |
348 | ep++; | |
349 | goto star; | |
350 | ||
351 | case CCL|STAR: | |
352 | case NCCL|STAR: | |
353 | curlp = lp; | |
354 | while (cclass(ep, *lp++, ep[-1] == (CCL|STAR))) | |
355 | continue; | |
356 | ep =+ *ep; | |
357 | goto star; | |
358 | star: | |
359 | do { | |
360 | lp--; | |
361 | if (lp == locs) | |
362 | break; | |
363 | if (advance(lp, ep)) | |
364 | return (1); | |
365 | } while (lp > curlp); | |
366 | return (0); | |
367 | ||
368 | case CBRC: | |
369 | if (lp == expbuf) | |
370 | continue; | |
371 | if (uletter(*lp) && !uletter(lp[-1]) && !digit(lp[-1])) | |
372 | continue; | |
373 | return (0); | |
374 | ||
375 | case CLET: | |
376 | if (!uletter(*lp) && !digit(*lp)) | |
377 | continue; | |
378 | return (0); | |
379 | ||
380 | default: | |
381 | error("Re internal error@- if possible remember what you did and tell system staff"); | |
382 | } | |
383 | } | |
384 | ||
385 | cclass(set, c, af) | |
386 | register char *set; | |
387 | register c; | |
388 | int af; | |
389 | { | |
390 | register n; | |
391 | ||
392 | if (c == 0) | |
393 | return (0); | |
394 | if (value(IGNORECASE) && ucletter(c)) | |
395 | c = letter(c); | |
396 | n = *set++; | |
397 | while (--n) | |
398 | if (n > 2 && set[1] == '-') { | |
399 | if (c >= (set[0] & 0177) && c <= (set[2] & 0177)) | |
400 | return (af); | |
401 | set =+ 3; | |
402 | n =- 2; | |
403 | } else | |
404 | if ((*set++ & 0177) == c) | |
405 | return (af); | |
406 | return (!af); | |
407 | } | |
408 | ||
/*
 * Copy size bytes from "from" to "to", lowest address first.
 * Nothing is copied when size is zero or negative.
 */
int
copy(to, from, size)
	register char *from, *to;
	register int size;
{

	while (size > 0) {
		*to++ = *from++;
		size--;
	}
}
418 | } |