Commit | Line | Data |
---|---|---|
3b381e99 BJ |
1 | /* |
2 | * diction -- print all sentences containing one of the default phrases | |
3 | * | |
4 | * status returns: | |
5 | * 0 - ok, and some matches | |
6 | * 1 - ok, but no matches | |
7 | * 2 - some error | |
8 | */ | |
9 | ||
10 | #include <stdio.h> | |
11 | #include <ctype.h> | |
12 | ||
#define MAXSIZ 6500	/* number of nodes in the pool w[] */
#define QSIZE 650	/* number of slots in the queue used by cfail() */

/*
 * One node of the phrase-matching machine built by cgotofn() and cfail().
 */
struct words {
	char inp;		/* character this node tests for; 0 while unused */
	char out;		/* nonzero at the end of a phrase: phrase length
				   minus one, or 0377 for a '~' phrase */
	struct words *nst;	/* node entered when inp matches */
	struct words *link;	/* alternative node to try when inp does not match */
	struct words *fail;	/* node to resume from on mismatch; set by cfail() */
};

struct words w[MAXSIZ];		/* node pool; w is the start state */
struct words *smax;		/* most recently allocated node in w[] */
struct words *q;		/* scratch node pointer used by cfail() */
22 | ||
/* Command-line state. */
int fflag;		/* incremented when -f is given */
int nflag = 1;		/* nonzero: use the default phrase file; -n clears it */
char *filename;		/* phrase file named with -f */
int nfile;		/* number of input file arguments */

/* Results. */
int nsucc;		/* set to 1 once any phrase has matched */
long nsent = 0;		/* sentence terminators counted by convert() */
long nhits = 0;		/* phrase matches counted by execute() */

/* Output state shared by execute() and outc(). */
char *nlp;		/* next buffer position outc() will print */
char *begp;		/* start of the current match; outc() prints '[' here */
char *endp;		/* end of the current match; outc() prints ']' here */
int oct = 0;		/* characters printed since the last line break */

/* Phrase sources read by getargc(). */
FILE *wordf;		/* open phrase file */
char *argptr;		/* in-memory phrase text, read when wordf is not set */
35 | ||
36 | main(argc, argv) | |
37 | char **argv; | |
38 | { | |
39 | while (--argc > 0 && (++argv)[0][0]=='-') | |
40 | switch (argv[0][1]) { | |
41 | ||
42 | case 'f': | |
43 | fflag++; | |
44 | filename = ++argv; | |
45 | argc--; | |
46 | continue; | |
47 | ||
48 | case 'n': | |
49 | nflag = 0; | |
50 | continue; | |
51 | case 'd': | |
52 | continue; | |
53 | default: | |
54 | fprintf(stderr, "diction: unknown flag\n"); | |
55 | continue; | |
56 | } | |
57 | out: | |
58 | if(nflag){ | |
59 | wordf = fopen(DICT,"r"); | |
60 | if(wordf == NULL){ | |
61 | fprintf(stderr,"diction: can't open default dictionary\n"); | |
62 | exit(2); | |
63 | } | |
64 | } | |
65 | else { | |
66 | wordf = fopen(*filename,"r"); | |
67 | if(wordf == NULL){ | |
68 | fprintf(stderr,"diction: can't open %s\n",filename); | |
69 | exit(2); | |
70 | } | |
71 | } | |
72 | ||
73 | cgotofn(); | |
74 | cfail(); | |
75 | nfile = argc; | |
76 | if (argc<=0) { | |
77 | execute((char *)NULL); | |
78 | } | |
79 | else while (--argc >= 0) { | |
80 | execute(*argv); | |
81 | argv++; | |
82 | } | |
83 | printf("number of sentences %ld number of hits %ld\n",nsent,nhits); | |
84 | exit(nsucc == 0); | |
85 | } | |
86 | ||
87 | execute(file) | |
88 | char *file; | |
89 | { | |
90 | register char *p; | |
91 | register struct words *c; | |
92 | register ccount; | |
93 | struct words *savc; | |
94 | char *savp; | |
95 | int savct; | |
96 | int scr; | |
97 | char buf[1024]; | |
98 | int f; | |
99 | int hit; | |
100 | if (file) { | |
101 | if ((f = open(file, 0)) < 0) { | |
102 | fprintf(stderr, "diction: can't open %s\n", file); | |
103 | exit(2); | |
104 | } | |
105 | } | |
106 | else f = 0; | |
107 | ccount = 0; | |
108 | p = buf; | |
109 | nlp = p; | |
110 | c = w; | |
111 | oct = hit = 0; | |
112 | savc = savp = 0; | |
113 | for (;;) { | |
114 | if (--ccount <= 0) { | |
115 | if (p == &buf[1024]) p = buf; | |
116 | if (p > &buf[512]) { | |
117 | if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; | |
118 | } | |
119 | else if ((ccount = read(f, p, 512)) <= 0) break; | |
120 | convert(p,ccount); | |
121 | } | |
122 | if(p == &buf[1024])p=buf; | |
123 | nstate: | |
124 | if (c->inp == *p) { | |
125 | c = c->nst; | |
126 | } | |
127 | else if (c->link != 0) { | |
128 | c = c->link; | |
129 | goto nstate; | |
130 | } | |
131 | else { | |
132 | if(savp != 0){ | |
133 | c=savc; | |
134 | p=savp; | |
135 | if(ccount > savct)ccount += savct; | |
136 | else ccount = savct; | |
137 | savc=savp=0; | |
138 | goto hadone; | |
139 | } | |
140 | c = c->fail; | |
141 | if (c==0) { | |
142 | c = w; | |
143 | istate: | |
144 | if (c->inp == *p) { | |
145 | c = c->nst; | |
146 | } | |
147 | else if (c->link != 0) { | |
148 | c = c->link; | |
149 | goto istate; | |
150 | } | |
151 | } | |
152 | else goto nstate; | |
153 | } | |
154 | if(c->out){ | |
155 | if((c->inp == *(p+1)) && (c->nst != 0)){ | |
156 | savp=p; | |
157 | savc=c; | |
158 | savct=ccount; | |
159 | goto cont; | |
160 | } | |
161 | else if(c->link != 0){ | |
162 | savc=c; | |
163 | while((savc=savc->link)!= 0){ | |
164 | if(savc->inp == *(p+1)){ | |
165 | savp=p; | |
166 | savc=c; | |
167 | savct=ccount; | |
168 | goto cont; | |
169 | } | |
170 | } | |
171 | } | |
172 | hadone: | |
173 | savc=savp=0; | |
174 | if(c->out == (char)(0377)){ | |
175 | c=w; | |
176 | goto nstate; | |
177 | } | |
178 | begp = p - (c->out); | |
179 | if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp); | |
180 | endp=p; | |
181 | hit = 1; | |
182 | nhits++; | |
183 | if (*p++ == '.') { | |
184 | if (--ccount <= 0) { | |
185 | if (p == &buf[1024]) p = buf; | |
186 | if (p > &buf[512]) { | |
187 | if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; | |
188 | } | |
189 | else if ((ccount = read(f, p, 512)) <= 0) break; | |
190 | convert(p,ccount); | |
191 | } | |
192 | } | |
193 | succeed: nsucc = 1; | |
194 | { | |
195 | if (p <= nlp) { | |
196 | outc(&buf[1024]); | |
197 | nlp = buf; | |
198 | } | |
199 | outc(p); | |
200 | } | |
201 | nomatch: | |
202 | nlp = p; | |
203 | c = w; | |
204 | begp = endp = 0; | |
205 | continue; | |
206 | } | |
207 | cont: | |
208 | if (*p++ == '.'){ | |
209 | if(hit){ | |
210 | if(p <= nlp){ | |
211 | outc(&buf[1024]); | |
212 | nlp = buf; | |
213 | } | |
214 | outc(p); | |
215 | putchar('\n'); putchar('\n'); | |
216 | } | |
217 | hit = 0; | |
218 | oct = 0; | |
219 | nlp = p; | |
220 | c = w; | |
221 | begp = endp = 0; | |
222 | } | |
223 | } | |
224 | close(f); | |
225 | } | |
226 | ||
227 | getargc() | |
228 | { | |
229 | register c; | |
230 | if (wordf){ | |
231 | if((c=getc(wordf))==EOF){ | |
232 | fclose(wordf); | |
233 | if(nflag && fflag){ | |
234 | nflag=0; | |
235 | wordf=fopen(*filename,"r"); | |
236 | if(wordf == NULL){ | |
237 | fprintf("can't open %s\n",filename); | |
238 | exit(2); | |
239 | } | |
240 | return(getc(wordf)); | |
241 | } | |
242 | else return(EOF); | |
243 | } | |
244 | else return(c); | |
245 | } | |
246 | if ((c = *argptr++) == '\0') | |
247 | return(EOF); | |
248 | return(c); | |
249 | } | |
250 | ||
251 | cgotofn() { | |
252 | register c; | |
253 | register struct words *s; | |
254 | register ct; | |
255 | int neg; | |
256 | ||
257 | s = smax = w; | |
258 | neg = ct = 0; | |
259 | nword: for(;;) { | |
260 | c = getargc(); | |
261 | if(c == '~'){ | |
262 | neg++; | |
263 | c = getargc(); | |
264 | } | |
265 | if (c==EOF) | |
266 | return; | |
267 | if (c == '\n') { | |
268 | if(neg)s->out = 0377; | |
269 | else s->out = ct-1; | |
270 | neg = ct = 0; | |
271 | s = w; | |
272 | } else { | |
273 | loop: if (s->inp == c) { | |
274 | s = s->nst; | |
275 | ct++; | |
276 | continue; | |
277 | } | |
278 | if (s->inp == 0) goto enter; | |
279 | if (s->link == 0) { | |
280 | if (smax >= &w[MAXSIZ - 1]) overflo(); | |
281 | s->link = ++smax; | |
282 | s = smax; | |
283 | goto enter; | |
284 | } | |
285 | s = s->link; | |
286 | goto loop; | |
287 | } | |
288 | } | |
289 | ||
290 | enter: | |
291 | do { | |
292 | s->inp = c; | |
293 | ct++; | |
294 | if (smax >= &w[MAXSIZ - 1]) overflo(); | |
295 | s->nst = ++smax; | |
296 | s = smax; | |
297 | } while ((c = getargc()) != '\n' && c!=EOF); | |
298 | if(neg)smax->out = 0377; | |
299 | else smax->out = ct-1; | |
300 | neg = ct = 0; | |
301 | s = w; | |
302 | if (c != EOF) | |
303 | goto nword; | |
304 | } | |
305 | ||
/*
 * overflo -- report that a fixed-size table is full and exit with status 2.
 */
overflo()
{
	fputs("wordlist too large\n", stderr);
	exit(2);
}
310 | cfail() { | |
311 | struct words *queue[QSIZE]; | |
312 | struct words **front, **rear; | |
313 | struct words *state; | |
314 | int bstart; | |
315 | register char c; | |
316 | register struct words *s; | |
317 | s = w; | |
318 | front = rear = queue; | |
319 | init: if ((s->inp) != 0) { | |
320 | *rear++ = s->nst; | |
321 | if (rear >= &queue[QSIZE - 1]) overflo(); | |
322 | } | |
323 | if ((s = s->link) != 0) { | |
324 | goto init; | |
325 | } | |
326 | ||
327 | while (rear!=front) { | |
328 | s = *front; | |
329 | if (front == &queue[QSIZE-1]) | |
330 | front = queue; | |
331 | else front++; | |
332 | cloop: if ((c = s->inp) != 0) { | |
333 | bstart=0; | |
334 | *rear = (q = s->nst); | |
335 | if (front < rear) | |
336 | if (rear >= &queue[QSIZE-1]) | |
337 | if (front == queue) overflo(); | |
338 | else rear = queue; | |
339 | else rear++; | |
340 | else | |
341 | if (++rear == front) overflo(); | |
342 | state = s->fail; | |
343 | floop: if (state == 0){ state = w;bstart=1;} | |
344 | if (state->inp == c) { | |
345 | qloop: q->fail = state->nst; | |
346 | if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out; | |
347 | if((q=q->link) != 0)goto qloop; | |
348 | } | |
349 | else if ((state = state->link) != 0) | |
350 | goto floop; | |
351 | else if(bstart==0){state=0; goto floop;} | |
352 | } | |
353 | if ((s = s->link) != 0) | |
354 | goto cloop; | |
355 | } | |
356 | /* for(s=w;s<=smax;s++) | |
357 | printf("s %d ch %c out %d nst %d link %d fail %d\n",s, | |
358 | s->inp,s->out,s->nst,s->link,s->fail); | |
359 | */ | |
360 | } | |
361 | convert(p,ccount) | |
362 | char *p; | |
363 | { | |
364 | int ct; | |
365 | char *pt; | |
366 | for(pt=p,ct=ccount;--ct>=0;pt++){ | |
367 | if(isupper(*pt))*pt=tolower(*pt); | |
368 | else if(isspace(*pt))*pt=' '; | |
369 | else if(*pt=='.' || *pt=='?'||*pt=='!'){ | |
370 | *pt='.'; | |
371 | nsent++; | |
372 | } | |
373 | else if(ispunct(*pt))*pt=' '; | |
374 | } | |
375 | } | |
376 | outc(addr) | |
377 | char *addr; | |
378 | { | |
379 | ||
380 | while(nlp < addr){ | |
381 | if(oct++ > 70 && *nlp == ' ' && nlp != begp && nlp != endp){ | |
382 | oct=0; | |
383 | putchar('\n'); | |
384 | } | |
385 | if(nlp == begp){ | |
386 | putchar('['); | |
387 | } | |
388 | putchar(*nlp); | |
389 | if(nlp == endp){ | |
390 | putchar(']'); | |
391 | } | |
392 | nlp++; | |
393 | } | |
394 | } |