Commit | Line | Data |
---|---|---|
a36a91d2 | 1 | #ifndef lint |
ad651ab5 | 2 | static char sccsid[] = "@(#)spell.c 4.2 %G%"; |
a36a91d2 SL |
3 | #endif |
4 | ||
5 | #include "spell.h" | |
6 | #define DLEV 2 | |
7 | ||
8 | char *strcat(); | |
9 | int strip(); | |
10 | char *skipv(); | |
11 | int an(); | |
12 | int s(); | |
13 | int es(); | |
14 | int ily(); | |
15 | int ncy(); | |
16 | int CCe(); | |
17 | int VCe(); | |
18 | int bility(); | |
19 | int tion(); | |
20 | int ize(); | |
21 | int y_to_e(); | |
22 | int i_to_y(); | |
23 | int nop(); | |
24 | int metry(); | |
25 | ||
26 | struct suftab { | |
27 | char *suf; | |
28 | int (*p1)(); | |
29 | int n1; | |
30 | char *d1; | |
31 | char *a1; | |
32 | int (*p2)(); | |
33 | int n2; | |
34 | char *d2; | |
35 | char *a2; | |
36 | } suftab[] = { | |
37 | {"ssen",ily,4,"-y+iness","+ness" }, | |
38 | {"ssel",ily,4,"-y+i+less","+less" }, | |
39 | {"se",s,1,"","+s", es,2,"-y+ies","+es" }, | |
40 | {"s'",s,2,"","+'s"}, | |
41 | {"s",s,1,"","+s"}, | |
42 | {"ecn",ncy,1,"","-t+ce"}, | |
43 | {"ycn",ncy,1,"","-cy+t"}, | |
44 | {"ytilb",nop,0,"",""}, | |
45 | {"ytilib",bility,5,"-le+ility",""}, | |
46 | {"elbaif",i_to_y,4,"-y+iable",""}, | |
47 | {"elba",CCe,4,"-e+able","+able"}, | |
48 | {"yti",CCe,3,"-e+ity","+ity"}, | |
49 | {"ylb",y_to_e,1,"-e+y",""}, | |
50 | {"yl",ily,2,"-y+ily","+ly"}, | |
51 | {"laci",strip,2,"","+al"}, | |
52 | {"latnem",strip,2,"","+al"}, | |
53 | {"lanoi",strip,2,"","+al"}, | |
54 | {"tnem",strip,4,"","+ment"}, | |
55 | {"gni",CCe,3,"-e+ing","+ing"}, | |
56 | {"reta",nop,0,"",""}, | |
57 | {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"}, | |
58 | {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"}, | |
59 | {"citsi",strip,2,"","+ic"}, | |
60 | {"cihparg",i_to_y,1,"-y+ic",""}, | |
61 | {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"}, | |
62 | {"cirtem",i_to_y,1,"-y+ic",""}, | |
63 | {"yrtem",metry,0,"-ry+er",""}, | |
64 | {"cigol",i_to_y,1,"-y+ic",""}, | |
65 | {"tsigol",i_to_y,2,"-y+ist",""}, | |
66 | {"tsi",VCe,3,"-e+ist","+ist"}, | |
67 | {"msi",VCe,3,"-e+ism","+ist"}, | |
68 | {"noitacif",i_to_y,6,"-y+ication",""}, | |
69 | {"noitazi",ize,5,"-e+ation",""}, | |
70 | {"rota",tion,2,"-e+or",""}, | |
71 | {"noit",tion,3,"-e+ion","+ion"}, | |
72 | {"naino",an,3,"","+ian"}, | |
73 | {"na",an,1,"","+n"}, | |
74 | {"evit",tion,3,"-e+ive","+ive"}, | |
75 | {"ezi",CCe,3,"-e+ize","+ize"}, | |
76 | {"pihs",strip,4,"","+ship"}, | |
77 | {"dooh",ily,4,"-y+hood","+hood"}, | |
78 | {"ekil",strip,4,"","+like"}, | |
79 | 0 | |
80 | }; | |
81 | ||
/*
 * Prefixes recognised by lookuppref(), tried in this order and closed
 * by a null pointer.  A prefix that begins with another, shorter entry
 * has to be listed first or the shorter one would always win.
 */
char *preftab[] = {
	"anti",		"bio",		"dis",		"electro",
	"en",		"fore",		"hyper",	"intra",
	"inter",	"iso",		"kilo",		"magneto",
	"meta",		"micro",	"milli",	"mis",
	"mono",		"multi",	"non",		"out",
	"over",		"photo",	"poly",		"pre",
	"pseudo",	"re",		"semi",		"stereo",
	"sub",		"super",	"thermo",	"ultra",
	"under",	/* must precede un */
	"un",
	0
};
119 | ||
int	vflag;		/* counts -v options; nonzero makes putw() record derivations */
int	xflag;		/* counts -x options; nonzero makes dict() print each probe */
char	word[100];	/* the word under test; the rule routines edit it in place */
char	original[100];	/* the word exactly as it was read, used for output */
char	*deriv[40];	/* derivation strings, indexed by analysis level */
char	affix[40];	/* derivation text printed in front of the word */
126 | ||
127 | main(argc,argv) | |
128 | char **argv; | |
129 | { | |
130 | register char *ep, *cp; | |
131 | register char *dp; | |
132 | int fold; | |
133 | int j; | |
134 | FILE *file, *found; | |
135 | if(!prime(argc,argv)) { | |
136 | fprintf(stderr, | |
137 | "spell: cannot initialize hash table\n"); | |
138 | exit(1); | |
139 | } | |
140 | found = fopen(argv[2],"w"); | |
141 | for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++) | |
142 | switch(argv[0][1]) { | |
143 | case 'b': | |
144 | ise(); | |
145 | break; | |
146 | case 'v': | |
147 | vflag++; | |
148 | break; | |
149 | case 'x': | |
150 | xflag++; | |
151 | break; | |
152 | } | |
153 | for(;; fprintf(file,"%s%s\n",affix,original)) { | |
154 | affix[0] = 0; | |
155 | file = found; | |
156 | for(ep=word;(*ep=j=getchar())!='\n';ep++) | |
ad651ab5 JB |
157 | if(j == EOF) { |
158 | fclose(found); | |
a36a91d2 | 159 | exit(0); |
ad651ab5 | 160 | } |
a36a91d2 SL |
161 | for(cp=word,dp=original; cp<ep; ) |
162 | *dp++ = *cp++; | |
163 | *dp = 0; | |
164 | fold = 0; | |
165 | for(cp=word;cp<ep;cp++) | |
166 | if(islower(*cp)) | |
167 | goto lcase; | |
168 | if(putsuf(ep,".",0)) | |
169 | continue; | |
170 | ++fold; | |
171 | for(cp=original+1,dp=word+1;dp<ep;dp++,cp++) | |
172 | *dp = Tolower(*cp); | |
173 | lcase: | |
174 | if(putsuf(ep,".",0)||suffix(ep,0)) | |
175 | continue; | |
176 | if(isupper(word[0])) { | |
177 | for(cp=original,dp=word; *dp = *cp++; dp++) | |
178 | if (fold) *dp = Tolower(*dp); | |
179 | word[0] = Tolower(word[0]); | |
180 | goto lcase; | |
181 | } | |
182 | file = stdout; | |
183 | } | |
184 | } | |
185 | ||
186 | suffix(ep,lev) | |
187 | char *ep; | |
188 | { | |
189 | register struct suftab *t; | |
190 | register char *cp, *sp; | |
191 | lev += DLEV; | |
192 | deriv[lev] = deriv[lev-1] = 0; | |
193 | for(t= &suftab[0];sp=t->suf;t++) { | |
194 | cp = ep; | |
195 | while(*sp) | |
196 | if(*--cp!=*sp++) | |
197 | goto next; | |
198 | for(sp=cp; --sp>=word&&!vowel(*sp); ) ; | |
199 | if(sp<word) | |
200 | return(0); | |
201 | if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1)) | |
202 | return(1); | |
203 | if(t->p2!=0) { | |
204 | deriv[lev] = deriv[lev+1] = 0; | |
205 | return((*t->p2)(ep-t->n2,t->d2,t->a2,lev)); | |
206 | } | |
207 | return(0); | |
208 | next: ; | |
209 | } | |
210 | return(0); | |
211 | } | |
212 | ||
/*
 * Rule routine that never succeeds.  suftab names it for spellings
 * that must stop the search without any further analysis.
 */
int
nop()
{
	return 0;
}
217 | ||
/*
 * Accept the text ending at ep if putsuf() finds it (recording a as
 * the derivation at level lev), or else if suffix() can strip it
 * further.  d is unused here.  Returns 1 on success, 0 on failure.
 */
int
strip(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(putsuf(ep,a,lev))
		return(1);
	if(suffix(ep,lev))
		return(1);
	return(0);
}
223 | ||
224 | s(ep,d,a,lev) | |
225 | char *ep,*d,*a; | |
226 | { | |
227 | if(lev>DLEV+1) | |
228 | return(0); | |
229 | if(*ep=='s'&&ep[-1]=='s') | |
230 | return(0); | |
231 | return(strip(ep,d,a,lev)); | |
232 | } | |
233 | ||
234 | an(ep,d,a,lev) | |
235 | char *ep,*d,*a; | |
236 | { | |
237 | if(!isupper(*word)) /*must be proper name*/ | |
238 | return(0); | |
239 | return(putsuf(ep,a,lev)); | |
240 | } | |
241 | ||
/*
 * Put an 'e' at ep and try the text that now ends just after it,
 * reporting d as the derivation.  a is unused.
 */
int
ize(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	*ep = 'e';
	return(strip(ep+1,"",d,lev));
}
248 | ||
/*
 * Store an 'e' at ep, then try the stem that ends one character
 * later.  The derivation reported is d; a is unused.
 */
int
y_to_e(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'e';
	return(strip(&ep[1],"",d,lev));
}
255 | ||
/*
 * If the stem ends in 'i' hand it to i_to_y(); otherwise strip the
 * suffix as it stands.
 */
int
ily(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	return(ep[-1]=='i' ? i_to_y(ep,d,a,lev) : strip(ep,d,a,lev));
}
264 | ||
265 | ncy(ep,d,a,lev) | |
266 | char *ep, *d, *a; | |
267 | { | |
268 | if(skipv(skipv(ep-1))<word) | |
269 | return(0); | |
270 | ep[-1] = 't'; | |
271 | return(strip(ep,d,a,lev)); | |
272 | } | |
273 | ||
/*
 * Store an 'l' at ep and let y_to_e() add the 'e' after it, so the
 * stem is tried with "le" appended.
 */
int
bility(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	*ep = 'l';
	return(y_to_e(ep+1,d,a,lev));
}
280 | ||
/*
 * When the stem ends in 'i', turn that 'i' into 'y' and report d as
 * the derivation; otherwise leave the stem alone and report a.
 * Either way the stem goes on to strip().
 */
int
i_to_y(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(ep[-1]!='i')
		return(strip(ep,"",a,lev));
	ep[-1] = 'y';
	return(strip(ep,"",d,lev));
}
290 | ||
291 | es(ep,d,a,lev) | |
292 | char *ep,*d,*a; | |
293 | { | |
294 | if(lev>DLEV) | |
295 | return(0); | |
296 | switch(ep[-1]) { | |
297 | default: | |
298 | return(0); | |
299 | case 'i': | |
300 | return(i_to_y(ep,d,a,lev)); | |
301 | case 's': | |
302 | case 'h': | |
303 | case 'z': | |
304 | case 'x': | |
305 | return(strip(ep,d,a,lev)); | |
306 | } | |
307 | } | |
308 | ||
/*
 * Overwrite the two characters before ep with "er" and hand the
 * result to strip().
 */
int
metry(ep,d,a,lev)
char *ep, *d,*a;
int lev;
{
	register char *tp;

	tp = ep-2;
	*tp++ = 'e';
	*tp = 'r';
	return(strip(ep,d,a,lev));
}
316 | ||
/*
 * Rule shared by the "-tion", "-tor" and "-tive" entries.  The choice
 * depends on the character two places before ep: 'c' or 'r' means the
 * stem is looked up as it stands, 'a' means an 'e' is put at ep first,
 * anything else fails.
 */
int
tion(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	register int c;

	c = ep[-2];
	if(c=='c' || c=='r')
		return(putsuf(ep,a,lev));
	if(c=='a')
		return(y_to_e(ep,d,a,lev));
	return(0);
}
329 | ||
/*
 * Possible consonant-consonant-e ending.
 *
 * Decides from the last two characters of the stem whether an 'e'
 * should be restored (y_to_e) or whether the stem should be passed on
 * to VCe().  The cases from 's' down deliberately run into one
 * another.
 */
int
CCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char prev;

	switch(ep[-1]) {
	case 'l':
		prev = ep[-2];
		/* vowel, l, r or w before the 'l': leave it to VCe */
		if(!vowel(prev) && prev!='l' && prev!='r' && prev!='w')
			return(y_to_e(ep,d,a,lev));
		break;
	case 's':
		if(ep[-2]=='s')
			break;
		/* fall through */
	case 'c':
	case 'g':
		if(*ep=='a')
			return(0);
		/* fall through */
	case 'v':
	case 'z':
		if(vowel(ep[-2]))
			break;
		/* fall through */
	case 'u':
		if(y_to_e(ep,d,a,lev))
			return(1);
		/* only a stem ending in "ng" gets a second chance */
		if(ep[-2]!='n' || ep[-1]!='g')
			return(0);
	}
	return(VCe(ep,d,a,lev));
}
366 | ||
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem ending in 'e' fails outright.  If the stem ends in a
 * non-vowel preceded by a vowel, it is first tried with an 'e' put
 * back at ep (derivation d); when that fails the character that was at
 * ep is restored.  The stem is then tried without the 'e' via strip().
 */
int
VCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char last, saved;

	last = ep[-1];
	if(last=='e')
		return(0);
	if(!vowel(last) && vowel(ep[-2])) {
		saved = *ep;
		*ep = 'e';
		if(putsuf(ep+1,d,lev))
			return(1);
		if(suffix(ep+1,lev))
			return(1);
		*ep = saved;
	}
	return(strip(ep,d,a,lev));
}
385 | ||
386 | char *lookuppref(wp,ep) | |
387 | char **wp; | |
388 | char *ep; | |
389 | { | |
390 | register char **sp; | |
391 | register char *bp,*cp; | |
392 | for(sp=preftab;*sp;sp++) { | |
393 | bp = *wp; | |
394 | for(cp= *sp;*cp;cp++,bp++) | |
395 | if(Tolower(*bp)!=*cp) | |
396 | goto next; | |
397 | for(cp=bp;cp<ep;cp++) | |
398 | if(vowel(*cp)) { | |
399 | *wp = bp; | |
400 | return(*sp); | |
401 | } | |
402 | next: ; | |
403 | } | |
404 | return(0); | |
405 | } | |
406 | ||
407 | putsuf(ep,a,lev) | |
408 | char *ep,*a; | |
409 | { | |
410 | register char *cp; | |
411 | char *bp; | |
412 | register char *pp; | |
413 | int val = 0; | |
414 | char space[20]; | |
415 | deriv[lev] = a; | |
416 | if(putw(word,ep,lev)) | |
417 | return(1); | |
418 | bp = word; | |
419 | pp = space; | |
420 | deriv[lev+1] = pp; | |
421 | while(cp=lookuppref(&bp,ep)) { | |
422 | *pp++ = '+'; | |
423 | while(*pp = *cp++) | |
424 | pp++; | |
425 | if(putw(bp,ep,lev+1)) { | |
426 | val = 1; | |
427 | break; | |
428 | } | |
429 | } | |
430 | deriv[lev+1] = deriv[lev+2] = 0; | |
431 | return(val); | |
432 | } | |
433 | ||
434 | putw(bp,ep,lev) | |
435 | char *bp,*ep; | |
436 | { | |
437 | register i, j; | |
438 | char duple[3]; | |
439 | if(ep-bp<=1) | |
440 | return(0); | |
441 | if(vowel(*ep)) { | |
442 | if(monosyl(bp,ep)) | |
443 | return(0); | |
444 | } | |
445 | i = dict(bp,ep); | |
446 | if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) { | |
447 | ep--; | |
448 | deriv[++lev] = duple; | |
449 | duple[0] = '+'; | |
450 | duple[1] = *ep; | |
451 | duple[2] = 0; | |
452 | i = dict(bp,ep); | |
453 | } | |
454 | if(vflag==0||i==0) | |
455 | return(i); | |
456 | j = lev; | |
457 | do { | |
458 | if(deriv[j]) | |
459 | strcat(affix,deriv[j]); | |
460 | } while(--j>0); | |
461 | strcat(affix,"\t"); | |
462 | return(i); | |
463 | } | |
464 | ||
465 | ||
/*
 * Return 1 when the text from bp up to (not including) ep is at least
 * two characters long, ends in a non-vowel other than 'x' or 'w', has
 * a vowel immediately before that, and has no vowel anywhere earlier.
 * Return 0 in every other case.
 */
int
monosyl(bp,ep)
char *bp, *ep;
{
	register char *last, *mid, *cp;

	if(ep<bp+2)
		return(0);
	last = ep-1;
	mid = ep-2;
	if(vowel(*last))
		return(0);
	if(!vowel(*mid))
		return(0);
	if(*last=='x' || *last=='w')
		return(0);
	for(cp=mid-1; cp>=bp; cp--)
		if(vowel(*cp))
			return(0);
	return(1);
}
479 | ||
480 | char * | |
481 | skipv(s) | |
482 | char *s; | |
483 | { | |
484 | if(s>=word&&vowel(*s)) | |
485 | s--; | |
486 | while(s>=word&&!vowel(*s)) | |
487 | s--; | |
488 | return(s); | |
489 | } | |
490 | ||
/*
 * Return 1 if c, after Tolower, is one of a, e, i, o, u or y;
 * otherwise return 0.
 */
int
vowel(c)
int c;
{
	register char *vp;
	register int lc;

	lc = Tolower(c);
	for(vp="aeiouy"; *vp; vp++)
		if(*vp==lc)
			return(1);
	return(0);
}
504 | ||
505 | /* crummy way to Britishise */ | |
506 | ise() | |
507 | { | |
508 | register struct suftab *p; | |
509 | for(p = suftab;p->suf;p++) { | |
510 | ztos(p->suf); | |
511 | ztos(p->d1); | |
512 | ztos(p->a1); | |
513 | } | |
514 | } | |
/*
 * Change every 'z' in the NUL-terminated string s to 's', in place.
 */
int
ztos(s)
char *s;
{
	while(*s) {
		if(*s=='z')
			*s = 's';
		s++;
	}
}
522 | ||
523 | dict(bp,ep) | |
524 | char *bp, *ep; | |
525 | { | |
526 | register char *wp; | |
527 | long h; | |
528 | register long *lp; | |
529 | register i; | |
530 | if(xflag) | |
531 | printf("=%.*s\n",ep-bp,bp); | |
532 | for(i=0; i<NP; i++) { | |
533 | for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp) | |
534 | h += *wp * *lp; | |
535 | h += '\n' * *lp; | |
536 | h %= p[i]; | |
537 | if(get(h)==0) | |
538 | return(0); | |
539 | } | |
540 | return(1); | |
541 | } |