Commit | Line | Data |
---|---|---|
a36a91d2 SL |
1 | #ifndef lint |
2 | static char sccsid[] = "@(#)spell.c 4.1 %G%"; | |
3 | #endif | |
4 | ||
5 | #include "spell.h" | |
6 | #define DLEV 2 | |
7 | ||
/* Library routine used by putw() to build the affix string. */
char *strcat();

/* Helper defined later in this file; returns a pointer into word[]. */
char *skipv();

/*
 * Suffix actions.  These are stored as function pointers in suftab[]
 * and must therefore be declared before the table is initialized.
 */
int strip(), an(), s(), es(), ily(), ncy(), CCe(), VCe();
int bility(), tion(), ize(), y_to_e(), i_to_y(), nop(), metry();
25 | ||
26 | struct suftab { | |
27 | char *suf; | |
28 | int (*p1)(); | |
29 | int n1; | |
30 | char *d1; | |
31 | char *a1; | |
32 | int (*p2)(); | |
33 | int n2; | |
34 | char *d2; | |
35 | char *a2; | |
36 | } suftab[] = { | |
37 | {"ssen",ily,4,"-y+iness","+ness" }, | |
38 | {"ssel",ily,4,"-y+i+less","+less" }, | |
39 | {"se",s,1,"","+s", es,2,"-y+ies","+es" }, | |
40 | {"s'",s,2,"","+'s"}, | |
41 | {"s",s,1,"","+s"}, | |
42 | {"ecn",ncy,1,"","-t+ce"}, | |
43 | {"ycn",ncy,1,"","-cy+t"}, | |
44 | {"ytilb",nop,0,"",""}, | |
45 | {"ytilib",bility,5,"-le+ility",""}, | |
46 | {"elbaif",i_to_y,4,"-y+iable",""}, | |
47 | {"elba",CCe,4,"-e+able","+able"}, | |
48 | {"yti",CCe,3,"-e+ity","+ity"}, | |
49 | {"ylb",y_to_e,1,"-e+y",""}, | |
50 | {"yl",ily,2,"-y+ily","+ly"}, | |
51 | {"laci",strip,2,"","+al"}, | |
52 | {"latnem",strip,2,"","+al"}, | |
53 | {"lanoi",strip,2,"","+al"}, | |
54 | {"tnem",strip,4,"","+ment"}, | |
55 | {"gni",CCe,3,"-e+ing","+ing"}, | |
56 | {"reta",nop,0,"",""}, | |
57 | {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"}, | |
58 | {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"}, | |
59 | {"citsi",strip,2,"","+ic"}, | |
60 | {"cihparg",i_to_y,1,"-y+ic",""}, | |
61 | {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"}, | |
62 | {"cirtem",i_to_y,1,"-y+ic",""}, | |
63 | {"yrtem",metry,0,"-ry+er",""}, | |
64 | {"cigol",i_to_y,1,"-y+ic",""}, | |
65 | {"tsigol",i_to_y,2,"-y+ist",""}, | |
66 | {"tsi",VCe,3,"-e+ist","+ist"}, | |
67 | {"msi",VCe,3,"-e+ism","+ist"}, | |
68 | {"noitacif",i_to_y,6,"-y+ication",""}, | |
69 | {"noitazi",ize,5,"-e+ation",""}, | |
70 | {"rota",tion,2,"-e+or",""}, | |
71 | {"noit",tion,3,"-e+ion","+ion"}, | |
72 | {"naino",an,3,"","+ian"}, | |
73 | {"na",an,1,"","+n"}, | |
74 | {"evit",tion,3,"-e+ive","+ive"}, | |
75 | {"ezi",CCe,3,"-e+ize","+ize"}, | |
76 | {"pihs",strip,4,"","+ship"}, | |
77 | {"dooh",ily,4,"-y+hood","+hood"}, | |
78 | {"ekil",strip,4,"","+like"}, | |
79 | 0 | |
80 | }; | |
81 | ||
/*
 * Prefixes that lookuppref() may strip from the front of a word.
 * lookuppref() returns the first entry that matches, so a longer prefix
 * has to be listed before any shorter prefix it begins with.
 * The list ends with a 0 entry.
 */
char *preftab[] = {
	"anti", "bio", "dis", "electro", "en",
	"fore", "hyper", "intra", "inter", "iso",
	"kilo", "magneto", "meta", "micro", "milli",
	"mis", "mono", "multi", "non", "out",
	"over", "photo", "poly", "pre", "pseudo",
	"re", "semi", "stereo", "sub", "super",
	"thermo", "ultra",
	"under",	/*must precede un*/
	"un",
	0
};
119 | ||
/* Option counters bumped by main(): -v (list derivations), -x (trace dict()). */
int vflag, xflag;

/*
 * word[] is the working copy of the current input line and is edited in
 * place by the suffix actions; original[] keeps the line as it was read.
 */
char word[100], original[100];

/* Derivation strings for the current attempt, indexed by level. */
char *deriv[40];

/* Text printed in front of original[] on output; filled in by putw(). */
char affix[40];
126 | ||
127 | main(argc,argv) | |
128 | char **argv; | |
129 | { | |
130 | register char *ep, *cp; | |
131 | register char *dp; | |
132 | int fold; | |
133 | int j; | |
134 | FILE *file, *found; | |
135 | if(!prime(argc,argv)) { | |
136 | fprintf(stderr, | |
137 | "spell: cannot initialize hash table\n"); | |
138 | exit(1); | |
139 | } | |
140 | found = fopen(argv[2],"w"); | |
141 | for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++) | |
142 | switch(argv[0][1]) { | |
143 | case 'b': | |
144 | ise(); | |
145 | break; | |
146 | case 'v': | |
147 | vflag++; | |
148 | break; | |
149 | case 'x': | |
150 | xflag++; | |
151 | break; | |
152 | } | |
153 | for(;; fprintf(file,"%s%s\n",affix,original)) { | |
154 | affix[0] = 0; | |
155 | file = found; | |
156 | for(ep=word;(*ep=j=getchar())!='\n';ep++) | |
157 | if(j == EOF) | |
158 | exit(0); | |
159 | for(cp=word,dp=original; cp<ep; ) | |
160 | *dp++ = *cp++; | |
161 | *dp = 0; | |
162 | fold = 0; | |
163 | for(cp=word;cp<ep;cp++) | |
164 | if(islower(*cp)) | |
165 | goto lcase; | |
166 | if(putsuf(ep,".",0)) | |
167 | continue; | |
168 | ++fold; | |
169 | for(cp=original+1,dp=word+1;dp<ep;dp++,cp++) | |
170 | *dp = Tolower(*cp); | |
171 | lcase: | |
172 | if(putsuf(ep,".",0)||suffix(ep,0)) | |
173 | continue; | |
174 | if(isupper(word[0])) { | |
175 | for(cp=original,dp=word; *dp = *cp++; dp++) | |
176 | if (fold) *dp = Tolower(*dp); | |
177 | word[0] = Tolower(word[0]); | |
178 | goto lcase; | |
179 | } | |
180 | file = stdout; | |
181 | } | |
182 | } | |
183 | ||
184 | suffix(ep,lev) | |
185 | char *ep; | |
186 | { | |
187 | register struct suftab *t; | |
188 | register char *cp, *sp; | |
189 | lev += DLEV; | |
190 | deriv[lev] = deriv[lev-1] = 0; | |
191 | for(t= &suftab[0];sp=t->suf;t++) { | |
192 | cp = ep; | |
193 | while(*sp) | |
194 | if(*--cp!=*sp++) | |
195 | goto next; | |
196 | for(sp=cp; --sp>=word&&!vowel(*sp); ) ; | |
197 | if(sp<word) | |
198 | return(0); | |
199 | if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1)) | |
200 | return(1); | |
201 | if(t->p2!=0) { | |
202 | deriv[lev] = deriv[lev+1] = 0; | |
203 | return((*t->p2)(ep-t->n2,t->d2,t->a2,lev)); | |
204 | } | |
205 | return(0); | |
206 | next: ; | |
207 | } | |
208 | return(0); | |
209 | } | |
210 | ||
/*
 * Suffix action that always fails.  suftab[] uses it for endings that
 * must stop the search without being stripped.
 */
int
nop()
{
	return 0;
}
215 | ||
/*
 * Basic suffix action: look up the stem word..ep, recording a as the
 * derivation; if that fails, try to strip a further suffix from the
 * stem.  d is not used.  Returns 1 on success, 0 otherwise.
 */
int
strip(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(putsuf(ep,a,lev))
		return(1);
	return(suffix(ep,lev) ? 1 : 0);
}
221 | ||
222 | s(ep,d,a,lev) | |
223 | char *ep,*d,*a; | |
224 | { | |
225 | if(lev>DLEV+1) | |
226 | return(0); | |
227 | if(*ep=='s'&&ep[-1]=='s') | |
228 | return(0); | |
229 | return(strip(ep,d,a,lev)); | |
230 | } | |
231 | ||
232 | an(ep,d,a,lev) | |
233 | char *ep,*d,*a; | |
234 | { | |
235 | if(!isupper(*word)) /*must be proper name*/ | |
236 | return(0); | |
237 | return(putsuf(ep,a,lev)); | |
238 | } | |
239 | ||
/*
 * Action for "-ization": put an 'e' at ep and try the stem that ends
 * just after it, using d as the derivation.  The character overwritten
 * at ep is not restored.  a is not used.
 */
int
ize(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'e';
	return(strip(ep+1,"",d,lev));
}
246 | ||
/*
 * Replace the character at ep with 'e' and try the stem that ends just
 * after it, using d as the derivation.  The overwritten character is
 * not restored.  a is not used.
 */
int
y_to_e(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'e';
	return(strip(ep+1,"",d,lev));
}
253 | ||
/*
 * Action for suffixes in front of which a final 'y' may have become
 * 'i': a stem ending in 'i' is handed to i_to_y(), any other stem to
 * strip().
 */
int
ily(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	return(ep[-1]=='i' ? i_to_y(ep,d,a,lev) : strip(ep,d,a,lev));
}
262 | ||
263 | ncy(ep,d,a,lev) | |
264 | char *ep, *d, *a; | |
265 | { | |
266 | if(skipv(skipv(ep-1))<word) | |
267 | return(0); | |
268 | ep[-1] = 't'; | |
269 | return(strip(ep,d,a,lev)); | |
270 | } | |
271 | ||
/*
 * Action for "-bility": store 'l' at ep and let y_to_e() add the 'e'
 * after it, so that the stem ends in "le".
 */
int
bility(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'l';
	return(y_to_e(ep+1,d,a,lev));
}
278 | ||
/*
 * If the stem ends in 'i', change that 'i' to 'y' and use d as the
 * derivation; otherwise leave the stem alone and use a.  Either way the
 * result is tried with strip().
 */
int
i_to_y(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(ep[-1]!='i')
		return(strip(ep,"",a,lev));
	ep[-1] = 'y';
	return(strip(ep,"",d,lev));
}
288 | ||
289 | es(ep,d,a,lev) | |
290 | char *ep,*d,*a; | |
291 | { | |
292 | if(lev>DLEV) | |
293 | return(0); | |
294 | switch(ep[-1]) { | |
295 | default: | |
296 | return(0); | |
297 | case 'i': | |
298 | return(i_to_y(ep,d,a,lev)); | |
299 | case 's': | |
300 | case 'h': | |
301 | case 'z': | |
302 | case 'x': | |
303 | return(strip(ep,d,a,lev)); | |
304 | } | |
305 | } | |
306 | ||
/*
 * Action for "-metry": overwrite the two characters before ep with
 * "er" and try the result with strip().
 */
int
metry(ep,d,a,lev)
char *ep, *d,*a;
int lev;
{
	register char *tail;

	tail = ep-2;
	*tail++ = 'e';
	*tail = 'r';
	return(strip(ep,d,a,lev));
}
314 | ||
/*
 * Action for "-tion", "-tive" and "-ator".  The decision depends on the
 * character two places before ep: 'c' or 'r' means the stem is looked
 * up as it stands, 'a' means an 'e' is restored with y_to_e(); anything
 * else fails.
 */
int
tion(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char c;

	c = ep[-2];
	if(c=='c' || c=='r')
		return(putsuf(ep,a,lev));
	if(c=='a')
		return(y_to_e(ep,d,a,lev));
	return(0);
}
327 | ||
/*
 * possible consonant-consonant-e ending
 *
 * Decides from the last two characters of the stem whether to restore a
 * final 'e' with y_to_e(), to fall back on VCe(), or to fail:
 *
 *   last 'l':	vowel, 'l', 'r' or 'w' before it -> VCe(); else y_to_e().
 *   last 's' preceded by 's' -> VCe().
 *   last 's', 'c' or 'g' with 'a' at ep -> fail.
 *   last 's', 'c', 'g', 'v' or 'z' preceded by a vowel -> VCe().
 *   otherwise, for those letters and for 'u': try y_to_e(); if that
 *	fails, only a stem ending in "ng" goes on to VCe().
 *   any other last letter -> VCe().
 */
int
CCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char last, prev;

	last = ep[-1];
	prev = ep[-2];

	if(last=='l') {
		if(vowel(prev) || prev=='l' || prev=='r' || prev=='w')
			return(VCe(ep,d,a,lev));
		return(y_to_e(ep,d,a,lev));
	}
	if(last!='s' && last!='c' && last!='g'
	    && last!='v' && last!='z' && last!='u')
		return(VCe(ep,d,a,lev));
	if(last=='s' && prev=='s')
		return(VCe(ep,d,a,lev));
	if((last=='s' || last=='c' || last=='g') && *ep=='a')
		return(0);
	if(last!='u' && vowel(prev))
		return(VCe(ep,d,a,lev));
	if(y_to_e(ep,d,a,lev))
		return(1);
	/* y_to_e() may have edited word[], so look at the buffer again */
	if(ep[-2]=='n' && ep[-1]=='g')
		return(VCe(ep,d,a,lev));
	return(0);
}
364 | ||
/*
 * possible consonant-vowel-consonant-e ending
 *
 * Fails if the stem already ends in 'e'.  If the stem ends in a vowel
 * followed by a non-vowel, an 'e' is temporarily stored at ep and the
 * longer stem is tried (derivation d); when that does not succeed the
 * overwritten character is put back.  Finally the plain stem is tried
 * with strip().
 */
int
VCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char last, saved;

	last = ep[-1];
	if(last=='e')
		return(0);
	if(!vowel(last) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if(putsuf(ep+1,d,lev))
			return(1);
		if(suffix(ep+1,lev))
			return(1);
		ep[0] = saved;
	}
	return(strip(ep,d,a,lev));
}
383 | ||
384 | char *lookuppref(wp,ep) | |
385 | char **wp; | |
386 | char *ep; | |
387 | { | |
388 | register char **sp; | |
389 | register char *bp,*cp; | |
390 | for(sp=preftab;*sp;sp++) { | |
391 | bp = *wp; | |
392 | for(cp= *sp;*cp;cp++,bp++) | |
393 | if(Tolower(*bp)!=*cp) | |
394 | goto next; | |
395 | for(cp=bp;cp<ep;cp++) | |
396 | if(vowel(*cp)) { | |
397 | *wp = bp; | |
398 | return(*sp); | |
399 | } | |
400 | next: ; | |
401 | } | |
402 | return(0); | |
403 | } | |
404 | ||
405 | putsuf(ep,a,lev) | |
406 | char *ep,*a; | |
407 | { | |
408 | register char *cp; | |
409 | char *bp; | |
410 | register char *pp; | |
411 | int val = 0; | |
412 | char space[20]; | |
413 | deriv[lev] = a; | |
414 | if(putw(word,ep,lev)) | |
415 | return(1); | |
416 | bp = word; | |
417 | pp = space; | |
418 | deriv[lev+1] = pp; | |
419 | while(cp=lookuppref(&bp,ep)) { | |
420 | *pp++ = '+'; | |
421 | while(*pp = *cp++) | |
422 | pp++; | |
423 | if(putw(bp,ep,lev+1)) { | |
424 | val = 1; | |
425 | break; | |
426 | } | |
427 | } | |
428 | deriv[lev+1] = deriv[lev+2] = 0; | |
429 | return(val); | |
430 | } | |
431 | ||
432 | putw(bp,ep,lev) | |
433 | char *bp,*ep; | |
434 | { | |
435 | register i, j; | |
436 | char duple[3]; | |
437 | if(ep-bp<=1) | |
438 | return(0); | |
439 | if(vowel(*ep)) { | |
440 | if(monosyl(bp,ep)) | |
441 | return(0); | |
442 | } | |
443 | i = dict(bp,ep); | |
444 | if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) { | |
445 | ep--; | |
446 | deriv[++lev] = duple; | |
447 | duple[0] = '+'; | |
448 | duple[1] = *ep; | |
449 | duple[2] = 0; | |
450 | i = dict(bp,ep); | |
451 | } | |
452 | if(vflag==0||i==0) | |
453 | return(i); | |
454 | j = lev; | |
455 | do { | |
456 | if(deriv[j]) | |
457 | strcat(affix,deriv[j]); | |
458 | } while(--j>0); | |
459 | strcat(affix,"\t"); | |
460 | return(i); | |
461 | } | |
462 | ||
463 | ||
/*
 * Return 1 when bp..ep has at least two characters, ends in a vowel
 * followed by a non-vowel other than 'x' or 'w', and has no vowel
 * before that; otherwise return 0.
 */
int
monosyl(bp,ep)
char *bp, *ep;
{
	register char *cp;

	if(ep-bp<2)
		return(0);
	if(vowel(ep[-1]))
		return(0);
	if(!vowel(ep[-2]))
		return(0);
	if(ep[-1]=='x' || ep[-1]=='w')
		return(0);
	/* everything in front of the final vowel must be a non-vowel */
	for(cp=ep-2; cp>bp; )
		if(vowel(*--cp))
			return(0);
	return(1);
}
477 | ||
478 | char * | |
479 | skipv(s) | |
480 | char *s; | |
481 | { | |
482 | if(s>=word&&vowel(*s)) | |
483 | s--; | |
484 | while(s>=word&&!vowel(*s)) | |
485 | s--; | |
486 | return(s); | |
487 | } | |
488 | ||
/*
 * Return 1 if c, after Tolower, is one of a e i o u y; else 0.
 */
int
vowel(c)
int c;
{
	int lc;

	lc = Tolower(c);
	return(lc=='a' || lc=='e' || lc=='i'
	    || lc=='o' || lc=='u' || lc=='y');
}
502 | ||
503 | /* crummy way to Britishise */ | |
504 | ise() | |
505 | { | |
506 | register struct suftab *p; | |
507 | for(p = suftab;p->suf;p++) { | |
508 | ztos(p->suf); | |
509 | ztos(p->d1); | |
510 | ztos(p->a1); | |
511 | } | |
512 | } | |
/*
 * Change every 'z' in the string s to 's', in place.
 * No value is returned.
 */
int
ztos(s)
char *s;
{
	register char *cp;

	cp = s;
	while(*cp!=0) {
		if(*cp=='z')
			*cp = 's';
		cp++;
	}
}
520 | ||
521 | dict(bp,ep) | |
522 | char *bp, *ep; | |
523 | { | |
524 | register char *wp; | |
525 | long h; | |
526 | register long *lp; | |
527 | register i; | |
528 | if(xflag) | |
529 | printf("=%.*s\n",ep-bp,bp); | |
530 | for(i=0; i<NP; i++) { | |
531 | for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp) | |
532 | h += *wp * *lp; | |
533 | h += '\n' * *lp; | |
534 | h %= p[i]; | |
535 | if(get(h)==0) | |
536 | return(0); | |
537 | } | |
538 | return(1); | |
539 | } |