Commit | Line | Data |
---|---|---|
1acdf60c SJ |
#include <stdlib.h>
#include <string.h>
#include "spell.h"
/*
 * Amount added to the derivation level on every entry to suffix();
 * s() and es() compare their level against it to limit plural stripping.
 */
#define DLEV 2

/* library routine used to build the derivation text */
char	*strcat();

/* helper returning a pointer into word[] */
char	*skipv();

/* suffix handlers referenced from suftab[] */
int	strip(), an(), s(), es();
int	ily(), ncy(), CCe(), VCe();
int	bility(), tion(), ize();
int	y_to_e(), i_to_y();
int	nop(), metry();
21 | ||
22 | struct suftab { | |
23 | char *suf; | |
24 | int (*p1)(); | |
25 | int n1; | |
26 | char *d1; | |
27 | char *a1; | |
28 | int (*p2)(); | |
29 | int n2; | |
30 | char *d2; | |
31 | char *a2; | |
32 | } suftab[] = { | |
33 | {"ssen",ily,4,"-y+iness","+ness" }, | |
34 | {"ssel",ily,4,"-y+i+less","+less" }, | |
35 | {"se",s,1,"","+s", es,2,"-y+ies","+es" }, | |
36 | {"s'",s,2,"","+'s"}, | |
37 | {"s",s,1,"","+s"}, | |
38 | {"ecn",ncy,1,"","-t+ce"}, | |
39 | {"ycn",ncy,1,"","-cy+t"}, | |
40 | {"ytilb",nop,0,"",""}, | |
41 | {"ytilib",bility,5,"-le+ility",""}, | |
42 | {"elbaif",i_to_y,4,"-y+iable",""}, | |
43 | {"elba",CCe,4,"-e+able","+able"}, | |
44 | {"yti",CCe,3,"-e+ity","+ity"}, | |
45 | {"ylb",y_to_e,1,"-e+y",""}, | |
46 | {"yl",ily,2,"-y+ily","+ly"}, | |
47 | {"laci",strip,2,"","+al"}, | |
48 | {"latnem",strip,2,"","+al"}, | |
49 | {"lanoi",strip,2,"","+al"}, | |
50 | {"tnem",strip,4,"","+ment"}, | |
51 | {"gni",CCe,3,"-e+ing","+ing"}, | |
52 | {"reta",nop,0,"",""}, | |
53 | {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"}, | |
54 | {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"}, | |
55 | {"citsi",strip,2,"","+ic"}, | |
56 | {"cihparg",i_to_y,1,"-y+ic",""}, | |
57 | {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"}, | |
58 | {"cirtem",i_to_y,1,"-y+ic",""}, | |
59 | {"yrtem",metry,0,"-ry+er",""}, | |
60 | {"cigol",i_to_y,1,"-y+ic",""}, | |
61 | {"tsigol",i_to_y,2,"-y+ist",""}, | |
62 | {"tsi",VCe,3,"-e+ist","+ist"}, | |
63 | {"msi",VCe,3,"-e+ism","+ist"}, | |
64 | {"noitacif",i_to_y,6,"-y+ication",""}, | |
65 | {"noitazi",ize,5,"-e+ation",""}, | |
66 | {"rota",tion,2,"-e+or",""}, | |
67 | {"noit",tion,3,"-e+ion","+ion"}, | |
68 | {"naino",an,3,"","+ian"}, | |
69 | {"na",an,1,"","+n"}, | |
70 | {"evit",tion,3,"-e+ive","+ive"}, | |
71 | {"ezi",CCe,3,"-e+ize","+ize"}, | |
72 | {"pihs",strip,4,"","+ship"}, | |
73 | {"dooh",ily,4,"-y+ihood","+hood"}, | |
74 | {"luf",ily,3,"-y+iful","+ful"}, | |
75 | {"ekil",strip,4,"","+like"}, | |
76 | 0 | |
77 | }; | |
78 | ||
/*
 * Prefixes that lookuppref() may peel off the front of a word.
 * The list is searched front to back and the first match wins, so a
 * prefix that begins with another prefix must be listed ahead of it.
 * A null pointer terminates the list.
 */
char *preftab[] = {
	"anti",    "bio",     "dis",    "electro",
	"en",      "fore",    "hyper",  "intra",
	"inter",   "iso",     "kilo",   "magneto",
	"meta",    "micro",   "milli",  "mis",
	"mono",    "multi",   "non",    "out",
	"over",    "photo",   "poly",   "pre",
	"pseudo",  "re",      "semi",   "stereo",
	"sub",     "super",   "thermo", "ultra",
	"under",	/* must precede un */
	"un",
	0
};
116 | ||
/* option counters: -v asks putw() to record derivations, -x makes dict() echo each probe */
int vflag, xflag;

/* word[] is the working copy that the handlers edit in place; original[] is the line as read */
char word[100], original[100];

/* derivation fragments, indexed by level; putw() joins them into affix[] */
char *deriv[40];
char affix[40];
123 | ||
124 | main(argc,argv) | |
125 | char **argv; | |
126 | { | |
127 | register char *ep, *cp; | |
128 | register char *dp; | |
129 | int fold; | |
130 | int j; | |
131 | FILE *file, *found; | |
132 | if(!prime(argc,argv)) { | |
133 | fprintf(stderr, | |
134 | "spell: cannot initialize hash table\n"); | |
135 | exit(1); | |
136 | } | |
137 | found = fopen(argv[2],"w"); | |
138 | for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++) | |
139 | switch(argv[0][1]) { | |
140 | case 'b': | |
141 | ise(); | |
142 | break; | |
143 | case 'v': | |
144 | vflag++; | |
145 | break; | |
146 | case 'x': | |
147 | xflag++; | |
148 | break; | |
149 | } | |
150 | for(;; fprintf(file,"%s%s\n",affix,original)) { | |
151 | affix[0] = 0; | |
152 | file = found; | |
153 | for(ep=word;(*ep=j=getchar())!='\n';ep++) | |
154 | if(j == EOF) | |
155 | exit(0); | |
156 | for(cp=word,dp=original; cp<ep; ) | |
157 | *dp++ = *cp++; | |
158 | *dp = 0; | |
159 | fold = 0; | |
160 | for(cp=word;cp<ep;cp++) | |
161 | if(islower(*cp)) | |
162 | goto lcase; | |
163 | if(putsuf(ep,".",0)) | |
164 | continue; | |
165 | ++fold; | |
166 | for(cp=original+1,dp=word+1;dp<ep;dp++,cp++) | |
167 | *dp = Tolower(*cp); | |
168 | lcase: | |
169 | if(putsuf(ep,".",0)||suffix(ep,0)) | |
170 | continue; | |
171 | if(isupper(word[0])) { | |
172 | for(cp=original,dp=word; *dp = *cp++; dp++) | |
173 | if (fold) *dp = Tolower(*dp); | |
174 | word[0] = Tolower(word[0]); | |
175 | goto lcase; | |
176 | } | |
177 | file = stdout; | |
178 | } | |
179 | } | |
180 | ||
181 | suffix(ep,lev) | |
182 | char *ep; | |
183 | { | |
184 | register struct suftab *t; | |
185 | register char *cp, *sp; | |
186 | lev += DLEV; | |
187 | deriv[lev] = deriv[lev-1] = 0; | |
188 | for(t= &suftab[0];sp=t->suf;t++) { | |
189 | cp = ep; | |
190 | while(*sp) | |
191 | if(*--cp!=*sp++) | |
192 | goto next; | |
193 | for(sp=cp; --sp>=word&&!vowel(*sp); ) ; | |
194 | if(sp<word) | |
195 | return(0); | |
196 | if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1)) | |
197 | return(1); | |
198 | if(t->p2!=0) { | |
199 | deriv[lev] = deriv[lev+1] = 0; | |
200 | return((*t->p2)(ep-t->n2,t->d2,t->a2,lev)); | |
201 | } | |
202 | return(0); | |
203 | next: ; | |
204 | } | |
205 | return(0); | |
206 | } | |
207 | ||
/*
 * Handler that always fails.  suftab[] uses it for endings that must
 * not be stripped by a later, shorter entry.
 */
int
nop()
{
	return 0;
}
212 | ||
/*
 * Generic handler: look the shortened word up with derivation text a
 * and, failing that, try to remove a further suffix.  d is unused.
 * Returns 1 on success, 0 otherwise.
 */
int
strip(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(putsuf(ep,a,lev))
		return(1);
	return(suffix(ep,lev) != 0);
}
218 | ||
219 | s(ep,d,a,lev) | |
220 | char *ep,*d,*a; | |
221 | { | |
222 | if(lev>DLEV+1) | |
223 | return(0); | |
224 | if(*ep=='s'&&ep[-1]=='s') | |
225 | return(0); | |
226 | return(strip(ep,d,a,lev)); | |
227 | } | |
228 | ||
229 | an(ep,d,a,lev) | |
230 | char *ep,*d,*a; | |
231 | { | |
232 | if(!isupper(*word)) /*must be proper name*/ | |
233 | return(0); | |
234 | return(putsuf(ep,a,lev)); | |
235 | } | |
236 | ||
/*
 * Handler for "-ization": put an 'e' at ep and continue with the
 * word that now ends just after it, using d as the derivation text.
 * The a argument is unused.
 */
int
ize(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	*ep = 'e';
	return(strip(ep+1,"",d,lev));
}
243 | ||
/*
 * Restore a dropped final 'e': store 'e' at ep and try the word that
 * ends one character later, with d as the derivation text.
 * The a argument is unused.
 */
int
y_to_e(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	*ep = 'e';
	return(strip(ep+1,"",d,lev));
}
250 | ||
/*
 * Handler for endings that turn a final 'y' into 'i': if the stem ends
 * in 'i' let i_to_y() undo that change, otherwise strip plainly.
 */
int
ily(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	return(ep[-1] == 'i' ? i_to_y(ep,d,a,lev) : strip(ep,d,a,lev));
}
259 | ||
260 | ncy(ep,d,a,lev) | |
261 | char *ep, *d, *a; | |
262 | { | |
263 | if(skipv(skipv(ep-1))<word) | |
264 | return(0); | |
265 | ep[-1] = 't'; | |
266 | return(strip(ep,d,a,lev)); | |
267 | } | |
268 | ||
/*
 * Handler for "-bility": store 'l' at ep, then let y_to_e() append
 * the 'e' after it and continue.
 */
int
bility(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	*ep = 'l';
	return(y_to_e(ep+1,d,a,lev));
}
275 | ||
/*
 * If the stem ends in 'i', change it to 'y' and use d as the
 * derivation text; otherwise leave the stem alone and use a.
 * Either way continue through strip().
 */
int
i_to_y(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(ep[-1] != 'i')
		return(strip(ep,"",a,lev));
	ep[-1] = 'y';
	return(strip(ep,"",d,lev));
}
285 | ||
286 | es(ep,d,a,lev) | |
287 | char *ep,*d,*a; | |
288 | { | |
289 | if(lev>DLEV) | |
290 | return(0); | |
291 | switch(ep[-1]) { | |
292 | default: | |
293 | return(0); | |
294 | case 'i': | |
295 | return(i_to_y(ep,d,a,lev)); | |
296 | case 's': | |
297 | case 'h': | |
298 | case 'z': | |
299 | case 'x': | |
300 | return(strip(ep,d,a,lev)); | |
301 | } | |
302 | } | |
303 | ||
/*
 * Handler for "-metry": overwrite the two characters before ep with
 * "er" and continue through strip().
 */
int
metry(ep,d,a,lev)
char *ep, *d,*a;
int lev;
{
	char *tail = ep - 2;

	tail[0] = 'e';
	tail[1] = 'r';
	return(strip(ep,d,a,lev));
}
311 | ||
/*
 * Handler for "-tion", "-tive" and "-ator".  The decision is made on
 * the character two places before ep: after 'a' the dropped 'e' is
 * restored via y_to_e(); after 'c' or 'r' the stem is only looked up
 * directly; any other character fails.
 */
int
tion(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	int key = ep[-2];

	if(key == 'a')
		return(y_to_e(ep,d,a,lev));
	if(key == 'c' || key == 'r')
		return(putsuf(ep,a,lev));
	return(0);
}
324 | ||
/*
 * Possible consonant-consonant-e ending.
 * Depending on the last two characters of the stem, either restore a
 * final 'e' through y_to_e(), give up, or hand over to VCe().
 */
int
CCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	int last = ep[-1];
	int prev = ep[-2];

	switch(last) {
	case 'l':
		/* consonant other than l, r, w in front of the 'l': restore the e */
		if(!vowel(prev) && prev != 'l' && prev != 'r' && prev != 'w')
			return(y_to_e(ep,d,a,lev));
		return(VCe(ep,d,a,lev));
	case 's':
		if(prev == 's')
			return(VCe(ep,d,a,lev));
		if(*ep == 'a')
			return(0);
		if(vowel(prev))
			return(VCe(ep,d,a,lev));
		break;
	case 'c':
	case 'g':
		if(*ep == 'a')
			return(0);
		if(vowel(prev))
			return(VCe(ep,d,a,lev));
		break;
	case 'v':
	case 'z':
		if(vowel(prev))
			return(VCe(ep,d,a,lev));
		break;
	case 'u':
		break;
	default:
		return(VCe(ep,d,a,lev));
	}
	if(y_to_e(ep,d,a,lev))
		return(1);
	/*
	 * Re-read the stem here instead of using last/prev: the attempt
	 * above recurses through handlers that edit word[] in place.
	 */
	if(ep[-2] == 'n' && ep[-1] == 'g')
		return(VCe(ep,d,a,lev));
	return(0);
}
361 | ||
/*
 * Possible consonant-vowel-consonant-e ending.
 * A stem ending in 'e' fails at once.  When the stem ends in a
 * consonant preceded by a vowel, first try it with an 'e' appended
 * (derivation text d); if that does not work, put back the character
 * that was overwritten and fall through to a plain strip().
 */
int
VCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char last, saved;

	last = ep[-1];
	if(last == 'e')
		return(0);
	if(!vowel(last) && vowel(ep[-2])) {
		saved = *ep;
		*ep = 'e';
		if(putsuf(ep+1,d,lev) || suffix(ep+1,lev))
			return(1);
		*ep = saved;
	}
	return(strip(ep,d,a,lev));
}
380 | ||
381 | char *lookuppref(wp,ep) | |
382 | char **wp; | |
383 | char *ep; | |
384 | { | |
385 | register char **sp; | |
386 | register char *bp,*cp; | |
387 | for(sp=preftab;*sp;sp++) { | |
388 | bp = *wp; | |
389 | for(cp= *sp;*cp;cp++,bp++) | |
390 | if(Tolower(*bp)!=*cp) | |
391 | goto next; | |
392 | for(cp=bp;cp<ep;cp++) | |
393 | if(vowel(*cp)) { | |
394 | *wp = bp; | |
395 | return(*sp); | |
396 | } | |
397 | next: ; | |
398 | } | |
399 | return(0); | |
400 | } | |
401 | ||
402 | putsuf(ep,a,lev) | |
403 | char *ep,*a; | |
404 | { | |
405 | register char *cp; | |
406 | char *bp; | |
407 | register char *pp; | |
408 | int val = 0; | |
409 | char space[20]; | |
410 | deriv[lev] = a; | |
411 | if(putw(word,ep,lev)) | |
412 | return(1); | |
413 | bp = word; | |
414 | pp = space; | |
415 | deriv[lev+1] = pp; | |
416 | while(cp=lookuppref(&bp,ep)) { | |
417 | *pp++ = '+'; | |
418 | while(*pp = *cp++) | |
419 | pp++; | |
420 | if(putw(bp,ep,lev+1)) { | |
421 | val = 1; | |
422 | break; | |
423 | } | |
424 | } | |
425 | deriv[lev+1] = deriv[lev+2] = 0; | |
426 | return(val); | |
427 | } | |
428 | ||
429 | putw(bp,ep,lev) | |
430 | char *bp,*ep; | |
431 | { | |
432 | register i, j; | |
433 | char duple[3]; | |
434 | if(ep-bp<=1) | |
435 | return(0); | |
436 | if(vowel(*ep)) { | |
437 | if(monosyl(bp,ep)) | |
438 | return(0); | |
439 | } | |
440 | i = dict(bp,ep); | |
441 | if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) { | |
442 | ep--; | |
443 | deriv[++lev] = duple; | |
444 | duple[0] = '+'; | |
445 | duple[1] = *ep; | |
446 | duple[2] = 0; | |
447 | i = dict(bp,ep); | |
448 | } | |
449 | if(vflag==0||i==0) | |
450 | return(i); | |
451 | j = lev; | |
452 | do { | |
453 | if(deriv[j]) | |
454 | strcat(affix,deriv[j]); | |
455 | } while(--j>0); | |
456 | strcat(affix,"\t"); | |
457 | return(i); | |
458 | } | |
459 | ||
460 | ||
/*
 * Return 1 when bp..ep has the shape: zero or more non-vowels, one
 * vowel, then a single final non-vowel that is neither 'x' nor 'w'.
 * Anything shorter than two characters, or any other shape, gives 0.
 */
int
monosyl(bp,ep)
char *bp, *ep;
{
	char *cp;
	int last;

	if(ep < bp+2)
		return(0);
	last = ep[-1];
	if(vowel(last))
		return(0);
	if(!vowel(ep[-2]))
		return(0);
	if(last == 'x' || last == 'w')
		return(0);
	/* nothing in front of that vowel may be a vowel */
	for(cp = ep-2; cp > bp; ) {
		cp--;
		if(vowel(*cp))
			return(0);
	}
	return(1);
}
474 | ||
475 | char * | |
476 | skipv(s) | |
477 | char *s; | |
478 | { | |
479 | if(s>=word&&vowel(*s)) | |
480 | s--; | |
481 | while(s>=word&&!vowel(*s)) | |
482 | s--; | |
483 | return(s); | |
484 | } | |
485 | ||
/*
 * Return 1 if c, after Tolower, is one of a e i o u y; else 0.
 */
int
vowel(c)
int c;
{
	c = Tolower(c);
	return(c == 'a' || c == 'e' || c == 'i'
	    || c == 'o' || c == 'u' || c == 'y');
}
499 | ||
500 | /* crummy way to Britishise */ | |
501 | ise() | |
502 | { | |
503 | register struct suftab *p; | |
504 | for(p = suftab;p->suf;p++) { | |
505 | ztos(p->suf); | |
506 | ztos(p->d1); | |
507 | ztos(p->a1); | |
508 | } | |
509 | } | |
/*
 * Replace every 'z' in the NUL-terminated string s by 's', in place.
 * No meaningful value is returned.
 */
int
ztos(s)
char *s;
{
	register char *cp = s;

	while(*cp != '\0') {
		if(*cp == 'z')
			*cp = 's';
		cp++;
	}
}
517 | ||
518 | dict(bp,ep) | |
519 | char *bp, *ep; | |
520 | { | |
521 | register char *wp; | |
522 | long h; | |
523 | register long *lp; | |
524 | register i; | |
525 | if(xflag) | |
526 | printf("=%.*s\n",ep-bp,bp); | |
527 | for(i=0; i<NP; i++) { | |
528 | for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp) | |
529 | h += *wp * *lp; | |
530 | h += '\n' * *lp; | |
531 | h %= p[i]; | |
532 | if(get(h)==0) | |
533 | return(0); | |
534 | } | |
535 | return(1); | |
536 | } |