Commit | Line | Data |
---|---|---|
42d6e430 BJ |
1 | #include "spell.h" |
2 | #define DLEV 2 | |
3 | ||
4 | char *strcat(); | |
5 | int strip(); | |
6 | char *skipv(); | |
7 | int an(); | |
8 | int s(); | |
9 | int es(); | |
10 | int ily(); | |
11 | int ncy(); | |
12 | int CCe(); | |
13 | int VCe(); | |
14 | int bility(); | |
15 | int tion(); | |
16 | int ize(); | |
17 | int y_to_e(); | |
18 | int i_to_y(); | |
19 | int nop(); | |
20 | int metry(); | |
21 | ||
22 | struct suftab { | |
23 | char *suf; | |
24 | int (*p1)(); | |
25 | int n1; | |
26 | char *d1; | |
27 | char *a1; | |
28 | int (*p2)(); | |
29 | int n2; | |
30 | char *d2; | |
31 | char *a2; | |
32 | } suftab[] = { | |
33 | {"ssen",ily,4,"-y+iness","+ness" }, | |
34 | {"ssel",ily,4,"-y+i+less","+less" }, | |
35 | {"se",s,1,"","+s", es,2,"-y+ies","+es" }, | |
36 | {"s'",s,2,"","+'s"}, | |
37 | {"s",s,1,"","+s"}, | |
38 | {"ecn",ncy,1,"","-t+ce"}, | |
39 | {"ycn",ncy,1,"","-cy+t"}, | |
40 | {"ytilb",nop,0,"",""}, | |
41 | {"ytilib",bility,5,"-le+ility",""}, | |
42 | {"elbaif",i_to_y,4,"-y+iable",""}, | |
43 | {"elba",CCe,4,"-e+able","+able"}, | |
44 | {"yti",CCe,3,"-e+ity","+ity"}, | |
45 | {"ylb",y_to_e,1,"-e+y",""}, | |
46 | {"yl",ily,2,"-y+ily","+ly"}, | |
47 | {"laci",strip,2,"","+al"}, | |
48 | {"latnem",strip,2,"","+al"}, | |
49 | {"lanoi",strip,2,"","+al"}, | |
50 | {"tnem",strip,4,"","+ment"}, | |
51 | {"gni",CCe,3,"-e+ing","+ing"}, | |
52 | {"reta",nop,0,"",""}, | |
53 | {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"}, | |
54 | {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"}, | |
55 | {"citsi",strip,2,"","+ic"}, | |
56 | {"cihparg",i_to_y,1,"-y+ic",""}, | |
57 | {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"}, | |
58 | {"cirtem",i_to_y,1,"-y+ic",""}, | |
59 | {"yrtem",metry,0,"-ry+er",""}, | |
60 | {"cigol",i_to_y,1,"-y+ic",""}, | |
61 | {"tsigol",i_to_y,2,"-y+ist",""}, | |
62 | {"tsi",VCe,3,"-e+ist","+ist"}, | |
63 | {"msi",VCe,3,"-e+ism","+ist"}, | |
64 | {"noitacif",i_to_y,6,"-y+ication",""}, | |
65 | {"noitazi",ize,5,"-e+ation",""}, | |
66 | {"rota",tion,2,"-e+or",""}, | |
67 | {"noit",tion,3,"-e+ion","+ion"}, | |
68 | {"naino",an,3,"","+ian"}, | |
69 | {"na",an,1,"","+n"}, | |
70 | {"evit",tion,3,"-e+ive","+ive"}, | |
71 | {"ezi",CCe,3,"-e+ize","+ize"}, | |
72 | {"pihs",strip,4,"","+ship"}, | |
73 | {"dooh",ily,4,"-y+hood","+hood"}, | |
74 | {"ekil",strip,4,"","+like"}, | |
75 | 0 | |
76 | }; | |
77 | ||
/*
 * Prefixes that lookuppref() may strip from the front of a word.
 * The list is searched front to back and the first match wins, so a
 * prefix must come before any shorter prefix it begins with.
 * A null pointer ends the list.
 */
char *preftab[] = {
	"anti",		"bio",		"dis",		"electro",
	"en",		"fore",		"hyper",	"intra",
	"inter",	"iso",		"kilo",		"magneto",
	"meta",		"micro",	"milli",	"mis",
	"mono",		"multi",	"non",		"out",
	"over",		"photo",	"poly",		"pre",
	"pseudo",	"re",		"semi",		"stereo",
	"sub",		"super",	"thermo",	"ultra",
	"under",	/* must precede un */
	"un",
	0
};
115 | ||
/* incremented once per -v option; when nonzero, putw() appends derivations to affix */
int vflag;
/* incremented once per -x option; when nonzero, dict() prints each candidate it looks up */
int xflag;
/* working copy of the current input word; the suffix routines edit it in place */
char word[100];
/* the input word as read, NUL-terminated; this is what main() prints */
char original[100];
/* derivation strings, indexed by nesting level (see suffix(), putsuf(), putw()) */
char *deriv[40];
/* text printed in front of the word; cleared by main() for every word */
char affix[40];
122 | ||
123 | main(argc,argv) | |
124 | char **argv; | |
125 | { | |
126 | register char *ep, *cp; | |
127 | register char *dp; | |
128 | int fold; | |
129 | int j; | |
130 | FILE *file, *found; | |
131 | if(!prime(argc,argv)) { | |
132 | fprintf(stderr, | |
133 | "spell: cannot initialize hash table\n"); | |
134 | exit(1); | |
135 | } | |
136 | found = fopen(argv[2],"w"); | |
137 | for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++) | |
138 | switch(argv[0][1]) { | |
139 | case 'b': | |
140 | ise(); | |
141 | break; | |
142 | case 'v': | |
143 | vflag++; | |
144 | break; | |
145 | case 'x': | |
146 | xflag++; | |
147 | break; | |
148 | } | |
149 | for(;; fprintf(file,"%s%s\n",affix,original)) { | |
150 | affix[0] = 0; | |
151 | file = found; | |
152 | for(ep=word;(*ep=j=getchar())!='\n';ep++) | |
153 | if(j == EOF) | |
154 | exit(0); | |
155 | for(cp=word,dp=original; cp<ep; ) | |
156 | *dp++ = *cp++; | |
157 | *dp = 0; | |
158 | fold = 0; | |
159 | for(cp=word;cp<ep;cp++) | |
160 | if(islower(*cp)) | |
161 | goto lcase; | |
162 | if(putsuf(ep,".",0)) | |
163 | continue; | |
164 | ++fold; | |
165 | for(cp=original+1,dp=word+1;dp<ep;dp++,cp++) | |
166 | *dp = Tolower(*cp); | |
167 | lcase: | |
168 | if(putsuf(ep,".",0)||suffix(ep,0)) | |
169 | continue; | |
170 | if(isupper(word[0])) { | |
171 | for(cp=original,dp=word; *dp = *cp++; dp++) | |
172 | if (fold) *dp = Tolower(*dp); | |
173 | word[0] = Tolower(word[0]); | |
174 | goto lcase; | |
175 | } | |
176 | file = stdout; | |
177 | } | |
178 | } | |
179 | ||
180 | suffix(ep,lev) | |
181 | char *ep; | |
182 | { | |
183 | register struct suftab *t; | |
184 | register char *cp, *sp; | |
185 | lev += DLEV; | |
186 | deriv[lev] = deriv[lev-1] = 0; | |
187 | for(t= &suftab[0];sp=t->suf;t++) { | |
188 | cp = ep; | |
189 | while(*sp) | |
190 | if(*--cp!=*sp++) | |
191 | goto next; | |
192 | for(sp=cp; --sp>=word&&!vowel(*sp); ) ; | |
193 | if(sp<word) | |
194 | return(0); | |
195 | if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1)) | |
196 | return(1); | |
197 | if(t->p2!=0) { | |
198 | deriv[lev] = deriv[lev+1] = 0; | |
199 | return((*t->p2)(ep-t->n2,t->d2,t->a2,lev)); | |
200 | } | |
201 | return(0); | |
202 | next: ; | |
203 | } | |
204 | return(0); | |
205 | } | |
206 | ||
/*
 * Rule that never succeeds.  suftab uses it for suffixes that must stop
 * the search without producing a derivation.
 */
int
nop()
{
	return 0;
}
211 | ||
/*
 * Basic rule: accept the stem ending at ep if putsuf() finds it with
 * derivation a, otherwise try to remove a further suffix.
 * d is not used here.  Returns 1 on success, 0 on failure.
 */
int
strip(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(putsuf(ep,a,lev))
		return(1);
	return(suffix(ep,lev) != 0);
}
217 | ||
218 | s(ep,d,a,lev) | |
219 | char *ep,*d,*a; | |
220 | { | |
221 | if(lev>DLEV+1) | |
222 | return(0); | |
223 | if(*ep=='s'&&ep[-1]=='s') | |
224 | return(0); | |
225 | return(strip(ep,d,a,lev)); | |
226 | } | |
227 | ||
228 | an(ep,d,a,lev) | |
229 | char *ep,*d,*a; | |
230 | { | |
231 | if(!isupper(*word)) /*must be proper name*/ | |
232 | return(0); | |
233 | return(putsuf(ep,a,lev)); | |
234 | } | |
235 | ||
/*
 * Put an 'e' at ep, then strip() the stem that now ends just after it.
 * d becomes the reported derivation; a is ignored.
 */
int
ize(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'e';
	return(strip(ep+1,"",d,lev));
}
242 | ||
/*
 * Overwrite the character at ep with 'e' and try the stem extended by
 * that letter.  The derivation passed on to strip() is d; a is unused.
 */
int
y_to_e(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char *end;

	end = ep;
	*end++ = 'e';
	return(strip(end,"",d,lev));
}
249 | ||
/*
 * If the stem ends in 'i', hand over to i_to_y(); any other stem is
 * stripped as it stands.
 */
int
ily(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	return(ep[-1]=='i' ? i_to_y(ep,d,a,lev) : strip(ep,d,a,lev));
}
258 | ||
259 | ncy(ep,d,a,lev) | |
260 | char *ep, *d, *a; | |
261 | { | |
262 | if(skipv(skipv(ep-1))<word) | |
263 | return(0); | |
264 | ep[-1] = 't'; | |
265 | return(strip(ep,d,a,lev)); | |
266 | } | |
267 | ||
/*
 * Store 'l' at ep and let y_to_e() add the 'e' after it, so that the
 * stem tried ends in "le".
 */
int
bility(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'l';
	return(y_to_e(ep+1,d,a,lev));
}
274 | ||
/*
 * If the stem ends in 'i', turn that letter into 'y' (in place) and
 * report derivation d; otherwise leave the stem alone and report a.
 * Either way the stem is then handed to strip().
 */
int
i_to_y(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(ep[-1]!='i')
		return(strip(ep,"",a,lev));
	ep[-1] = 'y';
	return(strip(ep,"",d,lev));
}
284 | ||
285 | es(ep,d,a,lev) | |
286 | char *ep,*d,*a; | |
287 | { | |
288 | if(lev>DLEV) | |
289 | return(0); | |
290 | switch(ep[-1]) { | |
291 | default: | |
292 | return(0); | |
293 | case 'i': | |
294 | return(i_to_y(ep,d,a,lev)); | |
295 | case 's': | |
296 | case 'h': | |
297 | case 'z': | |
298 | case 'x': | |
299 | return(strip(ep,d,a,lev)); | |
300 | } | |
301 | } | |
302 | ||
/*
 * Rewrite the two characters before ep as "er" and strip the result
 * (suftab uses this for "-metry", giving "-meter").
 */
int
metry(ep,d,a,lev)
char *ep, *d,*a;
int lev;
{
	char *tail;

	tail = ep-2;
	tail[0] = 'e';
	tail[1] = 'r';
	return(strip(ep,d,a,lev));
}
310 | ||
/*
 * Rule shared by "-tion", "-tive" and "-ator" in suftab.  The character
 * two places before ep selects the action: 'c' or 'r' looks the stem up
 * directly with putsuf(); 'a' tries the stem with an 'e' written at ep;
 * anything else fails.
 */
int
tion(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char key;

	key = ep[-2];
	if(key=='c' || key=='r')
		return(putsuf(ep,a,lev));
	if(key=='a')
		return(y_to_e(ep,d,a,lev));
	return(0);
}
323 | ||
/*
 * possible consonant-consonant-e ending
 *
 * Decides, from the last two characters of the stem, whether to try the
 * stem with an 'e' put back (y_to_e) or to pass it on to VCe().
 * Several cases below fall through into the next one on purpose.
 */
CCe(ep,d,a,lev)
char *ep,*d,*a;
{
	switch(ep[-1]) {
	case 'l':
		/* vowel before the l: leave it to VCe() */
		if(vowel(ep[-2]))
			break;
		switch(ep[-2]) {
		case 'l':
		case 'r':
		case 'w':
			/* ll, rl, wl: leave it to VCe() */
			break;
		default:
			/* any other consonant + l: try the stem with 'e' */
			return(y_to_e(ep,d,a,lev));
		}
		break;
	case 's':
		/* ss: leave it to VCe() */
		if(ep[-2]=='s')
			break;
		/* fall through */
	case 'c':
	case 'g':
		/* reject when the character at ep is 'a' */
		if(*ep=='a')
			return(0);
		/* fall through */
	case 'v':
	case 'z':
		/* vowel before the consonant: leave it to VCe() */
		if(vowel(ep[-2]))
			break;
		/* fall through */
	case 'u':
		if(y_to_e(ep,d,a,lev))
			return(1);
		/* only a stem ending in "ng" goes on to VCe() after that failure */
		if(!(ep[-2]=='n'&&ep[-1]=='g'))
			return(0);
	}
	return(VCe(ep,d,a,lev));
}
360 | ||
/*
 * possible consonant-vowel-consonant-e ending
 *
 * A stem ending in 'e' is refused.  When the stem ends in a vowel
 * followed by a non-vowel, the stem plus 'e' is tried first (with
 * derivation d); if that fails the overwritten character is put back.
 * The plain stem is then stripped with derivation a.
 */
int
VCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char last, saved;

	last = ep[-1];
	if(last=='e')
		return(0);
	if(!vowel(last) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if(putsuf(ep+1,d,lev) || suffix(ep+1,lev))
			return(1);
		ep[0] = saved;
	}
	return(strip(ep,d,a,lev));
}
379 | ||
380 | char *lookuppref(wp,ep) | |
381 | char **wp; | |
382 | char *ep; | |
383 | { | |
384 | register char **sp; | |
385 | register char *bp,*cp; | |
386 | for(sp=preftab;*sp;sp++) { | |
387 | bp = *wp; | |
388 | for(cp= *sp;*cp;cp++,bp++) | |
389 | if(Tolower(*bp)!=*cp) | |
390 | goto next; | |
391 | for(cp=bp;cp<ep;cp++) | |
392 | if(vowel(*cp)) { | |
393 | *wp = bp; | |
394 | return(*sp); | |
395 | } | |
396 | next: ; | |
397 | } | |
398 | return(0); | |
399 | } | |
400 | ||
401 | putsuf(ep,a,lev) | |
402 | char *ep,*a; | |
403 | { | |
404 | register char *cp; | |
405 | char *bp; | |
406 | register char *pp; | |
407 | int val = 0; | |
408 | char space[20]; | |
409 | deriv[lev] = a; | |
410 | if(putw(word,ep,lev)) | |
411 | return(1); | |
412 | bp = word; | |
413 | pp = space; | |
414 | deriv[lev+1] = pp; | |
415 | while(cp=lookuppref(&bp,ep)) { | |
416 | *pp++ = '+'; | |
417 | while(*pp = *cp++) | |
418 | pp++; | |
419 | if(putw(bp,ep,lev+1)) { | |
420 | val = 1; | |
421 | break; | |
422 | } | |
423 | } | |
424 | deriv[lev+1] = deriv[lev+2] = 0; | |
425 | return(val); | |
426 | } | |
427 | ||
428 | putw(bp,ep,lev) | |
429 | char *bp,*ep; | |
430 | { | |
431 | register i, j; | |
432 | char duple[3]; | |
433 | if(ep-bp<=1) | |
434 | return(0); | |
435 | if(vowel(*ep)) { | |
436 | if(monosyl(bp,ep)) | |
437 | return(0); | |
438 | } | |
439 | i = dict(bp,ep); | |
440 | if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) { | |
441 | ep--; | |
442 | deriv[++lev] = duple; | |
443 | duple[0] = '+'; | |
444 | duple[1] = *ep; | |
445 | duple[2] = 0; | |
446 | i = dict(bp,ep); | |
447 | } | |
448 | if(vflag==0||i==0) | |
449 | return(i); | |
450 | j = lev; | |
451 | do { | |
452 | if(deriv[j]) | |
453 | strcat(affix,deriv[j]); | |
454 | } while(--j>0); | |
455 | strcat(affix,"\t"); | |
456 | return(i); | |
457 | } | |
458 | ||
459 | ||
/*
 * Return 1 when bp..ep is at least two characters long, ends in a vowel
 * followed by a non-vowel other than 'x' or 'w', and has no vowel
 * before that final pair; return 0 otherwise.
 */
int
monosyl(bp,ep)
char *bp, *ep;
{
	register char *p;
	char last, prev;

	if(ep<bp+2)
		return(0);
	last = ep[-1];
	prev = ep[-2];
	if(vowel(last))
		return(0);
	if(!vowel(prev))
		return(0);
	if(last=='x' || last=='w')
		return(0);
	for(p=ep-3; p>=bp; p--)
		if(vowel(*p))
			return(0);
	return(1);
}
473 | ||
474 | char * | |
475 | skipv(s) | |
476 | char *s; | |
477 | { | |
478 | if(s>=word&&vowel(*s)) | |
479 | s--; | |
480 | while(s>=word&&!vowel(*s)) | |
481 | s--; | |
482 | return(s); | |
483 | } | |
484 | ||
/*
 * Return 1 if c, after Tolower, is one of a e i o u y; otherwise 0.
 */
int
vowel(c)
int c;
{
	register char *v;

	c = Tolower(c);
	for(v="aeiouy"; *v; v++)
		if(c==*v)
			return(1);
	return(0);
}
498 | ||
499 | /* crummy way to Britishise */ | |
500 | ise() | |
501 | { | |
502 | register struct suftab *p; | |
503 | for(p = suftab;p->suf;p++) { | |
504 | ztos(p->suf); | |
505 | ztos(p->d1); | |
506 | ztos(p->a1); | |
507 | } | |
508 | } | |
/*
 * Replace each 'z' in the NUL-terminated string s by 's', in place.
 */
int
ztos(s)
char *s;
{
	while(*s != 0) {
		if(*s=='z')
			*s = 's';
		s++;
	}
	return(0);
}
516 | ||
517 | dict(bp,ep) | |
518 | char *bp, *ep; | |
519 | { | |
520 | register char *wp; | |
521 | long h; | |
522 | register long *lp; | |
523 | register i; | |
524 | if(xflag) | |
525 | printf("=%.*s\n",ep-bp,bp); | |
526 | for(i=0; i<NP; i++) { | |
527 | for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp) | |
528 | h += *wp * *lp; | |
529 | h += '\n' * *lp; | |
530 | h %= p[i]; | |
531 | if(get(h)==0) | |
532 | return(0); | |
533 | } | |
534 | return(1); | |
535 | } |