date and time created 83/02/11 15:44:08 by rrh
[unix-history] / usr / src / usr.bin / spell / spell.c
CommitLineData
a36a91d2
SL
1#ifndef lint
2static char sccsid[] = "@(#)spell.c 4.1 %G%";
3#endif
4
5#include "spell.h"
6#define DLEV 2
7
/*
 * Old-style (unprototyped) forward declarations.  The rule handlers are
 * named in the suffix table before their definitions appear, so they have
 * to be announced here.
 */
char *strcat();
char *skipv();
int strip(), an(), s(), es(), ily(), ncy();
int CCe(), VCe(), bility(), tion(), ize();
int y_to_e(), i_to_y(), nop(), metry();
25
26struct suftab {
27 char *suf;
28 int (*p1)();
29 int n1;
30 char *d1;
31 char *a1;
32 int (*p2)();
33 int n2;
34 char *d2;
35 char *a2;
36} suftab[] = {
37 {"ssen",ily,4,"-y+iness","+ness" },
38 {"ssel",ily,4,"-y+i+less","+less" },
39 {"se",s,1,"","+s", es,2,"-y+ies","+es" },
40 {"s'",s,2,"","+'s"},
41 {"s",s,1,"","+s"},
42 {"ecn",ncy,1,"","-t+ce"},
43 {"ycn",ncy,1,"","-cy+t"},
44 {"ytilb",nop,0,"",""},
45 {"ytilib",bility,5,"-le+ility",""},
46 {"elbaif",i_to_y,4,"-y+iable",""},
47 {"elba",CCe,4,"-e+able","+able"},
48 {"yti",CCe,3,"-e+ity","+ity"},
49 {"ylb",y_to_e,1,"-e+y",""},
50 {"yl",ily,2,"-y+ily","+ly"},
51 {"laci",strip,2,"","+al"},
52 {"latnem",strip,2,"","+al"},
53 {"lanoi",strip,2,"","+al"},
54 {"tnem",strip,4,"","+ment"},
55 {"gni",CCe,3,"-e+ing","+ing"},
56 {"reta",nop,0,"",""},
57 {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"},
58 {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"},
59 {"citsi",strip,2,"","+ic"},
60 {"cihparg",i_to_y,1,"-y+ic",""},
61 {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"},
62 {"cirtem",i_to_y,1,"-y+ic",""},
63 {"yrtem",metry,0,"-ry+er",""},
64 {"cigol",i_to_y,1,"-y+ic",""},
65 {"tsigol",i_to_y,2,"-y+ist",""},
66 {"tsi",VCe,3,"-e+ist","+ist"},
67 {"msi",VCe,3,"-e+ism","+ist"},
68 {"noitacif",i_to_y,6,"-y+ication",""},
69 {"noitazi",ize,5,"-e+ation",""},
70 {"rota",tion,2,"-e+or",""},
71 {"noit",tion,3,"-e+ion","+ion"},
72 {"naino",an,3,"","+ian"},
73 {"na",an,1,"","+n"},
74 {"evit",tion,3,"-e+ive","+ive"},
75 {"ezi",CCe,3,"-e+ize","+ize"},
76 {"pihs",strip,4,"","+ship"},
77 {"dooh",ily,4,"-y+hood","+hood"},
78 {"ekil",strip,4,"","+like"},
79 0
80};
81
/*
 * Prefixes that lookuppref() may peel off the front of a word.  The list
 * is searched in order and ends with a null pointer.
 */
char *preftab[] = {
	"anti", "bio", "dis", "electro", "en", "fore", "hyper",
	"intra", "inter", "iso", "kilo", "magneto", "meta", "micro",
	"milli", "mis", "mono", "multi", "non", "out", "over",
	"photo", "poly", "pre", "pseudo", "re", "semi", "stereo",
	"sub", "super", "thermo", "ultra",
	"under",	/*must precede un*/
	"un",
	0
};
119
/*
 * vflag   counted up for each -v option; when nonzero putw() appends the
 *         derivation of a recognized word to affix[]
 * xflag   counted up for each -x option; when nonzero dict() prints every
 *         candidate it looks up
 */
int vflag, xflag;

/*
 * word      working copy of the current input word; the rule handlers
 *           edit it in place
 * original  the word exactly as read, used for output
 */
char word[100], original[100];

char *deriv[40];	/* derivation labels, indexed by derivation level */
char affix[40];		/* text printed in front of the word by main() */
126
127main(argc,argv)
128char **argv;
129{
130 register char *ep, *cp;
131 register char *dp;
132 int fold;
133 int j;
134 FILE *file, *found;
135 if(!prime(argc,argv)) {
136 fprintf(stderr,
137 "spell: cannot initialize hash table\n");
138 exit(1);
139 }
140 found = fopen(argv[2],"w");
141 for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++)
142 switch(argv[0][1]) {
143 case 'b':
144 ise();
145 break;
146 case 'v':
147 vflag++;
148 break;
149 case 'x':
150 xflag++;
151 break;
152 }
153 for(;; fprintf(file,"%s%s\n",affix,original)) {
154 affix[0] = 0;
155 file = found;
156 for(ep=word;(*ep=j=getchar())!='\n';ep++)
157 if(j == EOF)
158 exit(0);
159 for(cp=word,dp=original; cp<ep; )
160 *dp++ = *cp++;
161 *dp = 0;
162 fold = 0;
163 for(cp=word;cp<ep;cp++)
164 if(islower(*cp))
165 goto lcase;
166 if(putsuf(ep,".",0))
167 continue;
168 ++fold;
169 for(cp=original+1,dp=word+1;dp<ep;dp++,cp++)
170 *dp = Tolower(*cp);
171lcase:
172 if(putsuf(ep,".",0)||suffix(ep,0))
173 continue;
174 if(isupper(word[0])) {
175 for(cp=original,dp=word; *dp = *cp++; dp++)
176 if (fold) *dp = Tolower(*dp);
177 word[0] = Tolower(word[0]);
178 goto lcase;
179 }
180 file = stdout;
181 }
182}
183
184suffix(ep,lev)
185char *ep;
186{
187 register struct suftab *t;
188 register char *cp, *sp;
189 lev += DLEV;
190 deriv[lev] = deriv[lev-1] = 0;
191 for(t= &suftab[0];sp=t->suf;t++) {
192 cp = ep;
193 while(*sp)
194 if(*--cp!=*sp++)
195 goto next;
196 for(sp=cp; --sp>=word&&!vowel(*sp); ) ;
197 if(sp<word)
198 return(0);
199 if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1))
200 return(1);
201 if(t->p2!=0) {
202 deriv[lev] = deriv[lev+1] = 0;
203 return((*t->p2)(ep-t->n2,t->d2,t->a2,lev));
204 }
205 return(0);
206next: ;
207 }
208 return(0);
209}
210
/*
 * Rule handler that never accepts.  Table entries that name it make
 * suffix() stop at that ending without trying anything.
 */
int
nop()
{
	return 0;
}
215
/*
 * Basic rule handler: accept if word..ep is known as it stands (label a),
 * otherwise try to peel a further suffix off it.  d is not used.
 * Returns 1 or 0.
 */
int
strip(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(putsuf(ep,a,lev))
		return(1);
	return(suffix(ep,lev) != 0);
}
221
222s(ep,d,a,lev)
223char *ep,*d,*a;
224{
225 if(lev>DLEV+1)
226 return(0);
227 if(*ep=='s'&&ep[-1]=='s')
228 return(0);
229 return(strip(ep,d,a,lev));
230}
231
232an(ep,d,a,lev)
233char *ep,*d,*a;
234{
235 if(!isupper(*word)) /*must be proper name*/
236 return(0);
237 return(putsuf(ep,a,lev));
238}
239
/*
 * Handler for "ization": writes an 'e' at ep and hands the lengthened
 * stem to strip(), using d as the label.
 */
int
ize(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'e';
	return(strip(ep+1,"",d,lev));
}
246
/*
 * Put back a final 'e': stores 'e' at ep, then tries the stem that now
 * ends one character later, with d as the derivation label.
 */
int
y_to_e(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	*ep = 'e';
	++ep;
	return(strip(ep,"",d,lev));
}
253
/*
 * Handler for endings that may follow a y-turned-i: if the stem ends in
 * 'i' let i_to_y() deal with it, otherwise strip the suffix plainly.
 */
int
ily(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	return(ep[-1]=='i' ? i_to_y(ep,d,a,lev) : strip(ep,d,a,lev));
}
262
263ncy(ep,d,a,lev)
264char *ep, *d, *a;
265{
266 if(skipv(skipv(ep-1))<word)
267 return(0);
268 ep[-1] = 't';
269 return(strip(ep,d,a,lev));
270}
271
/*
 * Handler for "bility": stores 'l' at ep and lets y_to_e() append the
 * 'e' after it, so the stem tried ends in "le".
 */
int
bility(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'l';
	return(y_to_e(ep+1,d,a,lev));
}
278
/*
 * If the stem ends in 'i', turn that into 'y' and label the derivation
 * with d; otherwise leave the stem alone and label it with a.  Either
 * way the result is handed to strip().
 */
int
i_to_y(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(ep[-1]!='i')
		return(strip(ep,"",a,lev));
	ep[-1] = 'y';
	return(strip(ep,"",d,lev));
}
288
289es(ep,d,a,lev)
290char *ep,*d,*a;
291{
292 if(lev>DLEV)
293 return(0);
294 switch(ep[-1]) {
295 default:
296 return(0);
297 case 'i':
298 return(i_to_y(ep,d,a,lev));
299 case 's':
300 case 'h':
301 case 'z':
302 case 'x':
303 return(strip(ep,d,a,lev));
304 }
305}
306
/*
 * Handler for "metry": overwrites the two characters before ep with
 * "er" and passes the result to strip().
 */
int
metry(ep,d,a,lev)
char *ep, *d,*a;
int lev;
{
	register char *tail = ep-2;

	tail[0] = 'e';
	tail[1] = 'r';
	return(strip(ep,d,a,lev));
}
314
/*
 * Handler for "tion", "tive" and "ator".  What happens depends on the
 * character two places before ep: 'c' or 'r' -> look the stem up as is
 * (label a); 'a' -> restore a final 'e' via y_to_e(); anything else is
 * rejected.
 */
int
tion(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	register int key = ep[-2];

	if(key=='c' || key=='r')
		return(putsuf(ep,a,lev));
	if(key=='a')
		return(y_to_e(ep,d,a,lev));
	return(0);
}
327
/*
 * possible consonant-consonant-e ending
 *
 * Decides from the last two stem characters whether a dropped 'e' should
 * be restored with y_to_e() before the stem is looked up.  Paths that
 * neither accept nor reject end in VCe().  Several cases deliberately
 * run into the next one.
 */
int
CCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	switch(ep[-1]) {
	case 'l':
		/* consonant + 'l', except ll, rl and wl: restore the 'e' */
		if(!vowel(ep[-2]) && ep[-2]!='l' && ep[-2]!='r' && ep[-2]!='w')
			return(y_to_e(ep,d,a,lev));
		break;
	case 's':
		if(ep[-2]=='s')
			break;
		/* FALLTHROUGH */
	case 'c':
	case 'g':
		if(*ep=='a')
			return(0);
		/* FALLTHROUGH */
	case 'v':
	case 'z':
		if(vowel(ep[-2]))
			break;
		/* FALLTHROUGH */
	case 'u':
		if(y_to_e(ep,d,a,lev))
			return(1);
		/* only an "ng" stem gets the second try below */
		if(ep[-2]!='n' || ep[-1]!='g')
			return(0);
		break;
	}
	return(VCe(ep,d,a,lev));
}
364
/*
 * possible consonant-vowel-consonant-e ending
 *
 * A stem ending in 'e' is rejected.  When the stem ends vowel+consonant,
 * an 'e' is put at ep for a trial (label d); if the trial fails the
 * overwritten character is put back.  Last resort is a plain strip().
 */
int
VCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char saved;

	if(ep[-1]=='e')
		return(0);
	if(!vowel(ep[-1]) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if(putsuf(ep+1,d,lev) || suffix(ep+1,lev))
			return(1);
		ep[0] = saved;
	}
	return(strip(ep,d,a,lev));
}
383
384char *lookuppref(wp,ep)
385char **wp;
386char *ep;
387{
388 register char **sp;
389 register char *bp,*cp;
390 for(sp=preftab;*sp;sp++) {
391 bp = *wp;
392 for(cp= *sp;*cp;cp++,bp++)
393 if(Tolower(*bp)!=*cp)
394 goto next;
395 for(cp=bp;cp<ep;cp++)
396 if(vowel(*cp)) {
397 *wp = bp;
398 return(*sp);
399 }
400next: ;
401 }
402 return(0);
403}
404
405putsuf(ep,a,lev)
406char *ep,*a;
407{
408 register char *cp;
409 char *bp;
410 register char *pp;
411 int val = 0;
412 char space[20];
413 deriv[lev] = a;
414 if(putw(word,ep,lev))
415 return(1);
416 bp = word;
417 pp = space;
418 deriv[lev+1] = pp;
419 while(cp=lookuppref(&bp,ep)) {
420 *pp++ = '+';
421 while(*pp = *cp++)
422 pp++;
423 if(putw(bp,ep,lev+1)) {
424 val = 1;
425 break;
426 }
427 }
428 deriv[lev+1] = deriv[lev+2] = 0;
429 return(val);
430}
431
432putw(bp,ep,lev)
433char *bp,*ep;
434{
435 register i, j;
436 char duple[3];
437 if(ep-bp<=1)
438 return(0);
439 if(vowel(*ep)) {
440 if(monosyl(bp,ep))
441 return(0);
442 }
443 i = dict(bp,ep);
444 if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) {
445 ep--;
446 deriv[++lev] = duple;
447 duple[0] = '+';
448 duple[1] = *ep;
449 duple[2] = 0;
450 i = dict(bp,ep);
451 }
452 if(vflag==0||i==0)
453 return(i);
454 j = lev;
455 do {
456 if(deriv[j])
457 strcat(affix,deriv[j]);
458 } while(--j>0);
459 strcat(affix,"\t");
460 return(i);
461}
462
463
/*
 * Returns 1 when bp..ep has the shape "consonants, one vowel, one final
 * consonant" and that final consonant is neither 'x' nor 'w'; returns 0
 * otherwise, including for anything shorter than two characters.
 */
int
monosyl(bp,ep)
char *bp, *ep;
{
	if(ep<bp+2)
		return(0);
	--ep;
	if(vowel(*ep))		/* last character must be a consonant */
		return(0);
	--ep;
	if(!vowel(*ep))		/* the one before it must be a vowel */
		return(0);
	if(ep[1]=='x' || ep[1]=='w')
		return(0);
	for(--ep; ep>=bp; --ep)	/* nothing but consonants in front */
		if(vowel(*ep))
			return(0);
	return(1);
}
477
478char *
479skipv(s)
480char *s;
481{
482 if(s>=word&&vowel(*s))
483 s--;
484 while(s>=word&&!vowel(*s))
485 s--;
486 return(s);
487}
488
/*
 * Returns 1 if c, after Tolower(), is one of a e i o u y; 0 otherwise.
 */
int
vowel(c)
int c;
{
	c = Tolower(c);
	return(c=='a' || c=='e' || c=='i' || c=='o' || c=='u' || c=='y');
}
502
503/* crummy way to Britishise */
504ise()
505{
506 register struct suftab *p;
507 for(p = suftab;p->suf;p++) {
508 ztos(p->suf);
509 ztos(p->d1);
510 ztos(p->a1);
511 }
512}
/*
 * Overwrite each 'z' in the NUL-terminated string s with 's', in place.
 * No value is returned.
 */
int
ztos(s)
char *s;
{
	while(*s) {
		if(*s=='z')
			*s = 's';
		s++;
	}
}
520
521dict(bp,ep)
522char *bp, *ep;
523{
524 register char *wp;
525 long h;
526 register long *lp;
527 register i;
528 if(xflag)
529 printf("=%.*s\n",ep-bp,bp);
530 for(i=0; i<NP; i++) {
531 for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp)
532 h += *wp * *lp;
533 h += '\n' * *lp;
534 h %= p[i];
535 if(get(h)==0)
536 return(0);
537 }
538 return(1);
539}