BSD 3 development
[unix-history] / usr / src / cmd / spell / spell.c
CommitLineData
42d6e430
BJ
1#include "spell.h"
2#define DLEV 2
3
4char *strcat();
5int strip();
6char *skipv();
7int an();
8int s();
9int es();
10int ily();
11int ncy();
12int CCe();
13int VCe();
14int bility();
15int tion();
16int ize();
17int y_to_e();
18int i_to_y();
19int nop();
20int metry();
21
22struct suftab {
23 char *suf;
24 int (*p1)();
25 int n1;
26 char *d1;
27 char *a1;
28 int (*p2)();
29 int n2;
30 char *d2;
31 char *a2;
32} suftab[] = {
33 {"ssen",ily,4,"-y+iness","+ness" },
34 {"ssel",ily,4,"-y+i+less","+less" },
35 {"se",s,1,"","+s", es,2,"-y+ies","+es" },
36 {"s'",s,2,"","+'s"},
37 {"s",s,1,"","+s"},
38 {"ecn",ncy,1,"","-t+ce"},
39 {"ycn",ncy,1,"","-cy+t"},
40 {"ytilb",nop,0,"",""},
41 {"ytilib",bility,5,"-le+ility",""},
42 {"elbaif",i_to_y,4,"-y+iable",""},
43 {"elba",CCe,4,"-e+able","+able"},
44 {"yti",CCe,3,"-e+ity","+ity"},
45 {"ylb",y_to_e,1,"-e+y",""},
46 {"yl",ily,2,"-y+ily","+ly"},
47 {"laci",strip,2,"","+al"},
48 {"latnem",strip,2,"","+al"},
49 {"lanoi",strip,2,"","+al"},
50 {"tnem",strip,4,"","+ment"},
51 {"gni",CCe,3,"-e+ing","+ing"},
52 {"reta",nop,0,"",""},
53 {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"},
54 {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"},
55 {"citsi",strip,2,"","+ic"},
56 {"cihparg",i_to_y,1,"-y+ic",""},
57 {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"},
58 {"cirtem",i_to_y,1,"-y+ic",""},
59 {"yrtem",metry,0,"-ry+er",""},
60 {"cigol",i_to_y,1,"-y+ic",""},
61 {"tsigol",i_to_y,2,"-y+ist",""},
62 {"tsi",VCe,3,"-e+ist","+ist"},
63 {"msi",VCe,3,"-e+ism","+ist"},
64 {"noitacif",i_to_y,6,"-y+ication",""},
65 {"noitazi",ize,5,"-e+ation",""},
66 {"rota",tion,2,"-e+or",""},
67 {"noit",tion,3,"-e+ion","+ion"},
68 {"naino",an,3,"","+ian"},
69 {"na",an,1,"","+n"},
70 {"evit",tion,3,"-e+ive","+ive"},
71 {"ezi",CCe,3,"-e+ize","+ize"},
72 {"pihs",strip,4,"","+ship"},
73 {"dooh",ily,4,"-y+hood","+hood"},
74 {"ekil",strip,4,"","+like"},
75 0
76};
77
/*
 * Prefixes that lookuppref() is willing to peel off the front of a
 * word.  The table is scanned from the top and the first match wins,
 * so the relative order of entries matters.  A 0 entry ends the list.
 */
char *preftab[] = {
	"anti", "bio", "dis", "electro",
	"en", "fore", "hyper", "intra",
	"inter", "iso", "kilo", "magneto",
	"meta", "micro", "milli", "mis",
	"mono", "multi", "non", "out",
	"over", "photo", "poly", "pre",
	"pseudo", "re", "semi", "stereo",
	"sub", "super", "thermo", "ultra",
	"under",	/* must precede un */
	"un",
	0
};
115
/* Option counters, bumped by main() for each -v and -x argument. */
int vflag;		/* when set, putw() records the derivation in affix */
int xflag;		/* when set, dict() prints every string it looks up */

/* The word under test; the suffix rules edit it in place. */
char word[100];
/* The input line exactly as read; this is what main() prints. */
char original[100];

/* Derivation strings, indexed by recursion level. */
char *deriv[40];
/* Derivation text that main() prints in front of the word. */
char affix[40];
122
123main(argc,argv)
124char **argv;
125{
126 register char *ep, *cp;
127 register char *dp;
128 int fold;
129 int j;
130 FILE *file, *found;
131 if(!prime(argc,argv)) {
132 fprintf(stderr,
133 "spell: cannot initialize hash table\n");
134 exit(1);
135 }
136 found = fopen(argv[2],"w");
137 for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++)
138 switch(argv[0][1]) {
139 case 'b':
140 ise();
141 break;
142 case 'v':
143 vflag++;
144 break;
145 case 'x':
146 xflag++;
147 break;
148 }
149 for(;; fprintf(file,"%s%s\n",affix,original)) {
150 affix[0] = 0;
151 file = found;
152 for(ep=word;(*ep=j=getchar())!='\n';ep++)
153 if(j == EOF)
154 exit(0);
155 for(cp=word,dp=original; cp<ep; )
156 *dp++ = *cp++;
157 *dp = 0;
158 fold = 0;
159 for(cp=word;cp<ep;cp++)
160 if(islower(*cp))
161 goto lcase;
162 if(putsuf(ep,".",0))
163 continue;
164 ++fold;
165 for(cp=original+1,dp=word+1;dp<ep;dp++,cp++)
166 *dp = Tolower(*cp);
167lcase:
168 if(putsuf(ep,".",0)||suffix(ep,0))
169 continue;
170 if(isupper(word[0])) {
171 for(cp=original,dp=word; *dp = *cp++; dp++)
172 if (fold) *dp = Tolower(*dp);
173 word[0] = Tolower(word[0]);
174 goto lcase;
175 }
176 file = stdout;
177 }
178}
179
180suffix(ep,lev)
181char *ep;
182{
183 register struct suftab *t;
184 register char *cp, *sp;
185 lev += DLEV;
186 deriv[lev] = deriv[lev-1] = 0;
187 for(t= &suftab[0];sp=t->suf;t++) {
188 cp = ep;
189 while(*sp)
190 if(*--cp!=*sp++)
191 goto next;
192 for(sp=cp; --sp>=word&&!vowel(*sp); ) ;
193 if(sp<word)
194 return(0);
195 if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1))
196 return(1);
197 if(t->p2!=0) {
198 deriv[lev] = deriv[lev+1] = 0;
199 return((*t->p2)(ep-t->n2,t->d2,t->a2,lev));
200 }
201 return(0);
202next: ;
203 }
204 return(0);
205}
206
/*
 * Suffix rule that never accepts.  It is installed for endings that
 * must block the entries listed after them in suftab.  Any arguments
 * passed through the table's function pointer are ignored.
 */
int
nop()
{
	return 0;
}
211
/*
 * Basic suffix rule: accept if the stem ending at ep is a word (with
 * a recorded as the derivation), or failing that if the stem can
 * itself be reduced by suffix().  d is unused.  Returns 1 or 0.
 */
int
strip(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if (putsuf(ep,a,lev))
		return(1);
	return(suffix(ep,lev) != 0);
}
217
218s(ep,d,a,lev)
219char *ep,*d,*a;
220{
221 if(lev>DLEV+1)
222 return(0);
223 if(*ep=='s'&&ep[-1]=='s')
224 return(0);
225 return(strip(ep,d,a,lev));
226}
227
228an(ep,d,a,lev)
229char *ep,*d,*a;
230{
231 if(!isupper(*word)) /*must be proper name*/
232 return(0);
233 return(putsuf(ep,a,lev));
234}
235
/*
 * Rule for -ization: put an 'e' at ep, so that the stem ends in
 * "...e", and strip from just past it, reporting d as the
 * derivation.  a is unused.  The word is edited in place.
 */
int
ize(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'e';
	return(strip(ep+1,"",d,lev));
}
242
/*
 * Restore a dropped final 'e': store 'e' at ep and test the stem that
 * now ends one character later, with d as its derivation.  a is
 * unused.  The word is edited in place and not put back.
 */
int
y_to_e(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char *end = ep;

	*end = 'e';
	end++;
	return(strip(end,"",d,lev));
}
249
/*
 * Rule for endings such as -ly and -ness that may follow a 'y' that
 * was turned into 'i'.  A stem ending in 'i' goes to i_to_y(); any
 * other stem is handled by strip().
 */
int
ily(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if (ep[-1] != 'i')
		return(strip(ep,d,a,lev));
	return(i_to_y(ep,d,a,lev));
}
258
259ncy(ep,d,a,lev)
260char *ep, *d, *a;
261{
262 if(skipv(skipv(ep-1))<word)
263 return(0);
264 ep[-1] = 't';
265 return(strip(ep,d,a,lev));
266}
267
/*
 * Rule for -ibility: store 'l' at ep and let y_to_e() add the 'e'
 * after it, so that the stem ends in "...le".
 */
int
bility(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'l';
	return(y_to_e(ep+1,d,a,lev));
}
274
/*
 * Undo a y-to-i change.  If the stem ends in 'i' that letter is
 * turned back into 'y' and d is reported as the derivation; otherwise
 * the stem is left alone and a is reported.  Either way the result is
 * handed to strip().
 */
int
i_to_y(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char *how = a;

	if (ep[-1] == 'i') {
		ep[-1] = 'y';
		how = d;
	}
	return(strip(ep,"",how,lev));
}
284
285es(ep,d,a,lev)
286char *ep,*d,*a;
287{
288 if(lev>DLEV)
289 return(0);
290 switch(ep[-1]) {
291 default:
292 return(0);
293 case 'i':
294 return(i_to_y(ep,d,a,lev));
295 case 's':
296 case 'h':
297 case 'z':
298 case 'x':
299 return(strip(ep,d,a,lev));
300 }
301}
302
/*
 * Rule for -metry: overwrite the last two characters before ep with
 * "er" (so "...metry" reads "...meter") and strip.
 */
int
metry(ep,d,a,lev)
char *ep, *d,*a;
int lev;
{
	char *tail = ep - 2;

	tail[0] = 'e';
	tail[1] = 'r';
	return(strip(ep,d,a,lev));
}
310
/*
 * Rule for -tion, -tive and -ator, chosen by the letter two places
 * before ep:
 *	'c' or 'r'	look the stem up directly
 *	'a'		try the stem with a final 'e' restored
 * Any other letter refuses the word.
 */
int
tion(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	int before = ep[-2];

	if (before == 'c' || before == 'r')
		return(putsuf(ep,a,lev));
	if (before == 'a')
		return(y_to_e(ep,d,a,lev));
	return(0);
}
323
/*
 * Possible consonant-consonant-e ending.
 *
 * The last letter of the stem decides whether to try restoring a
 * dropped final 'e' through y_to_e(), to refuse outright, or to hand
 * the word on to VCe():
 *	l	after a vowel, or after l, r or w: VCe; else y_to_e only
 *	s	after another s: VCe; else as for c and g
 *	c, g	refused if the suffix starts with 'a'; else as for v, z
 *	v, z	after a vowel: VCe; else as for u
 *	u	try y_to_e; if that fails only "...ng" goes on to VCe
 *	other	VCe
 */
int
CCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	int last = ep[-1];
	int restore = 0;	/* set when the y_to_e attempt applies */

	if (last == 'l') {
		if (!vowel(ep[-2]) &&
		    ep[-2] != 'l' && ep[-2] != 'r' && ep[-2] != 'w')
			return(y_to_e(ep,d,a,lev));
	} else if (last == 's' && ep[-2] == 's') {
		;	/* straight to VCe */
	} else if (last == 's' || last == 'c' || last == 'g') {
		if (*ep == 'a')
			return(0);
		restore = !vowel(ep[-2]);
	} else if (last == 'v' || last == 'z') {
		restore = !vowel(ep[-2]);
	} else if (last == 'u') {
		restore = 1;
	}
	if (restore) {
		if (y_to_e(ep,d,a,lev))
			return(1);
		/*
		 * Read the stem again here rather than reusing `last':
		 * the attempt above works on the word in place.
		 */
		if (!(ep[-2] == 'n' && ep[-1] == 'g'))
			return(0);
	}
	return(VCe(ep,d,a,lev));
}
360
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem that already ends in 'e' is refused.  When the stem ends in
 * vowel + consonant, it is first tried with an 'e' put back after it
 * (derivation d); if that fails the overwritten character is restored
 * and the stem is stripped as it stands (derivation a).
 */
int
VCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char saved;
	char *end;

	if (ep[-1] == 'e')
		return(0);
	if (!vowel(ep[-1]) && vowel(ep[-2])) {
		saved = *ep;
		*ep = 'e';
		end = ep + 1;
		if (putsuf(end,d,lev) || suffix(end,lev))
			return(1);
		*ep = saved;
	}
	return(strip(ep,d,a,lev));
}
379
380char *lookuppref(wp,ep)
381char **wp;
382char *ep;
383{
384 register char **sp;
385 register char *bp,*cp;
386 for(sp=preftab;*sp;sp++) {
387 bp = *wp;
388 for(cp= *sp;*cp;cp++,bp++)
389 if(Tolower(*bp)!=*cp)
390 goto next;
391 for(cp=bp;cp<ep;cp++)
392 if(vowel(*cp)) {
393 *wp = bp;
394 return(*sp);
395 }
396next: ;
397 }
398 return(0);
399}
400
401putsuf(ep,a,lev)
402char *ep,*a;
403{
404 register char *cp;
405 char *bp;
406 register char *pp;
407 int val = 0;
408 char space[20];
409 deriv[lev] = a;
410 if(putw(word,ep,lev))
411 return(1);
412 bp = word;
413 pp = space;
414 deriv[lev+1] = pp;
415 while(cp=lookuppref(&bp,ep)) {
416 *pp++ = '+';
417 while(*pp = *cp++)
418 pp++;
419 if(putw(bp,ep,lev+1)) {
420 val = 1;
421 break;
422 }
423 }
424 deriv[lev+1] = deriv[lev+2] = 0;
425 return(val);
426}
427
428putw(bp,ep,lev)
429char *bp,*ep;
430{
431 register i, j;
432 char duple[3];
433 if(ep-bp<=1)
434 return(0);
435 if(vowel(*ep)) {
436 if(monosyl(bp,ep))
437 return(0);
438 }
439 i = dict(bp,ep);
440 if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) {
441 ep--;
442 deriv[++lev] = duple;
443 duple[0] = '+';
444 duple[1] = *ep;
445 duple[2] = 0;
446 i = dict(bp,ep);
447 }
448 if(vflag==0||i==0)
449 return(i);
450 j = lev;
451 do {
452 if(deriv[j])
453 strcat(affix,deriv[j]);
454 } while(--j>0);
455 strcat(affix,"\t");
456 return(i);
457}
458
459
/*
 * Return 1 if bp..ep has exactly this shape: any number of
 * non-vowels, then one vowel, then one final non-vowel that is
 * neither 'x' nor 'w'.  Strings shorter than two characters, and any
 * other shape, give 0.
 */
int
monosyl(bp,ep)
char *bp, *ep;
{
	register char *cp;

	if (ep - bp < 2)
		return(0);
	if (vowel(ep[-1]))
		return(0);
	if (!vowel(ep[-2]))
		return(0);
	if (ep[-1] == 'x' || ep[-1] == 'w')
		return(0);
	/* nothing in front of that vowel may be a vowel */
	for (cp = ep - 3; cp >= bp; cp--)
		if (vowel(*cp))
			return(0);
	return(1);
}
473
474char *
475skipv(s)
476char *s;
477{
478 if(s>=word&&vowel(*s))
479 s--;
480 while(s>=word&&!vowel(*s))
481 s--;
482 return(s);
483}
484
/*
 * Return 1 if c, after Tolower(), is one of a, e, i, o, u or y,
 * and 0 otherwise.
 */
int
vowel(c)
int c;
{
	register char *v;

	c = Tolower(c);
	for (v = "aeiouy"; *v != 0; v++)
		if (c == *v)
			return(1);
	return(0);
}
498
499/* crummy way to Britishise */
500ise()
501{
502 register struct suftab *p;
503 for(p = suftab;p->suf;p++) {
504 ztos(p->suf);
505 ztos(p->d1);
506 ztos(p->a1);
507 }
508}
/*
 * Overwrite every 'z' in the NUL-terminated string s with 's'.
 * The string is changed in place; no meaningful value is returned.
 */
int
ztos(s)
char *s;
{
	register char *cp = s;

	while (*cp != 0) {
		if (*cp == 'z')
			*cp = 's';
		cp++;
	}
}
516
517dict(bp,ep)
518char *bp, *ep;
519{
520 register char *wp;
521 long h;
522 register long *lp;
523 register i;
524 if(xflag)
525 printf("=%.*s\n",ep-bp,bp);
526 for(i=0; i<NP; i++) {
527 for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp)
528 h += *wp * *lp;
529 h += '\n' * *lp;
530 h %= p[i];
531 if(get(h)==0)
532 return(0);
533 }
534 return(1);
535}