Research V7 development
[unix-history] / usr / src / cmd / spell / spell.c
CommitLineData
1acdf60c
SJ
1#include "spell.h"
2#define DLEV 2
3
/* Forward declarations (old-style, no prototypes). */
char *strcat();
char *skipv();

/* Suffix handlers referenced by the suftab[] initializer. */
int strip(), an(), s(), es();
int ily(), ncy();
int CCe(), VCe();
int bility(), tion(), ize();
int y_to_e(), i_to_y();
int nop(), metry();
21
22struct suftab {
23 char *suf;
24 int (*p1)();
25 int n1;
26 char *d1;
27 char *a1;
28 int (*p2)();
29 int n2;
30 char *d2;
31 char *a2;
32} suftab[] = {
33 {"ssen",ily,4,"-y+iness","+ness" },
34 {"ssel",ily,4,"-y+i+less","+less" },
35 {"se",s,1,"","+s", es,2,"-y+ies","+es" },
36 {"s'",s,2,"","+'s"},
37 {"s",s,1,"","+s"},
38 {"ecn",ncy,1,"","-t+ce"},
39 {"ycn",ncy,1,"","-cy+t"},
40 {"ytilb",nop,0,"",""},
41 {"ytilib",bility,5,"-le+ility",""},
42 {"elbaif",i_to_y,4,"-y+iable",""},
43 {"elba",CCe,4,"-e+able","+able"},
44 {"yti",CCe,3,"-e+ity","+ity"},
45 {"ylb",y_to_e,1,"-e+y",""},
46 {"yl",ily,2,"-y+ily","+ly"},
47 {"laci",strip,2,"","+al"},
48 {"latnem",strip,2,"","+al"},
49 {"lanoi",strip,2,"","+al"},
50 {"tnem",strip,4,"","+ment"},
51 {"gni",CCe,3,"-e+ing","+ing"},
52 {"reta",nop,0,"",""},
53 {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"},
54 {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"},
55 {"citsi",strip,2,"","+ic"},
56 {"cihparg",i_to_y,1,"-y+ic",""},
57 {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"},
58 {"cirtem",i_to_y,1,"-y+ic",""},
59 {"yrtem",metry,0,"-ry+er",""},
60 {"cigol",i_to_y,1,"-y+ic",""},
61 {"tsigol",i_to_y,2,"-y+ist",""},
62 {"tsi",VCe,3,"-e+ist","+ist"},
63 {"msi",VCe,3,"-e+ism","+ist"},
64 {"noitacif",i_to_y,6,"-y+ication",""},
65 {"noitazi",ize,5,"-e+ation",""},
66 {"rota",tion,2,"-e+or",""},
67 {"noit",tion,3,"-e+ion","+ion"},
68 {"naino",an,3,"","+ian"},
69 {"na",an,1,"","+n"},
70 {"evit",tion,3,"-e+ive","+ive"},
71 {"ezi",CCe,3,"-e+ize","+ize"},
72 {"pihs",strip,4,"","+ship"},
73 {"dooh",ily,4,"-y+ihood","+hood"},
74 {"luf",ily,3,"-y+iful","+ful"},
75 {"ekil",strip,4,"","+like"},
76 0
77};
78
/*
 * Prefixes that lookuppref() will peel off the front of a word.
 * The list is searched in order and the first match wins, so a longer
 * prefix must come before any shorter one it begins with.
 * Terminated by a 0 entry.
 */
char *preftab[] = {
	"anti",    "bio",    "dis",   "electro",
	"en",      "fore",   "hyper", "intra",
	"inter",   "iso",    "kilo",  "magneto",
	"meta",    "micro",  "milli", "mis",
	"mono",    "multi",  "non",   "out",
	"over",    "photo",  "poly",  "pre",
	"pseudo",  "re",     "semi",  "stereo",
	"sub",     "super",  "thermo","ultra",
	"under",	/*must precede un*/
	"un",
	0
};
116
int vflag, xflag;	/* counts of -v and -x seen on the command line */
char word[100];		/* working copy of the current word; edited in place */
char original[100];	/* the word exactly as it was read */
char *deriv[40];	/* derivation strings, indexed by level */
char affix[40];		/* derivation text printed in front of the word */
123
124main(argc,argv)
125char **argv;
126{
127 register char *ep, *cp;
128 register char *dp;
129 int fold;
130 int j;
131 FILE *file, *found;
132 if(!prime(argc,argv)) {
133 fprintf(stderr,
134 "spell: cannot initialize hash table\n");
135 exit(1);
136 }
137 found = fopen(argv[2],"w");
138 for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++)
139 switch(argv[0][1]) {
140 case 'b':
141 ise();
142 break;
143 case 'v':
144 vflag++;
145 break;
146 case 'x':
147 xflag++;
148 break;
149 }
150 for(;; fprintf(file,"%s%s\n",affix,original)) {
151 affix[0] = 0;
152 file = found;
153 for(ep=word;(*ep=j=getchar())!='\n';ep++)
154 if(j == EOF)
155 exit(0);
156 for(cp=word,dp=original; cp<ep; )
157 *dp++ = *cp++;
158 *dp = 0;
159 fold = 0;
160 for(cp=word;cp<ep;cp++)
161 if(islower(*cp))
162 goto lcase;
163 if(putsuf(ep,".",0))
164 continue;
165 ++fold;
166 for(cp=original+1,dp=word+1;dp<ep;dp++,cp++)
167 *dp = Tolower(*cp);
168lcase:
169 if(putsuf(ep,".",0)||suffix(ep,0))
170 continue;
171 if(isupper(word[0])) {
172 for(cp=original,dp=word; *dp = *cp++; dp++)
173 if (fold) *dp = Tolower(*dp);
174 word[0] = Tolower(word[0]);
175 goto lcase;
176 }
177 file = stdout;
178 }
179}
180
181suffix(ep,lev)
182char *ep;
183{
184 register struct suftab *t;
185 register char *cp, *sp;
186 lev += DLEV;
187 deriv[lev] = deriv[lev-1] = 0;
188 for(t= &suftab[0];sp=t->suf;t++) {
189 cp = ep;
190 while(*sp)
191 if(*--cp!=*sp++)
192 goto next;
193 for(sp=cp; --sp>=word&&!vowel(*sp); ) ;
194 if(sp<word)
195 return(0);
196 if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1))
197 return(1);
198 if(t->p2!=0) {
199 deriv[lev] = deriv[lev+1] = 0;
200 return((*t->p2)(ep-t->n2,t->d2,t->a2,lev));
201 }
202 return(0);
203next: ;
204 }
205 return(0);
206}
207
/* Handler for table entries that must never match: always fails. */
int
nop()
{
	return 0;
}
212
/*
 * Basic handler: look up the stem ending at ep, recording a as its
 * derivation; if that fails, try removing a further suffix.
 * d is unused.  Returns 1 on success, 0 otherwise.
 */
int
strip(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(putsuf(ep,a,lev))
		return(1);
	return(suffix(ep,lev) != 0);
}
218
219s(ep,d,a,lev)
220char *ep,*d,*a;
221{
222 if(lev>DLEV+1)
223 return(0);
224 if(*ep=='s'&&ep[-1]=='s')
225 return(0);
226 return(strip(ep,d,a,lev));
227}
228
229an(ep,d,a,lev)
230char *ep,*d,*a;
231{
232 if(!isupper(*word)) /*must be proper name*/
233 return(0);
234 return(putsuf(ep,a,lev));
235}
236
/*
 * Handler for "ization": store an 'e' at ep and treat the stem as
 * ending just after it, with d as the recorded derivation.
 * a is unused.
 */
int
ize(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'e';
	return(strip(ep+1,"",d,lev));
}
243
/*
 * Put an 'e' at ep, then look up the stem that ends right after that
 * 'e', recording d as the derivation.  a is unused.
 */
int
y_to_e(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char *end;

	end = ep;
	*end++ = 'e';
	return(strip(end,"",d,lev));
}
250
/*
 * Handler for suffixes that may follow a y turned into i ("ily",
 * "iness", ...): if the stem ends in 'i' defer to i_to_y(), otherwise
 * to strip().
 */
int
ily(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(ep[-1]!='i')
		return(strip(ep,d,a,lev));
	return(i_to_y(ep,d,a,lev));
}
259
260ncy(ep,d,a,lev)
261char *ep, *d, *a;
262{
263 if(skipv(skipv(ep-1))<word)
264 return(0);
265 ep[-1] = 't';
266 return(strip(ep,d,a,lev));
267}
268
/*
 * Handler for "ibility": store 'l' at ep and let y_to_e() append the
 * 'e' after it, i.e. the stem is rebuilt with "le" on the end.
 */
int
bility(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'l';
	return(y_to_e(ep+1,d,a,lev));
}
275
/*
 * If the stem ends in 'i', change it to 'y' and record d as the
 * derivation; otherwise leave the stem alone and record a.
 * Either way the stem is then passed to strip().
 */
int
i_to_y(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(ep[-1]!='i')
		return(strip(ep,"",a,lev));
	ep[-1] = 'y';
	return(strip(ep,"",d,lev));
}
285
286es(ep,d,a,lev)
287char *ep,*d,*a;
288{
289 if(lev>DLEV)
290 return(0);
291 switch(ep[-1]) {
292 default:
293 return(0);
294 case 'i':
295 return(i_to_y(ep,d,a,lev));
296 case 's':
297 case 'h':
298 case 'z':
299 case 'x':
300 return(strip(ep,d,a,lev));
301 }
302}
303
/*
 * Handler for "metry": overwrite the last two characters before ep
 * with "er" (the table's derivation string is "-ry+er") and pass the
 * result to strip().
 */
int
metry(ep,d,a,lev)
char *ep, *d,*a;
int lev;
{
	ep[-1] = 'r';
	ep[-2] = 'e';
	return(strip(ep,d,a,lev));
}
311
/*
 * Handler for "tion", "tive" and "ator".  The decision rests on the
 * character two before ep: 'c' or 'r' means look the stem up as it
 * stands, 'a' means retry with an 'e' appended via y_to_e(), and
 * anything else fails.
 */
int
tion(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	int key;

	key = ep[-2];
	if(key=='a')
		return(y_to_e(ep,d,a,lev));
	if(key=='c' || key=='r')
		return(putsuf(ep,a,lev));
	return(0);
}
324
/*
 * possible consonant-consonant-e ending
 *
 * Chooses, from the last two characters of the stem, between retrying
 * with an 'e' appended (y_to_e) and handing over to VCe().  The cases
 * below deliberately fall into one another.
 */
int
CCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	int prev;

	switch(ep[-1]) {
	case 'l':
		prev = ep[-2];
		/* consonant other than l, r, w in front of the 'l': add the e */
		if(!vowel(prev) && prev!='l' && prev!='r' && prev!='w')
			return(y_to_e(ep,d,a,lev));
		break;
	case 's':
		if(ep[-2]=='s')
			break;
		/* fall through */
	case 'c':
	case 'g':
		if(*ep=='a')
			return(0);
		/* fall through */
	case 'v':
	case 'z':
		if(vowel(ep[-2]))
			break;
		/* fall through */
	case 'u':
		if(y_to_e(ep,d,a,lev))
			return(1);
		/* only a stem ending in "ng" gets the VCe() attempt as well */
		if(ep[-2]!='n' || ep[-1]!='g')
			return(0);
	}
	return(VCe(ep,d,a,lev));
}
361
/*
 * possible consonant-vowel-consonant-e ending
 *
 * A stem ending in 'e' fails at once.  When the stem ends in a vowel
 * followed by a non-vowel, it is first tried with an 'e' appended;
 * if that fails the overwritten character is put back.  The stem is
 * finally tried unchanged through strip().
 */
int
VCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char last, saved;

	last = ep[-1];
	if(last=='e')
		return(0);
	if(!vowel(last) && vowel(ep[-2])) {
		saved = *ep;
		*ep = 'e';
		if(putsuf(ep+1,d,lev)||suffix(ep+1,lev))
			return(1);
		*ep = saved;
	}
	return(strip(ep,d,a,lev));
}
380
381char *lookuppref(wp,ep)
382char **wp;
383char *ep;
384{
385 register char **sp;
386 register char *bp,*cp;
387 for(sp=preftab;*sp;sp++) {
388 bp = *wp;
389 for(cp= *sp;*cp;cp++,bp++)
390 if(Tolower(*bp)!=*cp)
391 goto next;
392 for(cp=bp;cp<ep;cp++)
393 if(vowel(*cp)) {
394 *wp = bp;
395 return(*sp);
396 }
397next: ;
398 }
399 return(0);
400}
401
402putsuf(ep,a,lev)
403char *ep,*a;
404{
405 register char *cp;
406 char *bp;
407 register char *pp;
408 int val = 0;
409 char space[20];
410 deriv[lev] = a;
411 if(putw(word,ep,lev))
412 return(1);
413 bp = word;
414 pp = space;
415 deriv[lev+1] = pp;
416 while(cp=lookuppref(&bp,ep)) {
417 *pp++ = '+';
418 while(*pp = *cp++)
419 pp++;
420 if(putw(bp,ep,lev+1)) {
421 val = 1;
422 break;
423 }
424 }
425 deriv[lev+1] = deriv[lev+2] = 0;
426 return(val);
427}
428
429putw(bp,ep,lev)
430char *bp,*ep;
431{
432 register i, j;
433 char duple[3];
434 if(ep-bp<=1)
435 return(0);
436 if(vowel(*ep)) {
437 if(monosyl(bp,ep))
438 return(0);
439 }
440 i = dict(bp,ep);
441 if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) {
442 ep--;
443 deriv[++lev] = duple;
444 duple[0] = '+';
445 duple[1] = *ep;
446 duple[2] = 0;
447 i = dict(bp,ep);
448 }
449 if(vflag==0||i==0)
450 return(i);
451 j = lev;
452 do {
453 if(deriv[j])
454 strcat(affix,deriv[j]);
455 } while(--j>0);
456 strcat(affix,"\t");
457 return(i);
458}
459
460
/*
 * Return 1 when bp..ep (at least two characters) ends in a vowel
 * followed by a single non-vowel other than 'x' or 'w', and holds no
 * vowel before that final pair; return 0 otherwise.
 */
int
monosyl(bp,ep)
char *bp, *ep;
{
	int last, n;

	if(ep<bp+2)
		return(0);
	last = ep[-1];
	if(vowel(last))
		return(0);
	if(!vowel(ep[-2]))
		return(0);
	if(last=='x' || last=='w')
		return(0);
	/* everything in front of the final vowel+consonant must be vowel-free */
	n = (ep-bp) - 2;
	while(n-- > 0)
		if(vowel(bp[n]))
			return(0);
	return(1);
}
474
475char *
476skipv(s)
477char *s;
478{
479 if(s>=word&&vowel(*s))
480 s--;
481 while(s>=word&&!vowel(*s))
482 s--;
483 return(s);
484}
485
/* Return 1 if c, after Tolower, is one of a e i o u y; else 0. */
int
vowel(c)
int c;
{
	c = Tolower(c);
	return(c=='a' || c=='e' || c=='i' || c=='o' || c=='u' || c=='y');
}
499
500/* crummy way to Britishise */
501ise()
502{
503 register struct suftab *p;
504 for(p = suftab;p->suf;p++) {
505 ztos(p->suf);
506 ztos(p->d1);
507 ztos(p->a1);
508 }
509}
/* Replace every 'z' in the NUL-terminated string s by 's', in place. */
int
ztos(s)
char *s;
{
	while(*s) {
		if(*s=='z')
			*s = 's';
		s++;
	}
}
517
518dict(bp,ep)
519char *bp, *ep;
520{
521 register char *wp;
522 long h;
523 register long *lp;
524 register i;
525 if(xflag)
526 printf("=%.*s\n",ep-bp,bp);
527 for(i=0; i<NP; i++) {
528 for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp)
529 h += *wp * *lp;
530 h += '\n' * *lp;
531 h %= p[i];
532 if(get(h)==0)
533 return(0);
534 }
535 return(1);
536}