Bill Jolitz @ Berkeley received this version from Lorinda Cherry
[unix-history] / usr / src / usr.bin / diction / style1 / style1.l
CommitLineData
2958d0fe
RH
%{
/*
 * break out words, output cap + word(inverted)
 *
 * Each recognized token is written on its own line as "<tag>:<token>",
 * where <tag> is a one-character class code (see OUT1).  A word that
 * cannot be classified is written reversed, with no tag (see OUT).
 *
 * The %p, %a and %o lines after this block enlarge lex's internal
 * tables (positions, transitions, output array).
 */

#ifndef lint
static char sccsid[] = "@(#)style1.l 4.2 (Berkeley) 82/11/06";
#endif /* not lint */

#include <stdio.h>
#include <ctype.h>

/*
 * OUT():        write yytext reversed, then a newline (uses the global i).
 *               Wrapped in do/while so it is a single statement.
 * OUT1(nam):    write "<nam as a char>:<yytext>".
 * OUTN(string): write string followed by a newline.
 */
#define OUT() do { for(i=yyleng-1;i>=0; i--)putchar(yytext[i]); putchar('\n'); } while(0)
#define OUT1(nam) printf("%c:%s\n",nam,yytext)
#define OUTN(string) printf("%s\n",string)
#include "names.h"
#include "nhash.c"
#include "dict.c"
#include "ydict.c"
#include "abbrev.c"

char nt[] = "D:n't";	/* line written after a word whose "n't" was stripped */
char qs[] = "c:'s";	/* line written by pos() after a possessive's base word */
char fin[] = "E:.";	/* line written when a sentence end is recognized */
int NOCAPS = 0;	/* if set all caps are turned to lower case */
int i,j;		/* scratch variables shared by the rule actions */
int dot = 0;		/* set when a token ended in '.' that may end a sentence */
int first = 1;		/* nonzero while the next word would start a sentence */
int qflg,nflg;		/* qflg: toggled by the quote rule; nflg: "n't" was stripped */
int cap = 0;		/* leading capital was lowered for lookup; wd restores it */
%}
%p 3000
%a 3300
%o 4500

L	[a-z]
N	[0-9]
C	[A-Z]
A	[a-zA-Z]
P	[a-zA-Z0-9]
2958d0fe
RH
37
38%%
280efb10
RH
39^[.!].+[\n] {
	/*
	 * A whole line starting with '.' or '!': flush a pending sentence
	 * end (fin) if dot is set, then copy the line out prefixed by ':'.
	 */
40 if(dot){
41 OUTN(fin);
42 dot = 0;
43 first = 1;
44 }
45 printf(":%s",yytext);
46 }
47May {
	/*
	 * "May" not at a sentence start is tagged NOUN; at a sentence start
	 * it is lowercased, marked cap and handled as an ordinary word at wd.
	 */
48 if(first == 0){
49 OUT1(NOUN);
50 }
51 else {
52 first = 0;
53 yytext[0] = tolower(yytext[0]);
54 cap = 1;
55 goto wd;
56 }
57 }
58"U.S." {
2958d0fe
RH
59 OUT1(NOUN);
60 }
61{C}{L}*'[s] {
	/* Capitalized word + 's: pos(1) emits it; no longer sentence-initial. */
62 pos(1);
63 if(first==1)first=0;
64 }
280efb10
RH
65{C}+['][s] {
	/* All-capitals word + 's: tagged POS (lowercased first if NOCAPS). */
66 if(NOCAPS)
67 for(i=0;i<yyleng;i++)
68 if(isupper(yytext[i]))yytext[i] = tolower(yytext[i]);
2958d0fe
RH
69 OUT1(POS);
70 }
280efb10
RH
71{P}+([-]{P}+)+ {
	/* Hyphen-joined alphanumerics: tagged NOUN_ADJ. */
72 if(NOCAPS)
73 for(i=0;i<yyleng;i++)
74 if(isupper(yytext[i]))yytext[i] = tolower(yytext[i]);
2958d0fe
RH
75 OUT1(NOUN_ADJ);
76 }
77{C}{C}+ {
	/*
	 * Two or more capitals.  Peek one character: an 's' is appended to
	 * the token, which is tagged PNOUN; anything else is pushed back and
	 * the token is lowercased and looked up as an ordinary word at wd.
	 */
280efb10
RH
78 if(NOCAPS)
79 for(i=0;i<yyleng;i++)
80 yytext[i] = tolower(yytext[i]);
2958d0fe
RH
81 if((i=input()) == 's'){
82 yytext[yyleng++] = 's';
83 yytext[yyleng] = '\0';
280efb10 84 OUT1(PNOUN);
2958d0fe
RH
85 }
86 else {
87 unput(i);
280efb10
RH
88 if(!NOCAPS)
89 for(i=0;i<yyleng;i++)yytext[i] = tolower(yytext[i]);
2958d0fe
RH
90 goto wd;
91 }
92 }
93[LD][']{C}{L}* {
	/* L' or D' followed by a capitalized word: tagged NOUN_ADJ. */
280efb10
RH
94 if(NOCAPS){
95 yytext[0] = tolower(yytext[0]);
96 yytext[2] = tolower(yytext[2]);
97 }
2958d0fe
RH
98 OUT1(NOUN_ADJ);
99 }
100{C}{L}* {
	/*
	 * Capitalized word.  At a sentence start just clear first; elsewhere
	 * set cap so wd knows the capital was in the text.  A lone "I" is
	 * looked up as is with cap cleared; any other word has its first
	 * letter lowered before the lookup at wd.
	 */
101 if(first==1)
102 first=0;
103 else cap = 1;
104 if(yyleng==1 && yytext[0] == 'I'){
105 cap = 0;
106 goto wd;
107 }
280efb10 108 yytext[0] = tolower(yytext[0]);
2958d0fe
RH
109 goto wd;
110 }
280efb10 111{N}":"{N}{N} {
	/* digit ':' digit digit: tagged NOUN_ADJ. */
2958d0fe
RH
112 OUT1(NOUN_ADJ);
113 }
114({N}*[,])*({N}+".")+[ \t\n]+{C} {
	/*
	 * Number ending in '.', then white space, then a capital letter: the
	 * '.' is taken as a sentence end.  The capital is pushed back, the
	 * token is cut at its last '.', and NOUN_ADJ plus fin are written.
	 */
115 for(i=yyleng-1;i>0;i--)
116 if(yytext[i] == '.')break;
117 unput(yytext[yyleng-1]);
118 yytext[i] = '\0';
119 OUT1(NOUN_ADJ);
120 OUTN(fin);
121 first = 1;
122 }
280efb10
RH
123([hH]e"/"[sS]he)|([sS]he"/"[hH]e) {
	/* he/she or she/he: tagged PRONS (first letter lowered if NOCAPS). */
124 if(NOCAPS)
125 if(isupper(yytext[0]))yytext[0] = tolower(yytext[0]);
126 OUT1(PRONS);
127 }
128([hH]is"/"[hH]er)|([hH]er"/"[hH]is) {
	/* his/her or her/his: tagged POS (first letter lowered if NOCAPS). */
129 if(NOCAPS)
130 if(isupper(yytext[0]))yytext[0] = tolower(yytext[0]);
131 OUT1(POS);
132 }
133[ \t`]*[a-zA-Z0-9.]*("\/"[a-zA-Z0-9.]+)+[']* {
	/*
	 * Slash-joined token: tagged NOUN_ADJ.  If it ends in '.', ahead()
	 * inspects what follows; a 0 return sets dot.
	 */
134 if(yytext[yyleng-1] == '.'){
135 if(ahead() == 0)dot=1;
136 }
137 if(NOCAPS)
138 for(i=0;i<yyleng;i++)
139 if(isupper(yytext[i]))yytext[i] = tolower(yytext[i]);
2958d0fe
RH
140 OUT1(NOUN_ADJ);
141 }
142{N}+([,]{N}+)*("."{N}+)*[']*[s]* {
	/* Number with optional comma groups and decimal parts: NOUN_ADJ. */
143 OUT1(NOUN_ADJ);
144 }
145{N}*([,]{N}+)*("."{N}+)+[']*[s]* {
	/* As above, but the leading digits may be absent: NOUN_ADJ. */
146 OUT1(NOUN_ADJ);
147 }
148{N}+([,]{N}+)*("."{N}*)*[']*[s]* {
	/* Number that may end in a bare '.'; a trailing '.' sets dot. */
149 if(yytext[yyleng-1] == '.')dot=1;
150 OUT1(NOUN_ADJ);
151 }
280efb10
RH
({A}*{N}+{A}*)+	{
	/*
	 * Token mixing letters and digits: tagged NOUN_ADJ.
	 * Peek at the next character.  A '.' is handed to ahead(), which
	 * either extends yytext or pushes the '.' back.  Any other character
	 * is pushed back so it is not lost (the {C}{C}+ rule uses the same
	 * input()/unput() pairing).
	 */
	if((i=input()) == '.')
		ahead();
	else
		unput(i);
	if(NOCAPS)
		for(i=0;i<yyleng;i++)
			if(isupper(yytext[i]))yytext[i]=tolower(yytext[i]);
	OUT1(NOUN_ADJ);
	}
2958d0fe
RH
160{N}+[%] {
	/* Digits followed by '%': NOUN_ADJ. */
161 OUT1(NOUN_ADJ);
162 }
163"$"{N}+([,]{N}+)*("."{N}*)* {
	/* Dollar amount: NOUN; a trailing '.' sets dot. */
164 if(yytext[yyleng-1] == '.')dot=1;
165 OUT1(NOUN);
166 }
167[Aa]"."[ ]*[Mm]"." {
	/* a.m. (either case, optional blanks): ADJ_ADV. */
168 OUT1(ADJ_ADV);
169 }
170[Pp]"."[ ]*[Mm]"." {
	/* p.m. (either case, optional blanks): ADJ_ADV. */
171 OUT1(ADJ_ADV);
172 }
173"a."[ ]*"d." {
174 OUT1(ADJ_ADV);
175 }
176"b."[ ]*"c." {
177 OUT1(ADJ_ADV);
178 }
179"i."[ ]*"e." {
180 OUT1(PREP);
181 }
182"e."[ ]*"g." {
183 OUT1(PREP);
184 }
185"etc."[ \n]*[,)]* {
	/*
	 * Write "etc." alone as NOUN (yytext is cut after four characters
	 * and then restored).  Then look at the last matched character: a
	 * ',' or ')' is written with the ',' tag; anything else is taken as
	 * a sentence end (fin, first = 1).
	 */
186 i = yytext[4];
187 yytext[4] = '\0';
188 OUT1(NOUN);
189 yytext[4] = i;
190 yytext[0] = yytext[yyleng-1];
191 yytext[1] = '\0';
192 if(yytext[0] == ',' || yytext[0] == ')')
193 OUT1(',');
194 else {
195 OUTN(fin);
196 first = 1;
197 }
198 }
199"et al." {
200 OUT1(NOUN);
201 }
2958d0fe
RH
202in"."[ \n]*{C} {
	/*
	 * "in." followed by a capital: the '.' is taken as a sentence end.
	 * The capital is pushed back, "in" is written as PREP, then fin.
	 */
203 unput(yytext[yyleng-1]);
204 yytext[2] = '\0';
205 OUT1(PREP);
206 OUTN(fin);
207 first = 1;
208 }
2958d0fe
RH
209Ph"."[ ]*[Dd]"." {
210 OUT1(ADJ);
211 }
2958d0fe
RH
212[A-Z]"." {
	/* Single capital followed by '.': NOUN; the '.' may end a sentence. */
213 dot=1;
214 OUT1(NOUN);
215 }
216can't {
	/* Cut to "can", remember the "n't" in nflg, look up at wd. */
217 yytext[3]='\0';
218 yyleng -= 2;
219 nflg=1;
220 goto wd;
221 }
222won't {
223 OUT1('X');
224 }
280efb10
RH
225ain't {
226 OUT1('g');
227 }
2958d0fe
RH
228{L}+n't {
	/* Strip the trailing "n't", set nflg, look up the stem at wd. */
229 nflg=1;
230 yytext[yyleng-3]='\0';
231 yyleng -= 3;
232 goto wd;
233 }
234[A-Z]{L}+n't {
	/* Same as above after lowering the leading capital (cap is not set). */
280efb10 235 yytext[0] = tolower(yytext[0]);
2958d0fe
RH
236 nflg=1;
237 yytext[yyleng-3]='\0';
238 yyleng -= 3;
239 goto wd;
240 }
280efb10 241o'clock {
2958d0fe
RH
242 OUT1(ADV);
243 }
244{L}+'[s] {
	/* Lowercase word + 's: handled by pos(0). */
245 pos(0);
246 }
247'll {
	/*
	 * This and the following contraction suffixes are written with the
	 * tag that lookup() returns for the spelled-out word.
	 */
248 OUT1(lookup("will",1,0));
249 }
250've {
251 OUT1(lookup("have",1,0));
252 }
253're {
254 OUT1(lookup("are",1,0));
255 }
256'd {
257 OUT1(lookup("had",1,0));
258 }
259'm {
260 OUT1(lookup("am",1,0));
261 }
262'ld {
263 OUT1(lookup("would",1,0));
264 }
{L}+	{
wd:
	/*
	 * Common word handler; other rules jump here after normalizing
	 * yytext (and possibly setting cap and/or nflg).
	 *
	 * Known word (lookup() != 0): restore the capital if cap is set and
	 * NOCAPS is not, write fin first if a capitalized word follows a
	 * pending dot, then write the word with the tag lookup() returned.
	 *
	 * Unknown word ending in 'y' (cap clear): pick a suffix-specific
	 * lookup from the letter(s) before the 'y' and let look() write the
	 * word with that lookup's tag or the given default.
	 *
	 * Any other unknown word: NOUN_ADJ if it was capitalized, otherwise
	 * written reversed by OUT().
	 *
	 * Finally, if "n't" was stripped (nflg), write the nt line and clear
	 * the flag.  This runs on every path, so a stale nflg from an unknown
	 * stem cannot attach "n't" to a later word.
	 */
	if((j = lookup(yytext,1,0)) != 0){
		first=0;
		if(cap){
			if(!NOCAPS)
				yytext[0] = toupper(yytext[0]);
			cap = 0;
			if(dot)OUTN(fin);
		}
		dot=0;
		OUT1(j);
	}
	else{
		first = dot=0;
		/* yyleng >= 2 keeps yytext[yyleng-2] inside the token */
		if(yyleng >= 2 && yytext[yyleng-1] == 'y' && cap == 0){
			/* letter two before the 'y', or NUL for a 2-letter word */
			int before = yyleng >= 3 ? yytext[yyleng-3] : '\0';

			switch(yytext[yyleng-2]){
			case 'c':
				look(cy,yyleng-2,NOUN);
				break;
			case 'f':
				look(fy,yyleng-2,VERB);
				break;
			case 'l':
				look(ly,yyleng-2,ADV);
				break;
			case 'g':
				if(before == 'o'){
					OUT1(NOUN);
					break;
				}
				look(gy,yyleng-2,ADJ);
				break;
			case 'r':
				switch(before){
				case 'a':
					look(ary,yyleng-3,ADJ);
					break;
				case 'o':
					look(ory,yyleng-3,ADJ);
					break;
				case 'e':
					look(ery,yyleng-3,NOUN);
					break;
				default:
					look(ry,yyleng-2,NOUN);
				}
				break;
			case 't':
				if(before == 'i')
					look(ity,yyleng-3,NOUN);
				else
					look(ty,yyleng-2,ADJ);
				break;
			default:
				OUT();
			}
		}
		else {
			if(cap){
				if(!NOCAPS)yytext[0] = toupper(yytext[0]);
				cap = 0;
				OUT1(NOUN_ADJ);
			}
			else {
				OUT();
			}
		}
	}
	if(nflg==1){
		nflg=0;
		OUTN(nt);
	}
	}
326[\n] ;
327[ ]+ ;
328[\t]+ ;
329";" {
	/* Semicolon: written with its own tag; the next word counts as first. */
330 OUT1(';');
331 first=1;
332 }
333(\"|`|')+ {
	/*
	 * Run of quote characters.  Flush a pending sentence end, then
	 * toggle qflg; the run is written with the '"' tag either way, and
	 * when qflg goes from 0 to 1 the next word counts as first.
	 */
334 if(dot){
335 OUTN(fin);
336 dot=0;
337 }
338 if(qflg==1){
339 qflg=0;
340 OUT1('"');
341 }
342 else {
343 qflg=1;
344 first=1;
345 OUT1('"');
346 }
347 }
348".\"" {
	/* '.' followed by a double quote: clears qflg and ends the sentence. */
349 qflg=0;
350 first=1;
351 OUT1(END);
352 }
353"..." {
354 OUT1(',');
355 }
356"/." {
357 first = 1;
358 OUT1(END);
359 }
280efb10
RH
360{A}{A}+"." {
	/*
	 * Word followed by '.'.  The '.' is replaced by NUL and the word is
	 * checked with abbrev(): a hit restores the '.' and writes the token
	 * with the returned tag.  Otherwise ahead() inspects what follows;
	 * on a 0 return the word (without the '.') goes to wd, else it is
	 * written as NOUN_ADJ.  cap ends up set only if the last capital
	 * found was the first letter.
	 * NOTE(review): when ahead() returns 1 the text it appended sits
	 * after the NUL stored at the old '.' position, so OUT1 appears to
	 * print only the word itself -- confirm this is intended.
	 */
361 yytext[yyleng-1] = '\0';
362 if((j=abbrev(yytext,1,0)) != 0){
363 if(isupper(yytext[0])){
364 if(NOCAPS)yytext[0] = tolower(yytext[0]);
365 if(first == 1)first=0;
366 }
367 yytext[yyleng-1] = '.';
368 OUT1(j);
369 }
370 else {
371 j = ahead();
372 if(j == 0)
373 yyleng--;
374 for(i=0;i<yyleng;i++)
375 if(isupper(yytext[i])){
376 yytext[i] = tolower(yytext[i]);
377 if(i == 0)cap = 1;
378 else cap = 0;
379 }
380 if(j == 0)goto wd;
381 OUT1(NOUN_ADJ);
382 }
383 }
2958d0fe
RH
384"." {
	/* Plain sentence terminators: tagged END, next word counts as first. */
385 first=1;
386 OUT1(END);
387 }
388"!\"" {
389 qflg=0;
390 first=1;
391 OUT1(END);
392 }
393"!" {
394 first=1;
395 OUT1(END);
396 }
397"?\"" {
398 qflg=0;
399 first=1;
400 OUT1(END);
401 }
402"?" {
403 first=1;
404 OUT1(END);
405 }
406":" {
	/* Colon and dashes: written with the ',' tag, next word counts as first. */
407 OUT1(',');
408 first=1;
409 }
410[-]+ {
411 OUT1(',');
412 first=1;
413 }
414"," {
415 OUT1(',');
416 }
417(\[|\(|\{|\]|\)|\}) {
	/* Any bracket character is written with the ',' tag. */
418 OUT1(',');
419 }
420. {
	/* Any other single character is dropped without output. */
421/* fprintf(stderr,"nwords funny char: %c\n",yytext[0])*/ ;
422 }
423%%
/*
 * look: classify the first n characters of yytext with lookup routine f.
 *
 * yytext is cut at index n for the call f(yytext,1,0) and restored
 * afterwards.  The whole token is then written through OUT1 with the tag
 * f returned, or with the fallback tag cc when f returned 0.
 */
look(f,n,cc)
char (*f)();
int n;
char cc;
{
	char held = yytext[n];
	int tag;

	yytext[n] = '\0';
	tag = (*f)(yytext,1,0);
	yytext[n] = held;
	OUT1(tag != 0 ? tag : cc);
}
/*
 * pos: write a possessive token (word followed by 's) held in yytext.
 *
 * flg == 1 means the word began with a capital; it is lowered for the
 * lookup.  If lookup() knows the part before the apostrophe, that part
 * is written with its tag followed by the qs line.  Otherwise the token
 * is put back together (capital restored unless NOCAPS) and written
 * whole with the POS tag.
 */
pos(flg){
	int q = yyleng-1;
	int tag;

	if(flg == 1)
		yytext[0] = tolower(yytext[0]);
	/* find the apostrophe, scanning back from the end */
	while(yytext[q] != '\'')
		q--;
	yytext[q] = '\0';
	tag = lookup(yytext,1,0);
	if(tag == 0){
		if(flg==1 && !NOCAPS)
			yytext[0] = toupper(yytext[0]);
		yytext[q] = '\'';
		OUT1(POS);
	}
	else{
		yyleng = q;
		OUT1(tag);
		OUTN(qs);
	}
}
char *filename="-";

/*
 * main: write the ":" header line, call getd(), getab() and ygetd(),
 * then run the scanner over standard input (no arguments) or over each
 * named file in turn.  fin is written after every scan.  Returns the
 * number of files that could not be opened.
 */
main(argc,argv)
int argc;
char *argv[];
{
	register int rc=0;
	char **ap;

	fputs(":\n", stdout);
	getd();
	getab();
	ygetd();
	if(argc<=1) {
		yylex();
		OUTN(fin);
		return(rc);
	}
	for(ap = argv+1; ap < argv+argc; ap++) {
		/* each file replaces stdin, which is what the scanner reads */
		if(freopen(*ap,"r",stdin)==NULL) {
			fprintf(stderr,"%s: cannot open\n", *ap);
			rc++;
			continue;
		}
		filename = *ap;
		yylex();
		OUTN(fin);
	}
	return(rc);
}
280efb10
RH
/*
 * ahead: look past a '.' to decide whether it is embedded in a token.
 *
 * If the next input character is alphanumeric, append '.', that
 * character and everything up to the next white space to yytext, push
 * the white space back and return 1.  Otherwise push the character and
 * a '.' back onto the input and return 0.
 *
 * Collection stops at end of input (input() <= 0) as well as at white
 * space, so the loop cannot run forever.  The alphanumeric that
 * triggered the extension is stored rather than dropped.
 * NOTE(review): yytext is extended without a bound check -- the buffer
 * size is not visible here.
 */
ahead(){
	register int c;

	c = input();
	if(c > 0 && isalnum(c)){
		yytext[yyleng++] = '.';
		yytext[yyleng++] = c;
		while((c=input()) > 0 && !isspace(c))
			yytext[yyleng++] = c;
		yytext[yyleng] = '\0';
		if(c > 0)
			unput(c);
		return(1);
	}
	unput(c);
	unput('.');
	return(0);
}
500}