From 280efb104c03e7c87181e2538243f033cbf616d2 Mon Sep 17 00:00:00 2001 From: "Robert R. Henry" Date: Sat, 6 Nov 1982 22:35:35 -0800 Subject: [PATCH] Bill Jolitz @ Berkeley received this version from Lorindia Cherry around September 1981, as the ``most recent version''. Deltas seem to be progressive, rather than regressive, although the BTL sid for deroff.c is retrograde; this is probably a case of parallel development SCCS-vsn: usr.bin/diction/style3/conp.h 4.2 SCCS-vsn: usr.bin/diction/style1/names.h 4.2 SCCS-vsn: usr.bin/diction/style3/style.h 4.2 SCCS-vsn: usr.bin/diction/diction/diction.1 4.2 SCCS-vsn: usr.bin/deroff/deroff.c 4.2 SCCS-vsn: usr.bin/diction/diction/dprog.c 4.2 SCCS-vsn: usr.bin/diction/style2/edict.c 4.2 SCCS-vsn: usr.bin/diction/style1/nhash.c 4.2 SCCS-vsn: usr.bin/diction/style3/outp.c 4.2 SCCS-vsn: usr.bin/diction/style3/pscan.c 4.2 SCCS-vsn: usr.bin/diction/style1/ydict.c 4.2 SCCS-vsn: usr.bin/diction/style2/style2.l 4.2 SCCS-vsn: usr.bin/diction/style1/style1.l 4.2 SCCS-vsn: usr.bin/diction/style3/part.l 4.2 SCCS-vsn: usr.bin/diction/Makefile 4.2 SCCS-vsn: usr.bin/diction/diction/dict.d 4.2 SCCS-vsn: usr.bin/diction/explain/explain.sh 4.2 SCCS-vsn: usr.bin/diction/README 4.2 SCCS-vsn: usr.bin/diction/style/style.sh 4.2 SCCS-vsn: usr.bin/diction/explain/explain.d 4.2 SCCS-vsn: usr.bin/diction/diction/diction.sh 4.2 --- usr/src/usr.bin/deroff/deroff.c | 846 +++++++++++++-------- usr/src/usr.bin/diction/Makefile | 12 +- usr/src/usr.bin/diction/README | 62 +- usr/src/usr.bin/diction/diction/dict.d | 22 +- usr/src/usr.bin/diction/diction/diction.1 | 2 +- usr/src/usr.bin/diction/diction/diction.sh | 4 +- usr/src/usr.bin/diction/diction/dprog.c | 227 ++++-- usr/src/usr.bin/diction/explain/explain.d | 1 + usr/src/usr.bin/diction/explain/explain.sh | 2 +- usr/src/usr.bin/diction/style/style.sh | 28 +- usr/src/usr.bin/diction/style1/names.h | 5 +- usr/src/usr.bin/diction/style1/nhash.c | 2 +- usr/src/usr.bin/diction/style1/style1.l | 186 +++-- usr/src/usr.bin/diction/style1/ydict.c | 2 +- usr/src/usr.bin/diction/style2/edict.c | 2 +- usr/src/usr.bin/diction/style2/style2.l | 68 +- usr/src/usr.bin/diction/style3/conp.h | 3 +- usr/src/usr.bin/diction/style3/outp.c | 62 +- usr/src/usr.bin/diction/style3/part.l | 99 ++- usr/src/usr.bin/diction/style3/pscan.c | 108 ++- usr/src/usr.bin/diction/style3/style.h | 8 +- 21 files changed, 1191 insertions(+), 560 deletions(-) diff --git a/usr/src/usr.bin/deroff/deroff.c b/usr/src/usr.bin/deroff/deroff.c index e44cffe584..36b7edf43d 100644 --- a/usr/src/usr.bin/deroff/deroff.c +++ b/usr/src/usr.bin/deroff/deroff.c @@ -1,8 +1,8 @@ #ifndef lint -static char sccsid[] = "@(#)deroff.c 4.1 (Berkeley) 82/11/06"; +static char sccsid[] = "@(#)deroff.c 4.2 (Berkeley) 82/11/06"; #endif not lint -char *xxxvers = "\nDeroff Version (Bell Labs) 2.0 29 December 1979\n"; +char *xxxvers = "\nDeroff Version 1.02 (Bell Labs) 24 July 1978\n"; #include @@ -10,10 +10,9 @@ char *xxxvers = "\nDeroff Version (Bell Labs) 2.0 29 December 1979\n"; /* Deroff command -- strip troff, eqn, and Tbl sequences from a file. Has two flags argument, -w, to cause output one word per line rather than in the original format. --ms (or -m) causes -ms macro's to be interpreted so that just -sentences are output, -mm does same for -mm macro's, --ml in addition to interpreting -ms macros also gets rid of -lists. +-mm (or -ms) causes the corresponding macro's to be interpreted +so that just sentences are output +-ml also gets rid of lists. Deroff follows .so and .nx commands, removes contents of macro definitions, equations (both .EQ ... .EN and $...$), Tbl command sequences, and Troff backslash constructions. @@ -36,17 +35,19 @@ All input is through the C macro; the most recently read character is in c. #define NOCHAR -2 #define SPECIAL 0 #define APOS 1 -#define DIGIT 2 -#define LETTER 3 +#define PUNCT 2 +#define DIGIT 3 +#define LETTER 4 int wordflag = NO; int msflag = NO; -int mac = MS; +int mac = MM; int disp = 0; +int parag = 0; int inmacro = NO; int intable = NO; -char chars[128]; /* SPECIAL, APOS, DIGIT, or LETTER */ +char chars[128]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */ char line[512]; char *lp; @@ -60,161 +61,155 @@ int rdelim = NOCHAR; int argc; char **argv; +extern int optind; +extern char*optarg; char fname[50]; FILE *files[15]; FILE **filesp; FILE *infile; -char *calloc(); - - - main(ac, av) int ac; char **av; { -register int i; -register char *p; -static char onechar[2] = "X"; -FILE *opn(); - -argc = ac - 1; -argv = av + 1; - -while(argc>0 && argv[0][0]=='-' && argv[0][1]!='\0') - { - for(p=argv[0]+1; *p; ++p) switch(*p) - { - case 'w': - wordflag = YES; - break; - case 'm': - msflag = YES; - if(*(p+1) == 'm'){ - mac=MM; - p++; - } - else if(*(p+1) == 's') - p++; - else if(*(p+1) == 'l'){ - disp=1; - p++; - } - break; - default: - onechar[0] = *p; - fatal("Invalid flag %s\n", onechar); - } - --argc; - ++argv; + register int i; + int errflg = 0; + register optchar; + FILE *opn(); + + argc = ac; + argv = av; + while ((optchar = getopt(argc, argv, "wpm:")) != EOF) switch(optchar) { + case 'w': + wordflag = YES; + break; + case 'm': + msflag = YES; + if (*optarg == 'm') + mac = MM; + else if (*optarg == 's') + mac = MS; + else if (*optarg == 'l') + disp = 1; + else errflg++; + break; + case 'p': + parag=YES; + break; + case '?': + errflg++; } + if (errflg) + fatal("usage: deroff [ -w ] [ -m (m s l) ] [ file ] ... \n", (char *) NULL); + if ( optind == argc ) + infile = stdin; + else + infile = opn(argv[optind++]); + files[0] = infile; + filesp = &files[0]; + + for(i='a'; i<='z' ; ++i) + chars[i] = LETTER; + for(i='A'; i<='Z'; ++i) + chars[i] = LETTER; + for(i='0'; i<='9'; ++i) + chars[i] = DIGIT; + chars['\''] = APOS; + chars['&'] = APOS; + chars['.'] = PUNCT; + chars[','] = PUNCT; + chars[';'] = PUNCT; + chars['?'] = PUNCT; + chars[':'] = PUNCT; + work(); +} +char *calloc(); -if(argc == 0) - infile = stdin; -else { - infile = opn(argv[0]); - --argc; - ++argv; - } -files[0] = infile; -filesp = &files[0]; -for(i='a'; i<='z' ; ++i) - chars[i] = LETTER; -for(i='A'; i<='Z'; ++i) - chars[i] = LETTER; -for(i='0'; i<='9'; ++i) - chars[i] = DIGIT; -chars['\''] = APOS; -chars['&'] = APOS; - -work(); -} skeqn() { -while((c = getc(infile)) != rdelim) - if(c == EOF) - c = eof(); - else if(c == '"') - while( (c = getc(infile)) != '"') - if(c == EOF) - c = eof(); - else if(c == '\\') - if((c = getc(infile)) == EOF) + while((c = getc(infile)) != rdelim) + if(c == EOF) + c = eof(); + else if(c == '"') + while( (c = getc(infile)) != '"') + if(c == EOF) c = eof(); -if(msflag)return(c='x'); -return(c = ' '); + else if(c == '\\') + if((c = getc(infile)) == EOF) + c = eof(); + if(msflag)return(c='x'); + return(c = ' '); } FILE *opn(p) register char *p; { -FILE *fd; + FILE *fd; -if(p[0]=='-' && p[1]=='\0') - fd = stdin; -else if( (fd = fopen(p, "r")) == NULL) - fatal("Cannot open file %s\n", p); + if( (fd = fopen(p, "r")) == NULL) + fatal("Cannot open file %s\n", p); -return(fd); + return(fd); } eof() { -if(infile != stdin) - fclose(infile); -if(filesp > files) - infile = *--filesp; -else if(argc > 0) + if(infile != stdin) + fclose(infile); + if(filesp > files) + infile = *--filesp; + else if(optind < argc) { - infile = opn(argv[0]); - --argc; - ++argv; + infile = opn(argv[optind++]); } -else - exit(0); + else + exit(0); -return(C); + return(C); } getfname() { -register char *p; -struct chain { struct chain *nextp; char *datap; } *chainblock; -register struct chain *q; -static struct chain *namechain = NULL; -char *copys(); - -while(C == ' ') ; - -for(p = fname ; (*p=c)!= '\n' && c!=' ' && c!='\t' && c!='\\' ; ++p) - C; -*p = '\0'; -while(c != '\n') - C; - -/* see if this name has already been used */ - -for(q = namechain ; q; q = q->nextp) - if( ! strcmp(fname, q->datap)) + register char *p; + struct chain { + struct chain *nextp; + char *datap; + } *chainblock; + register struct chain *q; + static struct chain *namechain = NULL; + char *copys(); + + while(C == ' ') ; + + for(p = fname ; (*p=c)!= '\n' && c!=' ' && c!='\t' && c!='\\' ; ++p) + C; + *p = '\0'; + while(c != '\n') + C; + + /* see if this name has already been used */ + + for(q = namechain ; q; q = q->nextp) + if( ! strcmp(fname, q->datap)) { - fname[0] = '\0'; - return; + fname[0] = '\0'; + return; } -q = (struct chain *) calloc(1, sizeof(*chainblock)); -q->nextp = namechain; -q->datap = copys(fname); -namechain = q; + q = (struct chain *) calloc(1, sizeof(*chainblock)); + q->nextp = namechain; + q->datap = copys(fname); + namechain = q; } @@ -223,20 +218,20 @@ namechain = q; fatal(s,p) char *s, *p; { -fprintf(stderr, "Deroff: "); -fprintf(stderr, s, p); -exit(1); + fprintf(stderr, "Deroff: "); + fprintf(stderr, s, p); + exit(1); } - + work() { -for( ;; ) + for( ;; ) { - if(C == '.' || c == '\'') - comline(); - else - regline(NO,TWO); + if(C == '.' || c == '\'') + comline(); + else + regline(NO,TWO); } } @@ -247,37 +242,37 @@ regline(macline,const) int macline; int const; { -line[0] = c; -lp = line; -for( ; ; ) + line[0] = c; + lp = line; + for( ; ; ) { - if(c == '\\') + if(c == '\\') { - *lp = ' '; - backsl(); + *lp = ' '; + backsl(); } - if(c == '\n') break; - if(intable && c=='T') + if(c == '\n') break; + if(intable && c=='T') { - *++lp = C; - if(c=='{' || c=='}') + *++lp = C; + if(c=='{' || c=='}') { - lp[-1] = ' '; - *lp = C; + lp[-1] = ' '; + *lp = C; } } - else *++lp = C; + else *++lp = C; } -*lp = '\0'; + *lp = '\0'; -if(line[0] != '\0') - if(wordflag) - putwords(macline); - else if(macline) - putmac(line,const); - else - puts(line); + if(line[0] != '\0') + if(wordflag) + putwords(macline); + else if(macline) + putmac(line,const); + else + puts(line); } @@ -287,24 +282,38 @@ putmac(s,const) register char *s; int const; { -register char *t; + register char *t; + register found; + int last; + found = 0; -while(*s) + while(*s) { - while(*s==' ' || *s=='\t') - putchar(*s++); - for(t = s ; *t!=' ' && *t!='\t' && *t!='\0' ; ++t) - ; - if(*s == '\"')s++; - if(t>s+const && chars[ s[0] ]==LETTER && chars[ s[1] ]==LETTER) - while(s < t) - if(*s == '\"')s++; - else - putchar(*s++); - else - s = t; + while(*s==' ' || *s=='\t') + putchar(*s++); + for(t = s ; *t!=' ' && *t!='\t' && *t!='\0' ; ++t) + ; + if(*s == '\"')s++; + if(t>s+const && chars[ s[0] ]==LETTER && chars[ s[1] ]==LETTER){ + while(s < t) + if(*s == '\"')s++; + else + putchar(*s++); + last = *(t-1); + found++; + } + else if(found && chars[ s[0] ] == PUNCT && s[1] == '\0') + putchar(*s++); + else{ + last = *(t-1); + s = t; + } + } + putchar('\n'); + if(msflag && chars[last] == PUNCT){ + putchar(last); + putchar('\n'); } -putchar('\n'); } @@ -312,130 +321,195 @@ putchar('\n'); putwords(macline) /* break into words for -w option */ int macline; { -register char *p, *p1; -int i, nlet; + register char *p, *p1; + int i, nlet; -for(p1 = line ; ;) + for(p1 = line ; ;) { - /* skip initial specials ampersands and apostrophes */ - while( chars[*p1] < DIGIT) - if(*p1++ == '\0') return; - nlet = 0; - for(p = p1 ; (i=chars[*p]) != SPECIAL ; ++p) - if(i == LETTER) ++nlet; - - if( (!macline && nlet>1) /* MDM definition of word */ - || (macline && nlet>2 && chars[ p1[0] ]==LETTER && chars[ p1[1] ]==LETTER) ) + /* skip initial specials ampersands and apostrophes */ + while( chars[*p1] < DIGIT) + if(*p1++ == '\0') return; + nlet = 0; + for(p = p1 ; (i=chars[*p]) != SPECIAL ; ++p) + if(i == LETTER) ++nlet; + + if( (!macline && nlet>1) /* MDM definition of word */ + || (macline && nlet>2 && chars[ p1[0] ]==LETTER && chars[ p1[1] ]==LETTER) ) { - /* delete trailing ampersands and apostrophes */ - while(p[-1]=='\'' || p[-1]=='&') - --p; - while(p1 < p) putchar(*p1++); - putchar('\n'); + /* delete trailing ampersands and apostrophes */ + while(p[-1]=='\'' || p[-1]=='&'|| chars[ p[-1] ] == PUNCT) + --p; + while(p1 < p) putchar(*p1++); + putchar('\n'); } - else - p1 = p; + else + p1 = p; } } - comline() { -register int c1, c2; + register int c1, c2; com: -while(C==' ' || c=='\t') - ; + while(C==' ' || c=='\t') + ; comx: -if( (c1=c) == '\n') - return; -c2 = C; -if(c1=='.' && c2!='.') - inmacro = NO; -if(c2 == '\n') - return; - -if(c1=='E' && c2=='Q' && filesp==files) - eqn(); -else if(c1=='T' && (c2=='S' || c2=='C' || c2=='&') && filesp==files){ - if(msflag){ stbl(); } - else tbl(); } -else if(c1=='T' && c2=='E') - intable = NO; -else if(c1=='G' && c2 == 'R') - sdis('G','E'); -else if(!inmacro && c1=='d' && c2=='e') - macro(); -else if(!inmacro && c1=='i' && c2=='g') - macro(); -else if(!inmacro && c1=='a' && c2 == 'm') - macro(); -else if(c1=='s' && c2=='o') + if( (c1=c) == '\n') + return; + c2 = C; + if(c1=='.' && c2!='.') + inmacro = NO; + if(msflag && c1 == '['){ + refer(c2); + return; + } + if(parag && mac==MM && c1 == 'P' && c2 == '\n'){ + printf(".P\n"); + return; + } + if(c2 == '\n') + return; + + if(c1 == '\\' && c2 == '\"') + SKIP; + else if(c1=='E' && c2=='Q' && filesp==files) + eqn(); + else if(c1=='T' && (c2=='S' || c2=='C' || c2=='&') && filesp==files){ + if(msflag){ + stbl(); + } + else tbl(); + } + else if(c1=='T' && c2=='E') + intable = NO; + else if(!inmacro && c1=='d' && c2=='e') + macro(); + else if(!inmacro && c1=='i' && c2=='g') + macro(); + else if(!inmacro && c1=='a' && c2 == 'm') + macro(); + else if(c1=='s' && c2=='o') { - getfname(); - if( fname[0] ) - infile = *++filesp = opn( fname ); + getfname(); + if( fname[0] ) + infile = *++filesp = opn( fname ); } -else if(c1=='n' && c2=='x') + else if(c1=='n' && c2=='x') { - getfname(); - if(fname[0] == '\0') exit(0); - if(infile != stdin) - fclose(infile); - infile = *filesp = opn(fname); + getfname(); + if(fname[0] == '\0') exit(0); + if(infile != stdin) + fclose(infile); + infile = *filesp = opn(fname); } -else if(c1=='h' && c2=='w') - { SKIP; } -else if(msflag && c1 == 'T' && c2 == 'L'){ - SKIP_TO_COM; - goto comx; } -else if(msflag && c1=='N' && c2 == 'R')SKIP; -else if(msflag && c1 == 'A' && (c2 == 'U' || c2 == 'I')){ - if(mac==MM)SKIP; - else { + else if(c1 == 't' && c2 == 'm') + SKIP; + else if(c1=='h' && c2=='w') + SKIP; + else if(msflag && c1 == 'T' && c2 == 'L'){ SKIP_TO_COM; - goto comx; } + goto comx; } -else if(msflag && c1 == 'F' && c2 == 'S'){ - SKIP_TO_COM; - goto comx; } -else if(msflag && c1 == 'S' && c2 == 'H'){ - SKIP_TO_COM; - goto comx; } -else if(msflag && c1 == 'N' && c2 == 'H'){ - SKIP_TO_COM; - goto comx; } -else if(msflag && c1 == 'O' && c2 == 'K'){ - SKIP_TO_COM; - goto comx; } -else if(msflag && c1 == 'N' && c2 == 'D') - SKIP; -else if(msflag && mac==MM && c1=='H' && (c2==' '||c2=='U')) - SKIP; -else if(msflag && mac==MM && c2=='L'){ - if(disp || c1 == 'R')sdis('L','E'); - else{ + else if(msflag && c1=='N' && c2 == 'R')SKIP; + else if(parag && msflag && (c1 == 'P' || c1 == 'I' || c1 == 'L') && c2 == 'P'){ + printf(".%c%c",c1,c2); + while(C != '\n')putchar(c); + putchar('\n'); + } + else if(parag && mac==MM && c1 == 'P' && c2 == ' '){ + printf(".%c%c",c1,c2); + while(C != '\n')putchar(c); + putchar('\n'); + } + else if(msflag && c1 == 'A' && (c2 == 'U' || c2 == 'I')){ + if(mac==MM)SKIP; + else { + SKIP_TO_COM; + goto comx; + } + } + else if(msflag && c1 == 'F' && c2 == 'S'){ + SKIP_TO_COM; + goto comx; + } + else if(msflag && (c1 == 'S' || c1 == 'N') && c2 == 'H'){ + if(parag){ + printf(".%c%c",c1,c2); + while(C != '\n')putchar(c); + putchar(c); + putchar('!'); + while(1){ + while(C != '\n')putchar(c); + putchar('\n'); + if(C == '.')goto com; + putchar('!'); + putchar(c); + } + } + else { + SKIP_TO_COM; + goto comx; + } + } + else if(c1 == 'U' && c2 == 'X'){ + if(wordflag)printf("UNIX\n"); + else printf("UNIX "); + } + else if(msflag && c1 == 'O' && c2 == 'K'){ + SKIP_TO_COM; + goto comx; + } + else if(msflag && c1 == 'N' && c2 == 'D') SKIP; - putchar('.'); + else if(msflag && mac==MM && c1=='H' && (c2==' '||c2=='U')){ + if(parag){ + printf(".%c%c",c1,c2); + while(C != '\n')putchar(c); + putchar('\n'); + } + else { + SKIP; + } + } + else if(msflag && mac==MM && c2=='L'){ + if(disp || c1 == 'R')sdis('L','E'); + else{ + SKIP; + putchar('.'); } } -else if(msflag && (c1 == 'D' || c1 == 'N' || c1 == 'K') && c2 == 'S') - { sdis(c1,'E'); } /* removed RS-RE */ -else if(msflag && c1 == 'n' && c2 == 'f') - sdis('f','i'); -else if(msflag && c1 == 'c' && c2 == 'e') - sce(); -else + else if(!msflag &&c1 == 'P' && c2 == 'S'){ + inpic(); + } + else if(msflag && (c1 == 'D' || c1 == 'N' || c1 == 'K'|| c1=='P') && c2 == 'S') + { + sdis(c1,'E'); + } /* removed RS-RE */ + else if(msflag && (c1 == 'K' && c2 == 'F')) + { + sdis(c1,'E'); + } + else if(msflag && c1 == 'n' && c2 == 'f') + sdis('f','i'); + else if(msflag && c1 == 'c' && c2 == 'e') + sce(); + else { - if(c1=='.' && c2=='.') - while(C == '.') - ; - ++inmacro; - if(c1 <= 'Z' && msflag)regline(YES,ONE); - else regline(YES,TWO); - --inmacro; + if(c1=='.' && c2=='.'){ + if(msflag){ + SKIP; + return; + } + while(C == '.') + ; + } + ++inmacro; + if(c1 <= 'Z' && msflag)regline(YES,ONE); + else regline(YES,TWO); + --inmacro; } } @@ -443,14 +517,15 @@ else macro() { -if(msflag){ - do { SKIP; } - while(C!='.' || C!='.' || C=='.'); /* look for .. */ - if(c != '\n')SKIP; - return; -} -SKIP; -inmacro = YES; + if(msflag){ + do { + SKIP; + } while(C!='.' || C!='.' || C=='.'); /* look for .. */ + if(c != '\n')SKIP; + return; + } + SKIP; + inmacro = YES; } @@ -461,97 +536,122 @@ char a1,a2; { register int c1,c2; register int eqnf; + int lct; + lct = 0; eqnf=1; SKIP; while(1){ - while(C != '.')SKIP; + while(C != '.') + if(c == '\n')continue; + else SKIP; if((c1=C) == '\n')continue; if((c2=C) == '\n')continue; if(c1==a1 && c2 == a2){ SKIP; + if(lct != 0){ + lct--; + continue; + } if(eqnf)putchar('.'); putchar('\n'); return; } - else if(a1 == 'D' && c1 == 'E' && c2 == 'Q'){eqn(); eqnf=0;} + else if(a1 == 'L' && c2 == 'L'){ + lct++; + SKIP; + } + else if(a1 == 'D' && c1 == 'E' && c2 == 'Q'){ + eqn(); + eqnf=0; + } + else if(a1 == 'f' && (c1 == 'P' || c2 == 'P')){ + SKIP; + return; + } else SKIP; } } tbl() { -while(C != '.'); -SKIP; -intable = YES; + while(C != '.'); + SKIP; + intable = YES; } stbl() { -while(C != '.'); -SKIP_TO_COM; -if(c != 'T' || C != 'E'){ - SKIP; - pc=c; - while(C != '.' || pc != '\n' || C != 'T' || C != 'E')pc=c; -} + while(C != '.'); + SKIP_TO_COM; + if(c != 'T' || C != 'E'){ + SKIP; + pc=c; + while(C != '.' || pc != '\n' || C != 'T' || C != 'E')pc=c; + } } eqn() { -register int c1, c2; -register int dflg; -int last; + register int c1, c2; + register int dflg; + char last; -last=0; -dflg = 1; -SKIP; + last=0; + dflg = 1; + SKIP; -for( ;;) + for( ;;) { - if(C == '.' || c == '\'') + if(C1 == '.' || c == '\'') { - while(C==' ' || c=='\t') - ; - if(c=='E' && C=='N') + while(C1==' ' || c=='\t') + ; + if(c=='E' && C1=='N') { - SKIP; - if(msflag && dflg){ - putchar('x'); - putchar(' '); - if(last){putchar('.'); putchar(' '); } - } - return; + SKIP; + if(msflag && dflg){ + putchar('x'); + putchar(' '); + if(last){ + putchar(last); + putchar('\n'); + } + } + return; } } - else if(c == 'd') /* look for delim */ + else if(c == 'd') /* look for delim */ { - if(C=='e' && C=='l') - if( C=='i' && C=='m') - { - while(C1 == ' '); - if((c1=c)=='\n' || (c2=C1)=='\n' - || (c1=='o' && c2=='f' && C1=='f') ) + if(C1=='e' && C1=='l') + if( C1=='i' && C1=='m') { - ldelim = NOCHAR; - rdelim = NOCHAR; + while(C1 == ' '); + if((c1=c)=='\n' || (c2=C1)=='\n' + || (c1=='o' && c2=='f' && C1=='f') ) + { + ldelim = NOCHAR; + rdelim = NOCHAR; + } + else { + ldelim = c1; + rdelim = c2; + } } - else { - ldelim = c1; - rdelim = c2; - } - } dflg = 0; } - if(c != '\n') while(C != '\n'){ if(c == '.')last=1; else last=0; } + if(c != '\n') while(C1 != '\n'){ + if(chars[c] == PUNCT)last = c; + else if(c != ' ')last = 0; + } } } - backsl() /* skip over a complete backslash construction */ { -int bdelim; + int bdelim; -sw: switch(C) +sw: + switch(C) { case '"': SKIP; @@ -562,18 +662,27 @@ sw: switch(C) while(C>='0' && c<='9') ; ungetc(c,infile); c = '0'; - } + } --lp; return; case 'f': case 'n': - case 'k': case '*': if(C != '(') return; case '(': + if(msflag){ + if(C == 'e'){ + if(C == 'm'){ + *lp = '-'; + return; + } + } + else if(c != '\n')C; + return; + } if(C != '\n') C; return; @@ -609,19 +718,19 @@ sw: switch(C) char *copys(s) register char *s; { -register char *t, *t0; + register char *t, *t0; -if( (t0 = t = calloc( strlen(s)+1, sizeof(*t) ) ) == NULL) - fatal("Cannot allocate memory", (char *) NULL); + if( (t0 = t = calloc( (unsigned)(strlen(s)+1), sizeof(*t) ) ) == NULL) + fatal("Cannot allocate memory", (char *) NULL); -while( *t++ = *s++ ) - ; -return(t0); + while( *t++ = *s++ ) + ; + return(t0); } sce(){ -register char *ap; -register int n, i; -char a[10]; + register char *ap; + register int n, i; + char a[10]; for(ap=a;C != '\n';ap++){ *ap = c; if(ap == &a[9]){ @@ -637,12 +746,19 @@ char a[10]; if(C == 'c'){ if(C == 'e'){ while(C == ' '); - if(c == '0')break; + if(c == '0'){ + SKIP; + break; + } else SKIP; } else SKIP; } - else SKIP; + else if(c == 'P' || C == 'P'){ + if(c != '\n')SKIP; + break; + } + else if(c != '\n')SKIP; } else { SKIP; @@ -650,3 +766,65 @@ char a[10]; } } } +refer(c1) +{ + register int c2; + if(c1 != '\n') + SKIP; + while(1){ + if(C != '.') + SKIP; + else { + if(C != ']') + SKIP; + else { + while(C != '\n') + c2=c; + if(chars[c2] == PUNCT)putchar(c2); + return; + } + } + } +} +inpic(){ + register int c1; + register char *p1; + SKIP; + p1 = line; + c = '\n'; + while(1){ + c1 = c; + if(C == '.' && c1 == '\n'){ + if(C != 'P'){ + if(c == '\n')continue; + else { SKIP; c='\n'; continue;} + } + if(C != 'E'){ + if(c == '\n')continue; + else { SKIP; c='\n';continue; } + } + SKIP; + return; + } + else if(c == '\"'){ + while(C != '\"'){ + if(c == '\\'){ + if(C == '\"')continue; + ungetc(c,infile); + backsl(); + } + else *p1++ = c; + } + *p1++ = ' '; + } + else if(c == '\n' && p1 != line){ + *p1 = '\0'; + if(wordflag)putwords(NO); + else { + puts(line); + putchar('\n'); + } + p1 = line; + } + } +} diff --git a/usr/src/usr.bin/diction/Makefile b/usr/src/usr.bin/diction/Makefile index fd9789ddb3..96de4d8061 100644 --- a/usr/src/usr.bin/diction/Makefile +++ b/usr/src/usr.bin/diction/Makefile @@ -1,5 +1,5 @@ # -# @(#)Makefile 4.1 (Berkeley) 82/11/06 +# @(#)Makefile 4.2 (Berkeley) 82/11/06 # # make file for STYLE and DICTION # @@ -11,7 +11,7 @@ BIN = ${DESTDIR}/usr/lib DICT= -DDICT=\"/usr/lib/dict.d\" -all: diction style explain deroff +all: diction style suggest deroff style: style1 style2 style3 install: style diction @@ -27,6 +27,8 @@ install: style diction install -c explain ${DESTDIR}/usr/bin install -c dict.d ${DESTDIR}/usr/lib install -c explain.d ${DESTDIR}/usr/lib +# ln ${DESTDIR}/usr/bin/explain ${DESTDIR}/usr/bin/suggest +# ln ${DESTDIR}/usr/lib/explain.d ${DESTDIR}/usr/bin/suggest.d clean: rm -f prtlex.c wdslex.c endlex.c @@ -46,7 +48,7 @@ outp.o: names.h conp.h style.h style1: wdslex.o cc wdslex.o -ll -o style1 -wdslex.o: wdslex.c nhash.c dict.c ydict.c names.h +wdslex.o: wdslex.c nhash.c dict.c ydict.c names.h abbrev.c cc -c wdslex.c wdslex.c: nwords.l lex nwords.l @@ -64,5 +66,5 @@ diction: dprog dprog: diction.c cc -O $(DICT) diction.c -i -o dprog -deroff: deroff.c - cc -O deroff.c -i -o deroff +deroff: deroff.c getopt.c + cc -O deroff.c getopt.c -i -o deroff diff --git a/usr/src/usr.bin/diction/README b/usr/src/usr.bin/diction/README index 1c333e52eb..557bde54b9 100644 --- a/usr/src/usr.bin/diction/README +++ b/usr/src/usr.bin/diction/README @@ -1,52 +1,64 @@ -@(#)README 4.1 (Berkeley) 82/11/06" +@(#)README 4.2 (Berkeley) 82/11/06" The writing tools package contains 3 command files, - style, diction, and explain. + style, diction, and suggest. ``style'' reads a document and analyzes the surface characteristics of the writing style of the document. ``diction'' locates all sentences in a document containing commonly misused or wordy phrases. -``explain'' is an interactive thesaures for the phrases found by diction. +``suggest'' is an interactive thesaures for the phrases found by diction. + suggest was originally named ``explain''. -The manual pages are in files style.1 and diction.1 - see the enclosed documentation - for more details. +The manual pages are in files style.1 and diction.1 - see the enclosed + documentation for more details. ``style'' is a runcom that runs 4 programs, style1, style2, style3, and deroff. - style1, style2, style3 are included on the tape, you should not need to - make them, but if you do - -To make style1, style2, style3: - set BIN in Makefile to the directory where the object files - are to be stored. +To make style: + set LIB in Makefile to the directory where the object files + are to be stored. (probably /usr/lib/style) + set BIN in Makefile to the directory for the shell programs + (usually /usr/bin) + in the file ``style'' set L to the same directory as LIB above. + the same is needed in ``topic'', which is installed + when you make style + if you wish to collect data on the documents run, also define SCATCH + otherwise delete the definition of SCATCH in Makefile + if you are collecting data, the file must exist and be writeable + by everyone then run ``make style'' - in the file ``style'' set B to the same directory as BIN above. + this will compile and install all of the programs in both + LIB and BIN ``diction'' is a runcom that runs 2 programs, dprog and deroff. - dprog has been included on the tape, but you will need to make it - to include the full path name of dict.d To make dprog: set DICT in Makefile to the full path name of diction's data file ``dict.d'' + the file ``diction'' also needs L set to the same directory as LIB the program ``dprog'' will get put in the same place as - the object files for style (BIN). + the object files for style (LIB). + if you wish to collect data on the documents run, also define CATCH + otherwise delete the definition of CATCH in Makefile + the collection file must exist and be writeable by everyone then run ``make diction'' - the file ``diction'' also needs B set to the same directory as BIN - above -The file ``explain'' needs D set to the full path name of the file - explain.d +The file ``suggest'' needs D set to the full path name of the file + suggest.d Both ``style'' and ``diction'' expect the document to contain formatting commands. A new version of deroff is included that knows about the standard UNIX formatting macro packages. - the command ``make deroff'' will install deroff in BIN mentioned - above. You may wish to install this version of deroff in /usr/bin - in which case the path names to it in files ``style'' and - ``diction'' should be changed accordingly. + the source for getopt.c is included just in case you don't have it. + if the loader complains about not finding getopt, compile & load + it with deroff.c + typing + make deroff + will compile and install it in /usr/bin -The default flag passed to deroff (either -ms or -ml) in both +The default flag passed to deroff (either -ms or -mm) in both style and diction should be set to the proper one for your installation. -These programs will not compile under UNIX version 6 or PWB 1.2 due - to size limitations on these systems. +These programs will not compile under UNIX version 6 or PWB 1.2 + because of size limitations on these systems. + diff --git a/usr/src/usr.bin/diction/diction/dict.d b/usr/src/usr.bin/diction/diction/dict.d index 1cea532b9d..186fb90635 100644 --- a/usr/src/usr.bin/diction/diction/dict.d +++ b/usr/src/usr.bin/diction/diction/dict.d @@ -1,14 +1,16 @@ + ing behavior ability to a great deal of majority of number of +~the number of a tendency to abovementioned absolutely complete absolutely essential accordingly - activate + activate actual added increments adequate enough @@ -66,7 +68,7 @@ ing behavior but what by means of by the use of - capability + capability carry out center portion center around @@ -173,7 +175,7 @@ ing behavior generally agreed rise to an indication of - an order of magnitude + order of magnitude authorization gratuitous greatly minimize @@ -190,7 +192,7 @@ ing behavior if at all possible it stands to reason impact - implement + implement important essentials importantly in a large measure @@ -247,9 +249,11 @@ ing behavior in violation of inasmuch as indicate + indicating indicative of initialize - initiate + initiate + initiating injurious to inquire inside of @@ -269,7 +273,7 @@ ing behavior not be un ~ not under ~ not until -~ not unique +~ not unique joint cooperation joint partnership just exactly @@ -440,6 +444,7 @@ ing behavior up to this time upshot utilize + utilizing very complete very very unique @@ -454,7 +459,7 @@ ing behavior ~ through which ~ without which ~ which -~ at which +~ at which ~ about which ~ by which ~ on which @@ -470,6 +475,8 @@ ing behavior ~likewise ~otherwise ~clockwise +~unwise +~ wise wise with a view to with reference to @@ -509,3 +516,4 @@ wise from the standpoint of employ as shown in +~ rather than diff --git a/usr/src/usr.bin/diction/diction/diction.1 b/usr/src/usr.bin/diction/diction/diction.1 index 0f13f7f11c..5a27a5182a 100644 --- a/usr/src/usr.bin/diction/diction/diction.1 +++ b/usr/src/usr.bin/diction/diction/diction.1 @@ -1,5 +1,5 @@ .\" -.\" @(#)diction.1 4.1 (Berkeley) 82/11/06 +.\" @(#)diction.1 4.2 (Berkeley) 82/11/06 .\" .TH DICTION 1 .SH NAME diff --git a/usr/src/usr.bin/diction/diction/diction.sh b/usr/src/usr.bin/diction/diction/diction.sh index 9549c61a48..a9bd8b65ed 100644 --- a/usr/src/usr.bin/diction/diction/diction.sh +++ b/usr/src/usr.bin/diction/diction/diction.sh @@ -1,6 +1,6 @@ #! /bin/sh # -# @(#)diction.sh 4.1 (Berkeley) 82/11/06 +# @(#)diction.sh 4.2 (Berkeley) 82/11/06 # B=/usr/lib echo $* @@ -8,7 +8,7 @@ rest= flag= nflag= mflag=-mm -lflag= +lflag= -ml file= for i do case $i in diff --git a/usr/src/usr.bin/diction/diction/dprog.c b/usr/src/usr.bin/diction/diction/dprog.c index 836e5b2197..4ef39e225c 100644 --- a/usr/src/usr.bin/diction/diction/dprog.c +++ b/usr/src/usr.bin/diction/diction/dprog.c @@ -1,9 +1,9 @@ #ifndef lint -static char sccsid[] = "@(#)dprog.c 4.1 (Berkeley) 82/11/06"; +static char sccsid[] = "@(#)dprog.c 4.2 (Berkeley) 82/11/06"; #endif not lint /* -* diction -- print all sentences containing one of default phrases + * diction -- print all sentences containing one of default phrases * * status returns: * 0 - ok, and some matches @@ -16,6 +16,9 @@ static char sccsid[] = "@(#)dprog.c 4.1 (Berkeley) 82/11/06"; #define MAXSIZ 6500 #define QSIZE 650 +int linemsg; +long olcount; +long lcount; struct words { char inp; char out; @@ -24,28 +27,56 @@ struct words { struct words *fail; } w[MAXSIZ], *smax, *q; +char table[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, ' ', 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ' ', '.', ' ', ' ', ' ', ' ', ' ', ' ', + ' ', ' ', ' ', ' ', ' ', ' ', '.', ' ', + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', ' ', ' ', ' ', ' ', ' ', '.', + ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ', + ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ' + }; +int caps = 0; +int lineno = 0; int fflag; int nflag = 1; /*use default file*/ char *filename; +int mflg = 0; /*don't catch output*/ int nfile; int nsucc; long nsent = 0; long nhits = 0; char *nlp; char *begp, *endp; +int beg, last; +char *myst; +int myct = 0; int oct = 0; FILE *wordf; +FILE *mine; char *argptr; +long tl = 0; +long th = 0; main(argc, argv) -char **argv; +char *argv[]; { + int sv; while (--argc > 0 && (++argv)[0][0]=='-') switch (argv[0][1]) { case 'f': fflag++; - filename = ++argv; + filename = (++argv)[0]; argc--; continue; @@ -53,6 +84,13 @@ char **argv; nflag = 0; continue; case 'd': + mflg=0; + continue; + case 'c': + caps++; + continue; + case 'l': + lineno++; continue; default: fprintf(stderr, "diction: unknown flag\n"); @@ -67,13 +105,24 @@ out: } } else { - wordf = fopen(*filename,"r"); + wordf = fopen(filename,"r"); if(wordf == NULL){ fprintf(stderr,"diction: can't open %s\n",filename); exit(2); } } +#ifdef CATCH + if(fopen(CATCH,"r") != NULL) + if((mine=fopen(CATCH,"a"))!=NULL)mflg=1; +#endif +#ifdef MACS + if(caps){ + printf(".so "); + printf(MACS); + printf("\n"); + } +#endif cgotofn(); cfail(); nfile = argc; @@ -82,9 +131,20 @@ out: } else while (--argc >= 0) { execute(*argv); + if(lineno){ + printf("file %s: number of lines %ld number of phrases found %ld\n", + *argv, lcount-1, nhits); + tl += lcount-1; + th += nhits; + sv = lcount-1; + lcount = nhits = 0; + } argv++; } - printf("number of sentences %ld number of hits %ld\n",nsent,nhits); + if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th); + if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits); + else if(tl != sv) + if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th); exit(nsucc == 0); } @@ -94,6 +154,8 @@ char *file; register char *p; register struct words *c; register ccount; + int count1; + char *beg1; struct words *savc; char *savp; int savct; @@ -101,6 +163,7 @@ char *file; char buf[1024]; int f; int hit; + last = 0; if (file) { if ((f = open(file, 0)) < 0) { fprintf(stderr, "diction: can't open %s\n", file); @@ -108,24 +171,31 @@ char *file; } } else f = 0; + lcount = olcount = 1; + linemsg = 1; ccount = 0; + count1 = -1; p = buf; nlp = p; c = w; oct = hit = 0; - savc = savp = 0; + savc = (struct words *) 0; + savp = (char *) 0; for (;;) { - if (--ccount <= 0) { + if(--ccount <= 0) { if (p == &buf[1024]) p = buf; if (p > &buf[512]) { if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; } else if ((ccount = read(f, p, 512)) <= 0) break; - convert(p,ccount); + if(caps && (count1 > 0)) + fwrite(beg1,sizeof(*beg1),count1,stdout); + count1 = ccount; + beg1 = p; } if(p == &buf[1024])p=buf; nstate: - if (c->inp == *p) { + if (c->inp == table[*p]) { c = c->nst; } else if (c->link != 0) { @@ -138,14 +208,15 @@ char *file; p=savp; if(ccount > savct)ccount += savct; else ccount = savct; - savc=savp=0; + savc = (struct words *) 0; + savp = (char *) 0; goto hadone; } c = c->fail; if (c==0) { c = w; istate: - if (c->inp == *p) { + if (c->inp == table[*p]) { c = c->nst; } else if (c->link != 0) { @@ -156,7 +227,7 @@ char *file; else goto nstate; } if(c->out){ - if((c->inp == *(p+1)) && (c->nst != 0)){ + if((c->inp == table[*(p+1)]) && (c->nst != 0)){ savp=p; savc=c; savct=ccount; @@ -165,7 +236,7 @@ char *file; else if(c->link != 0){ savc=c; while((savc=savc->link)!= 0){ - if(savc->inp == *(p+1)){ + if(savc->inp == table[*(p+1)]){ savp=p; savc=c; savct=ccount; @@ -174,7 +245,8 @@ char *file; } } hadone: - savc=savp=0; + savc = (struct words *) 0; + savp = (char *) 0; if(c->out == (char)(0377)){ c=w; goto nstate; @@ -182,26 +254,41 @@ char *file; begp = p - (c->out); if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp); endp=p; + if(mflg){ + if(begp-20 < &buf[0]){ + myst = &buf[1024]-20; + if(nlp < &buf[512])myst=nlp; + } + else myst = begp-20; + if(myst < nlp)myst = nlp; + beg = 0; + } hit = 1; nhits++; - if (*p++ == '.') { + if(*p == '\n')lcount++; + if (table[*p++] == '.') { + linemsg = 1; if (--ccount <= 0) { if (p == &buf[1024]) p = buf; if (p > &buf[512]) { if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; } else if ((ccount = read(f, p, 512)) <= 0) break; - convert(p,ccount); + if(caps && (count1 > 0)) + fwrite(beg1,sizeof(*beg1),count1,stdout); + count1=ccount; + beg1=p; } } succeed: nsucc = 1; { if (p <= nlp) { - outc(&buf[1024]); + outc(&buf[1024],file); nlp = buf; } - outc(p); + outc(p,file); } + if(mflg)last=1; nomatch: nlp = p; c = w; @@ -209,22 +296,32 @@ char *file; continue; } cont: - if (*p++ == '.'){ + if(*p == '\n')lcount++; + if (table[*p++] == '.'){ if(hit){ if(p <= nlp){ - outc(&buf[1024]); + outc(&buf[1024],file); nlp = buf; } - outc(p); - putchar('\n'); putchar('\n'); + outc(p,file); + if(!caps)printf("\n\n"); + if(mflg && last){putc('\n',mine);myct = 0;} } + linemsg = 1; + if(*p == '\n')olcount = lcount+1; + else + olcount=lcount; + last = 0; hit = 0; oct = 0; nlp = p; c = w; begp = endp = 0; + nsent++; } } + if(caps && (count1 > 0)) + fwrite(beg1,sizeof(*beg1),count1,stdout); close(f); } @@ -236,9 +333,9 @@ getargc() fclose(wordf); if(nflag && fflag){ nflag=0; - wordf=fopen(*filename,"r"); + wordf=fopen(filename,"r"); if(wordf == NULL){ - fprintf("can't open %s\n",filename); + fprintf("diction can't open %s\n",filename); exit(2); } return(getc(wordf)); @@ -362,36 +459,76 @@ init: if ((s->inp) != 0) { s->inp,s->out,s->nst,s->link,s->fail); */ } -convert(p,ccount) -char *p; -{ - int ct; - char *pt; - for(pt=p,ct=ccount;--ct>=0;pt++){ - if(isupper(*pt))*pt=tolower(*pt); - else if(isspace(*pt))*pt=' '; - else if(*pt=='.' || *pt=='?'||*pt=='!'){ - *pt='.'; - nsent++; - } - else if(ispunct(*pt))*pt=' '; - } -} -outc(addr) +outc(addr,file) char *addr; +char *file; { + int inside; + inside = 0; + if(!caps && lineno && linemsg){ + printf("beginning line %ld",olcount); + if(file != (char *)NULL)printf(" %s\n",file); + else printf("\n"); + linemsg = 0; + } while(nlp < addr){ - if(oct++ > 70 && *nlp == ' ' && nlp != begp && nlp != endp){ + if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){ oct=0; putchar('\n'); } if(nlp == begp){ - putchar('['); + if(caps)inside++; + else { + if( oct >45){putchar('\n'); + oct=0; + } + if( oct==0 || table[*nlp] != ' '){ + printf("*["); + oct+=2; + } + else {printf(" *[");; + oct+=3; + } + } + if(mflg)putc('[',mine); + } + if(inside){ + if(islower(*nlp))*nlp = toupper(*nlp); + } + else { + if(!caps && *nlp == '\n')*nlp = ' '; + if(*nlp == ' ' && oct==0); + else if(!caps) {putchar(*nlp); oct++;} } - putchar(*nlp); if(nlp == endp){ - putchar(']'); + if(caps) + inside= 0; + else { + if(*(nlp) != ' '){printf("]*"); + oct+=2; + } + else {printf("]* "); + oct+=3; + } + if(oct >60){putchar('\n'); + oct=0; + } + } + if(mflg)putc(']',mine); + beg = 0; + } + if(mflg){ + if(nlp == myst)beg = 1; + if(beg || last){ + putc(*nlp,mine); + if(myct++ >= 72 || last == 20){ + putc('\n',mine); + if(last == 20)last=myct=0; + else myct=0; + } + if(last)last++; + } } nlp++; } diff --git a/usr/src/usr.bin/diction/explain/explain.d b/usr/src/usr.bin/diction/explain/explain.d index 33187361b0..a7e4ce4045 100644 --- a/usr/src/usr.bin/diction/explain/explain.d +++ b/usr/src/usr.bin/diction/explain/explain.d @@ -1,3 +1,4 @@ + exhibit filing behavior file which "that" when clause is restrictive in accordance with your request as you requested diff --git a/usr/src/usr.bin/diction/explain/explain.sh b/usr/src/usr.bin/diction/explain/explain.sh index 3b19cfc472..32b1277139 100644 --- a/usr/src/usr.bin/diction/explain/explain.sh +++ b/usr/src/usr.bin/diction/explain/explain.sh @@ -1,6 +1,6 @@ #! /bin/sh # -# @(#)explain.sh 4.1 (Berkeley) 82/11/06 +# @(#)explain.sh 4.2 (Berkeley) 82/11/06 # trap 'rm $$; exit' 1 2 3 15 D=/usr/lib/explain.d diff --git a/usr/src/usr.bin/diction/style/style.sh b/usr/src/usr.bin/diction/style/style.sh index 63bf5021ac..975285b680 100644 --- a/usr/src/usr.bin/diction/style/style.sh +++ b/usr/src/usr.bin/diction/style/style.sh @@ -1,29 +1,41 @@ #! /bin/sh # -# @(#)style.sh 4.1 (Berkeley) 82/11/06 +# @(#)style.sh 4.2 (Berkeley) 82/11/06 # -B=/usr/lib -echo $* +L=/usr/lib/style +C=/usr/llc/collect/scatch +if test -w $C +then +echo $HOME $* `date` >>$C 2>/dev/null +fi +echo " " $* sflag=-s +eflag= +Pflag= +nflag= lflag= lcon= rflag= rcon= mflag=-ms -mlflag= -eflag= -Pflag= +mlflag=-ml for i in $* do case $i in -r) rflag=-r; shift; rcon=$1;shift;continue;; -l)lflag=-l; shift; lcon=$1;shift;continue;; -mm) mflag=-mm;shift;continue;; -ms) mflag=-ms;shift;continue;; --ml) mlflag=-ml;shift;continue;; +-li|-ml) mlflag=-ml;shift;continue;; ++li|-tt)mlflag=;shift;continue;; -p) sflag=-p;shift;continue;; -a) sflag=-a;shift;continue;; -e) eflag=-e;shift;continue;; -P) Pflag=-P;shift;continue;; +-n) nflag=-n;shift;continue;; +-N) nflag=-N;shift;continue;; +-flags) echo $0 "[-flags] [-r num] [-l num] [-e] [-p] [-n] [-N] [-a] [-P] [-mm|-ms] [-li|+li] [file ...]";exit;; +-*) echo unknown style flag $i; exit;; +*) break;; esac done -$B/deroff $mflag $mlflag $*^$B/style1^$B/style2^$B/style3 $rflag $rcon $lflag $lcon $sflag $eflag $Pflag +deroff $mflag $mlflag $*^$L/style1^$L/style2^$L/style3 $rflag $rcon $lflag $lcon $sflag $nflag $eflag $Pflag diff --git a/usr/src/usr.bin/diction/style1/names.h b/usr/src/usr.bin/diction/style1/names.h index c7a7aad583..7d6961a12a 100644 --- a/usr/src/usr.bin/diction/style1/names.h +++ b/usr/src/usr.bin/diction/style1/names.h @@ -1,5 +1,5 @@ /* - * @(#)names.h 4.1 (Berkeley) 82/11/06 + * @(#)names.h 4.2 (Berkeley) 82/11/06 */ #define ART 'A' @@ -21,6 +21,7 @@ #define POS 'O' #define SUBCONJ 'S' #define NOUN 'N' +#define NOM 'm' #define VERB 'V' #define ADJ 'J' #define ADV 'D' @@ -42,5 +43,7 @@ #define AUXX 'Q' #define NV_PL 'n' #define INTER 'i' +#define PNOUN 's' +#define MVERB 'v' #define PLURAL 1 #define SING 2 diff --git a/usr/src/usr.bin/diction/style1/nhash.c b/usr/src/usr.bin/diction/style1/nhash.c index b4e7e8e7fb..361a18b357 100644 --- a/usr/src/usr.bin/diction/style1/nhash.c +++ b/usr/src/usr.bin/diction/style1/nhash.c @@ -1,5 +1,5 @@ #ifndef lint -static char sccsid[] = "@(#)nhash.c 4.1 (Berkeley) 82/11/06"; +static char sccsid[] = "@(#)nhash.c 4.2 (Berkeley) 82/11/06"; #endif not lint struct dict { diff --git a/usr/src/usr.bin/diction/style1/style1.l b/usr/src/usr.bin/diction/style1/style1.l index a994cebe1a..9fbc6deeae 100644 --- a/usr/src/usr.bin/diction/style1/style1.l +++ b/usr/src/usr.bin/diction/style1/style1.l @@ -2,10 +2,11 @@ /* break out words, output cap + word(inverted) */ #ifndef lint -static char sccsid[] = "@(#)style1.l 4.1 (Berkeley) 82/11/06"; +static char sccsid[] = "@(#)style1.l 4.2 (Berkeley) 82/11/06"; #endif not lint #include +#include #define OUT() for(i=yyleng-1;i>=0; i--)putchar(yytext[i]); putchar('\n') #define OUT1(nam) printf("%c:%s\n",nam,yytext) #define OUTN(string) printf("%s\n",string) @@ -13,9 +14,11 @@ static char sccsid[] = "@(#)style1.l 4.1 (Berkeley) 82/11/06"; #include "nhash.c" #include "dict.c" #include "ydict.c" +#include "abbrev.c" char nt[] = "D:n't"; char qs[] = "c:'s"; char fin[] = "E:."; +int NOCAPS = 0; /* if set all caps are turned to lower case */ int i,j; int dot = 0; int first = 1; @@ -23,39 +26,75 @@ int qflg,nflg; int cap = 0; %} %p 3000 -%a 2500 +%a 3300 +%o 4500 L [a-z] N [0-9] C [A-Z] +A [a-zA-Z] +P [a-zA-Z0-9] %% -(St|Dr|Drs|Mr|Mrs|Ms)"." { +^[.!].+[\n] { + if(dot){ + OUTN(fin); + dot = 0; + first = 1; + } + printf(":%s",yytext); + } +May { + if(first == 0){ + OUT1(NOUN); + } + else { + first = 0; + yytext[0] = tolower(yytext[0]); + cap = 1; + goto wd; + } + } +"U.S." { OUT1(NOUN); } {C}{L}*'[s] { pos(1); if(first==1)first=0; } -{C}+['][s]* { +{C}+['][s] { + if(NOCAPS) + for(i=0;i1) { if(freopen(argv[1],"r",stdin)==NULL) { @@ -430,9 +477,24 @@ char *argv[]; }else{ filename=argv[1]; yylex(); + OUTN(fin); } argc--; argv++; } } return(rc); } +ahead(){ + register int c; + if(isalnum((c=input()))){ + yytext[yyleng++] = '.'; + while(!isspace((c=input() ))) + yytext[yyleng++] = c; + yytext[yyleng] = '\0'; + unput(c); + return(1); + } + unput(c); + unput('.'); + return(0); +} diff --git a/usr/src/usr.bin/diction/style1/ydict.c b/usr/src/usr.bin/diction/style1/ydict.c index 9c89ecb160..6860cbe364 100644 --- a/usr/src/usr.bin/diction/style1/ydict.c +++ b/usr/src/usr.bin/diction/style1/ydict.c @@ -1,5 +1,5 @@ #ifndef lint -static char sccsid[] = "@(#)ydict.c 4.1 (Berkeley) 82/11/06"; +static char sccsid[] = "@(#)ydict.c 4.2 (Berkeley) 82/11/06"; #endif not lint struct dict ary_d[] = { diff --git a/usr/src/usr.bin/diction/style2/edict.c b/usr/src/usr.bin/diction/style2/edict.c index b2290f5766..d11aac03a1 100644 --- a/usr/src/usr.bin/diction/style2/edict.c +++ b/usr/src/usr.bin/diction/style2/edict.c @@ -1,5 +1,5 @@ #ifndef lint -static char sccsid[] = "@(#)edict.c 4.1 (Berkeley) 82/11/06"; +static char sccsid[] = "@(#)edict.c 4.2 (Berkeley) 82/11/06"; #endif not lint struct dict able_d[] = { diff --git a/usr/src/usr.bin/diction/style2/style2.l b/usr/src/usr.bin/diction/style2/style2.l index 744ba510cc..dd324451e2 100644 --- a/usr/src/usr.bin/diction/style2/style2.l +++ b/usr/src/usr.bin/diction/style2/style2.l @@ -1,15 +1,18 @@ %{ #ifndef lint -static char sccsid[] = "@(#)style2.l 4.1 (Berkeley) 82/11/06"; +static char sccsid[] = "@(#)style2.l 4.2 (Berkeley) 82/11/06"; #endif not lint #include +#include #include "names.h" #include "ehash.c" #include "edict.c" #define OUT1(c) putchar(c); putchar(':'); for(i=yyleng-1;i>=0;)putchar(yytext[i--]) +#define POUT1(c) putchar(c); putchar(':'); for(i=yyleng-1;i>0;)putchar(yytext[i--]) int i; int nomin = 0; +int NOCAPS = 0; /*if set, all caps mapped to lower, plurals stripped*/ %} C [A-Z0-9a-z",;] @@ -26,6 +29,9 @@ L [a-zA-Z-] look(ed,2,ED); } ^draw.+ { + if(yytext[5] == 'E'){ + OUT1(NOUN_ADJ); + } OUT1(ADJ_ADV); } ^eca.+ { @@ -35,10 +41,10 @@ L [a-zA-Z-] look(ice,3,NOUN_VERB); } ^ecne.+ { - if(look(ence,4,NOUN))nomin++; + if(look(ence,4,NOM))nomin++; } ^ecna.+ { - if(look(ance,4,NOUN))nomin++; + if(look(ance,4,NOM))nomin++; } ^ee.+ { look(ee,2,NOUN); @@ -56,7 +62,7 @@ L [a-zA-Z-] look(cle,3,NOUN); } ^eru.+ { - if(look(ure,3,NOUN))nomin++; + look(ure,3,NOUN); } ^eti.+ { look(ite,3,NOUN); @@ -92,7 +98,7 @@ L [a-zA-Z-] OUT1(NV_PL); } ^noi.+ { - if(look(ion,3,NOUN))nomin++; + if(look(ion,3,NOM))nomin++; } ^pihs.+ { look(ship,4,NOUN); @@ -127,19 +133,36 @@ L [a-zA-Z-] ^su.+ { look(us,2,NOUN); } +^snoi.+ { + if(look(ion,4,PNOUN))nomin++; + } +^stnem.+ { + if(look(ment,5,PNOUN))nomin++; + } +^secne.+ { + if(look(ence,5,PNOUN))nomin++; + } +^secna.+ { + if(look(ance,5,PNOUN))nomin++; + } ^s.+ { - if(yytext[yyleng-1] >= 'A' && yytext[yyleng-1]<= 'Z'){ - OUT1(NOUN); + if(isupper(yytext[yyleng-1])){ + if(NOCAPS){ + yytext[yyleng-1] = tolower(yytext[yyleng-1]); + POUT1(PNOUN); + } + else { OUT1(PNOUN); } } else { - OUT1(NV_PL); + if(NOCAPS){POUT1(NV_PL);} + else{ OUT1(NV_PL); } } } ^tna.+ { look(ant,3,NOUN_ADJ); } ^tnem.+ { - if(look(ment,4,NOUN))nomin++; + if(look(ment,4,NOM))nomin++; } ^tse.+ { look(est,3,ADJ); @@ -151,7 +174,9 @@ L [a-zA-Z-] putchar(yytext[0]); } {L}+ { - if(yytext[yyleng-1] >= 'A' && yytext[yyleng-1] <= 'Z'){ + if(isupper(yytext[yyleng-1])){ + if(NOCAPS) + yytext[yyleng-1] = tolower(yytext[yyleng-1]); if((yytext[0] == 'n' || yytext[0] == 'l') && yytext[1] == 'a'){ OUT1(NOUN_ADJ); } @@ -163,9 +188,12 @@ L [a-zA-Z-] OUT1(UNK); } } -: { +^:[\n] { egetd(); } +^:.+[\n] { + printf("%s",yytext); + } %% look(f,n,nc) char (*f)(); @@ -176,12 +204,17 @@ char nc; char sch; in=0; sch=yytext[yyleng-1]; - if(sch>= 'A' && sch <= 'Z'){ - yytext[yyleng-1] = sch -'A' +'a'; + if(isupper(sch)){ + yytext[yyleng-1] = tolower(sch); in=1; } if((*f)(&yytext[n],1,0) != 0){ nn = (*f)(&yytext[n],1,0); + if(nc == PNOUN) + if(nn == NOUN_VERB){ + if(in == 1)nn=PNOUN; + else nn=NV_PL; + } ret = 0; } else { @@ -190,9 +223,14 @@ char nc; } if(in==1){ if(nn == NOUN_VERB)nn=NOUN; - yytext[yyleng-1]=sch; + if(!NOCAPS)yytext[yyleng-1]=sch; + } + if(nn==PNOUN && yytext[0] == 's' && NOCAPS){ + POUT1(nn); + } + else { + OUT1(nn); } - OUT1(nn); return(ret); } yywrap(){ diff --git a/usr/src/usr.bin/diction/style3/conp.h b/usr/src/usr.bin/diction/style3/conp.h index 3f1c2d030c..8e6c71183f 100644 --- a/usr/src/usr.bin/diction/style3/conp.h +++ b/usr/src/usr.bin/diction/style3/conp.h @@ -1,11 +1,12 @@ /* - * @(#)conp.h 4.1 (Berkeley) 82/11/06 + * @(#)conp.h 4.2 (Berkeley) 82/11/06 */ #define SLENG 250 #define SCHAR 1500 extern struct ss {char *sp,ic,cc; int leng;} sent[SLENG]; extern struct ss *sentp; extern comma,j,i; +extern int nsleng; extern question; int must; int be; diff --git a/usr/src/usr.bin/diction/style3/outp.c b/usr/src/usr.bin/diction/style3/outp.c index 014956338d..8ac17f6fc5 100644 --- a/usr/src/usr.bin/diction/style3/outp.c +++ b/usr/src/usr.bin/diction/style3/outp.c @@ -1,14 +1,17 @@ #ifndef lint -static char sccsid[] = "@(#)outp.c 4.1 (Berkeley) 82/11/06"; +static char sccsid[] = "@(#)outp.c 4.2 (Berkeley) 82/11/06"; #endif not lint +#include +#include #include "style.h" #include "names.h" #include "conp.h" char *names[] = { "noun", "verb", "interjection", "adjective", "adverb", "conjunction", "possessive", - "pronoun", "article", "preposition", "auxiliary", "tobe", "subordinate conjunction", "", "expletive" + "pronoun", "article", "preposition", "auxiliary", "tobe", "", "subordinate conjunction", "expletive" }; +extern int barebones; outp(){ struct ss *st; char *ssp; @@ -16,11 +19,31 @@ outp(){ int index, lverbc; int nn, sc, f, kk,comp, begsc; int conjf, verbf,lpas,bflg,lexp,olvb; + int nom; int infinf, ovflg; int lvowel,nlet; int imper; float rd; + extern FILE *deb; + extern int nosave; + if(barebones){ + for(sentp=sent;sentp->cc != END;sentp++) + printf("%s %c %c\n",sentp->sp,sentp->ic,sentp->cc); + printf("%s %c %c\n",sentp->sp,sentp->ic,sentp->cc); + return; + } + if(topic){ + for(sentp=sent;sentp->cc != END;sentp++){ + if(sentp->cc==ADJ && (sentp+1)->cc==NOUN){ + printf("%s ",sentp->sp); + sentp++; + printf("%s\n",sentp->sp); + } + else if(sentp->cc==NOUN)printf("%s\n",sentp->sp); + } + return; + } if(style){ nn = kk = 0; for(sentp=sent;sentp->cc != END;sentp++){ @@ -30,7 +53,7 @@ outp(){ if(nn < 4 && kk == 0)return; } - imper = lexp = lpas = index = lverbc = 0; + imper = lexp = lpas = index = lverbc = nom = 0; conjf = verbf = kk = nn = sc = comp = begsc = 0; bflg = olvb = infinf = ovflg = 0; nlet = 0; @@ -52,6 +75,12 @@ outp(){ case NOUN: spart ="noun"; if(f)index=0; + if((sentp->ic==NOM)||(sentp->ic==PNOUN && islower(*(sentp->sp)))){ + sentp->ic = NOM; + nom++; + if(nosave && (deb != NULL)) /* SAVE NOM */ + fprintf(deb,"%s\n",sentp->sp); + } if(*sentp->sp != 'x'){ noun++; numnonf++; @@ -80,7 +109,7 @@ outp(){ if(verbf == 0)verbf++; else if(conjf)comp++; } - if(bflg && sentp->ic == ED)lpas = ++passive; + if(bflg && sentp->ic == ED){lpas++; ++passive;} } break; case INTER: @@ -93,6 +122,11 @@ outp(){ if(f)index=3; adj++; numnonf++; + if(sentp->ic == NOM){ + nom++; + if(nosave && (deb != NULL)) /* SAVE NOM */ + fprintf(deb,"%s\n",sentp->sp); + } letnonf += sentp->leng; bflg = infinf = ovflg = 0; break; @@ -186,13 +220,6 @@ outp(){ if(part){ printf("%s %s\n",spart,sentp->sp); } - else if(pstyle){ - printf("%s ",sentp->sp); - if(kk == 15){ - printf("\n"); - kk=0; - } - } if(style){ ssp=sentp->sp; lvowel = 0; @@ -230,8 +257,11 @@ outp(){ sentp++; } if(part){ - printf(". %s\n",sentp->sp); + printf(". %s\n",sentp->sp); + if(sentno < MAXPAR && nn > 0) + leng[sentno++] = nn; } + if(nn == 0)return; numsent++; numlet += nlet; tverbc += lverbc; @@ -243,10 +273,14 @@ outp(){ else if(*(sentp->sp) == '/')icount++; else if(imper)icount++; if(rstyle||pstyle)rd = 4.71*((float)(nlet)/(float)(nn))+.5*(float)(nn)-21.43; - if((rstyle&& rd>=rthresh)||(lstyle&&nn >= lthresh)||(pastyle&&lpas)||(estyle&&lexp)){ + if(pstyle || + (rstyle&& rd>=rthresh)||(lstyle&&nn >= lthresh)||(pastyle&&lpas)||(estyle&&lexp) + || (nstyle && (nom > 1 || (nom && lpas)))|| (Nstyle && nom)){ if(!part){ for(st=sent, kk=0;st->cc != END;st++){ - printf("%s ",st->sp); + if(st->ic == NOM) + printf("*%s* ",st->sp); + else printf("%s ",st->sp); if(kk++ >= 15){ kk=0; printf("\n"); diff --git a/usr/src/usr.bin/diction/style3/part.l b/usr/src/usr.bin/diction/style3/part.l index c0de5691ab..d53d63b1d8 100644 --- a/usr/src/usr.bin/diction/style3/part.l +++ b/usr/src/usr.bin/diction/style3/part.l @@ -1,19 +1,25 @@ %{ #ifndef lint -static char sccsid[] = "@(#)part.l 4.1 (Berkeley) 82/11/06"; +static char sccsid[] = "@(#)part.l 4.2 (Berkeley) 82/11/06"; #endif not lint #include "style.h" #include "names.h" #include "conp.h" +FILE *deb; +int nosave = 1; int part = 0; +int barebones = 0; +int topic = 0; int style = 1; int pastyle = 0; int pstyle = 0; int lstyle = 0; int rstyle = 0; int estyle = 0; +int nstyle = 0; +int Nstyle = 0; int lthresh; int rthresh; int nomin; @@ -56,6 +62,7 @@ int noun = 0; long numlet = 0; int beg[15] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; int sleng[50]; +int nsleng = 0; int j,jj,i; int comma = 0; int cflg; @@ -64,13 +71,20 @@ int quote = 0; char *st; int initf = 0; int over = 1; +int nroff = 0; +int nrofflg = 0; +int leng[MAXPAR]; +int sentno= 0; %} C [A-DF-Za-z] %% ^{C}:.+ { collect: sentp->cc = sentp->ic = yytext[0]; + if(sentp->cc == NOM) + sentp->cc = NOUN; collect1: + nsleng++; sentp->leng = yyleng-2; sentp++->sp = sptr; if(sentp >= &sent[SLENG-1]){ @@ -119,6 +133,7 @@ sdone: if(yytext[2]=='?')question=1; else question=0; +fragment: jj=0; if(quote == 1 && sent[jj].cc == ED){ sent[jj].cc = VERB; @@ -152,6 +167,18 @@ sdone: if(*(st+1) == '"') if(*st == '?' || *st == '!')quote = 1; outp(); + nsleng = 0; + if(nroff){ + if(sentno > 0){ + printf(".SL \""); + for(i=0;icc = sentp->ic = END; + sentp++->sp = sptr; + *sptr++ = '.'; + *sptr++ = '\0'; + over = 1; + nroff = 1; + goto fragment; + } + if(sentno > 0){ + printf(".SL \""); + for(i=0;i 49)mg = 49; ml = avw - 4.5; if(ml <= 0)ml = 1; - else if(ml > 49)ml = 48; + else if(ml > 49)ml=48; gsum = lsum = 0; for(ii=0;ii<50;ii++){ if(ii < ml)lsum += sleng[ii]; @@ -227,9 +274,23 @@ yywrap(){ printf(" subject opener: noun (%d) pron (%d) pos (%d) adj (%d) art (%d) tot %3.0f%%\n", beg[0],beg[7],beg[6],beg[3],beg[8],sperc(ii)); printf(" prep %3.0f%% (%d) adv %3.0f%% (%d) \n",sperc(beg[9]),beg[9],sperc(beg[4]),beg[4]); - printf(" verb %3.0f%% (%d) ",sperc(beg[1]+beg[10]+beg[12]),beg[1]+beg[10]+beg[12]); + printf(" verb %3.0f%% (%d) ",sperc(beg[1]+beg[10]+beg[11]),beg[1]+beg[10]+beg[11]); printf(" sub_conj %3.0f%% (%d) conj %3.0f%% (%d)\n",sperc(beg[13]),beg[13],sperc(beg[5]),beg[5]); printf(" expletives %3.0f%% (%d)\n",sperc(beg[14]),beg[14]); +#ifdef SCATCH + if(nosave && (fopen(SCATCH,"r")) != NULL){ + if(((io=fopen(SCATCH,"a")) != NULL)){ + fprintf(io," read %4.1f %4.1f %4.1f %4.1f %4.1f\n",kindex, aindex, cindex, findex, fgrad); + fprintf(io," sentl %d %ld %4.2f %4.2f %d %d %ld %4.2f\n",numsent,numwds,avw,avl,qcount,icount,numnonf,snonf); + fprintf(io," l var %d %d %d %d %d\n",ml,lsum,mg,gsum,maxsent); + fprintf(io," t var %d %d %d %d\n",simple,complex,compound,compdx); + fprintf(io," verbs %d %d %d %d %d %d\n",tverbc,verbc,tobe,aux,infin,passive); + fprintf(io," ty %d %d %d %d %d %d %d\n",prepc,conjc,adv,noun,adj,pron,nomin); + fprintf(io," beg %d %d %d %d %d %d\n",beg[0],beg[7],beg[6],beg[3],beg[8],ii); + fprintf(io," sbeg %d %d %d %d %d %d\n",beg[9],beg[4],beg[1]+beg[10]+beg[11],beg[13],beg[5],beg[14]); + } + } +#endif } return(1); } @@ -259,6 +320,8 @@ char **argv; { while(--argc > 0 && (++argv)[0][0] == '-' ){ switch(argv[0][1]){ + case 'd': nosave = 0; + continue; case 's': style=1; continue; case 'p': pastyle=style=1; @@ -267,15 +330,30 @@ char **argv; continue; case 'e': estyle = style = 1; continue; + case 'n': nstyle = style = 1; + continue; + case 'N': Nstyle = style = 1; + continue; case 'l': style=lstyle=1; lthresh = atoi(*(++argv)); + argc--; continue; case 'r': style=rstyle=1; rthresh = atoi(*(++argv)); + argc--; continue; case 'P': part = 1; + style = 0; + continue; + case 'b': /* print bare bones info rje */ + barebones = 1; + style = 0; + continue; + case 'T': /*topic*/ + style = 0; + topic = 1; continue; default: fprintf(stderr,"unknown flag to part %s\n",*argv); @@ -283,5 +361,18 @@ char **argv; } argv++; } +#ifdef SNOM + if(fopen(SNOM,"r") != NULL){ + deb = fopen(SNOM,"a"); /* SAVE NOM*/ + } +#else + deb = NULL; +#endif yylex(); + if(nrofflg && sentno > 0){ + printf(".SL \""); + for(i=0;i