BSD 4 development
authorBill Joy <wnj@ucbvax.Berkeley.EDU>
Thu, 17 Jan 1980 07:25:49 +0000 (23:25 -0800)
committerBill Joy <wnj@ucbvax.Berkeley.EDU>
Thu, 17 Jan 1980 07:25:49 +0000 (23:25 -0800)
Work on file usr/src/cmd/diction/diction.c

Synthesized-from: CSRG//cd1/4.0

usr/src/cmd/diction/diction.c [new file with mode: 0644]

diff --git a/usr/src/cmd/diction/diction.c b/usr/src/cmd/diction/diction.c
new file mode 100644 (file)
index 0000000..2bafd49
--- /dev/null
@@ -0,0 +1,394 @@
+/*
+ * diction -- print all sentences containing one of the default phrases
+ *
+ *     status returns:
+ *             0 - ok, and some matches
+ *             1 - ok, but no matches
+ *             2 - some error
+ */
+
+#include <stdio.h>
+#include <ctype.h>
+
+#define        MAXSIZ 6500     /* number of nodes available in w[] */
+#define QSIZE 650      /* capacity of the work queue used by cfail() */
+struct words { /* one node of the phrase-matching automaton */
+       char    inp;    /* character that selects this node; 0 marks a node not yet filled in */
+       char    out;    /* 0: no phrase ends here; 0377: a '~' phrase ends here; else the count cgotofn() stored */
+       struct  words *nst;     /* node to move to when inp matches */
+       struct  words *link;    /* next alternative to try when inp does not match */
+       struct  words *fail;    /* fallback node filled in by cfail(); 0 means restart at w */
+} w[MAXSIZ], *smax, *q;        /* w: node pool, w[0] is the start node; smax: last node handed out; q: scratch pointer used by cfail() */
+
+int fflag;     /* incremented for each -f flag */
+int nflag      = 1; /*use default file*/
+char *filename;        /* phrase file given with -f */
+int    nfile;  /* argument count left after the flags, as set by main() */
+int    nsucc;  /* set to 1 once a phrase has been reported; decides the exit status */
+long nsent = 0;        /* sentence terminators counted by convert() */
+long nhits = 0;        /* phrases reported by execute() */
+char *nlp;     /* next buffer position outc() will print */
+char *begp, *endp;     /* first and last character of the phrase outc() brackets */
+int oct = 0;   /* characters printed since the last line break made by outc() */
+FILE   *wordf; /* open phrase file read by getargc() */
+char   *argptr;        /* string getargc() reads from when wordf is NULL */
+
+/*
+ * main -- parse the flags, load the phrase list, then scan each input.
+ *
+ *     flags:
+ *             -f file   take phrases from file
+ *             -n        do not use the default dictionary (DICT)
+ *             -d        accepted and ignored
+ *
+ *     Remaining arguments are input files; with none, execute() is called
+ *     with NULL.  Exits 0 if some phrase matched, 1 if none did, 2 on error.
+ */
+main(argc, argv)
+char **argv;
+{
+       while (--argc > 0 && (++argv)[0][0]=='-')
+               switch (argv[0][1]) {
+
+               case 'f':
+                       /* argc still counts the "-f" itself, so a file name needs argc >= 2 */
+                       if (argc < 2) {
+                               fprintf(stderr, "diction: -f requires a file name\n");
+                               exit(2);
+                       }
+                       fflag++;
+                       /* filename is a char *: store the argument string, not the argv slot */
+                       filename = *++argv;
+                       argc--;
+                       continue;
+
+               case 'n':
+                       nflag = 0;
+                       continue;
+               case 'd':
+                       continue;
+               default:
+                       fprintf(stderr, "diction: unknown flag\n");
+                       continue;
+               }
+
+       if(nflag){
+               wordf = fopen(DICT,"r");
+               if(wordf == NULL){
+                       fprintf(stderr,"diction: can't open default dictionary\n");
+                       exit(2);
+               }
+       }
+       else {
+               /* -n without -f leaves no phrase source at all */
+               if(filename == NULL){
+                       fprintf(stderr,"diction: -n given without -f file\n");
+                       exit(2);
+               }
+               wordf = fopen(filename,"r");
+               if(wordf == NULL){
+                       fprintf(stderr,"diction: can't open %s\n",filename);
+                       exit(2);
+               }
+       }
+
+       cgotofn();
+       cfail();
+       nfile = argc;
+       if (argc<=0) {
+               execute((char *)NULL);
+       }
+       else while (--argc >= 0) {
+               execute(*argv);
+               argv++;
+       }
+       printf("number of sentences %ld number of hits %ld\n",nsent,nhits);
+       /* nsucc is set by execute() on the first reported phrase */
+       exit(nsucc == 0);
+}
+
+execute(file) /*
+ * execute -- scan one input for the phrases loaded into w[].
+ *
+ *     file    path opened read-only with open(); when NULL, descriptor 0
+ *             is read instead.
+ *
+ * Input is read into buf[] in pieces of at most 512 bytes, wrapping to the
+ * start of buf when its end is reached; each piece is normalized by
+ * convert() before matching.  After a phrase matches, text is echoed
+ * through outc() with the phrase bracketed via begp/endp; when a '.' ends
+ * a sentence that had a hit, the rest of it is printed followed by two
+ * newlines.  Exits with status 2 if file cannot be opened.
+ */
+char *file;
+{
+       register char *p;       /* current position in buf */
+       register struct words *c;       /* current automaton state */
+       register ccount;        /* characters left from the last read() */
+       struct words *savc;     /* state of a match that might still be extended */
+       char *savp;     /* buffer position of that pending match; 0 when none */
+       int savct;      /* ccount at the time the pending match was saved */
+       int scr;        /* NOTE(review): never used in this function */
+       char buf[1024];
+       int f;  /* input descriptor */
+       int hit;        /* nonzero once the current sentence has had a match */
+       if (file) {
+               if ((f = open(file, 0)) < 0) {
+                       fprintf(stderr, "diction: can't open %s\n", file);
+                       exit(2);
+               }
+       }
+       else f = 0;
+       ccount = 0;
+       p = buf;
+       nlp = p;
+       c = w;
+       oct = hit = 0;
+       savc = savp = 0;        /* NOTE(review): one chained assignment across a struct pointer and a char pointer */
+       for (;;) {
+               if (--ccount <= 0) {    /* buffer exhausted: refill */
+                       if (p == &buf[1024]) p = buf;   /* wrap to the start */
+                       if (p > &buf[512]) {
+                               if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;  /* read only up to the end of buf */
+                       }
+                       else if ((ccount = read(f, p, 512)) <= 0) break;
+                       convert(p,ccount);
+               }
+               if(p == &buf[1024])p=buf;
+               nstate: /* try to advance from state c on the character at p */
+                       if (c->inp == *p) {
+                               c = c->nst;
+                       }
+                       else if (c->link != 0) {        /* try the next alternative at this level */
+                               c = c->link;
+                               goto nstate;
+                       }
+                       else {  /* no transition on this character */
+                               if(savp != 0){  /* fall back to the pending shorter match and report it */
+                                       c=savc;
+                                       p=savp;
+                                       if(ccount > savct)ccount += savct;      /* NOTE(review): intent of this adjustment is unclear -- confirm */
+                                       else ccount = savct;
+                                       savc=savp=0;
+                                       goto hadone;
+                               }
+                               c = c->fail;
+                               if (c==0) {     /* no fallback: restart at the start node and try *p there */
+                                       c = w;
+                                       istate:
+                                       if (c->inp == *p) {
+                                               c = c->nst;
+                                       }
+                                       else if (c->link != 0) {
+                                               c = c->link;
+                                               goto istate;
+                                       }
+                               }
+                               else goto nstate;
+                       }
+               if(c->out){     /* a phrase ends in this state */
+                       if((c->inp == *(p+1)) && (c->nst != 0)){        /* next character would continue: remember this match and keep going; NOTE(review): *(p+1) is read with no wrap or refill check */
+                               savp=p;
+                               savc=c;
+                               savct=ccount;
+                               goto cont;
+                       }
+                       else if(c->link != 0){  /* same look-ahead test against each alternative */
+                               savc=c;
+                               while((savc=savc->link)!= 0){
+                                       if(savc->inp == *(p+1)){
+                                               savp=p;
+                                               savc=c;
+                                               savct=ccount;
+                                               goto cont;
+                                       }
+                               }
+                       }
+               hadone: /* report the match ending at p */
+                       savc=savp=0;
+                       if(c->out == (char)(0377)){     /* 0377 is what cgotofn() stores for a '~' phrase: not reported */
+                               c=w;
+                               goto nstate;
+                       }
+                       begp = p - (c->out);    /* step back by the count cgotofn() stored to reach the phrase start */
+                       if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp);   /* phrase started before the wrap */
+                       endp=p;
+                       hit = 1;
+                       nhits++;
+                       if (*p++ == '.') {      /* match ended on a sentence terminator: step past it, refilling if needed */
+                               if (--ccount <= 0) {
+                                       if (p == &buf[1024]) p = buf;
+                                       if (p > &buf[512]) {
+                                               if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
+                                       }
+                                       else if ((ccount = read(f, p, 512)) <= 0) break;
+                                       convert(p,ccount);
+                               }
+                       }
+       succeed:        nsucc = 1;      /* label is not the target of any goto in this function */
+                       {
+                               if (p <= nlp) { /* p has wrapped: print up to the end of buf first */
+                                       outc(&buf[1024]);
+                                       nlp = buf;
+                               }
+                               outc(p);
+                       }
+       nomatch:        /* label is not the target of any goto in this function */
+                       nlp = p;
+                       c = w;
+                       begp = endp = 0;
+                       continue;
+               }
+       cont:
+               if (*p++ == '.'){       /* end of sentence */
+                               if(hit){        /* flush the rest of a sentence that contained a match */
+                                       if(p <= nlp){
+                                               outc(&buf[1024]);
+                                               nlp = buf;
+                                       }
+                                       outc(p);
+                                       putchar('\n'); putchar('\n');
+                                       }
+                               hit = 0;
+                               oct = 0;
+                               nlp = p;
+                               c = w;
+                               begp = endp = 0;
+                       }
+       }
+       close(f);
+}
+
+/*
+ * getargc -- return the next character of the phrase list, or EOF.
+ *
+ * While wordf is set, characters come from that stream.  At its end the
+ * stream is closed; if the default dictionary was in use (nflag) and a -f
+ * file was also given (fflag), reading switches to filename and continues
+ * from there.  When wordf is NULL, characters come from the string argptr
+ * and its terminating '\0' is reported as EOF.
+ * Exits with status 2 if the -f file cannot be opened.
+ */
+getargc()
+{
+       register c;
+
+       if (wordf == NULL) {
+               if ((c = *argptr++) == '\0')
+                       return(EOF);
+               return(c);
+       }
+       if ((c = getc(wordf)) != EOF)
+               return(c);
+
+       fclose(wordf);
+       if (!(nflag && fflag))
+               return(EOF);
+
+       /* default dictionary exhausted: carry on with the -f file */
+       nflag = 0;
+       /* filename is a char *, so it is the path itself */
+       wordf = fopen(filename,"r");
+       if (wordf == NULL) {
+               /* fprintf needs the stream as its first argument */
+               fprintf(stderr,"can't open %s\n",filename);
+               exit(2);
+       }
+       return(getc(wordf));
+}
+
+cgotofn() {   /*
+        * cgotofn -- build the phrase automaton in w[] from the phrase list.
+        *
+        * Characters come from getargc(); each newline-terminated line is one
+        * phrase.  For every character the link chain at the current level is
+        * searched for a node whose inp matches; when there is none, the rest
+        * of the line is added as a fresh chain of nodes (label enter).  The
+        * node reached at the end of a phrase gets out = 0377 if a '~' was
+        * seen, otherwise ct-1.  overflo() is called when w[] is used up.
+        * NOTE(review): this looks like the goto-function half of an
+        * Aho-Corasick style matcher, with cfail() as the other half -- confirm.
+        */
+       register c;     /* current input character, or EOF */
+       register struct words *s;       /* node being examined or filled */
+       register ct;    /* characters consumed so far for the current phrase */
+       int neg;        /* nonzero once '~' has been seen for the current phrase */
+
+       s = smax = w;
+       neg = ct = 0;
+nword: for(;;) {
+               c = getargc();
+               if(c == '~'){   /* '~' is not stored: note it and take the next character */
+                       neg++;
+                       c = getargc();
+               }
+               if (c==EOF)
+                       return;
+               if (c == '\n') {        /* phrase ended on nodes that already existed */
+                       if(neg)s->out = 0377;
+                       else s->out = ct-1;
+                       neg = ct = 0;
+                       s = w;  /* next phrase starts at the start node */
+               } else {
+               loop:   if (s->inp == c) {      /* existing node matches: follow it */
+                               s = s->nst;
+                               ct++;
+                               continue;
+                       }
+                       if (s->inp == 0) goto enter;    /* empty node: fill it in */
+                       if (s->link == 0) {     /* end of the alternatives: append a new one */
+                               if (smax >= &w[MAXSIZ - 1]) overflo();
+                               s->link = ++smax;
+                               s = smax;
+                               goto enter;
+                       }
+                       s = s->link;
+                       goto loop;
+               }
+       }
+
+       enter:  /* store the remainder of the line as new nodes, one per character */
+       do {
+               s->inp = c;
+               ct++;
+               if (smax >= &w[MAXSIZ - 1]) overflo();
+               s->nst = ++smax;
+               s = smax;
+       } while ((c = getargc()) != '\n' && c!=EOF);
+       if(neg)smax->out = 0377;
+       else smax->out = ct-1;
+       neg = ct = 0;
+       s = w;
+       if (c != EOF)
+               goto nword;
+}
+
+/*
+ * overflo -- print "wordlist too large" on stderr and exit with status 2.
+ * Called when w[] or the queue in cfail() has no room left.
+ */
+overflo()
+{
+       fputs("wordlist too large\n", stderr);
+       exit(2);
+}
+cfail() {     /*
+        * cfail -- fill in the fail pointers of the automaton built by cgotofn().
+        *
+        * Nodes are visited level by level using queue[] as a circular work
+        * list.  For a node with character c, the fail chain of its parent
+        * level is searched for a node that also accepts c; the successor of
+        * the node found becomes the fail target of this node's successor and
+        * of every alternative linked to it.  A non-zero out on the fail
+        * target is copied to targets that have none.  overflo() is called
+        * when the queue is full, so that case also prints its message.
+        */
+       struct words *queue[QSIZE];
+       struct words **front, **rear;   /* front: next entry to take; rear: next free slot */
+       struct words *state;    /* candidate node on the fail chain */
+       int bstart;     /* set once the search has restarted from w */
+       register char c;
+       register struct words *s;
+       s = w;
+       front = rear = queue;
+init:  if ((s->inp) != 0) {    /* queue the successor of every first-level node */
+               *rear++ = s->nst;
+               if (rear >= &queue[QSIZE - 1]) overflo();
+       }
+       if ((s = s->link) != 0) {
+               goto init;
+       }
+
+       while (rear!=front) {
+               s = *front;
+               if (front == &queue[QSIZE-1])   /* take the next entry, wrapping at the end of queue */
+                       front = queue;
+               else front++;
+       cloop:  if ((c = s->inp) != 0) {
+                       bstart=0;
+                       *rear = (q = s->nst);   /* queue the successor for a later pass */
+                       if (front < rear)       /* advance rear with wraparound; full queue is an overflow */
+                               if (rear >= &queue[QSIZE-1])
+                                       if (front == queue) overflo();
+                                       else rear = queue;
+                               else rear++;
+                       else
+                               if (++rear == front) overflo();
+                       state = s->fail;
+               floop:  if (state == 0){ state = w;bstart=1;}   /* no fail target: search from the start node */
+                       if (state->inp == c) {
+                       qloop:  q->fail = state->nst;
+                               if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out;  /* a phrase ending at the fail target also ends here */
+                               if((q=q->link) != 0)goto qloop; /* same fail target for every alternative of q */
+                       }
+                       else if ((state = state->link) != 0)
+                               goto floop;
+                       else if(bstart==0){state=0; goto floop;}        /* alternatives exhausted: retry once from the start node */
+               }
+               if ((s = s->link) != 0)
+                       goto cloop;
+       }
+/*     for(s=w;s<=smax;s++)
+               printf("s %d ch %c out %d nst %d link %d fail %d\n",s,
+                       s->inp,s->out,s->nst,s->link,s->fail);
+*/
+}
+/*
+ * convert -- normalize ccount characters in place, starting at p.
+ *
+ *     upper case letters      become lower case
+ *     white space             becomes ' '
+ *     '.', '?' and '!'        become '.', and nsent is incremented for each
+ *     other punctuation       becomes ' '
+ *
+ * Anything else is left unchanged.
+ */
+convert(p,ccount)
+char *p;
+{
+       int ct;
+       int ch;
+       char *pt;
+       for(pt=p,ct=ccount;--ct>=0;pt++){
+               /*
+                * The <ctype.h> tests are only defined for EOF and values in
+                * the unsigned char range, so mask off any sign extension.
+                */
+               ch = *pt & 0377;
+               if(isupper(ch))*pt=tolower(ch);
+               else if(isspace(ch))*pt=' ';
+               else if(ch=='.' || ch=='?'||ch=='!'){
+                       *pt='.';
+                       nsent++;
+               }
+               else if(ispunct(ch))*pt=' ';
+       }
+}
+/*
+ * outc -- print the buffered text from nlp up to, but not including, addr.
+ *
+ * nlp is advanced to addr.  '[' is written before the character at begp
+ * and ']' after the character at endp.  Once more than 70 characters have
+ * gone out since the last break, a newline is inserted before the next
+ * blank, unless that blank is itself at begp or endp.
+ */
+outc(addr)
+char *addr;
+{
+       int fold;
+
+       for(; nlp < addr; nlp++){
+               /* oct is counted for every character, whether or not we fold */
+               fold = (oct++ > 70) && *nlp == ' ' && nlp != begp && nlp != endp;
+               if(fold){
+                       oct=0;
+                       putchar('\n');
+               }
+               if(nlp == begp)
+                       putchar('[');
+               putchar(*nlp);
+               if(nlp == endp)
+                       putchar(']');
+       }
+}