Research V7 development
[unix-history] / usr / src / cmd / refer / what4.c
CommitLineData
52960f2f
ML
1# include "what..c"
2struct wst { char *tx; int ct; } ;
3# define NW 5
4# define ZIPF 10
5# define HASHF 3
6# define WLEN 10
7# define SAME 0
8# define TSIZE HASHF*ZIPF*NW
9int HSIZE;
10static struct wst word[TSIZE];
11static char tbuf[NW*ZIPF*WLEN], *tp tbuf;
12# define NF 10
13
14freqwd ( fn, wd, nin )
15 char *fn[], *wd[];
16{
17 FILE *fi[NF];
18 int nw 0, i, any, nf, j, wexch(), wcomp();
19 char tw[20];
20for(HSIZE=TSIZE; !prime(HSIZE); HSIZE--);
21for(nf=0; fn[nf] && nf<NF; nf++)
22 fi[nf] = fn[nf][0] ? fopen(fn[nf], "r") : NULL;
23do {
24 any=0;
25 for(i=0; i<nf; i++)
26 {
27 if (fi[i]==NULL) continue;
28 if (gw(fi[i], tw)==0)
29 {
30 fclose(fi[i]);
31 fi[i]==NULL;
32 continue;
33 }
34 any=1;
35 if (common(tw)) continue;
36 if (strlen(tw)<3) continue;
37 j = lookup (tw);
38 if (j<0 && nw < ZIPF*NW)
39 {
40 j = -j;
41 strcpy (tp, tw);
42 word[j].tx = tp;
43 while (*tp++);
44 _assert (tp < tbuf+NW*ZIPF*WLEN);
45 word[j].ct = 1;
46 nw++;
47 }
48 else if (j>0)
49 word[j].ct++;
50 }
51 } while (any>0);
52shell ( TSIZE, wcomp, wexch );
53for(nw=0; word[nw].ct >0 && nw<TSIZE; nw++)
54 if (nw>=nin*2 && word[nw].ct != word[0].ct)
55 break;
56for(i=0; i<nw; i++)
57 wd[i] = word[i].tx;
58return(nw);
59}
60
61lookup (wt)
62 char *wt;
63{
64int h;
65h = hash(wt);
66for( h = h%HSIZE; word[h].tx; h = (h+1)%HSIZE)
67 {
68 if (h==0) continue;
69 if (strcmp(wt, word[h].tx) == SAME)
70 return (h);
71 }
72return ( -h );
73}
74
75hash (s)
76 char *s;
77{
78int k 0, c 0, i 0;
79while ( c = *s++ )
80 k ^= (c << (i++%5) );
81return (k>0 ? k : -k);
82}
83
84gw (f, t)
85 char *t;
86 FILE *f;
87{
88int start 1, oldc ' ', c;
89if (f==NULL) return (0);
90while ( (c=getc(f)) != EOF)
91 {
92 if (isupper(c)) c= tolower(c);
93 if (start==1)
94 if (!alphanum(c, oldc))
95 continue;
96 else
97 start=0;
98 if (start==0)
99 if (alphanum(c, oldc))
100 *t++ = c;
101 else
102 {
103 *t=0;
104 return(1);
105 }
106 oldc=c;
107 }
108return(0);
109}
110
111alphanum( c, oldc )
112{
113if (isalpha(c) || isdigit(c)) return(1);
114if (isalpha(oldc))
115 if (c== '\'' || c == '-') return(1);
116return(0);
117}
118
119wcomp (n1, n2)
120{
121return (word[n1].ct >= word[n2].ct);
122}
123
124wexch (n1, n2)
125{
126struct wst tt;
127tt.tx = word[n1].tx; tt.ct = word[n1].ct;
128word[n1].tx = word[n2].tx; word[n1].ct = word[n2].ct;
129word[n2].tx = tt.tx; word[n2].ct = tt.ct;
130}
131
132prime(n)
133{
134/* only executed once- slow is ok */
135int i;
136if (n%2==0) return(0);
137for(i=3; i*i<=n; i+= 2)
138 if (n%i ==0 ) return(0);
139return(1);
140}
141trimnl(s)
142 char *s;
143{
144 while (*s)s++;
145 if (*--s=='\n') *s=0;
146}
147
148
149/* this is the test for what4.c as a standalone prog ...
150main (argc, argv)
151 char *argv[];
152{
153char *ff[10], *wd[20], **ffp ff;
154int n, i;
155while (--argc)
156 *ffp++ = *++argv;
157*ffp=0;
158n=freqwd(ff,wd);
159for(i=0; i<n; i++)
160 printf("%s\n",wd[i]);
161printf("total of %d items\n",n);
162}
163 /* .... */