Commit | Line | Data |
---|---|---|
2b1a705e TL |
1 | # include "what..c" |
2 | struct wst { char *tx; int ct; } ; | |
3 | # define NW 5 | |
4 | # define ZIPF 10 | |
5 | # define HASHF 3 | |
6 | # define WLEN 10 | |
7 | # define SAME 0 | |
8 | # define TSIZE HASHF*ZIPF*NW | |
9 | int HSIZE; | |
10 | static struct wst word[TSIZE]; | |
11 | static char tbuf[NW*ZIPF*WLEN], *tp tbuf; | |
12 | # define NF 10 | |
13 | ||
14 | freqwd ( fn, wd, nin ) | |
15 | char *fn[], *wd[]; | |
16 | { | |
17 | FILE *fi[NF]; | |
18 | int nw 0, i, any, nf, j, wexch(), wcomp(); | |
19 | char tw[20]; | |
20 | for(HSIZE=TSIZE; !prime(HSIZE); HSIZE--); | |
21 | for(nf=0; fn[nf] && nf<NF; nf++) | |
22 | fi[nf] = fn[nf][0] ? fopen(fn[nf], "r") : NULL; | |
23 | do { | |
24 | any=0; | |
25 | for(i=0; i<nf; i++) | |
26 | { | |
27 | if (fi[i]==NULL) continue; | |
28 | if (gw(fi[i], tw)==0) | |
29 | { | |
30 | fclose(fi[i]); | |
31 | fi[i]==NULL; | |
32 | continue; | |
33 | } | |
34 | any=1; | |
35 | if (common(tw)) continue; | |
36 | if (strlen(tw)<3) continue; | |
37 | j = lookup (tw); | |
38 | if (j<0 && nw < ZIPF*NW) | |
39 | { | |
40 | j = -j; | |
41 | strcpy (tp, tw); | |
42 | word[j].tx = tp; | |
43 | while (*tp++); | |
44 | _assert (tp < tbuf+NW*ZIPF*WLEN); | |
45 | word[j].ct = 1; | |
46 | nw++; | |
47 | } | |
48 | else if (j>0) | |
49 | word[j].ct++; | |
50 | } | |
51 | } while (any>0); | |
52 | shell ( TSIZE, wcomp, wexch ); | |
53 | for(nw=0; word[nw].ct >0 && nw<TSIZE; nw++) | |
54 | if (nw>=nin*2 && word[nw].ct != word[0].ct) | |
55 | break; | |
56 | for(i=0; i<nw; i++) | |
57 | wd[i] = word[i].tx; | |
58 | return(nw); | |
59 | } | |
60 | ||
61 | lookup (wt) | |
62 | char *wt; | |
63 | { | |
64 | int h; | |
65 | h = hash(wt); | |
66 | for( h = h%HSIZE; word[h].tx; h = (h+1)%HSIZE) | |
67 | { | |
68 | if (h==0) continue; | |
69 | if (strcmp(wt, word[h].tx) == SAME) | |
70 | return (h); | |
71 | } | |
72 | return ( -h ); | |
73 | } | |
74 | ||
75 | hash (s) | |
76 | char *s; | |
77 | { | |
78 | int k 0, c 0, i 0; | |
79 | while ( c = *s++ ) | |
80 | k ^= (c << (i++%5) ); | |
81 | return (k>0 ? k : -k); | |
82 | } | |
83 | ||
84 | gw (f, t) | |
85 | char *t; | |
86 | FILE *f; | |
87 | { | |
88 | int start 1, oldc ' ', c; | |
89 | if (f==NULL) return (0); | |
90 | while ( (c=getc(f)) != EOF) | |
91 | { | |
92 | if (isupper(c)) c= tolower(c); | |
93 | if (start==1) | |
94 | if (!alphanum(c, oldc)) | |
95 | continue; | |
96 | else | |
97 | start=0; | |
98 | if (start==0) | |
99 | if (alphanum(c, oldc)) | |
100 | *t++ = c; | |
101 | else | |
102 | { | |
103 | *t=0; | |
104 | return(1); | |
105 | } | |
106 | oldc=c; | |
107 | } | |
108 | return(0); | |
109 | } | |
110 | ||
111 | alphanum( c, oldc ) | |
112 | { | |
113 | if (isalpha(c) || isdigit(c)) return(1); | |
114 | if (isalpha(oldc)) | |
115 | if (c== '\'' || c == '-') return(1); | |
116 | return(0); | |
117 | } | |
118 | ||
119 | wcomp (n1, n2) | |
120 | { | |
121 | return (word[n1].ct >= word[n2].ct); | |
122 | } | |
123 | ||
124 | wexch (n1, n2) | |
125 | { | |
126 | struct wst tt; | |
127 | tt.tx = word[n1].tx; tt.ct = word[n1].ct; | |
128 | word[n1].tx = word[n2].tx; word[n1].ct = word[n2].ct; | |
129 | word[n2].tx = tt.tx; word[n2].ct = tt.ct; | |
130 | } | |
131 | ||
132 | prime(n) | |
133 | { | |
134 | /* only executed once- slow is ok */ | |
135 | int i; | |
136 | if (n%2==0) return(0); | |
137 | for(i=3; i*i<=n; i+= 2) | |
138 | if (n%i ==0 ) return(0); | |
139 | return(1); | |
140 | } | |
141 | trimnl(s) | |
142 | char *s; | |
143 | { | |
144 | while (*s)s++; | |
145 | if (*--s=='\n') *s=0; | |
146 | } | |
147 | ||
148 | ||
149 | /* this is the test for what4.c as a standalone prog ... | |
150 | main (argc, argv) | |
151 | char *argv[]; | |
152 | { | |
153 | char *ff[10], *wd[20], **ffp ff; | |
154 | int n, i; | |
155 | while (--argc) | |
156 | *ffp++ = *++argv; | |
157 | *ffp=0; | |
158 | n=freqwd(ff,wd); | |
159 | for(i=0; i<n; i++) | |
160 | printf("%s\n",wd[i]); | |
161 | printf("total of %d items\n",n); | |
162 | } | |
163 | /* .... */ |