Commit | Line | Data |
---|---|---|
a1606d18 | 1 | #ifndef lint |
33d22bf7 | 2 | static char *sccsid = "@(#)hunt1.c 4.2 (Berkeley) %G%"; |
a1606d18 BT |
3 | #endif |
4 | ||
33d22bf7 GL |
5 | # include <stdio.h> |
6 | # include <assert.h> | |
a1606d18 BT |
/*
 * File-scope state (descriptions are limited to what main() and todir()
 * in this file visibly do with each variable):
 *
 *   refdir    - defined elsewhere; not referenced by the functions here.
 *   keepold   - defined elsewhere; the -g option sets it to 0
 *               ("suppress fgrep search on old files").
 *   fgnames   - defined elsewhere; array of file names to be searched by
 *               the fgrep fallback.
 *   fgnamp    - defined elsewhere; points one past the last used slot of
 *               fgnames.
 *   fd        - stream opened on "<index>.id"; may be NULL.
 *   lmaster   - element count used when allocating the `master` list;
 *               the -l option overrides the default of 500.
 *   hfreq     - table of nhash ints read from the ".ia" file after the
 *               hash table.
 *   hfrflg    - nonzero when the whole hfreq table was read.
 *   colevel   - coordination level, set by -C.
 *   measure   - set by -m; when set, tick()/tock() bracket each query.
 *   soutlen   - set from argv by the -s/-o option; default 1000.
 *   reached   - not referenced by the functions in this file.
 *   iflong    - second int read from the ".ia" file; selects a 4-byte
 *               rather than 2-byte element size for `master`.
 *   prfreqs   - set by -P ("print term freqs").
 *   usedir    - directory most recently passed to chdir() by todir().
 *   gfile     - not referenced by the functions in this file.
 *   full      - value from -F via setfrom(); passed to baddrop() and
 *               gates the full-text output of the fgrep fallback.
 *   tags      - value from -T via setfrom(); when nonzero, caps the count
 *               handed to result().
 *   sinput    - set by -i ("input in argument string").
 *   soutput   - set by -s/-o ("text output to string").
 *   tagout    - set by -t ("tag output to string"); main() falls back to
 *               an internal buffer when it is NULL.
 *   indexdate - value returned by gdate() for the ".ib" stream.
 */
7 | extern char refdir[]; |
8 | extern int keepold; | |
9 | extern char *fgnames[]; | |
10 | extern char **fgnamp; | |
33d22bf7 GL |
11 | FILE *fd =NULL; |
12 | int lmaster =500; | |
a1606d18 | 13 | int *hfreq, hfrflg; |
33d22bf7 GL |
14 | int colevel =0; |
15 | int measure=0; | |
16 | int soutlen =1000; | |
17 | int reached =0; | |
18 | int iflong =0; | |
19 | int prfreqs =0; | |
a1606d18 | 20 | char usedir[100]; |
33d22bf7 GL |
21 | char * calloc(); |
22 | char * todir(); | |
a1606d18 | 23 | char gfile[50]; |
33d22bf7 GL |
24 | static int full =1000; |
25 | static int tags =0; | |
a1606d18 | 26 | char *sinput, *soutput, *tagout; |
33d22bf7 | 27 | long indexdate =0, gdate(); |
a1606d18 BT |
28 | |
29 | main(argc,argv) | |
30 | char *argv[]; | |
31 | { | |
32 | /* read query from stdin, expect name of indexes in argv[1] */ | |
33 | static FILE *fa, *fb, *fc; | |
33d22bf7 | 34 | char nma[100], nmb[100], nmc[100], *qitem[100], *rprog = NULL; |
a1606d18 BT |
35 | char nmd[100], grepquery[256]; |
36 | static char oldname[30] ; | |
33d22bf7 | 37 | static int was =0; |
a1606d18 BT |
38 | /* these pointers are unions of pointer to int and pointer to long */ |
39 | long *hpt; | |
33d22bf7 | 40 | unsigned *master =0; |
a1606d18 BT |
41 | int falseflg, nhash, nitem, nfound, frtbl, kk; |
42 | ||
43 | /* special wart for refpart: default is tags only */ | |
44 | ||
45 | while (argv[1][0] == '-') | |
46 | { | |
47 | switch(argv[1][1]) | |
48 | { | |
49 | case 'a': /* all output, incl. false drops */ | |
50 | falseflg = 1; | |
51 | break; | |
52 | case 'r': | |
53 | argc--; | |
54 | argv++; | |
55 | rprog = argv[1]; | |
56 | break; | |
57 | case 'F': /* put out full text */ | |
58 | full = setfrom(argv[1][2]); | |
59 | break; | |
60 | case 'T': /* put out tags */ | |
61 | tags = setfrom(argv[1][2]); | |
62 | break; | |
63 | case 'i': /* input in argument string */ | |
64 | argc--; | |
65 | argv++; | |
66 | sinput = argv[1]; | |
67 | break; | |
68 | case 's': /*text output to string */ | |
69 | case 'o': | |
70 | argc--; | |
71 | argv++; | |
72 | soutput = argv[1]; | |
73 | if (argv[2]<16000) | |
74 | { | |
33d22bf7 | 75 | soutlen = (int)argv[2]; |
a1606d18 BT |
76 | argc--; |
77 | argv++; | |
78 | } | |
79 | break; | |
80 | case 't': /*tag output to string */ | |
81 | argc--; | |
82 | argv++; | |
83 | tagout = argv[1]; | |
84 | break; | |
85 | case 'l': /* length of internal lists */ | |
86 | argc--; | |
87 | argv++; | |
88 | lmaster = atoi(argv[1]); | |
89 | break; | |
90 | case 'g': /* suppress fgrep search on old files */ | |
91 | keepold = 0; | |
92 | break; | |
93 | case 'C': /* coordination level */ | |
94 | colevel = atoi(argv[1]+2); | |
95 | # if D1 | |
96 | fprintf(stderr, "colevel set to %d\n",colevel); | |
97 | # endif | |
98 | break; | |
99 | case 'P': /* print term freqs */ | |
100 | prfreqs=1; | |
101 | break; | |
33d22bf7 GL |
102 | case 'm': |
103 | measure=1; | |
104 | break; | |
a1606d18 BT |
105 | } |
106 | argc--; | |
107 | argv++; | |
108 | } | |
109 | strcpy (nma, todir(argv[1])); | |
110 | if (was == 0 || strcmp (oldname, nma) !=0) | |
111 | { | |
112 | strcpy (oldname,nma); | |
113 | strcpy (nmb, nma); | |
114 | strcpy (nmc, nmb); | |
115 | strcpy(nmd,nma); | |
116 | strcat (nma, ".ia"); | |
117 | strcat (nmb, ".ib"); | |
118 | strcat (nmc, ".ic"); | |
119 | strcat (nmd, ".id"); | |
120 | if (was) | |
121 | { | |
122 | fclose(fa); | |
123 | fclose(fb); | |
124 | fclose(fc); | |
125 | } | |
126 | ||
127 | fa = fopen(nma, "r"); | |
128 | if (fa==NULL) | |
129 | { | |
33d22bf7 GL |
130 | strcpy(*fgnamp++ = calloc(strlen(oldname)+2,1), oldname); |
131 | fb=NULL; | |
132 | goto search; | |
a1606d18 BT |
133 | } |
134 | fb = fopen(nmb, "r"); | |
135 | fc = fopen(nmc, "r"); | |
136 | was =1; | |
137 | if (fb== NULL || fc ==NULL) | |
138 | { | |
139 | err("Index incomplete %s", nmb); | |
140 | exit(1); | |
141 | } | |
142 | indexdate = gdate(fb); | |
143 | fd = fopen(nmd, "r"); | |
144 | } | |
145 | fseek (fa, 0L, 0); | |
146 | fread (&nhash, sizeof(nhash), 1, fa); | |
147 | fread (&iflong, sizeof(iflong), 1, fa); | |
148 | if(master==0) | |
33d22bf7 | 149 | master = calloc (lmaster, iflong? 4: 2); |
a1606d18 BT |
150 | hpt = calloc(nhash, sizeof(*hpt)); |
151 | kk=fread( hpt, sizeof(*hpt), nhash, fa); | |
152 | # if D1 | |
153 | fprintf(stderr,"read %d hashes, iflong %d, nhash %d\n", kk, iflong, nhash); | |
154 | # endif | |
155 | _assert (kk==nhash); | |
156 | hfreq = calloc(nhash, sizeof(*hfreq)); | |
157 | _assert (hfreq != NULL); | |
158 | frtbl = fread(hfreq, sizeof(*hfreq), nhash, fa); | |
159 | hfrflg = (frtbl == nhash); | |
160 | # if D1 | |
161 | fprintf(stderr, "read freqs %d\n", frtbl); | |
162 | # endif | |
163 | ||
33d22bf7 | 164 | search: |
a1606d18 BT |
165 | while (1) |
166 | { | |
167 | nitem = getq(qitem); | |
33d22bf7 | 168 | if (measure) tick(); |
a1606d18 BT |
169 | if (nitem==0) continue; |
170 | if (nitem < 0) break; | |
33d22bf7 GL |
171 | if (tagout) tagout[0]=0; |
172 | if (fb!=NULL) | |
173 | { | |
174 | nfound = doquery(hpt, nhash, fb, nitem, qitem, master); | |
a1606d18 | 175 | # if D1 |
33d22bf7 | 176 | fprintf(stderr,"after doquery nfound %d\n", nfound); |
a1606d18 | 177 | # endif |
33d22bf7 GL |
178 | fgnamp=fgnames; |
179 | if (falseflg == 0) | |
180 | nfound = baddrop(master, nfound, fc, nitem, qitem, rprog, full); | |
a1606d18 | 181 | # if D1 |
33d22bf7 | 182 | fprintf(stderr,"after baddrop nfound %d\n", nfound); |
a1606d18 | 183 | # endif |
33d22bf7 | 184 | } |
a1606d18 BT |
185 | if (fgnamp>fgnames) |
186 | { | |
187 | char **fgp, tgbuff[100]; | |
188 | int k; | |
189 | # if D1 | |
190 | fprintf(stderr, "were %d bad files\n", fgnamp-fgnames); | |
191 | # endif | |
192 | grepquery[0]=0; | |
193 | for(k=0; k<nitem; k++) | |
194 | { | |
195 | strcat(grepquery, " "); | |
196 | strcat(grepquery, qitem[k]); | |
197 | } | |
198 | # if D1 | |
199 | fprintf(stderr, "grepquery %s\n",grepquery); | |
200 | # endif | |
201 | for(fgp=fgnames; fgp<fgnamp; fgp++) | |
202 | { | |
203 | # if D1 | |
204 | fprintf(stderr, "Now on %s query /%s/\n", *fgp, grepquery); | |
205 | # endif | |
206 | makefgrep(*fgp); | |
207 | # if D1 | |
208 | fprintf(stderr, "grepmade\n"); | |
209 | # endif | |
210 | if (tagout==0) | |
211 | tagout=tgbuff; | |
212 | grepcall(grepquery, tagout, *fgp); | |
213 | # if D1 | |
214 | fprintf(stderr, "tagout now /%s/\n", tagout); | |
215 | # endif | |
216 | if (full) | |
217 | { | |
218 | char bout[1000]; | |
33d22bf7 GL |
219 | char *tagp; |
220 | char *oldtagp; | |
221 | tagp = tagout; | |
222 | while (*tagp) { | |
223 | oldtagp = tagp; | |
224 | while (*tagp && (*tagp != '\n')) | |
225 | tagp++; | |
226 | if (*tagp) | |
227 | tagp++; | |
228 | findline(oldtagp, bout, 1000); | |
229 | fputs(bout,stdout); | |
230 | } | |
a1606d18 BT |
231 | } |
232 | } | |
233 | } | |
234 | if (tags) | |
235 | result (master, nfound >tags ? tags: nfound, fc); | |
33d22bf7 | 236 | if (measure) tock(); |
a1606d18 BT |
237 | } |
238 | } | |
239 | ||
240 | char * | |
241 | todir(t) | |
242 | char *t; | |
243 | { | |
244 | char *s; | |
245 | s=t; | |
246 | while (*s) s++; | |
247 | while (s>=t && *s != '/') s--; | |
248 | if (s<t) return(t); | |
249 | *s++ = 0; | |
250 | t = (*t ? t : "/"); | |
251 | chdir (t); | |
252 | strcpy (usedir,t); | |
253 | return(s); | |
254 | } | |
a1606d18 BT |
/*
 * setfrom -- decode the character that follows an option letter into a
 * count.
 *
 *   '1'..'9'          -> that digit's value
 *   'n' or '0'        -> 0
 *   'y', NUL, other   -> 1000
 */
int
setfrom(int c)
{
	if (c >= '1' && c <= '9')
		return (c - '0');
	if (c == 'n' || c == '0')
		return (0);
	return (1000);
}
277 | } |