/*
 * BSD 4_3_Reno release
 * [unix-history] usr/src/usr.bin/refer/inv/inv1.c
 */
#ifndef lint
/* SCCS "@(#)" identification string; compiled only when lint is not defined. */
static char *sccsid = "@(#)inv1.c 4.3 (Berkeley) 5/11/89";
#endif
4
5#include <stdio.h>
6#include <assert.h>
ade4c407 7#include "pathnames.h"
ea99717b
BT
8
9main(argc, argv)
10char *argv[];
11{
12 /* Make inverted file indexes. Reads a stream from mkey which
13 * gives record pointer items and keys. Generates set of files
14 * a. NHASH pointers to file b.
15 * b. lists of record numbers.
16 * c. record pointer items.
17 *
18 * these files are named xxx.ia, xxx.ib, xxx.ic;
19 * where xxx is taken from arg1.
20 * If the files exist they are updated.
21 */
22
23 FILE *fa, *fb, *fc, *fta, *ftb, *ftc, *fd;
24 int nhash = 256;
25 int appflg = 1;
26 int keepkey = 0, pipein = 0;
27 char nma[100], nmb[100], nmc[100], com[100], nmd[100];
28 char tmpa[20], tmpb[20], tmpc[20];
29 char *remove = NULL;
30 int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status;
31 int i,j,k;
32 long keys;
33 int iflong =0;
34 char *sortdir;
35
ade4c407 36 sortdir = _PATH_USRTMP;
ea99717b
BT
37 while (argv[1][0] == '-')
38 {
39 switch(argv[1][1])
40 {
41 case 'h': /* size of hash table */
42 nhash = atoi (argv[1]+2);
43 break;
44 case 'n': /* new, don't append */
45 appflg=0;
46 break;
47 case 'a': /* append to old file */
48 appflg=1;
49 break;
50 case 'v': /* verbose output */
51 chatty=1;
52 break;
53 case 'd': /* keep keys on file .id for check on searching */
54 keepkey=1;
55 break;
56 case 'p': /* pipe into sort (saves space, costs time)*/
57 pipein = 1;
58 break;
59 case 'i': /* input is on file, not stdin */
60 close(0);
61 if (open(argv[2], 0) != 0)
62 err("Can't read input %s", argv[2]);
63 if (argv[1][2]=='u') /* unlink */
64 remove = argv[2];
65 argc--;
66 argv++;
67 break;
68 }
69 argc--;
70 argv++;
71 }
72 strcpy (nma, argc >= 2 ? argv[1] : "Index");
73 strcpy (nmb, nma);
74 strcpy (nmc, nma);
75 strcpy (nmd, nma);
76 strcat (nma, ".ia");
77 strcat (nmb, ".ib");
78 strcat (nmc, ".ic");
79 strcat (nmd, ".id");
80
81 sprintf(tmpa, "junk%di", getpid());
82 if (pipein)
83 {
84 pipe(fp);
85 fr=fp[0];
86 fw=fp[1];
87 if ( (pfork=fork()) == 0)
88 {
89 close(fw);
90 close(0);
91 _assert(dup(fr)==0);
92 close(fr);
ade4c407 93 execl(_PATH_SORT, "sort", "-T", sortdir, "-o", tmpa, 0);
ea99717b
BT
94 _assert(0);
95 }
96 _assert(pfork!= -1);
97 close(fr);
ade4c407 98 fta = fopen(_PATH_DEVNULL, "w");
ea99717b
BT
99 close(fta->_file);
100 fta->_file = fw;
101 }
102 else /* use tmp file */
103 {
104 fta = fopen(tmpa, "w");
105 _assert (fta != NULL);
106 }
107 fb = 0;
108 if (appflg )
109 {
110 if (fb = fopen(nmb, "r"))
111 {
112 sprintf(tmpb, "junk%dj", getpid());
113 ftb = fopen(tmpb, "w");
114 if (ftb==NULL)
115 err("Can't get scratch file %s",tmpb);
116 nhash = recopy(ftb, fb, fopen(nma, "r"));
117 fclose(ftb);
118 }
119 else
120 appflg=0;
121 }
122 fc = fopen(nmc, appflg ? "a" : "w");
ade4c407 123 fd = keepkey ? fopen(nmd, "w") : 0;
ea99717b
BT
124 docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
125 fclose(stdin);
126 if (remove != NULL)
127 unlink(remove);
128 fclose(fta);
129 if (pipein)
130 {
131 pwait = wait(&status);
132 printf("pfork %o pwait %o status %d\n",pfork,pwait,status);
133 _assert(pwait==pfork);
134 _assert(status==0);
135 }
136 else
137 {
138 sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa);
139 system(com);
140 }
141 if (appflg)
142 {
143 sprintf(tmpc, "junk%dk", getpid());
144 sprintf(com, "mv %s %s", tmpa, tmpc);
145 system(com);
146 sprintf(com, "sort -T %s -m %s %s -o %s", sortdir,
147 tmpb, tmpc, tmpa);
148 system(com);
149 }
150 fta = fopen(tmpa, "r");
151 fa = fopen(nma, "w");
152 fb = fopen(nmb, "w");
153 whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
154 fclose(fta);
155# ifndef D1
156 unlink(tmpa);
157# endif
158 if (appflg)
159 {
160 unlink(tmpb);
161 unlink(tmpc);
162 }
163 if (chatty)
164
165 printf ("%ld key occurrences, %d hashes, %d docs\n",
166 keys, hashes, docs);
167}