Bell 32V development
[unix-history] / usr / src / cmd / refer / inv1.c
CommitLineData
2b1a705e
TL
1# include "stdio.h"
2# include "assert.h"
3
4main(argc, argv)
5 char *argv[];
6{
7/* make inverted file indexes. Reads a stream from mkey which
8 gives record pointer items and keys. Generates a set of files
9 a. NHASH pointers to file b.
10 b. lists of record numbers.
11 c. record pointer items.
12
13 these files are named xxx.ia, xxx.ib, xxx.ic;
14 where xxx is taken from arg1.
15 If the files exist they are updated.
16*/
17
18FILE *fa, *fb, *fc, *fta, *ftb, *ftc, *fd;
19int nhash = 256;
20int appflg = 1;
21int keepkey = 0, pipein = 0;
22char nma[100], nmb[100], nmc[100], com[100], nmd[100];
23char tmpa[20], tmpb[20], tmpc[20];
24char *remove = NULL;
25int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status;
26int i,j,k;
27long keys;
28int iflong =0;
29char *sortdir;
30
31sortdir = (access("/crp/tmp", 06)==0) ? "/crp/tmp" : "/usr/tmp";
32while (argv[1][0] == '-')
33 {
34 switch(argv[1][1])
35 {
36 case 'h': /* size of hash table */
37 nhash = atoi (argv[1]+2); break;
38 case 'n': /* new, don't append */
39 appflg=0; break;
40 case 'a': /* append to old file */
41 appflg=1; break;
42 case 'v': /* verbose output */
43 chatty=1; break;
44 case 'd': /* keep keys on file .id for check on searching */
45 keepkey=1; break;
46 case 'p': /* pipe into sort (saves space, costs time)*/
47 pipein = 1; break;
48 case 'i': /* input is on file, not stdin */
49 close(0);
50 if (open(argv[2], 0) != 0)
51 err("Can't read input %s", argv[2]);
52 if (argv[1][2]=='u') /* unlink */
53 remove = argv[2];
54 argc--; argv++;
55 break;
56 }
57 argc--;
58 argv++;
59 }
60
61strcpy (nma, argc >= 2 ? argv[1] : "Index");
62strcpy (nmb, nma);
63strcpy (nmc, nma);
64strcpy (nmd, nma);
65strcat (nma, ".ia");
66strcat (nmb, ".ib");
67strcat (nmc, ".ic");
68strcat (nmd, ".id");
69
70sprintf(tmpa, "junk%di", getpid());
71if (pipein)
72 {
73 pipe(fp); fr=fp[0]; fw=fp[1];
74 if ( (pfork=fork()) == 0)
75 {
76 close(fw);
77 close(0);
78 _assert(dup(fr)==0);
79 close(fr);
80 execl("/bin/sort", "sort", "-T", sortdir, "-o", tmpa, 0);
81 execl("/usr/bin/sort", "sort", "-T", sortdir, "-o", tmpa, 0);
82 _assert(0);
83 }
84 _assert(pfork!= -1);
85 close(fr);
86 fta = fopen("/dev/null", "w");
87 close(fta->_file);
88 fta->_file = fw;
89 }
90else /* use tmp file */
91 {
92 fta = fopen(tmpa, "w");
93 _assert (fta != NULL);
94 }
95fb = 0;
96if (appflg )
97 {
98 if (fb = fopen(nmb, "r"))
99 {
100 sprintf(tmpb, "junk%dj", getpid());
101 ftb = fopen(tmpb, "w");
102 if (ftb==NULL)
103 err("Can't get scratch file %s",tmpb);
104 nhash = recopy(ftb, fb, fopen(nma, "r"));
105 fclose(ftb);
106 }
107 else
108 appflg=0;
109 }
110fc = fopen(nmc, appflg ? "a" : "w");
111if (keepkey)
112fd = keepkey ? fopen(nmd, "w") : 0;
113docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
114fclose(stdin);
115if (remove != NULL)
116 unlink(remove);
117fclose(fta);
118if (pipein)
119 {
120 pwait = wait(&status);
121 printf("pfork %o pwait %o status %d\n",pfork,pwait,status);
122 _assert(pwait==pfork);
123 _assert(status==0);
124 }
125else
126 {
127 sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa);
128 system(com);
129 }
130
131if (appflg)
132 {
133 sprintf(tmpc, "junk%dk", getpid());
134 sprintf(com, "mv %s %s", tmpa, tmpc);
135 system(com);
136 sprintf(com, "sort -T %s -m %s %s -o %s", sortdir,
137 tmpb, tmpc, tmpa);
138 system(com);
139 }
140fta = fopen(tmpa, "r");
141fa = fopen(nma, "w");
142fb = fopen(nmb, "w");
143whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
144fclose(fta);
145# ifndef D1
146unlink(tmpa);
147# endif
148if (appflg)
149 {
150 unlink(tmpb);
151 unlink(tmpc);
152 }
153if (chatty)
154
155 printf ("%ld key occurrences, %d hashes, %d docs\n",
156 keys, hashes, docs);
157}