Commit | Line | Data |
---|---|---|
ea99717b | 1 | #ifndef lint |
1c15e888 | 2 | static char *sccsid = "@(#)inv1.c 4.3 (Berkeley) 5/11/89"; |
ea99717b BT |
3 | #endif |
4 | ||
5 | #include <stdio.h> | |
6 | #include <assert.h> | |
ade4c407 | 7 | #include "pathnames.h" |
ea99717b BT |
8 | |
9 | main(argc, argv) | |
10 | char *argv[]; | |
11 | { | |
12 | /* Make inverted file indexes. Reads a stream from mkey which | |
13 | * gives record pointer items and keys. Generates set of files | |
14 | * a. NHASH pointers to file b. | |
15 | * b. lists of record numbers. | |
16 | * c. record pointer items. | |
17 | * | |
18 | * these files are named xxx.ia, xxx.ib, xxx.ic; | |
19 | * where xxx is taken from arg1. | |
20 | * If the files exist they are updated. | |
21 | */ | |
22 | ||
23 | FILE *fa, *fb, *fc, *fta, *ftb, *ftc, *fd; | |
24 | int nhash = 256; | |
25 | int appflg = 1; | |
26 | int keepkey = 0, pipein = 0; | |
27 | char nma[100], nmb[100], nmc[100], com[100], nmd[100]; | |
28 | char tmpa[20], tmpb[20], tmpc[20]; | |
29 | char *remove = NULL; | |
30 | int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status; | |
31 | int i,j,k; | |
32 | long keys; | |
33 | int iflong =0; | |
34 | char *sortdir; | |
35 | ||
ade4c407 | 36 | sortdir = _PATH_USRTMP; |
ea99717b BT |
37 | while (argv[1][0] == '-') |
38 | { | |
39 | switch(argv[1][1]) | |
40 | { | |
41 | case 'h': /* size of hash table */ | |
42 | nhash = atoi (argv[1]+2); | |
43 | break; | |
44 | case 'n': /* new, don't append */ | |
45 | appflg=0; | |
46 | break; | |
47 | case 'a': /* append to old file */ | |
48 | appflg=1; | |
49 | break; | |
50 | case 'v': /* verbose output */ | |
51 | chatty=1; | |
52 | break; | |
53 | case 'd': /* keep keys on file .id for check on searching */ | |
54 | keepkey=1; | |
55 | break; | |
56 | case 'p': /* pipe into sort (saves space, costs time)*/ | |
57 | pipein = 1; | |
58 | break; | |
59 | case 'i': /* input is on file, not stdin */ | |
60 | close(0); | |
61 | if (open(argv[2], 0) != 0) | |
62 | err("Can't read input %s", argv[2]); | |
63 | if (argv[1][2]=='u') /* unlink */ | |
64 | remove = argv[2]; | |
65 | argc--; | |
66 | argv++; | |
67 | break; | |
68 | } | |
69 | argc--; | |
70 | argv++; | |
71 | } | |
72 | strcpy (nma, argc >= 2 ? argv[1] : "Index"); | |
73 | strcpy (nmb, nma); | |
74 | strcpy (nmc, nma); | |
75 | strcpy (nmd, nma); | |
76 | strcat (nma, ".ia"); | |
77 | strcat (nmb, ".ib"); | |
78 | strcat (nmc, ".ic"); | |
79 | strcat (nmd, ".id"); | |
80 | ||
81 | sprintf(tmpa, "junk%di", getpid()); | |
82 | if (pipein) | |
83 | { | |
84 | pipe(fp); | |
85 | fr=fp[0]; | |
86 | fw=fp[1]; | |
87 | if ( (pfork=fork()) == 0) | |
88 | { | |
89 | close(fw); | |
90 | close(0); | |
91 | _assert(dup(fr)==0); | |
92 | close(fr); | |
ade4c407 | 93 | execl(_PATH_SORT, "sort", "-T", sortdir, "-o", tmpa, 0); |
ea99717b BT |
94 | _assert(0); |
95 | } | |
96 | _assert(pfork!= -1); | |
97 | close(fr); | |
ade4c407 | 98 | fta = fopen(_PATH_DEVNULL, "w"); |
ea99717b BT |
99 | close(fta->_file); |
100 | fta->_file = fw; | |
101 | } | |
102 | else /* use tmp file */ | |
103 | { | |
104 | fta = fopen(tmpa, "w"); | |
105 | _assert (fta != NULL); | |
106 | } | |
107 | fb = 0; | |
108 | if (appflg ) | |
109 | { | |
110 | if (fb = fopen(nmb, "r")) | |
111 | { | |
112 | sprintf(tmpb, "junk%dj", getpid()); | |
113 | ftb = fopen(tmpb, "w"); | |
114 | if (ftb==NULL) | |
115 | err("Can't get scratch file %s",tmpb); | |
116 | nhash = recopy(ftb, fb, fopen(nma, "r")); | |
117 | fclose(ftb); | |
118 | } | |
119 | else | |
120 | appflg=0; | |
121 | } | |
122 | fc = fopen(nmc, appflg ? "a" : "w"); | |
ade4c407 | 123 | fd = keepkey ? fopen(nmd, "w") : 0; |
ea99717b BT |
124 | docs = newkeys(fta, stdin, fc, nhash, fd, &iflong); |
125 | fclose(stdin); | |
126 | if (remove != NULL) | |
127 | unlink(remove); | |
128 | fclose(fta); | |
129 | if (pipein) | |
130 | { | |
131 | pwait = wait(&status); | |
132 | printf("pfork %o pwait %o status %d\n",pfork,pwait,status); | |
133 | _assert(pwait==pfork); | |
134 | _assert(status==0); | |
135 | } | |
136 | else | |
137 | { | |
138 | sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa); | |
139 | system(com); | |
140 | } | |
141 | if (appflg) | |
142 | { | |
143 | sprintf(tmpc, "junk%dk", getpid()); | |
144 | sprintf(com, "mv %s %s", tmpa, tmpc); | |
145 | system(com); | |
146 | sprintf(com, "sort -T %s -m %s %s -o %s", sortdir, | |
147 | tmpb, tmpc, tmpa); | |
148 | system(com); | |
149 | } | |
150 | fta = fopen(tmpa, "r"); | |
151 | fa = fopen(nma, "w"); | |
152 | fb = fopen(nmb, "w"); | |
153 | whash(fta, fa, fb, nhash, iflong, &keys, &hashes); | |
154 | fclose(fta); | |
155 | # ifndef D1 | |
156 | unlink(tmpa); | |
157 | # endif | |
158 | if (appflg) | |
159 | { | |
160 | unlink(tmpb); | |
161 | unlink(tmpc); | |
162 | } | |
163 | if (chatty) | |
164 | ||
165 | printf ("%ld key occurrences, %d hashes, %d docs\n", | |
166 | keys, hashes, docs); | |
167 | } |