# include "stdio.h"
# include "assert.h"
# include <fcntl.h>
# include <stdlib.h>
# include <string.h>
# include <sys/wait.h>
# include <unistd.h>

/*
 * make inverted file indexes.  Reads a stream from mkey which
 * gives record pointer items and keys.  Generates a set of files
 *	a. NHASH pointers to file b.
 *	b. lists of record numbers.
 *	c. record pointer items.
 *
 * these files are named xxx.ia, xxx.ib, xxx.ic;
 * where xxx is taken from arg1 ("Index" when no name is given).
 * If the files exist they are updated.
 *
 * Options (each must precede the index name):
 *	-hN	size of hash table (default 256)
 *	-n	new, don't append
 *	-a	append to old file (default)
 *	-v	verbose output
 *	-d	keep keys on file xxx.id for check on searching
 *	-p	pipe into sort (saves space, costs time)
 *	-i F	input is on file F, not stdin; -iu also unlinks F afterwards
 *
 * err(), recopy(), newkeys() and whash() are defined elsewhere.
 * NOTE(review): the pre-existing err() call sites continue as though
 * err() does not return -- confirm against its definition.
 */
int main(argc, argv)
int argc;
char *argv[];
{
	FILE *fa, *fb, *fc, *fta, *ftb, *fd;
	int nhash = 256;
	int appflg = 1;
	int keepkey = 0, pipein = 0;
	char nma[100], nmb[100], nmc[100], com[100], nmd[100];
	char tmpa[20], tmpb[20], tmpc[20];
	char *base;
	char *rmfile = NULL;	/* input file to unlink after reading (-iu) */
	int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status;
	int rc;
	long keys;
	int iflong = 0;
	char *sortdir;

	sortdir = (access("/crp/tmp", 06) == 0) ? "/crp/tmp" : "/usr/tmp";

	/* argc is checked first so argv[1] is never read when it is absent */
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {
		case 'h':	/* size of hash table */
			nhash = atoi(argv[1] + 2);
			break;
		case 'n':	/* new, don't append */
			appflg = 0;
			break;
		case 'a':	/* append to old file */
			appflg = 1;
			break;
		case 'v':	/* verbose output */
			chatty = 1;
			break;
		case 'd':	/* keep keys on file .id for check on searching */
			keepkey = 1;
			break;
		case 'p':	/* pipe into sort (saves space, costs time) */
			pipein = 1;
			break;
		case 'i':	/* input is on file, not stdin */
			if (argc < 3) {
				err("No input file given after %s", argv[1]);
				break;
			}
			/* after close(0) the next open() must land on descriptor 0 */
			close(0);
			if (open(argv[2], 0) != 0)
				err("Can't read input %s", argv[2]);
			if (argv[1][2] == 'u')	/* unlink */
				rmfile = argv[2];
			argc--;
			argv++;
			break;
		default:	/* unknown options are ignored */
			break;
		}
		argc--;
		argv++;
	}

	base = argc >= 2 ? argv[1] : "Index";
	/* every name buffer must hold base + a 3-character suffix + NUL */
	if (strlen(base) > sizeof(nma) - 4) {
		err("Index name too long: %s", base);
		return 1;
	}
	strcpy(nma, base);
	strcpy(nmb, nma);
	strcpy(nmc, nma);
	strcpy(nmd, nma);
	strcat(nma, ".ia");
	strcat(nmb, ".ib");
	strcat(nmc, ".ic");
	strcat(nmd, ".id");

	sprintf(tmpa, "junk%di", getpid());
	if (pipein) {
		rc = pipe(fp);
		_assert(rc == 0);
		fr = fp[0];
		fw = fp[1];
		if ((pfork = fork()) == 0) {
			/* child: make the pipe's read end stdin, then become sort */
			close(fw);
			close(0);
			rc = dup(fr);	/* kept outside _assert so it always runs */
			_assert(rc == 0);
			close(fr);
			execl("/bin/sort", "sort", "-T", sortdir, "-o", tmpa, (char *)0);
			execl("/usr/bin/sort", "sort", "-T", sortdir, "-o", tmpa, (char *)0);
			_assert(0);
			_exit(1);	/* never fall through into the parent's code */
		}
		_assert(pfork != -1);
		close(fr);
		/* wrap the pipe's write end in a stdio stream */
		fta = fdopen(fw, "w");
		_assert(fta != NULL);
	} else {	/* use tmp file */
		fta = fopen(tmpa, "w");
		_assert(fta != NULL);
	}

	fb = NULL;
	if (appflg) {
		if ((fb = fopen(nmb, "r")) != NULL) {
			sprintf(tmpb, "junk%dj", getpid());
			ftb = fopen(tmpb, "w");
			if (ftb == NULL)
				err("Can't get scratch file %s", tmpb);
			/* NOTE(review): the .ia stream is handed straight to recopy()
			   unchecked, and fb is not closed here -- confirm recopy()
			   copes with a NULL stream and owns both handles */
			nhash = recopy(ftb, fb, fopen(nma, "r"));
			fclose(ftb);
		} else
			appflg = 0;	/* nothing to append to: build from scratch */
	}

	fc = fopen(nmc, appflg ? "a" : "w");
	if (fc == NULL)
		err("Can't write %s", nmc);
	/* fd is always assigned, so newkeys() never sees an indeterminate pointer */
	fd = keepkey ? fopen(nmd, "w") : NULL;
	if (keepkey && fd == NULL)
		err("Can't write %s", nmd);

	docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
	fclose(stdin);
	if (rmfile != NULL)
		unlink(rmfile);
	fclose(fta);

	if (pipein) {
		pwait = wait(&status);
		printf("pfork %o pwait %o status %d\n", pfork, pwait, status);
		_assert(pwait == pfork);
		_assert(status == 0);
	} else {
		sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa);
		system(com);
	}

	if (appflg) {
		/* move the new keys aside, then merge them with the recopied old ones */
		sprintf(tmpc, "junk%dk", getpid());
		if (rename(tmpa, tmpc) != 0)
			err("Can't rename scratch file %s", tmpa);
		sprintf(com, "sort -T %s -m %s %s -o %s", sortdir,
			tmpb, tmpc, tmpa);
		system(com);
	}

	fta = fopen(tmpa, "r");
	if (fta == NULL)
		err("Can't read scratch file %s", tmpa);
	fa = fopen(nma, "w");
	if (fa == NULL)
		err("Can't write %s", nma);
	fb = fopen(nmb, "w");
	if (fb == NULL)
		err("Can't write %s", nmb);
	whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
	fclose(fta);
# ifndef D1
	unlink(tmpa);
# endif
	if (appflg) {
		unlink(tmpb);
		unlink(tmpc);
	}
	if (chatty)
		printf("%ld key occurrences, %d hashes, %d docs\n",
			keys, hashes, docs);
	return 0;
}