Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /*- |
2 | * Copyright (c) 1989 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Ken Arnold. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * 3. All advertising materials mentioning features or use of this software | |
17 | * must display the following acknowledgement: | |
18 | * This product includes software developed by the University of | |
19 | * California, Berkeley and its contributors. | |
20 | * 4. Neither the name of the University nor the names of its contributors | |
21 | * may be used to endorse or promote products derived from this software | |
22 | * without specific prior written permission. | |
23 | * | |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
34 | * SUCH DAMAGE. | |
35 | */ | |
36 | ||
37 | #ifndef lint | |
38 | char copyright[] = | |
39 | "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ | |
40 | All rights reserved.\n"; | |
41 | #endif /* not lint */ | |
42 | ||
43 | #ifndef lint | |
44 | static char sccsid[] = "@(#)strfile.c 5.12 (Berkeley) 4/8/91"; | |
45 | #endif /* not lint */ | |
46 | ||
47 | # include <machine/endian.h> | |
48 | # include <sys/param.h> | |
49 | # include <stdio.h> | |
50 | # include <ctype.h> | |
51 | # include "strfile.h" | |
52 | ||
53 | # ifndef MAXPATHLEN | |
54 | # define MAXPATHLEN 1024 | |
55 | # endif /* MAXPATHLEN */ | |
56 | ||
57 | /* | |
58 | * This program takes a file composed of strings separated by | |
59 | * lines consisting of a single delimiting character (default | |
60 | * character is '%') and creates another file which consists of a table | |
61 | * describing the file (structure from "strfile.h"), a table of seek | |
62 | * pointers to the start of the strings, and the strings, each terminated | |
63 | * by a null byte. Usage: | |
64 | * | |
65 | * % strfile [-iorsx] [ -cC ] sourcefile [ datafile ] | |
66 | * | |
67 | * c - Change delimiting character from '%' to 'C' | |
68 | * s - Silent. Give no summary of data processed at the end of | |
69 | * the run. | |
70 | * o - order the strings in alphabetic order | |
71 | * i - if ordering, ignore case | |
72 | * r - randomize the order of the strings | |
73 | * x - set rotated bit | |
74 | * | |
75 | * Ken Arnold Sept. 7, 1978 -- | |
76 | * | |
77 | * Added ordering options. | |
78 | */ | |
79 | ||
/* Truth values used for the option flags. */
# define	FALSE	0
# define	TRUE	1

/* True when the offsets are kept in memory because they will be reordered. */
# define	STORING_PTRS	(Oflag || Rflag)

/* Number of elements by which an ALLOC'd table is created and grown. */
# define	CHUNKSIZE	512

/*
 * ALWAYS is a condition that is always true; under lint it is spelled as
 * a function call rather than a constant (presumably to keep lint quiet
 * about constant conditions).
 */
#ifndef lint
# define	ALWAYS	1
#else
# define	ALWAYS	atoi("1")
#endif

/*
 * ALLOC(ptr, sz):
 *	Make sure the table "ptr" exists and has room to grow.  A NULL
 *	table is created with CHUNKSIZE elements; an existing one is
 *	enlarged to sz + CHUNKSIZE elements each time sz + 1 reaches a
 *	multiple of CHUNKSIZE.  Running out of memory is fatal.  The
 *	trailing "else" swallows the semicolon written after the macro
 *	call, so it must always be used as a complete statement.
 */
# define	ALLOC(ptr,sz)	if (ALWAYS) { \
			if (ptr != NULL) { \
				if (((sz) + 1) % CHUNKSIZE == 0) \
					ptr = realloc((void *) ptr, ((unsigned int) ((sz) + CHUNKSIZE) * sizeof *ptr)); \
			} \
			else \
				ptr = malloc((unsigned int) (CHUNKSIZE * sizeof *ptr)); \
			if (ptr == NULL) { \
				fprintf(stderr, "out of space\n"); \
				exit(1); \
			} \
		} else

/* Compilers without "void" get "char" in its place. */
#ifdef NO_VOID
# define	void	char
#endif
105 | ||
106 | typedef struct { | |
107 | char first; | |
108 | off_t pos; | |
109 | } STR; | |
110 | ||
111 | char *Infile = NULL, /* input file name */ | |
112 | Outfile[MAXPATHLEN] = "", /* output file name */ | |
113 | Delimch = '%'; /* delimiting character */ | |
114 | ||
115 | int Sflag = FALSE; /* silent run flag */ | |
116 | int Oflag = FALSE; /* ordering flag */ | |
117 | int Iflag = FALSE; /* ignore case flag */ | |
118 | int Rflag = FALSE; /* randomize order flag */ | |
119 | int Xflag = FALSE; /* set rotated bit */ | |
120 | long Num_pts = 0; /* number of pointers/strings */ | |
121 | ||
122 | off_t *Seekpts; | |
123 | ||
124 | FILE *Sort_1, *Sort_2; /* pointers for sorting */ | |
125 | ||
126 | STRFILE Tbl; /* statistics table */ | |
127 | ||
128 | STR *Firstch; /* first chars of each string */ | |
129 | ||
130 | char *fgets(), *strcpy(), *strcat(); | |
131 | ||
132 | void *malloc(), *realloc(); | |
133 | ||
134 | /* | |
135 | * main: | |
136 | * Drive the sucker. There are two main modes -- either we store | |
137 | * the seek pointers, if the table is to be sorted or randomized, | |
138 | * or we write the pointer directly to the file, if we are to stay | |
139 | * in file order. If the former, we allocate and re-allocate in | |
140 | * CHUNKSIZE blocks; if the latter, we just write each pointer, | |
141 | * and then seek back to the beginning to write in the table. | |
142 | */ | |
143 | main(ac, av) | |
144 | int ac; | |
145 | char **av; | |
146 | { | |
147 | register char *sp, dc; | |
148 | register FILE *inf, *outf; | |
149 | register off_t last_off, length, pos, *p; | |
150 | register int first, cnt; | |
151 | register char *nsp; | |
152 | register STR *fp; | |
153 | static char string[257]; | |
154 | ||
155 | getargs(ac, av); /* evalute arguments */ | |
156 | dc = Delimch; | |
157 | if ((inf = fopen(Infile, "r")) == NULL) { | |
158 | perror(Infile); | |
159 | exit(1); | |
160 | } | |
161 | ||
162 | if ((outf = fopen(Outfile, "w")) == NULL) { | |
163 | perror(Outfile); | |
164 | exit(1); | |
165 | } | |
166 | if (!STORING_PTRS) | |
167 | (void) fseek(outf, sizeof Tbl, 0); | |
168 | ||
169 | /* | |
170 | * Write the strings onto the file | |
171 | */ | |
172 | ||
173 | Tbl.str_longlen = 0; | |
174 | Tbl.str_shortlen = (unsigned int) 0xffffffff; | |
175 | Tbl.str_delim = dc; | |
176 | Tbl.str_version = VERSION; | |
177 | first = Oflag; | |
178 | add_offset(outf, ftell(inf)); | |
179 | last_off = 0; | |
180 | do { | |
181 | sp = fgets(string, 256, inf); | |
182 | if (sp == NULL || sp[0] == dc && sp[1] == '\n') { | |
183 | pos = ftell(inf); | |
184 | length = pos - last_off - (sp ? strlen(sp) : 0); | |
185 | last_off = pos; | |
186 | if (!length) | |
187 | continue; | |
188 | add_offset(outf, pos); | |
189 | if (Tbl.str_longlen < length) | |
190 | Tbl.str_longlen = length; | |
191 | if (Tbl.str_shortlen > length) | |
192 | Tbl.str_shortlen = length; | |
193 | first = Oflag; | |
194 | } | |
195 | else if (first) { | |
196 | for (nsp = sp; !isalnum(*nsp); nsp++) | |
197 | continue; | |
198 | ALLOC(Firstch, Num_pts); | |
199 | fp = &Firstch[Num_pts - 1]; | |
200 | if (Iflag && isupper(*nsp)) | |
201 | fp->first = tolower(*nsp); | |
202 | else | |
203 | fp->first = *nsp; | |
204 | fp->pos = Seekpts[Num_pts - 1]; | |
205 | first = FALSE; | |
206 | } | |
207 | } while (sp != NULL); | |
208 | ||
209 | /* | |
210 | * write the tables in | |
211 | */ | |
212 | ||
213 | (void) fclose(inf); | |
214 | ||
215 | if (Oflag) | |
216 | do_order(); | |
217 | else if (Rflag) | |
218 | randomize(); | |
219 | ||
220 | if (Xflag) | |
221 | Tbl.str_flags |= STR_ROTATED; | |
222 | ||
223 | if (!Sflag) { | |
224 | printf("\"%s\" created\n", Outfile); | |
225 | if (Num_pts == 2) | |
226 | puts("There was 1 string"); | |
227 | else | |
228 | printf("There were %d strings\n", Num_pts - 1); | |
229 | printf("Longest string: %lu byte%s\n", Tbl.str_longlen, | |
230 | Tbl.str_longlen == 1 ? "" : "s"); | |
231 | printf("Shortest string: %lu byte%s\n", Tbl.str_shortlen, | |
232 | Tbl.str_shortlen == 1 ? "" : "s"); | |
233 | } | |
234 | ||
235 | (void) fseek(outf, (off_t) 0, 0); | |
236 | Tbl.str_version = htonl(Tbl.str_version); | |
237 | Tbl.str_numstr = htonl(Num_pts - 1); | |
238 | Tbl.str_longlen = htonl(Tbl.str_longlen); | |
239 | Tbl.str_shortlen = htonl(Tbl.str_shortlen); | |
240 | Tbl.str_flags = htonl(Tbl.str_flags); | |
241 | (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf); | |
242 | if (STORING_PTRS) { | |
243 | for (p = Seekpts, cnt = Num_pts; cnt--; ++p) | |
244 | *p = htonl(*p); | |
245 | (void) fwrite((char *) Seekpts, sizeof *Seekpts, (int) Num_pts, outf); | |
246 | } | |
247 | (void) fclose(outf); | |
248 | exit(0); | |
249 | } | |
250 | ||
251 | /* | |
252 | * This routine evaluates arguments from the command line | |
253 | */ | |
254 | getargs(argc, argv) | |
255 | int argc; | |
256 | char **argv; | |
257 | { | |
258 | extern char *optarg; | |
259 | extern int optind; | |
260 | int ch; | |
261 | ||
262 | while ((ch = getopt(argc, argv, "c:iorsx")) != EOF) | |
263 | switch(ch) { | |
264 | case 'c': /* new delimiting char */ | |
265 | Delimch = *optarg; | |
266 | if (!isascii(Delimch)) { | |
267 | printf("bad delimiting character: '\\%o\n'", | |
268 | Delimch); | |
269 | } | |
270 | break; | |
271 | case 'i': /* ignore case in ordering */ | |
272 | Iflag++; | |
273 | break; | |
274 | case 'o': /* order strings */ | |
275 | Oflag++; | |
276 | break; | |
277 | case 'r': /* randomize pointers */ | |
278 | Rflag++; | |
279 | break; | |
280 | case 's': /* silent */ | |
281 | Sflag++; | |
282 | break; | |
283 | case 'x': /* set the rotated bit */ | |
284 | Xflag++; | |
285 | break; | |
286 | case '?': | |
287 | default: | |
288 | usage(); | |
289 | } | |
290 | argv += optind; | |
291 | ||
292 | if (*argv) { | |
293 | Infile = *argv; | |
294 | if (*++argv) | |
295 | (void) strcpy(Outfile, *argv); | |
296 | } | |
297 | if (!Infile) { | |
298 | puts("No input file name"); | |
299 | usage(); | |
300 | } | |
301 | if (*Outfile == '\0') { | |
302 | (void) strcpy(Outfile, Infile); | |
303 | (void) strcat(Outfile, ".dat"); | |
304 | } | |
305 | } | |
306 | ||
/*
 * usage:
 *	Print the command synopsis on the standard error output and
 *	exit with status 1.
 */
usage()
{
	static char	synopsis[] =
	    "strfile [-iorsx] [-c char] sourcefile [datafile]\n";

	(void) fputs(synopsis, stderr);
	exit(1);
}
313 | ||
314 | /* | |
315 | * add_offset: | |
316 | * Add an offset to the list, or write it out, as appropriate. | |
317 | */ | |
318 | add_offset(fp, off) | |
319 | FILE *fp; | |
320 | off_t off; | |
321 | { | |
322 | off_t net; | |
323 | ||
324 | if (!STORING_PTRS) { | |
325 | net = htonl(off); | |
326 | fwrite(&net, 1, sizeof net, fp); | |
327 | } else { | |
328 | ALLOC(Seekpts, Num_pts + 1); | |
329 | Seekpts[Num_pts] = off; | |
330 | } | |
331 | Num_pts++; | |
332 | } | |
333 | ||
334 | /* | |
335 | * do_order: | |
336 | * Order the strings alphabetically (possibly ignoring case). | |
337 | */ | |
338 | do_order() | |
339 | { | |
340 | register int i; | |
341 | register off_t *lp; | |
342 | register STR *fp; | |
343 | extern int cmp_str(); | |
344 | ||
345 | Sort_1 = fopen(Infile, "r"); | |
346 | Sort_2 = fopen(Infile, "r"); | |
347 | qsort((char *) Firstch, (int) Tbl.str_numstr, sizeof *Firstch, cmp_str); | |
348 | i = Tbl.str_numstr; | |
349 | lp = Seekpts; | |
350 | fp = Firstch; | |
351 | while (i--) | |
352 | *lp++ = fp++->pos; | |
353 | (void) fclose(Sort_1); | |
354 | (void) fclose(Sort_2); | |
355 | Tbl.str_flags |= STR_ORDERED; | |
356 | } | |
357 | ||
/*
 * unctrl:
 *	Return a printable form of the character c: the character
 *	itself if it is printable, "^?" for DEL (0177), and otherwise
 *	'^' followed by c + 'A' - 1 (so 1 becomes "^A").  The result
 *	lives in a static buffer that the next call overwrites.
 */
char *
unctrl(c)
char c;
{
	static char	buf[3];

	/* <ctype.h> functions need a value in unsigned char range */
	if (isprint((unsigned char) c)) {
		buf[0] = c;
		buf[1] = '\0';
	}
	else {
		buf[0] = '^';
		buf[1] = (c == 0177) ? '?' : c + 'A' - 1;
		buf[2] = '\0';
	}
	return buf;
}
382 | ||
383 | cmp_str(p1, p2) | |
384 | STR *p1, *p2; | |
385 | { | |
386 | register int c1, c2; | |
387 | register int n1, n2; | |
388 | ||
389 | # define SET_N(nf,ch) (nf = (ch == '\n')) | |
390 | # define IS_END(ch,nf) (ch == Delimch && nf) | |
391 | ||
392 | c1 = p1->first; | |
393 | c2 = p2->first; | |
394 | if (c1 != c2) | |
395 | return c1 - c2; | |
396 | ||
397 | (void) fseek(Sort_1, p1->pos, 0); | |
398 | (void) fseek(Sort_2, p2->pos, 0); | |
399 | ||
400 | n1 = FALSE; | |
401 | n2 = FALSE; | |
402 | while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0') | |
403 | SET_N(n1, c1); | |
404 | while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0') | |
405 | SET_N(n2, c2); | |
406 | ||
407 | while (!IS_END(c1, n1) && !IS_END(c2, n2)) { | |
408 | if (Iflag) { | |
409 | if (isupper(c1)) | |
410 | c1 = tolower(c1); | |
411 | if (isupper(c2)) | |
412 | c2 = tolower(c2); | |
413 | } | |
414 | if (c1 != c2) | |
415 | return c1 - c2; | |
416 | SET_N(n1, c1); | |
417 | SET_N(n2, c2); | |
418 | c1 = getc(Sort_1); | |
419 | c2 = getc(Sort_2); | |
420 | } | |
421 | if (IS_END(c1, n1)) | |
422 | c1 = 0; | |
423 | if (IS_END(c2, n2)) | |
424 | c2 = 0; | |
425 | return c1 - c2; | |
426 | } | |
427 | ||
428 | /* | |
429 | * randomize: | |
430 | * Randomize the order of the string table. We must be careful | |
431 | * not to randomize across delimiter boundaries. All | |
432 | * randomization is done within each block. | |
433 | */ | |
434 | randomize() | |
435 | { | |
436 | register int cnt, i; | |
437 | register off_t tmp; | |
438 | register off_t *sp; | |
439 | extern time_t time(); | |
440 | ||
441 | srandom((int)(time((time_t *) NULL) + getpid())); | |
442 | ||
443 | Tbl.str_flags |= STR_RANDOM; | |
444 | cnt = Tbl.str_numstr; | |
445 | ||
446 | /* | |
447 | * move things around randomly | |
448 | */ | |
449 | ||
450 | for (sp = Seekpts; cnt > 0; cnt--, sp++) { | |
451 | i = random() % cnt; | |
452 | tmp = sp[0]; | |
453 | sp[0] = sp[i]; | |
454 | sp[i] = tmp; | |
455 | } | |
456 | } |