Commit | Line | Data |
---|---|---|
94c01391 KB |
1 | /* |
2 | * Copyright (c) 1989 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Ken Arnold. | |
7 | * | |
8 | * Redistribution and use in source and binary forms are permitted | |
9 | * provided that the above copyright notice and this paragraph are | |
10 | * duplicated in all such forms and that any documentation, | |
11 | * advertising materials, and other materials related to such | |
12 | * distribution and use acknowledge that the software was developed | |
13 | * by the University of California, Berkeley. The name of the | |
14 | * University may not be used to endorse or promote products derived | |
15 | * from this software without specific prior written permission. | |
16 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR | |
17 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED | |
18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. | |
19 | */ | |
20 | ||
21 | #ifndef lint | |
22 | char copyright[] = | |
23 | "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ | |
24 | All rights reserved.\n"; | |
25 | #endif /* not lint */ | |
26 | ||
27 | #ifndef lint | |
7dd2a8a0 | 28 | static char sccsid[] = "@(#)strfile.c 5.10 (Berkeley) %G%"; |
94c01391 | 29 | #endif /* not lint */ |
9abe7b42 | 30 | |
e249006e KB |
31 | # include <sys/param.h> |
32 | # include <sys/types.h> | |
9abe7b42 KB |
33 | # include <stdio.h> |
34 | # include <ctype.h> | |
35 | # include "strfile.h" | |
5c683701 KB |
36 | |
37 | # ifndef MAXPATHLEN | |
38 | # define MAXPATHLEN 1024 | |
39 | # endif /* MAXPATHLEN */ | |
9abe7b42 KB |
40 | |
41 | /* | |
42 | * This program takes a file composed of strings separated by | |
43 | * lines containing only the delimiting character (default | |
44 | * character is '%') and creates another file which consists of a table | |
45 | * describing the file (structure from "strfile.h"), a table of seek | |
2579efac | 46 | * pointers to the start of the strings, and the strings, each terminated |
9abe7b42 KB |
47 | * by a null byte. Usage: |
48 | * | |
0b661c16 | 49 | * % strfile [-iorsx] [ -cC ] sourcefile [ datafile ] |
9abe7b42 | 50 | * |
9abe7b42 KB |
51 | * c - Change delimiting character from '%' to 'C' |
52 | * s - Silent. Give no summary of data processed at the end of | |
53 | * the run. | |
9abe7b42 KB |
54 | * o - order the strings in alphabetic order |
55 | * i - if ordering, ignore case | |
56 | * r - randomize the order of the strings | |
0b661c16 | 57 | * x - set rotated bit |
9abe7b42 KB |
58 | * |
59 | * Ken Arnold Sept. 7, 1978 -- | |
60 | * | |
9abe7b42 KB |
61 | * Added ordering options. |
62 | */ | |
63 | ||
64 | # define TRUE 1 | |
65 | # define FALSE 0 | |
66 | ||
5c683701 KB |
67 | # define STORING_PTRS (Oflag || Rflag) |
68 | # define CHUNKSIZE 512 | |
69 | ||
70 | #ifdef lint | |
71 | # define ALWAYS atoi("1") | |
72 | #else | |
73 | # define ALWAYS 1 | |
74 | #endif | |
/*
 * ALLOC:
 *	Make sure the array "ptr" has room for element number "sz".
 *	The first call (ptr == NULL) allocates CHUNKSIZE elements; after
 *	that the array is grown to (sz + CHUNKSIZE) elements each time
 *	(sz + 1) is a multiple of CHUNKSIZE.  If either allocation fails,
 *	"out of space" is printed on stderr and the program exits with
 *	status 1.
 *
 *	Both arguments are evaluated more than once, so they must be free
 *	of side effects.  The expansion ends in a dangling "else" that
 *	swallows the caller's terminating semicolon, so the macro must be
 *	used as a complete statement: "ALLOC(p, n);".
 */
# define	ALLOC(ptr,sz)	if (ALWAYS) { \
			if (ptr == NULL) \
				ptr = malloc((unsigned int) (CHUNKSIZE * sizeof *ptr)); \
			else if (((sz) + 1) % CHUNKSIZE == 0) \
				ptr = realloc((void *) ptr, ((unsigned int) ((sz) + CHUNKSIZE) * sizeof *ptr)); \
			if (ptr == NULL) { \
				fprintf(stderr, "out of space\n"); \
				exit(1); \
			} \
		} else
85 | ||
86 | #ifdef NO_VOID | |
87 | # define void char | |
88 | #endif | |
9abe7b42 KB |
89 | |
/*
 * Sort key for one string, built by main() when ordering is requested
 * and consumed by do_order() and cmp_str().
 */
typedef struct {
	char	first;		/* first alphanumeric char found in the string's
				 * first line (lower-cased when Iflag is set) */
	off_t	pos;		/* offset of the string in the input file, as
				 * copied from Seekpts */
} STR;
94 | ||
95 | char *Infile = NULL, /* input file name */ | |
5c683701 | 96 | Outfile[MAXPATHLEN] = "", /* output file name */ |
2579efac | 97 | Delimch = '%'; /* delimiting character */ |
9abe7b42 KB |
98 | |
99 | int Sflag = FALSE; /* silent run flag */ | |
100 | int Oflag = FALSE; /* ordering flag */ | |
101 | int Iflag = FALSE; /* ignore case flag */ | |
102 | int Rflag = FALSE; /* randomize order flag */ | |
0b661c16 | 103 | int Xflag = FALSE; /* set rotated bit */ |
447fd1f6 | 104 | long Num_pts = 0; /* number of pointers/strings */ |
9abe7b42 | 105 | |
5c683701 | 106 | off_t *Seekpts; |
9abe7b42 KB |
107 | |
108 | FILE *Sort_1, *Sort_2; /* pointers for sorting */ | |
109 | ||
110 | STRFILE Tbl; /* statistics table */ | |
111 | ||
112 | STR *Firstch; /* first chars of each string */ | |
113 | ||
5c683701 | 114 | char *fgets(), *strcpy(), *strcat(); |
9abe7b42 | 115 | |
5c683701 | 116 | void *malloc(), *realloc(); |
9abe7b42 | 117 | |
5c683701 KB |
118 | /* |
119 | * main: | |
120 | * Drive the sucker. There are two main modes -- either we store | |
121 | * the seek pointers, if the table is to be sorted or randomized, | |
122 | * or we write the pointer directly to the file, if we are to stay | |
123 | * in file order. If the former, we allocate and re-allocate in | |
124 | * CHUNKSIZE blocks; if the latter, we just write each pointer, | |
125 | * and then seek back to the beginning to write in the table. | |
126 | */ | |
9abe7b42 KB |
127 | main(ac, av) |
128 | int ac; | |
129 | char **av; | |
130 | { | |
131 | register char *sp, dc; | |
9abe7b42 | 132 | register FILE *inf, *outf; |
447fd1f6 KB |
133 | register off_t last_off, length, pos, *p; |
134 | register int first, cnt; | |
9abe7b42 KB |
135 | register char *nsp; |
136 | register STR *fp; | |
137 | static char string[257]; | |
138 | ||
139 | getargs(ac, av); /* evalute arguments */ | |
9abe7b42 KB |
140 | dc = Delimch; |
141 | if ((inf = fopen(Infile, "r")) == NULL) { | |
142 | perror(Infile); | |
526fa11f | 143 | exit(1); |
9abe7b42 | 144 | } |
9abe7b42 KB |
145 | |
146 | if ((outf = fopen(Outfile, "w")) == NULL) { | |
147 | perror(Outfile); | |
526fa11f | 148 | exit(1); |
9abe7b42 | 149 | } |
5c683701 KB |
150 | if (!STORING_PTRS) |
151 | (void) fseek(outf, sizeof Tbl, 0); | |
9abe7b42 KB |
152 | |
153 | /* | |
5c683701 | 154 | * Write the strings onto the file |
9abe7b42 KB |
155 | */ |
156 | ||
157 | Tbl.str_longlen = 0; | |
158 | Tbl.str_shortlen = (unsigned int) 0xffffffff; | |
5c683701 | 159 | Tbl.str_delim = dc; |
447fd1f6 | 160 | Tbl.str_version = VERSION; |
9abe7b42 | 161 | first = Oflag; |
5c683701 KB |
162 | add_offset(outf, ftell(inf)); |
163 | last_off = 0; | |
9abe7b42 KB |
164 | do { |
165 | sp = fgets(string, 256, inf); | |
2579efac | 166 | if (sp == NULL || sp[0] == dc && sp[1] == '\n') { |
5c683701 | 167 | pos = ftell(inf); |
5c683701 KB |
168 | length = pos - last_off - strlen(sp); |
169 | last_off = pos; | |
2579efac KB |
170 | if (!length) |
171 | continue; | |
172 | add_offset(outf, pos); | |
5c683701 KB |
173 | if (Tbl.str_longlen < length) |
174 | Tbl.str_longlen = length; | |
175 | if (Tbl.str_shortlen > length) | |
176 | Tbl.str_shortlen = length; | |
9abe7b42 KB |
177 | first = Oflag; |
178 | } | |
2579efac KB |
179 | else if (first) { |
180 | for (nsp = sp; !isalnum(*nsp); nsp++) | |
181 | continue; | |
182 | ALLOC(Firstch, Num_pts); | |
183 | fp = &Firstch[Num_pts - 1]; | |
184 | if (Iflag && isupper(*nsp)) | |
185 | fp->first = tolower(*nsp); | |
186 | else | |
187 | fp->first = *nsp; | |
188 | fp->pos = Seekpts[Num_pts - 1]; | |
189 | first = FALSE; | |
9abe7b42 KB |
190 | } |
191 | } while (sp != NULL); | |
192 | ||
193 | /* | |
194 | * write the tables in | |
195 | */ | |
196 | ||
197 | (void) fclose(inf); | |
9abe7b42 KB |
198 | |
199 | if (Oflag) | |
5c683701 | 200 | do_order(); |
9abe7b42 | 201 | else if (Rflag) |
5c683701 | 202 | randomize(); |
9abe7b42 | 203 | |
0b661c16 KB |
204 | if (Xflag) |
205 | Tbl.str_flags |= STR_ROTATED; | |
206 | ||
9abe7b42 | 207 | if (!Sflag) { |
5c683701 KB |
208 | printf("\"%s\" created\n", Outfile); |
209 | if (Num_pts == 2) | |
9abe7b42 KB |
210 | puts("There was 1 string"); |
211 | else | |
e249006e KB |
212 | printf("There were %d strings\n", Num_pts - 1); |
213 | printf("Longest string: %lu byte%s\n", Tbl.str_longlen, | |
9abe7b42 | 214 | Tbl.str_longlen == 1 ? "" : "s"); |
e249006e | 215 | printf("Shortest string: %lu byte%s\n", Tbl.str_shortlen, |
9abe7b42 KB |
216 | Tbl.str_shortlen == 1 ? "" : "s"); |
217 | } | |
447fd1f6 KB |
218 | |
219 | (void) fseek(outf, (off_t) 0, 0); | |
220 | Tbl.str_version = htonl(Tbl.str_version); | |
221 | Tbl.str_numstr = htonl(Num_pts - 1); | |
222 | Tbl.str_longlen = htonl(Tbl.str_longlen); | |
223 | Tbl.str_shortlen = htonl(Tbl.str_shortlen); | |
224 | Tbl.str_flags = htonl(Tbl.str_flags); | |
225 | (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf); | |
226 | if (STORING_PTRS) { | |
227 | for (p = Seekpts, cnt = Num_pts; cnt--; ++p) | |
228 | *p = htonl(*p); | |
229 | (void) fwrite((char *) Seekpts, sizeof *Seekpts, (int) Num_pts, outf); | |
230 | } | |
231 | (void) fclose(outf); | |
9abe7b42 KB |
232 | exit(0); |
233 | } | |
234 | ||
235 | /* | |
236 | * This routine evaluates arguments from the command line | |
237 | */ | |
2579efac KB |
238 | getargs(argc, argv) |
239 | int argc; | |
240 | char **argv; | |
9abe7b42 | 241 | { |
2579efac KB |
242 | extern char *optarg; |
243 | extern int optind; | |
244 | int ch; | |
245 | ||
0b661c16 | 246 | while ((ch = getopt(argc, argv, "c:iorsx")) != EOF) |
2579efac KB |
247 | switch(ch) { |
248 | case 'c': /* new delimiting char */ | |
249 | Delimch = *optarg; | |
250 | if (!isascii(Delimch)) { | |
251 | printf("bad delimiting character: '\\%o\n'", | |
252 | Delimch); | |
253 | } | |
254 | break; | |
255 | case 'i': /* ignore case in ordering */ | |
256 | Iflag++; | |
257 | break; | |
258 | case 'o': /* order strings */ | |
259 | Oflag++; | |
260 | break; | |
0b661c16 | 261 | case 'r': /* randomize pointers */ |
2579efac KB |
262 | Rflag++; |
263 | break; | |
264 | case 's': /* silent */ | |
265 | Sflag++; | |
266 | break; | |
0b661c16 KB |
267 | case 'x': /* set the rotated bit */ |
268 | Xflag++; | |
269 | break; | |
2579efac KB |
270 | case '?': |
271 | default: | |
272 | usage(); | |
9abe7b42 | 273 | } |
2579efac KB |
274 | argv += optind; |
275 | ||
276 | if (*argv) { | |
277 | Infile = *argv; | |
278 | if (*++argv) | |
279 | (void) strcpy(Outfile, *argv); | |
280 | } | |
9abe7b42 | 281 | if (!Infile) { |
9abe7b42 | 282 | puts("No input file name"); |
2579efac | 283 | usage(); |
9abe7b42 | 284 | } |
2579efac | 285 | if (*Outfile == '\0') { |
9abe7b42 KB |
286 | (void) strcpy(Outfile, Infile); |
287 | (void) strcat(Outfile, ".dat"); | |
288 | } | |
2579efac KB |
289 | } |
290 | ||
/*
 * usage:
 *	Print the command synopsis on stderr and exit with status 1.
 */
int
usage()
{
	(void) fputs("strfile [-iorsx] [-c char] sourcefile [datafile]\n",
	    stderr);
	exit(1);
}
297 | ||
5c683701 KB |
298 | /* |
299 | * add_offset: | |
300 | * Add an offset to the list, or write it out, as appropriate. | |
301 | */ | |
302 | add_offset(fp, off) | |
303 | FILE *fp; | |
304 | off_t off; | |
305 | { | |
447fd1f6 KB |
306 | off_t net; |
307 | ||
308 | if (!STORING_PTRS) { | |
309 | net = htonl(off); | |
310 | fwrite(&net, 1, sizeof net, fp); | |
311 | } else { | |
5c683701 KB |
312 | ALLOC(Seekpts, Num_pts + 1); |
313 | Seekpts[Num_pts] = off; | |
314 | } | |
315 | Num_pts++; | |
316 | } | |
317 | ||
9abe7b42 KB |
318 | /* |
319 | * do_order: | |
320 | * Order the strings alphabetically (possibly ignoring case). | |
321 | */ | |
5c683701 | 322 | do_order() |
9abe7b42 KB |
323 | { |
324 | register int i; | |
5c683701 | 325 | register off_t *lp; |
9abe7b42 KB |
326 | register STR *fp; |
327 | extern int cmp_str(); | |
328 | ||
5c683701 KB |
329 | Sort_1 = fopen(Infile, "r"); |
330 | Sort_2 = fopen(Infile, "r"); | |
331 | qsort((char *) Firstch, (int) Tbl.str_numstr, sizeof *Firstch, cmp_str); | |
9abe7b42 | 332 | i = Tbl.str_numstr; |
5c683701 | 333 | lp = Seekpts; |
9abe7b42 KB |
334 | fp = Firstch; |
335 | while (i--) | |
336 | *lp++ = fp++->pos; | |
337 | (void) fclose(Sort_1); | |
338 | (void) fclose(Sort_2); | |
339 | Tbl.str_flags |= STR_ORDERED; | |
340 | } | |
341 | ||
342 | /* | |
343 | * cmp_str: | |
344 | * Compare two strings in the file | |
345 | */ | |
5c683701 KB |
346 | char * |
347 | unctrl(c) | |
348 | char c; | |
349 | { | |
350 | static char buf[3]; | |
351 | ||
352 | if (isprint(c)) { | |
353 | buf[0] = c; | |
354 | buf[1] = '\0'; | |
355 | } | |
356 | else if (c == 0177) { | |
357 | buf[0] = '^'; | |
358 | buf[1] = '?'; | |
359 | } | |
360 | else { | |
361 | buf[0] = '^'; | |
362 | buf[1] = c + 'A' - 1; | |
363 | } | |
364 | return buf; | |
365 | } | |
366 | ||
9abe7b42 KB |
367 | cmp_str(p1, p2) |
368 | STR *p1, *p2; | |
369 | { | |
370 | register int c1, c2; | |
5c683701 KB |
371 | register int n1, n2; |
372 | ||
373 | # define SET_N(nf,ch) (nf = (ch == '\n')) | |
374 | # define IS_END(ch,nf) (ch == Delimch && nf) | |
9abe7b42 KB |
375 | |
376 | c1 = p1->first; | |
377 | c2 = p2->first; | |
378 | if (c1 != c2) | |
379 | return c1 - c2; | |
380 | ||
381 | (void) fseek(Sort_1, p1->pos, 0); | |
382 | (void) fseek(Sort_2, p2->pos, 0); | |
383 | ||
5c683701 KB |
384 | n1 = FALSE; |
385 | n2 = FALSE; | |
9abe7b42 | 386 | while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0') |
5c683701 | 387 | SET_N(n1, c1); |
9abe7b42 | 388 | while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0') |
5c683701 | 389 | SET_N(n2, c2); |
9abe7b42 | 390 | |
5c683701 | 391 | while (!IS_END(c1, n1) && !IS_END(c2, n2)) { |
9abe7b42 KB |
392 | if (Iflag) { |
393 | if (isupper(c1)) | |
394 | c1 = tolower(c1); | |
395 | if (isupper(c2)) | |
396 | c2 = tolower(c2); | |
397 | } | |
398 | if (c1 != c2) | |
399 | return c1 - c2; | |
5c683701 KB |
400 | SET_N(n1, c1); |
401 | SET_N(n2, c2); | |
9abe7b42 KB |
402 | c1 = getc(Sort_1); |
403 | c2 = getc(Sort_2); | |
404 | } | |
5c683701 KB |
405 | if (IS_END(c1, n1)) |
406 | c1 = 0; | |
407 | if (IS_END(c2, n2)) | |
408 | c2 = 0; | |
9abe7b42 KB |
409 | return c1 - c2; |
410 | } | |
411 | ||
412 | /* | |
413 | * randomize: | |
414 | * Randomize the order of the string table. We must be careful | |
415 | * not to randomize across delimiter boundaries. All | |
416 | * randomization is done within each block. | |
417 | */ | |
5c683701 | 418 | randomize() |
9abe7b42 | 419 | { |
5c683701 KB |
420 | register int cnt, i; |
421 | register off_t tmp; | |
422 | register off_t *sp; | |
423 | extern time_t time(); | |
9abe7b42 | 424 | |
7dd2a8a0 KB |
425 | srandom((int)(time((time_t *) NULL) + getpid())); |
426 | ||
9abe7b42 | 427 | Tbl.str_flags |= STR_RANDOM; |
5c683701 | 428 | cnt = Tbl.str_numstr; |
9abe7b42 | 429 | |
5c683701 KB |
430 | /* |
431 | * move things around randomly | |
432 | */ | |
9abe7b42 | 433 | |
5c683701 | 434 | for (sp = Seekpts; cnt > 0; cnt--, sp++) { |
e249006e | 435 | i = random() % cnt; |
5c683701 KB |
436 | tmp = sp[0]; |
437 | sp[0] = sp[i]; | |
438 | sp[i] = tmp; | |
9abe7b42 KB |
439 | } |
440 | } |