Commit | Line | Data |
---|---|---|
9a316f2c C |
1 | /* Prepare Tex index dribble output into an actual index. |
2 | Copyright (C) 1987 Free Software Foundation, Inc. | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2, or (at your option) | |
7 | any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
16 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ | |
17 | ||
18 | #include <stdio.h> | |
19 | #include <ctype.h> | |
20 | #include <errno.h> | |
21 | ||
22 | #ifdef VMS | |
23 | #ifndef VAX11C | |
24 | #define noshare | |
25 | #endif | |
26 | ||
27 | #include <perror.h> | |
28 | #include <file.h> | |
29 | ||
30 | #define EXIT_SUCCESS ((1 << 28) | 1) | |
31 | #define EXIT_FATAL ((1 << 28) | 4) | |
32 | #define unlink delete | |
33 | #define tell(fd) lseek(fd, 0L, 1) | |
34 | ||
35 | #else /* Not VMS */ | |
36 | ||
37 | #ifdef USG | |
38 | #include <sys/types.h> | |
39 | #include <sys/fcntl.h> | |
40 | #endif | |
41 | #include <sys/file.h> | |
42 | ||
43 | #define EXIT_SUCCESS 0 | |
44 | #define EXIT_FATAL 1 | |
45 | ||
46 | #endif /* Not VMS */ | |
47 | ||
48 | ||
49 | #ifndef L_XTND | |
50 | #define L_XTND 2 | |
51 | #endif | |
52 | ||
53 | #ifdef VMS | |
54 | extern noshare int sys_nerr; | |
55 | extern noshare char *sys_errlist[]; | |
56 | #else | |
57 | extern int sys_nerr; | |
58 | extern char *sys_errlist[]; | |
59 | #endif | |
60 | ||
61 | /* When sorting in core, this structure describes one line | |
62 | and the position and length of its first keyfield. */ | |
63 | ||
64 | struct lineinfo | |
65 | { | |
66 | char *text; /* The actual text of the line */ | |
67 | union | |
68 | { /* The start of the key (for textual comparison) */ | |
69 | char *text; | |
70 | long number; /* or the numeric value (for numeric comparison) */ | |
71 | } key; | |
72 | long keylen; /* Length of key field */ | |
73 | }; | |
74 | ||
75 | /* This structure describes a field to use as a sort key */ | |
76 | ||
77 | struct keyfield | |
78 | { | |
79 | int startwords; /* # words to skip */ | |
80 | int startchars; /* and # additional chars to skip, to start of field */ | |
81 | int endwords; /* similar, from beg (or end) of line, to find end of field */ | |
82 | int endchars; | |
83 | char ignore_blanks; /* Ignore spaces and tabs within the field */ | |
84 | char fold_case; /* Convert upper case to lower before comparing */ | |
85 | char reverse; /* Compare in reverse order */ | |
86 | char numeric; /* Parse text as an integer and compare the integers */ | |
87 | char positional; /* Sort according to position within the file */ | |
88 | char braced; /* Count balanced-braced groupings as fields */ | |
89 | }; | |
90 | ||
91 | /* Vector of keyfields to use */ | |
92 | ||
93 | struct keyfield keyfields[3]; | |
94 | ||
95 | /* Number of keyfields stored in that vector. */ | |
96 | ||
97 | int num_keyfields = 3; | |
98 | ||
99 | /* Vector of input file names, terminated with a zero (null pointer) */ | |
100 | ||
101 | char **infiles; | |
102 | ||
103 | /* Vector of corresponding output file names, or zero meaning default it */ | |
104 | ||
105 | char **outfiles; | |
106 | ||
107 | /* Length of `infiles' */ | |
108 | ||
109 | int num_infiles; | |
110 | ||
111 | /* Pointer to the array of pointers to lines being sorted */ | |
112 | ||
113 | char **linearray; | |
114 | ||
115 | /* The allocated length of `linearray'. */ | |
116 | ||
117 | long nlines; | |
118 | ||
119 | /* Directory to use for temporary files. On Unix, it ends with a slash. */ | |
120 | ||
121 | char *tempdir; | |
122 | ||
123 | /* Start of filename to use for temporary files. */ | |
124 | ||
125 | char *tempbase; | |
126 | ||
127 | /* Number of last temporary file. */ | |
128 | ||
129 | int tempcount; | |
130 | ||
131 | /* Number of last temporary file already deleted. | |
132 | Temporary files are deleted by `flush_tempfiles' in order of creation. */ | |
133 | ||
134 | int last_deleted_tempcount; | |
135 | ||
136 | /* During in-core sort, this points to the base of the data block | |
137 | which contains all the lines of data. */ | |
138 | ||
139 | char *text_base; | |
140 | ||
141 | /* Additional command switches */ | |
142 | ||
143 | int keep_tempfiles; /* Nonzero means do not delete tempfiles -- for debugging */ | |
144 | ||
145 | /* Forward declarations of functions in this file */ | |
146 | ||
147 | void decode_command (); | |
148 | void sort_in_core (); | |
149 | void sort_offline (); | |
150 | char **parsefile (); | |
151 | char *find_field (); | |
152 | char *find_pos (); | |
153 | long find_value (); | |
154 | char *find_braced_pos (); | |
155 | char *find_braced_end (); | |
156 | void writelines (); | |
157 | int compare_full (); | |
158 | long readline (); | |
159 | int merge_files (); | |
160 | int merge_direct (); | |
161 | char *concat (); | |
162 | char *maketempname (); | |
163 | void flush_tempfiles (); | |
164 | char *tempcopy (); | |
165 | ||
166 | extern char *mktemp (); | |
167 | \f | |
168 | #define MAX_IN_CORE_SORT 500000 | |
169 | ||
170 | int | |
171 | main (argc, argv) | |
172 | int argc; | |
173 | char **argv; | |
174 | { | |
175 | int i; | |
176 | ||
177 | tempcount = 0; | |
178 | last_deleted_tempcount = 0; | |
179 | ||
180 | /* Describe the kind of sorting to do. */ | |
181 | /* The first keyfield uses the first braced field and folds case */ | |
182 | keyfields[0].braced = 1; | |
183 | keyfields[0].fold_case = 1; | |
184 | keyfields[0].endwords = -1; | |
185 | keyfields[0].endchars = -1; | |
186 | /* The second keyfield uses the second braced field, numerically */ | |
187 | keyfields[1].braced = 1; | |
188 | keyfields[1].numeric = 1; | |
189 | keyfields[1].startwords = 1; | |
190 | keyfields[1].endwords = -1; | |
191 | keyfields[1].endchars = -1; | |
192 | /* The third keyfield (which is ignored while discarding duplicates) | |
193 | compares the whole line */ | |
194 | keyfields[2].endwords = -1; | |
195 | keyfields[2].endchars = -1; | |
196 | ||
197 | decode_command (argc, argv); | |
198 | ||
199 | tempbase = mktemp (concat ("txiXXXXXX", "", "")); | |
200 | ||
201 | /* Process input files completely, one by one. */ | |
202 | ||
203 | for (i = 0; i < num_infiles; i++) | |
204 | { | |
205 | int desc; | |
206 | long ptr; | |
207 | char *outfile; | |
208 | char *p; | |
209 | ||
210 | desc = open (infiles[i], 0, 0); | |
211 | if (desc < 0) pfatal_with_name (infiles[i]); | |
212 | lseek (desc, 0, L_XTND); | |
213 | ptr = tell (desc); | |
214 | close (desc); | |
215 | ||
216 | outfile = outfiles[i]; | |
217 | if (!outfile) | |
218 | { | |
219 | outfile = concat (infiles[i], "s", ""); | |
220 | } | |
221 | ||
222 | if (ptr < MAX_IN_CORE_SORT) | |
223 | /* Sort a small amount of data */ | |
224 | sort_in_core (infiles[i], ptr, outfile); | |
225 | else | |
226 | sort_offline (infiles[i], ptr, outfile); | |
227 | } | |
228 | ||
229 | flush_tempfiles (tempcount); | |
230 | exit (EXIT_SUCCESS); | |
231 | } | |
232 | \f | |
233 | /* This page decodes the command line arguments to set the parameter variables | |
234 | and set up the vector of keyfields and the vector of input files */ | |
235 | ||
236 | void | |
237 | decode_command (argc, argv) | |
238 | int argc; | |
239 | char **argv; | |
240 | { | |
241 | int i; | |
242 | char **ip; | |
243 | char **op; | |
244 | ||
245 | /* Store default values into parameter variables */ | |
246 | ||
247 | #ifdef VMS | |
248 | tempdir = "sys$scratch:"; | |
249 | #else | |
250 | tempdir = "/tmp/"; | |
251 | #endif | |
252 | ||
253 | keep_tempfiles = 0; | |
254 | ||
255 | /* Allocate argc input files, which must be enough. */ | |
256 | ||
257 | infiles = (char **) xmalloc (argc * sizeof (char *)); | |
258 | outfiles = (char **) xmalloc (argc * sizeof (char *)); | |
259 | ip = infiles; | |
260 | op = outfiles; | |
261 | ||
262 | /* First find all switches that control the default kind-of-sort */ | |
263 | ||
264 | for (i = 1; i < argc; i++) | |
265 | { | |
266 | int tem = classify_arg (argv[i]); | |
267 | char c; | |
268 | char *p; | |
269 | ||
270 | if (tem <= 0) | |
271 | { | |
272 | *ip++ = argv[i]; | |
273 | *op++ = 0; | |
274 | continue; | |
275 | } | |
276 | if (tem > 1) | |
277 | { | |
278 | if (i + 1 == argc) | |
279 | fatal ("switch %s given with no argument following it", argv[i]); | |
280 | else if (!strcmp (argv[i], "-T")) | |
281 | tempdir = argv[i + 1]; | |
282 | else if (!strcmp (argv[i], "-o")) | |
283 | *(op - 1) = argv[i + 1]; | |
284 | i += tem - 1; | |
285 | continue; | |
286 | } | |
287 | ||
288 | p = &argv[i][1]; | |
289 | while (c = *p++) | |
290 | switch (c) | |
291 | { | |
292 | case 'k': | |
293 | keep_tempfiles = 1; | |
294 | break; | |
295 | ||
296 | default: | |
297 | fatal ("invalid command switch %c", c); | |
298 | } | |
299 | switchdone: ; | |
300 | } | |
301 | ||
302 | /* Record number of keyfields, terminate list of filenames */ | |
303 | ||
304 | num_infiles = ip - infiles; | |
305 | *ip = 0; | |
306 | } | |
307 | ||
308 | /* Return 0 for an argument that is not a switch; | |
309 | for a switch, return 1 plus the number of following arguments that the switch swallows. | |
310 | */ | |
311 | ||
312 | int | |
313 | classify_arg (arg) | |
314 | char *arg; | |
315 | { | |
316 | if (!strcmp (arg, "-T") || !strcmp (arg, "-o")) | |
317 | return 2; | |
318 | if (arg[0] == '-') | |
319 | return 1; | |
320 | return 0; | |
321 | } | |
322 | \f | |
323 | /* Create a name for a temporary file */ | |
324 | ||
325 | char * | |
326 | maketempname (count) | |
327 | int count; | |
328 | { | |
329 | char tempsuffix[10]; | |
330 | sprintf (tempsuffix, "%d", count); | |
331 | return concat (tempdir, tempbase, tempsuffix); | |
332 | } | |
333 | ||
334 | /* Delete all temporary files up to the specified count */ | |
335 | ||
336 | void | |
337 | flush_tempfiles (to_count) | |
338 | int to_count; | |
339 | { | |
340 | if (keep_tempfiles) return; | |
341 | while (last_deleted_tempcount < to_count) | |
342 | unlink (maketempname (++last_deleted_tempcount)); | |
343 | } | |
344 | ||
345 | /* Copy an input file into a temporary file, and return the temporary file name */ | |
346 | ||
347 | #define BUFSIZE 1024 | |
348 | ||
349 | char * | |
350 | tempcopy (idesc) | |
351 | int idesc; | |
352 | { | |
353 | char *outfile = maketempname (++tempcount); | |
354 | int odesc; | |
355 | char buffer[BUFSIZE]; | |
356 | ||
357 | odesc = open (outfile, O_WRONLY | O_CREAT, 0666); | |
358 | ||
359 | if (odesc < 0) pfatal_with_name (outfile); | |
360 | ||
361 | while (1) | |
362 | { | |
363 | int nread = read (idesc, buffer, BUFSIZE); | |
364 | write (odesc, buffer, nread); | |
365 | if (!nread) break; | |
366 | } | |
367 | ||
368 | close (odesc); | |
369 | ||
370 | return outfile; | |
371 | } | |
372 | \f | |
373 | /* Compare two lines, provided as pointers to pointers to text, | |
374 | according to the specified set of keyfields */ | |
375 | ||
376 | int | |
377 | compare_full (line1, line2) | |
378 | char **line1, **line2; | |
379 | { | |
380 | int i; | |
381 | ||
382 | /* Compare using the first keyfield; | |
383 | if that does not distinguish the lines, try the second keyfield; and so on. */ | |
384 | ||
385 | for (i = 0; i < num_keyfields; i++) | |
386 | { | |
387 | long length1, length2; | |
388 | char *start1 = find_field (&keyfields[i], *line1, &length1); | |
389 | char *start2 = find_field (&keyfields[i], *line2, &length2); | |
390 | int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base, | |
391 | start2, length2, *line2 - text_base); | |
392 | if (tem) | |
393 | { | |
394 | if (keyfields[i].reverse) | |
395 | return - tem; | |
396 | return tem; | |
397 | } | |
398 | } | |
399 | ||
400 | return 0; /* Lines match exactly */ | |
401 | } | |
402 | ||
403 | /* Compare two lines described by structures | |
404 | in which the first keyfield is identified in advance. | |
405 | For positional sorting, assumes that the order of the lines in core | |
406 | reflects their nominal order. */ | |
407 | ||
408 | int | |
409 | compare_prepared (line1, line2) | |
410 | struct lineinfo *line1, *line2; | |
411 | { | |
412 | int i; | |
413 | int tem; | |
414 | char *text1, *text2; | |
415 | ||
416 | /* Compare using the first keyfield, which has been found for us already */ | |
417 | if (keyfields->positional) | |
418 | { | |
419 | if (line1->text - text_base > line2->text - text_base) | |
420 | tem = 1; | |
421 | else | |
422 | tem = -1; | |
423 | } | |
424 | else if (keyfields->numeric) | |
425 | tem = line1->key.number - line2->key.number; | |
426 | else | |
427 | tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, line2->key.text, line2->keylen, 0); | |
428 | if (tem) | |
429 | { | |
430 | if (keyfields->reverse) | |
431 | return - tem; | |
432 | return tem; | |
433 | } | |
434 | ||
435 | text1 = line1->text; | |
436 | text2 = line2->text; | |
437 | ||
438 | /* Compare using the second keyfield; | |
439 | if that does not distinguish the lines, try the third keyfield; and so on. */ | |
440 | ||
441 | for (i = 1; i < num_keyfields; i++) | |
442 | { | |
443 | long length1, length2; | |
444 | char *start1 = find_field (&keyfields[i], text1, &length1); | |
445 | char *start2 = find_field (&keyfields[i], text2, &length2); | |
446 | int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base, | |
447 | start2, length2, text2 - text_base); | |
448 | if (tem) | |
449 | { | |
450 | if (keyfields[i].reverse) | |
451 | return - tem; | |
452 | return tem; | |
453 | } | |
454 | } | |
455 | ||
456 | return 0; /* Lines match exactly */ | |
457 | } | |
458 | ||
459 | /* Like compare_full but more general. | |
460 | You can pass any strings, and you can say how many keyfields to use. | |
461 | `pos1' and `pos2' should indicate the nominal positional ordering of | |
462 | the two lines in the input. */ | |
463 | ||
464 | int | |
465 | compare_general (str1, str2, pos1, pos2, use_keyfields) | |
466 | char *str1, *str2; | |
467 | long pos1, pos2; | |
468 | int use_keyfields; | |
469 | { | |
470 | int i; | |
471 | ||
472 | /* Compare using the first keyfield; | |
473 | if that does not distinguish the lines, try the second keyfield; and so on. */ | |
474 | ||
475 | for (i = 0; i < use_keyfields; i++) | |
476 | { | |
477 | long length1, length2; | |
478 | char *start1 = find_field (&keyfields[i], str1, &length1); | |
479 | char *start2 = find_field (&keyfields[i], str2, &length2); | |
480 | int tem = compare_field (&keyfields[i], start1, length1, pos1, start2, length2, pos2); | |
481 | if (tem) | |
482 | { | |
483 | if (keyfields[i].reverse) | |
484 | return - tem; | |
485 | return tem; | |
486 | } | |
487 | } | |
488 | ||
489 | return 0; /* Lines match exactly */ | |
490 | } | |
491 | ||
492 | /* Find the start and length of a field in `str' according to `keyfield'. | |
493 | A pointer to the starting character is returned, and the length | |
494 | is stored into the int that `lengthptr' points to. */ | |
495 | ||
496 | char * | |
497 | find_field (keyfield, str, lengthptr) | |
498 | struct keyfield *keyfield; | |
499 | char *str; | |
500 | long *lengthptr; | |
501 | { | |
502 | char *start; | |
503 | char *end; | |
504 | char *(*fun) (); | |
505 | ||
506 | if (keyfield->braced) fun = find_braced_pos; | |
507 | else fun = find_pos; | |
508 | ||
509 | start = ( *fun )(str, keyfield->startwords, keyfield->startchars, | |
510 | keyfield->ignore_blanks); | |
511 | if (keyfield->endwords < 0) | |
512 | { | |
513 | if (keyfield->braced) | |
514 | end = find_braced_end (start); | |
515 | else | |
516 | { | |
517 | end = start; | |
518 | while (*end && *end != '\n') end++; | |
519 | } | |
520 | } | |
521 | else | |
522 | { | |
523 | end = ( *fun )(str, keyfield->endwords, keyfield->endchars, 0); | |
524 | if (end - str < start - str) end = start; | |
525 | } | |
526 | *lengthptr = end - start; | |
527 | return start; | |
528 | } | |
529 | ||
530 | /* Find a pointer to a specified place within `str', | |
531 | skipping (from the beginning) `words' words and then `chars' chars. | |
532 | If `ignore_blanks' is nonzero, we skip all blanks | |
533 | after finding the specified word. */ | |
534 | ||
535 | char * | |
536 | find_pos (str, words, chars, ignore_blanks) | |
537 | char *str; | |
538 | int words, chars; | |
539 | int ignore_blanks; | |
540 | { | |
541 | int i; | |
542 | char *p = str; | |
543 | ||
544 | for (i = 0; i < words; i++) | |
545 | { | |
546 | char c; | |
547 | /* Find next bunch of nonblanks and skip them. */ | |
548 | while ((c = *p) == ' ' || c == '\t') p++; | |
549 | while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) p++; | |
550 | if (!*p || *p == '\n') return p; | |
551 | } | |
552 | ||
553 | while (*p == ' ' || *p == '\t') p++; | |
554 | ||
555 | for (i = 0; i < chars; i++) | |
556 | { | |
557 | if (!*p || *p == '\n') break; | |
558 | p++; | |
559 | } | |
560 | return p; | |
561 | } | |
562 | ||
563 | /* Like find_pos but assumes that each field is surrounded by braces | |
564 | and that braces within fields are balanced. */ | |
565 | ||
566 | char * | |
567 | find_braced_pos (str, words, chars, ignore_blanks) | |
568 | char *str; | |
569 | int words, chars; | |
570 | int ignore_blanks; | |
571 | { | |
572 | int i; | |
573 | int bracelevel; | |
574 | char *p = str; | |
575 | char c; | |
576 | ||
577 | for (i = 0; i < words; i++) | |
578 | { | |
579 | bracelevel = 1; | |
580 | while ((c = *p++) != '{' && c != '\n' && c); | |
581 | if (c != '{') | |
582 | return p - 1; | |
583 | while (bracelevel) | |
584 | { | |
585 | c = *p++; | |
586 | if (c == '{') bracelevel++; | |
587 | if (c == '}') bracelevel--; | |
588 | #if 0 | |
589 | if (c == '\\' || c == '@') c = *p++; /* \ quotes braces and \ */ | |
590 | #endif | |
591 | if (c == 0 || c == '\n') return p-1; | |
592 | } | |
593 | } | |
594 | ||
595 | while ((c = *p++) != '{' && c != '\n' && c); | |
596 | ||
597 | if (c != '{') | |
598 | return p-1; | |
599 | ||
600 | if (ignore_blanks) | |
601 | while ((c = *p) == ' ' || c == '\t') p++; | |
602 | ||
603 | for (i = 0; i < chars; i++) | |
604 | { | |
605 | if (!*p || *p == '\n') break; | |
606 | p++; | |
607 | } | |
608 | return p; | |
609 | } | |
610 | ||
611 | /* Find the end of the balanced-brace field which starts at `str'. | |
612 | The position returned is just before the closing brace. */ | |
613 | ||
614 | char * | |
615 | find_braced_end (str) | |
616 | char *str; | |
617 | { | |
618 | int bracelevel; | |
619 | char *p = str; | |
620 | char c; | |
621 | ||
622 | bracelevel = 1; | |
623 | while (bracelevel) | |
624 | { | |
625 | c = *p++; | |
626 | if (c == '{') bracelevel++; | |
627 | if (c == '}') bracelevel--; | |
628 | #if 0 | |
629 | if (c == '\\' || c == '@') c = *p++; | |
630 | #endif | |
631 | if (c == 0 || c == '\n') return p-1; | |
632 | } | |
633 | return p - 1; | |
634 | } | |
635 | ||
636 | long | |
637 | find_value (start, length) | |
638 | char *start; | |
639 | long length; | |
640 | { | |
641 | while (length != 0L) { | |
642 | if (isdigit(*start)) | |
643 | return atol(start); | |
644 | length--; | |
645 | start++; | |
646 | } | |
647 | return 0l; | |
648 | } | |
649 | ||
650 | /* Vector used to translate characters for comparison. | |
651 | This is how we make all alphanumerics follow all else, | |
652 | and ignore case in the first sorting. */ | |
653 | int char_order[256]; | |
654 | ||
655 | init_char_order () | |
656 | { | |
657 | int i; | |
658 | for (i = 1; i < 256; i++) | |
659 | char_order[i] = i; | |
660 | ||
661 | for (i = '0'; i <= '9'; i++) | |
662 | char_order[i] += 512; | |
663 | ||
664 | for (i = 'a'; i <= 'z'; i++) { | |
665 | char_order[i] = 512 + i; | |
666 | char_order[i + 'A' - 'a'] = 512 + i; | |
667 | } | |
668 | } | |
669 | ||
670 | /* Compare two fields (each specified as a start pointer and a character count) | |
671 | according to `keyfield'. The sign of the value reports the relation between the fields */ | |
672 | ||
673 | int | |
674 | compare_field (keyfield, start1, length1, pos1, start2, length2, pos2) | |
675 | struct keyfield *keyfield; | |
676 | char *start1; | |
677 | long length1; | |
678 | long pos1; | |
679 | char *start2; | |
680 | long length2; | |
681 | long pos2; | |
682 | { | |
683 | if (keyfields->positional) | |
684 | { | |
685 | if (pos1 > pos2) | |
686 | return 1; | |
687 | else | |
688 | return -1; | |
689 | } | |
690 | if (keyfield->numeric) | |
691 | { | |
692 | long value = find_value (start1, length1) - find_value (start2, length2); | |
693 | if (value > 0) return 1; | |
694 | if (value < 0) return -1; | |
695 | return 0; | |
696 | } | |
697 | else | |
698 | { | |
699 | char *p1 = start1; | |
700 | char *p2 = start2; | |
701 | char *e1 = start1 + length1; | |
702 | char *e2 = start2 + length2; | |
703 | ||
704 | int fold_case = keyfield->fold_case; | |
705 | ||
706 | while (1) | |
707 | { | |
708 | int c1, c2; | |
709 | ||
710 | if (p1 == e1) c1 = 0; | |
711 | else c1 = *p1++; | |
712 | if (p2 == e2) c2 = 0; | |
713 | else c2 = *p2++; | |
714 | ||
715 | if (char_order[c1] != char_order[c2]) | |
716 | return char_order[c1] - char_order[c2]; | |
717 | if (!c1) break; | |
718 | } | |
719 | ||
720 | /* Strings are equal except possibly for case. */ | |
721 | p1 = start1; | |
722 | p2 = start2; | |
723 | while (1) | |
724 | { | |
725 | int c1, c2; | |
726 | ||
727 | if (p1 == e1) c1 = 0; | |
728 | else c1 = *p1++; | |
729 | if (p2 == e2) c2 = 0; | |
730 | else c2 = *p2++; | |
731 | ||
732 | if (c1 != c2) | |
733 | /* Reverse sign here so upper case comes out last. */ | |
734 | return c2 - c1; | |
735 | if (!c1) break; | |
736 | } | |
737 | ||
738 | return 0; | |
739 | } | |
740 | } | |
741 | \f | |
742 | /* A `struct linebuffer' is a structure which holds a line of text. | |
743 | `readline' reads a line from a stream into a linebuffer | |
744 | and works regardless of the length of the line. */ | |
745 | ||
746 | struct linebuffer | |
747 | { | |
748 | long size; | |
749 | char *buffer; | |
750 | }; | |
751 | ||
752 | /* Initialize a linebuffer for use */ | |
753 | ||
754 | void | |
755 | initbuffer (linebuffer) | |
756 | struct linebuffer *linebuffer; | |
757 | { | |
758 | linebuffer->size = 200; | |
759 | linebuffer->buffer = (char *) xmalloc (200); | |
760 | } | |
761 | ||
762 | /* Read a line of text from `stream' into `linebuffer'. | |
763 | Return the length of the line. */ | |
764 | ||
765 | long | |
766 | readline (linebuffer, stream) | |
767 | struct linebuffer *linebuffer; | |
768 | FILE *stream; | |
769 | { | |
770 | char *buffer = linebuffer->buffer; | |
771 | char *p = linebuffer->buffer; | |
772 | char *end = p + linebuffer->size; | |
773 | ||
774 | while (1) | |
775 | { | |
776 | int c = getc (stream); | |
777 | if (p == end) | |
778 | { | |
779 | buffer = (char *) xrealloc (buffer, linebuffer->size *= 2); | |
780 | p += buffer - linebuffer->buffer; | |
781 | end += buffer - linebuffer->buffer; | |
782 | linebuffer->buffer = buffer; | |
783 | } | |
784 | if (c < 0 || c == '\n') | |
785 | { | |
786 | *p = 0; | |
787 | break; | |
788 | } | |
789 | *p++ = c; | |
790 | } | |
791 | ||
792 | return p - buffer; | |
793 | } | |
794 | \f | |
795 | /* Sort an input file too big to sort in core. */ | |
796 | ||
797 | void | |
798 | sort_offline (infile, nfiles, total, outfile) | |
799 | char *infile; | |
800 | long total; | |
801 | char *outfile; | |
802 | { | |
803 | int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT; /* More than enough */ | |
804 | char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); | |
805 | FILE *istream = fopen (infile, "r"); | |
806 | int i; | |
807 | struct linebuffer lb; | |
808 | long linelength; | |
809 | int failure = 0; | |
810 | ||
811 | initbuffer (&lb); | |
812 | ||
813 | /* Read in one line of input data. */ | |
814 | ||
815 | linelength = readline (&lb, istream); | |
816 | ||
817 | if (lb.buffer[0] != '\\' && lb.buffer[0] != '@') | |
818 | { | |
819 | error ("%s: not a texinfo index file", infile); | |
820 | return; | |
821 | } | |
822 | ||
823 | /* Split up the input into `ntemps' temporary files, or maybe fewer, | |
824 | and put the new files' names into `tempfiles' */ | |
825 | ||
826 | for (i = 0; i < ntemps; i++) | |
827 | { | |
828 | char *outname = maketempname (++tempcount); | |
829 | FILE *ostream = fopen (outname, "w"); | |
830 | long tempsize = 0; | |
831 | ||
832 | if (!ostream) pfatal_with_name (outname); | |
833 | tempfiles[i] = outname; | |
834 | ||
835 | /* Copy lines into this temp file as long as it does not make file "too big" | |
836 | or until there are no more lines. */ | |
837 | ||
838 | while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT) | |
839 | { | |
840 | tempsize += linelength + 1; | |
841 | fputs (lb.buffer, ostream); | |
842 | putc ('\n', ostream); | |
843 | ||
844 | /* Read another line of input data. */ | |
845 | ||
846 | linelength = readline (&lb, istream); | |
847 | if (!linelength && feof (istream)) break; | |
848 | ||
849 | if (lb.buffer[0] != '\\' && lb.buffer[0] != '@') | |
850 | { | |
851 | error ("%s: not a texinfo index file", infile); | |
852 | failure = 1; | |
853 | goto fail; | |
854 | } | |
855 | } | |
856 | fclose (ostream); | |
857 | if (feof (istream)) break; | |
858 | } | |
859 | ||
860 | free (lb.buffer); | |
861 | ||
862 | fail: | |
863 | /* Record number of temp files we actually needed. */ | |
864 | ||
865 | ntemps = i; | |
866 | ||
867 | /* Sort each tempfile into another tempfile. | |
868 | Delete the first set of tempfiles and put the names of the second into `tempfiles' */ | |
869 | ||
870 | for (i = 0; i < ntemps; i++) | |
871 | { | |
872 | char *newtemp = maketempname (++tempcount); | |
873 | sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp); | |
874 | if (!keep_tempfiles) | |
875 | unlink (tempfiles[i]); | |
876 | tempfiles[i] = newtemp; | |
877 | } | |
878 | ||
879 | if (failure) | |
880 | return; | |
881 | ||
882 | /* Merge the tempfiles together and indexify */ | |
883 | ||
884 | merge_files (tempfiles, ntemps, outfile); | |
885 | } | |
886 | \f | |
887 | /* Sort `infile', whose size is `total', | |
888 | assuming that is small enough to be done in-core, | |
889 | then indexify it and send the output to `outfile' (or to stdout). */ | |
890 | ||
891 | void | |
892 | sort_in_core (infile, total, outfile) | |
893 | char *infile; | |
894 | long total; | |
895 | char *outfile; | |
896 | { | |
897 | char **nextline; | |
898 | char *data = (char *) xmalloc (total + 1); | |
899 | char *file_data; | |
900 | long file_size; | |
901 | int i; | |
902 | FILE *ostream = stdout; | |
903 | struct lineinfo *lineinfo; | |
904 | ||
905 | /* Read the contents of the file into the moby array `data' */ | |
906 | ||
907 | int desc = open (infile, 0, 0); | |
908 | ||
909 | if (desc < 0) | |
910 | fatal ("failure reopening %s", infile); | |
911 | for (file_size = 0; ; ) | |
912 | { | |
913 | if ((i = read (desc, data + file_size, total - file_size)) <= 0) | |
914 | break; | |
915 | file_size += i; | |
916 | } | |
917 | file_data = data; | |
918 | data[file_size] = 0; | |
919 | ||
920 | close (desc); | |
921 | ||
922 | if (file_size > 0 && data[0] != '\\' && data[0] != '@') | |
923 | { | |
924 | error ("%s: not a texinfo index file", infile); | |
925 | return; | |
926 | } | |
927 | ||
928 | init_char_order (); | |
929 | ||
930 | /* Sort routines want to know this address */ | |
931 | ||
932 | text_base = data; | |
933 | ||
934 | /* Create the array of pointers to lines, with a default size frequently enough. */ | |
935 | ||
936 | nlines = total / 50; | |
937 | if (!nlines) nlines = 2; | |
938 | linearray = (char **) xmalloc (nlines * sizeof (char *)); | |
939 | ||
940 | /* `nextline' points to the next free slot in this array. | |
941 | `nlines' is the allocated size. */ | |
942 | ||
943 | nextline = linearray; | |
944 | ||
945 | /* Parse the input file's data, and make entries for the lines. */ | |
946 | ||
947 | nextline = parsefile (infile, nextline, file_data, file_size); | |
948 | if (nextline == 0) | |
949 | { | |
950 | error ("%s: not a texinfo index file", infile); | |
951 | return; | |
952 | } | |
953 | ||
954 | /* Sort the lines */ | |
955 | ||
956 | /* If we have enough space, find the first keyfield of each line in advance. | |
957 | Make a `struct lineinfo' for each line, which records the keyfield | |
958 | as well as the line, and sort them. */ | |
959 | ||
960 | lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo)); | |
961 | ||
962 | if (lineinfo) | |
963 | { | |
964 | struct lineinfo *lp; | |
965 | char **p; | |
966 | ||
967 | for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) | |
968 | { | |
969 | lp->text = *p; | |
970 | lp->key.text = find_field (keyfields, *p, &lp->keylen); | |
971 | if (keyfields->numeric) | |
972 | lp->key.number = find_value (lp->key.text, lp->keylen); | |
973 | } | |
974 | ||
975 | qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared); | |
976 | ||
977 | for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) | |
978 | *p = lp->text; | |
979 | ||
980 | free (lineinfo); | |
981 | } | |
982 | else | |
983 | qsort (linearray, nextline - linearray, sizeof (char *), compare_full); | |
984 | ||
985 | /* Open the output file */ | |
986 | ||
987 | if (outfile) | |
988 | { | |
989 | ostream = fopen (outfile, "w"); | |
990 | if (!ostream) | |
991 | pfatal_with_name (outfile); | |
992 | } | |
993 | ||
994 | writelines (linearray, nextline - linearray, ostream); | |
995 | if (outfile) fclose (ostream); | |
996 | ||
997 | free (linearray); | |
998 | free (data); | |
999 | } | |
1000 | \f | |
1001 | /* Parse an input string in core into lines. | |
1002 | DATA is the input string, and SIZE is its length. | |
1003 | Data goes in LINEARRAY starting at NEXTLINE. | |
1004 | The value returned is the first entry in LINEARRAY still unused. | |
1005 | Value 0 means input file contents are invalid. */ | |
1006 | ||
1007 | char ** | |
1008 | parsefile (filename, nextline, data, size) | |
1009 | char *filename; | |
1010 | char **nextline; | |
1011 | char *data; | |
1012 | long size; | |
1013 | { | |
1014 | char *p, *end; | |
1015 | char **line = nextline; | |
1016 | ||
1017 | p = data; | |
1018 | end = p + size; | |
1019 | *end = 0; | |
1020 | ||
1021 | while (p != end) | |
1022 | { | |
1023 | if (p[0] != '\\' && p[0] != '@') | |
1024 | return 0; | |
1025 | ||
1026 | *line = p; | |
1027 | while (*p && *p != '\n') p++; | |
1028 | if (p != end) p++; | |
1029 | ||
1030 | line++; | |
1031 | if (line == linearray + nlines) | |
1032 | { | |
1033 | char **old = linearray; | |
1034 | linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4)); | |
1035 | line += linearray - old; | |
1036 | } | |
1037 | } | |
1038 | ||
1039 | return line; | |
1040 | } | |
1041 | \f | |
1042 | /* Indexification is a filter applied to the sorted lines | |
1043 | as they are being written to the output file. | |
1044 | Multiple entries for the same name, with different page numbers, | |
1045 | get combined into a single entry with multiple page numbers. | |
1046 | The first braced field, which is used for sorting, is discarded. | |
1047 | However, its first character is examined, folded to lower case, | |
1048 | and if it is different from that in the previous line fed to us | |
1049 | a \initial line is written with one argument, the new initial. | |
1050 | ||
1051 | If an entry has four braced fields, then the second and third | |
1052 | constitute primary and secondary names. | |
1053 | In this case, each change of primary name | |
1054 | generates a \primary line which contains only the primary name, | |
1055 | and in between these are \secondary lines which contain | |
1056 | just a secondary name and page numbers. | |
1057 | */ | |
1058 | ||
1059 | /* The last primary name we wrote a \primary entry for. | |
1060 | If only one level of indexing is being done, this is the last name seen */ | |
1061 | char *lastprimary; | |
1062 | int lastprimarylength; /* Length of storage allocated for lastprimary */ | |
1063 | ||
1064 | /* Similar, for the secondary name. */ | |
1065 | char *lastsecondary; | |
1066 | int lastsecondarylength; | |
1067 | ||
1068 | /* Zero if we are not in the middle of writing an entry. | |
1069 | One if we have written the beginning of an entry but have not | |
1070 | yet written any page numbers into it. | |
1071 | Greater than one if we have written the beginning of an entry | |
1072 | plus at least one page number. */ | |
1073 | int pending; | |
1074 | ||
1075 | /* The initial (for sorting purposes) of the last primary entry written. | |
1076 | When this changes, a \initial {c} line is written */ | |
1077 | ||
1078 | char * lastinitial; | |
1079 | ||
1080 | int lastinitiallength; | |
1081 | ||
1082 | /* When we need a string of length 1 for the value of lastinitial, | |
1083 | store it here. */ | |
1084 | ||
1085 | char lastinitial1[2]; | |
1086 | ||
1087 | /* Initialize static storage for writing an index */ | |
1088 | ||
1089 | void | |
1090 | init_index () | |
1091 | { | |
1092 | pending = 0; | |
1093 | lastinitial = lastinitial1; | |
1094 | lastinitial1[0] = 0; | |
1095 | lastinitial1[1] = 0; | |
1096 | lastinitiallength = 0; | |
1097 | lastprimarylength = 100; | |
1098 | lastprimary = (char *) xmalloc (lastprimarylength + 1); | |
1099 | bzero (lastprimary, lastprimarylength + 1); | |
1100 | lastsecondarylength = 100; | |
1101 | lastsecondary = (char *) xmalloc (lastsecondarylength + 1); | |
1102 | bzero (lastsecondary, lastsecondarylength + 1); | |
1103 | } | |
1104 | ||
1105 | /* Indexify. Merge entries for the same name, | |
1106 | insert headers for each initial character, etc. */ | |
1107 | ||
1108 | indexify (line, ostream) | |
1109 | char *line; | |
1110 | FILE *ostream; | |
1111 | { | |
1112 | char *primary, *secondary, *pagenumber; | |
1113 | int primarylength, secondarylength, pagelength; | |
1114 | int len = strlen (line); | |
1115 | int nosecondary; | |
1116 | int initiallength; | |
1117 | char *initial; | |
1118 | char initial1[2]; | |
1119 | register char *p; | |
1120 | ||
1121 | /* First, analyze the parts of the entry fed to us this time */ | |
1122 | ||
1123 | p = find_braced_pos (line, 0, 0, 0); | |
1124 | if (*p == '{') | |
1125 | { | |
1126 | initial = p; | |
1127 | /* Get length of inner pair of braces starting at p, | |
1128 | including that inner pair of braces. */ | |
1129 | initiallength = find_braced_end (p + 1) + 1 - p; | |
1130 | } | |
1131 | else | |
1132 | { | |
1133 | initial = initial1; | |
1134 | initial1[0] = *p; | |
1135 | initial1[1] = 0; | |
1136 | initiallength = 1; | |
1137 | ||
1138 | if (initial1[0] >= 'a' && initial1[0] <= 'z') | |
1139 | initial1[0] -= 040; | |
1140 | } | |
1141 | ||
1142 | pagenumber = find_braced_pos (line, 1, 0, 0); | |
1143 | pagelength = find_braced_end (pagenumber) - pagenumber; | |
1144 | if (pagelength == 0) | |
1145 | abort (); | |
1146 | ||
1147 | primary = find_braced_pos (line, 2, 0, 0); | |
1148 | primarylength = find_braced_end (primary) - primary; | |
1149 | ||
1150 | secondary = find_braced_pos (line, 3, 0, 0); | |
1151 | nosecondary = !*secondary; | |
1152 | if (!nosecondary) | |
1153 | secondarylength = find_braced_end (secondary) - secondary; | |
1154 | ||
1155 | /* If the primary is different from before, make a new primary entry */ | |
1156 | if (strncmp (primary, lastprimary, primarylength)) | |
1157 | { | |
1158 | /* Close off current secondary entry first, if one is open */ | |
1159 | if (pending) | |
1160 | { | |
1161 | fputs ("}\n", ostream); | |
1162 | pending = 0; | |
1163 | } | |
1164 | ||
1165 | /* If this primary has a different initial, include an entry for the initial */ | |
1166 | if (initiallength != lastinitiallength || | |
1167 | strncmp (initial, lastinitial, initiallength)) | |
1168 | { | |
1169 | fprintf (ostream, "\\initial {"); | |
1170 | fwrite (initial, 1, initiallength, ostream); | |
1171 | fprintf (ostream, "}\n", initial); | |
1172 | if (initial == initial1) | |
1173 | { | |
1174 | lastinitial = lastinitial1; | |
1175 | *lastinitial1 = *initial1; | |
1176 | } | |
1177 | else | |
1178 | { | |
1179 | lastinitial = initial; | |
1180 | } | |
1181 | lastinitiallength = initiallength; | |
1182 | } | |
1183 | ||
1184 | /* Make the entry for the primary. */ | |
1185 | if (nosecondary) | |
1186 | fputs ("\\entry {", ostream); | |
1187 | else | |
1188 | fputs ("\\primary {", ostream); | |
1189 | fwrite (primary, primarylength, 1, ostream); | |
1190 | if (nosecondary) | |
1191 | { | |
1192 | fputs ("}{", ostream); | |
1193 | pending = 1; | |
1194 | } | |
1195 | else | |
1196 | fputs ("}\n", ostream); | |
1197 | ||
1198 | /* Record name of most recent primary */ | |
1199 | if (lastprimarylength < primarylength) | |
1200 | { | |
1201 | lastprimarylength = primarylength + 100; | |
1202 | lastprimary = (char *) xrealloc (lastprimary, | |
1203 | 1 + lastprimarylength); | |
1204 | } | |
1205 | strncpy (lastprimary, primary, primarylength); | |
1206 | lastprimary[primarylength] = 0; | |
1207 | ||
1208 | /* There is no current secondary within this primary, now */ | |
1209 | lastsecondary[0] = 0; | |
1210 | } | |
1211 | ||
1212 | /* Should not have an entry with no subtopic following one with a subtopic */ | |
1213 | ||
1214 | if (nosecondary && *lastsecondary) | |
1215 | error ("entry %s follows an entry with a secondary name", line); | |
1216 | ||
1217 | /* Start a new secondary entry if necessary */ | |
1218 | if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength)) | |
1219 | { | |
1220 | if (pending) | |
1221 | { | |
1222 | fputs ("}\n", ostream); | |
1223 | pending = 0; | |
1224 | } | |
1225 | ||
1226 | /* Write the entry for the secondary. */ | |
1227 | fputs ("\\secondary {", ostream); | |
1228 | fwrite (secondary, secondarylength, 1, ostream); | |
1229 | fputs ("}{", ostream); | |
1230 | pending = 1; | |
1231 | ||
1232 | /* Record name of most recent secondary */ | |
1233 | if (lastsecondarylength < secondarylength) | |
1234 | { | |
1235 | lastsecondarylength = secondarylength + 100; | |
1236 | lastsecondary = (char *) xrealloc (lastsecondary, | |
1237 | 1 + lastsecondarylength); | |
1238 | } | |
1239 | strncpy (lastsecondary, secondary, secondarylength); | |
1240 | lastsecondary[secondarylength] = 0; | |
1241 | } | |
1242 | ||
1243 | /* Here to add one more page number to the current entry */ | |
1244 | if (pending++ != 1) | |
1245 | fputs (", ", ostream); /* Punctuate first, if this is not the first */ | |
1246 | fwrite (pagenumber, pagelength, 1, ostream); | |
1247 | } | |
1248 | ||
1249 | /* Close out any unfinished output entry */ | |
1250 | ||
1251 | void | |
1252 | finish_index (ostream) | |
1253 | FILE *ostream; | |
1254 | { | |
1255 | if (pending) | |
1256 | fputs ("}\n", ostream); | |
1257 | free (lastprimary); | |
1258 | free (lastsecondary); | |
1259 | } | |
1260 | \f | |
1261 | /* Copy the lines in the sorted order. | |
1262 | Each line is copied out of the input file it was found in. */ | |
1263 | ||
1264 | void | |
1265 | writelines (linearray, nlines, ostream) | |
1266 | char **linearray; | |
1267 | int nlines; | |
1268 | FILE *ostream; | |
1269 | { | |
1270 | char **stop_line = linearray + nlines; | |
1271 | char **next_line; | |
1272 | ||
1273 | init_index (); | |
1274 | ||
1275 | /* Output the text of the lines, and free the buffer space */ | |
1276 | ||
1277 | for (next_line = linearray; next_line != stop_line; next_line++) | |
1278 | { | |
1279 | /* If -u was specified, output the line only if distinct from previous one. */ | |
1280 | if (next_line == linearray | |
1281 | /* Compare previous line with this one, using only the explicitly specd keyfields */ | |
1282 | || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1)) | |
1283 | { | |
1284 | char *p = *next_line; | |
1285 | char c; | |
1286 | while ((c = *p++) && c != '\n'); | |
1287 | *(p-1) = 0; | |
1288 | indexify (*next_line, ostream); | |
1289 | } | |
1290 | } | |
1291 | ||
1292 | finish_index (ostream); | |
1293 | } | |
1294 | \f | |
1295 | /* Assume (and optionally verify) that each input file is sorted; | |
1296 | merge them and output the result. | |
1297 | Returns nonzero if any input file fails to be sorted. | |
1298 | ||
1299 | This is the high-level interface that can handle an unlimited number of files. */ | |
1300 | ||
1301 | #define MAX_DIRECT_MERGE 10 | |
1302 | ||
1303 | int | |
1304 | merge_files (infiles, nfiles, outfile) | |
1305 | char **infiles; | |
1306 | int nfiles; | |
1307 | char *outfile; | |
1308 | { | |
1309 | char **tempfiles; | |
1310 | int ntemps; | |
1311 | int i; | |
1312 | int value = 0; | |
1313 | int start_tempcount = tempcount; | |
1314 | ||
1315 | if (nfiles <= MAX_DIRECT_MERGE) | |
1316 | return merge_direct (infiles, nfiles, outfile); | |
1317 | ||
1318 | /* Merge groups of MAX_DIRECT_MERGE input files at a time, | |
1319 | making a temporary file to hold each group's result. */ | |
1320 | ||
1321 | ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE; | |
1322 | tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); | |
1323 | for (i = 0; i < ntemps; i++) | |
1324 | { | |
1325 | int nf = MAX_DIRECT_MERGE; | |
1326 | if (i + 1 == ntemps) | |
1327 | nf = nfiles - i * MAX_DIRECT_MERGE; | |
1328 | tempfiles[i] = maketempname (++tempcount); | |
1329 | value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]); | |
1330 | } | |
1331 | ||
1332 | /* All temporary files that existed before are no longer needed | |
1333 | since their contents have been merged into our new tempfiles. | |
1334 | So delete them. */ | |
1335 | flush_tempfiles (start_tempcount); | |
1336 | ||
1337 | /* Now merge the temporary files we created. */ | |
1338 | ||
1339 | merge_files (tempfiles, ntemps, outfile); | |
1340 | ||
1341 | free (tempfiles); | |
1342 | ||
1343 | return value; | |
1344 | } | |
1345 | \f | |
1346 | /* Assume (and optionally verify) that each input file is sorted; | |
1347 | merge them and output the result. | |
1348 | Returns nonzero if any input file fails to be sorted. | |
1349 | ||
1350 | This version of merging will not work if the number of | |
1351 | input files gets too high. Higher level functions | |
1352 | use it only with a bounded number of input files. */ | |
1353 | ||
1354 | int | |
1355 | merge_direct (infiles, nfiles, outfile) | |
1356 | char **infiles; | |
1357 | int nfiles; | |
1358 | char *outfile; | |
1359 | { | |
1360 | char **ip = infiles; | |
1361 | struct linebuffer *lb1, *lb2; | |
1362 | struct linebuffer **thisline, **prevline; | |
1363 | FILE **streams; | |
1364 | int i; | |
1365 | int nleft; | |
1366 | int lossage = 0; | |
1367 | int *file_lossage; | |
1368 | struct linebuffer *prev_out = 0; | |
1369 | FILE *ostream = stdout; | |
1370 | ||
1371 | if (outfile) | |
1372 | { | |
1373 | ostream = fopen (outfile, "w"); | |
1374 | } | |
1375 | if (!ostream) pfatal_with_name (outfile); | |
1376 | ||
1377 | init_index (); | |
1378 | ||
1379 | if (nfiles == 0) | |
1380 | { | |
1381 | if (outfile) | |
1382 | fclose (ostream); | |
1383 | return 0; | |
1384 | } | |
1385 | ||
1386 | /* For each file, make two line buffers. | |
1387 | Also, for each file, there is an element of `thisline' | |
1388 | which points at any time to one of the file's two buffers, | |
1389 | and an element of `prevline' which points to the other buffer. | |
1390 | `thisline' is supposed to point to the next available line from the file, | |
1391 | while `prevline' holds the last file line used, | |
1392 | which is remembered so that we can verify that the file is properly sorted. */ | |
1393 | ||
1394 | /* lb1 and lb2 contain one buffer each per file */ | |
1395 | lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); | |
1396 | lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); | |
1397 | ||
1398 | /* thisline[i] points to the linebuffer holding the next available line in file i, | |
1399 | or is zero if there are no lines left in that file. */ | |
1400 | thisline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *)); | |
1401 | /* prevline[i] points to the linebuffer holding the last used line from file i. | |
1402 | This is just for verifying that file i is properly sorted. */ | |
1403 | prevline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *)); | |
1404 | /* streams[i] holds the input stream for file i. */ | |
1405 | streams = (FILE **) xmalloc (nfiles * sizeof (FILE *)); | |
1406 | /* file_lossage[i] is nonzero if we already know file i is not properly sorted. */ | |
1407 | file_lossage = (int *) xmalloc (nfiles * sizeof (int)); | |
1408 | ||
1409 | /* Allocate and initialize all that storage */ | |
1410 | ||
1411 | for (i = 0; i < nfiles; i++) | |
1412 | { | |
1413 | initbuffer (&lb1[i]); | |
1414 | initbuffer (&lb2[i]); | |
1415 | thisline[i] = &lb1[i]; | |
1416 | prevline[i] = &lb2[i]; | |
1417 | file_lossage[i] = 0; | |
1418 | streams[i] = fopen (infiles[i], "r"); | |
1419 | if (!streams[i]) | |
1420 | pfatal_with_name (infiles[i]); | |
1421 | ||
1422 | readline (thisline[i], streams[i]); | |
1423 | } | |
1424 | ||
1425 | /* Keep count of number of files not at eof */ | |
1426 | nleft = nfiles; | |
1427 | ||
1428 | while (nleft) | |
1429 | { | |
1430 | struct linebuffer *best = 0; | |
1431 | struct linebuffer *exch; | |
1432 | int bestfile = -1; | |
1433 | int i; | |
1434 | ||
1435 | /* Look at the next avail line of each file; choose the least one. */ | |
1436 | ||
1437 | for (i = 0; i < nfiles; i++) | |
1438 | { | |
1439 | if (thisline[i] && | |
1440 | (!best || | |
1441 | 0 < compare_general (best->buffer, thisline[i]->buffer, | |
1442 | (long) bestfile, (long) i, num_keyfields))) | |
1443 | { | |
1444 | best = thisline[i]; | |
1445 | bestfile = i; | |
1446 | } | |
1447 | } | |
1448 | ||
1449 | /* Output that line, unless it matches the previous one and we don't want duplicates */ | |
1450 | ||
1451 | if (!(prev_out && | |
1452 | !compare_general (prev_out->buffer, best->buffer, 0L, 1L, num_keyfields - 1))) | |
1453 | indexify (best->buffer, ostream); | |
1454 | prev_out = best; | |
1455 | ||
1456 | /* Now make the line the previous of its file, and fetch a new line from that file */ | |
1457 | ||
1458 | exch = prevline[bestfile]; | |
1459 | prevline[bestfile] = thisline[bestfile]; | |
1460 | thisline[bestfile] = exch; | |
1461 | ||
1462 | while (1) | |
1463 | { | |
1464 | /* If the file has no more, mark it empty */ | |
1465 | ||
1466 | if (feof (streams[bestfile])) | |
1467 | { | |
1468 | thisline[bestfile] = 0; | |
1469 | nleft--; /* Update the number of files still not empty */ | |
1470 | break; | |
1471 | } | |
1472 | readline (thisline[bestfile], streams[bestfile]); | |
1473 | if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])) break; | |
1474 | } | |
1475 | } | |
1476 | ||
1477 | finish_index (ostream); | |
1478 | ||
1479 | /* Free all storage and close all input streams */ | |
1480 | ||
1481 | for (i = 0; i < nfiles; i++) | |
1482 | { | |
1483 | fclose (streams[i]); | |
1484 | free (lb1[i].buffer); | |
1485 | free (lb2[i].buffer); | |
1486 | } | |
1487 | free (file_lossage); | |
1488 | free (lb1); | |
1489 | free (lb2); | |
1490 | free (thisline); | |
1491 | free (prevline); | |
1492 | free (streams); | |
1493 | ||
1494 | if (outfile) | |
1495 | fclose (ostream); | |
1496 | ||
1497 | return lossage; | |
1498 | } | |
1499 | \f | |
1500 | /* Print error message and exit. */ | |
1501 | ||
1502 | fatal (s1, s2) | |
1503 | char *s1, *s2; | |
1504 | { | |
1505 | error (s1, s2); | |
1506 | exit (EXIT_FATAL); | |
1507 | } | |
1508 | ||
1509 | /* Print error message. `s1' is printf control string, `s2' is arg for it. */ | |
1510 | ||
1511 | error (s1, s2) | |
1512 | char *s1, *s2; | |
1513 | { | |
1514 | printf ("texindex: "); | |
1515 | printf (s1, s2); | |
1516 | printf ("\n"); | |
1517 | } | |
1518 | ||
1519 | perror_with_name (name) | |
1520 | char *name; | |
1521 | { | |
1522 | char *s; | |
1523 | ||
1524 | if (errno < sys_nerr) | |
1525 | s = concat ("", sys_errlist[errno], " for %s"); | |
1526 | else | |
1527 | s = "cannot open %s"; | |
1528 | error (s, name); | |
1529 | } | |
1530 | ||
1531 | pfatal_with_name (name) | |
1532 | char *name; | |
1533 | { | |
1534 | char *s; | |
1535 | ||
1536 | if (errno < sys_nerr) | |
1537 | s = concat ("", sys_errlist[errno], " for %s"); | |
1538 | else | |
1539 | s = "cannot open %s"; | |
1540 | fatal (s, name); | |
1541 | } | |
1542 | ||
1543 | /* Return a newly-allocated string whose contents concatenate those of s1, s2, s3. */ | |
1544 | ||
1545 | char * | |
1546 | concat (s1, s2, s3) | |
1547 | char *s1, *s2, *s3; | |
1548 | { | |
1549 | int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3); | |
1550 | char *result = (char *) xmalloc (len1 + len2 + len3 + 1); | |
1551 | ||
1552 | strcpy (result, s1); | |
1553 | strcpy (result + len1, s2); | |
1554 | strcpy (result + len1 + len2, s3); | |
1555 | *(result + len1 + len2 + len3) = 0; | |
1556 | ||
1557 | return result; | |
1558 | } | |
1559 | ||
1560 | /* Like malloc but get fatal error if memory is exhausted. */ | |
1561 | ||
1562 | int | |
1563 | xmalloc (size) | |
1564 | int size; | |
1565 | { | |
1566 | int result = malloc (size); | |
1567 | if (!result) | |
1568 | fatal ("virtual memory exhausted", 0); | |
1569 | return result; | |
1570 | } | |
1571 | ||
1572 | ||
1573 | int | |
1574 | xrealloc (ptr, size) | |
1575 | char *ptr; | |
1576 | int size; | |
1577 | { | |
1578 | int result = realloc (ptr, size); | |
1579 | if (!result) | |
1580 | fatal ("virtual memory exhausted"); | |
1581 | return result; | |
1582 | } | |
1583 | ||
1584 | bzero (b, length) | |
1585 | register char *b; | |
1586 | register int length; | |
1587 | { | |
1588 | #ifdef VMS | |
1589 | short zero = 0; | |
1590 | long max_str = 65535; | |
1591 | long len; | |
1592 | ||
1593 | while (length > max_str) | |
1594 | { | |
1595 | (void) LIB$MOVC5 (&zero, &zero, &zero, &max_str, b); | |
1596 | length -= max_str; | |
1597 | b += max_str; | |
1598 | } | |
1599 | len = length; | |
1600 | (void) LIB$MOVC5 (&zero, &zero, &zero, &len, b); | |
1601 | #else | |
1602 | while (length-- > 0) | |
1603 | *b++ = 0; | |
1604 | #endif /* not VMS */ | |
1605 | } |