BSD 4_4_Lite2 development
[unix-history] / usr / src / contrib / gawk-2.15.2 / support / texindex.c
CommitLineData
9a316f2c
C
1/* Prepare Tex index dribble output into an actual index.
2 Copyright (C) 1987 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
17
18#include <stdio.h>
19#include <ctype.h>
20#include <errno.h>
21
22#ifdef VMS
23#ifndef VAX11C
24#define noshare
25#endif
26
27#include <perror.h>
28#include <file.h>
29
30#define EXIT_SUCCESS ((1 << 28) | 1)
31#define EXIT_FATAL ((1 << 28) | 4)
32#define unlink delete
33#define tell(fd) lseek(fd, 0L, 1)
34
35#else /* Not VMS */
36
37#ifdef USG
38#include <sys/types.h>
39#include <sys/fcntl.h>
40#endif
41#include <sys/file.h>
42
43#define EXIT_SUCCESS 0
44#define EXIT_FATAL 1
45
46#endif /* Not VMS */
47
48
49#ifndef L_XTND
50#define L_XTND 2
51#endif
52
53#ifdef VMS
54extern noshare int sys_nerr;
55extern noshare char *sys_errlist[];
56#else
57extern int sys_nerr;
58extern char *sys_errlist[];
59#endif
60
61/* When sorting in core, this structure describes one line
62 and the position and length of its first keyfield. */
63
64struct lineinfo
65 {
66 char *text; /* The actual text of the line */
67 union
68 { /* The start of the key (for textual comparison) */
69 char *text;
70 long number; /* or the numeric value (for numeric comparison) */
71 } key;
72 long keylen; /* Length of key field */
73 };
74
75/* This structure describes a field to use as a sort key */
76
77struct keyfield
78 {
79 int startwords; /* # words to skip */
80 int startchars; /* and # additional chars to skip, to start of field */
81 int endwords; /* similar, from beg (or end) of line, to find end of field */
82 int endchars;
83 char ignore_blanks; /* Ignore spaces and tabs within the field */
84 char fold_case; /* Convert upper case to lower before comparing */
85 char reverse; /* Compare in reverse order */
86 char numeric; /* Parse text as an integer and compare the integers */
87 char positional; /* Sort according to position within the file */
88 char braced; /* Count balanced-braced groupings as fields */
89 };
90
91/* Vector of keyfields to use */
92
93struct keyfield keyfields[3];
94
95/* Number of keyfields stored in that vector. */
96
97int num_keyfields = 3;
98
99/* Vector of input file names, terminated with a zero (null pointer) */
100
101char **infiles;
102
103/* Vector of corresponding output file names, or zero meaning default it */
104
105char **outfiles;
106
107/* Length of `infiles' */
108
109int num_infiles;
110
111/* Pointer to the array of pointers to lines being sorted */
112
113char **linearray;
114
115/* The allocated length of `linearray'. */
116
117long nlines;
118
119/* Directory to use for temporary files. On Unix, it ends with a slash. */
120
121char *tempdir;
122
123/* Start of filename to use for temporary files. */
124
125char *tempbase;
126
127/* Number of last temporary file. */
128
129int tempcount;
130
131/* Number of last temporary file already deleted.
132 Temporary files are deleted by `flush_tempfiles' in order of creation. */
133
134int last_deleted_tempcount;
135
136/* During in-core sort, this points to the base of the data block
137 which contains all the lines of data. */
138
139char *text_base;
140
141/* Additional command switches */
142
143int keep_tempfiles; /* Nonzero means do not delete tempfiles -- for debugging */
144
145/* Forward declarations of functions in this file */
146
147void decode_command ();
148void sort_in_core ();
149void sort_offline ();
150char **parsefile ();
151char *find_field ();
152char *find_pos ();
153long find_value ();
154char *find_braced_pos ();
155char *find_braced_end ();
156void writelines ();
157int compare_full ();
158long readline ();
159int merge_files ();
160int merge_direct ();
161char *concat ();
162char *maketempname ();
163void flush_tempfiles ();
164char *tempcopy ();
165
166extern char *mktemp ();
167\f
168#define MAX_IN_CORE_SORT 500000
169
170int
171main (argc, argv)
172 int argc;
173 char **argv;
174{
175 int i;
176
177 tempcount = 0;
178 last_deleted_tempcount = 0;
179
180 /* Describe the kind of sorting to do. */
181 /* The first keyfield uses the first braced field and folds case */
182 keyfields[0].braced = 1;
183 keyfields[0].fold_case = 1;
184 keyfields[0].endwords = -1;
185 keyfields[0].endchars = -1;
186 /* The second keyfield uses the second braced field, numerically */
187 keyfields[1].braced = 1;
188 keyfields[1].numeric = 1;
189 keyfields[1].startwords = 1;
190 keyfields[1].endwords = -1;
191 keyfields[1].endchars = -1;
192 /* The third keyfield (which is ignored while discarding duplicates)
193 compares the whole line */
194 keyfields[2].endwords = -1;
195 keyfields[2].endchars = -1;
196
197 decode_command (argc, argv);
198
199 tempbase = mktemp (concat ("txiXXXXXX", "", ""));
200
201 /* Process input files completely, one by one. */
202
203 for (i = 0; i < num_infiles; i++)
204 {
205 int desc;
206 long ptr;
207 char *outfile;
208 char *p;
209
210 desc = open (infiles[i], 0, 0);
211 if (desc < 0) pfatal_with_name (infiles[i]);
212 lseek (desc, 0, L_XTND);
213 ptr = tell (desc);
214 close (desc);
215
216 outfile = outfiles[i];
217 if (!outfile)
218 {
219 outfile = concat (infiles[i], "s", "");
220 }
221
222 if (ptr < MAX_IN_CORE_SORT)
223 /* Sort a small amount of data */
224 sort_in_core (infiles[i], ptr, outfile);
225 else
226 sort_offline (infiles[i], ptr, outfile);
227 }
228
229 flush_tempfiles (tempcount);
230 exit (EXIT_SUCCESS);
231}
232\f
233/* This page decodes the command line arguments to set the parameter variables
234 and set up the vector of keyfields and the vector of input files */
235
236void
237decode_command (argc, argv)
238 int argc;
239 char **argv;
240{
241 int i;
242 char **ip;
243 char **op;
244
245 /* Store default values into parameter variables */
246
247#ifdef VMS
248 tempdir = "sys$scratch:";
249#else
250 tempdir = "/tmp/";
251#endif
252
253 keep_tempfiles = 0;
254
255 /* Allocate argc input files, which must be enough. */
256
257 infiles = (char **) xmalloc (argc * sizeof (char *));
258 outfiles = (char **) xmalloc (argc * sizeof (char *));
259 ip = infiles;
260 op = outfiles;
261
262 /* First find all switches that control the default kind-of-sort */
263
264 for (i = 1; i < argc; i++)
265 {
266 int tem = classify_arg (argv[i]);
267 char c;
268 char *p;
269
270 if (tem <= 0)
271 {
272 *ip++ = argv[i];
273 *op++ = 0;
274 continue;
275 }
276 if (tem > 1)
277 {
278 if (i + 1 == argc)
279 fatal ("switch %s given with no argument following it", argv[i]);
280 else if (!strcmp (argv[i], "-T"))
281 tempdir = argv[i + 1];
282 else if (!strcmp (argv[i], "-o"))
283 *(op - 1) = argv[i + 1];
284 i += tem - 1;
285 continue;
286 }
287
288 p = &argv[i][1];
289 while (c = *p++)
290 switch (c)
291 {
292 case 'k':
293 keep_tempfiles = 1;
294 break;
295
296 default:
297 fatal ("invalid command switch %c", c);
298 }
299 switchdone: ;
300 }
301
302 /* Record number of keyfields, terminate list of filenames */
303
304 num_infiles = ip - infiles;
305 *ip = 0;
306}
307
308/* Return 0 for an argument that is not a switch;
309 for a switch, return 1 plus the number of following arguments that the switch swallows.
310*/
311
312int
313classify_arg (arg)
314 char *arg;
315{
316 if (!strcmp (arg, "-T") || !strcmp (arg, "-o"))
317 return 2;
318 if (arg[0] == '-')
319 return 1;
320 return 0;
321}
322\f
323/* Create a name for a temporary file */
324
325char *
326maketempname (count)
327 int count;
328{
329 char tempsuffix[10];
330 sprintf (tempsuffix, "%d", count);
331 return concat (tempdir, tempbase, tempsuffix);
332}
333
334/* Delete all temporary files up to the specified count */
335
336void
337flush_tempfiles (to_count)
338 int to_count;
339{
340 if (keep_tempfiles) return;
341 while (last_deleted_tempcount < to_count)
342 unlink (maketempname (++last_deleted_tempcount));
343}
344
345/* Copy an input file into a temporary file, and return the temporary file name */
346
347#define BUFSIZE 1024
348
349char *
350tempcopy (idesc)
351 int idesc;
352{
353 char *outfile = maketempname (++tempcount);
354 int odesc;
355 char buffer[BUFSIZE];
356
357 odesc = open (outfile, O_WRONLY | O_CREAT, 0666);
358
359 if (odesc < 0) pfatal_with_name (outfile);
360
361 while (1)
362 {
363 int nread = read (idesc, buffer, BUFSIZE);
364 write (odesc, buffer, nread);
365 if (!nread) break;
366 }
367
368 close (odesc);
369
370 return outfile;
371}
372\f
373/* Compare two lines, provided as pointers to pointers to text,
374 according to the specified set of keyfields */
375
376int
377compare_full (line1, line2)
378 char **line1, **line2;
379{
380 int i;
381
382 /* Compare using the first keyfield;
383 if that does not distinguish the lines, try the second keyfield; and so on. */
384
385 for (i = 0; i < num_keyfields; i++)
386 {
387 long length1, length2;
388 char *start1 = find_field (&keyfields[i], *line1, &length1);
389 char *start2 = find_field (&keyfields[i], *line2, &length2);
390 int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
391 start2, length2, *line2 - text_base);
392 if (tem)
393 {
394 if (keyfields[i].reverse)
395 return - tem;
396 return tem;
397 }
398 }
399
400 return 0; /* Lines match exactly */
401}
402
403/* Compare two lines described by structures
404 in which the first keyfield is identified in advance.
405 For positional sorting, assumes that the order of the lines in core
406 reflects their nominal order. */
407
408int
409compare_prepared (line1, line2)
410 struct lineinfo *line1, *line2;
411{
412 int i;
413 int tem;
414 char *text1, *text2;
415
416 /* Compare using the first keyfield, which has been found for us already */
417 if (keyfields->positional)
418 {
419 if (line1->text - text_base > line2->text - text_base)
420 tem = 1;
421 else
422 tem = -1;
423 }
424 else if (keyfields->numeric)
425 tem = line1->key.number - line2->key.number;
426 else
427 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, line2->key.text, line2->keylen, 0);
428 if (tem)
429 {
430 if (keyfields->reverse)
431 return - tem;
432 return tem;
433 }
434
435 text1 = line1->text;
436 text2 = line2->text;
437
438 /* Compare using the second keyfield;
439 if that does not distinguish the lines, try the third keyfield; and so on. */
440
441 for (i = 1; i < num_keyfields; i++)
442 {
443 long length1, length2;
444 char *start1 = find_field (&keyfields[i], text1, &length1);
445 char *start2 = find_field (&keyfields[i], text2, &length2);
446 int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
447 start2, length2, text2 - text_base);
448 if (tem)
449 {
450 if (keyfields[i].reverse)
451 return - tem;
452 return tem;
453 }
454 }
455
456 return 0; /* Lines match exactly */
457}
458
459/* Like compare_full but more general.
460 You can pass any strings, and you can say how many keyfields to use.
461 `pos1' and `pos2' should indicate the nominal positional ordering of
462 the two lines in the input. */
463
464int
465compare_general (str1, str2, pos1, pos2, use_keyfields)
466 char *str1, *str2;
467 long pos1, pos2;
468 int use_keyfields;
469{
470 int i;
471
472 /* Compare using the first keyfield;
473 if that does not distinguish the lines, try the second keyfield; and so on. */
474
475 for (i = 0; i < use_keyfields; i++)
476 {
477 long length1, length2;
478 char *start1 = find_field (&keyfields[i], str1, &length1);
479 char *start2 = find_field (&keyfields[i], str2, &length2);
480 int tem = compare_field (&keyfields[i], start1, length1, pos1, start2, length2, pos2);
481 if (tem)
482 {
483 if (keyfields[i].reverse)
484 return - tem;
485 return tem;
486 }
487 }
488
489 return 0; /* Lines match exactly */
490}
491
492/* Find the start and length of a field in `str' according to `keyfield'.
493 A pointer to the starting character is returned, and the length
494 is stored into the int that `lengthptr' points to. */
495
496char *
497find_field (keyfield, str, lengthptr)
498 struct keyfield *keyfield;
499 char *str;
500 long *lengthptr;
501{
502 char *start;
503 char *end;
504 char *(*fun) ();
505
506 if (keyfield->braced) fun = find_braced_pos;
507 else fun = find_pos;
508
509 start = ( *fun )(str, keyfield->startwords, keyfield->startchars,
510 keyfield->ignore_blanks);
511 if (keyfield->endwords < 0)
512 {
513 if (keyfield->braced)
514 end = find_braced_end (start);
515 else
516 {
517 end = start;
518 while (*end && *end != '\n') end++;
519 }
520 }
521 else
522 {
523 end = ( *fun )(str, keyfield->endwords, keyfield->endchars, 0);
524 if (end - str < start - str) end = start;
525 }
526 *lengthptr = end - start;
527 return start;
528}
529
530/* Find a pointer to a specified place within `str',
531 skipping (from the beginning) `words' words and then `chars' chars.
532 If `ignore_blanks' is nonzero, we skip all blanks
533 after finding the specified word. */
534
535char *
536find_pos (str, words, chars, ignore_blanks)
537 char *str;
538 int words, chars;
539 int ignore_blanks;
540{
541 int i;
542 char *p = str;
543
544 for (i = 0; i < words; i++)
545 {
546 char c;
547 /* Find next bunch of nonblanks and skip them. */
548 while ((c = *p) == ' ' || c == '\t') p++;
549 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) p++;
550 if (!*p || *p == '\n') return p;
551 }
552
553 while (*p == ' ' || *p == '\t') p++;
554
555 for (i = 0; i < chars; i++)
556 {
557 if (!*p || *p == '\n') break;
558 p++;
559 }
560 return p;
561}
562
563/* Like find_pos but assumes that each field is surrounded by braces
564 and that braces within fields are balanced. */
565
566char *
567find_braced_pos (str, words, chars, ignore_blanks)
568 char *str;
569 int words, chars;
570 int ignore_blanks;
571{
572 int i;
573 int bracelevel;
574 char *p = str;
575 char c;
576
577 for (i = 0; i < words; i++)
578 {
579 bracelevel = 1;
580 while ((c = *p++) != '{' && c != '\n' && c);
581 if (c != '{')
582 return p - 1;
583 while (bracelevel)
584 {
585 c = *p++;
586 if (c == '{') bracelevel++;
587 if (c == '}') bracelevel--;
588#if 0
589 if (c == '\\' || c == '@') c = *p++; /* \ quotes braces and \ */
590#endif
591 if (c == 0 || c == '\n') return p-1;
592 }
593 }
594
595 while ((c = *p++) != '{' && c != '\n' && c);
596
597 if (c != '{')
598 return p-1;
599
600 if (ignore_blanks)
601 while ((c = *p) == ' ' || c == '\t') p++;
602
603 for (i = 0; i < chars; i++)
604 {
605 if (!*p || *p == '\n') break;
606 p++;
607 }
608 return p;
609}
610
611/* Find the end of the balanced-brace field which starts at `str'.
612 The position returned is just before the closing brace. */
613
614char *
615find_braced_end (str)
616 char *str;
617{
618 int bracelevel;
619 char *p = str;
620 char c;
621
622 bracelevel = 1;
623 while (bracelevel)
624 {
625 c = *p++;
626 if (c == '{') bracelevel++;
627 if (c == '}') bracelevel--;
628#if 0
629 if (c == '\\' || c == '@') c = *p++;
630#endif
631 if (c == 0 || c == '\n') return p-1;
632 }
633 return p - 1;
634}
635
636long
637find_value (start, length)
638 char *start;
639 long length;
640{
641 while (length != 0L) {
642 if (isdigit(*start))
643 return atol(start);
644 length--;
645 start++;
646 }
647 return 0l;
648}
649
650/* Vector used to translate characters for comparison.
651 This is how we make all alphanumerics follow all else,
652 and ignore case in the first sorting. */
653int char_order[256];
654
655init_char_order ()
656{
657 int i;
658 for (i = 1; i < 256; i++)
659 char_order[i] = i;
660
661 for (i = '0'; i <= '9'; i++)
662 char_order[i] += 512;
663
664 for (i = 'a'; i <= 'z'; i++) {
665 char_order[i] = 512 + i;
666 char_order[i + 'A' - 'a'] = 512 + i;
667 }
668}
669
670/* Compare two fields (each specified as a start pointer and a character count)
671 according to `keyfield'. The sign of the value reports the relation between the fields */
672
673int
674compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
675 struct keyfield *keyfield;
676 char *start1;
677 long length1;
678 long pos1;
679 char *start2;
680 long length2;
681 long pos2;
682{
683 if (keyfields->positional)
684 {
685 if (pos1 > pos2)
686 return 1;
687 else
688 return -1;
689 }
690 if (keyfield->numeric)
691 {
692 long value = find_value (start1, length1) - find_value (start2, length2);
693 if (value > 0) return 1;
694 if (value < 0) return -1;
695 return 0;
696 }
697 else
698 {
699 char *p1 = start1;
700 char *p2 = start2;
701 char *e1 = start1 + length1;
702 char *e2 = start2 + length2;
703
704 int fold_case = keyfield->fold_case;
705
706 while (1)
707 {
708 int c1, c2;
709
710 if (p1 == e1) c1 = 0;
711 else c1 = *p1++;
712 if (p2 == e2) c2 = 0;
713 else c2 = *p2++;
714
715 if (char_order[c1] != char_order[c2])
716 return char_order[c1] - char_order[c2];
717 if (!c1) break;
718 }
719
720 /* Strings are equal except possibly for case. */
721 p1 = start1;
722 p2 = start2;
723 while (1)
724 {
725 int c1, c2;
726
727 if (p1 == e1) c1 = 0;
728 else c1 = *p1++;
729 if (p2 == e2) c2 = 0;
730 else c2 = *p2++;
731
732 if (c1 != c2)
733 /* Reverse sign here so upper case comes out last. */
734 return c2 - c1;
735 if (!c1) break;
736 }
737
738 return 0;
739 }
740}
741\f
742/* A `struct linebuffer' is a structure which holds a line of text.
743 `readline' reads a line from a stream into a linebuffer
744 and works regardless of the length of the line. */
745
746struct linebuffer
747 {
748 long size;
749 char *buffer;
750 };
751
752/* Initialize a linebuffer for use */
753
754void
755initbuffer (linebuffer)
756 struct linebuffer *linebuffer;
757{
758 linebuffer->size = 200;
759 linebuffer->buffer = (char *) xmalloc (200);
760}
761
762/* Read a line of text from `stream' into `linebuffer'.
763 Return the length of the line. */
764
765long
766readline (linebuffer, stream)
767 struct linebuffer *linebuffer;
768 FILE *stream;
769{
770 char *buffer = linebuffer->buffer;
771 char *p = linebuffer->buffer;
772 char *end = p + linebuffer->size;
773
774 while (1)
775 {
776 int c = getc (stream);
777 if (p == end)
778 {
779 buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
780 p += buffer - linebuffer->buffer;
781 end += buffer - linebuffer->buffer;
782 linebuffer->buffer = buffer;
783 }
784 if (c < 0 || c == '\n')
785 {
786 *p = 0;
787 break;
788 }
789 *p++ = c;
790 }
791
792 return p - buffer;
793}
794\f
795/* Sort an input file too big to sort in core. */
796
797void
798sort_offline (infile, nfiles, total, outfile)
799 char *infile;
800 long total;
801 char *outfile;
802{
803 int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT; /* More than enough */
804 char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
805 FILE *istream = fopen (infile, "r");
806 int i;
807 struct linebuffer lb;
808 long linelength;
809 int failure = 0;
810
811 initbuffer (&lb);
812
813 /* Read in one line of input data. */
814
815 linelength = readline (&lb, istream);
816
817 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
818 {
819 error ("%s: not a texinfo index file", infile);
820 return;
821 }
822
823 /* Split up the input into `ntemps' temporary files, or maybe fewer,
824 and put the new files' names into `tempfiles' */
825
826 for (i = 0; i < ntemps; i++)
827 {
828 char *outname = maketempname (++tempcount);
829 FILE *ostream = fopen (outname, "w");
830 long tempsize = 0;
831
832 if (!ostream) pfatal_with_name (outname);
833 tempfiles[i] = outname;
834
835 /* Copy lines into this temp file as long as it does not make file "too big"
836 or until there are no more lines. */
837
838 while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
839 {
840 tempsize += linelength + 1;
841 fputs (lb.buffer, ostream);
842 putc ('\n', ostream);
843
844 /* Read another line of input data. */
845
846 linelength = readline (&lb, istream);
847 if (!linelength && feof (istream)) break;
848
849 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
850 {
851 error ("%s: not a texinfo index file", infile);
852 failure = 1;
853 goto fail;
854 }
855 }
856 fclose (ostream);
857 if (feof (istream)) break;
858 }
859
860 free (lb.buffer);
861
862 fail:
863 /* Record number of temp files we actually needed. */
864
865 ntemps = i;
866
867 /* Sort each tempfile into another tempfile.
868 Delete the first set of tempfiles and put the names of the second into `tempfiles' */
869
870 for (i = 0; i < ntemps; i++)
871 {
872 char *newtemp = maketempname (++tempcount);
873 sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp);
874 if (!keep_tempfiles)
875 unlink (tempfiles[i]);
876 tempfiles[i] = newtemp;
877 }
878
879 if (failure)
880 return;
881
882 /* Merge the tempfiles together and indexify */
883
884 merge_files (tempfiles, ntemps, outfile);
885}
886\f
887/* Sort `infile', whose size is `total',
888 assuming that is small enough to be done in-core,
889 then indexify it and send the output to `outfile' (or to stdout). */
890
891void
892sort_in_core (infile, total, outfile)
893 char *infile;
894 long total;
895 char *outfile;
896{
897 char **nextline;
898 char *data = (char *) xmalloc (total + 1);
899 char *file_data;
900 long file_size;
901 int i;
902 FILE *ostream = stdout;
903 struct lineinfo *lineinfo;
904
905 /* Read the contents of the file into the moby array `data' */
906
907 int desc = open (infile, 0, 0);
908
909 if (desc < 0)
910 fatal ("failure reopening %s", infile);
911 for (file_size = 0; ; )
912 {
913 if ((i = read (desc, data + file_size, total - file_size)) <= 0)
914 break;
915 file_size += i;
916 }
917 file_data = data;
918 data[file_size] = 0;
919
920 close (desc);
921
922 if (file_size > 0 && data[0] != '\\' && data[0] != '@')
923 {
924 error ("%s: not a texinfo index file", infile);
925 return;
926 }
927
928 init_char_order ();
929
930 /* Sort routines want to know this address */
931
932 text_base = data;
933
934 /* Create the array of pointers to lines, with a default size frequently enough. */
935
936 nlines = total / 50;
937 if (!nlines) nlines = 2;
938 linearray = (char **) xmalloc (nlines * sizeof (char *));
939
940 /* `nextline' points to the next free slot in this array.
941 `nlines' is the allocated size. */
942
943 nextline = linearray;
944
945 /* Parse the input file's data, and make entries for the lines. */
946
947 nextline = parsefile (infile, nextline, file_data, file_size);
948 if (nextline == 0)
949 {
950 error ("%s: not a texinfo index file", infile);
951 return;
952 }
953
954 /* Sort the lines */
955
956 /* If we have enough space, find the first keyfield of each line in advance.
957 Make a `struct lineinfo' for each line, which records the keyfield
958 as well as the line, and sort them. */
959
960 lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
961
962 if (lineinfo)
963 {
964 struct lineinfo *lp;
965 char **p;
966
967 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
968 {
969 lp->text = *p;
970 lp->key.text = find_field (keyfields, *p, &lp->keylen);
971 if (keyfields->numeric)
972 lp->key.number = find_value (lp->key.text, lp->keylen);
973 }
974
975 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared);
976
977 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
978 *p = lp->text;
979
980 free (lineinfo);
981 }
982 else
983 qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
984
985 /* Open the output file */
986
987 if (outfile)
988 {
989 ostream = fopen (outfile, "w");
990 if (!ostream)
991 pfatal_with_name (outfile);
992 }
993
994 writelines (linearray, nextline - linearray, ostream);
995 if (outfile) fclose (ostream);
996
997 free (linearray);
998 free (data);
999}
1000\f
1001/* Parse an input string in core into lines.
1002 DATA is the input string, and SIZE is its length.
1003 Data goes in LINEARRAY starting at NEXTLINE.
1004 The value returned is the first entry in LINEARRAY still unused.
1005 Value 0 means input file contents are invalid. */
1006
1007char **
1008parsefile (filename, nextline, data, size)
1009 char *filename;
1010 char **nextline;
1011 char *data;
1012 long size;
1013{
1014 char *p, *end;
1015 char **line = nextline;
1016
1017 p = data;
1018 end = p + size;
1019 *end = 0;
1020
1021 while (p != end)
1022 {
1023 if (p[0] != '\\' && p[0] != '@')
1024 return 0;
1025
1026 *line = p;
1027 while (*p && *p != '\n') p++;
1028 if (p != end) p++;
1029
1030 line++;
1031 if (line == linearray + nlines)
1032 {
1033 char **old = linearray;
1034 linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4));
1035 line += linearray - old;
1036 }
1037 }
1038
1039 return line;
1040}
1041\f
1042/* Indexification is a filter applied to the sorted lines
1043 as they are being written to the output file.
1044 Multiple entries for the same name, with different page numbers,
1045 get combined into a single entry with multiple page numbers.
1046 The first braced field, which is used for sorting, is discarded.
1047 However, its first character is examined, folded to lower case,
1048 and if it is different from that in the previous line fed to us
1049 a \initial line is written with one argument, the new initial.
1050
1051 If an entry has four braced fields, then the second and third
1052 constitute primary and secondary names.
1053 In this case, each change of primary name
1054 generates a \primary line which contains only the primary name,
1055 and in between these are \secondary lines which contain
1056 just a secondary name and page numbers.
1057*/
1058
1059/* The last primary name we wrote a \primary entry for.
1060 If only one level of indexing is being done, this is the last name seen */
1061char *lastprimary;
1062int lastprimarylength; /* Length of storage allocated for lastprimary */
1063
1064/* Similar, for the secondary name. */
1065char *lastsecondary;
1066int lastsecondarylength;
1067
1068/* Zero if we are not in the middle of writing an entry.
1069 One if we have written the beginning of an entry but have not
1070 yet written any page numbers into it.
1071 Greater than one if we have written the beginning of an entry
1072 plus at least one page number. */
1073int pending;
1074
1075/* The initial (for sorting purposes) of the last primary entry written.
1076 When this changes, a \initial {c} line is written */
1077
1078char * lastinitial;
1079
1080int lastinitiallength;
1081
1082/* When we need a string of length 1 for the value of lastinitial,
1083 store it here. */
1084
1085char lastinitial1[2];
1086
1087/* Initialize static storage for writing an index */
1088
1089void
1090init_index ()
1091{
1092 pending = 0;
1093 lastinitial = lastinitial1;
1094 lastinitial1[0] = 0;
1095 lastinitial1[1] = 0;
1096 lastinitiallength = 0;
1097 lastprimarylength = 100;
1098 lastprimary = (char *) xmalloc (lastprimarylength + 1);
1099 bzero (lastprimary, lastprimarylength + 1);
1100 lastsecondarylength = 100;
1101 lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
1102 bzero (lastsecondary, lastsecondarylength + 1);
1103}
1104
1105/* Indexify. Merge entries for the same name,
1106 insert headers for each initial character, etc. */
1107
1108indexify (line, ostream)
1109 char *line;
1110 FILE *ostream;
1111{
1112 char *primary, *secondary, *pagenumber;
1113 int primarylength, secondarylength, pagelength;
1114 int len = strlen (line);
1115 int nosecondary;
1116 int initiallength;
1117 char *initial;
1118 char initial1[2];
1119 register char *p;
1120
1121 /* First, analyze the parts of the entry fed to us this time */
1122
1123 p = find_braced_pos (line, 0, 0, 0);
1124 if (*p == '{')
1125 {
1126 initial = p;
1127 /* Get length of inner pair of braces starting at p,
1128 including that inner pair of braces. */
1129 initiallength = find_braced_end (p + 1) + 1 - p;
1130 }
1131 else
1132 {
1133 initial = initial1;
1134 initial1[0] = *p;
1135 initial1[1] = 0;
1136 initiallength = 1;
1137
1138 if (initial1[0] >= 'a' && initial1[0] <= 'z')
1139 initial1[0] -= 040;
1140 }
1141
1142 pagenumber = find_braced_pos (line, 1, 0, 0);
1143 pagelength = find_braced_end (pagenumber) - pagenumber;
1144 if (pagelength == 0)
1145 abort ();
1146
1147 primary = find_braced_pos (line, 2, 0, 0);
1148 primarylength = find_braced_end (primary) - primary;
1149
1150 secondary = find_braced_pos (line, 3, 0, 0);
1151 nosecondary = !*secondary;
1152 if (!nosecondary)
1153 secondarylength = find_braced_end (secondary) - secondary;
1154
1155 /* If the primary is different from before, make a new primary entry */
1156 if (strncmp (primary, lastprimary, primarylength))
1157 {
1158 /* Close off current secondary entry first, if one is open */
1159 if (pending)
1160 {
1161 fputs ("}\n", ostream);
1162 pending = 0;
1163 }
1164
1165 /* If this primary has a different initial, include an entry for the initial */
1166 if (initiallength != lastinitiallength ||
1167 strncmp (initial, lastinitial, initiallength))
1168 {
1169 fprintf (ostream, "\\initial {");
1170 fwrite (initial, 1, initiallength, ostream);
1171 fprintf (ostream, "}\n", initial);
1172 if (initial == initial1)
1173 {
1174 lastinitial = lastinitial1;
1175 *lastinitial1 = *initial1;
1176 }
1177 else
1178 {
1179 lastinitial = initial;
1180 }
1181 lastinitiallength = initiallength;
1182 }
1183
1184 /* Make the entry for the primary. */
1185 if (nosecondary)
1186 fputs ("\\entry {", ostream);
1187 else
1188 fputs ("\\primary {", ostream);
1189 fwrite (primary, primarylength, 1, ostream);
1190 if (nosecondary)
1191 {
1192 fputs ("}{", ostream);
1193 pending = 1;
1194 }
1195 else
1196 fputs ("}\n", ostream);
1197
1198 /* Record name of most recent primary */
1199 if (lastprimarylength < primarylength)
1200 {
1201 lastprimarylength = primarylength + 100;
1202 lastprimary = (char *) xrealloc (lastprimary,
1203 1 + lastprimarylength);
1204 }
1205 strncpy (lastprimary, primary, primarylength);
1206 lastprimary[primarylength] = 0;
1207
1208 /* There is no current secondary within this primary, now */
1209 lastsecondary[0] = 0;
1210 }
1211
1212 /* Should not have an entry with no subtopic following one with a subtopic */
1213
1214 if (nosecondary && *lastsecondary)
1215 error ("entry %s follows an entry with a secondary name", line);
1216
1217 /* Start a new secondary entry if necessary */
1218 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1219 {
1220 if (pending)
1221 {
1222 fputs ("}\n", ostream);
1223 pending = 0;
1224 }
1225
1226 /* Write the entry for the secondary. */
1227 fputs ("\\secondary {", ostream);
1228 fwrite (secondary, secondarylength, 1, ostream);
1229 fputs ("}{", ostream);
1230 pending = 1;
1231
1232 /* Record name of most recent secondary */
1233 if (lastsecondarylength < secondarylength)
1234 {
1235 lastsecondarylength = secondarylength + 100;
1236 lastsecondary = (char *) xrealloc (lastsecondary,
1237 1 + lastsecondarylength);
1238 }
1239 strncpy (lastsecondary, secondary, secondarylength);
1240 lastsecondary[secondarylength] = 0;
1241 }
1242
1243 /* Here to add one more page number to the current entry */
1244 if (pending++ != 1)
1245 fputs (", ", ostream); /* Punctuate first, if this is not the first */
1246 fwrite (pagenumber, pagelength, 1, ostream);
1247}
1248
1249/* Close out any unfinished output entry */
1250
1251void
1252finish_index (ostream)
1253 FILE *ostream;
1254{
1255 if (pending)
1256 fputs ("}\n", ostream);
1257 free (lastprimary);
1258 free (lastsecondary);
1259}
1260\f
1261/* Copy the lines in the sorted order.
1262 Each line is copied out of the input file it was found in. */
1263
1264void
1265writelines (linearray, nlines, ostream)
1266 char **linearray;
1267 int nlines;
1268 FILE *ostream;
1269{
1270 char **stop_line = linearray + nlines;
1271 char **next_line;
1272
1273 init_index ();
1274
1275 /* Output the text of the lines, and free the buffer space */
1276
1277 for (next_line = linearray; next_line != stop_line; next_line++)
1278 {
1279 /* If -u was specified, output the line only if distinct from previous one. */
1280 if (next_line == linearray
1281 /* Compare previous line with this one, using only the explicitly specd keyfields */
1282 || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
1283 {
1284 char *p = *next_line;
1285 char c;
1286 while ((c = *p++) && c != '\n');
1287 *(p-1) = 0;
1288 indexify (*next_line, ostream);
1289 }
1290 }
1291
1292 finish_index (ostream);
1293}
1294\f
1295/* Assume (and optionally verify) that each input file is sorted;
1296 merge them and output the result.
1297 Returns nonzero if any input file fails to be sorted.
1298
1299 This is the high-level interface that can handle an unlimited number of files. */
1300
1301#define MAX_DIRECT_MERGE 10
1302
1303int
1304merge_files (infiles, nfiles, outfile)
1305 char **infiles;
1306 int nfiles;
1307 char *outfile;
1308{
1309 char **tempfiles;
1310 int ntemps;
1311 int i;
1312 int value = 0;
1313 int start_tempcount = tempcount;
1314
1315 if (nfiles <= MAX_DIRECT_MERGE)
1316 return merge_direct (infiles, nfiles, outfile);
1317
1318 /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1319 making a temporary file to hold each group's result. */
1320
1321 ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
1322 tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1323 for (i = 0; i < ntemps; i++)
1324 {
1325 int nf = MAX_DIRECT_MERGE;
1326 if (i + 1 == ntemps)
1327 nf = nfiles - i * MAX_DIRECT_MERGE;
1328 tempfiles[i] = maketempname (++tempcount);
1329 value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
1330 }
1331
1332 /* All temporary files that existed before are no longer needed
1333 since their contents have been merged into our new tempfiles.
1334 So delete them. */
1335 flush_tempfiles (start_tempcount);
1336
1337 /* Now merge the temporary files we created. */
1338
1339 merge_files (tempfiles, ntemps, outfile);
1340
1341 free (tempfiles);
1342
1343 return value;
1344}
1345\f
1346/* Assume (and optionally verify) that each input file is sorted;
1347 merge them and output the result.
1348 Returns nonzero if any input file fails to be sorted.
1349
1350 This version of merging will not work if the number of
1351 input files gets too high. Higher level functions
1352 use it only with a bounded number of input files. */
1353
1354int
1355merge_direct (infiles, nfiles, outfile)
1356 char **infiles;
1357 int nfiles;
1358 char *outfile;
1359{
1360 char **ip = infiles;
1361 struct linebuffer *lb1, *lb2;
1362 struct linebuffer **thisline, **prevline;
1363 FILE **streams;
1364 int i;
1365 int nleft;
1366 int lossage = 0;
1367 int *file_lossage;
1368 struct linebuffer *prev_out = 0;
1369 FILE *ostream = stdout;
1370
1371 if (outfile)
1372 {
1373 ostream = fopen (outfile, "w");
1374 }
1375 if (!ostream) pfatal_with_name (outfile);
1376
1377 init_index ();
1378
1379 if (nfiles == 0)
1380 {
1381 if (outfile)
1382 fclose (ostream);
1383 return 0;
1384 }
1385
1386 /* For each file, make two line buffers.
1387 Also, for each file, there is an element of `thisline'
1388 which points at any time to one of the file's two buffers,
1389 and an element of `prevline' which points to the other buffer.
1390 `thisline' is supposed to point to the next available line from the file,
1391 while `prevline' holds the last file line used,
1392 which is remembered so that we can verify that the file is properly sorted. */
1393
1394 /* lb1 and lb2 contain one buffer each per file */
1395 lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1396 lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1397
1398 /* thisline[i] points to the linebuffer holding the next available line in file i,
1399 or is zero if there are no lines left in that file. */
1400 thisline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *));
1401 /* prevline[i] points to the linebuffer holding the last used line from file i.
1402 This is just for verifying that file i is properly sorted. */
1403 prevline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *));
1404 /* streams[i] holds the input stream for file i. */
1405 streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1406 /* file_lossage[i] is nonzero if we already know file i is not properly sorted. */
1407 file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1408
1409 /* Allocate and initialize all that storage */
1410
1411 for (i = 0; i < nfiles; i++)
1412 {
1413 initbuffer (&lb1[i]);
1414 initbuffer (&lb2[i]);
1415 thisline[i] = &lb1[i];
1416 prevline[i] = &lb2[i];
1417 file_lossage[i] = 0;
1418 streams[i] = fopen (infiles[i], "r");
1419 if (!streams[i])
1420 pfatal_with_name (infiles[i]);
1421
1422 readline (thisline[i], streams[i]);
1423 }
1424
1425 /* Keep count of number of files not at eof */
1426 nleft = nfiles;
1427
1428 while (nleft)
1429 {
1430 struct linebuffer *best = 0;
1431 struct linebuffer *exch;
1432 int bestfile = -1;
1433 int i;
1434
1435 /* Look at the next avail line of each file; choose the least one. */
1436
1437 for (i = 0; i < nfiles; i++)
1438 {
1439 if (thisline[i] &&
1440 (!best ||
1441 0 < compare_general (best->buffer, thisline[i]->buffer,
1442 (long) bestfile, (long) i, num_keyfields)))
1443 {
1444 best = thisline[i];
1445 bestfile = i;
1446 }
1447 }
1448
1449 /* Output that line, unless it matches the previous one and we don't want duplicates */
1450
1451 if (!(prev_out &&
1452 !compare_general (prev_out->buffer, best->buffer, 0L, 1L, num_keyfields - 1)))
1453 indexify (best->buffer, ostream);
1454 prev_out = best;
1455
1456 /* Now make the line the previous of its file, and fetch a new line from that file */
1457
1458 exch = prevline[bestfile];
1459 prevline[bestfile] = thisline[bestfile];
1460 thisline[bestfile] = exch;
1461
1462 while (1)
1463 {
1464 /* If the file has no more, mark it empty */
1465
1466 if (feof (streams[bestfile]))
1467 {
1468 thisline[bestfile] = 0;
1469 nleft--; /* Update the number of files still not empty */
1470 break;
1471 }
1472 readline (thisline[bestfile], streams[bestfile]);
1473 if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])) break;
1474 }
1475 }
1476
1477 finish_index (ostream);
1478
1479 /* Free all storage and close all input streams */
1480
1481 for (i = 0; i < nfiles; i++)
1482 {
1483 fclose (streams[i]);
1484 free (lb1[i].buffer);
1485 free (lb2[i].buffer);
1486 }
1487 free (file_lossage);
1488 free (lb1);
1489 free (lb2);
1490 free (thisline);
1491 free (prevline);
1492 free (streams);
1493
1494 if (outfile)
1495 fclose (ostream);
1496
1497 return lossage;
1498}
1499\f
1500/* Print error message and exit. */
1501
1502fatal (s1, s2)
1503 char *s1, *s2;
1504{
1505 error (s1, s2);
1506 exit (EXIT_FATAL);
1507}
1508
1509/* Print error message. `s1' is printf control string, `s2' is arg for it. */
1510
1511error (s1, s2)
1512 char *s1, *s2;
1513{
1514 printf ("texindex: ");
1515 printf (s1, s2);
1516 printf ("\n");
1517}
1518
1519perror_with_name (name)
1520 char *name;
1521{
1522 char *s;
1523
1524 if (errno < sys_nerr)
1525 s = concat ("", sys_errlist[errno], " for %s");
1526 else
1527 s = "cannot open %s";
1528 error (s, name);
1529}
1530
1531pfatal_with_name (name)
1532 char *name;
1533{
1534 char *s;
1535
1536 if (errno < sys_nerr)
1537 s = concat ("", sys_errlist[errno], " for %s");
1538 else
1539 s = "cannot open %s";
1540 fatal (s, name);
1541}
1542
1543/* Return a newly-allocated string whose contents concatenate those of s1, s2, s3. */
1544
1545char *
1546concat (s1, s2, s3)
1547 char *s1, *s2, *s3;
1548{
1549 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
1550 char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
1551
1552 strcpy (result, s1);
1553 strcpy (result + len1, s2);
1554 strcpy (result + len1 + len2, s3);
1555 *(result + len1 + len2 + len3) = 0;
1556
1557 return result;
1558}
1559
1560/* Like malloc but get fatal error if memory is exhausted. */
1561
1562int
1563xmalloc (size)
1564 int size;
1565{
1566 int result = malloc (size);
1567 if (!result)
1568 fatal ("virtual memory exhausted", 0);
1569 return result;
1570}
1571
1572
1573int
1574xrealloc (ptr, size)
1575 char *ptr;
1576 int size;
1577{
1578 int result = realloc (ptr, size);
1579 if (!result)
1580 fatal ("virtual memory exhausted");
1581 return result;
1582}
1583
1584bzero (b, length)
1585 register char *b;
1586 register int length;
1587{
1588#ifdef VMS
1589 short zero = 0;
1590 long max_str = 65535;
1591 long len;
1592
1593 while (length > max_str)
1594 {
1595 (void) LIB$MOVC5 (&zero, &zero, &zero, &max_str, b);
1596 length -= max_str;
1597 b += max_str;
1598 }
1599 len = length;
1600 (void) LIB$MOVC5 (&zero, &zero, &zero, &len, b);
1601#else
1602 while (length-- > 0)
1603 *b++ = 0;
1604#endif /* not VMS */
1605}