manual page distributed with 4.2BSD
[unix-history] / usr / src / usr.bin / ctags / ctags.c
CommitLineData
3e0cab55
KB
1#ifndef lint
2static char *sccsid = "@(#)ctags.c 4.4 (Berkeley) 8/30/82";
3#endif
2e314d68 4
f5c61c07
KB
5#include <stdio.h>
6#include <ctype.h>
16bc359f
KB
7
8/*
9927b527 9 * ctags: create a tags file
16bc359f
KB
10 */
11
12#define reg register
13#define logical char
14
15#define TRUE (1)
16#define FALSE (0)
17
18#define iswhite(arg) (_wht[arg]) /* T if char is white */
19#define begtoken(arg) (_btk[arg]) /* T if char can start token */
20#define intoken(arg) (_itk[arg]) /* T if char can be in token */
21#define endtoken(arg) (_etk[arg]) /* T if char ends tokens */
22#define isgood(arg) (_gd[arg]) /* T if char can be after ')' */
23
24#define max(I1,I2) (I1 > I2 ? I1 : I2)
25
26struct nd_st { /* sorting structure */
2e314d68 27 char *entry; /* function or type name */
16bc359f 28 char *file; /* file name */
2e314d68 29 logical f; /* use pattern or line no */
f5c61c07 30 int lno; /* for -x option */
16bc359f
KB
31 char *pat; /* search pattern */
32 logical been_warned; /* set if noticed dup */
33 struct nd_st *left,*right; /* left and right sons */
34};
35
36long ftell();
16bc359f
KB
37typedef struct nd_st NODE;
38
39logical number, /* T if on line starting with # */
16bc359f
KB
40 gotone, /* found a func already on line */
41 /* boolean "func" (see init) */
42 _wht[0177],_etk[0177],_itk[0177],_btk[0177],_gd[0177];
43
2e314d68
KB
44 /* typedefs are recognized using a simple finite automata,
45 * tydef is its state variable.
46 */
47typedef enum {none, begin, middle, end } TYST;
48
49TYST tydef = none;
50
51char searchar = '/'; /* use /.../ searches */
16bc359f
KB
52
53int lineno; /* line number of current line */
f5c61c07 54char line[4*BUFSIZ], /* current input line */
16bc359f
KB
55 *curfile, /* current input file name */
56 *outfile= "tags", /* output file */
57 *white = " \f\t\n", /* white chars */
58 *endtk = " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
59 /* token ending chars */
60 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
61 /* token starting chars */
2e314d68
KB
62 *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz0123456789",
63 /* valid in-token chars */
16bc359f
KB
64 *notgd = ",;"; /* non-valid after-function chars */
65
66int file_num; /* current file number */
67int aflag; /* -a: append to tags */
2e314d68 68int tflag; /* -t: create tags for typedefs */
16bc359f
KB
69int uflag; /* -u: update tags */
70int wflag; /* -w: suppress warnings */
9927b527 71int vflag; /* -v: create vgrind style index output */
f5c61c07
KB
72int xflag; /* -x: create cxref style output */
73
74char lbuf[BUFSIZ];
16bc359f
KB
75
76FILE *inf, /* ioptr for current input file */
77 *outf; /* ioptr for tags file */
78
79long lineftell; /* ftell after getc( inf ) == '\n' */
80
81NODE *head; /* the head of the sorted binary tree */
82
f5c61c07
KB
83char *savestr();
84char *rindex();
16bc359f
KB
85main(ac,av)
86int ac;
87char *av[];
88{
89 char cmd[100];
90 int i;
91
92 while (ac > 1 && av[1][0] == '-') {
93 for (i=1; av[1][i]; i++) {
94 switch(av[1][i]) {
2e314d68
KB
95 case 'B':
96 searchar='?';
97 break;
98 case 'F':
99 searchar='/';
100 break;
16bc359f
KB
101 case 'a':
102 aflag++;
103 break;
2e314d68
KB
104 case 't':
105 tflag++;
106 break;
16bc359f
KB
107 case 'u':
108 uflag++;
109 break;
110 case 'w':
111 wflag++;
112 break;
9927b527
KB
113 case 'v':
114 vflag++;
115 xflag++;
116 break;
f5c61c07
KB
117 case 'x':
118 xflag++;
119 break;
16bc359f
KB
120 default:
121 goto usage;
122 }
123 }
124 ac--; av++;
125 }
126
127 if (ac <= 1) {
2e314d68 128usage: printf("Usage: ctags [-BFatuwvx] file ...\n");
16bc359f
KB
129 exit(1);
130 }
131
132 init(); /* set up boolean "functions" */
133 /*
134 * loop through files finding functions
135 */
136 for (file_num = 1; file_num < ac; file_num++)
2e314d68 137 find_entries(av[file_num]);
16bc359f 138
f5c61c07 139 if (xflag) {
2e314d68 140 put_entries(head);
f5c61c07
KB
141 exit(0);
142 }
16bc359f
KB
143 if (uflag) {
144 for (i=1; i<ac; i++) {
f5c61c07
KB
145 sprintf(cmd,
146 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
147 outfile, av[i], outfile);
16bc359f
KB
148 system(cmd);
149 }
150 aflag++;
151 }
f5c61c07
KB
152 outf = fopen(outfile, aflag ? "a" : "w");
153 if (outf == NULL) {
16bc359f
KB
154 perror(outfile);
155 exit(1);
156 }
2e314d68 157 put_entries(head);
f5c61c07
KB
158 fclose(outf);
159 if (uflag) {
160 sprintf(cmd, "sort %s -o %s", outfile, outfile);
161 system(cmd);
162 }
16bc359f
KB
163 exit(0);
164}
165
166/*
f5c61c07 167 * This routine sets up the boolean psuedo-functions which work
16bc359f 168 * by seting boolean flags dependent upon the corresponding character
16bc359f
KB
169 * Every char which is NOT in that string is not a white char. Therefore,
170 * all of the array "_wht" is set to FALSE, and then the elements
171 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
172 * of a char is TRUE if it is the string "white", else FALSE.
16bc359f
KB
173 */
174init()
175{
176
177 reg char *sp;
178 reg int i;
179
180 for (i = 0; i < 0177; i++) {
181 _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
182 _gd[i] = TRUE;
183 }
184 for (sp = white; *sp; sp++)
185 _wht[*sp] = TRUE;
186 for (sp = endtk; *sp; sp++)
187 _etk[*sp] = TRUE;
188 for (sp = intk; *sp; sp++)
189 _itk[*sp] = TRUE;
190 for (sp = begtk; *sp; sp++)
191 _btk[*sp] = TRUE;
192 for (sp = notgd; *sp; sp++)
193 _gd[*sp] = FALSE;
194}
195
196/*
f5c61c07 197 * This routine opens the specified file and calls the function
2e314d68 198 * which finds the function and type definitions.
16bc359f 199 */
2e314d68 200find_entries(file)
16bc359f
KB
201char *file;
202{
f5c61c07 203 char *cp;
16bc359f
KB
204
205 if ((inf=fopen(file,"r")) == NULL) {
206 perror(file);
207 return;
208 }
f5c61c07
KB
209 curfile = savestr(file);
210 cp = rindex(file, '.');
3e0cab55
KB
211 if (cp && (cp[1] != 'c' || cp[1] != 'h') && cp[2] == 0) {
212 if (PF_funcs(inf) == 0) {
213 rewind(inf);
214 C_entries();
f5c61c07 215 }
3e0cab55
KB
216 } else
217 C_entries();
16bc359f
KB
218 fclose(inf);
219}
220
2e314d68 221pfnote(name, ln, f)
f5c61c07 222 char *name;
2e314d68 223 logical f; /* f == TRUE when function */
f5c61c07
KB
224{
225 register char *fp;
226 register NODE *np;
227 char nbuf[BUFSIZ];
228
229 if ((np = (NODE *) malloc(sizeof (NODE))) == NULL) {
2e314d68
KB
230 fprintf(stderr, "ctags: too many entries to sort\n");
231 put_entries(head);
f5c61c07
KB
232 free_tree(head);
233 head = np = (NODE *) malloc(sizeof (NODE));
234 }
235 if (xflag == 0 && !strcmp(name, "main")) {
236 fp = rindex(curfile, '/');
237 if (fp == 0)
238 fp = curfile;
239 else
240 fp++;
241 sprintf(nbuf, "M%s", fp);
242 fp = rindex(nbuf, '.');
243 if (fp && fp[2] == 0)
244 *fp = 0;
245 name = nbuf;
246 }
2e314d68 247 np->entry = savestr(name);
f5c61c07 248 np->file = curfile;
2e314d68 249 np->f = f;
f5c61c07
KB
250 np->lno = ln;
251 np->left = np->right = 0;
252 if (xflag == 0) {
253 lbuf[50] = 0;
254 strcat(lbuf, "$");
255 lbuf[50] = 0;
256 }
257 np->pat = savestr(lbuf);
258 if (head == NULL)
259 head = np;
260 else
261 add_node(np, head);
262}
263
16bc359f 264/*
2e314d68 265 * This routine finds functions and typedefs in C syntax and adds them
16bc359f
KB
266 * to the list.
267 */
2e314d68 268C_entries()
16bc359f 269{
f5c61c07
KB
270 register int c;
271 register char *token, *tp;
2e314d68
KB
272 logical incomm, inquote, inchar, midtoken;
273 int level;
f5c61c07
KB
274 char *sp;
275 char tok[BUFSIZ];
16bc359f 276
f5c61c07
KB
277 lineno = 1;
278 number = gotone = midtoken = inquote = inchar = incomm = FALSE;
279 level = 0;
16bc359f 280 sp = tp = token = line;
f5c61c07
KB
281 for (;;) {
282 *sp=c=getc(inf);
283 if (feof(inf))
284 break;
285 if (c == '\n')
286 lineno++;
16bc359f
KB
287 if (c == '\\') {
288 c = *++sp = getc(inf);
16bc359f
KB
289 if (c = '\n')
290 c = ' ';
f5c61c07 291 } else if (incomm) {
16bc359f 292 if (c == '*') {
f5c61c07 293 while ((*++sp=c=getc(inf)) == '*')
16bc359f 294 continue;
f5c61c07
KB
295 if (c == '\n')
296 lineno++;
16bc359f 297 if (c == '/')
f5c61c07 298 incomm = FALSE;
16bc359f 299 }
f5c61c07 300 } else if (inquote) {
16bc359f
KB
301 /*
302 * Too dumb to know about \" not being magic, but
303 * they usually occur in pairs anyway.
304 */
f5c61c07 305 if (c == '"')
16bc359f
KB
306 inquote = FALSE;
307 continue;
f5c61c07
KB
308 } else if (inchar) {
309 if (c == '\'')
16bc359f
KB
310 inchar = FALSE;
311 continue;
f5c61c07
KB
312 } else switch (c) {
313 case '"':
16bc359f 314 inquote = TRUE;
f5c61c07
KB
315 continue;
316 case '\'':
16bc359f 317 inchar = TRUE;
f5c61c07
KB
318 continue;
319 case '/':
16bc359f 320 if ((*++sp=c=getc(inf)) == '*')
f5c61c07 321 incomm = TRUE;
16bc359f 322 else
f5c61c07
KB
323 ungetc(*sp, inf);
324 continue;
325 case '#':
16bc359f 326 if (sp == line)
f5c61c07
KB
327 number = TRUE;
328 continue;
329 case '{':
2e314d68
KB
330 if (tydef == begin) {
331 tydef=middle;
332 }
f5c61c07
KB
333 level++;
334 continue;
335 case '}':
336 if (sp == line)
337 level = 0; /* reset */
16bc359f 338 else
f5c61c07 339 level--;
2e314d68
KB
340 if (!level && tydef==middle) {
341 tydef=end;
342 }
f5c61c07
KB
343 continue;
344 }
2e314d68 345 if (!level && !inquote && !incomm && gotone == FALSE) {
16bc359f
KB
346 if (midtoken) {
347 if (endtoken(c)) {
2e314d68 348 int f;
f5c61c07 349 int pfline = lineno;
3e0cab55 350 if (start_entry(&sp,token,&f)) {
16bc359f
KB
351 strncpy(tok,token,tp-token+1);
352 tok[tp-token+1] = 0;
f5c61c07 353 getline();
2e314d68
KB
354 pfnote(tok, pfline, f);
355 gotone = f; /* function */
16bc359f
KB
356 }
357 midtoken = FALSE;
358 token = sp;
f5c61c07 359 } else if (intoken(c))
16bc359f 360 tp++;
f5c61c07 361 } else if (begtoken(c)) {
16bc359f
KB
362 token = tp = sp;
363 midtoken = TRUE;
364 }
365 }
2e314d68
KB
366 if (c == ';' && tydef==end) /* clean with typedefs */
367 tydef=none;
16bc359f 368 sp++;
f5c61c07 369 if (c == '\n' || sp > &line[sizeof (line) - BUFSIZ]) {
16bc359f 370 tp = token = sp = line;
f5c61c07 371 lineftell = ftell(inf);
16bc359f 372 number = gotone = midtoken = inquote = inchar = FALSE;
16bc359f
KB
373 }
374 }
375}
376
377/*
2e314d68
KB
378 * This routine checks to see if the current token is
379 * at the start of a function, or corresponds to a typedef
380 * It updates the input line * so that the '(' will be
381 * in it when it returns.
16bc359f 382 */
3e0cab55
KB
383start_entry(lp,token,f)
384char **lp;
385register char *token;
2e314d68 386int *f;
16bc359f
KB
387{
388
3e0cab55 389 reg char c,*sp;
16bc359f
KB
390 static logical found;
391 logical firsttok; /* T if have seen first token in ()'s */
392 int bad;
393
2e314d68 394 *f = 1; /* a function */
16bc359f
KB
395 sp = *lp;
396 c = *sp;
397 bad = FALSE;
f5c61c07 398 if (!number) { /* space is not allowed in macro defs */
16bc359f
KB
399 while (iswhite(c)) {
400 *++sp = c = getc(inf);
f5c61c07
KB
401 if (c == '\n') {
402 lineno++;
403 if (sp > &line[sizeof (line) - BUFSIZ])
404 goto ret;
405 }
16bc359f
KB
406 }
407 /* the following tries to make it so that a #define a b(c) */
408 /* doesn't count as a define of b. */
f5c61c07 409 } else {
2e314d68 410 if (!strncmp(token, "define", 6))
16bc359f
KB
411 found = 0;
412 else
413 found++;
414 if (found >= 2) {
415 gotone = TRUE;
416badone: bad = TRUE;
417 goto ret;
418 }
419 }
2e314d68
KB
420 /* check for the typedef cases */
421 if (tflag && !strncmp(token, "typedef", 7)) {
422 tydef=begin;
423 goto badone;
424 }
425 if (tydef==begin && (!strncmp(token, "struct", 6) ||
426 !strncmp(token, "union", 5) || !strncmp(token, "enum", 4))) {
427 goto badone;
428 }
429 if (tydef==begin) {
430 tydef=end;
431 goto badone;
432 }
433 if (tydef==end) {
434 *f = 0;
435 goto ret;
436 }
16bc359f
KB
437 if (c != '(')
438 goto badone;
439 firsttok = FALSE;
440 while ((*++sp=c=getc(inf)) != ')') {
f5c61c07
KB
441 if (c == '\n') {
442 lineno++;
443 if (sp > &line[sizeof (line) - BUFSIZ])
444 goto ret;
445 }
16bc359f
KB
446 /*
447 * This line used to confuse ctags:
448 * int (*oldhup)();
449 * This fixes it. A nonwhite char before the first
450 * token, other than a / (in case of a comment in there)
451 * makes this not a declaration.
452 */
453 if (begtoken(c) || c=='/') firsttok++;
454 else if (!iswhite(c) && !firsttok) goto badone;
16bc359f 455 }
16bc359f 456 while (iswhite(*++sp=c=getc(inf)))
f5c61c07
KB
457 if (c == '\n') {
458 lineno++;
459 if (sp > &line[sizeof (line) - BUFSIZ])
460 break;
461 }
16bc359f
KB
462ret:
463 *lp = --sp;
f5c61c07
KB
464 if (c == '\n')
465 lineno--;
16bc359f 466 ungetc(c,inf);
2e314d68
KB
467 return !bad && (!*f || isgood(c));
468 /* hack for typedefs */
16bc359f
KB
469}
470
f5c61c07 471getline()
16bc359f 472{
f5c61c07
KB
473 long saveftell = ftell( inf );
474 register char *cp;
475
476 fseek( inf , lineftell , 0 );
477 fgets(lbuf, sizeof lbuf, inf);
478 cp = rindex(lbuf, '\n');
479 if (cp)
480 *cp = 0;
481 fseek(inf, saveftell, 0);
16bc359f
KB
482}
483
16bc359f
KB
484free_tree(node)
485NODE *node;
486{
487
488 while (node) {
489 free_tree(node->right);
490 cfree(node);
491 node = node->left;
492 }
493}
494
f5c61c07
KB
495add_node(node, cur_node)
496 NODE *node,*cur_node;
16bc359f 497{
f5c61c07 498 register int dif;
16bc359f 499
2e314d68 500 dif = strcmp(node->entry, cur_node->entry);
16bc359f
KB
501 if (dif == 0) {
502 if (node->file == cur_node->file) {
503 if (!wflag) {
2e314d68
KB
504fprintf(stderr,"Duplicate entry in file %s, line %d: %s\n",
505 node->file,lineno,node->entry);
f5c61c07 506fprintf(stderr,"Second entry ignored\n");
16bc359f
KB
507 }
508 return;
509 }
f5c61c07
KB
510 if (!cur_node->been_warned)
511 if (!wflag)
2e314d68
KB
512fprintf(stderr,"Duplicate entry in files %s and %s: %s (Warning only)\n",
513 node->file, cur_node->file, node->entry);
f5c61c07
KB
514 cur_node->been_warned = TRUE;
515 return;
516 }
517 if (dif < 0) {
16bc359f
KB
518 if (cur_node->left != NULL)
519 add_node(node,cur_node->left);
f5c61c07 520 else
16bc359f 521 cur_node->left = node;
f5c61c07
KB
522 return;
523 }
524 if (cur_node->right != NULL)
525 add_node(node,cur_node->right);
16bc359f 526 else
f5c61c07 527 cur_node->right = node;
16bc359f
KB
528}
529
2e314d68 530put_entries(node)
f5c61c07 531reg NODE *node;
16bc359f 532{
f5c61c07 533 reg char *sp;
16bc359f
KB
534
535 if (node == NULL)
536 return;
2e314d68
KB
537 put_entries(node->left);
538 if (xflag == 0)
539 if (node->f) { /* a function */
540 fprintf(outf, "%s\t%s\t%c^",
541 node->entry, node->file, searchar);
542 for (sp = node->pat; *sp; sp++)
543 if (*sp == '\\')
544 fprintf(outf, "\\\\");
545 else if (*sp == searchar)
546 fprintf(outf, "\\%c", searchar);
547 else
548 putc(*sp, outf);
549 fprintf(outf, "%c\n", searchar);
550 } else { /* a typedef; text pattern inadequate */
551 fprintf(outf, "%s\t%s\t%d\n",
552 node->entry, node->file, node->lno);
553 }
9927b527 554 else if (vflag)
2e314d68
KB
555 fprintf(stdout, "%s %s %d\n",
556 node->entry, node->file, (node->lno+63)/64);
f5c61c07
KB
557 else
558 fprintf(stdout, "%-16s%4d %-16s %s\n",
2e314d68
KB
559 node->entry, node->lno, node->file, node->pat);
560 put_entries(node->right);
16bc359f
KB
561}
562
f5c61c07
KB
563char *dbp = lbuf;
564int pfcnt;
565
566PF_funcs(fi)
567 FILE *fi;
16bc359f 568{
f5c61c07
KB
569
570 lineno = 0;
571 pfcnt = 0;
572 while (fgets(lbuf, sizeof(lbuf), fi)) {
573 lineno++;
574 dbp = lbuf;
575 if ( *dbp == '%' ) dbp++ ; /* Ratfor escape to fortran */
576 while (isspace(*dbp))
577 dbp++;
578 if (*dbp == 0)
579 continue;
580 switch (*dbp |' ') {
581
582 case 'i':
583 if (tail("integer"))
584 takeprec();
585 break;
586 case 'r':
587 if (tail("real"))
588 takeprec();
16bc359f 589 break;
f5c61c07
KB
590 case 'l':
591 if (tail("logical"))
592 takeprec();
16bc359f 593 break;
f5c61c07
KB
594 case 'c':
595 if (tail("complex") || tail("character"))
596 takeprec();
597 break;
598 case 'd':
599 if (tail("double")) {
600 while (isspace(*dbp))
601 dbp++;
602 if (*dbp == 0)
603 continue;
604 if (tail("precision"))
605 break;
606 continue;
607 }
16bc359f 608 break;
16bc359f 609 }
f5c61c07
KB
610 while (isspace(*dbp))
611 dbp++;
612 if (*dbp == 0)
613 continue;
614 switch (*dbp|' ') {
615
616 case 'f':
617 if (tail("function"))
618 getit();
619 continue;
620 case 's':
621 if (tail("subroutine"))
622 getit();
623 continue;
624 case 'p':
625 if (tail("program")) {
626 getit();
627 continue;
628 }
629 if (tail("procedure"))
630 getit();
631 continue;
632 }
633 }
634 return (pfcnt);
635}
636
637tail(cp)
638 char *cp;
639{
640 register int len = 0;
641
642 while (*cp && (*cp&~' ') == ((*(dbp+len))&~' '))
643 cp++, len++;
644 if (*cp == 0) {
645 dbp += len;
646 return (1);
647 }
648 return (0);
649}
650
651takeprec()
652{
653
654 while (isspace(*dbp))
655 dbp++;
656 if (*dbp != '*')
657 return;
658 dbp++;
659 while (isspace(*dbp))
660 dbp++;
661 if (!isdigit(*dbp)) {
662 --dbp; /* force failure */
663 return;
16bc359f 664 }
f5c61c07
KB
665 do
666 dbp++;
667 while (isdigit(*dbp));
668}
669
670getit()
671{
672 register char *cp;
673 char c;
674 char nambuf[BUFSIZ];
675
676 for (cp = lbuf; *cp; cp++)
677 ;
678 *--cp = 0; /* zap newline */
679 while (isspace(*dbp))
680 dbp++;
681 if (*dbp == 0 || !isalpha(*dbp))
682 return;
683 for (cp = dbp+1; *cp && (isalpha(*cp) || isdigit(*cp)); cp++)
684 continue;
685 c = cp[0];
686 cp[0] = 0;
687 strcpy(nambuf, dbp);
688 cp[0] = c;
3e0cab55 689 pfnote(nambuf, lineno, FALSE);
f5c61c07
KB
690 pfcnt++;
691}
692
693char *
694savestr(cp)
695 char *cp;
696{
697 register int len;
698 register char *dp;
699
700 len = strlen(cp);
701 dp = (char *)malloc(len+1);
702 strcpy(dp, cp);
703 return (dp);
704}
705
706/*
707 * Return the ptr in sp at which the character c last
708 * appears; NULL if not found
709 *
710 * Identical to v7 rindex, included for portability.
711 */
712
713char *
714rindex(sp, c)
715register char *sp, c;
716{
717 register char *r;
718
719 r = NULL;
720 do {
721 if (*sp == c)
722 r = sp;
723 } while (*sp++);
724 return(r);
16bc359f 725}