BSD 3 development
[unix-history] / usr / src / cmd / ctags.c
CommitLineData
50a2a77c
BJ
1#include <stdio.h>
2
3/*
4 * This program examines each of its arguments for C function
5 * definitions, and puts them in a file "tags" for use by the editor
6 * (and anyone else who wants to).
7 */
8
9/*
10 * program history:
11 * ken arnold wrote this program. ask him.
12 * brought over to the vax by peter b. kessler 7/79
13 * who disavows any knowledge of its actions,
14 * except for the stuff related to the construction
15 * of the search patterns.
16 * Some additional enhancements made by Mark Horton, involving
17 * the options and special treatment of "main", "}" at beginning
18 * of line, and a few bug fixes.
19 */
20
21#define reg register
22#define logical char
23
24#define TRUE (1)
25#define FALSE (0)
26
27#define iswhite(arg) (_wht[arg]) /* T if char is white */
28#define begtoken(arg) (_btk[arg]) /* T if char can start token */
29#define intoken(arg) (_itk[arg]) /* T if char can be in token */
30#define endtoken(arg) (_etk[arg]) /* T if char ends tokens */
31#define isgood(arg) (_gd[arg]) /* T if char can be after ')' */
32
/* Larger of two values.  Each argument may be evaluated twice. */
#define	max(I1,I2)	((I1) > (I2) ? (I1) : (I2))
34
/*
 * One tag entry.  Entries are kept in a binary tree rooted at "head";
 * add_node() places them by strcmp() on the function name.
 */
struct	nd_st {			/* sorting structure			*/
	char		*func;		/* function name		*/
	char		*file;		/* file name (add_func stores curfile here) */
	char		*pat;		/* search pattern		*/
	logical		been_warned;	/* set if noticed dup		*/
	struct	nd_st	*left,*right;	/* left and right sons		*/
};
42
43long ftell();
44#ifdef DEBUG
45char *unctrl();
46#endif
47typedef struct nd_st NODE;
48
/*
 * Scanner flags and the character-class lookup tables used by the
 * iswhite/begtoken/intoken/endtoken/isgood macros.  Each table has
 * 0177 entries, so the valid subscripts are 0 through 0176; init()
 * fills them in.
 */
logical	number,			/* T if on line starting with #		*/
	term	= FALSE,	/* T if print on terminal		*/
	makefile= TRUE,		/* T if to creat "tags" file		*/
	gotone,			/* found a func already on line		*/
				/* boolean "func" (see init)		*/
	_wht[0177],_etk[0177],_itk[0177],_btk[0177],_gd[0177];
55
56char searchar = '?'; /* use ?...? searches */
57#define MAXPATTERN 50 /* according to bill */
58
59int lineno; /* line number of current line */
60char line[256], /* current input line */
61 *curfile, /* current input file name */
62 *outfile= "tags", /* output file */
63 *white = " \f\t\n", /* white chars */
64 *endtk = " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
65 /* token ending chars */
66 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
67 /* token starting chars */
68 *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz0123456789", /* valid in-token chars */
69 *notgd = ",;"; /* non-valid after-function chars */
70
71int file_num; /* current file number */
72int aflag; /* -a: append to tags */
73int uflag; /* -u: update tags */
74int wflag; /* -w: suppress warnings */
75
76FILE *inf, /* ioptr for current input file */
77 *outf; /* ioptr for tags file */
78
79long lineftell; /* ftell after getc( inf ) == '\n' */
80
81NODE *head; /* the head of the sorted binary tree */
82
83main(ac,av)
84int ac;
85char *av[];
86{
87 char cmd[100];
88 int i;
89
90 while (ac > 1 && av[1][0] == '-') {
91 for (i=1; av[1][i]; i++) {
92 switch(av[1][i]) {
93 case 'a':
94 aflag++;
95 break;
96 case 'u':
97 uflag++;
98 break;
99 case 'w':
100 wflag++;
101 break;
102
103 default:
104 goto usage;
105 }
106 }
107 ac--; av++;
108 }
109
110 if (ac <= 1) {
111 usage: printf("Usage: ctags [-au] file ...\n");
112 exit(1);
113 }
114
115 init(); /* set up boolean "functions" */
116 /*
117 * loop through files finding functions
118 */
119 for (file_num = 1; file_num < ac; file_num++)
120 find_funcs(av[file_num]);
121
122 if (uflag) {
123 for (i=1; i<ac; i++) {
124 sprintf(cmd, "mv %s OTAGS ; fgrep -v '\t%s\t' OTAGS > %s ; rm OTAGS", outfile, av[i], outfile);
125 system(cmd);
126 }
127 aflag++;
128 }
129
130 if ((outf = fopen(outfile, aflag ? "a" : "w")) == NULL) {
131 perror(outfile);
132 exit(1);
133 }
134 put_funcs(head); /* put the data in "tags" */
135 exit(0);
136}
137
138/*
139 * This routine sets up the boolean psuedo-functions which work
140 * by seting boolean flags dependent upon the corresponding character
141
142 * Every char which is NOT in that string is not a white char. Therefore,
143 * all of the array "_wht" is set to FALSE, and then the elements
144 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
145 * of a char is TRUE if it is the string "white", else FALSE.
146 * It also open up the "tags" output file.
147 */
148init()
149{
150
151 reg char *sp;
152 reg int i;
153
154 for (i = 0; i < 0177; i++) {
155 _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
156 _gd[i] = TRUE;
157 }
158 for (sp = white; *sp; sp++)
159 _wht[*sp] = TRUE;
160 for (sp = endtk; *sp; sp++)
161 _etk[*sp] = TRUE;
162 for (sp = intk; *sp; sp++)
163 _itk[*sp] = TRUE;
164 for (sp = begtk; *sp; sp++)
165 _btk[*sp] = TRUE;
166 for (sp = notgd; *sp; sp++)
167 _gd[*sp] = FALSE;
168}
169
170/*
171 * This program opens the specified file and calls the function
172 * which finds the function defenitions.
173 */
174find_funcs(file)
175char *file;
176{
177
178 if ((inf=fopen(file,"r")) == NULL) {
179 perror(file);
180 return;
181 }
182
183 curfile = (char *) calloc(strlen(file)+1,1);
184 strcpy(curfile, file);
185 lineno = 1;
186 C_funcs(); /* find the C-style functions */
187 fclose(inf);
188}
189
190/*
191 * This routine finds functions in C syntax and adds them
192 * to the list.
193 */
194C_funcs()
195{
196
197 reg char c, /* current input char */
198 *token, /* start of current token */
199 *tp; /* end of current token */
200 logical incom, /* T if inside a comment */
201 inquote, /* T if inside a quoted string */
202 inchar, /* T if inside a single char ' */
203 midtoken; /* T if in middle of token */
204 char *sp; /* current input char */
205 char tok[100];
206 long insub; /* level of "{}"s deep */
207
208 /*
209 * init boolean flags, counters, and pointers
210 */
211
212 number = gotone = midtoken = inquote = inchar = incom = FALSE;
213 insub = 0L;
214 sp = tp = token = line;
215#ifdef DEBUG
216 printf(" t s c m q c g n\n");
217 printf(" s t k u o i u h o u\n");
218 printf(" c p p n b m d o r t m\n");
219#endif
220 while ((*sp=c=getc(inf)) != EOF) {
221#ifdef DEBUG
222 printf("%2.2s: ",unctrl(c));
223 printf("%2.2s ",unctrl(*sp));
224 printf("%2.2s ",unctrl(*tp));
225 printf("%2.2s ",unctrl(*token));
226 printf("%2ld %d %d %d %d %d %d\n",insub,incom,midtoken,inquote,inchar,gotone,number);
227#endif
228 /*
229 * action based on mixture of character type, *sp,
230 * and logical flags
231 */
232
233 if (c == '\\') {
234 c = *++sp = getc(inf);
235 /*
236 * Handling of backslash is very naive.
237 * We do, however, turn escaped newlines
238 * into spaces.
239 */
240 if (c = '\n')
241 c = ' ';
242 }
243 else if (incom) {
244 if (c == '*') {
245 while ((*++sp=c=getc(inf)) == '*') {
246#ifdef DEBUG
247 printf("%2.2s- ",unctrl(c));
248 printf("%2.2s ",unctrl(*sp));
249 printf("%2.2s ",unctrl(*tp));
250 printf("%2.2s ",unctrl(*token));
251 printf("%2ld %d %d %d %d %d %d\n",insub,incom,midtoken,inquote,inchar,gotone,number);
252#endif
253 continue;
254 }
255#ifdef DEBUG
256 printf("%2.2s- ",unctrl(c));
257 printf("%2.2s ",unctrl(*sp));
258 printf("%2.2s ",unctrl(*tp));
259 printf("%2.2s ",unctrl(*token));
260 printf("%2ld %d %d %d %d %d %d\n",insub,incom,midtoken,inquote,inchar,gotone,number);
261#endif
262 if (c == '/')
263 incom = FALSE;
264 }
265 }
266 else if (inquote) {
267 /*
268 * Too dumb to know about \" not being magic, but
269 * they usually occur in pairs anyway.
270 */
271 if ( c == '"' )
272 inquote = FALSE;
273 continue;
274 }
275 else if (inchar) {
276 if ( c == '\'' )
277 inchar = FALSE;
278 continue;
279 }
280 else if (c == '"')
281 inquote = TRUE;
282 else if (c == '\'')
283 inchar = TRUE;
284 else if (c == '/')
285 if ((*++sp=c=getc(inf)) == '*')
286 incom = TRUE;
287 else
288 ungetc(*sp,inf);
289 else if (c == '#' && sp == line)
290 number = TRUE;
291 else if (c == '{')
292 insub++;
293 else if (c == '}')
294 if (sp == line)
295 /*
296 * Kludge to get back in sync after getting confused.
297 * We really shouldn't be looking at indenting style,
298 * but tricking with the preprocessor can get us off,
299 * and most people indent this way anyway.
300 * This resets level of indenting to zero if '}' is
301 * found at beginning of line.
302 */
303 insub = 0;
304 else
305 insub--;
306 else if (!insub && !inquote && !inchar && !gotone) {
307 if (midtoken) {
308 if (endtoken(c)) {
309 if (start_func(&sp,token,tp)) {
310 strncpy(tok,token,tp-token+1);
311 tok[tp-token+1] = 0;
312 add_func(tok);
313 gotone = TRUE;
314 }
315 midtoken = FALSE;
316 token = sp;
317 }
318 else if (intoken(c))
319 tp++;
320 }
321 else if (begtoken(c)) {
322 token = tp = sp;
323 midtoken = TRUE;
324 }
325 }
326
327 /*
328 * move on to next char, and set flags accordingly
329 */
330
331 sp++;
332 if (c == '\n') {
333 tp = token = sp = line;
334 lineftell = ftell( inf );
335#ifdef DEBUG
336 printf("lineftell saved as %ld\n",lineftell);
337#endif
338 number = gotone = midtoken = inquote = inchar = FALSE;
339 lineno++;
340 }
341 }
342}
343
344/*
345 * This routine checks to see if the current token is
346 * at the start of a function. It updates the input line
347 * so that the '(' will be in it when it returns.
348 */
349start_func(lp,token,tp)
350char **lp,*token,*tp;
351{
352
353 reg char c,*sp,*tsp;
354 static logical found;
355 logical firsttok; /* T if have seen first token in ()'s */
356 int bad;
357
358 sp = *lp;
359 c = *sp;
360 bad = FALSE;
361 if (!number) /* space is not allowed in macro defs */
362 while (iswhite(c)) {
363 *++sp = c = getc(inf);
364#ifdef DEBUG
365 printf("%2.2s:\n",unctrl(c));
366#endif
367 }
368 /* the following tries to make it so that a #define a b(c) */
369 /* doesn't count as a define of b. */
370 else {
371 logical define;
372
373 define = TRUE;
374 for (tsp = "define"; *tsp && token < tp; tsp++)
375 if (*tsp != *token++) {
376 define = FALSE;
377 break;
378 }
379 if (define)
380 found = 0;
381 else
382 found++;
383 if (found >= 2) {
384 gotone = TRUE;
385badone: bad = TRUE;
386 goto ret;
387 }
388 }
389 if (c != '(')
390 goto badone;
391 firsttok = FALSE;
392 while ((*++sp=c=getc(inf)) != ')') {
393 /*
394 * This line used to confuse ctags:
395 * int (*oldhup)();
396 * This fixes it. A nonwhite char before the first
397 * token, other than a / (in case of a comment in there)
398 * makes this not a declaration.
399 */
400 if (begtoken(c) || c=='/') firsttok++;
401 else if (!iswhite(c) && !firsttok) goto badone;
402#ifdef DEBUG
403 printf("%2.2s:\n",unctrl(c));
404#endif
405 }
406#ifdef DEBUG
407 printf("%2.2s:\n",unctrl(c));
408#endif
409 while (iswhite(*++sp=c=getc(inf)))
410#ifdef DEBUG
411 printf("%2.2s:\n",unctrl(c))
412#endif
413 ;
414#ifdef DEBUG
415 printf("%2.2s:\n",unctrl(c));
416#endif
417ret:
418 *lp = --sp;
419 ungetc(c,inf);
420 return !bad && isgood(c);
421}
422
423/*
424 * This routine adds a function to the list
425 */
426add_func(token)
427char *token;
428{
429 reg char *fp,*pp;
430 reg NODE *np;
431
432 if ((np = (NODE *) calloc(1,sizeof (NODE))) == NULL) {
433 printf("too many functions to sort\n");
434 put_funcs(head);
435 free_tree(head);
436 head = np = (NODE *) calloc(1,sizeof (NODE));
437 }
438 if (strcmp(token,"main") == 0) {
439 /*
440 * Since there are so many directories with lots of
441 * misc. complete programs in them, main tends to get
442 * redefined a lot. So we change all mains to instead
443 * refer to the name of the file, without leading
444 * pathname components and without a trailing .c.
445 */
446 fp = curfile;
447 for (pp=curfile; *pp; pp++)
448 if (*pp == '/')
449 fp = pp+1;
450 *token = 'M';
451 strcpy(token+1, fp);
452 pp = &token[strlen(token)-2];
453 if (*pp == '.')
454 *pp = 0;
455 }
456 fp = np->func = (char *) calloc(strlen(token)+1,sizeof (char));
457 np->file = curfile;
458 strcpy(fp, token);
459 { /*
460 * this change to make the whole line the pattern
461 */
462 long saveftell = ftell( inf );
463 int patlen;
464 char ch;
465
466 patlen = 0;
467 fseek( inf , lineftell , 0 );
468#ifdef DEBUG
469 printf("saveftell=%ld, lseek back to %ld\n",saveftell,lineftell);
470#endif
471 ch = getc( inf );
472 while ( ch != '\n' && ch != searchar && patlen < MAXPATTERN ) {
473 patlen ++;
474 ch = getc( inf );
475 }
476 pp = np -> pat = (char *) calloc( patlen + 2 , sizeof( char ) );
477 fseek( inf , lineftell , 0 );
478 ch = getc( inf );
479 while ( patlen -- ) {
480 *pp ++ = ch;
481 ch = getc( inf );
482 }
483 if ( ch == '\n' )
484 *pp ++ = '$';
485 *pp = '\0';
486 fseek( inf , saveftell , 0 );
487#ifdef DEBUG
488 printf("seek back to %ld, ftell is now %ld\n",saveftell,ftell(inf));
489#endif
490 }
491#ifdef DEBUG
492 printf("\"%s\"\t\"%s\"\t\"%s\"\n",np->func,np->file,np->pat);
493#endif
494 if (head == NULL)
495 head = np;
496 else
497 add_node(np,head);
498}
499
500/*
501 * This routine cfrees the entire tree from the node down.
502 */
503free_tree(node)
504NODE *node;
505{
506
507 while (node) {
508 free_tree(node->right);
509 cfree(node);
510 node = node->left;
511 }
512}
513
514/*
515 * This routine finds the node where the new function node
516 * should be added.
517 */
518add_node(node,cur_node)
519NODE *node,*cur_node;
520{
521
522 reg int dif;
523
524 dif = strcmp(node->func,cur_node->func);
525#ifdef DEBUG
526 printf("strcmp(\"%s\",\"%s\") == %d\n",node->func,cur_node->func,dif);
527#endif
528 if (dif == 0) {
529 if (node->file == cur_node->file) {
530 if (!wflag) {
531 fprintf(stderr,"Duplicate function in file \"%s\", line %d: %s\n",node->file,lineno,node->func);
532 fprintf(stderr,"Second entry ignored\n");
533 }
534 return;
535 }
536 else {
537 if (!cur_node->been_warned)
538 if (!wflag)
539 fprintf(stderr,"Duplicate function name in files %s and %s: %s (Warning only)\n",
540 node->file, cur_node->file, node->func);
541 cur_node->been_warned = TRUE;
542 }
543 }
544 if (dif < 0)
545 if (cur_node->left != NULL)
546 add_node(node,cur_node->left);
547 else {
548#ifdef DEBUG
549 printf("adding to left branch\n");
550#endif
551 cur_node->left = node;
552 }
553 else
554 if (cur_node->right != NULL)
555 add_node(node,cur_node->right);
556 else {
557#ifdef DEBUG
558 printf("adding to right branch\n");
559#endif
560 cur_node->right = node;
561 }
562}
563
564/*
565 * This routine puts the functions in the file.
566 */
567put_funcs(node)
568NODE *node;
569{
570
571 if (node == NULL)
572 return;
573 put_funcs(node->left);
574 fprintf(outf,"%s\t%s\t%c^%s%c\n",node->func,node->file
575 ,searchar,node->pat,searchar);
576 put_funcs(node->right);
577}
578
579#ifdef DEBUG
/*
 * Debugging aid: returns a printable rendering of c, one or two
 * characters long, in a static buffer that the next call overwrites.
 * Printable characters stand for themselves; backspace, tab and
 * newline become \b, \t and \n; other control characters become
 * ^ followed by the character 64 above them; anything above '~'
 * becomes ^? and a negative value becomes ??.
 */
char *
unctrl(c)
char c;
{
	static char	out[3];
	char		first, second;

	if (c < 0) {
		first = '?';
		second = '?';
	}
	else if (c < ' ') {
		first = '\\';
		switch(c) {
		case '\b':
			second = 'b';
			break;
		case '\t':
			second = 't';
			break;
		case '\n':
			second = 'n';
			break;
		default:
			first = '^';
			second = c + 64;
			break;
		}
	}
	else if (c <= '~') {
		first = c;
		second = 0;
	}
	else {
		first = '^';
		second = '?';
	}
	out[0] = first;
	out[1] = second;
	out[2] = 0;
	return(out);
}
615#endif