Commit | Line | Data |
---|---|---|
50a2a77c BJ |
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
2 | ||
3 | /* | |
4 | * This program examines each of its arguments for C function | |
5 | * definitions, and puts them in a file "tags" for use by the editor | |
6 | * (and anyone else who wants to). | |
7 | */ | |
8 | ||
9 | /* | |
10 | * program history: | |
11 | * ken arnold wrote this program. ask him. | |
12 | * brought over to the vax by peter b. kessler 7/79 | |
13 | * who disavows any knowledge of its actions, | |
14 | * except for the stuff related to the construction | |
15 | * of the search patterns. | |
16 | * Some additional enhancements made by Mark Horton, involving | |
17 | * the options and special treatment of "main", "}" at beginning | |
18 | * of line, and a few bug fixes. | |
19 | */ | |
20 | ||
#define	reg	register
#define	logical	char

#define	TRUE	(1)
#define	FALSE	(0)

/*
 * Character-class "functions".  Each one is a lookup in a table with one
 * entry per possible byte value.  The argument is narrowed to unsigned
 * char before it is used as a subscript, so a plain (possibly signed)
 * char, DEL, or a byte with the high bit set can never index outside
 * the table.
 */
#define	NCHARS		256			/* entries per class table */
#define	chridx(arg)	((unsigned char) (arg))

#define	iswhite(arg)	(_wht[chridx(arg)])	/* T if char is white		*/
#define	begtoken(arg)	(_btk[chridx(arg)])	/* T if char can start token	*/
#define	intoken(arg)	(_itk[chridx(arg)])	/* T if char can be in token	*/
#define	endtoken(arg)	(_etk[chridx(arg)])	/* T if char ends tokens	*/
#define	isgood(arg)	(_gd[chridx(arg)])	/* T if char can be after ')'	*/

/* arguments are parenthesized so that operator expressions expand safely */
#define	max(I1,I2)	((I1) > (I2) ? (I1) : (I2))

struct	nd_st {			/* sorting structure			*/
	char		*func;		/* function name		*/
	char		*file;		/* file name			*/
	char		*pat;		/* search pattern		*/
	logical		been_warned;	/* set if noticed dup		*/
	struct	nd_st	*left,*right;	/* left and right sons		*/
};

long	ftell();
#ifdef DEBUG
char	*unctrl();
#endif
typedef	struct	nd_st	NODE;

logical	number,			/* T if on line starting with #		*/
	term	= FALSE,	/* T if print on terminal		*/
	makefile= TRUE,		/* T if to creat "tags" file		*/
	gotone,			/* found a func already on line		*/
				/* boolean "func" (see init)		*/
				/* all zero until init() fills them in	*/
	_wht[NCHARS],_etk[NCHARS],_itk[NCHARS],_btk[NCHARS],_gd[NCHARS];

char	searchar = '?';		/* use ?...? searches			*/
#define	MAXPATTERN	50	/* according to bill			*/

int	lineno;			/* line number of current line		*/
char	line[256],		/* current input line			*/
	*curfile,		/* current input file name		*/
	*outfile= "tags",	/* output file				*/
	*white	= " \f\t\n",	/* white chars				*/
	*endtk	= " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
				/* token ending chars			*/
	*begtk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
				/* token starting chars			*/
	*intk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz0123456789",
				/* valid in-token chars			*/
	*notgd	= ",;";		/* non-valid after-function chars	*/

int	file_num;		/* current file number			*/
int	aflag;			/* -a: append to tags			*/
int	uflag;			/* -u: update tags			*/
int	wflag;			/* -w: suppress warnings		*/

FILE	*inf,			/* ioptr for current input file		*/
	*outf;			/* ioptr for tags file			*/

long	lineftell;		/* ftell after getc( inf ) == '\n'	*/

NODE	*head;			/* the head of the sorted binary tree	*/
82 | ||
83 | main(ac,av) | |
84 | int ac; | |
85 | char *av[]; | |
86 | { | |
87 | char cmd[100]; | |
88 | int i; | |
89 | ||
90 | while (ac > 1 && av[1][0] == '-') { | |
91 | for (i=1; av[1][i]; i++) { | |
92 | switch(av[1][i]) { | |
93 | case 'a': | |
94 | aflag++; | |
95 | break; | |
96 | case 'u': | |
97 | uflag++; | |
98 | break; | |
99 | case 'w': | |
100 | wflag++; | |
101 | break; | |
102 | ||
103 | default: | |
104 | goto usage; | |
105 | } | |
106 | } | |
107 | ac--; av++; | |
108 | } | |
109 | ||
110 | if (ac <= 1) { | |
111 | usage: printf("Usage: ctags [-au] file ...\n"); | |
112 | exit(1); | |
113 | } | |
114 | ||
115 | init(); /* set up boolean "functions" */ | |
116 | /* | |
117 | * loop through files finding functions | |
118 | */ | |
119 | for (file_num = 1; file_num < ac; file_num++) | |
120 | find_funcs(av[file_num]); | |
121 | ||
122 | if (uflag) { | |
123 | for (i=1; i<ac; i++) { | |
124 | sprintf(cmd, "mv %s OTAGS ; fgrep -v '\t%s\t' OTAGS > %s ; rm OTAGS", outfile, av[i], outfile); | |
125 | system(cmd); | |
126 | } | |
127 | aflag++; | |
128 | } | |
129 | ||
130 | if ((outf = fopen(outfile, aflag ? "a" : "w")) == NULL) { | |
131 | perror(outfile); | |
132 | exit(1); | |
133 | } | |
134 | put_funcs(head); /* put the data in "tags" */ | |
135 | exit(0); | |
136 | } | |
137 | ||
138 | /* | |
139 | * This routine sets up the boolean psuedo-functions which work | |
140 | * by seting boolean flags dependent upon the corresponding character | |
141 | ||
142 | * Every char which is NOT in that string is not a white char. Therefore, | |
143 | * all of the array "_wht" is set to FALSE, and then the elements | |
144 | * subscripted by the chars in "white" are set to TRUE. Thus "_wht" | |
145 | * of a char is TRUE if it is the string "white", else FALSE. | |
146 | * It also open up the "tags" output file. | |
147 | */ | |
148 | init() | |
149 | { | |
150 | ||
151 | reg char *sp; | |
152 | reg int i; | |
153 | ||
154 | for (i = 0; i < 0177; i++) { | |
155 | _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE; | |
156 | _gd[i] = TRUE; | |
157 | } | |
158 | for (sp = white; *sp; sp++) | |
159 | _wht[*sp] = TRUE; | |
160 | for (sp = endtk; *sp; sp++) | |
161 | _etk[*sp] = TRUE; | |
162 | for (sp = intk; *sp; sp++) | |
163 | _itk[*sp] = TRUE; | |
164 | for (sp = begtk; *sp; sp++) | |
165 | _btk[*sp] = TRUE; | |
166 | for (sp = notgd; *sp; sp++) | |
167 | _gd[*sp] = FALSE; | |
168 | } | |
169 | ||
170 | /* | |
171 | * This program opens the specified file and calls the function | |
172 | * which finds the function defenitions. | |
173 | */ | |
174 | find_funcs(file) | |
175 | char *file; | |
176 | { | |
177 | ||
178 | if ((inf=fopen(file,"r")) == NULL) { | |
179 | perror(file); | |
180 | return; | |
181 | } | |
182 | ||
183 | curfile = (char *) calloc(strlen(file)+1,1); | |
184 | strcpy(curfile, file); | |
185 | lineno = 1; | |
186 | C_funcs(); /* find the C-style functions */ | |
187 | fclose(inf); | |
188 | } | |
189 | ||
190 | /* | |
191 | * This routine finds functions in C syntax and adds them | |
192 | * to the list. | |
193 | */ | |
194 | C_funcs() | |
195 | { | |
196 | ||
197 | reg char c, /* current input char */ | |
198 | *token, /* start of current token */ | |
199 | *tp; /* end of current token */ | |
200 | logical incom, /* T if inside a comment */ | |
201 | inquote, /* T if inside a quoted string */ | |
202 | inchar, /* T if inside a single char ' */ | |
203 | midtoken; /* T if in middle of token */ | |
204 | char *sp; /* current input char */ | |
205 | char tok[100]; | |
206 | long insub; /* level of "{}"s deep */ | |
207 | ||
208 | /* | |
209 | * init boolean flags, counters, and pointers | |
210 | */ | |
211 | ||
212 | number = gotone = midtoken = inquote = inchar = incom = FALSE; | |
213 | insub = 0L; | |
214 | sp = tp = token = line; | |
215 | #ifdef DEBUG | |
216 | printf(" t s c m q c g n\n"); | |
217 | printf(" s t k u o i u h o u\n"); | |
218 | printf(" c p p n b m d o r t m\n"); | |
219 | #endif | |
220 | while ((*sp=c=getc(inf)) != EOF) { | |
221 | #ifdef DEBUG | |
222 | printf("%2.2s: ",unctrl(c)); | |
223 | printf("%2.2s ",unctrl(*sp)); | |
224 | printf("%2.2s ",unctrl(*tp)); | |
225 | printf("%2.2s ",unctrl(*token)); | |
226 | printf("%2ld %d %d %d %d %d %d\n",insub,incom,midtoken,inquote,inchar,gotone,number); | |
227 | #endif | |
228 | /* | |
229 | * action based on mixture of character type, *sp, | |
230 | * and logical flags | |
231 | */ | |
232 | ||
233 | if (c == '\\') { | |
234 | c = *++sp = getc(inf); | |
235 | /* | |
236 | * Handling of backslash is very naive. | |
237 | * We do, however, turn escaped newlines | |
238 | * into spaces. | |
239 | */ | |
240 | if (c = '\n') | |
241 | c = ' '; | |
242 | } | |
243 | else if (incom) { | |
244 | if (c == '*') { | |
245 | while ((*++sp=c=getc(inf)) == '*') { | |
246 | #ifdef DEBUG | |
247 | printf("%2.2s- ",unctrl(c)); | |
248 | printf("%2.2s ",unctrl(*sp)); | |
249 | printf("%2.2s ",unctrl(*tp)); | |
250 | printf("%2.2s ",unctrl(*token)); | |
251 | printf("%2ld %d %d %d %d %d %d\n",insub,incom,midtoken,inquote,inchar,gotone,number); | |
252 | #endif | |
253 | continue; | |
254 | } | |
255 | #ifdef DEBUG | |
256 | printf("%2.2s- ",unctrl(c)); | |
257 | printf("%2.2s ",unctrl(*sp)); | |
258 | printf("%2.2s ",unctrl(*tp)); | |
259 | printf("%2.2s ",unctrl(*token)); | |
260 | printf("%2ld %d %d %d %d %d %d\n",insub,incom,midtoken,inquote,inchar,gotone,number); | |
261 | #endif | |
262 | if (c == '/') | |
263 | incom = FALSE; | |
264 | } | |
265 | } | |
266 | else if (inquote) { | |
267 | /* | |
268 | * Too dumb to know about \" not being magic, but | |
269 | * they usually occur in pairs anyway. | |
270 | */ | |
271 | if ( c == '"' ) | |
272 | inquote = FALSE; | |
273 | continue; | |
274 | } | |
275 | else if (inchar) { | |
276 | if ( c == '\'' ) | |
277 | inchar = FALSE; | |
278 | continue; | |
279 | } | |
280 | else if (c == '"') | |
281 | inquote = TRUE; | |
282 | else if (c == '\'') | |
283 | inchar = TRUE; | |
284 | else if (c == '/') | |
285 | if ((*++sp=c=getc(inf)) == '*') | |
286 | incom = TRUE; | |
287 | else | |
288 | ungetc(*sp,inf); | |
289 | else if (c == '#' && sp == line) | |
290 | number = TRUE; | |
291 | else if (c == '{') | |
292 | insub++; | |
293 | else if (c == '}') | |
294 | if (sp == line) | |
295 | /* | |
296 | * Kludge to get back in sync after getting confused. | |
297 | * We really shouldn't be looking at indenting style, | |
298 | * but tricking with the preprocessor can get us off, | |
299 | * and most people indent this way anyway. | |
300 | * This resets level of indenting to zero if '}' is | |
301 | * found at beginning of line. | |
302 | */ | |
303 | insub = 0; | |
304 | else | |
305 | insub--; | |
306 | else if (!insub && !inquote && !inchar && !gotone) { | |
307 | if (midtoken) { | |
308 | if (endtoken(c)) { | |
309 | if (start_func(&sp,token,tp)) { | |
310 | strncpy(tok,token,tp-token+1); | |
311 | tok[tp-token+1] = 0; | |
312 | add_func(tok); | |
313 | gotone = TRUE; | |
314 | } | |
315 | midtoken = FALSE; | |
316 | token = sp; | |
317 | } | |
318 | else if (intoken(c)) | |
319 | tp++; | |
320 | } | |
321 | else if (begtoken(c)) { | |
322 | token = tp = sp; | |
323 | midtoken = TRUE; | |
324 | } | |
325 | } | |
326 | ||
327 | /* | |
328 | * move on to next char, and set flags accordingly | |
329 | */ | |
330 | ||
331 | sp++; | |
332 | if (c == '\n') { | |
333 | tp = token = sp = line; | |
334 | lineftell = ftell( inf ); | |
335 | #ifdef DEBUG | |
336 | printf("lineftell saved as %ld\n",lineftell); | |
337 | #endif | |
338 | number = gotone = midtoken = inquote = inchar = FALSE; | |
339 | lineno++; | |
340 | } | |
341 | } | |
342 | } | |
343 | ||
344 | /* | |
345 | * This routine checks to see if the current token is | |
346 | * at the start of a function. It updates the input line | |
347 | * so that the '(' will be in it when it returns. | |
348 | */ | |
349 | start_func(lp,token,tp) | |
350 | char **lp,*token,*tp; | |
351 | { | |
352 | ||
353 | reg char c,*sp,*tsp; | |
354 | static logical found; | |
355 | logical firsttok; /* T if have seen first token in ()'s */ | |
356 | int bad; | |
357 | ||
358 | sp = *lp; | |
359 | c = *sp; | |
360 | bad = FALSE; | |
361 | if (!number) /* space is not allowed in macro defs */ | |
362 | while (iswhite(c)) { | |
363 | *++sp = c = getc(inf); | |
364 | #ifdef DEBUG | |
365 | printf("%2.2s:\n",unctrl(c)); | |
366 | #endif | |
367 | } | |
368 | /* the following tries to make it so that a #define a b(c) */ | |
369 | /* doesn't count as a define of b. */ | |
370 | else { | |
371 | logical define; | |
372 | ||
373 | define = TRUE; | |
374 | for (tsp = "define"; *tsp && token < tp; tsp++) | |
375 | if (*tsp != *token++) { | |
376 | define = FALSE; | |
377 | break; | |
378 | } | |
379 | if (define) | |
380 | found = 0; | |
381 | else | |
382 | found++; | |
383 | if (found >= 2) { | |
384 | gotone = TRUE; | |
385 | badone: bad = TRUE; | |
386 | goto ret; | |
387 | } | |
388 | } | |
389 | if (c != '(') | |
390 | goto badone; | |
391 | firsttok = FALSE; | |
392 | while ((*++sp=c=getc(inf)) != ')') { | |
393 | /* | |
394 | * This line used to confuse ctags: | |
395 | * int (*oldhup)(); | |
396 | * This fixes it. A nonwhite char before the first | |
397 | * token, other than a / (in case of a comment in there) | |
398 | * makes this not a declaration. | |
399 | */ | |
400 | if (begtoken(c) || c=='/') firsttok++; | |
401 | else if (!iswhite(c) && !firsttok) goto badone; | |
402 | #ifdef DEBUG | |
403 | printf("%2.2s:\n",unctrl(c)); | |
404 | #endif | |
405 | } | |
406 | #ifdef DEBUG | |
407 | printf("%2.2s:\n",unctrl(c)); | |
408 | #endif | |
409 | while (iswhite(*++sp=c=getc(inf))) | |
410 | #ifdef DEBUG | |
411 | printf("%2.2s:\n",unctrl(c)) | |
412 | #endif | |
413 | ; | |
414 | #ifdef DEBUG | |
415 | printf("%2.2s:\n",unctrl(c)); | |
416 | #endif | |
417 | ret: | |
418 | *lp = --sp; | |
419 | ungetc(c,inf); | |
420 | return !bad && isgood(c); | |
421 | } | |
422 | ||
423 | /* | |
424 | * This routine adds a function to the list | |
425 | */ | |
426 | add_func(token) | |
427 | char *token; | |
428 | { | |
429 | reg char *fp,*pp; | |
430 | reg NODE *np; | |
431 | ||
432 | if ((np = (NODE *) calloc(1,sizeof (NODE))) == NULL) { | |
433 | printf("too many functions to sort\n"); | |
434 | put_funcs(head); | |
435 | free_tree(head); | |
436 | head = np = (NODE *) calloc(1,sizeof (NODE)); | |
437 | } | |
438 | if (strcmp(token,"main") == 0) { | |
439 | /* | |
440 | * Since there are so many directories with lots of | |
441 | * misc. complete programs in them, main tends to get | |
442 | * redefined a lot. So we change all mains to instead | |
443 | * refer to the name of the file, without leading | |
444 | * pathname components and without a trailing .c. | |
445 | */ | |
446 | fp = curfile; | |
447 | for (pp=curfile; *pp; pp++) | |
448 | if (*pp == '/') | |
449 | fp = pp+1; | |
450 | *token = 'M'; | |
451 | strcpy(token+1, fp); | |
452 | pp = &token[strlen(token)-2]; | |
453 | if (*pp == '.') | |
454 | *pp = 0; | |
455 | } | |
456 | fp = np->func = (char *) calloc(strlen(token)+1,sizeof (char)); | |
457 | np->file = curfile; | |
458 | strcpy(fp, token); | |
459 | { /* | |
460 | * this change to make the whole line the pattern | |
461 | */ | |
462 | long saveftell = ftell( inf ); | |
463 | int patlen; | |
464 | char ch; | |
465 | ||
466 | patlen = 0; | |
467 | fseek( inf , lineftell , 0 ); | |
468 | #ifdef DEBUG | |
469 | printf("saveftell=%ld, lseek back to %ld\n",saveftell,lineftell); | |
470 | #endif | |
471 | ch = getc( inf ); | |
472 | while ( ch != '\n' && ch != searchar && patlen < MAXPATTERN ) { | |
473 | patlen ++; | |
474 | ch = getc( inf ); | |
475 | } | |
476 | pp = np -> pat = (char *) calloc( patlen + 2 , sizeof( char ) ); | |
477 | fseek( inf , lineftell , 0 ); | |
478 | ch = getc( inf ); | |
479 | while ( patlen -- ) { | |
480 | *pp ++ = ch; | |
481 | ch = getc( inf ); | |
482 | } | |
483 | if ( ch == '\n' ) | |
484 | *pp ++ = '$'; | |
485 | *pp = '\0'; | |
486 | fseek( inf , saveftell , 0 ); | |
487 | #ifdef DEBUG | |
488 | printf("seek back to %ld, ftell is now %ld\n",saveftell,ftell(inf)); | |
489 | #endif | |
490 | } | |
491 | #ifdef DEBUG | |
492 | printf("\"%s\"\t\"%s\"\t\"%s\"\n",np->func,np->file,np->pat); | |
493 | #endif | |
494 | if (head == NULL) | |
495 | head = np; | |
496 | else | |
497 | add_node(np,head); | |
498 | } | |
499 | ||
500 | /* | |
501 | * This routine cfrees the entire tree from the node down. | |
502 | */ | |
503 | free_tree(node) | |
504 | NODE *node; | |
505 | { | |
506 | ||
507 | while (node) { | |
508 | free_tree(node->right); | |
509 | cfree(node); | |
510 | node = node->left; | |
511 | } | |
512 | } | |
513 | ||
514 | /* | |
515 | * This routine finds the node where the new function node | |
516 | * should be added. | |
517 | */ | |
518 | add_node(node,cur_node) | |
519 | NODE *node,*cur_node; | |
520 | { | |
521 | ||
522 | reg int dif; | |
523 | ||
524 | dif = strcmp(node->func,cur_node->func); | |
525 | #ifdef DEBUG | |
526 | printf("strcmp(\"%s\",\"%s\") == %d\n",node->func,cur_node->func,dif); | |
527 | #endif | |
528 | if (dif == 0) { | |
529 | if (node->file == cur_node->file) { | |
530 | if (!wflag) { | |
531 | fprintf(stderr,"Duplicate function in file \"%s\", line %d: %s\n",node->file,lineno,node->func); | |
532 | fprintf(stderr,"Second entry ignored\n"); | |
533 | } | |
534 | return; | |
535 | } | |
536 | else { | |
537 | if (!cur_node->been_warned) | |
538 | if (!wflag) | |
539 | fprintf(stderr,"Duplicate function name in files %s and %s: %s (Warning only)\n", | |
540 | node->file, cur_node->file, node->func); | |
541 | cur_node->been_warned = TRUE; | |
542 | } | |
543 | } | |
544 | if (dif < 0) | |
545 | if (cur_node->left != NULL) | |
546 | add_node(node,cur_node->left); | |
547 | else { | |
548 | #ifdef DEBUG | |
549 | printf("adding to left branch\n"); | |
550 | #endif | |
551 | cur_node->left = node; | |
552 | } | |
553 | else | |
554 | if (cur_node->right != NULL) | |
555 | add_node(node,cur_node->right); | |
556 | else { | |
557 | #ifdef DEBUG | |
558 | printf("adding to right branch\n"); | |
559 | #endif | |
560 | cur_node->right = node; | |
561 | } | |
562 | } | |
563 | ||
564 | /* | |
565 | * This routine puts the functions in the file. | |
566 | */ | |
567 | put_funcs(node) | |
568 | NODE *node; | |
569 | { | |
570 | ||
571 | if (node == NULL) | |
572 | return; | |
573 | put_funcs(node->left); | |
574 | fprintf(outf,"%s\t%s\t%c^%s%c\n",node->func,node->file | |
575 | ,searchar,node->pat,searchar); | |
576 | put_funcs(node->right); | |
577 | } | |
578 | ||
579 | #ifdef DEBUG | |
/*
 * Returns a short printable rendering of c for the debugging trace.
 * A printable char stands for itself; backspace, tab and newline are
 * shown as \b, \t and \n; any other control char is shown as ^ followed
 * by the char plus 64; a char above '~' is shown as ^? and a negative
 * one as ??.  The result lives in a static buffer that the next call
 * overwrites.
 */
char *
unctrl(c)
char	c;
{
	static char	buf[3];

	if (c >= ' ' && c <= '~') {
		/* printable: a one-character string */
		buf[0] = c;
		buf[1] = 0;
		return(buf);
	}

	/* every other rendering is exactly two characters long */
	buf[2] = 0;
	if (c > '~') {
		buf[0] = '^';
		buf[1] = '?';
	}
	else if (c < 0)
		buf[0] = buf[1] = '?';
	else if (c == '\b') {
		buf[0] = '\\';
		buf[1] = 'b';
	}
	else if (c == '\t') {
		buf[0] = '\\';
		buf[1] = 't';
	}
	else if (c == '\n') {
		buf[0] = '\\';
		buf[1] = 'n';
	}
	else {
		buf[0] = '^';
		buf[1] = c + 64;
	}
	return(buf);
}
615 | #endif |