Commit | Line | Data |
---|---|---|
bcb9ffff | 1 | /* |
374464e3 KB |
2 | * Copyright (c) 1987, 1993 |
3 | * The Regents of the University of California. All rights reserved. | |
083e16be | 4 | * |
836fe169 | 5 | * %sccs.include.redist.c% |
bcb9ffff KB |
6 | */ |
7 | ||
8 | #ifndef lint | |
d3acad8d | 9 | static char sccsid[] = "@(#)C.c 8.3 (Berkeley) %G%"; |
083e16be | 10 | #endif /* not lint */ |
bcb9ffff | 11 | |
d3acad8d | 12 | #include <limits.h> |
bcb9ffff | 13 | #include <stdio.h> |
6c2ce1d3 | 14 | #include <string.h> |
d3acad8d | 15 | |
6c2ce1d3 KB |
16 | #include "ctags.h" |
17 | ||
d3acad8d JSP |
18 | static int func_entry __P((void)); |
19 | static void hash_entry __P((void)); | |
20 | static void skip_string __P((int)); | |
21 | static int str_entry __P((int)); | |
bcb9ffff KB |
22 | |
23 | /* | |
24 | * c_entries -- | |
25 | * read .c and .h files and call appropriate routines | |
26 | */ | |
d3acad8d | 27 | void |
bcb9ffff KB |
28 | c_entries() |
29 | { | |
d3acad8d JSP |
30 | int c; /* current character */ |
31 | int level; /* brace level */ | |
32 | int token; /* if reading a token */ | |
33 | int t_def; /* if reading a typedef */ | |
34 | int t_level; /* typedef's brace level */ | |
35 | char *sp; /* buffer pointer */ | |
bcb9ffff KB |
36 | char tok[MAXTOKEN]; /* token buffer */ |
37 | ||
38 | lineftell = ftell(inf); | |
39 | sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; | |
d3acad8d JSP |
40 | while (GETC(!=, EOF)) { |
41 | switch (c) { | |
bcb9ffff | 42 | /* |
d3acad8d | 43 | * Here's where it DOESN'T handle: { |
bcb9ffff KB |
44 | * foo(a) |
45 | * { | |
46 | * #ifdef notdef | |
47 | * } | |
48 | * #endif | |
49 | * if (a) | |
50 | * puts("hello, world"); | |
51 | * } | |
52 | */ | |
53 | case '{': | |
54 | ++level; | |
55 | goto endtok; | |
56 | case '}': | |
57 | /* | |
58 | * if level goes below zero, try and fix | |
59 | * it, even though we've already messed up | |
60 | */ | |
61 | if (--level < 0) | |
62 | level = 0; | |
63 | goto endtok; | |
64 | ||
65 | case '\n': | |
66 | SETLINE; | |
67 | /* | |
68 | * the above 3 cases are similar in that they | |
69 | * are special characters that also end tokens. | |
70 | */ | |
d3acad8d | 71 | endtok: if (sp > tok) { |
bcb9ffff KB |
72 | *sp = EOS; |
73 | token = YES; | |
74 | sp = tok; | |
75 | } | |
76 | else | |
77 | token = NO; | |
78 | continue; | |
79 | ||
59a02ac4 KB |
80 | /* |
81 | * We ignore quoted strings and character constants | |
82 | * completely. | |
83 | */ | |
bcb9ffff KB |
84 | case '"': |
85 | case '\'': | |
59a02ac4 | 86 | (void)skip_string(c); |
bcb9ffff KB |
87 | break; |
88 | ||
89 | /* | |
90 | * comments can be fun; note the state is unchanged after | |
91 | * return, in case we found: | |
92 | * "foo() XX comment XX { int bar; }" | |
93 | */ | |
94 | case '/': | |
d3acad8d | 95 | if (GETC(==, '*')) { |
bcb9ffff KB |
96 | skip_comment(); |
97 | continue; | |
98 | } | |
d3acad8d | 99 | (void)ungetc(c, inf); |
bcb9ffff KB |
100 | c = '/'; |
101 | goto storec; | |
102 | ||
103 | /* hash marks flag #define's. */ | |
104 | case '#': | |
105 | if (sp == tok) { | |
106 | hash_entry(); | |
107 | break; | |
108 | } | |
109 | goto storec; | |
110 | ||
111 | /* | |
d3acad8d | 112 | * if we have a current token, parenthesis on |
bcb9ffff KB |
113 | * level zero indicates a function. |
114 | */ | |
115 | case '(': | |
116 | if (!level && token) { | |
117 | int curline; | |
118 | ||
119 | if (sp != tok) | |
120 | *sp = EOS; | |
121 | /* | |
122 | * grab the line immediately, we may | |
123 | * already be wrong, for example, | |
124 | * foo\n | |
125 | * (arg1, | |
126 | */ | |
127 | getline(); | |
128 | curline = lineno; | |
129 | if (func_entry()) { | |
130 | ++level; | |
d3acad8d | 131 | pfnote(tok, curline); |
bcb9ffff KB |
132 | } |
133 | break; | |
134 | } | |
135 | goto storec; | |
136 | ||
137 | /* | |
138 | * semi-colons indicate the end of a typedef; if we find a | |
139 | * typedef we search for the next semi-colon of the same | |
140 | * level as the typedef. Ignoring "structs", they are | |
141 | * tricky, since you can find: | |
142 | * | |
143 | * "typedef long time_t;" | |
144 | * "typedef unsigned int u_int;" | |
145 | * "typedef unsigned int u_int [10];" | |
146 | * | |
147 | * If looking at a typedef, we save a copy of the last token | |
148 | * found. Then, when we find the ';' we take the current | |
149 | * token if it starts with a valid token name, else we take | |
150 | * the one we saved. There's probably some reasonable | |
151 | * alternative to this... | |
152 | */ | |
153 | case ';': | |
154 | if (t_def && level == t_level) { | |
155 | t_def = NO; | |
156 | getline(); | |
157 | if (sp != tok) | |
158 | *sp = EOS; | |
d3acad8d | 159 | pfnote(tok, lineno); |
bcb9ffff KB |
160 | break; |
161 | } | |
162 | goto storec; | |
163 | ||
164 | /* | |
165 | * store characters until one that can't be part of a token | |
166 | * comes along; check the current token against certain | |
167 | * reserved words. | |
168 | */ | |
169 | default: | |
d3acad8d | 170 | storec: if (!intoken(c)) { |
bcb9ffff KB |
171 | if (sp == tok) |
172 | break; | |
173 | *sp = EOS; | |
174 | if (tflag) { | |
175 | /* no typedefs inside typedefs */ | |
d3acad8d JSP |
176 | if (!t_def && |
177 | !memcmp(tok, "typedef",8)) { | |
bcb9ffff KB |
178 | t_def = YES; |
179 | t_level = level; | |
180 | break; | |
181 | } | |
182 | /* catch "typedef struct" */ | |
183 | if ((!t_def || t_level < level) | |
d3acad8d JSP |
184 | && (!memcmp(tok, "struct", 7) |
185 | || !memcmp(tok, "union", 6) | |
186 | || !memcmp(tok, "enum", 5))) { | |
bcb9ffff KB |
187 | /* |
188 | * get line immediately; | |
189 | * may change before '{' | |
190 | */ | |
191 | getline(); | |
192 | if (str_entry(c)) | |
193 | ++level; | |
194 | break; | |
d3acad8d | 195 | /* } */ |
bcb9ffff KB |
196 | } |
197 | } | |
198 | sp = tok; | |
199 | } | |
200 | else if (sp != tok || begtoken(c)) { | |
201 | *sp++ = c; | |
202 | token = YES; | |
203 | } | |
204 | continue; | |
205 | } | |
d3acad8d | 206 | |
bcb9ffff KB |
207 | sp = tok; |
208 | token = NO; | |
209 | } | |
210 | } | |
211 | ||
212 | /* | |
213 | * func_entry -- | |
214 | * handle a function reference | |
215 | */ | |
59a02ac4 | 216 | static int |
bcb9ffff KB |
217 | func_entry() |
218 | { | |
d3acad8d JSP |
219 | int c; /* current character */ |
220 | int level = 0; /* for matching '()' */ | |
bcb9ffff | 221 | |
59a02ac4 KB |
222 | /* |
223 | * Find the end of the assumed function declaration. | |
224 | * Note that ANSI C functions can have type definitions so keep | |
225 | * track of the parentheses nesting level. | |
226 | */ | |
d3acad8d JSP |
227 | while (GETC(!=, EOF)) { |
228 | switch (c) { | |
59a02ac4 KB |
229 | case '\'': |
230 | case '"': | |
231 | /* skip strings and character constants */ | |
232 | skip_string(c); | |
233 | break; | |
234 | case '/': | |
235 | /* skip comments */ | |
d3acad8d | 236 | if (GETC(==, '*')) |
59a02ac4 KB |
237 | skip_comment(); |
238 | break; | |
239 | case '(': | |
240 | level++; | |
241 | break; | |
242 | case ')': | |
243 | if (level == 0) | |
244 | goto fnd; | |
245 | level--; | |
246 | break; | |
247 | case '\n': | |
248 | SETLINE; | |
249 | } | |
250 | } | |
d3acad8d | 251 | return (NO); |
59a02ac4 | 252 | fnd: |
bcb9ffff KB |
253 | /* |
254 | * we assume that the character after a function's right paren | |
255 | * is a token character if it's a function and a non-token | |
256 | * character if it's a declaration. Comments don't count... | |
257 | */ | |
bcb9ffff | 258 | for (;;) { |
d3acad8d JSP |
259 | while (GETC(!=, EOF) && iswhite(c)) |
260 | if (c == '\n') | |
bcb9ffff | 261 | SETLINE; |
d3acad8d | 262 | if (intoken(c) || c == '{') |
bcb9ffff | 263 | break; |
d3acad8d | 264 | if (c == '/' && GETC(==, '*')) |
bcb9ffff KB |
265 | skip_comment(); |
266 | else { /* don't ever "read" '/' */ | |
d3acad8d JSP |
267 | (void)ungetc(c, inf); |
268 | return (NO); | |
bcb9ffff KB |
269 | } |
270 | } | |
d3acad8d JSP |
271 | if (c != '{') |
272 | (void)skip_key('{'); | |
273 | return (YES); | |
bcb9ffff KB |
274 | } |
275 | ||
276 | /* | |
277 | * hash_entry -- | |
278 | * handle a line starting with a '#' | |
279 | */ | |
6c2ce1d3 | 280 | static void |
bcb9ffff KB |
281 | hash_entry() |
282 | { | |
d3acad8d JSP |
283 | int c; /* character read */ |
284 | int curline; /* line started on */ | |
285 | char *sp; /* buffer pointer */ | |
bcb9ffff KB |
286 | char tok[MAXTOKEN]; /* storage buffer */ |
287 | ||
288 | curline = lineno; | |
289 | for (sp = tok;;) { /* get next token */ | |
d3acad8d | 290 | if (GETC(==, EOF)) |
bcb9ffff KB |
291 | return; |
292 | if (iswhite(c)) | |
293 | break; | |
294 | *sp++ = c; | |
295 | } | |
296 | *sp = EOS; | |
d3acad8d | 297 | if (memcmp(tok, "define", 6)) /* only interested in #define's */ |
bcb9ffff KB |
298 | goto skip; |
299 | for (;;) { /* this doesn't handle "#define \n" */ | |
d3acad8d | 300 | if (GETC(==, EOF)) |
bcb9ffff KB |
301 | return; |
302 | if (!iswhite(c)) | |
303 | break; | |
304 | } | |
305 | for (sp = tok;;) { /* get next token */ | |
306 | *sp++ = c; | |
d3acad8d | 307 | if (GETC(==, EOF)) |
bcb9ffff KB |
308 | return; |
309 | /* | |
310 | * this is where it DOESN'T handle | |
311 | * "#define \n" | |
312 | */ | |
313 | if (!intoken(c)) | |
314 | break; | |
315 | } | |
316 | *sp = EOS; | |
d3acad8d | 317 | if (dflag || c == '(') { /* only want macros */ |
bcb9ffff | 318 | getline(); |
d3acad8d | 319 | pfnote(tok, curline); |
bcb9ffff | 320 | } |
d3acad8d | 321 | skip: if (c == '\n') { /* get rid of rest of define */ |
bcb9ffff KB |
322 | SETLINE |
323 | if (*(sp - 1) != '\\') | |
324 | return; | |
325 | } | |
d3acad8d | 326 | (void)skip_key('\n'); |
bcb9ffff KB |
327 | } |
328 | ||
329 | /* | |
330 | * str_entry -- | |
331 | * handle a struct, union or enum entry | |
332 | */ | |
d3acad8d | 333 | static int |
bcb9ffff | 334 | str_entry(c) |
d3acad8d | 335 | int c; /* current character */ |
bcb9ffff | 336 | { |
bcb9ffff | 337 | int curline; /* line started on */ |
d3acad8d JSP |
338 | char *sp; /* buffer pointer */ |
339 | char tok[LINE_MAX]; /* storage buffer */ | |
bcb9ffff KB |
340 | |
341 | curline = lineno; | |
342 | while (iswhite(c)) | |
d3acad8d JSP |
343 | if (GETC(==, EOF)) |
344 | return (NO); | |
345 | if (c == '{') /* it was "struct {" */ | |
346 | return (YES); | |
bcb9ffff KB |
347 | for (sp = tok;;) { /* get next token */ |
348 | *sp++ = c; | |
d3acad8d JSP |
349 | if (GETC(==, EOF)) |
350 | return (NO); | |
bcb9ffff KB |
351 | if (!intoken(c)) |
352 | break; | |
353 | } | |
d3acad8d | 354 | switch (c) { |
bcb9ffff KB |
355 | case '{': /* it was "struct foo{" */ |
356 | --sp; | |
357 | break; | |
358 | case '\n': /* it was "struct foo\n" */ | |
359 | SETLINE; | |
360 | /*FALLTHROUGH*/ | |
361 | default: /* probably "struct foo " */ | |
d3acad8d | 362 | while (GETC(!=, EOF)) |
bcb9ffff KB |
363 | if (!iswhite(c)) |
364 | break; | |
d3acad8d | 365 | if (c != '{') { |
4c5197ec | 366 | (void)ungetc(c, inf); |
d3acad8d | 367 | return (NO); |
4c5197ec | 368 | } |
bcb9ffff KB |
369 | } |
370 | *sp = EOS; | |
d3acad8d JSP |
371 | pfnote(tok, curline); |
372 | return (YES); | |
bcb9ffff KB |
373 | } |
374 | ||
375 | /* | |
376 | * skip_comment -- | |
377 | * skip over comment | |
378 | */ | |
d3acad8d | 379 | void |
bcb9ffff KB |
380 | skip_comment() |
381 | { | |
d3acad8d JSP |
382 | int c; /* character read */ |
383 | int star; /* '*' flag */ | |
bcb9ffff | 384 | |
d3acad8d JSP |
385 | for (star = 0; GETC(!=, EOF);) |
386 | switch(c) { | |
387 | /* comments don't nest, nor can they be escaped. */ | |
388 | case '*': | |
389 | star = YES; | |
390 | break; | |
391 | case '/': | |
392 | if (star) | |
393 | return; | |
394 | break; | |
395 | case '\n': | |
396 | SETLINE; | |
397 | /*FALLTHROUGH*/ | |
398 | default: | |
399 | star = NO; | |
400 | break; | |
bcb9ffff KB |
401 | } |
402 | } | |
403 | ||
59a02ac4 KB |
404 | /* |
405 | * skip_string -- | |
406 | * skip to the end of a string or character constant. | |
407 | */ | |
408 | void | |
409 | skip_string(key) | |
d3acad8d | 410 | int key; |
59a02ac4 | 411 | { |
d3acad8d JSP |
412 | int c, |
413 | skip; | |
59a02ac4 | 414 | |
d3acad8d JSP |
415 | for (skip = NO; GETC(!=, EOF); ) |
416 | switch (c) { | |
59a02ac4 KB |
417 | case '\\': /* a backslash escapes anything */ |
418 | skip = !skip; /* we toggle in case it's "\\" */ | |
419 | break; | |
420 | case '\n': | |
421 | SETLINE; | |
422 | /*FALLTHROUGH*/ | |
423 | default: | |
424 | if (c == key && !skip) | |
425 | return; | |
426 | skip = NO; | |
427 | } | |
428 | } | |
429 | ||
bcb9ffff KB |
430 | /* |
431 | * skip_key -- | |
432 | * skip to next char "key" | |
433 | */ | |
59a02ac4 | 434 | int |
bcb9ffff | 435 | skip_key(key) |
d3acad8d | 436 | int key; |
bcb9ffff | 437 | { |
d3acad8d JSP |
438 | int c, |
439 | skip, | |
440 | retval; | |
bcb9ffff | 441 | |
d3acad8d JSP |
442 | for (skip = retval = NO; GETC(!=, EOF);) |
443 | switch(c) { | |
bcb9ffff KB |
444 | case '\\': /* a backslash escapes anything */ |
445 | skip = !skip; /* we toggle in case it's "\\" */ | |
446 | break; | |
447 | case ';': /* special case for yacc; if one */ | |
448 | case '|': /* of these chars occurs, we may */ | |
449 | retval = YES; /* have moved out of the rule */ | |
450 | break; /* not used by C */ | |
59a02ac4 KB |
451 | case '\'': |
452 | case '"': | |
453 | /* skip strings and character constants */ | |
454 | skip_string(c); | |
455 | break; | |
456 | case '/': | |
457 | /* skip comments */ | |
d3acad8d | 458 | if (GETC(==, '*')) { |
59a02ac4 KB |
459 | skip_comment(); |
460 | break; | |
461 | } | |
d3acad8d | 462 | (void)ungetc(c, inf); |
59a02ac4 KB |
463 | c = '/'; |
464 | goto norm; | |
bcb9ffff KB |
465 | case '\n': |
466 | SETLINE; | |
467 | /*FALLTHROUGH*/ | |
468 | default: | |
59a02ac4 | 469 | norm: |
bcb9ffff | 470 | if (c == key && !skip) |
d3acad8d | 471 | return (retval); |
bcb9ffff KB |
472 | skip = NO; |
473 | } | |
d3acad8d | 474 | return (retval); |
bcb9ffff | 475 | } |