prettyness police
[unix-history] / usr / src / usr.bin / ctags / C.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 1987, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * %sccs.include.redist.c%
6 */
7
8#ifndef lint
9static char sccsid[] = "@(#)C.c 8.3 (Berkeley) %G%";
10#endif /* not lint */
11
12#include <limits.h>
13#include <stdio.h>
14#include <string.h>
15
16#include "ctags.h"
17
/*
 * Forward declarations: c_entries() calls these helpers before their
 * definitions appear further down in this file.
 */
static int	func_entry __P((void));
static void	hash_entry __P((void));
static void	skip_string __P((int));
static int	str_entry __P((int));
22
23/*
24 * c_entries --
25 * read .c and .h files and call appropriate routines
26 */
27void
28c_entries()
29{
30 int c; /* current character */
31 int level; /* brace level */
32 int token; /* if reading a token */
33 int t_def; /* if reading a typedef */
34 int t_level; /* typedef's brace level */
35 char *sp; /* buffer pointer */
36 char tok[MAXTOKEN]; /* token buffer */
37
38 lineftell = ftell(inf);
39 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
40 while (GETC(!=, EOF)) {
41 switch (c) {
42 /*
43 * Here's where it DOESN'T handle: {
44 * foo(a)
45 * {
46 * #ifdef notdef
47 * }
48 * #endif
49 * if (a)
50 * puts("hello, world");
51 * }
52 */
53 case '{':
54 ++level;
55 goto endtok;
56 case '}':
57 /*
58 * if level goes below zero, try and fix
59 * it, even though we've already messed up
60 */
61 if (--level < 0)
62 level = 0;
63 goto endtok;
64
65 case '\n':
66 SETLINE;
67 /*
68 * the above 3 cases are similar in that they
69 * are special characters that also end tokens.
70 */
71 endtok: if (sp > tok) {
72 *sp = EOS;
73 token = YES;
74 sp = tok;
75 }
76 else
77 token = NO;
78 continue;
79
80 /*
81 * We ignore quoted strings and character constants
82 * completely.
83 */
84 case '"':
85 case '\'':
86 (void)skip_string(c);
87 break;
88
89 /*
90 * comments can be fun; note the state is unchanged after
91 * return, in case we found:
92 * "foo() XX comment XX { int bar; }"
93 */
94 case '/':
95 if (GETC(==, '*')) {
96 skip_comment();
97 continue;
98 }
99 (void)ungetc(c, inf);
100 c = '/';
101 goto storec;
102
103 /* hash marks flag #define's. */
104 case '#':
105 if (sp == tok) {
106 hash_entry();
107 break;
108 }
109 goto storec;
110
111 /*
112 * if we have a current token, parenthesis on
113 * level zero indicates a function.
114 */
115 case '(':
116 if (!level && token) {
117 int curline;
118
119 if (sp != tok)
120 *sp = EOS;
121 /*
122 * grab the line immediately, we may
123 * already be wrong, for example,
124 * foo\n
125 * (arg1,
126 */
127 getline();
128 curline = lineno;
129 if (func_entry()) {
130 ++level;
131 pfnote(tok, curline);
132 }
133 break;
134 }
135 goto storec;
136
137 /*
138 * semi-colons indicate the end of a typedef; if we find a
139 * typedef we search for the next semi-colon of the same
140 * level as the typedef. Ignoring "structs", they are
141 * tricky, since you can find:
142 *
143 * "typedef long time_t;"
144 * "typedef unsigned int u_int;"
145 * "typedef unsigned int u_int [10];"
146 *
147 * If looking at a typedef, we save a copy of the last token
148 * found. Then, when we find the ';' we take the current
149 * token if it starts with a valid token name, else we take
150 * the one we saved. There's probably some reasonable
151 * alternative to this...
152 */
153 case ';':
154 if (t_def && level == t_level) {
155 t_def = NO;
156 getline();
157 if (sp != tok)
158 *sp = EOS;
159 pfnote(tok, lineno);
160 break;
161 }
162 goto storec;
163
164 /*
165 * store characters until one that can't be part of a token
166 * comes along; check the current token against certain
167 * reserved words.
168 */
169 default:
170 storec: if (!intoken(c)) {
171 if (sp == tok)
172 break;
173 *sp = EOS;
174 if (tflag) {
175 /* no typedefs inside typedefs */
176 if (!t_def &&
177 !memcmp(tok, "typedef",8)) {
178 t_def = YES;
179 t_level = level;
180 break;
181 }
182 /* catch "typedef struct" */
183 if ((!t_def || t_level < level)
184 && (!memcmp(tok, "struct", 7)
185 || !memcmp(tok, "union", 6)
186 || !memcmp(tok, "enum", 5))) {
187 /*
188 * get line immediately;
189 * may change before '{'
190 */
191 getline();
192 if (str_entry(c))
193 ++level;
194 break;
195 /* } */
196 }
197 }
198 sp = tok;
199 }
200 else if (sp != tok || begtoken(c)) {
201 *sp++ = c;
202 token = YES;
203 }
204 continue;
205 }
206
207 sp = tok;
208 token = NO;
209 }
210}
211
212/*
213 * func_entry --
214 * handle a function reference
215 */
216static int
217func_entry()
218{
219 int c; /* current character */
220 int level = 0; /* for matching '()' */
221
222 /*
223 * Find the end of the assumed function declaration.
224 * Note that ANSI C functions can have type definitions so keep
225 * track of the parentheses nesting level.
226 */
227 while (GETC(!=, EOF)) {
228 switch (c) {
229 case '\'':
230 case '"':
231 /* skip strings and character constants */
232 skip_string(c);
233 break;
234 case '/':
235 /* skip comments */
236 if (GETC(==, '*'))
237 skip_comment();
238 break;
239 case '(':
240 level++;
241 break;
242 case ')':
243 if (level == 0)
244 goto fnd;
245 level--;
246 break;
247 case '\n':
248 SETLINE;
249 }
250 }
251 return (NO);
252fnd:
253 /*
254 * we assume that the character after a function's right paren
255 * is a token character if it's a function and a non-token
256 * character if it's a declaration. Comments don't count...
257 */
258 for (;;) {
259 while (GETC(!=, EOF) && iswhite(c))
260 if (c == '\n')
261 SETLINE;
262 if (intoken(c) || c == '{')
263 break;
264 if (c == '/' && GETC(==, '*'))
265 skip_comment();
266 else { /* don't ever "read" '/' */
267 (void)ungetc(c, inf);
268 return (NO);
269 }
270 }
271 if (c != '{')
272 (void)skip_key('{');
273 return (YES);
274}
275
276/*
277 * hash_entry --
278 * handle a line starting with a '#'
279 */
280static void
281hash_entry()
282{
283 int c; /* character read */
284 int curline; /* line started on */
285 char *sp; /* buffer pointer */
286 char tok[MAXTOKEN]; /* storage buffer */
287
288 curline = lineno;
289 for (sp = tok;;) { /* get next token */
290 if (GETC(==, EOF))
291 return;
292 if (iswhite(c))
293 break;
294 *sp++ = c;
295 }
296 *sp = EOS;
297 if (memcmp(tok, "define", 6)) /* only interested in #define's */
298 goto skip;
299 for (;;) { /* this doesn't handle "#define \n" */
300 if (GETC(==, EOF))
301 return;
302 if (!iswhite(c))
303 break;
304 }
305 for (sp = tok;;) { /* get next token */
306 *sp++ = c;
307 if (GETC(==, EOF))
308 return;
309 /*
310 * this is where it DOESN'T handle
311 * "#define \n"
312 */
313 if (!intoken(c))
314 break;
315 }
316 *sp = EOS;
317 if (dflag || c == '(') { /* only want macros */
318 getline();
319 pfnote(tok, curline);
320 }
321skip: if (c == '\n') { /* get rid of rest of define */
322 SETLINE
323 if (*(sp - 1) != '\\')
324 return;
325 }
326 (void)skip_key('\n');
327}
328
329/*
330 * str_entry --
331 * handle a struct, union or enum entry
332 */
333static int
334str_entry(c)
335 int c; /* current character */
336{
337 int curline; /* line started on */
338 char *sp; /* buffer pointer */
339 char tok[LINE_MAX]; /* storage buffer */
340
341 curline = lineno;
342 while (iswhite(c))
343 if (GETC(==, EOF))
344 return (NO);
345 if (c == '{') /* it was "struct {" */
346 return (YES);
347 for (sp = tok;;) { /* get next token */
348 *sp++ = c;
349 if (GETC(==, EOF))
350 return (NO);
351 if (!intoken(c))
352 break;
353 }
354 switch (c) {
355 case '{': /* it was "struct foo{" */
356 --sp;
357 break;
358 case '\n': /* it was "struct foo\n" */
359 SETLINE;
360 /*FALLTHROUGH*/
361 default: /* probably "struct foo " */
362 while (GETC(!=, EOF))
363 if (!iswhite(c))
364 break;
365 if (c != '{') {
366 (void)ungetc(c, inf);
367 return (NO);
368 }
369 }
370 *sp = EOS;
371 pfnote(tok, curline);
372 return (YES);
373}
374
375/*
376 * skip_comment --
377 * skip over comment
378 */
379void
380skip_comment()
381{
382 int c; /* character read */
383 int star; /* '*' flag */
384
385 for (star = 0; GETC(!=, EOF);)
386 switch(c) {
387 /* comments don't nest, nor can they be escaped. */
388 case '*':
389 star = YES;
390 break;
391 case '/':
392 if (star)
393 return;
394 break;
395 case '\n':
396 SETLINE;
397 /*FALLTHROUGH*/
398 default:
399 star = NO;
400 break;
401 }
402}
403
404/*
405 * skip_string --
406 * skip to the end of a string or character constant.
407 */
408void
409skip_string(key)
410 int key;
411{
412 int c,
413 skip;
414
415 for (skip = NO; GETC(!=, EOF); )
416 switch (c) {
417 case '\\': /* a backslash escapes anything */
418 skip = !skip; /* we toggle in case it's "\\" */
419 break;
420 case '\n':
421 SETLINE;
422 /*FALLTHROUGH*/
423 default:
424 if (c == key && !skip)
425 return;
426 skip = NO;
427 }
428}
429
430/*
431 * skip_key --
432 * skip to next char "key"
433 */
434int
435skip_key(key)
436 int key;
437{
438 int c,
439 skip,
440 retval;
441
442 for (skip = retval = NO; GETC(!=, EOF);)
443 switch(c) {
444 case '\\': /* a backslash escapes anything */
445 skip = !skip; /* we toggle in case it's "\\" */
446 break;
447 case ';': /* special case for yacc; if one */
448 case '|': /* of these chars occurs, we may */
449 retval = YES; /* have moved out of the rule */
450 break; /* not used by C */
451 case '\'':
452 case '"':
453 /* skip strings and character constants */
454 skip_string(c);
455 break;
456 case '/':
457 /* skip comments */
458 if (GETC(==, '*')) {
459 skip_comment();
460 break;
461 }
462 (void)ungetc(c, inf);
463 c = '/';
464 goto norm;
465 case '\n':
466 SETLINE;
467 /*FALLTHROUGH*/
468 default:
469 norm:
470 if (c == key && !skip)
471 return (retval);
472 skip = NO;
473 }
474 return (retval);
475}