386BSD 0.1 development
[unix-history] / usr / src / usr.bin / ctags / C.c
CommitLineData
d9e9dbd6
WJ
1/*
2 * Copyright (c) 1987 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#ifndef lint
35static char sccsid[] = "@(#)C.c 5.5 (Berkeley) 2/26/91";
36#endif /* not lint */
37
38#include <stdio.h>
39#include <string.h>
40#include "ctags.h"
41
42static int func_entry(), str_entry();
43static void hash_entry();
44
45/*
46 * c_entries --
47 * read .c and .h files and call appropriate routines
48 */
49c_entries()
50{
51 extern int tflag; /* -t: create tags for typedefs */
52 register int c, /* current character */
53 level; /* brace level */
54 register char *sp; /* buffer pointer */
55 int token, /* if reading a token */
56 t_def, /* if reading a typedef */
57 t_level; /* typedef's brace level */
58 char tok[MAXTOKEN]; /* token buffer */
59
60 lineftell = ftell(inf);
61 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
62 while (GETC(!=,EOF)) {
63
64 switch ((char)c) {
65 /*
66 * Here's where it DOESN'T handle:
67 * foo(a)
68 * {
69 * #ifdef notdef
70 * }
71 * #endif
72 * if (a)
73 * puts("hello, world");
74 * }
75 */
76 case '{':
77 ++level;
78 goto endtok;
79 case '}':
80 /*
81 * if level goes below zero, try and fix
82 * it, even though we've already messed up
83 */
84 if (--level < 0)
85 level = 0;
86 goto endtok;
87
88 case '\n':
89 SETLINE;
90 /*
91 * the above 3 cases are similar in that they
92 * are special characters that also end tokens.
93 */
94endtok: if (sp > tok) {
95 *sp = EOS;
96 token = YES;
97 sp = tok;
98 }
99 else
100 token = NO;
101 continue;
102
103 /* we ignore quoted strings and comments in their entirety */
104 case '"':
105 case '\'':
106 (void)skip_key(c);
107 break;
108
109 /*
110 * comments can be fun; note the state is unchanged after
111 * return, in case we found:
112 * "foo() XX comment XX { int bar; }"
113 */
114 case '/':
115 if (GETC(==,'*')) {
116 skip_comment();
117 continue;
118 }
119 (void)ungetc(c,inf);
120 c = '/';
121 goto storec;
122
123 /* hash marks flag #define's. */
124 case '#':
125 if (sp == tok) {
126 hash_entry();
127 break;
128 }
129 goto storec;
130
131 /*
132 * if we have a current token, parenthesis on
133 * level zero indicates a function.
134 */
135 case '(':
136 if (!level && token) {
137 int curline;
138
139 if (sp != tok)
140 *sp = EOS;
141 /*
142 * grab the line immediately, we may
143 * already be wrong, for example,
144 * foo\n
145 * (arg1,
146 */
147 getline();
148 curline = lineno;
149 if (func_entry()) {
150 ++level;
151 pfnote(tok,curline);
152 }
153 break;
154 }
155 goto storec;
156
157 /*
158 * semi-colons indicate the end of a typedef; if we find a
159 * typedef we search for the next semi-colon of the same
160 * level as the typedef. Ignoring "structs", they are
161 * tricky, since you can find:
162 *
163 * "typedef long time_t;"
164 * "typedef unsigned int u_int;"
165 * "typedef unsigned int u_int [10];"
166 *
167 * If looking at a typedef, we save a copy of the last token
168 * found. Then, when we find the ';' we take the current
169 * token if it starts with a valid token name, else we take
170 * the one we saved. There's probably some reasonable
171 * alternative to this...
172 */
173 case ';':
174 if (t_def && level == t_level) {
175 t_def = NO;
176 getline();
177 if (sp != tok)
178 *sp = EOS;
179 pfnote(tok,lineno);
180 break;
181 }
182 goto storec;
183
184 /*
185 * store characters until one that can't be part of a token
186 * comes along; check the current token against certain
187 * reserved words.
188 */
189 default:
190storec: if (!intoken(c)) {
191 if (sp == tok)
192 break;
193 *sp = EOS;
194 if (tflag) {
195 /* no typedefs inside typedefs */
196 if (!t_def && !bcmp(tok,"typedef",8)) {
197 t_def = YES;
198 t_level = level;
199 break;
200 }
201 /* catch "typedef struct" */
202 if ((!t_def || t_level < level)
203 && (!bcmp(tok,"struct",7)
204 || !bcmp(tok,"union",6)
205 || !bcmp(tok,"enum",5))) {
206 /*
207 * get line immediately;
208 * may change before '{'
209 */
210 getline();
211 if (str_entry(c))
212 ++level;
213 break;
214 }
215 }
216 sp = tok;
217 }
218 else if (sp != tok || begtoken(c)) {
219 *sp++ = c;
220 token = YES;
221 }
222 continue;
223 }
224 sp = tok;
225 token = NO;
226 }
227}
228
229/*
230 * func_entry --
231 * handle a function reference
232 */
233static
234func_entry()
235{
236 register int c; /* current character */
237
238 /*
239 * we assume that the character after a function's right paren
240 * is a token character if it's a function and a non-token
241 * character if it's a declaration. Comments don't count...
242 */
243 (void)skip_key((int)')');
244 for (;;) {
245 while (GETC(!=,EOF) && iswhite(c))
246 if (c == (int)'\n')
247 SETLINE;
248 if (intoken(c) || c == (int)'{')
249 break;
250 if (c == (int)'/' && GETC(==,'*'))
251 skip_comment();
252 else { /* don't ever "read" '/' */
253 (void)ungetc(c,inf);
254 return(NO);
255 }
256 }
257 if (c != (int)'{')
258 (void)skip_key((int)'{');
259 return(YES);
260}
261
262/*
263 * hash_entry --
264 * handle a line starting with a '#'
265 */
266static void
267hash_entry()
268{
269 extern int dflag; /* -d: non-macro defines */
270 register int c, /* character read */
271 curline; /* line started on */
272 register char *sp; /* buffer pointer */
273 char tok[MAXTOKEN]; /* storage buffer */
274
275 curline = lineno;
276 for (sp = tok;;) { /* get next token */
277 if (GETC(==,EOF))
278 return;
279 if (iswhite(c))
280 break;
281 *sp++ = c;
282 }
283 *sp = EOS;
284 if (bcmp(tok,"define",6)) /* only interested in #define's */
285 goto skip;
286 for (;;) { /* this doesn't handle "#define \n" */
287 if (GETC(==,EOF))
288 return;
289 if (!iswhite(c))
290 break;
291 }
292 for (sp = tok;;) { /* get next token */
293 *sp++ = c;
294 if (GETC(==,EOF))
295 return;
296 /*
297 * this is where it DOESN'T handle
298 * "#define \n"
299 */
300 if (!intoken(c))
301 break;
302 }
303 *sp = EOS;
304 if (dflag || c == (int)'(') { /* only want macros */
305 getline();
306 pfnote(tok,curline);
307 }
308skip: if (c == (int)'\n') { /* get rid of rest of define */
309 SETLINE
310 if (*(sp - 1) != '\\')
311 return;
312 }
313 (void)skip_key((int)'\n');
314}
315
316/*
317 * str_entry --
318 * handle a struct, union or enum entry
319 */
320static
321str_entry(c)
322 register int c; /* current character */
323{
324 register char *sp; /* buffer pointer */
325 int curline; /* line started on */
326 char tok[BUFSIZ]; /* storage buffer */
327
328 curline = lineno;
329 while (iswhite(c))
330 if (GETC(==,EOF))
331 return(NO);
332 if (c == (int)'{') /* it was "struct {" */
333 return(YES);
334 for (sp = tok;;) { /* get next token */
335 *sp++ = c;
336 if (GETC(==,EOF))
337 return(NO);
338 if (!intoken(c))
339 break;
340 }
341 switch ((char)c) {
342 case '{': /* it was "struct foo{" */
343 --sp;
344 break;
345 case '\n': /* it was "struct foo\n" */
346 SETLINE;
347 /*FALLTHROUGH*/
348 default: /* probably "struct foo " */
349 while (GETC(!=,EOF))
350 if (!iswhite(c))
351 break;
352 if (c != (int)'{') {
353 (void)ungetc(c, inf);
354 return(NO);
355 }
356 }
357 *sp = EOS;
358 pfnote(tok,curline);
359 return(YES);
360}
361
362/*
363 * skip_comment --
364 * skip over comment
365 */
366skip_comment()
367{
368 register int c, /* character read */
369 star; /* '*' flag */
370
371 for (star = 0;GETC(!=,EOF);)
372 switch((char)c) {
373 /* comments don't nest, nor can they be escaped. */
374 case '*':
375 star = YES;
376 break;
377 case '/':
378 if (star)
379 return;
380 break;
381 case '\n':
382 SETLINE;
383 /*FALLTHROUGH*/
384 default:
385 star = NO;
386 }
387}
388
389/*
390 * skip_key --
391 * skip to next char "key"
392 */
393skip_key(key)
394 register int key;
395{
396 register int c,
397 skip,
398 retval;
399
400 for (skip = retval = NO;GETC(!=,EOF);)
401 switch((char)c) {
402 case '\\': /* a backslash escapes anything */
403 skip = !skip; /* we toggle in case it's "\\" */
404 break;
405 case ';': /* special case for yacc; if one */
406 case '|': /* of these chars occurs, we may */
407 retval = YES; /* have moved out of the rule */
408 break; /* not used by C */
409 case '\n':
410 SETLINE;
411 /*FALLTHROUGH*/
412 default:
413 if (c == key && !skip)
414 return(retval);
415 skip = NO;
416 }
417 return(retval);
418}