[unix-history] / usr / src / usr.bin / ctags / C.c

/*
 * Copyright (c) 1987 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* SCCS identification string for this file; left out when `lint' is defined. */
#ifndef lint
static char sccsid[] = "@(#)C.c	5.5 (Berkeley) 2/26/91";
#endif /* not lint */

#include <stdio.h>
#include <string.h>
#include "ctags.h"

/* forward declarations of the file-local helpers defined further down */
static int func_entry(), str_entry();
static void hash_entry();

/*
 * c_entries --
 *	read .c and .h files and call appropriate routines
 *
 *	Reads the input stream `inf' one character at a time (via GETC)
 *	until EOF, collecting identifier-like tokens in a local buffer and
 *	tracking the brace nesting level.  Tags are reported with pfnote():
 *	  - a token followed by '(' at brace level zero is handed to
 *	    func_entry() and recorded if that returns YES;
 *	  - a '#' met while no token is being collected is handed to
 *	    hash_entry();
 *	  - with tflag set, "typedef" arms a search for the terminating
 *	    ';' and "struct"/"union"/"enum" are handed to str_entry().
 *	Tokens longer than the buffer are truncated instead of being
 *	written past its end.  No value is explicitly returned.
 */
c_entries()
{
	extern int	tflag;		/* -t: create tags for typedefs */
	register int	c,		/* current character */
			level;		/* brace level */
	register char	*sp;		/* buffer pointer */
	int	token,			/* if reading a token */
		t_def,			/* if reading a typedef */
		t_level;		/* typedef's brace level */
	char	tok[MAXTOKEN];		/* token buffer */

	lineftell = ftell(inf);
	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
	while (GETC(!=,EOF)) {

	/*
	 * Inside the switch, "continue" keeps the token state as it is,
	 * while "break" falls out to the bottom of the loop, which
	 * discards the current token (sp = tok, token = NO).
	 */
	switch ((char)c) {
		/*
		 * Here's where it DOESN'T handle:
		 *	foo(a)
		 *	{
		 *	#ifdef notdef
		 *		}
		 *	#endif
		 *		if (a)
		 *			puts("hello, world");
		 *	}
		 */
		case '{':
			++level;
			goto endtok;
		case '}':
			/*
			 * if level goes below zero, try and fix
			 * it, even though we've already messed up
			 */
			if (--level < 0)
				level = 0;
			goto endtok;

		case '\n':
			SETLINE;
			/*
			 * the above 3 cases are similar in that they
			 * are special characters that also end tokens.
			 * A token in progress is terminated and left in
			 * tok with "token" set; otherwise "token" is
			 * cleared.
			 */
endtok:			if (sp > tok) {
				*sp = EOS;
				token = YES;
				sp = tok;
			}
			else
				token = NO;
			continue;

		/* we ignore quoted strings and comments in their entirety */
		case '"':
		case '\'':
			(void)skip_key(c);
			break;

		/*
		 * comments can be fun; note the state is unchanged after
		 * return, in case we found:
		 *	"foo() XX comment XX { int bar; }"
		 * A '/' that does not start a comment is treated as an
		 * ordinary character; the character read after it is
		 * pushed back first.
		 */
		case '/':
			if (GETC(==,'*')) {
				skip_comment();
				continue;
			}
			(void)ungetc(c,inf);
			c = '/';
			goto storec;

		/*
		 * hash marks flag #define's; only looked at when no
		 * token is currently being collected.
		 */
		case '#':
			if (sp == tok) {
				hash_entry();
				break;
			}
			goto storec;

		/*
	 	 * if we have a current token, parenthesis on
		 * level zero indicates a function.
		 */
		case '(':
			if (!level && token) {
				int	curline;

				if (sp != tok)
					*sp = EOS;
				/*
				 * grab the line immediately, we may
				 * already be wrong, for example,
				 *	foo\n
				 *	(arg1,
				 */
				getline();
				curline = lineno;
				/*
				 * func_entry() returning YES means the
				 * opening brace of the body has already
				 * been consumed, hence the ++level.
				 */
				if (func_entry()) {
					++level;
					pfnote(tok,curline);
				}
				break;
			}
			goto storec;

		/*
		 * semi-colons indicate the end of a typedef; if we find a
		 * typedef we search for the next semi-colon of the same
		 * level as the typedef.  Ignoring "structs", they are
		 * tricky, since you can find:
		 *
		 *	"typedef long time_t;"
		 *	"typedef unsigned int u_int;"
		 *	"typedef unsigned int u_int [10];"
		 *
		 * When the ';' arrives, a token still being collected is
		 * terminated and used; otherwise tok still holds the most
		 * recently completed token and that one is used.  There's
		 * probably some reasonable alternative to this...
		 */
		case ';':
			if (t_def && level == t_level) {
				t_def = NO;
				getline();
				if (sp != tok)
					*sp = EOS;
				pfnote(tok,lineno);
				break;
			}
			goto storec;

		/*
		 * store characters until one that can't be part of a token
		 * comes along; check the current token against certain
		 * reserved words.
		 */
		default:
storec:			if (!intoken(c)) {
				if (sp == tok)
					break;
				*sp = EOS;
				if (tflag) {
					/* no typedefs inside typedefs */
					if (!t_def && !bcmp(tok,"typedef",8)) {
						t_def = YES;
						t_level = level;
						break;
					}
					/* catch "typedef struct" */
					if ((!t_def || t_level < level)
					    && (!bcmp(tok,"struct",7)
					    || !bcmp(tok,"union",6)
					    || !bcmp(tok,"enum",5))) {
						/*
						 * get line immediately;
						 * may change before '{'
						 */
						getline();
						if (str_entry(c))
							++level;
						break;
					}
				}
				/* keep the finished token's text in tok */
				sp = tok;
			}
			else if (sp != tok || begtoken(c)) {
				/*
				 * always leave room for the terminating
				 * EOS; characters beyond the buffer size
				 * are dropped (the token is truncated).
				 */
				if (sp < tok + sizeof(tok) - 1)
					*sp++ = c;
				token = YES;
			}
			continue;
		}
		sp = tok;
		token = NO;
	}
}

/*
 * func_entry --
 *	handle a function reference
 *
 *	Called once a '(' has been read after a token.  Skips to the next
 *	unescaped ')' with skip_key() and then inspects what follows,
 *	ignoring white space and comments.
 *
 *	Returns YES if this looks like a function definition; in that
 *	case the input has been advanced past the next '{'.  Returns NO
 *	otherwise (including at end of file), after pushing back the
 *	character that decided the matter.
 */
static int
func_entry()
{
	register int	c;		/* current character */

	/*
	 * we assume that the character after a function's right paren
	 * is a token character if it's a function and a non-token
	 * character if it's a declaration.  Comments don't count...
	 */
	(void)skip_key((int)')');
	for (;;) {
		while (GETC(!=,EOF) && iswhite(c))
			if (c == (int)'\n')
				SETLINE;
		/*
		 * The loop above also stops at end of file.  Every other
		 * use of intoken()/iswhite() in this file is preceded by
		 * an EOF check, so do not hand EOF to intoken() here
		 * either; nothing can follow, so it is not a definition.
		 */
		if (c == EOF)
			return(NO);
		if (intoken(c) || c == (int)'{')
			break;
		if (c == (int)'/' && GETC(==,'*'))
			skip_comment();
		else {				/* don't ever "read" '/' */
			/*
			 * if c was a '/', it has been replaced by the
			 * character after it, and that one is pushed back
			 */
			(void)ungetc(c,inf);
			return(NO);
		}
	}
	/* a token character was seen: skip ahead to the '{' */
	if (c != (int)'{')
		(void)skip_key((int)'{');
	return(YES);
}

/*
 * hash_entry --
 *	handle a line starting with a '#'
 *
 *	Called after the '#' has been read.  Reads the directive name
 *	that immediately follows; anything whose name does not begin
 *	with "define" is skipped to the end of the line.  For a define,
 *	the name being defined is read and recorded with pfnote() if it
 *	is directly followed by '(' or if dflag is set.  The rest of the
 *	line is then skipped.  Returns silently on end of file.
 *	Names longer than the buffer are truncated.
 */
static void
hash_entry()
{
	extern int	dflag;		/* -d: non-macro defines */
	register int	c,		/* character read */
			curline;	/* line started on */
	register char	*sp;		/* buffer pointer */
	char	tok[MAXTOKEN];		/* storage buffer */

	curline = lineno;
	for (sp = tok;;) {		/* get next token */
		if (GETC(==,EOF))
			return;
		if (iswhite(c))
			break;
		/* leave room for the EOS; drop what does not fit */
		if (sp < tok + sizeof(tok) - 1)
			*sp++ = c;
	}
	*sp = EOS;
	if (bcmp(tok,"define",6))	/* only interested in #define's */
		goto skip;
	for (;;) {			/* this doesn't handle "#define \n" */
		if (GETC(==,EOF))
			return;
		if (!iswhite(c))
			break;
	}
	for (sp = tok;;) {		/* get next token */
		/* leave room for the EOS; drop what does not fit */
		if (sp < tok + sizeof(tok) - 1)
			*sp++ = c;
		if (GETC(==,EOF))
			return;
		/*
		 * this is where it DOESN'T handle
		 * "#define \n"
		 */
		if (!intoken(c))
			break;
	}
	*sp = EOS;
	if (dflag || c == (int)'(') {	/* only want macros */
		getline();
		pfnote(tok,curline);
	}
skip:	if (c == (int)'\n') {		/* get rid of rest of define */
		SETLINE;
		/*
		 * the line is finished unless the last character stored
		 * was a backslash; sp == tok means nothing was stored
		 * (e.g. a lone '#'), so there is no last character to
		 * look at.
		 */
		if (sp == tok || *(sp - 1) != '\\')
			return;
	}
	/* skip_key() honours backslash-newline, so continuations go too */
	(void)skip_key((int)'\n');
}

/*
 * str_entry --
 *	handle a struct, union or enum entry
 *
 *	`c' is the character that followed the keyword.  Skips white
 *	space, reads the tag name and checks that a '{' follows.
 *
 *	Returns YES once the opening '{' has been consumed: either
 *	directly after the keyword (no tag, nothing recorded) or after
 *	a tag name, which is recorded with pfnote() against the line
 *	number current on entry.  Returns NO on end of file or when the
 *	name is not followed by '{'; in the latter case the offending
 *	character is pushed back.  Names longer than the buffer are
 *	truncated.
 */
static int
str_entry(c)
	register int	c;		/* current character */
{
	register char	*sp;		/* buffer pointer */
	int	curline;		/* line started on */
	char	tok[BUFSIZ];		/* storage buffer */

	curline = lineno;
	while (iswhite(c))
		if (GETC(==,EOF))
			return(NO);
	if (c == (int)'{')		/* it was "struct {" */
		return(YES);
	for (sp = tok;;) {		/* get next token */
		/* leave room for the EOS; drop what does not fit */
		if (sp < tok + sizeof(tok) - 1)
			*sp++ = c;
		if (GETC(==,EOF))
			return(NO);
		if (!intoken(c))
			break;
	}
	switch ((char)c) {
		case '{':		/* it was "struct foo{" */
			/*
			 * the '{' ended the loop above before being
			 * stored, so sp already points just past the
			 * name; stepping it back would chop off the
			 * name's last character.
			 */
			break;
		case '\n':		/* it was "struct foo\n" */
			SETLINE;
			/*FALLTHROUGH*/
		default:		/* probably "struct foo " */
			while (GETC(!=,EOF))
				if (!iswhite(c))
					break;
			if (c != (int)'{') {
				(void)ungetc(c, inf);
				return(NO);
			}
	}
	*sp = EOS;
	pfnote(tok,curline);
	return(YES);
}

/*
 * skip_comment --
 *	skip over comment
 *
 *	Consumes input up to and including the first '/' that comes
 *	directly after a '*', or up to end of file.  Newlines met on the
 *	way are accounted for with SETLINE.
 */
skip_comment()
{
	register int	c,		/* character read */
			after_star;	/* previous character was '*' */

	after_star = 0;
	while (GETC(!=,EOF)) {
		/* comments don't nest, nor can they be escaped. */
		if ((char)c == '/' && after_star)
			return;
		if ((char)c == '\n') {
			SETLINE;
		}
		/* only a '*' directly before the '/' closes the comment */
		after_star = ((char)c == '*') ? YES : NO;
	}
}

/*
 * skip_key --
 *	skip to next char "key"
 *
 *	Consumes input up to and including the first occurrence of
 *	`key' that is not preceded by an active backslash, or up to end
 *	of file.  Returns YES if a ';' or '|' was passed on the way and
 *	NO otherwise.  Because those two characters and the backslash
 *	are handled before the comparison, they never match `key'.
 */
skip_key(key)
	register int	key;
{
	register int	c,
			escaped,	/* previous char was a live '\' */
			saw_sep;	/* a ';' or '|' went by */
	register char	ch;		/* c narrowed for classification */

	escaped = saw_sep = NO;
	while (GETC(!=,EOF)) {
		ch = (char)c;
		if (ch == '\\') {
			/* a backslash escapes anything; toggle for "\\" */
			escaped = !escaped;
			continue;
		}
		if (ch == ';' || ch == '|') {
			/*
			 * special case for yacc; if one of these chars
			 * occurs, we may have moved out of the rule.
			 * not used by C
			 */
			saw_sep = YES;
			continue;
		}
		if (ch == '\n') {
			SETLINE;
		}
		if (c == key && !escaped)
			return(saw_sep);
		escaped = NO;
	}
	return(saw_sep);
}
Commit	Line	Data
d9e9dbd6 WJ	1	/*
	2	* Copyright (c) 1987 The Regents of the University of California.
	3	* All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
	6	* modification, are permitted provided that the following conditions
	7	* are met:
	8	* 1. Redistributions of source code must retain the above copyright
	9	* notice, this list of conditions and the following disclaimer.
	10	* 2. Redistributions in binary form must reproduce the above copyright
	11	* notice, this list of conditions and the following disclaimer in the
	12	* documentation and/or other materials provided with the distribution.
	13	* 3. All advertising materials mentioning features or use of this software
	14	* must display the following acknowledgement:
	15	* This product includes software developed by the University of
	16	* California, Berkeley and its contributors.
	17	* 4. Neither the name of the University nor the names of its contributors
	18	* may be used to endorse or promote products derived from this software
	19	* without specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	22	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	24	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	25	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	26	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	27	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	28	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	29	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	30	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	31	* SUCH DAMAGE.
	32	*/
	33
	34	#ifndef lint
	35	static char sccsid[] = "@(#)C.c 5.5 (Berkeley) 2/26/91";
	36	#endif /* not lint */
	37
	38	#include <stdio.h>
	39	#include <string.h>
	40	#include "ctags.h"
	41
	42	static int func_entry(), str_entry();
	43	static void hash_entry();
	44
	45	/*
	46	* c_entries --
	47	* read .c and .h files and call appropriate routines
	48	*/
	49	c_entries()
	50	{
	51	extern int tflag; /* -t: create tags for typedefs */
	52	register int c, /* current character */
	53	level; /* brace level */
	54	register char *sp; /* buffer pointer */
	55	int token, /* if reading a token */
	56	t_def, /* if reading a typedef */
	57	t_level; /* typedef's brace level */
	58	char tok[MAXTOKEN]; /* token buffer */
	59
	60	lineftell = ftell(inf);
	61	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
	62	while (GETC(!=,EOF)) {
	63
	64	switch ((char)c) {
65	/*
66	* Here's where it DOESN'T handle:
67	* foo(a)
68	* {
69	* #ifdef notdef
70	* }
71	* #endif
72	* if (a)
73	* puts("hello, world");
74	* }
75	*/
76	case '{':
77	++level;
78	goto endtok;
79	case '}':
80	/*
81	* if level goes below zero, try and fix
82	* it, even though we've already messed up
83	*/
84	if (--level < 0)
85	level = 0;
86	goto endtok;
87
88	case '\n':
89	SETLINE;
90	/*
91	* the above 3 cases are similar in that they
92	* are special characters that also end tokens.
93	*/
94	endtok: if (sp > tok) {
95	*sp = EOS;
96	token = YES;
97	sp = tok;
98	}
99	else
100	token = NO;
101	continue;
102
103	/* we ignore quoted strings and comments in their entirety */
104	case '"':
105	case '\'':
106	(void)skip_key(c);
107	break;
108
109	/*
110	* comments can be fun; note the state is unchanged after
111	* return, in case we found:
112	* "foo() XX comment XX { int bar; }"
113	*/
114	case '/':
115	if (GETC(==,'*')) {
116	skip_comment();
117	continue;
118	}
119	(void)ungetc(c,inf);
120	c = '/';
121	goto storec;
122
123	/* hash marks flag #define's. */
124	case '#':
125	if (sp == tok) {
126	hash_entry();
127	break;
128	}
129	goto storec;
130
131	/*
132	* if we have a current token, parenthesis on
133	* level zero indicates a function.
134	*/
135	case '(':
136	if (!level && token) {
137	int curline;
138
139	if (sp != tok)
140	*sp = EOS;
141	/*
142	* grab the line immediately, we may
143	* already be wrong, for example,
144	* foo\n
145	* (arg1,
146	*/
147	getline();
148	curline = lineno;
149	if (func_entry()) {
150	++level;
151	pfnote(tok,curline);
152	}
153	break;
154	}
155	goto storec;
156
157	/*
158	* semi-colons indicate the end of a typedef; if we find a
159	* typedef we search for the next semi-colon of the same
160	* level as the typedef. Ignoring "structs", they are
161	* tricky, since you can find:
162	*
163	* "typedef long time_t;"
164	* "typedef unsigned int u_int;"
165	* "typedef unsigned int u_int [10];"
166	*
167	* If looking at a typedef, we save a copy of the last token
168	* found. Then, when we find the ';' we take the current
169	* token if it starts with a valid token name, else we take
170	* the one we saved. There's probably some reasonable
171	* alternative to this...
172	*/
173	case ';':
174	if (t_def && level == t_level) {
175	t_def = NO;
176	getline();
177	if (sp != tok)
178	*sp = EOS;
179	pfnote(tok,lineno);
180	break;
181	}
182	goto storec;
183
184	/*
185	* store characters until one that can't be part of a token
186	* comes along; check the current token against certain
187	* reserved words.
188	*/
189	default:
190	storec: if (!intoken(c)) {
191	if (sp == tok)
192	break;
193	*sp = EOS;
194	if (tflag) {
195	/* no typedefs inside typedefs */
196	if (!t_def && !bcmp(tok,"typedef",8)) {
197	t_def = YES;
198	t_level = level;
199	break;
200	}
201	/* catch "typedef struct" */
202	if ((!t_def || t_level < level)
203	&& (!bcmp(tok,"struct",7)
204	|| !bcmp(tok,"union",6)
205	|| !bcmp(tok,"enum",5))) {
206	/*
207	* get line immediately;
208	* may change before '{'
209	*/
210	getline();
211	if (str_entry(c))
212	++level;
213	break;
214	}
215	}
216	sp = tok;
217	}
218	else if (sp != tok || begtoken(c)) {
219	*sp++ = c;
220	token = YES;
221	}
222	continue;
223	}
224	sp = tok;
225	token = NO;
226	}
227	}
228
229	/*
230	* func_entry --
231	* handle a function reference
232	*/
233	static
234	func_entry()
235	{
236	register int c; /* current character */
237
238	/*
239	* we assume that the character after a function's right paren
240	* is a token character if it's a function and a non-token
241	* character if it's a declaration. Comments don't count...
242	*/
243	(void)skip_key((int)')');
244	for (;;) {
245	while (GETC(!=,EOF) && iswhite(c))
246	if (c == (int)'\n')
247	SETLINE;
248	if (intoken(c) || c == (int)'{')
249	break;
250	if (c == (int)'/' && GETC(==,'*'))
251	skip_comment();
252	else { /* don't ever "read" '/' */
253	(void)ungetc(c,inf);
254	return(NO);
255	}
256	}
257	if (c != (int)'{')
258	(void)skip_key((int)'{');
259	return(YES);
260	}
261
262	/*
263	* hash_entry --
264	* handle a line starting with a '#'
265	*/
266	static void
267	hash_entry()
268	{
269	extern int dflag; /* -d: non-macro defines */
270	register int c, /* character read */
271	curline; /* line started on */
272	register char *sp; /* buffer pointer */
273	char tok[MAXTOKEN]; /* storage buffer */
274
275	curline = lineno;
276	for (sp = tok;;) { /* get next token */
277	if (GETC(==,EOF))
278	return;
279	if (iswhite(c))
280	break;
281	*sp++ = c;
282	}
283	*sp = EOS;
284	if (bcmp(tok,"define",6)) /* only interested in #define's */
285	goto skip;
286	for (;;) { /* this doesn't handle "#define \n" */
287	if (GETC(==,EOF))
288	return;
289	if (!iswhite(c))
290	break;
291	}
292	for (sp = tok;;) { /* get next token */
293	*sp++ = c;
294	if (GETC(==,EOF))
295	return;
296	/*
297	* this is where it DOESN'T handle
298	* "#define \n"
299	*/
300	if (!intoken(c))
301	break;
302	}
303	*sp = EOS;
304	if (dflag || c == (int)'(') { /* only want macros */
305	getline();
306	pfnote(tok,curline);
307	}
308	skip: if (c == (int)'\n') { /* get rid of rest of define */
309	SETLINE
310	if (*(sp - 1) != '\\')
311	return;
312	}
313	(void)skip_key((int)'\n');
314	}
315
316	/*
317	* str_entry --
318	* handle a struct, union or enum entry
319	*/
320	static
321	str_entry(c)
322	register int c; /* current character */
323	{
324	register char *sp; /* buffer pointer */
325	int curline; /* line started on */
326	char tok[BUFSIZ]; /* storage buffer */
327
328	curline = lineno;
329	while (iswhite(c))
330	if (GETC(==,EOF))
331	return(NO);
332	if (c == (int)'{') /* it was "struct {" */
333	return(YES);
334	for (sp = tok;;) { /* get next token */
335	*sp++ = c;
336	if (GETC(==,EOF))
337	return(NO);
338	if (!intoken(c))
339	break;
340	}
341	switch ((char)c) {
342	case '{': /* it was "struct foo{" */
343	--sp;
344	break;
345	case '\n': /* it was "struct foo\n" */
346	SETLINE;
347	/*FALLTHROUGH*/
348	default: /* probably "struct foo " */
349	while (GETC(!=,EOF))
350	if (!iswhite(c))
351	break;
352	if (c != (int)'{') {
353	(void)ungetc(c, inf);
354	return(NO);
355	}
356	}
357	*sp = EOS;
358	pfnote(tok,curline);
359	return(YES);
360	}
361
362	/*
363	* skip_comment --
364	* skip over comment
365	*/
366	skip_comment()
367	{
368	register int c, /* character read */
369	star; /* '*' flag */
370
371	for (star = 0;GETC(!=,EOF);)
372	switch((char)c) {
373	/* comments don't nest, nor can they be escaped. */
374	case '*':
375	star = YES;
376	break;
377	case '/':
378	if (star)
379	return;
380	break;
381	case '\n':
382	SETLINE;
383	/*FALLTHROUGH*/
384	default:
385	star = NO;
386	}
387	}
388
389	/*
390	* skip_key --
391	* skip to next char "key"
392	*/
393	skip_key(key)
394	register int key;
395	{
396	register int c,
397	skip,
398	retval;
399
400	for (skip = retval = NO;GETC(!=,EOF);)
401	switch((char)c) {
402	case '\\': /* a backslash escapes anything */
403	skip = !skip; /* we toggle in case it's "\\" */
404	break;
405	case ';': /* special case for yacc; if one */
406	case '|': /* of these chars occurs, we may */
407	retval = YES; /* have moved out of the rule */
408	break; /* not used by C */
409	case '\n':
410	SETLINE;
411	/*FALLTHROUGH*/
412	default:
413	if (c == key && !skip)
414	return(retval);
415	skip = NO;
416	}
417	return(retval);
418	}