[unix-history] / usr.bin / indent / lexi.c

/*
 * Copyright (c) 1985 Sun Microsystems, Inc.
 * Copyright (c) 1980 The Regents of the University of California.
 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* SCCS identification string for this file; left out when `lint' is defined. */
#ifndef lint
static char sccsid[] = "@(#)lexi.c	5.16 (Berkeley) 2/26/91";
#endif /* not lint */

/*
 * Here we have the token scanner for indent.  It scans off one token and puts
 * it in the global variable "token".  It returns a code, indicating the type
 * of token scanned.
 */

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "indent_globs.h"
#include "indent_codes.h"

/*
 * Character classes stored in chartype[]: alphanum marks characters that may
 * appear in an identifier or number, opchar marks operator characters.  Any
 * other character has class 0.
 */
#define alphanum 1
#define opchar 3

/*
 * One entry of the reserved-word table searched by lexi().
 */
struct templ {
    char       *rwd;		/* keyword spelling; 0 marks the end of the
				 * table */
    int         rwcode;		/* keyword class, see below */
};

/*
 * Reserved-word table.  lexi() walks it linearly up to the entry whose rwd
 * is 0, and addkey() appends to it at run time, which is why it is
 * dimensioned larger than the initial contents.
 *
 * rwcode classes, as dispatched by the switch in lexi():
 *	1	switch
 *	2	case, default
 *	3	struct, union, enum
 *	4	declaration keywords
 *	5	if, while, for
 *	6	else, do
 *	7	sizeof
 *	0	flagged as a keyword but returned as a plain identifier
 *
 * "typedef" was previously misspelled "typdef" here, so the real keyword was
 * never recognised as a declaration keyword.
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},		/* not a C keyword; kept as in the original
				 * table */
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};

char        chartype[128] =
{
    /*
     * Character class for each 7-bit ASCII code, one row per 16 codes:
     * 1 = may appear in an identifier or number (letters, digits, '$', '_'),
     * 3 = operator character, 0 = anything else.
     */
    /* 000-017: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 020-037: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 040-057: space ! " # $ % & ' ( ) * + , - . / */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 060-077: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 100-117: @ A B C D E F G H I J K L M N O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 120-137: P Q R S T U V W X Y Z [ \ ] ^ _ */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 140-157: ` a b c d e f g h i j k l m n o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 160-177: p q r s t u v w x y z { | } ~ DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};


/*
 * lexi -- scan one token from the input buffer.
 *
 * Skips leading blanks and tabs, copies the text of the next token into the
 * token buffer starting at s_token (e_token is left pointing just past it),
 * and returns a code for the kind of token found: ident, decl, swstmt,
 * casestmt, sp_paren, sp_nparen, newline, lparen, rparen, preesc, question,
 * colon, semicolon, lbrace, rbrace, form_feed, comma, period, unary_op,
 * binary_op, postop or comment.  A newline scanned while had_eof is set
 * returns 0 instead of newline.
 *
 * Besides the token text, the scanner updates parser state in `ps':
 * col_1, last_nl, last_u_d, its_a_keyword, sizeof_keyword, and in some
 * paths cast_mask, block_init, want_blank, procname and
 * in_parameter_declaration.  The statics last_code and l_struct carry
 * context from one call to the next.
 */
int
lexi()
{
    int         unary_delim;	/* this is set to 1 if the current token
				 * forces a following operator to be unary */
    static int  last_code;	/* the last token type returned */
    static int  l_struct;	/* set to 1 if the last token was 'struct' */
    int         code;		/* internal code to be returned */
    char        qchar;		/* the delimiter character for a string */

    e_token = s_token;		/* point to start of place to save token */
    unary_delim = false;
    ps.col_1 = ps.last_nl;	/* tell world that this token started in
				 * column 1 iff the last thing scanned was nl */
    ps.last_nl = false;

    while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
	ps.col_1 = false;	/* leading blanks imply token is not in column
				 * 1 */
	if (++buf_ptr >= buf_end)
	    fill_buffer();
    }

    /*
     * Scan an alphanumeric token: an identifier or keyword, or a number
     * (including one that starts with '.').
     *
     * NOTE(review): chartype[] has 128 entries and is indexed by *buf_ptr.
     * If buf_ptr points to plain (possibly signed) char, bytes >= 0200 would
     * index outside the table -- confirm against indent_globs.h.
     */
    if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
	/*
	 * we have a character or number
	 */
	register char *j;	/* used for searching thru list of
				 * reserved words */
	register struct templ *p;

	if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
	    /* a numeric constant */
	    int         seendot = 0,
	                seenexp = 0;
	    if (*buf_ptr == '0' &&
		    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
		/* hexadecimal: copy the "0x" prefix, then the hex digits */
		*e_token++ = *buf_ptr++;
		*e_token++ = *buf_ptr++;
		while (isxdigit(*buf_ptr)) {
		    CHECK_SIZE_TOKEN;
		    *e_token++ = *buf_ptr++;
		}
	    }
	    else
		/*
		 * decimal or floating: digits with at most one '.', and at
		 * most one exponent part with an optional sign
		 */
		while (1) {
		    if (*buf_ptr == '.')
			if (seendot)
			    break;
			else
			    seendot++;
		    CHECK_SIZE_TOKEN;
		    *e_token++ = *buf_ptr++;
		    if (!isdigit(*buf_ptr) && *buf_ptr != '.')
			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
			    break;
			else {
			    seenexp++;
			    seendot++;	/* no '.' is accepted after the
					 * exponent marker */
			    CHECK_SIZE_TOKEN;
			    *e_token++ = *buf_ptr++;
			    if (*buf_ptr == '+' || *buf_ptr == '-')
				*e_token++ = *buf_ptr++;
			}
		}
	    /* a single trailing 'L' or 'l' suffix is taken as part of the number */
	    if (*buf_ptr == 'L' || *buf_ptr == 'l')
		*e_token++ = *buf_ptr++;
	}
	else
	    while (chartype[*buf_ptr] == alphanum) {	/* copy it over */
		CHECK_SIZE_TOKEN;
		*e_token++ = *buf_ptr++;
		if (buf_ptr >= buf_end)
		    fill_buffer();
	    }
	*e_token++ = '\0';
	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
	    if (++buf_ptr >= buf_end)
		fill_buffer();
	}
	ps.its_a_keyword = false;
	ps.sizeof_keyword = false;
	if (l_struct) {		/* if last token was 'struct', then this token
				 * should be treated as a declaration */
	    l_struct = false;
	    last_code = ident;
	    ps.last_u_d = true;
	    return (decl);
	}
	ps.last_u_d = false;	/* Operator after identifier is binary */
	last_code = ident;	/* Remember that this is the code we will
				 * return */

	/*
	 * This loop will check if the token is a keyword.  Inside the loop
	 * body the name `p' is redeclared as a pointer into the token text
	 * and hides the table pointer; after `break' or the goto, `p' means
	 * the table entry again.
	 */
	for (p = specials; (j = p->rwd) != 0; p++) {
	    register char *p = s_token;	/* point at scanned token */
	    if (*j++ != *p++ || *j++ != *p++)
		continue;	/* This test depends on the fact that
				 * identifiers are always at least 1 character
				 * long (ie. the first two bytes of the
				 * identifier are always meaningful) */
	    if (p[-1] == 0)
		break;		/* token and keyword are both exactly one
				 * character long: leave the loop with the
				 * table entry still non-null, which the test
				 * below treats as a match */
	    while (*p++ == *j)
		if (*j++ == 0)
		    goto found_keyword;	/* I wish that C had a multi-level
					 * break... */
	}
	if (p->rwd) {		/* we have a keyword */
    found_keyword:
	    ps.its_a_keyword = true;
	    ps.last_u_d = true;
	    switch (p->rwcode) {
	    case 1:		/* it is a switch */
		return (swstmt);
	    case 2:		/* a case or default */
		return (casestmt);

	    case 3:		/* a "struct" */
		if (ps.p_l_follow)
		    break;	/* inside parens: cast */
		l_struct = true;

		/*
		 * Next time around, we will want to know that we have had a
		 * 'struct'.  Falls through into case 4.
		 */
	    case 4:		/* one of the declaration keywords */
		if (ps.p_l_follow) {
		    ps.cast_mask |= 1 << ps.p_l_follow;
		    break;	/* inside parens: cast */
		}
		last_code = decl;
		return (decl);

	    case 5:		/* if, while, for */
		return (sp_paren);

	    case 6:		/* do, else */
		return (sp_nparen);

	    case 7:		/* sizeof; falls through into default */
		ps.sizeof_keyword = true;
	    default:		/* all others are treated like any other
				 * identifier */
		return (ident);
	    }			/* end of switch */
	}			/* end of if (found_it) */

	/*
	 * An identifier followed by '(' at the outermost level may start a
	 * procedure definition.  Look through the rest of the buffer for a
	 * ')' directly followed by ';' or ','; if one is found the token is
	 * not taken to be a procedure name.  Otherwise remember the name and
	 * note that parameter declarations follow.
	 *
	 * NOTE(review): strncpy copies at most sizeof ps.procname - 1 bytes
	 * and so never writes the last byte; termination relies on that byte
	 * already being 0 -- confirm how ps is initialised.
	 */
	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
	    register char *tp = buf_ptr;
	    while (tp < buf_end)
		if (*tp++ == ')' && (*tp == ';' || *tp == ','))
		    goto not_proc;
	    strncpy(ps.procname, token, sizeof ps.procname - 1);
	    ps.in_parameter_declaration = 1;
	    rparen_count = 1;
    not_proc:;
	}
	/*
	 * The following hack attempts to guess whether or not the current
	 * token is in fact a declaration keyword -- one that has been
	 * typedefd.  The guess is made when the identifier is followed by
	 * '*' (but not "*="), a letter or '_', we are outside parentheses
	 * and initializers, and the previous token was one of the kinds
	 * listed below.
	 */
	if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
		&& !ps.p_l_follow
	        && !ps.block_init
		&& (ps.last_token == rparen || ps.last_token == semicolon ||
		    ps.last_token == decl ||
		    ps.last_token == lbrace || ps.last_token == rbrace)) {
	    ps.its_a_keyword = true;
	    ps.last_u_d = true;
	    last_code = decl;
	    return decl;
	}
	if (last_code == decl)	/* if this is a declared variable, then
				 * following sign is unary */
	    ps.last_u_d = true;	/* will make "int a -1" work */
	last_code = ident;
	return (ident);		/* the ident is not in the list */
    }				/* end of processing for alphanum character */

    /* Scan a non-alphanumeric token */

    *e_token++ = *buf_ptr;		/* if it is only a one-character token, it is
				 * moved here */
    *e_token = '\0';
    if (++buf_ptr >= buf_end)
	fill_buffer();

    switch (*token) {
    case '\n':
	unary_delim = ps.last_u_d;
	ps.last_nl = true;	/* remember that we just had a newline */
	code = (had_eof ? 0 : newline);

	/*
	 * if data has been exhausted, the newline is a dummy, and we should
	 * return code to stop
	 */
	break;

    case '\'':			/* start of quoted character */
    case '"':			/* start of string */
	qchar = *token;
	if (troff) {
	    /* troff output: replace the opening quote and switch fonts */
	    e_token[-1] = '`';
	    if (qchar == '"')
		*e_token++ = '`';
	    e_token = chfont(&bodyf, &stringf, e_token);
	}
	do {			/* copy the string */
	    while (1) {		/* move one character, or a backslash escape
				 * together with the character it escapes */
		if (*buf_ptr == '\n') {
		    printf("%d: Unterminated literal\n", line_no);
		    goto stop_lit;
		}
		CHECK_SIZE_TOKEN;	/* Only have to do this once in this loop,
					 * since CHECK_SIZE guarantees that there
					 * are at least 5 entries left */
		*e_token = *buf_ptr++;
		if (buf_ptr >= buf_end)
		    fill_buffer();
		if (*e_token == BACKSLASH) {	/* if escape, copy extra char */
		    if (*buf_ptr == '\n')	/* check for escaped newline */
			++line_no;
		    if (troff) {
			/* troff output: emit extra backslashes */
			*++e_token = BACKSLASH;
			if (*buf_ptr == BACKSLASH)
			    *++e_token = BACKSLASH;
		    }
		    *++e_token = *buf_ptr++;
		    ++e_token;	/* we must increment this again because we
				 * copied two chars */
		    if (buf_ptr >= buf_end)
			fill_buffer();
		}
		else
		    break;	/* we copied one character */
	    }			/* end of while (1) */
	} while (*e_token++ != qchar);	/* stop after the closing delimiter */
	if (troff) {
	    e_token = chfont(&stringf, &bodyf, e_token - 1);
	    if (qchar == '"')
		*e_token++ = '\'';
	}
stop_lit:
	code = ident;		/* literals are returned as ident */
	break;

    case ('('):
    case ('['):
	unary_delim = true;
	code = lparen;
	break;

    case (')'):
    case (']'):
	code = rparen;
	break;

    case '#':
	unary_delim = ps.last_u_d;
	code = preesc;
	break;

    case '?':
	unary_delim = true;
	code = question;
	break;

    case (':'):
	code = colon;
	unary_delim = true;
	break;

    case (';'):
	unary_delim = true;
	code = semicolon;
	break;

    case ('{'):
	unary_delim = true;

	/*
	 * if (ps.in_or_st) ps.block_init = 1;
	 */
	/* ?	code = ps.block_init ? lparen : lbrace; */
	code = lbrace;
	break;

    case ('}'):
	unary_delim = true;
	/* ?	code = ps.block_init ? rparen : rbrace; */
	code = rbrace;
	break;

    case 014:			/* a form feed */
	unary_delim = ps.last_u_d;
	ps.last_nl = true;	/* remember this so we can set 'ps.col_1'
				 * right */
	code = form_feed;
	break;

    case (','):
	unary_delim = true;
	code = comma;
	break;

    case '.':
	unary_delim = false;
	code = period;
	break;

    case '-':
    case '+':			/* check for -, +, --, ++ */
	code = (ps.last_u_d ? unary_op : binary_op);
	unary_delim = true;

	if (*buf_ptr == token[0]) {
	    /* check for doubled character */
	    *e_token++ = *buf_ptr++;
	    /* an exhausted input buffer is refilled after the switch */
	    if (last_code == ident || last_code == rparen) {
		code = (ps.last_u_d ? unary_op : postop);
		/* check for following ++ or -- */
		unary_delim = false;
	    }
	}
	else if (*buf_ptr == '=')
	    /* check for operator += */
	    *e_token++ = *buf_ptr++;
	else if (*buf_ptr == '>') {
	    /* check for operator -> */
	    *e_token++ = *buf_ptr++;
	    if (!pointer_as_binop) {
		unary_delim = false;
		code = unary_op;
		ps.want_blank = false;
	    }
	}
	break;			/* an exhausted input buffer is refilled
				 * after the switch */

    case '=':
	if (ps.in_or_st)
	    ps.block_init = 1;
#ifdef undef
	if (chartype[*buf_ptr] == opchar) {	/* we have two char assignment */
	    e_token[-1] = *buf_ptr++;
	    if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
		*e_token++ = *buf_ptr++;
	    *e_token++ = '=';	/* Flip =+ to += */
	    *e_token = 0;
	}
#else
	if (*buf_ptr == '=') {/* == */
	    *e_token++ = '=';	/* append the second '=' of "==" */
	    buf_ptr++;
	    *e_token = 0;
	}
#endif
	code = binary_op;
	unary_delim = true;
	break;
	/* not reached: the break above ends the '=' case */

    case '>':
    case '<':
    case '!':			/* ops like <, <<, <=, !=, etc */
	if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
	    *e_token++ = *buf_ptr;
	    if (++buf_ptr >= buf_end)
		fill_buffer();
	}
	if (*buf_ptr == '=')	/* a further '=', as in <<= or >>= */
	    *e_token++ = *buf_ptr++;
	code = (ps.last_u_d ? unary_op : binary_op);
	unary_delim = true;
	break;

    default:
	if (token[0] == '/' && *buf_ptr == '*') {
	    /* it is start of comment */
	    *e_token++ = '*';

	    if (++buf_ptr >= buf_end)
		fill_buffer();

	    code = comment;
	    unary_delim = ps.last_u_d;
	    break;
	}
	while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
	    /*
	     * handle ||, &&, etc, and also things as in int *****i
	     */
	    *e_token++ = *buf_ptr;
	    if (++buf_ptr >= buf_end)
		fill_buffer();
	}
	code = (ps.last_u_d ? unary_op : binary_op);
	unary_delim = true;


    }				/* end of switch */
    if (code != newline) {
	/* a newline leaves the remembered context untouched */
	l_struct = false;
	last_code = code;
    }
    if (buf_ptr >= buf_end)	/* check for input buffer empty */
	fill_buffer();
    ps.last_u_d = unary_delim;
    *e_token = '\0';		/* null terminate the token */
    return (code);
}

/*
 * Add the given keyword to the keyword table, using val as the keyword type.
 *
 * key is stored by pointer, not copied, so it must stay valid for as long as
 * the table is in use.  If key is already in the table the call does nothing
 * and the existing entry keeps its type.  If the table has no room for both
 * the new entry and the terminating null entry, the keyword is silently
 * dropped.
 *
 * The return type was implicitly int and is now spelled out; the value
 * carries no meaning and is always 0.
 */
int
addkey(key, val)
    char       *key;
    int         val;
{
    struct templ *p = specials;
    /* last usable slot: it must remain available for the terminator */
    struct templ *const last =
	specials + sizeof specials / sizeof specials[0] - 1;

    /*
     * Stop at the terminator, or at the last slot if the table is full, so
     * that the search can never run past the end of the array.
     */
    while (p < last && p->rwd != 0) {
	if (strcmp(p->rwd, key) == 0)
	    return 0;		/* already present */
	p++;
    }

    /*
     * The new entry goes in p and the terminator in p[1], so p must be
     * strictly before the last slot.  The old test allowed p to be the last
     * slot, which wrote p[1] one element past the end of the table.
     */
    if (p >= last)
	return 0;		/* For now, table overflows are silently
				 * ignored */
    p->rwd = key;
    p->rwcode = val;
    p[1].rwd = 0;
    p[1].rwcode = 0;
    return 0;
}
Commit	Line	Data
15637ed4 RG	1	/*
	2	* Copyright (c) 1985 Sun Microsystems, Inc.
	3	* Copyright (c) 1980 The Regents of the University of California.
	4	* Copyright (c) 1976 Board of Trustees of the University of Illinois.
	5	* All rights reserved.
	6	*
	7	* Redistribution and use in source and binary forms, with or without
	8	* modification, are permitted provided that the following conditions
	9	* are met:
	10	* 1. Redistributions of source code must retain the above copyright
	11	* notice, this list of conditions and the following disclaimer.
	12	* 2. Redistributions in binary form must reproduce the above copyright
	13	* notice, this list of conditions and the following disclaimer in the
	14	* documentation and/or other materials provided with the distribution.
	15	* 3. All advertising materials mentioning features or use of this software
	16	* must display the following acknowledgement:
	17	* This product includes software developed by the University of
	18	* California, Berkeley and its contributors.
	19	* 4. Neither the name of the University nor the names of its contributors
	20	* may be used to endorse or promote products derived from this software
	21	* without specific prior written permission.
	22	*
	23	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	24	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	25	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	26	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	27	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	28	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	29	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	30	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	31	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	32	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	33	* SUCH DAMAGE.
	34	*/
	35
	36	#ifndef lint
	37	static char sccsid[] = "@(#)lexi.c 5.16 (Berkeley) 2/26/91";
	38	#endif /* not lint */
	39
	40	/*
	41	* Here we have the token scanner for indent. It scans off one token and puts
	42	* it in the global variable "token". It returns a code, indicating the type
	43	* of token scanned.
	44	*/
	45
	46	#include <stdio.h>
	47	#include <ctype.h>
	48	#include <stdlib.h>
	49	#include <string.h>
	50	#include "indent_globs.h"
	51	#include "indent_codes.h"
	52
	53	#define alphanum 1
	54	#define opchar 3
	55
	56	struct templ {
	57	char *rwd;
	58	int rwcode;
	59	};
	60
	61	struct templ specials[100] =
	62	{
	63	"switch", 1,
	64	"case", 2,
65	"break", 0,
66	"struct", 3,
67	"union", 3,
68	"enum", 3,
69	"default", 2,
70	"int", 4,
71	"char", 4,
72	"float", 4,
73	"double", 4,
74	"long", 4,
75	"short", 4,
76	"typdef", 4,
77	"unsigned", 4,
78	"register", 4,
79	"static", 4,
80	"global", 4,
81	"extern", 4,
82	"void", 4,
83	"goto", 0,
84	"return", 0,
85	"if", 5,
86	"while", 5,
87	"for", 5,
88	"else", 6,
89	"do", 6,
90	"sizeof", 7,
91	0, 0
92	};
93
94	char chartype[128] =
95	{ /* this is used to facilitate the decision of
96	* what type (alphanumeric, operator) each
97	* character is */
98	0, 0, 0, 0, 0, 0, 0, 0,
99	0, 0, 0, 0, 0, 0, 0, 0,
100	0, 0, 0, 0, 0, 0, 0, 0,
101	0, 0, 0, 0, 0, 0, 0, 0,
102	0, 3, 0, 0, 1, 3, 3, 0,
103	0, 0, 3, 3, 0, 3, 0, 3,
104	1, 1, 1, 1, 1, 1, 1, 1,
105	1, 1, 0, 0, 3, 3, 3, 3,
106	0, 1, 1, 1, 1, 1, 1, 1,
107	1, 1, 1, 1, 1, 1, 1, 1,
108	1, 1, 1, 1, 1, 1, 1, 1,
109	1, 1, 1, 0, 0, 0, 3, 1,
110	0, 1, 1, 1, 1, 1, 1, 1,
111	1, 1, 1, 1, 1, 1, 1, 1,
112	1, 1, 1, 1, 1, 1, 1, 1,
113	1, 1, 1, 0, 3, 0, 3, 0
114	};
115
116
117
118
119	int
120	lexi()
121	{
122	int unary_delim; /* this is set to 1 if the current token
123	*
124	* forces a following operator to be unary */
125	static int last_code; /* the last token type returned */
126	static int l_struct; /* set to 1 if the last token was 'struct' */
127	int code; /* internal code to be returned */
128	char qchar; /* the delimiter character for a string */
129
130	e_token = s_token; /* point to start of place to save token */
131	unary_delim = false;
132	ps.col_1 = ps.last_nl; /* tell world that this token started in
133	* column 1 iff the last thing scanned was nl */
134	ps.last_nl = false;
135
136	while (buf_ptr == ' ' \|\| buf_ptr == '\t') { /* get rid of blanks */
137	ps.col_1 = false; /* leading blanks imply token is not in column
138	* 1 */
139	if (++buf_ptr >= buf_end)
140	fill_buffer();
141	}
142
143	/* Scan an alphanumeric token */
144	if (chartype[*buf_ptr] == alphanum \|\| buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
145	/*
146	* we have a character or number
147	*/
148	register char j; / used for searching thru list of
149	*
150	* reserved words */
151	register struct templ *p;
152
153	if (isdigit(*buf_ptr) \|\| buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
154	int seendot = 0,
155	seenexp = 0;
156	if (*buf_ptr == '0' &&
157	(buf_ptr[1] == 'x' \|\| buf_ptr[1] == 'X')) {
158	e_token++ = buf_ptr++;
159	e_token++ = buf_ptr++;
160	while (isxdigit(*buf_ptr)) {
161	CHECK_SIZE_TOKEN;
162	e_token++ = buf_ptr++;
163	}
164	}
165	else
166	while (1) {
167	if (*buf_ptr == '.')
168	if (seendot)
169	break;
170	else
171	seendot++;
172	CHECK_SIZE_TOKEN;
173	e_token++ = buf_ptr++;
174	if (!isdigit(buf_ptr) && buf_ptr != '.')
175	if ((buf_ptr != 'E' && buf_ptr != 'e') \|\| seenexp)
176	break;
177	else {
178	seenexp++;
179	seendot++;
180	CHECK_SIZE_TOKEN;
181	e_token++ = buf_ptr++;
182	if (buf_ptr == '+' \|\| buf_ptr == '-')
183	e_token++ = buf_ptr++;
184	}
185	}
186	if (buf_ptr == 'L' \|\| buf_ptr == 'l')
187	e_token++ = buf_ptr++;
188	}
189	else
190	while (chartype[buf_ptr] == alphanum) { / copy it over */
191	CHECK_SIZE_TOKEN;
192	e_token++ = buf_ptr++;
193	if (buf_ptr >= buf_end)
194	fill_buffer();
195	}
196	*e_token++ = '\0';
197	while (buf_ptr == ' ' \|\| buf_ptr == '\t') { /* get rid of blanks */
198	if (++buf_ptr >= buf_end)
199	fill_buffer();
200	}
201	ps.its_a_keyword = false;
202	ps.sizeof_keyword = false;
203	if (l_struct) { /* if last token was 'struct', then this token
204	* should be treated as a declaration */
205	l_struct = false;
206	last_code = ident;
207	ps.last_u_d = true;
208	return (decl);
209	}
210	ps.last_u_d = false; /* Operator after indentifier is binary */
211	last_code = ident; /* Remember that this is the code we will
212	* return */
213
214	/*
215	* This loop will check if the token is a keyword.
216	*/
217	for (p = specials; (j = p->rwd) != 0; p++) {
218	register char p = s_token; / point at scanned token */
219	if (j++ != p++ \|\| j++ != p++)
220	continue; /* This test depends on the fact that
221	* identifiers are always at least 1 character
222	* long (ie. the first two bytes of the
223	* identifier are always meaningful) */
224	if (p[-1] == 0)
225	break; /* If its a one-character identifier */
226	while (p++ == j)
227	if (*j++ == 0)
228	goto found_keyword; /* I wish that C had a multi-level
229	* break... */
230	}
231	if (p->rwd) { /* we have a keyword */
232	found_keyword:
233	ps.its_a_keyword = true;
234	ps.last_u_d = true;
235	switch (p->rwcode) {
236	case 1: /* it is a switch */
237	return (swstmt);
238	case 2: /* a case or default */
239	return (casestmt);
240
241	case 3: /* a "struct" */
242	if (ps.p_l_follow)
243	break; /* inside parens: cast */
244	l_struct = true;
245
246	/*
247	* Next time around, we will want to know that we have had a
248	* 'struct'
249	*/
250	case 4: /* one of the declaration keywords */
251	if (ps.p_l_follow) {
252	ps.cast_mask \|= 1 << ps.p_l_follow;
253	break; /* inside parens: cast */
254	}
255	last_code = decl;
256	return (decl);
257
258	case 5: /* if, while, for */
259	return (sp_paren);
260
261	case 6: /* do, else */
262	return (sp_nparen);
263
264	case 7:
265	ps.sizeof_keyword = true;
266	default: /* all others are treated like any other
267	* identifier */
268	return (ident);
269	} /* end of switch */
270	} /* end of if (found_it) */
271	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
272	register char *tp = buf_ptr;
273	while (tp < buf_end)
274	if (tp++ == ')' && (tp == ';' \|\| *tp == ','))
275	goto not_proc;
276	strncpy(ps.procname, token, sizeof ps.procname - 1);
277	ps.in_parameter_declaration = 1;
278	rparen_count = 1;
279	not_proc:;
280	}
281	/*
282	* The following hack attempts to guess whether or not the current
283	* token is in fact a declaration keyword -- one that has been
284	* typedefd
285	*/
286	if (((buf_ptr == '' && buf_ptr[1] != '=') \|\| isalpha(buf_ptr) \|\| buf_ptr == '_')
287	&& !ps.p_l_follow
288	&& !ps.block_init
289	&& (ps.last_token == rparen \|\| ps.last_token == semicolon \|\|
290	ps.last_token == decl \|\|
291	ps.last_token == lbrace \|\| ps.last_token == rbrace)) {
292	ps.its_a_keyword = true;
293	ps.last_u_d = true;
294	last_code = decl;
295	return decl;
296	}
297	if (last_code == decl) /* if this is a declared variable, then
298	* following sign is unary */
299	ps.last_u_d = true; /* will make "int a -1" work */
300	last_code = ident;
301	return (ident); /* the ident is not in the list */
302	} /* end of procesing for alpanum character */
303
304	/* Scan a non-alphanumeric token */
305
306	e_token++ = buf_ptr; /* if it is only a one-character token, it is
307	* moved here */
308	*e_token = '\0';
309	if (++buf_ptr >= buf_end)
310	fill_buffer();
311
312	switch (*token) {
313	case '\n':
314	unary_delim = ps.last_u_d;
315	ps.last_nl = true; /* remember that we just had a newline */
316	code = (had_eof ? 0 : newline);
317
318	/*
319	* if data has been exausted, the newline is a dummy, and we should
320	* return code to stop
321	*/
322	break;
323
324	case '\'': /* start of quoted character */
325	case '"': /* start of string */
326	qchar = *token;
327	if (troff) {
328	e_token[-1] = '`';
329	if (qchar == '"')
330	*e_token++ = '`';
331	e_token = chfont(&bodyf, &stringf, e_token);
332	}
333	do { /* copy the string */
334	while (1) { /* move one character or [/<char>]<char> */
335	if (*buf_ptr == '\n') {
336	printf("%d: Unterminated literal\n", line_no);
337	goto stop_lit;
338	}
339	CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
340	* since CHECK_SIZE guarantees that there
341	* are at least 5 entries left */
342	e_token = buf_ptr++;
343	if (buf_ptr >= buf_end)
344	fill_buffer();
345	if (e_token == BACKSLASH) { / if escape, copy extra char */
346	if (buf_ptr == '\n') / check for escaped newline */
347	++line_no;
348	if (troff) {
349	*++e_token = BACKSLASH;
350	if (*buf_ptr == BACKSLASH)
351	*++e_token = BACKSLASH;
352	}
353	++e_token = buf_ptr++;
354	++e_token; /* we must increment this again because we
355	* copied two chars */
356	if (buf_ptr >= buf_end)
357	fill_buffer();
358	}
359	else
360	break; /* we copied one character */
361	} /* end of while (1) */
362	} while (*e_token++ != qchar);
363	if (troff) {
364	e_token = chfont(&stringf, &bodyf, e_token - 1);
365	if (qchar == '"')
366	*e_token++ = '\'';
367	}
368	stop_lit:
369	code = ident;
370	break;
371
372	case ('('):
373	case ('['):
374	unary_delim = true;
375	code = lparen;
376	break;
377
378	case (')'):
379	case (']'):
380	code = rparen;
381	break;
382
383	case '#':
384	unary_delim = ps.last_u_d;
385	code = preesc;
386	break;
387
388	case '?':
389	unary_delim = true;
390	code = question;
391	break;
392
393	case (':'):
394	code = colon;
395	unary_delim = true;
396	break;
397
398	case (';'):
399	unary_delim = true;
400	code = semicolon;
401	break;
402
403	case ('{'):
404	unary_delim = true;
405
406	/*
407	* if (ps.in_or_st) ps.block_init = 1;
408	*/
409	/* ? code = ps.block_init ? lparen : lbrace; */
410	code = lbrace;
411	break;
412
413	case ('}'):
414	unary_delim = true;
415	/* ? code = ps.block_init ? rparen : rbrace; */
416	code = rbrace;
417	break;
418
419	case 014: /* a form feed */
420	unary_delim = ps.last_u_d;
421	ps.last_nl = true; /* remember this so we can set 'ps.col_1'
422	* right */
423	code = form_feed;
424	break;
425
426	case (','):
427	unary_delim = true;
428	code = comma;
429	break;
430
431	case '.':
432	unary_delim = false;
433	code = period;
434	break;
435
436	case '-':
437	case '+': /* check for -, +, --, ++ */
438	code = (ps.last_u_d ? unary_op : binary_op);
439	unary_delim = true;
440
441	if (*buf_ptr == token[0]) {
442	/* check for doubled character */
443	e_token++ = buf_ptr++;
444	/* buffer overflow will be checked at end of loop */
445	if (last_code == ident \|\| last_code == rparen) {
446	code = (ps.last_u_d ? unary_op : postop);
447	/* check for following ++ or -- */
448	unary_delim = false;
449	}
450	}
451	else if (*buf_ptr == '=')
452	/* check for operator += */
453	e_token++ = buf_ptr++;
454	else if (*buf_ptr == '>') {
455	/* check for operator -> */
456	e_token++ = buf_ptr++;
457	if (!pointer_as_binop) {
458	unary_delim = false;
459	code = unary_op;
460	ps.want_blank = false;
461	}
462	}
463	break; /* buffer overflow will be checked at end of
464	* switch */
465
466	case '=':
467	if (ps.in_or_st)
468	ps.block_init = 1;
469	#ifdef undef
470	if (chartype[buf_ptr] == opchar) { / we have two char assignment */
471	e_token[-1] = *buf_ptr++;
472	if ((e_token[-1] == '<' \|\| e_token[-1] == '>') && e_token[-1] == *buf_ptr)
473	e_token++ = buf_ptr++;
474	e_token++ = '='; / Flip =+ to += */
475	*e_token = 0;
476	}
477	#else
478	if (buf_ptr == '=') {/ == */
479	e_token++ = '='; / Flip =+ to += */
480	buf_ptr++;
481	*e_token = 0;
482	}
483	#endif
484	code = binary_op;
485	unary_delim = true;
486	break;
487	/* can drop thru!!! */
488
489	case '>':
490	case '<':
491	case '!': /* ops like <, <<, <=, !=, etc */
492	if (buf_ptr == '>' \|\| buf_ptr == '<' \|\| *buf_ptr == '=') {
493	e_token++ = buf_ptr;
494	if (++buf_ptr >= buf_end)
495	fill_buffer();
496	}
497	if (*buf_ptr == '=')
498	e_token++ = buf_ptr++;
499	code = (ps.last_u_d ? unary_op : binary_op);
500	unary_delim = true;
501	break;
502
503	default:
504	if (token[0] == '/' && buf_ptr == '') {
505	/* it is start of comment */
506	e_token++ = '';
507
508	if (++buf_ptr >= buf_end)
509	fill_buffer();
510
511	code = comment;
512	unary_delim = ps.last_u_d;
513	break;
514	}
515	while ((e_token - 1) == buf_ptr \|\| *buf_ptr == '=') {
516	/*
517	* handle \|\|, &&, etc, and also things as in int *****i
518	*/
519	e_token++ = buf_ptr;
520	if (++buf_ptr >= buf_end)
521	fill_buffer();
522	}
523	code = (ps.last_u_d ? unary_op : binary_op);
524	unary_delim = true;
525
526
527	} /* end of switch */
528	if (code != newline) {
529	l_struct = false;
530	last_code = code;
531	}
532	if (buf_ptr >= buf_end) /* check for input buffer empty */
533	fill_buffer();
534	ps.last_u_d = unary_delim;
535	e_token = '\0'; / null terminate the token */
536	return (code);
537	}
538
539	/*
540	* Add the given keyword to the keyword table, using val as the keyword type
541	*/
542	addkey(key, val)
543	char *key;
544	{
545	register struct templ *p = specials;
546	while (p->rwd)
547	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
548	return;
549	else
550	p++;
551	if (p >= specials + sizeof specials / sizeof specials[0])
552	return; /* For now, table overflows are silently
553	* ignored */
554	p->rwd = key;
555	p->rwcode = val;
556	p[1].rwd = 0;
557	p[1].rwcode = 0;
558	return;
559	}