[unix-history] / usr / src / usr.bin / indent / lexi.c

/*
 * Copyright (c) 1980 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

/* SCCS identification string for this file; compiled out when lint is defined */
#ifndef lint
static char sccsid[] = "@(#)lexi.c	5.5 (Berkeley) %G%";
#endif /* not lint */

/*-
 *
 *			  Copyright (C) 1976
 *				by the
 *			  Board of Trustees
 *				of the
 *			University of Illinois
 *
 *			 All rights reserved
 *
 *
 * NAME:
 *	lexi
 *
 * FUNCTION:
 *	This is the token scanner for indent
 *
 * ALGORITHM:
 *	1) Strip off intervening blanks and/or tabs.
 *	2) If it is an alphanumeric token, move it to the token buffer "token".
 *	   Check if it is a special reserved word that indent will want to
 *	   know about.
 *	3) Non-alphanumeric tokens are handled with a big switch statement.  A
 *	   flag is kept to remember if the last token was a "unary delimiter",
 *	   which forces a following operator to be unary as opposed to binary.
 *
 * PARAMETERS:
 *	None
 *
 * RETURNS:
 *	An integer code indicating the type of token scanned.
 *
 * GLOBALS:
 *	buf_ptr =
 *	had_eof
 *	ps.last_u_d =	Set to true iff this token is a "unary delimiter"
 *
 * CALLS:
 *	fill_buffer
 *	printf (lib)
 *
 * CALLED BY:
 *	main
 *
 * NOTES:
 *	Start of comment is passed back so that the comment can be scanned by
 *	pr_comment.
 *
 *	Strings and character literals are returned just like identifiers.
 *
 * HISTORY:
 *	initial coding 	November 1976	D A Willcox of CAC
 *	1/7/77		D A Willcox of CAC	Fix to provide proper handling
 *						of "int a -1;"
 *
 */

/*
 * Here we have the token scanner for indent.  It scans off one token and
 * puts it in the global variable "token".  It returns a code, indicating
 * the type of token scanned. 
 */

#include "indent_globs.h"
#include "indent_codes.h"
#include "ctype.h"

/*
 * Character class codes stored in chartype[] and compared against by
 * lexi(): alphanum marks characters that start or continue an
 * alphanumeric token, opchar marks operator characters.
 */
#define alphanum 1
#define opchar 3

/*
 * One entry of the reserved-word table: the spelling of the word and the
 * class code that lexi() switches on once the word has been recognised.
 */
struct templ {
    char       *rwd;		/* keyword text; 0 marks the end of the table */
    int         rwcode;		/* keyword class, see the list below */
};

/*
 * Reserved words known to the scanner.  The list ends at the first entry
 * whose rwd is 0; the array is larger than the initial list so that
 * addkey() can append further words at run time.
 *
 * rwcode values as used by lexi():
 *	0	keyword with no special class (returned as ident)
 *	1	switch
 *	2	case, default
 *	3	struct, union, enum
 *	4	declaration keywords
 *	5	if, while, for
 *	6	else, do
 *	7	sizeof
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef" and never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};

/*
 * Classification of each 7-bit character code, consulted by the scanner
 * to decide what kind of token a character belongs to:
 *	0 = neither class, 1 = alphanumeric, 3 = operator character.
 * Each row below covers sixteen consecutive character codes.
 */
char        chartype[128] =
{
    /* 0 - 15: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 16 - 31: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 32 - 47: blank and punctuation up to the slash */
    0, 3, 0, 0, 0, 3, 3, 0, 0, 0, 3, 3, 0, 3, 3, 3,
    /* 48 - 63: digits, colon, semicolon, then four operator characters */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 64 - 79: at-sign, upper case A through O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 80 - 95: upper case P through Z, brackets, caret, underscore */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 96 - 111: grave accent, lower case a through o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 112 - 127: lower case p through z, braces, bar, tilde, DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};


int 
lexi()
{
    register char *tok;		/* local pointer to next char in token */
    int         unary_delim;	/* this is set to 1 if the current token 
				 *
				 * forces a following operator to be unary */
    static int  last_code;	/* the last token type returned */
    static int  l_struct;	/* set to 1 if the last token was 'struct' */
    int         code;		/* internal code to be returned */
    char        qchar;		/* the delimiter character for a string */

    tok = token;		/* point to start of place to save token */
    unary_delim = false;
    ps.col_1 = ps.last_nl;	/* tell world that this token started in
				 * column 1 iff the last thing scanned was
				 * nl */
    ps.last_nl = false;

    while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
	ps.col_1 = false;	/* leading blanks imply token is not in
				 * column 1 */
	if (++buf_ptr >= buf_end)
	    fill_buffer();
    }

    /* Scan an alphanumeric token.  Note that we must also handle
     * stuff like "1.0e+03" and "7e-6". */
    if (chartype[*buf_ptr & 0177] == alphanum) {	/* we have a character
							 * or number */
	register char *j;	/* used for searching thru list of 
				 * reserved words */
	register struct templ *p;
	register int c;

	do {			/* copy it over */
	    *tok++ = *buf_ptr++;
	    if (buf_ptr >= buf_end)
		fill_buffer();
	} while (chartype[c = *buf_ptr & 0177] == alphanum ||
		isdigit(token[0]) && (c == '+' || c == '-') &&
		(tok[-1] == 'e' || tok[-1] == 'E'));
	*tok++ = '\0';
	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
	    if (++buf_ptr >= buf_end)
		fill_buffer();
	}
	ps.its_a_keyword = false;
	ps.sizeof_keyword = false;
	if (l_struct) {		/* if last token was 'struct', then this
				 * token should be treated as a
				 * declaration */
	    l_struct = false;
	    last_code = ident;
	    ps.last_u_d = true;
	    return (decl);
	}
	ps.last_u_d = false;	/* Operator after indentifier is binary */
	last_code = ident;	/* Remember that this is the code we will
				 * return */

	/*
	 * This loop will check if the token is a keyword. 
	 */
	for (p = specials; (j = p->rwd) != 0; p++) {
	    tok = token;	/* point at scanned token */
	    if (*j++ != *tok++ || *j++ != *tok++)
		continue;	/* This test depends on the fact that
				 * identifiers are always at least 1
				 * character long (ie. the first two bytes
				 * of the identifier are always
				 * meaningful) */
	    if (tok[-1] == 0)
		break;		/* If its a one-character identifier */
	    while (*tok++ == *j)
		if (*j++ == 0)
		    goto found_keyword;	/* I wish that C had a multi-level
					 * break... */
	}
	if (p->rwd) {		/* we have a keyword */
    found_keyword:
	    ps.its_a_keyword = true;
	    ps.last_u_d = true;
	    switch (p->rwcode) {
		case 1:	/* it is a switch */
		    return (swstmt);
		case 2:	/* a case or default */
		    return (casestmt);

		case 3:	/* a "struct" */
		    if (ps.p_l_follow)
			break;	/* inside parens: cast */
		    l_struct = true;

		    /*
		     * Next time around, we will want to know that we have
		     * had a 'struct' 
		     */
		case 4:	/* one of the declaration keywords */
		    if (ps.p_l_follow) {
			ps.cast_mask |= 1 << ps.p_l_follow;
			break;	/* inside parens: cast */
		    }
		    last_code = decl;
		    return (decl);

		case 5:	/* if, while, for */
		    return (sp_paren);

		case 6:	/* do, else */
		    return (sp_nparen);

		case 7:
		    ps.sizeof_keyword = true;
		default:	/* all others are treated like any other
				 * identifier */
		    return (ident);
	    }			/* end of switch */
	}			/* end of if (found_it) */
	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0
	    && (buf_ptr[1] != ')' || buf_ptr[2] != ';')) {
	    strncpy(ps.procname, token, sizeof ps.procname - 1);
	    ps.in_parameter_declaration = 1;
	}

	/*
	 * The following hack attempts to guess whether or not the current
	 * token is in fact a declaration keyword -- one that has been
	 * typedefd 
	 */
	if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr))
	    && !ps.p_l_follow
	    && (ps.last_token == rparen || ps.last_token == semicolon ||
		ps.last_token == decl ||
		ps.last_token == lbrace || ps.last_token == rbrace)) {
	    ps.its_a_keyword = true;
	    ps.last_u_d = true;
	    last_code = decl;
	    return decl;
	}
	if (last_code == decl)	/* if this is a declared variable, then
				 * following sign is unary */
	    ps.last_u_d = true;	/* will make "int a -1" work */
	last_code = ident;
	return (ident);		/* the ident is not in the list */
    }				/* end of procesing for alpanum character */
    /* Scan a non-alphanumeric token */

    *tok++ = *buf_ptr;		/* if it is only a one-character token, it
				 * is moved here */
    *tok = '\0';
    if (++buf_ptr >= buf_end)
	fill_buffer();

    switch (*token) {
	case '\n':
	    unary_delim = ps.last_u_d;
	    ps.last_nl = true;	/* remember that we just had a newline */
	    code = (had_eof ? 0 : newline);

	    /*
	     * if data has been exausted, the newline is a dummy, and we
	     * should return code to stop 
	     */
	    break;

	case '\'':		/* start of quoted character */
	case '"':		/* start of string */
	    qchar = *token;
	    if (troff) {
		tok[-1] = '`';
		if (qchar == '"')
		    *tok++ = '`';
		*tok++ = BACKSLASH;
		*tok++ = 'f';
		*tok++ = 'L';
	    }
	    do {		/* copy the string */
		while (1) {	/* move one character or [/<char>]<char> */
		    if (*buf_ptr == '\n') {
			printf("%d: Unterminated literal\n", line_no);
			goto stop_lit;
		    }
		    *tok = *buf_ptr++;
		    if (buf_ptr >= buf_end)
			fill_buffer();
		    if (had_eof || ((tok - token) > (bufsize - 2))) {
			printf("Unterminated literal\n");
			++tok;
			goto stop_lit;
			/* get outof literal copying loop */
		    }
		    if (*tok == BACKSLASH) {	/* if escape, copy extra
						 * char */
			if (*buf_ptr == '\n')	/* check for escaped
						 * newline */
			    ++line_no;
			if (troff) {
			    *++tok = BACKSLASH;
			    if (*buf_ptr == BACKSLASH)
				*++tok = BACKSLASH;
			}
			*++tok = *buf_ptr++;
			++tok;	/* we must increment this again because we
				 * copied two chars */
			if (buf_ptr >= buf_end)
			    fill_buffer();
		    }
		    else
			break;	/* we copied one character */
		}		/* end of while (1) */
	    } while (*tok++ != qchar);
	    if (troff) {
		tok[-1] = BACKSLASH;
		*tok++ = 'f';
		*tok++ = 'R';
		*tok++ = '\'';
		if (qchar == '"')
		    *tok++ = '\'';
	    }
    stop_lit:
	    code = ident;
	    break;

	case ('('):
	case ('['):
	    unary_delim = true;
	    code = lparen;
	    break;

	case (')'):
	case (']'):
	    code = rparen;
	    break;

	case '#':
	    unary_delim = ps.last_u_d;
	    code = preesc;
	    break;

	case '?':
	    unary_delim = true;
	    code = question;
	    break;

	case (':'):
	    code = colon;
	    unary_delim = true;
	    break;

	case (';'):
	    unary_delim = true;
	    code = semicolon;
	    break;

	case ('{'):
	    unary_delim = true;

	    /*
	     * if (ps.in_or_st) ps.block_init = 1; 
	     */
	    code = ps.block_init ? lparen : lbrace;
	    break;

	case ('}'):
	    unary_delim = true;
	    code = ps.block_init ? rparen : rbrace;
	    break;

	case 014:		/* a form feed */
	    unary_delim = ps.last_u_d;
	    ps.last_nl = true;	/* remember this so we can set 'ps.col_1'
				 * right */
	    code = form_feed;
	    break;

	case (','):
	    unary_delim = true;
	    code = comma;
	    break;

	case '.':
	    unary_delim = false;
	    code = period;
	    break;

	case '-':
	case '+':		/* check for -, +, --, ++ */
	    code = (ps.last_u_d ? unary_op : binary_op);
	    unary_delim = true;

	    if (*buf_ptr == token[0]) {
		/* check for doubled character */
		*tok++ = *buf_ptr++;
		/* buffer overflow will be checked at end of loop */
		if (last_code == ident || last_code == rparen) {
		    code = (ps.last_u_d ? unary_op : postop);
		    /* check for following ++ or -- */
		    unary_delim = false;
		}
	    }
	    else if (*buf_ptr == '=')
		/* check for operator += */
		*tok++ = *buf_ptr++;
	    else if (token[0] == '-' && *buf_ptr == '>') {
		/* check for operator -> */
		*tok++ = *buf_ptr++;
		if (!pointer_as_binop) {
		    code = unary_op;
		    unary_delim = false;
		    ps.want_blank = false;
		}
	    }
	    /* buffer overflow will be checked at end of switch */

	    break;

	case '=':
	    if (ps.in_or_st)
		ps.block_init = 1;
	    if (chartype[*buf_ptr] == opchar) {	/* we have two char
						 * assignment */
		tok[-1] = *buf_ptr++;
		if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr)
		    *tok++ = *buf_ptr++;
		*tok++ = '=';	/* Flip =+ to += */
		*tok = 0;
	    }
	    code = binary_op;
	    unary_delim = true;
	    break;
	    /* can drop thru!!! */

	case '>':
	case '<':
	case '!':		/* ops like <, <<, <=, !=, etc */
	    if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
		*tok++ = *buf_ptr;
		if (++buf_ptr >= buf_end)
		    fill_buffer();
	    }
	    if (*buf_ptr == '=')
		*tok++ = *buf_ptr++;
	    code = (ps.last_u_d ? unary_op : binary_op);
	    unary_delim = true;
	    break;

	default:
	    if (token[0] == '/' && *buf_ptr == '*') {
		/* it is start of comment */
		*tok++ = '*';

		if (++buf_ptr >= buf_end)
		    fill_buffer();

		code = comment;
		unary_delim = ps.last_u_d;
		break;
	    }
	    while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') {
		/* handle ||, &&, etc, and also things as in int *****i */
		*tok++ = *buf_ptr;
		if (++buf_ptr >= buf_end)
		    fill_buffer();
	    }
	    code = (ps.last_u_d ? unary_op : binary_op);
	    unary_delim = true;


    }				/* end of switch */
    if (code != newline) {
	l_struct = false;
	last_code = code;
    }
    if (buf_ptr >= buf_end)	/* check for input buffer empty */
	fill_buffer();
    ps.last_u_d = unary_delim;
    *tok = '\0';		/* null terminate the token */
    return (code);
};

/*
 * addkey -- add a keyword to the reserved-word table specials[].
 *
 * key	NUL-terminated spelling of the keyword.  The pointer itself is
 *	stored in the table, not a copy of the string.
 * val	keyword class, stored as the entry's rwcode.
 *
 * A key that is already in the table is left alone.  A new key is only
 * added while there is room for both the new entry and the terminating
 * null entry that follows it; otherwise the request is silently ignored.
 *
 * Always returns 0; the value carries no information.
 */
int
addkey(key, val)
    char       *key;
    int         val;
{
    register struct templ *p;
    /* last usable slot of the table; it must remain free for the terminator */
    struct templ *last = specials + sizeof specials / sizeof specials[0] - 1;

    for (p = specials; p->rwd != 0; p++)
	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
	    return 0;		/* already known */

    /*
     * p now points at the terminating entry.  Writing the new terminator
     * to p[1] requires p to be strictly before the last slot.
     */
    if (p >= last)
	return 0;		/* For now, table overflows are silently
				 * ignored */
    p->rwd = key;
    p->rwcode = val;
    p[1].rwd = 0;
    p[1].rwcode = 0;
    return 0;
}
Commit	Line	Data
c0bc4ef7 DF	1	/*
	2	* Copyright (c) 1980 Regents of the University of California.
	3	* All rights reserved. The Berkeley software License Agreement
	4	* specifies the terms and conditions for redistribution.
	5	*/
	6
	7	#ifndef lint
1d7a34f4	8	static char sccsid[] = "@(#)lexi.c 5.5 (Berkeley) %G%";
c0bc4ef7	9	#endif not lint
4b365fcd	10
1009bf5e KM	11	/*-
	12	*
	13	* Copyright (C) 1976
	14	* by the
	15	* Board of Trustees
	16	* of the
	17	* University of Illinois
	18	*
	19	* All rights reserved
	20	*
	21	*
	22	* NAME:
	23	* lexi
	24	*
	25	* FUNCTION:
	26	* This is the token scanner for indent
	27	*
	28	* ALGORITHM:
	29	* 1) Strip off intervening blanks and/or tabs.
	30	* 2) If it is an alphanumeric token, move it to the token buffer "token".
	31	* Check if it is a special reserved word that indent will want to
	32	* know about.
	33	* 3) Non-alphanumeric tokens are handled with a big switch statement. A
	34	* flag is kept to remember if the last token was a "unary delimiter",
	35	* which forces a following operator to be unary as opposed to binary.
	36	*
	37	* PARAMETERS:
	38	* None
	39	*
	40	* RETURNS:
	41	* An integer code indicating the type of token scanned.
	42	*
	43	* GLOBALS:
	44	* buf_ptr =
	45	* had_eof
	46	* ps.last_u_d = Set to true iff this token is a "unary delimiter"
	47	*
	48	* CALLS:
	49	* fill_buffer
	50	* printf (lib)
	51	*
	52	* CALLED BY:
	53	* main
	54	*
	55	* NOTES:
	56	* Start of comment is passed back so that the comment can be scanned by
	57	* pr_comment.
	58	*
	59	* Strings and character literals are returned just like identifiers.
	60	*
	61	* HISTORY:
	62	* initial coding November 1976 D A Willcox of CAC
	63	* 1/7/77 D A Willcox of CAC Fix to provide proper handling
	64	* of "int a -1;"
	65	*
	66	*/\f
4b365fcd	67
1009bf5e KM	68	/*
	69	* Here we have the token scanner for indent. It scans off one token and
	70	* puts it in the global variable "token". It returns a code, indicating
	71	* the type of token scanned.
	72	*/
4b365fcd	73
1d7a34f4 KB	74	#include "indent_globs.h"
1d7a34f4 KB	75	#include "indent_codes.h"
1009bf5e	76	#include "ctype.h"
4b365fcd KM	77
	78	#define alphanum 1
	79	#define opchar 3
	80
	81	struct templ {
1009bf5e KM	82	char *rwd;
1009bf5e KM	83	int rwcode;
4b365fcd KM	84	};
4b365fcd KM	85
1009bf5e	86	struct templ specials[100] =
4b365fcd KM	87	{
	88	"switch", 1,
	89	"case", 2,
1009bf5e	90	"break", 0,
4b365fcd	91	"struct", 3,
1009bf5e KM	92	"union", 3,
1009bf5e KM	93	"enum", 3,
4b365fcd KM	94	"default", 2,
	95	"int", 4,
	96	"char", 4,
	97	"float", 4,
	98	"double", 4,
	99	"long", 4,
	100	"short", 4,
	101	"typdef", 4,
	102	"unsigned", 4,
	103	"register", 4,
	104	"static", 4,
	105	"global", 4,
	106	"extern", 4,
1009bf5e KM	107	"void", 4,
	108	"goto", 0,
	109	"return", 0,
4b365fcd KM	110	"if", 5,
	111	"while", 5,
	112	"for", 5,
	113	"else", 6,
	114	"do", 6,
1009bf5e	115	"sizeof", 7,
4b365fcd KM	116	0, 0
	117	};
	118
1009bf5e KM	119	char chartype[128] =
	120	{ /* this is used to facilitate the decision
	121	* of what type (alphanumeric, operator)
	122	* each character is */
4b365fcd KM	123	0, 0, 0, 0, 0, 0, 0, 0,
	124	0, 0, 0, 0, 0, 0, 0, 0,
	125	0, 0, 0, 0, 0, 0, 0, 0,
	126	0, 0, 0, 0, 0, 0, 0, 0,
	127	0, 3, 0, 0, 0, 3, 3, 0,
	128	0, 0, 3, 3, 0, 3, 3, 3,
	129	1, 1, 1, 1, 1, 1, 1, 1,
	130	1, 1, 0, 0, 3, 3, 3, 3,
	131	0, 1, 1, 1, 1, 1, 1, 1,
	132	1, 1, 1, 1, 1, 1, 1, 1,
	133	1, 1, 1, 1, 1, 1, 1, 1,
	134	1, 1, 1, 0, 0, 0, 3, 1,
	135	0, 1, 1, 1, 1, 1, 1, 1,
	136	1, 1, 1, 1, 1, 1, 1, 1,
	137	1, 1, 1, 1, 1, 1, 1, 1,
	138	1, 1, 1, 0, 3, 0, 3, 0
	139	};
	140
1009bf5e KM	141
	142
	143
	144	int
	145	lexi()
	146	{
	147	register char tok; / local pointer to next char in token */
	148	int unary_delim; /* this is set to 1 if the current token
	149	*
	150	* forces a following operator to be unary */
	151	static int last_code; /* the last token type returned */
	152	static int l_struct; /* set to 1 if the last token was 'struct' */
	153	int code; /* internal code to be returned */
	154	char qchar; /* the delimiter character for a string */
	155
	156	tok = token; /* point to start of place to save token */
4b365fcd	157	unary_delim = false;
1009bf5e KM	158	ps.col_1 = ps.last_nl; /* tell world that this token started in
	159	* column 1 iff the last thing scanned was
	160	* nl */
	161	ps.last_nl = false;
	162
	163	while (buf_ptr == ' ' \|\| buf_ptr == '\t') { /* get rid of blanks */
	164	ps.col_1 = false; /* leading blanks imply token is not in
	165	* column 1 */
4b365fcd	166	if (++buf_ptr >= buf_end)
1009bf5e	167	fill_buffer();
4b365fcd KM	168	}
4b365fcd KM	169
c93d6f87 KM	170	/* Scan an alphanumeric token. Note that we must also handle
c93d6f87 KM	171	* stuff like "1.0e+03" and "7e-6". */
1009bf5e KM	172	if (chartype[buf_ptr & 0177] == alphanum) { / we have a character
	173	* or number */
	174	register char j; / used for searching thru list of
1009bf5e KM	175	* reserved words */
1009bf5e KM	176	register struct templ *p;
c93d6f87	177	register int c;
4b365fcd	178
c93d6f87	179	do { /* copy it over */
4b365fcd KM	180	tok++ = buf_ptr++;
4b365fcd KM	181	if (buf_ptr >= buf_end)
1009bf5e	182	fill_buffer();
c93d6f87 KM	183	} while (chartype[c = *buf_ptr & 0177] == alphanum \|\|
	184	isdigit(token[0]) && (c == '+' \|\| c == '-') &&
	185	(tok[-1] == 'e' \|\| tok[-1] == 'E'));
4b365fcd	186	*tok++ = '\0';
1009bf5e KM	187	while (buf_ptr == ' ' \|\| buf_ptr == '\t') { /* get rid of blanks */
	188	if (++buf_ptr >= buf_end)
	189	fill_buffer();
	190	}
	191	ps.its_a_keyword = false;
	192	ps.sizeof_keyword = false;
	193	if (l_struct) { /* if last token was 'struct', then this
	194	* token should be treated as a
	195	* declaration */
4b365fcd KM	196	l_struct = false;
4b365fcd KM	197	last_code = ident;
1009bf5e	198	ps.last_u_d = true;
4b365fcd KM	199	return (decl);
4b365fcd KM	200	}
1009bf5e KM	201	ps.last_u_d = false; /* Operator after indentifier is binary */
	202	last_code = ident; /* Remember that this is the code we will
	203	* return */
	204
	205	/*
	206	* This loop will check if the token is a keyword.
	207	*/
	208	for (p = specials; (j = p->rwd) != 0; p++) {
	209	tok = token; /* point at scanned token */
	210	if (j++ != tok++ \|\| j++ != tok++)
	211	continue; /* This test depends on the fact that
	212	* identifiers are always at least 1
	213	* character long (ie. the first two bytes
	214	* of the identifier are always
	215	* meaningful) */
	216	if (tok[-1] == 0)
	217	break; /* If its a one-character identifier */
	218	while (tok++ == j)
	219	if (*j++ == 0)
	220	goto found_keyword; /* I wish that C had a multi-level
	221	* break... */
	222	}
	223	if (p->rwd) { /* we have a keyword */
	224	found_keyword:
	225	ps.its_a_keyword = true;
	226	ps.last_u_d = true;
	227	switch (p->rwcode) {
	228	case 1: /* it is a switch */
	229	return (swstmt);
	230	case 2: /* a case or default */
	231	return (casestmt);
	232
	233	case 3: /* a "struct" */
	234	if (ps.p_l_follow)
	235	break; /* inside parens: cast */
	236	l_struct = true;
	237
	238	/*
	239	* Next time around, we will want to know that we have
	240	* had a 'struct'
	241	*/
	242	case 4: /* one of the declaration keywords */
	243	if (ps.p_l_follow) {
	244	ps.cast_mask \|= 1 << ps.p_l_follow;
	245	break; /* inside parens: cast */
	246	}
	247	last_code = decl;
	248	return (decl);
	249
	250	case 5: /* if, while, for */
	251	return (sp_paren);
	252
	253	case 6: /* do, else */
	254	return (sp_nparen);
	255
	256	case 7:
	257	ps.sizeof_keyword = true;
	258	default: /* all others are treated like any other
	259	* identifier */
	260	return (ident);
	261	} /* end of switch */
	262	} /* end of if (found_it) */
	263	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0
	264	&& (buf_ptr[1] != ')' \|\| buf_ptr[2] != ';')) {
265	strncpy(ps.procname, token, sizeof ps.procname - 1);
266	ps.in_parameter_declaration = 1;
4b365fcd KM	267	}
4b365fcd KM	268
1009bf5e KM	269	/*
	270	* The following hack attempts to guess whether or not the current
	271	* token is in fact a declaration keyword -- one that has been
	272	* typedefd
	273	*/
	274	if (((buf_ptr == '' && buf_ptr[1] != '=') \|\| isalpha(*buf_ptr))
	275	&& !ps.p_l_follow
	276	&& (ps.last_token == rparen \|\| ps.last_token == semicolon \|\|
	277	ps.last_token == decl \|\|
	278	ps.last_token == lbrace \|\| ps.last_token == rbrace)) {
	279	ps.its_a_keyword = true;
	280	ps.last_u_d = true;
	281	last_code = decl;
	282	return decl;
	283	}
	284	if (last_code == decl) /* if this is a declared variable, then
	285	* following sign is unary */
	286	ps.last_u_d = true; /* will make "int a -1" work */
4b365fcd	287	last_code = ident;
1009bf5e KM	288	return (ident); /* the ident is not in the list */
1009bf5e KM	289	} /* end of procesing for alpanum character */
c93d6f87	290	/* Scan a non-alphanumeric token */
4b365fcd	291
1009bf5e KM	292	tok++ = buf_ptr; /* if it is only a one-character token, it
1009bf5e KM	293	* is moved here */
4b365fcd KM	294	*tok = '\0';
4b365fcd KM	295	if (++buf_ptr >= buf_end)
1009bf5e	296	fill_buffer();
4b365fcd KM	297
4b365fcd KM	298	switch (*token) {
1009bf5e KM	299	case '\n':
	300	unary_delim = ps.last_u_d;
	301	ps.last_nl = true; /* remember that we just had a newline */
4b365fcd	302	code = (had_eof ? 0 : newline);
4b365fcd	303
1009bf5e KM	304	/*
	305	* if data has been exausted, the newline is a dummy, and we
	306	* should return code to stop
	307	*/
	308	break;
4b365fcd	309
1009bf5e KM	310	case '\'': /* start of quoted character */
	311	case '"': /* start of string */
	312	qchar = *token;
	313	if (troff) {
	314	tok[-1] = '`';
	315	if (qchar == '"')
	316	*tok++ = '`';
	317	*tok++ = BACKSLASH;
	318	*tok++ = 'f';
	319	*tok++ = 'L';
	320	}
	321	do { /* copy the string */
	322	while (1) { /* move one character or [/<char>]<char> */
4b365fcd	323	if (*buf_ptr == '\n') {
1009bf5e	324	printf("%d: Unterminated literal\n", line_no);
4b365fcd	325	goto stop_lit;
4b365fcd	326	}
4b365fcd KM	327	tok = buf_ptr++;
4b365fcd KM	328	if (buf_ptr >= buf_end)
1009bf5e	329	fill_buffer();
4b365fcd	330	if (had_eof \|\| ((tok - token) > (bufsize - 2))) {
1009bf5e	331	printf("Unterminated literal\n");
4b365fcd KM	332	++tok;
4b365fcd KM	333	goto stop_lit;
1009bf5e	334	/* get outof literal copying loop */
4b365fcd	335	}
1009bf5e KM	336	if (tok == BACKSLASH) { / if escape, copy extra
	337	* char */
	338	if (buf_ptr == '\n') / check for escaped
	339	* newline */
4b365fcd	340	++line_no;
1009bf5e KM	341	if (troff) {
	342	*++tok = BACKSLASH;
	343	if (*buf_ptr == BACKSLASH)
	344	*++tok = BACKSLASH;
	345	}
	346	++tok = buf_ptr++;
	347	++tok; /* we must increment this again because we
	348	* copied two chars */
4b365fcd	349	if (buf_ptr >= buf_end)
1009bf5e	350	fill_buffer();
4b365fcd KM	351	}
4b365fcd KM	352	else
1009bf5e KM	353	break; /* we copied one character */
1009bf5e KM	354	} /* end of while (1) */
4b365fcd	355	} while (*tok++ != qchar);
1009bf5e KM	356	if (troff) {
	357	tok[-1] = BACKSLASH;
	358	*tok++ = 'f';
	359	*tok++ = 'R';
	360	*tok++ = '\'';
	361	if (qchar == '"')
	362	*tok++ = '\'';
	363	}
	364	stop_lit:
4b365fcd KM	365	code = ident;
	366	break;
	367
1009bf5e KM	368	case ('('):
1009bf5e KM	369	case ('['):
4b365fcd KM	370	unary_delim = true;
	371	code = lparen;
	372	break;
	373
1009bf5e KM	374	case (')'):
1009bf5e KM	375	case (']'):
4b365fcd KM	376	code = rparen;
	377	break;
	378
1009bf5e KM	379	case '#':
1009bf5e KM	380	unary_delim = ps.last_u_d;
4b365fcd KM	381	code = preesc;
	382	break;
	383
1009bf5e	384	case '?':
4b365fcd KM	385	unary_delim = true;
	386	code = question;
	387	break;
	388
1009bf5e	389	case (':'):
4b365fcd KM	390	code = colon;
	391	unary_delim = true;
	392	break;
	393
1009bf5e	394	case (';'):
4b365fcd KM	395	unary_delim = true;
	396	code = semicolon;
	397	break;
	398
1009bf5e	399	case ('{'):
4b365fcd	400	unary_delim = true;
1009bf5e KM	401
	402	/*
	403	* if (ps.in_or_st) ps.block_init = 1;
	404	*/
	405	code = ps.block_init ? lparen : lbrace;
4b365fcd KM	406	break;
4b365fcd KM	407
1009bf5e	408	case ('}'):
4b365fcd	409	unary_delim = true;
1009bf5e	410	code = ps.block_init ? rparen : rbrace;
4b365fcd KM	411	break;
4b365fcd KM	412
1009bf5e KM	413	case 014: /* a form feed */
	414	unary_delim = ps.last_u_d;
	415	ps.last_nl = true; /* remember this so we can set 'ps.col_1'
	416	* right */
4b365fcd KM	417	code = form_feed;
	418	break;
	419
1009bf5e	420	case (','):
4b365fcd KM	421	unary_delim = true;
	422	code = comma;
	423	break;
	424
1009bf5e	425	case '.':
4b365fcd KM	426	unary_delim = false;
	427	code = period;
	428	break;
	429
1009bf5e KM	430	case '-':
	431	case '+': /* check for -, +, --, ++ */
	432	code = (ps.last_u_d ? unary_op : binary_op);
4b365fcd KM	433	unary_delim = true;
	434
	435	if (*buf_ptr == token[0]) {
1009bf5e	436	/* check for doubled character */
4b365fcd	437	tok++ = buf_ptr++;
1009bf5e	438	/* buffer overflow will be checked at end of loop */
4b365fcd	439	if (last_code == ident \|\| last_code == rparen) {
1009bf5e KM	440	code = (ps.last_u_d ? unary_op : postop);
1009bf5e KM	441	/* check for following ++ or -- */
4b365fcd KM	442	unary_delim = false;
	443	}
	444	}
1009bf5e KM	445	else if (*buf_ptr == '=')
	446	/* check for operator += */
	447	tok++ = buf_ptr++;
5c6e73ac	448	else if (token[0] == '-' && *buf_ptr == '>') {
1009bf5e KM	449	/* check for operator -> */
1009bf5e KM	450	tok++ = buf_ptr++;
5c6e73ac KM	451	if (!pointer_as_binop) {
	452	code = unary_op;
	453	unary_delim = false;
	454	ps.want_blank = false;
	455	}
1009bf5e KM	456	}
1009bf5e KM	457	/* buffer overflow will be checked at end of switch */
4b365fcd KM	458
	459	break;
	460
1009bf5e KM	461	case '=':
	462	if (ps.in_or_st)
	463	ps.block_init = 1;
	464	if (chartype[buf_ptr] == opchar) { / we have two char
	465	* assignment */
	466	tok[-1] = *buf_ptr++;
	467	if ((tok[-1] == '<' \|\| tok[-1] == '>') && tok[-1] == *buf_ptr)
	468	tok++ = buf_ptr++;
	469	tok++ = '='; / Flip =+ to += */
	470	*tok = 0;
4b365fcd	471	}
4b365fcd KM	472	code = binary_op;
4b365fcd KM	473	unary_delim = true;
1009bf5e KM	474	break;
1009bf5e KM	475	/* can drop thru!!! */
4b365fcd	476
1009bf5e KM	477	case '>':
	478	case '<':
	479	case '!': /* ops like <, <<, <=, !=, etc */
4b365fcd KM	480	if (buf_ptr == '>' \|\| buf_ptr == '<' \|\| *buf_ptr == '=') {
	481	tok++ = buf_ptr;
	482	if (++buf_ptr >= buf_end)
1009bf5e	483	fill_buffer();
4b365fcd	484	}
4b365fcd	485	if (*buf_ptr == '=')
1009bf5e KM	486	tok++ = buf_ptr++;
1009bf5e KM	487	code = (ps.last_u_d ? unary_op : binary_op);
4b365fcd KM	488	unary_delim = true;
	489	break;
	490
1009bf5e	491	default:
4b365fcd	492	if (token[0] == '/' && buf_ptr == '') {
1009bf5e	493	/* it is start of comment */
4b365fcd KM	494	tok++ = '';
	495
	496	if (++buf_ptr >= buf_end)
1009bf5e	497	fill_buffer();
4b365fcd KM	498
4b365fcd KM	499	code = comment;
1009bf5e	500	unary_delim = ps.last_u_d;
4b365fcd KM	501	break;
4b365fcd KM	502	}
1009bf5e KM	503	while ((tok - 1) == buf_ptr \|\| *buf_ptr == '=') {
1009bf5e KM	504	/* handle \|\|, &&, etc, and also things as in int ****i /
4b365fcd KM	505	tok++ = buf_ptr;
4b365fcd KM	506	if (++buf_ptr >= buf_end)
1009bf5e	507	fill_buffer();
4b365fcd	508	}
1009bf5e	509	code = (ps.last_u_d ? unary_op : binary_op);
4b365fcd KM	510	unary_delim = true;
	511
	512
1009bf5e	513	} /* end of switch */
4b365fcd KM	514	if (code != newline) {
	515	l_struct = false;
	516	last_code = code;
	517	}
1009bf5e KM	518	if (buf_ptr >= buf_end) /* check for input buffer empty */
	519	fill_buffer();
	520	ps.last_u_d = unary_delim;
	521	tok = '\0'; / null terminate the token */
4b365fcd KM	522	return (code);
4b365fcd KM	523	};
1009bf5e KM	524
	525	/* Add the given keyword to the keyword table, using val as the keyword type
	526	*/
	527	addkey (key, val)
	528	char *key;
	529	{
	530	register struct templ *p = specials;
	531	while (p->rwd)
	532	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
	533	return;
	534	else
	535	p++;
	536	if (p >= specials + sizeof specials / sizeof specials[0])
	537	return; /* For now, table overflows are silently
	538	ignored */
	539	p->rwd = key;
	540	p->rwcode = val;
	541	p[1].rwd = 0;
	542	p[1].rwcode = 0;
	543	return;
	544	}