[unix-history] / usr / src / usr.bin / indent / lexi.c

/*
 * Copyright (c) 1980 Regents of the University of California.
 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that this notice is preserved and that due credit is given
 * to the University of California at Berkeley and the University of
 * Illinois at Urbana.  The name of either University may not be used
 * to endorse or promote products derived from this software without
 * specific prior written permission. This software is provided
 * ``as is'' without express or implied warranty.
 */

#ifndef lint
static char sccsid[] = "@(#)lexi.c	5.7 (Berkeley) %G%";
#endif /* not lint */

/*
 * NAME:
 *	lexi
 *
 * FUNCTION:
 *	This is the token scanner for indent
 *
 * ALGORITHM:
 *	1) Strip off intervening blanks and/or tabs.
 *	2) If it is an alphanumeric token, move it to the token buffer "token".
 *	   Check if it is a special reserved word that indent will want to
 *	   know about.
 *	3) Non-alphanumeric tokens are handled with a big switch statement.  A
 *	   flag is kept to remember if the last token was a "unary delimiter",
 *	   which forces a following operator to be unary as opposed to binary.
 *
 * PARAMETERS:
 *	None
 *
 * RETURNS:
 *	An integer code indicating the type of token scanned.
 *
 * GLOBALS:
 *	buf_ptr =	Advanced past the token that was scanned
 *	had_eof		Only read; tested to detect the end of the input
 *	ps.last_u_d =	Set to true iff this token is a "unary delimiter"
 *
 * CALLS:
 *	fill_buffer
 *	printf (lib)
 *
 * CALLED BY:
 *	main
 *
 * NOTES:
 *	Start of comment is passed back so that the comment can be scanned by
 *	pr_comment.
 *
 *	Strings and character literals are returned just like identifiers.
 *
 * HISTORY:
 *	initial coding 	November 1976	D A Willcox of CAC
 *	1/7/77		D A Willcox of CAC	Fix to provide proper handling
 *						of "int a -1;"
 *
 */\f

/*
 * Here we have the token scanner for indent.  It scans off one token and
 * puts it in the global variable "token".  It returns a code, indicating
 * the type of token scanned. 
 */

#include "indent_globs.h"
#include "indent_codes.h"
#include "ctype.h"

/*
 * Character classes stored in the chartype[] table.  Entries that are
 * neither of these are 0.
 */
#define alphanum 1		/* character may start or continue an
				 * alphanumeric token */
#define opchar 3		/* operator character; lexi() tests for this
				 * class on the character following an '=' */

/*
 * One entry of the reserved word table: the spelling of the word and the
 * class code that lexi() switches on when a scanned token matches it.
 */
struct templ {
    char       *rwd;		/* text of the reserved word; a null pointer
				 * marks the end of the table */
    int         rwcode;		/* class of the reserved word, see below */
};

/*
 * Reserved words known to the scanner.  The rwcode classes, as dispatched
 * in lexi(), are:
 *
 *	0	no dedicated case; reported with the ident code
 *	1	switch
 *	2	case, default
 *	3	struct, union, enum
 *	4	declaration keywords
 *	5	if, while, for
 *	6	else, do
 *	7	sizeof
 *
 * The array is declared larger than the initial list so that addkey() can
 * append further words at run time.  The list ends at the first entry whose
 * rwd is a null pointer.
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef", so the real
				 * keyword never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};

/*
 * Classification of each 7-bit character code, used to decide quickly
 * whether a character belongs to an alphanumeric token or is an operator
 * character.  A value of 1 is the alphanumeric class, 3 is the operator
 * class and 0 is anything else.  Each row below covers eight consecutive
 * codes; the octal range of the row is given in the comment above it.
 */
char        chartype[128] =
{
    /* 000-007 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 010-017 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 020-027 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 030-037 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 040-047: blank, exclamation mark, double quote, #, $, %, &, quote */
    0, 3, 0, 0, 1, 3, 3, 0,
    /* 050-057: both parentheses, star, plus, comma, minus, dot, slash */
    0, 0, 3, 3, 0, 3, 3, 3,
    /* 060-067: digits 0 to 7 */
    1, 1, 1, 1, 1, 1, 1, 1,
    /* 070-077: digits 8 and 9, colon, semicolon, <, =, >, question mark */
    1, 1, 0, 0, 3, 3, 3, 3,
    /* 100-107: at sign, A to G */
    0, 1, 1, 1, 1, 1, 1, 1,
    /* 110-117: H to O */
    1, 1, 1, 1, 1, 1, 1, 1,
    /* 120-127: P to W */
    1, 1, 1, 1, 1, 1, 1, 1,
    /* 130-137: X to Z, both brackets and the backslash, caret, underscore */
    1, 1, 1, 0, 0, 0, 3, 1,
    /* 140-147: back quote, a to g */
    0, 1, 1, 1, 1, 1, 1, 1,
    /* 150-157: h to o */
    1, 1, 1, 1, 1, 1, 1, 1,
    /* 160-167: p to w */
    1, 1, 1, 1, 1, 1, 1, 1,
    /* 170-177: x to z, left brace, vertical bar, right brace, tilde, DEL */
    1, 1, 1, 0, 3, 0, 3, 0
};


int 
lexi()
{
    register char *tok;		/* local pointer to next char in token */
    int         unary_delim;	/* this is set to 1 if the current token 
				 *
				 * forces a following operator to be unary */
    static int  last_code;	/* the last token type returned */
    static int  l_struct;	/* set to 1 if the last token was 'struct' */
    int         code;		/* internal code to be returned */
    char        qchar;		/* the delimiter character for a string */

    tok = token;		/* point to start of place to save token */
    unary_delim = false;
    ps.col_1 = ps.last_nl;	/* tell world that this token started in
				 * column 1 iff the last thing scanned was
				 * nl */
    ps.last_nl = false;

    while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
	ps.col_1 = false;	/* leading blanks imply token is not in
				 * column 1 */
	if (++buf_ptr >= buf_end)
	    fill_buffer();
    }

    /* Scan an alphanumeric token.  Note that we must also handle
     * stuff like "1.0e+03" and "7e-6". */
    if (chartype[*buf_ptr & 0177] == alphanum) {	/* we have a character
							 * or number */
	register char *j;	/* used for searching thru list of 
				 * reserved words */
	register struct templ *p;
	register int c;

	do {			/* copy it over */
	    *tok++ = *buf_ptr++;
	    if (buf_ptr >= buf_end)
		fill_buffer();
	} while (chartype[c = *buf_ptr & 0177] == alphanum ||
		isdigit(token[0]) && (c == '+' || c == '-') &&
		(tok[-1] == 'e' || tok[-1] == 'E'));
	*tok++ = '\0';
	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
	    if (++buf_ptr >= buf_end)
		fill_buffer();
	}
	ps.its_a_keyword = false;
	ps.sizeof_keyword = false;
	if (l_struct) {		/* if last token was 'struct', then this
				 * token should be treated as a
				 * declaration */
	    l_struct = false;
	    last_code = ident;
	    ps.last_u_d = true;
	    return (decl);
	}
	ps.last_u_d = false;	/* Operator after indentifier is binary */
	last_code = ident;	/* Remember that this is the code we will
				 * return */

	/*
	 * This loop will check if the token is a keyword. 
	 */
	for (p = specials; (j = p->rwd) != 0; p++) {
	    tok = token;	/* point at scanned token */
	    if (*j++ != *tok++ || *j++ != *tok++)
		continue;	/* This test depends on the fact that
				 * identifiers are always at least 1
				 * character long (ie. the first two bytes
				 * of the identifier are always
				 * meaningful) */
	    if (tok[-1] == 0)
		break;		/* If its a one-character identifier */
	    while (*tok++ == *j)
		if (*j++ == 0)
		    goto found_keyword;	/* I wish that C had a multi-level
					 * break... */
	}
	if (p->rwd) {		/* we have a keyword */
    found_keyword:
	    ps.its_a_keyword = true;
	    ps.last_u_d = true;
	    switch (p->rwcode) {
		case 1:	/* it is a switch */
		    return (swstmt);
		case 2:	/* a case or default */
		    return (casestmt);

		case 3:	/* a "struct" */
		    if (ps.p_l_follow)
			break;	/* inside parens: cast */
		    l_struct = true;

		    /*
		     * Next time around, we will want to know that we have
		     * had a 'struct' 
		     */
		case 4:	/* one of the declaration keywords */
		    if (ps.p_l_follow) {
			ps.cast_mask |= 1 << ps.p_l_follow;
			break;	/* inside parens: cast */
		    }
		    last_code = decl;
		    return (decl);

		case 5:	/* if, while, for */
		    return (sp_paren);

		case 6:	/* do, else */
		    return (sp_nparen);

		case 7:
		    ps.sizeof_keyword = true;
		default:	/* all others are treated like any other
				 * identifier */
		    return (ident);
	    }			/* end of switch */
	}			/* end of if (found_it) */
	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0
	    && (buf_ptr[1] != ')' || buf_ptr[2] != ';')) {
	    strncpy(ps.procname, token, sizeof ps.procname - 1);
	    ps.in_parameter_declaration = 1;
	}

	/*
	 * The following hack attempts to guess whether or not the current
	 * token is in fact a declaration keyword -- one that has been
	 * typedefd 
	 */
	if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr))
	    && !ps.p_l_follow
	    && (ps.last_token == rparen || ps.last_token == semicolon ||
		ps.last_token == decl ||
		ps.last_token == lbrace || ps.last_token == rbrace)) {
	    ps.its_a_keyword = true;
	    ps.last_u_d = true;
	    last_code = decl;
	    return decl;
	}
	if (last_code == decl)	/* if this is a declared variable, then
				 * following sign is unary */
	    ps.last_u_d = true;	/* will make "int a -1" work */
	last_code = ident;
	return (ident);		/* the ident is not in the list */
    }				/* end of procesing for alpanum character */
    /* Scan a non-alphanumeric token */

    *tok++ = *buf_ptr;		/* if it is only a one-character token, it
				 * is moved here */
    *tok = '\0';
    if (++buf_ptr >= buf_end)
	fill_buffer();

    switch (*token) {
	case '\n':
	    unary_delim = ps.last_u_d;
	    ps.last_nl = true;	/* remember that we just had a newline */
	    code = (had_eof ? 0 : newline);

	    /*
	     * if data has been exausted, the newline is a dummy, and we
	     * should return code to stop 
	     */
	    break;

	case '\'':		/* start of quoted character */
	case '"':		/* start of string */
	    qchar = *token;
	    if (troff) {
		tok[-1] = '`';
		if (qchar == '"')
		    *tok++ = '`';
		*tok++ = BACKSLASH;
		*tok++ = 'f';
		*tok++ = 'L';
	    }
	    do {		/* copy the string */
		while (1) {	/* move one character or [/<char>]<char> */
		    if (*buf_ptr == '\n') {
			printf("%d: Unterminated literal\n", line_no);
			goto stop_lit;
		    }
		    *tok = *buf_ptr++;
		    if (buf_ptr >= buf_end)
			fill_buffer();
		    if (had_eof || ((tok - token) > (bufsize - 2))) {
			printf("Unterminated literal\n");
			++tok;
			goto stop_lit;
			/* get outof literal copying loop */
		    }
		    if (*tok == BACKSLASH) {	/* if escape, copy extra
						 * char */
			if (*buf_ptr == '\n')	/* check for escaped
						 * newline */
			    ++line_no;
			if (troff) {
			    *++tok = BACKSLASH;
			    if (*buf_ptr == BACKSLASH)
				*++tok = BACKSLASH;
			}
			*++tok = *buf_ptr++;
			++tok;	/* we must increment this again because we
				 * copied two chars */
			if (buf_ptr >= buf_end)
			    fill_buffer();
		    }
		    else
			break;	/* we copied one character */
		}		/* end of while (1) */
	    } while (*tok++ != qchar);
	    if (troff) {
		tok[-1] = BACKSLASH;
		*tok++ = 'f';
		*tok++ = 'R';
		*tok++ = '\'';
		if (qchar == '"')
		    *tok++ = '\'';
	    }
    stop_lit:
	    code = ident;
	    break;

	case ('('):
	case ('['):
	    unary_delim = true;
	    code = lparen;
	    break;

	case (')'):
	case (']'):
	    code = rparen;
	    break;

	case '#':
	    unary_delim = ps.last_u_d;
	    code = preesc;
	    break;

	case '?':
	    unary_delim = true;
	    code = question;
	    break;

	case (':'):
	    code = colon;
	    unary_delim = true;
	    break;

	case (';'):
	    unary_delim = true;
	    code = semicolon;
	    break;

	case ('{'):
	    unary_delim = true;

	    /*
	     * if (ps.in_or_st) ps.block_init = 1; 
	     */
	    code = ps.block_init ? lparen : lbrace;
	    break;

	case ('}'):
	    unary_delim = true;
	    code = ps.block_init ? rparen : rbrace;
	    break;

	case 014:		/* a form feed */
	    unary_delim = ps.last_u_d;
	    ps.last_nl = true;	/* remember this so we can set 'ps.col_1'
				 * right */
	    code = form_feed;
	    break;

	case (','):
	    unary_delim = true;
	    code = comma;
	    break;

	case '.':
	    unary_delim = false;
	    code = period;
	    break;

	case '-':
	case '+':		/* check for -, +, --, ++ */
	    code = (ps.last_u_d ? unary_op : binary_op);
	    unary_delim = true;

	    if (*buf_ptr == token[0]) {
		/* check for doubled character */
		*tok++ = *buf_ptr++;
		/* buffer overflow will be checked at end of loop */
		if (last_code == ident || last_code == rparen) {
		    code = (ps.last_u_d ? unary_op : postop);
		    /* check for following ++ or -- */
		    unary_delim = false;
		}
	    }
	    else if (*buf_ptr == '=')
		/* check for operator += */
		*tok++ = *buf_ptr++;
	    else if (token[0] == '-' && *buf_ptr == '>') {
		/* check for operator -> */
		*tok++ = *buf_ptr++;
		if (!pointer_as_binop) {
		    code = unary_op;
		    unary_delim = false;
		    ps.want_blank = false;
		}
	    }
	    /* buffer overflow will be checked at end of switch */

	    break;

	case '=':
	    if (ps.in_or_st)
		ps.block_init = 1;
	    if (chartype[*buf_ptr] == opchar) {	/* we have two char
						 * assignment */
		tok[-1] = *buf_ptr++;
		if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr)
		    *tok++ = *buf_ptr++;
		*tok++ = '=';	/* Flip =+ to += */
		*tok = 0;
	    }
	    code = binary_op;
	    unary_delim = true;
	    break;
	    /* can drop thru!!! */

	case '>':
	case '<':
	case '!':		/* ops like <, <<, <=, !=, etc */
	    if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
		*tok++ = *buf_ptr;
		if (++buf_ptr >= buf_end)
		    fill_buffer();
	    }
	    if (*buf_ptr == '=')
		*tok++ = *buf_ptr++;
	    code = (ps.last_u_d ? unary_op : binary_op);
	    unary_delim = true;
	    break;

	default:
	    if (token[0] == '/' && *buf_ptr == '*') {
		/* it is start of comment */
		*tok++ = '*';

		if (++buf_ptr >= buf_end)
		    fill_buffer();

		code = comment;
		unary_delim = ps.last_u_d;
		break;
	    }
	    while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') {
		/* handle ||, &&, etc, and also things as in int *****i */
		*tok++ = *buf_ptr;
		if (++buf_ptr >= buf_end)
		    fill_buffer();
	    }
	    code = (ps.last_u_d ? unary_op : binary_op);
	    unary_delim = true;


    }				/* end of switch */
    if (code != newline) {
	l_struct = false;
	last_code = code;
    }
    if (buf_ptr >= buf_end)	/* check for input buffer empty */
	fill_buffer();
    ps.last_u_d = unary_delim;
    *tok = '\0';		/* null terminate the token */
    return (code);
};

/*
 * Add the given keyword to the keyword table, using val as the keyword
 * type (the rwcode that lexi() switches on).
 *
 * key is stored by pointer, not copied, so the caller must keep the string
 * alive for as long as the table is in use.  A word that is already in the
 * table is left alone, with its existing type.  When the table has no room
 * for both the new entry and the terminating null entry the request is
 * silently ignored.
 *
 * The return value is always 0 and carries no information; the function
 * was originally defined with an implicit int return type and no value.
 */
int
addkey(key, val)
    char       *key;
    int         val;
{
    register struct templ *p = specials;
    /* last slot of the table; it must stay free for the terminator */
    register struct templ *last =
	specials + sizeof specials / sizeof specials[0] - 1;

    while (p->rwd) {
	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
	    return (0);		/* already known */
	p++;
    }

    /*
     * p now points at the terminating entry.  The new terminator is
     * written to p[1], so p itself must not be the last slot; the old test
     * compared against one past the end and let p[1] land outside the
     * array.
     */
    if (p >= last)
	return (0);		/* For now, table overflows are silently
				 * ignored */
    p->rwd = key;
    p->rwcode = val;
    p[1].rwd = 0;
    p[1].rwcode = 0;
    return (0);
}
Commit	Line	Data
c0bc4ef7 DF	1	/*
c0bc4ef7 DF	2	* Copyright (c) 1980 Regents of the University of California.
b0627149 KB	3	* Copyright (c) 1976 Board of Trustees of the University of Illinois.
	4	* All rights reserved.
	5	*
	6	* Redistribution and use in source and binary forms are permitted
	7	* provided that this notice is preserved and that due credit is given
	8	* to the University of California at Berkeley and the University of
	9	* Illinois at Urbana. The name of either University may not be used
	10	* to endorse or promote products derived from this software without
	11	* specific prior written permission. This software is provided
	12	* ``as is'' without express or implied warranty.
c0bc4ef7 DF	13	*/
	14
	15	#ifndef lint
720fc992	16	static char sccsid[] = "@(#)lexi.c 5.7 (Berkeley) %G%";
b0627149	17	#endif /* not lint */
4b365fcd	18
b0627149	19	/*
1009bf5e KM	20	* NAME:
	21	* lexi
	22	*
	23	* FUNCTION:
	24	* This is the token scanner for indent
	25	*
	26	* ALGORITHM:
	27	* 1) Strip off intervening blanks and/or tabs.
	28	* 2) If it is an alphanumeric token, move it to the token buffer "token".
	29	* Check if it is a special reserved word that indent will want to
	30	* know about.
	31	* 3) Non-alphanumeric tokens are handled with a big switch statement. A
	32	* flag is kept to remember if the last token was a "unary delimiter",
	33	* which forces a following operator to be unary as opposed to binary.
	34	*
	35	* PARAMETERS:
	36	* None
	37	*
	38	* RETURNS:
	39	* An integer code indicating the type of token scanned.
	40	*
	41	* GLOBALS:
	42	* buf_ptr =
	43	* had_eof
	44	* ps.last_u_d = Set to true iff this token is a "unary delimiter"
	45	*
	46	* CALLS:
	47	* fill_buffer
	48	* printf (lib)
	49	*
	50	* CALLED BY:
	51	* main
	52	*
	53	* NOTES:
	54	* Start of comment is passed back so that the comment can be scanned by
	55	* pr_comment.
	56	*
	57	* Strings and character literals are returned just like identifiers.
	58	*
	59	* HISTORY:
	60	* initial coding November 1976 D A Willcox of CAC
	61	* 1/7/77 D A Willcox of CAC Fix to provide proper handling
	62	* of "int a -1;"
	63	*
	64	*/\f
4b365fcd	65
1009bf5e KM	66	/*
	67	* Here we have the token scanner for indent. It scans off one token and
	68	* puts it in the global variable "token". It returns a code, indicating
	69	* the type of token scanned.
	70	*/
4b365fcd	71
1d7a34f4 KB	72	#include "indent_globs.h"
1d7a34f4 KB	73	#include "indent_codes.h"
1009bf5e	74	#include "ctype.h"
4b365fcd KM	75
	76	#define alphanum 1
	77	#define opchar 3
	78
	79	struct templ {
1009bf5e KM	80	char *rwd;
1009bf5e KM	81	int rwcode;
4b365fcd KM	82	};
4b365fcd KM	83
1009bf5e	84	struct templ specials[100] =
4b365fcd KM	85	{
	86	"switch", 1,
	87	"case", 2,
1009bf5e	88	"break", 0,
4b365fcd	89	"struct", 3,
1009bf5e KM	90	"union", 3,
1009bf5e KM	91	"enum", 3,
4b365fcd KM	92	"default", 2,
	93	"int", 4,
	94	"char", 4,
	95	"float", 4,
	96	"double", 4,
	97	"long", 4,
	98	"short", 4,
	99	"typdef", 4,
	100	"unsigned", 4,
	101	"register", 4,
	102	"static", 4,
	103	"global", 4,
	104	"extern", 4,
1009bf5e KM	105	"void", 4,
	106	"goto", 0,
	107	"return", 0,
4b365fcd KM	108	"if", 5,
	109	"while", 5,
	110	"for", 5,
	111	"else", 6,
	112	"do", 6,
1009bf5e	113	"sizeof", 7,
4b365fcd KM	114	0, 0
	115	};
	116
1009bf5e KM	117	char chartype[128] =
	118	{ /* this is used to facilitate the decision
	119	* of what type (alphanumeric, operator)
	120	* each character is */
4b365fcd KM	121	0, 0, 0, 0, 0, 0, 0, 0,
	122	0, 0, 0, 0, 0, 0, 0, 0,
	123	0, 0, 0, 0, 0, 0, 0, 0,
	124	0, 0, 0, 0, 0, 0, 0, 0,
720fc992	125	0, 3, 0, 0, 1, 3, 3, 0,
4b365fcd KM	126	0, 0, 3, 3, 0, 3, 3, 3,
	127	1, 1, 1, 1, 1, 1, 1, 1,
	128	1, 1, 0, 0, 3, 3, 3, 3,
	129	0, 1, 1, 1, 1, 1, 1, 1,
	130	1, 1, 1, 1, 1, 1, 1, 1,
	131	1, 1, 1, 1, 1, 1, 1, 1,
	132	1, 1, 1, 0, 0, 0, 3, 1,
	133	0, 1, 1, 1, 1, 1, 1, 1,
	134	1, 1, 1, 1, 1, 1, 1, 1,
	135	1, 1, 1, 1, 1, 1, 1, 1,
	136	1, 1, 1, 0, 3, 0, 3, 0
	137	};
	138
1009bf5e KM	139
	140
	141
	142	int
	143	lexi()
	144	{
	145	register char tok; / local pointer to next char in token */
	146	int unary_delim; /* this is set to 1 if the current token
	147	*
	148	* forces a following operator to be unary */
	149	static int last_code; /* the last token type returned */
	150	static int l_struct; /* set to 1 if the last token was 'struct' */
	151	int code; /* internal code to be returned */
	152	char qchar; /* the delimiter character for a string */
	153
	154	tok = token; /* point to start of place to save token */
4b365fcd	155	unary_delim = false;
1009bf5e KM	156	ps.col_1 = ps.last_nl; /* tell world that this token started in
	157	* column 1 iff the last thing scanned was
	158	* nl */
	159	ps.last_nl = false;
	160
	161	while (buf_ptr == ' ' \|\| buf_ptr == '\t') { /* get rid of blanks */
	162	ps.col_1 = false; /* leading blanks imply token is not in
	163	* column 1 */
4b365fcd	164	if (++buf_ptr >= buf_end)
1009bf5e	165	fill_buffer();
4b365fcd KM	166	}
4b365fcd KM	167
c93d6f87 KM	168	/* Scan an alphanumeric token. Note that we must also handle
c93d6f87 KM	169	* stuff like "1.0e+03" and "7e-6". */
1009bf5e KM	170	if (chartype[buf_ptr & 0177] == alphanum) { / we have a character
	171	* or number */
	172	register char j; / used for searching thru list of
1009bf5e KM	173	* reserved words */
1009bf5e KM	174	register struct templ *p;
c93d6f87	175	register int c;
4b365fcd	176
c93d6f87	177	do { /* copy it over */
4b365fcd KM	178	tok++ = buf_ptr++;
4b365fcd KM	179	if (buf_ptr >= buf_end)
1009bf5e	180	fill_buffer();
c93d6f87 KM	181	} while (chartype[c = *buf_ptr & 0177] == alphanum \|\|
	182	isdigit(token[0]) && (c == '+' \|\| c == '-') &&
	183	(tok[-1] == 'e' \|\| tok[-1] == 'E'));
4b365fcd	184	*tok++ = '\0';
1009bf5e KM	185	while (buf_ptr == ' ' \|\| buf_ptr == '\t') { /* get rid of blanks */
	186	if (++buf_ptr >= buf_end)
	187	fill_buffer();
	188	}
	189	ps.its_a_keyword = false;
	190	ps.sizeof_keyword = false;
	191	if (l_struct) { /* if last token was 'struct', then this
	192	* token should be treated as a
	193	* declaration */
4b365fcd KM	194	l_struct = false;
4b365fcd KM	195	last_code = ident;
1009bf5e	196	ps.last_u_d = true;
4b365fcd KM	197	return (decl);
4b365fcd KM	198	}
1009bf5e KM	199	ps.last_u_d = false; /* Operator after indentifier is binary */
	200	last_code = ident; /* Remember that this is the code we will
	201	* return */
	202
	203	/*
	204	* This loop will check if the token is a keyword.
	205	*/
	206	for (p = specials; (j = p->rwd) != 0; p++) {
	207	tok = token; /* point at scanned token */
	208	if (j++ != tok++ \|\| j++ != tok++)
	209	continue; /* This test depends on the fact that
	210	* identifiers are always at least 1
	211	* character long (ie. the first two bytes
	212	* of the identifier are always
	213	* meaningful) */
	214	if (tok[-1] == 0)
	215	break; /* If its a one-character identifier */
	216	while (tok++ == j)
	217	if (*j++ == 0)
	218	goto found_keyword; /* I wish that C had a multi-level
	219	* break... */
	220	}
	221	if (p->rwd) { /* we have a keyword */
	222	found_keyword:
	223	ps.its_a_keyword = true;
	224	ps.last_u_d = true;
	225	switch (p->rwcode) {
	226	case 1: /* it is a switch */
	227	return (swstmt);
	228	case 2: /* a case or default */
	229	return (casestmt);
	230
	231	case 3: /* a "struct" */
	232	if (ps.p_l_follow)
	233	break; /* inside parens: cast */
	234	l_struct = true;
	235
	236	/*
	237	* Next time around, we will want to know that we have
	238	* had a 'struct'
	239	*/
	240	case 4: /* one of the declaration keywords */
	241	if (ps.p_l_follow) {
	242	ps.cast_mask \|= 1 << ps.p_l_follow;
	243	break; /* inside parens: cast */
	244	}
	245	last_code = decl;
	246	return (decl);
	247
	248	case 5: /* if, while, for */
	249	return (sp_paren);
	250
	251	case 6: /* do, else */
	252	return (sp_nparen);
	253
	254	case 7:
	255	ps.sizeof_keyword = true;
	256	default: /* all others are treated like any other
	257	* identifier */
	258	return (ident);
	259	} /* end of switch */
	260	} /* end of if (found_it) */
	261	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0
	262	&& (buf_ptr[1] != ')' \|\| buf_ptr[2] != ';')) {
263	strncpy(ps.procname, token, sizeof ps.procname - 1);
264	ps.in_parameter_declaration = 1;
4b365fcd KM	265	}
4b365fcd KM	266
1009bf5e KM	267	/*
	268	* The following hack attempts to guess whether or not the current
	269	* token is in fact a declaration keyword -- one that has been
	270	* typedefd
	271	*/
	272	if (((buf_ptr == '' && buf_ptr[1] != '=') \|\| isalpha(*buf_ptr))
	273	&& !ps.p_l_follow
	274	&& (ps.last_token == rparen \|\| ps.last_token == semicolon \|\|
	275	ps.last_token == decl \|\|
	276	ps.last_token == lbrace \|\| ps.last_token == rbrace)) {
	277	ps.its_a_keyword = true;
	278	ps.last_u_d = true;
	279	last_code = decl;
	280	return decl;
	281	}
	282	if (last_code == decl) /* if this is a declared variable, then
	283	* following sign is unary */
	284	ps.last_u_d = true; /* will make "int a -1" work */
4b365fcd	285	last_code = ident;
1009bf5e KM	286	return (ident); /* the ident is not in the list */
1009bf5e KM	287	} /* end of procesing for alpanum character */
c93d6f87	288	/* Scan a non-alphanumeric token */
4b365fcd	289
1009bf5e KM	290	tok++ = buf_ptr; /* if it is only a one-character token, it
1009bf5e KM	291	* is moved here */
4b365fcd KM	292	*tok = '\0';
4b365fcd KM	293	if (++buf_ptr >= buf_end)
1009bf5e	294	fill_buffer();
4b365fcd KM	295
4b365fcd KM	296	switch (*token) {
1009bf5e KM	297	case '\n':
	298	unary_delim = ps.last_u_d;
	299	ps.last_nl = true; /* remember that we just had a newline */
4b365fcd	300	code = (had_eof ? 0 : newline);
4b365fcd	301
1009bf5e KM	302	/*
	303	* if data has been exausted, the newline is a dummy, and we
	304	* should return code to stop
	305	*/
	306	break;
4b365fcd	307
1009bf5e KM	308	case '\'': /* start of quoted character */
	309	case '"': /* start of string */
	310	qchar = *token;
	311	if (troff) {
	312	tok[-1] = '`';
	313	if (qchar == '"')
	314	*tok++ = '`';
	315	*tok++ = BACKSLASH;
	316	*tok++ = 'f';
	317	*tok++ = 'L';
	318	}
	319	do { /* copy the string */
	320	while (1) { /* move one character or [/<char>]<char> */
4b365fcd	321	if (*buf_ptr == '\n') {
1009bf5e	322	printf("%d: Unterminated literal\n", line_no);
4b365fcd	323	goto stop_lit;
4b365fcd	324	}
4b365fcd KM	325	tok = buf_ptr++;
4b365fcd KM	326	if (buf_ptr >= buf_end)
1009bf5e	327	fill_buffer();
4b365fcd	328	if (had_eof \|\| ((tok - token) > (bufsize - 2))) {
1009bf5e	329	printf("Unterminated literal\n");
4b365fcd KM	330	++tok;
4b365fcd KM	331	goto stop_lit;
1009bf5e	332	/* get outof literal copying loop */
4b365fcd	333	}
1009bf5e KM	334	if (tok == BACKSLASH) { / if escape, copy extra
	335	* char */
	336	if (buf_ptr == '\n') / check for escaped
	337	* newline */
4b365fcd	338	++line_no;
1009bf5e KM	339	if (troff) {
	340	*++tok = BACKSLASH;
	341	if (*buf_ptr == BACKSLASH)
	342	*++tok = BACKSLASH;
	343	}
	344	++tok = buf_ptr++;
	345	++tok; /* we must increment this again because we
	346	* copied two chars */
4b365fcd	347	if (buf_ptr >= buf_end)
1009bf5e	348	fill_buffer();
4b365fcd KM	349	}
4b365fcd KM	350	else
1009bf5e KM	351	break; /* we copied one character */
1009bf5e KM	352	} /* end of while (1) */
4b365fcd	353	} while (*tok++ != qchar);
1009bf5e KM	354	if (troff) {
	355	tok[-1] = BACKSLASH;
	356	*tok++ = 'f';
	357	*tok++ = 'R';
	358	*tok++ = '\'';
	359	if (qchar == '"')
	360	*tok++ = '\'';
	361	}
	362	stop_lit:
4b365fcd KM	363	code = ident;
	364	break;
	365
1009bf5e KM	366	case ('('):
1009bf5e KM	367	case ('['):
4b365fcd KM	368	unary_delim = true;
	369	code = lparen;
	370	break;
	371
1009bf5e KM	372	case (')'):
1009bf5e KM	373	case (']'):
4b365fcd KM	374	code = rparen;
	375	break;
	376
1009bf5e KM	377	case '#':
1009bf5e KM	378	unary_delim = ps.last_u_d;
4b365fcd KM	379	code = preesc;
	380	break;
	381
1009bf5e	382	case '?':
4b365fcd KM	383	unary_delim = true;
	384	code = question;
	385	break;
	386
1009bf5e	387	case (':'):
4b365fcd KM	388	code = colon;
	389	unary_delim = true;
	390	break;
	391
1009bf5e	392	case (';'):
4b365fcd KM	393	unary_delim = true;
	394	code = semicolon;
	395	break;
	396
1009bf5e	397	case ('{'):
4b365fcd	398	unary_delim = true;
1009bf5e KM	399
	400	/*
	401	* if (ps.in_or_st) ps.block_init = 1;
	402	*/
	403	code = ps.block_init ? lparen : lbrace;
4b365fcd KM	404	break;
4b365fcd KM	405
1009bf5e	406	case ('}'):
4b365fcd	407	unary_delim = true;
1009bf5e	408	code = ps.block_init ? rparen : rbrace;
4b365fcd KM	409	break;
4b365fcd KM	410
1009bf5e KM	411	case 014: /* a form feed */
	412	unary_delim = ps.last_u_d;
	413	ps.last_nl = true; /* remember this so we can set 'ps.col_1'
	414	* right */
4b365fcd KM	415	code = form_feed;
	416	break;
	417
1009bf5e	418	case (','):
4b365fcd KM	419	unary_delim = true;
	420	code = comma;
	421	break;
	422
1009bf5e	423	case '.':
4b365fcd KM	424	unary_delim = false;
	425	code = period;
	426	break;
	427
1009bf5e KM	428	case '-':
	429	case '+': /* check for -, +, --, ++ */
	430	code = (ps.last_u_d ? unary_op : binary_op);
4b365fcd KM	431	unary_delim = true;
	432
	433	if (*buf_ptr == token[0]) {
1009bf5e	434	/* check for doubled character */
4b365fcd	435	tok++ = buf_ptr++;
1009bf5e	436	/* buffer overflow will be checked at end of loop */
4b365fcd	437	if (last_code == ident \|\| last_code == rparen) {
1009bf5e KM	438	code = (ps.last_u_d ? unary_op : postop);
1009bf5e KM	439	/* check for following ++ or -- */
4b365fcd KM	440	unary_delim = false;
	441	}
	442	}
1009bf5e KM	443	else if (*buf_ptr == '=')
	444	/* check for operator += */
	445	tok++ = buf_ptr++;
5c6e73ac	446	else if (token[0] == '-' && *buf_ptr == '>') {
1009bf5e KM	447	/* check for operator -> */
1009bf5e KM	448	tok++ = buf_ptr++;
5c6e73ac KM	449	if (!pointer_as_binop) {
	450	code = unary_op;
	451	unary_delim = false;
	452	ps.want_blank = false;
	453	}
1009bf5e KM	454	}
1009bf5e KM	455	/* buffer overflow will be checked at end of switch */
4b365fcd KM	456
	457	break;
	458
1009bf5e KM	459	case '=':
	460	if (ps.in_or_st)
	461	ps.block_init = 1;
	462	if (chartype[buf_ptr] == opchar) { / we have two char
	463	* assignment */
	464	tok[-1] = *buf_ptr++;
	465	if ((tok[-1] == '<' \|\| tok[-1] == '>') && tok[-1] == *buf_ptr)
	466	tok++ = buf_ptr++;
	467	tok++ = '='; / Flip =+ to += */
	468	*tok = 0;
4b365fcd	469	}
4b365fcd KM	470	code = binary_op;
4b365fcd KM	471	unary_delim = true;
1009bf5e KM	472	break;
1009bf5e KM	473	/* can drop thru!!! */
4b365fcd	474
1009bf5e KM	475	case '>':
	476	case '<':
	477	case '!': /* ops like <, <<, <=, !=, etc */
4b365fcd KM	478	if (buf_ptr == '>' \|\| buf_ptr == '<' \|\| *buf_ptr == '=') {
	479	tok++ = buf_ptr;
	480	if (++buf_ptr >= buf_end)
1009bf5e	481	fill_buffer();
4b365fcd	482	}
4b365fcd	483	if (*buf_ptr == '=')
1009bf5e KM	484	tok++ = buf_ptr++;
1009bf5e KM	485	code = (ps.last_u_d ? unary_op : binary_op);
4b365fcd KM	486	unary_delim = true;
	487	break;
	488
1009bf5e	489	default:
4b365fcd	490	if (token[0] == '/' && buf_ptr == '') {
1009bf5e	491	/* it is start of comment */
4b365fcd KM	492	tok++ = '';
	493
	494	if (++buf_ptr >= buf_end)
1009bf5e	495	fill_buffer();
4b365fcd KM	496
4b365fcd KM	497	code = comment;
1009bf5e	498	unary_delim = ps.last_u_d;
4b365fcd KM	499	break;
4b365fcd KM	500	}
1009bf5e KM	501	while ((tok - 1) == buf_ptr \|\| *buf_ptr == '=') {
1009bf5e KM	502	/* handle \|\|, &&, etc, and also things as in int ****i /
4b365fcd KM	503	tok++ = buf_ptr;
4b365fcd KM	504	if (++buf_ptr >= buf_end)
1009bf5e	505	fill_buffer();
4b365fcd	506	}
1009bf5e	507	code = (ps.last_u_d ? unary_op : binary_op);
4b365fcd KM	508	unary_delim = true;
	509
	510
1009bf5e	511	} /* end of switch */
4b365fcd KM	512	if (code != newline) {
	513	l_struct = false;
	514	last_code = code;
	515	}
1009bf5e KM	516	if (buf_ptr >= buf_end) /* check for input buffer empty */
	517	fill_buffer();
	518	ps.last_u_d = unary_delim;
	519	tok = '\0'; / null terminate the token */
4b365fcd KM	520	return (code);
4b365fcd KM	521	};
1009bf5e KM	522
	523	/* Add the given keyword to the keyword table, using val as the keyword type
	524	*/
	525	addkey (key, val)
	526	char *key;
	527	{
	528	register struct templ *p = specials;
	529	while (p->rwd)
	530	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
	531	return;
	532	else
	533	p++;
	534	if (p >= specials + sizeof specials / sizeof specials[0])
	535	return; /* For now, table overflows are silently
	536	ignored */
	537	p->rwd = key;
	538	p->rwcode = val;
	539	p[1].rwd = 0;
	540	p[1].rwcode = 0;
	541	return;
	542	}