* Copyright (c) 1980 Regents of the University of California.
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
static char sccsid
[] = "@(#)lexi.c 5.2 (Berkeley) %G%";
* This is the token scanner for indent
* 1) Strip off intervening blanks and/or tabs.
* 2) If it is an alphanumeric token, move it to the token buffer "token".
* Check if it is a special reserved word that indent will want to
* 3) Non-alphanumeric tokens are handled with a big switch statement. A
* flag is kept to remember if the last token was a "unary delimiter",
* which forces a following operator to be unary as opposed to binary.
* An integer code indicating the type of token scanned.
* ps.last_u_d = Set to true iff this token is a "unary delimiter"
* Start of comment is passed back so that the comment can be scanned by
* Strings and character literals are returned just like identifiers.
* initial coding November 1976 D A Willcox of CAC
* 1/7/77 D A Willcox of CAC Fix to provide proper handling
* Here we have the token scanner for indent. It scans off one token and
* puts it in the global variable "token". It returns a code, indicating
* the type of token scanned.
#include "indent_globs.h";
#include "indent_codes.h";
struct templ specials
[100] =
{ /* this is used to facilitate the decision
* of what type (alphanumeric, operator)
register char *tok
; /* local pointer to next char in token */
int unary_delim
; /* this is set to 1 if the current token
* forces a following operator to be unary */
static int last_code
; /* the last token type returned */
static int l_struct
; /* set to 1 if the last token was 'struct' */
int code
; /* internal code to be returned */
char qchar
; /* the delimiter character for a string */
tok
= token
; /* point to start of place to save token */
ps
.col_1
= ps
.last_nl
; /* tell world that this token started in
* column 1 iff the last thing scanned was
while (*buf_ptr
== ' ' || *buf_ptr
== '\t') { /* get rid of blanks */
ps
.col_1
= false; /* leading blanks imply token is not in
if (++buf_ptr
>= buf_end
)
/* Scan an alphanumeric token */
if (chartype
[*buf_ptr
& 0177] == alphanum
) { /* we have a character
register char *j
; /* used for searching thru list of
register struct templ
*p
;
while (chartype
[*buf_ptr
& 0177] == alphanum
) { /* copy it over */
while (*buf_ptr
== ' ' || *buf_ptr
== '\t') { /* get rid of blanks */
if (++buf_ptr
>= buf_end
)
ps
.its_a_keyword
= false;
ps
.sizeof_keyword
= false;
if (l_struct
) { /* if last token was 'struct', then this
* token should be treated as a
ps
.last_u_d
= false; /* Operator after identifier is binary */
last_code
= ident
; /* Remember that this is the code we will
* This loop will check if the token is a keyword.
for (p
= specials
; (j
= p
->rwd
) != 0; p
++) {
tok
= token
; /* point at scanned token */
if (*j
++ != *tok
++ || *j
++ != *tok
++)
continue; /* This test depends on the fact that
* identifiers are always at least 1
* character long (ie. the first two bytes
* of the identifier are always
break; /* If its a one-character identifier */
goto found_keyword
; /* I wish that C had a multi-level
if (p
->rwd
) { /* we have a keyword */
case 1: /* it is a switch */
case 2: /* a case or default */
break; /* inside parens: cast */
* Next time around, we will want to know that we have
case 4: /* one of the declaration keywords */
ps
.cast_mask
|= 1 << ps
.p_l_follow
;
break; /* inside parens: cast */
case 5: /* if, while, for */
ps
.sizeof_keyword
= true;
default: /* all others are treated like any other
} /* end of if (found_it) */
if (*buf_ptr
== '(' && ps
.tos
<= 1 && ps
.ind_level
== 0
&& (buf_ptr
[1] != ')' || buf_ptr
[2] != ';')) {
strncpy(ps
.procname
, token
, sizeof ps
.procname
- 1);
ps
.in_parameter_declaration
= 1;
* The following hack attempts to guess whether or not the current
* token is in fact a declaration keyword -- one that has been
if (((*buf_ptr
== '*' && buf_ptr
[1] != '=') || isalpha(*buf_ptr
))
&& (ps
.last_token
== rparen
|| ps
.last_token
== semicolon
||
ps
.last_token
== lbrace
|| ps
.last_token
== rbrace
)) {
if (last_code
== decl
) /* if this is a declared variable, then
* following sign is unary */
ps
.last_u_d
= true; /* will make "int a -1" work */
return (ident
); /* the ident is not in the list */
} /* end of processing for alphanum character */
/* Scan a non-alphanumeric token */
*tok
++ = *buf_ptr
; /* if it is only a one-character token, it
if (++buf_ptr
>= buf_end
)
unary_delim
= ps
.last_u_d
;
ps
.last_nl
= true; /* remember that we just had a newline */
code
= (had_eof
? 0 : newline
);
* if data has been exhausted, the newline is a dummy, and we
* should return code to stop
case '\'': /* start of quoted character */
case '"': /* start of string */
do { /* copy the string */
while (1) { /* move one character or [/<char>]<char> */
printf("%d: Unterminated literal\n", line_no
);
if (had_eof
|| ((tok
- token
) > (bufsize
- 2))) {
printf("Unterminated literal\n");
/* get out of literal copying loop */
if (*tok
== BACKSLASH
) { /* if escape, copy extra
if (*buf_ptr
== '\n') /* check for escaped
if (*buf_ptr
== BACKSLASH
)
++tok
; /* we must increment this again because we
break; /* we copied one character */
} while (*tok
++ != qchar
);
unary_delim
= ps
.last_u_d
;
* if (ps.in_or_st) ps.block_init = 1;
code
= ps
.block_init
? lparen
: lbrace
;
code
= ps
.block_init
? rparen
: rbrace
;
case 014: /* a form feed */
unary_delim
= ps
.last_u_d
;
ps
.last_nl
= true; /* remember this so we can set 'ps.col_1'
case '+': /* check for -, +, --, ++ */
code
= (ps
.last_u_d
? unary_op
: binary_op
);
if (*buf_ptr
== token
[0]) {
/* check for doubled character */
/* buffer overflow will be checked at end of loop */
if (last_code
== ident
|| last_code
== rparen
) {
code
= (ps
.last_u_d
? unary_op
: postop
);
/* check for following ++ or -- */
else if (*buf_ptr
== '=')
/* check for operator += */
else if (*buf_ptr
== '>') {
/* check for operator -> */
/* buffer overflow will be checked at end of switch */
if (chartype
[*buf_ptr
] == opchar
) { /* we have two char
if ((tok
[-1] == '<' || tok
[-1] == '>') && tok
[-1] == *buf_ptr
)
*tok
++ = '='; /* Flip =+ to += */
case '!': /* ops like <, <<, <=, !=, etc */
if (*buf_ptr
== '>' || *buf_ptr
== '<' || *buf_ptr
== '=') {
if (++buf_ptr
>= buf_end
)
code
= (ps
.last_u_d
? unary_op
: binary_op
);
if (token
[0] == '/' && *buf_ptr
== '*') {
/* it is start of comment */
if (++buf_ptr
>= buf_end
)
unary_delim
= ps
.last_u_d
;
while (*(tok
- 1) == *buf_ptr
|| *buf_ptr
== '=') {
/* handle ||, &&, etc, and also things as in int *****i */
if (++buf_ptr
>= buf_end
)
code
= (ps
.last_u_d
? unary_op
: binary_op
);
if (buf_ptr
>= buf_end
) /* check for input buffer empty */
ps
.last_u_d
= unary_delim
;
*tok
= '\0'; /* null terminate the token */
/* Add the given keyword to the keyword table, using val as the keyword type
register struct templ
*p
= specials
;
if (p
->rwd
[0] == key
[0] && strcmp(p
->rwd
, key
) == 0)
if (p
>= specials
+ sizeof specials
/ sizeof specials
[0])
return; /* For now, table overflows are silently