/* Version-identification string for this file (SCCS-style "@(#)" tag). */
static char sccsid[] = "@(#)lexi.c 4.1 (Berkeley) %G%";
This is the token scanner for indent
1) Strip off intervening blanks and/or tabs.
2) If it is an alphanumeric token, move it to the token buffer "token".
Check if it is a special reserved word that indent will want to
3) Non-alphanumeric tokens are handled with a big switch statement. A
flag is kept to remember if the last token was a "unary delimiter",
which forces a following operator to be unary as opposed to binary.
An integer code indicating the type of token scanned.
last_u_d = Set to true iff this token is a "unary delimiter"
Start of comment is passed back so that the comment can be scanned by
Strings and character literals are returned just like identifiers.
initial coding November 1976 D A Willcox of CAC
1/7/77 D A Willcox of CAC Fix to provide proper handling
/* Here we have the token scanner for indent. It scans off one token and
puts it in the global variable "token". It returns a code, indicating the
type of token scanned. */
/* A preprocessor directive ends at the newline; it takes no trailing ';'. */
#include "indent_globs.h"
#include "indent_codes.h"
struct templ specials
[] =
{ /* this is used to facilitate the decision of what type
(alphanumeric, operator) each character is */
/* this is true if the last thing scanned was a newline */
/* local pointer to next char in token */
/* used for searching thru list of reserved words */
/* this is set to 1 if the current token forces a following operator to be
/* the last token type returned */
/* set to 1 if the last token was 'struct' */
int code
; /* internal code to be returned */
char qchar
; /* the delimiter character for a string */
tok
= token
; /* point to start of place to save token */
col_1
= last_nl
; /* tell world that this token started in column
1 iff the last thing scanned was nl */
while (*buf_ptr
== ' ' || *buf_ptr
== '\t') {
col_1
= false; /* leading blanks imply token is not in column 1
if (++buf_ptr
>= buf_end
)
/*----------------------------------------------------------*\
| Scan an alphanumeric token
\*----------------------------------------------------------*/
if (chartype
[*buf_ptr
& 0177] == alphanum
) {
/* we have a character or number */
while (chartype
[*buf_ptr
& 0177] == alphanum
) {
if (l_struct
) { /* if last token was 'struct', then this token
should be treated as a declaration */
last_u_d
= false; /* operator after identifier is binary */
for (i
= 0; specials
[i
].rwd
!= 0; ++i
) {
/* this loop will check if the token is a keyword. if so, a following
last_code
= ident
; /* remember that this is the code we will return
/* point at ith reserved word */
tok
= token
; /* point at scanned token */
found_it
= true; /* set to false if not found */
if (found_it
) { /* we have a keyword */
switch (specials
[i
].rwcode
) {
case 1: /* it is a switch */
case 2: /* a case or default */
/* Next time around, we will want to know that we have had
case 4: /* one of the declaration keywords */
if(p_l_follow
) break; /* inside parens: cast */
case 5: /* if, while, for */
default: /* all others are treated like any other
} /* end of if (found_it) */
if (last_code
== decl
) /* if this is a declared variable, then
following sign is unary */
last_u_d
= true; /* will make "int a -1" work */
return (ident
); /* the ident is not in the list */
} /* end of processing for alphanumeric character */
/*----------------------------------------------------------*\
| Scan a non-alphanumeric token
\*----------------------------------------------------------*/
*tok
++ = *buf_ptr
; /* if it is only a one-character token, it is
if (++buf_ptr
>= buf_end
)
last_nl
= true; /* remember that we just had a newline */
code
= (had_eof
? 0 : newline
);
/* if data has been exhausted, the newline is a dummy, and we should
case '\'': /* start of quoted character */
qchar
= '\''; /* remember final delimiter */
goto copy_lit
; /* and go to common literal code */
case '"': /* start of string */
do { /* copy the string */
while (1) { /* move one character or [\<char>]<char> */
/* check for unterminated literal */
printf ("%d: Unterminated literal\n", line_no
);
/* Don't copy any more */
if (had_eof
|| ((tok
- token
) > (bufsize
- 2))) {
printf ("Unterminated literal\n");
/* get out of literal copying loop */
/* if escape, copy extra char */
/* check for escaped newline */
++tok
; /* we must increment this again because we
break; /* we copied one character */
} while (*tok
++ != qchar
);
case 014: /* a form feed */
last_nl
= true; /* remember this so we can set 'col_1' right */
case '+': /* check for -, +, --, ++ */
code
= (last_u_d
? unary_op
: binary_op
);
if (*buf_ptr
== token
[0]) {
/* check for doubled character */
/* buffer overflow will be checked at end of loop */
if (last_code
== ident
|| last_code
== rparen
) {
code
= (last_u_d
? unary_op
: postop
);
/* check for following ++ or -- */
if (*buf_ptr
== '>' || *buf_ptr
== '=')
/* check for operator -> or += */
/* buffer overflow will be checked at end of switch */
if (chartype
[*buf_ptr
] == opchar
) {
/* we have two char assignment */
/* move second character */
if (++buf_ptr
>= buf_end
)
if (token
[1] != '<' && token
[1] != '>')
/* check for possible 3 char operator */
case '!': /* ops like <, <<, <=, !=, etc */
if (*buf_ptr
== '>' || *buf_ptr
== '<' || *buf_ptr
== '=') {
if (++buf_ptr
>= buf_end
)
code
= (last_u_d
? unary_op
: binary_op
);
if (token
[0] == '/' && *buf_ptr
== '*') {
/* it is start of comment */
if (++buf_ptr
>= buf_end
)
while (*(tok
- 1) == *buf_ptr
|| *buf_ptr
=='=') {
/* handle ||, &&, etc, and also things as in int *****i */
if (++buf_ptr
>= buf_end
)
code
= (last_u_d
? unary_op
: binary_op
);
if (buf_ptr
>= buf_end
) /* check for input buffer empty */
*tok
= '\0'; /* null terminate the token */