* Copyright (c) 1985 Sun Microsystems, Inc.
* Copyright (c) 1980 The Regents of the University of California.
* Copyright (c) 1976 Board of Trustees of the University of Illinois.
* Redistribution and use in source and binary forms are permitted
* provided that: (1) source distributions retain this entire copyright
* notice and comment, and (2) distributions including binaries display
* the following acknowledgement: ``This product includes software
* developed by the University of California, Berkeley and its contributors''
* in the documentation or other materials provided with the distribution
* and in all advertising materials mentioning features or use of this
* software. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
static char sccsid
[] = "@(#)lexi.c 5.15 (Berkeley) 6/1/90";
* Here we have the token scanner for indent. It scans off one token and puts
* it in the global variable "token". It returns a code, indicating the type
#include "indent_globs.h"
#include "indent_codes.h"
struct templ specials
[100] =
{ /* this is used to facilitate the decision of
* what type (alphanumeric, operator) each
int unary_delim
; /* this is set to 1 if the current token
* forces a following operator to be unary */
static int last_code
; /* the last token type returned */
static int l_struct
; /* set to 1 if the last token was 'struct' */
int code
; /* internal code to be returned */
char qchar
; /* the delimiter character for a string */
e_token
= s_token
; /* point to start of place to save token */
ps
.col_1
= ps
.last_nl
; /* tell world that this token started in
* column 1 iff the last thing scanned was nl */
while (*buf_ptr
== ' ' || *buf_ptr
== '\t') { /* get rid of blanks */
ps
.col_1
= false; /* leading blanks imply token is not in column
if (++buf_ptr
>= buf_end
)
/* Scan an alphanumeric token */
if (chartype
[*buf_ptr
] == alphanum
|| buf_ptr
[0] == '.' && isdigit(buf_ptr
[1])) {
* we have a character or number
register char *j
; /* used for searching thru list of
register struct templ
*p
;
if (isdigit(*buf_ptr
) || buf_ptr
[0] == '.' && isdigit(buf_ptr
[1])) {
(buf_ptr
[1] == 'x' || buf_ptr
[1] == 'X')) {
while (isxdigit(*buf_ptr
)) {
if (!isdigit(*buf_ptr
) && *buf_ptr
!= '.')
if ((*buf_ptr
!= 'E' && *buf_ptr
!= 'e') || seenexp
)
if (*buf_ptr
== '+' || *buf_ptr
== '-')
if (*buf_ptr
== 'L' || *buf_ptr
== 'l')
while (chartype
[*buf_ptr
] == alphanum
) { /* copy it over */
while (*buf_ptr
== ' ' || *buf_ptr
== '\t') { /* get rid of blanks */
if (++buf_ptr
>= buf_end
)
ps
.its_a_keyword
= false;
ps
.sizeof_keyword
= false;
if (l_struct
) { /* if last token was 'struct', then this token
* should be treated as a declaration */
ps
.last_u_d
= false; /* Operator after identifier is binary */
last_code
= ident
; /* Remember that this is the code we will
* This loop will check if the token is a keyword.
for (p
= specials
; (j
= p
->rwd
) != 0; p
++) {
register char *p
= s_token
; /* point at scanned token */
if (*j
++ != *p
++ || *j
++ != *p
++)
continue; /* This test depends on the fact that
* identifiers are always at least 1 character
* long (ie. the first two bytes of the
* identifier are always meaningful) */
break; /* If it's a one-character identifier */
goto found_keyword
; /* I wish that C had a multi-level
if (p
->rwd
) { /* we have a keyword */
case 1: /* it is a switch */
case 2: /* a case or default */
break; /* inside parens: cast */
* Next time around, we will want to know that we have had a
case 4: /* one of the declaration keywords */
ps
.cast_mask
|= 1 << ps
.p_l_follow
;
break; /* inside parens: cast */
case 5: /* if, while, for */
ps
.sizeof_keyword
= true;
default: /* all others are treated like any other
} /* end of if (found_it) */
if (*buf_ptr
== '(' && ps
.tos
<= 1 && ps
.ind_level
== 0) {
register char *tp
= buf_ptr
;
if (*tp
++ == ')' && (*tp
== ';' || *tp
== ','))
strncpy(ps
.procname
, token
, sizeof ps
.procname
- 1);
ps
.in_parameter_declaration
= 1;
* The following hack attempts to guess whether or not the current
* token is in fact a declaration keyword -- one that has been
if (((*buf_ptr
== '*' && buf_ptr
[1] != '=') || isalpha(*buf_ptr
) || *buf_ptr
== '_')
&& (ps
.last_token
== rparen
|| ps
.last_token
== semicolon
||
ps
.last_token
== lbrace
|| ps
.last_token
== rbrace
)) {
if (last_code
== decl
) /* if this is a declared variable, then
* following sign is unary */
ps
.last_u_d
= true; /* will make "int a -1" work */
return (ident
); /* the ident is not in the list */
} /* end of processing for alphanumeric character */
/* Scan a non-alphanumeric token */
*e_token
++ = *buf_ptr
; /* if it is only a one-character token, it is
if (++buf_ptr
>= buf_end
)
unary_delim
= ps
.last_u_d
;
ps
.last_nl
= true; /* remember that we just had a newline */
code
= (had_eof
? 0 : newline
);
* if data has been exhausted, the newline is a dummy, and we should
case '\'': /* start of quoted character */
case '"': /* start of string */
e_token
= chfont(&bodyf
, &stringf
, e_token
);
do { /* copy the string */
while (1) { /* move one character or [/<char>]<char> */
printf("%d: Unterminated literal\n", line_no
);
CHECK_SIZE_TOKEN
; /* Only have to do this once in this loop,
* since CHECK_SIZE guarantees that there
* are at least 5 entries left */
if (*e_token
== BACKSLASH
) { /* if escape, copy extra char */
if (*buf_ptr
== '\n') /* check for escaped newline */
if (*buf_ptr
== BACKSLASH
)
++e_token
; /* we must increment this again because we
break; /* we copied one character */
} while (*e_token
++ != qchar
);
e_token
= chfont(&stringf
, &bodyf
, e_token
- 1);
unary_delim
= ps
.last_u_d
;
* if (ps.in_or_st) ps.block_init = 1;
/* ? code = ps.block_init ? lparen : lbrace; */
/* ? code = ps.block_init ? rparen : rbrace; */
case 014: /* a form feed */
unary_delim
= ps
.last_u_d
;
ps
.last_nl
= true; /* remember this so we can set 'ps.col_1'
case '+': /* check for -, +, --, ++ */
code
= (ps
.last_u_d
? unary_op
: binary_op
);
if (*buf_ptr
== token
[0]) {
/* check for doubled character */
/* buffer overflow will be checked at end of loop */
if (last_code
== ident
|| last_code
== rparen
) {
code
= (ps
.last_u_d
? unary_op
: postop
);
/* check for following ++ or -- */
else if (*buf_ptr
== '=')
/* check for operator += */
else if (*buf_ptr
== '>') {
/* check for operator -> */
break; /* buffer overflow will be checked at end of
if (chartype
[*buf_ptr
] == opchar
) { /* we have two char assignment */
e_token
[-1] = *buf_ptr
++;
if ((e_token
[-1] == '<' || e_token
[-1] == '>') && e_token
[-1] == *buf_ptr
)
*e_token
++ = '='; /* Flip =+ to += */
if (*buf_ptr
== '=') {/* == */
*e_token
++ = '='; /* Flip =+ to += */
case '!': /* ops like <, <<, <=, !=, etc */
if (*buf_ptr
== '>' || *buf_ptr
== '<' || *buf_ptr
== '=') {
if (++buf_ptr
>= buf_end
)
code
= (ps
.last_u_d
? unary_op
: binary_op
);
if (token
[0] == '/' && *buf_ptr
== '*') {
/* it is start of comment */
if (++buf_ptr
>= buf_end
)
unary_delim
= ps
.last_u_d
;
while (*(e_token
- 1) == *buf_ptr
|| *buf_ptr
== '=') {
* handle ||, &&, etc, and also things as in int *****i
if (++buf_ptr
>= buf_end
)
code
= (ps
.last_u_d
? unary_op
: binary_op
);
if (buf_ptr
>= buf_end
) /* check for input buffer empty */
ps
.last_u_d
= unary_delim
;
*e_token
= '\0'; /* null terminate the token */
* Add the given keyword to the keyword table, using val as the keyword type
register struct templ
*p
= specials
;
if (p
->rwd
[0] == key
[0] && strcmp(p
->rwd
, key
) == 0)
if (p
>= specials
+ sizeof specials
/ sizeof specials
[0])
return; /* For now, table overflows are silently