X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/94a602c5fedcbed8aed6b580ac65694eb971e7a1..30f489145880932a3d40307c0213e120cb79a98a:/usr/src/usr.bin/indent/lexi.c diff --git a/usr/src/usr.bin/indent/lexi.c b/usr/src/usr.bin/indent/lexi.c index d743952211..dacca37fb1 100644 --- a/usr/src/usr.bin/indent/lexi.c +++ b/usr/src/usr.bin/indent/lexi.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 1980 Regents of the University of California. + * Copyright (c) 1985 Sun Microsystems, Inc. + * Copyright (c) 1980 The Regents of the University of California. * Copyright (c) 1976 Board of Trustees of the University of Illinois. * All rights reserved. * @@ -8,74 +9,27 @@ * duplicated in all such forms and that any documentation, * advertising materials, and other materials related to such * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley and the University - * of Illinois, Urbana. The name of either - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. + * by the University of California, Berkeley, the University of Illinois, + * Urbana, and Sun Microsystems, Inc. The name of either University + * or Sun Microsystems may not be used to endorse or promote products + * derived from this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef lint -static char sccsid[] = "@(#)lexi.c 5.8 (Berkeley) %G%"; +static char sccsid[] = "@(#)lexi.c 5.9 (Berkeley) %G%"; #endif /* not lint */ /* - * NAME: - * lexi - * - * FUNCTION: - * This is the token scanner for indent - * - * ALGORITHM: - * 1) Strip off intervening blanks and/or tabs. - * 2) If it is an alphanumeric token, move it to the token buffer "token". - * Check if it is a special reserved word that indent will want to - * know about. - * 3) Non-alphanumeric tokens are handled with a big switch statement. A - * flag is kept to remember if the last token was a "unary delimiter", - * which forces a following operator to be unary as opposed to binary. - * - * PARAMETERS: - * None - * - * RETURNS: - * An integer code indicating the type of token scanned. - * - * GLOBALS: - * buf_ptr = - * had_eof - * ps.last_u_d = Set to true iff this token is a "unary delimiter" - * - * CALLS: - * fill_buffer - * printf (lib) - * - * CALLED BY: - * main - * - * NOTES: - * Start of comment is passed back so that the comment can be scanned by - * pr_comment. - * - * Strings and character literals are returned just like identifiers. - * - * HISTORY: - * initial coding November 1976 D A Willcox of CAC - * 1/7/77 D A Willcox of CAC Fix to provide proper handling - * of "int a -1;" - * - */ - -/* - * Here we have the token scanner for indent. It scans off one token and - * puts it in the global variable "token". It returns a code, indicating - * the type of token scanned. + * Here we have the token scanner for indent. It scans off one token and puts + * it in the global variable "token". It returns a code, indicating the type + * of token scanned. */ -#include "indent_globs.h" -#include "indent_codes.h" +#include "indent_globs.h"; +#include "indent_codes.h"; #include "ctype.h" #define alphanum 1 @@ -120,15 +74,15 @@ struct templ specials[100] = }; char chartype[128] = -{ /* this is used to facilitate the decision - * of what type (alphanumeric, operator) - * each character is */ +{ /* this is used to facilitate the decision of + * what type (alphanumeric, operator) each + * character is */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 3, 3, 0, - 0, 0, 3, 3, 0, 3, 3, 3, + 0, 0, 3, 3, 0, 3, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, @@ -144,12 +98,12 @@ char chartype[128] = -int +int lexi() { register char *tok; /* local pointer to next char in token */ - int unary_delim; /* this is set to 1 if the current token - * + int unary_delim; /* this is set to 1 if the current token + * * forces a following operator to be unary */ static int last_code; /* the last token type returned */ static int l_struct; /* set to 1 if the last token was 'struct' */ @@ -159,33 +113,64 @@ lexi() tok = token; /* point to start of place to save token */ unary_delim = false; ps.col_1 = ps.last_nl; /* tell world that this token started in - * column 1 iff the last thing scanned was - * nl */ + * column 1 iff the last thing scanned was nl */ ps.last_nl = false; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ - ps.col_1 = false; /* leading blanks imply token is not in - * column 1 */ + ps.col_1 = false; /* leading blanks imply token is not in column + * 1 */ if (++buf_ptr >= buf_end) fill_buffer(); } - /* Scan an alphanumeric token. Note that we must also handle - * stuff like "1.0e+03" and "7e-6". */ - if (chartype[*buf_ptr & 0177] == alphanum) { /* we have a character - * or number */ - register char *j; /* used for searching thru list of + /* Scan an alphanumeric token */ + if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { + /* + * we have a character or number + */ + register char *j; /* used for searching thru list of + * * reserved words */ register struct templ *p; - register int c; - do { /* copy it over */ - *tok++ = *buf_ptr++; - if (buf_ptr >= buf_end) - fill_buffer(); - } while (chartype[c = *buf_ptr & 0177] == alphanum || - isdigit(token[0]) && (c == '+' || c == '-') && - (tok[-1] == 'e' || tok[-1] == 'E')); + if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { + int seendot = 0, + seenexp = 0; + if (*buf_ptr == '0' && + (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { + *tok++ = *buf_ptr++; + *tok++ = *buf_ptr++; + while (isxdigit(*buf_ptr)) + *tok++ = *buf_ptr++; + } + else + while (1) { + if (*buf_ptr == '.') + if (seendot) + break; + else + seendot++; + *tok++ = *buf_ptr++; + if (!isdigit(*buf_ptr) && *buf_ptr != '.') + if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) + break; + else { + seenexp++; + seendot++; + *tok++ = *buf_ptr++; + if (*buf_ptr == '+' || *buf_ptr == '-') + *tok++ = *buf_ptr++; + } + } + if (*buf_ptr == 'L' || *buf_ptr == 'l') + *tok++ = *buf_ptr++; + } + else + while (chartype[*buf_ptr] == alphanum) { /* copy it over */ + *tok++ = *buf_ptr++; + if (buf_ptr >= buf_end) + fill_buffer(); + } *tok++ = '\0'; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ if (++buf_ptr >= buf_end) @@ -193,9 +178,8 @@ lexi() } ps.its_a_keyword = false; ps.sizeof_keyword = false; - if (l_struct) { /* if last token was 'struct', then this - * token should be treated as a - * declaration */ + if (l_struct) { /* if last token was 'struct', then this token + * should be treated as a declaration */ l_struct = false; last_code = ident; ps.last_u_d = true; @@ -206,16 +190,15 @@ lexi() * return */ /* - * This loop will check if the token is a keyword. + * This loop will check if the token is a keyword. */ for (p = specials; (j = p->rwd) != 0; p++) { tok = token; /* point at scanned token */ if (*j++ != *tok++ || *j++ != *tok++) continue; /* This test depends on the fact that - * identifiers are always at least 1 - * character long (ie. the first two bytes - * of the identifier are always - * meaningful) */ + * identifiers are always at least 1 character + * long (ie. the first two bytes of the + * identifier are always meaningful) */ if (tok[-1] == 0) break; /* If its a one-character identifier */ while (*tok++ == *j) @@ -228,57 +211,61 @@ lexi() ps.its_a_keyword = true; ps.last_u_d = true; switch (p->rwcode) { - case 1: /* it is a switch */ - return (swstmt); - case 2: /* a case or default */ - return (casestmt); - - case 3: /* a "struct" */ - if (ps.p_l_follow) - break; /* inside parens: cast */ - l_struct = true; - - /* - * Next time around, we will want to know that we have - * had a 'struct' - */ - case 4: /* one of the declaration keywords */ - if (ps.p_l_follow) { - ps.cast_mask |= 1 << ps.p_l_follow; - break; /* inside parens: cast */ - } - last_code = decl; - return (decl); + case 1: /* it is a switch */ + return (swstmt); + case 2: /* a case or default */ + return (casestmt); + + case 3: /* a "struct" */ + if (ps.p_l_follow) + break; /* inside parens: cast */ + l_struct = true; + + /* + * Next time around, we will want to know that we have had a + * 'struct' + */ + case 4: /* one of the declaration keywords */ + if (ps.p_l_follow) { + ps.cast_mask |= 1 << ps.p_l_follow; + break; /* inside parens: cast */ + } + last_code = decl; + return (decl); - case 5: /* if, while, for */ - return (sp_paren); + case 5: /* if, while, for */ + return (sp_paren); - case 6: /* do, else */ - return (sp_nparen); + case 6: /* do, else */ + return (sp_nparen); - case 7: - ps.sizeof_keyword = true; - default: /* all others are treated like any other + case 7: + ps.sizeof_keyword = true; + default: /* all others are treated like any other * identifier */ - return (ident); + return (ident); } /* end of switch */ } /* end of if (found_it) */ - if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0 - && (buf_ptr[1] != ')' || buf_ptr[2] != ';')) { + if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { + register char *p = buf_ptr; + while (p < buf_end) + if (*p++ == ')' && *p == ';') + goto not_proc; strncpy(ps.procname, token, sizeof ps.procname - 1); ps.in_parameter_declaration = 1; + not_proc:; } - /* * The following hack attempts to guess whether or not the current * token is in fact a declaration keyword -- one that has been - * typedefd + * typedefd */ - if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr)) - && !ps.p_l_follow - && (ps.last_token == rparen || ps.last_token == semicolon || - ps.last_token == decl || - ps.last_token == lbrace || ps.last_token == rbrace)) { + if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') + && !ps.p_l_follow + && !ps.block_init + && (ps.last_token == rparen || ps.last_token == semicolon || + ps.last_token == decl || + ps.last_token == lbrace || ps.last_token == rbrace)) { ps.its_a_keyword = true; ps.last_u_d = true; last_code = decl; @@ -290,227 +277,230 @@ lexi() last_code = ident; return (ident); /* the ident is not in the list */ } /* end of procesing for alpanum character */ - /* Scan a non-alphanumeric token */ + /* l l l Scan a non-alphanumeric token */ - *tok++ = *buf_ptr; /* if it is only a one-character token, it - * is moved here */ + *tok++ = *buf_ptr; /* if it is only a one-character token, it is + * moved here */ *tok = '\0'; if (++buf_ptr >= buf_end) fill_buffer(); switch (*token) { - case '\n': - unary_delim = ps.last_u_d; - ps.last_nl = true; /* remember that we just had a newline */ - code = (had_eof ? 0 : newline); - - /* - * if data has been exausted, the newline is a dummy, and we - * should return code to stop - */ - break; + case '\n': + unary_delim = ps.last_u_d; + ps.last_nl = true; /* remember that we just had a newline */ + code = (had_eof ? 0 : newline); - case '\'': /* start of quoted character */ - case '"': /* start of string */ - qchar = *token; - if (troff) { - tok[-1] = '`'; - if (qchar == '"') - *tok++ = '`'; - *tok++ = BACKSLASH; - *tok++ = 'f'; - *tok++ = 'L'; - } - do { /* copy the string */ - while (1) { /* move one character or [/] */ - if (*buf_ptr == '\n') { - printf("%d: Unterminated literal\n", line_no); - goto stop_lit; + /* + * if data has been exausted, the newline is a dummy, and we should + * return code to stop + */ + break; + + case '\'': /* start of quoted character */ + case '"': /* start of string */ + qchar = *token; + if (troff) { + tok[-1] = '`'; + if (qchar == '"') + *tok++ = '`'; + tok = chfont(&bodyf, &stringf, tok); + } + do { /* copy the string */ + while (1) { /* move one character or [/] */ + if (*buf_ptr == '\n') { + printf("%d: Unterminated literal\n", line_no); + goto stop_lit; + } + *tok = *buf_ptr++; + if (buf_ptr >= buf_end) + fill_buffer(); + if (had_eof || ((tok - token) > (bufsize - 2))) { + printf("Unterminated literal\n"); + ++tok; + goto stop_lit; + /* get outof literal copying loop */ + } + if (*tok == BACKSLASH) { /* if escape, copy extra char */ + if (*buf_ptr == '\n') /* check for escaped newline */ + ++line_no; + if (troff) { + *++tok = BACKSLASH; + if (*buf_ptr == BACKSLASH) + *++tok = BACKSLASH; } - *tok = *buf_ptr++; + *++tok = *buf_ptr++; + ++tok; /* we must increment this again because we + * copied two chars */ if (buf_ptr >= buf_end) fill_buffer(); - if (had_eof || ((tok - token) > (bufsize - 2))) { - printf("Unterminated literal\n"); - ++tok; - goto stop_lit; - /* get outof literal copying loop */ - } - if (*tok == BACKSLASH) { /* if escape, copy extra - * char */ - if (*buf_ptr == '\n') /* check for escaped - * newline */ - ++line_no; - if (troff) { - *++tok = BACKSLASH; - if (*buf_ptr == BACKSLASH) - *++tok = BACKSLASH; - } - *++tok = *buf_ptr++; - ++tok; /* we must increment this again because we - * copied two chars */ - if (buf_ptr >= buf_end) - fill_buffer(); - } - else - break; /* we copied one character */ - } /* end of while (1) */ - } while (*tok++ != qchar); - if (troff) { - tok[-1] = BACKSLASH; - *tok++ = 'f'; - *tok++ = 'R'; + } + else + break; /* we copied one character */ + } /* end of while (1) */ + } while (*tok++ != qchar); + if (troff) { + tok = chfont(&stringf, &bodyf, tok - 1); + if (qchar == '"') *tok++ = '\''; - if (qchar == '"') - *tok++ = '\''; - } - stop_lit: - code = ident; - break; - - case ('('): - case ('['): - unary_delim = true; - code = lparen; - break; - - case (')'): - case (']'): - code = rparen; - break; - - case '#': - unary_delim = ps.last_u_d; - code = preesc; - break; - - case '?': - unary_delim = true; - code = question; - break; - - case (':'): - code = colon; - unary_delim = true; - break; - - case (';'): - unary_delim = true; - code = semicolon; - break; - - case ('{'): - unary_delim = true; - - /* - * if (ps.in_or_st) ps.block_init = 1; - */ - code = ps.block_init ? lparen : lbrace; - break; - - case ('}'): - unary_delim = true; - code = ps.block_init ? rparen : rbrace; - break; + } +stop_lit: + code = ident; + break; + + case ('('): + case ('['): + unary_delim = true; + code = lparen; + break; + + case (')'): + case (']'): + code = rparen; + break; + + case '#': + unary_delim = ps.last_u_d; + code = preesc; + break; + + case '?': + unary_delim = true; + code = question; + break; + + case (':'): + code = colon; + unary_delim = true; + break; + + case (';'): + unary_delim = true; + code = semicolon; + break; + + case ('{'): + unary_delim = true; - case 014: /* a form feed */ - unary_delim = ps.last_u_d; - ps.last_nl = true; /* remember this so we can set 'ps.col_1' + /* + * if (ps.in_or_st) ps.block_init = 1; + */ + /* ? code = ps.block_init ? lparen : lbrace; */ + code = lbrace; + break; + + case ('}'): + unary_delim = true; + /* ? code = ps.block_init ? rparen : rbrace; */ + code = rbrace; + break; + + case 014: /* a form feed */ + unary_delim = ps.last_u_d; + ps.last_nl = true; /* remember this so we can set 'ps.col_1' * right */ - code = form_feed; - break; - - case (','): - unary_delim = true; - code = comma; - break; - - case '.': - unary_delim = false; - code = period; - break; - - case '-': - case '+': /* check for -, +, --, ++ */ - code = (ps.last_u_d ? unary_op : binary_op); - unary_delim = true; - - if (*buf_ptr == token[0]) { - /* check for doubled character */ - *tok++ = *buf_ptr++; - /* buffer overflow will be checked at end of loop */ - if (last_code == ident || last_code == rparen) { - code = (ps.last_u_d ? unary_op : postop); - /* check for following ++ or -- */ - unary_delim = false; - } - } - else if (*buf_ptr == '=') - /* check for operator += */ - *tok++ = *buf_ptr++; - else if (token[0] == '-' && *buf_ptr == '>') { - /* check for operator -> */ - *tok++ = *buf_ptr++; - if (!pointer_as_binop) { - code = unary_op; - unary_delim = false; - ps.want_blank = false; - } - } - /* buffer overflow will be checked at end of switch */ - - break; - - case '=': - if (ps.in_or_st) - ps.block_init = 1; - if (chartype[*buf_ptr] == opchar) { /* we have two char - * assignment */ - tok[-1] = *buf_ptr++; - if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr) - *tok++ = *buf_ptr++; - *tok++ = '='; /* Flip =+ to += */ - *tok = 0; + code = form_feed; + break; + + case (','): + unary_delim = true; + code = comma; + break; + + case '.': + unary_delim = false; + code = period; + break; + + case '-': + case '+': /* check for -, +, --, ++ */ + code = (ps.last_u_d ? unary_op : binary_op); + unary_delim = true; + + if (*buf_ptr == token[0]) { + /* check for doubled character */ + *tok++ = *buf_ptr++; + /* buffer overflow will be checked at end of loop */ + if (last_code == ident || last_code == rparen) { + code = (ps.last_u_d ? unary_op : postop); + /* check for following ++ or -- */ + unary_delim = false; } - code = binary_op; - unary_delim = true; - break; - /* can drop thru!!! */ - - case '>': - case '<': - case '!': /* ops like <, <<, <=, !=, etc */ - if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { - *tok++ = *buf_ptr; - if (++buf_ptr >= buf_end) - fill_buffer(); + } + else if (*buf_ptr == '=') + /* check for operator += */ + *tok++ = *buf_ptr++; + else if (*buf_ptr == '>') { + /* check for operator -> */ + *tok++ = *buf_ptr++; + if (!pointer_as_binop) { + unary_delim = false; + code = unary_op; + ps.want_blank = false; } - if (*buf_ptr == '=') + } + break; /* buffer overflow will be checked at end of + * switch */ + + case '=': + if (ps.in_or_st) + ps.block_init = 1; +#ifdef undef + if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ + tok[-1] = *buf_ptr++; + if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr) *tok++ = *buf_ptr++; - code = (ps.last_u_d ? unary_op : binary_op); - unary_delim = true; - break; + *tok++ = '='; /* Flip =+ to += */ + *tok = 0; + } +#else + if (*buf_ptr == '=') {/* == */ + *tok++ = '='; /* Flip =+ to += */ + buf_ptr++; + *tok = 0; + } +#endif + code = binary_op; + unary_delim = true; + break; + /* can drop thru!!! */ + + case '>': + case '<': + case '!': /* ops like <, <<, <=, !=, etc */ + if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { + *tok++ = *buf_ptr; + if (++buf_ptr >= buf_end) + fill_buffer(); + } + if (*buf_ptr == '=') + *tok++ = *buf_ptr++; + code = (ps.last_u_d ? unary_op : binary_op); + unary_delim = true; + break; - default: - if (token[0] == '/' && *buf_ptr == '*') { - /* it is start of comment */ - *tok++ = '*'; + default: + if (token[0] == '/' && *buf_ptr == '*') { + /* it is start of comment */ + *tok++ = '*'; - if (++buf_ptr >= buf_end) - fill_buffer(); + if (++buf_ptr >= buf_end) + fill_buffer(); - code = comment; - unary_delim = ps.last_u_d; - break; - } - while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') { - /* handle ||, &&, etc, and also things as in int *****i */ - *tok++ = *buf_ptr; - if (++buf_ptr >= buf_end) - fill_buffer(); - } - code = (ps.last_u_d ? unary_op : binary_op); - unary_delim = true; + code = comment; + unary_delim = ps.last_u_d; + break; + } + while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') { + /* + * handle ||, &&, etc, and also things as in int *****i + */ + *tok++ = *buf_ptr; + if (++buf_ptr >= buf_end) + fill_buffer(); + } + code = (ps.last_u_d ? unary_op : binary_op); + unary_delim = true; } /* end of switch */ @@ -525,10 +515,11 @@ lexi() return (code); }; -/* Add the given keyword to the keyword table, using val as the keyword type - */ -addkey (key, val) -char *key; +/* + * Add the given keyword to the keyword table, using val as the keyword type + */ +addkey(key, val) + char *key; { register struct templ *p = specials; while (p->rwd) @@ -538,7 +529,7 @@ char *key; p++; if (p >= specials + sizeof specials / sizeof specials[0]) return; /* For now, table overflows are silently - ignored */ + * ignored */ p->rwd = key; p->rwcode = val; p[1].rwd = 0;