The verb form of "accreditation" is "accredit"
[unix-history] / usr / src / usr.bin / indent / lexi.c
CommitLineData
c0bc4ef7
DF
1/*
2 * Copyright (c) 1980 Regents of the University of California.
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 */
6
7#ifndef lint
1d7a34f4 8static char sccsid[] = "@(#)lexi.c 5.5 (Berkeley) %G%";
c0bc4ef7 9#endif not lint
4b365fcd 10
1009bf5e
KM
11/*-
12 *
13 * Copyright (C) 1976
14 * by the
15 * Board of Trustees
16 * of the
17 * University of Illinois
18 *
19 * All rights reserved
20 *
21 *
22 * NAME:
23 * lexi
24 *
25 * FUNCTION:
26 * This is the token scanner for indent
27 *
28 * ALGORITHM:
29 * 1) Strip off intervening blanks and/or tabs.
30 * 2) If it is an alphanumeric token, move it to the token buffer "token".
31 * Check if it is a special reserved word that indent will want to
32 * know about.
33 * 3) Non-alphanumeric tokens are handled with a big switch statement. A
34 * flag is kept to remember if the last token was a "unary delimiter",
35 * which forces a following operator to be unary as opposed to binary.
36 *
37 * PARAMETERS:
38 * None
39 *
40 * RETURNS:
41 * An integer code indicating the type of token scanned.
42 *
43 * GLOBALS:
44 * buf_ptr =
45 * had_eof
46 * ps.last_u_d = Set to true iff this token is a "unary delimiter"
47 *
48 * CALLS:
49 * fill_buffer
50 * printf (lib)
51 *
52 * CALLED BY:
53 * main
54 *
55 * NOTES:
56 * Start of comment is passed back so that the comment can be scanned by
57 * pr_comment.
58 *
59 * Strings and character literals are returned just like identifiers.
60 *
61 * HISTORY:
62 * initial coding November 1976 D A Willcox of CAC
63 * 1/7/77 D A Willcox of CAC Fix to provide proper handling
64 * of "int a -1;"
65 *
66 */\f
4b365fcd 67
1009bf5e
KM
68/*
69 * Here we have the token scanner for indent. It scans off one token and
70 * puts it in the global variable "token". It returns a code, indicating
71 * the type of token scanned.
72 */
4b365fcd 73
1d7a34f4
KB
74#include "indent_globs.h"
75#include "indent_codes.h"
1009bf5e 76#include "ctype.h"
4b365fcd
KM
77
78#define alphanum 1
79#define opchar 3
80
/*
 * One entry of the reserved-word table.
 *
 *   rwd    - spelling of the keyword; a null pointer terminates the table
 *   rwcode - keyword class that lexi() switches on:
 *              0  no special treatment (returned as an ordinary identifier)
 *              1  "switch"
 *              2  "case" / "default"
 *              3  "struct" / "union" / "enum"
 *              4  declaration keywords
 *              5  "if" / "while" / "for"
 *              6  "else" / "do"
 *              7  "sizeof"
 */
struct templ {
    char *rwd;
    int rwcode;
};

/*
 * Reserved words known to the scanner.  The array is deliberately larger
 * than the initial contents so that addkey() can append entries; the entry
 * whose rwd is null marks the current end of the table.
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef", so never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
118
1009bf5e
KM
/*
 * Character classification table, indexed by 7-bit ASCII code.  The scanner
 * uses it to decide what kind of token a character can start or continue:
 *
 *   0 - neither of the classes below
 *   1 - alphanumeric (letters, digits and '_'); same value as `alphanum'
 *   3 - operator character; same value as `opchar'
 */
char chartype[128] =
{
    0, 0, 0, 0, 0, 0, 0, 0,	/* 000-007: control characters */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 010-017: control characters */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 020-027: control characters */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 030-037: control characters */
    0, 3, 0, 0, 0, 3, 3, 0,	/* space ! " # $ % & ' */
    0, 0, 3, 3, 0, 3, 3, 3,	/* ( ) star plus comma minus dot slash */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0 1 2 3 4 5 6 7 */
    1, 1, 0, 0, 3, 3, 3, 3,	/* 8 9 : ; < = > ? */
    0, 1, 1, 1, 1, 1, 1, 1,	/* @ A B C D E F G */
    1, 1, 1, 1, 1, 1, 1, 1,	/* H I J K L M N O */
    1, 1, 1, 1, 1, 1, 1, 1,	/* P Q R S T U V W */
    1, 1, 1, 0, 0, 0, 3, 1,	/* X Y Z [ backslash ] ^ _ */
    0, 1, 1, 1, 1, 1, 1, 1,	/* ` a b c d e f g */
    1, 1, 1, 1, 1, 1, 1, 1,	/* h i j k l m n o */
    1, 1, 1, 1, 1, 1, 1, 1,	/* p q r s t u v w */
    1, 1, 1, 0, 3, 0, 3, 0	/* x y z { | } ~ DEL */
};
140
1009bf5e
KM
141
142
143
144int
145lexi()
146{
147 register char *tok; /* local pointer to next char in token */
148 int unary_delim; /* this is set to 1 if the current token
149 *
150 * forces a following operator to be unary */
151 static int last_code; /* the last token type returned */
152 static int l_struct; /* set to 1 if the last token was 'struct' */
153 int code; /* internal code to be returned */
154 char qchar; /* the delimiter character for a string */
155
156 tok = token; /* point to start of place to save token */
4b365fcd 157 unary_delim = false;
1009bf5e
KM
158 ps.col_1 = ps.last_nl; /* tell world that this token started in
159 * column 1 iff the last thing scanned was
160 * nl */
161 ps.last_nl = false;
162
163 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
164 ps.col_1 = false; /* leading blanks imply token is not in
165 * column 1 */
4b365fcd 166 if (++buf_ptr >= buf_end)
1009bf5e 167 fill_buffer();
4b365fcd
KM
168 }
169
c93d6f87
KM
170 /* Scan an alphanumeric token. Note that we must also handle
171 * stuff like "1.0e+03" and "7e-6". */
1009bf5e
KM
172 if (chartype[*buf_ptr & 0177] == alphanum) { /* we have a character
173 * or number */
174 register char *j; /* used for searching thru list of
1009bf5e
KM
175 * reserved words */
176 register struct templ *p;
c93d6f87 177 register int c;
4b365fcd 178
c93d6f87 179 do { /* copy it over */
4b365fcd
KM
180 *tok++ = *buf_ptr++;
181 if (buf_ptr >= buf_end)
1009bf5e 182 fill_buffer();
c93d6f87
KM
183 } while (chartype[c = *buf_ptr & 0177] == alphanum ||
184 isdigit(token[0]) && (c == '+' || c == '-') &&
185 (tok[-1] == 'e' || tok[-1] == 'E'));
4b365fcd 186 *tok++ = '\0';
1009bf5e
KM
187 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
188 if (++buf_ptr >= buf_end)
189 fill_buffer();
190 }
191 ps.its_a_keyword = false;
192 ps.sizeof_keyword = false;
193 if (l_struct) { /* if last token was 'struct', then this
194 * token should be treated as a
195 * declaration */
4b365fcd
KM
196 l_struct = false;
197 last_code = ident;
1009bf5e 198 ps.last_u_d = true;
4b365fcd
KM
199 return (decl);
200 }
1009bf5e
KM
201 ps.last_u_d = false; /* Operator after indentifier is binary */
202 last_code = ident; /* Remember that this is the code we will
203 * return */
204
205 /*
206 * This loop will check if the token is a keyword.
207 */
208 for (p = specials; (j = p->rwd) != 0; p++) {
209 tok = token; /* point at scanned token */
210 if (*j++ != *tok++ || *j++ != *tok++)
211 continue; /* This test depends on the fact that
212 * identifiers are always at least 1
213 * character long (ie. the first two bytes
214 * of the identifier are always
215 * meaningful) */
216 if (tok[-1] == 0)
217 break; /* If its a one-character identifier */
218 while (*tok++ == *j)
219 if (*j++ == 0)
220 goto found_keyword; /* I wish that C had a multi-level
221 * break... */
222 }
223 if (p->rwd) { /* we have a keyword */
224 found_keyword:
225 ps.its_a_keyword = true;
226 ps.last_u_d = true;
227 switch (p->rwcode) {
228 case 1: /* it is a switch */
229 return (swstmt);
230 case 2: /* a case or default */
231 return (casestmt);
232
233 case 3: /* a "struct" */
234 if (ps.p_l_follow)
235 break; /* inside parens: cast */
236 l_struct = true;
237
238 /*
239 * Next time around, we will want to know that we have
240 * had a 'struct'
241 */
242 case 4: /* one of the declaration keywords */
243 if (ps.p_l_follow) {
244 ps.cast_mask |= 1 << ps.p_l_follow;
245 break; /* inside parens: cast */
246 }
247 last_code = decl;
248 return (decl);
249
250 case 5: /* if, while, for */
251 return (sp_paren);
252
253 case 6: /* do, else */
254 return (sp_nparen);
255
256 case 7:
257 ps.sizeof_keyword = true;
258 default: /* all others are treated like any other
259 * identifier */
260 return (ident);
261 } /* end of switch */
262 } /* end of if (found_it) */
263 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0
264 && (buf_ptr[1] != ')' || buf_ptr[2] != ';')) {
265 strncpy(ps.procname, token, sizeof ps.procname - 1);
266 ps.in_parameter_declaration = 1;
4b365fcd
KM
267 }
268
1009bf5e
KM
269 /*
270 * The following hack attempts to guess whether or not the current
271 * token is in fact a declaration keyword -- one that has been
272 * typedefd
273 */
274 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr))
275 && !ps.p_l_follow
276 && (ps.last_token == rparen || ps.last_token == semicolon ||
277 ps.last_token == decl ||
278 ps.last_token == lbrace || ps.last_token == rbrace)) {
279 ps.its_a_keyword = true;
280 ps.last_u_d = true;
281 last_code = decl;
282 return decl;
283 }
284 if (last_code == decl) /* if this is a declared variable, then
285 * following sign is unary */
286 ps.last_u_d = true; /* will make "int a -1" work */
4b365fcd 287 last_code = ident;
1009bf5e
KM
288 return (ident); /* the ident is not in the list */
289 } /* end of procesing for alpanum character */
c93d6f87 290 /* Scan a non-alphanumeric token */
4b365fcd 291
1009bf5e
KM
292 *tok++ = *buf_ptr; /* if it is only a one-character token, it
293 * is moved here */
4b365fcd
KM
294 *tok = '\0';
295 if (++buf_ptr >= buf_end)
1009bf5e 296 fill_buffer();
4b365fcd
KM
297
298 switch (*token) {
1009bf5e
KM
299 case '\n':
300 unary_delim = ps.last_u_d;
301 ps.last_nl = true; /* remember that we just had a newline */
4b365fcd 302 code = (had_eof ? 0 : newline);
4b365fcd 303
1009bf5e
KM
304 /*
305 * if data has been exausted, the newline is a dummy, and we
306 * should return code to stop
307 */
308 break;
4b365fcd 309
1009bf5e
KM
310 case '\'': /* start of quoted character */
311 case '"': /* start of string */
312 qchar = *token;
313 if (troff) {
314 tok[-1] = '`';
315 if (qchar == '"')
316 *tok++ = '`';
317 *tok++ = BACKSLASH;
318 *tok++ = 'f';
319 *tok++ = 'L';
320 }
321 do { /* copy the string */
322 while (1) { /* move one character or [/<char>]<char> */
4b365fcd 323 if (*buf_ptr == '\n') {
1009bf5e 324 printf("%d: Unterminated literal\n", line_no);
4b365fcd 325 goto stop_lit;
4b365fcd 326 }
4b365fcd
KM
327 *tok = *buf_ptr++;
328 if (buf_ptr >= buf_end)
1009bf5e 329 fill_buffer();
4b365fcd 330 if (had_eof || ((tok - token) > (bufsize - 2))) {
1009bf5e 331 printf("Unterminated literal\n");
4b365fcd
KM
332 ++tok;
333 goto stop_lit;
1009bf5e 334 /* get outof literal copying loop */
4b365fcd 335 }
1009bf5e
KM
336 if (*tok == BACKSLASH) { /* if escape, copy extra
337 * char */
338 if (*buf_ptr == '\n') /* check for escaped
339 * newline */
4b365fcd 340 ++line_no;
1009bf5e
KM
341 if (troff) {
342 *++tok = BACKSLASH;
343 if (*buf_ptr == BACKSLASH)
344 *++tok = BACKSLASH;
345 }
346 *++tok = *buf_ptr++;
347 ++tok; /* we must increment this again because we
348 * copied two chars */
4b365fcd 349 if (buf_ptr >= buf_end)
1009bf5e 350 fill_buffer();
4b365fcd
KM
351 }
352 else
1009bf5e
KM
353 break; /* we copied one character */
354 } /* end of while (1) */
4b365fcd 355 } while (*tok++ != qchar);
1009bf5e
KM
356 if (troff) {
357 tok[-1] = BACKSLASH;
358 *tok++ = 'f';
359 *tok++ = 'R';
360 *tok++ = '\'';
361 if (qchar == '"')
362 *tok++ = '\'';
363 }
364 stop_lit:
4b365fcd
KM
365 code = ident;
366 break;
367
1009bf5e
KM
368 case ('('):
369 case ('['):
4b365fcd
KM
370 unary_delim = true;
371 code = lparen;
372 break;
373
1009bf5e
KM
374 case (')'):
375 case (']'):
4b365fcd
KM
376 code = rparen;
377 break;
378
1009bf5e
KM
379 case '#':
380 unary_delim = ps.last_u_d;
4b365fcd
KM
381 code = preesc;
382 break;
383
1009bf5e 384 case '?':
4b365fcd
KM
385 unary_delim = true;
386 code = question;
387 break;
388
1009bf5e 389 case (':'):
4b365fcd
KM
390 code = colon;
391 unary_delim = true;
392 break;
393
1009bf5e 394 case (';'):
4b365fcd
KM
395 unary_delim = true;
396 code = semicolon;
397 break;
398
1009bf5e 399 case ('{'):
4b365fcd 400 unary_delim = true;
1009bf5e
KM
401
402 /*
403 * if (ps.in_or_st) ps.block_init = 1;
404 */
405 code = ps.block_init ? lparen : lbrace;
4b365fcd
KM
406 break;
407
1009bf5e 408 case ('}'):
4b365fcd 409 unary_delim = true;
1009bf5e 410 code = ps.block_init ? rparen : rbrace;
4b365fcd
KM
411 break;
412
1009bf5e
KM
413 case 014: /* a form feed */
414 unary_delim = ps.last_u_d;
415 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
416 * right */
4b365fcd
KM
417 code = form_feed;
418 break;
419
1009bf5e 420 case (','):
4b365fcd
KM
421 unary_delim = true;
422 code = comma;
423 break;
424
1009bf5e 425 case '.':
4b365fcd
KM
426 unary_delim = false;
427 code = period;
428 break;
429
1009bf5e
KM
430 case '-':
431 case '+': /* check for -, +, --, ++ */
432 code = (ps.last_u_d ? unary_op : binary_op);
4b365fcd
KM
433 unary_delim = true;
434
435 if (*buf_ptr == token[0]) {
1009bf5e 436 /* check for doubled character */
4b365fcd 437 *tok++ = *buf_ptr++;
1009bf5e 438 /* buffer overflow will be checked at end of loop */
4b365fcd 439 if (last_code == ident || last_code == rparen) {
1009bf5e
KM
440 code = (ps.last_u_d ? unary_op : postop);
441 /* check for following ++ or -- */
4b365fcd
KM
442 unary_delim = false;
443 }
444 }
1009bf5e
KM
445 else if (*buf_ptr == '=')
446 /* check for operator += */
447 *tok++ = *buf_ptr++;
5c6e73ac 448 else if (token[0] == '-' && *buf_ptr == '>') {
1009bf5e
KM
449 /* check for operator -> */
450 *tok++ = *buf_ptr++;
5c6e73ac
KM
451 if (!pointer_as_binop) {
452 code = unary_op;
453 unary_delim = false;
454 ps.want_blank = false;
455 }
1009bf5e
KM
456 }
457 /* buffer overflow will be checked at end of switch */
4b365fcd
KM
458
459 break;
460
1009bf5e
KM
461 case '=':
462 if (ps.in_or_st)
463 ps.block_init = 1;
464 if (chartype[*buf_ptr] == opchar) { /* we have two char
465 * assignment */
466 tok[-1] = *buf_ptr++;
467 if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr)
468 *tok++ = *buf_ptr++;
469 *tok++ = '='; /* Flip =+ to += */
470 *tok = 0;
4b365fcd 471 }
4b365fcd
KM
472 code = binary_op;
473 unary_delim = true;
1009bf5e
KM
474 break;
475 /* can drop thru!!! */
4b365fcd 476
1009bf5e
KM
477 case '>':
478 case '<':
479 case '!': /* ops like <, <<, <=, !=, etc */
4b365fcd
KM
480 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
481 *tok++ = *buf_ptr;
482 if (++buf_ptr >= buf_end)
1009bf5e 483 fill_buffer();
4b365fcd 484 }
4b365fcd 485 if (*buf_ptr == '=')
1009bf5e
KM
486 *tok++ = *buf_ptr++;
487 code = (ps.last_u_d ? unary_op : binary_op);
4b365fcd
KM
488 unary_delim = true;
489 break;
490
1009bf5e 491 default:
4b365fcd 492 if (token[0] == '/' && *buf_ptr == '*') {
1009bf5e 493 /* it is start of comment */
4b365fcd
KM
494 *tok++ = '*';
495
496 if (++buf_ptr >= buf_end)
1009bf5e 497 fill_buffer();
4b365fcd
KM
498
499 code = comment;
1009bf5e 500 unary_delim = ps.last_u_d;
4b365fcd
KM
501 break;
502 }
1009bf5e
KM
503 while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') {
504 /* handle ||, &&, etc, and also things as in int *****i */
4b365fcd
KM
505 *tok++ = *buf_ptr;
506 if (++buf_ptr >= buf_end)
1009bf5e 507 fill_buffer();
4b365fcd 508 }
1009bf5e 509 code = (ps.last_u_d ? unary_op : binary_op);
4b365fcd
KM
510 unary_delim = true;
511
512
1009bf5e 513 } /* end of switch */
4b365fcd
KM
514 if (code != newline) {
515 l_struct = false;
516 last_code = code;
517 }
1009bf5e
KM
518 if (buf_ptr >= buf_end) /* check for input buffer empty */
519 fill_buffer();
520 ps.last_u_d = unary_delim;
521 *tok = '\0'; /* null terminate the token */
4b365fcd
KM
522 return (code);
523};
1009bf5e
KM
524
525/* Add the given keyword to the keyword table, using val as the keyword type
526 */
527addkey (key, val)
528char *key;
529{
530 register struct templ *p = specials;
531 while (p->rwd)
532 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
533 return;
534 else
535 p++;
536 if (p >= specials + sizeof specials / sizeof specials[0])
537 return; /* For now, table overflows are silently
538 ignored */
539 p->rwd = key;
540 p->rwcode = val;
541 p[1].rwd = 0;
542 p[1].rwcode = 0;
543 return;
544}