ANSI
[unix-history] / usr / src / usr.bin / indent / lexi.c
CommitLineData
c0bc4ef7 1/*
30f48914
KB
2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980 The Regents of the University of California.
b0627149
KB
4 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
5 * All rights reserved.
6 *
6ecf3d85 7 * %sccs.include.redist.c%
c0bc4ef7
DF
8 */
9
#ifndef lint
/* SCCS identification string for this file; compiled only when `lint` is not defined. */
static char sccsid[] = "@(#)lexi.c 5.16 (Berkeley) %G%";
#endif /* not lint */
4b365fcd 13
/*
 * Here we have the token scanner for indent.  It scans off one token and puts
 * it in the global variable "token".  It returns a code, indicating the type
 * of token scanned.
 */
4b365fcd 19
2b8540ff
KB
20#include <stdio.h>
21#include <ctype.h>
22#include <stdlib.h>
23#include <string.h>
8540f0fa
KB
24#include "indent_globs.h"
25#include "indent_codes.h"
4b365fcd
KM
26
/*
 * Character class codes as stored in chartype[] and tested by lexi().
 * Any other value in the table (0) means the character belongs to neither
 * class.
 */
enum {
    alphanum = 1,		/* may appear in an identifier or number */
    opchar = 3			/* may appear in an operator */
};
29
/*
 * One entry of the reserved-word table: the spelling of the word and the
 * class code that lexi() switches on once the word has been scanned.
 */
struct templ {
    char       *rwd;		/* spelling of the reserved word */
    int         rwcode;		/* class code, see the table below */
};

/*
 * Reserved-word table searched by lexi().  The list ends at the first entry
 * whose rwd is a null pointer; addkey() appends further entries in front of
 * that terminator, which is why the array is sized well beyond the built-in
 * words.
 *
 * Class codes as interpreted by lexi():
 *	0	no special treatment (returned as an ordinary identifier)
 *	1	switch
 *	2	case, default
 *	3	struct, union, enum
 *	4	declaration keywords
 *	5	if, while, for
 *	6	else, do
 *	7	sizeof
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef", so never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
67
/*
 * Character class table, indexed by 7-bit character code.  An entry is 1 for
 * characters that can be part of an identifier or number, 3 for characters
 * that can be part of an operator, and 0 for everything else.
 */
char        chartype[128] =
{
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space and punctuation; the dollar sign is alphanumeric */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0x30 - 0x3f: digits, then colon through question mark */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: at sign, upper case A through O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: upper case P through Z, brackets, caret, underscore */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: backquote, lower case a through o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: lower case p through z, braces, bar, tilde, DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
89
1009bf5e
KM
90
91
92
30f48914 93int
1009bf5e
KM
94lexi()
95{
30f48914
KB
96 int unary_delim; /* this is set to 1 if the current token
97 *
1009bf5e
KM
98 * forces a following operator to be unary */
99 static int last_code; /* the last token type returned */
100 static int l_struct; /* set to 1 if the last token was 'struct' */
101 int code; /* internal code to be returned */
102 char qchar; /* the delimiter character for a string */
103
0c8ee79d 104 e_token = s_token; /* point to start of place to save token */
4b365fcd 105 unary_delim = false;
1009bf5e 106 ps.col_1 = ps.last_nl; /* tell world that this token started in
30f48914 107 * column 1 iff the last thing scanned was nl */
1009bf5e
KM
108 ps.last_nl = false;
109
110 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
30f48914
KB
111 ps.col_1 = false; /* leading blanks imply token is not in column
112 * 1 */
4b365fcd 113 if (++buf_ptr >= buf_end)
1009bf5e 114 fill_buffer();
4b365fcd
KM
115 }
116
30f48914
KB
117 /* Scan an alphanumeric token */
118 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
119 /*
120 * we have a character or number
121 */
122 register char *j; /* used for searching thru list of
123 *
1009bf5e
KM
124 * reserved words */
125 register struct templ *p;
4b365fcd 126
30f48914
KB
127 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
128 int seendot = 0,
129 seenexp = 0;
130 if (*buf_ptr == '0' &&
131 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
0c8ee79d
KB
132 *e_token++ = *buf_ptr++;
133 *e_token++ = *buf_ptr++;
134 while (isxdigit(*buf_ptr)) {
19961177 135 CHECK_SIZE_TOKEN;
0c8ee79d
KB
136 *e_token++ = *buf_ptr++;
137 }
30f48914
KB
138 }
139 else
140 while (1) {
141 if (*buf_ptr == '.')
142 if (seendot)
143 break;
144 else
145 seendot++;
19961177 146 CHECK_SIZE_TOKEN;
0c8ee79d 147 *e_token++ = *buf_ptr++;
30f48914
KB
148 if (!isdigit(*buf_ptr) && *buf_ptr != '.')
149 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
150 break;
151 else {
152 seenexp++;
153 seendot++;
19961177 154 CHECK_SIZE_TOKEN;
0c8ee79d 155 *e_token++ = *buf_ptr++;
30f48914 156 if (*buf_ptr == '+' || *buf_ptr == '-')
0c8ee79d 157 *e_token++ = *buf_ptr++;
30f48914
KB
158 }
159 }
160 if (*buf_ptr == 'L' || *buf_ptr == 'l')
0c8ee79d 161 *e_token++ = *buf_ptr++;
30f48914
KB
162 }
163 else
164 while (chartype[*buf_ptr] == alphanum) { /* copy it over */
19961177 165 CHECK_SIZE_TOKEN;
0c8ee79d 166 *e_token++ = *buf_ptr++;
30f48914
KB
167 if (buf_ptr >= buf_end)
168 fill_buffer();
169 }
0c8ee79d 170 *e_token++ = '\0';
1009bf5e
KM
171 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
172 if (++buf_ptr >= buf_end)
173 fill_buffer();
174 }
175 ps.its_a_keyword = false;
176 ps.sizeof_keyword = false;
30f48914
KB
177 if (l_struct) { /* if last token was 'struct', then this token
178 * should be treated as a declaration */
4b365fcd
KM
179 l_struct = false;
180 last_code = ident;
1009bf5e 181 ps.last_u_d = true;
4b365fcd
KM
182 return (decl);
183 }
1009bf5e
KM
184 ps.last_u_d = false; /* Operator after indentifier is binary */
185 last_code = ident; /* Remember that this is the code we will
186 * return */
187
188 /*
30f48914 189 * This loop will check if the token is a keyword.
1009bf5e
KM
190 */
191 for (p = specials; (j = p->rwd) != 0; p++) {
0c8ee79d
KB
192 register char *p = s_token; /* point at scanned token */
193 if (*j++ != *p++ || *j++ != *p++)
1009bf5e 194 continue; /* This test depends on the fact that
30f48914
KB
195 * identifiers are always at least 1 character
196 * long (ie. the first two bytes of the
197 * identifier are always meaningful) */
0c8ee79d 198 if (p[-1] == 0)
1009bf5e 199 break; /* If its a one-character identifier */
0c8ee79d 200 while (*p++ == *j)
1009bf5e
KM
201 if (*j++ == 0)
202 goto found_keyword; /* I wish that C had a multi-level
203 * break... */
204 }
205 if (p->rwd) { /* we have a keyword */
206 found_keyword:
207 ps.its_a_keyword = true;
208 ps.last_u_d = true;
209 switch (p->rwcode) {
30f48914
KB
210 case 1: /* it is a switch */
211 return (swstmt);
212 case 2: /* a case or default */
213 return (casestmt);
214
215 case 3: /* a "struct" */
216 if (ps.p_l_follow)
217 break; /* inside parens: cast */
218 l_struct = true;
219
220 /*
221 * Next time around, we will want to know that we have had a
222 * 'struct'
223 */
224 case 4: /* one of the declaration keywords */
225 if (ps.p_l_follow) {
226 ps.cast_mask |= 1 << ps.p_l_follow;
227 break; /* inside parens: cast */
228 }
229 last_code = decl;
230 return (decl);
1009bf5e 231
30f48914
KB
232 case 5: /* if, while, for */
233 return (sp_paren);
1009bf5e 234
30f48914
KB
235 case 6: /* do, else */
236 return (sp_nparen);
1009bf5e 237
30f48914
KB
238 case 7:
239 ps.sizeof_keyword = true;
240 default: /* all others are treated like any other
1009bf5e 241 * identifier */
30f48914 242 return (ident);
1009bf5e
KM
243 } /* end of switch */
244 } /* end of if (found_it) */
30f48914 245 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
c5b954f4
KB
246 register char *tp = buf_ptr;
247 while (tp < buf_end)
0c8ee79d 248 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
30f48914 249 goto not_proc;
1009bf5e
KM
250 strncpy(ps.procname, token, sizeof ps.procname - 1);
251 ps.in_parameter_declaration = 1;
0c8ee79d 252 rparen_count = 1;
30f48914 253 not_proc:;
4b365fcd 254 }
1009bf5e
KM
255 /*
256 * The following hack attempts to guess whether or not the current
257 * token is in fact a declaration keyword -- one that has been
30f48914 258 * typedefd
1009bf5e 259 */
30f48914
KB
260 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
261 && !ps.p_l_follow
262 && !ps.block_init
263 && (ps.last_token == rparen || ps.last_token == semicolon ||
264 ps.last_token == decl ||
265 ps.last_token == lbrace || ps.last_token == rbrace)) {
1009bf5e
KM
266 ps.its_a_keyword = true;
267 ps.last_u_d = true;
268 last_code = decl;
269 return decl;
270 }
271 if (last_code == decl) /* if this is a declared variable, then
272 * following sign is unary */
273 ps.last_u_d = true; /* will make "int a -1" work */
4b365fcd 274 last_code = ident;
1009bf5e
KM
275 return (ident); /* the ident is not in the list */
276 } /* end of procesing for alpanum character */
4b365fcd 277
0c8ee79d
KB
278 /* Scan a non-alphanumeric token */
279
280 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
30f48914 281 * moved here */
0c8ee79d 282 *e_token = '\0';
4b365fcd 283 if (++buf_ptr >= buf_end)
1009bf5e 284 fill_buffer();
4b365fcd
KM
285
286 switch (*token) {
30f48914
KB
287 case '\n':
288 unary_delim = ps.last_u_d;
289 ps.last_nl = true; /* remember that we just had a newline */
290 code = (had_eof ? 0 : newline);
4b365fcd 291
30f48914
KB
292 /*
293 * if data has been exausted, the newline is a dummy, and we should
294 * return code to stop
295 */
296 break;
297
298 case '\'': /* start of quoted character */
299 case '"': /* start of string */
300 qchar = *token;
301 if (troff) {
0c8ee79d 302 e_token[-1] = '`';
30f48914 303 if (qchar == '"')
0c8ee79d
KB
304 *e_token++ = '`';
305 e_token = chfont(&bodyf, &stringf, e_token);
30f48914
KB
306 }
307 do { /* copy the string */
308 while (1) { /* move one character or [/<char>]<char> */
309 if (*buf_ptr == '\n') {
310 printf("%d: Unterminated literal\n", line_no);
311 goto stop_lit;
312 }
19961177
KB
313 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
314 * since CHECK_SIZE guarantees that there
0c8ee79d
KB
315 * are at least 5 entries left */
316 *e_token = *buf_ptr++;
30f48914
KB
317 if (buf_ptr >= buf_end)
318 fill_buffer();
0c8ee79d 319 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
30f48914
KB
320 if (*buf_ptr == '\n') /* check for escaped newline */
321 ++line_no;
322 if (troff) {
0c8ee79d 323 *++e_token = BACKSLASH;
30f48914 324 if (*buf_ptr == BACKSLASH)
0c8ee79d 325 *++e_token = BACKSLASH;
4b365fcd 326 }
0c8ee79d
KB
327 *++e_token = *buf_ptr++;
328 ++e_token; /* we must increment this again because we
30f48914 329 * copied two chars */
4b365fcd 330 if (buf_ptr >= buf_end)
1009bf5e 331 fill_buffer();
30f48914
KB
332 }
333 else
334 break; /* we copied one character */
335 } /* end of while (1) */
0c8ee79d 336 } while (*e_token++ != qchar);
30f48914 337 if (troff) {
0c8ee79d 338 e_token = chfont(&stringf, &bodyf, e_token - 1);
30f48914 339 if (qchar == '"')
0c8ee79d 340 *e_token++ = '\'';
30f48914
KB
341 }
342stop_lit:
343 code = ident;
344 break;
345
346 case ('('):
347 case ('['):
348 unary_delim = true;
349 code = lparen;
350 break;
351
352 case (')'):
353 case (']'):
354 code = rparen;
355 break;
356
357 case '#':
358 unary_delim = ps.last_u_d;
359 code = preesc;
360 break;
361
362 case '?':
363 unary_delim = true;
364 code = question;
365 break;
366
367 case (':'):
368 code = colon;
369 unary_delim = true;
370 break;
371
372 case (';'):
373 unary_delim = true;
374 code = semicolon;
375 break;
376
377 case ('{'):
378 unary_delim = true;
4b365fcd 379
30f48914
KB
380 /*
381 * if (ps.in_or_st) ps.block_init = 1;
382 */
383 /* ? code = ps.block_init ? lparen : lbrace; */
384 code = lbrace;
385 break;
386
387 case ('}'):
388 unary_delim = true;
389 /* ? code = ps.block_init ? rparen : rbrace; */
390 code = rbrace;
391 break;
392
393 case 014: /* a form feed */
394 unary_delim = ps.last_u_d;
395 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
1009bf5e 396 * right */
30f48914
KB
397 code = form_feed;
398 break;
399
400 case (','):
401 unary_delim = true;
402 code = comma;
403 break;
404
405 case '.':
406 unary_delim = false;
407 code = period;
408 break;
409
410 case '-':
411 case '+': /* check for -, +, --, ++ */
412 code = (ps.last_u_d ? unary_op : binary_op);
413 unary_delim = true;
414
415 if (*buf_ptr == token[0]) {
416 /* check for doubled character */
0c8ee79d 417 *e_token++ = *buf_ptr++;
30f48914
KB
418 /* buffer overflow will be checked at end of loop */
419 if (last_code == ident || last_code == rparen) {
420 code = (ps.last_u_d ? unary_op : postop);
421 /* check for following ++ or -- */
422 unary_delim = false;
4b365fcd 423 }
30f48914
KB
424 }
425 else if (*buf_ptr == '=')
426 /* check for operator += */
0c8ee79d 427 *e_token++ = *buf_ptr++;
30f48914
KB
428 else if (*buf_ptr == '>') {
429 /* check for operator -> */
0c8ee79d 430 *e_token++ = *buf_ptr++;
30f48914
KB
431 if (!pointer_as_binop) {
432 unary_delim = false;
433 code = unary_op;
434 ps.want_blank = false;
4b365fcd 435 }
30f48914
KB
436 }
437 break; /* buffer overflow will be checked at end of
438 * switch */
439
440 case '=':
441 if (ps.in_or_st)
442 ps.block_init = 1;
443#ifdef undef
444 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
0c8ee79d
KB
445 e_token[-1] = *buf_ptr++;
446 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
447 *e_token++ = *buf_ptr++;
448 *e_token++ = '='; /* Flip =+ to += */
449 *e_token = 0;
30f48914
KB
450 }
451#else
452 if (*buf_ptr == '=') {/* == */
0c8ee79d 453 *e_token++ = '='; /* Flip =+ to += */
30f48914 454 buf_ptr++;
0c8ee79d 455 *e_token = 0;
30f48914
KB
456 }
457#endif
458 code = binary_op;
459 unary_delim = true;
460 break;
461 /* can drop thru!!! */
462
463 case '>':
464 case '<':
465 case '!': /* ops like <, <<, <=, !=, etc */
466 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
0c8ee79d 467 *e_token++ = *buf_ptr;
30f48914
KB
468 if (++buf_ptr >= buf_end)
469 fill_buffer();
470 }
471 if (*buf_ptr == '=')
0c8ee79d 472 *e_token++ = *buf_ptr++;
30f48914
KB
473 code = (ps.last_u_d ? unary_op : binary_op);
474 unary_delim = true;
475 break;
4b365fcd 476
30f48914
KB
477 default:
478 if (token[0] == '/' && *buf_ptr == '*') {
479 /* it is start of comment */
0c8ee79d 480 *e_token++ = '*';
4b365fcd 481
30f48914
KB
482 if (++buf_ptr >= buf_end)
483 fill_buffer();
4b365fcd 484
30f48914
KB
485 code = comment;
486 unary_delim = ps.last_u_d;
487 break;
488 }
0c8ee79d 489 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
30f48914
KB
490 /*
491 * handle ||, &&, etc, and also things as in int *****i
492 */
0c8ee79d 493 *e_token++ = *buf_ptr;
30f48914
KB
494 if (++buf_ptr >= buf_end)
495 fill_buffer();
496 }
497 code = (ps.last_u_d ? unary_op : binary_op);
498 unary_delim = true;
4b365fcd
KM
499
500
1009bf5e 501 } /* end of switch */
4b365fcd
KM
502 if (code != newline) {
503 l_struct = false;
504 last_code = code;
505 }
1009bf5e
KM
506 if (buf_ptr >= buf_end) /* check for input buffer empty */
507 fill_buffer();
508 ps.last_u_d = unary_delim;
0c8ee79d 509 *e_token = '\0'; /* null terminate the token */
4b365fcd 510 return (code);
1a43f1ba 511}
1009bf5e 512
30f48914
KB
513/*
514 * Add the given keyword to the keyword table, using val as the keyword type
515 */
516addkey(key, val)
517 char *key;
1009bf5e
KM
518{
519 register struct templ *p = specials;
520 while (p->rwd)
521 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
522 return;
523 else
524 p++;
525 if (p >= specials + sizeof specials / sizeof specials[0])
526 return; /* For now, table overflows are silently
30f48914 527 * ignored */
1009bf5e
KM
528 p->rwd = key;
529 p->rwcode = val;
530 p[1].rwd = 0;
531 p[1].rwcode = 0;
532 return;
533}