BSD 4_4 release
[unix-history] / usr / src / usr.bin / indent / lexi.c
CommitLineData
c0bc4ef7 1/*
30f48914 2 * Copyright (c) 1985 Sun Microsystems, Inc.
ad787160
C
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
b0627149
KB
5 * All rights reserved.
6 *
ad787160
C
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
c0bc4ef7
DF
34 */
35
36#ifndef lint
ad787160 37static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
b0627149 38#endif /* not lint */
4b365fcd 39
/*
 * Here we have the token scanner for indent.  It scans off one token and puts
 * it in the global variable "token".  It returns a code indicating the type
 * of token scanned.
 */
4b365fcd 45
2b8540ff
KB
46#include <stdio.h>
47#include <ctype.h>
48#include <stdlib.h>
49#include <string.h>
8540f0fa
KB
50#include "indent_globs.h"
51#include "indent_codes.h"
4b365fcd
KM
52
/* Character classes stored in chartype[] */
#define alphanum 1              /* may appear in an identifier or number */
#define opchar 3                /* may appear in an operator */

/*
 * One entry of the reserved-word table.
 */
struct templ {
    char       *rwd;            /* spelling of the word; 0 ends the table */
    int         rwcode;         /* class of the word, switched on in lexi() */
};

/*
 * The reserved-word table.  The codes are interpreted by lexi():
 *   0  no special treatment (returned as a plain identifier)
 *   1  switch
 *   2  case, default
 *   3  struct, union, enum
 *   4  declaration keywords (types and storage classes)
 *   5  if, while, for
 *   6  else, do
 *   7  sizeof
 * The table is deliberately larger than its initial contents: addkey()
 * appends further words after the last initialised entry, and the entry
 * with a null rwd marks the current end.
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},             /* was misspelled "typdef" */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
93
/*
 * Lexical class of each 7-bit character code, indexed by the code itself:
 * 1 marks characters that can be part of an identifier or number, 3 marks
 * characters that can be part of an operator, and 0 marks everything else.
 */
char chartype[128] =
{
    /* 000 - 007 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 010 - 017 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 020 - 027 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 030 - 037 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* sp !"#$%&' */ 0, 3, 0, 0, 1, 3, 3, 0,
    /* ( ) * + , - . / */ 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0 - 7 */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* 8 9 : ; < = > ? */ 1, 1, 0, 0, 3, 3, 3, 3,
    /* @ A - G */ 0, 1, 1, 1, 1, 1, 1, 1,
    /* H - O */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* P - W */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* X Y Z [ \ ] ^ _ */ 1, 1, 1, 0, 0, 0, 3, 1,
    /* ` a - g */ 0, 1, 1, 1, 1, 1, 1, 1,
    /* h - o */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* p - w */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* x y z { | } ~ DEL */ 1, 1, 1, 0, 3, 0, 3, 0
};
115
1009bf5e
KM
116
117
118
30f48914 119int
1009bf5e
KM
120lexi()
121{
30f48914
KB
122 int unary_delim; /* this is set to 1 if the current token
123 *
1009bf5e
KM
124 * forces a following operator to be unary */
125 static int last_code; /* the last token type returned */
126 static int l_struct; /* set to 1 if the last token was 'struct' */
127 int code; /* internal code to be returned */
128 char qchar; /* the delimiter character for a string */
129
0c8ee79d 130 e_token = s_token; /* point to start of place to save token */
4b365fcd 131 unary_delim = false;
1009bf5e 132 ps.col_1 = ps.last_nl; /* tell world that this token started in
30f48914 133 * column 1 iff the last thing scanned was nl */
1009bf5e
KM
134 ps.last_nl = false;
135
136 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
30f48914
KB
137 ps.col_1 = false; /* leading blanks imply token is not in column
138 * 1 */
4b365fcd 139 if (++buf_ptr >= buf_end)
1009bf5e 140 fill_buffer();
4b365fcd
KM
141 }
142
30f48914
KB
143 /* Scan an alphanumeric token */
144 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
145 /*
146 * we have a character or number
147 */
148 register char *j; /* used for searching thru list of
149 *
1009bf5e
KM
150 * reserved words */
151 register struct templ *p;
4b365fcd 152
30f48914
KB
153 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
154 int seendot = 0,
155 seenexp = 0;
156 if (*buf_ptr == '0' &&
157 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
0c8ee79d
KB
158 *e_token++ = *buf_ptr++;
159 *e_token++ = *buf_ptr++;
160 while (isxdigit(*buf_ptr)) {
19961177 161 CHECK_SIZE_TOKEN;
0c8ee79d
KB
162 *e_token++ = *buf_ptr++;
163 }
30f48914
KB
164 }
165 else
166 while (1) {
167 if (*buf_ptr == '.')
168 if (seendot)
169 break;
170 else
171 seendot++;
19961177 172 CHECK_SIZE_TOKEN;
0c8ee79d 173 *e_token++ = *buf_ptr++;
30f48914
KB
174 if (!isdigit(*buf_ptr) && *buf_ptr != '.')
175 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
176 break;
177 else {
178 seenexp++;
179 seendot++;
19961177 180 CHECK_SIZE_TOKEN;
0c8ee79d 181 *e_token++ = *buf_ptr++;
30f48914 182 if (*buf_ptr == '+' || *buf_ptr == '-')
0c8ee79d 183 *e_token++ = *buf_ptr++;
30f48914
KB
184 }
185 }
186 if (*buf_ptr == 'L' || *buf_ptr == 'l')
0c8ee79d 187 *e_token++ = *buf_ptr++;
30f48914
KB
188 }
189 else
190 while (chartype[*buf_ptr] == alphanum) { /* copy it over */
19961177 191 CHECK_SIZE_TOKEN;
0c8ee79d 192 *e_token++ = *buf_ptr++;
30f48914
KB
193 if (buf_ptr >= buf_end)
194 fill_buffer();
195 }
0c8ee79d 196 *e_token++ = '\0';
1009bf5e
KM
197 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
198 if (++buf_ptr >= buf_end)
199 fill_buffer();
200 }
201 ps.its_a_keyword = false;
202 ps.sizeof_keyword = false;
30f48914
KB
203 if (l_struct) { /* if last token was 'struct', then this token
204 * should be treated as a declaration */
4b365fcd
KM
205 l_struct = false;
206 last_code = ident;
1009bf5e 207 ps.last_u_d = true;
4b365fcd
KM
208 return (decl);
209 }
1009bf5e
KM
210 ps.last_u_d = false; /* Operator after indentifier is binary */
211 last_code = ident; /* Remember that this is the code we will
212 * return */
213
214 /*
30f48914 215 * This loop will check if the token is a keyword.
1009bf5e
KM
216 */
217 for (p = specials; (j = p->rwd) != 0; p++) {
0c8ee79d
KB
218 register char *p = s_token; /* point at scanned token */
219 if (*j++ != *p++ || *j++ != *p++)
1009bf5e 220 continue; /* This test depends on the fact that
30f48914
KB
221 * identifiers are always at least 1 character
222 * long (ie. the first two bytes of the
223 * identifier are always meaningful) */
0c8ee79d 224 if (p[-1] == 0)
1009bf5e 225 break; /* If its a one-character identifier */
0c8ee79d 226 while (*p++ == *j)
1009bf5e
KM
227 if (*j++ == 0)
228 goto found_keyword; /* I wish that C had a multi-level
229 * break... */
230 }
231 if (p->rwd) { /* we have a keyword */
232 found_keyword:
233 ps.its_a_keyword = true;
234 ps.last_u_d = true;
235 switch (p->rwcode) {
30f48914
KB
236 case 1: /* it is a switch */
237 return (swstmt);
238 case 2: /* a case or default */
239 return (casestmt);
240
241 case 3: /* a "struct" */
242 if (ps.p_l_follow)
243 break; /* inside parens: cast */
244 l_struct = true;
245
246 /*
247 * Next time around, we will want to know that we have had a
248 * 'struct'
249 */
250 case 4: /* one of the declaration keywords */
251 if (ps.p_l_follow) {
252 ps.cast_mask |= 1 << ps.p_l_follow;
253 break; /* inside parens: cast */
254 }
255 last_code = decl;
256 return (decl);
1009bf5e 257
30f48914
KB
258 case 5: /* if, while, for */
259 return (sp_paren);
1009bf5e 260
30f48914
KB
261 case 6: /* do, else */
262 return (sp_nparen);
1009bf5e 263
30f48914
KB
264 case 7:
265 ps.sizeof_keyword = true;
266 default: /* all others are treated like any other
1009bf5e 267 * identifier */
30f48914 268 return (ident);
1009bf5e
KM
269 } /* end of switch */
270 } /* end of if (found_it) */
30f48914 271 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
c5b954f4
KB
272 register char *tp = buf_ptr;
273 while (tp < buf_end)
0c8ee79d 274 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
30f48914 275 goto not_proc;
1009bf5e
KM
276 strncpy(ps.procname, token, sizeof ps.procname - 1);
277 ps.in_parameter_declaration = 1;
0c8ee79d 278 rparen_count = 1;
30f48914 279 not_proc:;
4b365fcd 280 }
1009bf5e
KM
281 /*
282 * The following hack attempts to guess whether or not the current
283 * token is in fact a declaration keyword -- one that has been
30f48914 284 * typedefd
1009bf5e 285 */
30f48914
KB
286 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
287 && !ps.p_l_follow
288 && !ps.block_init
289 && (ps.last_token == rparen || ps.last_token == semicolon ||
290 ps.last_token == decl ||
291 ps.last_token == lbrace || ps.last_token == rbrace)) {
1009bf5e
KM
292 ps.its_a_keyword = true;
293 ps.last_u_d = true;
294 last_code = decl;
295 return decl;
296 }
297 if (last_code == decl) /* if this is a declared variable, then
298 * following sign is unary */
299 ps.last_u_d = true; /* will make "int a -1" work */
4b365fcd 300 last_code = ident;
1009bf5e
KM
301 return (ident); /* the ident is not in the list */
302 } /* end of procesing for alpanum character */
4b365fcd 303
0c8ee79d
KB
304 /* Scan a non-alphanumeric token */
305
306 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
30f48914 307 * moved here */
0c8ee79d 308 *e_token = '\0';
4b365fcd 309 if (++buf_ptr >= buf_end)
1009bf5e 310 fill_buffer();
4b365fcd
KM
311
312 switch (*token) {
30f48914
KB
313 case '\n':
314 unary_delim = ps.last_u_d;
315 ps.last_nl = true; /* remember that we just had a newline */
316 code = (had_eof ? 0 : newline);
4b365fcd 317
30f48914
KB
318 /*
319 * if data has been exausted, the newline is a dummy, and we should
320 * return code to stop
321 */
322 break;
323
324 case '\'': /* start of quoted character */
325 case '"': /* start of string */
326 qchar = *token;
327 if (troff) {
0c8ee79d 328 e_token[-1] = '`';
30f48914 329 if (qchar == '"')
0c8ee79d
KB
330 *e_token++ = '`';
331 e_token = chfont(&bodyf, &stringf, e_token);
30f48914
KB
332 }
333 do { /* copy the string */
334 while (1) { /* move one character or [/<char>]<char> */
335 if (*buf_ptr == '\n') {
336 printf("%d: Unterminated literal\n", line_no);
337 goto stop_lit;
338 }
19961177
KB
339 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
340 * since CHECK_SIZE guarantees that there
0c8ee79d
KB
341 * are at least 5 entries left */
342 *e_token = *buf_ptr++;
30f48914
KB
343 if (buf_ptr >= buf_end)
344 fill_buffer();
0c8ee79d 345 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
30f48914
KB
346 if (*buf_ptr == '\n') /* check for escaped newline */
347 ++line_no;
348 if (troff) {
0c8ee79d 349 *++e_token = BACKSLASH;
30f48914 350 if (*buf_ptr == BACKSLASH)
0c8ee79d 351 *++e_token = BACKSLASH;
4b365fcd 352 }
0c8ee79d
KB
353 *++e_token = *buf_ptr++;
354 ++e_token; /* we must increment this again because we
30f48914 355 * copied two chars */
4b365fcd 356 if (buf_ptr >= buf_end)
1009bf5e 357 fill_buffer();
30f48914
KB
358 }
359 else
360 break; /* we copied one character */
361 } /* end of while (1) */
0c8ee79d 362 } while (*e_token++ != qchar);
30f48914 363 if (troff) {
0c8ee79d 364 e_token = chfont(&stringf, &bodyf, e_token - 1);
30f48914 365 if (qchar == '"')
0c8ee79d 366 *e_token++ = '\'';
30f48914
KB
367 }
368stop_lit:
369 code = ident;
370 break;
371
372 case ('('):
373 case ('['):
374 unary_delim = true;
375 code = lparen;
376 break;
377
378 case (')'):
379 case (']'):
380 code = rparen;
381 break;
382
383 case '#':
384 unary_delim = ps.last_u_d;
385 code = preesc;
386 break;
387
388 case '?':
389 unary_delim = true;
390 code = question;
391 break;
392
393 case (':'):
394 code = colon;
395 unary_delim = true;
396 break;
397
398 case (';'):
399 unary_delim = true;
400 code = semicolon;
401 break;
402
403 case ('{'):
404 unary_delim = true;
4b365fcd 405
30f48914
KB
406 /*
407 * if (ps.in_or_st) ps.block_init = 1;
408 */
409 /* ? code = ps.block_init ? lparen : lbrace; */
410 code = lbrace;
411 break;
412
413 case ('}'):
414 unary_delim = true;
415 /* ? code = ps.block_init ? rparen : rbrace; */
416 code = rbrace;
417 break;
418
419 case 014: /* a form feed */
420 unary_delim = ps.last_u_d;
421 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
1009bf5e 422 * right */
30f48914
KB
423 code = form_feed;
424 break;
425
426 case (','):
427 unary_delim = true;
428 code = comma;
429 break;
430
431 case '.':
432 unary_delim = false;
433 code = period;
434 break;
435
436 case '-':
437 case '+': /* check for -, +, --, ++ */
438 code = (ps.last_u_d ? unary_op : binary_op);
439 unary_delim = true;
440
441 if (*buf_ptr == token[0]) {
442 /* check for doubled character */
0c8ee79d 443 *e_token++ = *buf_ptr++;
30f48914
KB
444 /* buffer overflow will be checked at end of loop */
445 if (last_code == ident || last_code == rparen) {
446 code = (ps.last_u_d ? unary_op : postop);
447 /* check for following ++ or -- */
448 unary_delim = false;
4b365fcd 449 }
30f48914
KB
450 }
451 else if (*buf_ptr == '=')
452 /* check for operator += */
0c8ee79d 453 *e_token++ = *buf_ptr++;
30f48914
KB
454 else if (*buf_ptr == '>') {
455 /* check for operator -> */
0c8ee79d 456 *e_token++ = *buf_ptr++;
30f48914
KB
457 if (!pointer_as_binop) {
458 unary_delim = false;
459 code = unary_op;
460 ps.want_blank = false;
4b365fcd 461 }
30f48914
KB
462 }
463 break; /* buffer overflow will be checked at end of
464 * switch */
465
466 case '=':
467 if (ps.in_or_st)
468 ps.block_init = 1;
469#ifdef undef
470 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
0c8ee79d
KB
471 e_token[-1] = *buf_ptr++;
472 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
473 *e_token++ = *buf_ptr++;
474 *e_token++ = '='; /* Flip =+ to += */
475 *e_token = 0;
30f48914
KB
476 }
477#else
478 if (*buf_ptr == '=') {/* == */
0c8ee79d 479 *e_token++ = '='; /* Flip =+ to += */
30f48914 480 buf_ptr++;
0c8ee79d 481 *e_token = 0;
30f48914
KB
482 }
483#endif
484 code = binary_op;
485 unary_delim = true;
486 break;
487 /* can drop thru!!! */
488
489 case '>':
490 case '<':
491 case '!': /* ops like <, <<, <=, !=, etc */
492 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
0c8ee79d 493 *e_token++ = *buf_ptr;
30f48914
KB
494 if (++buf_ptr >= buf_end)
495 fill_buffer();
496 }
497 if (*buf_ptr == '=')
0c8ee79d 498 *e_token++ = *buf_ptr++;
30f48914
KB
499 code = (ps.last_u_d ? unary_op : binary_op);
500 unary_delim = true;
501 break;
4b365fcd 502
30f48914
KB
503 default:
504 if (token[0] == '/' && *buf_ptr == '*') {
505 /* it is start of comment */
0c8ee79d 506 *e_token++ = '*';
4b365fcd 507
30f48914
KB
508 if (++buf_ptr >= buf_end)
509 fill_buffer();
4b365fcd 510
30f48914
KB
511 code = comment;
512 unary_delim = ps.last_u_d;
513 break;
514 }
0c8ee79d 515 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
30f48914
KB
516 /*
517 * handle ||, &&, etc, and also things as in int *****i
518 */
0c8ee79d 519 *e_token++ = *buf_ptr;
30f48914
KB
520 if (++buf_ptr >= buf_end)
521 fill_buffer();
522 }
523 code = (ps.last_u_d ? unary_op : binary_op);
524 unary_delim = true;
4b365fcd
KM
525
526
1009bf5e 527 } /* end of switch */
4b365fcd
KM
528 if (code != newline) {
529 l_struct = false;
530 last_code = code;
531 }
1009bf5e
KM
532 if (buf_ptr >= buf_end) /* check for input buffer empty */
533 fill_buffer();
534 ps.last_u_d = unary_delim;
0c8ee79d 535 *e_token = '\0'; /* null terminate the token */
4b365fcd 536 return (code);
1a43f1ba 537}
1009bf5e 538
30f48914
KB
539/*
540 * Add the given keyword to the keyword table, using val as the keyword type
541 */
542addkey(key, val)
543 char *key;
1009bf5e
KM
544{
545 register struct templ *p = specials;
546 while (p->rwd)
547 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
548 return;
549 else
550 p++;
551 if (p >= specials + sizeof specials / sizeof specials[0])
552 return; /* For now, table overflows are silently
30f48914 553 * ignored */
1009bf5e
KM
554 p->rwd = key;
555 p->rwcode = val;
556 p[1].rwd = 0;
557 p[1].rwcode = 0;
558 return;
559}