BSD 4_3_Reno release
[unix-history] / usr / src / pgrm / indent / lexi.c
CommitLineData
/*
 * Copyright (c) 1985 Sun Microsystems, Inc.
 * Copyright (c) 1980 The Regents of the University of California.
 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that: (1) source distributions retain this entire copyright
 * notice and comment, and (2) distributions including binaries display
 * the following acknowledgement:  ``This product includes software
 * developed by the University of California, Berkeley and its contributors''
 * in the documentation or other materials provided with the distribution
 * and in all advertising materials mentioning features or use of this
 * software. Neither the name of the University nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

/* SCCS version identification string; compiled out when running lint. */
#ifndef lint
static char sccsid[] = "@(#)lexi.c 5.15 (Berkeley) 6/1/90";
#endif /* not lint */

/*
 * Here we have the token scanner for indent.  It scans off one token and puts
 * it in the global variable "token".  It returns a code, indicating the type
 * of token scanned.
 */

#include "indent_globs.h"
#include "indent_codes.h"
#include <ctype.h>
#include <string.h>

/* Character classes stored in chartype[]; any other value means "neither". */
#define alphanum 1		/* may appear in an identifier or number */
#define opchar 3		/* may appear in an operator */

/*
 * One entry of the reserved-word table: the spelling of the word and the
 * class code that lexi() switches on when the word is scanned.
 */
struct templ {
    char       *rwd;		/* keyword text; a null pointer ends the table */
    int         rwcode;		/* keyword class, see the list below */
};

/*
 * Reserved words known to the scanner.  The table is terminated by an entry
 * whose rwd is 0, and is sized at 100 entries so that addkey() can append
 * more words at run time.
 *
 * rwcode values, as interpreted by lexi():
 *   0  no special treatment (returned as an ordinary identifier)
 *   1  switch
 *   2  case / default
 *   3  struct, union, enum
 *   4  declaration keyword
 *   5  if, while, for
 *   6  else, do
 *   7  sizeof
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef" and never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};

/*
 * Character class table, indexed by 7-bit ASCII code.  It is used to decide
 * what type (alphanumeric, operator) each character is: alphanum (1) marks
 * letters, digits, '_' and '$'; opchar (3) marks operator characters; 0 marks
 * everything else.  The table has only 128 entries, so an index must be in
 * the range 0..127.
 */
char chartype[128] =
{
    0, 0, 0, 0, 0, 0, 0, 0,	/* 000: control characters */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 010 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 020 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 030 */
    0, 3, 0, 0, 1, 3, 3, 0,	/* 040: sp ! " # $ % & ' */
    0, 0, 3, 3, 0, 3, 0, 3,	/* 050: ( ) * + , - . / */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 060: 0 1 2 3 4 5 6 7 */
    1, 1, 0, 0, 3, 3, 3, 3,	/* 070: 8 9 : ; < = > ? */
    0, 1, 1, 1, 1, 1, 1, 1,	/* 100: @ A B C D E F G */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 110: H I J K L M N O */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 120: P Q R S T U V W */
    1, 1, 1, 0, 0, 0, 3, 1,	/* 130: X Y Z [ \ ] ^ _ */
    0, 1, 1, 1, 1, 1, 1, 1,	/* 140: ` a b c d e f g */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 150: h i j k l m n o */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 160: p q r s t u v w */
    1, 1, 1, 0, 3, 0, 3, 0	/* 170: x y z { | } ~ DEL */
};

30f48914 102int
1009bf5e
KM
103lexi()
104{
30f48914
KB
105 int unary_delim; /* this is set to 1 if the current token
106 *
1009bf5e
KM
107 * forces a following operator to be unary */
108 static int last_code; /* the last token type returned */
109 static int l_struct; /* set to 1 if the last token was 'struct' */
110 int code; /* internal code to be returned */
111 char qchar; /* the delimiter character for a string */
112
0c8ee79d 113 e_token = s_token; /* point to start of place to save token */
4b365fcd 114 unary_delim = false;
1009bf5e 115 ps.col_1 = ps.last_nl; /* tell world that this token started in
30f48914 116 * column 1 iff the last thing scanned was nl */
1009bf5e
KM
117 ps.last_nl = false;
118
119 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
30f48914
KB
120 ps.col_1 = false; /* leading blanks imply token is not in column
121 * 1 */
4b365fcd 122 if (++buf_ptr >= buf_end)
1009bf5e 123 fill_buffer();
4b365fcd
KM
124 }
125
30f48914
KB
126 /* Scan an alphanumeric token */
127 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
128 /*
129 * we have a character or number
130 */
131 register char *j; /* used for searching thru list of
132 *
1009bf5e
KM
133 * reserved words */
134 register struct templ *p;
4b365fcd 135
30f48914
KB
136 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
137 int seendot = 0,
138 seenexp = 0;
139 if (*buf_ptr == '0' &&
140 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
0c8ee79d
KB
141 *e_token++ = *buf_ptr++;
142 *e_token++ = *buf_ptr++;
143 while (isxdigit(*buf_ptr)) {
19961177 144 CHECK_SIZE_TOKEN;
0c8ee79d
KB
145 *e_token++ = *buf_ptr++;
146 }
30f48914
KB
147 }
148 else
149 while (1) {
150 if (*buf_ptr == '.')
151 if (seendot)
152 break;
153 else
154 seendot++;
19961177 155 CHECK_SIZE_TOKEN;
0c8ee79d 156 *e_token++ = *buf_ptr++;
30f48914
KB
157 if (!isdigit(*buf_ptr) && *buf_ptr != '.')
158 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
159 break;
160 else {
161 seenexp++;
162 seendot++;
19961177 163 CHECK_SIZE_TOKEN;
0c8ee79d 164 *e_token++ = *buf_ptr++;
30f48914 165 if (*buf_ptr == '+' || *buf_ptr == '-')
0c8ee79d 166 *e_token++ = *buf_ptr++;
30f48914
KB
167 }
168 }
169 if (*buf_ptr == 'L' || *buf_ptr == 'l')
0c8ee79d 170 *e_token++ = *buf_ptr++;
30f48914
KB
171 }
172 else
173 while (chartype[*buf_ptr] == alphanum) { /* copy it over */
19961177 174 CHECK_SIZE_TOKEN;
0c8ee79d 175 *e_token++ = *buf_ptr++;
30f48914
KB
176 if (buf_ptr >= buf_end)
177 fill_buffer();
178 }
0c8ee79d 179 *e_token++ = '\0';
1009bf5e
KM
180 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
181 if (++buf_ptr >= buf_end)
182 fill_buffer();
183 }
184 ps.its_a_keyword = false;
185 ps.sizeof_keyword = false;
30f48914
KB
186 if (l_struct) { /* if last token was 'struct', then this token
187 * should be treated as a declaration */
4b365fcd
KM
188 l_struct = false;
189 last_code = ident;
1009bf5e 190 ps.last_u_d = true;
4b365fcd
KM
191 return (decl);
192 }
1009bf5e
KM
193 ps.last_u_d = false; /* Operator after indentifier is binary */
194 last_code = ident; /* Remember that this is the code we will
195 * return */
196
197 /*
30f48914 198 * This loop will check if the token is a keyword.
1009bf5e
KM
199 */
200 for (p = specials; (j = p->rwd) != 0; p++) {
0c8ee79d
KB
201 register char *p = s_token; /* point at scanned token */
202 if (*j++ != *p++ || *j++ != *p++)
1009bf5e 203 continue; /* This test depends on the fact that
30f48914
KB
204 * identifiers are always at least 1 character
205 * long (ie. the first two bytes of the
206 * identifier are always meaningful) */
0c8ee79d 207 if (p[-1] == 0)
1009bf5e 208 break; /* If its a one-character identifier */
0c8ee79d 209 while (*p++ == *j)
1009bf5e
KM
210 if (*j++ == 0)
211 goto found_keyword; /* I wish that C had a multi-level
212 * break... */
213 }
214 if (p->rwd) { /* we have a keyword */
215 found_keyword:
216 ps.its_a_keyword = true;
217 ps.last_u_d = true;
218 switch (p->rwcode) {
30f48914
KB
219 case 1: /* it is a switch */
220 return (swstmt);
221 case 2: /* a case or default */
222 return (casestmt);
223
224 case 3: /* a "struct" */
225 if (ps.p_l_follow)
226 break; /* inside parens: cast */
227 l_struct = true;
228
229 /*
230 * Next time around, we will want to know that we have had a
231 * 'struct'
232 */
233 case 4: /* one of the declaration keywords */
234 if (ps.p_l_follow) {
235 ps.cast_mask |= 1 << ps.p_l_follow;
236 break; /* inside parens: cast */
237 }
238 last_code = decl;
239 return (decl);
1009bf5e 240
30f48914
KB
241 case 5: /* if, while, for */
242 return (sp_paren);
1009bf5e 243
30f48914
KB
244 case 6: /* do, else */
245 return (sp_nparen);
1009bf5e 246
30f48914
KB
247 case 7:
248 ps.sizeof_keyword = true;
249 default: /* all others are treated like any other
1009bf5e 250 * identifier */
30f48914 251 return (ident);
1009bf5e
KM
252 } /* end of switch */
253 } /* end of if (found_it) */
30f48914 254 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
c5b954f4
KB
255 register char *tp = buf_ptr;
256 while (tp < buf_end)
0c8ee79d 257 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
30f48914 258 goto not_proc;
1009bf5e
KM
259 strncpy(ps.procname, token, sizeof ps.procname - 1);
260 ps.in_parameter_declaration = 1;
0c8ee79d 261 rparen_count = 1;
30f48914 262 not_proc:;
4b365fcd 263 }
1009bf5e
KM
264 /*
265 * The following hack attempts to guess whether or not the current
266 * token is in fact a declaration keyword -- one that has been
30f48914 267 * typedefd
1009bf5e 268 */
30f48914
KB
269 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
270 && !ps.p_l_follow
271 && !ps.block_init
272 && (ps.last_token == rparen || ps.last_token == semicolon ||
273 ps.last_token == decl ||
274 ps.last_token == lbrace || ps.last_token == rbrace)) {
1009bf5e
KM
275 ps.its_a_keyword = true;
276 ps.last_u_d = true;
277 last_code = decl;
278 return decl;
279 }
280 if (last_code == decl) /* if this is a declared variable, then
281 * following sign is unary */
282 ps.last_u_d = true; /* will make "int a -1" work */
4b365fcd 283 last_code = ident;
1009bf5e
KM
284 return (ident); /* the ident is not in the list */
285 } /* end of procesing for alpanum character */
4b365fcd 286
0c8ee79d
KB
287 /* Scan a non-alphanumeric token */
288
289 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
30f48914 290 * moved here */
0c8ee79d 291 *e_token = '\0';
4b365fcd 292 if (++buf_ptr >= buf_end)
1009bf5e 293 fill_buffer();
4b365fcd
KM
294
295 switch (*token) {
30f48914
KB
296 case '\n':
297 unary_delim = ps.last_u_d;
298 ps.last_nl = true; /* remember that we just had a newline */
299 code = (had_eof ? 0 : newline);
4b365fcd 300
30f48914
KB
301 /*
302 * if data has been exausted, the newline is a dummy, and we should
303 * return code to stop
304 */
305 break;
306
307 case '\'': /* start of quoted character */
308 case '"': /* start of string */
309 qchar = *token;
310 if (troff) {
0c8ee79d 311 e_token[-1] = '`';
30f48914 312 if (qchar == '"')
0c8ee79d
KB
313 *e_token++ = '`';
314 e_token = chfont(&bodyf, &stringf, e_token);
30f48914
KB
315 }
316 do { /* copy the string */
317 while (1) { /* move one character or [/<char>]<char> */
318 if (*buf_ptr == '\n') {
319 printf("%d: Unterminated literal\n", line_no);
320 goto stop_lit;
321 }
19961177
KB
322 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
323 * since CHECK_SIZE guarantees that there
0c8ee79d
KB
324 * are at least 5 entries left */
325 *e_token = *buf_ptr++;
30f48914
KB
326 if (buf_ptr >= buf_end)
327 fill_buffer();
0c8ee79d 328 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
30f48914
KB
329 if (*buf_ptr == '\n') /* check for escaped newline */
330 ++line_no;
331 if (troff) {
0c8ee79d 332 *++e_token = BACKSLASH;
30f48914 333 if (*buf_ptr == BACKSLASH)
0c8ee79d 334 *++e_token = BACKSLASH;
4b365fcd 335 }
0c8ee79d
KB
336 *++e_token = *buf_ptr++;
337 ++e_token; /* we must increment this again because we
30f48914 338 * copied two chars */
4b365fcd 339 if (buf_ptr >= buf_end)
1009bf5e 340 fill_buffer();
30f48914
KB
341 }
342 else
343 break; /* we copied one character */
344 } /* end of while (1) */
0c8ee79d 345 } while (*e_token++ != qchar);
30f48914 346 if (troff) {
0c8ee79d 347 e_token = chfont(&stringf, &bodyf, e_token - 1);
30f48914 348 if (qchar == '"')
0c8ee79d 349 *e_token++ = '\'';
30f48914
KB
350 }
351stop_lit:
352 code = ident;
353 break;
354
355 case ('('):
356 case ('['):
357 unary_delim = true;
358 code = lparen;
359 break;
360
361 case (')'):
362 case (']'):
363 code = rparen;
364 break;
365
366 case '#':
367 unary_delim = ps.last_u_d;
368 code = preesc;
369 break;
370
371 case '?':
372 unary_delim = true;
373 code = question;
374 break;
375
376 case (':'):
377 code = colon;
378 unary_delim = true;
379 break;
380
381 case (';'):
382 unary_delim = true;
383 code = semicolon;
384 break;
385
386 case ('{'):
387 unary_delim = true;
4b365fcd 388
30f48914
KB
389 /*
390 * if (ps.in_or_st) ps.block_init = 1;
391 */
392 /* ? code = ps.block_init ? lparen : lbrace; */
393 code = lbrace;
394 break;
395
396 case ('}'):
397 unary_delim = true;
398 /* ? code = ps.block_init ? rparen : rbrace; */
399 code = rbrace;
400 break;
401
402 case 014: /* a form feed */
403 unary_delim = ps.last_u_d;
404 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
1009bf5e 405 * right */
30f48914
KB
406 code = form_feed;
407 break;
408
409 case (','):
410 unary_delim = true;
411 code = comma;
412 break;
413
414 case '.':
415 unary_delim = false;
416 code = period;
417 break;
418
419 case '-':
420 case '+': /* check for -, +, --, ++ */
421 code = (ps.last_u_d ? unary_op : binary_op);
422 unary_delim = true;
423
424 if (*buf_ptr == token[0]) {
425 /* check for doubled character */
0c8ee79d 426 *e_token++ = *buf_ptr++;
30f48914
KB
427 /* buffer overflow will be checked at end of loop */
428 if (last_code == ident || last_code == rparen) {
429 code = (ps.last_u_d ? unary_op : postop);
430 /* check for following ++ or -- */
431 unary_delim = false;
4b365fcd 432 }
30f48914
KB
433 }
434 else if (*buf_ptr == '=')
435 /* check for operator += */
0c8ee79d 436 *e_token++ = *buf_ptr++;
30f48914
KB
437 else if (*buf_ptr == '>') {
438 /* check for operator -> */
0c8ee79d 439 *e_token++ = *buf_ptr++;
30f48914
KB
440 if (!pointer_as_binop) {
441 unary_delim = false;
442 code = unary_op;
443 ps.want_blank = false;
4b365fcd 444 }
30f48914
KB
445 }
446 break; /* buffer overflow will be checked at end of
447 * switch */
448
449 case '=':
450 if (ps.in_or_st)
451 ps.block_init = 1;
452#ifdef undef
453 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
0c8ee79d
KB
454 e_token[-1] = *buf_ptr++;
455 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
456 *e_token++ = *buf_ptr++;
457 *e_token++ = '='; /* Flip =+ to += */
458 *e_token = 0;
30f48914
KB
459 }
460#else
461 if (*buf_ptr == '=') {/* == */
0c8ee79d 462 *e_token++ = '='; /* Flip =+ to += */
30f48914 463 buf_ptr++;
0c8ee79d 464 *e_token = 0;
30f48914
KB
465 }
466#endif
467 code = binary_op;
468 unary_delim = true;
469 break;
470 /* can drop thru!!! */
471
472 case '>':
473 case '<':
474 case '!': /* ops like <, <<, <=, !=, etc */
475 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
0c8ee79d 476 *e_token++ = *buf_ptr;
30f48914
KB
477 if (++buf_ptr >= buf_end)
478 fill_buffer();
479 }
480 if (*buf_ptr == '=')
0c8ee79d 481 *e_token++ = *buf_ptr++;
30f48914
KB
482 code = (ps.last_u_d ? unary_op : binary_op);
483 unary_delim = true;
484 break;
4b365fcd 485
30f48914
KB
486 default:
487 if (token[0] == '/' && *buf_ptr == '*') {
488 /* it is start of comment */
0c8ee79d 489 *e_token++ = '*';
4b365fcd 490
30f48914
KB
491 if (++buf_ptr >= buf_end)
492 fill_buffer();
4b365fcd 493
30f48914
KB
494 code = comment;
495 unary_delim = ps.last_u_d;
496 break;
497 }
0c8ee79d 498 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
30f48914
KB
499 /*
500 * handle ||, &&, etc, and also things as in int *****i
501 */
0c8ee79d 502 *e_token++ = *buf_ptr;
30f48914
KB
503 if (++buf_ptr >= buf_end)
504 fill_buffer();
505 }
506 code = (ps.last_u_d ? unary_op : binary_op);
507 unary_delim = true;
4b365fcd
KM
508
509
1009bf5e 510 } /* end of switch */
4b365fcd
KM
511 if (code != newline) {
512 l_struct = false;
513 last_code = code;
514 }
1009bf5e
KM
515 if (buf_ptr >= buf_end) /* check for input buffer empty */
516 fill_buffer();
517 ps.last_u_d = unary_delim;
0c8ee79d 518 *e_token = '\0'; /* null terminate the token */
4b365fcd 519 return (code);
1a43f1ba 520}
1009bf5e 521
30f48914
KB
522/*
523 * Add the given keyword to the keyword table, using val as the keyword type
524 */
525addkey(key, val)
526 char *key;
1009bf5e
KM
527{
528 register struct templ *p = specials;
529 while (p->rwd)
530 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
531 return;
532 else
533 p++;
534 if (p >= specials + sizeof specials / sizeof specials[0])
535 return; /* For now, table overflows are silently
30f48914 536 * ignored */
1009bf5e
KM
537 p->rwd = key;
538 p->rwcode = val;
539 p[1].rwd = 0;
540 p[1].rwcode = 0;
541 return;
542}