cleanup from David MacKenzie (mackenzi@thor.stolaf.edu)
[unix-history] / usr / src / usr.bin / indent / lexi.c
CommitLineData
c0bc4ef7 1/*
30f48914
KB
2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980 The Regents of the University of California.
b0627149
KB
4 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms are permitted
b36fc510
KB
8 * provided that the above copyright notice and this paragraph are
9 * duplicated in all such forms and that any documentation,
10 * advertising materials, and other materials related to such
11 * distribution and use acknowledge that the software was developed
30f48914
KB
12 * by the University of California, Berkeley, the University of Illinois,
13 * Urbana, and Sun Microsystems, Inc. The name of either University
14 * or Sun Microsystems may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
b36fc510
KB
16 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
18 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
c0bc4ef7
DF
19 */
20
21#ifndef lint
c5b954f4 22static char sccsid[] = "@(#)lexi.c 5.11 (Berkeley) %G%";
b0627149 23#endif /* not lint */
4b365fcd 24
b0627149 25/*
30f48914
KB
26 * Here we have the token scanner for indent. It scans off one token and puts
27 * it in the global variable "token". It returns a code, indicating the type
28 * of token scanned.
1009bf5e 29 */
4b365fcd 30
8540f0fa
KB
31#include "indent_globs.h"
32#include "indent_codes.h"
1009bf5e 33#include "ctype.h"
4b365fcd
KM
34
/*
 * Character classes stored in chartype[].  Any table entry that is neither
 * of these values is 0.
 */
#define alphanum	1	/* may appear in an identifier or number */
#define opchar		3	/* may appear in an operator */
37
/*
 * One entry of the reserved-word table: the spelling of the word and the
 * class code that lexi() switches on when the word is recognised.
 */
struct templ {
    char       *rwd;		/* spelling of the reserved word */
    int         rwcode;		/* class code, see the list below */
};

/*
 * Reserved words known to the scanner.  The list is terminated by an entry
 * whose rwd is 0; the unused tail of the array leaves room for words added
 * at run time by addkey().
 *
 * Class codes as interpreted by lexi():
 *	0  no special treatment (returned as an ordinary identifier)
 *	1  switch
 *	2  case, default
 *	3  struct, union, enum
 *	4  declaration keywords
 *	5  if, while, for
 *	6  else, do
 *	7  sizeof
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef" and never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
75
/*
 * Classification of each 7-bit ASCII character, used to decide what kind of
 * token a character can start or continue:
 *	1  alphanumeric (letters, digits, '_' and '$')
 *	3  operator character
 *	0  everything else
 * The table has exactly 128 entries, one per character code 0..127.
 */
char        chartype[128] =
{
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space ! " # $ % & ' ( ) * + , - . / */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0x30 - 0x3f: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: @ A B C D E F G H I J K L M N O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: P Q R S T U V W X Y Z [ \ ] ^ _ */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: ` a b c d e f g h i j k l m n o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: p q r s t u v w x y z { | } ~ del */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
97
1009bf5e
KM
98
99
100
30f48914 101int
1009bf5e
KM
102lexi()
103{
104 register char *tok; /* local pointer to next char in token */
30f48914
KB
105 int unary_delim; /* this is set to 1 if the current token
106 *
1009bf5e
KM
107 * forces a following operator to be unary */
108 static int last_code; /* the last token type returned */
109 static int l_struct; /* set to 1 if the last token was 'struct' */
110 int code; /* internal code to be returned */
111 char qchar; /* the delimiter character for a string */
112
113 tok = token; /* point to start of place to save token */
4b365fcd 114 unary_delim = false;
1009bf5e 115 ps.col_1 = ps.last_nl; /* tell world that this token started in
30f48914 116 * column 1 iff the last thing scanned was nl */
1009bf5e
KM
117 ps.last_nl = false;
118
119 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
30f48914
KB
120 ps.col_1 = false; /* leading blanks imply token is not in column
121 * 1 */
4b365fcd 122 if (++buf_ptr >= buf_end)
1009bf5e 123 fill_buffer();
4b365fcd
KM
124 }
125
30f48914
KB
126 /* Scan an alphanumeric token */
127 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
128 /*
129 * we have a character or number
130 */
131 register char *j; /* used for searching thru list of
132 *
1009bf5e
KM
133 * reserved words */
134 register struct templ *p;
4b365fcd 135
30f48914
KB
136 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
137 int seendot = 0,
138 seenexp = 0;
139 if (*buf_ptr == '0' &&
140 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
141 *tok++ = *buf_ptr++;
142 *tok++ = *buf_ptr++;
143 while (isxdigit(*buf_ptr))
144 *tok++ = *buf_ptr++;
145 }
146 else
147 while (1) {
148 if (*buf_ptr == '.')
149 if (seendot)
150 break;
151 else
152 seendot++;
153 *tok++ = *buf_ptr++;
154 if (!isdigit(*buf_ptr) && *buf_ptr != '.')
155 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
156 break;
157 else {
158 seenexp++;
159 seendot++;
160 *tok++ = *buf_ptr++;
161 if (*buf_ptr == '+' || *buf_ptr == '-')
162 *tok++ = *buf_ptr++;
163 }
164 }
165 if (*buf_ptr == 'L' || *buf_ptr == 'l')
166 *tok++ = *buf_ptr++;
167 }
168 else
169 while (chartype[*buf_ptr] == alphanum) { /* copy it over */
170 *tok++ = *buf_ptr++;
171 if (buf_ptr >= buf_end)
172 fill_buffer();
173 }
4b365fcd 174 *tok++ = '\0';
1009bf5e
KM
175 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
176 if (++buf_ptr >= buf_end)
177 fill_buffer();
178 }
179 ps.its_a_keyword = false;
180 ps.sizeof_keyword = false;
30f48914
KB
181 if (l_struct) { /* if last token was 'struct', then this token
182 * should be treated as a declaration */
4b365fcd
KM
183 l_struct = false;
184 last_code = ident;
1009bf5e 185 ps.last_u_d = true;
4b365fcd
KM
186 return (decl);
187 }
1009bf5e
KM
188 ps.last_u_d = false; /* Operator after indentifier is binary */
189 last_code = ident; /* Remember that this is the code we will
190 * return */
191
192 /*
30f48914 193 * This loop will check if the token is a keyword.
1009bf5e
KM
194 */
195 for (p = specials; (j = p->rwd) != 0; p++) {
196 tok = token; /* point at scanned token */
197 if (*j++ != *tok++ || *j++ != *tok++)
198 continue; /* This test depends on the fact that
30f48914
KB
199 * identifiers are always at least 1 character
200 * long (ie. the first two bytes of the
201 * identifier are always meaningful) */
1009bf5e
KM
202 if (tok[-1] == 0)
203 break; /* If its a one-character identifier */
204 while (*tok++ == *j)
205 if (*j++ == 0)
206 goto found_keyword; /* I wish that C had a multi-level
207 * break... */
208 }
209 if (p->rwd) { /* we have a keyword */
210 found_keyword:
211 ps.its_a_keyword = true;
212 ps.last_u_d = true;
213 switch (p->rwcode) {
30f48914
KB
214 case 1: /* it is a switch */
215 return (swstmt);
216 case 2: /* a case or default */
217 return (casestmt);
218
219 case 3: /* a "struct" */
220 if (ps.p_l_follow)
221 break; /* inside parens: cast */
222 l_struct = true;
223
224 /*
225 * Next time around, we will want to know that we have had a
226 * 'struct'
227 */
228 case 4: /* one of the declaration keywords */
229 if (ps.p_l_follow) {
230 ps.cast_mask |= 1 << ps.p_l_follow;
231 break; /* inside parens: cast */
232 }
233 last_code = decl;
234 return (decl);
1009bf5e 235
30f48914
KB
236 case 5: /* if, while, for */
237 return (sp_paren);
1009bf5e 238
30f48914
KB
239 case 6: /* do, else */
240 return (sp_nparen);
1009bf5e 241
30f48914
KB
242 case 7:
243 ps.sizeof_keyword = true;
244 default: /* all others are treated like any other
1009bf5e 245 * identifier */
30f48914 246 return (ident);
1009bf5e
KM
247 } /* end of switch */
248 } /* end of if (found_it) */
30f48914 249 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
c5b954f4
KB
250 register char *tp = buf_ptr;
251 while (tp < buf_end)
252 if (*tp++ == ')' && *tp == ';')
30f48914 253 goto not_proc;
1009bf5e
KM
254 strncpy(ps.procname, token, sizeof ps.procname - 1);
255 ps.in_parameter_declaration = 1;
30f48914 256 not_proc:;
4b365fcd 257 }
1009bf5e
KM
258 /*
259 * The following hack attempts to guess whether or not the current
260 * token is in fact a declaration keyword -- one that has been
30f48914 261 * typedefd
1009bf5e 262 */
30f48914
KB
263 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
264 && !ps.p_l_follow
265 && !ps.block_init
266 && (ps.last_token == rparen || ps.last_token == semicolon ||
267 ps.last_token == decl ||
268 ps.last_token == lbrace || ps.last_token == rbrace)) {
1009bf5e
KM
269 ps.its_a_keyword = true;
270 ps.last_u_d = true;
271 last_code = decl;
272 return decl;
273 }
274 if (last_code == decl) /* if this is a declared variable, then
275 * following sign is unary */
276 ps.last_u_d = true; /* will make "int a -1" work */
4b365fcd 277 last_code = ident;
1009bf5e
KM
278 return (ident); /* the ident is not in the list */
279 } /* end of procesing for alpanum character */
30f48914 280 /* l l l Scan a non-alphanumeric token */
4b365fcd 281
30f48914
KB
282 *tok++ = *buf_ptr; /* if it is only a one-character token, it is
283 * moved here */
4b365fcd
KM
284 *tok = '\0';
285 if (++buf_ptr >= buf_end)
1009bf5e 286 fill_buffer();
4b365fcd
KM
287
288 switch (*token) {
30f48914
KB
289 case '\n':
290 unary_delim = ps.last_u_d;
291 ps.last_nl = true; /* remember that we just had a newline */
292 code = (had_eof ? 0 : newline);
4b365fcd 293
30f48914
KB
294 /*
295 * if data has been exausted, the newline is a dummy, and we should
296 * return code to stop
297 */
298 break;
299
300 case '\'': /* start of quoted character */
301 case '"': /* start of string */
302 qchar = *token;
303 if (troff) {
304 tok[-1] = '`';
305 if (qchar == '"')
306 *tok++ = '`';
307 tok = chfont(&bodyf, &stringf, tok);
308 }
309 do { /* copy the string */
310 while (1) { /* move one character or [/<char>]<char> */
311 if (*buf_ptr == '\n') {
312 printf("%d: Unterminated literal\n", line_no);
313 goto stop_lit;
314 }
315 *tok = *buf_ptr++;
316 if (buf_ptr >= buf_end)
317 fill_buffer();
318 if (had_eof || ((tok - token) > (bufsize - 2))) {
319 printf("Unterminated literal\n");
320 ++tok;
321 goto stop_lit;
322 /* get outof literal copying loop */
323 }
324 if (*tok == BACKSLASH) { /* if escape, copy extra char */
325 if (*buf_ptr == '\n') /* check for escaped newline */
326 ++line_no;
327 if (troff) {
328 *++tok = BACKSLASH;
329 if (*buf_ptr == BACKSLASH)
330 *++tok = BACKSLASH;
4b365fcd 331 }
30f48914
KB
332 *++tok = *buf_ptr++;
333 ++tok; /* we must increment this again because we
334 * copied two chars */
4b365fcd 335 if (buf_ptr >= buf_end)
1009bf5e 336 fill_buffer();
30f48914
KB
337 }
338 else
339 break; /* we copied one character */
340 } /* end of while (1) */
341 } while (*tok++ != qchar);
342 if (troff) {
343 tok = chfont(&stringf, &bodyf, tok - 1);
344 if (qchar == '"')
1009bf5e 345 *tok++ = '\'';
30f48914
KB
346 }
347stop_lit:
348 code = ident;
349 break;
350
351 case ('('):
352 case ('['):
353 unary_delim = true;
354 code = lparen;
355 break;
356
357 case (')'):
358 case (']'):
359 code = rparen;
360 break;
361
362 case '#':
363 unary_delim = ps.last_u_d;
364 code = preesc;
365 break;
366
367 case '?':
368 unary_delim = true;
369 code = question;
370 break;
371
372 case (':'):
373 code = colon;
374 unary_delim = true;
375 break;
376
377 case (';'):
378 unary_delim = true;
379 code = semicolon;
380 break;
381
382 case ('{'):
383 unary_delim = true;
4b365fcd 384
30f48914
KB
385 /*
386 * if (ps.in_or_st) ps.block_init = 1;
387 */
388 /* ? code = ps.block_init ? lparen : lbrace; */
389 code = lbrace;
390 break;
391
392 case ('}'):
393 unary_delim = true;
394 /* ? code = ps.block_init ? rparen : rbrace; */
395 code = rbrace;
396 break;
397
398 case 014: /* a form feed */
399 unary_delim = ps.last_u_d;
400 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
1009bf5e 401 * right */
30f48914
KB
402 code = form_feed;
403 break;
404
405 case (','):
406 unary_delim = true;
407 code = comma;
408 break;
409
410 case '.':
411 unary_delim = false;
412 code = period;
413 break;
414
415 case '-':
416 case '+': /* check for -, +, --, ++ */
417 code = (ps.last_u_d ? unary_op : binary_op);
418 unary_delim = true;
419
420 if (*buf_ptr == token[0]) {
421 /* check for doubled character */
422 *tok++ = *buf_ptr++;
423 /* buffer overflow will be checked at end of loop */
424 if (last_code == ident || last_code == rparen) {
425 code = (ps.last_u_d ? unary_op : postop);
426 /* check for following ++ or -- */
427 unary_delim = false;
4b365fcd 428 }
30f48914
KB
429 }
430 else if (*buf_ptr == '=')
431 /* check for operator += */
432 *tok++ = *buf_ptr++;
433 else if (*buf_ptr == '>') {
434 /* check for operator -> */
435 *tok++ = *buf_ptr++;
436 if (!pointer_as_binop) {
437 unary_delim = false;
438 code = unary_op;
439 ps.want_blank = false;
4b365fcd 440 }
30f48914
KB
441 }
442 break; /* buffer overflow will be checked at end of
443 * switch */
444
445 case '=':
446 if (ps.in_or_st)
447 ps.block_init = 1;
448#ifdef undef
449 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
450 tok[-1] = *buf_ptr++;
451 if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr)
1009bf5e 452 *tok++ = *buf_ptr++;
30f48914
KB
453 *tok++ = '='; /* Flip =+ to += */
454 *tok = 0;
455 }
456#else
457 if (*buf_ptr == '=') {/* == */
458 *tok++ = '='; /* Flip =+ to += */
459 buf_ptr++;
460 *tok = 0;
461 }
462#endif
463 code = binary_op;
464 unary_delim = true;
465 break;
466 /* can drop thru!!! */
467
468 case '>':
469 case '<':
470 case '!': /* ops like <, <<, <=, !=, etc */
471 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
472 *tok++ = *buf_ptr;
473 if (++buf_ptr >= buf_end)
474 fill_buffer();
475 }
476 if (*buf_ptr == '=')
477 *tok++ = *buf_ptr++;
478 code = (ps.last_u_d ? unary_op : binary_op);
479 unary_delim = true;
480 break;
4b365fcd 481
30f48914
KB
482 default:
483 if (token[0] == '/' && *buf_ptr == '*') {
484 /* it is start of comment */
485 *tok++ = '*';
4b365fcd 486
30f48914
KB
487 if (++buf_ptr >= buf_end)
488 fill_buffer();
4b365fcd 489
30f48914
KB
490 code = comment;
491 unary_delim = ps.last_u_d;
492 break;
493 }
494 while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') {
495 /*
496 * handle ||, &&, etc, and also things as in int *****i
497 */
498 *tok++ = *buf_ptr;
499 if (++buf_ptr >= buf_end)
500 fill_buffer();
501 }
502 code = (ps.last_u_d ? unary_op : binary_op);
503 unary_delim = true;
4b365fcd
KM
504
505
1009bf5e 506 } /* end of switch */
4b365fcd
KM
507 if (code != newline) {
508 l_struct = false;
509 last_code = code;
510 }
1009bf5e
KM
511 if (buf_ptr >= buf_end) /* check for input buffer empty */
512 fill_buffer();
513 ps.last_u_d = unary_delim;
514 *tok = '\0'; /* null terminate the token */
4b365fcd
KM
515 return (code);
516};
1009bf5e 517
30f48914
KB
518/*
519 * Add the given keyword to the keyword table, using val as the keyword type
520 */
521addkey(key, val)
522 char *key;
1009bf5e
KM
523{
524 register struct templ *p = specials;
525 while (p->rwd)
526 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
527 return;
528 else
529 p++;
530 if (p >= specials + sizeof specials / sizeof specials[0])
531 return; /* For now, table overflows are silently
30f48914 532 * ignored */
1009bf5e
KM
533 p->rwd = key;
534 p->rwcode = val;
535 p[1].rwd = 0;
536 p[1].rwcode = 0;
537 return;
538}