remove unused variables
[unix-history] / usr / src / usr.bin / indent / lexi.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980 The Regents of the University of California.
4 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms are permitted
8 * provided that the above copyright notice and this paragraph are
9 * duplicated in all such forms and that any documentation,
10 * advertising materials, and other materials related to such
11 * distribution and use acknowledge that the software was developed
12 * by the University of California, Berkeley, the University of Illinois,
13 * Urbana, and Sun Microsystems, Inc. The name of either University
14 * or Sun Microsystems may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
18 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
19 */
20
21#ifndef lint
22static char sccsid[] = "@(#)lexi.c 5.10 (Berkeley) %G%";
23#endif /* not lint */
24
25/*
26 * Here we have the token scanner for indent. It scans off one token and puts
27 * it in the global variable "token". It returns a code, indicating the type
28 * of token scanned.
29 */
30
31#include "indent_globs.h"
32#include "indent_codes.h"
33#include "ctype.h"
34
/*
 * Character classes stored in chartype[] and tested by lexi():
 * "alphanum" marks characters that may appear in an identifier or number,
 * "opchar" marks characters that may be part of an operator.
 */
#define alphanum 1
#define opchar 3

/*
 * One entry of the reserved-word table: the spelling of the word and the
 * class code that lexi() switches on once the word has been recognized.
 */
struct templ {
    char       *rwd;		/* spelling of the reserved word */
    int         rwcode;		/* class code, see the table below */
};

/*
 * Reserved-word table searched by lexi() and extended at run time by
 * addkey().  The list ends at the first entry whose rwd is a null pointer.
 *
 * rwcode values, as interpreted by the switch in lexi():
 *	0	no special treatment; returned as an ordinary identifier
 *	1	"switch"
 *	2	"case" and "default"
 *	3	"struct", "union", "enum" (the following tag is then treated
 *		as part of the declaration)
 *	4	declaration keywords
 *	5	keywords followed by a parenthesized expression
 *	6	keywords not followed by a parenthesized expression
 *	7	"sizeof"
 *
 * "global" is not a C keyword; it is kept exactly as it appeared in the
 * original table.
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef" */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
75
/*
 * Character classification table, indexed by 7-bit ASCII code.
 *
 * Each entry is 1 (alphanumeric: letters, digits, '_' and '$'),
 * 3 (a character that can be part of an operator) or 0 (anything else).
 * The scanner uses it to decide which kind of token starts at the
 * current input character.  Sixteen codes are listed per row.
 */
char chartype[128] =
{
    /* 000-017: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 020-037: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 040-057: space and punctuation up to the slash */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 060-077: digits, then colon through question mark */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 100-117: at-sign, then A through O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 120-137: P through Z, then brackets, backslash, caret, underscore */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 140-157: backquote, then a through o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 160-177: p through z, then braces, bar, tilde, DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
97
98
99
100
101int
102lexi()
103{
104 register char *tok; /* local pointer to next char in token */
105 int unary_delim; /* this is set to 1 if the current token
106 *
107 * forces a following operator to be unary */
108 static int last_code; /* the last token type returned */
109 static int l_struct; /* set to 1 if the last token was 'struct' */
110 int code; /* internal code to be returned */
111 char qchar; /* the delimiter character for a string */
112
113 tok = token; /* point to start of place to save token */
114 unary_delim = false;
115 ps.col_1 = ps.last_nl; /* tell world that this token started in
116 * column 1 iff the last thing scanned was nl */
117 ps.last_nl = false;
118
119 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
120 ps.col_1 = false; /* leading blanks imply token is not in column
121 * 1 */
122 if (++buf_ptr >= buf_end)
123 fill_buffer();
124 }
125
126 /* Scan an alphanumeric token */
127 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
128 /*
129 * we have a character or number
130 */
131 register char *j; /* used for searching thru list of
132 *
133 * reserved words */
134 register struct templ *p;
135
136 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
137 int seendot = 0,
138 seenexp = 0;
139 if (*buf_ptr == '0' &&
140 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
141 *tok++ = *buf_ptr++;
142 *tok++ = *buf_ptr++;
143 while (isxdigit(*buf_ptr))
144 *tok++ = *buf_ptr++;
145 }
146 else
147 while (1) {
148 if (*buf_ptr == '.')
149 if (seendot)
150 break;
151 else
152 seendot++;
153 *tok++ = *buf_ptr++;
154 if (!isdigit(*buf_ptr) && *buf_ptr != '.')
155 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
156 break;
157 else {
158 seenexp++;
159 seendot++;
160 *tok++ = *buf_ptr++;
161 if (*buf_ptr == '+' || *buf_ptr == '-')
162 *tok++ = *buf_ptr++;
163 }
164 }
165 if (*buf_ptr == 'L' || *buf_ptr == 'l')
166 *tok++ = *buf_ptr++;
167 }
168 else
169 while (chartype[*buf_ptr] == alphanum) { /* copy it over */
170 *tok++ = *buf_ptr++;
171 if (buf_ptr >= buf_end)
172 fill_buffer();
173 }
174 *tok++ = '\0';
175 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
176 if (++buf_ptr >= buf_end)
177 fill_buffer();
178 }
179 ps.its_a_keyword = false;
180 ps.sizeof_keyword = false;
181 if (l_struct) { /* if last token was 'struct', then this token
182 * should be treated as a declaration */
183 l_struct = false;
184 last_code = ident;
185 ps.last_u_d = true;
186 return (decl);
187 }
188 ps.last_u_d = false; /* Operator after indentifier is binary */
189 last_code = ident; /* Remember that this is the code we will
190 * return */
191
192 /*
193 * This loop will check if the token is a keyword.
194 */
195 for (p = specials; (j = p->rwd) != 0; p++) {
196 tok = token; /* point at scanned token */
197 if (*j++ != *tok++ || *j++ != *tok++)
198 continue; /* This test depends on the fact that
199 * identifiers are always at least 1 character
200 * long (ie. the first two bytes of the
201 * identifier are always meaningful) */
202 if (tok[-1] == 0)
203 break; /* If its a one-character identifier */
204 while (*tok++ == *j)
205 if (*j++ == 0)
206 goto found_keyword; /* I wish that C had a multi-level
207 * break... */
208 }
209 if (p->rwd) { /* we have a keyword */
210 found_keyword:
211 ps.its_a_keyword = true;
212 ps.last_u_d = true;
213 switch (p->rwcode) {
214 case 1: /* it is a switch */
215 return (swstmt);
216 case 2: /* a case or default */
217 return (casestmt);
218
219 case 3: /* a "struct" */
220 if (ps.p_l_follow)
221 break; /* inside parens: cast */
222 l_struct = true;
223
224 /*
225 * Next time around, we will want to know that we have had a
226 * 'struct'
227 */
228 case 4: /* one of the declaration keywords */
229 if (ps.p_l_follow) {
230 ps.cast_mask |= 1 << ps.p_l_follow;
231 break; /* inside parens: cast */
232 }
233 last_code = decl;
234 return (decl);
235
236 case 5: /* if, while, for */
237 return (sp_paren);
238
239 case 6: /* do, else */
240 return (sp_nparen);
241
242 case 7:
243 ps.sizeof_keyword = true;
244 default: /* all others are treated like any other
245 * identifier */
246 return (ident);
247 } /* end of switch */
248 } /* end of if (found_it) */
249 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
250 register char *p = buf_ptr;
251 while (p < buf_end)
252 if (*p++ == ')' && *p == ';')
253 goto not_proc;
254 strncpy(ps.procname, token, sizeof ps.procname - 1);
255 ps.in_parameter_declaration = 1;
256 not_proc:;
257 }
258 /*
259 * The following hack attempts to guess whether or not the current
260 * token is in fact a declaration keyword -- one that has been
261 * typedefd
262 */
263 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
264 && !ps.p_l_follow
265 && !ps.block_init
266 && (ps.last_token == rparen || ps.last_token == semicolon ||
267 ps.last_token == decl ||
268 ps.last_token == lbrace || ps.last_token == rbrace)) {
269 ps.its_a_keyword = true;
270 ps.last_u_d = true;
271 last_code = decl;
272 return decl;
273 }
274 if (last_code == decl) /* if this is a declared variable, then
275 * following sign is unary */
276 ps.last_u_d = true; /* will make "int a -1" work */
277 last_code = ident;
278 return (ident); /* the ident is not in the list */
279 } /* end of procesing for alpanum character */
280 /* l l l Scan a non-alphanumeric token */
281
282 *tok++ = *buf_ptr; /* if it is only a one-character token, it is
283 * moved here */
284 *tok = '\0';
285 if (++buf_ptr >= buf_end)
286 fill_buffer();
287
288 switch (*token) {
289 case '\n':
290 unary_delim = ps.last_u_d;
291 ps.last_nl = true; /* remember that we just had a newline */
292 code = (had_eof ? 0 : newline);
293
294 /*
295 * if data has been exausted, the newline is a dummy, and we should
296 * return code to stop
297 */
298 break;
299
300 case '\'': /* start of quoted character */
301 case '"': /* start of string */
302 qchar = *token;
303 if (troff) {
304 tok[-1] = '`';
305 if (qchar == '"')
306 *tok++ = '`';
307 tok = chfont(&bodyf, &stringf, tok);
308 }
309 do { /* copy the string */
310 while (1) { /* move one character or [/<char>]<char> */
311 if (*buf_ptr == '\n') {
312 printf("%d: Unterminated literal\n", line_no);
313 goto stop_lit;
314 }
315 *tok = *buf_ptr++;
316 if (buf_ptr >= buf_end)
317 fill_buffer();
318 if (had_eof || ((tok - token) > (bufsize - 2))) {
319 printf("Unterminated literal\n");
320 ++tok;
321 goto stop_lit;
322 /* get outof literal copying loop */
323 }
324 if (*tok == BACKSLASH) { /* if escape, copy extra char */
325 if (*buf_ptr == '\n') /* check for escaped newline */
326 ++line_no;
327 if (troff) {
328 *++tok = BACKSLASH;
329 if (*buf_ptr == BACKSLASH)
330 *++tok = BACKSLASH;
331 }
332 *++tok = *buf_ptr++;
333 ++tok; /* we must increment this again because we
334 * copied two chars */
335 if (buf_ptr >= buf_end)
336 fill_buffer();
337 }
338 else
339 break; /* we copied one character */
340 } /* end of while (1) */
341 } while (*tok++ != qchar);
342 if (troff) {
343 tok = chfont(&stringf, &bodyf, tok - 1);
344 if (qchar == '"')
345 *tok++ = '\'';
346 }
347stop_lit:
348 code = ident;
349 break;
350
351 case ('('):
352 case ('['):
353 unary_delim = true;
354 code = lparen;
355 break;
356
357 case (')'):
358 case (']'):
359 code = rparen;
360 break;
361
362 case '#':
363 unary_delim = ps.last_u_d;
364 code = preesc;
365 break;
366
367 case '?':
368 unary_delim = true;
369 code = question;
370 break;
371
372 case (':'):
373 code = colon;
374 unary_delim = true;
375 break;
376
377 case (';'):
378 unary_delim = true;
379 code = semicolon;
380 break;
381
382 case ('{'):
383 unary_delim = true;
384
385 /*
386 * if (ps.in_or_st) ps.block_init = 1;
387 */
388 /* ? code = ps.block_init ? lparen : lbrace; */
389 code = lbrace;
390 break;
391
392 case ('}'):
393 unary_delim = true;
394 /* ? code = ps.block_init ? rparen : rbrace; */
395 code = rbrace;
396 break;
397
398 case 014: /* a form feed */
399 unary_delim = ps.last_u_d;
400 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
401 * right */
402 code = form_feed;
403 break;
404
405 case (','):
406 unary_delim = true;
407 code = comma;
408 break;
409
410 case '.':
411 unary_delim = false;
412 code = period;
413 break;
414
415 case '-':
416 case '+': /* check for -, +, --, ++ */
417 code = (ps.last_u_d ? unary_op : binary_op);
418 unary_delim = true;
419
420 if (*buf_ptr == token[0]) {
421 /* check for doubled character */
422 *tok++ = *buf_ptr++;
423 /* buffer overflow will be checked at end of loop */
424 if (last_code == ident || last_code == rparen) {
425 code = (ps.last_u_d ? unary_op : postop);
426 /* check for following ++ or -- */
427 unary_delim = false;
428 }
429 }
430 else if (*buf_ptr == '=')
431 /* check for operator += */
432 *tok++ = *buf_ptr++;
433 else if (*buf_ptr == '>') {
434 /* check for operator -> */
435 *tok++ = *buf_ptr++;
436 if (!pointer_as_binop) {
437 unary_delim = false;
438 code = unary_op;
439 ps.want_blank = false;
440 }
441 }
442 break; /* buffer overflow will be checked at end of
443 * switch */
444
445 case '=':
446 if (ps.in_or_st)
447 ps.block_init = 1;
448#ifdef undef
449 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
450 tok[-1] = *buf_ptr++;
451 if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr)
452 *tok++ = *buf_ptr++;
453 *tok++ = '='; /* Flip =+ to += */
454 *tok = 0;
455 }
456#else
457 if (*buf_ptr == '=') {/* == */
458 *tok++ = '='; /* Flip =+ to += */
459 buf_ptr++;
460 *tok = 0;
461 }
462#endif
463 code = binary_op;
464 unary_delim = true;
465 break;
466 /* can drop thru!!! */
467
468 case '>':
469 case '<':
470 case '!': /* ops like <, <<, <=, !=, etc */
471 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
472 *tok++ = *buf_ptr;
473 if (++buf_ptr >= buf_end)
474 fill_buffer();
475 }
476 if (*buf_ptr == '=')
477 *tok++ = *buf_ptr++;
478 code = (ps.last_u_d ? unary_op : binary_op);
479 unary_delim = true;
480 break;
481
482 default:
483 if (token[0] == '/' && *buf_ptr == '*') {
484 /* it is start of comment */
485 *tok++ = '*';
486
487 if (++buf_ptr >= buf_end)
488 fill_buffer();
489
490 code = comment;
491 unary_delim = ps.last_u_d;
492 break;
493 }
494 while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') {
495 /*
496 * handle ||, &&, etc, and also things as in int *****i
497 */
498 *tok++ = *buf_ptr;
499 if (++buf_ptr >= buf_end)
500 fill_buffer();
501 }
502 code = (ps.last_u_d ? unary_op : binary_op);
503 unary_delim = true;
504
505
506 } /* end of switch */
507 if (code != newline) {
508 l_struct = false;
509 last_code = code;
510 }
511 if (buf_ptr >= buf_end) /* check for input buffer empty */
512 fill_buffer();
513 ps.last_u_d = unary_delim;
514 *tok = '\0'; /* null terminate the token */
515 return (code);
516};
517
518/*
519 * Add the given keyword to the keyword table, using val as the keyword type
520 */
521addkey(key, val)
522 char *key;
523{
524 register struct templ *p = specials;
525 while (p->rwd)
526 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
527 return;
528 else
529 p++;
530 if (p >= specials + sizeof specials / sizeof specials[0])
531 return; /* For now, table overflows are silently
532 * ignored */
533 p->rwd = key;
534 p->rwcode = val;
535 p[1].rwd = 0;
536 p[1].rwcode = 0;
537 return;
538}