This commit was generated by cvs2svn to track changes on a CVS vendor
[unix-history] / usr.bin / indent / lexi.c
CommitLineData
15637ed4
RG
1/*
2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980 The Regents of the University of California.
4 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
#ifndef lint
/* SCCS identification string for this file; omitted when "lint" is defined. */
static char sccsid[] = "@(#)lexi.c 5.16 (Berkeley) 2/26/91";
#endif /* not lint */
39
40/*
41 * Here we have the token scanner for indent. It scans off one token and puts
42 * it in the global variable "token". It returns a code, indicating the type
43 * of token scanned.
44 */
45
46#include <stdio.h>
47#include <ctype.h>
48#include <stdlib.h>
49#include <string.h>
50#include "indent_globs.h"
51#include "indent_codes.h"
52
/*
 * Character classes stored in chartype[].  A table value of 0 means the
 * character belongs to neither class.
 */
#define alphanum 1 /* letters, digits, '_' and '$' */
#define opchar 3 /* characters that can start or continue an operator */
55
/*
 * One entry of the reserved-word table.
 *
 *   rwd    - spelling of the keyword; a null pointer terminates the table.
 *   rwcode - keyword class, interpreted by the switch in lexi():
 *              0  no special handling (returned as a plain identifier)
 *              1  "switch"
 *              2  "case" / "default"
 *              3  "struct", "union", "enum"
 *              4  declaration keywords (types and storage classes)
 *              5  keywords followed by a parenthesised expression
 *              6  keywords not followed by a parenthesised expression
 *              7  "sizeof"
 */
struct templ {
    char       *rwd;
    int         rwcode;
};

/*
 * Reserved-word table.  Only the first entries are initialised; the unused
 * slots (implicitly zero) leave room for keywords appended by addkey().
 * The list must always end with an entry whose rwd is null.
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef", so it never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},		/* not a C keyword; kept for compatibility */
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
93
/*
 * Character classification table, indexed by 7-bit ASCII code.
 * Each entry is 1 (alphanum: letters, digits, '_' and '$'),
 * 3 (opchar: operator characters) or 0 (anything else).
 * One row below covers sixteen consecutive character codes.
 */
char chartype[128] =
{
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space ! " # $ % & ' ( ) * + , - . / */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0x30 - 0x3f: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: @ A B C D E F G H I J K L M N O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: P Q R S T U V W X Y Z [ \ ] ^ _ */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: ` a b c d e f g h i j k l m n o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: p q r s t u v w x y z { | } ~ DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
115
116
117
118
119int
120lexi()
121{
122 int unary_delim; /* this is set to 1 if the current token
123 *
124 * forces a following operator to be unary */
125 static int last_code; /* the last token type returned */
126 static int l_struct; /* set to 1 if the last token was 'struct' */
127 int code; /* internal code to be returned */
128 char qchar; /* the delimiter character for a string */
129
130 e_token = s_token; /* point to start of place to save token */
131 unary_delim = false;
132 ps.col_1 = ps.last_nl; /* tell world that this token started in
133 * column 1 iff the last thing scanned was nl */
134 ps.last_nl = false;
135
136 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
137 ps.col_1 = false; /* leading blanks imply token is not in column
138 * 1 */
139 if (++buf_ptr >= buf_end)
140 fill_buffer();
141 }
142
143 /* Scan an alphanumeric token */
144 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
145 /*
146 * we have a character or number
147 */
148 register char *j; /* used for searching thru list of
149 *
150 * reserved words */
151 register struct templ *p;
152
153 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
154 int seendot = 0,
155 seenexp = 0;
156 if (*buf_ptr == '0' &&
157 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
158 *e_token++ = *buf_ptr++;
159 *e_token++ = *buf_ptr++;
160 while (isxdigit(*buf_ptr)) {
161 CHECK_SIZE_TOKEN;
162 *e_token++ = *buf_ptr++;
163 }
164 }
165 else
166 while (1) {
167 if (*buf_ptr == '.')
168 if (seendot)
169 break;
170 else
171 seendot++;
172 CHECK_SIZE_TOKEN;
173 *e_token++ = *buf_ptr++;
174 if (!isdigit(*buf_ptr) && *buf_ptr != '.')
175 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
176 break;
177 else {
178 seenexp++;
179 seendot++;
180 CHECK_SIZE_TOKEN;
181 *e_token++ = *buf_ptr++;
182 if (*buf_ptr == '+' || *buf_ptr == '-')
183 *e_token++ = *buf_ptr++;
184 }
185 }
186 if (*buf_ptr == 'L' || *buf_ptr == 'l')
187 *e_token++ = *buf_ptr++;
188 }
189 else
190 while (chartype[*buf_ptr] == alphanum) { /* copy it over */
191 CHECK_SIZE_TOKEN;
192 *e_token++ = *buf_ptr++;
193 if (buf_ptr >= buf_end)
194 fill_buffer();
195 }
196 *e_token++ = '\0';
197 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
198 if (++buf_ptr >= buf_end)
199 fill_buffer();
200 }
201 ps.its_a_keyword = false;
202 ps.sizeof_keyword = false;
203 if (l_struct) { /* if last token was 'struct', then this token
204 * should be treated as a declaration */
205 l_struct = false;
206 last_code = ident;
207 ps.last_u_d = true;
208 return (decl);
209 }
210 ps.last_u_d = false; /* Operator after indentifier is binary */
211 last_code = ident; /* Remember that this is the code we will
212 * return */
213
214 /*
215 * This loop will check if the token is a keyword.
216 */
217 for (p = specials; (j = p->rwd) != 0; p++) {
218 register char *p = s_token; /* point at scanned token */
219 if (*j++ != *p++ || *j++ != *p++)
220 continue; /* This test depends on the fact that
221 * identifiers are always at least 1 character
222 * long (ie. the first two bytes of the
223 * identifier are always meaningful) */
224 if (p[-1] == 0)
225 break; /* If its a one-character identifier */
226 while (*p++ == *j)
227 if (*j++ == 0)
228 goto found_keyword; /* I wish that C had a multi-level
229 * break... */
230 }
231 if (p->rwd) { /* we have a keyword */
232 found_keyword:
233 ps.its_a_keyword = true;
234 ps.last_u_d = true;
235 switch (p->rwcode) {
236 case 1: /* it is a switch */
237 return (swstmt);
238 case 2: /* a case or default */
239 return (casestmt);
240
241 case 3: /* a "struct" */
242 if (ps.p_l_follow)
243 break; /* inside parens: cast */
244 l_struct = true;
245
246 /*
247 * Next time around, we will want to know that we have had a
248 * 'struct'
249 */
250 case 4: /* one of the declaration keywords */
251 if (ps.p_l_follow) {
252 ps.cast_mask |= 1 << ps.p_l_follow;
253 break; /* inside parens: cast */
254 }
255 last_code = decl;
256 return (decl);
257
258 case 5: /* if, while, for */
259 return (sp_paren);
260
261 case 6: /* do, else */
262 return (sp_nparen);
263
264 case 7:
265 ps.sizeof_keyword = true;
266 default: /* all others are treated like any other
267 * identifier */
268 return (ident);
269 } /* end of switch */
270 } /* end of if (found_it) */
271 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
272 register char *tp = buf_ptr;
273 while (tp < buf_end)
274 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
275 goto not_proc;
276 strncpy(ps.procname, token, sizeof ps.procname - 1);
277 ps.in_parameter_declaration = 1;
278 rparen_count = 1;
279 not_proc:;
280 }
281 /*
282 * The following hack attempts to guess whether or not the current
283 * token is in fact a declaration keyword -- one that has been
284 * typedefd
285 */
286 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
287 && !ps.p_l_follow
288 && !ps.block_init
289 && (ps.last_token == rparen || ps.last_token == semicolon ||
290 ps.last_token == decl ||
291 ps.last_token == lbrace || ps.last_token == rbrace)) {
292 ps.its_a_keyword = true;
293 ps.last_u_d = true;
294 last_code = decl;
295 return decl;
296 }
297 if (last_code == decl) /* if this is a declared variable, then
298 * following sign is unary */
299 ps.last_u_d = true; /* will make "int a -1" work */
300 last_code = ident;
301 return (ident); /* the ident is not in the list */
302 } /* end of procesing for alpanum character */
303
304 /* Scan a non-alphanumeric token */
305
306 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
307 * moved here */
308 *e_token = '\0';
309 if (++buf_ptr >= buf_end)
310 fill_buffer();
311
312 switch (*token) {
313 case '\n':
314 unary_delim = ps.last_u_d;
315 ps.last_nl = true; /* remember that we just had a newline */
316 code = (had_eof ? 0 : newline);
317
318 /*
319 * if data has been exausted, the newline is a dummy, and we should
320 * return code to stop
321 */
322 break;
323
324 case '\'': /* start of quoted character */
325 case '"': /* start of string */
326 qchar = *token;
327 if (troff) {
328 e_token[-1] = '`';
329 if (qchar == '"')
330 *e_token++ = '`';
331 e_token = chfont(&bodyf, &stringf, e_token);
332 }
333 do { /* copy the string */
334 while (1) { /* move one character or [/<char>]<char> */
335 if (*buf_ptr == '\n') {
336 printf("%d: Unterminated literal\n", line_no);
337 goto stop_lit;
338 }
339 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
340 * since CHECK_SIZE guarantees that there
341 * are at least 5 entries left */
342 *e_token = *buf_ptr++;
343 if (buf_ptr >= buf_end)
344 fill_buffer();
345 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
346 if (*buf_ptr == '\n') /* check for escaped newline */
347 ++line_no;
348 if (troff) {
349 *++e_token = BACKSLASH;
350 if (*buf_ptr == BACKSLASH)
351 *++e_token = BACKSLASH;
352 }
353 *++e_token = *buf_ptr++;
354 ++e_token; /* we must increment this again because we
355 * copied two chars */
356 if (buf_ptr >= buf_end)
357 fill_buffer();
358 }
359 else
360 break; /* we copied one character */
361 } /* end of while (1) */
362 } while (*e_token++ != qchar);
363 if (troff) {
364 e_token = chfont(&stringf, &bodyf, e_token - 1);
365 if (qchar == '"')
366 *e_token++ = '\'';
367 }
368stop_lit:
369 code = ident;
370 break;
371
372 case ('('):
373 case ('['):
374 unary_delim = true;
375 code = lparen;
376 break;
377
378 case (')'):
379 case (']'):
380 code = rparen;
381 break;
382
383 case '#':
384 unary_delim = ps.last_u_d;
385 code = preesc;
386 break;
387
388 case '?':
389 unary_delim = true;
390 code = question;
391 break;
392
393 case (':'):
394 code = colon;
395 unary_delim = true;
396 break;
397
398 case (';'):
399 unary_delim = true;
400 code = semicolon;
401 break;
402
403 case ('{'):
404 unary_delim = true;
405
406 /*
407 * if (ps.in_or_st) ps.block_init = 1;
408 */
409 /* ? code = ps.block_init ? lparen : lbrace; */
410 code = lbrace;
411 break;
412
413 case ('}'):
414 unary_delim = true;
415 /* ? code = ps.block_init ? rparen : rbrace; */
416 code = rbrace;
417 break;
418
419 case 014: /* a form feed */
420 unary_delim = ps.last_u_d;
421 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
422 * right */
423 code = form_feed;
424 break;
425
426 case (','):
427 unary_delim = true;
428 code = comma;
429 break;
430
431 case '.':
432 unary_delim = false;
433 code = period;
434 break;
435
436 case '-':
437 case '+': /* check for -, +, --, ++ */
438 code = (ps.last_u_d ? unary_op : binary_op);
439 unary_delim = true;
440
441 if (*buf_ptr == token[0]) {
442 /* check for doubled character */
443 *e_token++ = *buf_ptr++;
444 /* buffer overflow will be checked at end of loop */
445 if (last_code == ident || last_code == rparen) {
446 code = (ps.last_u_d ? unary_op : postop);
447 /* check for following ++ or -- */
448 unary_delim = false;
449 }
450 }
451 else if (*buf_ptr == '=')
452 /* check for operator += */
453 *e_token++ = *buf_ptr++;
454 else if (*buf_ptr == '>') {
455 /* check for operator -> */
456 *e_token++ = *buf_ptr++;
457 if (!pointer_as_binop) {
458 unary_delim = false;
459 code = unary_op;
460 ps.want_blank = false;
461 }
462 }
463 break; /* buffer overflow will be checked at end of
464 * switch */
465
466 case '=':
467 if (ps.in_or_st)
468 ps.block_init = 1;
469#ifdef undef
470 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
471 e_token[-1] = *buf_ptr++;
472 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
473 *e_token++ = *buf_ptr++;
474 *e_token++ = '='; /* Flip =+ to += */
475 *e_token = 0;
476 }
477#else
478 if (*buf_ptr == '=') {/* == */
479 *e_token++ = '='; /* Flip =+ to += */
480 buf_ptr++;
481 *e_token = 0;
482 }
483#endif
484 code = binary_op;
485 unary_delim = true;
486 break;
487 /* can drop thru!!! */
488
489 case '>':
490 case '<':
491 case '!': /* ops like <, <<, <=, !=, etc */
492 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
493 *e_token++ = *buf_ptr;
494 if (++buf_ptr >= buf_end)
495 fill_buffer();
496 }
497 if (*buf_ptr == '=')
498 *e_token++ = *buf_ptr++;
499 code = (ps.last_u_d ? unary_op : binary_op);
500 unary_delim = true;
501 break;
502
503 default:
504 if (token[0] == '/' && *buf_ptr == '*') {
505 /* it is start of comment */
506 *e_token++ = '*';
507
508 if (++buf_ptr >= buf_end)
509 fill_buffer();
510
511 code = comment;
512 unary_delim = ps.last_u_d;
513 break;
514 }
515 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
516 /*
517 * handle ||, &&, etc, and also things as in int *****i
518 */
519 *e_token++ = *buf_ptr;
520 if (++buf_ptr >= buf_end)
521 fill_buffer();
522 }
523 code = (ps.last_u_d ? unary_op : binary_op);
524 unary_delim = true;
525
526
527 } /* end of switch */
528 if (code != newline) {
529 l_struct = false;
530 last_code = code;
531 }
532 if (buf_ptr >= buf_end) /* check for input buffer empty */
533 fill_buffer();
534 ps.last_u_d = unary_delim;
535 *e_token = '\0'; /* null terminate the token */
536 return (code);
537}
538
539/*
540 * Add the given keyword to the keyword table, using val as the keyword type
541 */
542addkey(key, val)
543 char *key;
544{
545 register struct templ *p = specials;
546 while (p->rwd)
547 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
548 return;
549 else
550 p++;
551 if (p >= specials + sizeof specials / sizeof specials[0])
552 return; /* For now, table overflows are silently
553 * ignored */
554 p->rwd = key;
555 p->rwcode = val;
556 p[1].rwd = 0;
557 p[1].rwcode = 0;
558 return;
559}