fix signal handling for /bin/sh
[unix-history] / usr / src / usr.bin / indent / lexi.c
CommitLineData
c0bc4ef7
DF
1/*
2 * Copyright (c) 1980 Regents of the University of California.
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 */
6
/* SCCS identification string; left out of the build when lint is defined. */
#ifndef lint
static char sccsid[] = "@(#)lexi.c 5.1 (Berkeley) %G%";
#endif /* not lint */
4b365fcd
KM
10
11/*
12
13 Copyright (C) 1976
14 by the
15 Board of Trustees
16 of the
17 University of Illinois
18
19 All rights reserved
20
21
22NAME:
23 lexi
24
25FUNCTION:
26 This is the token scanner for indent
27
28ALGORITHM:
29 1) Strip off intervening blanks and/or tabs.
30 2) If it is an alphanumeric token, move it to the token buffer "token".
31 Check if it is a special reserved word that indent will want to
32 know about.
33 3) Non-alphanumeric tokens are handled with a big switch statement. A
34 flag is kept to remember if the last token was a "unary delimiter",
35 which forces a following operator to be unary as opposed to binary.
36
37PARAMETERS:
38 None
39
40RETURNS:
41 An integer code indicating the type of token scanned.
42
43GLOBALS:
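44 buf_ptr = Advanced past the characters of the token that is returned
45 had_eof = When set, a newline is reported as code 0 instead of newline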
46 last_u_d = Set to true iff this token is a "unary delimiter"
47
48CALLS:
49 fill_buffer
50 printf (lib)
51
52CALLED BY:
53 main
54
55NOTES:
56 Start of comment is passed back so that the comment can be scanned by
57 pr_comment.
58
59 Strings and character literals are returned just like identifiers.
60
61HISTORY:
62 initial coding November 1976 D A Willcox of CAC
63 1/7/77 D A Willcox of CAC Fix to provide proper handling
64 of "int a -1;"
65
66*/\f
67
68/* Here we have the token scanner for indent. It scans off one token and
69 puts it in the global variable "token". It returns a code, indicating the
70 type of token scanned. */
71
72#include "indent_globs.h";
73#include "indent_codes.h";
74
75
76
77#define alphanum 1
78#define opchar 3
79
/*
 * One entry of the reserved-word table: the spelling of a word and the
 * class code that lexi() switches on when that word is scanned.
 */
struct templ {
    char *rwd;			/* spelling of the word; 0 ends the table */
    int rwcode;			/* keyword class, listed above specials[] */
};

/*
 * Reserved words the scanner treats specially, terminated by a { 0, 0 }
 * entry.  The class codes are:
 *   0  handled like an ordinary identifier
 *   1  switch
 *   2  case and default
 *   3  struct
 *   4  keywords that start or qualify a declaration
 *   5  if, while, for
 *   6  else, do
 */
struct templ specials[] =
{
    { "switch", 1 },
    { "case", 2 },
    { "struct", 3 },
    { "default", 2 },
    { "int", 4 },
    { "char", 4 },
    { "float", 4 },
    { "double", 4 },
    { "long", 4 },
    { "short", 4 },
    { "typedef", 4 },
    { "unsigned", 4 },
    { "register", 4 },
    { "static", 4 },
    { "global", 4 },
    { "extern", 4 },
    { "if", 5 },
    { "while", 5 },
    { "for", 5 },
    { "else", 6 },
    { "do", 6 },
    { "sizeof", 0 },
    { 0, 0 }
};
111
/*
 * Class of each 7-bit character code, indexed by the code itself:
 *   0  neither of the classes below
 *   1  letter, digit or underscore
 *   3  character that can be part of an operator
 * Each row below covers sixteen consecutive codes.
 */
char chartype[128] =
{
    /* 000 - 017: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 020 - 037: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 040 - 057: space and punctuation up to the slash */
    0, 3, 0, 0, 0, 3, 3, 0, 0, 0, 3, 3, 0, 3, 3, 3,
    /* 060 - 077: digits, then colon through question mark */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 100 - 117: at sign, then A through O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 120 - 137: P through Z, then brackets, caret and underscore */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 140 - 157: backquote, then a through o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 160 - 177: p through z, then braces, bar, tilde and DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
132
/*
 * True if the last thing scanned was a newline or a form feed.  lexi()
 * copies this into col_1 at the start of each call and then clears it.
 */
int last_nl = true;
135
136
137
138int lexi () {
139 register char *tok;
140 /* local pointer to next char in token */
141 register int i;
142 /* local loop counter */
143 register char *j;
144 /* used for searching thru list of reserved words */
145 int unary_delim;
146 /* this is set to 1 if the current token forces a following operator to be
147 unary */
148 static int last_code;
149 /* the last token type returned */
150 static int l_struct;
151 /* set to 1 if the last token was 'struct' */
152 int found_it;
153 int code; /* internal code to be returned */
154 char qchar; /* the delimiter character for a string */
155
156 tok = token; /* point to start of place to save token */
157 unary_delim = false;
158 col_1 = last_nl; /* tell world that this token started in column
159 1 iff the last thing scanned was nl */
160 last_nl = false;
161
162 while (*buf_ptr == ' ' || *buf_ptr == '\t') {
163 /* get rid of blanks */
164 col_1 = false; /* leading blanks imply token is not in column 1
165 */
166 if (++buf_ptr >= buf_end)
167 fill_buffer ();
168 }
169
170/*----------------------------------------------------------*\
171| Scan an alphanumeric token
172\*----------------------------------------------------------*/
173
174 if (chartype[*buf_ptr & 0177] == alphanum) {
175 /* we have a character or number */
176 while (chartype[*buf_ptr & 0177] == alphanum) {
177 /* copy it over */
178 *tok++ = *buf_ptr++;
179 if (buf_ptr >= buf_end)
180 fill_buffer ();
181 }
182
183 *tok++ = '\0';
184
185 if (l_struct) { /* if last token was 'struct', then this token
186 should be treated as a declaration */
187 l_struct = false;
188 last_code = ident;
189 last_u_d = true;
190 return (decl);
191 }
192
193 last_u_d = false; /* operator after indentifier is binary */
194
195 for (i = 0; specials[i].rwd != 0; ++i) {
196 /* this loop will check if the token is a keyword. if so, a following
197 operator is unary */
198 last_code = ident; /* remember that this is the code we will return
199 */
200 j = specials[i].rwd;
201 /* point at ith reserved word */
202 tok = token; /* point at scanned toekn */
203 found_it = true; /* set to false if not found */
204 do {
205 if (*tok++ != *j) {
206 found_it = false;
207 break;
208 }
209 } while (*j++);
210
211 if (found_it) { /* we have a keyword */
212 last_u_d = true;
213 switch (specials[i].rwcode) {
214 case 1: /* it is a switch */
215 return (swstmt);
216 case 2: /* a case or default */
217 return (casestmt);
218
219 case 3: /* a "struct" */
220 l_struct = true;
221 /* Next time around, we will want to know that we have had
222 a 'struct' */
223 case 4: /* one of the declaration keywords */
224 if(p_l_follow) break; /* inside parens: cast */
225 last_code = decl;
226 return (decl);
227
228 case 5: /* if, while, for */
229 return (sp_paren);
230
231 case 6: /* do, else */
232 return (sp_nparen);
233
234 default: /* all others are treated like any other
235 identifier */
236 return (ident);
237 } /* end of switch */
238 } /* end of if (found_it) */
239
240 }
241
242 if (last_code == decl) /* if this is a declared variable, then
243 following sign is unary */
244 last_u_d = true; /* will make "int a -1" work */
245 last_code = ident;
246 return (ident); /* the ident is not in the list */
247 } /* end of procesing for alpanum character */
248
249
250
251/*----------------------------------------------------------*\
252| Scan a non-alphanumeric token
253\*----------------------------------------------------------*/
254
255 *tok++ = *buf_ptr; /* if it is only a one-character token, it is
256 moved here */
257 *tok = '\0';
258 if (++buf_ptr >= buf_end)
259 fill_buffer ();
260
261 switch (*token) {
262 case '\n':
263 unary_delim = last_u_d;
264 last_nl = true; /* remember that we just had a newline */
265 code = (had_eof ? 0 : newline);
266 /* if data has been exausted, the newline is a dummy, and we should
267 return code to stop */
268 break;
269
270 case '\'': /* start of quoted character */
271 qchar = '\''; /* remember final delimiter */
272 goto copy_lit; /* and go to common literal code */
273
274 case '"': /* start of string */
275 qchar = '"';
276
277 copy_lit:
278 do { /* copy the string */
279 while (1) { /* move one character or [/<char>]<char> */
280 if (*buf_ptr == '\n') {
281 /* check for unterminated literal */
282 printf ("%d: Unterminated literal\n", line_no);
283 goto stop_lit;
284 /* Don't copy any more */
285 }
286
287 *tok = *buf_ptr++;
288 if (buf_ptr >= buf_end)
289 fill_buffer ();
290 if (had_eof || ((tok - token) > (bufsize - 2))) {
291 printf ("Unterminated literal\n");
292 ++tok;
293 goto stop_lit;
294 /* get outof literal copying loop */
295 }
296
297 if (*tok == '\\') {
298 /* if escape, copy extra char */
299 if (*buf_ptr == '\n')
300 /* check for escaped newline */
301 ++line_no;
302 *(++tok) = *buf_ptr++;
303 ++tok; /* we must increment this again because we
304 copied two chars */
305 if (buf_ptr >= buf_end)
306 fill_buffer ();
307 }
308 else
309 break; /* we copied one character */
310 } /* end of while (1) */
311 } while (*tok++ != qchar);
312
313 stop_lit:
314 code = ident;
315 break;
316
317 case ('('):
318 case ('['):
319 unary_delim = true;
320 code = lparen;
321 break;
322
323 case (')'):
324 case (']'):
325 code = rparen;
326 break;
327
328 case '#':
329 unary_delim = last_u_d;
330 code = preesc;
331 break;
332
333 case '?':
334 unary_delim = true;
335 code = question;
336 break;
337
338 case (':'):
339 code = colon;
340 unary_delim = true;
341 break;
342
343 case (';'):
344 unary_delim = true;
345 code = semicolon;
346 break;
347
348 case ('{'):
349 unary_delim = true;
350 code = lbrace;
351 break;
352
353 case ('}'):
354 unary_delim = true;
355 code = rbrace;
356 break;
357
358 case 014: /* a form feed */
359 unary_delim = last_u_d;
360 last_nl = true; /* remember this so we can set 'col_1' right */
361 code = form_feed;
362 break;
363
364 case (','):
365 unary_delim = true;
366 code = comma;
367 break;
368
369 case '.':
370 unary_delim = false;
371 code = period;
372 break;
373
374 case '-':
375 case '+': /* check for -, +, --, ++ */
376 code = (last_u_d ? unary_op : binary_op);
377 unary_delim = true;
378
379 if (*buf_ptr == token[0]) {
380 /* check for doubled character */
381 *tok++ = *buf_ptr++;
382 /* buffer overflow will be checked at end of loop */
383 if (last_code == ident || last_code == rparen) {
384 code = (last_u_d ? unary_op : postop);
385 /* check for following ++ or -- */
386 unary_delim = false;
387 }
388 }
389 else
390 if (*buf_ptr == '>' || *buf_ptr == '=')
391 /* check for operator -> or += */
392 *tok++ = *buf_ptr++;
393 /* buffer overflow will be checked at end of switch */
394
395 break;
396
397 case '=':
398 if (chartype[*buf_ptr] == opchar) {
399 /* we have two char assignment */
400 *tok++ = *buf_ptr;
401 /* move second character */
402 if (++buf_ptr >= buf_end)
403 fill_buffer ();
404 }
405
406 code = binary_op;
407 unary_delim = true;
408 if (token[1] != '<' && token[1] != '>')
409 /* check for possible 3 char operator */
410 break;
411 /* can drop thru!!! */
412
413 case '>':
414 case '<':
415 case '!': /* ops like <, <<, <=, !=, etc */
416 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
417 *tok++ = *buf_ptr;
418 if (++buf_ptr >= buf_end)
419 fill_buffer ();
420 }
421
422 if (*buf_ptr == '=')
423 *tok++ = *buf_ptr++;
424 code = (last_u_d ? unary_op : binary_op);
425 unary_delim = true;
426 break;
427
428 default:
429 if (token[0] == '/' && *buf_ptr == '*') {
430 /* it is start of comment */
431 *tok++ = '*';
432
433 if (++buf_ptr >= buf_end)
434 fill_buffer ();
435
436 code = comment;
437 unary_delim = last_u_d;
438 break;
439 }
440
441 while (*(tok - 1) == *buf_ptr || *buf_ptr=='=') {
442 /* handle ||, &&, etc, and also things as in int *****i */
443 *tok++ = *buf_ptr;
444 if (++buf_ptr >= buf_end)
445 fill_buffer ();
446 }
447
448
449 code = (last_u_d ? unary_op : binary_op);
450 unary_delim = true;
451
452
453 } /* end of switch */
454
455 if (code != newline) {
456 l_struct = false;
457 last_code = code;
458 }
459
460 if (buf_ptr >= buf_end) /* check for input buffer empty */
461 fill_buffer ();
462 last_u_d = unary_delim;
463 *tok = '\0'; /* null terminate the token */
464 return (code);
465};