Commit | Line | Data |
---|---|---|
075181d6 WJ |
1 | /* Parse C expressions for CCCP. |
2 | Copyright (C) 1987 Free Software Foundation. | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify it | |
5 | under the terms of the GNU General Public License as published by the | |
6 | Free Software Foundation; either version 1, or (at your option) any | |
7 | later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
16 | Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | |
17 | ||
18 | In other words, you are welcome to use, share and improve this program. | |
19 | You are forbidden to forbid anyone else to use, share and improve | |
20 | what you give them. Help stamp out software-hoarding! | |
21 | ||
22 | Adapted from expread.y of GDB by Paul Rubin, July 1986. */ | |
23 | ||
24 | /* Parse a C expression from text in a string */ | |
25 | ||
26 | %{ | |
#include "config.h"
#include <setjmp.h>
#include <stdio.h>
#include <string.h>
29 | /* #define YYDEBUG 1 */ | |
30 | ||
/* Entry points of the lexer and of the error reporter used by the
   generated parser.  */
int yylex ();
void yyerror ();

/* Defined later in this file; declared here because yylex calls it
   before its definition.  */
int parse_escape ();

/* lookup returns a pointer.  Without this declaration the call in yylex
   would implicitly declare it as returning int, which conflicts with the
   pointer-returning definition in the TEST_EXP_READER stub below.  */
struct hashnode *lookup ();

/* Value of the most recently parsed expression; set by the `start' rule.  */
int expression_value;

/* Where yyerror jumps to abandon a parse; armed by parse_c_expression.  */
static jmp_buf parse_return_error;

/* some external tables of character types */
extern unsigned char is_idstart[], is_idchar[];

#ifndef CHAR_TYPE_SIZE
#define CHAR_TYPE_SIZE BITS_PER_UNIT
#endif
43 | %} | |
44 | ||
/* Semantic value carried by tokens and nonterminals.  */
%union {
  /* An integer constant together with its signedness flag.  */
  struct constant
    {
      long value;
      int unsignedp;
    } integer;
  int voidval;
  char *sval;
}

%type <integer> exp exp1 start
%token <integer> INT CHAR
%token <sval> NAME
%token <integer> ERROR

/* Operator precedence and associativity, lowest precedence first.  */
%right '?' ':'
%left ','
%left OR
%left AND
%left '|'
%left '^'
%left '&'
%left EQUAL NOTEQUAL
%left '<' '>' LEQ GEQ
%left LSH RSH
%left '+' '-'
%left '*' '/' '%'
%right UNARY

/* %expect 40 */
71 | \f | |
72 | %% | |
73 | ||
/* A complete #if expression; its value is left in expression_value.  */
start	:	exp1
			{ expression_value = $1.value; }
	;

/* Expressions, including the comma operator.  The value of a comma
   expression is that of its right-hand operand.  */
exp1	:	exp
			{ $$ = $1; }
	|	exp1 ',' exp
			{ $$ = $3; }
	;

/* Expressions, not including the comma operator: the unary operators
   and parenthesized subexpressions.  */
exp	:	'-' exp    %prec UNARY
			{ $$ = $2;
			  $$.value = - $2.value; }
	|	'!' exp    %prec UNARY
			{ $$.unsignedp = 0;
			  $$.value = ! $2.value; }
	|	'+' exp    %prec UNARY
			{ $$ = $2; }
	|	'~' exp    %prec UNARY
			{ $$ = $2;
			  $$.value = ~ $2.value; }
	|	'(' exp1 ')'
			{ $$ = $2; }
	;
99 | ||
/* Binary operators in order of decreasing precedence.

   Values are kept in a `long'.  Whenever an operation must be carried
   out as unsigned, the left operand is cast to `unsigned long' so that
   the usual arithmetic conversions make the whole operation unsigned at
   full width.  A cast to plain `unsigned' would truncate the operand
   where `long' is wider than `int', and would leave the comparison
   operators signed there.  */
exp	:	exp '*' exp
			{ $$.unsignedp = $1.unsignedp || $3.unsignedp;
			  if ($$.unsignedp)
			    $$.value = (unsigned long) $1.value * $3.value;
			  else
			    $$.value = $1.value * $3.value; }
	|	exp '/' exp
			{ if ($3.value == 0)
			    {
			      error ("division by zero in #if");
			      /* Keep going with a harmless divisor.  */
			      $3.value = 1;
			    }
			  $$.unsignedp = $1.unsignedp || $3.unsignedp;
			  if ($$.unsignedp)
			    $$.value = (unsigned long) $1.value / $3.value;
			  else
			    $$.value = $1.value / $3.value; }
	|	exp '%' exp
			{ if ($3.value == 0)
			    {
			      error ("division by zero in #if");
			      /* Keep going with a harmless divisor.  */
			      $3.value = 1;
			    }
			  $$.unsignedp = $1.unsignedp || $3.unsignedp;
			  if ($$.unsignedp)
			    $$.value = (unsigned long) $1.value % $3.value;
			  else
			    $$.value = $1.value % $3.value; }
	|	exp '+' exp
			{ $$.unsignedp = $1.unsignedp || $3.unsignedp;
			  /* Unsigned sums wrap; do not compute them as
			     signed, where overflow is undefined.  */
			  if ($$.unsignedp)
			    $$.value = (unsigned long) $1.value + $3.value;
			  else
			    $$.value = $1.value + $3.value; }
	|	exp '-' exp
			{ $$.unsignedp = $1.unsignedp || $3.unsignedp;
			  if ($$.unsignedp)
			    $$.value = (unsigned long) $1.value - $3.value;
			  else
			    $$.value = $1.value - $3.value; }
	|	exp LSH exp
			{ /* The result type of a shift is that of the
			     left operand alone.  */
			  $$.unsignedp = $1.unsignedp;
			  if ($$.unsignedp)
			    $$.value = (unsigned long) $1.value << $3.value;
			  else
			    $$.value = $1.value << $3.value; }
	|	exp RSH exp
			{ $$.unsignedp = $1.unsignedp;
			  if ($$.unsignedp)
			    $$.value = (unsigned long) $1.value >> $3.value;
			  else
			    $$.value = $1.value >> $3.value; }
	|	exp EQUAL exp
			{ $$.value = ($1.value == $3.value);
			  $$.unsignedp = 0; }
	|	exp NOTEQUAL exp
			{ $$.value = ($1.value != $3.value);
			  $$.unsignedp = 0; }
	|	exp LEQ exp
			{ $$.unsignedp = 0;
			  if ($1.unsignedp || $3.unsignedp)
			    $$.value = (unsigned long) $1.value <= $3.value;
			  else
			    $$.value = $1.value <= $3.value; }
	|	exp GEQ exp
			{ $$.unsignedp = 0;
			  if ($1.unsignedp || $3.unsignedp)
			    $$.value = (unsigned long) $1.value >= $3.value;
			  else
			    $$.value = $1.value >= $3.value; }
	|	exp '<' exp
			{ $$.unsignedp = 0;
			  if ($1.unsignedp || $3.unsignedp)
			    $$.value = (unsigned long) $1.value < $3.value;
			  else
			    $$.value = $1.value < $3.value; }
	|	exp '>' exp
			{ $$.unsignedp = 0;
			  if ($1.unsignedp || $3.unsignedp)
			    $$.value = (unsigned long) $1.value > $3.value;
			  else
			    $$.value = $1.value > $3.value; }
	|	exp '&' exp
			{ $$.value = $1.value & $3.value;
			  $$.unsignedp = $1.unsignedp || $3.unsignedp; }
	|	exp '^' exp
			{ $$.value = $1.value ^ $3.value;
			  $$.unsignedp = $1.unsignedp || $3.unsignedp; }
	|	exp '|' exp
			{ $$.value = $1.value | $3.value;
			  $$.unsignedp = $1.unsignedp || $3.unsignedp; }
	|	exp AND exp
			{ $$.value = ($1.value && $3.value);
			  $$.unsignedp = 0; }
	|	exp OR exp
			{ $$.value = ($1.value || $3.value);
			  $$.unsignedp = 0; }
	|	exp '?' exp ':' exp
			{ $$.value = $1.value ? $3.value : $5.value;
			  $$.unsignedp = $3.unsignedp || $5.unsignedp; }
	|	INT
			{ /* Use the value saved with the token rather than
			     yylval, which belongs to the lexer and may
			     already describe a later token.  */
			  $$ = $1; }
	|	CHAR
			{ $$ = $1; }
	|	NAME
			{ /* An identifier reaching the parser evaluates
			     to zero.  */
			  $$.value = 0;
			  $$.unsignedp = 0; }
	;
203 | %% | |
204 | \f | |
205 | /* During parsing of a C expression, the pointer to the next character | |
206 | is in this variable. */ | |
207 | ||
208 | static char *lexptr; | |
209 | ||
210 | /* Take care of parsing a number (anything that starts with a digit). | |
211 | Set yylval and return the token type; update lexptr. | |
212 | LEN is the number of characters in it. */ | |
213 | ||
214 | /* maybe needs to actually deal with floating point numbers */ | |
215 | ||
216 | int | |
217 | parse_number (olen) | |
218 | int olen; | |
219 | { | |
220 | register char *p = lexptr; | |
221 | register long n = 0; | |
222 | register int c; | |
223 | register int base = 10; | |
224 | register int len = olen; | |
225 | ||
226 | for (c = 0; c < len; c++) | |
227 | if (p[c] == '.') { | |
228 | /* It's a float since it contains a point. */ | |
229 | yyerror ("floating point numbers not allowed in #if expressions"); | |
230 | return ERROR; | |
231 | } | |
232 | ||
233 | yylval.integer.unsignedp = 0; | |
234 | ||
235 | if (len >= 3 && (!strncmp (p, "0x", 2) || !strncmp (p, "0X", 2))) { | |
236 | p += 2; | |
237 | base = 16; | |
238 | len -= 2; | |
239 | } | |
240 | else if (*p == '0') | |
241 | base = 8; | |
242 | ||
243 | while (len > 0) { | |
244 | c = *p++; | |
245 | len--; | |
246 | if (c >= 'A' && c <= 'Z') c += 'a' - 'A'; | |
247 | ||
248 | if (c >= '0' && c <= '9') { | |
249 | n *= base; | |
250 | n += c - '0'; | |
251 | } else if (base == 16 && c >= 'a' && c <= 'f') { | |
252 | n *= base; | |
253 | n += c - 'a' + 10; | |
254 | } else { | |
255 | /* `l' means long, and `u' means unsigned. */ | |
256 | while (1) { | |
257 | if (c == 'l' || c == 'L') | |
258 | ; | |
259 | else if (c == 'u' || c == 'U') | |
260 | yylval.integer.unsignedp = 1; | |
261 | else | |
262 | break; | |
263 | ||
264 | if (len == 0) | |
265 | break; | |
266 | c = *p++; | |
267 | len--; | |
268 | } | |
269 | /* Don't look for any more digits after the suffixes. */ | |
270 | break; | |
271 | } | |
272 | } | |
273 | ||
274 | if (len != 0) { | |
275 | yyerror ("Invalid number in #if expression"); | |
276 | return ERROR; | |
277 | } | |
278 | ||
279 | /* If too big to be signed, consider it unsigned. */ | |
280 | if (n < 0) | |
281 | yylval.integer.unsignedp = 1; | |
282 | ||
283 | lexptr = p; | |
284 | yylval.integer.value = n; | |
285 | return INT; | |
286 | } | |
287 | ||
288 | struct token { | |
289 | char *operator; | |
290 | int token; | |
291 | }; | |
292 | ||
293 | #ifndef NULL | |
294 | #define NULL 0 | |
295 | #endif | |
296 | ||
297 | static struct token tokentab2[] = { | |
298 | {"&&", AND}, | |
299 | {"||", OR}, | |
300 | {"<<", LSH}, | |
301 | {">>", RSH}, | |
302 | {"==", EQUAL}, | |
303 | {"!=", NOTEQUAL}, | |
304 | {"<=", LEQ}, | |
305 | {">=", GEQ}, | |
306 | {NULL, ERROR} | |
307 | }; | |
308 | ||
309 | /* Read one token, getting characters through lexptr. */ | |
310 | ||
311 | int | |
312 | yylex () | |
313 | { | |
314 | register int c; | |
315 | register int namelen; | |
316 | register char *tokstart; | |
317 | register struct token *toktab; | |
318 | ||
319 | retry: | |
320 | ||
321 | tokstart = lexptr; | |
322 | c = *tokstart; | |
323 | /* See if it is a special token of length 2. */ | |
324 | for (toktab = tokentab2; toktab->operator != NULL; toktab++) | |
325 | if (c == *toktab->operator && tokstart[1] == toktab->operator[1]) { | |
326 | lexptr += 2; | |
327 | return toktab->token; | |
328 | } | |
329 | ||
330 | switch (c) { | |
331 | case 0: | |
332 | return 0; | |
333 | ||
334 | case ' ': | |
335 | case '\t': | |
336 | case '\r': | |
337 | case '\n': | |
338 | lexptr++; | |
339 | goto retry; | |
340 | ||
341 | case '\'': | |
342 | lexptr++; | |
343 | c = *lexptr++; | |
344 | if (c == '\\') | |
345 | c = parse_escape (&lexptr); | |
346 | ||
347 | /* Sign-extend the constant if chars are signed on target machine. */ | |
348 | { | |
349 | if (lookup ("__CHAR_UNSIGNED__", sizeof ("__CHAR_UNSIGNED__")-1, -1) | |
350 | || ((c >> (CHAR_TYPE_SIZE - 1)) & 1) == 0) | |
351 | yylval.integer.value = c & ((1 << CHAR_TYPE_SIZE) - 1); | |
352 | else | |
353 | yylval.integer.value = c | ~((1 << CHAR_TYPE_SIZE) - 1); | |
354 | } | |
355 | ||
356 | yylval.integer.unsignedp = 0; | |
357 | c = *lexptr++; | |
358 | if (c != '\'') { | |
359 | yyerror ("Invalid character constant in #if"); | |
360 | return ERROR; | |
361 | } | |
362 | ||
363 | return CHAR; | |
364 | ||
365 | /* some of these chars are invalid in constant expressions; | |
366 | maybe do something about them later */ | |
367 | case '/': | |
368 | case '+': | |
369 | case '-': | |
370 | case '*': | |
371 | case '%': | |
372 | case '|': | |
373 | case '&': | |
374 | case '^': | |
375 | case '~': | |
376 | case '!': | |
377 | case '@': | |
378 | case '<': | |
379 | case '>': | |
380 | case '(': | |
381 | case ')': | |
382 | case '[': | |
383 | case ']': | |
384 | case '.': | |
385 | case '?': | |
386 | case ':': | |
387 | case '=': | |
388 | case '{': | |
389 | case '}': | |
390 | case ',': | |
391 | lexptr++; | |
392 | return c; | |
393 | ||
394 | case '"': | |
395 | yyerror ("double quoted strings not allowed in #if expressions"); | |
396 | return ERROR; | |
397 | } | |
398 | if (c >= '0' && c <= '9') { | |
399 | /* It's a number */ | |
400 | for (namelen = 0; | |
401 | c = tokstart[namelen], is_idchar[c] || c == '.'; | |
402 | namelen++) | |
403 | ; | |
404 | return parse_number (namelen); | |
405 | } | |
406 | ||
407 | if (!is_idstart[c]) { | |
408 | yyerror ("Invalid token in expression"); | |
409 | return ERROR; | |
410 | } | |
411 | ||
412 | /* It is a name. See how long it is. */ | |
413 | ||
414 | for (namelen = 0; is_idchar[tokstart[namelen]]; namelen++) | |
415 | ; | |
416 | ||
417 | lexptr += namelen; | |
418 | return NAME; | |
419 | } | |
420 | ||
421 | ||
422 | /* Parse a C escape sequence. STRING_PTR points to a variable | |
423 | containing a pointer to the string to parse. That pointer | |
424 | is updated past the characters we use. The value of the | |
425 | escape sequence is returned. | |
426 | ||
427 | A negative value means the sequence \ newline was seen, | |
428 | which is supposed to be equivalent to nothing at all. | |
429 | ||
430 | If \ is followed by a null character, we return a negative | |
431 | value and leave the string pointer pointing at the null character. | |
432 | ||
433 | If \ is followed by 000, we return 0 and leave the string pointer | |
434 | after the zeros. A value of 0 does not mean end of string. */ | |
435 | ||
436 | int | |
437 | parse_escape (string_ptr) | |
438 | char **string_ptr; | |
439 | { | |
440 | register int c = *(*string_ptr)++; | |
441 | switch (c) | |
442 | { | |
443 | case 'a': | |
444 | return TARGET_BELL; | |
445 | case 'b': | |
446 | return TARGET_BS; | |
447 | case 'e': | |
448 | return 033; | |
449 | case 'f': | |
450 | return TARGET_FF; | |
451 | case 'n': | |
452 | return TARGET_NEWLINE; | |
453 | case 'r': | |
454 | return TARGET_CR; | |
455 | case 't': | |
456 | return TARGET_TAB; | |
457 | case 'v': | |
458 | return TARGET_VT; | |
459 | case '\n': | |
460 | return -2; | |
461 | case 0: | |
462 | (*string_ptr)--; | |
463 | return 0; | |
464 | case '^': | |
465 | c = *(*string_ptr)++; | |
466 | if (c == '\\') | |
467 | c = parse_escape (string_ptr); | |
468 | if (c == '?') | |
469 | return 0177; | |
470 | return (c & 0200) | (c & 037); | |
471 | ||
472 | case '0': | |
473 | case '1': | |
474 | case '2': | |
475 | case '3': | |
476 | case '4': | |
477 | case '5': | |
478 | case '6': | |
479 | case '7': | |
480 | { | |
481 | register int i = c - '0'; | |
482 | register int count = 0; | |
483 | while (++count < 3) | |
484 | { | |
485 | c = *(*string_ptr)++; | |
486 | if (c >= '0' && c <= '7') | |
487 | i = (i << 3) + c - '0'; | |
488 | else | |
489 | { | |
490 | (*string_ptr)--; | |
491 | break; | |
492 | } | |
493 | } | |
494 | if ((i & ~((1 << CHAR_TYPE_SIZE) - 1)) != 0) | |
495 | { | |
496 | i &= (1 << CHAR_TYPE_SIZE) - 1; | |
497 | warning ("octal character constant does not fit in a byte"); | |
498 | } | |
499 | return i; | |
500 | } | |
501 | case 'x': | |
502 | { | |
503 | register int i = 0; | |
504 | register int count = 0; | |
505 | for (;;) | |
506 | { | |
507 | c = *(*string_ptr)++; | |
508 | if (c >= '0' && c <= '9') | |
509 | i = (i << 4) + c - '0'; | |
510 | else if (c >= 'a' && c <= 'f') | |
511 | i = (i << 4) + c - 'a' + 10; | |
512 | else if (c >= 'A' && c <= 'F') | |
513 | i = (i << 4) + c - 'A' + 10; | |
514 | else | |
515 | { | |
516 | (*string_ptr)--; | |
517 | break; | |
518 | } | |
519 | } | |
520 | if ((i & ~((1 << BITS_PER_UNIT) - 1)) != 0) | |
521 | { | |
522 | i &= (1 << BITS_PER_UNIT) - 1; | |
523 | warning ("hex character constant does not fit in a byte"); | |
524 | } | |
525 | return i; | |
526 | } | |
527 | default: | |
528 | return c; | |
529 | } | |
530 | } | |
531 | ||
532 | void | |
533 | yyerror (s) | |
534 | char *s; | |
535 | { | |
536 | error (s); | |
537 | longjmp (parse_return_error, 1); | |
538 | } | |
539 | \f | |
540 | /* This page contains the entry point to this file. */ | |
541 | ||
542 | /* Parse STRING as an expression, and complain if this fails | |
543 | to use up all of the contents of STRING. */ | |
544 | /* We do not support C comments. They should be removed before | |
545 | this function is called. */ | |
546 | ||
547 | int | |
548 | parse_c_expression (string) | |
549 | char *string; | |
550 | { | |
551 | lexptr = string; | |
552 | ||
553 | if (lexptr == 0 || *lexptr == 0) { | |
554 | error ("empty #if expression"); | |
555 | return 0; /* don't include the #if group */ | |
556 | } | |
557 | ||
558 | /* if there is some sort of scanning error, just return 0 and assume | |
559 | the parsing routine has printed an error message somewhere. | |
560 | there is surely a better thing to do than this. */ | |
561 | if (setjmp (parse_return_error)) | |
562 | return 0; | |
563 | ||
564 | if (yyparse ()) | |
565 | return 0; /* actually this is never reached | |
566 | the way things stand. */ | |
567 | if (*lexptr) | |
568 | error ("Junk after end of expression."); | |
569 | ||
570 | return expression_value; /* set by yyparse () */ | |
571 | } | |
572 | \f | |
573 | #ifdef TEST_EXP_READER | |
/* main program, for testing purposes.  Reads expressions from standard
   input, one per line, and prints the value parse_c_expression returns
   for each.  Stops at end of input; a final line with no newline is
   not evaluated.  Lines longer than the buffer are truncated.  */
int
main ()
{
  char buf[1024];
  extern int yydebug;
  int initialize_random_junk ();
/*
  yydebug = 1;
*/
  initialize_random_junk ();

  for (;;) {
    int n = 0;
    /* Keep getchar's result in an int so that EOF stays distinct from
       every character value.  */
    int c;

    printf ("enter expression: ");
    while ((c = getchar ()) != '\n' && c != EOF)
      if (n < (int) sizeof buf - 1)	/* leave room for the null */
	buf[n++] = c;
    if (c == EOF)
      break;
    buf[n] = '\0';
    printf ("parser returned %d\n", parse_c_expression (buf));
  }
  return 0;
}
596 | ||
/* table to tell if char can be part of a C identifier.  */
unsigned char is_idchar[256];
/* table to tell if char can be first char of a C identifier.  */
unsigned char is_idstart[256];
/* table to tell if c is horizontal space.  isspace () thinks that
   newline is space; this is not a good idea for this program.  */
char is_hor_space[256];

/*
 * Fill in the character-class tables above.  Must be called before any
 * routine that refers to them.  Always returns 0.
 */
int
initialize_random_junk ()
{
  register int i;

  /*
   * Set up is_idchar and is_idstart tables.  These should be
   * faster than saying (is_alpha (c) || c == '_'), etc.
   * Letters may start an identifier; digits may only continue one.
   */
  for (i = 'a'; i <= 'z'; i++) {
    ++is_idchar[i - 'a' + 'A'];
    ++is_idchar[i];
    ++is_idstart[i - 'a' + 'A'];
    ++is_idstart[i];
  }
  for (i = '0'; i <= '9'; i++)
    ++is_idchar[i];
  ++is_idchar['_'];
  ++is_idstart['_'];
#if DOLLARS_IN_IDENTIFIERS
  ++is_idchar['$'];
  ++is_idstart['$'];
#endif

  /* horizontal space table */
  ++is_hor_space[' '];
  ++is_hor_space['\t'];

  return 0;
}
637 | ||
/* Test-driver stand-in for the error reporter: print MSG on standard
   output with an "error: " prefix.  MSG must be declared as a string;
   an undeclared K&R parameter would default to int.  Always returns 0.  */
int
error (msg)
     char *msg;
{
  printf ("error: %s\n", msg);
  return 0;
}
642 | ||
/* Test-driver stand-in for the warning reporter: print MSG on standard
   output with a "warning: " prefix.  MSG must be declared as a string;
   an undeclared K&R parameter would default to int.  Always returns 0.  */
int
warning (msg)
     char *msg;
{
  printf ("warning: %s\n", msg);
  return 0;
}
647 | ||
648 | struct hashnode * | |
649 | lookup (name, len, hash) | |
650 | char *name; | |
651 | int len; | |
652 | int hash; | |
653 | { | |
654 | return (DEFAULT_SIGNED_CHAR) ? 0 : ((struct hashnode *) -1); | |
655 | } | |
656 | #endif |