Commit | Line | Data |
---|---|---|
d2713949 WJ |
1 | |
2 | /******************************************** | |
3 | scan.c | |
4 | copyright 1991, Michael D. Brennan | |
5 | ||
6 | This is a source file for mawk, an implementation of | |
7 | the AWK programming language. | |
8 | ||
9 | Mawk is distributed without warranty under the terms of | |
10 | the GNU General Public License, version 2, 1991. | |
11 | ********************************************/ | |
12 | ||
13 | ||
14 | /* $Log: scan.c,v $ | |
15 | * Revision 5.2 92/02/21 14:16:53 brennan | |
16 | * fix: getline <= | |
17 | * | |
18 | * Revision 5.1 91/12/05 07:56:27 brennan | |
19 | * 1.1 pre-release | |
20 | * | |
21 | */ | |
22 | ||
23 | ||
24 | #include "mawk.h" | |
25 | #include "sizes.h" | |
26 | #include "scan.h" | |
27 | #include "memory.h" | |
28 | #include "field.h" | |
29 | #include "init.h" | |
30 | #include "fin.h" | |
31 | #include "repl.h" | |
32 | #include "code.h" | |
33 | ||
34 | #if HAVE_FCNTL_H | |
35 | #include <fcntl.h> | |
36 | #endif | |
37 | ||
38 | #include "files.h" | |
39 | ||
40 | ||
/* static functions */
/* Each of these is defined later in this file. */
static void PROTO(scan_fillbuff, (void) ) ;   /* read the next chunk of the program file into buffer */
static void PROTO(scan_open, (void) ) ;       /* open pfile_name and set program_fd */
static int PROTO(slow_next, (void) ) ;        /* next character when the current one is a 0 byte */
static void PROTO(eat_comment, (void) ) ;     /* skip up to (not including) the newline */
static void PROTO(eat_semi_colon, (void) ) ;  /* skip spaces and at most one ';' */
static double PROTO(collect_decimal, (int, int *) ) ; /* scan a numeric constant */
static int PROTO(collect_string, (void) ) ;   /* scan a "..." string constant */
static int PROTO(collect_RE, (void) ) ;       /* scan a /.../ regular expression */
50 | ||
51 | ||
52 | /*----------------------------- | |
53 | program file management | |
54 | *----------------------------*/ | |
55 | ||
char *pfile_name ;             /* program file that scan_open() opens; "-" means fd 0 */
STRING *program_string ;       /* holds the program text when it comes from the command line */
PFILE *pfile_list ;            /* further program files; slow_next() pops one at each eof */
static unsigned char *buffer ; /* BUFFSZ+1 byte read buffer, allocated only for file programs */
static unsigned char *buffp ;  /* next character to be scanned */
/* unsigned so it works with 8 bit chars */
static int program_fd ;        /* -1 for a command line program, else the open descriptor */
static int eof_flag ;          /* set once no more data will be read from the current source */
64 | ||
65 | void scan_init(cmdline_program) | |
66 | char * cmdline_program ; | |
67 | { | |
68 | if ( cmdline_program ) | |
69 | { | |
70 | program_fd = -1 ; /* command line program */ | |
71 | program_string = new_STRING((char *)0, | |
72 | strlen(cmdline_program) + 1 ) ; | |
73 | (void) strcpy(program_string->str, cmdline_program) ; | |
74 | /* simulate file termination */ | |
75 | program_string->str[program_string->len-1] = '\n' ; | |
76 | buffp = (unsigned char *) program_string->str ; | |
77 | eof_flag = 1 ; | |
78 | } | |
79 | else /* program from file[s] */ | |
80 | { | |
81 | scan_open() ; | |
82 | buffp = buffer = (unsigned char *) zmalloc( BUFFSZ+1 ) ; | |
83 | scan_fillbuff() ; | |
84 | } | |
85 | ||
86 | eat_nl() ; /* scan to first token */ | |
87 | if ( next() == 0 ) { errmsg(0, "no program") ; mawk_exit(1) ; } | |
88 | un_next() ; | |
89 | ||
90 | } | |
91 | ||
92 | static void scan_open() /* open pfile_name */ | |
93 | { | |
94 | if ( pfile_name[0] == '-' && pfile_name[1] == 0 ) | |
95 | program_fd = 0 ; | |
96 | else | |
97 | if ( (program_fd = open(pfile_name, O_RDONLY, 0)) == -1 ) | |
98 | { errmsg( errno, "cannot open %s", pfile_name) ; mawk_exit(1) ; } | |
99 | } | |
100 | ||
101 | void scan_cleanup() | |
102 | { | |
103 | if ( program_fd >= 0 ) zfree(buffer, BUFFSZ+1) ; | |
104 | else free_STRING(program_string) ; | |
105 | ||
106 | if ( program_fd > 0 ) (void) close(program_fd) ; | |
107 | ||
108 | /* redefine SPACE as [ \t\n] */ | |
109 | ||
110 | scan_code['\n'] = posix_space_flag && rs_shadow.type != SEP_MLR | |
111 | ? SC_UNEXPECTED : SC_SPACE ; | |
112 | scan_code['\f'] = SC_UNEXPECTED ; /*value doesn't matter */ | |
113 | scan_code['\013'] = SC_UNEXPECTED ; /* \v not space */ | |
114 | scan_code['\r'] = SC_UNEXPECTED ; | |
115 | } | |
116 | ||
117 | /*-------------------------------- | |
118 | global variables shared by yyparse() and yylex() | |
119 | and used for error messages too | |
120 | *-------------------------------*/ | |
121 | ||
int current_token = -1 ;       /* token most recently returned by yylex(); -1 before any */
unsigned token_lineno ;        /* value of lineno when the current token was started */
unsigned compile_error_count ; /* not touched in this file -- presumably kept by compile_error() */
int NR_flag ; /* are we tracking NR */   /* set when yylex() sees an ST_NR identifier */
int paren_cnt ;                /* count of '(' not yet matched by ')' */
int brace_cnt ;                /* count of '{' not yet matched by '}' */
int print_flag ; /* changes meaning of '>' */
int getline_flag ; /* changes meaning of '<' */

extern YYSTYPE yylval ;        /* value of the current token, filled in by yylex() */
132 | ||
133 | /*---------------------------------------- | |
134 | file reading functions | |
135 | next() and un_next(c) are macros in scan.h | |
136 | ||
137 | *---------------------*/ | |
138 | ||
/* line number being scanned; reset to 1 by slow_next() whenever
   the next program file is opened */
static unsigned lineno = 1 ;
140 | ||
141 | ||
142 | static void scan_fillbuff() | |
143 | { unsigned r ; | |
144 | ||
145 | r = fillbuff(program_fd, (char *)buffer, BUFFSZ) ; | |
146 | if ( r < BUFFSZ ) | |
147 | { eof_flag = 1 ; | |
148 | /* check eof is terminated */ | |
149 | if ( r && buffer[r-1] != '\n' ) | |
150 | { buffer[r] = '\n' ; buffer[r+1] = 0 ; } | |
151 | } | |
152 | } | |
153 | ||
154 | /* read one character -- slowly */ | |
155 | static int slow_next() | |
156 | { | |
157 | ||
158 | while ( *buffp == 0 ) | |
159 | { | |
160 | if ( !eof_flag ) | |
161 | { buffp = buffer ; scan_fillbuff() ; } | |
162 | else | |
163 | if ( pfile_list /* open another program file */ ) | |
164 | { | |
165 | PFILE *q ; | |
166 | ||
167 | if ( program_fd > 0 ) (void) close(program_fd) ; | |
168 | eof_flag = 0 ; | |
169 | pfile_name = pfile_list->fname ; | |
170 | q = pfile_list ; | |
171 | pfile_list = pfile_list->link ; | |
172 | ZFREE(q) ; | |
173 | scan_open() ; | |
174 | token_lineno = lineno = 1 ; | |
175 | } | |
176 | else break /* real eof */ ; | |
177 | } | |
178 | ||
179 | return *buffp++ ; /* note can un_next() , eof which is zero */ | |
180 | } | |
181 | ||
182 | static void eat_comment() | |
183 | { register int c ; | |
184 | ||
185 | while ( (c = next()) != '\n' && scan_code[c] ) ; | |
186 | un_next() ; | |
187 | } | |
188 | ||
189 | /* this is how we handle extra semi-colons that are | |
190 | now allowed to separate pattern-action blocks | |
191 | ||
192 | A proof that they are useless clutter to the language: | |
193 | we throw them away | |
194 | */ | |
195 | ||
196 | static void eat_semi_colon() | |
197 | /* eat one semi-colon on the current line */ | |
198 | { register int c ; | |
199 | ||
200 | while ( scan_code[c = next()] == SC_SPACE ) ; | |
201 | if ( c != ';' ) un_next() ; | |
202 | } | |
203 | ||
204 | void eat_nl() /* eat all space including newlines */ | |
205 | { | |
206 | while ( 1 ) | |
207 | switch( scan_code[next()] ) | |
208 | { | |
209 | case SC_COMMENT : | |
210 | eat_comment() ; | |
211 | break ; | |
212 | ||
213 | case SC_NL : lineno++ ; | |
214 | /* fall thru */ | |
215 | case SC_SPACE : break ; | |
216 | default : | |
217 | un_next() ; return ; | |
218 | } | |
219 | } | |
220 | ||
221 | int yylex() | |
222 | { | |
223 | register int c ; | |
224 | ||
225 | token_lineno = lineno ; | |
226 | ||
227 | reswitch: | |
228 | ||
229 | switch( scan_code[c = next()] ) | |
230 | { | |
231 | case 0 : | |
232 | ct_ret(EOF) ; | |
233 | ||
234 | case SC_SPACE : goto reswitch ; | |
235 | ||
236 | case SC_COMMENT : | |
237 | eat_comment() ; goto reswitch ; | |
238 | ||
239 | case SC_NL : | |
240 | lineno++ ; eat_nl() ; | |
241 | ct_ret(NL) ; | |
242 | ||
243 | case SC_ESCAPE : | |
244 | while ( scan_code[ c = next() ] == SC_SPACE ) ; | |
245 | if ( c == '\n') | |
246 | { token_lineno = ++lineno ; goto reswitch ; } | |
247 | if ( c == 0 ) ct_ret(EOF) ; | |
248 | un_next() ; | |
249 | yylval.ival = '\\' ; | |
250 | ct_ret(UNEXPECTED) ; | |
251 | ||
252 | ||
253 | case SC_SEMI_COLON : | |
254 | eat_nl() ; | |
255 | ct_ret(SEMI_COLON) ; | |
256 | ||
257 | case SC_LBRACE : | |
258 | eat_nl() ; brace_cnt++ ; | |
259 | ct_ret(LBRACE) ; | |
260 | ||
261 | case SC_PLUS : | |
262 | switch( next() ) | |
263 | { | |
264 | case '+' : | |
265 | yylval.ival = '+' ; | |
266 | string_buff[0] = | |
267 | string_buff[1] = '+' ; | |
268 | string_buff[2] = 0 ; | |
269 | ct_ret(INC_or_DEC) ; | |
270 | ||
271 | case '=' : | |
272 | ct_ret(ADD_ASG) ; | |
273 | ||
274 | default : un_next() ; ct_ret(PLUS) ; | |
275 | } | |
276 | ||
277 | case SC_MINUS : | |
278 | switch( next() ) | |
279 | { | |
280 | case '-' : | |
281 | yylval.ival = '-' ; | |
282 | string_buff[0] = | |
283 | string_buff[1] = '-' ; | |
284 | string_buff[2] = 0 ; | |
285 | ct_ret(INC_or_DEC) ; | |
286 | ||
287 | case '=' : | |
288 | ct_ret(SUB_ASG) ; | |
289 | ||
290 | default : un_next() ; ct_ret(MINUS) ; | |
291 | } | |
292 | ||
293 | case SC_COMMA : eat_nl() ; ct_ret(COMMA) ; | |
294 | ||
295 | case SC_MUL : test1_ret('=', MUL_ASG, MUL) ; | |
296 | case SC_DIV : | |
297 | { static int can_precede_div[] = | |
298 | { DOUBLE, STRING_, RPAREN, ID, D_ID, RE, RBOX, FIELD, | |
299 | GETLINE, INC_or_DEC, -1 } ; | |
300 | ||
301 | int *p = can_precede_div ; | |
302 | ||
303 | do | |
304 | if ( *p == current_token ) | |
305 | { | |
306 | if ( *p != INC_or_DEC ) | |
307 | test1_ret('=', DIV_ASG, DIV) ; | |
308 | ||
309 | if ( next() == '=' ) | |
310 | { un_next() ; ct_ret( collect_RE() ) ; } | |
311 | } | |
312 | ||
313 | while ( * ++p != -1 ) ; | |
314 | ||
315 | ct_ret( collect_RE() ) ; | |
316 | } | |
317 | ||
318 | case SC_MOD : test1_ret('=', MOD_ASG, MOD) ; | |
319 | case SC_POW : test1_ret('=' , POW_ASG, POW) ; | |
320 | case SC_LPAREN : | |
321 | paren_cnt++ ; | |
322 | ct_ret(LPAREN) ; | |
323 | ||
324 | case SC_RPAREN : | |
325 | if ( --paren_cnt < 0 ) | |
326 | { compile_error( "extra ')'" ) ; | |
327 | paren_cnt = 0 ; | |
328 | goto reswitch ; } | |
329 | ||
330 | ct_ret(RPAREN) ; | |
331 | ||
332 | case SC_LBOX : ct_ret(LBOX) ; | |
333 | case SC_RBOX : ct_ret(RBOX) ; | |
334 | ||
335 | case SC_MATCH : | |
336 | string_buff[0] = '~' ; string_buff[0] = 0 ; | |
337 | yylval.ival = 1 ; | |
338 | ct_ret(MATCH) ; | |
339 | ||
340 | case SC_EQUAL : | |
341 | test1_ret( '=', EQ, ASSIGN ) ; | |
342 | ||
343 | case SC_NOT : /* ! */ | |
344 | if ( (c = next()) == '~' ) | |
345 | { | |
346 | string_buff[0] = '!' ; | |
347 | string_buff[1] = '~' ; | |
348 | string_buff[2] = 0 ; | |
349 | yylval.ival = 0 ; | |
350 | ct_ret(MATCH) ; | |
351 | } | |
352 | else | |
353 | if ( c == '=' ) ct_ret(NEQ) ; | |
354 | ||
355 | un_next() ; | |
356 | ct_ret(NOT) ; | |
357 | ||
358 | ||
359 | case SC_LT : /* '<' */ | |
360 | if ( next() == '=' ) ct_ret(LTE) ; | |
361 | else un_next() ; | |
362 | ||
363 | if ( getline_flag ) | |
364 | { getline_flag = 0 ; ct_ret(IO_IN) ; } | |
365 | else ct_ret(LT) ; | |
366 | ||
367 | case SC_GT : /* '>' */ | |
368 | if ( print_flag && paren_cnt == 0 ) | |
369 | { print_flag = 0 ; | |
370 | /* there are 3 types of IO_OUT | |
371 | -- build the error string in string_buff */ | |
372 | string_buff[0] = '>' ; | |
373 | if ( next() == '>' ) | |
374 | { | |
375 | yylval.ival = F_APPEND ; | |
376 | string_buff[1] = '>' ; | |
377 | string_buff[2] = 0 ; | |
378 | } | |
379 | else | |
380 | { un_next() ; | |
381 | yylval.ival = F_TRUNC ; | |
382 | string_buff[1] = 0 ; | |
383 | } | |
384 | return current_token = IO_OUT ; | |
385 | } | |
386 | ||
387 | test1_ret('=', GTE, GT) ; | |
388 | ||
389 | case SC_OR : | |
390 | if ( next() == '|' ) | |
391 | { eat_nl() ; ct_ret(OR) ; } | |
392 | else | |
393 | { un_next() ; | |
394 | ||
395 | if ( print_flag && paren_cnt == 0 ) | |
396 | { print_flag = 0 ; | |
397 | yylval.ival = PIPE_OUT; | |
398 | string_buff[0] = '|' ; | |
399 | string_buff[1] = 0 ; | |
400 | ct_ret(IO_OUT) ; | |
401 | } | |
402 | else ct_ret(PIPE) ; | |
403 | } | |
404 | ||
405 | case SC_AND : | |
406 | if ( next() == '&' ) | |
407 | { eat_nl() ; ct_ret(AND) ; } | |
408 | else | |
409 | { un_next() ; yylval.ival = '&' ; ct_ret(UNEXPECTED) ; } | |
410 | ||
411 | case SC_QMARK : ct_ret(QMARK) ; | |
412 | case SC_COLON : ct_ret(COLON) ; | |
413 | case SC_RBRACE : | |
414 | if ( --brace_cnt < 0 ) | |
415 | { compile_error("extra '}'" ) ; | |
416 | eat_semi_colon() ; | |
417 | brace_cnt = 0 ; goto reswitch ; } | |
418 | ||
419 | if ( (c = current_token) == NL || c == SEMI_COLON | |
420 | || c == SC_FAKE_SEMI_COLON || c == RBRACE ) | |
421 | { | |
422 | /* if the brace_cnt is zero , we've completed | |
423 | a pattern action block. If the user insists | |
424 | on adding a semi-colon on the same line | |
425 | we will eat it. Note what we do below: | |
426 | physical law -- conservation of semi-colons */ | |
427 | ||
428 | if ( brace_cnt == 0 ) eat_semi_colon() ; | |
429 | eat_nl() ; | |
430 | ct_ret(RBRACE) ; | |
431 | } | |
432 | ||
433 | /* supply missing semi-colon to statement that | |
434 | precedes a '}' */ | |
435 | brace_cnt++ ; un_next() ; | |
436 | current_token = SC_FAKE_SEMI_COLON ; | |
437 | return SEMI_COLON ; | |
438 | ||
439 | case SC_DIGIT : | |
440 | case SC_DOT : | |
441 | { double d ; | |
442 | int flag ; | |
443 | static double double_zero = 0.0 ; | |
444 | static double double_one = 1.0 ; | |
445 | ||
446 | if ( (d = collect_decimal(c, &flag)) == 0.0 ) | |
447 | if ( flag ) ct_ret(flag) ; | |
448 | else yylval.ptr = (PTR) &double_zero ; | |
449 | else if ( d == 1.0 ) yylval.ptr = (PTR) &double_one ; | |
450 | else | |
451 | { yylval.ptr = (PTR) ZMALLOC(double) ; | |
452 | *(double*)yylval.ptr = d ; | |
453 | } | |
454 | ct_ret( DOUBLE ) ; | |
455 | } | |
456 | ||
457 | case SC_DOLLAR : /* '$' */ | |
458 | { double d ; | |
459 | int flag ; | |
460 | ||
461 | while ( scan_code[c = next()] == SC_SPACE ) ; | |
462 | if ( scan_code[c] != SC_DIGIT && | |
463 | scan_code[c] != SC_DOT ) | |
464 | { un_next() ; ct_ret(DOLLAR) ; } | |
465 | /* compute field address at compile time */ | |
466 | if ( (d = collect_decimal(c, &flag)) == 0.0 ) | |
467 | if ( flag ) ct_ret(flag) ; /* an error */ | |
468 | else yylval.cp = &field[0] ; | |
469 | else | |
470 | { int k = (int) d ; | |
471 | ||
472 | if ( k > MAX_FIELD ) | |
473 | { compile_error( | |
474 | "$%g exceeds maximum field(%d)" , d, MAX_FIELD) ; | |
475 | k = MAX_FIELD ; | |
476 | } | |
477 | yylval.cp = field_ptr(k) ; | |
478 | } | |
479 | ||
480 | ct_ret(FIELD) ; | |
481 | } | |
482 | ||
483 | case SC_DQUOTE : | |
484 | return current_token = collect_string() ; | |
485 | ||
486 | case SC_IDCHAR : /* collect an identifier */ | |
487 | { unsigned char *p = | |
488 | (unsigned char *)string_buff + 1 ; | |
489 | SYMTAB *stp ; | |
490 | ||
491 | string_buff[0] = c ; | |
492 | ||
493 | while ( | |
494 | (c = scan_code[ *p++ = next()]) == SC_IDCHAR || | |
495 | c == SC_DIGIT ) ; | |
496 | ||
497 | un_next() ; * --p = 0 ; | |
498 | ||
499 | switch( (stp = find(string_buff))->type ) | |
500 | { case ST_NONE : | |
501 | /* check for function call before defined */ | |
502 | if ( next() == '(' ) | |
503 | { stp->type = ST_FUNCT ; | |
504 | stp->stval.fbp = (FBLOCK *) | |
505 | zmalloc(sizeof(FBLOCK)) ; | |
506 | stp->stval.fbp->name = stp->name ; | |
507 | stp->stval.fbp->code = (INST *) 0 ; | |
508 | yylval.fbp = stp->stval.fbp ; | |
509 | current_token = FUNCT_ID ; | |
510 | } | |
511 | else | |
512 | { yylval.stp = stp ; | |
513 | current_token = | |
514 | current_token == DOLLAR ? D_ID : ID ; | |
515 | } | |
516 | un_next() ; | |
517 | break ; | |
518 | ||
519 | case ST_NR : | |
520 | NR_flag = 1 ; | |
521 | stp->type = ST_VAR ; | |
522 | /* fall thru */ | |
523 | ||
524 | case ST_VAR : | |
525 | case ST_ARRAY : | |
526 | case ST_LOCAL_NONE : | |
527 | case ST_LOCAL_VAR : | |
528 | case ST_LOCAL_ARRAY : | |
529 | ||
530 | yylval.stp = stp ; | |
531 | current_token = | |
532 | current_token == DOLLAR ? D_ID : ID ; | |
533 | break ; | |
534 | ||
535 | case ST_ENV : | |
536 | stp->type = ST_ARRAY ; | |
537 | stp->stval.array = new_ARRAY() ; | |
538 | load_environ(stp->stval.array) ; | |
539 | yylval.stp = stp ; | |
540 | current_token = | |
541 | current_token == DOLLAR ? D_ID : ID ; | |
542 | break ; | |
543 | ||
544 | case ST_FUNCT : | |
545 | yylval.fbp = stp->stval.fbp ; | |
546 | current_token = FUNCT_ID ; | |
547 | break ; | |
548 | ||
549 | case ST_KEYWORD : | |
550 | current_token = stp->stval.kw ; | |
551 | break ; | |
552 | ||
553 | case ST_BUILTIN : | |
554 | yylval.bip = stp->stval.bip ; | |
555 | current_token = BUILTIN ; | |
556 | break ; | |
557 | ||
558 | case ST_FIELD : | |
559 | yylval.cp = stp->stval.cp ; | |
560 | current_token = FIELD ; | |
561 | break ; | |
562 | ||
563 | default : | |
564 | bozo("find returned bad st type") ; | |
565 | } | |
566 | return current_token ; | |
567 | } | |
568 | ||
569 | ||
570 | case SC_UNEXPECTED : | |
571 | yylval.ival = c & 0xff ; | |
572 | ct_ret(UNEXPECTED) ; | |
573 | } | |
574 | return 0 ; /* never get here make lint happy */ | |
575 | } | |
576 | ||
/* collect a decimal constant in string_buff.
   Return the value and error conditions by reference

   c    : the first character of the constant, already read by the
          caller (a digit or '.')
   flag : set to 0 on success, otherwise to the token code the
          caller should return (UNEXPECTED or BAD_DECIMAL); in
          that case the function returns 0.0

   Every loop below stores one character too many in string_buff;
   the extra character is pushed back with un_next() and
   overwritten with the terminating 0.
*/

static double collect_decimal(c, flag)
  int c ; int *flag ;
{ register unsigned char *p = (unsigned char*) string_buff + 1;
  unsigned char *endp ;
  double d ;

  *flag = 0 ;
  string_buff[0] = c ;

  if ( c == '.' )
  { /* a leading '.' must be followed by a digit */
    if ( scan_code[*p++ = next()] != SC_DIGIT )
    { *flag = UNEXPECTED ; yylval.ival = '.' ;
      return 0.0 ; }
  }
  else
  { /* digits before the decimal point; keep the '.' if it is
       what stopped the loop */
    while ( scan_code[*p++ = next()] == SC_DIGIT ) ;
    if ( p[-1] != '.' )
    { un_next() ; p-- ; }
  }
  /* get rest of digits after decimal point */
  while ( scan_code[*p++ = next()] == SC_DIGIT ) ;

  /* check for exponent */
  if ( p[-1] != 'e' && p[-1] != 'E' )
  { un_next() ; * --p = 0 ; }
  else /* get the exponent */
  if ( scan_code[*p = next()] != SC_DIGIT &&
       *p != '-' && *p != '+' )
  { *++p = 0 ; *flag = BAD_DECIMAL ;
    return 0.0 ; }
  else /* get the rest of the exponent */
  { p++ ;
    while ( scan_code[*p++ = next()] == SC_DIGIT ) ;
    un_next() ; * --p = 0 ;
  }

  errno = 0 ; /* check for overflow/underflow */
  d = strtod( string_buff, (char **)&endp ) ;

#ifndef STRTOD_UNDERFLOW_ON_ZERO_BUG
  if ( errno )
      compile_error( "%s : decimal %sflow" , string_buff,
                     d == 0.0 ? "under" : "over") ;
#else /* sun4 bug */
  if ( errno && d != 0.0 )
      compile_error( "%s : decimal overflow", string_buff) ;
#endif

  /* strtod() stopped before the end of what was collected
     (for example an exponent sign with no digits) */
  if ( endp < p )
  { *flag = BAD_DECIMAL ; return 0.0 ; }
  return d ;
}
632 | ||
633 | /*---------- process escape characters ---------------*/ | |
634 | ||
/* value of a hex letter, indexed by (character - 'A').
   The first six entries are for 'A'..'F' and the last six for
   'a'..'f'; everything in between is 0.  The layout puts 'a' 32
   places after 'A', i.e. it assumes an ASCII-like character set. */
static char hex_val['f' - 'A' + 1] = {
10,11,12,13,14,15, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0,
10,11,12,13,14,15 } ;

/* true if x is one of '0'..'7' (x is evaluated twice) */
#define isoctal(x) ((x)>='0'&&(x)<='7')

/* table lookup only: x must lie in 'A'..'f' */
#define hex_value(x) hex_val[(x)-'A']

/* true if x is a decimal digit or a hex letter; a 0 table entry
   marks the non-hex characters between 'A' and 'f' */
#define ishex(x) (scan_code[x] == SC_DIGIT ||\
                  'A' <= (x) && (x) <= 'f' && hex_value(x))

static int PROTO(octal, (char **)) ;
static int PROTO(hex, (char **)) ;
651 | ||
652 | /* process one , two or three octal digits | |
653 | moving a pointer forward by reference */ | |
/* Convert an octal escape of one, two or three digits.

   start_p : address of a pointer to the first digit, which the
             caller has already checked with isoctal(); on return
             the pointer has been moved past the digits used.

   Returns the value reduced to its low 8 bits.
*/
static int octal( start_p )
  char **start_p ;
{
    register char *cur = *start_p ;
    register unsigned val ;
    int extra = 2 ;   /* at most two more digits after the first */

    val = *cur++ - '0' ;
    while ( extra-- > 0 && isoctal(*cur) )
        val = (val<<3) + *cur++ - '0' ;

    *start_p = cur ;
    return val & 0xff ;
}
668 | ||
669 | /* process one or two hex digits | |
670 | moving a pointer forward by reference */ | |
671 | ||
672 | static int hex( start_p ) | |
673 | char **start_p ; | |
674 | { register unsigned char *p = (unsigned char*) *start_p ; | |
675 | register unsigned x ; | |
676 | unsigned t ; | |
677 | ||
678 | if ( scan_code[*p] == SC_DIGIT ) | |
679 | x = *p++ - '0' ; | |
680 | else x = hex_value(*p++) ; | |
681 | ||
682 | if ( scan_code[*p] == SC_DIGIT ) | |
683 | x = (x<<4) + *p++ - '0' ; | |
684 | else | |
685 | if ( 'A' <= *p && *p <= 'f' && (t = hex_value(*p)) ) | |
686 | { x = (x<<4) + t ; p++ ; } | |
687 | ||
688 | *start_p = (char *) p ; | |
689 | return x ; | |
690 | } | |
691 | ||
/* index of the last slot of escape_test[] */
#define ET_END 9

/* single character escapes: the character that follows the
   backslash (in) and the character it stands for (out).
   The last slot is not a real entry -- rm_escape() writes the
   character it is looking up there so that its search always
   stops, which is why the table cannot be const. */
static struct { char in , out ; } escape_test[ET_END+1] = {
    { 'n'  , '\n'   },
    { 't'  , '\t'   },
    { 'f'  , '\f'   },
    { 'b'  , '\b'   },
    { 'r'  , '\r'   },
    { 'a'  , '\07'  },
    { 'v'  , '\013' },
    { '\\' , '\\'   },
    { '\"' , '\"'   },
    { 0    , 0      }
} ;
705 | ||
706 | ||
707 | /* process the escape characters in a string, in place . */ | |
708 | ||
709 | char *rm_escape(s) | |
710 | char *s ; | |
711 | { register char *p, *q ; | |
712 | char *t ; | |
713 | int i ; | |
714 | ||
715 | q = p = s ; | |
716 | ||
717 | while ( *p ) | |
718 | if ( *p == '\\' ) | |
719 | { | |
720 | escape_test[ET_END].in = * ++p ; /* sentinal */ | |
721 | i = 0 ; | |
722 | while ( escape_test[i].in != *p ) i++ ; | |
723 | ||
724 | if ( i != ET_END ) /* in table */ | |
725 | { | |
726 | p++ ; *q++ = escape_test[i].out ; | |
727 | } | |
728 | else | |
729 | if ( isoctal(*p) ) | |
730 | { | |
731 | t = p ; *q++ = octal(&t) ; p = t ; | |
732 | } | |
733 | else | |
734 | if ( *p == 'x' && ishex(*(unsigned char*)(p+1)) ) | |
735 | { | |
736 | t = p+1 ; *q++ = hex(&t) ; p = t ; | |
737 | } | |
738 | else | |
739 | if ( *p == 0 ) /* can only happen with command line assign */ | |
740 | *q++ = '\\' ; | |
741 | else /* not an escape sequence */ | |
742 | { | |
743 | *q++ = '\\' ; *q++ = *p++ ; | |
744 | } | |
745 | } | |
746 | else *q++ = *p++ ; | |
747 | ||
748 | *q = 0 ; | |
749 | return s ; | |
750 | } | |
751 | ||
752 | static int collect_string() | |
753 | { register unsigned char *p = (unsigned char *)string_buff ; | |
754 | int c ; | |
755 | int e_flag = 0 ; /* on if have an escape char */ | |
756 | ||
757 | while ( 1 ) | |
758 | switch( scan_code[ *p++ = next() ] ) | |
759 | { case SC_DQUOTE : /* done */ | |
760 | * --p = 0 ; goto out ; | |
761 | ||
762 | case SC_NL : | |
763 | p[-1] = 0 ; | |
764 | /* fall thru */ | |
765 | ||
766 | case 0 : /* unterminated string */ | |
767 | compile_error( | |
768 | "runaway string constant \"%.10s ..." , | |
769 | string_buff, token_lineno ) ; | |
770 | mawk_exit(1) ; | |
771 | ||
772 | case SC_ESCAPE : | |
773 | if ( (c = next()) == '\n' ) | |
774 | { p-- ; lineno++ ; } | |
775 | else | |
776 | if ( c == 0 ) un_next() ; | |
777 | else | |
778 | { *p++ = c ; e_flag = 1 ; } | |
779 | ||
780 | break ; | |
781 | ||
782 | default : break ; | |
783 | } | |
784 | ||
785 | out: | |
786 | yylval.ptr = (PTR) new_STRING( | |
787 | e_flag ? rm_escape( string_buff ) | |
788 | : string_buff ) ; | |
789 | return STRING_ ; | |
790 | } | |
791 | ||
792 | ||
793 | static int collect_RE() | |
794 | { register unsigned char *p = (unsigned char*) string_buff ; | |
795 | int c ; | |
796 | STRING *sval ; | |
797 | ||
798 | while ( 1 ) | |
799 | switch( scan_code[ *p++ = next() ] ) | |
800 | { case SC_DIV : /* done */ | |
801 | * --p = 0 ; goto out ; | |
802 | ||
803 | case SC_NL : | |
804 | p[-1] = 0 ; | |
805 | /* fall thru */ | |
806 | ||
807 | case 0 : /* unterminated re */ | |
808 | compile_error( | |
809 | "runaway regular expression /%.10s ..." , | |
810 | string_buff, token_lineno ) ; | |
811 | mawk_exit(1) ; | |
812 | ||
813 | case SC_ESCAPE : | |
814 | switch( c = next() ) | |
815 | { case '/' : | |
816 | p[-1] = '/' ; break ; | |
817 | ||
818 | case '\n' : | |
819 | p-- ; break ; | |
820 | ||
821 | case 0 : | |
822 | un_next() ; break ; | |
823 | ||
824 | default : | |
825 | *p++ = c ; break ; | |
826 | } | |
827 | break ; | |
828 | } | |
829 | ||
830 | out: | |
831 | /* now we've got the RE, so compile it */ | |
832 | sval = new_STRING( string_buff ) ; | |
833 | yylval.ptr = re_compile(sval) ; | |
834 | free_STRING(sval) ; | |
835 | return RE ; | |
836 | } | |
837 |