Commit | Line | Data |
---|---|---|
31edab6f WJ |
1 | /* Copyright (C) 1989, 1992 Aladdin Enterprises. All rights reserved. |
2 | Distributed by Free Software Foundation, Inc. | |
3 | ||
4 | This file is part of Ghostscript. | |
5 | ||
6 | Ghostscript is distributed in the hope that it will be useful, but | |
7 | WITHOUT ANY WARRANTY. No author or distributor accepts responsibility | |
8 | to anyone for the consequences of using it or for whether it serves any | |
9 | particular purpose or works at all, unless he says so in writing. Refer | |
10 | to the Ghostscript General Public License for full details. | |
11 | ||
12 | Everyone is granted permission to copy, modify and redistribute | |
13 | Ghostscript, but only under the conditions described in the Ghostscript | |
14 | General Public License. A copy of this license is supposed to have been | |
15 | given to you along with Ghostscript so you can know your rights and | |
16 | responsibilities. It should be in a file named COPYING. Among other | |
17 | things, the copyright notice and this notice must be preserved on all | |
18 | copies. */ | |
19 | ||
20 | /* iscan.c */ | |
21 | /* Token scanner for Ghostscript interpreter */ | |
22 | #include <ctype.h> | |
23 | #include "memory_.h" | |
24 | #include "ghost.h" | |
25 | #include "alloc.h" | |
26 | #include "dict.h" /* for //name lookup */ | |
27 | #include "dstack.h" /* ditto */ | |
28 | #include "errors.h" | |
29 | #include "iutil.h" | |
30 | #include "name.h" | |
31 | #include "ostack.h" /* for accumulating proc bodies */ | |
32 | #include "packed.h" | |
33 | #include "store.h" | |
34 | #include "stream.h" | |
35 | #include "scanchar.h" | |
36 | ||
37 | /* Array packing flag */ | |
38 | ref array_packing; /* t_boolean */ | |
39 | /* Binary object format flag. This will never be set non-zero */ | |
40 | /* unless the binary token feature is enabled. */ | |
41 | ref binary_object_format; /* t_integer */ | |
42 | #define recognize_btokens() (binary_object_format.value.intval != 0) | |
43 | ||
44 | /* Procedure for binary tokens. Set at initialization if the binary token */ | |
45 | /* option is included; only called if recognize_btokens() is true. */ | |
46 | /* Returns 0 on success, <0 on failure. */ | |
47 | int (*scan_btoken_proc)(P3(stream *, ref *, int)) = NULL; | |
48 | ||
49 | /* | |
50 | * Level 2 includes some changes in the scanner: | |
51 | * - \ is always recognized in strings, regardless of the data source; | |
52 | * - << and >> are legal tokens; | |
53 | * - <~ introduces an ASCII-85 encoded string (terminated by ~>) | |
54 | * (not implemented yet); | |
55 | * - Character codes above 127 introduce binary objects. | |
56 | * We explicitly enable or disable these changes here. | |
57 | */ | |
58 | int scan_enable_level2 = 1; | |
59 | ||
60 | /* Forward references */ | |
61 | private int scan_hex_string(P2(stream *, ref *)), | |
62 | scan_int(P6(byte **, byte *, int, int, long *, double *)), | |
63 | scan_number(P3(byte *, byte *, ref *)), | |
64 | scan_string(P3(stream *, int, ref *)); | |
65 | ||
66 | /* Define the character scanning table (see scanchar.h). */ | |
67 | byte scan_char_array[258]; | |
68 | ||
69 | /* A structure for dynamically growable objects */ | |
70 | typedef struct dynamic_area_s { | |
71 | byte *base; | |
72 | byte *next; | |
73 | uint num_elts; | |
74 | uint elt_size; | |
75 | int is_dynamic; /* false if using fixed buffer */ | |
76 | byte *limit; | |
77 | } dynamic_area; | |
78 | typedef dynamic_area _ss *da_ptr; | |
79 | ||
80 | /* Begin a dynamic object. */ | |
81 | /* dynamic_begin returns the value of alloc, which may be 0: */ | |
82 | /* the invoker of dynamic_begin must test the value against 0. */ | |
83 | #define dynamic_begin(pda, dnum, desize)\ | |
84 | ((pda)->base = (byte *)alloc((pda)->num_elts = (dnum),\ | |
85 | (pda)->elt_size = (desize), "scanner"),\ | |
86 | (pda)->limit = (pda)->base + (dnum) * (desize),\ | |
87 | (pda)->is_dynamic = 1,\ | |
88 | (pda)->next = (pda)->base) | |
89 | ||
90 | /* Free a dynamic object. */ | |
91 | private void | |
92 | dynamic_free(da_ptr pda) | |
93 | { if ( pda->is_dynamic ) | |
94 | alloc_free((char *)(pda->base), pda->num_elts, pda->elt_size, | |
95 | "scanner"); | |
96 | } | |
97 | ||
98 | /* Grow a dynamic object. */ | |
99 | /* If the allocation fails, free the old contents, and return NULL; */ | |
100 | /* otherwise, return the new `next' pointer. */ | |
101 | private byte * | |
102 | dynamic_grow(register da_ptr pda, byte *next) | |
103 | { if ( next != pda->limit ) return next; | |
104 | pda->next = next; | |
105 | { uint num = pda->num_elts; | |
106 | uint old_size = num * pda->elt_size; | |
107 | uint pos = pda->next - pda->base; | |
108 | uint new_size = (old_size < 10 ? 20 : | |
109 | old_size >= (max_uint >> 1) ? max_uint : | |
110 | old_size << 1); | |
111 | uint new_num = new_size / pda->elt_size; | |
112 | if ( pda->is_dynamic ) | |
113 | { byte *base = alloc_grow(pda->base, num, new_num, pda->elt_size, "scanner"); | |
114 | if ( base == 0 ) | |
115 | { dynamic_free(pda); | |
116 | return NULL; | |
117 | } | |
118 | pda->base = base; | |
119 | pda->num_elts = new_num; | |
120 | pda->limit = pda->base + new_size; | |
121 | } | |
122 | else | |
123 | { byte *base = pda->base; | |
124 | if ( !dynamic_begin(pda, new_num, pda->elt_size) ) return NULL; | |
125 | memcpy(pda->base, base, old_size); | |
126 | pda->is_dynamic = 1; | |
127 | } | |
128 | pda->next = pda->base + pos; | |
129 | } | |
130 | return pda->next; | |
131 | } | |
132 | ||
133 | /* Initialize the scanner. */ | |
134 | void | |
135 | scan_init() | |
136 | { /* Initialize decoder array */ | |
137 | register byte _ds *decoder = scan_char_decoder; | |
138 | static char stop_chars[] = "()<>[]{}/%"; | |
139 | static char space_chars[] = " \f\t\n\r"; | |
140 | decoder[ERRC] = ctype_eof; /* ****** FIX THIS? ****** */ | |
141 | decoder[EOFC] = ctype_eof; | |
142 | memset(decoder, ctype_name, 256); | |
143 | memset(decoder + 128, ctype_btoken, 32); | |
144 | { register char _ds *p; | |
145 | for ( p = space_chars; *p; p++ ) | |
146 | decoder[*p] = ctype_space; | |
147 | decoder[char_NULL] = decoder[char_VT] = | |
148 | decoder[char_DOS_EOF] = ctype_space; | |
149 | for ( p = stop_chars; *p; p++ ) | |
150 | decoder[*p] = ctype_other; | |
151 | } | |
152 | { register int i; | |
153 | for ( i = 0; i < 10; i++ ) | |
154 | decoder['0' + i] = i; | |
155 | for ( i = 0; i < max_radix - 10; i++ ) | |
156 | decoder['A' + i] = decoder['a' + i] = i + 10; | |
157 | } | |
158 | /* Other initialization */ | |
159 | make_false(&array_packing); | |
160 | make_int(&binary_object_format, 0); | |
161 | } | |
162 | ||
163 | /* Read a token from a stream. */ | |
164 | /* Return 1 for end-of-stream, 0 if a token was read, */ | |
165 | /* or a (negative) error code. */ | |
166 | /* If the token required a terminating character (i.e., was a name or */ | |
167 | /* number) and the next character was whitespace, read and discard */ | |
168 | /* that character: see the description of the 'token' operator on */ | |
169 | /* p. 232 of the Red Book. */ | |
170 | /* from_string indicates reading from a string vs. a file, */ | |
171 | /* because \ escapes are not recognized in the former case. */ | |
172 | /* (See the footnote on p. 23 of the Red Book.) */ | |
173 | int | |
174 | scan_token(register stream *s, int from_string, ref *pref) | |
175 | { ref *myref = pref; | |
176 | dynamic_area proc_da; /* (not actually dynamic) */ | |
177 | int pstack = 0; /* offset from proc_da.base */ | |
178 | int retcode = 0; | |
179 | register int c; | |
180 | int name_type; /* number of /'s preceding */ | |
181 | int try_number; | |
182 | byte s1[2]; | |
183 | register byte _ds *decoder = scan_char_decoder; | |
184 | /* Only old P*stScr*pt interpreters use from_string.... */ | |
185 | from_string &= !scan_enable_level2; | |
186 | top: c = sgetc(s); | |
187 | #ifdef DEBUG | |
188 | if ( gs_debug['s'] ) | |
189 | fprintf(gs_debug_out, (c >= 32 && c <= 126 ? "`%c'" : "`%03o'"), c); | |
190 | #endif | |
191 | switch ( c ) | |
192 | { | |
193 | case ' ': case '\f': case '\t': case '\n': case '\r': | |
194 | case char_NULL: case char_VT: case char_DOS_EOF: | |
195 | goto top; | |
196 | case '[': | |
197 | case ']': | |
198 | s1[0] = (byte)c; | |
199 | name_ref(s1, 1, myref, 1); | |
200 | r_set_attrs(myref, a_executable); | |
201 | break; | |
202 | case '<': | |
203 | if ( scan_enable_level2 ) | |
204 | { c = sgetc(s); | |
205 | if ( char_is_data(c) ) sputback(s); | |
206 | switch ( c ) | |
207 | { | |
208 | case '<': | |
209 | name_type = try_number = 0; | |
210 | goto try_funny_name; | |
211 | /****** Check for <~ here ******/ | |
212 | } | |
213 | } | |
214 | retcode = scan_hex_string(s, myref); | |
215 | break; | |
216 | case '(': | |
217 | retcode = scan_string(s, from_string, myref); | |
218 | break; | |
219 | case '{': | |
220 | if ( pstack == 0 ) | |
221 | { /* Use the operand stack to accumulate procedures. */ | |
222 | myref = osp + 1; | |
223 | proc_da.base = (byte *)myref; | |
224 | proc_da.limit = (byte *)(ostop + 1); | |
225 | proc_da.is_dynamic = 0; | |
226 | proc_da.elt_size = sizeof(ref); | |
227 | proc_da.num_elts = ostop - osp; | |
228 | } | |
229 | if ( proc_da.limit - (byte *)myref < 2 * sizeof(ref) ) | |
230 | return e_limitcheck; /* ****** SHOULD GROW OSTACK ****** */ | |
231 | r_set_size(myref, pstack); | |
232 | myref++; | |
233 | pstack = (byte *)myref - proc_da.base; | |
234 | goto top; | |
235 | case '>': | |
236 | if ( scan_enable_level2 ) | |
237 | { name_type = try_number = 0; | |
238 | goto try_funny_name; | |
239 | } | |
240 | /* falls through */ | |
241 | case ')': | |
242 | retcode = e_syntaxerror; | |
243 | break; | |
244 | case '}': | |
245 | if ( pstack == 0 ) | |
246 | { retcode = e_syntaxerror; | |
247 | break; | |
248 | } | |
249 | { ref *ref0 = (ref *)(proc_da.base + pstack); | |
250 | uint size = myref - ref0; | |
251 | ref *aref; | |
252 | myref = ref0 - 1; | |
253 | pstack = r_size(myref); | |
254 | if ( pstack == 0 ) myref = pref; | |
255 | if ( array_packing.value.index ) | |
256 | { retcode = make_packed_array(ref0, size, myref, | |
257 | "scanner(packed)"); | |
258 | if ( retcode < 0 ) return retcode; | |
259 | r_set_attrs(myref, a_executable); | |
260 | } | |
261 | else | |
262 | { aref = alloc_refs(size, "scanner(proc)"); | |
263 | if ( aref == 0 ) return e_VMerror; | |
264 | refcpy_to_new(aref, ref0, size); | |
265 | make_tasv_new(myref, t_array, a_executable + a_all, size, refs, aref); | |
266 | } | |
267 | } | |
268 | break; | |
269 | case '/': | |
270 | c = sgetc(s); | |
271 | if ( c == '/' ) | |
272 | { name_type = 2; | |
273 | c = sgetc(s); | |
274 | } | |
275 | else | |
276 | name_type = 1; | |
277 | try_number = 0; | |
278 | switch ( decoder[c] ) | |
279 | { | |
280 | case ctype_name: | |
281 | default: | |
282 | goto do_name; | |
283 | case ctype_btoken: | |
284 | if ( !recognize_btokens() ) goto do_name; | |
285 | /* otherwise, an empty name */ | |
286 | sputback(s); | |
287 | case ctype_eof: | |
288 | /* Empty name: bizarre but legitimate. */ | |
289 | name_ref((byte *)0, 0, myref, 1); | |
290 | goto have_name; | |
291 | case ctype_other: | |
292 | switch ( c ) | |
293 | { | |
294 | case '[': /* only special as first character */ | |
295 | case ']': /* ditto */ | |
296 | s1[0] = (byte)c; | |
297 | name_ref(s1, 1, myref, 1); | |
298 | goto have_name; | |
299 | case '<': /* legal in Level 2 */ | |
300 | case '>': | |
301 | if ( scan_enable_level2 ) goto try_funny_name; | |
302 | default: | |
303 | /* Empty name: bizarre but legitimate. */ | |
304 | name_ref((byte *)0, 0, myref, 1); | |
305 | sputback(s); | |
306 | goto have_name; | |
307 | } | |
308 | case ctype_space: | |
309 | /* Empty name: bizarre but legitimate. */ | |
310 | name_ref((byte *)0, 0, myref, 1); | |
311 | /* Check for \r\n */ | |
312 | if ( c == '\r' && (c = sgetc(s)) != '\n' && char_is_data(c) ) | |
313 | sputback(s); | |
314 | goto have_name; | |
315 | } | |
316 | /* NOTREACHED */ | |
317 | case '%': | |
318 | { for ( ; ; ) | |
319 | switch ( sgetc(s) ) | |
320 | { | |
321 | case '\r': | |
322 | if ( (c = sgetc(s)) != '\n' && char_is_data(c) ) | |
323 | sputback(s); | |
324 | /* falls through */ | |
325 | case '\n': case '\f': | |
326 | goto top; | |
327 | case EOFC: | |
328 | goto ceof; | |
329 | case ERRC: | |
330 | goto cerr; | |
331 | } | |
332 | ceof: ; | |
333 | } /* falls through */ | |
334 | case EOFC: | |
335 | retcode = (pstack != 0 ? e_syntaxerror : 1); | |
336 | break; | |
337 | case ERRC: | |
338 | cerr: retcode = e_ioerror; | |
339 | break; | |
340 | ||
341 | /* Check for a Level 2 funny name (<< or >>). */ | |
342 | /* c is '<' or '>'. */ | |
343 | try_funny_name: | |
344 | { int c1 = sgetc(s); | |
345 | if ( c1 == c ) | |
346 | { s1[0] = s1[1] = c; | |
347 | retcode = name_ref(s1, 2, myref, 1); | |
348 | goto have_name; | |
349 | } | |
350 | if ( char_is_data(c1) ) sputback(s); | |
351 | } retcode = e_syntaxerror; | |
352 | break; | |
353 | ||
354 | /* Handle separately the names that might be a number */ | |
355 | case '0': case '1': case '2': case '3': case '4': | |
356 | case '5': case '6': case '7': case '8': case '9': | |
357 | case '.': case '+': case '-': | |
358 | name_type = 0; | |
359 | try_number = 1; | |
360 | goto do_name; | |
361 | ||
362 | /* Check for a binary object */ | |
363 | #define case4(c) case c: case c+1: case c+2: case c+3 | |
364 | case4(128): case4(132): case4(136): case4(140): | |
365 | case4(144): case4(148): case4(152): case4(156): | |
366 | #undef case4 | |
367 | if ( recognize_btokens() ) | |
368 | { retcode = (*scan_btoken_proc)(s, myref, c); | |
369 | break; | |
370 | } | |
371 | /* Not a binary object, fall through. */ | |
372 | ||
373 | /* The default is a name. */ | |
374 | default: | |
375 | /* Handle the common cases (letters and _) explicitly, */ | |
376 | /* rather than going through the default test. */ | |
377 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
378 | case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': | |
379 | case 'n': case 'o': case 'p': case 'q': case 'r': case 's': | |
380 | case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': | |
381 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
382 | case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': | |
383 | case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': | |
384 | case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': | |
385 | case '_': | |
386 | /* Common code for scanning a name. */ | |
387 | /* try_number and name_type are already set. */ | |
388 | /* We know c has ctype_name (or maybe ctype_btoken) */ | |
389 | /* or is a digit. */ | |
390 | name_type = 0; | |
391 | try_number = 0; | |
392 | do_name: | |
393 | { dynamic_area da; | |
394 | int max_name_ctype = | |
395 | (recognize_btokens() ? ctype_name : ctype_btoken); | |
396 | /* Try to scan entirely within the stream buffer. */ | |
397 | /* We stop 1 character early, so we don't switch buffers */ | |
398 | /* looking ahead if the name is terminated by \r\n. */ | |
399 | register byte *ptr = sbufptr(s); | |
400 | byte *end = sbufend(s) - 1; | |
401 | da.base = ptr - 1; | |
402 | da.is_dynamic = 0; | |
403 | do | |
404 | { if ( ptr >= end ) | |
405 | { ssetbufptr(s, ptr); | |
406 | /* Initialize the dynamic area. */ | |
407 | /* We have to do this before the next */ | |
408 | /* sgetc, which will overwrite the buffer. */ | |
409 | da.limit = ptr; | |
410 | da.num_elts = ptr - da.base; | |
411 | da.elt_size = 1; | |
412 | ptr = dynamic_grow(&da, ptr); | |
413 | if ( !ptr ) return e_VMerror; | |
414 | goto dyn_name; | |
415 | } | |
416 | c = *ptr++; | |
417 | } | |
418 | while ( decoder[c] <= max_name_ctype ); /* digit or name */ | |
419 | /* Name ended within the buffer. */ | |
420 | ssetbufptr(s, ptr); | |
421 | ptr--; | |
422 | goto nx; | |
423 | /* Name overran buffer. */ | |
424 | dyn_name: while ( decoder[c = sgetc(s)] <= max_name_ctype ) | |
425 | { if ( ptr == da.limit ) | |
426 | { ptr = dynamic_grow(&da, ptr); | |
427 | if ( !ptr ) return e_VMerror; | |
428 | } | |
429 | *ptr++ = c; | |
430 | } | |
431 | nx: switch ( decoder[c] ) | |
432 | { | |
433 | case ctype_btoken: | |
434 | case ctype_other: | |
435 | sputback(s); | |
436 | break; | |
437 | case ctype_space: | |
438 | /* Check for \r\n */ | |
439 | if ( c == '\r' && (c = sgetc(s)) != '\n' && char_is_data(c) ) | |
440 | sputback(s); | |
441 | case ctype_eof: ; | |
442 | } | |
443 | /* Check for a number */ | |
444 | if ( try_number ) | |
445 | { retcode = scan_number(da.base, ptr, myref); | |
446 | if ( retcode != e_syntaxerror ) | |
447 | { dynamic_free(&da); | |
448 | if ( name_type == 2 ) return e_syntaxerror; | |
449 | break; /* might be e_limitcheck */ | |
450 | } | |
451 | } | |
452 | retcode = name_ref(da.base, (uint)(ptr - da.base), myref, 1); | |
453 | dynamic_free(&da); | |
454 | } | |
455 | /* Done scanning. Check for preceding /'s. */ | |
456 | have_name: if ( retcode < 0 ) return retcode; | |
457 | switch ( name_type ) | |
458 | { | |
459 | case 0: /* ordinary executable name */ | |
460 | if ( r_has_type(myref, t_name) ) /* i.e., not a number */ | |
461 | r_set_attrs(myref, a_executable); | |
462 | case 1: /* quoted name */ | |
463 | break; | |
464 | case 2: /* immediate lookup */ | |
465 | { ref *pvalue; | |
466 | if ( !r_has_type(myref, t_name) ) | |
467 | return e_undefined; | |
468 | if ( (pvalue = dict_find_name(myref)) == 0 ) | |
469 | return e_undefined; | |
470 | ref_assign_new(myref, pvalue); | |
471 | } | |
472 | } | |
473 | } | |
474 | /* If we are at the top level, return the object, */ | |
475 | /* otherwise keep going. */ | |
476 | if ( pstack == 0 || retcode < 0 ) | |
477 | return retcode; | |
478 | if ( proc_da.limit - (byte *)myref < 2 * sizeof(ref) ) | |
479 | return e_limitcheck; /* ****** SHOULD GROW OSTACK ****** */ | |
480 | myref++; | |
481 | goto top; | |
482 | } | |
483 | ||
484 | /* The internal scanning procedures return 0 on success, */ | |
485 | /* or a (negative) error code on failure. */ | |
486 | ||
487 | /* Scan a number for cvi or cvr. */ | |
488 | /* The first argument is a t_string. This is just like scan_number, */ | |
489 | /* but allows leading or trailing whitespace. */ | |
490 | int | |
491 | scan_number_only(ref *psref, ref *pnref) | |
492 | { byte *str = psref->value.bytes; | |
493 | byte *end = str + r_size(psref); | |
494 | if ( !r_has_attr(psref, a_read) ) return e_invalidaccess; | |
495 | while ( str < end && scan_char_decoder[*str] == ctype_space ) | |
496 | str++; | |
497 | while ( str < end && scan_char_decoder[end[-1]] == ctype_space ) | |
498 | end--; | |
499 | return scan_number(str, end, pnref); | |
500 | } | |
501 | ||
502 | /* Note that the number scanning procedures use a byte ** and a byte * */ | |
503 | /* rather than a stream. (It makes quite a difference in performance.) */ | |
504 | #define ngetc(sp) (sp < end ? *sp++ : EOFC) | |
505 | #define nputback(sp) (--sp) | |
506 | #define nreturn(v) return (*pstr = sp, v) | |
507 | ||
508 | /* Procedure to scan a number. */ | |
509 | private int | |
510 | scan_number(byte *str, byte *end, ref *pref) | |
511 | { /* Powers of 10 up to 6 can be represented accurately as */ | |
512 | /* a single-precision float. */ | |
513 | #define num_powers_10 6 | |
514 | static float powers_10[num_powers_10+1] = | |
515 | { 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6 }; | |
516 | static double neg_powers_10[num_powers_10+1] = | |
517 | { 1e0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6 }; | |
518 | byte *sp = str; /* can't be register because of & */ | |
519 | int sign = 0; | |
520 | long ival; | |
521 | double dval; | |
522 | int exp10 = 0; | |
523 | int code; | |
524 | register int c; | |
525 | switch ( c = ngetc(sp) ) | |
526 | { | |
527 | case '+': sign = 1; c = ngetc(sp); break; | |
528 | case '-': sign = -1; c = ngetc(sp); break; | |
529 | } | |
530 | if ( !isdigit(c) ) | |
531 | { if ( c != '.' ) return(e_syntaxerror); | |
532 | c = ngetc(sp); | |
533 | if ( !isdigit(c) ) return(e_syntaxerror); | |
534 | ival = 0; | |
535 | goto fi; | |
536 | } | |
537 | nputback(sp); | |
538 | if ( (code = scan_int(&sp, end, 10, 0, &ival, &dval)) != 0 ) | |
539 | { if ( code < 0 ) return(code); /* e_syntaxerror */ | |
540 | /* Code == 1, i.e., the integer overflowed. */ | |
541 | switch ( c = ngetc(sp) ) | |
542 | { | |
543 | default: | |
544 | return(e_syntaxerror); /* not terminated properly */ | |
545 | case '.': | |
546 | c = ngetc(sp); goto fd; | |
547 | case 'e': case 'E': | |
548 | goto fsd; | |
549 | case EOFC: /* return a float */ | |
550 | make_real_new(pref, (float)(sign < 0 ? -dval : dval)); | |
551 | return 0; | |
552 | case ERRC: | |
553 | return e_ioerror; | |
554 | } | |
555 | } | |
556 | switch ( c = ngetc(sp) ) | |
557 | { | |
558 | case EOFC: | |
559 | break; | |
560 | case ERRC: | |
561 | return e_ioerror; | |
562 | case '.': | |
563 | c = ngetc(sp); goto fi; | |
564 | default: | |
565 | return(e_syntaxerror); /* not terminated properly */ | |
566 | case 'e': case 'E': | |
567 | goto fsi; | |
568 | case '#': | |
569 | if ( sign || ival < min_radix || ival > max_radix ) | |
570 | return(e_syntaxerror); | |
571 | code = scan_int(&sp, end, (int)ival, 1, &ival, NULL); | |
572 | if ( code ) return(code); | |
573 | switch ( ngetc(sp) ) | |
574 | { | |
575 | case EOFC: | |
576 | break; | |
577 | case ERRC: | |
578 | return(e_ioerror); | |
579 | default: | |
580 | return(e_syntaxerror); | |
581 | } | |
582 | } | |
583 | /* Return an integer */ | |
584 | make_int_new(pref, (sign < 0 ? -ival : ival)); | |
585 | return(0); | |
586 | /* Handle a real. We just saw the decimal point. */ | |
587 | /* Enter here if we are still accumulating an integer in ival. */ | |
588 | fi: while ( isdigit(c) ) | |
589 | { /* Check for overflowing ival */ | |
590 | if ( ival >= (max_ulong >> 1) / 10 - 1 ) | |
591 | { dval = ival; | |
592 | goto fd; | |
593 | } | |
594 | ival = ival * 10 + (c - '0'); | |
595 | c = ngetc(sp); | |
596 | exp10--; | |
597 | } | |
598 | fsi: if ( sign < 0 ) ival = -ival; | |
599 | /* Take a shortcut for the common case */ | |
600 | if ( !(c == 'e' || c == 'E' || exp10 < -num_powers_10) ) | |
601 | { make_real_new(pref, (float)(ival * neg_powers_10[-exp10])); | |
602 | return(0); | |
603 | } | |
604 | dval = ival; | |
605 | goto fe; | |
606 | /* Now we are accumulating a double in dval. */ | |
607 | fd: while ( isdigit(c) ) | |
608 | { dval = dval * 10 + (c - '0'); | |
609 | c = ngetc(sp); | |
610 | exp10--; | |
611 | } | |
612 | fsd: if ( sign < 0 ) dval = -dval; | |
613 | fe: /* dval contains the value, negated if necessary */ | |
614 | if ( c == 'e' || c == 'E' ) | |
615 | { /* Check for a following exponent. */ | |
616 | int esign = 0; | |
617 | long eexp; | |
618 | switch ( c = ngetc(sp) ) | |
619 | { | |
620 | case '+': break; | |
621 | case '-': esign = 1; break; | |
622 | default: nputback(sp); | |
623 | } | |
624 | code = scan_int(&sp, end, 10, 0, &eexp, NULL); | |
625 | if ( code < 0 ) return(code); | |
626 | if ( code > 0 || eexp > 999 ) | |
627 | return(e_limitcheck); /* semi-arbitrary */ | |
628 | if ( esign ) | |
629 | exp10 -= (int)eexp; | |
630 | else | |
631 | exp10 += (int)eexp; | |
632 | c = ngetc(sp); | |
633 | } | |
634 | if ( c != EOFC ) return(c == ERRC ? e_ioerror : e_syntaxerror); | |
635 | /* Compute dval * 10^exp10. */ | |
636 | if ( exp10 > 0 ) | |
637 | { while ( exp10 > num_powers_10 ) | |
638 | dval *= powers_10[num_powers_10], | |
639 | exp10 -= num_powers_10; | |
640 | if ( exp10 > 0 ) | |
641 | dval *= powers_10[exp10]; | |
642 | } | |
643 | else if ( exp10 < 0 ) | |
644 | { while ( exp10 < -num_powers_10 ) | |
645 | dval /= powers_10[num_powers_10], | |
646 | exp10 += num_powers_10; | |
647 | if ( exp10 < 0 ) | |
648 | dval /= powers_10[-exp10]; | |
649 | } | |
650 | make_real_new(pref, (float)dval); | |
651 | return(0); | |
652 | } | |
653 | /* Internal subroutine to scan an integer. */ | |
654 | /* Return 0, e_limitcheck, or e_syntaxerror. */ | |
655 | /* (The only syntax error is no digits encountered.) */ | |
656 | /* Put back the terminating character. */ | |
657 | /* If nosign is true, the integer is scanned as unsigned; */ | |
658 | /* overflowing a ulong returns e_limitcheck. If nosign is false, */ | |
659 | /* the integer is scanned as signed; if the integer won't fit in a long, */ | |
660 | /* then: */ | |
661 | /* if pdval == NULL, return e_limitcheck; */ | |
662 | /* if pdval != NULL, return 1 and store a double value in *pdval. */ | |
663 | private int | |
664 | scan_int(byte **pstr, byte *end, int radix, int nosign, | |
665 | long *pval, double *pdval) | |
666 | { register byte *sp = *pstr; | |
667 | uint ival = 0, imax, irem; | |
668 | #if arch_ints_are_short | |
669 | ulong lval, lmax; | |
670 | uint lrem; | |
671 | #else | |
672 | # define lval ival /* for overflowing into double */ | |
673 | #endif | |
674 | double dval; | |
675 | register int c, d; | |
676 | register byte _ds *decoder = scan_char_decoder; | |
677 | /* Avoid the long divisions when radix = 10 */ | |
678 | #define set_max(vmax, vrem, big)\ | |
679 | if ( radix == 10 ) vmax = (big) / 10, vrem = (big) % 10;\ | |
680 | else vmax = (big) / radix, vrem = (big) % radix | |
681 | set_max(imax, irem, max_uint); | |
682 | #define convert_digit_fails(c, d)\ | |
683 | (d = decoder[c]) >= radix | |
684 | while ( 1 ) | |
685 | { c = ngetc(sp); | |
686 | if ( convert_digit_fails(c, d) ) | |
687 | { if ( char_is_data(c) ) nputback(sp); | |
688 | if ( (int)ival < 0 && !nosign ) | |
689 | { d = ival % radix; | |
690 | ival /= radix; | |
691 | break; | |
692 | } | |
693 | *pval = ival; | |
694 | nreturn(0); | |
695 | } | |
696 | if ( ival >= imax && (ival > imax || d > irem) ) | |
697 | break; /* overflow */ | |
698 | ival = ival * radix + d; | |
699 | } | |
700 | #if arch_ints_are_short | |
701 | /* Short integer overflowed. Accumulate in a long. */ | |
702 | lval = (ulong)ival * radix + d; | |
703 | set_max(lmax, lrem, max_ulong); | |
704 | while ( 1 ) | |
705 | { c = ngetc(sp); | |
706 | if ( convert_digit_fails(c, d) ) | |
707 | { if ( char_is_data(c) ) nputback(sp); | |
708 | if ( (long)lval < 0 && !nosign ) | |
709 | { d = lval % radix; | |
710 | lval /= radix; | |
711 | break; | |
712 | } | |
713 | *pval = lval; | |
714 | nreturn(0); | |
715 | } | |
716 | if ( lval >= lmax && (lval > lmax || d > lrem) ) | |
717 | break; /* overflow */ | |
718 | lval = lval * radix + d; | |
719 | } | |
720 | #endif | |
721 | /* Integer overflowed. Accumulate the result as a double. */ | |
722 | if ( pdval == NULL ) nreturn(e_limitcheck); | |
723 | dval = (double)lval * radix + d; | |
724 | while ( 1 ) | |
725 | { c = ngetc(sp); | |
726 | if ( convert_digit_fails(c, d) ) | |
727 | { if ( char_is_data(c) ) nputback(sp); | |
728 | *pdval = dval; | |
729 | nreturn(1); | |
730 | } | |
731 | dval = dval * radix + d; | |
732 | } | |
733 | /* Control doesn't get here */ | |
734 | } | |
735 | ||
736 | /* Make a string. If the allocation fails, release any dynamic storage. */ | |
737 | private int | |
738 | mk_string(ref *pref, da_ptr pda, byte *next) | |
739 | { uint size = (pda->next = next) - pda->base; | |
740 | byte *body = alloc_shrink(pda->base, pda->num_elts, size, 1, "scanner(string)"); | |
741 | if ( body == 0 ) | |
742 | { dynamic_free(pda); | |
743 | return e_VMerror; | |
744 | } | |
745 | make_tasv_new(pref, t_string, a_all, size, bytes, body); | |
746 | return 0; | |
747 | } | |
748 | ||
749 | /* Internal procedure to scan a string. */ | |
750 | private int | |
751 | scan_string(register stream *s, int from_string, ref *pref) | |
752 | { dynamic_area da; | |
753 | register int c; | |
754 | register byte *ptr = dynamic_begin(&da, 100, 1); | |
755 | int plevel = 0; | |
756 | if ( ptr == 0 ) return e_VMerror; | |
757 | top: while ( 1 ) | |
758 | { switch ( (c = sgetc(s)) ) | |
759 | { | |
760 | case EOFC: | |
761 | dynamic_free(&da); | |
762 | return e_syntaxerror; | |
763 | case ERRC: | |
764 | dynamic_free(&da); | |
765 | return e_ioerror; | |
766 | case '\\': | |
767 | if ( from_string ) break; | |
768 | switch ( (c = sgetc(s)) ) | |
769 | { | |
770 | case 'n': c = '\n'; break; | |
771 | case 'r': c = '\r'; break; | |
772 | case 't': c = '\t'; break; | |
773 | case 'b': c = '\b'; break; | |
774 | case 'f': c = '\f'; break; | |
775 | case '\r': /* ignore, check for following \n */ | |
776 | c = sgetc(s); | |
777 | if ( c != '\n' && char_is_data(c) ) | |
778 | sputback(s); | |
779 | goto top; | |
780 | case '\n': goto top; /* ignore */ | |
781 | case '0': case '1': case '2': case '3': | |
782 | case '4': case '5': case '6': case '7': | |
783 | { int d = sgetc(s); | |
784 | c -= '0'; | |
785 | if ( d >= '0' && d <= '7' ) | |
786 | { c = (c << 3) + d - '0'; | |
787 | d = sgetc(s); | |
788 | if ( d >= '0' && d <= '7' ) | |
789 | { c = (c << 3) + d - '0'; | |
790 | break; | |
791 | } | |
792 | } | |
793 | if ( char_is_signal(d) ) | |
794 | { dynamic_free(&da); | |
795 | return (d == ERRC ? e_ioerror : e_syntaxerror); | |
796 | } | |
797 | sputback(s); | |
798 | } | |
799 | break; | |
800 | default: ; /* ignore the \ */ | |
801 | } | |
802 | break; | |
803 | case '(': | |
804 | plevel++; break; | |
805 | case ')': | |
806 | if ( --plevel < 0 ) goto out; break; | |
807 | case '\r': /* convert to \n */ | |
808 | c = sgetc(s); | |
809 | if ( c != '\n' && char_is_data(c) ) | |
810 | sputback(s); | |
811 | c = '\n'; | |
812 | } | |
813 | if ( ptr == da.limit ) | |
814 | { ptr = dynamic_grow(&da, ptr); | |
815 | if ( !ptr ) return e_VMerror; | |
816 | } | |
817 | *ptr++ = c; | |
818 | } | |
819 | out: return mk_string(pref, &da, ptr); | |
820 | } | |
821 | ||
822 | /* Internal procedure to scan a hex string. */ | |
823 | private int | |
824 | scan_hex_string(stream *s, ref *pref) | |
825 | { dynamic_area da; | |
826 | int c1, c2, val1, val2; | |
827 | byte *ptr = dynamic_begin(&da, 100, 1); | |
828 | register byte _ds *decoder = scan_char_decoder; | |
829 | if ( ptr == 0 ) return e_VMerror; | |
830 | l1: do | |
831 | { c1 = sgetc(s); | |
832 | if ( (val1 = decoder[c1]) < 0x10 ) | |
833 | { do | |
834 | { c2 = sgetc(s); | |
835 | if ( (val2 = decoder[c2]) < 0x10 ) | |
836 | { if ( ptr == da.limit ) | |
837 | { ptr = dynamic_grow(&da, ptr); | |
838 | if ( !ptr ) return e_VMerror; | |
839 | } | |
840 | *ptr++ = (val1 << 4) + val2; | |
841 | goto l1; | |
842 | } | |
843 | } | |
844 | while ( val2 == ctype_space ); | |
845 | if ( c2 != '>' ) | |
846 | { dynamic_free(&da); | |
847 | return e_syntaxerror; | |
848 | } | |
849 | if ( ptr == da.limit ) | |
850 | { ptr = dynamic_grow(&da, ptr); | |
851 | if ( !ptr ) return e_VMerror; | |
852 | } | |
853 | *ptr++ = val1 << 4; /* no 2nd char */ | |
854 | goto lx; | |
855 | } | |
856 | } | |
857 | while ( val1 == ctype_space ); | |
858 | if ( c1 != '>' ) | |
859 | { dynamic_free(&da); | |
860 | return e_syntaxerror; | |
861 | } | |
862 | lx: return mk_string(pref, &da, ptr); | |
863 | } |