This commit was manufactured by cvs2svn to create tag 'FreeBSD-release/1.0'.
[unix-history] / lib / libc / stdio / vfscanf.c
CommitLineData
15637ed4
RG
1/*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Chris Torek.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#if defined(LIBC_SCCS) && !defined(lint)
78ed81a3 38static char sccsid[] = "@(#)vfscanf.c 5.7 (Berkeley) 12/14/92";
15637ed4
RG
39#endif /* LIBC_SCCS and not lint */
40
41#include <stdio.h>
42#include <stdlib.h>
43#include <ctype.h>
44#if __STDC__
45#include <stdarg.h>
46#else
47#include <varargs.h>
48#endif
49#include "local.h"
50
51#define FLOATING_POINT
52
15637ed4 53#include "floatio.h"
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 */
#define	LONG		0x01	/* l: long or double */
#define	LONGDBL		0x02	/* L: long double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* suppress assignment */
#define	POINTER		0x10	/* weird %p pointer (`fake hex') */
#define	NOSKIP		0x20	/* do not skip blanks */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * The floating-point and integral groups deliberately share bit
 * values (0x100, 0x200): a single conversion is one or the other,
 * never both.
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */

#define	DPTOK		0x100	/* (float) decimal point is still legal */
#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */

/*
 * Conversion types.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* integer, i.e., strtol or strtoul */
#define	CT_FLOAT	4	/* floating, i.e., strtod */

#define u_char unsigned char
#define u_long unsigned long

/*
 * Scanset parser for %[...].  A real prototype is given when the
 * compiler understands one so that calls are type-checked; it is
 * compatible with the old-style definition further down.
 */
#if __STDC__
static u_char *__sccl(char *, u_char *);
#else
static u_char *__sccl();
#endif
93
94/*
95 * vfscanf
96 */
97__svfscanf(fp, fmt0, ap)
98 register FILE *fp;
99 char const *fmt0;
100 va_list ap;
101{
102 register u_char *fmt = (u_char *)fmt0;
103 register int c; /* character from format, or conversion */
104 register size_t width; /* field width, or 0 */
105 register char *p; /* points into all kinds of strings */
106 register int n; /* handy integer */
107 register int flags; /* flags as defined above */
108 register char *p0; /* saves original value of p when necessary */
109 int nassigned; /* number of fields assigned */
110 int nread; /* number of characters consumed from fp */
111 int base; /* base argument to strtol/strtoul */
112 u_long (*ccfn)(); /* conversion function (strtol/strtoul) */
113 char ccltab[256]; /* character class table for %[...] */
114 char buf[BUF]; /* buffer for numeric conversions */
115
116 /* `basefix' is used to avoid `if' tests in the integer scanner */
117 static short basefix[17] =
118 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
119
120 nassigned = 0;
121 nread = 0;
122 base = 0; /* XXX just to keep gcc happy */
123 ccfn = NULL; /* XXX just to keep gcc happy */
124 for (;;) {
125 c = *fmt++;
126 if (c == 0)
127 return (nassigned);
128 if (isspace(c)) {
129 for (;;) {
130 if (fp->_r <= 0 && __srefill(fp))
131 return (nassigned);
132 if (!isspace(*fp->_p))
133 break;
134 nread++, fp->_r--, fp->_p++;
135 }
136 continue;
137 }
138 if (c != '%')
139 goto literal;
140 width = 0;
141 flags = 0;
142 /*
143 * switch on the format. continue if done;
144 * break once format type is derived.
145 */
146again: c = *fmt++;
147 switch (c) {
148 case '%':
149literal:
150 if (fp->_r <= 0 && __srefill(fp))
151 goto input_failure;
152 if (*fp->_p != c)
153 goto match_failure;
154 fp->_r--, fp->_p++;
155 nread++;
156 continue;
157
158 case '*':
159 flags |= SUPPRESS;
160 goto again;
161 case 'l':
162 flags |= LONG;
163 goto again;
164 case 'L':
165 flags |= LONGDBL;
166 goto again;
167 case 'h':
168 flags |= SHORT;
169 goto again;
170
171 case '0': case '1': case '2': case '3': case '4':
172 case '5': case '6': case '7': case '8': case '9':
173 width = width * 10 + c - '0';
174 goto again;
175
176 /*
177 * Conversions.
178 * Those marked `compat' are for 4.[123]BSD compatibility.
179 *
180 * (According to ANSI, E and X formats are supposed
181 * to the same as e and x. Sorry about that.)
182 */
183 case 'D': /* compat */
184 flags |= LONG;
185 /* FALLTHROUGH */
186 case 'd':
187 c = CT_INT;
188 ccfn = (u_long (*)())strtol;
189 base = 10;
190 break;
191
192 case 'i':
193 c = CT_INT;
194 ccfn = (u_long (*)())strtol;
195 base = 0;
196 break;
197
198 case 'O': /* compat */
199 flags |= LONG;
200 /* FALLTHROUGH */
201 case 'o':
202 c = CT_INT;
203 ccfn = strtoul;
204 base = 8;
205 break;
206
207 case 'u':
208 c = CT_INT;
209 ccfn = strtoul;
210 base = 10;
211 break;
212
213 case 'X': /* compat XXX */
214 flags |= LONG;
215 /* FALLTHROUGH */
216 case 'x':
217 flags |= PFXOK; /* enable 0x prefixing */
218 c = CT_INT;
219 ccfn = strtoul;
220 base = 16;
221 break;
222
223#ifdef FLOATING_POINT
224 case 'E': /* compat XXX */
225 case 'F': /* compat */
226 flags |= LONG;
227 /* FALLTHROUGH */
228 case 'e': case 'f': case 'g':
229 c = CT_FLOAT;
230 break;
231#endif
232
233 case 's':
234 c = CT_STRING;
235 break;
236
237 case '[':
238 fmt = __sccl(ccltab, fmt);
239 flags |= NOSKIP;
240 c = CT_CCL;
241 break;
242
243 case 'c':
244 flags |= NOSKIP;
245 c = CT_CHAR;
246 break;
247
248 case 'p': /* pointer format is like hex */
249 flags |= POINTER | PFXOK;
250 c = CT_INT;
251 ccfn = strtoul;
252 base = 16;
253 break;
254
255 case 'n':
256 if (flags & SUPPRESS) /* ??? */
257 continue;
258 if (flags & SHORT)
259 *va_arg(ap, short *) = nread;
260 else if (flags & LONG)
261 *va_arg(ap, long *) = nread;
262 else
263 *va_arg(ap, int *) = nread;
264 continue;
265
266 /*
267 * Disgusting backwards compatibility hacks. XXX
268 */
269 case '\0': /* compat */
270 return (EOF);
271
272 default: /* compat */
273 if (isupper(c))
274 flags |= LONG;
275 c = CT_INT;
276 ccfn = (u_long (*)())strtol;
277 base = 10;
278 break;
279 }
280
281 /*
282 * We have a conversion that requires input.
283 */
284 if (fp->_r <= 0 && __srefill(fp))
285 goto input_failure;
286
287 /*
288 * Consume leading white space, except for formats
289 * that suppress this.
290 */
291 if ((flags & NOSKIP) == 0) {
292 while (isspace(*fp->_p)) {
293 nread++;
294 if (--fp->_r > 0)
295 fp->_p++;
296 else if (__srefill(fp))
297 goto input_failure;
298 }
299 /*
300 * Note that there is at least one character in
301 * the buffer, so conversions that do not set NOSKIP
302 * ca no longer result in an input failure.
303 */
304 }
305
306 /*
307 * Do the conversion.
308 */
309 switch (c) {
310
311 case CT_CHAR:
312 /* scan arbitrary characters (sets NOSKIP) */
313 if (width == 0)
314 width = 1;
315 if (flags & SUPPRESS) {
316 size_t sum = 0;
317 for (;;) {
318 if ((n = fp->_r) < width) {
319 sum += n;
320 width -= n;
321 fp->_p += n;
322 if (__srefill(fp)) {
323 if (sum == 0)
324 goto input_failure;
325 break;
326 }
327 } else {
328 sum += width;
329 fp->_r -= width;
330 fp->_p += width;
331 break;
332 }
333 }
334 nread += sum;
335 } else {
336 size_t r = fread((void *)va_arg(ap, char *), 1,
337 width, fp);
338
339 if (r == 0)
340 goto input_failure;
341 nread += r;
342 nassigned++;
343 }
344 break;
345
346 case CT_CCL:
347 /* scan a (nonempty) character class (sets NOSKIP) */
348 if (width == 0)
349 width = ~0; /* `infinity' */
350 /* take only those things in the class */
351 if (flags & SUPPRESS) {
352 n = 0;
353 while (ccltab[*fp->_p]) {
354 n++, fp->_r--, fp->_p++;
355 if (--width == 0)
356 break;
357 if (fp->_r <= 0 && __srefill(fp)) {
358 if (n == 0)
359 goto input_failure;
360 break;
361 }
362 }
363 if (n == 0)
364 goto match_failure;
365 } else {
366 p0 = p = va_arg(ap, char *);
367 while (ccltab[*fp->_p]) {
368 fp->_r--;
369 *p++ = *fp->_p++;
370 if (--width == 0)
371 break;
372 if (fp->_r <= 0 && __srefill(fp)) {
373 if (p == p0)
374 goto input_failure;
375 break;
376 }
377 }
378 n = p - p0;
379 if (n == 0)
380 goto match_failure;
381 *p = 0;
382 nassigned++;
383 }
384 nread += n;
385 break;
386
387 case CT_STRING:
388 /* like CCL, but zero-length string OK, & no NOSKIP */
389 if (width == 0)
390 width = ~0;
391 if (flags & SUPPRESS) {
392 n = 0;
393 while (!isspace(*fp->_p)) {
394 n++, fp->_r--, fp->_p++;
395 if (--width == 0)
396 break;
397 if (fp->_r <= 0 && __srefill(fp))
398 break;
399 }
400 nread += n;
401 } else {
402 p0 = p = va_arg(ap, char *);
403 while (!isspace(*fp->_p)) {
404 fp->_r--;
405 *p++ = *fp->_p++;
406 if (--width == 0)
407 break;
408 if (fp->_r <= 0 && __srefill(fp))
409 break;
410 }
411 *p = 0;
412 nread += p - p0;
413 nassigned++;
414 }
415 continue;
416
417 case CT_INT:
418 /* scan an integer as if by strtol/strtoul */
419#ifdef hardway
420 if (width == 0 || width > sizeof(buf) - 1)
421 width = sizeof(buf) - 1;
422#else
423 /* size_t is unsigned, hence this optimisation */
424 if (--width > sizeof(buf) - 2)
425 width = sizeof(buf) - 2;
426 width++;
427#endif
428 flags |= SIGNOK | NDIGITS | NZDIGITS;
429 for (p = buf; width; width--) {
430 c = *fp->_p;
431 /*
432 * Switch on the character; `goto ok'
433 * if we accept it as a part of number.
434 */
435 switch (c) {
436
437 /*
438 * The digit 0 is always legal, but is
439 * special. For %i conversions, if no
440 * digits (zero or nonzero) have been
441 * scanned (only signs), we will have
442 * base==0. In that case, we should set
443 * it to 8 and enable 0x prefixing.
444 * Also, if we have not scanned zero digits
445 * before this, do not turn off prefixing
446 * (someone else will turn it off if we
447 * have scanned any nonzero digits).
448 */
449 case '0':
450 if (base == 0) {
451 base = 8;
452 flags |= PFXOK;
453 }
454 if (flags & NZDIGITS)
455 flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
456 else
457 flags &= ~(SIGNOK|PFXOK|NDIGITS);
458 goto ok;
459
460 /* 1 through 7 always legal */
461 case '1': case '2': case '3':
462 case '4': case '5': case '6': case '7':
463 base = basefix[base];
464 flags &= ~(SIGNOK | PFXOK | NDIGITS);
465 goto ok;
466
467 /* digits 8 and 9 ok iff decimal or hex */
468 case '8': case '9':
469 base = basefix[base];
470 if (base <= 8)
471 break; /* not legal here */
472 flags &= ~(SIGNOK | PFXOK | NDIGITS);
473 goto ok;
474
475 /* letters ok iff hex */
476 case 'A': case 'B': case 'C':
477 case 'D': case 'E': case 'F':
478 case 'a': case 'b': case 'c':
479 case 'd': case 'e': case 'f':
480 /* no need to fix base here */
481 if (base <= 10)
482 break; /* not legal here */
483 flags &= ~(SIGNOK | PFXOK | NDIGITS);
484 goto ok;
485
486 /* sign ok only as first character */
487 case '+': case '-':
488 if (flags & SIGNOK) {
489 flags &= ~SIGNOK;
490 goto ok;
491 }
492 break;
493
494 /* x ok iff flag still set & 2nd char */
495 case 'x': case 'X':
496 if (flags & PFXOK && p == buf + 1) {
497 base = 16; /* if %i */
498 flags &= ~PFXOK;
499 goto ok;
500 }
501 break;
502 }
503
504 /*
505 * If we got here, c is not a legal character
506 * for a number. Stop accumulating digits.
507 */
508 break;
509 ok:
510 /*
511 * c is legal: store it and look at the next.
512 */
513 *p++ = c;
514 if (--fp->_r > 0)
515 fp->_p++;
516 else if (__srefill(fp))
517 break; /* EOF */
518 }
519 /*
520 * If we had only a sign, it is no good; push
521 * back the sign. If the number ends in `x',
522 * it was [sign] '0' 'x', so push back the x
523 * and treat it as [sign] '0'.
524 */
525 if (flags & NDIGITS) {
526 if (p > buf)
527 (void) ungetc(*(u_char *)--p, fp);
528 goto match_failure;
529 }
530 c = ((u_char *)p)[-1];
531 if (c == 'x' || c == 'X') {
532 --p;
533 (void) ungetc(c, fp);
534 }
535 if ((flags & SUPPRESS) == 0) {
536 u_long res;
537
538 *p = 0;
539 res = (*ccfn)(buf, (char **)NULL, base);
540 if (flags & POINTER)
541 *va_arg(ap, void **) = (void *)res;
542 else if (flags & SHORT)
543 *va_arg(ap, short *) = res;
544 else if (flags & LONG)
545 *va_arg(ap, long *) = res;
546 else
547 *va_arg(ap, int *) = res;
548 nassigned++;
549 }
550 nread += p - buf;
551 break;
552
553#ifdef FLOATING_POINT
554 case CT_FLOAT:
555 /* scan a floating point number as if by strtod */
556#ifdef hardway
557 if (width == 0 || width > sizeof(buf) - 1)
558 width = sizeof(buf) - 1;
559#else
560 /* size_t is unsigned, hence this optimisation */
561 if (--width > sizeof(buf) - 2)
562 width = sizeof(buf) - 2;
563 width++;
564#endif
565 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
566 for (p = buf; width; width--) {
567 c = *fp->_p;
568 /*
569 * This code mimicks the integer conversion
570 * code, but is much simpler.
571 */
572 switch (c) {
573
574 case '0': case '1': case '2': case '3':
575 case '4': case '5': case '6': case '7':
576 case '8': case '9':
577 flags &= ~(SIGNOK | NDIGITS);
578 goto fok;
579
580 case '+': case '-':
581 if (flags & SIGNOK) {
582 flags &= ~SIGNOK;
583 goto fok;
584 }
585 break;
586 case '.':
587 if (flags & DPTOK) {
588 flags &= ~(SIGNOK | DPTOK);
589 goto fok;
590 }
591 break;
592 case 'e': case 'E':
593 /* no exponent without some digits */
594 if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
595 flags =
596 (flags & ~(EXPOK|DPTOK)) |
597 SIGNOK | NDIGITS;
598 goto fok;
599 }
600 break;
601 }
602 break;
603 fok:
604 *p++ = c;
605 if (--fp->_r > 0)
606 fp->_p++;
607 else if (__srefill(fp))
608 break; /* EOF */
609 }
610 /*
611 * If no digits, might be missing exponent digits
612 * (just give back the exponent) or might be missing
613 * regular digits, but had sign and/or decimal point.
614 */
615 if (flags & NDIGITS) {
616 if (flags & EXPOK) {
617 /* no digits at all */
618 while (p > buf)
619 ungetc(*(u_char *)--p, fp);
620 goto match_failure;
621 }
622 /* just a bad exponent (e and maybe sign) */
623 c = *(u_char *)--p;
624 if (c != 'e' && c != 'E') {
625 (void) ungetc(c, fp);/* sign */
626 c = *(u_char *)--p;
627 }
628 (void) ungetc(c, fp);
629 }
630 if ((flags & SUPPRESS) == 0) {
631 double res;
632
633 *p = 0;
78ed81a3 634 res = strtod(buf,(char **) NULL);
15637ed4
RG
635 if (flags & LONG)
636 *va_arg(ap, double *) = res;
637 else
638 *va_arg(ap, float *) = res;
639 nassigned++;
640 }
641 nread += p - buf;
642 break;
643#endif /* FLOATING_POINT */
644 }
645 }
646input_failure:
647 return (nassigned ? nassigned : -1);
648match_failure:
649 return (nassigned);
650}
651
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]'.  The table has a 1 wherever characters should be
 * considered part of the scanset.
 *
 * tab must have room for 256 entries (one per unsigned char value);
 * every entry is overwritten.  If the format ends before a closing
 * `]' is seen, the return value points at the terminating NUL.
 */
static unsigned char *
__sccl(tab, fmt)
	register char *tab;
	register unsigned char *fmt;
{
	register int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The comparison must be `<=': with `<', an
			 * equal endpoint would enter the fill loop below
			 * and mark c + 1, which is the wrong character
			 * and, for c == 255, lies outside the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			do {		/* fill in the range; here n > c */
				tab[++c] = v;
			} while (c < n);
			/*
			 * XXX another disgusting compatibility hack:
			 * the V7 Unix scanf also treats formats such
			 * as [a-c-e] as `the letters a through e', so
			 * look at the next character without storing
			 * anything more.  This too is permitted by the
			 * standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}