new stdio; renamed from doscan.c
[unix-history] / usr / src / lib / libc / stdio / vfscanf.c
CommitLineData
3686044d
KB
1/*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Chris Torek.
7 *
8 * %sccs.include.redist.c%
9 */
10
2ce81398 11#if defined(LIBC_SCCS) && !defined(lint)
3686044d
KB
12static char sccsid[] = "@(#)vfscanf.c 5.3 (Berkeley) %G%";
13#endif /* LIBC_SCCS and not lint */
87bdee5f 14
3686044d 15#include <sys/stdc.h>
87bdee5f 16#include <stdio.h>
3686044d
KB
17#include <ctype.h>
18#include <stdlib.h>
19#if __STDC__
20#include <stdarg.h>
21#else
22#include <varargs.h>
23#endif
24#include "local.h"
25
26#define FLOATING_POINT
27
28#ifdef FLOATING_POINT
29#include "floatio.h"
30#define BUF (MAXEXP+MAXFRACT+3) /* 3 = sign + decimal point + NUL */
31#else
32#define BUF 40
33#endif
34
35/*
36 * Flags used during conversion.
37 */
38#define LONG 0x01 /* l: long or double */
39#define LONGDBL 0x02 /* L: long double; unimplemented */
40#define SHORT 0x04 /* h: short */
41#define SUPPRESS 0x08 /* suppress assignment */
42#define POINTER 0x10 /* weird %p pointer (`fake hex') */
43#define NOSKIP 0x20 /* do not skip blanks */
44
45/*
46 * The following are used in numeric conversions only:
47 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
48 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
49 */
50#define SIGNOK 0x40 /* +/- is (still) legal */
51#define NDIGITS 0x80 /* no digits detected */
52
53#define DPTOK 0x100 /* (float) decimal point is still legal */
54#define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */
55
56#define PFXOK 0x100 /* 0x prefix is (still) legal */
57#define NZDIGITS 0x200 /* no zero digits detected */
58
59/*
60 * Conversion types.
61 */
62#define CT_CHAR 0 /* %c conversion */
63#define CT_CCL 1 /* %[...] conversion */
64#define CT_STRING 2 /* %s conversion */
65#define CT_INT 3 /* integer, i.e., strtol or strtoul */
66#define CT_FLOAT 4 /* floating, i.e., strtod */
67
68#define u_char unsigned char
69#define u_long unsigned long
70
71static u_char *__sccl();
72
73/*
74 * vfscanf
75 */
76__svfscanf(fp, fmt0, ap)
77 register FILE *fp;
78 char const *fmt0;
79 va_list ap;
87bdee5f 80{
3686044d
KB
81 register u_char *fmt = (u_char *)fmt0;
82 register int c; /* character from format, or conversion */
83 register size_t width; /* field width, or 0 */
84 register char *p; /* points into all kinds of strings */
85 register int n; /* handy integer */
86 register int flags; /* flags as defined above */
87 register char *p0; /* saves original value of p when necessary */
88 int nassigned; /* number of fields assigned */
89 int nread; /* number of characters consumed from fp */
90 int base; /* base argument to strtol/strtoul */
91 u_long (*ccfn)(); /* conversion function (strtol/strtoul) */
92 char ccltab[256]; /* character class table for %[...] */
93 char buf[BUF]; /* buffer for numeric conversions */
94
95 /* `basefix' is used to avoid `if' tests in the integer scanner */
96 static short basefix[17] =
97 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
98
99 nassigned = 0;
100 nread = 0;
101 for (;;) {
102 c = *fmt++;
103 if (c == 0)
104 return (nassigned);
105 if (isspace(c)) {
106 for (;;) {
107 if (fp->_r <= 0 && __srefill(fp))
108 return (nassigned);
109 if (!isspace(*fp->_p))
110 break;
111 nread++, fp->_r--, fp->_p++;
112 }
113 continue;
87bdee5f 114 }
3686044d
KB
115 if (c != '%')
116 goto literal;
117 width = 0;
118 flags = 0;
119 /*
120 * switch on the format. continue if done;
121 * break once format type is derived.
122 */
123again: c = *fmt++;
124 switch (c) {
125 case '%':
126literal:
127 if (fp->_r <= 0 && __srefill(fp))
128 goto input_failure;
129 if (*fp->_p != c)
130 goto match_failure;
131 fp->_r--, fp->_p++;
132 nread++;
133 continue;
87bdee5f 134
3686044d
KB
135 case '*':
136 flags |= SUPPRESS;
137 goto again;
138 case 'l':
139 flags |= LONG;
140 goto again;
141 case 'L':
142 flags |= LONGDBL;
143 goto again;
144 case 'h':
145 flags |= SHORT;
146 goto again;
147
148 case '0': case '1': case '2': case '3': case '4':
149 case '5': case '6': case '7': case '8': case '9':
150 width = width * 10 + c - '0';
151 goto again;
152
153 /*
154 * Conversions.
155 * Those marked `compat' are for 4.[123]BSD compatibility.
156 *
157 * (According to ANSI, E and X formats are supposed
158 * to the same as e and x. Sorry about that.)
159 */
160 case 'D': /* compat */
161 flags |= LONG;
162 /* FALLTHROUGH */
163 case 'd':
164 c = CT_INT;
165 ccfn = (u_long (*)())strtol;
166 base = 10;
167 break;
168
169 case 'i':
170 c = CT_INT;
171 ccfn = (u_long (*)())strtol;
172 base = 0;
173 break;
174
175 case 'O': /* compat */
176 flags |= LONG;
177 /* FALLTHROUGH */
178 case 'o':
179 c = CT_INT;
180 ccfn = strtoul;
181 base = 8;
182 break;
183
184 case 'u':
185 c = CT_INT;
186 ccfn = strtoul;
187 base = 10;
188 break;
189
190 case 'X': /* compat XXX */
191 flags |= LONG;
192 /* FALLTHROUGH */
193 case 'x':
194 flags |= PFXOK; /* enable 0x prefixing */
195 c = CT_INT;
196 ccfn = strtoul;
197 base = 16;
198 break;
199
200#ifdef FLOATING_POINT
201 case 'E': /* compat XXX */
202 case 'F': /* compat */
203 flags |= LONG;
204 /* FALLTHROUGH */
205 case 'e': case 'f': case 'g':
206 c = CT_FLOAT;
207 break;
208#endif
209
210 case 's':
211 c = CT_STRING;
212 break;
213
214 case '[':
215 fmt = __sccl(ccltab, fmt);
216 flags |= NOSKIP;
217 c = CT_CCL;
218 break;
219
220 case 'c':
221 flags |= NOSKIP;
222 c = CT_CHAR;
223 break;
224
225 case 'p': /* pointer format is like hex */
226 flags |= POINTER | PFXOK;
227 c = CT_INT;
228 ccfn = strtoul;
229 base = 16;
230 break;
231
232 case 'n':
233 if (flags & SUPPRESS) /* ??? */
234 continue;
235 if (flags & SHORT)
236 *va_arg(ap, short *) = nread;
237 else if (flags & LONG)
238 *va_arg(ap, long *) = nread;
87bdee5f 239 else
3686044d 240 *va_arg(ap, int *) = nread;
87bdee5f 241 continue;
3686044d
KB
242
243 /*
244 * Disgusting backwards compatibility hacks. XXX
245 */
246 case '\0': /* compat */
247 return (EOF);
248
249 default: /* compat */
250 if (isupper(c))
251 flags |= LONG;
252 c = CT_INT;
253 ccfn = (u_long (*)())strtol;
254 base = 10;
255 break;
256 }
257
258 /*
259 * We have a conversion that requires input.
260 */
261 if (fp->_r <= 0 && __srefill(fp))
262 goto input_failure;
263
264 /*
265 * Consume leading white space, except for formats
266 * that suppress this.
267 */
268 if ((flags & NOSKIP) == 0) {
269 while (isspace(*fp->_p)) {
270 nread++;
271 if (--fp->_r > 0)
272 fp->_p++;
273 else if (__srefill(fp))
274 goto input_failure;
275 }
276 /*
277 * Note that there is at least one character in
278 * the buffer, so conversions that do not set NOSKIP
279 * ca no longer result in an input failure.
280 */
281 }
282
283 /*
284 * Do the conversion.
285 */
286 switch (c) {
287
288 case CT_CHAR:
289 /* scan arbitrary characters (sets NOSKIP) */
290 if (width == 0)
291 width = 1;
292 if (flags & SUPPRESS) {
293 size_t sum = 0;
294 for (;;) {
295 if ((n = fp->_r) < width) {
296 sum += n;
297 width -= n;
298 fp->_p += n;
299 if (__srefill(fp)) {
300 if (sum == 0)
301 goto input_failure;
302 break;
303 }
304 } else {
305 sum += width;
306 fp->_r -= width;
307 fp->_p += width;
308 break;
309 }
310 }
311 nread += sum;
312 } else {
313 size_t r = fread((void *)va_arg(ap, char *), 1,
314 width, fp);
315
316 if (r == 0)
317 goto input_failure;
318 nread += r;
319 nassigned++;
320 }
321 break;
322
323 case CT_CCL:
324 /* scan a (nonempty) character class (sets NOSKIP) */
325 if (width == 0)
326 width = ~0; /* `infinity' */
327 /* take only those things in the class */
328 if (flags & SUPPRESS) {
329 n = 0;
330 while (ccltab[*fp->_p]) {
331 n++, fp->_r--, fp->_p++;
332 if (--width == 0)
333 break;
334 if (fp->_r <= 0 && __srefill(fp)) {
335 if (n == 0)
336 goto input_failure;
337 break;
338 }
339 }
340 if (n == 0)
341 goto match_failure;
342 } else {
343 p0 = p = va_arg(ap, char *);
344 while (ccltab[*fp->_p]) {
345 fp->_r--;
346 *p++ = *fp->_p++;
347 if (--width == 0)
348 break;
349 if (fp->_r <= 0 && __srefill(fp)) {
350 if (p == p0)
351 goto input_failure;
352 break;
353 }
354 }
355 n = p - p0;
356 if (n == 0)
357 goto match_failure;
358 *p = 0;
359 nassigned++;
360 }
361 nread += n;
362 break;
363
364 case CT_STRING:
365 /* like CCL, but zero-length string OK, & no NOSKIP */
366 if (width == 0)
367 width = ~0;
368 if (flags & SUPPRESS) {
369 n = 0;
370 while (!isspace(*fp->_p)) {
371 n++, fp->_r--, fp->_p++;
372 if (--width == 0)
373 break;
374 if (fp->_r <= 0 && __srefill(fp))
375 break;
376 }
377 nread += n;
378 } else {
379 p0 = p = va_arg(ap, char *);
380 while (!isspace(*fp->_p)) {
381 fp->_r--;
382 *p++ = *fp->_p++;
383 if (--width == 0)
384 break;
385 if (fp->_r <= 0 && __srefill(fp))
386 break;
387 }
388 *p = 0;
389 nread += p - p0;
390 nassigned++;
391 }
87bdee5f 392 continue;
3686044d
KB
393
394 case CT_INT:
395 /* scan an integer as if by strtol/strtoul */
396#ifdef hardway
397 if (width == 0 || width > sizeof(buf) - 1)
398 width = sizeof(buf) - 1;
399#else
400 /* size_t is unsigned, hence this optimisation */
401 if (--width > sizeof(buf) - 2)
402 width = sizeof(buf) - 2;
403 width++;
404#endif
405 flags |= SIGNOK | NDIGITS | NZDIGITS;
406 for (p = buf; width; width--) {
407 c = *fp->_p;
408 /*
409 * Switch on the character; `goto ok'
410 * if we accept it as a part of number.
411 */
412 switch (c) {
413
414 /*
415 * The digit 0 is always legal, but is
416 * special. For %i conversions, if no
417 * digits (zero or nonzero) have been
418 * scanned (only signs), we will have
419 * base==0. In that case, we should set
420 * it to 8 and enable 0x prefixing.
421 * Also, if we have not scanned zero digits
422 * before this, do not turn off prefixing
423 * (someone else will turn it off if we
424 * have scanned any nonzero digits).
425 */
426 case '0':
427 if (base == 0) {
428 base = 8;
429 flags |= PFXOK;
430 }
431 if (flags & NZDIGITS)
432 flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
433 else
434 flags &= ~(SIGNOK|PFXOK|NDIGITS);
435 goto ok;
436
437 /* 1 through 7 always legal */
438 case '1': case '2': case '3':
439 case '4': case '5': case '6': case '7':
440 base = basefix[base];
441 flags &= ~(SIGNOK | PFXOK | NDIGITS);
442 goto ok;
443
444 /* digits 8 and 9 ok iff decimal or hex */
445 case '8': case '9':
446 base = basefix[base];
447 if (base <= 8)
448 break; /* not legal here */
449 flags &= ~(SIGNOK | PFXOK | NDIGITS);
450 goto ok;
451
452 /* letters ok iff hex */
453 case 'A': case 'B': case 'C':
454 case 'D': case 'E': case 'F':
455 case 'a': case 'b': case 'c':
456 case 'd': case 'e': case 'f':
457 /* no need to fix base here */
458 if (base <= 10)
459 break; /* not legal here */
460 flags &= ~(SIGNOK | PFXOK | NDIGITS);
461 goto ok;
462
463 /* sign ok only as first character */
464 case '+': case '-':
465 if (flags & SIGNOK) {
466 flags &= ~SIGNOK;
467 goto ok;
468 }
469 break;
470
471 /* x ok iff flag still set & 2nd char */
472 case 'x': case 'X':
473 if (flags & PFXOK && p == buf + 1) {
474 base = 16; /* if %i */
475 flags &= ~PFXOK;
476 goto ok;
477 }
478 break;
479 }
480
481 /*
482 * If we got here, c is not a legal character
483 * for a number. Stop accumulating digits.
484 */
87bdee5f 485 break;
3686044d
KB
486 ok:
487 /*
488 * c is legal: store it and look at the next.
489 */
490 *p++ = c;
491 if (--fp->_r > 0)
492 fp->_p++;
493 else if (__srefill(fp))
494 break; /* EOF */
495 }
496 /*
497 * If we had only a sign, it is no good; push
498 * back the sign. If the number ends in `x',
499 * it was [sign] '0' 'x', so push back the x
500 * and treat it as [sign] '0'.
501 */
502 if (flags & NDIGITS) {
503 if (p > buf)
504 (void) ungetc(*(u_char *)--p, fp);
505 goto match_failure;
506 }
507 c = ((u_char *)p)[-1];
508 if (c == 'x' || c == 'X') {
509 --p;
510 (void) ungetc(c, fp);
511 }
512 if ((flags & SUPPRESS) == 0) {
513 u_long res;
514
515 *p = 0;
516 res = (*ccfn)(buf, (char **)NULL, base);
517 if (flags & POINTER)
518 *va_arg(ap, void **) = (void *)res;
519 else if (flags & SHORT)
520 *va_arg(ap, short *) = res;
521 else if (flags & LONG)
522 *va_arg(ap, long *) = res;
523 else
524 *va_arg(ap, int *) = res;
525 nassigned++;
526 }
527 nread += p - buf;
528 break;
529
530#ifdef FLOATING_POINT
531 case CT_FLOAT:
532 /* scan a floating point number as if by strtod */
533#ifdef hardway
534 if (width == 0 || width > sizeof(buf) - 1)
535 width = sizeof(buf) - 1;
536#else
537 /* size_t is unsigned, hence this optimisation */
538 if (--width > sizeof(buf) - 2)
539 width = sizeof(buf) - 2;
540 width++;
541#endif
542 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
543 for (p = buf; width; width--) {
544 c = *fp->_p;
545 /*
546 * This code mimicks the integer conversion
547 * code, but is much simpler.
548 */
549 switch (c) {
550
551 case '0': case '1': case '2': case '3':
552 case '4': case '5': case '6': case '7':
553 case '8': case '9':
554 flags &= ~(SIGNOK | NDIGITS);
555 goto fok;
556
557 case '+': case '-':
558 if (flags & SIGNOK) {
559 flags &= ~SIGNOK;
560 goto fok;
561 }
562 break;
563 case '.':
564 if (flags & DPTOK) {
565 flags &= ~(SIGNOK | DPTOK);
566 goto fok;
567 }
568 break;
569 case 'e': case 'E':
570 /* no exponent without some digits */
571 if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
572 flags =
573 (flags & ~(EXPOK|DPTOK)) |
574 SIGNOK | NDIGITS;
575 goto fok;
576 }
577 break;
578 }
87bdee5f 579 break;
3686044d
KB
580 fok:
581 *p++ = c;
582 if (--fp->_r > 0)
583 fp->_p++;
584 else if (__srefill(fp))
585 break; /* EOF */
586 }
587 /*
588 * If no digits, might be missing exponent digits
589 * (just give back the exponent) or might be missing
590 * regular digits, but had sign and/or decimal point.
591 */
592 if (flags & NDIGITS) {
593 if (flags & EXPOK) {
594 /* no digits at all */
595 while (p > buf)
596 ungetc(*(u_char *)--p, fp);
597 goto match_failure;
598 }
599 /* just a bad exponent (e and maybe sign) */
600 c = *(u_char *)--p;
601 if (c != 'e' && c != 'E') {
602 (void) ungetc(c, fp);/* sign */
603 c = *(u_char *)--p;
604 }
605 (void) ungetc(c, fp);
606 }
607 if ((flags & SUPPRESS) == 0) {
608 double res;
609
610 *p = 0;
611 res = atof(buf);
612 if (flags & LONG)
613 *va_arg(ap, double *) = res;
614 else
615 *va_arg(ap, float *) = res;
616 nassigned++;
617 }
618 nread += p - buf;
87bdee5f 619 break;
3686044d
KB
620#endif /* FLOATING_POINT */
621 }
87bdee5f 622 }
3686044d
KB
623input_failure:
624 return (nassigned ? nassigned : -1);
625match_failure:
626 return (nassigned);
87bdee5f
KM
627}
628
3686044d
KB
629/*
630 * Fill in the given table from the scanset at the given format
631 * (just after `['). Return a pointer to the character past the
632 * closing `]'. The table has a 1 wherever characters should be
633 * considered part of the scanset.
634 */
635static u_char *
636__sccl(tab, fmt)
637 register char *tab;
638 register u_char *fmt;
87bdee5f 639{
3686044d
KB
640 register int c, n, v;
641
642 /* first `clear' the whole table */
643 c = *fmt++; /* first char hat => negated scanset */
644 if (c == '^') {
645 v = 1; /* default => accept */
646 c = *fmt++; /* get new first char */
87bdee5f 647 } else
3686044d
KB
648 v = 0; /* default => reject */
649 /* should probably use memset here */
650 for (n = 0; n < 256; n++)
651 tab[n] = v;
652 if (c == 0)
653 return (fmt - 1);/* format ended before closing ] */
87bdee5f 654
3686044d
KB
655 /*
656 * Now set the entries corresponding to the actual scanset
657 * to the opposite of the above.
658 *
659 * The first character may be ']' (or '-') without being special;
660 * the last character may be '-'.
661 */
662 v = 1 - v;
663 for (;;) {
664 tab[c] = v; /* take character c */
665doswitch:
666 n = *fmt++; /* and examine the next */
667 switch (n) {
87bdee5f 668
3686044d
KB
669 case 0: /* format ended too soon */
670 return (fmt - 1);
671
672 case '-':
673 /*
674 * A scanset of the form
675 * [01+-]
676 * is defined as `the digit 0, the digit 1,
677 * the character +, the character -', but
678 * the effect of a scanset such as
679 * [a-zA-Z0-9]
680 * is implementation defined. The V7 Unix
681 * scanf treats `a-z' as `the letters a through
682 * z', but treats `a-a' as `the letter a, the
683 * character -, and the letter a'.
684 *
685 * For compatibility, the `-' is not considerd
686 * to define a range if the character following
687 * it is either a close bracket (required by ANSI)
688 * or is not numerically greater than the character
689 * we just stored in the table (c).
690 */
691 n = *fmt;
692 if (n == ']' || n < c) {
693 c = '-';
694 break; /* resume the for(;;) */
695 }
696 fmt++;
697 do { /* fill in the range */
698 tab[++c] = v;
699 } while (c < n);
700#if 1 /* XXX another disgusting compatibility hack */
701 /*
702 * Alas, the V7 Unix scanf also treats formats
703 * such as [a-c-e] as `the letters a through e'.
704 * This too is permitted by the standard....
705 */
706 goto doswitch;
707#else
708 c = *fmt++;
709 if (c == 0)
710 return (fmt - 1);
711 if (c == ']')
712 return (fmt);
713#endif
714 break;
715
716 case ']': /* end of scanset */
717 return (fmt);
718
719 default: /* just another character */
720 c = n;
721 break;
722 }
87bdee5f 723 }
3686044d 724 /* NOTREACHED */
87bdee5f 725}