Updated to Ian Darwin's latest file command and magic database.
[unix-history] / usr.bin / file / apprentice.c
CommitLineData
15637ed4
RG
/*
 * apprentice - make one pass through /etc/magic, learning its secrets.
 *
 * Copyright (c) Ian F. Darwin, 1987.
 * Written by Ian F. Darwin.
 *
 * This software is not subject to any license of the American Telephone
 * and Telegraph Company or of the Regents of the University of California.
 *
 * Permission is granted to anyone to use this software for any purpose on
 * any computer system, and to alter it and redistribute it freely, subject
 * to the following restrictions:
 *
 * 1. The author is not responsible for the consequences of use of this
 *    software, no matter how awful, even if they arise from flaws in it.
 *
 * 2. The origin of this software must not be misrepresented, either by
 *    explicit claim or by omission.  Since few users ever read sources,
 *    credits must appear in the documentation.
 *
 * 3. Altered versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.  Since few users
 *    ever read sources, credits must appear in the documentation.
 *
 * 4. This notice may not be removed or altered.
 */
27
28#include <stdio.h>
286a6f32
C
29#include <stdlib.h>
30#include <string.h>
15637ed4
RG
31#include <ctype.h>
32#include "file.h"
33
#ifndef	lint
static char *moduleid =
	"@(#)apprentice.c,v 1.2 1993/06/10 00:38:02 jtc Exp";
#endif	/* lint */

/*
 * EATAB - advance the scan pointer `l' (a char * that must be in scope at
 * the point of use) past any run of ASCII white space.
 *
 * The body is wrapped in do/while(0) so that "EATAB;" is exactly one
 * statement and stays correct under an un-braced if/else.
 */
#define	EATAB	do { \
		while (isascii((unsigned char) *l) && \
		       isspace((unsigned char) *l)) \
			++l; \
	} while (0)


/* Forward declarations of the file-local helpers defined below. */
static int getvalue	__P((struct magic *, char **));
static int hextoint	__P((int));
static char *getstr	__P((char *, char *, int, int *));
static int parse	__P((char *, int *, int));

/* Number of entries currently allocated for the magic[] table. */
static int maxmagic = 0;
15637ed4 49
286a6f32 50int
15637ed4
RG
51apprentice(fn, check)
52char *fn; /* name of magic file */
286a6f32 53int check; /* non-zero? checking-only run. */
15637ed4
RG
54{
55 FILE *f;
286a6f32 56 char line[BUFSIZ+1];
15637ed4
RG
57 int errs = 0;
58
59 f = fopen(fn, "r");
60 if (f==NULL) {
61 (void) fprintf(stderr, "%s: can't read magic file %s\n",
62 progname, fn);
63 if (check)
64 return -1;
65 else
66 exit(1);
67 }
68
286a6f32
C
69 maxmagic = MAXMAGIS;
70 if ((magic = (struct magic *) malloc(sizeof(struct magic) * maxmagic))
71 == NULL) {
72 (void) fprintf(stderr, "%s: Out of memory.\n", progname);
73 if (check)
74 return -1;
75 else
76 exit(1);
77 }
78
15637ed4
RG
79 /* parse it */
80 if (check) /* print silly verbose header for USG compat. */
286a6f32 81 (void) printf("cont\toffset\ttype\topcode\tmask\tvalue\tdesc\n");
15637ed4 82
286a6f32 83 for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
15637ed4
RG
84 if (line[0]=='#') /* comment, do not parse */
85 continue;
286a6f32 86 if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
15637ed4
RG
87 continue;
88 line[strlen(line)-1] = '\0'; /* delete newline */
89 if (parse(line, &nmagic, check) != 0)
90 ++errs;
91 }
92
93 (void) fclose(f);
94 return errs ? -1 : 0;
95}
96
97/*
98 * parse one line from magic file, put into magic[index++] if valid
99 */
286a6f32 100static int
15637ed4
RG
101parse(l, ndx, check)
102char *l;
103int *ndx, check;
104{
105 int i = 0, nd = *ndx;
15637ed4 106 struct magic *m;
286a6f32
C
107 char *t, *s;
108
109 if (nd+1 >= maxmagic){
110 maxmagic += 20;
111 if ((magic = (struct magic *) realloc(magic,
112 sizeof(struct magic) *
113 maxmagic)) == NULL) {
114 (void) fprintf(stderr, "%s: Out of memory.\n", progname);
115 if (check)
116 return -1;
117 else
118 exit(1);
119 }
15637ed4
RG
120 }
121 m = &magic[*ndx];
286a6f32
C
122 m->flag = 0;
123 m->cont_level = 0;
15637ed4 124
286a6f32 125 while (*l == '>') {
15637ed4 126 ++l; /* step over */
286a6f32
C
127 m->cont_level++;
128 }
129
130 if (m->cont_level != 0 && *l == '(') {
131 ++l; /* step over */
132 m->flag |= INDIR;
133 }
15637ed4
RG
134
135 /* get offset, then skip over it */
286a6f32
C
136 m->offset = (int) strtol(l,&t,0);
137 if (l == t)
138 magwarn("offset %s invalid", l);
139 l = t;
140
141 if (m->flag & INDIR) {
142 m->in.type = LONG;
143 m->in.offset = 0;
144 /*
145 * read [.lbs][+-]nnnnn)
146 */
147 if (*l == '.') {
148 switch (*++l) {
149 case 'l':
150 m->in.type = LONG;
151 break;
152 case 's':
153 m->in.type = SHORT;
154 break;
155 case 'b':
156 m->in.type = BYTE;
157 break;
158 default:
159 magwarn("indirect offset type %c invalid", *l);
160 break;
161 }
162 l++;
163 }
164 s = l;
165 if (*l == '+' || *l == '-') l++;
166 if (isdigit((unsigned char)*l)) {
167 m->in.offset = strtol(l, &t, 0);
168 if (*s == '-') m->in.offset = - m->in.offset;
169 }
170 if (*t++ != ')')
171 magwarn("missing ')' in indirect offset");
172 l = t;
173 }
174
175
176 while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
15637ed4
RG
177 ++l;
178 EATAB;
179
286a6f32
C
180#define NBYTE 4
181#define NSHORT 5
182#define NLONG 4
183#define NSTRING 6
184#define NDATE 4
185#define NBESHORT 7
186#define NBELONG 6
187#define NBEDATE 6
188#define NLESHORT 7
189#define NLELONG 6
190#define NLEDATE 6
191
15637ed4
RG
192 /* get type, skip it */
193 if (strncmp(l, "byte", NBYTE)==0) {
194 m->type = BYTE;
195 l += NBYTE;
196 } else if (strncmp(l, "short", NSHORT)==0) {
197 m->type = SHORT;
198 l += NSHORT;
199 } else if (strncmp(l, "long", NLONG)==0) {
200 m->type = LONG;
201 l += NLONG;
202 } else if (strncmp(l, "string", NSTRING)==0) {
203 m->type = STRING;
204 l += NSTRING;
286a6f32
C
205 } else if (strncmp(l, "date", NDATE)==0) {
206 m->type = DATE;
207 l += NDATE;
208 } else if (strncmp(l, "beshort", NBESHORT)==0) {
209 m->type = BESHORT;
210 l += NBESHORT;
211 } else if (strncmp(l, "belong", NBELONG)==0) {
212 m->type = BELONG;
213 l += NBELONG;
214 } else if (strncmp(l, "bedate", NBEDATE)==0) {
215 m->type = BEDATE;
216 l += NBEDATE;
217 } else if (strncmp(l, "leshort", NLESHORT)==0) {
218 m->type = LESHORT;
219 l += NLESHORT;
220 } else if (strncmp(l, "lelong", NLELONG)==0) {
221 m->type = LELONG;
222 l += NLELONG;
223 } else if (strncmp(l, "ledate", NLEDATE)==0) {
224 m->type = LEDATE;
225 l += NLEDATE;
15637ed4 226 } else {
286a6f32 227 magwarn("type %s invalid", l);
15637ed4
RG
228 return -1;
229 }
286a6f32
C
230 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
231 if (*l == '&') {
15637ed4 232 ++l;
286a6f32 233 m->mask = strtol(l, &l, 0);
15637ed4 234 } else
286a6f32 235 m->mask = 0L;
15637ed4 236 EATAB;
286a6f32
C
237
238 switch (*l) {
239 case '>':
240 case '<':
241 /* Old-style anding: "0 byte &0x80 dynamically linked" */
242 case '&':
243 case '^':
244 case '=':
245 m->reln = *l;
246 ++l;
15637ed4 247 break;
286a6f32
C
248 case '!':
249 if (m->type != STRING) {
250 m->reln = *l;
251 ++l;
252 break;
253 }
254 /* FALL THROUGH */
15637ed4 255 default:
286a6f32
C
256 if (*l == 'x' && isascii((unsigned char)l[1]) &&
257 isspace((unsigned char)l[1])) {
258 m->reln = *l;
259 ++l;
260 goto GetDesc; /* Bill The Cat */
261 }
262 m->reln = '=';
263 break;
15637ed4 264 }
286a6f32
C
265 EATAB;
266
267 if (getvalue(m, &l))
268 return -1;
269 /*
270 * TODO finish this macro and start using it!
271 * #define offsetcheck {if (offset > HOWMANY-1)
272 * magwarn("offset too big"); }
273 */
15637ed4
RG
274
275 /*
276 * now get last part - the description
277 */
286a6f32 278GetDesc:
15637ed4 279 EATAB;
286a6f32
C
280 if (l[0] == '\b') {
281 ++l;
282 m->nospflag = 1;
283 } else if ((l[0] == '\\') && (l[1] == 'b')) {
284 ++l;
285 ++l;
286 m->nospflag = 1;
287 } else
288 m->nospflag = 0;
15637ed4
RG
289 while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
290 /* NULLBODY */;
291
292 if (check) {
293 mdump(m);
294 }
295 ++(*ndx); /* make room for next */
296 return 0;
297}
298
286a6f32
C
299/*
300 * Read a numeric value from a pointer, into the value union of a magic
301 * pointer, according to the magic type. Update the string pointer to point
302 * just after the number read. Return 0 for success, non-zero for failure.
303 */
304static int
305getvalue(m, p)
306struct magic *m;
307char **p;
308{
309 int slen;
310
311 if (m->type == STRING) {
312 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
313 m->vallen = slen;
314 } else {
315 if (m->reln != 'x') {
316 switch(m->type) {
317 /*
318 * Do not remove the casts below. They are vital.
319 * When later compared with the data, the sign
320 * extension must have happened.
321 */
322 case BYTE:
323 m->value.l = (char) strtol(*p,p,0);
324 break;
325 case SHORT:
326 case BESHORT:
327 case LESHORT:
328 m->value.l = (short) strtol(*p,p,0);
329 break;
330 case DATE:
331 case BEDATE:
332 case LEDATE:
333 case LONG:
334 case BELONG:
335 case LELONG:
336 m->value.l = (long) strtol(*p,p,0);
337 break;
338 default:
339 magwarn("can't happen: m->type=%d\n", m->type);
340 return -1;
341 }
342 }
343 }
344 return 0;
345}
346
15637ed4
RG
/*
 * Convert a string containing C character escapes.  Stop at an unescaped
 * white-space character or at the end of the input.
 * Copy the converted version to "p", returning its length in *slen.
 * Return updated scan pointer as function result.
 *
 * s     text to convert
 * p     destination buffer; always NUL-terminated on return
 * plen  size of the destination buffer in bytes
 * slen  out: number of bytes stored in p, not counting the NUL
 *
 * The returned pointer is just past the white-space character that ended
 * the string, or AT the terminating NUL when the input ran out, so the
 * caller can keep scanning without reading beyond the end of the line.
 * If the converted text does not fit, a message is written to stderr and
 * conversion stops early.
 */
static char *
getstr(s, p, plen, slen)
register char	*s;
register char	*p;
int	plen, *slen;
{
	char	*origs = s, *origp = p;
	char	*pmax = p + plen - 1;
	register int	c;
	register int	val;

	for (;;) {
		c = *s++;
		if (c == '\0') {
			--s;		/* leave s on the terminator */
			break;
		}
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if(c == '\\') {
			switch(c = *s++) {

			case '\0':
				--s;	/* trailing backslash: stay on the NUL */
				goto out;

			default:
				*p++ = (char) c;
				break;

			case 'n':
				*p++ = '\n';
				break;

			case 'r':
				*p++ = '\r';
				break;

			case 'b':
				*p++ = '\b';
				break;

			case 't':
				*p++ = '\t';
				break;

			case 'f':
				*p++ = '\f';
				break;

			case 'v':
				*p++ = '\v';
				break;

			/* \ and up to 3 octal digits */
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				val = c - '0';
				c = *s++;  /* try for 2 */
				if(c >= '0' && c <= '7') {
					val = (val<<3) | (c - '0');
					c = *s++;  /* try for 3 */
					if(c >= '0' && c <= '7')
						val = (val<<3) | (c-'0');
					else
						--s;
				}
				else
					--s;
				*p++ = (char)val;
				break;

			/* \x and up to 3 hex digits (result truncated to char) */
			case 'x':
				val = 'x';	/* Default if no digits */
				c = hextoint(*s++);	/* Get next char */
				if (c >= 0) {
					val = c;
					c = hextoint(*s++);
					if (c >= 0) {
						val = (val << 4) + c;
						c = hextoint(*s++);
						if (c >= 0) {
							val = (val << 4) + c;
						} else
							--s;
					} else
						--s;
				} else
					--s;
				*p++ = (char)val;
				break;
			}
		} else
			*p++ = (char)c;
	}
out:
	*p = '\0';
	*slen = p - origp;
	return s;
}
458
459
/*
 * Value of a single hexadecimal digit character.
 * Returns 0..15 for [0-9a-fA-F], -1 for anything else (including any
 * character outside 7-bit ASCII).
 */
static int
hextoint(c)
int c;
{
	unsigned char uc = (unsigned char) c;
	int digit = -1;

	if (uc > 0177)			/* not 7-bit ASCII */
		return digit;

	if (isdigit(uc))
		digit = c - '0';
	else if (c >= 'a' && c <= 'f')
		digit = c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
		digit = c - 'A' + 10;

	return digit;
}
471
472
473/*
474 * Print a string containing C character escapes.
475 */
476void
477showstr(s)
286a6f32 478const char *s;
15637ed4
RG
479{
480 register char c;
481
482 while((c = *s++) != '\0') {
286a6f32 483 if(c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */
15637ed4
RG
484 putchar(c);
485 else {
486 putchar('\\');
487 switch (c) {
488
489 case '\n':
490 putchar('n');
491 break;
492
493 case '\r':
494 putchar('r');
495 break;
496
497 case '\b':
498 putchar('b');
499 break;
500
501 case '\t':
502 putchar('t');
503 break;
504
505 case '\f':
506 putchar('f');
507 break;
508
509 case '\v':
510 putchar('v');
511 break;
512
513 default:
514 printf("%.3o", c & 0377);
515 break;
516 }
517 }
518 }
519 putchar('\t');
520}