Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /* |
2 | * apprentice - make one pass through /etc/magic, learning its secrets. | |
3 | * | |
4 | * Copyright (c) Ian F. Darwin, 1987. | |
5 | * Written by Ian F. Darwin. | |
6 | * | |
7 | * This software is not subject to any license of the American Telephone | |
8 | * and Telegraph Company or of the Regents of the University of California. | |
9 | * | |
10 | * Permission is granted to anyone to use this software for any purpose on | |
11 | * any computer system, and to alter it and redistribute it freely, subject | |
12 | * to the following restrictions: | |
13 | * | |
14 | * 1. The author is not responsible for the consequences of use of this | |
15 | * software, no matter how awful, even if they arise from flaws in it. | |
16 | * | |
17 | * 2. The origin of this software must not be misrepresented, either by | |
18 | * explicit claim or by omission. Since few users ever read sources, | |
19 | * credits must appear in the documentation. | |
20 | * | |
21 | * 3. Altered versions must be plainly marked as such, and must not be | |
22 | * misrepresented as being the original software. Since few users | |
23 | * ever read sources, credits must appear in the documentation. | |
24 | * | |
25 | * 4. This notice may not be removed or altered. | |
26 | */ | |
27 | ||
28 | #include <stdio.h> | |
286a6f32 C |
29 | #include <stdlib.h> |
30 | #include <string.h> | |
15637ed4 RG |
31 | #include <ctype.h> |
32 | #include "file.h" | |
33 | ||
#ifndef lint
/* Source revision identifier for this module; left out when `lint' is defined. */
static char *moduleid = 
	"@(#)apprentice.c,v 1.2 1993/06/10 00:38:02 jtc Exp";
#endif	/* lint */
38 | ||
286a6f32 C |
/*
 * EATAB - step the scan pointer `l' (a char pointer that must be in scope
 * where the macro is used) over any run of 7-bit ASCII white space.
 *
 * The body is wrapped in do { ... } while (0) so that "EATAB;" is exactly
 * one statement and stays valid as the unbraced body of an if/else.
 * The explicit <= 0177 range test stands in for the non-ISO isascii().
 */
#define	EATAB	do { \
		while ((unsigned char) *l <= 0177 && \
		    isspace((unsigned char) *l)) \
			++l; \
	} while (0)
15637ed4 | 41 | |
15637ed4 | 42 | |
286a6f32 C |
/* Forward declarations of the file-local helpers defined later in this file. */
static int getvalue	__P((struct magic *, char **));
static int hextoint	__P((int));
static char *getstr	__P((char *, char *, int, int *));
static int parse	__P((char *, int *, int));

/* Number of entries currently allocated for the magic[] table. */
static int maxmagic = 0;
15637ed4 | 49 | |
286a6f32 | 50 | int |
15637ed4 RG |
51 | apprentice(fn, check) |
52 | char *fn; /* name of magic file */ | |
286a6f32 | 53 | int check; /* non-zero? checking-only run. */ |
15637ed4 RG |
54 | { |
55 | FILE *f; | |
286a6f32 | 56 | char line[BUFSIZ+1]; |
15637ed4 RG |
57 | int errs = 0; |
58 | ||
59 | f = fopen(fn, "r"); | |
60 | if (f==NULL) { | |
61 | (void) fprintf(stderr, "%s: can't read magic file %s\n", | |
62 | progname, fn); | |
63 | if (check) | |
64 | return -1; | |
65 | else | |
66 | exit(1); | |
67 | } | |
68 | ||
286a6f32 C |
69 | maxmagic = MAXMAGIS; |
70 | if ((magic = (struct magic *) malloc(sizeof(struct magic) * maxmagic)) | |
71 | == NULL) { | |
72 | (void) fprintf(stderr, "%s: Out of memory.\n", progname); | |
73 | if (check) | |
74 | return -1; | |
75 | else | |
76 | exit(1); | |
77 | } | |
78 | ||
15637ed4 RG |
79 | /* parse it */ |
80 | if (check) /* print silly verbose header for USG compat. */ | |
286a6f32 | 81 | (void) printf("cont\toffset\ttype\topcode\tmask\tvalue\tdesc\n"); |
15637ed4 | 82 | |
286a6f32 | 83 | for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) { |
15637ed4 RG |
84 | if (line[0]=='#') /* comment, do not parse */ |
85 | continue; | |
286a6f32 | 86 | if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */ |
15637ed4 RG |
87 | continue; |
88 | line[strlen(line)-1] = '\0'; /* delete newline */ | |
89 | if (parse(line, &nmagic, check) != 0) | |
90 | ++errs; | |
91 | } | |
92 | ||
93 | (void) fclose(f); | |
94 | return errs ? -1 : 0; | |
95 | } | |
96 | ||
97 | /* | |
98 | * parse one line from magic file, put into magic[index++] if valid | |
99 | */ | |
286a6f32 | 100 | static int |
15637ed4 RG |
101 | parse(l, ndx, check) |
102 | char *l; | |
103 | int *ndx, check; | |
104 | { | |
105 | int i = 0, nd = *ndx; | |
15637ed4 | 106 | struct magic *m; |
286a6f32 C |
107 | char *t, *s; |
108 | ||
109 | if (nd+1 >= maxmagic){ | |
110 | maxmagic += 20; | |
111 | if ((magic = (struct magic *) realloc(magic, | |
112 | sizeof(struct magic) * | |
113 | maxmagic)) == NULL) { | |
114 | (void) fprintf(stderr, "%s: Out of memory.\n", progname); | |
115 | if (check) | |
116 | return -1; | |
117 | else | |
118 | exit(1); | |
119 | } | |
15637ed4 RG |
120 | } |
121 | m = &magic[*ndx]; | |
286a6f32 C |
122 | m->flag = 0; |
123 | m->cont_level = 0; | |
15637ed4 | 124 | |
286a6f32 | 125 | while (*l == '>') { |
15637ed4 | 126 | ++l; /* step over */ |
286a6f32 C |
127 | m->cont_level++; |
128 | } | |
129 | ||
130 | if (m->cont_level != 0 && *l == '(') { | |
131 | ++l; /* step over */ | |
132 | m->flag |= INDIR; | |
133 | } | |
15637ed4 RG |
134 | |
135 | /* get offset, then skip over it */ | |
286a6f32 C |
136 | m->offset = (int) strtol(l,&t,0); |
137 | if (l == t) | |
138 | magwarn("offset %s invalid", l); | |
139 | l = t; | |
140 | ||
141 | if (m->flag & INDIR) { | |
142 | m->in.type = LONG; | |
143 | m->in.offset = 0; | |
144 | /* | |
145 | * read [.lbs][+-]nnnnn) | |
146 | */ | |
147 | if (*l == '.') { | |
148 | switch (*++l) { | |
149 | case 'l': | |
150 | m->in.type = LONG; | |
151 | break; | |
152 | case 's': | |
153 | m->in.type = SHORT; | |
154 | break; | |
155 | case 'b': | |
156 | m->in.type = BYTE; | |
157 | break; | |
158 | default: | |
159 | magwarn("indirect offset type %c invalid", *l); | |
160 | break; | |
161 | } | |
162 | l++; | |
163 | } | |
164 | s = l; | |
165 | if (*l == '+' || *l == '-') l++; | |
166 | if (isdigit((unsigned char)*l)) { | |
167 | m->in.offset = strtol(l, &t, 0); | |
168 | if (*s == '-') m->in.offset = - m->in.offset; | |
169 | } | |
170 | if (*t++ != ')') | |
171 | magwarn("missing ')' in indirect offset"); | |
172 | l = t; | |
173 | } | |
174 | ||
175 | ||
176 | while (isascii((unsigned char)*l) && isdigit((unsigned char)*l)) | |
15637ed4 RG |
177 | ++l; |
178 | EATAB; | |
179 | ||
286a6f32 C |
180 | #define NBYTE 4 |
181 | #define NSHORT 5 | |
182 | #define NLONG 4 | |
183 | #define NSTRING 6 | |
184 | #define NDATE 4 | |
185 | #define NBESHORT 7 | |
186 | #define NBELONG 6 | |
187 | #define NBEDATE 6 | |
188 | #define NLESHORT 7 | |
189 | #define NLELONG 6 | |
190 | #define NLEDATE 6 | |
191 | ||
15637ed4 RG |
192 | /* get type, skip it */ |
193 | if (strncmp(l, "byte", NBYTE)==0) { | |
194 | m->type = BYTE; | |
195 | l += NBYTE; | |
196 | } else if (strncmp(l, "short", NSHORT)==0) { | |
197 | m->type = SHORT; | |
198 | l += NSHORT; | |
199 | } else if (strncmp(l, "long", NLONG)==0) { | |
200 | m->type = LONG; | |
201 | l += NLONG; | |
202 | } else if (strncmp(l, "string", NSTRING)==0) { | |
203 | m->type = STRING; | |
204 | l += NSTRING; | |
286a6f32 C |
205 | } else if (strncmp(l, "date", NDATE)==0) { |
206 | m->type = DATE; | |
207 | l += NDATE; | |
208 | } else if (strncmp(l, "beshort", NBESHORT)==0) { | |
209 | m->type = BESHORT; | |
210 | l += NBESHORT; | |
211 | } else if (strncmp(l, "belong", NBELONG)==0) { | |
212 | m->type = BELONG; | |
213 | l += NBELONG; | |
214 | } else if (strncmp(l, "bedate", NBEDATE)==0) { | |
215 | m->type = BEDATE; | |
216 | l += NBEDATE; | |
217 | } else if (strncmp(l, "leshort", NLESHORT)==0) { | |
218 | m->type = LESHORT; | |
219 | l += NLESHORT; | |
220 | } else if (strncmp(l, "lelong", NLELONG)==0) { | |
221 | m->type = LELONG; | |
222 | l += NLELONG; | |
223 | } else if (strncmp(l, "ledate", NLEDATE)==0) { | |
224 | m->type = LEDATE; | |
225 | l += NLEDATE; | |
15637ed4 | 226 | } else { |
286a6f32 | 227 | magwarn("type %s invalid", l); |
15637ed4 RG |
228 | return -1; |
229 | } | |
286a6f32 C |
230 | /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ |
231 | if (*l == '&') { | |
15637ed4 | 232 | ++l; |
286a6f32 | 233 | m->mask = strtol(l, &l, 0); |
15637ed4 | 234 | } else |
286a6f32 | 235 | m->mask = 0L; |
15637ed4 | 236 | EATAB; |
286a6f32 C |
237 | |
238 | switch (*l) { | |
239 | case '>': | |
240 | case '<': | |
241 | /* Old-style anding: "0 byte &0x80 dynamically linked" */ | |
242 | case '&': | |
243 | case '^': | |
244 | case '=': | |
245 | m->reln = *l; | |
246 | ++l; | |
15637ed4 | 247 | break; |
286a6f32 C |
248 | case '!': |
249 | if (m->type != STRING) { | |
250 | m->reln = *l; | |
251 | ++l; | |
252 | break; | |
253 | } | |
254 | /* FALL THROUGH */ | |
15637ed4 | 255 | default: |
286a6f32 C |
256 | if (*l == 'x' && isascii((unsigned char)l[1]) && |
257 | isspace((unsigned char)l[1])) { | |
258 | m->reln = *l; | |
259 | ++l; | |
260 | goto GetDesc; /* Bill The Cat */ | |
261 | } | |
262 | m->reln = '='; | |
263 | break; | |
15637ed4 | 264 | } |
286a6f32 C |
265 | EATAB; |
266 | ||
267 | if (getvalue(m, &l)) | |
268 | return -1; | |
269 | /* | |
270 | * TODO finish this macro and start using it! | |
271 | * #define offsetcheck {if (offset > HOWMANY-1) | |
272 | * magwarn("offset too big"); } | |
273 | */ | |
15637ed4 RG |
274 | |
275 | /* | |
276 | * now get last part - the description | |
277 | */ | |
286a6f32 | 278 | GetDesc: |
15637ed4 | 279 | EATAB; |
286a6f32 C |
280 | if (l[0] == '\b') { |
281 | ++l; | |
282 | m->nospflag = 1; | |
283 | } else if ((l[0] == '\\') && (l[1] == 'b')) { | |
284 | ++l; | |
285 | ++l; | |
286 | m->nospflag = 1; | |
287 | } else | |
288 | m->nospflag = 0; | |
15637ed4 RG |
289 | while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC) |
290 | /* NULLBODY */; | |
291 | ||
292 | if (check) { | |
293 | mdump(m); | |
294 | } | |
295 | ++(*ndx); /* make room for next */ | |
296 | return 0; | |
297 | } | |
298 | ||
286a6f32 C |
299 | /* |
300 | * Read a numeric value from a pointer, into the value union of a magic | |
301 | * pointer, according to the magic type. Update the string pointer to point | |
302 | * just after the number read. Return 0 for success, non-zero for failure. | |
303 | */ | |
304 | static int | |
305 | getvalue(m, p) | |
306 | struct magic *m; | |
307 | char **p; | |
308 | { | |
309 | int slen; | |
310 | ||
311 | if (m->type == STRING) { | |
312 | *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen); | |
313 | m->vallen = slen; | |
314 | } else { | |
315 | if (m->reln != 'x') { | |
316 | switch(m->type) { | |
317 | /* | |
318 | * Do not remove the casts below. They are vital. | |
319 | * When later compared with the data, the sign | |
320 | * extension must have happened. | |
321 | */ | |
322 | case BYTE: | |
323 | m->value.l = (char) strtol(*p,p,0); | |
324 | break; | |
325 | case SHORT: | |
326 | case BESHORT: | |
327 | case LESHORT: | |
328 | m->value.l = (short) strtol(*p,p,0); | |
329 | break; | |
330 | case DATE: | |
331 | case BEDATE: | |
332 | case LEDATE: | |
333 | case LONG: | |
334 | case BELONG: | |
335 | case LELONG: | |
336 | m->value.l = (long) strtol(*p,p,0); | |
337 | break; | |
338 | default: | |
339 | magwarn("can't happen: m->type=%d\n", m->type); | |
340 | return -1; | |
341 | } | |
342 | } | |
343 | } | |
344 | return 0; | |
345 | } | |
346 | ||
15637ed4 RG |
/*
 * getstr - convert a string containing C character escapes.
 *
 * Scanning starts at `s' and stops at the first unescaped white-space
 * character, at the end of the input, or when the output buffer is full
 * (in which case "String too long" is reported on stderr).
 *
 * Recognised escapes: \n \r \b \t \f \v, a backslash followed by up to
 * three octal digits, and \x followed by up to three hex digits (\x with
 * no digits yields 'x').  Any other escaped character stands for itself.
 *
 * The converted bytes go to `p', whose capacity is `plen' bytes including
 * the NUL that is always appended; their count is stored in *slen.
 *
 * Returns the updated scan pointer.  It is either just past the character
 * that stopped the scan or, when the input ran out, pointing AT the
 * terminating NUL -- never beyond it, so the caller can go on scanning.
 */
static char *
getstr(s, p, plen, slen)
	register char	*s;
	register char	*p;
	int	plen, *slen;
{
	char	*origs = s, *origp = p;
	char	*pmax = p + plen - 1;
	register int	c;
	register int	val;

	for (;;) {
		c = *s++;
		if (c == '\0') {
			--s;		/* stay on the terminator */
			break;
		}
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char) c;
			continue;
		}

		switch (c = *s++) {

		case '\0':
			--s;		/* dangling backslash at end of input */
			goto out;

		default:
			*p++ = (char) c;
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			c = *s++;	/* try for 2 */
			if (c >= '0' && c <= '7') {
				val = (val << 3) | (c - '0');
				c = *s++;	/* try for 3 */
				if (c >= '0' && c <= '7')
					val = (val << 3) | (c - '0');
				else
					--s;
			} else
				--s;
			*p++ = (char) val;
			break;

		/* \x and up to 3 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s++);	/* Get next char */
			if (c >= 0) {
				val = c;
				c = hextoint(*s++);
				if (c >= 0) {
					val = (val << 4) + c;
					c = hextoint(*s++);
					if (c >= 0) {
						val = (val << 4) + c;
					} else
						--s;
				} else
					--s;
			} else
				--s;
			*p++ = (char) val;
			break;
		}
	}
out:
	*p = '\0';
	*slen = (int) (p - origp);
	return s;
}
458 | ||
459 | ||
/*
 * hextoint - value of a single hexadecimal digit character.
 *
 * Gives 0-9 for '0'..'9' and 10-15 for 'a'..'f' or 'A'..'F'; anything
 * else, including any character outside 7-bit ASCII, gives -1.
 * The ASCII and digit tests look at `c' converted to unsigned char.
 */
static int
hextoint(c)
	int	c;
{
	unsigned char	uc = (unsigned char) c;

	if (uc > 0177)			/* not 7-bit ASCII */
		return -1;
	if (isdigit(uc))
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
471 | ||
472 | ||
/*
 * showstr - write string `s' to stdout using C character escapes.
 *
 * Characters in the range 040..0176 are written unchanged.  Every other
 * character is written as a backslash followed by its letter escape
 * (n, r, b, t, f, v) or, when it has none, by three octal digits.
 * A tab is written after the string.
 */
void
showstr(s)
	const char	*s;
{
	/* special[k] is written as a backslash followed by letter[k] */
	static const char	special[] = "\n\r\b\t\f\v";
	static const char	letter[] = "nrbtfv";
	const char	*hit;
	char	ch;

	for (; (ch = *s) != '\0'; s++) {
		if (ch >= 040 && ch <= 0176) {	/* TODO isprint && !iscntrl */
			putchar(ch);
			continue;
		}
		putchar('\\');
		hit = strchr(special, ch);
		if (hit != NULL)
			putchar(letter[hit - special]);
		else
			printf("%.3o", ch & 0377);
	}
	putchar('\t');
}