386BSD 0.1 development
[unix-history] / usr / src / usr.bin / hexdump / parse.c
CommitLineData
7585a57e
WJ
1/*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
#ifndef lint
/* Version identification string for this file; omitted when `lint' is defined. */
static char sccsid[] = "@(#)parse.c 5.6 (Berkeley) 3/9/91";
#endif /* not lint */
37
38#include <sys/types.h>
39#include <sys/file.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <ctype.h>
43#include <string.h>
44#include "hexdump.h"
45
/*
 * Format unit to use at end-of-data.  rewrite() points this at the unit
 * containing a %_A conversion and marks that unit F_IGNORE.
 */
FU *endfu;			/* format at end-of-data */
47
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and pass
 *	each one to add().
 *
 *	Leading white space on a line is skipped.  Empty lines and lines
 *	whose first non-blank character is '#' are ignored.  A line that
 *	does not fit in the 2048-character buffer is reported on stderr
 *	and discarded in its entirety.
 *
 *	Exits with status 1 if the file cannot be opened.
 *	Returns 0; callers do not use the value.
 */
int
addfile(name)
	char *name;
{
	register char *p;
	FILE *fp;
	int ch;
	char buf[2048 + 1];

	if (!(fp = fopen(name, "r"))) {
		(void)fprintf(stderr, "hexdump: can't read %s.\n", name);
		exit(1);
	}
	while (fgets(buf, sizeof(buf), fp)) {
		if (!(p = strchr(buf, '\n'))) {
			(void)fprintf(stderr, "hexdump: line too long.\n");
			/*
			 * Discard the remainder of the over-long line.  This
			 * must read from the format file itself, not from
			 * standard input, or the next fgets() would resume in
			 * the middle of the same line.
			 */
			while ((ch = getc(fp)) != '\n' && ch != EOF)
				;
			continue;
		}
		*p = '\0';

		/* <ctype.h> functions require an unsigned char value */
		for (p = buf; isspace((unsigned char)*p); ++p)
			;
		if (!*p || *p == '#')
			continue;
		add(p);
	}
	(void)fclose(fp);
	return(0);
}
74
75add(fmt)
76 char *fmt;
77{
78 register char *p;
79 static FS **nextfs;
80 FS *tfs;
81 FU *tfu, **nextfu;
82 char *savep, *emalloc();
83
84 /* start new linked list of format units */
85 /* NOSTRICT */
86 tfs = (FS *)emalloc(sizeof(FS));
87 if (!fshead)
88 fshead = tfs;
89 else
90 *nextfs = tfs;
91 nextfs = &tfs->nextfs;
92 nextfu = &tfs->nextfu;
93
94 /* take the format string and break it up into format units */
95 for (p = fmt;;) {
96 /* skip leading white space */
97 for (; isspace(*p); ++p);
98 if (!*p)
99 break;
100
101 /* allocate a new format unit and link it in */
102 /* NOSTRICT */
103 tfu = (FU *)emalloc(sizeof(FU));
104 *nextfu = tfu;
105 nextfu = &tfu->nextfu;
106 tfu->reps = 1;
107
108 /* if leading digit, repetition count */
109 if (isdigit(*p)) {
110 for (savep = p; isdigit(*p); ++p);
111 if (!isspace(*p) && *p != '/')
112 badfmt(fmt);
113 /* may overwrite either white space or slash */
114 tfu->reps = atoi(savep);
115 tfu->flags = F_SETREP;
116 /* skip trailing white space */
117 for (++p; isspace(*p); ++p);
118 }
119
120 /* skip slash and trailing white space */
121 if (*p == '/')
122 while (isspace(*++p));
123
124 /* byte count */
125 if (isdigit(*p)) {
126 for (savep = p; isdigit(*p); ++p);
127 if (!isspace(*p))
128 badfmt(fmt);
129 tfu->bcnt = atoi(savep);
130 /* skip trailing white space */
131 for (++p; isspace(*p); ++p);
132 }
133
134 /* format */
135 if (*p != '"')
136 badfmt(fmt);
137 for (savep = ++p; *p != '"';)
138 if (*p++ == 0)
139 badfmt(fmt);
140 if (!(tfu->fmt = malloc(p - savep + 1)))
141 nomem();
142 (void) strncpy(tfu->fmt, savep, p - savep);
143 tfu->fmt[p - savep] = '\0';
144 escape(tfu->fmt);
145 p++;
146 }
147}
148
149static char *spec = ".#-+ 0123456789";
150size(fs)
151 FS *fs;
152{
153 register FU *fu;
154 register int bcnt, cursize;
155 register char *fmt;
156 int prec;
157
158 /* figure out the data block size needed for each format unit */
159 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
160 if (fu->bcnt) {
161 cursize += fu->bcnt * fu->reps;
162 continue;
163 }
164 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
165 if (*fmt != '%')
166 continue;
167 /*
168 * skip any special chars -- save precision in
169 * case it's a %s format.
170 */
171 while (index(spec + 1, *++fmt));
172 if (*fmt == '.' && isdigit(*++fmt)) {
173 prec = atoi(fmt);
174 while (isdigit(*++fmt));
175 }
176 switch(*fmt) {
177 case 'c':
178 bcnt += 1;
179 break;
180 case 'd': case 'i': case 'o': case 'u':
181 case 'x': case 'X':
182 bcnt += 4;
183 break;
184 case 'e': case 'E': case 'f': case 'g': case 'G':
185 bcnt += 8;
186 break;
187 case 's':
188 bcnt += prec;
189 break;
190 case '_':
191 switch(*++fmt) {
192 case 'c': case 'p': case 'u':
193 bcnt += 1;
194 break;
195 }
196 }
197 }
198 cursize += bcnt * fu->reps;
199 }
200 return(cursize);
201}
202
203rewrite(fs)
204 FS *fs;
205{
206 enum { NOTOKAY, USEBCNT, USEPREC } sokay;
207 register PR *pr, **nextpr;
208 register FU *fu;
209 register char *p1, *p2;
210 char savech, *fmtp;
211 int nconv, prec;
212
213 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
214 /*
215 * break each format unit into print units; each
216 * conversion character gets its own.
217 */
218 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
219 /* NOSTRICT */
220 pr = (PR *)emalloc(sizeof(PR));
221 if (!fu->nextpr)
222 fu->nextpr = pr;
223 else
224 *nextpr = pr;
225
226 /* skip preceding text and up to the next % sign */
227 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
228
229 /* only text in the string */
230 if (!*p1) {
231 pr->fmt = fmtp;
232 pr->flags = F_TEXT;
233 break;
234 }
235
236 /*
237 * get precision for %s -- if have a byte count, don't
238 * need it.
239 */
240 if (fu->bcnt) {
241 sokay = USEBCNT;
242 /* skip to conversion character */
243 for (++p1; index(spec, *p1); ++p1);
244 } else {
245 /* skip any special chars, field width */
246 while (index(spec + 1, *++p1));
247 if (*p1 == '.' && isdigit(*++p1)) {
248 sokay = USEPREC;
249 prec = atoi(p1);
250 while (isdigit(*++p1));
251 }
252 else
253 sokay = NOTOKAY;
254 }
255
256 p2 = p1 + 1; /* set end pointer */
257
258 /*
259 * figure out the byte count for each conversion;
260 * rewrite the format as necessary, set up blank-
261 * padding for end of data.
262 */
263 switch(*p1) {
264 case 'c':
265 pr->flags = F_CHAR;
266 switch(fu->bcnt) {
267 case 0: case 1:
268 pr->bcnt = 1;
269 break;
270 default:
271 p1[1] = '\0';
272 badcnt(p1);
273 }
274 break;
275 case 'd': case 'i':
276 pr->flags = F_INT;
277 goto sw1;
278 case 'l':
279 ++p2;
280 switch(p1[1]) {
281 case 'd': case 'i':
282 ++p1;
283 pr->flags = F_INT;
284 goto sw1;
285 case 'o': case 'u': case 'x': case 'X':
286 ++p1;
287 pr->flags = F_UINT;
288 goto sw1;
289 default:
290 p1[2] = '\0';
291 badconv(p1);
292 }
293 /* NOTREACHED */
294 case 'o': case 'u': case 'x': case 'X':
295 pr->flags = F_UINT;
296sw1: switch(fu->bcnt) {
297 case 0: case 4:
298 pr->bcnt = 4;
299 break;
300 case 1:
301 pr->bcnt = 1;
302 break;
303 case 2:
304 pr->bcnt = 2;
305 break;
306 default:
307 p1[1] = '\0';
308 badcnt(p1);
309 }
310 break;
311 case 'e': case 'E': case 'f': case 'g': case 'G':
312 pr->flags = F_DBL;
313 switch(fu->bcnt) {
314 case 0: case 8:
315 pr->bcnt = 8;
316 break;
317 case 4:
318 pr->bcnt = 4;
319 break;
320 default:
321 p1[1] = '\0';
322 badcnt(p1);
323 }
324 break;
325 case 's':
326 pr->flags = F_STR;
327 switch(sokay) {
328 case NOTOKAY:
329 badsfmt();
330 case USEBCNT:
331 pr->bcnt = fu->bcnt;
332 break;
333 case USEPREC:
334 pr->bcnt = prec;
335 break;
336 }
337 break;
338 case '_':
339 ++p2;
340 switch(p1[1]) {
341 case 'A':
342 endfu = fu;
343 fu->flags |= F_IGNORE;
344 /* FALLTHROUGH */
345 case 'a':
346 pr->flags = F_ADDRESS;
347 ++p2;
348 switch(p1[2]) {
349 case 'd': case 'o': case'x':
350 *p1 = p1[2];
351 break;
352 default:
353 p1[3] = '\0';
354 badconv(p1);
355 }
356 break;
357 case 'c':
358 pr->flags = F_C;
359 /* *p1 = 'c'; set in conv_c */
360 goto sw2;
361 case 'p':
362 pr->flags = F_P;
363 *p1 = 'c';
364 goto sw2;
365 case 'u':
366 pr->flags = F_U;
367 /* *p1 = 'c'; set in conv_u */
368sw2: switch(fu->bcnt) {
369 case 0: case 1:
370 pr->bcnt = 1;
371 break;
372 default:
373 p1[2] = '\0';
374 badcnt(p1);
375 }
376 break;
377 default:
378 p1[2] = '\0';
379 badconv(p1);
380 }
381 break;
382 default:
383 p1[1] = '\0';
384 badconv(p1);
385 }
386
387 /*
388 * copy to PR format string, set conversion character
389 * pointer, update original.
390 */
391 savech = *p2;
392 p1[1] = '\0';
393 if (!(pr->fmt = strdup(fmtp)))
394 nomem();
395 *p2 = savech;
396 pr->cchar = pr->fmt + (p1 - fmtp);
397 fmtp = p2;
398
399 /* only one conversion character if byte count */
400 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) {
401 (void)fprintf(stderr,
402 "hexdump: byte count with multiple conversion characters.\n");
403 exit(1);
404 }
405 }
406 /*
407 * if format unit byte count not specified, figure it out
408 * so can adjust rep count later.
409 */
410 if (!fu->bcnt)
411 for (pr = fu->nextpr; pr; pr = pr->nextpr)
412 fu->bcnt += pr->bcnt;
413 }
414 /*
415 * if the format string interprets any data at all, and it's
416 * not the same as the blocksize, and its last format unit
417 * interprets any data at all, and has no iteration count,
418 * repeat it as necessary.
419 *
420 * if, rep count is greater than 1, no trailing whitespace
421 * gets output from the last iteration of the format unit.
422 */
423 for (fu = fs->nextfu;; fu = fu->nextfu) {
424 if (!fu->nextfu && fs->bcnt < blocksize &&
425 !(fu->flags&F_SETREP) && fu->bcnt)
426 fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
427 if (fu->reps > 1) {
428 for (pr = fu->nextpr;; pr = pr->nextpr)
429 if (!pr->nextpr)
430 break;
431 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
432 p2 = isspace(*p1) ? p1 : NULL;
433 if (p2)
434 pr->nospace = p2;
435 }
436 if (!fu->nextfu)
437 break;
438 }
439}
440
441
/*
 * escape --
 *	Translate C-style backslash escapes in the string `p1', in place.
 *
 *	\a \b \f \n \r \t \v become the corresponding control characters;
 *	a backslash followed by any other character yields that character.
 *	A lone backslash at the very end of the string is kept as is.
 *	The result is never longer than the input.
 *
 *	Returns 0; callers do not use the value.
 */
int
escape(p1)
	register char *p1;
{
	register char *p2;

	/*
	 * p1 reads, p2 writes.  Once an escape has been collapsed p2 lags
	 * behind p1, so every character -- not only translated escapes --
	 * has to be copied down.
	 */
	for (p2 = p1;; ++p1, ++p2) {
		if (*p1 != '\\') {
			*p2 = *p1;
			if (!*p1)
				break;
			continue;
		}
		switch(*++p1) {
		case '\0':
			/*
			 * Trailing backslash: keep it and terminate here,
			 * rather than stepping past the end of the string.
			 */
			*p2++ = '\\';
			*p2 = '\0';
			return(0);
		case 'a':
			/* *p2 = '\a'; */
			*p2 = '\007';
			break;
		case 'b':
			*p2 = '\b';
			break;
		case 'f':
			*p2 = '\f';
			break;
		case 'n':
			*p2 = '\n';
			break;
		case 'r':
			*p2 = '\r';
			break;
		case 't':
			*p2 = '\t';
			break;
		case 'v':
			*p2 = '\v';
			break;
		default:
			*p2 = *p1;
			break;
		}
	}
	return(0);
}
483
/*
 * badcnt --
 *	Complain on stderr that the byte count is not valid for the
 *	conversion character(s) in `s', then exit with status 1.
 */
int
badcnt(s)
	char *s;
{
	fprintf(stderr,
	    "hexdump: bad byte count for conversion character %s.\n", s);
	exit(1);
}
491
/*
 * badsfmt --
 *	Complain on stderr that a %s conversion was given neither a
 *	precision nor a byte count, then exit with status 1.
 */
int
badsfmt()
{
	fprintf(stderr,
	    "hexdump: %%s requires a precision or a byte count.\n");
	exit(1);
}
498
/*
 * badfmt --
 *	Complain on stderr that the format string `fmt' is malformed,
 *	echoing it inside braces, then exit with status 1.
 */
int
badfmt(fmt)
	char *fmt;
{
	fprintf(stderr, "hexdump: bad format {%s}\n", fmt);
	exit(1);
}
505
/*
 * badconv --
 *	Complain on stderr that the conversion in `ch' (the text that
 *	followed the '%') is not recognised, then exit with status 1.
 */
int
badconv(ch)
	char *ch;
{
	fprintf(stderr, "hexdump: bad conversion character %%%s.\n", ch);
	exit(1);
}