Commit | Line | Data |
---|---|---|
bc37b687 WJ |
1 | /* dd -- convert a file while copying it. |
2 | Copyright (C) 1985, 1990, 1991 Free Software Foundation, Inc. | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2, or (at your option) | |
7 | any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
16 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ | |
17 | ||
18 | /* Written by Paul Rubin, David MacKenzie, and Stuart Kemp. */ | |
19 | ||
20 | /* Options: | |
21 | ||
22 | Numbers can be followed by a multiplier: | |
23 | b=512, k=1024, w=2, xm=number m | |
24 | ||
25 | if=FILE Read from FILE instead of stdin. | |
26 | of=FILE Write to FILE instead of stdout; truncate | |
27 | FILE unless conv=notrunc is given. | |
28 | ibs=BYTES Read BYTES bytes at a time. | |
29 | obs=BYTES Write BYTES bytes at a time. | |
30 | bs=BYTES Override ibs and obs. | |
31 | cbs=BYTES Convert BYTES bytes at a time. | |
32 | skip=BLOCKS Skip BLOCKS ibs-sized blocks at | |
33 | start of input. | |
34 | seek=BLOCKS Skip BLOCKS obs-sized blocks at | |
35 | start of output. | |
36 | count=BLOCKS Copy only BLOCKS input blocks. | |
37 | conv=CONVERSION[,CONVERSION...] | |
38 | ||
39 | Conversions: | |
40 | ascii Convert EBCDIC to ASCII. | |
41 | ebcdic Convert ASCII to EBCDIC. | |
42 | ibm Convert ASCII to alternate EBCDIC. | |
43 | block Pad newline-terminated records to size of | |
44 | cbs, replacing newline with trailing spaces. | |
45 | unblock Replace trailing spaces in cbs-sized block | |
46 | with newline. | |
47 | lcase Change uppercase characters to lowercase. | |
48 | ucase Change lowercase characters to uppercase. | |
49 | swab Swap every pair of input bytes. | |
50 | Unlike the Unix dd, this works when an odd | |
51 | number of bytes are read. | |
52 | noerror Continue after read errors. | |
53 | sync Pad every input block to size of ibs with | |
54 | trailing NULs. */ | |
55 | ||
#include <stdio.h>
#include <ctype.h>
#define ISLOWER islower
#define ISUPPER isupper
#include <sys/types.h>
#include <signal.h>
#include <sys/stat.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <fcntl.h>
#define SIGTYPE void
69 | ||
/* Nonzero if the strings P and Q have identical contents. */
#define equal(p, q) (!strcmp ((p), (q)))

/* The larger of A and B.  The selected argument is evaluated twice, so
   pass only expressions without side effects. */
#define max(a, b) ((b) < (a) ? (a) : (b))

/* Append the byte C to `obuf' and flush the buffer with write_output ()
   as soon as it holds `output_blocksize' bytes. */
#define output_char(c) \
  do \
    { \
      obuf[oc] = (c); \
      oc++; \
      if (oc >= output_blocksize) \
        write_output (); \
    } \
  while (0)
76 | ||
/* Block size used for both input and output when none is requested. */
#define DEFAULT_BLOCKSIZE 512

/* Conversion flags.  Each one occupies its own bit so that any
   combination can be stored in `conversions_mask'. */
#define C_ASCII   (1 << 0)
#define C_EBCDIC  (1 << 1)
#define C_IBM     (1 << 2)
#define C_BLOCK   (1 << 3)
#define C_UNBLOCK (1 << 4)
#define C_LCASE   (1 << 5)
#define C_UCASE   (1 << 6)
#define C_SWAB    (1 << 7)
#define C_NOERROR (1 << 8)
#define C_NOTRUNC (1 << 9)
#define C_SYNC    (1 << 10)
/* Use separate input and output buffers, and combine partial input blocks. */
#define C_TWOBUFS (1 << 11)
94 | ||
95 | /*char *malloc ();*/ | |
96 | SIGTYPE interrupt_handler (); | |
97 | int bit_count (); | |
98 | int parse_integer (); | |
99 | void apply_translations (); | |
100 | void copy (); | |
101 | void copy_simple (); | |
102 | void copy_with_block (); | |
103 | void copy_with_unblock (); | |
104 | /*void error ();*/ | |
105 | void parse_conversion (); | |
106 | void print_stats (); | |
107 | void translate_charset (); | |
108 | void quit (); | |
109 | void scanargs (); | |
110 | void skip (); | |
111 | void usage (); | |
112 | void write_output (); | |
113 | ||
/* The name this program was run with. */
char *program_name;

/* The name of the input file, or NULL for the standard input. */
char *input_file = NULL;

/* The input file descriptor. */
int input_fd = 0;

/* The name of the output file, or NULL for the standard output. */
char *output_file = NULL;

/* The output file descriptor. */
int output_fd = 1;

/* The number of bytes in which atomic reads are done.
   -1 means "not set on the command line". */
long input_blocksize = -1;

/* The number of bytes in which atomic writes are done.
   -1 means "not set on the command line". */
long output_blocksize = -1;

/* Conversion buffer size, in bytes.  0 prevents conversions. */
long conversion_blocksize = 0;

/* Skip this many records of `input_blocksize' bytes before input. */
long skip_records = 0;

/* Skip this many records of `output_blocksize' bytes before output. */
long seek_record = 0;

/* Copy only this many records.  <0 means no limit. */
int max_records = -1;

/* Bit vector of conversions to apply. */
int conversions_mask = 0;

/* If nonzero, filter characters through the translation table. */
int translation_needed = 0;

/* Number of partial blocks written. */
unsigned w_partial = 0;

/* Number of full blocks written. */
unsigned w_full = 0;

/* Number of partial blocks read. */
unsigned r_partial = 0;

/* Number of full blocks read. */
unsigned r_full = 0;

/* Records truncated by conv=block. */
unsigned r_truncate = 0;

/* Output representation of newline and space characters.
   They change if we're converting to EBCDIC. */
unsigned char newline_character = '\n';
unsigned char space_character = ' ';
172 | ||
173 | struct conversion | |
174 | { | |
175 | char *convname; | |
176 | int conversion; | |
177 | }; | |
178 | ||
179 | struct conversion conversions[] = | |
180 | { | |
181 | "ascii", C_ASCII | C_TWOBUFS, /* EBCDIC to ASCII. */ | |
182 | "ebcdic", C_EBCDIC | C_TWOBUFS, /* ASCII to EBCDIC. */ | |
183 | "ibm", C_IBM | C_TWOBUFS, /* Slightly different ASCII to EBCDIC. */ | |
184 | "block", C_BLOCK | C_TWOBUFS, /* Variable to fixed length records. */ | |
185 | "unblock", C_UNBLOCK | C_TWOBUFS, /* Fixed to variable length records. */ | |
186 | "lcase", C_LCASE | C_TWOBUFS, /* Translate upper to lower case. */ | |
187 | "ucase", C_UCASE | C_TWOBUFS, /* Translate lower to upper case. */ | |
188 | "swab", C_SWAB | C_TWOBUFS, /* Swap bytes of input. */ | |
189 | "noerror", C_NOERROR, /* Ignore i/o errors. */ | |
190 | "notrunc", C_NOTRUNC, /* Do not truncate output file. */ | |
191 | "sync", C_SYNC, /* Pad input records to ibs with NULs. */ | |
192 | NULL, 0 | |
193 | }; | |
194 | ||
/* Translation table formed by applying successive transformations.
   Entry N holds the byte that input byte N is replaced with. */
unsigned char trans_table[256];
197 | ||
/* Lookup table used for conv=ebcdic: indexed by an ASCII byte, it yields
   the corresponding EBCDIC byte. */
unsigned char ascii_to_ebcdic[] =
{
  0, 01, 02, 03, 067, 055, 056, 057,
  026, 05, 045, 013, 014, 015, 016, 017,
  020, 021, 022, 023, 074, 075, 062, 046,
  030, 031, 077, 047, 034, 035, 036, 037,
  0100, 0117, 0177, 0173, 0133, 0154, 0120, 0175,
  0115, 0135, 0134, 0116, 0153, 0140, 0113, 0141,
  0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
  0370, 0371, 0172, 0136, 0114, 0176, 0156, 0157,
  0174, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
  0310, 0311, 0321, 0322, 0323, 0324, 0325, 0326,
  0327, 0330, 0331, 0342, 0343, 0344, 0345, 0346,
  0347, 0350, 0351, 0112, 0340, 0132, 0137, 0155,
  0171, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
  0210, 0211, 0221, 0222, 0223, 0224, 0225, 0226,
  0227, 0230, 0231, 0242, 0243, 0244, 0245, 0246,
  0247, 0250, 0251, 0300, 0152, 0320, 0241, 07,
  040, 041, 042, 043, 044, 025, 06, 027,
  050, 051, 052, 053, 054, 011, 012, 033,
  060, 061, 032, 063, 064, 065, 066, 010,
  070, 071, 072, 073, 04, 024, 076, 0341,
  0101, 0102, 0103, 0104, 0105, 0106, 0107, 0110,
  0111, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
  0130, 0131, 0142, 0143, 0144, 0145, 0146, 0147,
  0150, 0151, 0160, 0161, 0162, 0163, 0164, 0165,
  0166, 0167, 0170, 0200, 0212, 0213, 0214, 0215,
  0216, 0217, 0220, 0232, 0233, 0234, 0235, 0236,
  0237, 0240, 0252, 0253, 0254, 0255, 0256, 0257,
  0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
  0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
  0312, 0313, 0314, 0315, 0316, 0317, 0332, 0333,
  0334, 0335, 0336, 0337, 0352, 0353, 0354, 0355,
  0356, 0357, 0372, 0373, 0374, 0375, 0376, 0377
};
233 | ||
/* Lookup table used for conv=ibm: indexed by an ASCII byte, it yields the
   byte of the alternate EBCDIC encoding.  It differs from
   `ascii_to_ebcdic' in only a handful of entries. */
unsigned char ascii_to_ibm[] =
{
  0, 01, 02, 03, 067, 055, 056, 057,
  026, 05, 045, 013, 014, 015, 016, 017,
  020, 021, 022, 023, 074, 075, 062, 046,
  030, 031, 077, 047, 034, 035, 036, 037,
  0100, 0132, 0177, 0173, 0133, 0154, 0120, 0175,
  0115, 0135, 0134, 0116, 0153, 0140, 0113, 0141,
  0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
  0370, 0371, 0172, 0136, 0114, 0176, 0156, 0157,
  0174, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
  0310, 0311, 0321, 0322, 0323, 0324, 0325, 0326,
  0327, 0330, 0331, 0342, 0343, 0344, 0345, 0346,
  0347, 0350, 0351, 0255, 0340, 0275, 0137, 0155,
  0171, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
  0210, 0211, 0221, 0222, 0223, 0224, 0225, 0226,
  0227, 0230, 0231, 0242, 0243, 0244, 0245, 0246,
  0247, 0250, 0251, 0300, 0117, 0320, 0241, 07,
  040, 041, 042, 043, 044, 025, 06, 027,
  050, 051, 052, 053, 054, 011, 012, 033,
  060, 061, 032, 063, 064, 065, 066, 010,
  070, 071, 072, 073, 04, 024, 076, 0341,
  0101, 0102, 0103, 0104, 0105, 0106, 0107, 0110,
  0111, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
  0130, 0131, 0142, 0143, 0144, 0145, 0146, 0147,
  0150, 0151, 0160, 0161, 0162, 0163, 0164, 0165,
  0166, 0167, 0170, 0200, 0212, 0213, 0214, 0215,
  0216, 0217, 0220, 0232, 0233, 0234, 0235, 0236,
  0237, 0240, 0252, 0253, 0254, 0255, 0256, 0257,
  0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
  0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
  0312, 0313, 0314, 0315, 0316, 0317, 0332, 0333,
  0334, 0335, 0336, 0337, 0352, 0353, 0354, 0355,
  0356, 0357, 0372, 0373, 0374, 0375, 0376, 0377
};
269 | ||
/* Lookup table used for conv=ascii: indexed by an EBCDIC byte, it yields
   the corresponding ASCII byte. */
unsigned char ebcdic_to_ascii[] =
{
  0, 01, 02, 03, 0234, 011, 0206, 0177,
  0227, 0215, 0216, 013, 014, 015, 016, 017,
  020, 021, 022, 023, 0235, 0205, 010, 0207,
  030, 031, 0222, 0217, 034, 035, 036, 037,
  0200, 0201, 0202, 0203, 0204, 012, 027, 033,
  0210, 0211, 0212, 0213, 0214, 05, 06, 07,
  0220, 0221, 026, 0223, 0224, 0225, 0226, 04,
  0230, 0231, 0232, 0233, 024, 025, 0236, 032,
  040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
  0247, 0250, 0133, 056, 074, 050, 053, 041,
  046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
  0260, 0261, 0135, 044, 052, 051, 073, 0136,
  055, 057, 0262, 0263, 0264, 0265, 0266, 0267,
  0270, 0271, 0174, 054, 045, 0137, 076, 077,
  0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
  0302, 0140, 072, 043, 0100, 047, 075, 042,
  0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
  0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
  0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
  0161, 0162, 0313, 0314, 0315, 0316, 0317, 0320,
  0321, 0176, 0163, 0164, 0165, 0166, 0167, 0170,
  0171, 0172, 0322, 0323, 0324, 0325, 0326, 0327,
  0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
  0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
  0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
  0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
  0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
  0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
  0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
  0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
  060, 061, 062, 063, 064, 065, 066, 067,
  070, 071, 0372, 0373, 0374, 0375, 0376, 0377
};
305 | ||
306 | void | |
307 | main (argc, argv) | |
308 | int argc; | |
309 | char **argv; | |
310 | { | |
311 | struct sigaction sigact; | |
312 | int i; | |
313 | ||
314 | program_name = argv[0]; | |
315 | ||
316 | /* Initialize translation table to identity translation. */ | |
317 | for (i = 0; i < 256; i++) | |
318 | trans_table[i] = i; | |
319 | ||
320 | /* Decode arguments. */ | |
321 | scanargs (argc, argv); | |
322 | apply_translations (); | |
323 | ||
324 | if (input_file != NULL) | |
325 | { | |
326 | input_fd = open (input_file, O_RDONLY); | |
327 | if (input_fd < 0) | |
328 | error (1, errno, "%s", input_file); | |
329 | } | |
330 | else | |
331 | input_file = "standard input"; | |
332 | ||
333 | if (input_fd == output_fd) | |
334 | error (1, 0, "standard %s is closed", input_fd == 0 ? "input" : "output"); | |
335 | ||
336 | if (output_file != NULL) | |
337 | { | |
338 | int omode = O_RDWR | O_CREAT; | |
339 | ||
340 | if (seek_record == 0 && !(conversions_mask & C_NOTRUNC)) | |
341 | omode |= O_TRUNC; | |
342 | output_fd = open (output_file, omode, 0666); | |
343 | if (output_fd < 0) | |
344 | error (1, errno, "%s", output_file); | |
345 | #ifndef FTRUNCATE_MISSING | |
346 | if (seek_record > 0 && !(conversions_mask & C_NOTRUNC)) | |
347 | { | |
348 | if (ftruncate (output_fd, seek_record * output_blocksize) < 0) | |
349 | error (0, errno, "%s", output_file); | |
350 | } | |
351 | #endif | |
352 | } | |
353 | else | |
354 | output_file = "standard output"; | |
355 | ||
356 | sigaction (SIGINT, NULL, &sigact); | |
357 | if (sigact.sa_handler != SIG_IGN) | |
358 | { | |
359 | sigact.sa_handler = interrupt_handler; | |
360 | sigemptyset (&sigact.sa_mask); | |
361 | sigact.sa_flags = 0; | |
362 | sigaction (SIGINT, &sigact, NULL); | |
363 | } | |
364 | copy (); | |
365 | } | |
366 | ||
/* Throw away RECORDS blocks of BLOCKSIZE bytes on file descriptor FDESC,
   which is open with read permission for FILE.  Store up to BLOCKSIZE
   bytes of the data at a time in BUF, if necessary.

   Regular files are positioned with a single lseek (); everything else
   is read and discarded block by block.  Reading stops at end of file
   instead of issuing further reads that cannot return data.  Any
   failure is reported and ends the program through quit (1). */

void
skip (fdesc, file, records, blocksize, buf)
     int fdesc;
     char *file;
     long records;
     long blocksize;
     char *buf;
{
  struct stat stats;

  /* Use fstat instead of checking for errno == ESPIPE because
     lseek doesn't work on some special files but doesn't return an
     error, either. */
  if (fstat (fdesc, &stats))
    {
      error (0, errno, "%s", file);
      quit (1);
    }

  if (S_ISREG (stats.st_mode))
    {
      if (lseek (fdesc, records * blocksize, SEEK_SET) < 0)
	{
	  error (0, errno, "%s", file);
	  quit (1);
	}
    }
  else
    {
      while (records-- > 0)
	{
	  ssize_t nread = read (fdesc, buf, blocksize);

	  if (nread < 0)
	    {
	      error (0, errno, "%s", file);
	      quit (1);
	    }
	  /* End of file: nothing is left to skip.
	     FIXME If fewer bytes were read than requested, meaning that
	     EOF was reached, POSIX wants the output file padded with NULs. */
	  if (nread == 0)
	    break;
	}
    }
}
412 | ||
413 | /* Apply the character-set translations specified by the user | |
414 | to the NREAD bytes in BUF. */ | |
415 | ||
416 | void | |
417 | translate_buffer (buf, nread) | |
418 | unsigned char *buf; | |
419 | int nread; | |
420 | { | |
421 | register unsigned char *cp; | |
422 | register int i; | |
423 | ||
424 | for (i = nread, cp = buf; i; i--, cp++) | |
425 | *cp = trans_table[*cp]; | |
426 | } | |
427 | ||
/* If nonzero, the last char from the previous call to `swab_buffer'
   is saved in `saved_char'. */
int char_is_saved = 0;

/* Odd char from previous call. */
unsigned char saved_char;

/* Swap NREAD bytes in BUF, plus possibly an initial char from the
   previous call.  If NREAD is odd, save the last char for the
   next call.  Return the new start of the BUF buffer.

   BUF must have one writable byte in front of it and one behind it: the
   byte in front receives a char carried over from the previous call, and
   the swapped data ends up shifted one position toward the end.
   *NREAD is updated to the number of bytes available at the returned
   address; it is always even on return. */

unsigned char *
swab_buffer (buf, nread)
     unsigned char *buf;
     int *nread;
{
  unsigned char *bufstart = buf;
  register unsigned char *cp;
  register int i;

  /* Is a char left from last time? */
  if (char_is_saved)
    {
      *--bufstart = saved_char;
      /* Count the carried-over char.  This must increment the value
	 NREAD points to, not the pointer itself. */
      ++*nread;
      char_is_saved = 0;
    }

  if (*nread & 1)
    {
      /* An odd number of chars are in the buffer. */
      saved_char = bufstart[--*nread];
      char_is_saved = 1;
    }

  /* Do the byte-swapping by moving every second character two
     positions toward the end, working from the end of the buffer
     toward the beginning.  This way we only move half of the data. */

  cp = bufstart + *nread;	/* Start one char past the last. */
  for (i = *nread / 2; i; i--, cp -= 2)
    *cp = *(cp - 2);

  return ++bufstart;
}
473 | ||
/* Output buffer. */
unsigned char *obuf;

/* Current index into `obuf', i.e. the number of bytes waiting in it. */
int oc = 0;

/* Index into current line, for `conv=block' and `conv=unblock'. */
int col = 0;
482 | ||
483 | /* The main loop. */ | |
484 | ||
485 | void | |
486 | copy () | |
487 | { | |
488 | unsigned char *ibuf, *bufstart; /* Input buffer. */ | |
489 | int nread; /* Bytes read in the current block. */ | |
490 | int exit_status = 0; | |
491 | ||
492 | /* Leave an extra byte at the beginning and end of `ibuf' for conv=swab. */ | |
493 | ibuf = (unsigned char *) malloc (input_blocksize + 2) + 1; | |
494 | if (conversions_mask & C_TWOBUFS) | |
495 | obuf = (unsigned char *) malloc (output_blocksize); | |
496 | else | |
497 | obuf = ibuf; | |
498 | ||
499 | if (skip_records > 0) | |
500 | skip (input_fd, input_file, skip_records, input_blocksize, ibuf); | |
501 | ||
502 | if (seek_record > 0) | |
503 | skip (output_fd, output_file, seek_record, output_blocksize, obuf); | |
504 | ||
505 | if (max_records == 0) | |
506 | quit (exit_status); | |
507 | ||
508 | while (1) | |
509 | { | |
510 | if (max_records >= 0 && r_partial + r_full >= max_records) | |
511 | break; | |
512 | ||
513 | /* Zero the buffer before reading, so that if we get a read error, | |
514 | whatever data we are able to read is followed by zeros. | |
515 | This minimizes data loss. */ | |
516 | if ((conversions_mask & C_SYNC) && (conversions_mask & C_NOERROR)) | |
517 | bzero (ibuf, input_blocksize); | |
518 | ||
519 | nread = read (input_fd, ibuf, input_blocksize); | |
520 | ||
521 | if (nread == 0) | |
522 | break; /* EOF. */ | |
523 | ||
524 | if (nread < 0) | |
525 | { | |
526 | error (0, errno, "%s", input_file); | |
527 | if (conversions_mask & C_NOERROR) | |
528 | { | |
529 | print_stats (); | |
530 | /* Seek past the bad block if possible. */ | |
531 | lseek (input_fd, input_blocksize, SEEK_CUR); | |
532 | if (conversions_mask & C_SYNC) | |
533 | /* Replace the missing input with null bytes and | |
534 | proceed normally. */ | |
535 | nread = 0; | |
536 | else | |
537 | continue; | |
538 | } | |
539 | else | |
540 | { | |
541 | /* Write any partial block. */ | |
542 | exit_status = 2; | |
543 | break; | |
544 | } | |
545 | } | |
546 | ||
547 | if (nread < input_blocksize) | |
548 | { | |
549 | r_partial++; | |
550 | if (conversions_mask & C_SYNC) | |
551 | { | |
552 | if (!(conversions_mask & C_NOERROR)) | |
553 | /* If C_NOERROR, we zeroed the block before reading. */ | |
554 | bzero (ibuf + nread, input_blocksize - nread); | |
555 | nread = input_blocksize; | |
556 | } | |
557 | } | |
558 | else | |
559 | r_full++; | |
560 | ||
561 | if (ibuf == obuf) /* If not C_TWOBUFS. */ | |
562 | { | |
563 | int nwritten = write (output_fd, obuf, nread); | |
564 | if (nwritten != nread) | |
565 | { | |
566 | error (0, errno, "%s", output_file); | |
567 | if (nwritten > 0) | |
568 | w_partial++; | |
569 | quit (1); | |
570 | } | |
571 | else if (nread == input_blocksize) | |
572 | w_full++; | |
573 | else | |
574 | w_partial++; | |
575 | continue; | |
576 | } | |
577 | ||
578 | /* Do any translations on the whole buffer at once. */ | |
579 | ||
580 | if (translation_needed) | |
581 | translate_buffer (ibuf, nread); | |
582 | ||
583 | if (conversions_mask & C_SWAB) | |
584 | bufstart = swab_buffer (ibuf, &nread); | |
585 | else | |
586 | bufstart = ibuf; | |
587 | ||
588 | if (conversions_mask & C_BLOCK) | |
589 | copy_with_block (bufstart, nread); | |
590 | else if (conversions_mask & C_UNBLOCK) | |
591 | copy_with_unblock (bufstart, nread); | |
592 | else | |
593 | copy_simple (bufstart, nread); | |
594 | } | |
595 | ||
596 | /* If we have a char left as a result of conv=swab, output it. */ | |
597 | if (char_is_saved) | |
598 | { | |
599 | if (conversions_mask & C_BLOCK) | |
600 | copy_with_block (&saved_char, 1); | |
601 | else if (conversions_mask & C_UNBLOCK) | |
602 | copy_with_unblock (&saved_char, 1); | |
603 | else | |
604 | output_char (saved_char); | |
605 | } | |
606 | ||
607 | if ((conversions_mask & C_BLOCK) && col > 0) | |
608 | { | |
609 | /* If the final input line didn't end with a '\n', pad | |
610 | the output block to `conversion_blocksize' chars. */ | |
611 | int pending_spaces = max (0, conversion_blocksize - col); | |
612 | while (pending_spaces--) | |
613 | output_char (space_character); | |
614 | } | |
615 | ||
616 | if ((conversions_mask & C_UNBLOCK) && col == conversion_blocksize) | |
617 | /* Add a final '\n' if there are exactly `conversion_blocksize' | |
618 | characters in the final record. */ | |
619 | output_char (newline_character); | |
620 | ||
621 | /* Write out the last block. */ | |
622 | if (oc > 0) | |
623 | { | |
624 | int nwritten = write (output_fd, obuf, oc); | |
625 | if (nwritten > 0) | |
626 | w_partial++; | |
627 | if (nwritten != oc) | |
628 | { | |
629 | error (0, errno, "%s", output_file); | |
630 | quit (1); | |
631 | } | |
632 | } | |
633 | ||
634 | free (ibuf - 1); | |
635 | if (obuf != ibuf) | |
636 | free (obuf); | |
637 | ||
638 | quit (exit_status); | |
639 | } | |
640 | ||
641 | /* Copy NREAD bytes of BUF, with no conversions. */ | |
642 | ||
643 | void | |
644 | copy_simple (buf, nread) | |
645 | unsigned char *buf; | |
646 | int nread; | |
647 | { | |
648 | int nfree; /* Number of unused bytes in `obuf'. */ | |
649 | unsigned char *start = buf; /* First uncopied char in BUF. */ | |
650 | ||
651 | do | |
652 | { | |
653 | nfree = output_blocksize - oc; | |
654 | if (nfree > nread) | |
655 | nfree = nread; | |
656 | ||
657 | bcopy (start, obuf + oc, nfree); | |
658 | ||
659 | nread -= nfree; /* Update the number of bytes left to copy. */ | |
660 | start += nfree; | |
661 | oc += nfree; | |
662 | if (oc >= output_blocksize) | |
663 | write_output (); | |
664 | } | |
665 | while (nread > 0); | |
666 | } | |
667 | ||
668 | /* Copy NREAD bytes of BUF, doing conv=block | |
669 | (pad newline-terminated records to `conversion_blocksize', | |
670 | replacing the newline with trailing spaces). */ | |
671 | ||
672 | void | |
673 | copy_with_block (buf, nread) | |
674 | unsigned char *buf; | |
675 | int nread; | |
676 | { | |
677 | register int i; | |
678 | ||
679 | for (i = nread; i; i--, buf++) | |
680 | { | |
681 | if (*buf == newline_character) | |
682 | { | |
683 | int pending_spaces = max (0, conversion_blocksize - col); | |
684 | while (pending_spaces--) | |
685 | output_char (space_character); | |
686 | col = 0; | |
687 | } | |
688 | else | |
689 | { | |
690 | if (col == conversion_blocksize) | |
691 | r_truncate++; | |
692 | else if (col < conversion_blocksize) | |
693 | output_char (*buf); | |
694 | col++; | |
695 | } | |
696 | } | |
697 | } | |
698 | ||
699 | /* Copy NREAD bytes of BUF, doing conv=unblock | |
700 | (replace trailing spaces in `conversion_blocksize'-sized records | |
701 | with a newline). */ | |
702 | ||
703 | void | |
704 | copy_with_unblock (buf, nread) | |
705 | unsigned char *buf; | |
706 | int nread; | |
707 | { | |
708 | register int i; | |
709 | register unsigned char c; | |
710 | static int pending_spaces = 0; | |
711 | ||
712 | for (i = 0; i < nread; i++) | |
713 | { | |
714 | c = buf[i]; | |
715 | ||
716 | if (col++ >= conversion_blocksize) | |
717 | { | |
718 | col = pending_spaces = 0; /* Wipe out any pending spaces. */ | |
719 | i--; /* Push the char back; get it later. */ | |
720 | output_char (newline_character); | |
721 | } | |
722 | else if (c == space_character) | |
723 | pending_spaces++; | |
724 | else | |
725 | { | |
726 | if (pending_spaces) | |
727 | { | |
728 | /* `c' is the character after a run of spaces that were not | |
729 | at the end of the conversion buffer. Output them. */ | |
730 | while (pending_spaces--) | |
731 | output_char (space_character); | |
732 | } | |
733 | output_char (c); | |
734 | } | |
735 | } | |
736 | } | |
737 | ||
738 | /* Write, then empty, the output buffer `obuf'. */ | |
739 | ||
740 | void | |
741 | write_output () | |
742 | { | |
743 | int nwritten = write (output_fd, obuf, output_blocksize); | |
744 | if (nwritten != output_blocksize) | |
745 | { | |
746 | error (0, errno, "%s", output_file); | |
747 | if (nwritten > 0) | |
748 | w_partial++; | |
749 | quit (1); | |
750 | } | |
751 | else | |
752 | w_full++; | |
753 | oc = 0; | |
754 | } | |
755 | ||
756 | void | |
757 | scanargs (argc, argv) | |
758 | int argc; | |
759 | char **argv; | |
760 | { | |
761 | int i, n; | |
762 | ||
763 | for (i = 1; i < argc; i++) | |
764 | { | |
765 | char *name, *val; | |
766 | ||
767 | name = argv[i]; | |
768 | val = index (name, '='); | |
769 | if (val == NULL) | |
770 | usage ("unrecognized option `%s'", name); | |
771 | *val++ = '\0'; | |
772 | ||
773 | if (equal (name, "if")) | |
774 | input_file = val; | |
775 | else if (equal (name, "of")) | |
776 | output_file = val; | |
777 | else if (equal (name, "conv")) | |
778 | parse_conversion (val); | |
779 | else | |
780 | { | |
781 | n = parse_integer (val); | |
782 | if (n < 0) | |
783 | error (1, 0, "invalid number `%s'", val); | |
784 | ||
785 | if (equal (name, "ibs")) | |
786 | { | |
787 | input_blocksize = n; | |
788 | conversions_mask |= C_TWOBUFS; | |
789 | } | |
790 | else if (equal (name, "obs")) | |
791 | { | |
792 | output_blocksize = n; | |
793 | conversions_mask |= C_TWOBUFS; | |
794 | } | |
795 | else if (equal (name, "bs")) | |
796 | output_blocksize = input_blocksize = n; | |
797 | else if (equal (name, "cbs")) | |
798 | conversion_blocksize = n; | |
799 | else if (equal (name, "skip")) | |
800 | skip_records = n; | |
801 | else if (equal (name, "seek")) | |
802 | seek_record = n; | |
803 | else if (equal (name, "count")) | |
804 | max_records = n; | |
805 | else | |
806 | usage ("unrecognized option `%s=%s'", name, val); | |
807 | } | |
808 | } | |
809 | ||
810 | /* If bs= was given, both `input_blocksize' and `output_blocksize' will | |
811 | have been set to non-negative values. If either has not been set, | |
812 | bs= was not given, so make sure two buffers are used. */ | |
813 | if (input_blocksize == -1 || output_blocksize == -1) | |
814 | conversions_mask |= C_TWOBUFS; | |
815 | if (input_blocksize == -1) | |
816 | input_blocksize = DEFAULT_BLOCKSIZE; | |
817 | if (output_blocksize == -1) | |
818 | output_blocksize = DEFAULT_BLOCKSIZE; | |
819 | if (conversion_blocksize == 0) | |
820 | conversions_mask &= ~(C_BLOCK | C_UNBLOCK); | |
821 | } | |
822 | ||
/* Multiply *N (non-negative) by FACTOR (non-negative) in place.
   Return 0 on success, or -1 and leave *N alone if the product would
   not fit in an `int'. */

static int
multiply_checked (n, factor)
     int *n;
     int factor;
{
  if (factor != 0 && *n > INT_MAX / factor)
    return -1;
  *n *= factor;
  return 0;
}

/* Return the value of STR, interpreted as a non-negative decimal integer,
   optionally multiplied by various values:
     b  multiply by 512
     k  multiply by 1024
     w  multiply by 2
     xM multiply by the number M, which is parsed the same way
   Return -1 if STR does not represent a number in this format.  That
   includes a string that does not start with a digit, and a value too
   large for an `int'. */

int
parse_integer (str)
     char *str;
{
  int n = 0;
  int temp;
  char *p = str;

  /* At least one digit is required; an empty or suffix-only string is
     not a number. */
  if (!isdigit ((unsigned char) *p))
    return -1;

  while (isdigit ((unsigned char) *p))
    {
      int digit = *p - '0';

      /* Would n * 10 + digit exceed INT_MAX? */
      if (n > (INT_MAX - digit) / 10)
	return -1;
      n = n * 10 + digit;
      p++;
    }

  for (;;)
    switch (*p++)
      {
      case '\0':
	return n;
      case 'b':
	if (multiply_checked (&n, 512) < 0)
	  return -1;
	break;
      case 'k':
	if (multiply_checked (&n, 1024) < 0)
	  return -1;
	break;
      case 'w':
	if (multiply_checked (&n, 2) < 0)
	  return -1;
	break;
      case 'x':
	/* The rest of the string is a complete number of its own. */
	temp = parse_integer (p);
	if (temp == -1)
	  return -1;
	if (multiply_checked (&n, temp) < 0)
	  return -1;
	return n;
      default:
	return -1;
      }
}
865 | ||
866 | /* Interpret one "conv=..." option. */ | |
867 | ||
868 | void | |
869 | parse_conversion (str) | |
870 | char *str; | |
871 | { | |
872 | char *new; | |
873 | int i; | |
874 | ||
875 | do | |
876 | { | |
877 | new = index (str, ','); | |
878 | if (new != NULL) | |
879 | *new++ = '\0'; | |
880 | for (i = 0; conversions[i].convname != NULL; i++) | |
881 | if (equal (conversions[i].convname, str)) | |
882 | { | |
883 | conversions_mask |= conversions[i].conversion; | |
884 | break; | |
885 | } | |
886 | if (conversions[i].convname == NULL) | |
887 | { | |
888 | usage ("%s: invalid conversion", str); | |
889 | exit (1); | |
890 | } | |
891 | str = new; | |
892 | } while (new != NULL); | |
893 | } | |
894 | ||
895 | /* Fix up translation table. */ | |
896 | ||
897 | void | |
898 | apply_translations () | |
899 | { | |
900 | int i; | |
901 | ||
902 | #define MX(a) (bit_count (conversions_mask & (a))) | |
903 | if ((MX (C_ASCII | C_EBCDIC | C_IBM) > 1) | |
904 | || (MX (C_BLOCK | C_UNBLOCK) > 1) | |
905 | || (MX (C_LCASE | C_UCASE) > 1) | |
906 | || (MX (C_UNBLOCK | C_SYNC) > 1)) | |
907 | { | |
908 | error (1, 0, "\ | |
909 | only one conv in {ascii,ebcdic,ibm}, {lcase,ucase}, {block,unblock}, {unblock,sync}"); | |
910 | } | |
911 | #undef MX | |
912 | ||
913 | if (conversions_mask & C_ASCII) | |
914 | translate_charset (ebcdic_to_ascii); | |
915 | ||
916 | if (conversions_mask & C_UCASE) | |
917 | { | |
918 | for (i = 0; i < 256; i++) | |
919 | if (ISLOWER (trans_table[i])) | |
920 | trans_table[i] = toupper (trans_table[i]); | |
921 | translation_needed = 1; | |
922 | } | |
923 | else if (conversions_mask & C_LCASE) | |
924 | { | |
925 | for (i = 0; i < 256; i++) | |
926 | if (ISUPPER (trans_table[i])) | |
927 | trans_table[i] = tolower (trans_table[i]); | |
928 | translation_needed = 1; | |
929 | } | |
930 | ||
931 | if (conversions_mask & C_EBCDIC) | |
932 | { | |
933 | translate_charset (ascii_to_ebcdic); | |
934 | newline_character = ascii_to_ebcdic['\n']; | |
935 | space_character = ascii_to_ebcdic[' ']; | |
936 | } | |
937 | else if (conversions_mask & C_IBM) | |
938 | { | |
939 | translate_charset (ascii_to_ibm); | |
940 | newline_character = ascii_to_ibm['\n']; | |
941 | space_character = ascii_to_ibm[' ']; | |
942 | } | |
943 | } | |
944 | ||
945 | void | |
946 | translate_charset (new_trans) | |
947 | unsigned char *new_trans; | |
948 | { | |
949 | int i; | |
950 | ||
951 | for (i = 0; i < 256; i++) | |
952 | trans_table[i] = new_trans[trans_table[i]]; | |
953 | translation_needed = 1; | |
954 | } | |
955 | ||
/* Return how many bits of `i' are set. */

int
bit_count (i)
     register unsigned int i;
{
  register int ones = 0;

  /* Look at the lowest bit, then shift it out, until nothing is left. */
  for (; i != 0; i >>= 1)
    if (i & 1)
      ones++;
  return ones;
}
968 | ||
969 | void | |
970 | print_stats () | |
971 | { | |
972 | fprintf (stderr, "%u+%u records in\n", r_full, r_partial); | |
973 | fprintf (stderr, "%u+%u records out\n", w_full, w_partial); | |
974 | if (r_truncate > 0) | |
975 | fprintf (stderr, "%u truncated block%s\n", r_truncate, | |
976 | r_truncate == 1 ? "" : "s"); | |
977 | } | |
978 | ||
979 | void | |
980 | quit (code) | |
981 | int code; | |
982 | { | |
983 | int errcode = code ? code : 1; | |
984 | print_stats (); | |
985 | if (close (input_fd) < 0) | |
986 | error (errcode, errno, "%s", input_file); | |
987 | if (close (output_fd) < 0) | |
988 | error (errcode, errno, "%s", output_file); | |
989 | exit (code); | |
990 | } | |
991 | ||
992 | SIGTYPE | |
993 | interrupt_handler () | |
994 | { | |
995 | quit (1); | |
996 | } | |
997 | ||
998 | void | |
999 | usage (string, arg0, arg1) | |
1000 | char *string, *arg0, *arg1; | |
1001 | { | |
1002 | fprintf (stderr, "%s: ", program_name); | |
1003 | fprintf (stderr, string, arg0, arg1); | |
1004 | fprintf (stderr, "\n"); | |
1005 | fprintf (stderr, "\ | |
1006 | Usage: %s [if=file] [of=file] [ibs=bytes] [obs=bytes] [bs=bytes] [cbs=bytes]\n\ | |
1007 | [skip=blocks] [seek=blocks] [count=blocks]\n\ | |
1008 | [conv={ascii,ebcdic,ibm,block,unblock,lcase,ucase,swab,noerror,notrunc,\n\ | |
1009 | sync}]\n\ | |
1010 | Numbers can be followed by a multiplier:\n\ | |
1011 | b=512, k=1024, w=2, xm=number m\n", | |
1012 | program_name); | |
1013 | exit (1); | |
1014 | } | |
1015 | ||
/* Minimal diagnostic routine.  If E is nonzero, print "error E:" first.
   Then print the message built from the printf-style format S and its
   single string argument S1, followed by a newline, all on stderr.
   If N is nonzero, exit with status N; otherwise return 0.

   S and S1 are declared as pointers so that they are not squeezed
   through `int', which loses bits where pointers are wider than `int'. */

int
error (n, e, s, s1)
     int n;
     int e;
     char *s;
     char *s1;
{
  if (e)
    fprintf (stderr, "error %d:", e);
  fprintf (stderr, s, s1);
  /* End the line so that later output does not run into the message. */
  fputc ('\n', stderr);
  if (n)
    exit (n);
  return 0;
}