1) Fix -E (no escape char) mode, has error with 0xFF char
[unix-history] / usr.bin / cut / cut.c
CommitLineData
15637ed4
RG
1/*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE
37 * -------------------- ----- ----------------------
38 * CURRENT PATCH LEVEL: 1 00142
39 * -------------------- ----- ----------------------
40 *
41 * 20 Apr 93 Simon J Gerraty cut -f1 outputs a field separator
42 * before the first field.
43 */
44
#ifndef lint
/* Copyright notice kept in the program image as a plain string. */
char copyright[] =
	"@(#) Copyright (c) 1989 The Regents of the University of California.\n"
	" All rights reserved.\n";

/* Version identification string for this source file. */
static char sccsid[] = "@(#)cut.c 5.4 (Berkeley) 10/30/90";
#endif /* not lint */
54
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
58
/*
 * Command-line state, filled in by main() while it parses the options
 * and read by the cut routines.
 */
int cflag;		/* non-zero once -c has been seen */
int fflag;		/* non-zero once -f has been seen */
int dflag;		/* non-zero once -d has been seen */
int sflag;		/* non-zero once -s has been seen */
char dchar;		/* field delimiter; main() defaults it to '\t' */
64
65main(argc, argv)
66 int argc;
67 char **argv;
68{
69 extern char *optarg;
70 extern int errno, optind;
71 FILE *fp;
72 int ch, (*fcn)(), c_cut(), f_cut();
73 char *strerror();
74
75 dchar = '\t'; /* default delimiter is \t */
76
77 while ((ch = getopt(argc, argv, "c:d:f:s")) != EOF)
78 switch(ch) {
79 case 'c':
80 fcn = c_cut;
81 get_list(optarg);
82 cflag = 1;
83 break;
84 case 'd':
85 dchar = *optarg;
86 dflag = 1;
87 break;
88 case 'f':
89 get_list(optarg);
90 fcn = f_cut;
91 fflag = 1;
92 break;
93 case 's':
94 sflag = 1;
95 break;
96 case '?':
97 default:
98 usage();
99 }
100 argc -= optind;
101 argv += optind;
102
103 if (fflag) {
104 if (cflag)
105 usage();
106 } else if (!cflag || dflag || sflag)
107 usage();
108
109 if (*argv)
110 for (; *argv; ++argv) {
111 if (!(fp = fopen(*argv, "r"))) {
112 (void)fprintf(stderr,
113 "cut: %s: %s\n", *argv, strerror(errno));
114 exit(1);
115 }
116 fcn(fp, *argv);
117 }
118 else
119 fcn(stdin, "stdin");
120 exit(0);
121}
122
/*
 * Selection state built by get_list() and consumed by c_cut()/f_cut().
 */
int autostart;		/* largest M seen in a "-M" element, 0 if none */
int autostop;		/* smallest stop of an open-ended "N-" element, 0 if none */
int maxval;		/* last position the cut routines test individually */

/* positions[n] is non-zero when position n (1-based) is selected. */
char positions[_POSIX2_LINE_MAX + 1];
126
127get_list(list)
128 char *list;
129{
130 register char *pos;
131 register int setautostart, start, stop;
132 char *p, *strtok();
133
134 /*
135 * set a byte in the positions array to indicate if a field or
136 * column is to be selected; use +1, it's 1-based, not 0-based.
137 * This parser is less restrictive than the Draft 9 POSIX spec.
138 * POSIX doesn't allow lists that aren't in increasing order or
139 * overlapping lists. We also handle "-3-5" although there's no
140 * real reason too.
141 */
142 for (; p = strtok(list, ", \t"); list = NULL) {
143 setautostart = start = stop = 0;
144 if (*p == '-') {
145 ++p;
146 setautostart = 1;
147 }
148 if (isdigit(*p)) {
149 start = stop = strtol(p, &p, 10);
150 if (setautostart && start > autostart)
151 autostart = start;
152 }
153 if (*p == '-') {
154 if (isdigit(p[1]))
155 stop = strtol(p + 1, &p, 10);
156 if (*p == '-') {
157 ++p;
158 if (!autostop || autostop > stop)
159 autostop = stop;
160 }
161 }
162 if (*p)
163 badlist("illegal list value");
164 if (!stop || !start)
165 badlist("values may not include zero");
166 if (stop > _POSIX2_LINE_MAX) {
167 /* positions used rather than allocate a new buffer */
168 (void)sprintf(positions, "%d too large (max %d)",
169 stop, _POSIX2_LINE_MAX);
170 badlist(positions);
171 }
172 if (maxval < stop)
173 maxval = stop;
174 for (pos = positions + start; start++ <= stop; *pos++ = 1);
175 }
176
177 /* overlapping ranges */
178 if (autostop && maxval > autostop)
179 maxval = autostop;
180
181 /* set autostart */
182 if (autostart)
183 memset(positions + 1, '1', autostart);
184}
185
186/* ARGSUSED */
187c_cut(fp, fname)
188 FILE *fp;
189 char *fname;
190{
191 register int ch, col;
192 register char *pos;
193
194 for (;;) {
195 pos = positions + 1;
196 for (col = maxval; col; --col) {
197 if ((ch = getc(fp)) == EOF)
198 return;
199 if (ch == '\n')
200 break;
201 if (*pos++)
202 putchar(ch);
203 }
204 if (ch != '\n')
205 if (autostop)
206 while ((ch = getc(fp)) != EOF && ch != '\n')
207 putchar(ch);
208 else
209 while ((ch = getc(fp)) != EOF && ch != '\n');
210 putchar('\n');
211 }
212}
213
214f_cut(fp, fname)
215 FILE *fp;
216 char *fname;
217{
218 register int ch, field, isdelim;
219 register char *pos, *p, sep;
220 int output;
221 char lbuf[_POSIX2_LINE_MAX + 1];
222
223 for (sep = dchar, output = 0; fgets(lbuf, sizeof(lbuf), fp); output = 0) {
224 for (isdelim = 0, p = lbuf;; ++p) {
225 if (!(ch = *p)) {
226 (void)fprintf(stderr,
227 "cut: %s: line too long.\n", fname);
228 exit(1);
229 }
230 /* this should work if newline is delimiter */
231 if (ch == sep)
232 isdelim = 1;
233 if (ch == '\n') {
234 if (!isdelim && !sflag)
235 (void)printf("%s", lbuf);
236 break;
237 }
238 }
239 if (!isdelim)
240 continue;
241
242 pos = positions + 1;
243 for (field = maxval, p = lbuf; field; --field, ++pos) {
244 if (*pos) {
245 if (output++)
246 putchar(sep);
247 while ((ch = *p++) != '\n' && ch != sep)
248 putchar(ch);
249 } else
250 while ((ch = *p++) != '\n' && ch != sep);
251 if (ch == '\n')
252 break;
253 }
254 if (ch != '\n')
255 if (autostop) {
256 if (output)
257 putchar(sep);
258 for (; (ch = *p) != '\n'; ++p)
259 putchar(ch);
260 } else
261 for (; (ch = *p) != '\n'; ++p);
262 putchar('\n');
263 }
264}
265
/*
 * badlist --
 *	Print a diagnostic about the -c/-f list argument on standard error
 *	and exit with status 1.  msg is the text placed after the
 *	"cut: [-cf] list: " prefix; a period and newline are appended.
 */
int
badlist(msg)
	char *msg;
{
	(void)fputs("cut: [-cf] list: ", stderr);
	(void)fputs(msg, stderr);
	(void)fputs(".\n", stderr);
	exit(1);
}
272
/*
 * usage --
 *	Print the command synopsis on standard error and exit with
 *	status 1.
 */
int
usage()
{
	(void)fputs("usage:\tcut -c list [file1 ...]\n"
	    "\tcut -f list [-s] [-d delim] [file ...]\n", stderr);
	exit(1);
}