wc has a bug, is too slow; bug report 4.3BSD/ucb/33
[unix-history] / usr / src / usr.bin / wc / wc.c
CommitLineData
f42904bc
DF
1/*
2 * Copyright (c) 1980 Regents of the University of California.
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 */
6
/*
 * Identification strings compiled into the program.  Both are left out
 * when `lint' is defined.
 */
#ifndef lint
char copyright[] =
"@(#) Copyright (c) 1980 Regents of the University of California.\n\
 All rights reserved.\n";
#endif /* not lint */

#ifndef lint
/* %G% is a keyword placeholder left as-is here; it is not expanded by this file. */
static char sccsid[] = "@(#)wc.c	5.2 (Berkeley) %G%";
#endif /* not lint */
16
93b63814 17/* wc line, word and char count */
66690d0f 18
93b63814
KB
19#include <sys/param.h>
20#include <sys/stat.h>
21#include <sys/file.h>
66690d0f 22#include <stdio.h>
66690d0f 23
93b63814
KB
/* Character codes (octal) that the scanner compares input bytes against. */
#define	TAB	011		/* tab char */
#define	NL	012		/* newline char */
#define	SPACE	040		/* space char */
#define	DEL	0177		/* del char */

/* Truth values used for the option flags and the in-whitespace toggle. */
#define	NO	0		/* no/false */
#define	YES	1		/* yes/true */

/* Process exit codes. */
#define	OK	0		/* okay exit */
#define	ERR	1		/* error exit */

/* Running totals accumulated over every input counted so far. */
static long tlinect;		/* total line count */
static long twordct;		/* total word count */
static long tcharct;		/* total character count */

/* Option flags; nonzero means the corresponding count is wanted. */
static short doline;		/* if want line count */
static short doword;		/* if want word count */
static short dochar;		/* if want character count */
39
40main(argc,argv)
41int argc;
42char **argv;
66690d0f 43{
93b63814
KB
44 extern char *optarg; /* getopt arguments */
45 extern int optind;
46 register int ch; /* getopt character */
66690d0f 47
93b63814
KB
48 /*
49 * wc is unusual in that its flags are on by default, so,
50 * if you don't get any arguments, you have to turn them
51 * all on.
52 */
53 if (argc > 1 && argv[1][0] == '-' && argv[1][1]) {
54 while ((ch = getopt(argc,argv,"lwc")) != EOF)
55 switch((char)ch) {
56 case 'l':
57 doline = YES;
58 break;
59 case 'w':
60 doword = YES;
61 break;
62 case 'c':
63 dochar = YES;
64 break;
65 case '?':
66 default:
67 fputs("Usage: wc [-lwc] [files]\n",stderr);
68 exit(ERR);
69 }
70 argv += optind;
71 argc -= optind;
72 }
73 else {
74 ++argv;
75 --argc;
76 doline = doword = dochar = YES;
77 }
78
79 /* should print "stdin" as the file name, here */
80 if (argc <= 1) {
81 if (!*argv || !strcmp(*argv,"-")) {
82 cnt((char *)NULL);
83 putchar('\n');
84 }
85 else {
86 cnt(*argv);
87 printf(" %s\n",*argv);
66690d0f 88 }
93b63814 89 exit(OK);
66690d0f
BJ
90 }
91
93b63814
KB
92 /*
93 * cat allows "-" as stdin anywhere in the arg list,
94 * might as well here, too. Again, should use "stdin"
95 * as the file name.
96 */
66690d0f 97 do {
93b63814
KB
98 if (!strcmp(*argv,"-")) {
99 cnt((char *)NULL);
100 putchar('\n');
66690d0f 101 }
93b63814
KB
102 else {
103 cnt(*argv);
104 printf(" %s\n",*argv);
105 }
106 } while(*++argv);
107
108 if (doline)
109 printf(" %7ld",tlinect);
110 if (doword)
111 printf(" %7ld",twordct);
112 if (dochar)
113 printf(" %7ld",tcharct);
114 puts(" total");
115 exit(OK);
116}
117
118static
119cnt(file)
120char *file;
121{
122 register u_char *C; /* traveling pointer */
123 register short gotsp; /* space toggle */
124 register int len; /* length of read */
125 register long linect, /* line count */
126 wordct, /* word count */
127 charct; /* character count */
128 struct stat sbuf; /* stat buffer */
129 int fd; /* file descriptor */
130 u_char buf[MAXBSIZE]; /* read buffer */
131
132 linect = wordct = charct = 0;
133 if (file) {
134 if ((fd = open(file,O_RDONLY)) < 0) {
135 perror(file);
136 exit(ERR);
137 }
138 if (!doword) {
139 /*
140 * line counting is split out because it's a lot
141 * faster to get lines than to get words, since
142 * the word count requires some logic.
143 */
144 if (doline) {
145 while(len = read(fd,buf,MAXBSIZE)) {
146 if (len == -1) {
147 perror(file);
148 exit(ERR);
149 }
150 charct += len;
151 for (C = buf;len--;++C)
152 if (*C == '\n')
153 ++linect;
154 }
155 tlinect += linect;
156 printf(" %7ld",linect);
157 if (dochar) {
158 tcharct += charct;
159 printf(" %7ld",sbuf.st_size);
66690d0f 160 }
93b63814
KB
161 close(fd);
162 return;
66690d0f 163 }
93b63814
KB
164 /*
165 * if all we need is the number of characters and
166 * it's a directory or a regular or linked file, just
167 * stat the puppy. We avoid testing for it not being
168 * a special device in case someone adds a new type
169 * of inode.
170 */
171 if (dochar) {
172 if (fstat(fd,&sbuf)) {
173 perror(file);
174 exit(ERR);
175 }
176 if (sbuf.st_mode & (S_IFREG | S_IFLNK | S_IFDIR)) {
177 printf(" %7ld",sbuf.st_size);
178 tcharct += sbuf.st_size;
179 close(fd);
180 return;
181 }
66690d0f 182 }
66690d0f 183 }
93b63814
KB
184 }
185 else
186 fd = 0;
187 /* do it the hard way... */
188 for (gotsp = YES;len = read(fd,buf,MAXBSIZE);) {
189 if (len == -1) {
190 perror(file);
191 exit(ERR);
192 }
193 charct += len;
194 for (C = buf;len--;++C)
195 switch(*C) {
196 case NL:
197 ++linect;
198 case TAB:
199 case SPACE:
200 gotsp = YES;
201 continue;
202 default:
203#ifdef NOT_DEFINED
204 /*
205 * This line of code implements the
206 * original V7 wc algorithm, i.e.
207 * a non-printing character doesn't
208 * toggle the "word" count, so that
209 * " ^D^F " counts as 6 spaces,
210 * while "foo^D^Fbar" counts as 8
211 * characters.
212 *
213 * test order is important -- gotsp
214 * will normally be NO, so test it
215 * first
216 */
217 if (gotsp && *C > SPACE && *C < DEL) {
218#endif NOT_DEFINED
219 /*
220 * This line implements the manual
221 * page, i.e. a word is a "maximal
222 * string of characters delimited by
223 * spaces, tabs or newlines." Notice
224 * nothing was said about a character
225 * being printing or non-printing.
226 */
227 if (gotsp) {
228 gotsp = NO;
229 ++wordct;
230 }
231 }
232 }
233 if (doline) {
66690d0f 234 tlinect += linect;
93b63814
KB
235 printf(" %7ld",linect);
236 }
237 if (doword) {
66690d0f 238 twordct += wordct;
93b63814 239 printf(" %7ld",wordct);
66690d0f 240 }
93b63814
KB
241 if (dochar) {
242 tcharct += charct;
243 printf(" %7ld",charct);
66690d0f 244 }
93b63814 245 close(fd);
66690d0f 246}