POSIX 1003.2 compatibility; "word" now defined by isspace(3)
author Keith Bostic <bostic@ucbvax.Berkeley.EDU>
Thu, 7 Nov 1991 04:38:51 +0000 (20:38 -0800)
committer Keith Bostic <bostic@ucbvax.Berkeley.EDU>
Thu, 7 Nov 1991 04:38:51 +0000 (20:38 -0800)
SCCS-vsn: usr.bin/wc/wc.1 6.5
SCCS-vsn: usr.bin/wc/wc.c 5.8

usr/src/usr.bin/wc/wc.1
usr/src/usr.bin/wc/wc.c

index b67ca21..828166a 100644 (file)
@@ -6,7 +6,7 @@
 .\"
 .\" %sccs.include.redist.man%
 .\"
 .\"
 .\" %sccs.include.redist.man%
 .\"
-.\"     @(#)wc.1       6.4 (Berkeley) %G%
+.\"     @(#)wc.1       6.5 (Berkeley) %G%
 .\"
 .Dd 
 .Dt WC 1
 .\"
 .Dd 
 .Dt WC 1
 .Sh DESCRIPTION
 The
 .Nm wc
 .Sh DESCRIPTION
 The
 .Nm wc
-utility reads one or more input text files, and, by
-default, writes the number of lines, words, and bytes
-contained in each input file to the standard output.
-If more than one input file is specified,
-a line of cumulative count(s) for all named files is output on a
-separate line
-following the last file count.
-.Nm Wc
-considers a word to be a maximal string of
-characters delimited by white space.
+utility displays the number of lines, words, and bytes contained in each
+input
+.Ar file
+(or standard input, by default) to the standard output.
+A line is defined as a string of characters delimited by a <newline> character,
+and a word is defined as a string of characters delimited by white space
+characters.
+White space characters are the set of characters for which the
+.Xr isspace 3
+function returns true.
+If more than one input file is specified, a line of cumulative counts
+for all the files is displayed on a separate line after the output for
+the last file.
 .Pp
 The following options are available:
 .Bl -tag -width Ds
 .Pp
 The following options are available:
 .Bl -tag -width Ds
@@ -47,40 +50,31 @@ is written to the standard output.
 .Pp
 When an option is specified,
 .Nm wc
 .Pp
 When an option is specified,
 .Nm wc
-only
-reports the
-information requested by that option. The
-default action is equivalent to all the
-flags
-.Fl clw
-having been specified.
+only reports the information requested by that option.
+The default action is equivalent to specifying all of the flags.
 .Pp
 .Pp
-The following operands are available:
-.Bl -tag -width Ds
-.It Ar file
-A pathname of an input file.
-.El
-.Pp
-If no file names
-are specified, the standard input is used and
-a file name is not output. The resulting output is one
-line of the requested count(s) with the cumulative sum
-of all files read in via standard input.
-.Pp
-By default, the standard output contains a line for each
-input file of the form:
-.Bd -literal -offset indent
-lines   words  bytes   file_name
-.Ed
-.Pp
-The counts for lines, words and bytes are integers separated
-by spaces.
-The ordering of the display of the number of lines, words,
-and/or bytes is the order in which the options were specified.
+If no files are specified, the standard input is used and no
+file name is displayed.
 .Pp
 The
 .Nm wc
 utility exits 0 on success, and >0 if an error occurs.
 .Pp
 The
 .Nm wc
 utility exits 0 on success, and >0 if an error occurs.
+.Sh SEE ALSO
+.Xr isspace 3
+.Sh COMPATIBILITY
+Historically, the
+.Nm wc
+utility was documented to define a word as a ``maximal string of
+characters delimited by <space>, <tab> or <newline> characters''.
+The implementation, however, didn't handle non-printing characters
+correctly so that ``  ^D^E  '' counted as 6 spaces, while ``foo^D^Ebar''
+counted as 8 characters.
+4BSD systems after 4.3BSD modified the implementation to be consistent
+with the documentation.
+This implementation defines a ``word'' in terms of the
+.Xr isspace 3
+function, as required by
+.St -p1003.2 .
 .Sh STANDARDS
 The
 .Nm wc
 .Sh STANDARDS
 The
 .Nm wc
index 2d761e2..0cb8a20 100644 (file)
 
 #ifndef lint
 char copyright[] =
 
 #ifndef lint
 char copyright[] =
-"@(#) Copyright (c) 1980, 1987 Regents of the University of California.\n\
+"@(#) Copyright (c) 1987, 1991 Regents of the University of California.\n\
  All rights reserved.\n";
 #endif /* not lint */
 
 #ifndef lint
  All rights reserved.\n";
 #endif /* not lint */
 
 #ifndef lint
-static char sccsid[] = "@(#)wc.c       5.7 (Berkeley) %G%";
+static char sccsid[] = "@(#)wc.c       5.8 (Berkeley) %G%";
 #endif /* not lint */
 
 #endif /* not lint */
 
-/* wc line, word and char count */
-
 #include <sys/param.h>
 #include <sys/stat.h>
 #include <sys/param.h>
 #include <sys/stat.h>
-#include <sys/file.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
 #include <stdio.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
 
 
-#define DEL    0177                    /* del char */
-#define NL     012                     /* newline char */
-#define SPACE  040                     /* space char */
-#define TAB    011                     /* tab char */
+u_long tlinect, twordct, tcharct;
+int doline, doword, dochar;
 
 
-static long    tlinect, twordct, tcharct;
-static int     doline, doword, dochar;
+void cnt __P((char *));
+void err __P((const char *, ...));
+void usage __P((void));
 
 
+int
 main(argc, argv)
        int argc;
 main(argc, argv)
        int argc;
-       char **argv;
+       char *argv[];
 {
 {
-       extern int optind;
        register int ch;
        int total;
 
        register int ch;
        int total;
 
-       /*
-        * wc is unusual in that its flags are on by default, so,
-        * if you don't get any arguments, you have to turn them
-        * all on.
-        */
-       if (argc > 1 && argv[1][0] == '-' && argv[1][1]) {
-               while ((ch = getopt(argc, argv, "lwc")) != EOF)
-                       switch((char)ch) {
-                       case 'l':
-                               doline = 1;
-                               break;
-                       case 'w':
-                               doword = 1;
-                               break;
-                       case 'c':
-                               dochar = 1;
-                               break;
-                       case '?':
-                       default:
-                               fputs("usage: wc [-lwc] [files]\n", stderr);
-                               exit(1);
-                       }
-               argv += optind;
-               argc -= optind;
-       }
-       else {
-               ++argv;
-               --argc;
+       while ((ch = getopt(argc, argv, "lwc")) != EOF)
+               switch((char)ch) {
+               case 'l':
+                       doline = 1;
+                       break;
+               case 'w':
+                       doword = 1;
+                       break;
+               case 'c':
+                       dochar = 1;
+                       break;
+               case '?':
+               default:
+                       usage();
+               }
+       argv += optind;
+       argc -= optind;
+
+       /* Wc's flags are on by default. */
+       if (doline + doword + dochar == 0)
                doline = doword = dochar = 1;
                doline = doword = dochar = 1;
-       }
 
        total = 0;
        if (!*argv) {
 
        total = 0;
        if (!*argv) {
-               cnt((char *)NULL);
-               putchar('\n');
+               cnt(NULL);
+               (void)printf("\n");
        }
        else do {
                cnt(*argv);
        }
        else do {
                cnt(*argv);
-               printf(" %s\n", *argv);
+               (void)printf(" %s\n", *argv);
                ++total;
        } while(*++argv);
 
        if (total > 1) {
                if (doline)
                ++total;
        } while(*++argv);
 
        if (total > 1) {
                if (doline)
-                       printf(" %7ld", tlinect);
+                       (void)printf(" %7ld", tlinect);
                if (doword)
                if (doword)
-                       printf(" %7ld", twordct);
+                       (void)printf(" %7ld", twordct);
                if (dochar)
                if (dochar)
-                       printf(" %7ld", tcharct);
-               puts(" total");
+                       (void)printf(" %7ld", tcharct);
+               (void)printf(" total\n");
        }
        exit(0);
 }
 
        }
        exit(0);
 }
 
+void
 cnt(file)
        char *file;
 {
 cnt(file)
        char *file;
 {
-       register u_char *C;
+       register u_char *p;
        register short gotsp;
        register short gotsp;
-       register int len;
-       register long linect, wordct, charct;
-       struct stat sbuf;
+       register int ch, len;
+       register u_long linect, wordct, charct;
+       struct stat sb;
        int fd;
        u_char buf[MAXBSIZE];
 
        int fd;
        u_char buf[MAXBSIZE];
 
+       fd = STDIN_FILENO;
        linect = wordct = charct = 0;
        if (file) {
        linect = wordct = charct = 0;
        if (file) {
-               if ((fd = open(file, O_RDONLY, 0)) < 0) {
-                       perror(file);
-                       exit(1);
-               }
-               if (!doword) {
-                       /*
-                        * line counting is split out because it's a lot
-                        * faster to get lines than to get words, since
-                        * the word count requires some logic.
-                        */
-                       if (doline) {
-                               while(len = read(fd, buf, MAXBSIZE)) {
-                                       if (len == -1) {
-                                               perror(file);
-                                               exit(1);
-                                       }
-                                       charct += len;
-                                       for (C = buf; len--; ++C)
-                                               if (*C == '\n')
-                                                       ++linect;
-                               }
-                               tlinect += linect;
-                               printf(" %7ld", linect);
-                               if (dochar) {
-                                       tcharct += charct;
-                                       printf(" %7ld", charct);
-                               }
-                               close(fd);
-                               return;
+               if ((fd = open(file, O_RDONLY, 0)) < 0)
+                       err("%s: %s", file, strerror(errno));
+               if (doword)
+                       goto word;
+               /*
+                * Line counting is split out because it's a lot faster to get
+                * lines than to get words, since the word count requires some
+                * logic.
+                */
+               if (doline) {
+                       while (len = read(fd, buf, MAXBSIZE)) {
+                               if (len == -1)
+                                       err("%s: %s", file, strerror(errno));
+                               charct += len;
+                               for (p = buf; len--; ++p)
+                                       if (*p == '\n')
+                                               ++linect;
                        }
                        }
-                       /*
-                        * if all we need is the number of characters and
-                        * it's a directory or a regular or linked file, just
-                        * stat the puppy.  We avoid testing for it not being
-                        * a special device in case someone adds a new type
-                        * of inode.
-                        */
+                       tlinect += linect;
+                       (void)printf(" %7lu", linect);
                        if (dochar) {
                        if (dochar) {
-                               if (fstat(fd, &sbuf)) {
-                                       perror(file);
-                                       exit(1);
-                               }
-                               if (sbuf.st_mode & (S_IFREG | S_IFLNK | S_IFDIR)) {
-                                       printf(" %7ld", sbuf.st_size);
-                                       tcharct += sbuf.st_size;
-                                       close(fd);
-                                       return;
-                               }
+                               tcharct += charct;
+                               (void)printf(" %7lu", charct);
                        }
                        }
+                       (void)close(fd);
+                       return;
                }
                }
-       }
-       else
-               fd = 0;
-       /* do it the hard way... */
-       for (gotsp = 1; len = read(fd, buf, MAXBSIZE);) {
-               if (len == -1) {
-                       perror(file);
-                       exit(1);
+               /*
+                * If all we need is the number of characters and it's a
+                * regular or linked file, just stat the puppy.
+                */
+               if (dochar) {
+                       if (fstat(fd, &sb))
+                               err("%s: %s", file, strerror(errno));
+                       if (S_ISREG(sb.st_mode) || S_ISLNK(sb.st_mode)) {
+                               (void)printf(" %7lu", sb.st_size);
+                               tcharct += sb.st_size;
+                               (void)close(fd);
+                               return;
+                       }
                }
                }
+       }
+
+       /* Do it the hard way... */
+word:  for (gotsp = 1; len = read(fd, buf, MAXBSIZE);) {
+               if (len == -1)
+                       err("%s: %s", file, strerror(errno));
+               /*
+                * This loses in the presence of multi-byte characters.
+                * To do it right would require a function to return a
+                * character while knowing how many bytes it consumed.
+                */
                charct += len;
                charct += len;
-               for (C = buf; len--; ++C)
-                       switch(*C) {
-                               case NL:
-                                       ++linect;
-                               case TAB:
-                               case SPACE:
-                                       gotsp = 1;
-                                       continue;
-                               default:
-#ifdef notdef
-                                       /*
-                                        * This line of code implements the
-                                        * original V7 wc algorithm, i.e.
-                                        * a non-printing character doesn't
-                                        * toggle the "word" count, so that
-                                        * "  ^D^F  " counts as 6 spaces,
-                                        * while "foo^D^Fbar" counts as 8
-                                        * characters.
-                                        *
-                                        * test order is important -- gotsp
-                                        * will normally be NO, so test it
-                                        * first
-                                        */
-                                       if (gotsp && *C > SPACE && *C < DEL) {
-#endif
-                                       /*
-                                        * This line implements the manual
-                                        * page, i.e. a word is a "maximal
-                                        * string of characters delimited by
-                                        * spaces, tabs or newlines."  Notice
-                                        * nothing was said about a character
-                                        * being printing or non-printing.
-                                        */
-                                       if (gotsp) {
-                                               gotsp = 0;
-                                               ++wordct;
-                                       }
+               for (p = buf; len--;) {
+                       ch = *p++;
+                       if (ch == '\n')
+                               ++linect;
+                       if (isspace(ch))
+                               gotsp = 1;
+                       else if (gotsp) {
+                               gotsp = 0;
+                               ++wordct;
                        }
                        }
+               }
        }
        if (doline) {
                tlinect += linect;
        }
        if (doline) {
                tlinect += linect;
-               printf(" %7ld", linect);
+               (void)printf(" %7lu", linect);
        }
        if (doword) {
                twordct += wordct;
        }
        if (doword) {
                twordct += wordct;
-               printf(" %7ld", wordct);
+               (void)printf(" %7lu", wordct);
        }
        if (dochar) {
                tcharct += charct;
        }
        if (dochar) {
                tcharct += charct;
-               printf(" %7ld", charct);
+               (void)printf(" %7lu", charct);
        }
        }
-       close(fd);
+       (void)close(fd);
+}
+
+void
+usage()
+{
+       (void)fprintf(stderr, "usage: wc [-clw] [files]\n");
+       exit(1);
+}
+
+#if __STDC__
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+void
+#if __STDC__
+err(const char *fmt, ...)
+#else
+err(fmt, va_alist)
+       char *fmt;
+        va_dcl
+#endif
+{
+       va_list ap;
+#if __STDC__
+       va_start(ap, fmt);
+#else
+       va_start(ap);
+#endif
+       (void)fprintf(stderr, "wc: ");
+       (void)vfprintf(stderr, fmt, ap);
+       va_end(ap);
+       (void)fprintf(stderr, "\n");
+       exit(1);
+       /* NOTREACHED */
 }
 }