date and time created 90/06/25 13:56:22 by bostic
[unix-history] / usr / src / usr.bin / wc / wc.c
index 30d6b81..64666d2 100644 (file)
-static char *sccsid = "@(#)wc.c        4.1 (Berkeley) %G%";
-/* wc line and word count */
+/*
+ * Copyright (c) 1980, 1987 Regents of the University of California.
+ * All rights reserved.
+ *
+ * %sccs.include.redist.c%
+ */
 
 
+#ifndef lint
+char copyright[] =
+"@(#) Copyright (c) 1980, 1987 Regents of the University of California.\n\
+ All rights reserved.\n";
+#endif /* not lint */
+
+#ifndef lint
+static char sccsid[] = "@(#)wc.c       5.6 (Berkeley) %G%";
+#endif /* not lint */
+
+/* wc line, word and char count */
+
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/file.h>
 #include <stdio.h>
-long   linect, wordct, charct, pagect;
-long   tlinect, twordct, tcharct, tpagect;
-int    baud=300;       /* baud rate */
-int    cps=30;         /* # of chars per second */
-int    lpp=66;         /* # of lines per page */
-char   *wd = "lwc";
-int    verbose;
-int    uucp;
 
 
-main(argc, argv)
-char **argv;
-{
-       int i, token;
-       register FILE *fp;
-       register int c;
-       char *p;
+#define DEL    0177                    /* del char */
+#define NL     012                     /* newline char */
+#define SPACE  040                     /* space char */
+#define TAB    011                     /* tab char */
 
 
-       while (argc > 1 && *argv[1] == '-') {
-               switch (argv[1][1]) {
-               case 'l': case 'w': case 'c': case 'p': case 't':
-                       wd = argv[1]+1;
-                       break;
-               case 's':
-                       lpp = atoi(argv[1]+2);
-                       break;
-               case 'v':
-                       verbose++;
-                       wd = "lwcpt";
-                       break;
-               case 'u':
-                       uucp++;
-                       break;
-               case 'b':
-                       baud = atoi(argv[1]+2);
-                       if (baud == 110)
-                               cps = 10;
-                       else
-                               cps = baud / 10;
-                       break;
-               }
-               argc--;
-               argv++;
-       }
+static long    tlinect, twordct, tcharct;
+static int     doline, doword, dochar;
 
 
-       if (uucp)
-               cps = cps * 9 / 10;     /* 27 cps at 300 baud */
+main(argc, argv)
+       int argc;
+       char **argv;
+{
+       extern int optind;
+       register int ch;
+       int total;
 
 
-       if (verbose) {
-               for (p=wd; *p; p++)
-                       switch(*p) {
+       /*
+        * wc is unusual in that its flags are on by default, so,
+        * if you don't get any arguments, you have to turn them
+        * all on.
+        */
+       if (argc > 1 && argv[1][0] == '-' && argv[1][1]) {
+               while ((ch = getopt(argc, argv, "lwc")) != EOF)
+                       switch((char)ch) {
                        case 'l':
-                               printf("lines\t");
+                               doline = 1;
                                break;
                        case 'w':
-                               printf("words\t");
+                               doword = 1;
                                break;
                        case 'c':
-                               printf("chars\t");
-                               break;
-                       case 'p':
-                               printf("pages\t");
-                               break;
-                       case 't':
-                               printf("time@%d\t",baud);
+                               dochar = 1;
                                break;
+                       case '?':
+                       default:
+                               fputs("usage: wc [-lwc] [files]\n", stderr);
+                               exit(1);
                        }
-               printf("\n");
+               argv += optind;
+               argc -= optind;
+       }
+       else {
+               ++argv;
+               --argc;
+               doline = doword = dochar = 1;
        }
 
-       i = 1;
-       fp = stdin;
-       do {
-               if(argc>1 && (fp=fopen(argv[i], "r")) == NULL) {
-                       fprintf(stderr, "wc: can't open %s\n", argv[i]);
-                       continue;
-               }
-               linect = 0;
-               wordct = 0;
-               charct = 0;
-               pagect = 0;
-               token = 0;
-               for(;;) {
-                       c = getc(fp);
-                       if (c == EOF)
-                               break;
-                       charct++;
-                       if(' '<c&&c<0177) {
-                               if(!token) {
-                                       wordct++;
-                                       token++;
-                               }
-                               continue;
-                       }
-                       if(c=='\n') {
-                               linect++;
-                               if (linect % lpp == 1)
-                                       pagect++;
-                       }
-                       else if(c!=' '&&c!='\t')
-                               continue;
-                       token = 0;
-               }
-               /* print lines, words, chars */
-               wcp(wd, charct, wordct, linect, pagect);
-               if(argc>1) {
-                       printf(" %s\n", argv[i]);
-               } else
-                       printf("\n");
-               fclose(fp);
-               tlinect += linect;
-               twordct += wordct;
-               tcharct += charct;
-               tpagect += pagect;
-       } while(++i<argc);
-       if(argc > 2) {
-               wcp(wd, tcharct, twordct, tlinect, tpagect);
-               printf(" total\n");
+       total = 0;
+       if (!*argv) {
+               cnt((char *)NULL);
+               putchar('\n');
+       }
+       else do {
+               cnt(*argv);
+               printf(" %s\n", *argv);
+               ++total;
+       } while(*++argv);
+
+       if (total > 1) {
+               if (doline)
+                       printf(" %7ld", tlinect);
+               if (doword)
+                       printf(" %7ld", twordct);
+               if (dochar)
+                       printf(" %7ld", tcharct);
+               puts(" total");
        }
        exit(0);
 }
 
-wcp(wd, charct, wordct, linect, pagect)
-register char *wd;
-long charct; long wordct; long linect, pagect;
+static
+cnt(file)
+       char *file;
 {
-       while (*wd) switch (*wd++) {
-       case 'l':
-               ipr(linect);
-               break;
-
-       case 'w':
-               ipr(wordct);
-               break;
-
-       case 'c':
-               ipr(charct);
-               break;
+       register u_char *C;
+       register short gotsp;
+       register int len;
+       register long linect, wordct, charct;
+       struct stat sbuf;
+       int fd;
+       u_char buf[MAXBSIZE];
 
 
-       case 'p':
-               ipr(pagect);
-               break;
-
-       case 't':
-               prttime(charct/cps);
-               break;
+       linect = wordct = charct = 0;
+       if (file) {
+               if ((fd = open(file, O_RDONLY, 0)) < 0) {
+                       perror(file);
+                       exit(1);
+               }
+               if (!doword) {
+                       /*
+                        * line counting is split out because it's a lot
+                        * faster to get lines than to get words, since
+                        * the word count requires some logic.
+                        */
+                       if (doline) {
+                               while(len = read(fd, buf, MAXBSIZE)) {
+                                       if (len == -1) {
+                                               perror(file);
+                                               exit(1);
+                                       }
+                                       charct += len;
+                                       for (C = buf; len--; ++C)
+                                               if (*C == '\n')
+                                                       ++linect;
+                               }
+                               tlinect += linect;
+                               printf(" %7ld", linect);
+                               if (dochar) {
+                                       tcharct += charct;
+                                       printf(" %7ld", charct);
+                               }
+                               close(fd);
+                               return;
+                       }
+                       /*
+                        * if all we need is the number of characters and
+                        * it's a directory or a regular or linked file, just
+                        * stat the puppy.  We avoid testing for it not being
+                        * a special device in case someone adds a new type
+                        * of inode.
+                        */
+                       if (dochar) {
+                               if (fstat(fd, &sbuf)) {
+                                       perror(file);
+                                       exit(1);
+                               }
+                               if (sbuf.st_mode & (S_IFREG | S_IFLNK | S_IFDIR)) {
+                                       printf(" %7ld", sbuf.st_size);
+                                       tcharct += sbuf.st_size;
+                                       close(fd);
+                                       return;
+                               }
+                       }
+               }
        }
-}
-
-ipr(num)
-long num;
-{
-       if (verbose)
-               printf("%ld\t", num);
        else
-               printf("%7ld", num);
-}
-
-prttime(secs)
-long secs;
-{
-       int hrs,mins;
-       float t;
-       long osecs;
-       char *units;
-
-       osecs = secs;
-       hrs = secs / (60*60);
-       secs = secs % (60*60);
-       mins = secs / 60;
-       secs = secs % 60;
-
-       t = osecs;
-       if (hrs) {
-               t /= (60*60);
-               units = "hr";
-       } else if (mins) {
-               t /= 60;
-               units = "mi";
-       } else {
-               units = "se";
+               fd = 0;
+       /* do it the hard way... */
+       for (gotsp = 1; len = read(fd, buf, MAXBSIZE);) {
+               if (len == -1) {
+                       perror(file);
+                       exit(1);
+               }
+               charct += len;
+               for (C = buf; len--; ++C)
+                       switch(*C) {
+                               case NL:
+                                       ++linect;
+                               case TAB:
+                               case SPACE:
+                                       gotsp = 1;
+                                       continue;
+                               default:
+#ifdef notdef
+                                       /*
+                                        * This line of code implements the
+                                        * original V7 wc algorithm, i.e.
+                                        * a non-printing character doesn't
+                                        * toggle the "word" count, so that
+                                        * "  ^D^F  " counts as 6 spaces,
+                                        * while "foo^D^Fbar" counts as 8
+                                        * characters.
+                                        *
+                                        * test order is important -- gotsp
+                                        * will normally be NO, so test it
+                                        * first
+                                        */
+                                       if (gotsp && *C > SPACE && *C < DEL) {
+#endif
+                                       /*
+                                        * This line implements the manual
+                                        * page, i.e. a word is a "maximal
+                                        * string of characters delimited by
+                                        * spaces, tabs or newlines."  Notice
+                                        * nothing was said about a character
+                                        * being printing or non-printing.
+                                        */
+                                       if (gotsp) {
+                                               gotsp = 0;
+                                               ++wordct;
+                                       }
+                       }
+       }
+       if (doline) {
+               tlinect += linect;
+               printf(" %7ld", linect);
+       }
+       if (doword) {
+               twordct += wordct;
+               printf(" %7ld", wordct);
+       }
+       if (dochar) {
+               tcharct += charct;
+               printf(" %7ld", charct);
        }
-       printf("%4.1f %2s\t", t, units);
+       close(fd);
 }