BSD 4_4 release
[unix-history] / usr / src / usr.bin / sed / compile.c
index 26bda02..944a226 100644 (file)
@@ -1,16 +1,42 @@
 /*-
  * Copyright (c) 1992 Diomidis Spinellis.
 /*-
  * Copyright (c) 1992 Diomidis Spinellis.
- * Copyright (c) 1992 The Regents of the University of California.
- * All rights reserved.
+ * Copyright (c) 1992, 1993
+ *     The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Diomidis Spinellis of Imperial College, University of London.
  *
  *
  * This code is derived from software contributed to Berkeley by
  * Diomidis Spinellis of Imperial College, University of London.
  *
- * %sccs.include.redist.c%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
  */
 
 #ifndef lint
  */
 
 #ifndef lint
-static char sccsid[] = "@(#)compile.c  5.4 (Berkeley) %G%";
+static char sccsid[] = "@(#)compile.c  8.1 (Berkeley) 6/6/93";
 #endif /* not lint */
 
 #include <sys/types.h>
 #endif /* not lint */
 
 #include <sys/types.h>
@@ -28,19 +54,30 @@ static char sccsid[] = "@(#)compile.c       5.4 (Berkeley) %G%";
 #include "defs.h"
 #include "extern.h"
 
 #include "defs.h"
 #include "extern.h"
 
+#define LHSZ   128
+#define        LHMASK  (LHSZ - 1)
+static struct labhash {
+       struct  labhash *lh_next;
+       u_int   lh_hash;
+       struct  s_command *lh_cmd;
+       int     lh_ref;
+} *labels[LHSZ];
+
 static char     *compile_addr __P((char *, struct s_addr *));
 static char     *compile_delimited __P((char *, char *));
 static char     *compile_flags __P((char *, struct s_subst *));
 static char     *compile_addr __P((char *, struct s_addr *));
 static char     *compile_delimited __P((char *, char *));
 static char     *compile_flags __P((char *, struct s_subst *));
-static char     *compile_re __P((char *, regex_t **, int));
+static char     *compile_re __P((char *, regex_t **));
 static char     *compile_subst __P((char *, struct s_subst *));
 static char     *compile_text __P((void));
 static char     *compile_tr __P((char *, char **));
 static struct s_command
                **compile_stream __P((char *, struct s_command **, char *));
 static char     *compile_subst __P((char *, struct s_subst *));
 static char     *compile_text __P((void));
 static char     *compile_tr __P((char *, char **));
 static struct s_command
                **compile_stream __P((char *, struct s_command **, char *));
-static char     *duptoeol __P((char *));
+static char     *duptoeol __P((char *, char *));
+static void      enterlabel __P((struct s_command *));
 static struct s_command
 static struct s_command
-                *findlabel __P((struct s_command *, struct s_command *));
+                *findlabel __P((char *));
 static void      fixuplabel __P((struct s_command *, struct s_command *));
 static void      fixuplabel __P((struct s_command *, struct s_command *));
+static void      uselabel __P((void));
 
 /*
  * Command specification.  This is used to drive the command parser.
 
 /*
  * Command specification.  This is used to drive the command parser.
@@ -82,9 +119,6 @@ static struct s_format cmd_fmts[] = {
        {'\0', 0, COMMENT},
 };
 
        {'\0', 0, COMMENT},
 };
 
-/* Maximum number of parenthesized regular expressions found. */
-static int nsub_max;
-
 /* The compiled program. */
 struct s_command *prog;
 
 /* The compiled program. */
 struct s_command *prog;
 
@@ -96,8 +130,10 @@ void
 compile()
 {
        *compile_stream(NULL, &prog, NULL) = NULL;
 compile()
 {
        *compile_stream(NULL, &prog, NULL) = NULL;
-       fixuplabel(prog, prog);
+       fixuplabel(prog, NULL);
+       uselabel();
        appends = xmalloc(sizeof(struct s_appends) * appendnum);
        appends = xmalloc(sizeof(struct s_appends) * appendnum);
+       match = xmalloc((maxnsub + 1) * sizeof(regmatch_t));
 }
 
 #define EATSPACE() do {                                                        \
 }
 
 #define EATSPACE() do {                                                        \
@@ -218,7 +254,7 @@ nonsel:             /* Now parse the command */
                        EATSPACE();
                        if (*p == '\0')
                                err(COMPILE, "filename expected");
                        EATSPACE();
                        if (*p == '\0')
                                err(COMPILE, "filename expected");
-                       cmd->t = duptoeol(p);
+                       cmd->t = duptoeol(p, "w command");
                        if (aflag)
                                cmd->u.fd = -1;
                        else if ((cmd->u.fd = open(p, 
                        if (aflag)
                                cmd->u.fd = -1;
                        else if ((cmd->u.fd = open(p, 
@@ -232,7 +268,7 @@ nonsel:             /* Now parse the command */
                        if (*p == '\0')
                                err(COMPILE, "filename expected");
                        else
                        if (*p == '\0')
                                err(COMPILE, "filename expected");
                        else
-                               cmd->t = duptoeol(p);
+                               cmd->t = duptoeol(p, "read command");
                        break;
                case BRANCH:                    /* b t */
                        p++;
                        break;
                case BRANCH:                    /* b t */
                        p++;
@@ -240,14 +276,15 @@ nonsel:           /* Now parse the command */
                        if (*p == '\0')
                                cmd->t = NULL;
                        else
                        if (*p == '\0')
                                cmd->t = NULL;
                        else
-                               cmd->t = duptoeol(p);
+                               cmd->t = duptoeol(p, "branch");
                        break;
                case LABEL:                     /* : */
                        p++;
                        EATSPACE();
                        break;
                case LABEL:                     /* : */
                        p++;
                        EATSPACE();
-                       cmd->t = duptoeol(p);
+                       cmd->t = duptoeol(p, "label");
                        if (strlen(p) == 0)
                                err(COMPILE, "empty label");
                        if (strlen(p) == 0)
                                err(COMPILE, "empty label");
+                       enterlabel(cmd);
                        break;
                case SUBST:                     /* s */
                        p++;
                        break;
                case SUBST:                     /* s */
                        p++;
@@ -255,12 +292,9 @@ nonsel:            /* Now parse the command */
                                err(COMPILE,
 "substitute pattern can not be delimited by newline or backslash");
                        cmd->u.s = xmalloc(sizeof(struct s_subst));
                                err(COMPILE,
 "substitute pattern can not be delimited by newline or backslash");
                        cmd->u.s = xmalloc(sizeof(struct s_subst));
-                       p = compile_re(p, &cmd->u.s->re, 0);
+                       p = compile_re(p, &cmd->u.s->re);
                        if (p == NULL)
                                err(COMPILE, "unterminated substitute pattern");
                        if (p == NULL)
                                err(COMPILE, "unterminated substitute pattern");
-                       if (cmd->u.s->re != NULL &&
-                           nsub_max < cmd->u.s->re->re_nsub)
-                               nsub_max = cmd->u.s->re->re_nsub;
                        --p;
                        p = compile_subst(p, cmd->u.s);
                        p = compile_flags(p, cmd->u.s);
                        --p;
                        p = compile_subst(p, cmd->u.s);
                        p = compile_flags(p, cmd->u.s);
@@ -317,7 +351,9 @@ compile_delimited(p, d)
                        *d++ = '\n';
                        p += 2;
                        continue;
                        *d++ = '\n';
                        p += 2;
                        continue;
-               } else if (*p == c) {
+               } else if (*p == '\\' && p[1] == '\\')
+                       *d++ = *p++;
+               else if (*p == c) {
                        *d = '\0';
                        return (p + 1);
                }
                        *d = '\0';
                        return (p + 1);
                }
@@ -336,10 +372,9 @@ compile_delimited(p, d)
  * Cflags are passed to regcomp.
  */
 static char *
  * Cflags are passed to regcomp.
  */
 static char *
-compile_re(p, repp, cflags)
+compile_re(p, repp)
        char *p;
        regex_t **repp;
        char *p;
        regex_t **repp;
-       int cflags;
 {
        int eval;
        char re[_POSIX2_LINE_MAX + 1];
 {
        int eval;
        char re[_POSIX2_LINE_MAX + 1];
@@ -350,8 +385,10 @@ compile_re(p, repp, cflags)
                return (p);
        }
        *repp = xmalloc(sizeof(regex_t));
                return (p);
        }
        *repp = xmalloc(sizeof(regex_t));
-       if (p && (eval = regcomp(*repp, re, cflags)) != 0)
+       if (p && (eval = regcomp(*repp, re, 0)) != 0)
                err(COMPILE, "RE error: %s", strregerror(eval, *repp));
                err(COMPILE, "RE error: %s", strregerror(eval, *repp));
+       if (maxnsub < (*repp)->re_nsub)
+               maxnsub = (*repp)->re_nsub;
        return (p);
 }
 
        return (p);
 }
 
@@ -386,13 +423,13 @@ compile_subst(p, s)
                                if (strchr("123456789", *p) != NULL) {
                                        *sp++ = '\\';
                                        ref = *p - '0';
                                if (strchr("123456789", *p) != NULL) {
                                        *sp++ = '\\';
                                        ref = *p - '0';
-                                       if (s->maxbref < ref)
-                                               s->maxbref = ref;
                                        if (s->re != NULL &&
                                            ref > s->re->re_nsub)
                                                err(COMPILE,
 "\\%c not defined in the RE", *p);
                                        if (s->re != NULL &&
                                            ref > s->re->re_nsub)
                                                err(COMPILE,
 "\\%c not defined in the RE", *p);
-                               } else if (*p == '&')
+                                       if (s->maxbref < ref)
+                                               s->maxbref = ref;
+                               } else if (*p == '&' || *p == '\\')
                                        *sp++ = '\\';
                        } else if (*p == c) {
                                p++;
                                        *sp++ = '\\';
                        } else if (*p == c) {
                                p++;
@@ -584,7 +621,7 @@ compile_addr(p, a)
                ++p;
                /* FALLTHROUGH */
        case '/':                               /* Context address */
                ++p;
                /* FALLTHROUGH */
        case '/':                               /* Context address */
-               p = compile_re(p, &a->u.r, REG_NOSUB);
+               p = compile_re(p, &a->u.r);
                if (p == NULL)
                        err(COMPILE, "unterminated regular expression");
                a->type = AT_RE;
                if (p == NULL)
                        err(COMPILE, "unterminated regular expression");
                a->type = AT_RE;
@@ -606,86 +643,129 @@ compile_addr(p, a)
 }
 
 /*
 }
 
 /*
- * Return a copy of all the characters up to \n or \0
+ * duptoeol --
+ *     Return a copy of all the characters up to \n or \0.
  */
 static char *
  */
 static char *
-duptoeol(s)
+duptoeol(s, ctype)
        register char *s;
        register char *s;
+       char *ctype;
 {
        size_t len;
 {
        size_t len;
+       int ws;
        char *start;
 
        char *start;
 
-       for (start = s; *s != '\0' && *s != '\n'; ++s);
+       ws = 0;
+       for (start = s; *s != '\0' && *s != '\n'; ++s)
+               ws = isspace(*s);
        *s = '\0';
        *s = '\0';
+       if (ws)
+               err(WARNING, "whitespace after %s", ctype);
        len = s - start + 1;
        return (memmove(xmalloc(len), start, len));
 }
 
 /*
        len = s - start + 1;
        return (memmove(xmalloc(len), start, len));
 }
 
 /*
- * Find the label contained in the command l in the command linked list cp.
- * L is excluded from the search.  Return NULL if not found.
- */
-static struct s_command *
-findlabel(l, cp)
-       struct s_command *l, *cp;
-{
-       struct s_command *r;
-
-       for (; cp; cp = cp->next)
-               if (cp->code == ':' && cp != l && strcmp(l->t, cp->t) == 0)
-                       return (cp);
-               else if (cp->code == '{' && (r = findlabel(l, cp->u.c)))
-                       return (r);
-       return (NULL);
-}
-
-/*
- * Convert goto label names to addresses.
- * Detect duplicate labels.
- * Set appendnum to the number of a and r commands in the script.
- * Free the memory used by labels in b and t commands (but not by :)
- * Root is a pointer to the script linked list; cp points to the
- * search start.
+ * Convert goto label names to addresses, and count a and r commands, in
+ * the given subset of the script.  Free the memory used by labels in b
+ * and t commands (but not by :).
+ *
  * TODO: Remove } nodes
  */
 static void
  * TODO: Remove } nodes
  */
 static void
-fixuplabel(root, cp)
-       struct s_command *root, *cp;
+fixuplabel(cp, end)
+       struct s_command *cp, *end;
 {
 {
-       struct s_command *cp2;
 
 
-       for (; cp; cp = cp->next)
+       for (; cp != end; cp = cp->next)
                switch (cp->code) {
                switch (cp->code) {
-               case ':':
-                       if (findlabel(cp, root))
-                               err(COMPILE2, "duplicate label %s", cp->t);
-                       break;
                case 'a':
                case 'r':
                        appendnum++;
                        break;
                case 'b':
                case 't':
                case 'a':
                case 'r':
                        appendnum++;
                        break;
                case 'b':
                case 't':
+                       /* Resolve branch target. */
                        if (cp->t == NULL) {
                                cp->u.c = NULL;
                                break;
                        }
                        if (cp->t == NULL) {
                                cp->u.c = NULL;
                                break;
                        }
-                       if ((cp2 = findlabel(cp, root)) == NULL)
+                       if ((cp->u.c = findlabel(cp->t)) == NULL)
                                err(COMPILE2, "undefined label '%s'", cp->t);
                        free(cp->t);
                                err(COMPILE2, "undefined label '%s'", cp->t);
                        free(cp->t);
-                       cp->u.c = cp2;
-                       break;
-               case 's':
-                       if (cp->u.s->re == NULL)
-                               cp->u.s->pmatch = xmalloc((nsub_max + 1) *
-                                   sizeof(regmatch_t));
-                       else
-                               cp->u.s->pmatch =
-                                   xmalloc((cp->u.s->re->re_nsub + 1) *
-                                   sizeof(regmatch_t));
                        break;
                case '{':
                        break;
                case '{':
-                       fixuplabel(root, cp->u.c);
+                       /* Do interior commands. */
+                       fixuplabel(cp->u.c, cp->next);
                        break;
                }
 }
                        break;
                }
 }
+
+/*
+ * Record the label command cp in the label hash table, keyed by its
+ * label text (cp->t), so that it can be looked up by name later.  A
+ * "duplicate label" error is reported if a label with the same text has
+ * already been entered.
+ */
+static void
+enterlabel(cp)
+       struct s_command *cp;
+{
+       struct labhash **bucket, *ent;
+       u_char *s;
+       u_int hash;
+
+       /* hash = hash * 33 + byte, over every byte of the label text. */
+       hash = 0;
+       s = (u_char *)cp->t;
+       while (*s != 0) {
+               hash = (hash << 5) + hash + *s;
+               s++;
+       }
+
+       /* Look for an existing label with the same text in this bucket. */
+       bucket = &labels[hash & LHMASK];
+       ent = *bucket;
+       while (ent != NULL) {
+               if (ent->lh_hash == hash && strcmp(cp->t, ent->lh_cmd->t) == 0)
+                       err(COMPILE2, "duplicate label '%s'", cp->t);
+               ent = ent->lh_next;
+       }
+
+       /* Push a new, not-yet-referenced entry onto the front of the chain. */
+       ent = xmalloc(sizeof *ent);
+       ent->lh_hash = hash;
+       ent->lh_cmd = cp;
+       ent->lh_ref = 0;
+       ent->lh_next = *bucket;
+       *bucket = ent;
+}
+
+/*
+ * Look up the label called name in the label hash table.  A label that
+ * is found is marked as referenced.  Returns the label's command, or
+ * NULL if no label with that name has been entered.
+ */
+static struct s_command *
+findlabel(name)
+       char *name;
+{
+       struct labhash *ent;
+       u_char *s;
+       u_int hash;
+
+       /* Same hash function that enterlabel() uses when inserting. */
+       hash = 0;
+       for (s = (u_char *)name; *s != 0; s++)
+               hash = (hash << 5) + hash + *s;
+
+       ent = labels[hash & LHMASK];
+       while (ent != NULL) {
+               /* Compare the cheap full hash first, the text second. */
+               if (ent->lh_hash != hash || strcmp(name, ent->lh_cmd->t) != 0) {
+                       ent = ent->lh_next;
+                       continue;
+               }
+               ent->lh_ref = 1;
+               return (ent->lh_cmd);
+       }
+       return (NULL);
+}
+
+/*
+ * Warn about any unused labels (entries whose lh_ref was never set by
+ * findlabel()).  As a side effect, release the label hash table space
+ * and leave every bucket empty, so the table holds no pointers to freed
+ * entries afterwards.
+ */
+static void
+uselabel()
+{
+       register struct labhash *lh, *next;
+       register int i;
+
+       for (i = 0; i < LHSZ; i++) {
+               for (lh = labels[i]; lh != NULL; lh = next) {
+                       /* Save the link before the entry is freed. */
+                       next = lh->lh_next;
+                       if (!lh->lh_ref)
+                               err(WARNING, "unused label '%s'",
+                                   lh->lh_cmd->t);
+                       free(lh);
+               }
+               /* The whole chain is freed; drop the dangling head. */
+               labels[i] = NULL;
+       }
+}