BSD 4_4 release
[unix-history] / usr / src / contrib / mh-6.8 / zotnet / mf / mf.c
/* mf.c - mail filter subroutines */
#ifndef lint
static char ident[] = "@(#)$Id: mf.c,v 1.10 1992/12/15 00:20:22 jromine Exp $";
#endif /* lint */
#include "mf.h"
#include <ctype.h>
#include <stdio.h>
/* Forward declarations (old-style, without prototypes) of the file-local
 * helpers that are defined further down in this file. */
static int isat(), parse_address(), phrase();
static int route_addr(), local_part(), domain(), route();
static int my_lex();
static void compress();
/* \f */
/*
 * getcpy() - return a freshly allocated copy of the string s.
 *
 * The allocation is strlen(s) + 2 bytes, i.e. one byte more than the
 * copy itself needs.  Passing NULL is treated as a fatal error: stdio
 * output is flushed and the process aborts.
 *
 * Returns the copy, or NULL when malloc() fails.  The caller owns the
 * result and releases it with free().
 */
static char *getcpy (s)
register char *s;
{
    register char *p;

    if (s == NULL) {
        /* fflush(NULL) stands in for the non-standard _cleanup() call;
         * abort() does not return, so nothing needs to follow it. */
        (void) fflush (NULL);
        abort ();
    }

    p = malloc ((unsigned) (strlen (s) + 2));
    if (p != NULL)
        (void) strcpy (p, s);
    return p;
}
/*
 * add() - build a new string consisting of s2 followed by s1.
 *
 * When s2 is NULL the result is simply a copy of s1.  Otherwise s2 is
 * released with free() before returning, whether or not the new string
 * could be allocated.  Returns the new string, or NULL when malloc()
 * fails.
 */
static char *add (s1, s2)
register char *s1,
              *s2;
{
    register char *joined;

    if (s2 == NULL)
        return getcpy (s1);

    joined = malloc ((unsigned) (strlen (s1) + strlen (s2) + 2));
    if (joined != NULL) {
        /* old text first, new text right behind it */
        (void) strcpy (joined, s2);
        (void) strcpy (joined + strlen (s2), s1);
    }
    free (s2);
    return joined;
}
/* \f */
/*
 * isfrom() - report whether string begins with "From " or ">From ".
 *
 * Returns 1 when it does, 0 otherwise.
 */
int isfrom (string)
register char *string;
{
    if (strncmp (string, "From ", 5) == 0)
        return 1;
    return strncmp (string, ">From ", 6) == 0;
}
/*
 * lequal() - compare two strings for equality, ignoring letter case.
 *
 * Returns non-zero when a and b have the same length and every pair of
 * characters matches after folding lower case to upper case; returns
 * zero otherwise.
 */
int lequal (a, b)
register char *a,
              *b;
{
    for (; *a != 0 && *b != 0; a++, b++) {
        /* The <ctype.h> functions are only defined for unsigned char
         * values (and EOF), while plain char may be negative. */
        int c1 = (unsigned char) *a;
        int c2 = (unsigned char) *b;

        if (islower (c1))
            c1 = toupper (c1);
        if (islower (c2))
            c2 = toupper (c2);
        if (c1 != c2)
            break;
    }

    /* Equal only if both strings ended together; after a mismatch both
     * pointers still rest on a non-NUL character. */
    return (*a == 0 && *b == 0);
}
/* \f */
/*
*
* seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
* addresses, so for each list of addresses we see if we can find some
* character to give us a hint.
*
*/
/* Address styles remembered by seekadrx() from one call to the next. */
#define CHKADR 0 /* undetermined address style */
#define UNIXDR 1 /* UNIX-style address */
#define ARPADR 2 /* ARPAnet-style address */
/* Finding any of these characters makes seekadrx() choose ARPADR. */
static char *punctuators = ";<>.()[]";
/* uucpadrx() state: vp is its whitespace-compressed working copy of the
 * address list, tp the not yet parsed remainder (NULL once exhausted). */
static char *vp = NULL;
static char *tp = NULL;
/* Result record handed back by uucpadrx(). */
static struct adrx adrxs1;
/* \f */
/*
 * seekadrx() - hand back the next address of addrs, choosing the parser
 * by looking at the text.
 *
 * While the style is undetermined the list is scanned for any of the
 * punctuators: finding one selects getadrx(), finding none selects
 * uucpadrx().  The choice is kept in a static variable and reused on
 * later calls until the selected parser returns NULL, at which point
 * the style becomes undetermined again.
 *
 * Returns whatever the selected parser returned.
 */
struct adrx *seekadrx (addrs)
register char *addrs;
{
    static int state = CHKADR;
    register char *scan;
    register struct adrx *result;

    if (state == CHKADR) {
        state = UNIXDR;
        for (scan = addrs; *scan != 0; scan++) {
            if (index (punctuators, *scan)) {
                state = ARPADR;
                break;
            }
        }
    }

    if (state == UNIXDR)
        result = uucpadrx (addrs);
    else
        result = getadrx (addrs);

    if (result == NULL)
        state = CHKADR;
    return result;
}
/* \f */
/*
*
* uucpadrx() implements a partial UUCP-style address parser. It's based
* on the UUCP notion that addresses are separated by spaces or commas.
*
* Each call hands back the next address of the list.  The first call for
* a list copies addrs (compressing whitespace) into vp; later calls
* continue from tp and do not look at addrs again.  When the list is
* exhausted the copy is freed and NULL is returned.
*
* The result is the static record adrxs1.  Its text field is a heap copy
* that is freed on the next call; mbox and host point into the working
* copy vp; all other fields are cleared.
*
* NOTE(review): the result of getcpy() is used without a NULL check.
*
*/
struct adrx *uucpadrx (addrs)
register char *addrs;
{
register char *cp,
*wp,
*xp,
*yp,
*zp;
register struct adrx *adrxp = &adrxs1;
/* First call for this list: take a private, compressed copy. */
if (vp == NULL) {
vp = tp = getcpy (addrs);
compress (addrs, vp);
}
else
/* The previous call consumed the final address: release the copy. */
if (tp == NULL) {
free (vp);
vp = NULL;
return NULL;
}
/* Skip whitespace in front of the next address. */
for (cp = tp; isspace (*cp); cp++)
continue;
/* Only whitespace was left: the list is exhausted. */
if (*cp == 0) {
free (vp);
vp = tp = NULL;
return NULL;
}
/* \f */
/*
 * Terminate this address in place and leave tp at the start of the
 * next one (NULL when there is none).  If a comma occurs anywhere in
 * the remainder, the address ends there.  Only when no comma is left
 * does the first blank end the address, unless that blank begins
 * " at ", in which case the address runs through the word after it.
 */
if ((wp = index (cp, ',')) == NULL)
if ((wp = index (cp, ' ')) != NULL) {
xp = wp;
while (isspace (*xp))
xp++;
/* Step back onto the last blank so isat() can look for " at ". */
if (*xp != 0 && isat (--xp)) {
/* yp: first non-blank character following " at ". */
yp = xp + 4;
while (isspace (*yp))
yp++;
if (*yp != 0)
if ((zp = index (yp, ' ')) != NULL)
*zp = 0, tp = ++zp;
else
tp = NULL;
else
/* Nothing follows " at ": cut at the first blank after all. */
*wp = 0, tp = ++wp;
}
else
*wp = 0, tp = ++wp;
}
else
tp = NULL;
else
*wp = 0, tp = ++wp;
/* Replace the text copy made by the previous call. */
if (adrxp -> text)
free (adrxp -> text);
adrxp -> text = getcpy (cp);
adrxp -> mbox = cp;
adrxp -> host = adrxp -> path = NULL;
/* Split mailbox and host at the last '@'; an empty host becomes NULL. */
if ((wp = rindex (cp, '@')) != NULL) {
*wp++ = 0;
adrxp -> host = *wp ? wp : NULL;
}
else
/* No '@': scan backwards for " at " and split there instead.
 * NOTE(review): the loop does not stop at the first match, so an
 * " at " nearer the start of the address overrides one found later
 * in the text - confirm that this is intended. */
for (wp = cp + strlen (cp) - 4; wp >= cp; wp--)
if (isat (wp)) {
*wp++ = 0;
adrxp -> host = wp + 3;
}
adrxp -> pers = adrxp -> grp = adrxp -> note = adrxp -> err = NULL;
adrxp -> ingrp = 0;
return adrxp;
}
/* \f */
/*
 * compress() - copy the string fp to tp while normalising whitespace.
 *
 * Every run of whitespace characters becomes a single blank, and
 * whitespace at the very start and very end of the text is dropped.
 * The output is never longer than the input, so tp needs room for
 * strlen(fp) + 1 bytes.
 */
static void compress (fp, tp)
register char *fp,
              *tp;
{
    register char c,
                  *start = tp;
    int after_blank = 1;        /* acts as if a blank preceded the text,
                                   which discards leading whitespace */

    while ((c = *fp++) != 0) {
        /* <ctype.h> needs an unsigned char value; plain char may be
         * negative for bytes above 0x7f. */
        if (isspace ((unsigned char) c)) {
            if (!after_blank) {
                *tp++ = ' ';
                after_blank = 1;
            }
        }
        else {
            *tp++ = c;
            after_blank = 0;
        }
    }

    /* A blank written last is trailing whitespace: take it back. */
    if (after_blank && tp > start)
        tp--;
    *tp = 0;
}
/*
 * isat() - report whether p points at the four characters " at ",
 * with the two letters in either case.
 *
 * Returns TRUE on a match and FALSE otherwise.  Characters are examined
 * left to right and the test stops at the first one that differs, so a
 * string shorter than four characters is never read past its end.
 */
static int isat (p)
register char *p;
{
    if (p[0] == ' '
            && (p[1] == 'a' || p[1] == 'A')
            && (p[2] == 't' || p[2] == 'T')
            && p[3] == ' ')
        return TRUE;
    return FALSE;
}
/* \f */
/*
*
* getadrx() implements a partial 822-style address parser. The parser
* is neither complete nor correct. It does however recognize nearly all
* of the 822 address syntax. In addition it handles the majority of the
* 733 syntax as well. Most problems arise from trying to accommodate both.
*
* In terms of 822, the route-specification in
*
* "<" [route] local-part "@" domain ">"
*
* is parsed and returned unchanged. Multiple at-signs are compressed
* via source-routing. Recursive groups are not allowed as per the
* standard.
*
* In terms of 733, " at " is recognized as equivalent to "@".
*
* In terms of both the parser will not complain about missing hosts.
*
* -----
*
* We should not allow addresses like
*
* Marshall T. Rose <MRose@UCI>
*
* but should insist on
*
* "Marshall T. Rose" <MRose@UCI>
*
* Unfortunately, a lot of mailers stupidly let people get away with this.
*
* -----
*
* We should not allow addresses like
*
* <MRose@UCI>
*
* but should insist on
*
* MRose@UCI
*
* Unfortunately, a lot of mailers stupidly let people's UAs get away with
* this.
*
* -----
*
* We should not allow addresses like
*
* @UCI:MRose@UCI-750a
*
* but should insist on
*
* Marshall Rose <@UCI:MRose@UCI-750a>
*
* Unfortunately, a lot of mailers stupidly do this.
*
*/
/* \f */
/* Escape character honoured inside comments, quoted strings and
 * domain literals. */
#define QUOTE '\\'

/* Token codes produced by my_lex(). */
#define LX_END 0        /* end of input */
#define LX_ERR 1        /* malformed or illegal input */
#define LX_ATOM 2       /* atom */
#define LX_QSTR 3       /* "quoted string" */
#define LX_DLIT 4       /* [domain literal] */
#define LX_SEMI 5       /* ; */
#define LX_COMA 6       /* , */
#define LX_LBRK 7       /* < */
#define LX_RBRK 8       /* > */
#define LX_COLN 9       /* : */
#define LX_DOT 10       /* . */
#define LX_AT 11        /* @ */

/*
 * Characters that form a token by themselves, each with the code
 * my_lex() reports for it.  The list ends with an entry whose lx_chr
 * is 0.
 */
static struct {
    char lx_chr;
    int lx_val;
} special[] = {
    { ';', LX_SEMI },
    { ',', LX_COMA },
    { '<', LX_LBRK },
    { '>', LX_RBRK },
    { ':', LX_COLN },
    { '.', LX_DOT },
    { '@', LX_AT },
    { '(', LX_ERR },
    { ')', LX_ERR },
    { QUOTE, LX_ERR },
    { '"', LX_ERR },
    { '[', LX_ERR },
    { ']', LX_ERR },
    { 0, 0 }
};
/* \f */
/* State shared by getadrx(), parse_address(), my_lex() and the helpers
 * they call. */
static int glevel = 0; /* group depth: raised at ':', lowered at ';' */
static int ingrp = 0; /* value of glevel when the mailbox was parsed */
static int last_lex = LX_END; /* code of the token my_lex() returned last */
static char *dp = NULL; /* heap copy of the list being parsed, or NULL */
static char *cp = NULL; /* scan position in dp; NULL once the end is hit */
static char *ap = NULL; /* where the text of the current address starts */
/* Pieces of the current address; heap strings freed by the next
 * getadrx() call. */
static char *pers = NULL; /* phrase in front of the address */
static char *mbox = NULL; /* local part */
static char *host = NULL; /* domain */
static char *path = NULL; /* route, built by route() */
static char *grp = NULL; /* group phrase with ": " appended */
static char *note = NULL; /* comment text collected by my_lex() */
static char err[BUFSIZ]; /* error message; empty string when none */
static char adr[BUFSIZ]; /* text of the current address (adrx text field) */
static struct adrx adrxs2; /* result record handed back by getadrx() */
/* \f */
/*
 * getadrx() - hand back the next address of an 822-style address list.
 *
 * The first call for a list copies addrs (NULL is taken as "") and
 * parses from the start of that copy; later calls continue where the
 * previous one stopped and do not look at addrs again.  When the list
 * is exhausted the copy is freed and NULL is returned.
 *
 * The result is the static record adrxs2.  Its text field points at the
 * static buffer adr; pers, mbox, host, path, grp and note are heap
 * strings that are freed by the next call; err is NULL unless parsing
 * this address failed, in which case it points at the message.
 */
struct adrx *getadrx (addrs)
register char *addrs;
{
    register char *bp,
                  *src;
    register int room;
    register struct adrx *adrxp = &adrxs2;

    /* Release what was handed out for the previous address. */
    if (pers)
        free (pers);
    if (mbox)
        free (mbox);
    if (host)
        free (host);
    if (path)
        free (path);
    if (grp)
        free (grp);
    if (note)
        free (note);
    pers = mbox = host = path = grp = note = NULL;
    err[0] = 0;

    if (dp == NULL) {
        dp = cp = getcpy (addrs ? addrs : "");
        glevel = 0;
    }
    else
        if (cp == NULL) {
            /* the previous call reached the end of the list */
            free (dp);
            dp = NULL;
            return NULL;
        }

    switch (parse_address ()) {
        case DONE:
            free (dp);
            dp = cp = NULL;
            return NULL;

        case OK:
            switch (last_lex) {
                case LX_COMA:
                case LX_END:
                    break;

                default:        /* catch trailing comments */
                    bp = cp;
                    (void) my_lex (adr);
                    cp = bp;
                    break;
            }
            break;

        default:
            break;
    }

    /* After an error, discard tokens up to the next comma or the end
     * of the input so that the following call starts on a fresh
     * address. */
    if (err[0])
        while (last_lex != LX_COMA && last_lex != LX_END)
            (void) my_lex (adr);

    /* <ctype.h> needs an unsigned char value. */
    while (isspace ((unsigned char) *ap))
        ap++;

    /* Copy the text of this address, from ap up to cp (or to the end of
     * the string when cp is NULL), never writing more than adr can
     * hold. */
    room = (int) sizeof adr - 1;
    if (cp != NULL && cp - ap < room)
        room = (int) (cp - ap);
    for (bp = adr, src = ap; room > 0 && *src != 0; room--)
        *bp++ = *src++;
    *bp = 0;

    /* Drop one trailing separator; an empty text has nothing to drop. */
    if (bp > adr) {
        bp--;
        if (*bp == ',' || *bp == ';' || *bp == '\n')
            *bp = 0;
    }

    adrxp -> text = adr;
    adrxp -> pers = pers;
    adrxp -> mbox = mbox;
    adrxp -> host = host;
    adrxp -> path = path;
    adrxp -> grp = grp;
    adrxp -> ingrp = ingrp;
    adrxp -> note = note;
    adrxp -> err = err[0] ? err : NULL;
    return adrxp;
}
/* \f */
/*
 * parse_address() - parse one address beginning at the scan position cp.
 *
 * Sets ap to where the address text starts and fills pers, mbox, host,
 * path, grp, note and ingrp, partly through route_addr(), domain() and
 * phrase().
 *
 * Returns OK when an address (or an empty group) was recognised, NOTOK
 * after storing a message in err, and DONE when the input is used up.
 *
 * NOTE(review): tokens are lexed into buffer[BUFSIZ] and messages are
 * formatted into err[BUFSIZ] with sprintf(); no length check is visible
 * in this file.
 */
static int parse_address () {
char buffer[BUFSIZ];
again: ;
/* Remember where this address starts; getadrx() copies from here. */
ap = cp;
/* The first token. */
switch (my_lex (buffer)) {
case LX_ATOM:
case LX_QSTR:
pers = getcpy (buffer);
break;
case LX_SEMI:
/* A ';' closes a group, so one must be open. */
if (glevel-- <= 0) {
(void) strcpy (err, "extraneous semi-colon");
return NOTOK;
}
/* falls through: after the ';' continue as for a ',' */
case LX_COMA:
/* Empty list element: drop any collected comment and start over. */
if (note) {
free (note);
note = NULL;
}
goto again;
case LX_END:
return DONE;
case LX_LBRK: /* sigh (2) */
goto get_addr;
case LX_AT: /* sigh (3) */
/* Rewind so that route_addr() lexes the '@' itself. */
cp = ap;
if (route_addr (buffer) == NOTOK)
return NOTOK;
return OK; /* why be choosy? */
default:
(void) sprintf (err, "illegal address construct (%s)", buffer);
return NOTOK;
}
/* \f */
/* The second token decides what the first word (now in pers) was. */
switch (my_lex (buffer)) {
case LX_ATOM:
case LX_QSTR:
/* Another word: the first one starts a phrase. */
pers = add (buffer, add (" ", pers));
more_phrase: ; /* sigh (1) */
if (phrase (buffer) == NOTOK)
return NOTOK;
/* last_lex is the token that ended the phrase. */
switch (last_lex) {
case LX_LBRK:
get_addr: ;
if (route_addr (buffer) == NOTOK)
return NOTOK;
if (last_lex == LX_RBRK)
return OK;
(void) sprintf (err, "missing right-bracket (%s)", buffer);
return NOTOK;
case LX_COLN:
get_group: ;
if (glevel++ > 0) {
(void) sprintf (err, "nested groups not allowed (%s)",
pers);
return NOTOK;
}
/* The phrase becomes the group name, with ": " appended. */
grp = add (": ", pers);
pers = NULL;
{
/* Look ahead from just behind the ':'.  A ';' or the end of the
 * input closes the group at once, commas are skipped, and anything
 * else rewinds to pp and is parsed as an address of its own. */
char *pp = cp;
for (;;)
switch (my_lex (buffer)) {
case LX_SEMI:
case LX_END: /* tsk, tsk */
glevel--;
return OK;
case LX_COMA:
continue;
default:
cp = pp;
return parse_address ();
}
}
case LX_DOT: /* sigh (1) */
pers = add (".", pers);
goto more_phrase;
default:
(void) sprintf (err,
"no mailbox in address, only a phrase (%s%s)",
pers, buffer);
return NOTOK;
}
/* \f */
case LX_LBRK:
goto get_addr;
case LX_COLN:
goto get_group;
case LX_DOT:
/* word '.' ...: the first word begins a dotted local part. */
mbox = add (buffer, pers);
pers = NULL;
if (route_addr (buffer) == NOTOK)
return NOTOK;
goto check_end;
case LX_AT:
/* word '@' ...: the first word is the local part. */
ingrp = glevel;
mbox = pers;
pers = NULL;
if (domain (buffer) == NOTOK)
return NOTOK;
check_end: ;
/* Only ';', ',' or the end of input may follow the address. */
switch (last_lex) {
case LX_SEMI:
if (glevel-- <= 0) {
(void) strcpy (err, "extraneous semi-colon");
return NOTOK;
}
/* falls through */
case LX_COMA:
case LX_END:
return OK;
default:
(void) sprintf (err, "junk after local@domain (%s)",
buffer);
return NOTOK;
}
case LX_SEMI: /* no host */
case LX_COMA:
case LX_END:
/* A single word: it is the mailbox, and there is no host. */
ingrp = glevel;
if (last_lex == LX_SEMI && glevel-- <= 0) {
(void) strcpy (err, "extraneous semi-colon");
return NOTOK;
}
mbox = pers;
pers = NULL;
return OK;
default:
(void) sprintf (err, "missing mailbox (%s)", buffer);
return NOTOK;
}
}
/* \f */
/*
 * phrase() - collect the remaining words of a phrase into pers.
 *
 * Each atom or quoted string delivered by my_lex() is appended to pers
 * after a separating blank.  The first token of any other kind ends the
 * phrase; it stays in buffer and its code in last_lex.  Always returns
 * OK.
 */
static int phrase (buffer)
register char *buffer;
{
    int tok;

    while ((tok = my_lex (buffer)) == LX_ATOM || tok == LX_QSTR)
        pers = add (buffer, add (" ", pers));
    return OK;
}
/* \f */
static int route_addr (buffer)
register char *buffer;
{
register char *pp = cp;
if (my_lex (buffer) == LX_AT) {
if (route (buffer) == NOTOK)
return NOTOK;
}
else
cp = pp;
if (local_part (buffer) == NOTOK)
return NOTOK;
switch (last_lex) {
case LX_AT:
return domain (buffer);
case LX_SEMI: /* if in group */
case LX_RBRK: /* no host */
case LX_COMA:
case LX_END:
return OK;
default:
(void) sprintf (err, "no at-sign after local-part (%s)", buffer);
return NOTOK;
}
}
/* \f */
/*
 * local_part() - parse word *("." word) and append it to mbox.
 *
 * ingrp is set to the current group level first.  Every word has to be
 * an atom or a quoted string.  Parsing stops at the first token behind
 * a word that is not a dot; that token stays in buffer and its code in
 * last_lex.
 *
 * Returns OK, or NOTOK with a message in err when a word is missing.
 */
static int local_part (buffer)
register char *buffer;
{
    int tok;

    ingrp = glevel;

    for (;;) {
        tok = my_lex (buffer);
        if (tok != LX_ATOM && tok != LX_QSTR) {
            (void) sprintf (err, "no mailbox in local-part (%s)", buffer);
            return NOTOK;
        }
        mbox = add (buffer, mbox);

        if (my_lex (buffer) != LX_DOT)
            return OK;
        mbox = add (buffer, mbox);      /* keep the dot and go on */
    }
}
/* \f */
/*
 * domain() - parse sub-domain *("." sub-domain) and append it to host.
 *
 * A sub-domain is an atom or a domain literal.  When a further '@'
 * follows, the host gathered so far is folded into the mailbox as
 * mbox "%" host, host is emptied and parsing goes on with the next
 * domain.  Any other token ends the domain; it stays in buffer and its
 * code in last_lex.
 *
 * Returns OK, or NOTOK with a message in err when a sub-domain is
 * missing.
 */
static int domain (buffer)
register char *buffer;
{
    int tok;

    for (;;) {
        tok = my_lex (buffer);
        if (tok != LX_ATOM && tok != LX_DLIT) {
            (void) sprintf (err,
                    "no sub-domain in domain-part of address (%s)",
                    buffer);
            return NOTOK;
        }
        host = add (buffer, host);

        tok = my_lex (buffer);
        if (tok == LX_DOT) {
            host = add (buffer, host);
        }
        else if (tok == LX_AT) {        /* sigh (0) */
            mbox = add (host, add ("%", mbox));
            free (host);
            host = NULL;
        }
        else
            return OK;
    }
}
/* \f */
/*
 * route() - parse the rest of a source route whose leading '@' the
 * caller has already consumed, collecting its text in path.
 *
 * path starts out as "@" and every accepted token is appended to it:
 * sub-domains (atoms or domain literals), dots, further at-signs and
 * the commas separating the domains of the route.  After a comma any
 * additional commas are skipped and an '@' has to follow.  The colon
 * that ends the route is appended as well.
 *
 * Returns OK once the colon has been seen, or NOTOK with a message in
 * err.  Every error, including a missing '@' after a comma, stops the
 * parse at once.
 */
static int route (buffer)
register char *buffer;
{
    int tok;

    path = getcpy ("@");

    for (;;) {
        tok = my_lex (buffer);
        if (tok != LX_ATOM && tok != LX_DLIT) {
            (void) sprintf (err,
                    "no sub-domain in domain-part of address (%s)",
                    buffer);
            return NOTOK;
        }
        path = add (buffer, path);

        switch (my_lex (buffer)) {
            case LX_COMA:
                path = add (buffer, path);
                /* swallow repeated commas in front of the next '@' */
                while ((tok = my_lex (buffer)) == LX_COMA)
                    continue;
                if (tok != LX_AT) {
                    (void) sprintf (err,
                            "no at-sign found for next domain in route (%s)",
                            buffer);
                    /* Report the failure like every other error here
                     * rather than going on with a half-built route. */
                    return NOTOK;
                }
                path = add (buffer, path);
                continue;

            case LX_AT:         /* XXX */
            case LX_DOT:
                path = add (buffer, path);
                continue;

            case LX_COLN:
                path = add (buffer, path);
                return OK;

            default:
                (void) sprintf (err,
                        "no colon found to terminate route (%s)", buffer);
                return NOTOK;
        }
    }
}
/* \f */
/*
 * my_lex() - lex the next token at the scan position cp into buffer.
 *
 * Leading whitespace is skipped.  A comment "( ... )" (comments may
 * nest, and QUOTE escapes the next character) is appended to note and
 * lexing then continues with the token behind it.  A quoted string
 * yields LX_QSTR and a domain literal "[ ... ]" yields LX_DLIT, both
 * with their delimiters kept in buffer.  A character listed in the
 * special table yields the code stored there.  Any other run of
 * characters is an atom, reported as LX_ATOM or, when the scan position
 * was at " at " on entry, possibly as LX_AT (see the end of the
 * function).
 *
 * The code returned is also stored in last_lex.  cp is advanced past
 * the token and set to NULL once the end of the input has been reached.
 *
 * NOTE(review): nothing here limits how much is written to buffer; the
 * callers visible in this file pass arrays of BUFSIZ bytes.
 */
static int my_lex (buffer)
register char *buffer;
{
int i, gotat = 0;
register char c,
*bp;
bp = buffer;
*bp = 0;
/* A previous call already hit the end of the input. */
if (!cp)
return (last_lex = LX_END);
/* Tested before whitespace is skipped, because " at " begins with
 * the blank itself. */
gotat = isat (cp);
c = *cp++;
while (isspace (c))
c = *cp++;
if (c == 0) {
cp = NULL;
return (last_lex = LX_END);
}
/* Comment: i counts the parentheses opened inside the outermost pair. */
if (c == '(')
for (*bp++ = c, i = 0;;)
switch (c = *cp++) {
case 0:
/* unterminated comment */
cp = NULL;
return (last_lex = LX_ERR);
case QUOTE:
/* keep the escape and the character it protects */
*bp++ = c;
if ((c = *cp++) == 0) {
cp = NULL;
return (last_lex = LX_ERR);
}
*bp++ = c;
continue;
case '(':
i++;
/* falls through: the '(' is stored like any other character */
default:
*bp++ = c;
continue;
case ')':
*bp++ = c;
/* The outermost ')' ends the comment: save it in note and
 * return the token that follows instead. */
if (--i < 0) {
*bp = 0;
note = note ? add (buffer, add (" ", note))
: getcpy (buffer);
return my_lex (buffer);
}
}
/* \f */
/* Quoted string, delimiters included. */
if (c == '"')
for (*bp++ = c;;)
switch (c = *cp++) {
case 0:
cp = NULL;
return (last_lex = LX_ERR);
case QUOTE:
*bp++ = c;
if ((c = *cp++) == 0) {
cp = NULL;
return (last_lex = LX_ERR);
}
/* falls through: store the escaped character */
default:
*bp++ = c;
continue;
case '"':
*bp++ = c;
*bp = 0;
return (last_lex = LX_QSTR);
}
/* Domain literal, brackets included. */
if (c == '[')
for (*bp++ = c;;)
switch (c = *cp++) {
case 0:
cp = NULL;
return (last_lex = LX_ERR);
case QUOTE:
*bp++ = c;
if ((c = *cp++) == 0) {
cp = NULL;
return (last_lex = LX_ERR);
}
/* falls through: store the escaped character */
default:
*bp++ = c;
continue;
case ']':
*bp++ = c;
*bp = 0;
return (last_lex = LX_DLIT);
}
/* \f */
/* A special character is a token all by itself. */
*bp++ = c;
*bp = 0;
for (i = 0; special[i].lx_chr != 0; i++)
if (c == special[i].lx_chr)
return (last_lex = special[i].lx_val);
if (iscntrl (c))
return (last_lex = LX_ERR);
/* Atom: runs up to the next special, control or whitespace character
 * or the end of the input. */
for (;;) {
if ((c = *cp++) == 0)
break;
for (i = 0; special[i].lx_chr != 0; i++)
if (c == special[i].lx_chr)
goto got_atom;
if (iscntrl (c) || isspace (c))
break;
*bp++ = c;
}
got_atom: ;
/* At the end of the input cp becomes NULL; otherwise give back the
 * character that ended the atom. */
if (c == 0)
cp = NULL;
else
cp--;
*bp = 0;
/* The atom is reported as LX_AT only if the scan position was at
 * " at " on entry, input remains, and no '<' occurs in the rest of
 * the input. */
last_lex = !gotat || cp == NULL || index (cp, '<') != NULL
? LX_ATOM : LX_AT;
return last_lex;
}
/* \f */
/*
 * legal_person() - return p in a form that is safe to use as a phrase.
 *
 * A string that already starts with a double quote, or that contains
 * none of the characters listed in the special table, is returned
 * unchanged.  Otherwise the result is p wrapped in double quotes,
 * stored in a static buffer that the next call overwrites; text that
 * does not fit in that buffer is cut off.
 */
char *legal_person (p)
register char *p;
{
    int i;
    register char *cp;
    static char buffer[BUFSIZ];

    if (*p == '"')
        return p;

    for (cp = p; *cp; cp++)
        for (i = 0; special[i].lx_chr; i++)
            if (*cp == special[i].lx_chr) {
                /* The precision leaves room for the two quotes and the
                 * terminating NUL, so buffer cannot be overrun. */
                (void) sprintf (buffer, "\"%.*s\"",
                        (int) sizeof buffer - 3, p);
                return buffer;
            }

    return p;
}
/* \f */
/*
 * mfgets() - read one header line, continuation lines included, from
 * the stream in.
 *
 * NUL bytes in the input are skipped.  A newline followed by a blank or
 * a tab continues the line: the newline and that character are both
 * kept.  Any other newline ends the line; the character read behind it
 * is pushed back and the newline itself is not stored.
 *
 * Returns OK with *bp pointing at the line, which lives in a buffer
 * owned by this function and reused by the next call.  Returns DONE
 * with *bp set to NULL when an empty line or end of file is met before
 * any character of a line was read; the buffer is released then.
 * Returns NOTOK when memory cannot be obtained.
 */
int mfgets (in, bp)
register FILE *in;
register char **bp;
{
    int i;
    register char *cp,
                  *dp,
                  *ep;
    static int len = 0;
    static char *pp = NULL;

    if (pp == NULL)
        if ((pp = malloc ((unsigned) (len = BUFSIZ))) == NULL)
            return NOTOK;

    /* ep is the growth limit: one pass through the loop stores at most
     * two characters, and one more byte is needed for the terminator. */
    for (ep = (cp = pp) + len - 2;;) {
        switch (i = getc (in)) {
            case EOF:
        eol: ;
                if (cp != pp) {
                    *cp = 0;
                    *bp = pp;
                    return OK;
                }
        eoh: ;
                *bp = NULL;
                free (pp);
                pp = NULL;
                return DONE;

            case 0:
                continue;

            case '\n':
                if (cp == pp)   /* end of headers, gobble it */
                    goto eoh;
                switch (i = getc (in)) {
                    default:    /* end of line */
                    case '\n':  /* end of headers, save for next call */
                        (void) ungetc (i, in);
                        goto eol;

                    case ' ':   /* continue headers */
                    case '\t':
                        *cp++ = '\n';
                        break;
                }
                /* fall into default case */

            default:
                *cp++ = i;
                break;
        }

        if (cp >= ep) {
            /* Take the offset before realloc() can move the block, so
             * that no pointer into the old block is used afterwards. */
            int used = cp - pp;

            if ((dp = realloc (pp, (unsigned) (len += BUFSIZ))) == NULL) {
                free (pp);
                pp = NULL;
                return NOTOK;
            }
            /* pp has to stay the start of the buffer: it is what gets
             * returned through *bp and what is freed later on. */
            pp = dp;
            cp = pp + used;
            ep = pp + len - 2;
        }
    }
}