SCCSID(@
(#)parseaddr.c 3.80 3/8/83);
** PARSEADDR -- Parse an address
** Parses an address and breaks it up into three parts: a
** net to transmit the message on, the host to transmit it
** to, and a user on that host. These are loaded into an
** ADDRESS header with the values squirreled away if necessary.
** The "user" part may not be a real user; the process may
** just reoccur on that machine. For example, on a machine
** with an arpanet connection, the address 'csvax.bill@berkeley'
** will break up to a "user" of 'csvax.bill' and a host
** of 'berkeley' -- to be transmitted over the arpanet.
** addr -- the address to parse.
** a -- a pointer to the address descriptor buffer.
** If NULL, a header will be created.
** copyf -- determines what shall be copied:
** -1 -- don't copy anything. The printname
** (q_paddr) is just addr, and the
** user & host are allocated internally
** 0 -- copy out the parsed user & host, but
** don't copy the printname.
** +1 -- copy everything.
** delim -- the character to terminate the address, passed to prescan.
** A pointer to the address descriptor header (`a' if `a' is non-NULL).
/*
**  Following delimiters are inherent to the internal algorithms.
**  toktype appends this string to its expansion of the "$o" macro
**  and then looks characters up in the combined buffer.
*/
# define DELIMCHARS "$()<>,;\\\"\r\n" /* word delimiters */
parseaddr(addr
, a
, copyf
, delim
)
register struct mailer
*m
;
extern ADDRESS
*buildaddr();
** Initialize and prescan address.
printf("\n--parseaddr(%s)\n", addr
);
pvp
= prescan(addr
, delim
);
** Apply rewriting rules.
** Ruleset 0 does basic parsing. It must resolve.
** See if we resolved to a real mailer.
if (pvp
[0][0] != CANONNET
)
usrerr("cannot resolve name");
** Build canonical address from pvp.
** Make local copies of the host & user and then
a
->q_paddr
= newstr(addr
);
a
->q_host
= newstr(a
->q_host
);
if (a
->q_user
!= a
->q_paddr
)
a
->q_user
= newstr(a
->q_user
);
** Do UPPER->lower case mapping unless inhibited.
if (!bitnset(M_HST_UPPER
, m
->m_flags
))
if (!bitnset(M_USR_UPPER
, m
->m_flags
))
** PRESCAN -- Prescan name and make it canonical
** Scans a name and turns it into a set of tokens. This process
** deletes blanks and comments (in parentheses).
** This routine knows about quoted strings and angle brackets.
** There are certain subtleties to this routine. The one that
** comes to mind now is that backslashes on the ends of names
** are silently stripped off; this is intentional. The problem
** is that some versions of sndmsg (like at LBL) set the kill
** character to something other than @ when reading addresses;
** so people type "csvax.eric\@berkeley" -- which screws up the
** addr -- the name to chomp.
** delim -- the delimiter for the address, normally
** '\0' or ','; \0 is accepted in any case.
** A pointer to a vector of tokens.
/*
**  Scanner state machine used by prescan.
**
**	The values below serve both as scanner states and as character
**	types: prescan indexes StateTab by [current state][toktype(c)].
**	Each entry holds the next state in its low bits (select it with
**	TYPE), optionally or'ed with the M and B meta bits.
**
**	Replacement lists that contain an operator are parenthesized so
**	that they stay a single operand wherever they are used, e.g.
**	(MB & TYPE) is zero rather than M | (B & TYPE).
*/

/* states and character types */
# define OPR		0	/* operator */
# define ATM		1	/* atom */
# define QST		2	/* in quoted string */
# define SPC		3	/* chewing up spaces */
# define ONE		4	/* pick up one character */

# define NSTATES	5	/* number of states */
# define TYPE		017	/* mask to select state type */

/* meta bits for table */
# define M		020	/* meta character; don't pass through */
# define B		040	/* cause a break */
# define MB		(M|B)	/* meta-break */

static short StateTab[NSTATES][NSTATES] =
{
   /*	oldst	chtype>	OPR	ATM	QST	SPC	ONE	*/
	/*OPR*/	{	OPR|B,	ATM|B,	QST|B,	SPC|MB,	ONE|B	},
	/*ATM*/	{	OPR|B,	ATM,	QST|B,	SPC|MB,	ONE|B	},
	/*QST*/	{	QST,	QST,	OPR,	QST,	QST	},
	/*SPC*/	{	OPR,	ATM,	QST,	SPC|M,	ONE	},
	/*ONE*/	{	OPR,	OPR,	OPR,	OPR,	OPR	},
};

# define NOCHAR		(-1)	/* signal nothing in lookahead token */
char *DelimChar
; /* set to point to the delimiter */
static char buf
[MAXNAME
+MAXATOM
];
static char *av
[MAXATOM
+1];
/* store away any old lookahead character */
if (q
>= &buf
[sizeof buf
- 5])
usrerr("Address too long");
/* read a new input character */
printf("c=%c, s=%d; ", c
, state
);
/* chew up special characters */
/* do nothing, just avoid next clauses */
usrerr("Unbalanced ')'");
usrerr("Unbalanced '>'");
else if (delim
== ' ' && isspace(c
))
/* see if this is end of input */
if (c
== delim
&& anglecnt
<= 0 && state
!= QST
)
newstate
= StateTab
[state
][toktype(c
)];
printf("ns=%02o\n", newstate
);
syserr("prescan: too many tokens");
} while (c
!= '\0' && (c
!= delim
|| anglecnt
> 0));
usrerr("Unbalanced '('");
usrerr("Unbalanced '<'");
usrerr("Unbalanced '\"'");
** TOKTYPE -- return token type
** c -- the character in question.
static bool firstime
= TRUE
;
expand("$o", buf
, &buf
[sizeof buf
- 1], CurEnv
);
(void) strcat(buf
, DELIMCHARS
);
if (c
== MATCHCLASS
|| c
== MATCHREPL
|| c
== MATCHNCLASS
)
if (isspace(c
) || c
== ')')
if (iscntrl(c
) || index(buf
, c
) != NULL
)
** REWRITE -- apply rewrite rules to token vector.
** This routine is an ordered production system. Each rewrite
** rule has a LHS (called the pattern) and a RHS (called the
** rewrite); 'rwr' points to the current rewrite rule.
** For each rewrite rule, 'avp' points to the address vector we
** are trying to match against, and 'pvp' points to the pattern.
** If pvp points to a special match value (MATCHZANY, MATCHANY,
** MATCHONE, MATCHCLASS, MATCHNCLASS) then the address in avp
** matched is saved away in the match vector (pointed to by 'mvp').
** When a match between avp & pvp does not match, we try to
** back out. If we back up over MATCHONE, MATCHCLASS, or MATCHNCLASS
** we must also back out the match in mvp. If we reach a
** MATCHANY or MATCHZANY we just extend the match and start
** When we finally match, we rewrite the address vector
** pvp -- pointer to token vector.
char **first
; /* first token matched */
char **last
; /* last token matched */
# define MAXMATCH 9 /* max params per rewrite */
register char *ap
; /* address pointer */
register char *rp
; /* rewrite pointer */
register char **avp
; /* address vector pointer */
register char **rvp
; /* rewrite vector pointer */
register struct match
*mlp
; /* cur ptr into mlist */
register struct rewrite
*rwr
; /* pointer to current rewrite rule */
struct match mlist
[MAXMATCH
]; /* stores match on LHS */
char *npvp
[MAXATOM
+1]; /* temporary space for rebuild */
if (OpMode
== MD_TEST
|| tTd(21, 2))
printf("rewrite: ruleset %2d input:", ruleset
);
** Run through the list of rewrite rules, applying
for (rwr
= RewriteRules
[ruleset
]; rwr
!= NULL
; )
printf("-----trying rule:");
/* try to match on this rule */
while ((ap
= *avp
) != NULL
|| *rvp
!= NULL
)
/* end-of-pattern before end-of-address */
if (ap
== NULL
&& *rp
!= MATCHZANY
)
/* match any token in (not in) a class */
s
= stab(ap
, ST_CLASS
, ST_FIND
);
if (s
== NULL
|| !bitnset(rp
[1], s
->s_class
))
else if (*rp
== MATCHNCLASS
)
/* explicit fall-through */
/* match exactly one token */
/* match zero or more tokens */
/* must have exact match */
/* successful match on this token */
/* match failed -- back up */
while (--rvp
>= rwr
->r_lhs
)
if (*rp
== MATCHANY
|| *rp
== MATCHZANY
)
/* extend binding and continue */
if (*rp
== MATCHONE
|| *rp
== MATCHCLASS
||
/* total failure to match */
** See if we successfully matched
if (rvp
< rwr
->r_lhs
|| *rvp
!= NULL
)
printf("----- rule fails\n");
printf("-----rule matches:");
else if (*rp
== CANONHOST
)
else if (*rp
== CANONNET
)
for (avp
= npvp
; *rvp
!= NULL
; rvp
++)
register struct match
*m
;
if (avp
>= &npvp
[MAXATOM
])
syserr("rewrite: expansion too long");
/* substitute from LHS */
if (avp
>= &npvp
[MAXATOM
])
syserr("rewrite: expansion too long");
bmove((char *) &npvp
[2], (char *) pvp
,
(avp
- npvp
- 2) * sizeof *avp
);
printf("-----callsubr %s\n", npvp
[1]);
rewrite(pvp
, atoi(npvp
[1]));
bmove((char *) npvp
, (char *) pvp
,
(avp
- npvp
) * sizeof *avp
);
if (OpMode
== MD_TEST
|| tTd(21, 2))
printf("rewrite: ruleset %2d returns:", ruleset
);
** BUILDADDR -- build address from token vector.
** a -- pointer to address descriptor to fill.
** If NULL, one will be allocated.
** NULL if there was an error.
static char buf
[MAXNAME
];
register struct mailer
*m
;
a
= (ADDRESS
*) xalloc(sizeof *a
);
clear((char *) a
, sizeof *a
);
/* figure out what net/mailer to use */
syserr("buildaddr: no net");
if (sameword(*tv
, "error"))
syserr("buildaddr: error: no user");
for (mp
= Mailer
; (m
= *mp
++) != NULL
; )
if (sameword(m
->m_name
, *tv
))
syserr("buildaddr: unknown net %s", *tv
);
/* figure out what host (if any) */
if (!bitnset(M_LOCAL
, m
->m_flags
))
syserr("buildaddr: no host");
while (*tv
!= NULL
&& **tv
!= CANONUSER
)
(void) strcat(buf
, *tv
++);
/* figure out the user */
syserr("buildaddr: no user");
cataddr(tv
, buf
, sizeof buf
);
** CATADDR -- concatenate pieces of addresses (putting in <LWSP> subs)
** pvp -- parameter vector to rebuild.
** buf -- buffer to build the string into.
while (*pvp
!= NULL
&& (i
= strlen(*pvp
)) < sz
)
natomtok
= (toktype(**pvp
) == ATM
);
if (oatomtok
&& natomtok
)
** SAMEADDR -- Determine if two addresses are the same
** This is not just a straight comparison -- if the mailer doesn't
** care about the host we just ignore it, etc.
** a, b -- pointers to the internal forms to compare.
** TRUE -- they represent the same mailbox.
/* if they don't have the same mailer, forget it */
if (a
->q_mailer
!= b
->q_mailer
)
/* if the user isn't the same, we can drop out */
if (strcmp(a
->q_user
, b
->q_user
) != 0)
/* if the mailer ignores hosts, we have succeeded! */
if (bitnset(M_LOCAL
, a
->q_mailer
->m_flags
))
/* otherwise compare hosts (but be careful for NULL ptrs) */
if (a
->q_host
== NULL
|| b
->q_host
== NULL
)
if (strcmp(a
->q_host
, b
->q_host
) != 0)
** PRINTADDR -- print address (for debugging)
** a -- the address to print
** follow -- follow the q_next chain.
printf("%s: mailer %d (%s), host `%s', user `%s'\n", a
->q_paddr
,
a
->q_mailer
->m_mno
, a
->q_mailer
->m_name
, a
->q_host
,
printf("\tnext=%x, flags=%o, alias %x\n", a
->q_next
, a
->q_flags
,
printf("\thome=\"%s\", fullname=\"%s\"\n", a
->q_home
,
** REMOTENAME -- return the name relative to the current mailer
** name -- the name to translate.
** m -- the mailer that we want to do rewriting relative to.
** senderaddress -- if set, uses the sender rewriting rules
** rather than the recipient rewriting rules.
** canonical -- if set, strip out any comment information,
** the text string representing this address relative to
** The text string returned is tucked away locally;
** copy it if you intend to save it.
remotename(name
, m
, senderaddress
, canonical
)
char *oldg
= macvalue('g', CurEnv
);
static char buf
[MAXNAME
];
extern char *crackaddr();
printf("remotename(%s)\n", name
);
/* don't do anything if we are tagging it as special */
if ((senderaddress
? m
->m_s_rwset
: m
->m_r_rwset
) < 0)
** Do a heuristic crack of this name to extract any comment info.
** This will leave the name as a comment and a $g macro.
** Turn the name into canonical form.
** Normally this will be RFC 822 style, i.e., "user@domain".
** If this only resolves to "user", and the "C" flag is
** specified in the sending mailer, then the sender's
** domain will be appended.
pvp
= prescan(name
, '\0');
if (CurEnv
->e_fromdomain
!= NULL
)
/* append from domain to this address */
register char **pxp
= pvp
;
/* see if there is an "@domain" in the current name */
while (*pxp
!= NULL
&& strcmp(*pxp
, "@") != 0)
/* no.... append the "@domain" from the sender */
register char **qxq
= CurEnv
->e_fromdomain
;
while ((*pxp
++ = *qxq
++) != NULL
)
** Do more specific rewriting.
** Rewrite using ruleset 1 or 2 depending on whether this is
** a sender address or not.
** Then run it through any receiving-mailer-specific rulesets.
rewrite(pvp
, m
->m_s_rwset
);
rewrite(pvp
, m
->m_r_rwset
);
** Do any final sanitation the address may require.
** This will normally be used to turn internal forms
** (e.g., user@host.LOCAL) into external form. This
** may be used as a default to the above rules.
** Now restore the comment information we had at the beginning.
cataddr(pvp
, lbuf
, sizeof lbuf
);
define('g', lbuf
, CurEnv
);
expand(fancy
, buf
, &buf
[sizeof buf
- 1], CurEnv
);
define('g', oldg
, CurEnv
);
printf("remotename => `%s'\n", buf
);