* Copyright (c) 1994 Eric P. Allman
* The Regents of the University of California. All rights reserved.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
/* version identification string for this source file (mime.c 8.26) */
static char sccsid[] = "@(#)mime.c 8.26 (Berkeley) 6/18/95";
** I am indebted to John Beck of Hewlett-Packard, who contributed
** his code to me for inclusion. As it turns out, I did not use
** his code since he used a "minimum change" approach that used
** several temp files, and I wanted a "minimum impact" approach
** that would avoid copying. However, looking over his code
** helped me cement my understanding of the problem.
** I also looked at, but did not directly use, Nathaniel
** Borenstein's "code.c" module. Again, it functioned as
** a file-to-file translator, which did not fit within my
** design bounds, but it was a useful base for understanding
** the problem.
/*
**  Output alphabets for the encoders.
**
**	Each table maps a small integer to the character that
**	represents it: Base16Code is indexed by a 4-bit value
**	(one hex digit), Base64Code by a 6-bit value.
*/

char Base16Code[] =
	"0123456789"
	"ABCDEF";

char Base64Code[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"+/";
/*
**  Types of MIME boundaries.
**
**	mimeboundary() classifies an input line as one of these, and
**	mime8to7() reports which one ended the part it processed.
**	The numeric values are also used to index MimeBoundaryNames
**	in debug output, so the two must be kept in the same order.
*/
#define MBT_SYNTAX 0 /* boundary for the wrong enclosure (syntax error) */
#define MBT_NOTSEP 1 /* not a boundary */
#define MBT_INTERMED 2 /* intermediate boundary (no trailing --) */
#define MBT_FINAL 3 /* final boundary (trailing -- included) */
static char *MimeBoundaryNames
[] =
"SYNTAX", "NOTSEP", "INTERMED", "FINAL"
** MIME8TO7 -- output 8 bit body in 7 bit format
** The header has already been output -- this has to do the
** 8 to 7 bit conversion. It would be easy if we didn't have
** to deal with nested formats (multipart/xxx and message/rfc822).
** We won't be called if we don't have to do a conversion, and
** appropriate MIME-Version: and Content-Type: fields have been
** output. Any Content-Transfer-Encoding: field has not been
** output, and we can add it here.
** Parameters:
** mci -- mailer connection information.
** header -- the header for this body part.
** e -- the envelope; the body is read from e->e_dfp.
** boundaries -- the currently pending message boundaries.
** NULL if we are processing the outer portion.
** flags -- to tweak processing.
** Returns:
** An indicator of what terminated the message part:
** MBT_FINAL -- the final boundary
** MBT_INTERMED -- an intermediate boundary
** MBT_NOTSEP -- an end of file
char *field
; /* name of field */
char *value
; /* value of that field */
mime8to7(mci
, header
, e
, boundaries
, flags
)
size_t sectionsize
, sectionhighbits
;
struct args argv
[MAXMIMEARGS
];
extern u_char MimeTokenTab
[256];
printf("mime8to7: flags = %x, boundaries =", flags
);
if (boundaries
[0] == NULL
)
for (i
= 0; boundaries
[i
] != NULL
; i
++)
printf(" %s", boundaries
[i
]);
p
= hvalue("Content-Transfer-Encoding", header
);
(pvp
= prescan(p
, '\0', pvpbuf
, sizeof pvpbuf
, NULL
,
MimeTokenTab
)) == NULL
||
cataddr(pvp
, NULL
, buf
, sizeof buf
, '\0');
p
= hvalue("Content-Type", header
);
if (bitset(M87F_DIGEST
, flags
))
(pvp
= prescan(p
, '\0', pvpbuf
, sizeof pvpbuf
, NULL
,
MimeTokenTab
)) != NULL
&&
for (i
= 0; pvp
[i
] != NULL
; i
++)
printf("pvp[%d] = \"%s\"\n", i
, pvp
[i
]);
if (*pvp
!= NULL
&& strcmp(*pvp
, "/") == 0 &&
/* break out parameters */
while (*pvp
!= NULL
&& argc
< MAXMIMEARGS
)
/* skip to semicolon separator */
while (*pvp
!= NULL
&& strcmp(*pvp
, ";") != 0)
if (*pvp
++ == NULL
|| *pvp
== NULL
)
argv
[argc
].field
= *pvp
++;
/* see if there is a value */
if (*pvp
!= NULL
&& strcmp(*pvp
, "=") == 0 &&
(*++pvp
== NULL
|| strcmp(*pvp
, ";") != 0))
/* check for disaster cases */
/* don't propagate some flags more than one level into the message */
** Check for cases that can not be encoded.
** For example, you can't encode certain kinds of types
** or already-encoded messages. If we find this case,
** just copy it through.
sprintf(buf
, "%s/%s", type
, subtype
);
if (wordinclass(buf
, 'n') || (cte
!= NULL
&& !wordinclass(cte
, 'e')))
** Multipart requires special processing.
** Do a recursive descent into the message.
if (strcasecmp(type
, "multipart") == 0 && !bitset(M87F_NO8BIT
, flags
))
if (strcasecmp(subtype
, "digest") == 0)
for (i
= 0; i
< argc
; i
++)
if (strcasecmp(argv
[i
].field
, "boundary") == 0)
syserr("mime8to7: Content-Type: %s missing boundary", p
);
if (blen
> sizeof bbuf
- 1)
syserr("mime8to7: multipart boundary \"%s\" too long",
printf("mime8to7: multipart boundary \"%s\"\n", bbuf
);
for (i
= 0; i
< MAXMIMENESTING
; i
++)
if (boundaries
[i
] == NULL
)
syserr("mime8to7: multipart nesting boundary too deep");
boundaries
[i
+ 1] = NULL
;
mci
->mci_flags
|= MCIF_INMIME
;
/* skip the early "comment" prologue */
while (fgets(buf
, sizeof buf
, e
->e_dfp
) != NULL
)
bt
= mimeboundary(buf
, boundaries
);
putxline(buf
, mci
, PXLF_MAPFROM
|PXLF_STRIP8BIT
);
sprintf(buf
, "--%s", bbuf
);
collect(e
->e_dfp
, FALSE
, FALSE
, &hdr
, e
);
putline("+++after collect", mci
);
putline("+++after putheader", mci
);
bt
= mime8to7(mci
, hdr
, e
, boundaries
, flags
);
sprintf(buf
, "--%s--", bbuf
);
mci
->mci_flags
&= ~MCIF_INMIME
;
/* skip the late "comment" epilogue */
while (fgets(buf
, sizeof buf
, e
->e_dfp
) != NULL
)
bt
= mimeboundary(buf
, boundaries
);
putxline(buf
, mci
, PXLF_MAPFROM
|PXLF_STRIP8BIT
);
printf("\t\t\tmime8to7=>%s (multipart)\n",
** Message/* types -- recurse exactly once.
** Class 'm' is predefined to have "rfc822" only.
if (strcasecmp(type
, "message") == 0)
if (!wordinclass(subtype
, 'm'))
mci
->mci_flags
|= MCIF_INMIME
;
collect(e
->e_dfp
, FALSE
, FALSE
, &hdr
, e
);
putline("+++after collect", mci
);
putline("+++after putheader", mci
);
if (hvalue("MIME-Version", hdr
) == NULL
)
putline("MIME-Version: 1.0", mci
);
bt
= mime8to7(mci
, hdr
, e
, boundaries
, flags
);
mci
->mci_flags
&= ~MCIF_INMIME
;
** Non-compound body type
** Compute the ratio of seven to eight bit characters;
** use that as a heuristic to decide how to do the
** encoding.
sectionsize
= sectionhighbits
= 0;
if (!bitset(M87F_NO8BIT
, flags
))
/* remember where we were */
offset
= ftell(e
->e_dfp
);
syserr("mime8to7: cannot ftell on df%s", e
->e_id
);
/* do a scan of this body type to count character types */
while (fgets(buf
, sizeof buf
, e
->e_dfp
) != NULL
)
if (mimeboundary(buf
, boundaries
) != MBT_NOTSEP
)
for (p
= buf
; *p
!= '\0'; p
++)
/* count bytes with the high bit set */
** Heuristic: if 1/4 of the first 4K bytes are 8-bit,
** assume base64. This heuristic avoids double-reading
** large graphics or video files.
if (sectionsize
>= 4096 &&
sectionhighbits
> sectionsize
/ 4)
/* return to the original offset for processing */
/* XXX use relative seeks to handle >31 bit file sizes? */
if (fseek(e
->e_dfp
, offset
, SEEK_SET
) < 0)
syserr("mime8to7: cannot fseek on df%s", e
->e_id
);
** Heuristically determine encoding method.
** If more than 1/8 of the total characters have the
** eighth bit set, use base64; else use quoted-printable.
** However, only encode binary encoded data as base64,
** since otherwise the NL=>CRLF mapping will be a problem.
printf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s\n",
sectionhighbits
, sectionsize
,
cte
== NULL
? "[none]" : cte
);
if (cte
!= NULL
&& strcasecmp(cte
, "binary") == 0)
sectionsize
= sectionhighbits
;
if (sectionhighbits
== 0)
/* no encoding necessary */
sprintf(buf
, "Content-Transfer-Encoding: %s", cte
);
mci
->mci_flags
&= ~MCIF_INHEADER
;
while (fgets(buf
, sizeof buf
, e
->e_dfp
) != NULL
)
bt
= mimeboundary(buf
, boundaries
);
else if (sectionsize
/ 8 < sectionhighbits
)
/* use base64 encoding */
putline("Content-Transfer-Encoding: base64", mci
);
printf(" ...Content-Transfer-Encoding: base64\n");
mci
->mci_flags
&= ~MCIF_INHEADER
;
while ((c1
= mime_getchar_crlf(e
->e_dfp
, boundaries
, &bt
)) != EOF
)
*bp
++ = Base64Code
[(c1
>> 2)];
c2
= mime_getchar_crlf(e
->e_dfp
, boundaries
, &bt
);
c2
= mime_getchar_crlf(e
->e_dfp
, boundaries
, &bt
);
*bp
++ = Base64Code
[c2
& 0x3f];
/* use quoted-printable encoding */
/* set up map of characters that must be mapped */
for (c1
= 0x00; c1
< 0x20; c1
++)
for (c1
= 0x7f; c1
< 0x100; c1
++)
if (bitnset(M_EBCDIC
, mci
->mci_mailer
->m_flags
))
for (p
= "!\"#$@[\\]^`{|}~"; *p
!= '\0'; p
++)
putline("Content-Transfer-Encoding: quoted-printable", mci
);
printf(" ...Content-Transfer-Encoding: quoted-printable\n");
mci
->mci_flags
&= ~MCIF_INHEADER
;
while ((c1
= mime_getchar(e
->e_dfp
, boundaries
, &bt
)) != EOF
)
if (c2
== ' ' || c2
== '\t')
*bp
++ = Base16Code
[(c2
>> 4) & 0x0f];
*bp
++ = Base16Code
[c2
& 0x0f];
if (c2
== ' ' && linelen
== 4 && fromstate
== 4 &&
bitnset(M_ESCFROM
, mci
->mci_mailer
->m_flags
))
else if (c2
== ' ' || c2
== '\t')
if (bitnset(c1
& 0xff, badchars
))
*bp
++ = Base16Code
[(c1
>> 4) & 0x0f];
*bp
++ = Base16Code
[c1
& 0x0f];
else if (c1
!= ' ' && c1
!= '\t')
if (linelen
< 4 && c1
== "From"[linelen
])
/* output any saved character */
if (c2
== ' ' || c2
== '\t')
*bp
++ = Base16Code
[(c2
>> 4) & 0x0f];
*bp
++ = Base16Code
[c2
& 0x0f];
if (linelen
> 0 || boundaries
[0] != NULL
)
printf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames
[bt
]);
** MIME_GETCHAR -- get a character for MIME processing
** Treats boundaries as EOF.
** Parameters:
** fp -- the input file.
** boundaries -- the current MIME boundaries.
** btp -- if the return value is EOF, *btp is set to
** the type of the boundary.
** Returns:
** The next character in the input stream.
mime_getchar(fp
, boundaries
, btp
)
static u_char
*bp
= NULL
;
static bool atbol
= TRUE
; /* at beginning of line */
static int bt
= MBT_SYNTAX
; /* boundary type of next EOF */
static u_char buf
[128]; /* need not be a full line */
/* might be part of a MIME boundary */
/* check for a message boundary */
/* got "--", now check for rest of separator */
while (bp
< &buf
[sizeof buf
- 2] &&
(c
= getc(fp
)) != EOF
&& c
!= '\n')
bt
= mimeboundary(&buf
[1], boundaries
);
/* we have a message boundary */
** MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF
** Parameters:
** fp -- the input file.
** boundaries -- the current MIME boundaries.
** btp -- if the return value is EOF, *btp is set to
** the type of the boundary.
** Returns:
** The next character in the input stream.
mime_getchar_crlf(fp
, boundaries
, btp
)
static bool sendlf
= FALSE
;
c
= mime_getchar(fp
, boundaries
, btp
);
** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
** Parameters:
** line -- the input line.
** boundaries -- the set of currently pending boundaries.
** Returns:
** MBT_NOTSEP -- if this is not a separator line
** MBT_INTERMED -- if this is an intermediate separator
** MBT_FINAL -- if this is a final boundary
** MBT_SYNTAX -- if this is a boundary for the wrong
** enclosure -- i.e., a syntax error.
mimeboundary(line
, boundaries
)
if (line
[0] != '-' || line
[1] != '-' || boundaries
== NULL
)
/* strip off trailing whitespace */
while (line
[i
- 1] == ' ' || line
[i
- 1] == '\t')
printf("mimeboundary: line=\"%s\"... ", line
);
/* check for this as an intermediate boundary */
if (isboundary(&line
[2], boundaries
) >= 0)
else if (i
> 2 && strncmp(&line
[i
- 2], "--", 2) == 0)
/* check for a final boundary */
if (isboundary(&line
[2], boundaries
) >= 0)
printf("%s\n", MimeBoundaryNames
[type
]);
** DEFCHARSET -- return default character set for message
** The first choice for character set is for the mailer
** corresponding to the envelope sender. If neither that
** nor the global configuration file has a default character
** set defined, return "unknown-8bit" as recommended by
** RFC 1428.
** Parameters:
** e -- the envelope for this message.
** Returns:
** The default character set for that mailer.
if (e
!= NULL
&& e
->e_from
.q_mailer
!= NULL
&&
e
->e_from
.q_mailer
->m_defcharset
!= NULL
)
return e
->e_from
.q_mailer
->m_defcharset
;
if (DefaultCharSet
!= NULL
)
** ISBOUNDARY -- is a given string a currently valid boundary?
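** Parameters:
** line -- the current input line.
** boundaries -- the list of valid boundaries.
** Returns:
** The index number in boundaries if the line is found.
** Callers test the result with ">= 0", so a miss is
** presumably reported as a negative value.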
isboundary(line
, boundaries
)
for (i
= 0; boundaries
[i
] != NULL
; i
++)
if (strcmp(line
, boundaries
[i
]) == 0)