output original Content-Transfer-Encoding header if we aren't
[unix-history] / usr / src / usr.sbin / sendmail / src / mime.c
CommitLineData
363ce64f
EA
1/*
2 * Copyright (c) 1994 Eric P. Allman
3 * Copyright (c) 1994
4 * The Regents of the University of California. All rights reserved.
5 *
6 * %sccs.include.redist.c%
7 */
8
9# include "sendmail.h"
10# include <string.h>
11
12#ifndef lint
edce97a2 13static char sccsid[] = "@(#)mime.c 8.5 (Berkeley) %G%";
363ce64f
EA
14#endif /* not lint */
15
16/*
17** MIME support.
18**
19** I am indebted to John Beck of Hewlett-Packard, who contributed
20** his code to me for inclusion. As it turns out, I did not use
21** his code since he used a "minimum change" approach that used
22** several temp files, and I wanted a "minimum impact" approach
23** that would avoid copying. However, looking over his code
24** helped me cement my understanding of the problem.
25**
26** I also looked at, but did not directly use, Nathaniel
27** Borenstein's "code.c" module. Again, it functioned as
28** a file-to-file translator, which did not fit within my
29** design bounds, but it was a useful base for understanding
30** the problem.
31*/
32
33
34/* digit alphabets, indexed by digit value: hex (used for quoted-printable escapes) and base64 */
35char Base16Code[] = "0123456789ABCDEF";
36char Base64Code[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
37
38/* types of MIME boundaries, as returned by mimeboundary() */
39#define MBT_SYNTAX 0 /* syntax error (boundary for the wrong enclosure) */
40#define MBT_NOTSEP 1 /* not a boundary */
41#define MBT_INTERMED 2 /* intermediate boundary (no trailing --) */
42#define MBT_FINAL 3 /* final boundary (trailing -- included) */
e3c84ea8
EA
43
44static int MimeBoundaryType; /* type of the last "--" line examined by mime_getchar(); read by mime8to7() */
363ce64f
EA
45\f/*
46** MIME8TO7 -- output 8 bit body in 7 bit format
47**
48** The header has already been output -- this has to do the
49** 8 to 7 bit conversion. It would be easy if we didn't have
50** to deal with nested formats (multipart/xxx and message/rfc822).
51**
52** We won't be called if we don't have to do a conversion, and
53** appropriate MIME-Version: and Content-Type: fields have been
54** output. Any Content-Transfer-Encoding: field has not been
55** output, and we can add it here.
56**
57** Parameters:
58** mci -- mailer connection information.
59** header -- the header for this body part.
60** e -- envelope.
61** boundary -- the message boundary -- NULL if we are
62** processing the outer portion.
63**
64** Returns:
65** An indicator of what terminated the message part:
66** MBT_FINAL -- the final boundary
67** MBT_INTERMED -- an intermediate boundary
68** MBT_NOTSEP -- an end of file
69*/
70
71int
72mime8to7(mci, header, e, boundary)
73 register MCI *mci;
74 HDR *header;
75 register ENVELOPE *e;
76 char *boundary;
77{
78 register char *p;
79 int linelen;
80 int bt;
81 off_t offset;
82 size_t sectionsize, sectionhighbits;
83 char bbuf[128];
84 char buf[MAXLINE];
363ce64f
EA
85
86 if (tTd(43, 1))
87 {
88 printf("mime8to7: boundary=%s\n",
89 boundary == NULL ? "<none>" : boundary);
90 }
91 p = hvalue("Content-Type", header);
92 if (p != NULL && strncasecmp(p, "multipart/", 10) == 0)
93 {
94 register char *q;
95
96 /* oh dear -- this part is hard */
97 p = strstr(p, "boundary="); /*XXX*/
98 if (p == NULL)
99 {
100 syserr("mime8to7: Content-Type: %s missing boundary", p);
101 p = "---";
102 }
103 else
104 p += 9;
105 if (*p == '"')
106 q = strchr(p, '"');
107 else
108 q = strchr(p, ',');
109 if (q == NULL)
110 q = p + strlen(p);
111 if (q - p > sizeof bbuf - 1)
112 {
113 syserr("mime8to7: multipart boundary \"%.*s\" too long",
114 q - p, p);
115 q = p + sizeof bbuf - 1;
116 }
117 strncpy(bbuf, p, q - p);
118 bbuf[q - p] = '\0';
119 if (tTd(43, 1))
120 {
121 printf("mime8to7: multipart boundary \"%s\"\n", bbuf);
122 }
123
124 /* skip the early "comment" prologue */
125 bt = MBT_FINAL;
126 while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
127 {
128 bt = mimeboundary(buf, bbuf);
129 if (bt != MBT_NOTSEP)
130 break;
131 putline(buf, mci);
132 }
133 while (bt != MBT_FINAL)
134 {
135 auto HDR *hdr = NULL;
136
137 sprintf(buf, "--%s", bbuf);
138 putline(buf, mci);
139 collect(e->e_dfp, FALSE, FALSE, &hdr, e);
140 putheader(mci, hdr, e);
141 bt = mime8to7(mci, hdr, e, bbuf);
142 }
143 sprintf(buf, "--%s--", bbuf);
144 putline(buf, mci);
145
146 /* skip the late "comment" epilogue */
147 while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
148 {
149 putline(buf, mci);
150 bt = mimeboundary(buf, boundary);
151 if (bt != MBT_NOTSEP)
152 break;
153 }
154 return bt;
155 }
156
157 /*
158 ** Non-compound body type
159 **
160 ** Compute the ratio of seven to eight bit characters;
161 ** use that as a heuristic to decide how to do the
162 ** encoding.
163 */
164
165 /* remember where we were */
166 offset = ftell(e->e_dfp);
167 if (offset == -1)
168 syserr("mime8to7: cannot ftell on %s", e->e_df);
169
170 /* do a scan of this body type to count character types */
171 sectionsize = sectionhighbits = 0;
172 while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
173 {
174 bt = mimeboundary(buf, boundary);
175 if (bt != MBT_NOTSEP)
176 break;
177 for (p = buf; *p != '\0'; p++)
178 {
e3c84ea8 179 /* count bytes with the high bit set */
363ce64f
EA
180 sectionsize++;
181 if (bitset(0200, *p))
182 sectionhighbits++;
183 }
e3c84ea8
EA
184
185 /*
186 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit,
187 ** assume base64. This heuristic avoids double-reading
188 ** large graphics or video files.
189 */
190
191 if (sectionsize >= 4096 && sectionhighbits > sectionsize / 4)
192 break;
363ce64f
EA
193 }
194 if (feof(e->e_dfp))
195 bt = MBT_FINAL;
196
197 /* return to the original offset for processing */
e3c84ea8 198 /* XXX use relative seeks to handle >31 bit file sizes? */
363ce64f
EA
199 if (fseek(e->e_dfp, offset, SEEK_SET) < 0)
200 syserr("mime8to7: cannot fseek on %s", e->e_df);
201
e3c84ea8
EA
202 /*
203 ** Heuristically determine encoding method.
204 ** If more than 1/8 of the total characters have the
205 ** eighth bit set, use base64; else use quoted-printable.
206 */
207
363ce64f
EA
208 if (tTd(43, 8))
209 {
210 printf("mime8to7: %ld high bits in %ld bytes\n",
211 sectionhighbits, sectionsize);
212 }
bb224b8e
EA
213 if (sectionhighbits == 0)
214 {
215 /* no encoding necessary */
edce97a2
EA
216 p = hvalue("content-transfer-encoding", header);
217 if (p != NULL)
218 {
219 sprintf(buf, "Content-Transfer-Encoding: %s", p);
220 putline(buf, mci);
221 }
bb224b8e
EA
222 putline("", mci);
223 mci->mci_flags &= ~MCIF_INHEADER;
224 while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
225 {
226 bt = mimeboundary(buf, boundary);
227 if (bt != MBT_NOTSEP)
228 break;
229 if (buf[0] == 'F' &&
230 bitnset(M_ESCFROM, mci->mci_mailer->m_flags) &&
231 strncmp(buf, "From ", 5) == 0)
232 (void) putc('>', mci->mci_out);
233 putline(buf, mci);
234 }
235 }
236 else if (sectionsize / 8 < sectionhighbits)
363ce64f
EA
237 {
238 /* use base64 encoding */
239 int c1, c2;
240
241 putline("Content-Transfer-Encoding: base64", mci);
242 putline("", mci);
243 mci->mci_flags &= ~MCIF_INHEADER;
244 linelen = 0;
245 while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF)
246 {
247 if (linelen > 71)
248 {
249 fputs(mci->mci_mailer->m_eol, mci->mci_out);
250 linelen = 0;
251 }
252 linelen += 4;
253 fputc(Base64Code[c1 >> 2], mci->mci_out);
254 c1 = (c1 & 0x03) << 4;
255 c2 = mime_getchar(e->e_dfp, boundary);
256 if (c2 == EOF)
257 {
258 fputc(Base64Code[c1], mci->mci_out);
259 fputc('=', mci->mci_out);
260 fputc('=', mci->mci_out);
261 break;
262 }
263 c1 |= (c2 >> 4) & 0x0f;
264 fputc(Base64Code[c1], mci->mci_out);
265 c1 = (c2 & 0x0f) << 2;
266 c2 = mime_getchar(e->e_dfp, boundary);
267 if (c2 == EOF)
268 {
269 fputc(Base64Code[c1], mci->mci_out);
270 fputc('=', mci->mci_out);
271 break;
272 }
273 c1 |= (c2 >> 6) & 0x03;
274 fputc(Base64Code[c1], mci->mci_out);
275 fputc(Base64Code[c2 & 0x3f], mci->mci_out);
276 }
277 }
278 else
279 {
280 /* use quoted-printable encoding */
281 int c1, c2;
282
283 putline("Content-Transfer-Encoding: quoted-printable", mci);
284 putline("", mci);
285 mci->mci_flags &= ~MCIF_INHEADER;
286 linelen = 0;
bb224b8e 287 c2 = '\n';
363ce64f
EA
288 while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF)
289 {
290 if (c1 == '\n')
291 {
292 if (c2 == ' ' || c2 == '\t')
293 {
294 fputc('=', mci->mci_out);
295 fputs(mci->mci_mailer->m_eol, mci->mci_out);
296 }
297 fputs(mci->mci_mailer->m_eol, mci->mci_out);
298 linelen = 0;
299 c2 = c1;
300 continue;
301 }
bb224b8e
EA
302 else if (c2 == '\n' && c1 == '.' &&
303 bitnset(M_XDOT, mci->mci_mailer->m_flags))
304 {
305 fputc('.', mci->mci_out);
306 linelen++;
307 }
363ce64f
EA
308 if (linelen > 72)
309 {
310 fputc('=', mci->mci_out);
311 fputs(mci->mci_mailer->m_eol, mci->mci_out);
312 linelen = 0;
bb224b8e 313 c2 = '\n';
363ce64f 314 }
e3c84ea8 315 if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=')
363ce64f
EA
316 {
317 fputc('=', mci->mci_out);
318 fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out);
319 fputc(Base16Code[c1 & 0x0f], mci->mci_out);
320 linelen += 3;
321 }
322 else
323 {
324 fputc(c1, mci->mci_out);
325 linelen++;
326 }
327 c2 = c1;
328 }
329 }
330 if (linelen > 0)
331 fputs(mci->mci_mailer->m_eol, mci->mci_out);
e3c84ea8 332 return MimeBoundaryType;
363ce64f
EA
333}
334
335
336int
337mime_getchar(fp, boundary)
338 register FILE *fp;
339 char *boundary;
340{
341 int c;
342 static char *bp = NULL;
343 static int buflen = 0;
344 static bool atbol = TRUE; /* at beginning of line */
345 static char buf[128]; /* need not be a full line */
346
347 if (buflen > 0)
348 {
349 buflen--;
350 return *bp++;
351 }
352 c = fgetc(fp);
353 if (atbol && c == '-' && boundary != NULL)
354 {
355 /* check for a message boundary */
356 bp = buf;
357 c = fgetc(fp);
358 if (c != '-')
359 {
360 if (c != EOF)
361 {
362 *bp = c;
363 buflen++;
364 }
365 return '-';
366 }
367
368 /* got "--", now check for rest of separator */
369 *bp++ = '-';
370 *bp++ = '-';
371 while (bp < &buf[sizeof buf - 1] &&
372 (c = fgetc(fp)) != EOF && c != '\n')
373 {
374 *bp++ = c;
375 }
376 *bp = '\0';
e3c84ea8
EA
377 MimeBoundaryType = mimeboundary(buf, boundary);
378 switch (MimeBoundaryType)
363ce64f
EA
379 {
380 case MBT_FINAL:
381 case MBT_INTERMED:
382 /* we have a message boundary */
383 buflen = 0;
384 return EOF;
385 }
386
387 atbol = c == '\n';
388 if (c != EOF)
389 *bp++ = c;
390 buflen = bp - buf - 1;
391 bp = buf;
392 return *bp++;
393 }
363ce64f
EA
394
395 atbol = c == '\n';
396 return c;
397}
398\f/*
399** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
400**
401** Parameters:
402** line -- the input line.
403** boundary -- the expected boundary.
404**
405** Returns:
406** MBT_NOTSEP -- if this is not a separator line
407** MBT_INTERMED -- if this is an intermediate separator
408** MBT_FINAL -- if this is a final boundary
409** MBT_SYNTAX -- if this is a boundary for the wrong
410** enclosure -- i.e., a syntax error.
411*/
412
413int
414mimeboundary(line, boundary)
415 register char *line;
416 char *boundary;
417{
418 int type;
419 int i;
420
421 if (line[0] != '-' || line[1] != '-' || boundary == NULL)
422 return MBT_NOTSEP;
423 if (tTd(43, 5))
424 printf("mimeboundary: bound=\"%s\", line=\"%s\"... ",
425 boundary, line);
426 i = strlen(line);
427 if (line[i - 1] == '\n')
428 i--;
429 if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
430 {
431 type = MBT_FINAL;
432 i -= 2;
433 }
434 else
435 type = MBT_INTERMED;
436
437 /* XXX should check for improper nesting here */
438 if (strncmp(boundary, &line[2], i - 2) != 0 ||
439 strlen(boundary) != i - 2)
440 type = MBT_NOTSEP;
441 if (tTd(43, 5))
442 printf("%d\n", type);
443 return type;
444}