merge a bit with Sun version
[unix-history] / usr / src / usr.sbin / sendmail / src / mime.c
CommitLineData
363ce64f
EA
/*
 * Copyright (c) 1994 Eric P. Allman
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 */
8
9# include "sendmail.h"
10# include <string.h>
11
12#ifndef lint
18a81ac0 13static char sccsid[] = "@(#)mime.c 8.12 (Berkeley) %G%";
363ce64f
EA
14#endif /* not lint */
15
/*
**  MIME support.
**
**	I am indebted to John Beck of Hewlett-Packard, who contributed
**	his code to me for inclusion.  As it turns out, I did not use
**	his code since he used a "minimum change" approach that used
**	several temp files, and I wanted a "minimum impact" approach
**	that would avoid copying.  However, looking over his code
**	helped me cement my understanding of the problem.
**
**	I also looked at, but did not directly use, Nathaniel
**	Borenstein's "code.c" module.  Again, it functioned as
**	a file-to-file translator, which did not fit within my
**	design bounds, but it was a useful base for understanding
**	the problem.
*/
32
33
/*
**  Digit alphabets for hex and base64 encoding: the character at
**  index N is the digit that represents the value N.
*/
char	Base16Code[] =	"0123456789ABCDEF";
char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
37
/* types of MIME boundaries */
#define MBT_SYNTAX	0	/* syntax error */
#define MBT_NOTSEP	1	/* not a boundary */
#define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
#define MBT_FINAL	3	/* final boundary (trailing -- included) */
e3c84ea8
EA
43
/*
**  Boundary type most recently computed by mime_getchar(); mime8to7()
**  consults it after mime_getchar() has returned EOF.
*/
static int	MimeBoundaryType;	/* internal linkage */
363ce64f
EA
/*
**  MIME8TO7 -- output 8 bit body in 7 bit format
**
**	The header has already been output -- this has to do the
**	8 to 7 bit conversion.  It would be easy if we didn't have
**	to deal with nested formats (multipart/xxx and message/rfc822).
**
**	We won't be called if we don't have to do a conversion, and
**	appropriate MIME-Version: and Content-Type: fields have been
**	output.  Any Content-Transfer-Encoding: field has not been
**	output, and we can add it here.
**
**	Parameters:
**		mci -- mailer connection information.
**		header -- the header for this body part.
**		e -- envelope.
**		boundaries -- the currently pending message boundaries.
**			NULL if we are processing the outer portion.
**		flags -- to tweak processing.
**
**	Returns:
**		An indicator of what terminated the message part:
**		  MBT_FINAL -- the final boundary
**		  MBT_INTERMED -- an intermediate boundary
**		  MBT_NOTSEP -- an end of file
*/
71
022362dd
EA
/* one "field=value" parameter taken from a Content-Type: header */
struct args
{
	char	*field;		/* name of field */
	char	*value;		/* value of that field */
};
77
363ce64f 78int
022362dd 79mime8to7(mci, header, e, boundaries, flags)
363ce64f 80 register MCI *mci;
a9d5941e 81 HDR *header; register ENVELOPE *e;
022362dd
EA
82 char **boundaries;
83 int flags;
363ce64f
EA
84{
85 register char *p;
86 int linelen;
87 int bt;
88 off_t offset;
89 size_t sectionsize, sectionhighbits;
022362dd
EA
90 int i;
91 char *type;
92 char *subtype;
93 char **pvp;
94 int argc = 0;
95 struct args argv[MAXMIMEARGS];
363ce64f
EA
96 char bbuf[128];
97 char buf[MAXLINE];
022362dd 98 char pvpbuf[MAXLINE];
363ce64f
EA
99
100 if (tTd(43, 1))
101 {
102 printf("mime8to7: boundary=%s\n",
022362dd
EA
103 boundaries[0] == NULL ? "<none>" : boundaries[0]);
104 for (i = 1; boundaries[i] != NULL; i++)
105 printf("\tboundaries[i]\n");
363ce64f 106 }
022362dd 107 type = subtype = "-none-";
363ce64f 108 p = hvalue("Content-Type", header);
022362dd
EA
109 if (p != NULL &&
110 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL)) != NULL &&
111 pvp[0] != NULL)
112 {
113 type = *pvp++;
114 if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
115 *++pvp != NULL)
116 {
117 subtype = *pvp++;
118 }
119
120 /* break out parameters */
121 while (*pvp != NULL && argc < MAXMIMEARGS)
122 {
123 /* skip to semicolon separator */
124 while (*pvp != NULL && strcmp(*pvp, ";") != 0)
125 pvp++;
126 if (*pvp++ == NULL || *pvp == NULL)
127 break;
128
129 /* extract field name */
130 argv[argc].field = *pvp++;
131
132 /* see if there is a value */
133 if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
134 (*++pvp == NULL || strcmp(*pvp, ";") != 0))
135 {
136 argv[argc].value = *pvp;
137 argc++;
138 }
139 }
140 }
141 if (strcasecmp(type, "multipart") == 0)
363ce64f
EA
142 {
143 register char *q;
144
022362dd
EA
145 for (i = 0; i < argc; i++)
146 {
147 if (strcasecmp(argv[i].field, "boundary") == 0)
148 break;
149 }
150 if (i >= argc)
363ce64f
EA
151 {
152 syserr("mime8to7: Content-Type: %s missing boundary", p);
153 p = "---";
154 }
155 else
022362dd 156 p = argv[i].value;
363ce64f
EA
157 if (*p == '"')
158 q = strchr(p, '"');
159 else
363ce64f
EA
160 q = p + strlen(p);
161 if (q - p > sizeof bbuf - 1)
162 {
163 syserr("mime8to7: multipart boundary \"%.*s\" too long",
164 q - p, p);
165 q = p + sizeof bbuf - 1;
166 }
167 strncpy(bbuf, p, q - p);
168 bbuf[q - p] = '\0';
169 if (tTd(43, 1))
170 {
171 printf("mime8to7: multipart boundary \"%s\"\n", bbuf);
172 }
022362dd
EA
173 for (i = 0; i < MAXMIMENESTING; i++)
174 if (boundaries[i] == NULL)
175 break;
176 if (i >= MAXMIMENESTING)
177 syserr("mime8to7: multipart nesting boundary too deep");
178 else
179 {
180 boundaries[i] = bbuf;
181 boundaries[i + 1] = NULL;
182 }
183
184 /* flag subtypes that can't have any 8-bit data */
185 if (strcasecmp(subtype, "signed") == 0)
186 flags |= M87F_NO8BIT;
363ce64f
EA
187
188 /* skip the early "comment" prologue */
189 bt = MBT_FINAL;
190 while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
191 {
022362dd 192 bt = mimeboundary(buf, boundaries);
363ce64f
EA
193 if (bt != MBT_NOTSEP)
194 break;
195 putline(buf, mci);
196 }
197 while (bt != MBT_FINAL)
198 {
199 auto HDR *hdr = NULL;
200
201 sprintf(buf, "--%s", bbuf);
202 putline(buf, mci);
203 collect(e->e_dfp, FALSE, FALSE, &hdr, e);
a579cea5 204 putheader(mci, hdr, e, 0);
022362dd 205 bt = mime8to7(mci, hdr, e, boundaries, flags);
363ce64f
EA
206 }
207 sprintf(buf, "--%s--", bbuf);
208 putline(buf, mci);
209
210 /* skip the late "comment" epilogue */
211 while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
212 {
213 putline(buf, mci);
022362dd 214 bt = mimeboundary(buf, boundaries);
363ce64f
EA
215 if (bt != MBT_NOTSEP)
216 break;
217 }
022362dd 218 boundaries[i] = NULL;
363ce64f
EA
219 return bt;
220 }
221
222 /*
223 ** Non-compound body type
224 **
225 ** Compute the ratio of seven to eight bit characters;
226 ** use that as a heuristic to decide how to do the
227 ** encoding.
228 */
229
022362dd
EA
230 /* handle types that cannot have 8-bit data internally */
231 sprintf(buf, "%s/%s", type, subtype);
232 if (wordinclass(buf, 'n'))
233 flags |= M87F_NO8BIT;
234
363ce64f 235 sectionsize = sectionhighbits = 0;
022362dd 236 if (!bitset(M87F_NO8BIT, flags))
363ce64f 237 {
a9d5941e
EA
238 /* remember where we were */
239 offset = ftell(e->e_dfp);
240 if (offset == -1)
18a81ac0 241 syserr("mime8to7: cannot ftell on df%s", e->e_id);
a9d5941e
EA
242
243 /* do a scan of this body type to count character types */
244 while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
363ce64f 245 {
022362dd 246 bt = mimeboundary(buf, boundaries);
a9d5941e
EA
247 if (bt != MBT_NOTSEP)
248 break;
249 for (p = buf; *p != '\0'; p++)
250 {
251 /* count bytes with the high bit set */
252 sectionsize++;
253 if (bitset(0200, *p))
254 sectionhighbits++;
255 }
e3c84ea8 256
a9d5941e
EA
257 /*
258 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit,
259 ** assume base64. This heuristic avoids double-reading
260 ** large graphics or video files.
261 */
e3c84ea8 262
a9d5941e
EA
263 if (sectionsize >= 4096 &&
264 sectionhighbits > sectionsize / 4)
265 break;
266 }
267 if (feof(e->e_dfp))
268 bt = MBT_FINAL;
363ce64f 269
a9d5941e
EA
270 /* return to the original offset for processing */
271 /* XXX use relative seeks to handle >31 bit file sizes? */
272 if (fseek(e->e_dfp, offset, SEEK_SET) < 0)
18a81ac0 273 syserr("mime8to7: cannot fseek on df%s", e->e_id);
a9d5941e 274 }
363ce64f 275
e3c84ea8
EA
276 /*
277 ** Heuristically determine encoding method.
278 ** If more than 1/8 of the total characters have the
279 ** eighth bit set, use base64; else use quoted-printable.
280 */
281
363ce64f
EA
282 if (tTd(43, 8))
283 {
284 printf("mime8to7: %ld high bits in %ld bytes\n",
285 sectionhighbits, sectionsize);
286 }
bb224b8e
EA
287 if (sectionhighbits == 0)
288 {
289 /* no encoding necessary */
edce97a2
EA
290 p = hvalue("content-transfer-encoding", header);
291 if (p != NULL)
292 {
293 sprintf(buf, "Content-Transfer-Encoding: %s", p);
294 putline(buf, mci);
295 }
bb224b8e
EA
296 putline("", mci);
297 mci->mci_flags &= ~MCIF_INHEADER;
298 while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
299 {
022362dd 300 bt = mimeboundary(buf, boundaries);
bb224b8e
EA
301 if (bt != MBT_NOTSEP)
302 break;
303 if (buf[0] == 'F' &&
304 bitnset(M_ESCFROM, mci->mci_mailer->m_flags) &&
305 strncmp(buf, "From ", 5) == 0)
306 (void) putc('>', mci->mci_out);
307 putline(buf, mci);
308 }
309 }
310 else if (sectionsize / 8 < sectionhighbits)
363ce64f
EA
311 {
312 /* use base64 encoding */
313 int c1, c2;
314
315 putline("Content-Transfer-Encoding: base64", mci);
316 putline("", mci);
317 mci->mci_flags &= ~MCIF_INHEADER;
318 linelen = 0;
022362dd 319 while ((c1 = mime_getchar(e->e_dfp, boundaries)) != EOF)
363ce64f
EA
320 {
321 if (linelen > 71)
322 {
323 fputs(mci->mci_mailer->m_eol, mci->mci_out);
324 linelen = 0;
325 }
326 linelen += 4;
327 fputc(Base64Code[c1 >> 2], mci->mci_out);
328 c1 = (c1 & 0x03) << 4;
022362dd 329 c2 = mime_getchar(e->e_dfp, boundaries);
363ce64f
EA
330 if (c2 == EOF)
331 {
332 fputc(Base64Code[c1], mci->mci_out);
333 fputc('=', mci->mci_out);
334 fputc('=', mci->mci_out);
335 break;
336 }
337 c1 |= (c2 >> 4) & 0x0f;
338 fputc(Base64Code[c1], mci->mci_out);
339 c1 = (c2 & 0x0f) << 2;
022362dd 340 c2 = mime_getchar(e->e_dfp, boundaries);
363ce64f
EA
341 if (c2 == EOF)
342 {
343 fputc(Base64Code[c1], mci->mci_out);
344 fputc('=', mci->mci_out);
345 break;
346 }
347 c1 |= (c2 >> 6) & 0x03;
348 fputc(Base64Code[c1], mci->mci_out);
349 fputc(Base64Code[c2 & 0x3f], mci->mci_out);
350 }
351 }
352 else
353 {
354 /* use quoted-printable encoding */
355 int c1, c2;
a9d5941e 356 int fromstate;
363ce64f
EA
357
358 putline("Content-Transfer-Encoding: quoted-printable", mci);
359 putline("", mci);
360 mci->mci_flags &= ~MCIF_INHEADER;
a9d5941e 361 linelen = fromstate = 0;
bb224b8e 362 c2 = '\n';
022362dd 363 while ((c1 = mime_getchar(e->e_dfp, boundaries)) != EOF)
363ce64f
EA
364 {
365 if (c1 == '\n')
366 {
367 if (c2 == ' ' || c2 == '\t')
368 {
369 fputc('=', mci->mci_out);
efc965d0
EA
370 fputc(Base16Code[(c2 >> 4) & 0x0f],
371 mci->mci_out);
372 fputc(Base16Code[c2 & 0x0f],
373 mci->mci_out);
374 fputs(mci->mci_mailer->m_eol,
375 mci->mci_out);
363ce64f
EA
376 }
377 fputs(mci->mci_mailer->m_eol, mci->mci_out);
a9d5941e 378 linelen = fromstate = 0;
363ce64f
EA
379 c2 = c1;
380 continue;
381 }
a9d5941e
EA
382 if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
383 bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
384 {
385 fputs("=20", mci->mci_out);
386 linelen += 3;
387 }
388 else if (c2 == ' ' || c2 == '\t')
efc965d0
EA
389 {
390 fputc(c2, mci->mci_out);
391 linelen++;
392 }
363ce64f
EA
393 if (linelen > 72)
394 {
395 fputc('=', mci->mci_out);
396 fputs(mci->mci_mailer->m_eol, mci->mci_out);
a9d5941e 397 linelen = fromstate = 0;
bb224b8e 398 c2 = '\n';
363ce64f 399 }
583c74d3
EA
400 if (c2 == '\n' && c1 == '.' &&
401 bitnset(M_XDOT, mci->mci_mailer->m_flags))
402 {
403 fputc('.', mci->mci_out);
404 linelen++;
405 }
e3c84ea8 406 if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=')
363ce64f
EA
407 {
408 fputc('=', mci->mci_out);
409 fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out);
410 fputc(Base16Code[c1 & 0x0f], mci->mci_out);
411 linelen += 3;
412 }
efc965d0 413 else if (c1 != ' ' && c1 != '\t')
363ce64f 414 {
a9d5941e
EA
415 if (linelen < 4 && c1 == "From"[linelen])
416 fromstate++;
363ce64f
EA
417 fputc(c1, mci->mci_out);
418 linelen++;
419 }
420 c2 = c1;
421 }
efc965d0
EA
422
423 /* output any saved character */
424 if (c2 == ' ' || c2 == '\t')
425 {
a9d5941e
EA
426 fputc('=', mci->mci_out);
427 fputc(Base16Code[(c2 >> 4) & 0x0f], mci->mci_out);
428 fputc(Base16Code[c2 & 0x0f], mci->mci_out);
429 linelen += 3;
efc965d0 430 }
363ce64f
EA
431 }
432 if (linelen > 0)
433 fputs(mci->mci_mailer->m_eol, mci->mci_out);
e3c84ea8 434 return MimeBoundaryType;
363ce64f 435}
a9d5941e
EA
436\f/*
437** MIME_GETCHAR -- get a character for MIME processing
438**
439** Treats boundaries as EOF.
440**
441** Parameters:
442** fp -- the input file.
022362dd 443** boundaries -- the current MIME boundaries.
a9d5941e
EA
444**
445** Returns:
446** The next character in the input stream.
447*/
363ce64f
EA
448
449int
022362dd 450mime_getchar(fp, boundaries)
363ce64f 451 register FILE *fp;
022362dd 452 char **boundaries;
363ce64f
EA
453{
454 int c;
455 static char *bp = NULL;
456 static int buflen = 0;
457 static bool atbol = TRUE; /* at beginning of line */
458 static char buf[128]; /* need not be a full line */
459
460 if (buflen > 0)
461 {
462 buflen--;
463 return *bp++;
464 }
a9d5941e
EA
465 bp = buf;
466 buflen = 0;
363ce64f 467 c = fgetc(fp);
a9d5941e
EA
468 if (c == '\n')
469 {
470 /* might be part of a MIME boundary */
471 *bp++ = c;
472 atbol = TRUE;
473 c = fgetc(fp);
474 }
475 if (c != EOF)
476 *bp++ = c;
022362dd 477 if (atbol && c == '-')
363ce64f
EA
478 {
479 /* check for a message boundary */
363ce64f
EA
480 c = fgetc(fp);
481 if (c != '-')
482 {
483 if (c != EOF)
a9d5941e
EA
484 *bp++ = c;
485 buflen = bp - buf - 1;
486 bp = buf;
487 return *bp++;
363ce64f
EA
488 }
489
490 /* got "--", now check for rest of separator */
491 *bp++ = '-';
363ce64f
EA
492 while (bp < &buf[sizeof buf - 1] &&
493 (c = fgetc(fp)) != EOF && c != '\n')
494 {
495 *bp++ = c;
496 }
497 *bp = '\0';
022362dd 498 MimeBoundaryType = mimeboundary(buf, boundaries);
e3c84ea8 499 switch (MimeBoundaryType)
363ce64f
EA
500 {
501 case MBT_FINAL:
502 case MBT_INTERMED:
503 /* we have a message boundary */
504 buflen = 0;
505 return EOF;
506 }
507
508 atbol = c == '\n';
509 if (c != EOF)
510 *bp++ = c;
363ce64f 511 }
363ce64f 512
a9d5941e
EA
513 buflen = bp - buf - 1;
514 if (buflen < 0)
515 return EOF;
516 bp = buf;
517 return *bp++;
363ce64f
EA
518}
519\f/*
520** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
521**
522** Parameters:
523** line -- the input line.
022362dd 524** boundaries -- the set of currently pending boundaries.
363ce64f
EA
525**
526** Returns:
527** MBT_NOTSEP -- if this is not a separator line
528** MBT_INTERMED -- if this is an intermediate separator
529** MBT_FINAL -- if this is a final boundary
530** MBT_SYNTAX -- if this is a boundary for the wrong
531** enclosure -- i.e., a syntax error.
532*/
533
534int
022362dd 535mimeboundary(line, boundaries)
363ce64f 536 register char *line;
022362dd 537 char **boundaries;
363ce64f
EA
538{
539 int type;
540 int i;
022362dd 541 int savec;
363ce64f 542
022362dd 543 if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
363ce64f
EA
544 return MBT_NOTSEP;
545 if (tTd(43, 5))
022362dd 546 printf("mimeboundary: line=\"%s\"... ", line);
363ce64f
EA
547 i = strlen(line);
548 if (line[i - 1] == '\n')
549 i--;
a9d5941e
EA
550 while (line[i - 1] == ' ' || line[i - 1] == '\t')
551 i--;
363ce64f
EA
552 if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
553 {
554 type = MBT_FINAL;
555 i -= 2;
556 }
557 else
558 type = MBT_INTERMED;
559
022362dd
EA
560 savec = line[i];
561 line[i] = '\0';
363ce64f 562 /* XXX should check for improper nesting here */
022362dd 563 if (isboundary(&line[2], boundaries) < 0)
363ce64f 564 type = MBT_NOTSEP;
022362dd 565 line[i] = savec;
363ce64f
EA
566 if (tTd(43, 5))
567 printf("%d\n", type);
568 return type;
569}
3caf3b1f
EA
570\f/*
571** DEFCHARSET -- return default character set for message
572**
573** The first choice for character set is for the mailer
574** corresponding to the envelope sender. If neither that
575** nor the global configuration file has a default character
576** set defined, return "unknown-8bit" as recommended by
577** RFC 1428 section 3.
578**
579** Parameters:
580** e -- the envelope for this message.
581**
582** Returns:
583** The default character set for that mailer.
584*/
585
586char *
587defcharset(e)
588 register ENVELOPE *e;
589{
590 if (e != NULL && e->e_from.q_mailer != NULL &&
591 e->e_from.q_mailer->m_defcharset != NULL)
592 return e->e_from.q_mailer->m_defcharset;
593 if (DefaultCharSet != NULL)
594 return DefaultCharSet;
595 return "unknown-8bit";
596}
022362dd
EA
/*
**  ISBOUNDARY -- is a given string a currently valid boundary?
**
**	Parameters:
**		line -- the candidate boundary text, without the
**			leading "--" or any trailing "--".
**		boundaries -- the NULL-terminated list of valid
**			boundaries.  A NULL list matches nothing.
**
**	Returns:
**		The index number in boundaries if the line is found.
**		-1 -- otherwise.
*/

int
isboundary(line, boundaries)
	char *line;
	char **boundaries;
{
	register int i;

	if (boundaries == NULL)
		return -1;

	/* i must advance each time round, or a miss on entry 0 never ends */
	for (i = 0; boundaries[i] != NULL; i++)
	{
		if (strcmp(line, boundaries[i]) == 0)
			return i;
	}
	return -1;
}