Commit | Line | Data |
---|---|---|
363ce64f EA |
1 | /* |
2 | * Copyright (c) 1994 Eric P. Allman | |
3 | * Copyright (c) 1994 | |
4 | * The Regents of the University of California. All rights reserved. | |
5 | * | |
6 | * %sccs.include.redist.c% | |
7 | */ | |
8 | ||
9 | # include "sendmail.h" | |
10 | # include <string.h> | |
11 | ||
12 | #ifndef lint | |
edce97a2 | 13 | static char sccsid[] = "@(#)mime.c 8.5 (Berkeley) %G%"; |
363ce64f EA |
14 | #endif /* not lint */ |
15 | ||
16 | /* | |
17 | ** MIME support. | |
18 | ** | |
19 | ** I am indebted to John Beck of Hewlett-Packard, who contributed | |
20 | ** his code to me for inclusion. As it turns out, I did not use | |
21 | ** his code since he used a "minimum change" approach that used | |
22 | ** several temp files, and I wanted a "minimum impact" approach | |
23 | ** that would avoid copying. However, looking over his code | |
24 | ** helped me cement my understanding of the problem. | |
25 | ** | |
26 | ** I also looked at, but did not directly use, Nathaniel | |
27 | ** Borenstein's "code.c" module. Again, it functioned as | |
28 | ** a file-to-file translator, which did not fit within my | |
29 | ** design bounds, but it was a useful base for understanding | |
30 | ** the problem. | |
31 | */ | |
32 | ||
33 | ||
34 | /* digit alphabets: hex for quoted-printable escapes, and base64 */ | |
35 | char Base16Code[] = "0123456789ABCDEF"; | |
36 | char Base64Code[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | |
37 | ||
38 | /* types of MIME boundaries, as returned by mimeboundary() */ | |
39 | #define MBT_SYNTAX 0 /* syntax error (boundary for the wrong enclosure) */ | |
40 | #define MBT_NOTSEP 1 /* not a boundary */ | |
41 | #define MBT_INTERMED 2 /* intermediate boundary (no trailing --) */ | |
42 | #define MBT_FINAL 3 /* final boundary (trailing -- included) */ | |
e3c84ea8 EA |
43 | |
44 | static int MimeBoundaryType; /* type of the last "--" line checked by mime_getchar(); internal linkage */ | |
363ce64f EA |
45 | \f/* |
46 | ** MIME8TO7 -- output 8 bit body in 7 bit format | |
47 | ** | |
48 | ** The header has already been output -- this has to do the | |
49 | ** 8 to 7 bit conversion. It would be easy if we didn't have | |
50 | ** to deal with nested formats (multipart/xxx and message/rfc822). | |
51 | ** | |
52 | ** We won't be called if we don't have to do a conversion, and | |
53 | ** appropriate MIME-Version: and Content-Type: fields have been | |
54 | ** output. Any Content-Transfer-Encoding: field has not been | |
55 | ** output, and we can add it here. | |
56 | ** | |
57 | ** Parameters: | |
58 | ** mci -- mailer connection information. | |
59 | ** header -- the header for this body part. | |
60 | ** e -- envelope. | |
61 | ** boundary -- the message boundary -- NULL if we are | |
62 | ** processing the outer portion. | |
63 | ** | |
64 | ** Returns: | |
65 | ** An indicator of what terminated the message part: | |
66 | ** MBT_FINAL -- the final boundary | |
67 | ** MBT_INTERMED -- an intermediate boundary | |
68 | ** MBT_NOTSEP -- an end of file | |
69 | */ | |
70 | ||
71 | int | |
72 | mime8to7(mci, header, e, boundary) | |
73 | register MCI *mci; | |
74 | HDR *header; | |
75 | register ENVELOPE *e; | |
76 | char *boundary; | |
77 | { | |
78 | register char *p; | |
79 | int linelen; | |
80 | int bt; | |
81 | off_t offset; | |
82 | size_t sectionsize, sectionhighbits; | |
83 | char bbuf[128]; | |
84 | char buf[MAXLINE]; | |
363ce64f EA |
85 | |
86 | if (tTd(43, 1)) | |
87 | { | |
88 | printf("mime8to7: boundary=%s\n", | |
89 | boundary == NULL ? "<none>" : boundary); | |
90 | } | |
91 | p = hvalue("Content-Type", header); | |
92 | if (p != NULL && strncasecmp(p, "multipart/", 10) == 0) | |
93 | { | |
94 | register char *q; | |
95 | ||
96 | /* oh dear -- this part is hard */ | |
97 | p = strstr(p, "boundary="); /*XXX*/ | |
98 | if (p == NULL) | |
99 | { | |
100 | syserr("mime8to7: Content-Type: %s missing boundary", p); | |
101 | p = "---"; | |
102 | } | |
103 | else | |
104 | p += 9; | |
105 | if (*p == '"') | |
106 | q = strchr(p, '"'); | |
107 | else | |
108 | q = strchr(p, ','); | |
109 | if (q == NULL) | |
110 | q = p + strlen(p); | |
111 | if (q - p > sizeof bbuf - 1) | |
112 | { | |
113 | syserr("mime8to7: multipart boundary \"%.*s\" too long", | |
114 | q - p, p); | |
115 | q = p + sizeof bbuf - 1; | |
116 | } | |
117 | strncpy(bbuf, p, q - p); | |
118 | bbuf[q - p] = '\0'; | |
119 | if (tTd(43, 1)) | |
120 | { | |
121 | printf("mime8to7: multipart boundary \"%s\"\n", bbuf); | |
122 | } | |
123 | ||
124 | /* skip the early "comment" prologue */ | |
125 | bt = MBT_FINAL; | |
126 | while (fgets(buf, sizeof buf, e->e_dfp) != NULL) | |
127 | { | |
128 | bt = mimeboundary(buf, bbuf); | |
129 | if (bt != MBT_NOTSEP) | |
130 | break; | |
131 | putline(buf, mci); | |
132 | } | |
133 | while (bt != MBT_FINAL) | |
134 | { | |
135 | auto HDR *hdr = NULL; | |
136 | ||
137 | sprintf(buf, "--%s", bbuf); | |
138 | putline(buf, mci); | |
139 | collect(e->e_dfp, FALSE, FALSE, &hdr, e); | |
140 | putheader(mci, hdr, e); | |
141 | bt = mime8to7(mci, hdr, e, bbuf); | |
142 | } | |
143 | sprintf(buf, "--%s--", bbuf); | |
144 | putline(buf, mci); | |
145 | ||
146 | /* skip the late "comment" epilogue */ | |
147 | while (fgets(buf, sizeof buf, e->e_dfp) != NULL) | |
148 | { | |
149 | putline(buf, mci); | |
150 | bt = mimeboundary(buf, boundary); | |
151 | if (bt != MBT_NOTSEP) | |
152 | break; | |
153 | } | |
154 | return bt; | |
155 | } | |
156 | ||
157 | /* | |
158 | ** Non-compound body type | |
159 | ** | |
160 | ** Compute the ratio of seven to eight bit characters; | |
161 | ** use that as a heuristic to decide how to do the | |
162 | ** encoding. | |
163 | */ | |
164 | ||
165 | /* remember where we were */ | |
166 | offset = ftell(e->e_dfp); | |
167 | if (offset == -1) | |
168 | syserr("mime8to7: cannot ftell on %s", e->e_df); | |
169 | ||
170 | /* do a scan of this body type to count character types */ | |
171 | sectionsize = sectionhighbits = 0; | |
172 | while (fgets(buf, sizeof buf, e->e_dfp) != NULL) | |
173 | { | |
174 | bt = mimeboundary(buf, boundary); | |
175 | if (bt != MBT_NOTSEP) | |
176 | break; | |
177 | for (p = buf; *p != '\0'; p++) | |
178 | { | |
e3c84ea8 | 179 | /* count bytes with the high bit set */ |
363ce64f EA |
180 | sectionsize++; |
181 | if (bitset(0200, *p)) | |
182 | sectionhighbits++; | |
183 | } | |
e3c84ea8 EA |
184 | |
185 | /* | |
186 | ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, | |
187 | ** assume base64. This heuristic avoids double-reading | |
188 | ** large graphics or video files. | |
189 | */ | |
190 | ||
191 | if (sectionsize >= 4096 && sectionhighbits > sectionsize / 4) | |
192 | break; | |
363ce64f EA |
193 | } |
194 | if (feof(e->e_dfp)) | |
195 | bt = MBT_FINAL; | |
196 | ||
197 | /* return to the original offset for processing */ | |
e3c84ea8 | 198 | /* XXX use relative seeks to handle >31 bit file sizes? */ |
363ce64f EA |
199 | if (fseek(e->e_dfp, offset, SEEK_SET) < 0) |
200 | syserr("mime8to7: cannot fseek on %s", e->e_df); | |
201 | ||
e3c84ea8 EA |
202 | /* |
203 | ** Heuristically determine encoding method. | |
204 | ** If more than 1/8 of the total characters have the | |
205 | ** eighth bit set, use base64; else use quoted-printable. | |
206 | */ | |
207 | ||
363ce64f EA |
208 | if (tTd(43, 8)) |
209 | { | |
210 | printf("mime8to7: %ld high bits in %ld bytes\n", | |
211 | sectionhighbits, sectionsize); | |
212 | } | |
bb224b8e EA |
213 | if (sectionhighbits == 0) |
214 | { | |
215 | /* no encoding necessary */ | |
edce97a2 EA |
216 | p = hvalue("content-transfer-encoding", header); |
217 | if (p != NULL) | |
218 | { | |
219 | sprintf(buf, "Content-Transfer-Encoding: %s", p); | |
220 | putline(buf, mci); | |
221 | } | |
bb224b8e EA |
222 | putline("", mci); |
223 | mci->mci_flags &= ~MCIF_INHEADER; | |
224 | while (fgets(buf, sizeof buf, e->e_dfp) != NULL) | |
225 | { | |
226 | bt = mimeboundary(buf, boundary); | |
227 | if (bt != MBT_NOTSEP) | |
228 | break; | |
229 | if (buf[0] == 'F' && | |
230 | bitnset(M_ESCFROM, mci->mci_mailer->m_flags) && | |
231 | strncmp(buf, "From ", 5) == 0) | |
232 | (void) putc('>', mci->mci_out); | |
233 | putline(buf, mci); | |
234 | } | |
235 | } | |
236 | else if (sectionsize / 8 < sectionhighbits) | |
363ce64f EA |
237 | { |
238 | /* use base64 encoding */ | |
239 | int c1, c2; | |
240 | ||
241 | putline("Content-Transfer-Encoding: base64", mci); | |
242 | putline("", mci); | |
243 | mci->mci_flags &= ~MCIF_INHEADER; | |
244 | linelen = 0; | |
245 | while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF) | |
246 | { | |
247 | if (linelen > 71) | |
248 | { | |
249 | fputs(mci->mci_mailer->m_eol, mci->mci_out); | |
250 | linelen = 0; | |
251 | } | |
252 | linelen += 4; | |
253 | fputc(Base64Code[c1 >> 2], mci->mci_out); | |
254 | c1 = (c1 & 0x03) << 4; | |
255 | c2 = mime_getchar(e->e_dfp, boundary); | |
256 | if (c2 == EOF) | |
257 | { | |
258 | fputc(Base64Code[c1], mci->mci_out); | |
259 | fputc('=', mci->mci_out); | |
260 | fputc('=', mci->mci_out); | |
261 | break; | |
262 | } | |
263 | c1 |= (c2 >> 4) & 0x0f; | |
264 | fputc(Base64Code[c1], mci->mci_out); | |
265 | c1 = (c2 & 0x0f) << 2; | |
266 | c2 = mime_getchar(e->e_dfp, boundary); | |
267 | if (c2 == EOF) | |
268 | { | |
269 | fputc(Base64Code[c1], mci->mci_out); | |
270 | fputc('=', mci->mci_out); | |
271 | break; | |
272 | } | |
273 | c1 |= (c2 >> 6) & 0x03; | |
274 | fputc(Base64Code[c1], mci->mci_out); | |
275 | fputc(Base64Code[c2 & 0x3f], mci->mci_out); | |
276 | } | |
277 | } | |
278 | else | |
279 | { | |
280 | /* use quoted-printable encoding */ | |
281 | int c1, c2; | |
282 | ||
283 | putline("Content-Transfer-Encoding: quoted-printable", mci); | |
284 | putline("", mci); | |
285 | mci->mci_flags &= ~MCIF_INHEADER; | |
286 | linelen = 0; | |
bb224b8e | 287 | c2 = '\n'; |
363ce64f EA |
288 | while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF) |
289 | { | |
290 | if (c1 == '\n') | |
291 | { | |
292 | if (c2 == ' ' || c2 == '\t') | |
293 | { | |
294 | fputc('=', mci->mci_out); | |
295 | fputs(mci->mci_mailer->m_eol, mci->mci_out); | |
296 | } | |
297 | fputs(mci->mci_mailer->m_eol, mci->mci_out); | |
298 | linelen = 0; | |
299 | c2 = c1; | |
300 | continue; | |
301 | } | |
bb224b8e EA |
302 | else if (c2 == '\n' && c1 == '.' && |
303 | bitnset(M_XDOT, mci->mci_mailer->m_flags)) | |
304 | { | |
305 | fputc('.', mci->mci_out); | |
306 | linelen++; | |
307 | } | |
363ce64f EA |
308 | if (linelen > 72) |
309 | { | |
310 | fputc('=', mci->mci_out); | |
311 | fputs(mci->mci_mailer->m_eol, mci->mci_out); | |
312 | linelen = 0; | |
bb224b8e | 313 | c2 = '\n'; |
363ce64f | 314 | } |
e3c84ea8 | 315 | if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=') |
363ce64f EA |
316 | { |
317 | fputc('=', mci->mci_out); | |
318 | fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out); | |
319 | fputc(Base16Code[c1 & 0x0f], mci->mci_out); | |
320 | linelen += 3; | |
321 | } | |
322 | else | |
323 | { | |
324 | fputc(c1, mci->mci_out); | |
325 | linelen++; | |
326 | } | |
327 | c2 = c1; | |
328 | } | |
329 | } | |
330 | if (linelen > 0) | |
331 | fputs(mci->mci_mailer->m_eol, mci->mci_out); | |
e3c84ea8 | 332 | return MimeBoundaryType; |
363ce64f EA |
333 | } |
334 | ||
335 | ||
336 | int | |
337 | mime_getchar(fp, boundary) | |
338 | register FILE *fp; | |
339 | char *boundary; | |
340 | { | |
341 | int c; | |
342 | static char *bp = NULL; | |
343 | static int buflen = 0; | |
344 | static bool atbol = TRUE; /* at beginning of line */ | |
345 | static char buf[128]; /* need not be a full line */ | |
346 | ||
347 | if (buflen > 0) | |
348 | { | |
349 | buflen--; | |
350 | return *bp++; | |
351 | } | |
352 | c = fgetc(fp); | |
353 | if (atbol && c == '-' && boundary != NULL) | |
354 | { | |
355 | /* check for a message boundary */ | |
356 | bp = buf; | |
357 | c = fgetc(fp); | |
358 | if (c != '-') | |
359 | { | |
360 | if (c != EOF) | |
361 | { | |
362 | *bp = c; | |
363 | buflen++; | |
364 | } | |
365 | return '-'; | |
366 | } | |
367 | ||
368 | /* got "--", now check for rest of separator */ | |
369 | *bp++ = '-'; | |
370 | *bp++ = '-'; | |
371 | while (bp < &buf[sizeof buf - 1] && | |
372 | (c = fgetc(fp)) != EOF && c != '\n') | |
373 | { | |
374 | *bp++ = c; | |
375 | } | |
376 | *bp = '\0'; | |
e3c84ea8 EA |
377 | MimeBoundaryType = mimeboundary(buf, boundary); |
378 | switch (MimeBoundaryType) | |
363ce64f EA |
379 | { |
380 | case MBT_FINAL: | |
381 | case MBT_INTERMED: | |
382 | /* we have a message boundary */ | |
383 | buflen = 0; | |
384 | return EOF; | |
385 | } | |
386 | ||
387 | atbol = c == '\n'; | |
388 | if (c != EOF) | |
389 | *bp++ = c; | |
390 | buflen = bp - buf - 1; | |
391 | bp = buf; | |
392 | return *bp++; | |
393 | } | |
363ce64f EA |
394 | |
395 | atbol = c == '\n'; | |
396 | return c; | |
397 | } | |
398 | \f/* | |
399 | ** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type | |
400 | ** | |
401 | ** Parameters: | |
402 | ** line -- the input line. | |
403 | ** boundary -- the expected boundary. | |
404 | ** | |
405 | ** Returns: | |
406 | ** MBT_NOTSEP -- if this is not a separator line | |
407 | ** MBT_INTERMED -- if this is an intermediate separator | |
408 | ** MBT_FINAL -- if this is a final boundary | |
409 | ** MBT_SYNTAX -- if this is a boundary for the wrong | |
410 | ** enclosure -- i.e., a syntax error. | |
411 | */ | |
412 | ||
413 | int | |
414 | mimeboundary(line, boundary) | |
415 | register char *line; | |
416 | char *boundary; | |
417 | { | |
418 | int type; | |
419 | int i; | |
420 | ||
421 | if (line[0] != '-' || line[1] != '-' || boundary == NULL) | |
422 | return MBT_NOTSEP; | |
423 | if (tTd(43, 5)) | |
424 | printf("mimeboundary: bound=\"%s\", line=\"%s\"... ", | |
425 | boundary, line); | |
426 | i = strlen(line); | |
427 | if (line[i - 1] == '\n') | |
428 | i--; | |
429 | if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) | |
430 | { | |
431 | type = MBT_FINAL; | |
432 | i -= 2; | |
433 | } | |
434 | else | |
435 | type = MBT_INTERMED; | |
436 | ||
437 | /* XXX should check for improper nesting here */ | |
438 | if (strncmp(boundary, &line[2], i - 2) != 0 || | |
439 | strlen(boundary) != i - 2) | |
440 | type = MBT_NOTSEP; | |
441 | if (tTd(43, 5)) | |
442 | printf("%d\n", type); | |
443 | return type; | |
444 | } |