Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | #! /usr/bin/env python |
2 | ||
3 | """Mimification and unmimification of mail messages. | |
4 | ||
5 | Decode quoted-printable parts of a mail message or encode using | |
6 | quoted-printable. | |
7 | ||
8 | Usage: | |
9 | mimify(input, output) | |
10 | unmimify(input, output, decode_base64 = 0) | |
11 | to encode and decode respectively. Input and output may be the name | |
12 | of a file or an open file object. Only a readline() method is used | |
13 | on the input file, only a write() method is used on the output file. | |
14 | When using file names, the input and output file names may be the | |
15 | same. | |
16 | ||
17 | Interactive usage: | |
18 | mimify.py -e [infile [outfile]] | |
19 | mimify.py -d [infile [outfile]] | |
20 | to encode and decode respectively. Infile defaults to standard | |
21 | input and outfile to standard output. | |
22 | """ | |
23 | ||
# Configure
# These three values are the user-tunable settings of the module.
MAXLEN = 200    # if lines longer than this, encode as quoted-printable
                # (the -l command line option overrides this value)
CHARSET = 'ISO-8859-1'  # default charset for non-US-ASCII mail
QUOTE = '> '            # string replies are quoted with
# End configure
29 | ||
30 | import re | |
31 | ||
__all__ = ["mimify","unmimify","mime_encode_header","mime_decode_header"]

# Patterns used to classify header lines and to find encoded text.
# All of them except `he` are case-insensitive.

# Content-Transfer-Encoding header announcing quoted-printable
qp = re.compile('^content-transfer-encoding:\\s*quoted-printable', re.I)
# Content-Transfer-Encoding header announcing base64
base64_re = re.compile('^content-transfer-encoding:\\s*base64', re.I)
# multipart Content-Type header; group 1 is the boundary string
# (re.S lets '.' cross the newlines of a folded header)
mp = re.compile('^content-type:.*multipart/.*boundary="?([^;"\n]*)', re.I|re.S)
# Content-Type header with a quoted us-ascii / iso-8859-N charset;
# groups are (text before the charset, charset name, text after it)
chrset = re.compile('^(content-type:.*charset=")(us-ascii|iso-8859-[0-9]+)(".*)', re.I|re.S)
# end-of-header line: nothing but zero or more dashes before the newline
he = re.compile('^-*\n')
# one quoted-printable escape "=XX"; group 1 is the two hex digits
mime_code = re.compile('=([0-9a-f][0-9a-f])', re.I)
# an "=?iso-8859-1?q?...?=" encoded word; group 1 is the encoded text
mime_head = re.compile('=\\?iso-8859-1\\?q\\?([^? \t\n]+)\\?=', re.I)
# Subject header of a reply ("Subject: Re: ...")
repl = re.compile('^subject:\\s+re: ', re.I)
42 | ||
class File:
    """Minimal file-like wrapper that stops at a multipart boundary.

    Only readline() is supported.  Lines are passed through from the
    wrapped file until a line equal to the boundary (optionally followed
    by '--') is read; that line is remembered in `peek` and from then on
    readline() reports end of file by returning ''.
    """

    def __init__(self, file, boundary):
        self.file = file            # underlying object with readline()
        self.boundary = boundary    # boundary string, or None for no limit
        self.peek = None            # the boundary line, once it was read

    def readline(self):
        """Return the next line, or '' at end of file or at a boundary."""
        # A boundary was already consumed: this part is exhausted.
        if self.peek is not None:
            return ''
        text = self.file.readline()
        if text and self.boundary:
            separators = (self.boundary + '\n', self.boundary + '--\n')
            if text in separators:
                # keep the separator so the caller can inspect it
                self.peek = text
                return ''
        return text
66 | ||
class HeaderFile:
    """File-like wrapper that returns complete (unfolded) header lines.

    readline() joins a header line with the continuation lines that
    follow it (lines starting with a space or a tab) and returns them as
    one string.  The line that ends the header is returned unchanged.
    """

    def __init__(self, file):
        self.file = file    # underlying object with readline()
        self.peek = None    # line read ahead but not yet returned

    def readline(self):
        """Return the next header, including its continuation lines."""
        if self.peek is None:
            header = self.file.readline()
        else:
            # start from the line read ahead by the previous call
            header, self.peek = self.peek, None
        if not header or he.match(header):
            # end of input, or the line that terminates the header
            return header
        while True:
            following = self.file.readline()
            self.peek = following
            if len(following) == 0 or following[0] not in (' ', '\t'):
                # not a continuation: keep it in peek for the next call
                return header
            header = header + following
            self.peek = None
89 | ||
def mime_decode(line):
    """Decode a single line of quoted-printable text to 8bit.

    Every "=XX" escape (XX being two hexadecimal digits, in either
    case) is replaced by the character with that code; all other text
    is returned unchanged.
    """
    # One substitution pass instead of a manual search loop that
    # re-concatenated the result string for every escape found.
    return mime_code.sub(lambda found: chr(int(found.group(1), 16)), line)
102 | ||
def mime_decode_header(line):
    """Decode a header line to 8bit.

    Every "=?iso-8859-1?q?...?=" encoded word in line is replaced by
    its decoded text; the rest of the line is returned unchanged.
    """
    def decode_word(found):
        # convert underscores to spaces (before =XX conversion!)
        return mime_decode(found.group(1).replace('_', ' '))

    return mime_head.sub(decode_word, line)
117 | ||
def unmimify_part(ifile, ofile, decode_base64 = 0):
    """Convert a quoted-printable part of a MIME mail message to 8bit.

    ifile         -- File object to read the part from (its readline()
                     method and boundary attribute are used)
    ofile         -- object with a write() method receiving the result
    decode_base64 -- if true, base64 encoded parts are decoded as well

    The header is copied with encoded words decoded; the
    Content-Transfer-Encoding header is dropped when the body is going
    to be decoded.  Nested parts of a multipart message are handled by
    recursion.
    """
    multipart = None
    quoted_printable = 0
    is_base64 = 0
    is_repl = 0
    # A boundary that starts with the reply-quote string means every line
    # of this part carries that prefix.  Compare len(QUOTE) characters
    # (not a fixed 2) so that a reconfigured QUOTE keeps working.
    if ifile.boundary and ifile.boundary[:len(QUOTE)] == QUOTE:
        prefix = QUOTE
    else:
        prefix = ''

    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            return
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
        line = mime_decode_header(line)
        if qp.match(line):
            quoted_printable = 1
            continue        # skip this header
        if decode_base64 and base64_re.match(line):
            is_base64 = 1
            continue        # skip this header too
        ofile.write(pref + line)
        if not prefix and repl.match(line):
            # we're dealing with a reply message
            is_repl = 1
        mp_res = mp.match(line)
        if mp_res:
            multipart = '--' + mp_res.group(1)
        if he.match(line):
            break
    # NOTE: is_repl is recorded but nothing below acts on it.
    if is_repl and (quoted_printable or multipart):
        is_repl = 0

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            return
        # strip the encoded-word wrapper, keeping the text inside it
        line = re.sub(mime_head, '\\1', line)
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
        while multipart:
            if line == multipart + '--\n':
                # closing boundary: no more parts follow
                ofile.write(pref + line)
                multipart = None
                line = None
                break
            if line == multipart + '\n':
                # boundary between parts: convert the nested part
                ofile.write(pref + line)
                nifile = File(ifile, multipart)
                unmimify_part(nifile, ofile, decode_base64)
                # the boundary line that ended the nested part, if any
                line = nifile.peek
                if not line:
                    # premature end of file
                    break
                continue
            # not a boundary between parts
            break
        if line and quoted_printable:
            # join soft line breaks ("=" at end of line) before decoding
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        if line and is_base64 and not pref:
            import base64
            line = base64.decodestring(line)
        if line:
            ofile.write(pref + line)
202 | ||
def unmimify(infile, outfile, decode_base64 = 0):
    """Convert quoted-printable parts of a MIME mail message to 8bit.

    infile        -- file name, or an object with a readline() method
    outfile       -- file name, or an object with a write() method
    decode_base64 -- if true, base64 encoded parts are decoded as well

    When both arguments are the same file name, the input file is first
    renamed to the same name prefixed with ',' and the result is written
    under the original name.  Files opened here are closed again before
    returning; file objects supplied by the caller are only flushed.
    """
    opened = []     # files opened by this function, to be closed again
    try:
        if isinstance(infile, str):
            ifile = open(infile)
            opened.append(ifile)
            if isinstance(outfile, str) and infile == outfile:
                import os
                d, f = os.path.split(infile)
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if isinstance(outfile, str):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        unmimify_part(nifile, ofile, decode_base64)
        ofile.flush()
    finally:
        for stream in opened:
            stream.close()
220 | ||
mime_char = re.compile('[=\177-\377]') # quote these chars in body
mime_header_char = re.compile('[=?\177-\377]') # quote these in header

def mime_encode(line, header):
    """Code a single line as quoted-printable.

    line   -- the text to encode
    header -- if true, quote some extra characters ('?' in addition to
              '=' and the characters \\177-\\377)

    Returns the encoded text.  Encoded text of 75 characters or more is
    split with soft line breaks ("=" followed by a newline), never in
    the middle of an "=XX" escape.
    """
    if header:
        reg = mime_header_char
    else:
        reg = mime_char
    # Single substitution pass; '%02X' yields the upper-case hex digits.
    encoded = reg.sub(lambda found: '=%02X' % ord(found.group(0)), line)
    if line[:5] == 'From ':
        # quote 'From ' at the start of a line for stupid mailers
        # ('F' is never matched by reg, so it is still the first character)
        encoded = '=46' + encoded[1:]

    pieces = []
    while len(encoded) >= 75:
        i = 73
        # back up so the cut does not fall inside an "=XX" escape
        while encoded[i] == '=' or encoded[i-1] == '=':
            i = i - 1
        i = i + 1
        pieces.append(encoded[:i] + '=\n')
        encoded = encoded[i:]
    pieces.append(encoded)
    return ''.join(pieces)
255 | ||
# A word containing at least one character in the range \177-\377.
# Group 1 is the delimiter in front of the word (space, tab, '(' or the
# start of the string), group 2 is the word itself; the word must be
# followed by a space, tab, ')' or newline, which is not consumed.
mime_header = re.compile('([ \t(]|^)([-a-zA-Z0-9_+]*[\177-\377][-a-zA-Z0-9_+\177-\377]*)(?=[ \t)]|\n)')

def mime_encode_header(line):
    """Code a single header line as quoted-printable.

    Every word matched by mime_header is replaced by an encoded word of
    the form "=?CHARSET?Q?...?="; the rest of the line is returned
    unchanged.
    """
    def encode_word(found):
        # keep the leading delimiter, wrap only the word itself
        return '%s=?%s?Q?%s?=' % (found.group(1), CHARSET,
                                  mime_encode(found.group(2), 1))

    return mime_header.sub(encode_word, line)
271 | ||
# Patterns used by mimify_part to classify header and body lines.
mv = re.compile('^mime-version:', re.I)     # a Mime-Version header
cte = re.compile('^content-transfer-encoding:', re.I)   # any C-T-E header
iso_char = re.compile('[\177-\377]')    # a character in the range \177-\377
275 | ||
def mimify_part(ifile, ofile, is_mime):
    """Convert an 8bit part of a MIME mail message to quoted-printable.

    ifile   -- object with a readline() method to read the part from
    ofile   -- object with a write() method receiving the result
    is_mime -- true if the part is already known to belong to a MIME
               message (nested parts are converted with is_mime = 1)

    The header and the body of the part are read completely before
    anything is written, because the header that is written depends on
    what the body contains.  Nested parts of a multipart message are
    handled by recursion.
    """
    has_cte = is_qp = is_base64 = 0
    multipart = None
    must_quote_body = must_quote_header = has_iso_chars = 0

    header = []         # header lines, in input order
    header_end = ''     # the line that terminated the header, if any
    message = []        # body lines (already decoded if they were QP)
    message_end = ''    # the boundary line that ended the body, if any
    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            break
        if not must_quote_header and iso_char.search(line):
            must_quote_header = 1
        if mv.match(line):
            is_mime = 1
        if cte.match(line):
            has_cte = 1
            if qp.match(line):
                is_qp = 1
            elif base64_re.match(line):
                is_base64 = 1
        mp_res = mp.match(line)
        if mp_res:
            multipart = '--' + mp_res.group(1)
        if he.match(line):
            header_end = line
            break
        header.append(line)

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            break
        if multipart:
            # either kind of boundary line ends the body of this part
            if line == multipart + '--\n':
                message_end = line
                break
            if line == multipart + '\n':
                message_end = line
                break
        if is_base64:
            # base64 bodies are copied unchanged
            message.append(line)
            continue
        if is_qp:
            # join soft line breaks, then decode, so that the body can be
            # encoded again from its 8bit form below
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        message.append(line)
        if not has_iso_chars:
            if iso_char.search(line):
                has_iso_chars = must_quote_body = 1
        if not must_quote_body:
            # overlong lines also force quoted-printable
            if len(line) > MAXLEN:
                must_quote_body = 1

    # convert and output header and body
    for line in header:
        if must_quote_header:
            line = mime_encode_header(line)
        chrset_res = chrset.match(line)
        if chrset_res:
            if has_iso_chars:
                # change us-ascii into iso-8859-1
                if chrset_res.group(2).lower() == 'us-ascii':
                    line = '%s%s%s' % (chrset_res.group(1),
                                       CHARSET,
                                       chrset_res.group(3))
            else:
                # change iso-8859-* into us-ascii
                line = '%sus-ascii%s' % chrset_res.group(1, 3)
        if has_cte and cte.match(line):
            # rewrite the existing header to name the encoding written
            line = 'Content-Transfer-Encoding: '
            if is_base64:
                line = line + 'base64\n'
            elif must_quote_body:
                line = line + 'quoted-printable\n'
            else:
                line = line + '7bit\n'
        ofile.write(line)
    if (must_quote_header or must_quote_body) and not is_mime:
        # something gets encoded in a non-MIME message: add MIME headers
        ofile.write('Mime-Version: 1.0\n')
        ofile.write('Content-Type: text/plain; ')
        if has_iso_chars:
            ofile.write('charset="%s"\n' % CHARSET)
        else:
            ofile.write('charset="us-ascii"\n')
    if must_quote_body and not has_cte:
        ofile.write('Content-Transfer-Encoding: quoted-printable\n')
    ofile.write(header_end)

    for line in message:
        if must_quote_body:
            line = mime_encode(line, 0)
        ofile.write(line)
    ofile.write(message_end)

    # handle the parts that follow the boundary line just written
    line = message_end
    while multipart:
        if line == multipart + '--\n':
            # read bit after the end of the last part
            while 1:
                line = ifile.readline()
                if not line:
                    return
                if must_quote_body:
                    line = mime_encode(line, 0)
                ofile.write(line)
        if line == multipart + '\n':
            nifile = File(ifile, multipart)
            mimify_part(nifile, ofile, 1)
            # the boundary line that ended the nested part, if any
            line = nifile.peek
            if not line:
                # premature end of file
                break
            ofile.write(line)
            continue
        # unexpectedly no multipart separator--copy rest of file
        while 1:
            line = ifile.readline()
            if not line:
                return
            if must_quote_body:
                line = mime_encode(line, 0)
            ofile.write(line)
410 | ||
def mimify(infile, outfile):
    """Convert 8bit parts of a MIME mail message to quoted-printable.

    infile  -- file name, or an object with a readline() method
    outfile -- file name, or an object with a write() method

    When both arguments are the same file name, the input file is first
    renamed to the same name prefixed with ',' and the result is written
    under the original name.  Files opened here are closed again before
    returning; file objects supplied by the caller are only flushed.
    """
    opened = []     # files opened by this function, to be closed again
    try:
        if isinstance(infile, str):
            ifile = open(infile)
            opened.append(ifile)
            if isinstance(outfile, str) and infile == outfile:
                import os
                d, f = os.path.split(infile)
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if isinstance(outfile, str):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        mimify_part(nifile, ofile, 0)
        ofile.flush()
    finally:
        for stream in opened:
            stream.close()
428 | ||
import sys
# Command line interface:  mimify [-l len] -[ed] [infile [outfile]]
#   -e      encode (mimify)
#   -d      decode (unmimify)
#   -b      also decode base64 parts (only valid together with -d)
#   -l len  set MAXLEN, the line length above which a body is encoded
if __name__ == '__main__' or (len(sys.argv) > 0 and sys.argv[0] == 'mimify'):
    import getopt
    usage = 'Usage: mimify [-l len] -[ed] [infile [outfile]]'

    decode_base64 = 0
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'l:edb')
    except getopt.error:
        # unknown option or missing option argument: show the usage
        # line instead of a traceback
        print(usage)
        sys.exit(1)
    if len(args) not in (0, 1, 2):
        print(usage)
        sys.exit(1)
    # exactly one of -e and -d must be given, and -b requires -d
    if (('-e', '') in opts) == (('-d', '') in opts) or \
       ((('-b', '') in opts) and (('-d', '') not in opts)):
        print(usage)
        sys.exit(1)
    for o, a in opts:
        if o == '-e':
            encode = mimify
        elif o == '-d':
            encode = unmimify
        elif o == '-l':
            try:
                MAXLEN = int(a)
            except (ValueError, OverflowError):
                print(usage)
                sys.exit(1)
        elif o == '-b':
            decode_base64 = 1
    # infile defaults to standard input, outfile to standard output
    if len(args) == 0:
        encode_args = (sys.stdin, sys.stdout)
    elif len(args) == 1:
        encode_args = (args[0], sys.stdout)
    else:
        encode_args = (args[0], args[1])
    if decode_base64:
        # only unmimify takes the third argument (-b requires -d)
        encode_args = encode_args + (decode_base64,)
    encode(*encode_args)