Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | # Copyright (C) 2001-2004 Python Software Foundation |
2 | # Author: Barry Warsaw | |
3 | # Contact: email-sig@python.org | |
4 | ||
5 | """Classes to generate plain text from a message object tree.""" | |
6 | ||
7 | import re | |
8 | import sys | |
9 | import time | |
10 | import random | |
11 | import warnings | |
12 | from cStringIO import StringIO | |
13 | ||
14 | from email.Header import Header | |
15 | ||
# Matches the literal "From " at the start of any line of a body; used to
# escape such lines when From_ mangling is enabled.
fcre = re.compile(r'^From ', flags=re.MULTILINE)

# Small string constants shared by the generators below.
NL = '\n'
UNDERSCORE = '_'
20 | ||
def _is8bitstring(s):
    """Return True if s is a byte string that is not pure us-ascii.

    Anything that is not a str instance yields False, as does a str that
    decodes cleanly as us-ascii.
    """
    if not isinstance(s, str):
        return False
    try:
        unicode(s, 'us-ascii')
    except UnicodeError:
        # The decode failed, so the string holds raw 8bit data.
        return True
    else:
        return False
28 | ||
29 | ||
30 | \f | |
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    # For backwards compatibility, but this is slower
    def __call__(self, msg, unixfrom=False):
        """Deprecated alias for flatten(); emits a DeprecationWarning."""
        warnings.warn('__call__() deprecated; use flatten()',
                      DeprecationWarning, 2)
        self.flatten(msg, unixfrom)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        for h, v in msg.items():
            # The trailing comma suppresses the newline, so the value printed
            # next ends up on the same line as the header name.
            print >> self._fp, '%s:' % h,
            if self._maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.
                print >> self._fp, Header(
                    v, maxlinelen=self._maxheaderlen,
                    header_name=h, continuation_ws='\t').encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not isinstance(payload, basestring):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if boundary is None:
            # The message has no boundary yet, so craft one that doesn't
            # appear in any of the message texts and record it on the
            # message.  We only do this when it is actually needed: joining
            # and scanning all the subparts is wasted work otherwise, and
            # while set_boundary() preserves order, it doesn't preserve
            # newlines/continuations in headers.  This is no big deal in
            # practice, but turns out to be inconvenient for the unittest
            # suite.
            boundary = _make_boundary(NL.join(msgtexts))
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing newline
        if msg.preamble is not None:
            print >> self._fp, msg.preamble
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            print >> self._fp, '\n--' + boundary
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--')
        if msg.epilogue is not None:
            print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_message_delivery_status(self, msg):
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        g.flatten(msg.get_payload(0), unixfrom=False)
        self._fp.write(s.getvalue())
275 | ||
276 | ||
277 | \f | |
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

class DecodedGenerator(Generator):
    """Generate a text representation of a message.

    Works like the Generator base class, but every non-text part is replaced
    by a format string that describes the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__(), with one additional optional argument.

        All subparts of a message are walked.  A subpart whose main type is
        `text' has its decoded payload printed.

        For any other subpart, fmt is a format string that is printed in
        place of the payload.  fmt is expanded with these keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default) this template is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            fmt = _FMT
        self._fmt = fmt

    def _dispatch(self, msg):
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                # Container parts produce no output of their own.
                continue
            if maintype == 'text':
                print >> self, part.get_payload(decode=True)
                continue
            # Anything else is summarized through the format string.
            keywords = {
                'type'       : part.get_content_type(),
                'maintype'   : part.get_content_maintype(),
                'subtype'    : part.get_content_subtype(),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
                }
            print >> self, self._fmt % keywords
333 | ||
334 | ||
335 | \f | |
336 | # Helper | |
337 | _width = len(repr(sys.maxint-1)) | |
338 | _fmt = '%%0%dd' % _width | |
339 | ||
340 | def _make_boundary(text=None): | |
341 | # Craft a random boundary. If text is given, ensure that the chosen | |
342 | # boundary doesn't appear in the text. | |
343 | token = random.randrange(sys.maxint) | |
344 | boundary = ('=' * 15) + (_fmt % token) + '==' | |
345 | if text is None: | |
346 | return boundary | |
347 | b = boundary | |
348 | counter = 0 | |
349 | while True: | |
350 | cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE) | |
351 | if not cre.search(text): | |
352 | break | |
353 | b = boundary + '.' + str(counter) | |
354 | counter += 1 | |
355 | return b |