Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | # Copyright (C) 2001-2004 Python Software Foundation |
2 | # Author: Barry Warsaw | |
3 | # Contact: email-sig@python.org | |
4 | ||
5 | """Basic message object for the email package object model.""" | |
6 | ||
7 | import re | |
8 | import uu | |
9 | import binascii | |
10 | import warnings | |
11 | from cStringIO import StringIO | |
12 | ||
13 | # Intrapackage imports | |
14 | from email import Utils | |
15 | from email import Errors | |
16 | from email import Charset | |
17 | ||
# Separator placed between a header value and each of its parameters.
SEMISPACE = '; '

# Regular expression used to split header parameters.  BAW: this may be too
# simple.  It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
# most headers found in the wild.  We may eventually need a full-fledged
# parser.
paramre = re.compile(r'\s*;\s*')
# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
28 | ||
29 | ||
30 | \f | |
31 | # Helper functions | |
def _formatparam(param, value=None, quote=True):
    """Render a header parameter as `key=value', or as a bare `key'.

    When value is None or empty, param is returned unchanged.  A tuple value
    is taken to be an RFC 2231 triple of (charset, language, value), where
    charset is a string and not a Charset instance; it is encoded with
    Utils.encode_rfc2231() and a `*' is appended to the parameter name.

    The value is wrapped in double quotes when quote is true, or when it
    contains one of the tspecials characters.
    """
    if value is None or len(value) == 0:
        return param
    if isinstance(value, tuple):
        charset, language, text = value[0], value[1], value[2]
        param = param + '*'
        value = Utils.encode_rfc2231(text, charset, language)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    needs_quotes = quote or tspecials.search(value)
    if not needs_quotes:
        return '%s=%s' % (param, value)
    return '%s="%s"' % (param, Utils.quote(value))
53 | ||
def _parseparam(s):
    """Split a `;'-prefixed parameter string into a list of parameters.

    s must start with `;' for anything to be parsed; each iteration consumes
    one leading `;' and the text up to the next separator.  A `;' that lies
    inside a double-quoted value does not act as a separator.  For every
    `key=value' item the key is lower-cased and whitespace around the key
    and the value is stripped; items without `=' are stripped and kept as
    they are.  Quoting of values is preserved.

    Returns the list of parameter strings, in order of appearance.
    """
    plist = []
    while s[:1] == ';':
        s = s[1:]
        end = s.find(';')
        # A separator is only real if the number of unescaped double quotes
        # before it is even.  Backslash-escaped quotes (\") are part of the
        # quoted value and must not be counted, otherwise a value such as
        # "a\";b" would be cut in half at the embedded semicolon.
        while end > 0 and (s.count('"', 0, end) -
                           s.count('\\"', 0, end)) % 2:
            end = s.find(';', end + 1)
        if end < 0:
            end = len(s)
        f = s[:end]
        if '=' in f:
            i = f.index('=')
            f = f[:i].strip().lower() + '=' + f[i+1:].strip()
        plist.append(f.strip())
        s = s[end:]
    return plist
70 | ||
71 | ||
def _unquotevalue(value):
    """Unquote a parameter value, leaving RFC 2231 tuples as tuples.

    This is different than Utils.collapse_rfc2231_value() because it doesn't
    try to convert the value to a unicode.  Message.get_param() and
    Message.get_params() are both currently defined to return the tuple in
    the face of RFC 2231 parameters, so only the third item of a tuple is
    unquoted and the first two are passed through.
    """
    if not isinstance(value, tuple):
        return Utils.unquote(value)
    charset, language, text = value[0], value[1], value[2]
    return charset, language, Utils.unquote(text)
81 | ||
82 | ||
83 | \f | |
84 | class Message: | |
85 | """Basic message object. | |
86 | ||
87 | A message object is defined as something that has a bunch of RFC 2822 | |
88 | headers and a payload. It may optionally have an envelope header | |
89 | (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a | |
90 | multipart or a message/rfc822), then the payload is a list of Message | |
91 | objects, otherwise it is a string. | |
92 | ||
93 | Message objects implement part of the `mapping' interface, which assumes | |
94 | there is exactly one occurrence of the header per message. Some headers | |
95 | do in fact appear multiple times (e.g. Received) and for those headers, | |
96 | you must use the explicit API to set or get all the headers. Not all of | |
97 | the mapping methods are implemented. | |
98 | """ | |
def __init__(self):
    """Create an empty message with no headers, payload or charset."""
    # Headers are kept as a list of (name, value) pairs.
    self._headers = []
    self._payload = None
    self._charset = None
    self._unixfrom = None
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    self.defects = []
    # Content type reported when there is no Content-Type header
    self._default_type = 'text/plain'
109 | ||
def __str__(self):
    """Return the whole message flattened to a string.

    Equivalent to as_string(unixfrom=True), so the envelope header is
    requested along with the headers and the body.
    """
    flattened = self.as_string(unixfrom=True)
    return flattened
115 | ||
def as_string(self, unixfrom=False):
    """Return the entire formatted message as a string.

    Optional `unixfrom', when True, asks for the Unix From_ envelope header
    to be included.

    This is a convenience method and may not generate the message exactly
    as you intend because by default it mangles lines that begin with
    "From ".  For more flexibility, use the flatten() method of a
    Generator instance.
    """
    # Imported here rather than at module level.
    from email.Generator import Generator
    buf = StringIO()
    Generator(buf).flatten(self, unixfrom=unixfrom)
    return buf.getvalue()
131 | ||
def is_multipart(self):
    """Return True if the payload is a list, i.e. the message has parts."""
    payload = self._payload
    return isinstance(payload, list)
135 | ||
136 | # | |
137 | # Unix From_ line | |
138 | # | |
def set_unixfrom(self, unixfrom):
    """Store unixfrom as the message's Unix From_ envelope line."""
    self._unixfrom = unixfrom
141 | ||
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None until one is set)."""
    return self._unixfrom
144 | ||
145 | # | |
146 | # Payload manipulation. | |
147 | # | |
def attach(self, payload):
    """Append the given payload to the current payload.

    If there is no payload yet, it becomes a one-element list holding the
    given payload; otherwise the given payload is appended to the existing
    one.  To set the payload to a scalar object, use set_payload() instead.
    """
    if self._payload is not None:
        self._payload.append(payload)
        return
    self._payload = [payload]
159 | ||
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload; a TypeError is raised if i is
    given and the payload is not a list.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload is decoded
    if the header's value is `quoted-printable', `base64', or one of the
    uuencode spellings (`x-uuencode', `uuencode', `uue', `x-uue').  If some
    other encoding is used, or the header is missing, or the base64 or
    uuencoded data is bogus, the payload is returned as-is.

    If the message is a multipart and the decode flag is True, then None
    is returned.
    """
    if i is None:
        payload = self._payload
    elif isinstance(self._payload, list):
        payload = self._payload[i]
    else:
        raise TypeError('Expected list, got %s' % type(self._payload))
    if not decode:
        return payload
    if self.is_multipart():
        return None
    cte = self.get('content-transfer-encoding', '').lower()
    if cte == 'quoted-printable':
        return Utils._qdecode(payload)
    if cte == 'base64':
        try:
            return Utils._bdecode(payload)
        except binascii.Error:
            # Incorrect padding
            return payload
    if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        decoded = StringIO()
        try:
            uu.decode(StringIO(payload+'\n'), decoded)
        except uu.Error:
            # Some decoding problem
            return payload
        return decoded.getvalue()
    # Everything else, including encodings with 8bit or 7bit are returned
    # unchanged.
    return payload
209 | ||
def set_payload(self, payload, charset=None):
    """Replace the payload with the given value.

    When charset is not None it is passed on to set_charset(); see that
    method for details.
    """
    self._payload = payload
    if charset is None:
        return
    self.set_charset(charset)
219 | ||
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  If it is a string it will be converted to a Charset instance.
    If charset is None, the charset parameter will be removed from the
    Content-Type field.  Anything else will generate a TypeError.

    The message will be assumed to be of type text/* encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if isinstance(charset, str):
        charset = Charset.Charset(charset)
    if not isinstance(charset, Charset.Charset):
        raise TypeError(charset)
    self._charset = charset
    if not self.has_key('MIME-Version'):
        self.add_header('MIME-Version', '1.0')
    if self.has_key('Content-Type'):
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if self.has_key('Content-Transfer-Encoding'):
        return
    cte = charset.get_body_encoding()
    # The body encoding is first tried as a callable applied to this
    # message; if calling it raises TypeError it is added as the header
    # value instead.
    # NOTE(review): a TypeError raised from inside a callable encoder takes
    # the same fallback path -- confirm that is intended.
    try:
        cte(self)
    except TypeError:
        self.add_header('Content-Transfer-Encoding', cte)
259 | ||
def get_charset(self):
    """Return the charset stored by set_charset(), or None if there is none.
    """
    current = self._charset
    return current
264 | ||
265 | # | |
266 | # MAPPING INTERFACE (partial) | |
267 | # | |
def __len__(self):
    """Return the number of stored headers, counting duplicates."""
    headers = self._headers
    return len(headers)
271 | ||
def __getitem__(self, name):
    """Look up a header value by field name.

    A missing header yields None rather than an exception, because the
    lookup is delegated to get() with its default failobj.

    Note that if the header appeared multiple times, exactly which
    occurrence gets returned is undefined.  Use get_all() to get all
    the values matching a header field name.
    """
    value = self.get(name)
    return value
282 | ||
def __setitem__(self, name, val):
    """Add a header with the given field name and value.

    Note: the new header is appended; an existing header with the same
    field name is not overwritten.  Use __delitem__() first to delete any
    existing headers.
    """
    header = (name, val)
    self._headers.append(header)
290 | ||
def __delitem__(self, name):
    """Remove every header whose field name matches, ignoring case.

    Does not raise an exception if the header is missing.
    """
    target = name.lower()
    self._headers = [(k, v) for k, v in self._headers
                     if k.lower() != target]
302 | ||
def __contains__(self, name):
    """Return whether a header with this field name exists, ignoring case."""
    wanted = name.lower()
    present = [field.lower() for field, _ in self._headers]
    return wanted in present
305 | ||
def has_key(self, name):
    """Return true if the message contains the header."""
    # A private sentinel distinguishes "absent" from any stored value.
    sentinel = object()
    found = self.get(name, sentinel)
    return found is not sentinel
310 | ||
def keys(self):
    """Return a new list of the field names of all stored headers.

    The names come back in the order the headers are stored, which is the
    order they appeared in the original message or were added to it, and
    may contain duplicates.  Any fields deleted and re-inserted are always
    appended to the header list.
    """
    return [pair[0] for pair in self._headers]
320 | ||
def values(self):
    """Return a new list of the values of all stored headers.

    The values come back in the order the headers are stored, which is the
    order they appeared in the original message or were added to it, and
    may contain duplicates.  Any fields deleted and re-inserted are always
    appended to the header list.
    """
    return [pair[1] for pair in self._headers]
330 | ||
def items(self):
    """Return a copy of the list of (field name, value) header pairs.

    The pairs come back in the order the headers are stored, which is the
    order they appeared in the original message or were added to it, and
    may contain duplicates.  Any fields deleted and re-inserted are always
    appended to the header list.
    """
    return list(self._headers)
340 | ||
def get(self, name, failobj=None):
    """Return the value of the first header matching name, ignoring case.

    Like __getitem__() but returns failobj instead of None when the field
    is missing.
    """
    key = name.lower()
    for field, value in self._headers:
        if field.lower() != key:
            continue
        return value
    return failobj
352 | ||
353 | # | |
354 | # Additional useful stuff | |
355 | # | |
356 | ||
def get_all(self, name, failobj=None):
    """Return a list of the values of every header matching name.

    Matching ignores case.  The values come back in the order the headers
    are stored, and may contain duplicates.  Any fields deleted and
    re-inserted are always appended to the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    key = name.lower()
    matches = [v for k, v in self._headers if k.lower() == key]
    if matches:
        return matches
    return failobj
374 | ||
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add and _value its main value.  Keyword
    arguments set additional parameters for the header field, with
    underscores in the keyword converted to dashes.  A parameter whose
    value is None is added as the bare key; any other parameter is
    formatted by _formatparam().  The pieces are joined with `; ' and the
    header is appended to the message.  If _value is None, the header
    value consists of the parameters only.

    Example:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    """
    rendered = []
    for key, val in _params.items():
        pname = key.replace('_', '-')
        if val is None:
            rendered.append(pname)
        else:
            rendered.append(_formatparam(pname, val))
    if _value is not None:
        rendered = [_value] + rendered
    self._headers.append((_name, SEMISPACE.join(rendered)))
396 | ||
def replace_header(self, _name, _value):
    """Replace the value of the first header matching _name.

    Matching ignores case.  The header keeps its position in the header
    list and its original field-name spelling.  If no matching header was
    found, a KeyError is raised with the lower-cased name.
    """
    wanted = _name.lower()
    for index, (field, _old) in enumerate(self._headers):
        if field.lower() == wanted:
            self._headers[index] = (field, _value)
            return
    raise KeyError(wanted)
411 | ||
412 | # | |
413 | # Deprecated methods. These will be removed in email 3.1. | |
414 | # | |
415 | ||
def get_type(self, failobj=None):
    """Return the message's content type (deprecated).

    Emits a DeprecationWarning; use get_content_type() instead.

    The returned string is coerced to lowercase and returned as a single
    string of the form `maintype/subtype'.  If there was no Content-Type
    header in the message, failobj is returned (defaults to None).
    """
    warnings.warn('get_type() deprecated; use get_content_type()',
                  DeprecationWarning, 2)
    sentinel = object()
    header = self.get('content-type', sentinel)
    if header is sentinel:
        return failobj
    # Only the text before the first `;' is the type itself.
    first = paramre.split(header)[0]
    return first.lower().strip()
430 | ||
def get_main_type(self, failobj=None):
    """Return the message's main content type if present (deprecated).

    Emits a DeprecationWarning; use get_content_maintype() instead.
    failobj is returned when get_type() finds no content type, or when the
    type does not contain exactly one `/'.
    """
    warnings.warn('get_main_type() deprecated; use get_content_maintype()',
                  DeprecationWarning, 2)
    sentinel = object()
    ctype = self.get_type(sentinel)
    if ctype is sentinel or ctype.count('/') != 1:
        return failobj
    maintype, _subtype = ctype.split('/')
    return maintype
442 | ||
def get_subtype(self, failobj=None):
    """Return the message's content subtype if present (deprecated).

    Emits a DeprecationWarning; use get_content_subtype() instead.
    failobj is returned when get_type() finds no content type, or when the
    type does not contain exactly one `/'.
    """
    warnings.warn('get_subtype() deprecated; use get_content_subtype()',
                  DeprecationWarning, 2)
    sentinel = object()
    ctype = self.get_type(sentinel)
    if ctype is sentinel or ctype.count('/') != 1:
        return failobj
    _maintype, subtype = ctype.split('/')
    return subtype
454 | ||
455 | # | |
456 | # Use these three methods instead of the three above. | |
457 | # | |
458 | ||
def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype'.  If there was no Content-Type header in the
    message, the default type as given by get_default_type() will be
    returned.  Since according to RFC 2045, messages always have a default
    type this will always return a value.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    raw = self.get('content-type', sentinel)
    if raw is sentinel:
        # This should have no parameters
        return self.get_default_type()
    ctype = paramre.split(raw)[0].lower().strip()
    if ctype.count('/') == 1:
        return ctype
    # RFC 2045, section 5.2 says if its invalid, use text/plain
    return 'text/plain'
482 | ||
def get_content_maintype(self):
    """Return the message's main content type.

    This is the part before the `/' in the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[0]
491 | ||
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the part after the first `/' in the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
500 | ||
def get_default_type(self):
    """Return the `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default = self._default_type
    return default
509 | ||
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The value is kept on the message object only; it is
    not stored in the Content-Type header.
    """
    self._default_type = ctype
518 | ||
def _get_params_preserve(self, failobj, header):
    """Return the parameters of header as (name, value) pairs.

    Like get_params() but preserves the quoting of values.  failobj is
    returned if the header is missing.  A parameter without `=' is given
    the empty string as its value.  The collected pairs are passed through
    Utils.decode_params() before being returned.

    BAW: should this be part of the public interface?
    """
    sentinel = object()
    value = self.get(header, sentinel)
    if value is sentinel:
        return failobj
    pairs = []
    for item in _parseparam(';' + value):
        if '=' in item:
            name, val = item.split('=', 1)
            name = name.strip()
            val = val.strip()
        else:
            # Must have been a bare attribute
            name = item.strip()
            val = ''
        pairs.append((name, val))
    return Utils.decode_params(pairs)
539 | ||
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, the value is unquoted.
    """
    sentinel = object()
    params = self._get_params_preserve(sentinel, header)
    if params is sentinel:
        return failobj
    if not unquote:
        return params
    return [(key, _unquotevalue(val)) for key, val in params]
561 | ||
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.

    Your application should be prepared to deal with 3-tuple return
    values, and can convert the parameter to a Unicode string like so:

        param = msg.get_param('foo')
        if isinstance(param, tuple):
            param = unicode(param[2], param[0] or 'us-ascii')

    In any case, the parameter value (either the returned string, or the
    VALUE item in the 3-tuple) is always unquoted, unless unquote is set
    to False.
    """
    if not self.has_key(header):
        return failobj
    for key, raw in self._get_params_preserve(failobj, header):
        if key.lower() != param.lower():
            continue
        # First match wins.
        if unquote:
            return _unquotevalue(raw)
        return raw
    return failobj
597 | ||
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language=''):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to RFC
    2231.  Optional language specifies the RFC 2231 language, defaulting
    to the empty string.  Both charset and language should be strings.

    When the resulting header value differs from the current one, the old
    header is deleted and the new one is appended to the header list.
    """
    if charset and not isinstance(value, tuple):
        value = (charset, language, value)

    if header.lower() == 'content-type' and not self.has_key(header):
        ctype = 'text/plain'
    else:
        ctype = self.get(header)

    if self.get_param(param, header=header):
        # The parameter is already there with a true value: rebuild the
        # whole header value, swapping in the new value for that parameter.
        ctype = ''
        for old_name, old_value in self.get_params(header=header,
                                                   unquote=requote):
            if old_name.lower() == param.lower():
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(old_name, old_value, requote)
            if ctype:
                ctype = SEMISPACE.join([ctype, piece])
            else:
                ctype = piece
    else:
        # Otherwise tack the new parameter onto the existing value.
        piece = _formatparam(param, value, requote)
        if ctype:
            ctype = SEMISPACE.join([ctype, piece])
        else:
            ctype = piece

    if ctype != self.get(header):
        del self[header]
        self[header] = ctype
645 | ||
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header value is rebuilt without the parameter or its value.  All
    values will be quoted as necessary unless requote is False.  Optional
    header specifies an alternative to the Content-Type header.

    Nothing happens if the header is missing.  When the rebuilt value
    differs from the current one, the old header is deleted and the new
    one is appended to the header list.
    """
    if not self.has_key(header):
        return
    rebuilt = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == param.lower():
            continue
        piece = _formatparam(name, val, requote)
        if rebuilt:
            rebuilt = SEMISPACE.join([rebuilt, piece])
        else:
            rebuilt = piece
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
667 | ||
def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype for the Content-Type header.

    type must be a string in the form "maintype/subtype", otherwise a
    ValueError is raised.

    This method replaces the Content-Type header, keeping all the
    parameters in place.  If requote is False, this leaves the existing
    header's quoting as is.  Otherwise, the parameters will be quoted (the
    default).

    An alternative header can be specified in the header argument.  When
    the Content-Type header is set, we'll always also add a MIME-Version
    header.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if self.has_key(header):
        params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Skip the first param; it's the old type.
        for name, val in params[1:]:
            self.set_param(name, val, header, requote)
    else:
        self[header] = type
699 | ||
def get_filename(self, failobj=None):
    """Return the filename associated with the payload if present.

    The filename is taken from the `filename' parameter of the
    Content-Disposition header, passed through
    Utils.collapse_rfc2231_value() and stripped of surrounding whitespace.
    failobj is returned when the header or the parameter is missing.
    """
    sentinel = object()
    raw = self.get_param('filename', sentinel, 'content-disposition')
    if raw is sentinel:
        return failobj
    collapsed = Utils.collapse_rfc2231_value(raw)
    return collapsed.strip()
711 | ||
def get_boundary(self, failobj=None):
    """Return the boundary associated with the payload if present.

    The boundary is taken from the `boundary' parameter of the
    Content-Type header, passed through Utils.collapse_rfc2231_value() and
    stripped of trailing whitespace.  failobj is returned when the header
    or the parameter is missing.
    """
    sentinel = object()
    raw = self.get_param('boundary', sentinel)
    if raw is sentinel:
        return failobj
    collapsed = Utils.collapse_rfc2231_value(raw)
    # RFC 2046 says that boundaries may begin but not end in w/s
    return collapsed.rstrip()
724 | ||
def set_boundary(self, boundary):
    """Set the boundary parameter in Content-Type to 'boundary'.

    This is subtly different than deleting the Content-Type header and
    adding a new one with a new boundary parameter via add_header().  The
    main difference is that using the set_boundary() method preserves the
    order of the Content-Type header in the original message.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = object()
    params = self._get_params_preserve(sentinel, 'content-type')
    if params is sentinel:
        # There was no Content-Type header, and we don't know what type
        # to set it to, so raise an exception.
        raise Errors.HeaderParseError('No Content-Type header found')
    quoted = '"%s"' % boundary
    updated = []
    replaced = False
    for pname, pvalue in params:
        if pname.lower() == 'boundary':
            updated.append(('boundary', quoted))
            replaced = True
        else:
            updated.append((pname, pvalue))
    if not replaced:
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        updated.append(('boundary', quoted))
    # Render the new header value; bare attributes have an empty value.
    parts = []
    for pname, pvalue in updated:
        if pvalue == '':
            parts.append(pname)
        else:
            parts.append('%s=%s' % (pname, pvalue))
    new_value = SEMISPACE.join(parts)
    # Replace the existing Content-Type header with the new value
    rebuilt = []
    for field, old_value in self._headers:
        if field.lower() == 'content-type':
            rebuilt.append((field, new_value))
        else:
            rebuilt.append((field, old_value))
    self._headers = rebuilt
769 | ||
def get_content_charset(self, failobj=None):
    """Return the charset parameter of the Content-Type header.

    The returned string is always coerced to lower case.  If there is no
    Content-Type header, or if that header has no charset parameter,
    failobj is returned.
    """
    sentinel = object()
    charset = self.get_param('charset', sentinel)
    if charset is sentinel:
        return failobj
    if isinstance(charset, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        source_charset = charset[0] or 'us-ascii'
        charset = unicode(charset[2], source_charset).encode('us-ascii')
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return charset.lower()
787 | ||
def get_charsets(self, failobj=None):
    """Return a list containing the charset(s) used in this message.

    The list holds the result of get_content_charset(failobj) for every
    part produced by walk(), in walk() order, so each item is either that
    part's charset or failobj (defaults to None).
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
805 | ||
806 | # I.e. def walk(self): ... | |
807 | from email.Iterators import walk |