Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | # Copyright (C) 2001-2004 Python Software Foundation |
2 | # Author: Barry Warsaw | |
3 | # Contact: email-sig@python.org | |
4 | ||
5 | """Miscellaneous utilities.""" | |
6 | ||
7 | import os | |
8 | import re | |
9 | import time | |
10 | import base64 | |
11 | import random | |
12 | import socket | |
13 | import warnings | |
14 | from cStringIO import StringIO | |
15 | ||
16 | from email._parseaddr import quote | |
17 | from email._parseaddr import AddressList as _AddressList | |
18 | from email._parseaddr import mktime_tz | |
19 | ||
20 | # We need workarounds for bugs in these methods in older Pythons (see below) | |
21 | from email._parseaddr import parsedate as _parsedate | |
22 | from email._parseaddr import parsedate_tz as _parsedate_tz | |
23 | ||
24 | from quopri import decodestring as _qdecode | |
25 | ||
26 | # Intrapackage imports | |
27 | from email.Encoders import _bencode, _qencode | |
28 | ||
# Frequently used string constants.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'

# Any of these characters in a real name makes formataddr() wrap it in quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# These characters are backslash-escaped by formataddr() inside a real name.
escapesre = re.compile(r'[][\\()"]')
36 | ||
37 | ||
38 | \f | |
39 | # Helpers | |
40 | ||
41 | def _identity(s): | |
42 | return s | |
43 | ||
44 | ||
45 | def _bdecode(s): | |
46 | # We can't quite use base64.encodestring() since it tacks on a "courtesy | |
47 | # newline". Blech! | |
48 | if not s: | |
49 | return s | |
50 | value = base64.decodestring(s) | |
51 | if not s.endswith('\n') and value.endswith('\n'): | |
52 | return value[:-1] | |
53 | return value | |
54 | ||
55 | ||
56 | \f | |
def fix_eols(s):
    """Return s with every line ending normalized to CRLF.

    Bare line feeds and bare carriage returns are each replaced by the
    two-character carriage return + line feed sequence; sequences that are
    already CRLF are left alone.
    """
    # First pattern: a line feed with no preceding carriage return.
    # Second pattern: a carriage return with no following line feed.
    for lone_eol in (r'(?<!\r)\n', r'\r(?!\n)'):
        s = re.sub(lone_eol, CRLF, s)
    return s
64 | ||
65 | ||
66 | \f | |
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the result is a string suitable for
    an RFC 2822 From, To or Cc header.

    When the real name is false, the email address is returned unmodified.
    Otherwise the result is 'realname <email_address>'.  The real name is
    wrapped in double quotes if it contains any character matched by
    specialsre, and characters matched by escapesre are backslash-escaped.
    """
    name, address = pair
    if not name:
        return address
    if specialsre.search(name):
        quotes = '"'
    else:
        quotes = ''
    escaped = escapesre.sub(r'\\\g<0>', name)
    return '%s%s%s <%s>' % (quotes, escaped, quotes, address)
83 | ||
84 | ||
85 | \f | |
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    The field values are joined with COMMASPACE and parsed together as a
    single address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
91 | ||
92 | ||
93 | \f | |
# Matches one encoded word of the form =?charset?encoding?atom?= and exposes
# its three parts as the named groups 'charset', 'encoding' and 'atom'.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
103 | ||
104 | ||
105 | \f | |
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string in the format specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, if given, is a floating point time value as accepted by
    gmtime() and localtime(); when omitted the current time is used.

    localtime is a flag; when true, timeval is interpreted in the local
    timezone and the returned date carries the local numeric offset, taking
    daylight savings time into account.  Otherwise the date is in UTC.

    usegmt is a flag; when true, the timezone is written as the ascii
    string "GMT" instead of the numeric "-0000".  This is needed for HTTP
    and only has an effect when localtime is false.
    """
    # strftime() is deliberately not used: it honors the locale, whereas
    # RFC 2822 requires the English day and month abbreviations.
    daynames = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    monthnames = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    if timeval is None:
        timeval = time.time()
    if not localtime:
        now = time.gmtime(timeval)
        # UTC: the offset is always zero, spelled either way.
        if usegmt:
            zone = 'GMT'
        else:
            zone = '-0000'
    else:
        now = time.localtime(timeval)
        # Pick the DST offset only if the local zone defines DST and the
        # DST flag (last field of the time tuple) is set for this time.
        if time.daylight and now[-1]:
            offset = time.altzone
        else:
            offset = time.timezone
        # offset is in seconds; split it into whole hours plus the
        # leftover seconds, which become minutes below.
        hours, leftover = divmod(abs(offset), 3600)
        # offset counts seconds *west* of UTC while the printed zone is
        # expressed *east* of UTC, hence the inverted sign.
        if offset > 0:
            sign = '-'
        else:
            sign = '+'
        zone = '%s%02d%02d' % (sign, hours, leftover // 60)
    year, month, day, hour, minute, second = (
        now[0], now[1], now[2], now[3], now[4], now[5])
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        daynames[now[6]], day, monthnames[month - 1],
        year, hour, minute, second, zone)
156 | ||
157 | ||
158 | \f | |
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is built from the current UTC timestamp, the process id, a random
    integer below 100000 and the fully qualified host name.

    idstring, if given, is a string appended (after a dot) to the local part
    to strengthen the uniqueness of the message id.
    """
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    randint = random.randrange(100000)
    if idstring is None:
        suffix = ''
    else:
        suffix = '.' + idstring
    return '<%s.%s.%s%s@%s>' % (stamp, pid, randint, suffix,
                                socket.getfqdn())
178 | ||
179 | ||
180 | \f | |
181 | # These functions are in the standalone mimelib version only because they've | |
182 | # subsequently been fixed in the latest Python versions. We use this to work | |
183 | # around broken older Pythons. | |
def parsedate(data):
    """Parse the date string data with email._parseaddr.parsedate().

    A false data value (None or the empty string) yields None without
    calling the parser; otherwise the parser's result is returned as-is.
    """
    if data:
        return _parsedate(data)
    return None
188 | ||
189 | ||
def parsedate_tz(data):
    """Parse the date string data with email._parseaddr.parsedate_tz().

    A false data value (None or the empty string) yields None without
    calling the parser; otherwise the parser's result is returned as-is.
    """
    if data:
        return _parsedate_tz(data)
    return None
194 | ||
195 | ||
def parseaddr(addr):
    """Parse addr and return the first address found in it.

    The result is the first entry of the parsed address list, or the pair
    ('', '') when no address could be parsed.
    """
    addrs = _AddressList(addr).addresslist
    if addrs:
        return addrs[0]
    return '', ''
201 | ||
202 | ||
203 | # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3. | |
def unquote(str):
    """Remove quotes from a string.

    A string enclosed in double quotes loses them, and the backslash escapes
    for backslash and double quote inside it are undone.  A string enclosed
    in angle brackets just loses the brackets.  Anything else, including any
    string shorter than two characters, is returned unchanged.
    """
    if len(str) < 2:
        return str
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    if not (str.startswith('"') and str.endswith('"')):
        return str
    inner = str[1:-1]
    # Order matters: collapse escaped backslashes first, then escaped quotes.
    inner = inner.replace('\\\\', '\\')
    return inner.replace('\\"', '"')
212 | ||
213 | ||
214 | \f | |
215 | # RFC2231-related functions - parameter encoding and decoding | |
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is expected to look like charset'language'percent-encoded-text.
    Returns a 3-tuple (charset, language, text) where text has had its
    %XX escapes decoded.

    If s does not contain both apostrophe delimiters (that is, it has no
    apostrophe or only one), it carries no charset/language prefix: the
    whole of s is percent-decoded and returned with charset and language
    set to None.
    """
    import urllib
    parts = s.split("'", 2)
    if len(parts) < 3:
        # Fewer than two apostrophes: unpacking into three names would
        # raise ValueError, so treat the whole string as the value.
        return None, None, urllib.unquote(s)
    charset, language, text = parts
    return charset, language, urllib.unquote(text)
224 | ||
225 | ||
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always percent-encoded, with no characters treated as safe.  When
    neither charset nor language is given, that encoded string is the
    result.  Otherwise the result is charset'language'encoded-string, with
    the empty string standing in for a language of None.
    """
    import urllib
    encoded = urllib.quote(s, safe='')
    if not (charset is None and language is None):
        if language is None:
            language = ''
        encoded = "%s'%s'%s" % (charset, language, encoded)
    return encoded
240 | ||
241 | ||
# Matches an extended parameter name of the form NAME*, NAME*N or NAME*N*
# (N being a decimal number); the groups 'name' and 'num' capture NAME and N,
# with 'num' being None when no number is present.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
243 | ||
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a non-empty sequence of (name, value) 2-tuples.  The first
    pair is copied to the result untouched.  Each remaining pair has its
    value unquoted, and then:

    - if the name does not match rfc2231_continuation, the pair is emitted
      as (name, '"value"') with the value re-quoted;
    - otherwise the value is collected under the base name, and once all
      pairs are seen the collected pieces are sorted by number, joined,
      decoded with decode_rfc2231() and emitted after the plain parameters
      as (name, (charset, language, '"value"')).

    Returns the new list of parameters.
    """
    # The leading pair is passed through verbatim.
    first_name, first_value = params[0]
    decoded = [(first_name, first_value)]
    # Maps a base parameter name to its list of (num, value) pieces.
    pending = {}
    for name, value in params[1:]:
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if not mo:
            decoded.append((name, '"%s"' % quote(value)))
            continue
        name, num = mo.group('name', 'num')
        if num is not None:
            num = int(num)
        pending.setdefault(name, []).append((num, value))
    for name, pieces in pending.items():
        # Put the pieces in numeric order before gluing them together.
        pieces.sort()
        joined = EMPTYSTRING.join([text for _, text in pieces])
        charset, language, value = decode_rfc2231(joined)
        decoded.append((name, (charset, language, '"%s"' % quote(value))))
    return decoded
279 | ||
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single string.

    A value that is not a tuple is simply unquoted and returned.

    A tuple value is treated as (charset, language, text): the text is
    unquoted and converted to unicode using the charset (us-ascii when the
    charset is false), with errors as the decoding error policy.  If Python
    does not know the charset, fallback_charset is used instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        return unicode(rawval, charset, errors)
    except LookupError:
        # The charset is unknown to Python; decode with the fallback.
        return unicode(rawval, fallback_charset, errors)