Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | # Copyright (C) 2002-2004 Python Software Foundation |
2 | # Contact: email-sig@python.org | |
3 | ||
4 | """Email address parsing code. | |
5 | ||
6 | Lifted directly from rfc822.py. This should eventually be rewritten. | |
7 | """ | |
8 | ||
9 | import time | |
10 | ||
# Frequently needed separator strings, named once at module level.
EMPTYSTRING = ''
SPACE = ' '
COMMASPACE = ', '
14 | ||
# Parse a date field
#
# Abbreviated month names come first, full names second, so that
# ``index % 12 + 1`` style arithmetic yields the month number for both.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-item time tuple.

    `data' is a date in RFC 2822 form (the deprecated RFC 850 form
    ``dd-mon-yy hh:mm:ss zone'' is accepted too).  A leading day name is
    ignored, and the day/month and year/time fields may appear swapped.

    Returns ``(year, month, day, hour, minute, second, 0, 1, 0, tzoffset)''
    where `tzoffset' is the zone's offset from UTC in seconds, or None when
    no zone could be determined.  Zone names are looked up in `_timezones';
    military zones other than Z are not recognized.  Two-digit years are
    expanded using the POSIX rule (69-99 -> 19xx, 00-68 -> 20xx).

    Returns None if the string cannot be parsed.
    """
    data = data.split()
    if not data:
        # Empty or whitespace-only input: nothing to parse.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time, e.g. "10:30:00-0500".
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            # Keep the sign with the offset so that int() sees it.
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Day and month may be given in the other order.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names occupy the second half of the table.
        mm -= 12
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are swapped.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy:
        return None
    if not yy[0].isdigit():
        # Year and zone are swapped.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # RFC 822 / RFC 850 dates carry a two-digit year; expand it with the
    # POSIX convention so that callers always see a full year.
    if yy < 100:
        if yy > 68:
            yy += 1900
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone name: leave the offset undetermined.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
122 | ||
123 | ||
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    The string is handed to parsedate_tz(); when that yields a tuple, its
    trailing timezone offset is dropped.  Any other result (such as None
    for an unparsable string) is passed through unchanged.
    """
    parsed = parsedate_tz(data)
    if not isinstance(parsed, tuple):
        return parsed
    return parsed[:9]
131 | ||
132 | ||
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    `data[9]' is the zone offset from UTC in seconds, or None when the zone
    is unknown.  With a known offset the result is computed purely
    arithmetically, so it does not depend on the local time zone of the
    machine.  With no offset the fields are interpreted as local time.

    The year is used exactly as given.  Returns a float number of seconds
    since the epoch.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # Imported here so the block does not rely on a module-level import
    # that the rest of the file does not otherwise need.
    import calendar
    # timegm() treats the fields as UTC; subtracting the zone offset then
    # yields the real instant.  Going through time.mktime() and
    # time.timezone instead would drag in the local zone's rules.
    return float(calendar.timegm(data) - data[9])
141 | ||
142 | ||
def quote(str):
    """Escape a string for use inside a quoted-string.

    Every backslash is doubled and every double quote is preceded by a
    backslash.  No surrounding quote characters are added.
    """
    # Backslashes must be handled first, otherwise the ones introduced for
    # the double quotes would themselves get doubled.
    backslashes_escaped = str.replace('\\', '\\\\')
    return backslashes_escaped.replace('"', '\\"')
146 | ||
147 | ||
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser keeps a cursor (`pos') into the header text (`field') and
    each get*() method consumes input from the cursor onwards.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; any comment met on the way is
        collected in `commentlist'.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each one a
        (name, addr-spec) pair.  A stretch of input that yields no address
        contributes the pair ('', '').
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, addr-spec) pairs: one entry for a single
        mailbox, several for a group, none when nothing usable was found.
        A trailing comma is consumed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so that parsing makes progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # The domain of a source route is parsed and thrown away.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character following the addr-spec (the
                # closing `>' in well-formed input).
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the local part alone when no `@' follows it, the full
        ``local@domain'' when a domain is present, and the empty string
        when an `@' is present but the domain is missing or invalid.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                # getquote() strips backslash escapes, so they have to be
                # restored before the text is wrapped in quotes again.
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain: return an empty address rather than a bare
            # local part, so that callers cannot mistake it for a mailbox.
            return ''
        return ''.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string if a second `@' is met, since such input
        (``a@one.example@two.example'') is not a valid domain.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # Must be tested before the generic atomends check below,
                # which would otherwise silently truncate the address.
                return ''
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        Backslash escapes are resolved: the backslash is dropped and the
        character after it is taken literally.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                # getcomment() leaves the cursor after the comment, so the
                # shared increment below must be skipped.
                slist.append(self.getcomment())
                continue
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are collected in
        `commentlist'.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
425 | ||
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed addresses live in `addresslist'.  The arithmetic operators
    treat two lists as sets of addresses.
    """

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # A false `field' (None or '') gives an empty list without parsing.
        self.addresslist = []
        if field:
            self.addresslist = self.getaddrlist()

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union: everything in self, then whatever other adds to it.
        union = AddressList(None)
        union.addresslist = self.addresslist + [
            addr for addr in other.addresslist
            if addr not in self.addresslist]
        return union

    def __iadd__(self, other):
        # Set union, in-place
        for addr in other.addresslist:
            if addr in self.addresslist:
                continue
            self.addresslist.append(addr)
        return self

    def __sub__(self, other):
        # Set difference
        difference = AddressList(None)
        difference.addresslist.extend(
            [addr for addr in self.addresslist
             if addr not in other.addresslist])
        return difference

    def __isub__(self, other):
        # Set difference, in-place
        for addr in other.addresslist:
            try:
                self.addresslist.remove(addr)
            except ValueError:
                # Not present in self: nothing to take away.
                pass
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]