Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / devtools / amd64 / lib / python2.4 / rfc822.py
CommitLineData
920dae64
AT
1"""RFC 2822 message manipulation.
2
3Note: This is only a very rough sketch of a full RFC-822 parser; in particular
4the tokenizing of addresses does not adhere to all the quoting rules.
5
Note: RFC 2822 is a long-awaited update to RFC 822. This module should
conform to RFC 2822, and is thus mis-named (it's not worth renaming it). Some
effort at RFC 2822 updates has been made, but a thorough audit has not been
performed. Consider any RFC 2822 non-conformance to be a bug.
10
11 RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
12 RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
13
14Directions for use:
15
16To create a Message object: first open a file, e.g.:
17
18 fp = open(file, 'r')
19
20You can use any other legal way of getting an open file object, e.g. use
21sys.stdin or call os.popen(). Then pass the open file object to the Message()
22constructor:
23
24 m = Message(fp)
25
26This class can work with any input object that supports a readline method. If
27the input object has seek and tell capability, the rewindbody method will
28work; also illegal lines will be pushed back onto the input stream. If the
29input object lacks seek but has an `unread' method that can push back a line
30of input, Message will use that to push back illegal lines. Thus this class
31can be used to parse messages coming from a buffered stream.
32
The optional `seekable' argument is provided as a workaround for certain stdio
libraries in which tell() discards buffered data before discovering that the
lseek() system call doesn't work. For maximum portability, you should set the
seekable argument to zero to prevent that initial tell() call when passing in
an unseekable object such as a file object created from a socket object. If
it is 1 on entry -- which it is by default -- the tell() method of the open
file object is called once; if this raises an exception, seekable is reset to
0. For other nonzero values of seekable, this test is not made.
41
42To get the text of a particular header there are several methods:
43
44 str = m.getheader(name)
45 str = m.getrawheader(name)
46
47where name is the name of the header, e.g. 'Subject'. The difference is that
48getheader() strips the leading and trailing whitespace, while getrawheader()
49doesn't. Both functions retain embedded whitespace (including newlines)
50exactly as they are specified in the header, and leave the case of the text
51unchanged.
52
53For addresses and address lists there are functions
54
55 realname, mailaddress = m.getaddr(name)
56 list = m.getaddrlist(name)
57
58where the latter returns a list of (realname, mailaddr) tuples.
59
60There is also a method
61
62 time = m.getdate(name)
63
64which parses a Date-like field and returns a time-compatible tuple,
65i.e. a tuple such as returned by time.localtime() or accepted by
66time.mktime().
67
68See the class definition for lower level access methods.
69
70There are also some utility functions here.
71"""
72# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
73
74import time
75
# Names exported by ``from rfc822 import *``.
__all__ = [
    "Message",
    "AddressList",
    "parsedate",
    "parsedate_tz",
    "mktime_tz",
]

# The two spellings of an empty line that end the header section; kept as a
# module-level tuple so Message.islast() is a single membership test.
_blanklines = ('\r\n', '\n')
79
80
class Message:
    """Represents a single RFC 2822-compliant message.

    The headers are read from `fp' when the instance is created.  They are
    kept in two forms: `self.headers' is the list of raw header lines exactly
    as read, and `self.dict' maps each lower-cased header name to the
    stripped value of the *last* header with that name.
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        `fp' is any object with a readline() method.  `seekable' says whether
        fp.tell()/fp.seek() may be used; when it is exactly 1 (the default)
        tell() is tried once and seekable is reset to 0 if that fails.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
            else:
                seekable = 1
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        #
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        #
        self.readheaders()
        #
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying file is not seekable.
        """
        if not self.seekable:
            # Call form of raise: valid on every Python version, unlike the
            # old `raise IOError, "..."' statement form.
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Prefer an explicit pushback method; otherwise remember where each
        # line starts so that a bad line can be un-read with seek().
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lines.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        else:
            return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A different header starts here; continuation lines (which
                # begin with whitespace) leave `hit' untouched.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop "<name>:" from the first line; everything else is verbatim.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default' (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the *last* such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return default
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold it into the value being built.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header holds no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                if raw:
                    raw.append(', ')
                # Every non-continuation line returned above starts with
                # "<name>:", so slice unconditionally after the first colon
                # rather than appending a possibly unbound or stale value.
                raw.append(h[h.find(':') + 1:])
        alladdrs = ''.join(raw)
        a = AddressList(alladdrs)
        return a.addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime(), or None if the header is missing or
        cannot be parsed.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is missing or cannot be parsed.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        doomed = []
        hit = 0
        for i, line in enumerate(self.headers):
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                doomed.append(i)
        # Delete from the back so the remaining indices stay valid.
        for i in reversed(doomed):
            del self.headers[i]

    def setdefault(self, name, default=""):
        """Return the value of header `name', adding it first if absent.

        When the header does not exist it is appended to the raw header list
        and stored in the dictionary with the value `default'.
        """
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        else:
            text = name + ": " + default
            for line in text.split("\n"):
                self.headers.append(line + "\n")
            self.dict[lowername] = default
            return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lower-cased) header field names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined back into one string."""
        return ''.join(self.headers)
467
468
469# Utility functions
470# -----------------
471
472# XXX Should fix unquote() and quote() to be really conformant.
473# XXX The inverses of the parse functions may also be useful.
474
475
def unquote(str):
    """Strip one layer of "..." or <...> delimiters from a string.

    For a double-quoted string the escapes \\\\ and \\" inside it are also
    undone.  Strings shorter than two characters, or without matching
    delimiters, are returned unchanged.
    """
    if len(str) < 2:
        return str
    first, last = str[0], str[-1]
    inner = str[1:-1]
    if first == '"' and last == '"':
        # Undo backslash escaping first, then quote escaping.
        unescaped = inner.replace('\\\\', '\\')
        return unescaped.replace('\\"', '"')
    if first == '<' and last == '>':
        return inner
    return str
484
485
def quote(str):
    """Escape backslashes and double quotes in a string.

    The result is suitable for placing between double quotes; the
    surrounding quote characters themselves are not added.
    """
    # Backslashes must be doubled before quotes are escaped, otherwise the
    # backslashes introduced for the quotes would be doubled as well.
    escaped = str.replace('\\', '\\\\')
    return escaped.replace('"', '\\"')
489
490
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; (None, None) is
    returned when nothing could be parsed.
    """
    parsed = AddressList(address).addresslist
    if parsed:
        return parsed[0]
    return (None, None)
499
500
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC 2822 in front of you.

    http://www.faqs.org/rfcs/rfc2822.html

    The parser walks `self.field' with the cursor `self.pos'; comments met
    along the way are collected in `self.commentlist'.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing one or more
        addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address."""
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else: break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses.  Empty entries (such
        as the gap in "a@b, , c@d" or an empty group "name:;") are skipped
        instead of ending the parse, so addresses after them are not lost.
        """
        result = []
        while self.pos < len(self.field):
            startpos = self.pos
            ad = self.getaddress()
            if ad:
                result += ad
            elif self.pos == startpos:
                # Defensive: never spin on input the parser did not consume.
                self.pos = self.pos + 1
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: one entry for a plain
        address, several for a group, and an empty list when no address was
        found at the current position.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot (not alias) the comments seen so far: getphraselist() below
        # appends to self.commentlist in place, and the addr-spec branch must
        # be able to roll those additions back before it re-parses.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos = self.pos + 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos = self.pos + 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' + \
                         ' '.join(self.commentlist) + ')', routeaddr)]
            else: returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so the caller can make progress.
                self.pos = self.pos + 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not positioned on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        adlist = ""
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                # Step over the character that ended the addr-spec
                # (normally the closing `>').
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else: aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else: sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.  If self is not
        looking at an instance of `beginchar' then getdelimited returns the
        empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            if quote == 1:
                # Previous character was a backslash: take this one verbatim.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                # getcomment() already moved the cursor past the nested
                # comment; advancing again would swallow the next character
                # (typically the enclosing comment's closing parenthesis).
                continue
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else: atomlist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
775
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (realname, mailaddr) tuples are kept in `self.addresslist'.
    The +, -, += and -= operators treat two address lists as sets.
    """
    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # A false `field' (None or '') gives an empty list without parsing.
        self.addresslist = []
        if field:
            self.addresslist = self.getaddrlist()

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        return ", ".join([dump_address_pair(pair) for pair in self.addresslist])

    def __add__(self, other):
        # Set union: everything in self, then other's entries not in self.
        union = AddressList(None)
        extra = [pair for pair in other.addresslist
                 if pair not in self.addresslist]
        union.addresslist = self.addresslist[:] + extra
        return union

    def __iadd__(self, other):
        # Set union, in-place
        for pair in other.addresslist:
            if pair not in self.addresslist:
                self.addresslist.append(pair)
        return self

    def __sub__(self, other):
        # Set difference: self's entries that do not occur in other.
        difference = AddressList(None)
        difference.addresslist = [pair for pair in self.addresslist
                                  if pair not in other.addresslist]
        return difference

    def __isub__(self, other):
        # Set difference, in-place
        for pair in other.addresslist:
            if pair in self.addresslist:
                self.addresslist.remove(pair)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
825
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a real name the result is '"name" <address>'; without one it is
    just the address.
    """
    realname = pair[0]
    if not realname:
        return pair[1]
    return '"' + realname + '" <' + pair[1] + '>'
832
833# Parse a date field
834
# Abbreviated names first, full names second: an index above 12 therefore
# means "full name" and is folded back into 1..12 by the parser.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    The first nine items can be passed to time.mktime(); items 6-8 are
    always 0, 1, 0 because the weekday, day of year and DST flag are not
    computed.  The tenth item is the zone offset from UTC in seconds, or
    None when the string has no zone or one that is not recognised.  Zone
    names are looked up in `_timezones'; military zones other than Z are
    not supported.

    Returns None when `data' is empty, blank, or cannot be parsed.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input: nothing to parse.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be fused to the time, as in "08:49:37+0100".
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            i = s.find('-')
            if i > 0:
                # Keep the sign so the offset is parsed as negative.
                data[3:] = [s[:i], s[i:]]
            else:
                data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Maybe "Nov 6" instead of "6 Nov": try the fields swapped.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    if mm > 12: mm = mm - 12
    # endswith()/slicing instead of [-1]/[0]: a field can be empty after the
    # RFC 850 split (e.g. "-Nov-94"), and indexing it would raise IndexError.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
935
936
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    This is parsedate_tz() with the trailing zone offset dropped; whatever
    non-tuple value parsedate_tz() returns (None on failure) is passed
    through unchanged.
    """
    parsed = parsedate_tz(data)
    if isinstance(parsed, tuple):
        return parsed[:9]
    return parsed
943
944
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    `data[9]' is the zone offset in seconds east of UTC.  When it is None
    the fields are interpreted as local time; otherwise they are converted
    with calendar.timegm(), which does not depend on the local zone's
    current offset or on the range the platform's mktime() accepts.

    Returns the timestamp as a float.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # Imported here so the block does not rely on a new file-level import.
    import calendar
    # timegm() treats the fields as UTC; subtracting the offset moves the
    # sender's wall-clock time onto the UTC axis.
    return float(calendar.timegm(data[:9]) - data[9])
953
def formatdate(timeval=None):
    """Return a timestamp in the format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    `timeval' is seconds since the epoch; the current time is used when it
    is None.  The result is always expressed in GMT.

    According to RFC 1123, day and month names must always be in
    English.  If not for that, this code could use strftime().  It
    can't because strftime() honors the locale and could generate
    non-English names.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = time.gmtime(timeval)[:7]
    daynames = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
    monthnames = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        daynames[weekday], day, monthnames[month - 1],
        year, hour, minute, second)
973
974
975# When used as script, run a small test program.
976# The first command line argument must be a filename containing one
977# message in RFC-822 format.
978
if __name__ == '__main__':
    # Small self-test: parse one message file and print what was found.
    import os
    import sys

    # Only fall back to $HOME when no file name was given, so the script
    # also works where HOME is not set.
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    f = open(path, 'r')
    try:
        m = Message(f)
        # Each line is built as a single string so the output is the same
        # whether print is a statement or a function.
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        if date:
            # Check for a parsed date *before* using it: a missing or
            # unparseable Date header yields None.
            tz = date[-1]
            parts = ['ParsedDate:',
                     time.asctime(time.localtime(mktime_tz(date)))]
            if tz is not None:
                # Split sign and magnitude so that negative offsets with a
                # minutes part (e.g. -0330) are printed correctly.
                sign = '+'
                if tz < 0:
                    sign = '-'
                    tz = -tz
                hhmm, ss = divmod(tz, 60)
                hh, mm = divmod(hhmm, 60)
                parts.append("%s%02d%02d" % (sign, hh, mm))
                if ss:
                    parts.append(".%02d" % ss)
            print(' '.join(parts))
        else:
            print('ParsedDate: None')
        m.rewindbody()
        n = 0
        while f.readline():
            n = n + 1
        print('Lines: %d' % n)
        print('-'*70)
        print('len = %d' % len(m))
        if 'Date' in m:
            print('Date = %s' % (m['Date'],))
        if 'X-Nonsense' in m:
            pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))
    finally:
        f.close()
1013 print 'items =', m.items()