git.subgeniuskitty.com - OpenSPARC-T2-SAM/.git/blame - sam-t2/devtools/amd64/lib/python2.4/rfc822.py

Commit	Line	Data
920dae64 AT	1	"""RFC 2822 message manipulation.
	2
	3	Note: This is only a very rough sketch of a full RFC-822 parser; in particular
	4	the tokenizing of addresses does not adhere to all the quoting rules.
	5
	6	Note: RFC 2822 is a long awaited update to RFC 822. This module should
	7	conform to RFC 2822, and is thus mis-named (it's not worth renaming it). Some
	8	effort at RFC 2822 updates has been made, but a thorough audit has not been
	9	performed. Consider any RFC 2822 non-conformance to be a bug.
	10
	11	RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
	12	RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
	13
	14	Directions for use:
	15
	16	To create a Message object: first open a file, e.g.:
	17
	18	fp = open(file, 'r')
	19
	20	You can use any other legal way of getting an open file object, e.g. use
	21	sys.stdin or call os.popen(). Then pass the open file object to the Message()
	22	constructor:
	23
	24	m = Message(fp)
	25
	26	This class can work with any input object that supports a readline method. If
	27	the input object has seek and tell capability, the rewindbody method will
	28	work; also illegal lines will be pushed back onto the input stream. If the
	29	input object lacks seek but has an `unread' method that can push back a line
	30	of input, Message will use that to push back illegal lines. Thus this class
	31	can be used to parse messages coming from a buffered stream.
	32
	33	The optional `seekable' argument is provided as a workaround for certain stdio
	34	libraries in which tell() discards buffered data before discovering that the
	35	lseek() system call doesn't work. For maximum portability, you should set the
	36	seekable argument to zero to prevent that initial tell() when passing in
	37	an unseekable object such as a file object created from a socket object. If
	38	it is 1 on entry -- which it is by default -- the tell() method of the open
	39	file object is called once; if this raises an exception, seekable is reset to
	40	0. For other nonzero values of seekable, this test is not made.
	41
	42	To get the text of a particular header there are several methods:
	43
	44	str = m.getheader(name)
	45	str = m.getrawheader(name)
	46
	47	where name is the name of the header, e.g. 'Subject'. The difference is that
	48	getheader() strips the leading and trailing whitespace, while getrawheader()
	49	doesn't. Both functions retain embedded whitespace (including newlines)
	50	exactly as they are specified in the header, and leave the case of the text
	51	unchanged.
	52
	53	For addresses and address lists there are functions
	54
	55	realname, mailaddress = m.getaddr(name)
	56	list = m.getaddrlist(name)
	57
	58	where the latter returns a list of (realname, mailaddr) tuples.
	59
	60	There is also a method
	61
	62	time = m.getdate(name)
	63
	64	which parses a Date-like field and returns a time-compatible tuple,
65	i.e. a tuple such as returned by time.localtime() or accepted by
66	time.mktime().
67
68	See the class definition for lower level access methods.
69
70	There are also some utility functions here.
71	"""
72	# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
73
74	import time
75
# Names exported by ``from rfc822 import *``.
__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]

# Line values that Message.islast() treats as the blank line ending the
# headers (bare newline, or CRLF as seen when reading from sockets).
_blanklines = ('\r\n', '\n') # Optimization for islast()
79
80
class Message:
    """Represents a single RFC 2822-compliant message.

    Instance attributes set while parsing:

    fp             -- the input object the message is read from
    seekable       -- true if fp.tell()/fp.seek() are believed to work
    startofheaders -- fp position before the headers (None if unseekable)
    startofbody    -- fp position after the headers (None if unseekable)
    headers        -- list of raw header lines, exactly as read
    dict           -- maps lowercased header name to its stripped value
                      (only the last occurrence of each header is kept)
    unixfrom       -- any leading Unix "From " envelope line(s)
    status         -- '' if the headers parsed cleanly, else an error text
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        fp must support readline().  If seekable is exactly 1, fp.tell() is
        tried once and seekable is reset to 0 when that fails; any other
        value of seekable is taken at face value.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        #
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        #
        self.readheaders()
        #
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the input object is not seekable.
        """
        if not self.seekable:
            # Call form of raise: same exception and message as the old
            # "raise IOError, msg" statement, but valid on every Python.
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Pick the push-back strategy: prefer fp.unread(), else seek().
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                # Remember where this line starts so it can be un-read.
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                joined = self.dict[headerseen] + "\n " + line.strip()
                self.dict[headerseen] = joined.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lines.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized
        (here: the lowercased text before the first colon), or None if the
        line is not a header.  You may override this method in order to use
        Message parsing on tagged data in RFC 2822-like formats with special
        header formats.
        """
        colon = line.find(':')
        if colon > 0:
            return line[:colon].lower()
        return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.  The default implementation never skips a line.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A different header starts here; stop collecting.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                # Past the first match: only continuation lines still count.
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop "<name>:" from the first line; keep everything after it.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default' (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the last such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return default
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold it into the value being built.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                if raw:
                    # Separate the addresses of successive headers.
                    raw.append(', ')
                colon = h.find(':')
                if colon > 0:
                    # Append only when a colon was found, so a stale or
                    # undefined value is never added.
                    raw.append(h[colon+1:])
        alladdrs = ''.join(raw)
        a = AddressList(alladdrs)
        return a.addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime(), or None if the header is missing or
        cannot be parsed.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is missing or cannot be parsed.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds last header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary.

        Raises KeyError if the header is not present.
        """
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if not name in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        doomed = []
        hit = 0
        for index, line in enumerate(self.headers):
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                doomed.append(index)
        # Delete from the end so the remaining indexes stay valid.
        for index in reversed(doomed):
            del self.headers[index]

    def setdefault(self, name, default=""):
        """Return the value of header `name', adding it if absent.

        If the header is missing it is appended to the raw header lines and
        stored with the value `default', which is then returned.
        """
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        text = name + ": " + default
        for line in text.split("\n"):
            self.headers.append(line + "\n")
        self.dict[lowername] = default
        return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lowercased) header names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined back into one string."""
        return ''.join(self.headers)
467
468
469	# Utility functions
470	# -----------------
471
472	# XXX Should fix unquote() and quote() to be really conformant.
473	# XXX The inverses of the parse functions may also be useful.
474
475
def unquote(str):
    """Strip one layer of enclosing "..." or <...> from a string.

    For a double-quoted string the backslash escapes for backslash and
    double quote are undone as well.  Anything else, including strings
    shorter than two characters, is returned unchanged.
    """
    if len(str) <= 1:
        return str
    first, last, inner = str[0], str[-1], str[1:-1]
    if first == '"' and last == '"':
        return inner.replace('\\\\', '\\').replace('\\"', '"')
    if first == '<' and last == '>':
        return inner
    return str
484
485
def quote(str):
    """Escape a string for use inside a double-quoted string.

    Backslashes and double quotes are each prefixed with a backslash.  The
    surrounding quote characters themselves are not added.
    """
    escaped = str.replace('\\', '\\\\')
    return escaped.replace('"', '\\"')
489
490
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found is returned; (None, None) is returned
    when nothing could be parsed.
    """
    parsed = AddressList(address).addresslist
    if parsed:
        return parsed[0]
    return (None, None)
499
500
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC 2822 in front of you.

    http://www.faqs.org/rfcs/rfc2822.html

    The parser keeps a cursor (self.pos) into self.field; every get*()
    method consumes input starting at the cursor and advances it.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing one or more
        addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; any comments met on the way are
        collected in self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each one a
        (realname, mailaddr) tuple.
        """
        result = []
        # Keep going until the field is exhausted.  An element that yields no
        # address (an empty group such as "undisclosed-recipients:;", or a
        # stray comma) must not hide the addresses that follow it.
        # getaddress() always consumes input while any is left, so this loop
        # terminates.
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: empty if nothing was
        recognized, one entry for a single mailbox, several for a group.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Take a copy: getphraselist() appends to self.commentlist in place,
        # so an alias could not be used to restore the earlier state below.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos = self.pos + 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos = self.pos + 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Unexpected special character: step over it.
                self.pos = self.pos + 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        adlist = ""
        while self.pos < len(self.field):
            if expectroute:
                # Domain of an "@domain" route element; value is discarded.
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            # Local part only; there is no domain to append.
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.  If self is not
        looking at an instance of `beginchar' then getdelimited returns the
        empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            if quote == 1:
                # Character after a backslash is taken literally.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
775
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (realname, mailaddr) tuples are kept in self.addresslist.
    The +, +=, - and -= operators treat two lists as sets of addresses.
    """

    def __init__(self, field):
        """Parse `field'; a false value (None, '') gives an empty list."""
        AddrlistClass.__init__(self, field)
        if not field:
            self.addresslist = []
        else:
            self.addresslist = self.getaddrlist()

    def __len__(self):
        """Return the number of parsed addresses."""
        return len(self.addresslist)

    def __str__(self):
        """Return the addresses in canonical form, comma separated."""
        return ", ".join([dump_address_pair(pair)
                          for pair in self.addresslist])

    def __add__(self, other):
        """Set union: a new list with other's unseen addresses appended."""
        union = AddressList(None)
        extra = [pair for pair in other.addresslist
                 if pair not in self.addresslist]
        union.addresslist = self.addresslist[:] + extra
        return union

    def __iadd__(self, other):
        """Set union, in-place."""
        for pair in other.addresslist:
            if pair not in self.addresslist:
                self.addresslist.append(pair)
        return self

    def __sub__(self, other):
        """Set difference: a new list without the addresses in other."""
        difference = AddressList(None)
        difference.addresslist = [pair for pair in self.addresslist
                                  if pair not in other.addresslist]
        return difference

    def __isub__(self, other):
        """Set difference, in-place."""
        for pair in other.addresslist:
            if pair in self.addresslist:
                self.addresslist.remove(pair)
        return self

    def __getitem__(self, index):
        """Make indexing, slices, and 'in' work."""
        return self.addresslist[index]
825
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is '"name" <address>'; otherwise it is
    just the address.  Backslashes and double quotes in the name are
    backslash-escaped (the same escaping quote() applies) so the result is
    a well-formed quoted-string.
    """
    realname, mailaddr = pair[0], pair[1]
    if not realname:
        return mailaddr
    escaped = realname.replace('\\', '\\\\').replace('"', '\\"')
    return '"' + escaped + '" <' + mailaddr + '>'
832
833	# Parse a date field
834
# Abbreviated month names first, then the full names; an index past 12 is
# folded back onto 1..12 by parsedate_tz().
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

# Values are in +-HHMM form, like numeric zones in a date header.
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple (year, month, day, hour, minute, second, 0, 1, 0,
    tzoffset), or None if the string cannot be parsed.  tzoffset is the
    zone's offset from UTC in seconds, or None when the zone is missing or
    not recognized.  Zone names are looked up in _timezones; military time
    zones other than Z are not recognized.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input leaves nothing to parse.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # Time and zone may be glued together, as in "13:32:02+0100".
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if not mm in _monthnames:
        # Perhaps the month comes before the day ("Nov 16 ...").
        dd, mm = mm, dd.lower()
        if not mm in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    if mm > 12: mm = mm - 12
    # endswith() and slicing are used below so that an empty field is
    # rejected by the int() conversions instead of raising IndexError.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are in swapped positions.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # Year and zone are in swapped positions.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ((tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
935
936
def parsedate(data):
    """Convert a time string to a time tuple.

    Same as parsedate_tz() with the trailing time zone element dropped;
    a non-tuple result (None on failure) is passed through unchanged.
    """
    parsed = parsedate_tz(data)
    if type(parsed) is tuple:
        return parsed[:9]
    return parsed
943
944
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
    offset = data[9]
    fields = data[:8]
    if offset is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(fields + (-1,))
    # Evaluate the fields as local standard time, then remove both the
    # message's own offset and the local zone's offset.
    return time.mktime(fields + (0,)) - offset - time.timezone
953
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    `timeval' is a timestamp in seconds since the epoch; the current time
    is used when it is None.

    According to RFC 1123, day and month names must always be in
    English.  If not for that, this code could use strftime().  It
    can't because strftime() honors the locale and could generate
    non-English names.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = time.gmtime(timeval)[:7]
    daynames = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    monthnames = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        daynames[weekday], day, monthnames[month - 1],
        year, hour, minute, second)
973
974
975	# When used as script, run a small test program.
976	# The first command line argument must be a filename containing one
977	# message in RFC-822 format.
978
if __name__ == '__main__':
    import sys
    import os

    def _show(*items):
        """Write items to stdout the way a print statement would."""
        sys.stdout.write(' '.join([str(item) for item in items]) + '\n')

    # Message file: first command line argument, else $HOME/Mail/inbox/1.
    filename = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    if sys.argv[1:]:
        filename = sys.argv[1]
    f = open(filename, 'r')
    try:
        m = Message(f)
        _show('From:', m.getaddr('from'))
        _show('To:', m.getaddrlist('to'))
        _show('Subject:', m.getheader('subject'))
        _show('Date:', m.getheader('date'))
        # getdate_tz() returns None for a missing or unparseable Date header,
        # so test it before indexing it or handing it to mktime_tz().
        date = m.getdate_tz('date')
        if date:
            tz = date[-1]
            pieces = ['ParsedDate:',
                      time.asctime(time.localtime(mktime_tz(date)))]
            if tz is not None:
                # tz is in seconds; show it as +HHMM (plus seconds, if any).
                hhmm, ss = divmod(tz, 60)
                hh, mm = divmod(hhmm, 60)
                pieces.append("%+03d%02d" % (hh, mm))
                if ss:
                    pieces.append(".%02d" % ss)
            _show(*pieces)
        else:
            _show('ParsedDate:', None)
        # Count the body lines.
        m.rewindbody()
        n = 0
        while f.readline():
            n = n + 1
        _show('Lines:', n)
        _show('-'*70)
        _show('len =', len(m))
        if 'Date' in m:
            _show('Date =', m['Date'])
        if 'X-Nonsense' in m:
            pass
        _show('keys =', m.keys())
        _show('values =', m.values())
        _show('items =', m.items())
    finally:
        f.close()