[OpenSPARC-T2-SAM] / sam-t2 / devtools / v9 / lib / python2.4 / email / Generator.py

# Copyright (C) 2001-2004 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Classes to generate plain text from a message object tree."""

import re
import sys
import time
import random
import warnings
from cStringIO import StringIO

from email.Header import Header

# Joiner used to build handler method names from the MIME main type and
# subtype (see Generator._dispatch()).
UNDERSCORE = '_'
# Line separator used when joining generated chunks of text.
NL = '\n'

# Matches the literal 'From ' at the start of any line (MULTILINE); used to
# escape such lines in message bodies when From_ mangling is enabled.
fcre = re.compile(r'^From ', re.MULTILINE)

def _is8bitstring(s):
    if isinstance(s, str):
        try:
            unicode(s, 'us-ascii')
        except UnicodeError:
            return True
    return False


\f
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    # For backwards compatibility, but this is slower
    def __call__(self, msg, unixfrom=False):
        """Deprecated alias for flatten(); issues a DeprecationWarning."""
        warnings.warn('__call__() deprecated; use flatten()',
                      DeprecationWarning, 2)
        self.flatten(msg, unixfrom)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg's headers followed by its body to the output file.

        The body is generated first, into a temporary buffer, so that the
        body handlers may still modify the headers before they are written.
        """
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg's type."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, then the blank separator line."""
        for h, v in msg.items():
            # The trailing comma keeps the value on the same line as the
            # header name, separated by a single space.
            print >> self._fp, '%s:' % h,
            if self._maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.
                print >> self._fp, Header(
                    v, maxlinelen=self._maxheaderlen,
                    header_name=h, continuation_ws='\t').encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg, encoding and mangling as needed.

        Nothing is written when the payload is None.  Raises TypeError if
        the (possibly charset-encoded) payload is not a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not isinstance(payload, basestring):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write the body of a multipart message.

        Each subpart is flattened with a clone of this generator and the
        results are written between boundary delimiter lines, surrounded by
        the preamble and epilogue when msg has them.  If msg has no boundary
        parameter, one is crafted and set on msg.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if boundary is None:
            # The message has no boundary yet, so craft one that doesn't
            # appear in any of the message texts and record it.  We only do
            # this work (and only touch the header) when it's really needed
            # because, while set_boundary() preserves order, it doesn't
            # preserve newlines/continuations in headers.  This is no big
            # deal in practice, but turns out to be inconvenient for the
            # unittest suite.
            boundary = _make_boundary(NL.join(msgtexts))
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            print >> self._fp, msg.preamble
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            print >> self._fp, '\n--' + boundary
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--')
        if msg.epilogue is not None:
            print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_message_delivery_status(self, msg):
        """Write each payload block of msg, separated by an empty line."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the flattened text of the message contained in msg."""
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        g.flatten(msg.get_payload(0), unixfrom=False)
        self._fp.write(s.getvalue())


\f
# Default format string DecodedGenerator substitutes for non-text parts; it
# is expanded with a dict of %(keyword)s values describing the part.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            self._fmt = _FMT
        else:
            self._fmt = fmt

    def clone(self, fp):
        """Clone this generator with the exact same options, fmt included."""
        # Generator.clone() only forwards mangle_from_ and maxheaderlen, so
        # without this override a custom fmt would silently be replaced by
        # the default in the clone.
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen,
                              self._fmt)

    def _dispatch(self, msg):
        """Print decoded text parts and a placeholder for other parts.

        Every part yielded by msg.walk() is visited: text parts have their
        decoded payload printed, multipart containers are skipped, and any
        other part is replaced by the expanded format string.
        """
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'text':
                print >> self, part.get_payload(decode=True)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                print >> self, self._fmt % {
                    'type'       : part.get_content_type(),
                    'maintype'   : maintype,
                    'subtype'    : part.get_content_subtype(),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }


\f
# Helper
# _width is the length of repr(sys.maxint-1); _fmt is a zero-padded integer
# format of that width (e.g. '%019d'), used by _make_boundary() to render
# its random token.
_width = len(repr(sys.maxint-1))
_fmt = '%%0%dd' % _width

def _make_boundary(text=None):
    """Return a randomly generated boundary string.

    If text is given, the returned boundary is guaranteed not to occur in it
    as a delimiter line, i.e. as a line made of '--' plus the boundary with
    an optional trailing '--'.
    """
    base = ('=' * 15) + (_fmt % random.randrange(sys.maxint)) + '=='
    if text is None:
        return base
    candidate = base
    suffix = 0
    # As long as the candidate collides with a line of text, try the base
    # boundary with the next numeric suffix appended.
    while re.search('^--' + re.escape(candidate) + '(--)?$', text,
                    re.MULTILINE):
        candidate = base + '.' + str(suffix)
        suffix += 1
    return candidate
Commit	Line	Data
920dae64 AT	1	# Copyright (C) 2001-2004 Python Software Foundation
	2	# Author: Barry Warsaw
	3	# Contact: email-sig@python.org
	4
	5	"""Classes to generate plain text from a message object tree."""
	6
	7	import re
	8	import sys
	9	import time
	10	import random
	11	import warnings
	12	from cStringIO import StringIO
	13
	14	from email.Header import Header
	15
	16	UNDERSCORE = '_'
	17	NL = '\n'
	18
	19	fcre = re.compile(r'^From ', re.MULTILINE)
	20
	21	def _is8bitstring(s):
	22	if isinstance(s, str):
	23	try:
	24	unicode(s, 'us-ascii')
	25	except UnicodeError:
	26	return True
	27	return False
	28
	29
	30	\f
	31	class Generator:
	32	"""Generates output from a Message object tree.
	33
	34	This basic generator writes the message to the given file object as plain
	35	text.
	36	"""
	37	#
	38	# Public interface
	39	#
	40
	41	def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
	42	"""Create the generator for message flattening.
	43
	44	outfp is the output file-like object for writing the message to. It
	45	must have a write() method.
	46
	47	Optional mangle_from_ is a flag that, when True (the default), escapes
	48	From_ lines in the body of the message by putting a `>' in front of
	49	them.
	50
	51	Optional maxheaderlen specifies the longest length for a non-continued
	52	header. When a header line is longer (in characters, with tabs
	53	expanded to 8 spaces) than maxheaderlen, the header will split as
	54	defined in the Header class. Set maxheaderlen to zero to disable
	55	header wrapping. The default is 78, as recommended (but not required)
	56	by RFC 2822.
	57	"""
	58	self._fp = outfp
	59	self._mangle_from_ = mangle_from_
	60	self._maxheaderlen = maxheaderlen
	61
	62	def write(self, s):
	63	# Just delegate to the file object
	64	self._fp.write(s)
65
66	def flatten(self, msg, unixfrom=False):
67	"""Print the message object tree rooted at msg to the output file
68	specified when the Generator instance was created.
69
70	unixfrom is a flag that forces the printing of a Unix From_ delimiter
71	before the first object in the message tree. If the original message
72	has no From_ delimiter, a `standard' one is crafted. By default, this
73	is False to inhibit the printing of any From_ delimiter.
74
75	Note that for subobjects, no From_ line is printed.
76	"""
77	if unixfrom:
78	ufrom = msg.get_unixfrom()
79	if not ufrom:
80	ufrom = 'From nobody ' + time.ctime(time.time())
81	print >> self._fp, ufrom
82	self._write(msg)
83
84	# For backwards compatibility, but this is slower
85	def __call__(self, msg, unixfrom=False):
86	warnings.warn('__call__() deprecated; use flatten()',
87	DeprecationWarning, 2)
88	self.flatten(msg, unixfrom)
89
90	def clone(self, fp):
91	"""Clone this generator with the exact same options."""
92	return self.__class__(fp, self._mangle_from_, self._maxheaderlen)
93
94	#
95	# Protected interface - undocumented ;/
96	#
97
98	def _write(self, msg):
99	# We can't write the headers yet because of the following scenario:
100	# say a multipart message includes the boundary string somewhere in
101	# its body. We'd have to calculate the new boundary /before/ we write
102	# the headers so that we can write the correct Content-Type:
103	# parameter.
104	#
105	# The way we do this, so as to make the _handle_*() methods simpler,
106	# is to cache any subpart writes into a StringIO. The we write the
107	# headers and the StringIO contents. That way, subpart handlers can
108	# Do The Right Thing, and can still modify the Content-Type: header if
109	# necessary.
110	oldfp = self._fp
111	try:
112	self._fp = sfp = StringIO()
113	self._dispatch(msg)
114	finally:
115	self._fp = oldfp
116	# Write the headers. First we see if the message object wants to
117	# handle that itself. If not, we'll do it generically.
118	meth = getattr(msg, '_write_headers', None)
119	if meth is None:
120	self._write_headers(msg)
121	else:
122	meth(self)
123	self._fp.write(sfp.getvalue())
124
125	def _dispatch(self, msg):
126	# Get the Content-Type: for the message, then try to dispatch to
127	# self._handle_<maintype>_<subtype>(). If there's no handler for the
128	# full MIME type, then dispatch to self._handle_<maintype>(). If
129	# that's missing too, then dispatch to self._writeBody().
130	main = msg.get_content_maintype()
131	sub = msg.get_content_subtype()
132	specific = UNDERSCORE.join((main, sub)).replace('-', '_')
133	meth = getattr(self, '_handle_' + specific, None)
134	if meth is None:
135	generic = main.replace('-', '_')
136	meth = getattr(self, '_handle_' + generic, None)
137	if meth is None:
138	meth = self._writeBody
139	meth(msg)
140
141	#
142	# Default handlers
143	#
144
145	def _write_headers(self, msg):
146	for h, v in msg.items():
147	print >> self._fp, '%s:' % h,
148	if self._maxheaderlen == 0:
149	# Explicit no-wrapping
150	print >> self._fp, v
151	elif isinstance(v, Header):
152	# Header instances know what to do
153	print >> self._fp, v.encode()
154	elif _is8bitstring(v):
155	# If we have raw 8bit data in a byte string, we have no idea
156	# what the encoding is. There is no safe way to split this
157	# string. If it's ascii-subset, then we could do a normal
158	# ascii split, but if it's multibyte then we could break the
159	# string. There's no way to know so the least harm seems to
160	# be to not split the string and risk it being too long.
161	print >> self._fp, v
162	else:
163	# Header's got lots of smarts, so use it.
164	print >> self._fp, Header(
165	v, maxlinelen=self._maxheaderlen,
166	header_name=h, continuation_ws='\t').encode()
167	# A blank line always separates headers from body
168	print >> self._fp
169
170	#
171	# Handlers for writing types and subtypes
172	#
173
174	def _handle_text(self, msg):
175	payload = msg.get_payload()
176	if payload is None:
177	return
178	cset = msg.get_charset()
179	if cset is not None:
180	payload = cset.body_encode(payload)
181	if not isinstance(payload, basestring):
182	raise TypeError('string payload expected: %s' % type(payload))
183	if self._mangle_from_:
184	payload = fcre.sub('>From ', payload)
185	self._fp.write(payload)
186
187	# Default body handler
188	_writeBody = _handle_text
189
190	def _handle_multipart(self, msg):
191	# The trick here is to write out each part separately, merge them all
192	# together, and then make sure that the boundary we've chosen isn't
193	# present in the payload.
194	msgtexts = []
195	subparts = msg.get_payload()
196	if subparts is None:
197	subparts = []
198	elif isinstance(subparts, basestring):
199	# e.g. a non-strict parse of a message with no starting boundary.
200	self._fp.write(subparts)
201	return
202	elif not isinstance(subparts, list):
203	# Scalar payload
204	subparts = [subparts]
205	for part in subparts:
206	s = StringIO()
207	g = self.clone(s)
208	g.flatten(part, unixfrom=False)
209	msgtexts.append(s.getvalue())
210	# Now make sure the boundary we've selected doesn't appear in any of
211	# the message texts.
212	alltext = NL.join(msgtexts)
213	# BAW: What about boundaries that are wrapped in double-quotes?
214	boundary = msg.get_boundary(failobj=_make_boundary(alltext))
215	# If we had to calculate a new boundary because the body text
216	# contained that string, set the new boundary. We don't do it
217	# unconditionally because, while set_boundary() preserves order, it
218	# doesn't preserve newlines/continuations in headers. This is no big
219	# deal in practice, but turns out to be inconvenient for the unittest
220	# suite.
221	if msg.get_boundary() <> boundary:
222	msg.set_boundary(boundary)
223	# If there's a preamble, write it out, with a trailing CRLF
224	if msg.preamble is not None:
225	print >> self._fp, msg.preamble
226	# dash-boundary transport-padding CRLF
227	print >> self._fp, '--' + boundary
228	# body-part
229	if msgtexts:
230	self._fp.write(msgtexts.pop(0))
231	# *encapsulation
232	# --> delimiter transport-padding
233	# --> CRLF body-part
234	for body_part in msgtexts:
235	# delimiter transport-padding CRLF
236	print >> self._fp, '\n--' + boundary
237	# body-part
238	self._fp.write(body_part)
239	# close-delimiter transport-padding
240	self._fp.write('\n--' + boundary + '--')
241	if msg.epilogue is not None:
242	print >> self._fp
243	self._fp.write(msg.epilogue)
244
245	def _handle_message_delivery_status(self, msg):
246	# We can't just write the headers directly to self's file object
247	# because this will leave an extra newline between the last header
248	# block and the boundary. Sigh.
249	blocks = []
250	for part in msg.get_payload():
251	s = StringIO()
252	g = self.clone(s)
253	g.flatten(part, unixfrom=False)
254	text = s.getvalue()
255	lines = text.split('\n')
256	# Strip off the unnecessary trailing empty line
257	if lines and lines[-1] == '':
258	blocks.append(NL.join(lines[:-1]))
259	else:
260	blocks.append(text)
261	# Now join all the blocks with an empty line. This has the lovely
262	# effect of separating each block with an empty line, but not adding
263	# an extra one after the last one.
264	self._fp.write(NL.join(blocks))
265
266	def _handle_message(self, msg):
267	s = StringIO()
268	g = self.clone(s)
269	# The payload of a message/rfc822 part should be a multipart sequence
270	# of length 1. The zeroth element of the list should be the Message
271	# object for the subpart. Extract that object, stringify it, and
272	# write it out.
273	g.flatten(msg.get_payload(0), unixfrom=False)
274	self._fp.write(s.getvalue())
275
276
277	\f
278	_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
279
280	class DecodedGenerator(Generator):
281	"""Generator a text representation of a message.
282
283	Like the Generator base class, except that non-text parts are substituted
284	with a format string representing the part.
285	"""
286	def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
287	"""Like Generator.__init__() except that an additional optional
288	argument is allowed.
289
290	Walks through all subparts of a message. If the subpart is of main
291	type `text', then it prints the decoded payload of the subpart.
292
293	Otherwise, fmt is a format string that is used instead of the message
294	payload. fmt is expanded with the following keywords (in
295	%(keyword)s format):
296
297	type : Full MIME type of the non-text part
298	maintype : Main MIME type of the non-text part
299	subtype : Sub-MIME type of the non-text part
300	filename : Filename of the non-text part
301	description: Description associated with the non-text part
302	encoding : Content transfer encoding of the non-text part
303
304	The default value for fmt is None, meaning
305
306	[Non-text (%(type)s) part of message omitted, filename %(filename)s]
307	"""
308	Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
309	if fmt is None:
310	self._fmt = _FMT
311	else:
312	self._fmt = fmt
313
314	def _dispatch(self, msg):
315	for part in msg.walk():
316	maintype = part.get_content_maintype()
317	if maintype == 'text':
318	print >> self, part.get_payload(decode=True)
319	elif maintype == 'multipart':
320	# Just skip this
321	pass
322	else:
323	print >> self, self._fmt % {
324	'type' : part.get_content_type(),
325	'maintype' : part.get_content_maintype(),
326	'subtype' : part.get_content_subtype(),
327	'filename' : part.get_filename('[no filename]'),
328	'description': part.get('Content-Description',
329	'[no description]'),
330	'encoding' : part.get('Content-Transfer-Encoding',
331	'[no encoding]'),
332	}
333
334
335	\f
336	# Helper
337	_width = len(repr(sys.maxint-1))
338	_fmt = '%%0%dd' % _width
339
340	def _make_boundary(text=None):
341	# Craft a random boundary. If text is given, ensure that the chosen
342	# boundary doesn't appear in the text.
343	token = random.randrange(sys.maxint)
344	boundary = ('=' * 15) + (_fmt % token) + '=='
345	if text is None:
346	return boundary
347	b = boundary
348	counter = 0
349	while True:
350	cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
351	if not cre.search(text):
352	break
353	b = boundary + '.' + str(counter)
354	counter += 1
355	return b