[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / site_perl / 5.8.0 / mhtxtplain.pl

##---------------------------------------------------------------------------##
##  File:
##	$Id: mhtxtplain.pl,v 2.24 2002/10/10 22:27:19 ehood Exp $
##  Author:
##      Earl Hood       mhonarc@mhonarc.org
##  Description:
##	Library defines routine to filter text/plain body parts to HTML
##	for MHonArc.
##	Filter routine can be registered with the following:
##              <MIMEFILTERS>
##              text/plain:m2h_text_plain'filter:mhtxtplain.pl
##              </MIMEFILTERS>
##---------------------------------------------------------------------------##
##    MHonArc -- Internet mail-to-HTML converter
##    Copyright (C) 1995-2001	Earl Hood, mhonarc@mhonarc.org
##
##    This program is free software; you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation; either version 2 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program; if not, write to the Free Software
##    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
##    02111-1307, USA
##---------------------------------------------------------------------------##

package m2h_text_plain;

require 'readmail.pl';

$Url    	= '(http://|https://|ftp://|afs://|wais://|telnet://|ldap://' .
		   '|gopher://|news:|nntp:|mid:|cid:|mailto:|prospero:)';
$UrlExp 	= $Url . q/[^\s\(\)\|<>"']*[^\.?!;,"'\|\[\]\(\)\s<>]/;
$HUrlExp        = $Url . q/(?:&(?![gl]t;)|[^\s\(\)\|<>"'\&])+/ .
			 q/[^\.?!;,"'\|\[\]\(\)\s<>\&]/;
$QuoteChars	= '[>\|\]+:]';
$HQuoteChars	= '&gt;|[\|\]+:]';

$StartFlowedQuote =
  '<blockquote style="border-left: #0000FF solid 0.1em; '.
                     'margin-left: 0.0em; padding-left: 1.0em">';
$EndFlowedQuote   = "</blockquote>";

##---------------------------------------------------------------------------##
##	Text/plain filter for mhonarc.  The following filter arguments
##	are recognized ($args):
##
##	asis=set1:set2:...
##			Colon separated lists of charsets to leave as-is.
##			Only HTML special characters will be converted into
##			entities.  The default value is "us-ascii:iso-8859-1".
##
##	attachcheck	Honor attachment disposition.  By default,
##			all text/plain data is displayed inline on
##			the message page.  If attachcheck is specified
##			and Content-Disposition specifies the data as
##			an attachment, the data is saved to a file
##			with a link to it from the message page.
##
##	default=set 	Default charset to use if not set.
##
##      inlineexts="ext1,ext2,..."
##                      A comma separated list of message specified filename
##                      extensions to treat as inline data.
##                      Applicable only when uudecode options specified.
##
##	htmlcheck	Check if message is actually an HTML message
##			(to get around abhorrent MUAs).  The message
##			is treated as HTML if the first non-whitespace
##			data looks like the start of an HTML document.
##
##	keepspace	Preserve whitespace if nonfixed
##
##	nourl		Do hyperlink URLs
##
##	nonfixed	Use normal typeface
##
##	maxwidth=#	Set the maximum width of lines.  Lines exceeding
##			the maxwidth will be broken up across multiple lines.
##
##	quote		Italicize quoted message text
##
##	target=name  	Set TARGET attribute for links if converting URLs
##			to links.  Defaults to _top.
##
##	usename		Use filename specified in uuencoded data when
##			converting uuencoded data.  This option is only
##			applicable of uudecode is specified.
##
##	uudecode	Decoded any embedded uuencoded data.
##
##	All arguments should be separated by at least one space
##
sub filter {
    my($fields, $data, $isdecode, $args) = @_;
    local($_);

    ## Parse arguments
    $args	= ""  unless defined($args);

    ## Check if content-disposition should be checked
    if ($args =~ /\battachcheck\b/i) {
	my($disp, $nameparm) = readmail::MAILhead_get_disposition($fields);
	if ($disp =~ /\battachment\b/i) {
	    require 'mhexternal.pl';
	    return (m2h_external::filter(
		      $fields, $data, $isdecode,
		      readmail::get_filter_args('m2h_external::filter')));
	}
    }

    ## Check if decoding uuencoded data.  The implementation chosen here
    ## for decoding uuencoded data was done so when uudecode is not
    ## specified, there is no extra overhead (besides the $args check for
    ## uudecode).  However, when uudecode is specified, more overhead may
    ## exist over other potential implementations.
    ## I.e.  We only try to penalize performance when uudecode is specified.
    if ($args =~ s/\buudecode\b//ig) {
	# $args has uudecode stripped out for recursive calls

	# Make sure we have needed routines
	my $decoder = readmail::load_decoder("uuencode");
	if (!defined($decoder) || !defined(&$decoder)) {
	    require 'base64.pl';
	    $decoder = \&base64::uudecode;
	}
	require 'mhmimetypes.pl';

	# Grab any filename extensions that imply inlining
	my $inlineexts = '';
	if ($args =~ /\binlineexts=(\S+)/) {
	    $inlineexts = ',' . lc($1) . ',';
	    $inlineexts =~ s/['"]//g;
	}
	my $usename = $args =~ /\busename\b/;

	my($pdata);	# have to use local() since typeglobs used
	my($inext, $uddata, $file, $urlfile);
	my @files = ( );
	my $ret = "";
	my $i = 0;

	# <CR><LF> => <LF> to make parsing easier
	$$data =~ s/\r\n/\n/g;

	# Split on uuencoded data.  For text portions, recursively call
	# filter to convert text data: makes it easier to handle all
	# the various formatting options.
	foreach $pdata
		(split(/^(begin\s+\d\d\d\s+[^\n]+\n[!-M].*?\nend\n)/sm,
		       $$data)) {
	    if ($i % 2) {	# uuencoded data
		# extract filename extension
		($file) = $pdata =~ /^begin\s+\d\d\d\s+([^\n]+)/;
		if ($file =~ /\.(\w+)$/) { $inext = $1; } else { $inext = ""; }

		# decode data
		$uddata = &$decoder($pdata);

		# save to file
		if (readmail::MAILis_excluded('application/octet-stream')) {
		    $ret .= &$readmail::ExcludedPartFunc($file);
		} else {
		    push(@files,
			 mhonarc::write_attachment(
			    'application/octet-stream', \$uddata, '',
			    ($usename?$file:''), $inext));
		    $urlfile = mhonarc::htmlize($files[$#files]);

		    # create link to file
		    if (index($inlineexts, ','.lc($inext).',') >= $[) {
			$ret .= qq|<a href="$urlfile"><img src="$urlfile">| .
				qq|</a><br>\n|;
		    } else {
			$ret .= qq|<a href="$urlfile">| .
				mhonarc::htmlize($file) .  qq|</a><br>\n|;
		    }
		}

	    } elsif ($pdata =~ /\S/) {	# plain text
		my(@subret) = filter($fields, \$pdata, $isdecode, $args);
		$ret .= shift @subret;
		push(@files, @subret);
	    } else {
		# Make sure readmail thinks we processed
		$ret .= " ";
	    }
	    ++$i;
	}

	## Done with uudecode
	return ($ret, @files);
    }

    
    ## Check for HTML data if requested
    if ($args =~ s/\bhtmlcheck\b//i &&
	    $$data =~ /\A\s*<(?:html\b|x-html\b|!doctype\s+html\s)/i) {
	if (readmail::MAILis_excluded('text/html')) {
	  return (&$readmail::ExcludedPartFunc('text/plain HTML'));
	}
	my $html_filter = readmail::load_filter('text/html');
	if (defined($html_filter) && defined(&$html_filter)) {
	    return (&$html_filter($fields, $data, $isdecode,
		      readmail::get_filter_args(
			'text/html', 'text/*', $html_filter)));
	} else {
	    require 'mhtxthtml.pl';
	    return (m2h_text_html::filter($fields, $data, $isdecode,
		      readmail::get_filter_args(
			'text/html', 'text/*', 'm2h_text_html::filter')));
	}
    }

    my($charset, $nourl, $doquote, $igncharset, $nonfixed, $textformat,
       $keepspace, $maxwidth, $target, $defset, $xhtml);
    my(%asis) = ( );

    $nourl	= ($mhonarc::NOURL || ($args =~ /\bnourl\b/i));
    $doquote	= ($args =~ /\bquote\b/i);
    $nonfixed	= ($args =~ /\bnonfixed\b/i);
    $keepspace	= ($args =~ /\bkeepspace\b/i);
    if ($args =~ /\bmaxwidth=(\d+)/i) { $maxwidth = $1; }
	else { $maxwidth = 0; }
    if ($args =~ /\bdefault=(\S+)/i) { $defset = lc $1; }
	else { $defset = 'us-ascii'; }
    $target = "";
    if ($args =~ /\btarget="([^"]+)"/i) { $target = $1; }
	elsif ($args =~ /\btarget=(\S+)/i) { $target = $1; }
    $target =~ s/['"]//g;
    if ($target) {
	$target = qq/target="$target"/;
    }
    $defset =~ s/['"\s]//g;

    ## Grab charset parameter (if defined)
    if ( defined($fields->{'content-type'}[0]) and
	 $fields->{'content-type'}[0] =~ /\bcharset\s*=\s*([^\s;]+)/i ) {
	$charset = lc $1;
	$charset =~ s/['";\s]//g;
    } else {
	$charset = $defset;
    }
    ## Grab format parameter (if defined)
    if ( defined($fields->{'content-type'}[0]) and
	 $fields->{'content-type'}[0] =~ /\bformat\s*=\s*([^\s;]+)/i ) {
	$textformat = lc $1;
	$textformat =~ s/['";\s]//g;
    } else {
	$textformat = "fixed";
    }

    ## Check if certain charsets should be left alone
    if ($args =~ /\basis=(\S+)/i) {
	my $t = lc $1;  $t =~ s/['"]//g;
	local($_);  foreach (split(':', $t)) { $asis{$_} = 1; }
    }

    ## Check MIMECharSetConverters if charset should be left alone
    my($charcnv, $real_charset_name) =
	    readmail::MAILload_charset_converter($charset);
    if (defined($charcnv) && $charcnv eq '-decode-') {
	$asis{$charset} = 1;
    }

    ## Check if max-width set
    if ($maxwidth && $textformat eq 'fixed') {
	$$data =~ s/^(.*)$/&break_line($1, $maxwidth)/gem;
    }

    ## Convert data according to charset
    if (!$asis{$charset}) {
	# Registered in CHARSETCONVERTERS
	if (defined($charcnv) && defined(&$charcnv)) {
	    $$data = &$charcnv($$data, $real_charset_name);

	# Other
	} else {
	    warn qq/\n/,
		 qq/Warning: Unrecognized character set: $charset\n/,
		 qq/         Message-Id: <$mhonarc::MHAmsgid>\n/,
		 qq/         Message Number: $mhonarc::MHAmsgnum\n/;
	    esc_chars_inplace($data);
	}

    } else {
	esc_chars_inplace($data);
    }

    if ($textformat eq 'flowed') {
	# Initial code for format=flowed contributed by Ken Hirsch (May 2002).
	# text/plain; format=flowed defined in RFC2646

	my $currdepth = 0;
	my $ret='';
	$$data =~ s!^</?x-flowed>\r?\n>!!mg;
	while (length($$data)) {
	    $$data =~ /^((?:&gt;)*)/;
	    my $qd = $1;
	    if ($$data =~ s/^(.*(?:(?:\n|\r\n?)$qd(?!&gt;).*)*\n?)//) {
		# divide message into chunks by "quote-depth",
		# which is the number of leading > signs
		my $chunk = $1;
		$chunk =~ s/^$qd ?//mg;  # N.B. also takes care of
					 # space-stuffing
		$chunk =~ s/^-- $/--/mg; # special case for '-- '

		if ($chunk =~ / \r?\n/) {
		    # Treat this chunk as format=flowed
		    # Lines that end with spaces are
		    # considered to have soft line breaks.
		    # Lines that end with no spaces are
		    # considered to have hard line breaks.
		    # XXX: Negative look-behind assertion not supported
		    #	   on older versions of Perl 5 (<5.6)
		    #$chunk =~ s/(?<! )(\r?\n|\Z)/<br>$1/g;
		    $chunk =~ s/(^|[^ ])(\r?\n|\Z)/$1<br>$2/mg;

		} else {
		    # Treat this chunk as format=fixed
		    if ($nonfixed) {
			$chunk =~ s/(\r?\n)/<br>$1/g;
			if ($keepspace) {
			    $chunk =~ s/^(.*)$/&preserve_space($1)/gem;
			}
		    } else {
			$chunk = "<pre>" . $chunk . "</pre>\n";
		    }
		}
		my $newdepth = length($qd)/length('&gt;');
		if ($currdepth < $newdepth) {
		    $chunk = $StartFlowedQuote x
			     ($newdepth - $currdepth) . $chunk;
		} elsif ($currdepth > $newdepth) {
		    $chunk = $EndFlowedQuote x
			     ($currdepth - $newdepth) . $chunk;
		}
		$currdepth = $newdepth;
		$ret .= $chunk;

	    } else {
		# The above regex should always match, but just in case...
		warn qq/\n/,
		     qq/Warning: Dequoting problem with format=flowed data\n/,
		     qq/         Message-Id: <$MHAmsgid>\n/,
		     qq/         Message Number: $MHAmsgnum\n/;
		$ret .= $$data;
		last;
	    }
	}
	if ($currdepth > 0) {
	    $ret .= $EndFlowedQuote x $currdepth;
	}

	## Post-processing cleanup: makes things look nicer
	$ret =~ s/<br><\/blockquote>/<\/blockquote>/g;
	$ret =~ s/<\/blockquote><br>/<\/blockquote>/g;

	$$data = $ret;

    } else {
	## Check for quoting
	if ($doquote) {
	    $$data =~ s@^( ?${HQuoteChars})(.*)$@$1<i>$2</i>@gom;
	}

	## Check if using nonfixed font
	if ($nonfixed) {
	    $$data =~ s/(\r?\n)/<br>$1/g;
	    if ($keepspace) {
		$$data =~ s/^(.*)$/&preserve_space($1)/gem;
	    }
	} else {
	    $$data = "<pre>" . $$data . "</pre>\n";
	}
    }

    ## Convert URLs to hyperlinks
    $$data =~ s@($HUrlExp)@<a $target href="$1">$1</a>@gio
	unless $nourl;

    ($$data);
}

##---------------------------------------------------------------------------##

sub esc_chars_inplace {
    my($foo) = shift;
    $$foo =~ s/&/&amp;/g;
    $$foo =~ s/</&lt;/g;
    $$foo =~ s/>/&gt;/g;
    $$foo =~ s/"/&quot;/g;
    1;
}

##---------------------------------------------------------------------------##

sub preserve_space {
    my($str) = shift;

    1 while
    $str =~ s/^([^\t]*)(\t+)/$1 . ' ' x (length($2) * 8 - length($1) % 8)/e;
    $str =~ s/ /\&nbsp;/g;
    $str;
}

##---------------------------------------------------------------------------##

sub break_line {
    my($str) = shift;
    my($width) = shift;
    my($q, $new) = ('', '');
    my($try, $trywidth, $len);

    ## Translate tabs to spaces
    1 while
    $str =~ s/^([^\t]*)(\t+)/$1 . ' ' x (length($2) * 8 - length($1) % 8)/e;

    ## Do nothing if str <= width
    return $str  if length($str) <= $width;

    ## See if str begins with a quote char
    if ($str =~ s/^( ?$QuoteChars)//o) {
	$q = $1;
	--$width;
    }

    ## Create new string by breaking up str
    while ($str ne "") {

	# If $str less than width, break out
	if (length($str) <= $width) {
	    $new .= $q . $str;
	    last;
	}

	# handle case where no-whitespace line larger than width
	if (($str =~ /^(\S+)/) && (($len = length($1)) >= $width)) {
	    $new .= $q . $1;
	    substr($str, 0, $len) = "";
	    next;
	}

	# Break string at whitespace
	$try = '';
	$trywidth = $width;
	$try = substr($str, 0, $trywidth);
	if ($try =~ /(\S+)$/) {
	    $trywidth -= length($1);
	    $new .= $q . substr($str, 0, $trywidth);
	} else {
	    $new .= $q . $try;
	}
	substr($str, 0, $trywidth) = '';

    } continue {
	$new .= "\n"  if $str;
    }
    $new;
}

##---------------------------------------------------------------------------##
1;
Commit	Line	Data
86530b38 AT	1	##---------------------------------------------------------------------------##
	2	## File:
	3	## $Id: mhtxtplain.pl,v 2.24 2002/10/10 22:27:19 ehood Exp $
	4	## Author:
	5	## Earl Hood mhonarc@mhonarc.org
	6	## Description:
	7	## Library defines routine to filter text/plain body parts to HTML
	8	## for MHonArc.
	9	## Filter routine can be registered with the following:
	10	## <MIMEFILTERS>
	11	## text/plain:m2h_text_plain'filter:mhtxtplain.pl
	12	## </MIMEFILTERS>
	13	##---------------------------------------------------------------------------##
	14	## MHonArc -- Internet mail-to-HTML converter
	15	## Copyright (C) 1995-2001 Earl Hood, mhonarc@mhonarc.org
	16	##
	17	## This program is free software; you can redistribute it and/or modify
	18	## it under the terms of the GNU General Public License as published by
	19	## the Free Software Foundation; either version 2 of the License, or
	20	## (at your option) any later version.
	21	##
	22	## This program is distributed in the hope that it will be useful,
	23	## but WITHOUT ANY WARRANTY; without even the implied warranty of
	24	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	25	## GNU General Public License for more details.
	26	##
	27	## You should have received a copy of the GNU General Public License
	28	## along with this program; if not, write to the Free Software
	29	## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
	30	## 02111-1307, USA
	31	##---------------------------------------------------------------------------##
	32
	33	package m2h_text_plain;
	34
	35	require 'readmail.pl';
	36
	37	$Url = '(http://\|https://\|ftp://\|afs://\|wais://\|telnet://\|ldap://' .
	38	'\|gopher://\|news:\|nntp:\|mid:\|cid:\|mailto:\|prospero:)';
	39	$UrlExp = $Url . q/[^\s\(\)\\|<>"']*[^\.?!;,"'\\|\[\]\(\)\s<>]/;
	40	$HUrlExp = $Url . q/(?:&(?![gl]t;)\|[^\s\(\)\\|<>"'\&])+/ .
	41	q/[^\.?!;,"'\\|\[\]\(\)\s<>\&]/;
	42	$QuoteChars = '[>\\|\]+:]';
	43	$HQuoteChars = '>\|[\\|\]+:]';
	44
	45	$StartFlowedQuote =
	46	'<blockquote style="border-left: #0000FF solid 0.1em; '.
	47	'margin-left: 0.0em; padding-left: 1.0em">';
	48	$EndFlowedQuote = "</blockquote>";
	49
	50	##---------------------------------------------------------------------------##
	51	## Text/plain filter for mhonarc. The following filter arguments
	52	## are recognized ($args):
	53	##
	54	## asis=set1:set2:...
	55	## Colon separated lists of charsets to leave as-is.
	56	## Only HTML special characters will be converted into
	57	## entities. The default value is "us-ascii:iso-8859-1".
	58	##
	59	## attachcheck Honor attachment disposition. By default,
	60	## all text/plain data is displayed inline on
	61	## the message page. If attachcheck is specified
	62	## and Content-Disposition specifies the data as
	63	## an attachment, the data is saved to a file
	64	## with a link to it from the message page.
65	##
66	## default=set Default charset to use if not set.
67	##
68	## inlineexts="ext1,ext2,..."
69	## A comma separated list of message specified filename
70	## extensions to treat as inline data.
71	## Applicable only when uudecode options specified.
72	##
73	## htmlcheck Check if message is actually an HTML message
74	## (to get around abhorrent MUAs). The message
75	## is treated as HTML if the first non-whitespace
76	## data looks like the start of an HTML document.
77	##
78	## keepspace Preserve whitespace if nonfixed
79	##
80	## nourl Do hyperlink URLs
81	##
82	## nonfixed Use normal typeface
83	##
84	## maxwidth=# Set the maximum width of lines. Lines exceeding
85	## the maxwidth will be broken up across multiple lines.
86	##
87	## quote Italicize quoted message text
88	##
89	## target=name Set TARGET attribute for links if converting URLs
90	## to links. Defaults to _top.
91	##
92	## usename Use filename specified in uuencoded data when
93	## converting uuencoded data. This option is only
94	## applicable of uudecode is specified.
95	##
96	## uudecode Decoded any embedded uuencoded data.
97	##
98	## All arguments should be separated by at least one space
99	##
100	sub filter {
101	my($fields, $data, $isdecode, $args) = @_;
102	local($_);
103
104	## Parse arguments
105	$args = "" unless defined($args);
106
107	## Check if content-disposition should be checked
108	if ($args =~ /\battachcheck\b/i) {
109	my($disp, $nameparm) = readmail::MAILhead_get_disposition($fields);
110	if ($disp =~ /\battachment\b/i) {
111	require 'mhexternal.pl';
112	return (m2h_external::filter(
113	$fields, $data, $isdecode,
114	readmail::get_filter_args('m2h_external::filter')));
115	}
116	}
117
118	## Check if decoding uuencoded data. The implementation chosen here
119	## for decoding uuencoded data was done so when uudecode is not
120	## specified, there is no extra overhead (besides the $args check for
121	## uudecode). However, when uudecode is specified, more overhead may
122	## exist over other potential implementations.
123	## I.e. We only try to penalize performance when uudecode is specified.
124	if ($args =~ s/\buudecode\b//ig) {
125	# $args has uudecode stripped out for recursive calls
126
127	# Make sure we have needed routines
128	my $decoder = readmail::load_decoder("uuencode");
129	if (!defined($decoder) \|\| !defined(&$decoder)) {
130	require 'base64.pl';
131	$decoder = \&base64::uudecode;
132	}
133	require 'mhmimetypes.pl';
134
135	# Grab any filename extensions that imply inlining
136	my $inlineexts = '';
137	if ($args =~ /\binlineexts=(\S+)/) {
138	$inlineexts = ',' . lc($1) . ',';
139	$inlineexts =~ s/['"]//g;
140	}
141	my $usename = $args =~ /\busename\b/;
142
143	my($pdata); # have to use local() since typeglobs used
144	my($inext, $uddata, $file, $urlfile);
145	my @files = ( );
146	my $ret = "";
147	my $i = 0;
148
149	# <CR><LF> => <LF> to make parsing easier
150	$$data =~ s/\r\n/\n/g;
151
152	# Split on uuencoded data. For text portions, recursively call
153	# filter to convert text data: makes it easier to handle all
154	# the various formatting options.
155	foreach $pdata
156	(split(/^(begin\s+\d\d\d\s+[^\n]+\n[!-M].*?\nend\n)/sm,
157	$$data)) {
158	if ($i % 2) { # uuencoded data
159	# extract filename extension
160	($file) = $pdata =~ /^begin\s+\d\d\d\s+([^\n]+)/;
161	if ($file =~ /\.(\w+)$/) { $inext = $1; } else { $inext = ""; }
162
163	# decode data
164	$uddata = &$decoder($pdata);
165
166	# save to file
167	if (readmail::MAILis_excluded('application/octet-stream')) {
168	$ret .= &$readmail::ExcludedPartFunc($file);
169	} else {
170	push(@files,
171	mhonarc::write_attachment(
172	'application/octet-stream', \$uddata, '',
173	($usename?$file:''), $inext));
174	$urlfile = mhonarc::htmlize($files[$#files]);
175
176	# create link to file
177	if (index($inlineexts, ','.lc($inext).',') >= $[) {
178	$ret .= qq\|<a href="$urlfile"><img src="$urlfile">\| .
179	qq\|</a><br>\n\|;
180	} else {
181	$ret .= qq\|<a href="$urlfile">\| .
182	mhonarc::htmlize($file) . qq\|</a><br>\n\|;
183	}
184	}
185
186	} elsif ($pdata =~ /\S/) { # plain text
187	my(@subret) = filter($fields, \$pdata, $isdecode, $args);
188	$ret .= shift @subret;
189	push(@files, @subret);
190	} else {
191	# Make sure readmail thinks we processed
192	$ret .= " ";
193	}
194	++$i;
195	}
196
197	## Done with uudecode
198	return ($ret, @files);
199	}
200
201
202	## Check for HTML data if requested
203	if ($args =~ s/\bhtmlcheck\b//i &&
204	$$data =~ /\A\s*<(?:html\b\|x-html\b\|!doctype\s+html\s)/i) {
205	if (readmail::MAILis_excluded('text/html')) {
206	return (&$readmail::ExcludedPartFunc('text/plain HTML'));
207	}
208	my $html_filter = readmail::load_filter('text/html');
209	if (defined($html_filter) && defined(&$html_filter)) {
210	return (&$html_filter($fields, $data, $isdecode,
211	readmail::get_filter_args(
212	'text/html', 'text/*', $html_filter)));
213	} else {
214	require 'mhtxthtml.pl';
215	return (m2h_text_html::filter($fields, $data, $isdecode,
216	readmail::get_filter_args(
217	'text/html', 'text/*', 'm2h_text_html::filter')));
218	}
219	}
220
221	my($charset, $nourl, $doquote, $igncharset, $nonfixed, $textformat,
222	$keepspace, $maxwidth, $target, $defset, $xhtml);
223	my(%asis) = ( );
224
225	$nourl = ($mhonarc::NOURL \|\| ($args =~ /\bnourl\b/i));
226	$doquote = ($args =~ /\bquote\b/i);
227	$nonfixed = ($args =~ /\bnonfixed\b/i);
228	$keepspace = ($args =~ /\bkeepspace\b/i);
229	if ($args =~ /\bmaxwidth=(\d+)/i) { $maxwidth = $1; }
230	else { $maxwidth = 0; }
231	if ($args =~ /\bdefault=(\S+)/i) { $defset = lc $1; }
232	else { $defset = 'us-ascii'; }
233	$target = "";
234	if ($args =~ /\btarget="([^"]+)"/i) { $target = $1; }
235	elsif ($args =~ /\btarget=(\S+)/i) { $target = $1; }
236	$target =~ s/['"]//g;
237	if ($target) {
238	$target = qq/target="$target"/;
239	}
240	$defset =~ s/['"\s]//g;
241
242	## Grab charset parameter (if defined)
243	if ( defined($fields->{'content-type'}[0]) and
244	$fields->{'content-type'}[0] =~ /\bcharset\s=\s([^\s;]+)/i ) {
245	$charset = lc $1;
246	$charset =~ s/['";\s]//g;
247	} else {
248	$charset = $defset;
249	}
250	## Grab format parameter (if defined)
251	if ( defined($fields->{'content-type'}[0]) and
252	$fields->{'content-type'}[0] =~ /\bformat\s=\s([^\s;]+)/i ) {
253	$textformat = lc $1;
254	$textformat =~ s/['";\s]//g;
255	} else {
256	$textformat = "fixed";
257	}
258
259	## Check if certain charsets should be left alone
260	if ($args =~ /\basis=(\S+)/i) {
261	my $t = lc $1; $t =~ s/['"]//g;
262	local($_); foreach (split(':', $t)) { $asis{$_} = 1; }
263	}
264
265	## Check MIMECharSetConverters if charset should be left alone
266	my($charcnv, $real_charset_name) =
267	readmail::MAILload_charset_converter($charset);
268	if (defined($charcnv) && $charcnv eq '-decode-') {
269	$asis{$charset} = 1;
270	}
271
272	## Check if max-width set
273	if ($maxwidth && $textformat eq 'fixed') {
274	$$data =~ s/^(.*)$/&break_line($1, $maxwidth)/gem;
275	}
276
277	## Convert data according to charset
278	if (!$asis{$charset}) {
279	# Registered in CHARSETCONVERTERS
280	if (defined($charcnv) && defined(&$charcnv)) {
281	$$data = &$charcnv($$data, $real_charset_name);
282
283	# Other
284	} else {
285	warn qq/\n/,
286	qq/Warning: Unrecognized character set: $charset\n/,
287	qq/ Message-Id: <$mhonarc::MHAmsgid>\n/,
288	qq/ Message Number: $mhonarc::MHAmsgnum\n/;
289	esc_chars_inplace($data);
290	}
291
292	} else {
293	esc_chars_inplace($data);
294	}
295
296	if ($textformat eq 'flowed') {
297	# Initial code for format=flowed contributed by Ken Hirsch (May 2002).
298	# text/plain; format=flowed defined in RFC2646
299
300	my $currdepth = 0;
301	my $ret='';
302	$$data =~ s!^</?x-flowed>\r?\n>!!mg;
303	while (length($$data)) {
304	$$data =~ /^((?:>)*)/;
305	my $qd = $1;
306	if ($$data =~ s/^(.(?:(?:\n\|\r\n?)$qd(?!>).)*\n?)//) {
307	# divide message into chunks by "quote-depth",
308	# which is the number of leading > signs
309	my $chunk = $1;
310	$chunk =~ s/^$qd ?//mg; # N.B. also takes care of
311	# space-stuffing
312	$chunk =~ s/^-- $/--/mg; # special case for '-- '
313
314	if ($chunk =~ / \r?\n/) {
315	# Treat this chunk as format=flowed
316	# Lines that end with spaces are
317	# considered to have soft line breaks.
318	# Lines that end with no spaces are
319	# considered to have hard line breaks.
320	# XXX: Negative look-behind assertion not supported
321	# on older versions of Perl 5 (<5.6)
322	#$chunk =~ s/(?<! )(\r?\n\|\Z)/<br>$1/g;
323	$chunk =~ s/(^\|[^ ])(\r?\n\|\Z)/$1<br>$2/mg;
324
325	} else {
326	# Treat this chunk as format=fixed
327	if ($nonfixed) {
328	$chunk =~ s/(\r?\n)/<br>$1/g;
329	if ($keepspace) {
330	$chunk =~ s/^(.*)$/&preserve_space($1)/gem;
331	}
332	} else {
333	$chunk = "<pre>" . $chunk . "</pre>\n";
334	}
335	}
336	my $newdepth = length($qd)/length('>');
337	if ($currdepth < $newdepth) {
338	$chunk = $StartFlowedQuote x
339	($newdepth - $currdepth) . $chunk;
340	} elsif ($currdepth > $newdepth) {
341	$chunk = $EndFlowedQuote x
342	($currdepth - $newdepth) . $chunk;
343	}
344	$currdepth = $newdepth;
345	$ret .= $chunk;
346
347	} else {
348	# The above regex should always match, but just in case...
349	warn qq/\n/,
350	qq/Warning: Dequoting problem with format=flowed data\n/,
351	qq/ Message-Id: <$MHAmsgid>\n/,
352	qq/ Message Number: $MHAmsgnum\n/;
353	$ret .= $$data;
354	last;
355	}
356	}
357	if ($currdepth > 0) {
358	$ret .= $EndFlowedQuote x $currdepth;
359	}
360
361	## Post-processing cleanup: makes things look nicer
362	$ret =~ s/<br><\/blockquote>/<\/blockquote>/g;
363	$ret =~ s/<\/blockquote><br>/<\/blockquote>/g;
364
365	$$data = $ret;
366
367	} else {
368	## Check for quoting
369	if ($doquote) {
370	$$data =~ s@^( ?${HQuoteChars})(.*)$@$1<i>$2</i>@gom;
371	}
372
373	## Check if using nonfixed font
374	if ($nonfixed) {
375	$$data =~ s/(\r?\n)/<br>$1/g;
376	if ($keepspace) {
377	$$data =~ s/^(.*)$/&preserve_space($1)/gem;
378	}
379	} else {
380	$$data = "<pre>" . $$data . "</pre>\n";
381	}
382	}
383
384	## Convert URLs to hyperlinks
385	$$data =~ s@($HUrlExp)@<a $target href="$1">$1</a>@gio
386	unless $nourl;
387
388	($$data);
389	}
390
391	##---------------------------------------------------------------------------##
392
393	sub esc_chars_inplace {
394	my($foo) = shift;
395	$$foo =~ s/&/&/g;
396	$$foo =~ s/</</g;
397	$$foo =~ s/>/>/g;
398	$$foo =~ s/"/"/g;
399	1;
400	}
401
402	##---------------------------------------------------------------------------##
403
404	sub preserve_space {
405	my($str) = shift;
406
407	1 while
408	$str =~ s/^([^\t])(\t+)/$1 . ' ' x (length($2) 8 - length($1) % 8)/e;
409	$str =~ s/ /\ /g;
410	$str;
411	}
412
413	##---------------------------------------------------------------------------##
414
415	sub break_line {
416	my($str) = shift;
417	my($width) = shift;
418	my($q, $new) = ('', '');
419	my($try, $trywidth, $len);
420
421	## Translate tabs to spaces
422	1 while
423	$str =~ s/^([^\t])(\t+)/$1 . ' ' x (length($2) 8 - length($1) % 8)/e;
424
425	## Do nothing if str <= width
426	return $str if length($str) <= $width;
427
428	## See if str begins with a quote char
429	if ($str =~ s/^( ?$QuoteChars)//o) {
430	$q = $1;
431	--$width;
432	}
433
434	## Create new string by breaking up str
435	while ($str ne "") {
436
437	# If $str less than width, break out
438	if (length($str) <= $width) {
439	$new .= $q . $str;
440	last;
441	}
442
443	# handle case where no-whitespace line larger than width
444	if (($str =~ /^(\S+)/) && (($len = length($1)) >= $width)) {
445	$new .= $q . $1;
446	substr($str, 0, $len) = "";
447	next;
448	}
449
450	# Break string at whitespace
451	$try = '';
452	$trywidth = $width;
453	$try = substr($str, 0, $trywidth);
454	if ($try =~ /(\S+)$/) {
455	$trywidth -= length($1);
456	$new .= $q . substr($str, 0, $trywidth);
457	} else {
458	$new .= $q . $try;
459	}
460	substr($str, 0, $trywidth) = '';
461
462	} continue {
463	$new .= "\n" if $str;
464	}
465	$new;
466	}
467
468	##---------------------------------------------------------------------------##
469	1;