git.subgeniuskitty.com - OpenSPARC-T2-DV/.git/blame_incremental - tools/perl-5.8.0/lib/site

... / ...

Commit	Line	Data
	1	##---------------------------------------------------------------------------##
	2	## File:
	3	## $Id: mhtxtplain.pl,v 2.24 2002/10/10 22:27:19 ehood Exp $
	4	## Author:
	5	## Earl Hood mhonarc@mhonarc.org
	6	## Description:
	7	## Library defines routine to filter text/plain body parts to HTML
	8	## for MHonArc.
	9	## Filter routine can be registered with the following:
	10	## <MIMEFILTERS>
	11	## text/plain:m2h_text_plain'filter:mhtxtplain.pl
	12	## </MIMEFILTERS>
	13	##---------------------------------------------------------------------------##
	14	## MHonArc -- Internet mail-to-HTML converter
	15	## Copyright (C) 1995-2001 Earl Hood, mhonarc@mhonarc.org
	16	##
	17	## This program is free software; you can redistribute it and/or modify
	18	## it under the terms of the GNU General Public License as published by
	19	## the Free Software Foundation; either version 2 of the License, or
	20	## (at your option) any later version.
	21	##
	22	## This program is distributed in the hope that it will be useful,
	23	## but WITHOUT ANY WARRANTY; without even the implied warranty of
	24	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	25	## GNU General Public License for more details.
	26	##
	27	## You should have received a copy of the GNU General Public License
	28	## along with this program; if not, write to the Free Software
	29	## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
	30	## 02111-1307, USA
	31	##---------------------------------------------------------------------------##
	32
	33	package m2h_text_plain;
	34
	35	require 'readmail.pl';
	36
	37	$Url = '(http://\|https://\|ftp://\|afs://\|wais://\|telnet://\|ldap://' .
	38	'\|gopher://\|news:\|nntp:\|mid:\|cid:\|mailto:\|prospero:)';
	39	$UrlExp = $Url . q/[^\s\\|<>"']*[^\.?!;,"'\\|\[\]\s<>]/;
	40	$HUrlExp = $Url . q/(?:&(?![gl]t;)\|[^\s\\|<>"'\&])+/ .
	41	q/[^\.?!;,"'\\|\[\]\s<>\&]/;
	42	$QuoteChars = '[>\\|\]+:]';
	43	$HQuoteChars = '>\|[\\|\]+:]';
	44
	45	$StartFlowedQuote =
	46	'<blockquote style="border-left: #0000FF solid 0.1em; '.
	47	'margin-left: 0.0em; padding-left: 1.0em">';
	48	$EndFlowedQuote = "</blockquote>";
	49
	50	##---------------------------------------------------------------------------##
	51	## Text/plain filter for mhonarc. The following filter arguments
	52	## are recognized ($args):
	53	##
	54	## asis=set1:set2:...
	55	## Colon separated lists of charsets to leave as-is.
	56	## Only HTML special characters will be converted into
	57	## entities. The default value is "us-ascii:iso-8859-1".
	58	##
	59	## attachcheck Honor attachment disposition. By default,
	60	## all text/plain data is displayed inline on
	61	## the message page. If attachcheck is specified
	62	## and Content-Disposition specifies the data as
	63	## an attachment, the data is saved to a file
	64	## with a link to it from the message page.
	65	##
	66	## default=set Default charset to use if not set.
	67	##
	68	## inlineexts="ext1,ext2,..."
	69	## A comma separated list of message specified filename
	70	## extensions to treat as inline data.
	71	## Applicable only when uudecode options specified.
	72	##
	73	## htmlcheck Check if message is actually an HTML message
	74	## (to get around abhorrent MUAs). The message
	75	## is treated as HTML if the first non-whitespace
	76	## data looks like the start of an HTML document.
	77	##
	78	## keepspace Preserve whitespace if nonfixed
	79	##
	80	## nourl Do hyperlink URLs
	81	##
	82	## nonfixed Use normal typeface
	83	##
	84	## maxwidth=# Set the maximum width of lines. Lines exceeding
	85	## the maxwidth will be broken up across multiple lines.
	86	##
	87	## quote Italicize quoted message text
	88	##
	89	## target=name Set TARGET attribute for links if converting URLs
	90	## to links. Defaults to _top.
	91	##
	92	## usename Use filename specified in uuencoded data when
	93	## converting uuencoded data. This option is only
	94	## applicable of uudecode is specified.
	95	##
	96	## uudecode Decoded any embedded uuencoded data.
	97	##
	98	## All arguments should be separated by at least one space
	99	##
	100	sub filter {
	101	my($fields, $data, $isdecode, $args) = @_;
	102	local($_);
	103
	104	## Parse arguments
	105	$args = "" unless defined($args);
	106
	107	## Check if content-disposition should be checked
	108	if ($args =~ /\battachcheck\b/i) {
	109	my($disp, $nameparm) = readmail::MAILhead_get_disposition($fields);
	110	if ($disp =~ /\battachment\b/i) {
	111	require 'mhexternal.pl';
	112	return (m2h_external::filter(
	113	$fields, $data, $isdecode,
	114	readmail::get_filter_args('m2h_external::filter')));
	115	}
	116	}
	117
	118	## Check if decoding uuencoded data. The implementation chosen here
	119	## for decoding uuencoded data was done so when uudecode is not
	120	## specified, there is no extra overhead (besides the $args check for
	121	## uudecode). However, when uudecode is specified, more overhead may
	122	## exist over other potential implementations.
	123	## I.e. We only try to penalize performance when uudecode is specified.
	124	if ($args =~ s/\buudecode\b//ig) {
	125	# $args has uudecode stripped out for recursive calls
	126
	127	# Make sure we have needed routines
	128	my $decoder = readmail::load_decoder("uuencode");
	129	if (!defined($decoder) \|\| !defined(&$decoder)) {
	130	require 'base64.pl';
	131	$decoder = \&base64::uudecode;
	132	}
	133	require 'mhmimetypes.pl';
	134
	135	# Grab any filename extensions that imply inlining
	136	my $inlineexts = '';
	137	if ($args =~ /\binlineexts=(\S+)/) {
	138	$inlineexts = ',' . lc($1) . ',';
	139	$inlineexts =~ s/['"]//g;
	140	}
	141	my $usename = $args =~ /\busename\b/;
	142
	143	my($pdata); # have to use local() since typeglobs used
	144	my($inext, $uddata, $file, $urlfile);
	145	my @files = ( );
	146	my $ret = "";
	147	my $i = 0;
	148
	149	# <CR><LF> => <LF> to make parsing easier
	150	$$data =~ s/\r\n/\n/g;
	151
	152	# Split on uuencoded data. For text portions, recursively call
	153	# filter to convert text data: makes it easier to handle all
	154	# the various formatting options.
	155	foreach $pdata
	156	(split(/^(begin\s+\d\d\d\s+[^\n]+\n[!-M].*?\nend\n)/sm,
	157	$$data)) {
	158	if ($i % 2) { # uuencoded data
	159	# extract filename extension
	160	($file) = $pdata =~ /^begin\s+\d\d\d\s+([^\n]+)/;
	161	if ($file =~ /\.(\w+)$/) { $inext = $1; } else { $inext = ""; }
	162
	163	# decode data
	164	$uddata = &$decoder($pdata);
	165
	166	# save to file
	167	if (readmail::MAILis_excluded('application/octet-stream')) {
	168	$ret .= &$readmail::ExcludedPartFunc($file);
	169	} else {
	170	push(@files,
	171	mhonarc::write_attachment(
	172	'application/octet-stream', \$uddata, '',
	173	($usename?$file:''), $inext));
	174	$urlfile = mhonarc::htmlize($files[$#files]);
	175
	176	# create link to file
	177	if (index($inlineexts, ','.lc($inext).',') >= $[) {
	178	$ret .= qq\|<a href="$urlfile"><img src="$urlfile">\| .
	179	qq\|</a><br>\n\|;
	180	} else {
	181	$ret .= qq\|<a href="$urlfile">\| .
	182	mhonarc::htmlize($file) . qq\|</a><br>\n\|;
	183	}
	184	}
	185
	186	} elsif ($pdata =~ /\S/) { # plain text
	187	my(@subret) = filter($fields, \$pdata, $isdecode, $args);
	188	$ret .= shift @subret;
	189	push(@files, @subret);
	190	} else {
	191	# Make sure readmail thinks we processed
	192	$ret .= " ";
	193	}
	194	++$i;
	195	}
	196
	197	## Done with uudecode
	198	return ($ret, @files);
	199	}
	200
	201
	202	## Check for HTML data if requested
	203	if ($args =~ s/\bhtmlcheck\b//i &&
	204	$$data =~ /\A\s*<(?:html\b\|x-html\b\|!doctype\s+html\s)/i) {
	205	if (readmail::MAILis_excluded('text/html')) {
	206	return (&$readmail::ExcludedPartFunc('text/plain HTML'));
	207	}
	208	my $html_filter = readmail::load_filter('text/html');
	209	if (defined($html_filter) && defined(&$html_filter)) {
	210	return (&$html_filter($fields, $data, $isdecode,
	211	readmail::get_filter_args(
	212	'text/html', 'text/*', $html_filter)));
	213	} else {
	214	require 'mhtxthtml.pl';
	215	return (m2h_text_html::filter($fields, $data, $isdecode,
	216	readmail::get_filter_args(
	217	'text/html', 'text/*', 'm2h_text_html::filter')));
	218	}
	219	}
	220
	221	my($charset, $nourl, $doquote, $igncharset, $nonfixed, $textformat,
	222	$keepspace, $maxwidth, $target, $defset, $xhtml);
	223	my(%asis) = ( );
	224
	225	$nourl = ($mhonarc::NOURL \|\| ($args =~ /\bnourl\b/i));
	226	$doquote = ($args =~ /\bquote\b/i);
	227	$nonfixed = ($args =~ /\bnonfixed\b/i);
	228	$keepspace = ($args =~ /\bkeepspace\b/i);
	229	if ($args =~ /\bmaxwidth=(\d+)/i) { $maxwidth = $1; }
	230	else { $maxwidth = 0; }
	231	if ($args =~ /\bdefault=(\S+)/i) { $defset = lc $1; }
	232	else { $defset = 'us-ascii'; }
	233	$target = "";
	234	if ($args =~ /\btarget="([^"]+)"/i) { $target = $1; }
	235	elsif ($args =~ /\btarget=(\S+)/i) { $target = $1; }
	236	$target =~ s/['"]//g;
	237	if ($target) {
	238	$target = qq/target="$target"/;
	239	}
	240	$defset =~ s/['"\s]//g;
	241
	242	## Grab charset parameter (if defined)
	243	if ( defined($fields->{'content-type'}[0]) and
	244	$fields->{'content-type'}[0] =~ /\bcharset\s=\s([^\s;]+)/i ) {
	245	$charset = lc $1;
	246	$charset =~ s/['";\s]//g;
	247	} else {
	248	$charset = $defset;
	249	}
	250	## Grab format parameter (if defined)
	251	if ( defined($fields->{'content-type'}[0]) and
	252	$fields->{'content-type'}[0] =~ /\bformat\s=\s([^\s;]+)/i ) {
	253	$textformat = lc $1;
	254	$textformat =~ s/['";\s]//g;
	255	} else {
	256	$textformat = "fixed";
	257	}
	258
	259	## Check if certain charsets should be left alone
	260	if ($args =~ /\basis=(\S+)/i) {
	261	my $t = lc $1; $t =~ s/['"]//g;
	262	local($_); foreach (split(':', $t)) { $asis{$_} = 1; }
	263	}
	264
	265	## Check MIMECharSetConverters if charset should be left alone
	266	my($charcnv, $real_charset_name) =
	267	readmail::MAILload_charset_converter($charset);
	268	if (defined($charcnv) && $charcnv eq '-decode-') {
	269	$asis{$charset} = 1;
	270	}
	271
	272	## Check if max-width set
	273	if ($maxwidth && $textformat eq 'fixed') {
	274	$$data =~ s/^(.*)$/&break_line($1, $maxwidth)/gem;
	275	}
	276
	277	## Convert data according to charset
	278	if (!$asis{$charset}) {
	279	# Registered in CHARSETCONVERTERS
	280	if (defined($charcnv) && defined(&$charcnv)) {
	281	$$data = &$charcnv($$data, $real_charset_name);
	282
	283	# Other
	284	} else {
	285	warn qq/\n/,
	286	qq/Warning: Unrecognized character set: $charset\n/,
	287	qq/ Message-Id: <$mhonarc::MHAmsgid>\n/,
	288	qq/ Message Number: $mhonarc::MHAmsgnum\n/;
	289	esc_chars_inplace($data);
	290	}
	291
	292	} else {
	293	esc_chars_inplace($data);
	294	}
	295
	296	if ($textformat eq 'flowed') {
	297	# Initial code for format=flowed contributed by Ken Hirsch (May 2002).
	298	# text/plain; format=flowed defined in RFC2646
	299
	300	my $currdepth = 0;
	301	my $ret='';
	302	$$data =~ s!^</?x-flowed>\r?\n>!!mg;
	303	while (length($$data)) {
	304	$$data =~ /^((?:>)*)/;
	305	my $qd = $1;
	306	if ($$data =~ s/^(.(?:(?:\n\|\r\n?)$qd(?!>).)*\n?)//) {
	307	# divide message into chunks by "quote-depth",
	308	# which is the number of leading > signs
	309	my $chunk = $1;
	310	$chunk =~ s/^$qd ?//mg; # N.B. also takes care of
	311	# space-stuffing
	312	$chunk =~ s/^-- $/--/mg; # special case for '-- '
	313
	314	if ($chunk =~ / \r?\n/) {
	315	# Treat this chunk as format=flowed
	316	# Lines that end with spaces are
	317	# considered to have soft line breaks.
	318	# Lines that end with no spaces are
	319	# considered to have hard line breaks.
	320	# XXX: Negative look-behind assertion not supported
	321	# on older versions of Perl 5 (<5.6)
	322	#$chunk =~ s/(?<! )(\r?\n\|\Z)/<br>$1/g;
	323	$chunk =~ s/(^\|[^ ])(\r?\n\|\Z)/$1<br>$2/mg;
	324
	325	} else {
	326	# Treat this chunk as format=fixed
	327	if ($nonfixed) {
	328	$chunk =~ s/(\r?\n)/<br>$1/g;
	329	if ($keepspace) {
	330	$chunk =~ s/^(.*)$/&preserve_space($1)/gem;
	331	}
	332	} else {
	333	$chunk = "<pre>" . $chunk . "</pre>\n";
	334	}
	335	}
	336	my $newdepth = length($qd)/length('>');
	337	if ($currdepth < $newdepth) {
	338	$chunk = $StartFlowedQuote x
	339	($newdepth - $currdepth) . $chunk;
	340	} elsif ($currdepth > $newdepth) {
	341	$chunk = $EndFlowedQuote x
	342	($currdepth - $newdepth) . $chunk;
	343	}
	344	$currdepth = $newdepth;
	345	$ret .= $chunk;
	346
	347	} else {
	348	# The above regex should always match, but just in case...
	349	warn qq/\n/,
	350	qq/Warning: Dequoting problem with format=flowed data\n/,
	351	qq/ Message-Id: <$MHAmsgid>\n/,
	352	qq/ Message Number: $MHAmsgnum\n/;
	353	$ret .= $$data;
	354	last;
	355	}
	356	}
	357	if ($currdepth > 0) {
	358	$ret .= $EndFlowedQuote x $currdepth;
	359	}
	360
	361	## Post-processing cleanup: makes things look nicer
	362	$ret =~ s/<br><\/blockquote>/<\/blockquote>/g;
	363	$ret =~ s/<\/blockquote><br>/<\/blockquote>/g;
	364
	365	$$data = $ret;
	366
	367	} else {
	368	## Check for quoting
	369	if ($doquote) {
	370	$$data =~ s@^( ?${HQuoteChars})(.*)$@$1<i>$2</i>@gom;
	371	}
	372
	373	## Check if using nonfixed font
	374	if ($nonfixed) {
	375	$$data =~ s/(\r?\n)/<br>$1/g;
	376	if ($keepspace) {
	377	$$data =~ s/^(.*)$/&preserve_space($1)/gem;
	378	}
	379	} else {
	380	$$data = "<pre>" . $$data . "</pre>\n";
	381	}
	382	}
	383
	384	## Convert URLs to hyperlinks
	385	$$data =~ s@($HUrlExp)@<a $target href="$1">$1</a>@gio
	386	unless $nourl;
	387
	388	($$data);
	389	}
	390
	391	##---------------------------------------------------------------------------##
	392
	393	sub esc_chars_inplace {
	394	my($foo) = shift;
	395	$$foo =~ s/&/&/g;
	396	$$foo =~ s/</</g;
	397	$$foo =~ s/>/>/g;
	398	$$foo =~ s/"/"/g;
	399	1;
	400	}
	401
	402	##---------------------------------------------------------------------------##
	403
	404	sub preserve_space {
	405	my($str) = shift;
	406
	407	1 while
	408	$str =~ s/^([^\t])(\t+)/$1 . ' ' x (length($2) 8 - length($1) % 8)/e;
	409	$str =~ s/ /\ /g;
	410	$str;
	411	}
	412
	413	##---------------------------------------------------------------------------##
	414
	415	sub break_line {
	416	my($str) = shift;
	417	my($width) = shift;
	418	my($q, $new) = ('', '');
	419	my($try, $trywidth, $len);
	420
	421	## Translate tabs to spaces
	422	1 while
	423	$str =~ s/^([^\t])(\t+)/$1 . ' ' x (length($2) 8 - length($1) % 8)/e;
	424
	425	## Do nothing if str <= width
	426	return $str if length($str) <= $width;
	427
	428	## See if str begins with a quote char
	429	if ($str =~ s/^( ?$QuoteChars)//o) {
	430	$q = $1;
	431	--$width;
	432	}
	433
	434	## Create new string by breaking up str
	435	while ($str ne "") {
	436
	437	# If $str less than width, break out
	438	if (length($str) <= $width) {
	439	$new .= $q . $str;
	440	last;
	441	}
	442
	443	# handle case where no-whitespace line larger than width
	444	if (($str =~ /^(\S+)/) && (($len = length($1)) >= $width)) {
	445	$new .= $q . $1;
	446	substr($str, 0, $len) = "";
	447	next;
	448	}
	449
	450	# Break string at whitespace
	451	$try = '';
	452	$trywidth = $width;
	453	$try = substr($str, 0, $trywidth);
	454	if ($try =~ /(\S+)$/) {
	455	$trywidth -= length($1);
	456	$new .= $q . substr($str, 0, $trywidth);
	457	} else {
	458	$new .= $q . $try;
	459	}
	460	substr($str, 0, $trywidth) = '';
	461
	462	} continue {
	463	$new .= "\n" if $str;
	464	}
	465	$new;
	466	}
	467
	468	##---------------------------------------------------------------------------##
	469	1;