Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / site_perl / 5.8.0 / mhtxtplain.pl
##---------------------------------------------------------------------------##
## File:
## $Id: mhtxtplain.pl,v 2.24 2002/10/10 22:27:19 ehood Exp $
## Author:
## Earl Hood mhonarc@mhonarc.org
## Description:
## Library defines routine to filter text/plain body parts to HTML
## for MHonArc.
## Filter routine can be registered with the following:
## <MIMEFILTERS>
## text/plain:m2h_text_plain'filter:mhtxtplain.pl
## </MIMEFILTERS>
##---------------------------------------------------------------------------##
## MHonArc -- Internet mail-to-HTML converter
## Copyright (C) 1995-2001 Earl Hood, mhonarc@mhonarc.org
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
## 02111-1307, USA
##---------------------------------------------------------------------------##
package m2h_text_plain;
require 'readmail.pl';
$Url = '(http://|https://|ftp://|afs://|wais://|telnet://|ldap://' .
'|gopher://|news:|nntp:|mid:|cid:|mailto:|prospero:)';
$UrlExp = $Url . q/[^\s\(\)\|<>"']*[^\.?!;,"'\|\[\]\(\)\s<>]/;
$HUrlExp = $Url . q/(?:&(?![gl]t;)|[^\s\(\)\|<>"'\&])+/ .
q/[^\.?!;,"'\|\[\]\(\)\s<>\&]/;
$QuoteChars = '[>\|\]+:]';
$HQuoteChars = '&gt;|[\|\]+:]';
$StartFlowedQuote =
'<blockquote style="border-left: #0000FF solid 0.1em; '.
'margin-left: 0.0em; padding-left: 1.0em">';
$EndFlowedQuote = "</blockquote>";
##---------------------------------------------------------------------------##
## Text/plain filter for mhonarc. The following filter arguments
## are recognized ($args):
##
## asis=set1:set2:...
## Colon separated lists of charsets to leave as-is.
## Only HTML special characters will be converted into
## entities. The default value is "us-ascii:iso-8859-1".
##
## attachcheck Honor attachment disposition. By default,
## all text/plain data is displayed inline on
## the message page. If attachcheck is specified
## and Content-Disposition specifies the data as
## an attachment, the data is saved to a file
## with a link to it from the message page.
##
## default=set Default charset to use if not set.
##
## inlineexts="ext1,ext2,..."
## A comma separated list of message specified filename
## extensions to treat as inline data.
## Applicable only when uudecode options specified.
##
## htmlcheck Check if message is actually an HTML message
## (to get around abhorrent MUAs). The message
## is treated as HTML if the first non-whitespace
## data looks like the start of an HTML document.
##
## keepspace Preserve whitespace if nonfixed
##
## nourl Do hyperlink URLs
##
## nonfixed Use normal typeface
##
## maxwidth=# Set the maximum width of lines. Lines exceeding
## the maxwidth will be broken up across multiple lines.
##
## quote Italicize quoted message text
##
## target=name Set TARGET attribute for links if converting URLs
## to links. Defaults to _top.
##
## usename Use filename specified in uuencoded data when
## converting uuencoded data. This option is only
## applicable of uudecode is specified.
##
## uudecode Decoded any embedded uuencoded data.
##
## All arguments should be separated by at least one space
##
sub filter {
my($fields, $data, $isdecode, $args) = @_;
local($_);
## Parse arguments
$args = "" unless defined($args);
## Check if content-disposition should be checked
if ($args =~ /\battachcheck\b/i) {
my($disp, $nameparm) = readmail::MAILhead_get_disposition($fields);
if ($disp =~ /\battachment\b/i) {
require 'mhexternal.pl';
return (m2h_external::filter(
$fields, $data, $isdecode,
readmail::get_filter_args('m2h_external::filter')));
}
}
## Check if decoding uuencoded data. The implementation chosen here
## for decoding uuencoded data was done so when uudecode is not
## specified, there is no extra overhead (besides the $args check for
## uudecode). However, when uudecode is specified, more overhead may
## exist over other potential implementations.
## I.e. We only try to penalize performance when uudecode is specified.
if ($args =~ s/\buudecode\b//ig) {
# $args has uudecode stripped out for recursive calls
# Make sure we have needed routines
my $decoder = readmail::load_decoder("uuencode");
if (!defined($decoder) || !defined(&$decoder)) {
require 'base64.pl';
$decoder = \&base64::uudecode;
}
require 'mhmimetypes.pl';
# Grab any filename extensions that imply inlining
my $inlineexts = '';
if ($args =~ /\binlineexts=(\S+)/) {
$inlineexts = ',' . lc($1) . ',';
$inlineexts =~ s/['"]//g;
}
my $usename = $args =~ /\busename\b/;
my($pdata); # have to use local() since typeglobs used
my($inext, $uddata, $file, $urlfile);
my @files = ( );
my $ret = "";
my $i = 0;
# <CR><LF> => <LF> to make parsing easier
$$data =~ s/\r\n/\n/g;
# Split on uuencoded data. For text portions, recursively call
# filter to convert text data: makes it easier to handle all
# the various formatting options.
foreach $pdata
(split(/^(begin\s+\d\d\d\s+[^\n]+\n[!-M].*?\nend\n)/sm,
$$data)) {
if ($i % 2) { # uuencoded data
# extract filename extension
($file) = $pdata =~ /^begin\s+\d\d\d\s+([^\n]+)/;
if ($file =~ /\.(\w+)$/) { $inext = $1; } else { $inext = ""; }
# decode data
$uddata = &$decoder($pdata);
# save to file
if (readmail::MAILis_excluded('application/octet-stream')) {
$ret .= &$readmail::ExcludedPartFunc($file);
} else {
push(@files,
mhonarc::write_attachment(
'application/octet-stream', \$uddata, '',
($usename?$file:''), $inext));
$urlfile = mhonarc::htmlize($files[$#files]);
# create link to file
if (index($inlineexts, ','.lc($inext).',') >= $[) {
$ret .= qq|<a href="$urlfile"><img src="$urlfile">| .
qq|</a><br>\n|;
} else {
$ret .= qq|<a href="$urlfile">| .
mhonarc::htmlize($file) . qq|</a><br>\n|;
}
}
} elsif ($pdata =~ /\S/) { # plain text
my(@subret) = filter($fields, \$pdata, $isdecode, $args);
$ret .= shift @subret;
push(@files, @subret);
} else {
# Make sure readmail thinks we processed
$ret .= " ";
}
++$i;
}
## Done with uudecode
return ($ret, @files);
}
## Check for HTML data if requested
if ($args =~ s/\bhtmlcheck\b//i &&
$$data =~ /\A\s*<(?:html\b|x-html\b|!doctype\s+html\s)/i) {
if (readmail::MAILis_excluded('text/html')) {
return (&$readmail::ExcludedPartFunc('text/plain HTML'));
}
my $html_filter = readmail::load_filter('text/html');
if (defined($html_filter) && defined(&$html_filter)) {
return (&$html_filter($fields, $data, $isdecode,
readmail::get_filter_args(
'text/html', 'text/*', $html_filter)));
} else {
require 'mhtxthtml.pl';
return (m2h_text_html::filter($fields, $data, $isdecode,
readmail::get_filter_args(
'text/html', 'text/*', 'm2h_text_html::filter')));
}
}
my($charset, $nourl, $doquote, $igncharset, $nonfixed, $textformat,
$keepspace, $maxwidth, $target, $defset, $xhtml);
my(%asis) = ( );
$nourl = ($mhonarc::NOURL || ($args =~ /\bnourl\b/i));
$doquote = ($args =~ /\bquote\b/i);
$nonfixed = ($args =~ /\bnonfixed\b/i);
$keepspace = ($args =~ /\bkeepspace\b/i);
if ($args =~ /\bmaxwidth=(\d+)/i) { $maxwidth = $1; }
else { $maxwidth = 0; }
if ($args =~ /\bdefault=(\S+)/i) { $defset = lc $1; }
else { $defset = 'us-ascii'; }
$target = "";
if ($args =~ /\btarget="([^"]+)"/i) { $target = $1; }
elsif ($args =~ /\btarget=(\S+)/i) { $target = $1; }
$target =~ s/['"]//g;
if ($target) {
$target = qq/target="$target"/;
}
$defset =~ s/['"\s]//g;
## Grab charset parameter (if defined)
if ( defined($fields->{'content-type'}[0]) and
$fields->{'content-type'}[0] =~ /\bcharset\s*=\s*([^\s;]+)/i ) {
$charset = lc $1;
$charset =~ s/['";\s]//g;
} else {
$charset = $defset;
}
## Grab format parameter (if defined)
if ( defined($fields->{'content-type'}[0]) and
$fields->{'content-type'}[0] =~ /\bformat\s*=\s*([^\s;]+)/i ) {
$textformat = lc $1;
$textformat =~ s/['";\s]//g;
} else {
$textformat = "fixed";
}
## Check if certain charsets should be left alone
if ($args =~ /\basis=(\S+)/i) {
my $t = lc $1; $t =~ s/['"]//g;
local($_); foreach (split(':', $t)) { $asis{$_} = 1; }
}
## Check MIMECharSetConverters if charset should be left alone
my($charcnv, $real_charset_name) =
readmail::MAILload_charset_converter($charset);
if (defined($charcnv) && $charcnv eq '-decode-') {
$asis{$charset} = 1;
}
## Check if max-width set
if ($maxwidth && $textformat eq 'fixed') {
$$data =~ s/^(.*)$/&break_line($1, $maxwidth)/gem;
}
## Convert data according to charset
if (!$asis{$charset}) {
# Registered in CHARSETCONVERTERS
if (defined($charcnv) && defined(&$charcnv)) {
$$data = &$charcnv($$data, $real_charset_name);
# Other
} else {
warn qq/\n/,
qq/Warning: Unrecognized character set: $charset\n/,
qq/ Message-Id: <$mhonarc::MHAmsgid>\n/,
qq/ Message Number: $mhonarc::MHAmsgnum\n/;
esc_chars_inplace($data);
}
} else {
esc_chars_inplace($data);
}
if ($textformat eq 'flowed') {
# Initial code for format=flowed contributed by Ken Hirsch (May 2002).
# text/plain; format=flowed defined in RFC2646
my $currdepth = 0;
my $ret='';
$$data =~ s!^</?x-flowed>\r?\n>!!mg;
while (length($$data)) {
$$data =~ /^((?:&gt;)*)/;
my $qd = $1;
if ($$data =~ s/^(.*(?:(?:\n|\r\n?)$qd(?!&gt;).*)*\n?)//) {
# divide message into chunks by "quote-depth",
# which is the number of leading > signs
my $chunk = $1;
$chunk =~ s/^$qd ?//mg; # N.B. also takes care of
# space-stuffing
$chunk =~ s/^-- $/--/mg; # special case for '-- '
if ($chunk =~ / \r?\n/) {
# Treat this chunk as format=flowed
# Lines that end with spaces are
# considered to have soft line breaks.
# Lines that end with no spaces are
# considered to have hard line breaks.
# XXX: Negative look-behind assertion not supported
# on older versions of Perl 5 (<5.6)
#$chunk =~ s/(?<! )(\r?\n|\Z)/<br>$1/g;
$chunk =~ s/(^|[^ ])(\r?\n|\Z)/$1<br>$2/mg;
} else {
# Treat this chunk as format=fixed
if ($nonfixed) {
$chunk =~ s/(\r?\n)/<br>$1/g;
if ($keepspace) {
$chunk =~ s/^(.*)$/&preserve_space($1)/gem;
}
} else {
$chunk = "<pre>" . $chunk . "</pre>\n";
}
}
my $newdepth = length($qd)/length('&gt;');
if ($currdepth < $newdepth) {
$chunk = $StartFlowedQuote x
($newdepth - $currdepth) . $chunk;
} elsif ($currdepth > $newdepth) {
$chunk = $EndFlowedQuote x
($currdepth - $newdepth) . $chunk;
}
$currdepth = $newdepth;
$ret .= $chunk;
} else {
# The above regex should always match, but just in case...
warn qq/\n/,
qq/Warning: Dequoting problem with format=flowed data\n/,
qq/ Message-Id: <$MHAmsgid>\n/,
qq/ Message Number: $MHAmsgnum\n/;
$ret .= $$data;
last;
}
}
if ($currdepth > 0) {
$ret .= $EndFlowedQuote x $currdepth;
}
## Post-processing cleanup: makes things look nicer
$ret =~ s/<br><\/blockquote>/<\/blockquote>/g;
$ret =~ s/<\/blockquote><br>/<\/blockquote>/g;
$$data = $ret;
} else {
## Check for quoting
if ($doquote) {
$$data =~ s@^( ?${HQuoteChars})(.*)$@$1<i>$2</i>@gom;
}
## Check if using nonfixed font
if ($nonfixed) {
$$data =~ s/(\r?\n)/<br>$1/g;
if ($keepspace) {
$$data =~ s/^(.*)$/&preserve_space($1)/gem;
}
} else {
$$data = "<pre>" . $$data . "</pre>\n";
}
}
## Convert URLs to hyperlinks
$$data =~ s@($HUrlExp)@<a $target href="$1">$1</a>@gio
unless $nourl;
($$data);
}
##---------------------------------------------------------------------------##
sub esc_chars_inplace {
my($foo) = shift;
$$foo =~ s/&/&amp;/g;
$$foo =~ s/</&lt;/g;
$$foo =~ s/>/&gt;/g;
$$foo =~ s/"/&quot;/g;
1;
}
##---------------------------------------------------------------------------##
sub preserve_space {
my($str) = shift;
1 while
$str =~ s/^([^\t]*)(\t+)/$1 . ' ' x (length($2) * 8 - length($1) % 8)/e;
$str =~ s/ /\&nbsp;/g;
$str;
}
##---------------------------------------------------------------------------##
sub break_line {
my($str) = shift;
my($width) = shift;
my($q, $new) = ('', '');
my($try, $trywidth, $len);
## Translate tabs to spaces
1 while
$str =~ s/^([^\t]*)(\t+)/$1 . ' ' x (length($2) * 8 - length($1) % 8)/e;
## Do nothing if str <= width
return $str if length($str) <= $width;
## See if str begins with a quote char
if ($str =~ s/^( ?$QuoteChars)//o) {
$q = $1;
--$width;
}
## Create new string by breaking up str
while ($str ne "") {
# If $str less than width, break out
if (length($str) <= $width) {
$new .= $q . $str;
last;
}
# handle case where no-whitespace line larger than width
if (($str =~ /^(\S+)/) && (($len = length($1)) >= $width)) {
$new .= $q . $1;
substr($str, 0, $len) = "";
next;
}
# Break string at whitespace
$try = '';
$trywidth = $width;
$try = substr($str, 0, $trywidth);
if ($try =~ /(\S+)$/) {
$trywidth -= length($1);
$new .= $q . substr($str, 0, $trywidth);
} else {
$new .= $q . $try;
}
substr($str, 0, $trywidth) = '';
} continue {
$new .= "\n" if $str;
}
$new;
}
##---------------------------------------------------------------------------##
1;