Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / site_perl / 5.8.0 / mhtxtplain.pl
CommitLineData
86530b38
AT
1##---------------------------------------------------------------------------##
2## File:
3## $Id: mhtxtplain.pl,v 2.24 2002/10/10 22:27:19 ehood Exp $
4## Author:
5## Earl Hood mhonarc@mhonarc.org
6## Description:
7## Library defines routine to filter text/plain body parts to HTML
8## for MHonArc.
9## Filter routine can be registered with the following:
10## <MIMEFILTERS>
11## text/plain:m2h_text_plain'filter:mhtxtplain.pl
12## </MIMEFILTERS>
13##---------------------------------------------------------------------------##
14## MHonArc -- Internet mail-to-HTML converter
15## Copyright (C) 1995-2001 Earl Hood, mhonarc@mhonarc.org
16##
17## This program is free software; you can redistribute it and/or modify
18## it under the terms of the GNU General Public License as published by
19## the Free Software Foundation; either version 2 of the License, or
20## (at your option) any later version.
21##
22## This program is distributed in the hope that it will be useful,
23## but WITHOUT ANY WARRANTY; without even the implied warranty of
24## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25## GNU General Public License for more details.
26##
27## You should have received a copy of the GNU General Public License
28## along with this program; if not, write to the Free Software
29## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
30## 02111-1307, USA
31##---------------------------------------------------------------------------##
32
33package m2h_text_plain;
34
35require 'readmail.pl';
36
37$Url = '(http://|https://|ftp://|afs://|wais://|telnet://|ldap://' .
38 '|gopher://|news:|nntp:|mid:|cid:|mailto:|prospero:)';
39$UrlExp = $Url . q/[^\s\(\)\|<>"']*[^\.?!;,"'\|\[\]\(\)\s<>]/;
40$HUrlExp = $Url . q/(?:&(?![gl]t;)|[^\s\(\)\|<>"'\&])+/ .
41 q/[^\.?!;,"'\|\[\]\(\)\s<>\&]/;
42$QuoteChars = '[>\|\]+:]';
43$HQuoteChars = '&gt;|[\|\]+:]';
44
45$StartFlowedQuote =
46 '<blockquote style="border-left: #0000FF solid 0.1em; '.
47 'margin-left: 0.0em; padding-left: 1.0em">';
48$EndFlowedQuote = "</blockquote>";
49
50##---------------------------------------------------------------------------##
51## Text/plain filter for mhonarc. The following filter arguments
52## are recognized ($args):
53##
54## asis=set1:set2:...
55## Colon separated lists of charsets to leave as-is.
56## Only HTML special characters will be converted into
57## entities. The default value is "us-ascii:iso-8859-1".
58##
59## attachcheck Honor attachment disposition. By default,
60## all text/plain data is displayed inline on
61## the message page. If attachcheck is specified
62## and Content-Disposition specifies the data as
63## an attachment, the data is saved to a file
64## with a link to it from the message page.
65##
66## default=set Default charset to use if not set.
67##
68## inlineexts="ext1,ext2,..."
69## A comma separated list of message specified filename
70## extensions to treat as inline data.
71## Applicable only when uudecode options specified.
72##
73## htmlcheck Check if message is actually an HTML message
74## (to get around abhorrent MUAs). The message
75## is treated as HTML if the first non-whitespace
76## data looks like the start of an HTML document.
77##
78## keepspace Preserve whitespace if nonfixed
79##
80## nourl Do hyperlink URLs
81##
82## nonfixed Use normal typeface
83##
84## maxwidth=# Set the maximum width of lines. Lines exceeding
85## the maxwidth will be broken up across multiple lines.
86##
87## quote Italicize quoted message text
88##
89## target=name Set TARGET attribute for links if converting URLs
90## to links. Defaults to _top.
91##
92## usename Use filename specified in uuencoded data when
93## converting uuencoded data. This option is only
94## applicable of uudecode is specified.
95##
96## uudecode Decoded any embedded uuencoded data.
97##
98## All arguments should be separated by at least one space
99##
100sub filter {
101 my($fields, $data, $isdecode, $args) = @_;
102 local($_);
103
104 ## Parse arguments
105 $args = "" unless defined($args);
106
107 ## Check if content-disposition should be checked
108 if ($args =~ /\battachcheck\b/i) {
109 my($disp, $nameparm) = readmail::MAILhead_get_disposition($fields);
110 if ($disp =~ /\battachment\b/i) {
111 require 'mhexternal.pl';
112 return (m2h_external::filter(
113 $fields, $data, $isdecode,
114 readmail::get_filter_args('m2h_external::filter')));
115 }
116 }
117
118 ## Check if decoding uuencoded data. The implementation chosen here
119 ## for decoding uuencoded data was done so when uudecode is not
120 ## specified, there is no extra overhead (besides the $args check for
121 ## uudecode). However, when uudecode is specified, more overhead may
122 ## exist over other potential implementations.
123 ## I.e. We only try to penalize performance when uudecode is specified.
124 if ($args =~ s/\buudecode\b//ig) {
125 # $args has uudecode stripped out for recursive calls
126
127 # Make sure we have needed routines
128 my $decoder = readmail::load_decoder("uuencode");
129 if (!defined($decoder) || !defined(&$decoder)) {
130 require 'base64.pl';
131 $decoder = \&base64::uudecode;
132 }
133 require 'mhmimetypes.pl';
134
135 # Grab any filename extensions that imply inlining
136 my $inlineexts = '';
137 if ($args =~ /\binlineexts=(\S+)/) {
138 $inlineexts = ',' . lc($1) . ',';
139 $inlineexts =~ s/['"]//g;
140 }
141 my $usename = $args =~ /\busename\b/;
142
143 my($pdata); # have to use local() since typeglobs used
144 my($inext, $uddata, $file, $urlfile);
145 my @files = ( );
146 my $ret = "";
147 my $i = 0;
148
149 # <CR><LF> => <LF> to make parsing easier
150 $$data =~ s/\r\n/\n/g;
151
152 # Split on uuencoded data. For text portions, recursively call
153 # filter to convert text data: makes it easier to handle all
154 # the various formatting options.
155 foreach $pdata
156 (split(/^(begin\s+\d\d\d\s+[^\n]+\n[!-M].*?\nend\n)/sm,
157 $$data)) {
158 if ($i % 2) { # uuencoded data
159 # extract filename extension
160 ($file) = $pdata =~ /^begin\s+\d\d\d\s+([^\n]+)/;
161 if ($file =~ /\.(\w+)$/) { $inext = $1; } else { $inext = ""; }
162
163 # decode data
164 $uddata = &$decoder($pdata);
165
166 # save to file
167 if (readmail::MAILis_excluded('application/octet-stream')) {
168 $ret .= &$readmail::ExcludedPartFunc($file);
169 } else {
170 push(@files,
171 mhonarc::write_attachment(
172 'application/octet-stream', \$uddata, '',
173 ($usename?$file:''), $inext));
174 $urlfile = mhonarc::htmlize($files[$#files]);
175
176 # create link to file
177 if (index($inlineexts, ','.lc($inext).',') >= $[) {
178 $ret .= qq|<a href="$urlfile"><img src="$urlfile">| .
179 qq|</a><br>\n|;
180 } else {
181 $ret .= qq|<a href="$urlfile">| .
182 mhonarc::htmlize($file) . qq|</a><br>\n|;
183 }
184 }
185
186 } elsif ($pdata =~ /\S/) { # plain text
187 my(@subret) = filter($fields, \$pdata, $isdecode, $args);
188 $ret .= shift @subret;
189 push(@files, @subret);
190 } else {
191 # Make sure readmail thinks we processed
192 $ret .= " ";
193 }
194 ++$i;
195 }
196
197 ## Done with uudecode
198 return ($ret, @files);
199 }
200
201
202 ## Check for HTML data if requested
203 if ($args =~ s/\bhtmlcheck\b//i &&
204 $$data =~ /\A\s*<(?:html\b|x-html\b|!doctype\s+html\s)/i) {
205 if (readmail::MAILis_excluded('text/html')) {
206 return (&$readmail::ExcludedPartFunc('text/plain HTML'));
207 }
208 my $html_filter = readmail::load_filter('text/html');
209 if (defined($html_filter) && defined(&$html_filter)) {
210 return (&$html_filter($fields, $data, $isdecode,
211 readmail::get_filter_args(
212 'text/html', 'text/*', $html_filter)));
213 } else {
214 require 'mhtxthtml.pl';
215 return (m2h_text_html::filter($fields, $data, $isdecode,
216 readmail::get_filter_args(
217 'text/html', 'text/*', 'm2h_text_html::filter')));
218 }
219 }
220
221 my($charset, $nourl, $doquote, $igncharset, $nonfixed, $textformat,
222 $keepspace, $maxwidth, $target, $defset, $xhtml);
223 my(%asis) = ( );
224
225 $nourl = ($mhonarc::NOURL || ($args =~ /\bnourl\b/i));
226 $doquote = ($args =~ /\bquote\b/i);
227 $nonfixed = ($args =~ /\bnonfixed\b/i);
228 $keepspace = ($args =~ /\bkeepspace\b/i);
229 if ($args =~ /\bmaxwidth=(\d+)/i) { $maxwidth = $1; }
230 else { $maxwidth = 0; }
231 if ($args =~ /\bdefault=(\S+)/i) { $defset = lc $1; }
232 else { $defset = 'us-ascii'; }
233 $target = "";
234 if ($args =~ /\btarget="([^"]+)"/i) { $target = $1; }
235 elsif ($args =~ /\btarget=(\S+)/i) { $target = $1; }
236 $target =~ s/['"]//g;
237 if ($target) {
238 $target = qq/target="$target"/;
239 }
240 $defset =~ s/['"\s]//g;
241
242 ## Grab charset parameter (if defined)
243 if ( defined($fields->{'content-type'}[0]) and
244 $fields->{'content-type'}[0] =~ /\bcharset\s*=\s*([^\s;]+)/i ) {
245 $charset = lc $1;
246 $charset =~ s/['";\s]//g;
247 } else {
248 $charset = $defset;
249 }
250 ## Grab format parameter (if defined)
251 if ( defined($fields->{'content-type'}[0]) and
252 $fields->{'content-type'}[0] =~ /\bformat\s*=\s*([^\s;]+)/i ) {
253 $textformat = lc $1;
254 $textformat =~ s/['";\s]//g;
255 } else {
256 $textformat = "fixed";
257 }
258
259 ## Check if certain charsets should be left alone
260 if ($args =~ /\basis=(\S+)/i) {
261 my $t = lc $1; $t =~ s/['"]//g;
262 local($_); foreach (split(':', $t)) { $asis{$_} = 1; }
263 }
264
265 ## Check MIMECharSetConverters if charset should be left alone
266 my($charcnv, $real_charset_name) =
267 readmail::MAILload_charset_converter($charset);
268 if (defined($charcnv) && $charcnv eq '-decode-') {
269 $asis{$charset} = 1;
270 }
271
272 ## Check if max-width set
273 if ($maxwidth && $textformat eq 'fixed') {
274 $$data =~ s/^(.*)$/&break_line($1, $maxwidth)/gem;
275 }
276
277 ## Convert data according to charset
278 if (!$asis{$charset}) {
279 # Registered in CHARSETCONVERTERS
280 if (defined($charcnv) && defined(&$charcnv)) {
281 $$data = &$charcnv($$data, $real_charset_name);
282
283 # Other
284 } else {
285 warn qq/\n/,
286 qq/Warning: Unrecognized character set: $charset\n/,
287 qq/ Message-Id: <$mhonarc::MHAmsgid>\n/,
288 qq/ Message Number: $mhonarc::MHAmsgnum\n/;
289 esc_chars_inplace($data);
290 }
291
292 } else {
293 esc_chars_inplace($data);
294 }
295
296 if ($textformat eq 'flowed') {
297 # Initial code for format=flowed contributed by Ken Hirsch (May 2002).
298 # text/plain; format=flowed defined in RFC2646
299
300 my $currdepth = 0;
301 my $ret='';
302 $$data =~ s!^</?x-flowed>\r?\n>!!mg;
303 while (length($$data)) {
304 $$data =~ /^((?:&gt;)*)/;
305 my $qd = $1;
306 if ($$data =~ s/^(.*(?:(?:\n|\r\n?)$qd(?!&gt;).*)*\n?)//) {
307 # divide message into chunks by "quote-depth",
308 # which is the number of leading > signs
309 my $chunk = $1;
310 $chunk =~ s/^$qd ?//mg; # N.B. also takes care of
311 # space-stuffing
312 $chunk =~ s/^-- $/--/mg; # special case for '-- '
313
314 if ($chunk =~ / \r?\n/) {
315 # Treat this chunk as format=flowed
316 # Lines that end with spaces are
317 # considered to have soft line breaks.
318 # Lines that end with no spaces are
319 # considered to have hard line breaks.
320 # XXX: Negative look-behind assertion not supported
321 # on older versions of Perl 5 (<5.6)
322 #$chunk =~ s/(?<! )(\r?\n|\Z)/<br>$1/g;
323 $chunk =~ s/(^|[^ ])(\r?\n|\Z)/$1<br>$2/mg;
324
325 } else {
326 # Treat this chunk as format=fixed
327 if ($nonfixed) {
328 $chunk =~ s/(\r?\n)/<br>$1/g;
329 if ($keepspace) {
330 $chunk =~ s/^(.*)$/&preserve_space($1)/gem;
331 }
332 } else {
333 $chunk = "<pre>" . $chunk . "</pre>\n";
334 }
335 }
336 my $newdepth = length($qd)/length('&gt;');
337 if ($currdepth < $newdepth) {
338 $chunk = $StartFlowedQuote x
339 ($newdepth - $currdepth) . $chunk;
340 } elsif ($currdepth > $newdepth) {
341 $chunk = $EndFlowedQuote x
342 ($currdepth - $newdepth) . $chunk;
343 }
344 $currdepth = $newdepth;
345 $ret .= $chunk;
346
347 } else {
348 # The above regex should always match, but just in case...
349 warn qq/\n/,
350 qq/Warning: Dequoting problem with format=flowed data\n/,
351 qq/ Message-Id: <$MHAmsgid>\n/,
352 qq/ Message Number: $MHAmsgnum\n/;
353 $ret .= $$data;
354 last;
355 }
356 }
357 if ($currdepth > 0) {
358 $ret .= $EndFlowedQuote x $currdepth;
359 }
360
361 ## Post-processing cleanup: makes things look nicer
362 $ret =~ s/<br><\/blockquote>/<\/blockquote>/g;
363 $ret =~ s/<\/blockquote><br>/<\/blockquote>/g;
364
365 $$data = $ret;
366
367 } else {
368 ## Check for quoting
369 if ($doquote) {
370 $$data =~ s@^( ?${HQuoteChars})(.*)$@$1<i>$2</i>@gom;
371 }
372
373 ## Check if using nonfixed font
374 if ($nonfixed) {
375 $$data =~ s/(\r?\n)/<br>$1/g;
376 if ($keepspace) {
377 $$data =~ s/^(.*)$/&preserve_space($1)/gem;
378 }
379 } else {
380 $$data = "<pre>" . $$data . "</pre>\n";
381 }
382 }
383
384 ## Convert URLs to hyperlinks
385 $$data =~ s@($HUrlExp)@<a $target href="$1">$1</a>@gio
386 unless $nourl;
387
388 ($$data);
389}
390
391##---------------------------------------------------------------------------##
392
393sub esc_chars_inplace {
394 my($foo) = shift;
395 $$foo =~ s/&/&amp;/g;
396 $$foo =~ s/</&lt;/g;
397 $$foo =~ s/>/&gt;/g;
398 $$foo =~ s/"/&quot;/g;
399 1;
400}
401
402##---------------------------------------------------------------------------##
403
404sub preserve_space {
405 my($str) = shift;
406
407 1 while
408 $str =~ s/^([^\t]*)(\t+)/$1 . ' ' x (length($2) * 8 - length($1) % 8)/e;
409 $str =~ s/ /\&nbsp;/g;
410 $str;
411}
412
413##---------------------------------------------------------------------------##
414
415sub break_line {
416 my($str) = shift;
417 my($width) = shift;
418 my($q, $new) = ('', '');
419 my($try, $trywidth, $len);
420
421 ## Translate tabs to spaces
422 1 while
423 $str =~ s/^([^\t]*)(\t+)/$1 . ' ' x (length($2) * 8 - length($1) % 8)/e;
424
425 ## Do nothing if str <= width
426 return $str if length($str) <= $width;
427
428 ## See if str begins with a quote char
429 if ($str =~ s/^( ?$QuoteChars)//o) {
430 $q = $1;
431 --$width;
432 }
433
434 ## Create new string by breaking up str
435 while ($str ne "") {
436
437 # If $str less than width, break out
438 if (length($str) <= $width) {
439 $new .= $q . $str;
440 last;
441 }
442
443 # handle case where no-whitespace line larger than width
444 if (($str =~ /^(\S+)/) && (($len = length($1)) >= $width)) {
445 $new .= $q . $1;
446 substr($str, 0, $len) = "";
447 next;
448 }
449
450 # Break string at whitespace
451 $try = '';
452 $trywidth = $width;
453 $try = substr($str, 0, $trywidth);
454 if ($try =~ /(\S+)$/) {
455 $trywidth -= length($1);
456 $new .= $q . substr($str, 0, $trywidth);
457 } else {
458 $new .= $q . $try;
459 }
460 substr($str, 0, $trywidth) = '';
461
462 } continue {
463 $new .= "\n" if $str;
464 }
465 $new;
466}
467
468##---------------------------------------------------------------------------##
4691;