Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / site_perl / 5.8.0 / mhtxthtml.pl
CommitLineData
86530b38
AT
1##---------------------------------------------------------------------------##
2## File:
3## $Id: mhtxthtml.pl,v 2.22.2.1 2002/12/22 00:43:56 ehood Exp $
4## Author:
5## Earl Hood mhonarc@mhonarc.org
6## Description:
7## Library defines routine to filter text/html body parts
8## for MHonArc.
9## Filter routine can be registered with the following:
10## <MIMEFILTERS>
11## text/html:m2h_text_html'filter:mhtxthtml.pl
12## </MIMEFILTERS>
13##---------------------------------------------------------------------------##
14## MHonArc -- Internet mail-to-HTML converter
15## Copyright (C) 1995-2000 Earl Hood, mhonarc@mhonarc.org
16##
17## This program is free software; you can redistribute it and/or modify
18## it under the terms of the GNU General Public License as published by
19## the Free Software Foundation; either version 2 of the License, or
20## (at your option) any later version.
21##
22## This program is distributed in the hope that it will be useful,
23## but WITHOUT ANY WARRANTY; without even the implied warranty of
24## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25## GNU General Public License for more details.
26##
27## You should have received a copy of the GNU General Public License
28## along with this program; if not, write to the Free Software
29## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30##---------------------------------------------------------------------------##
31
32
33package m2h_text_html;
34
35# Beginning of URL match expression
36my $Url = '(\w+://|\w+:)';
37
38# Script related attributes
39my $SAttr = q/\b(?:onload|onunload|onclick|ondblclick|/.
40 q/onmouse(?:down|up|over|move|out)|/.
41 q/onkey(?:press|down|up)|style)\b/;
42# Script/questionable related elements
43my $SElem = q/\b(?:applet|base|embed|form|ilayer|input|layer|link|meta|/.
44 q/object|option|param|select|textarea)\b/;
45
46# Elements with auto-loaded URL attributes
47my $AElem = q/\b(?:img|body|iframe|frame|object|script|input)\b/;
48# URL attributes
49my $UAttr = q/\b(?:action|background|cite|classid|codebase|data|datasrc|/.
50 q/dynsrc|for|href|longdesc|profile|src|url|usemap)\b/;
51
52##---------------------------------------------------------------------------
53## The filter must modify HTML content parts for merging into the
54## final filtered HTML messages. Modification is needed so the
55## resulting filtered message is valid HTML.
56##
57## Arguments:
58##
59## allowcomments Preserve any comment declarations. Normally
60## Comment declarations are munged to prevent
61## SSI attacks or comments that can conflict
62## with MHonArc processing. Use this option
63## with care.
64##
65## allownoncidurls Preserve URL-based attributes that are not
66## cid: URLs. Normally, any URL-based attribute
67## -- href, src, background, classid, data,
68## longdesc -- will be stripped if it is not a
69## cid: URL. This is to prevent malicious URLs
70## that verify mail addresses for spam purposes,
71## secretly set cookies, or gather some
72## statistical data automatically with the use of
73## elements that cause browsers to automatically
74## fetch data: IMG, BODY, IFRAME, FRAME, OBJECT,
75## SCRIPT, INPUT.
76##
77## allowscript Preserve any markup associated with scripting.
78## This includes elements and attributes related
79## to scripting. The default is to delete any
80## scripting markup for security reasons.
81##
82## attachcheck Honor attachment disposition. By default,
83## all text/html data is displayed inline on
84## the message page. If attachcheck is specified
85## and Content-Disposition specifies the data as
86## an attachment, the data is saved to a file
87## with a link to it from the message page.
88##
89## nofont Remove <FONT> tags.
90##
91## notitle Do not print title.
92##
93sub filter {
94 my($fields, $data, $isdecode, $args) = @_;
95 $args = '' unless defined $args;
96
97 ## Check if content-disposition should be checked
98 if ($args =~ /\battachcheck\b/i) {
99 my($disp, $nameparm) = readmail::MAILhead_get_disposition($fields);
100 if ($disp =~ /\battachment\b/i) {
101 require 'mhexternal.pl';
102 return (m2h_external::filter(
103 $fields, $data, $isdecode,
104 readmail::get_filter_args('m2h_external::filter')));
105 }
106 }
107
108 local(@files) = (); # XXX: Used by resolve_cid!!!
109 my $base = '';
110 my $title = '';
111 my $noscript = 1;
112 $noscript = 0 if $args =~ /\ballowscript\b/i;
113 my $nofont = $args =~ /\bnofont\b/i;
114 my $notitle = $args =~ /\bnotitle\b/i;
115 my $onlycid = $args !~ /\ballownoncidurls\b/i;
116 my $tmp;
117
118 ## Check comment declarations: may screw-up mhonarc processing
119 ## and avoids someone sneaking in SSIs.
120 #$$data =~ s/<!(?:--(?:[^-]|-[^-])*--\s*)+>//go; # can crash perl
121 $$data =~ s/<!--[^-]+[#X%\$\[]*/<!--/g; # Just mung them (faster)
122
123 ## Get/remove title
124 if (!$notitle) {
125 if ($$data =~ s|<title\s*>([^<]*)</title\s*>||io) {
126 $title = "<address>Title: <strong>$1</strong></address>\n"
127 unless $1 eq "";
128 }
129 } else {
130 $$data =~ s|<title\s*>[^<]*</title\s*>||io;
131 }
132
133 ## Get/remove BASE url
134 BASEURL: {
135 if ($$data =~ s|(<base\s[^>]*>)||i) {
136 $tmp = $1;
137 if ($tmp =~ m|href\s*=\s*['"]([^'"]+)['"]|i) {
138 $base = $1;
139 } elsif ($tmp =~ m|href\s*=\s*([^\s>]+)|i) {
140 $base = $1;
141 }
142 last BASEURL if ($base =~ /\S/);
143 }
144 if ((defined($tmp = $fields->{'content-base'}[0]) ||
145 defined($tmp = $fields->{'content-location'}[0])) &&
146 ($tmp =~ m%/%)) {
147 ($base = $tmp) =~ s/['"\s]//g;
148 }
149 }
150 $base =~ s|(.*/).*|$1|;
151
152 ## Strip out certain elements/tags to support proper inclusion
153 $$data =~ s|<head\s*>[\s\S]*</head\s*>||io;
154 1 while ($$data =~ s|<!doctype\s[^>]*>||io);
155 1 while ($$data =~ s|</?html\b[^>]*>||gio);
156 1 while ($$data =~ s|</?x-html\b[^>]*>||gio);
157 1 while ($$data =~ s|</?meta\b[^>]*>||gio);
158 1 while ($$data =~ s|</?link\b[^>]*>||gio);
159
160 ## Strip out <font> tags if requested
161 if ($nofont) {
162 $$data =~ s|<style[^>]*>.*?</style\s*>||gios;
163 1 while ($$data =~ s|</?font\b[^>]*>||gio);
164 1 while ($$data =~ s/\b(?:style|class)\s*=\s*"[^"]*"//gio);
165 1 while ($$data =~ s/\b(?:style|class)\s*=\s*'[^']*'//gio);
166 1 while ($$data =~ s/\b(?:style|class)\s*=\s*[^\s>]+//gio);
167 1 while ($$data =~ s|</?style\b[^>]*>||gi);
168
169 }
170
171 ## Strip out scripting markup if requested
172 if ($noscript) {
173 # remove scripting elements and attributes
174 $$data =~ s|<script[^>]*>.*?</script\s*>||gios;
175 unless ($nofont) { # avoid dup work if style already stripped
176 $$data =~ s|<style[^>]*>.*?</style\s*>||gios;
177 1 while ($$data =~ s|</?style\b[^>]*>||gi);
178 }
179 1 while ($$data =~ s|$SAttr\s*=\s*"[^"]*"||gio); #"
180 1 while ($$data =~ s|$SAttr\s*=\s*'[^']*'||gio); #'
181 1 while ($$data =~ s|$SAttr\s*=\s*[^\s>]+||gio);
182 1 while ($$data =~ s|</?$SElem[^>]*>||gio);
183 1 while ($$data =~ s|</?script\b||gi);
184
185 # for netscape 4.x browsers
186 $$data =~ s/(=\s*["']?\s*)(?:\&\{)+/$1/g;
187
188 # Hopefully complete pattern to neutralize javascript:... URLs.
189 # The pattern is ugly because we have to handle any combination
190 # of regular chars and entity refs.
191 $$data =~ s/\b(?:j|&\#(?:0*(?:74|106)|x0*(?:4a|6a))(?:;|(?![0-9])))
192 (?:a|&\#(?:0*(?:65|97)|x0*(?:41|61))(?:;|(?![0-9])))
193 (?:v|&\#(?:0*(?:86|118)|x0*(?:56|76))(?:;|(?![0-9])))
194 (?:a|&\#(?:0*(?:65|97)|x0*(?:41|61))(?:;|(?![0-9])))
195 (?:s|&\#(?:0*(?:83|115)|x0*(?:53|73))(?:;|(?![0-9])))
196 (?:c|&\#(?:0*(?:67|99)|x0*(?:43|63))(?:;|(?![0-9])))
197 (?:r|&\#(?:0*(?:82|114)|x0*(?:52|72))(?:;|(?![0-9])))
198 (?:i|&\#(?:0*(?:73|105)|x0*(?:49|69))(?:;|(?![0-9])))
199 (?:p|&\#(?:0*(?:80|112)|x0*(?:50|70))(?:;|(?![0-9])))
200 (?:t|&\#(?:0*(?:84|116)|x0*(?:54|74))(?:;|(?![0-9])))
201 /_javascript_/gix;
202
203 }
204
205 ## Modify relative urls to absolute using BASE
206 if ($base =~ /\S/) {
207 $$data =~ s/($UAttr\s*=\s*['"])([^'"]+)(['"])/
208 join("", $1, &addbase($base,$2), $3)/geoix;
209 }
210
211 ## Check for frames: Do not support, so just show source
212 if ($$data =~ m/<frameset\b/i) {
213 $$data = join('', '<pre>', mhonarc::htmlize($$data), '</pre>');
214 return ($title.$$data, @files);
215 }
216
217 ## Check for body attributes
218 if ($$data =~ s|<body\b([^>]*)>||i) {
219 require 'mhutil.pl';
220 my $a = $1;
221 my %attr = mhonarc::parse_vardef_str($a, 1);
222 if (%attr) {
223 ## Use a table with a single cell to encapsulate data to
224 ## set visual properties. We use a mixture of old attributes
225 ## and CSS to set properties since browsers may not support
226 ## all of the CSS settings via the STYLE attribute.
227 my $tpre = '<table width="100%"><tr><td ';
228 my $tsuf = "";
229 $tpre .= qq|background="$attr{'background'}" |
230 if $attr{'background'};
231 $tpre .= qq|bgcolor="$attr{'bgcolor'}" |
232 if $attr{'bgcolor'};
233 $tpre .= qq|style="|;
234 $tpre .= qq|background-color: $attr{'bgcolor'}; |
235 if $attr{'bgcolor'};
236 if ($attr{'background'}) {
237 if ($attr{'background'} =
238 &resolve_cid($onlycid, $attr{'background'})) {
239 $tpre .= qq|background-image: url($attr{'background'}) |;
240 }
241 }
242 $tpre .= qq|color: $attr{'text'}; |
243 if $attr{'text'};
244 $tpre .= qq|a:link { color: $attr{'link'} } |
245 if $attr{'link'};
246 $tpre .= qq|a:active { color: $attr{'alink'} } |
247 if $attr{'alink'};
248 $tpre .= qq|a:visited { color: $attr{'vlink'} } |
249 if $attr{'vlink'};
250 $tpre .= '">';
251 if ($attr{'text'}) {
252 $tpre .= qq|<font color="$attr{'text'}">|;
253 $tsuf .= '</font>';
254 }
255 $tsuf .= '</td></tr></table>';
256 $$data = $tpre . $$data . $tsuf;
257 }
258 }
259 1 while ($$data =~ s|</?body[^>]*>||ig);
260
261 ## Check for CID URLs (multipart/related HTML)
262 $$data =~ s/($UAttr\s*=\s*['"])([^'"]+)(['"])/
263 join("", $1, &resolve_cid($onlycid, $2), $3)/geoix;
264 $$data =~ s/($UAttr\s*=\s*)([^'">][^\s>]+)/
265 join("", $1, '"', &resolve_cid($onlycid, $2), '"')/geoix;
266
267 ($title.$$data, @files);
268}
269
270##---------------------------------------------------------------------------
271
272sub addbase {
273 my($b, $u) = @_;
274 return $u if !defined($b) || $b !~ /\S/;
275
276 my($ret);
277 $u =~ s/^\s+//;
278 if ($u =~ m%^$Url%o || $u =~ m/^#/) {
279 ## Absolute URL or scroll link; do nothing
280 $ret = $u;
281 } else {
282 ## Relative URL
283 if ($u =~ /^\./) {
284 ## "./---" or "../---": Need to remove and adjust base
285 ## accordingly.
286 $b =~ s/\/$//;
287 my @a = split(/\//, $b);
288 my $cnt = 0;
289 while ( $cnt <= scalar(@a) &&
290 $u =~ s|^(\.{1,2})/|| ) { ++$cnt if length($1) == 2; }
291 splice(@a, -$cnt) if $cnt > 0;
292 $b = join('/', @a, "");
293
294 } elsif ($u =~ m%^/%) {
295 ## "/---": Just use hostname:port of base.
296 $b =~ s%^(${Url}[^/]*)/.*%$1%o;
297 }
298 $ret = $b . $u;
299 }
300 $ret;
301}
302
303##---------------------------------------------------------------------------
304
305sub resolve_cid {
306 my $onlycid = shift;
307 my $cid = shift;
308 my $href = $readmail::Cid{$cid};
309 if (!defined($href)) {
310 my $basename = $cid;
311 $basename =~ s/.*\///;
312 if (!defined($href = $readmail::Cid{$basename})) {
313 return "" if $onlycid;
314 return ($cid =~ /^cid:/i)? "": $cid;
315 }
316 $cid = $basename;
317 }
318
319 if ($href->{'uri'}) {
320 # Part already converted; multiple references to part
321 return $href->{'uri'};
322 }
323
324 require 'mhmimetypes.pl';
325 my $filename;
326 my $decodefunc =
327 readmail::load_decoder(
328 $href->{'fields'}{'content-transfer-encoding'}[0]);
329 if (defined($decodefunc) && defined(&$decodefunc)) {
330 my $data = &$decodefunc(${$href->{'body'}});
331 $filename = mhonarc::write_attachment(
332 $href->{'fields'}{'content-type'}[0], \$data);
333 } else {
334 $filename = mhonarc::write_attachment(
335 $href->{'fields'}{'content-type'}[0],
336 $href->{'body'});
337 }
338 $href->{'filtered'} = 1; # mark part filtered for readmail.pl
339 $href->{'uri'} = $filename;
340
341 push(@files, $filename); # @files defined in filter!!
342 $filename;
343}
344
345##---------------------------------------------------------------------------
346
3471;