Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | ##---------------------------------------------------------------------------## |
2 | ## File: | |
3 | ## $Id: readmail.pl,v 2.21 2002/10/15 22:06:53 ehood Exp $ | |
4 | ## Author: | |
5 | ## Earl Hood mhonarc@mhonarc.org | |
6 | ## Description: | |
7 | ## Library defining routines to parse MIME e-mail messages. The | |
8 | ## library is designed so it may be reused for other e-mail | |
9 | ## filtering programs. The default behavior is for mail->html | |
10 | ## filtering, however, the defaults can be overridden to allow | |
11 | ## mail->whatever filtering. | |
12 | ## | |
13 | ## Public Functions: | |
14 | ## ---------------- | |
15 | ## $data = MAILdecode_1522_str($str); | |
16 | ## ($data, @files) = MAILread_body($fields_hash_ref, $body_ref); | |
17 | ## $hash_ref = MAILread_file_header($handle); | |
18 | ## $hash_ref = MAILread_header($mesg_str_ref); | |
19 | ## | |
20 | ## ($disp, $file) = MAILhead_get_disposition($fields_hash_ref); | |
21 | ## $boolean = MAILis_excluded($content_type); | |
22 | ## $parm_hash_ref = MAILparse_parameter_str($header_field); | |
23 | ## $parm_hash_ref = MAILparse_parameter_str($header_field, 1); | |
24 | ## | |
25 | ##---------------------------------------------------------------------------## | |
26 | ## Copyright (C) 1996-2001 Earl Hood, mhonarc@mhonarc.org | |
27 | ## | |
28 | ## This program is free software; you can redistribute it and/or modify | |
29 | ## it under the terms of the GNU General Public License as published by | |
30 | ## the Free Software Foundation; either version 2 of the License, or | |
31 | ## (at your option) any later version. | |
32 | ## | |
33 | ## This program is distributed in the hope that it will be useful, | |
34 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of | |
35 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
36 | ## GNU General Public License for more details. | |
37 | ## | |
38 | ## You should have received a copy of the GNU General Public License | |
39 | ## along with this program; if not, write to the Free Software | |
40 | ## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | |
41 | ## 02111-1307, USA | |
42 | ##---------------------------------------------------------------------------## | |
43 | ||
44 | package readmail; | |
45 | ||
46 | ############################################################################### | |
47 | ## Private Globals ## | |
48 | ############################################################################### | |
49 | ||
50 | my $Url = '(\w+://|\w+:)'; | |
51 | ||
52 | my @_MIMEAltPrefs = (); | |
53 | my %_MIMEAltPrefs = (); | |
54 | ||
55 | ############################################################################### | |
56 | ## Public Globals ## | |
57 | ############################################################################### | |
58 | ||
59 | ##---------------------------------------------------------------------------## | |
60 | ## Constants | |
61 | ## | |
62 | ||
63 | ## Constants for use as second argument to MAILdecode_1522_str(). | |
64 | sub JUST_DECODE() { 1; } | |
65 | sub DECODE_ALL() { 2; } | |
66 | ||
67 | ##---------------------------------------------------------------------------## | |
68 | ||
69 | ##---------------------------------------------------------------------------## | |
70 | ## Scalar Variables | |
71 | ## | |
72 | ||
73 | ## Flag if message headers are decoded in the parse header routines: | |
74 | ## MAILread_header, MAILread_file_header. This only affects the | |
75 | ## values of the field hash created. The original header is still | |
76 | ## passed as the return value. | |
77 | ## | |
78 | ## The only 1522 data that will be decoded is data encoded with charsets | |
79 | ## set to "-decode-" in the %MIMECharSetConverters hash. | |
80 | ||
81 | $DecodeHeader = 0; | |
82 | ||
83 | ##---------------------------------------------------------------------------## | |
84 | ## Variables for holding information related to the functions used | |
85 | ## for processing MIME data. Variables are defined in the scope | |
86 | ## of main. | |
87 | ||
88 | ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
89 | ## %MIMEDecoders is the associative array for storing functions for | |
90 | ## decoding mime data. | |
91 | ## | |
92 | ## Keys => content-transfer-encoding (should be in lowercase) | |
93 | ## Values => function name. | |
94 | ## | |
95 | ## Function names should be qualified with package identifiers. | |
96 | ## Functions are called as follows: | |
97 | ## | |
98 | ## $decoded_data = &function($data); | |
99 | ## | |
100 | ## The value "as-is" may be used to allow the data to be passed without | |
101 | ## decoding to the registered filter, but the decoded flag will be | |
102 | ## set to true. | |
103 | ||
104 | %MIMEDecoders = () | |
105 | unless defined(%MIMEDecoders); | |
106 | %MIMEDecodersSrc = () | |
107 | unless defined(%MIMEDecodersSrc); | |
108 | ||
109 | ## Default settings: | |
110 | $MIMEDecoders{"7bit"} = "as-is" | |
111 | unless defined($MIMEDecoders{"7bit"}); | |
112 | $MIMEDecoders{"8bit"} = "as-is" | |
113 | unless defined($MIMEDecoders{"8bit"}); | |
114 | $MIMEDecoders{"binary"} = "as-is" | |
115 | unless defined($MIMEDecoders{"binary"}); | |
116 | $MIMEDecoders{"base64"} = "base64::b64decode" | |
117 | unless defined($MIMEDecoders{"base64"}); | |
118 | $MIMEDecoders{"quoted-printable"} = "quoted_printable::qprdecode" | |
119 | unless defined($MIMEDecoders{"quoted-printable"}); | |
120 | $MIMEDecoders{"x-uuencode"} = "base64::uudecode" | |
121 | unless defined($MIMEDecoders{"x-uuencode"}); | |
122 | $MIMEDecoders{"x-uue"} = "base64::uudecode" | |
123 | unless defined($MIMEDecoders{"x-uue"}); | |
124 | $MIMEDecoders{"uuencode"} = "base64::uudecode" | |
125 | unless defined($MIMEDecoders{"uuencode"}); | |
126 | ||
127 | $MIMEDecodersSrc{"base64"} = "base64.pl" | |
128 | unless defined($MIMEDecodersSrc{"base64"}); | |
129 | $MIMEDecodersSrc{"quoted-printable"} = "qprint.pl" | |
130 | unless defined($MIMEDecodersSrc{"quoted-printable"}); | |
131 | $MIMEDecodersSrc{"x-uuencode"} = "base64.pl" | |
132 | unless defined($MIMEDecodersSrc{"x-uuencode"}); | |
133 | $MIMEDecodersSrc{"x-uue"} = "base64.pl" | |
134 | unless defined($MIMEDecodersSrc{"x-uue"}); | |
135 | $MIMEDecodersSrc{"uuencode"} = "base64.pl" | |
136 | unless defined($MIMEDecodersSrc{"uuencode"}); | |
137 | ||
138 | ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
139 | ## %MIMECharSetConverters is the associative array for storing functions | |
140 | ## for converting data in a particular charset to a destination format | |
141 | ## within the MAILdecode_1522_str() routine. Destination format is defined | |
142 | ## by the function. | |
143 | ## | |
144 | ## Keys => charset (should be in lowercase) | |
145 | ## Values => function name. | |
146 | ## | |
147 | ## Charset values take on a form like "iso-8859-1" or "us-ascii". | |
148 | ## NOTE: Values need to be in lower-case. | |
149 | ## | |
150 | ## The key "default" can be assigned to define the default function | |
151 | ## to call if no explicit charset function is defined. | |
152 | ## | |
153 | ## The key "plain" can be set to a function for decoded regular text not | |
154 | ## encoded in 1522 format. | |
155 | ## | |
156 | ## Function names are name of defined perl function and should be | |
157 | ## qualified with package identifiers. Functions are called as follows: | |
158 | ## | |
159 | ## $converted_data = &function($data, $charset); | |
160 | ## | |
161 | ## A function called "-decode-" implies that the data should be | |
162 | ## decoded, but no converter is to be invoked. | |
163 | ## | |
164 | ## A function called "-ignore-" implies that the data should | |
165 | ## not be decoded and converted. Ie. For the specified charset, | |
166 | ## the encoding will stay unprocessed and passed back in the return | |
167 | ## string. | |
168 | ||
169 | %MIMECharSetConverters = () | |
170 | unless defined(%MIMECharSetConverters); | |
171 | %MIMECharSetConvertersSrc = () | |
172 | unless defined(%MIMECharSetConvertersSrc); | |
173 | ||
174 | ## Default settings: | |
175 | $MIMECharSetConverters{"default"} = "-ignore-" | |
176 | unless defined($MIMECharSetConverters{"default"}); | |
177 | ||
178 | ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
179 | ## %MIMEFilters is the associative array for storing functions that | |
180 | ## process various content-types in the MAILread_body routine. | |
181 | ## | |
182 | ## Keys => Content-type (should be in lowercase) | |
183 | ## Values => function name. | |
184 | ## | |
185 | ## Function names should be qualified with package identifiers. | |
186 | ## Functions are called as follows: | |
187 | ## | |
188 | ## $converted_data = &function($header, *parsed_header_assoc_array, | |
189 | ## *message_data, $decoded_flag, | |
190 | ## $optional_filter_arguments); | |
191 | ## | |
192 | ## Functions can be registered for base types. Example: | |
193 | ## | |
194 | ## $MIMEFilters{"image/*"} = "mypackage'function"; | |
195 | ## | |
196 | ## IMPORTANT: If a function specified is not defined when MAILread_body | |
197 | ## tries to invoke it, MAILread_body will silently ignore. Make sure | |
198 | ## that all functions are defined before invoking MAILread_body. | |
199 | ||
200 | %MIMEFilters = () | |
201 | unless defined(%MIMEFilters); | |
202 | %MIMEFiltersSrc = () | |
203 | unless defined(%MIMEFiltersSrc); | |
204 | ||
205 | ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
206 | ## %MIMEFiltersArgs is the associative array for storing any optional | |
207 | ## arguments to functions specified in MIMEFilters (the | |
208 | ## $optional_filter_arguments from above). | |
209 | ## | |
210 | ## Keys => Either one of the following: content-type, function name. | |
211 | ## Values => Argument string (format determined by filter function). | |
212 | ## | |
213 | ## Arguments listed for a content-type will be used over arguments | |
214 | ## listed for a function if both are applicable. | |
215 | ||
216 | %MIMEFiltersArgs = () | |
217 | unless defined(%MIMEFiltersArgs); | |
218 | ||
219 | ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
220 | ## %MIMEExcs is the associative array listing which data types | |
221 | ## should be auto-excluded during parsing: | |
222 | ## | |
223 | ## Keys => content-type, or base-type | |
224 | ## Values => <should evaluate to a true expression> | |
225 | ## | |
226 | ## For purposes of efficiency, content-types, or base-types, should | |
227 | ## be specified in lowercase. All key lookups are done in lowercase. | |
228 | ||
229 | %MIMEExcs = () | |
230 | unless defined(%MIMEExcs); | |
231 | ||
232 | ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
233 | ## %MIMECharsetAliases is a mapping of charset names to charset names. | |
234 | ## The MAILset_charset_aliases() routine should be used to set the | |
235 | ## values of this hash. | |
236 | ## | |
237 | ## Keys => charset name | |
238 | ## Values => real charset name | |
239 | ## | |
240 | %MIMECharsetAliases = () | |
241 | unless defined(%MIMECharsetAliases); | |
242 | ||
243 | ##--------------------------------------------------------------------------- | |
244 | ## Variables holding functions for generating processed output | |
245 | ## for MAILread_body(). The default functions generate HTML. | |
246 | ## However, the variables can be set to functions that generate | |
247 | ## a different type of output. | |
248 | ## | |
249 | ## $FormatHeaderFunc has no default, and must be defined by | |
250 | ## the calling program. | |
251 | ## | |
252 | ## Function that returns a message when failing to process a part of a | |
253 | ## a multipart message. The content-type of the message is passed | |
254 | ## as an argument. | |
255 | ||
256 | $CantProcessPartFunc = \&cantProcessPart | |
257 | unless(defined($CantProcessPartFunc)); | |
258 | ||
259 | ## Function that returns a message when a part is excluded via %MIMEExcs. | |
260 | ||
261 | $ExcludedPartFunc = \&excludedPart | |
262 | unless(defined($ExcludedPartFunc)); | |
263 | ||
264 | ## Function that returns a message when a part is unrecognized in a | |
265 | ## multipart/alternative message. I.e. No part could be processed. | |
266 | ## No arguments are passed to function. | |
267 | ||
268 | $UnrecognizedAltPartFunc = \&unrecognizedAltPart | |
269 | unless(defined($UnrecognizedAltPartFunc)); | |
270 | ||
271 | ## Function that returns a string to go before any data generated generating | |
272 | ## from processing an embedded message (message/rfc822 or message/news). | |
273 | ## No arguments are passed to function. | |
274 | ||
275 | $BeginEmbeddedMesgFunc = \&beginEmbeddedMesg | |
276 | unless(defined($BeginEmbeddedMesgFunc)); | |
277 | ||
278 | ## Function that returns a string to go after any data generated generating | |
279 | ## from processing an embedded message (message/rfc822 or message/news). | |
280 | ## No arguments are passed to function. | |
281 | ||
282 | $EndEmbeddedMesgFunc = \&endEmbeddedMesg | |
283 | unless(defined($EndEmbeddedMesgFunc)); | |
284 | ||
285 | ## Function to return a string that is a result of the functions | |
286 | ## processing of a message header. The function is called for | |
287 | ## embedded messages (message/rfc822 and message/news). The | |
288 | ## arguments to function are: | |
289 | ## | |
290 | ## 1. Pointer to associative array representing message header | |
291 | ## contents with the keys as field labels (in all lower-case) | |
292 | ## and the values as field values of the labels. | |
293 | ## | |
294 | ## 2. Pointer to associative array mapping lower-case keys of | |
295 | ## argument 1 to original case. | |
296 | ## | |
297 | ## Prototype: $return_data = &function(*fields, *lower2orig_fields); | |
298 | ||
299 | $FormatHeaderFunc = undef | |
300 | unless(defined($FormatHeaderFunc)); | |
301 | ||
302 | ############################################################################### | |
303 | ## Public Routines ## | |
304 | ############################################################################### | |
305 | ##---------------------------------------------------------------------------## | |
306 | ## MAILdecode_1522_str() decodes a string encoded in a format | |
307 | ## specified by RFC 1522. The decoded string is the return value. | |
308 | ## If no MIMECharSetConverters is registered for a charset, then | |
309 | ## the decoded data is returned "as-is". | |
310 | ## | |
311 | ## Usage: | |
312 | ## | |
313 | ## $ret_data = &MAILdecode_1522_str($str, $decoding_flag); | |
314 | ## | |
315 | ## If $decoding_flag is JUST_DECODE, $str will be decoded for only | |
316 | ## the charsets specified as "-decode-". If it is equal to | |
317 | ## DECODE_ALL, all encoded data is decoded without any conversion. | |
318 | ## | |
319 | sub MAILdecode_1522_str { | |
320 | my($str) = shift; | |
321 | my($decoding_flag) = shift || 0; | |
322 | my($charset, | |
323 | $encoding, | |
324 | $dec, | |
325 | $charcnv, | |
326 | $real_charset, | |
327 | $plaincnv, | |
328 | $plain_real_charset, | |
329 | $strtxt, | |
330 | $str_before); | |
331 | my($ret) = (''); | |
332 | ||
333 | # Get plain converter | |
334 | ($plaincnv, $plain_real_charset) = MAILload_charset_converter('plain'); | |
335 | $plain_real_charset = 'us-ascii' if $plain_real_charset eq 'plain'; | |
336 | ||
337 | # Decode string | |
338 | while ($str =~ /=\?([^?]+)\?(.)\?([^?]*)\?=/) { | |
339 | ||
340 | # Grab components | |
341 | ($charset, $encoding) = ($1, $2); | |
342 | $strtxt = $3; $str_before = $`; $str = $'; | |
343 | ||
344 | # Check encoding method and grab proper decoder | |
345 | if ($encoding =~ /b/i) { | |
346 | $dec = &load_decoder('base64'); | |
347 | } else { | |
348 | $dec = &load_decoder('quoted-printable'); | |
349 | } | |
350 | ||
351 | # Convert before (unencoded) text | |
352 | if ($decoding_flag) { # ignore if just decode | |
353 | $ret .= $str_before; | |
354 | } elsif (defined(&$plaincnv)) { # decode and convert | |
355 | $ret .= &$plaincnv($str_before, $plain_real_charset); | |
356 | } else { # ignore | |
357 | $ret .= $str_before; | |
358 | } | |
359 | ||
360 | # Convert encoded text | |
361 | if ($decoding_flag == DECODE_ALL) { | |
362 | $charcnv = '-decode-'; | |
363 | } else { | |
364 | ($charcnv, $real_charset) = MAILload_charset_converter($charset); | |
365 | } | |
366 | ||
367 | # Decode only | |
368 | if ($charcnv eq '-decode-') { | |
369 | $strtxt =~ s/_/ /g; | |
370 | $ret .= &$dec($strtxt); | |
371 | ||
372 | # Ignore if just decoding | |
373 | } elsif ($decoding_flag) { | |
374 | $ret .= "=?$charset?$encoding?$strtxt?="; | |
375 | ||
376 | # Decode and convert | |
377 | } elsif (defined(&$charcnv)) { | |
378 | $strtxt =~ s/_/ /g; | |
379 | $ret .= &$charcnv(&$dec($strtxt), $real_charset); | |
380 | ||
381 | # Fallback is to ignore | |
382 | } else { | |
383 | $ret .= "=?$charset?$encoding?$strtxt?="; | |
384 | } | |
385 | } | |
386 | ||
387 | # Convert left-over unencoded text | |
388 | if ($decoding_flag) { # ignore if just decode | |
389 | $ret .= $str; | |
390 | } elsif (defined(&$plaincnv)) { # decode and convert | |
391 | $ret .= &$plaincnv($str, $plain_real_charset); | |
392 | } else { # ignore | |
393 | $ret .= $str; | |
394 | } | |
395 | ||
396 | $ret; | |
397 | } | |
398 | ||
399 | ##---------------------------------------------------------------------------## | |
400 | ## MAILread_body() parses a MIME message body. | |
401 | ## Usage: | |
402 | ## ($data, @files) = | |
403 | ## MAILread_body($fields_hash_ref, $body_date_ref); | |
404 | ## | |
405 | ## Parameters: | |
406 | ## $fields_hash_ref | |
407 | ## A reference to hash of message/part header | |
408 | ## fields. Keys are field names in lowercase | |
409 | ## and values are array references containing the | |
410 | ## field values. For example, to obtain the | |
411 | ## content-type, if defined, one would do: | |
412 | ## | |
413 | ## $fields_hash_ref->{'content-type'}[0] | |
414 | ## | |
415 | ## Values for a fields are stored in arrays since | |
416 | ## duplication of fields are possible. For example, | |
417 | ## the Received: header field is typically repeated | |
418 | ## multiple times. For fields that only occur once, | |
419 | ## then array for the field will only contain one | |
420 | ## item. | |
421 | ## | |
422 | ## $body_data_ref | |
423 | ## Reference to body data. It is okay for the | |
424 | ## filter to modify the text in-place. | |
425 | ## | |
426 | ## Return: | |
427 | ## The first item in the return list is the text that should | |
428 | ## printed to the message page. Any other items in the return | |
429 | ## list are derived filenames created. | |
430 | ## | |
431 | ## See Also: | |
432 | ## MAILread_header(), MAILread_file_header() | |
433 | ## | |
434 | sub MAILread_body { | |
435 | my($fields, # Parsed header hash | |
436 | $body, # Reference to raw body text | |
437 | $inaltArg) = @_; # Flag if in multipart/alternative | |
438 | ||
439 | my($type, $subtype, $boundary, $content, $ctype, $pos, | |
440 | $encoding, $decodefunc, $args, $part, $uribase); | |
441 | my(@parts) = (); | |
442 | my(@files) = (); | |
443 | my(@array) = (); | |
444 | my $ret = ""; | |
445 | ||
446 | ## Get type/subtype | |
447 | if (defined($fields->{'content-type'})) { | |
448 | $content = $fields->{'content-type'}->[0]; | |
449 | } | |
450 | $content = 'text/plain' unless $content; | |
451 | ($ctype) = $content =~ m%^\s*([\w\-\./]+)%; # Extract content-type | |
452 | $ctype =~ tr/A-Z/a-z/; # Convert to lowercase | |
453 | if ($ctype =~ m%/%) { # Extract base and sub types | |
454 | ($type,$subtype) = split(/\//, $ctype, 2); | |
455 | } elsif ($ctype =~ /text/i) { | |
456 | $ctype = 'text/plain'; | |
457 | $type = 'text'; $subtype = 'plain'; | |
458 | } else { | |
459 | $type = $subtype = ''; | |
460 | } | |
461 | ||
462 | ## Check if type is excluded | |
463 | if ($MIMEExcs{$ctype} || $MIMEExcs{$type}) { | |
464 | return (&$ExcludedPartFunc($ctype)); | |
465 | } | |
466 | ||
467 | ## Get entity URI base | |
468 | if (defined($fields->{'content-base'}) && | |
469 | ($uribase = $fields->{'content-base'}[0])) { | |
470 | $uribase =~ s/['"\s]//g; | |
471 | } elsif (defined($fields->{'content-location'}) && | |
472 | ($uribase = $fields->{'content-location'}[0])) { | |
473 | $uribase =~ s/['"\s]//g; | |
474 | } | |
475 | $uribase =~ s|(.*/).*|$1| if $uribase; | |
476 | ||
477 | ## Load content-type filter | |
478 | if ( (!defined($filter = &load_filter($ctype)) || !defined(&$filter)) && | |
479 | (!defined($filter = &load_filter("$type/*")) || !defined(&$filter)) && | |
480 | (!$inaltArg && | |
481 | (!defined($filter = &load_filter('*/*')) || !defined(&$filter)) && | |
482 | $ctype !~ m^\bmessage/(?:rfc822|news)\b^i && | |
483 | $type !~ /\bmultipart\b/) ) { | |
484 | warn qq|Warning: Unrecognized content-type, "$ctype", |, | |
485 | qq|assuming "application/octet-stream"\n|; | |
486 | $filter = &load_filter('application/octet-stream'); | |
487 | } | |
488 | ||
489 | ## Check for filter arguments | |
490 | $args = get_filter_args($ctype, "$type/*", $filter); | |
491 | ||
492 | ## Check encoding | |
493 | if (defined($fields->{'content-transfer-encoding'})) { | |
494 | $encoding = lc $fields->{'content-transfer-encoding'}[0]; | |
495 | $encoding =~ s/\s//g; | |
496 | $decodefunc = &load_decoder($encoding); | |
497 | } else { | |
498 | $encoding = undef; | |
499 | $decodefunc = undef; | |
500 | } | |
501 | ||
502 | ## A filter is defined for given content-type | |
503 | if ($filter && defined(&$filter)) { | |
504 | ## decode data | |
505 | if (defined($decodefunc)) { | |
506 | if (defined(&$decodefunc)) { | |
507 | $decoded = &$decodefunc($$body); | |
508 | @array = &$filter($fields, \$decoded, 1, $args); | |
509 | } else { | |
510 | @array = &$filter($fields, $body, | |
511 | $decodefunc =~ /as-is/i, $args); | |
512 | } | |
513 | } else { | |
514 | @array = &$filter($fields, $body, 0, $args); | |
515 | } | |
516 | ||
517 | ## Setup return variables | |
518 | $ret = shift @array; # Return string | |
519 | push(@files, @array); # Derived files | |
520 | ||
521 | ## No filter defined for given content-type | |
522 | } else { | |
523 | ## If multipart, recursively process each part | |
524 | if ($type =~ /\bmultipart\b/i) { | |
525 | local(%Cid) = ( ) unless scalar(caller) eq 'readmail'; | |
526 | my($isalt) = $subtype =~ /\balternative\b/i; | |
527 | ||
528 | ## Get boundary | |
529 | $boundary = ""; | |
530 | if ($content =~ m/\bboundary\s*=\s*"([^"]*)"/i) { | |
531 | $boundary = $1; | |
532 | } else { | |
533 | ($boundary) = $content =~ m/\bboundary\s*=\s*(\S+)/i; | |
534 | $boundary =~ s/;$//; # chop ';' if grabbed | |
535 | } | |
536 | ||
537 | ## If boundary defined, split body into parts | |
538 | if ($boundary =~ /\S/) { | |
539 | my $found = 0; | |
540 | my $have_end = 0; | |
541 | my $start_pos = 0; | |
542 | substr($$body, 0, 0) = "\n"; | |
543 | substr($boundary, 0, 0) = "\n--"; | |
544 | my $blen = length($boundary); | |
545 | my $bchkstr; | |
546 | ||
547 | while (($pos = index($$body, $boundary, $start_pos)) > -1) { | |
548 | # have to check for case when boundary is a substring | |
549 | # of another boundary, yuck! | |
550 | $bchkstr = substr($$body, $pos+$blen, 2); | |
551 | unless ($bchkstr =~ /\A\r?\n/ || $bchkstr =~ /\A--/) { | |
552 | # incomplete match, continue search | |
553 | $start_pos = $pos+$blen; | |
554 | next; | |
555 | } | |
556 | $found = 1; | |
557 | push(@parts, substr($$body, 0, $pos)); | |
558 | $parts[$#parts] =~ s/^\r//; | |
559 | ||
560 | # prune out part data just grabbed | |
561 | substr($$body, 0, $pos+$blen) = ""; | |
562 | ||
563 | # check if hit end | |
564 | if ($$body =~ /\A--/) { | |
565 | $have_end = 1; | |
566 | last; | |
567 | } | |
568 | ||
569 | # remove EOL at the beginning | |
570 | $$body =~ s/\A\r?\n//; | |
571 | $start_pos = 0; | |
572 | } | |
573 | if (!$have_end) { | |
574 | warn qq/Warning: No end boundary delimiter found in /, | |
575 | qq/message body\n/; | |
576 | push(@parts, $$body); | |
577 | $parts[$#parts] =~ s/^\r//; | |
578 | $$body = ""; | |
579 | } | |
580 | if ($found) { | |
581 | # discard front-matter | |
582 | shift(@parts); | |
583 | } else { | |
584 | # no boundary separators in message! | |
585 | warn qq/Warning: No boundary delimiters found in /, | |
586 | qq/multipart body\n/; | |
587 | if ($$body =~ m/\A\n[\w\-]+:\s/) { | |
588 | # remove \n added above if part looks like it has | |
589 | # headers. we keep if it does not to avoid body | |
590 | # data being parsed as a header below. | |
591 | substr($$body, 0, 1) = ""; | |
592 | } | |
593 | push(@parts, $$body); | |
594 | } | |
595 | ||
596 | ## Else treat body as one part | |
597 | } else { | |
598 | @parts = ($$body); | |
599 | } | |
600 | ||
601 | ## Process parts | |
602 | my(@entity) = (); | |
603 | my($cid, $href, $pctype); | |
604 | my %alt_exc = ( ); | |
605 | my $have_alt_prefs = $isalt && scalar(@_MIMEAltPrefs); | |
606 | @parts = \(@parts); | |
607 | while (defined($part = shift(@parts))) { | |
608 | $href = { }; | |
609 | $partfields = $href->{'fields'} = (MAILread_header($part))[0]; | |
610 | $href->{'body'} = $part; | |
611 | $href->{'filtered'} = 0; | |
612 | $pctype = extract_ctype( | |
613 | $partfields->{'content-type'}, $ctype); | |
614 | ||
615 | ## check alternative preferences | |
616 | if ($have_alt_prefs) { | |
617 | next if ($alt_exc{$pctype}); | |
618 | my $pos = $_MIMEAltPrefs{$pctype}; | |
619 | if (defined($pos)) { | |
620 | for (++$pos; $pos <= $#_MIMEAltPrefs; ++$pos) { | |
621 | $alt_exc{$_MIMEAltPrefs[$pos]} = 1; | |
622 | } | |
623 | } | |
624 | } | |
625 | ||
626 | ## only add to %Cid if not excluded | |
627 | if (!&MAILis_excluded($pctype)) { | |
628 | if ($isalt) { | |
629 | unshift(@entity, $href); | |
630 | } else { | |
631 | push(@entity, $href); | |
632 | } | |
633 | $cid = $partfields->{'content-id'}[0] || | |
634 | $partfields->{'message-id'}[0]; | |
635 | if (defined($cid)) { | |
636 | $cid =~ s/[\s<>]//g; | |
637 | $Cid{"cid:$cid"} = $href if $cid =~ /\S/; | |
638 | } | |
639 | $cid = undef; | |
640 | if (defined($partfields->{'content-location'}) && | |
641 | ($cid = $partfields->{'content-location'}[0])) { | |
642 | my $partbase = $uribase; | |
643 | $cid =~ s/['"\s]//g; | |
644 | if (defined($partfields->{'content-base'})) { | |
645 | $partbase = $partfields->{'content-base'}[0]; | |
646 | } | |
647 | $cid = apply_base_url($partbase, $cid); | |
648 | if ($cid =~ /\S/ && !$Cid{$cid}) { | |
649 | $Cid{$cid} = $href; | |
650 | } | |
651 | } | |
652 | if ($cid) { | |
653 | $partfields->{'content-location'} = [ $cid ]; | |
654 | } elsif (!defined($partfields->{'content-base'})) { | |
655 | $partfields->{'content-base'} = [ $uribase ]; | |
656 | } | |
657 | } | |
658 | } | |
659 | ||
660 | my($entity); | |
661 | ENTITY: foreach $entity (@entity) { | |
662 | next if $entity->{'filtered'}; | |
663 | ||
664 | ## If content-type not defined for part, then determine | |
665 | ## content-type based upon multipart subtype. | |
666 | $partfields = $entity->{'fields'}; | |
667 | if (!defined($partfields->{'content-type'})) { | |
668 | $partfields->{'content-type'} = | |
669 | [ ($subtype =~ /digest/) ? | |
670 | 'message/rfc822' : 'text/plain' ]; | |
671 | } | |
672 | ||
673 | ## Process part | |
674 | @array = MAILread_body( | |
675 | $partfields, | |
676 | $entity->{'body'}, | |
677 | $isalt); | |
678 | ||
679 | ## Only use last filterable part in alternate | |
680 | if ($isalt) { | |
681 | $ret = shift @array; | |
682 | if ($ret) { | |
683 | push(@files, @array); | |
684 | $entity->{'filtered'} = 1; | |
685 | last ENTITY; | |
686 | } | |
687 | } else { | |
688 | if (!$array[0]) { | |
689 | $array[0] = &$CantProcessPartFunc( | |
690 | $partfields->{'content-type'}[0]); | |
691 | } | |
692 | $ret .= shift @array; | |
693 | } | |
694 | push(@files, @array); | |
695 | $entity->{'filtered'} = 1; | |
696 | } | |
697 | ||
698 | ## Check if multipart/alternative, and no success | |
699 | if (!$ret && $isalt) { | |
700 | warn qq|Warning: No recognized part in multipart/alternative; |, | |
701 | qq|will try to decode last part\n|; | |
702 | $entity = $entity[0]; | |
703 | @array = &MAILread_body( | |
704 | $entity->{'fields'}, | |
705 | $entity->{'body'}); | |
706 | $ret = shift @array; | |
707 | if ($ret) { | |
708 | push(@files, @array); | |
709 | } else { | |
710 | $ret = &$UnrecognizedAltPartFunc(); | |
711 | } | |
712 | } | |
713 | ||
714 | ## Else if message/rfc822 or message/news | |
715 | } elsif ($ctype =~ m^\bmessage/(?:rfc822|news)\b^i) { | |
716 | $partfields = (MAILread_header($body))[0]; | |
717 | ||
718 | $ret = &$BeginEmbeddedMesgFunc(); | |
719 | if ($FormatHeaderFunc && defined(&$FormatHeaderFunc)) { | |
720 | $ret .= &$FormatHeaderFunc($partfields); | |
721 | } else { | |
722 | warn "Warning: readmail: No message header formatting ", | |
723 | "function defined\n"; | |
724 | } | |
725 | @array = MAILread_body($partfields, $body); | |
726 | $ret .= shift @array || | |
727 | &$CantProcessPartFunc( | |
728 | $partfields->{'content-type'}[0] || 'text/plain'); | |
729 | $ret .= &$EndEmbeddedMesgFunc(); | |
730 | ||
731 | push(@files, @array); | |
732 | ||
733 | ## Else cannot handle type | |
734 | } else { | |
735 | $ret = ''; | |
736 | } | |
737 | } | |
738 | ||
739 | ($ret, @files); | |
740 | } | |
741 | ||
742 | ##---------------------------------------------------------------------------## | |
743 | ## MAILread_header reads (and strips) a mail message header from the | |
744 | ## variable $mesg. $mesg is a reference to the mail message in | |
745 | ## a string. | |
746 | ## | |
747 | ## $fields is a reference to a hash to put field values indexed by | |
748 | ## field labels that have been converted to all lowercase. | |
749 | ## Field values are array references to the values | |
750 | ## for each field. | |
751 | ## | |
752 | ## ($fields_hash_ref, $header_txt) = MAILread_header($mesg_data); | |
753 | ## | |
754 | sub MAILread_header { | |
755 | my($mesg) = shift; | |
756 | ||
757 | my $fields = { }; | |
758 | my $label = ''; | |
759 | my $header = ''; | |
760 | my($value, $tmp, $pos); | |
761 | ||
762 | ## Read a line at a time. | |
763 | for ($pos=0; $pos >= 0; ) { | |
764 | $pos = index($$mesg, "\n"); | |
765 | if ($pos >= 0) { | |
766 | $tmp = substr($$mesg, 0, $pos+1); | |
767 | substr($$mesg, 0, $pos+1) = ""; | |
768 | last if $tmp =~ /^\r?$/; # Done if blank line | |
769 | ||
770 | $header .= $tmp; | |
771 | chop $tmp; # Chop newline | |
772 | $tmp =~ s/\r$//; # Delete <CR> characters | |
773 | } else { | |
774 | $tmp = $$mesg; | |
775 | $header .= $tmp; | |
776 | } | |
777 | ||
778 | ## Decode text if requested | |
779 | $tmp = &MAILdecode_1522_str($tmp,JUST_DECODE) if $DecodeHeader; | |
780 | ||
781 | ## Check for continuation of a field | |
782 | if ($tmp =~ s/^\s//) { | |
783 | $fields->{$label}[-1] .= $tmp if $label; | |
784 | next; | |
785 | } | |
786 | ||
787 | ## Separate head from field text | |
788 | if ($tmp =~ /^([^:\s]+):\s*([\s\S]*)$/) { | |
789 | ($label, $value) = (lc($1), $2); | |
790 | if ($fields->{$label}) { | |
791 | push(@{$fields->{$label}}, $value); | |
792 | } else { | |
793 | $fields->{$label} = [ $value ]; | |
794 | } | |
795 | } | |
796 | } | |
797 | ($fields, $header); | |
798 | } | |
799 | ||
800 | ##---------------------------------------------------------------------------## | |
801 | ## MAILread_file_header reads (and strips) a mail message header | |
802 | ## from the filehandle $handle. The routine behaves in the | |
803 | ## same manner as MAILread_header; | |
804 | ## | |
805 | ## ($fields_hash, $header_text) = MAILread_file_header($filehandle); | |
806 | ## | |
807 | sub MAILread_file_header { | |
808 | my($handle) = @_; | |
809 | my $label = ''; | |
810 | my $header = ''; | |
811 | my $fields = { }; | |
812 | local $/ = "\n"; | |
813 | ||
814 | my($value, $tmp); | |
815 | while (($tmp = <$handle>) !~ /^[\r]?$/) { | |
816 | ## Save raw text | |
817 | $header .= $tmp; | |
818 | ||
819 | ## Delete eol characters | |
820 | $tmp =~ s/[\r\n]//g; | |
821 | ||
822 | ## Decode text if requested | |
823 | $tmp = &MAILdecode_1522_str($tmp,JUST_DECODE) if $DecodeHeader; | |
824 | ||
825 | ## Check for continuation of a field | |
826 | if ($tmp =~ s/^\s//) { | |
827 | $fields->{$label}[-1] .= $tmp if $label; | |
828 | next; | |
829 | } | |
830 | ||
831 | ## Separate head from field text | |
832 | if ($tmp =~ /^([^:\s]+):\s*([\s\S]*)$/) { | |
833 | ($label, $value) = (lc($1), $2); | |
834 | if (defined($fields->{$label})) { | |
835 | push(@{$fields->{$label}}, $value); | |
836 | } else { | |
837 | $fields->{$label} = [ $value ]; | |
838 | } | |
839 | } | |
840 | } | |
841 | ($fields, $header); | |
842 | } | |
843 | ||
844 | ##---------------------------------------------------------------------------## | |
845 | ## MAILis_excluded() checks if specified content-type has been | |
846 | ## specified to be excluded. | |
847 | ## | |
848 | sub MAILis_excluded { | |
849 | my $ctype = lc($_[0]) || 'text/plain'; | |
850 | if ($MIMEExcs{$ctype}) { | |
851 | return 1; | |
852 | } | |
853 | if ($ctype =~ m|([^/]+)/|) { | |
854 | return $MIMEExcs{$1}; | |
855 | } | |
856 | 0; | |
857 | } | |
858 | ||
859 | ##---------------------------------------------------------------------------## | |
860 | ## MAILhead_get_disposition gets the content disposition and | |
861 | ## filename from $hfields, $hfields is a hash produced by the | |
862 | ## MAILread_header and MAILread_file_header routines. | |
863 | ## | |
864 | sub MAILhead_get_disposition { | |
865 | my($hfields) = shift; | |
866 | my($disp, $filename) = ('', ''); | |
867 | local($_); | |
868 | ||
869 | if (defined($hfields->{'content-disposition'}) && | |
870 | ($_ = $hfields->{'content-disposition'}->[0])) { | |
871 | ($disp) = /^\s*([^\s;]+)/; | |
872 | if (/filename="([^"]+)"/i) { | |
873 | $filename = $1; | |
874 | } elsif (/filename=(\S+)/i) { | |
875 | ($filename = $1) =~ s/;\s*$//g; | |
876 | } | |
877 | } | |
878 | if (!$filename && defined($_ = $hfields->{'content-type'}[0])) { | |
879 | if (/name="([^"]+)"/i) { | |
880 | $filename = $1; | |
881 | } elsif (/name=(\S+)/i) { | |
882 | ($filename = $1) =~ s/;\s*$//g; | |
883 | } | |
884 | } | |
885 | $filename = MAILdecode_1522_str($filename, DECODE_ALL); | |
886 | $filename =~ s%.*[/\\:]%%; # Remove any path component | |
887 | $filename =~ s/^\s+//; # Remove leading whitespace | |
888 | $filename =~ s/\s+$//; # Remove trailing whitespace | |
889 | ($disp, $filename); | |
890 | } | |
891 | ||
892 | ##---------------------------------------------------------------------------## | |
893 | ## MAILparse_parameter_str(): parses a parameter/value string. | |
894 | ## Support for RFC 2184 extensions exists. The $hasmain flag tells | |
895 | ## the method if there is an intial main value for the sting. For | |
896 | ## example: | |
897 | ## | |
898 | ## text/plain; charset=us-ascii | |
899 | ## ----^^^^^^^^^^ | |
900 | ## | |
901 | ## The "text/plain" part is not a parameter/value pair, but having | |
902 | ## an initial value is common among some header fields that can have | |
903 | ## parameter/value pairs (egs: Content-Type, Content-Disposition). | |
904 | ## | |
905 | ## Return Value: | |
906 | ## Reference to a hash. Each key is the attribute name. | |
907 | ## The special key, 'x-main', is the main value if the | |
908 | ## $hasmain flag is set. | |
909 | ## | |
910 | ## Each hash value is a hash reference with three keys: | |
911 | ## 'charset', 'lang', 'value'. 'charset' and 'lang' may be | |
912 | ## undef if character set or language information is not | |
913 | ## specified. | |
914 | ## | |
915 | ## Example Usage: | |
916 | ## | |
917 | ## $content_type_field = 'text/plain; charset=us-ascii'; | |
918 | ## $parms = MAILparse_parameter_str($content_type_field, 1); | |
919 | ## $ctype = $parms->{'x-main'}; | |
920 | ## $mesg_body_charset = $parms->{'charset'}{'value'}; | |
921 | ## | |
922 | sub MAILparse_parameter_str { | |
923 | my $str = shift; # Input string | |
924 | my $hasmain = shift; # Flag if there is a main value to extract | |
925 | ||
926 | require 'rfc822.pl'; | |
927 | ||
928 | my $parm = { }; | |
929 | my(@toks) = (rfc822::uncomment($str)); | |
930 | my($tok, $name, $value, $charset, $lang, $part); | |
931 | ||
932 | $parm->{'x-main'} = shift @toks if $hasmain; | |
933 | ||
934 | ## Loop thru token list | |
935 | while ($tok = shift @toks) { | |
936 | next if $tok eq ";"; | |
937 | ($name, $value) = split(/=/, $tok, 2); | |
938 | ## Check if charset/lang specified | |
939 | if ($name =~ s/\*$//) { | |
940 | if ($value =~ s/^([^']*)'([^']*)'//) { | |
941 | ($charset, $lang) = ($1, $2); | |
942 | } else { | |
943 | ($charset, $lang) = (undef, undef); | |
944 | } | |
945 | } | |
946 | ## Check if parameter is only part | |
947 | if ($name =~ s/\*(\d+)$//) { | |
948 | $part = $1 - 1; # we start at 0 internally | |
949 | } else { | |
950 | $part = 0; | |
951 | } | |
952 | ## Set values for parameter | |
953 | $name = lc $name; | |
954 | $parm->{$name} = { | |
955 | 'charset' => $charset, | |
956 | 'lang' => $lang, | |
957 | }; | |
958 | ## Check if value is next token | |
959 | if ($value eq "") { | |
960 | ## If value next token, than it must be quoted | |
961 | $value = shift @toks; | |
962 | $value =~ s/^"//; $value =~ s/"$//; $value =~ s/\\//g; | |
963 | } | |
964 | $parm->{$name}{'vlist'}[$part] = $value; | |
965 | } | |
966 | ||
967 | ## Now we loop thru each parameter and define the final values from | |
968 | ## the parts | |
969 | foreach $name (keys %$parm) { | |
970 | next if $name eq 'x-main'; | |
971 | $parm->{$name}{'value'} = join("", @{$parm->{$name}{'vlist'}}); | |
972 | } | |
973 | ||
974 | $parm; | |
975 | } | |
976 | ||
977 | ##---------------------------------------------------------------------------## | |
978 | ## MAILset_alternative_prefs() is used to set content-type | |
979 | ## preferences for multipart/alternative entities. The list | |
980 | ## specified will supercede the prefered format as denoted by | |
981 | ## the ording of parts in the entity. | |
982 | ## | |
983 | ## A content-type listed earlier in the array will be prefered | |
984 | ## over one later. For example: | |
985 | ## | |
986 | ## MAILset_alternative_prefs('text/plain', 'text/html'); | |
987 | ## | |
988 | ## States that if a multipart/alternative entity contains a | |
989 | ## text/plain part and a text/html part, the text/plain part will | |
990 | ## be prefered over the text/html part. | |
991 | ## | |
992 | sub MAILset_alternative_prefs { | |
993 | @_MIMEAltPrefs = map { lc } @_; | |
994 | %_MIMEAltPrefs = (); | |
995 | my $i = 0; | |
996 | my $ctype; | |
997 | foreach $ctype (@_MIMEAltPrefs) { | |
998 | $_MIMEAltPrefs{$ctype} = $i++; | |
999 | } | |
1000 | } | |
1001 | ||
1002 | ##---------------------------------------------------------------------------## | |
1003 | ## MAILset_charset_aliases() is used to define name aliases for | |
1004 | ## charset names. | |
1005 | ## | |
1006 | ## Example usage: | |
1007 | ## MAILset_charset_aliases( { | |
1008 | ## 'iso-8859-1' => [ 'latin1', 'iso_8859_1', '8859-1' ], | |
1009 | ## 'iso-8859-15' => [ 'latin9', 'iso_8859_15', '8859-15' ], | |
1010 | ## }, $override ); | |
1011 | ## | |
1012 | sub MAILset_charset_aliases { | |
1013 | my $map = shift; | |
1014 | my $override = shift; | |
1015 | ||
1016 | %MIMECharsetAliases = () if $override; | |
1017 | my($charset, $aliases, $alias); | |
1018 | while (($charset, $aliases) = each(%$map)) { | |
1019 | $charset = lc $charset; | |
1020 | foreach $alias (@$aliases) { | |
1021 | $MIMECharsetAliases{lc $alias} = $charset; | |
1022 | } | |
1023 | } | |
1024 | } | |
1025 | ||
1026 | ##---------------------------------------------------------------------------## | |
1027 | ## MAILload_charset_converter() loads the charset converter function | |
1028 | ## associated with given charset name. | |
1029 | ## | |
1030 | ## Example usage: | |
1031 | ## ($func, $real_charset) = MAILload_charset_converter($charset); | |
1032 | ## | |
1033 | ## $func is the reference to the converter function, which may be | |
1034 | ## undef. $real_charset is the real charset name that should be | |
1035 | ## used when invoking the function. | |
1036 | ## | |
1037 | sub MAILload_charset_converter { | |
1038 | my $charset = lc shift; | |
1039 | $charset = $MIMECharsetAliases{$charset} if $MIMECharsetAliases{$charset}; | |
1040 | my $func = load_charset($charset); | |
1041 | if (!defined($func) || !defined(&$func)) { | |
1042 | $func = load_charset('default'); | |
1043 | } | |
1044 | ($func, $charset); | |
1045 | } | |
1046 | ||
1047 | ############################################################################### | |
1048 | ## Private Routines | |
1049 | ############################################################################### | |
1050 | ||
1051 | ##---------------------------------------------------------------------------## | |
1052 | ## Default function for unable to process a part of a multipart | |
1053 | ## message. | |
1054 | ## | |
1055 | sub cantProcessPart { | |
1056 | my($ctype) = $_[0]; | |
1057 | warn "Warning: Could not process part with given Content-Type: ", | |
1058 | "$ctype\n"; | |
1059 | "<br><tt><<< $ctype: Unrecognized >>></tt><br>\n"; | |
1060 | } | |
1061 | ##---------------------------------------------------------------------------## | |
1062 | ## Default function returning message for content-types excluded. | |
1063 | ## | |
1064 | sub excludedPart { | |
1065 | my($ctype) = $_[0]; | |
1066 | "<br><tt><<< $ctype: EXCLUDED >>></tt><br>\n"; | |
1067 | } | |
1068 | ##---------------------------------------------------------------------------## | |
1069 | ## Default function for unrecognizeable part in multipart/alternative. | |
1070 | ## | |
1071 | sub unrecognizedAltPart { | |
1072 | warn "Warning: No recognizable part in multipart/alternative\n"; | |
1073 | "<br><tt><<< multipart/alternative: ". | |
1074 | "No recognizable part >>></tt><br>\n"; | |
1075 | } | |
1076 | ##---------------------------------------------------------------------------## | |
1077 | ## Default function for beggining of embedded message | |
1078 | ## (ie message/rfc822 or message/news). | |
1079 | ## | |
1080 | sub beginEmbeddedMesg { | |
1081 | qq|<blockquote><small>--- <i>Begin Message</i> ---</small>\n|; | |
1082 | } | |
1083 | ##---------------------------------------------------------------------------## | |
1084 | ## Default function for end of embedded message | |
1085 | ## (ie message/rfc822 or message/news). | |
1086 | ## | |
1087 | sub endEmbeddedMesg { | |
1088 | qq|<small>--- <i>End Message</i> ---</small></blockquote>\n|; | |
1089 | } | |
1090 | ||
1091 | ##---------------------------------------------------------------------------## | |
1092 | ||
1093 | sub load_charset { | |
1094 | require $MIMECharSetConvertersSrc{$_[0]} | |
1095 | if defined($MIMECharSetConvertersSrc{$_[0]}) && | |
1096 | $MIMECharSetConvertersSrc{$_[0]}; | |
1097 | $MIMECharSetConverters{$_[0]}; | |
1098 | } | |
1099 | sub load_decoder { | |
1100 | my $enc = lc shift; $enc =~ s/\s//; | |
1101 | require $MIMEDecodersSrc{$enc} | |
1102 | if defined($MIMEDecodersSrc{$enc}) && | |
1103 | $MIMEDecodersSrc{$enc}; | |
1104 | $MIMEDecoders{$enc}; | |
1105 | } | |
1106 | sub load_filter { | |
1107 | require $MIMEFiltersSrc{$_[0]} | |
1108 | if defined($MIMEFiltersSrc{$_[0]}) && | |
1109 | $MIMEFiltersSrc{$_[0]}; | |
1110 | $MIMEFilters{$_[0]}; | |
1111 | } | |
1112 | sub get_filter_args { | |
1113 | my $args = ''; | |
1114 | my $s; | |
1115 | foreach $s (@_) { | |
1116 | next unless defined $s; | |
1117 | $args = $MIMEFiltersArgs{$s}; | |
1118 | last if defined($args) && ($args ne ''); | |
1119 | } | |
1120 | $args; | |
1121 | } | |
1122 | ||
1123 | ##---------------------------------------------------------------------------## | |
1124 | ## extract_ctype() extracts the content-type specification from | |
1125 | ## the beginning of given string. | |
1126 | ## | |
1127 | sub extract_ctype { | |
1128 | if (!defined($_[0]) || | |
1129 | (ref($_[0]) && ($_[0][0] !~ /\S/)) || | |
1130 | ($_[0] !~ /\S/)) { | |
1131 | return 'message/rfc822' | |
1132 | if (defined($_[1]) && ($_[1] eq 'multipart/digest')); | |
1133 | return 'text/plain'; | |
1134 | } | |
1135 | if (ref($_[0])) { | |
1136 | $_[0][0] =~ m|^\s*([\w\-\./]+)|; | |
1137 | return lc($1); | |
1138 | } | |
1139 | $_[0] =~ m|^\s*([\w\-\./]+)|; | |
1140 | lc($1); | |
1141 | } | |
1142 | ||
1143 | ##---------------------------------------------------------------------------## | |
1144 | ||
1145 | sub apply_base_url { | |
1146 | my($b, $u) = @_; | |
1147 | return $u if !defined($b) || $b !~ /\S/; | |
1148 | ||
1149 | my($ret); | |
1150 | $u =~ s/^\s+//; | |
1151 | if ($u =~ m%^$Url%o || $u =~ m/^#/) { | |
1152 | ## Absolute URL or scroll link; do nothing | |
1153 | $ret = $u; | |
1154 | } else { | |
1155 | ## Relative URL | |
1156 | if ($u =~ /^\./) { | |
1157 | ## "./---" or "../---": Need to remove and adjust base | |
1158 | ## accordingly. | |
1159 | $b =~ s/\/$//; | |
1160 | my @a = split(/\//, $b); | |
1161 | my $cnt = 0; | |
1162 | while ( $cnt <= scalar(@a) && | |
1163 | $u =~ s|^(\.{1,2})/|| ) { ++$cnt if length($1) == 2; } | |
1164 | splice(@a, -$cnt) if $cnt > 0; | |
1165 | $b = join('/', @a, ""); | |
1166 | ||
1167 | } elsif ($u =~ m%^/%) { | |
1168 | ## "/---": Just use hostname:port of base. | |
1169 | $b =~ s%^(${Url}[^/]*)/.*%$1%o; | |
1170 | } | |
1171 | $ret = $b . $u; | |
1172 | } | |
1173 | $ret; | |
1174 | } | |
1175 | ||
1176 | ##---------------------------------------------------------------------------## | |
1177 | ||
1178 | sub dump_header { | |
1179 | my $fh = shift; | |
1180 | my $fields = shift; | |
1181 | my($key, $a, $value); | |
1182 | foreach $key (sort keys %$fields) { | |
1183 | $a = $fields->{$key}; | |
1184 | if (ref($a)) { | |
1185 | foreach $value (@$a) { | |
1186 | print $fh "$key: $value\n"; | |
1187 | } | |
1188 | } else { | |
1189 | print $fh "$key: $a\n"; | |
1190 | } | |
1191 | } | |
1192 | } | |
1193 | ||
1194 | ##---------------------------------------------------------------------------## | |
1195 | 1; # for require |