Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / site_perl / 5.8.0 / readmail.pl
CommitLineData
86530b38
AT
1##---------------------------------------------------------------------------##
2## File:
3## $Id: readmail.pl,v 2.21 2002/10/15 22:06:53 ehood Exp $
4## Author:
5## Earl Hood mhonarc@mhonarc.org
6## Description:
7## Library defining routines to parse MIME e-mail messages. The
8## library is designed so it may be reused for other e-mail
9## filtering programs. The default behavior is for mail->html
10## filtering, however, the defaults can be overridden to allow
11## mail->whatever filtering.
12##
13## Public Functions:
14## ----------------
15## $data = MAILdecode_1522_str($str);
16## ($data, @files) = MAILread_body($fields_hash_ref, $body_ref);
17## $hash_ref = MAILread_file_header($handle);
18## $hash_ref = MAILread_header($mesg_str_ref);
19##
20## ($disp, $file) = MAILhead_get_disposition($fields_hash_ref);
21## $boolean = MAILis_excluded($content_type);
22## $parm_hash_ref = MAILparse_parameter_str($header_field);
23## $parm_hash_ref = MAILparse_parameter_str($header_field, 1);
24##
25##---------------------------------------------------------------------------##
26## Copyright (C) 1996-2001 Earl Hood, mhonarc@mhonarc.org
27##
28## This program is free software; you can redistribute it and/or modify
29## it under the terms of the GNU General Public License as published by
30## the Free Software Foundation; either version 2 of the License, or
31## (at your option) any later version.
32##
33## This program is distributed in the hope that it will be useful,
34## but WITHOUT ANY WARRANTY; without even the implied warranty of
35## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36## GNU General Public License for more details.
37##
38## You should have received a copy of the GNU General Public License
39## along with this program; if not, write to the Free Software
40## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
41## 02111-1307, USA
42##---------------------------------------------------------------------------##
43
44package readmail;
45
46###############################################################################
47## Private Globals ##
48###############################################################################
49
50my $Url = '(\w+://|\w+:)';
51
52my @_MIMEAltPrefs = ();
53my %_MIMEAltPrefs = ();
54
55###############################################################################
56## Public Globals ##
57###############################################################################
58
59##---------------------------------------------------------------------------##
60## Constants
61##
62
63## Constants for use as second argument to MAILdecode_1522_str().
64sub JUST_DECODE() { 1; }
65sub DECODE_ALL() { 2; }
66
67##---------------------------------------------------------------------------##
68
69##---------------------------------------------------------------------------##
70## Scalar Variables
71##
72
73## Flag if message headers are decoded in the parse header routines:
74## MAILread_header, MAILread_file_header. This only affects the
75## values of the field hash created. The original header is still
76## passed as the return value.
77##
78## The only 1522 data that will be decoded is data encoded with charsets
79## set to "-decode-" in the %MIMECharSetConverters hash.
80
81$DecodeHeader = 0;
82
83##---------------------------------------------------------------------------##
84## Variables for holding information related to the functions used
85## for processing MIME data. Variables are defined in the scope
86## of main.
87
88## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
89## %MIMEDecoders is the associative array for storing functions for
90## decoding mime data.
91##
92## Keys => content-transfer-encoding (should be in lowercase)
93## Values => function name.
94##
95## Function names should be qualified with package identifiers.
96## Functions are called as follows:
97##
98## $decoded_data = &function($data);
99##
100## The value "as-is" may be used to allow the data to be passed without
101## decoding to the registered filter, but the decoded flag will be
102## set to true.
103
104%MIMEDecoders = ()
105 unless defined(%MIMEDecoders);
106%MIMEDecodersSrc = ()
107 unless defined(%MIMEDecodersSrc);
108
109## Default settings:
110$MIMEDecoders{"7bit"} = "as-is"
111 unless defined($MIMEDecoders{"7bit"});
112$MIMEDecoders{"8bit"} = "as-is"
113 unless defined($MIMEDecoders{"8bit"});
114$MIMEDecoders{"binary"} = "as-is"
115 unless defined($MIMEDecoders{"binary"});
116$MIMEDecoders{"base64"} = "base64::b64decode"
117 unless defined($MIMEDecoders{"base64"});
118$MIMEDecoders{"quoted-printable"} = "quoted_printable::qprdecode"
119 unless defined($MIMEDecoders{"quoted-printable"});
120$MIMEDecoders{"x-uuencode"} = "base64::uudecode"
121 unless defined($MIMEDecoders{"x-uuencode"});
122$MIMEDecoders{"x-uue"} = "base64::uudecode"
123 unless defined($MIMEDecoders{"x-uue"});
124$MIMEDecoders{"uuencode"} = "base64::uudecode"
125 unless defined($MIMEDecoders{"uuencode"});
126
127$MIMEDecodersSrc{"base64"} = "base64.pl"
128 unless defined($MIMEDecodersSrc{"base64"});
129$MIMEDecodersSrc{"quoted-printable"} = "qprint.pl"
130 unless defined($MIMEDecodersSrc{"quoted-printable"});
131$MIMEDecodersSrc{"x-uuencode"} = "base64.pl"
132 unless defined($MIMEDecodersSrc{"x-uuencode"});
133$MIMEDecodersSrc{"x-uue"} = "base64.pl"
134 unless defined($MIMEDecodersSrc{"x-uue"});
135$MIMEDecodersSrc{"uuencode"} = "base64.pl"
136 unless defined($MIMEDecodersSrc{"uuencode"});
137
138## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
139## %MIMECharSetConverters is the associative array for storing functions
140## for converting data in a particular charset to a destination format
141## within the MAILdecode_1522_str() routine. Destination format is defined
142## by the function.
143##
144## Keys => charset (should be in lowercase)
145## Values => function name.
146##
147## Charset values take on a form like "iso-8859-1" or "us-ascii".
148## NOTE: Values need to be in lower-case.
149##
150## The key "default" can be assigned to define the default function
151## to call if no explicit charset function is defined.
152##
153## The key "plain" can be set to a function for decoded regular text not
154## encoded in 1522 format.
155##
156## Function names are name of defined perl function and should be
157## qualified with package identifiers. Functions are called as follows:
158##
159## $converted_data = &function($data, $charset);
160##
161## A function called "-decode-" implies that the data should be
162## decoded, but no converter is to be invoked.
163##
164## A function called "-ignore-" implies that the data should
165## not be decoded and converted. Ie. For the specified charset,
166## the encoding will stay unprocessed and passed back in the return
167## string.
168
169%MIMECharSetConverters = ()
170 unless defined(%MIMECharSetConverters);
171%MIMECharSetConvertersSrc = ()
172 unless defined(%MIMECharSetConvertersSrc);
173
174## Default settings:
175$MIMECharSetConverters{"default"} = "-ignore-"
176 unless defined($MIMECharSetConverters{"default"});
177
178## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
179## %MIMEFilters is the associative array for storing functions that
180## process various content-types in the MAILread_body routine.
181##
182## Keys => Content-type (should be in lowercase)
183## Values => function name.
184##
185## Function names should be qualified with package identifiers.
186## Functions are called as follows:
187##
188## $converted_data = &function($header, *parsed_header_assoc_array,
189## *message_data, $decoded_flag,
190## $optional_filter_arguments);
191##
192## Functions can be registered for base types. Example:
193##
194## $MIMEFilters{"image/*"} = "mypackage'function";
195##
196## IMPORTANT: If a function specified is not defined when MAILread_body
197## tries to invoke it, MAILread_body will silently ignore. Make sure
198## that all functions are defined before invoking MAILread_body.
199
200%MIMEFilters = ()
201 unless defined(%MIMEFilters);
202%MIMEFiltersSrc = ()
203 unless defined(%MIMEFiltersSrc);
204
205## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
206## %MIMEFiltersArgs is the associative array for storing any optional
207## arguments to functions specified in MIMEFilters (the
208## $optional_filter_arguments from above).
209##
210## Keys => Either one of the following: content-type, function name.
211## Values => Argument string (format determined by filter function).
212##
213## Arguments listed for a content-type will be used over arguments
214## listed for a function if both are applicable.
215
216%MIMEFiltersArgs = ()
217 unless defined(%MIMEFiltersArgs);
218
219## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
220## %MIMEExcs is the associative array listing which data types
221## should be auto-excluded during parsing:
222##
223## Keys => content-type, or base-type
224## Values => <should evaluate to a true expression>
225##
226## For purposes of efficiency, content-types, or base-types, should
227## be specified in lowercase. All key lookups are done in lowercase.
228
229%MIMEExcs = ()
230 unless defined(%MIMEExcs);
231
232## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
233## %MIMECharsetAliases is a mapping of charset names to charset names.
234## The MAILset_charset_aliases() routine should be used to set the
235## values of this hash.
236##
237## Keys => charset name
238## Values => real charset name
239##
240%MIMECharsetAliases = ()
241 unless defined(%MIMECharsetAliases);
242
243##---------------------------------------------------------------------------
244## Variables holding functions for generating processed output
245## for MAILread_body(). The default functions generate HTML.
246## However, the variables can be set to functions that generate
247## a different type of output.
248##
249## $FormatHeaderFunc has no default, and must be defined by
250## the calling program.
251##
252## Function that returns a message when failing to process a part of a
253## a multipart message. The content-type of the message is passed
254## as an argument.
255
256$CantProcessPartFunc = \&cantProcessPart
257 unless(defined($CantProcessPartFunc));
258
259## Function that returns a message when a part is excluded via %MIMEExcs.
260
261$ExcludedPartFunc = \&excludedPart
262 unless(defined($ExcludedPartFunc));
263
264## Function that returns a message when a part is unrecognized in a
265## multipart/alternative message. I.e. No part could be processed.
266## No arguments are passed to function.
267
268$UnrecognizedAltPartFunc = \&unrecognizedAltPart
269 unless(defined($UnrecognizedAltPartFunc));
270
271## Function that returns a string to go before any data generated generating
272## from processing an embedded message (message/rfc822 or message/news).
273## No arguments are passed to function.
274
275$BeginEmbeddedMesgFunc = \&beginEmbeddedMesg
276 unless(defined($BeginEmbeddedMesgFunc));
277
278## Function that returns a string to go after any data generated generating
279## from processing an embedded message (message/rfc822 or message/news).
280## No arguments are passed to function.
281
282$EndEmbeddedMesgFunc = \&endEmbeddedMesg
283 unless(defined($EndEmbeddedMesgFunc));
284
285## Function to return a string that is a result of the functions
286## processing of a message header. The function is called for
287## embedded messages (message/rfc822 and message/news). The
288## arguments to function are:
289##
290## 1. Pointer to associative array representing message header
291## contents with the keys as field labels (in all lower-case)
292## and the values as field values of the labels.
293##
294## 2. Pointer to associative array mapping lower-case keys of
295## argument 1 to original case.
296##
297## Prototype: $return_data = &function(*fields, *lower2orig_fields);
298
299$FormatHeaderFunc = undef
300 unless(defined($FormatHeaderFunc));
301
302###############################################################################
303## Public Routines ##
304###############################################################################
305##---------------------------------------------------------------------------##
306## MAILdecode_1522_str() decodes a string encoded in a format
307## specified by RFC 1522. The decoded string is the return value.
308## If no MIMECharSetConverters is registered for a charset, then
309## the decoded data is returned "as-is".
310##
311## Usage:
312##
313## $ret_data = &MAILdecode_1522_str($str, $decoding_flag);
314##
315## If $decoding_flag is JUST_DECODE, $str will be decoded for only
316## the charsets specified as "-decode-". If it is equal to
317## DECODE_ALL, all encoded data is decoded without any conversion.
318##
319sub MAILdecode_1522_str {
320 my($str) = shift;
321 my($decoding_flag) = shift || 0;
322 my($charset,
323 $encoding,
324 $dec,
325 $charcnv,
326 $real_charset,
327 $plaincnv,
328 $plain_real_charset,
329 $strtxt,
330 $str_before);
331 my($ret) = ('');
332
333 # Get plain converter
334 ($plaincnv, $plain_real_charset) = MAILload_charset_converter('plain');
335 $plain_real_charset = 'us-ascii' if $plain_real_charset eq 'plain';
336
337 # Decode string
338 while ($str =~ /=\?([^?]+)\?(.)\?([^?]*)\?=/) {
339
340 # Grab components
341 ($charset, $encoding) = ($1, $2);
342 $strtxt = $3; $str_before = $`; $str = $';
343
344 # Check encoding method and grab proper decoder
345 if ($encoding =~ /b/i) {
346 $dec = &load_decoder('base64');
347 } else {
348 $dec = &load_decoder('quoted-printable');
349 }
350
351 # Convert before (unencoded) text
352 if ($decoding_flag) { # ignore if just decode
353 $ret .= $str_before;
354 } elsif (defined(&$plaincnv)) { # decode and convert
355 $ret .= &$plaincnv($str_before, $plain_real_charset);
356 } else { # ignore
357 $ret .= $str_before;
358 }
359
360 # Convert encoded text
361 if ($decoding_flag == DECODE_ALL) {
362 $charcnv = '-decode-';
363 } else {
364 ($charcnv, $real_charset) = MAILload_charset_converter($charset);
365 }
366
367 # Decode only
368 if ($charcnv eq '-decode-') {
369 $strtxt =~ s/_/ /g;
370 $ret .= &$dec($strtxt);
371
372 # Ignore if just decoding
373 } elsif ($decoding_flag) {
374 $ret .= "=?$charset?$encoding?$strtxt?=";
375
376 # Decode and convert
377 } elsif (defined(&$charcnv)) {
378 $strtxt =~ s/_/ /g;
379 $ret .= &$charcnv(&$dec($strtxt), $real_charset);
380
381 # Fallback is to ignore
382 } else {
383 $ret .= "=?$charset?$encoding?$strtxt?=";
384 }
385 }
386
387 # Convert left-over unencoded text
388 if ($decoding_flag) { # ignore if just decode
389 $ret .= $str;
390 } elsif (defined(&$plaincnv)) { # decode and convert
391 $ret .= &$plaincnv($str, $plain_real_charset);
392 } else { # ignore
393 $ret .= $str;
394 }
395
396 $ret;
397}
398
399##---------------------------------------------------------------------------##
400## MAILread_body() parses a MIME message body.
401## Usage:
402## ($data, @files) =
403## MAILread_body($fields_hash_ref, $body_date_ref);
404##
405## Parameters:
406## $fields_hash_ref
407## A reference to hash of message/part header
408## fields. Keys are field names in lowercase
409## and values are array references containing the
410## field values. For example, to obtain the
411## content-type, if defined, one would do:
412##
413## $fields_hash_ref->{'content-type'}[0]
414##
415## Values for a fields are stored in arrays since
416## duplication of fields are possible. For example,
417## the Received: header field is typically repeated
418## multiple times. For fields that only occur once,
419## then array for the field will only contain one
420## item.
421##
422## $body_data_ref
423## Reference to body data. It is okay for the
424## filter to modify the text in-place.
425##
426## Return:
427## The first item in the return list is the text that should
428## printed to the message page. Any other items in the return
429## list are derived filenames created.
430##
431## See Also:
432## MAILread_header(), MAILread_file_header()
433##
434sub MAILread_body {
435 my($fields, # Parsed header hash
436 $body, # Reference to raw body text
437 $inaltArg) = @_; # Flag if in multipart/alternative
438
439 my($type, $subtype, $boundary, $content, $ctype, $pos,
440 $encoding, $decodefunc, $args, $part, $uribase);
441 my(@parts) = ();
442 my(@files) = ();
443 my(@array) = ();
444 my $ret = "";
445
446 ## Get type/subtype
447 if (defined($fields->{'content-type'})) {
448 $content = $fields->{'content-type'}->[0];
449 }
450 $content = 'text/plain' unless $content;
451 ($ctype) = $content =~ m%^\s*([\w\-\./]+)%; # Extract content-type
452 $ctype =~ tr/A-Z/a-z/; # Convert to lowercase
453 if ($ctype =~ m%/%) { # Extract base and sub types
454 ($type,$subtype) = split(/\//, $ctype, 2);
455 } elsif ($ctype =~ /text/i) {
456 $ctype = 'text/plain';
457 $type = 'text'; $subtype = 'plain';
458 } else {
459 $type = $subtype = '';
460 }
461
462 ## Check if type is excluded
463 if ($MIMEExcs{$ctype} || $MIMEExcs{$type}) {
464 return (&$ExcludedPartFunc($ctype));
465 }
466
467 ## Get entity URI base
468 if (defined($fields->{'content-base'}) &&
469 ($uribase = $fields->{'content-base'}[0])) {
470 $uribase =~ s/['"\s]//g;
471 } elsif (defined($fields->{'content-location'}) &&
472 ($uribase = $fields->{'content-location'}[0])) {
473 $uribase =~ s/['"\s]//g;
474 }
475 $uribase =~ s|(.*/).*|$1| if $uribase;
476
477 ## Load content-type filter
478 if ( (!defined($filter = &load_filter($ctype)) || !defined(&$filter)) &&
479 (!defined($filter = &load_filter("$type/*")) || !defined(&$filter)) &&
480 (!$inaltArg &&
481 (!defined($filter = &load_filter('*/*')) || !defined(&$filter)) &&
482 $ctype !~ m^\bmessage/(?:rfc822|news)\b^i &&
483 $type !~ /\bmultipart\b/) ) {
484 warn qq|Warning: Unrecognized content-type, "$ctype", |,
485 qq|assuming "application/octet-stream"\n|;
486 $filter = &load_filter('application/octet-stream');
487 }
488
489 ## Check for filter arguments
490 $args = get_filter_args($ctype, "$type/*", $filter);
491
492 ## Check encoding
493 if (defined($fields->{'content-transfer-encoding'})) {
494 $encoding = lc $fields->{'content-transfer-encoding'}[0];
495 $encoding =~ s/\s//g;
496 $decodefunc = &load_decoder($encoding);
497 } else {
498 $encoding = undef;
499 $decodefunc = undef;
500 }
501
502 ## A filter is defined for given content-type
503 if ($filter && defined(&$filter)) {
504 ## decode data
505 if (defined($decodefunc)) {
506 if (defined(&$decodefunc)) {
507 $decoded = &$decodefunc($$body);
508 @array = &$filter($fields, \$decoded, 1, $args);
509 } else {
510 @array = &$filter($fields, $body,
511 $decodefunc =~ /as-is/i, $args);
512 }
513 } else {
514 @array = &$filter($fields, $body, 0, $args);
515 }
516
517 ## Setup return variables
518 $ret = shift @array; # Return string
519 push(@files, @array); # Derived files
520
521 ## No filter defined for given content-type
522 } else {
523 ## If multipart, recursively process each part
524 if ($type =~ /\bmultipart\b/i) {
525 local(%Cid) = ( ) unless scalar(caller) eq 'readmail';
526 my($isalt) = $subtype =~ /\balternative\b/i;
527
528 ## Get boundary
529 $boundary = "";
530 if ($content =~ m/\bboundary\s*=\s*"([^"]*)"/i) {
531 $boundary = $1;
532 } else {
533 ($boundary) = $content =~ m/\bboundary\s*=\s*(\S+)/i;
534 $boundary =~ s/;$//; # chop ';' if grabbed
535 }
536
537 ## If boundary defined, split body into parts
538 if ($boundary =~ /\S/) {
539 my $found = 0;
540 my $have_end = 0;
541 my $start_pos = 0;
542 substr($$body, 0, 0) = "\n";
543 substr($boundary, 0, 0) = "\n--";
544 my $blen = length($boundary);
545 my $bchkstr;
546
547 while (($pos = index($$body, $boundary, $start_pos)) > -1) {
548 # have to check for case when boundary is a substring
549 # of another boundary, yuck!
550 $bchkstr = substr($$body, $pos+$blen, 2);
551 unless ($bchkstr =~ /\A\r?\n/ || $bchkstr =~ /\A--/) {
552 # incomplete match, continue search
553 $start_pos = $pos+$blen;
554 next;
555 }
556 $found = 1;
557 push(@parts, substr($$body, 0, $pos));
558 $parts[$#parts] =~ s/^\r//;
559
560 # prune out part data just grabbed
561 substr($$body, 0, $pos+$blen) = "";
562
563 # check if hit end
564 if ($$body =~ /\A--/) {
565 $have_end = 1;
566 last;
567 }
568
569 # remove EOL at the beginning
570 $$body =~ s/\A\r?\n//;
571 $start_pos = 0;
572 }
573 if (!$have_end) {
574 warn qq/Warning: No end boundary delimiter found in /,
575 qq/message body\n/;
576 push(@parts, $$body);
577 $parts[$#parts] =~ s/^\r//;
578 $$body = "";
579 }
580 if ($found) {
581 # discard front-matter
582 shift(@parts);
583 } else {
584 # no boundary separators in message!
585 warn qq/Warning: No boundary delimiters found in /,
586 qq/multipart body\n/;
587 if ($$body =~ m/\A\n[\w\-]+:\s/) {
588 # remove \n added above if part looks like it has
589 # headers. we keep if it does not to avoid body
590 # data being parsed as a header below.
591 substr($$body, 0, 1) = "";
592 }
593 push(@parts, $$body);
594 }
595
596 ## Else treat body as one part
597 } else {
598 @parts = ($$body);
599 }
600
601 ## Process parts
602 my(@entity) = ();
603 my($cid, $href, $pctype);
604 my %alt_exc = ( );
605 my $have_alt_prefs = $isalt && scalar(@_MIMEAltPrefs);
606 @parts = \(@parts);
607 while (defined($part = shift(@parts))) {
608 $href = { };
609 $partfields = $href->{'fields'} = (MAILread_header($part))[0];
610 $href->{'body'} = $part;
611 $href->{'filtered'} = 0;
612 $pctype = extract_ctype(
613 $partfields->{'content-type'}, $ctype);
614
615 ## check alternative preferences
616 if ($have_alt_prefs) {
617 next if ($alt_exc{$pctype});
618 my $pos = $_MIMEAltPrefs{$pctype};
619 if (defined($pos)) {
620 for (++$pos; $pos <= $#_MIMEAltPrefs; ++$pos) {
621 $alt_exc{$_MIMEAltPrefs[$pos]} = 1;
622 }
623 }
624 }
625
626 ## only add to %Cid if not excluded
627 if (!&MAILis_excluded($pctype)) {
628 if ($isalt) {
629 unshift(@entity, $href);
630 } else {
631 push(@entity, $href);
632 }
633 $cid = $partfields->{'content-id'}[0] ||
634 $partfields->{'message-id'}[0];
635 if (defined($cid)) {
636 $cid =~ s/[\s<>]//g;
637 $Cid{"cid:$cid"} = $href if $cid =~ /\S/;
638 }
639 $cid = undef;
640 if (defined($partfields->{'content-location'}) &&
641 ($cid = $partfields->{'content-location'}[0])) {
642 my $partbase = $uribase;
643 $cid =~ s/['"\s]//g;
644 if (defined($partfields->{'content-base'})) {
645 $partbase = $partfields->{'content-base'}[0];
646 }
647 $cid = apply_base_url($partbase, $cid);
648 if ($cid =~ /\S/ && !$Cid{$cid}) {
649 $Cid{$cid} = $href;
650 }
651 }
652 if ($cid) {
653 $partfields->{'content-location'} = [ $cid ];
654 } elsif (!defined($partfields->{'content-base'})) {
655 $partfields->{'content-base'} = [ $uribase ];
656 }
657 }
658 }
659
660 my($entity);
661 ENTITY: foreach $entity (@entity) {
662 next if $entity->{'filtered'};
663
664 ## If content-type not defined for part, then determine
665 ## content-type based upon multipart subtype.
666 $partfields = $entity->{'fields'};
667 if (!defined($partfields->{'content-type'})) {
668 $partfields->{'content-type'} =
669 [ ($subtype =~ /digest/) ?
670 'message/rfc822' : 'text/plain' ];
671 }
672
673 ## Process part
674 @array = MAILread_body(
675 $partfields,
676 $entity->{'body'},
677 $isalt);
678
679 ## Only use last filterable part in alternate
680 if ($isalt) {
681 $ret = shift @array;
682 if ($ret) {
683 push(@files, @array);
684 $entity->{'filtered'} = 1;
685 last ENTITY;
686 }
687 } else {
688 if (!$array[0]) {
689 $array[0] = &$CantProcessPartFunc(
690 $partfields->{'content-type'}[0]);
691 }
692 $ret .= shift @array;
693 }
694 push(@files, @array);
695 $entity->{'filtered'} = 1;
696 }
697
698 ## Check if multipart/alternative, and no success
699 if (!$ret && $isalt) {
700 warn qq|Warning: No recognized part in multipart/alternative; |,
701 qq|will try to decode last part\n|;
702 $entity = $entity[0];
703 @array = &MAILread_body(
704 $entity->{'fields'},
705 $entity->{'body'});
706 $ret = shift @array;
707 if ($ret) {
708 push(@files, @array);
709 } else {
710 $ret = &$UnrecognizedAltPartFunc();
711 }
712 }
713
714 ## Else if message/rfc822 or message/news
715 } elsif ($ctype =~ m^\bmessage/(?:rfc822|news)\b^i) {
716 $partfields = (MAILread_header($body))[0];
717
718 $ret = &$BeginEmbeddedMesgFunc();
719 if ($FormatHeaderFunc && defined(&$FormatHeaderFunc)) {
720 $ret .= &$FormatHeaderFunc($partfields);
721 } else {
722 warn "Warning: readmail: No message header formatting ",
723 "function defined\n";
724 }
725 @array = MAILread_body($partfields, $body);
726 $ret .= shift @array ||
727 &$CantProcessPartFunc(
728 $partfields->{'content-type'}[0] || 'text/plain');
729 $ret .= &$EndEmbeddedMesgFunc();
730
731 push(@files, @array);
732
733 ## Else cannot handle type
734 } else {
735 $ret = '';
736 }
737 }
738
739 ($ret, @files);
740}
741
742##---------------------------------------------------------------------------##
743## MAILread_header reads (and strips) a mail message header from the
744## variable $mesg. $mesg is a reference to the mail message in
745## a string.
746##
747## $fields is a reference to a hash to put field values indexed by
748## field labels that have been converted to all lowercase.
749## Field values are array references to the values
750## for each field.
751##
752## ($fields_hash_ref, $header_txt) = MAILread_header($mesg_data);
753##
754sub MAILread_header {
755 my($mesg) = shift;
756
757 my $fields = { };
758 my $label = '';
759 my $header = '';
760 my($value, $tmp, $pos);
761
762 ## Read a line at a time.
763 for ($pos=0; $pos >= 0; ) {
764 $pos = index($$mesg, "\n");
765 if ($pos >= 0) {
766 $tmp = substr($$mesg, 0, $pos+1);
767 substr($$mesg, 0, $pos+1) = "";
768 last if $tmp =~ /^\r?$/; # Done if blank line
769
770 $header .= $tmp;
771 chop $tmp; # Chop newline
772 $tmp =~ s/\r$//; # Delete <CR> characters
773 } else {
774 $tmp = $$mesg;
775 $header .= $tmp;
776 }
777
778 ## Decode text if requested
779 $tmp = &MAILdecode_1522_str($tmp,JUST_DECODE) if $DecodeHeader;
780
781 ## Check for continuation of a field
782 if ($tmp =~ s/^\s//) {
783 $fields->{$label}[-1] .= $tmp if $label;
784 next;
785 }
786
787 ## Separate head from field text
788 if ($tmp =~ /^([^:\s]+):\s*([\s\S]*)$/) {
789 ($label, $value) = (lc($1), $2);
790 if ($fields->{$label}) {
791 push(@{$fields->{$label}}, $value);
792 } else {
793 $fields->{$label} = [ $value ];
794 }
795 }
796 }
797 ($fields, $header);
798}
799
800##---------------------------------------------------------------------------##
801## MAILread_file_header reads (and strips) a mail message header
802## from the filehandle $handle. The routine behaves in the
803## same manner as MAILread_header;
804##
805## ($fields_hash, $header_text) = MAILread_file_header($filehandle);
806##
807sub MAILread_file_header {
808 my($handle) = @_;
809 my $label = '';
810 my $header = '';
811 my $fields = { };
812 local $/ = "\n";
813
814 my($value, $tmp);
815 while (($tmp = <$handle>) !~ /^[\r]?$/) {
816 ## Save raw text
817 $header .= $tmp;
818
819 ## Delete eol characters
820 $tmp =~ s/[\r\n]//g;
821
822 ## Decode text if requested
823 $tmp = &MAILdecode_1522_str($tmp,JUST_DECODE) if $DecodeHeader;
824
825 ## Check for continuation of a field
826 if ($tmp =~ s/^\s//) {
827 $fields->{$label}[-1] .= $tmp if $label;
828 next;
829 }
830
831 ## Separate head from field text
832 if ($tmp =~ /^([^:\s]+):\s*([\s\S]*)$/) {
833 ($label, $value) = (lc($1), $2);
834 if (defined($fields->{$label})) {
835 push(@{$fields->{$label}}, $value);
836 } else {
837 $fields->{$label} = [ $value ];
838 }
839 }
840 }
841 ($fields, $header);
842}
843
844##---------------------------------------------------------------------------##
845## MAILis_excluded() checks if specified content-type has been
846## specified to be excluded.
847##
848sub MAILis_excluded {
849 my $ctype = lc($_[0]) || 'text/plain';
850 if ($MIMEExcs{$ctype}) {
851 return 1;
852 }
853 if ($ctype =~ m|([^/]+)/|) {
854 return $MIMEExcs{$1};
855 }
856 0;
857}
858
859##---------------------------------------------------------------------------##
860## MAILhead_get_disposition gets the content disposition and
861## filename from $hfields, $hfields is a hash produced by the
862## MAILread_header and MAILread_file_header routines.
863##
864sub MAILhead_get_disposition {
865 my($hfields) = shift;
866 my($disp, $filename) = ('', '');
867 local($_);
868
869 if (defined($hfields->{'content-disposition'}) &&
870 ($_ = $hfields->{'content-disposition'}->[0])) {
871 ($disp) = /^\s*([^\s;]+)/;
872 if (/filename="([^"]+)"/i) {
873 $filename = $1;
874 } elsif (/filename=(\S+)/i) {
875 ($filename = $1) =~ s/;\s*$//g;
876 }
877 }
878 if (!$filename && defined($_ = $hfields->{'content-type'}[0])) {
879 if (/name="([^"]+)"/i) {
880 $filename = $1;
881 } elsif (/name=(\S+)/i) {
882 ($filename = $1) =~ s/;\s*$//g;
883 }
884 }
885 $filename = MAILdecode_1522_str($filename, DECODE_ALL);
886 $filename =~ s%.*[/\\:]%%; # Remove any path component
887 $filename =~ s/^\s+//; # Remove leading whitespace
888 $filename =~ s/\s+$//; # Remove trailing whitespace
889 ($disp, $filename);
890}
891
892##---------------------------------------------------------------------------##
893## MAILparse_parameter_str(): parses a parameter/value string.
894## Support for RFC 2184 extensions exists. The $hasmain flag tells
895## the method if there is an intial main value for the sting. For
896## example:
897##
898## text/plain; charset=us-ascii
899## ----^^^^^^^^^^
900##
901## The "text/plain" part is not a parameter/value pair, but having
902## an initial value is common among some header fields that can have
903## parameter/value pairs (egs: Content-Type, Content-Disposition).
904##
905## Return Value:
906## Reference to a hash. Each key is the attribute name.
907## The special key, 'x-main', is the main value if the
908## $hasmain flag is set.
909##
910## Each hash value is a hash reference with three keys:
911## 'charset', 'lang', 'value'. 'charset' and 'lang' may be
912## undef if character set or language information is not
913## specified.
914##
915## Example Usage:
916##
917## $content_type_field = 'text/plain; charset=us-ascii';
918## $parms = MAILparse_parameter_str($content_type_field, 1);
919## $ctype = $parms->{'x-main'};
920## $mesg_body_charset = $parms->{'charset'}{'value'};
921##
922sub MAILparse_parameter_str {
923 my $str = shift; # Input string
924 my $hasmain = shift; # Flag if there is a main value to extract
925
926 require 'rfc822.pl';
927
928 my $parm = { };
929 my(@toks) = (rfc822::uncomment($str));
930 my($tok, $name, $value, $charset, $lang, $part);
931
932 $parm->{'x-main'} = shift @toks if $hasmain;
933
934 ## Loop thru token list
935 while ($tok = shift @toks) {
936 next if $tok eq ";";
937 ($name, $value) = split(/=/, $tok, 2);
938 ## Check if charset/lang specified
939 if ($name =~ s/\*$//) {
940 if ($value =~ s/^([^']*)'([^']*)'//) {
941 ($charset, $lang) = ($1, $2);
942 } else {
943 ($charset, $lang) = (undef, undef);
944 }
945 }
946 ## Check if parameter is only part
947 if ($name =~ s/\*(\d+)$//) {
948 $part = $1 - 1; # we start at 0 internally
949 } else {
950 $part = 0;
951 }
952 ## Set values for parameter
953 $name = lc $name;
954 $parm->{$name} = {
955 'charset' => $charset,
956 'lang' => $lang,
957 };
958 ## Check if value is next token
959 if ($value eq "") {
960 ## If value next token, than it must be quoted
961 $value = shift @toks;
962 $value =~ s/^"//; $value =~ s/"$//; $value =~ s/\\//g;
963 }
964 $parm->{$name}{'vlist'}[$part] = $value;
965 }
966
967 ## Now we loop thru each parameter and define the final values from
968 ## the parts
969 foreach $name (keys %$parm) {
970 next if $name eq 'x-main';
971 $parm->{$name}{'value'} = join("", @{$parm->{$name}{'vlist'}});
972 }
973
974 $parm;
975}
976
977##---------------------------------------------------------------------------##
978## MAILset_alternative_prefs() is used to set content-type
979## preferences for multipart/alternative entities. The list
980## specified will supercede the prefered format as denoted by
981## the ording of parts in the entity.
982##
983## A content-type listed earlier in the array will be prefered
984## over one later. For example:
985##
986## MAILset_alternative_prefs('text/plain', 'text/html');
987##
988## States that if a multipart/alternative entity contains a
989## text/plain part and a text/html part, the text/plain part will
990## be prefered over the text/html part.
991##
992sub MAILset_alternative_prefs {
993 @_MIMEAltPrefs = map { lc } @_;
994 %_MIMEAltPrefs = ();
995 my $i = 0;
996 my $ctype;
997 foreach $ctype (@_MIMEAltPrefs) {
998 $_MIMEAltPrefs{$ctype} = $i++;
999 }
1000}
1001
1002##---------------------------------------------------------------------------##
1003## MAILset_charset_aliases() is used to define name aliases for
1004## charset names.
1005##
1006## Example usage:
1007## MAILset_charset_aliases( {
1008## 'iso-8859-1' => [ 'latin1', 'iso_8859_1', '8859-1' ],
1009## 'iso-8859-15' => [ 'latin9', 'iso_8859_15', '8859-15' ],
1010## }, $override );
1011##
1012sub MAILset_charset_aliases {
1013 my $map = shift;
1014 my $override = shift;
1015
1016 %MIMECharsetAliases = () if $override;
1017 my($charset, $aliases, $alias);
1018 while (($charset, $aliases) = each(%$map)) {
1019 $charset = lc $charset;
1020 foreach $alias (@$aliases) {
1021 $MIMECharsetAliases{lc $alias} = $charset;
1022 }
1023 }
1024}
1025
1026##---------------------------------------------------------------------------##
1027## MAILload_charset_converter() loads the charset converter function
1028## associated with given charset name.
1029##
1030## Example usage:
1031## ($func, $real_charset) = MAILload_charset_converter($charset);
1032##
1033## $func is the reference to the converter function, which may be
1034## undef. $real_charset is the real charset name that should be
1035## used when invoking the function.
1036##
1037sub MAILload_charset_converter {
1038 my $charset = lc shift;
1039 $charset = $MIMECharsetAliases{$charset} if $MIMECharsetAliases{$charset};
1040 my $func = load_charset($charset);
1041 if (!defined($func) || !defined(&$func)) {
1042 $func = load_charset('default');
1043 }
1044 ($func, $charset);
1045}
1046
1047###############################################################################
1048## Private Routines
1049###############################################################################
1050
1051##---------------------------------------------------------------------------##
1052## Default function for unable to process a part of a multipart
1053## message.
1054##
1055sub cantProcessPart {
1056 my($ctype) = $_[0];
1057 warn "Warning: Could not process part with given Content-Type: ",
1058 "$ctype\n";
1059 "<br><tt>&lt;&lt;&lt; $ctype: Unrecognized &gt;&gt;&gt;</tt><br>\n";
1060}
1061##---------------------------------------------------------------------------##
1062## Default function returning message for content-types excluded.
1063##
1064sub excludedPart {
1065 my($ctype) = $_[0];
1066 "<br><tt>&lt;&lt;&lt; $ctype: EXCLUDED &gt;&gt;&gt;</tt><br>\n";
1067}
1068##---------------------------------------------------------------------------##
1069## Default function for unrecognizeable part in multipart/alternative.
1070##
1071sub unrecognizedAltPart {
1072 warn "Warning: No recognizable part in multipart/alternative\n";
1073 "<br><tt>&lt;&lt;&lt; multipart/alternative: ".
1074 "No recognizable part &gt;&gt;&gt;</tt><br>\n";
1075}
1076##---------------------------------------------------------------------------##
1077## Default function for beggining of embedded message
1078## (ie message/rfc822 or message/news).
1079##
1080sub beginEmbeddedMesg {
1081qq|<blockquote><small>---&nbsp;<i>Begin&nbsp;Message</i>&nbsp;---</small>\n|;
1082}
1083##---------------------------------------------------------------------------##
1084## Default function for end of embedded message
1085## (ie message/rfc822 or message/news).
1086##
1087sub endEmbeddedMesg {
1088qq|<small>---&nbsp;<i>End Message</i>&nbsp;---</small></blockquote>\n|;
1089}
1090
1091##---------------------------------------------------------------------------##
1092
1093sub load_charset {
1094 require $MIMECharSetConvertersSrc{$_[0]}
1095 if defined($MIMECharSetConvertersSrc{$_[0]}) &&
1096 $MIMECharSetConvertersSrc{$_[0]};
1097 $MIMECharSetConverters{$_[0]};
1098}
1099sub load_decoder {
1100 my $enc = lc shift; $enc =~ s/\s//;
1101 require $MIMEDecodersSrc{$enc}
1102 if defined($MIMEDecodersSrc{$enc}) &&
1103 $MIMEDecodersSrc{$enc};
1104 $MIMEDecoders{$enc};
1105}
1106sub load_filter {
1107 require $MIMEFiltersSrc{$_[0]}
1108 if defined($MIMEFiltersSrc{$_[0]}) &&
1109 $MIMEFiltersSrc{$_[0]};
1110 $MIMEFilters{$_[0]};
1111}
1112sub get_filter_args {
1113 my $args = '';
1114 my $s;
1115 foreach $s (@_) {
1116 next unless defined $s;
1117 $args = $MIMEFiltersArgs{$s};
1118 last if defined($args) && ($args ne '');
1119 }
1120 $args;
1121}
1122
1123##---------------------------------------------------------------------------##
1124## extract_ctype() extracts the content-type specification from
1125## the beginning of given string.
1126##
1127sub extract_ctype {
1128 if (!defined($_[0]) ||
1129 (ref($_[0]) && ($_[0][0] !~ /\S/)) ||
1130 ($_[0] !~ /\S/)) {
1131 return 'message/rfc822'
1132 if (defined($_[1]) && ($_[1] eq 'multipart/digest'));
1133 return 'text/plain';
1134 }
1135 if (ref($_[0])) {
1136 $_[0][0] =~ m|^\s*([\w\-\./]+)|;
1137 return lc($1);
1138 }
1139 $_[0] =~ m|^\s*([\w\-\./]+)|;
1140 lc($1);
1141}
1142
1143##---------------------------------------------------------------------------##
1144
1145sub apply_base_url {
1146 my($b, $u) = @_;
1147 return $u if !defined($b) || $b !~ /\S/;
1148
1149 my($ret);
1150 $u =~ s/^\s+//;
1151 if ($u =~ m%^$Url%o || $u =~ m/^#/) {
1152 ## Absolute URL or scroll link; do nothing
1153 $ret = $u;
1154 } else {
1155 ## Relative URL
1156 if ($u =~ /^\./) {
1157 ## "./---" or "../---": Need to remove and adjust base
1158 ## accordingly.
1159 $b =~ s/\/$//;
1160 my @a = split(/\//, $b);
1161 my $cnt = 0;
1162 while ( $cnt <= scalar(@a) &&
1163 $u =~ s|^(\.{1,2})/|| ) { ++$cnt if length($1) == 2; }
1164 splice(@a, -$cnt) if $cnt > 0;
1165 $b = join('/', @a, "");
1166
1167 } elsif ($u =~ m%^/%) {
1168 ## "/---": Just use hostname:port of base.
1169 $b =~ s%^(${Url}[^/]*)/.*%$1%o;
1170 }
1171 $ret = $b . $u;
1172 }
1173 $ret;
1174}
1175
1176##---------------------------------------------------------------------------##
1177
1178sub dump_header {
1179 my $fh = shift;
1180 my $fields = shift;
1181 my($key, $a, $value);
1182 foreach $key (sort keys %$fields) {
1183 $a = $fields->{$key};
1184 if (ref($a)) {
1185 foreach $value (@$a) {
1186 print $fh "$key: $value\n";
1187 }
1188 } else {
1189 print $fh "$key: $a\n";
1190 }
1191 }
1192}
1193
1194##---------------------------------------------------------------------------##
11951; # for require