tools/perl-5.8.0/lib/site_perl/5.8.0/readmail.pl

##---------------------------------------------------------------------------##
##  File:
##      $Id: readmail.pl,v 2.21 2002/10/15 22:06:53 ehood Exp $
##  Author:
##      Earl Hood       mhonarc@mhonarc.org
##  Description:
##      Library defining routines to parse MIME e-mail messages.  The
##      library is designed so it may be reused for other e-mail
##      filtering programs.  The default behavior is for mail->html
##      filtering, however, the defaults can be overridden to allow
##      mail->whatever filtering.
##
##      Public Functions:
##      ----------------
##      $data           = MAILdecode_1522_str($str);
##      ($data, @files) = MAILread_body($fields_hash_ref, $body_ref);
##      $hash_ref       = MAILread_file_header($handle);
##      $hash_ref       = MAILread_header($mesg_str_ref);
##
##      ($disp, $file)  = MAILhead_get_disposition($fields_hash_ref);
##      $boolean        = MAILis_excluded($content_type);
##      $parm_hash_ref  = MAILparse_parameter_str($header_field);
##      $parm_hash_ref  = MAILparse_parameter_str($header_field, 1);
##
##---------------------------------------------------------------------------##
##    Copyright (C) 1996-2001   Earl Hood, mhonarc@mhonarc.org
##
##    This program is free software; you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation; either version 2 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program; if not, write to the Free Software
##    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
##    02111-1307, USA
##---------------------------------------------------------------------------##

package readmail;

###############################################################################
##      Private Globals                                                      ##
###############################################################################

my $Url           = '(\w+://|\w+:)';

my @_MIMEAltPrefs = ();
my %_MIMEAltPrefs = ();

###############################################################################
##      Public Globals                                                       ##
###############################################################################

##---------------------------------------------------------------------------##
##      Constants
##

##  Constants for use as second argument to MAILdecode_1522_str().
sub JUST_DECODE() { 1; }
sub DECODE_ALL()  { 2; }

##---------------------------------------------------------------------------##

##---------------------------------------------------------------------------##
##      Scalar Variables
##

##  Flag if message headers are decoded in the parse header routines:
##  MAILread_header, MAILread_file_header.  This only affects the
##  values of the field hash created.  The original header is still
##  passed as the return value.
##
##  The only 1522 data that will be decoded is data encoded with charsets
##  set to "-decode-" in the %MIMECharSetConverters hash.

$DecodeHeader   = 0;

##---------------------------------------------------------------------------##
##      Variables for holding information related to the functions used
##      for processing MIME data.  Variables are defined in the scope
##      of main.

## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##  %MIMEDecoders is the associative array for storing functions for
##  decoding mime data.
##
##      Keys => content-transfer-encoding (should be in lowercase)
##      Values => function name.
##
##  Function names should be qualified with package identifiers.
##  Functions are called as follows:
##
##      $decoded_data = &function($data);
##
##  The value "as-is" may be used to allow the data to be passed without
##  decoding to the registered filter, but the decoded flag will be
##  set to true.

%MIMEDecoders                   = ()
    unless defined(%MIMEDecoders);
%MIMEDecodersSrc                = ()
    unless defined(%MIMEDecodersSrc);

##      Default settings:
$MIMEDecoders{"7bit"}             = "as-is"
    unless defined($MIMEDecoders{"7bit"});
$MIMEDecoders{"8bit"}             = "as-is"
    unless defined($MIMEDecoders{"8bit"});
$MIMEDecoders{"binary"}           = "as-is"
    unless defined($MIMEDecoders{"binary"});
$MIMEDecoders{"base64"}           = "base64::b64decode"
    unless defined($MIMEDecoders{"base64"});
$MIMEDecoders{"quoted-printable"} = "quoted_printable::qprdecode"
    unless defined($MIMEDecoders{"quoted-printable"});
$MIMEDecoders{"x-uuencode"}       = "base64::uudecode"
    unless defined($MIMEDecoders{"x-uuencode"});
$MIMEDecoders{"x-uue"}            = "base64::uudecode"
    unless defined($MIMEDecoders{"x-uue"});
$MIMEDecoders{"uuencode"}         = "base64::uudecode"
    unless defined($MIMEDecoders{"uuencode"});

$MIMEDecodersSrc{"base64"}              = "base64.pl"
    unless defined($MIMEDecodersSrc{"base64"});
$MIMEDecodersSrc{"quoted-printable"}    = "qprint.pl"
    unless defined($MIMEDecodersSrc{"quoted-printable"});
$MIMEDecodersSrc{"x-uuencode"}          = "base64.pl"
    unless defined($MIMEDecodersSrc{"x-uuencode"});
$MIMEDecodersSrc{"x-uue"}               = "base64.pl"
    unless defined($MIMEDecodersSrc{"x-uue"});
$MIMEDecodersSrc{"uuencode"}            = "base64.pl"
    unless defined($MIMEDecodersSrc{"uuencode"});

## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##  %MIMECharSetConverters is the associative array for storing functions
##  for converting data in a particular charset to a destination format
##  within the MAILdecode_1522_str() routine. Destination format is defined
##  by the function.
##
##      Keys => charset (should be in lowercase)
##      Values => function name.
##
##  Charset values take on a form like "iso-8859-1" or "us-ascii".
##              NOTE: Values need to be in lower-case.
##
##  The key "default" can be assigned to define the default function
##  to call if no explicit charset function is defined.
##
##  The key "plain" can be set to a function for decoded regular text not
##  encoded in 1522 format.
##
##  Function names are name of defined perl function and should be
##  qualified with package identifiers. Functions are called as follows:
##
##      $converted_data = &function($data, $charset);
##
##  A function called "-decode-" implies that the data should be
##  decoded, but no converter is to be invoked.
##
##  A function called "-ignore-" implies that the data should
##  not be decoded and converted.  Ie.  For the specified charset,
##  the encoding will stay unprocessed and passed back in the return
##  string.

%MIMECharSetConverters                  = ()
    unless defined(%MIMECharSetConverters);
%MIMECharSetConvertersSrc               = ()
    unless defined(%MIMECharSetConvertersSrc);

##      Default settings:
$MIMECharSetConverters{"default"}       = "-ignore-"
    unless defined($MIMECharSetConverters{"default"});

## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##  %MIMEFilters is the associative array for storing functions that
##  process various content-types in the MAILread_body routine.
##
##      Keys => Content-type (should be in lowercase)
##      Values => function name.
##
##  Function names should be qualified with package identifiers.
##  Functions are called as follows:
##
##      $converted_data = &function($header, *parsed_header_assoc_array,
##                                  *message_data, $decoded_flag,
##                                  $optional_filter_arguments);
##
##  Functions can be registered for base types.  Example:
##
##      $MIMEFilters{"image/*"} = "mypackage'function";
##
##  IMPORTANT: If a function specified is not defined when MAILread_body
##  tries to invoke it, MAILread_body will silently ignore.  Make sure
##  that all functions are defined before invoking MAILread_body.

%MIMEFilters    = ()
    unless defined(%MIMEFilters);
%MIMEFiltersSrc = ()
    unless defined(%MIMEFiltersSrc);

## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##  %MIMEFiltersArgs is the associative array for storing any optional
##  arguments to functions specified in MIMEFilters (the
##  $optional_filter_arguments from above).
##
##      Keys => Either one of the following: content-type, function name.
##      Values => Argument string (format determined by filter function).
##
##  Arguments listed for a content-type will be used over arguments
##  listed for a function if both are applicable.

%MIMEFiltersArgs        = ()
    unless defined(%MIMEFiltersArgs);

## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##  %MIMEExcs is the associative array listing which data types
##  should be auto-excluded during parsing:
##
##      Keys => content-type, or base-type
##      Values => <should evaluate to a true expression>
##
##  For purposes of efficiency, content-types, or base-types, should
##  be specified in lowercase.  All key lookups are done in lowercase.

%MIMEExcs                       = ()
    unless defined(%MIMEExcs);

## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##  %MIMECharsetAliases is a mapping of charset names to charset names.
##  The MAILset_charset_aliases() routine should be used to set the
##  values of this hash.
##
##      Keys => charset name
##      Values => real charset name
##
%MIMECharsetAliases = ()
    unless defined(%MIMECharsetAliases);

##---------------------------------------------------------------------------
##      Variables holding functions for generating processed output
##      for MAILread_body().  The default functions generate HTML.
##      However, the variables can be set to functions that generate
##      a different type of output.
##
##      $FormatHeaderFunc has no default, and must be defined by
##      the calling program.
##
##  Function that returns a message when failing to process a part of a
##  a multipart message.  The content-type of the message is passed
##  as an argument.

$CantProcessPartFunc            = \&cantProcessPart
    unless(defined($CantProcessPartFunc));

##  Function that returns a message when a part is excluded via %MIMEExcs.

$ExcludedPartFunc       = \&excludedPart
    unless(defined($ExcludedPartFunc));

##  Function that returns a message when a part is unrecognized in a
##  multipart/alternative message.  I.e. No part could be processed.
##  No arguments are passed to function.

$UnrecognizedAltPartFunc        = \&unrecognizedAltPart
    unless(defined($UnrecognizedAltPartFunc));

##  Function that returns a string to go before any data generated generating
##  from processing an embedded message (message/rfc822 or message/news).
##  No arguments are passed to function.

$BeginEmbeddedMesgFunc          = \&beginEmbeddedMesg
    unless(defined($BeginEmbeddedMesgFunc));

##  Function that returns a string to go after any data generated generating
##  from processing an embedded message (message/rfc822 or message/news).
##  No arguments are passed to function.

$EndEmbeddedMesgFunc            = \&endEmbeddedMesg
    unless(defined($EndEmbeddedMesgFunc));

##  Function to return a string that is a result of the functions
##  processing of a message header.  The function is called for
##  embedded messages (message/rfc822 and message/news).  The
##  arguments to function are:
##
##   1. Pointer to associative array representing message header
##      contents with the keys as field labels (in all lower-case)
##      and the values as field values of the labels.
##
##   2. Pointer to associative array mapping lower-case keys of
##      argument 1 to original case.
##
##  Prototype: $return_data = &function(*fields, *lower2orig_fields);

$FormatHeaderFunc               = undef
    unless(defined($FormatHeaderFunc));

###############################################################################
##      Public Routines                                                      ##
###############################################################################
##---------------------------------------------------------------------------##
##      MAILdecode_1522_str() decodes a string encoded in a format
##      specified by RFC 1522.  The decoded string is the return value.
##      If no MIMECharSetConverters is registered for a charset, then
##      the decoded data is returned "as-is".
##
##      Usage:
##
##          $ret_data = &MAILdecode_1522_str($str, $decoding_flag);
##
##      If $decoding_flag is JUST_DECODE, $str will be decoded for only
##      the charsets specified as "-decode-".  If it is equal to
##      DECODE_ALL, all encoded data is decoded without any conversion.
##
sub MAILdecode_1522_str {
    my($str) = shift;
    my($decoding_flag) = shift || 0;
    my($charset,
       $encoding,
       $dec,
       $charcnv,
       $real_charset,
       $plaincnv,
       $plain_real_charset,
       $strtxt,
       $str_before);
    my($ret) = ('');

    # Get plain converter
    ($plaincnv, $plain_real_charset) = MAILload_charset_converter('plain');
    $plain_real_charset = 'us-ascii'  if $plain_real_charset eq 'plain';

    # Decode string
    while ($str =~ /=\?([^?]+)\?(.)\?([^?]*)\?=/) {

        # Grab components
        ($charset, $encoding) = ($1, $2);
        $strtxt = $3; $str_before = $`; $str = $';

        # Check encoding method and grab proper decoder
        if ($encoding =~ /b/i) {
            $dec = &load_decoder('base64');
        } else {
            $dec = &load_decoder('quoted-printable');
        }

        # Convert before (unencoded) text
        if ($decoding_flag) {                           # ignore if just decode
            $ret .= $str_before;
        } elsif (defined(&$plaincnv)) {                 # decode and convert
            $ret .= &$plaincnv($str_before, $plain_real_charset);
        } else {                                        # ignore
            $ret .= $str_before;
        }

        # Convert encoded text
        if ($decoding_flag == DECODE_ALL) {
            $charcnv = '-decode-';
        } else {
            ($charcnv, $real_charset) = MAILload_charset_converter($charset);
        }

        # Decode only
        if ($charcnv eq '-decode-') {
            $strtxt =~ s/_/ /g;
            $ret .= &$dec($strtxt);

        # Ignore if just decoding
        } elsif ($decoding_flag) {
            $ret .= "=?$charset?$encoding?$strtxt?=";

        # Decode and convert
        } elsif (defined(&$charcnv)) {
            $strtxt =~ s/_/ /g;
            $ret .= &$charcnv(&$dec($strtxt), $real_charset);

        # Fallback is to ignore
        } else {
            $ret .= "=?$charset?$encoding?$strtxt?=";
        }
    }

    # Convert left-over unencoded text
    if ($decoding_flag) {                       # ignore if just decode
        $ret .= $str;
    } elsif (defined(&$plaincnv)) {             # decode and convert
        $ret .= &$plaincnv($str, $plain_real_charset);
    } else {                                    # ignore
        $ret .= $str;
    }

    $ret;
}

##---------------------------------------------------------------------------##
##      MAILread_body() parses a MIME message body.
##      Usage:
##        ($data, @files) =
##            MAILread_body($fields_hash_ref, $body_date_ref);
##
##      Parameters:
##        $fields_hash_ref
##                    A reference to hash of message/part header
##                    fields.  Keys are field names in lowercase
##                    and values are array references containing the
##                    field values.  For example, to obtain the
##                    content-type, if defined, one would do:
##
##                      $fields_hash_ref->{'content-type'}[0]
##
##                    Values for a fields are stored in arrays since
##                    duplication of fields are possible.  For example,
##                    the Received: header field is typically repeated
##                    multiple times.  For fields that only occur once,
##                    then array for the field will only contain one
##                    item.
##
##        $body_data_ref
##                    Reference to body data.  It is okay for the
##                    filter to modify the text in-place.
##
##      Return:
##        The first item in the return list is the text that should
##        printed to the message page.  Any other items in the return
##        list are derived filenames created.
##
##      See Also:
##        MAILread_header(), MAILread_file_header()
##
sub MAILread_body {
    my($fields,         # Parsed header hash
       $body,           # Reference to raw body text
       $inaltArg) = @_; # Flag if in multipart/alternative

    my($type, $subtype, $boundary, $content, $ctype, $pos,
       $encoding, $decodefunc, $args, $part, $uribase);
    my(@parts) = ();
    my(@files) = ();
    my(@array) = ();
    my $ret = "";

    ## Get type/subtype
    if (defined($fields->{'content-type'})) {
        $content = $fields->{'content-type'}->[0];
    }
    $content = 'text/plain'  unless $content;
    ($ctype) = $content =~ m%^\s*([\w\-\./]+)%; # Extract content-type
    $ctype =~ tr/A-Z/a-z/;                      # Convert to lowercase
    if ($ctype =~ m%/%) {                       # Extract base and sub types
        ($type,$subtype) = split(/\//, $ctype, 2);
    } elsif ($ctype =~ /text/i) {
        $ctype = 'text/plain';
        $type = 'text';  $subtype = 'plain';
    } else {
        $type = $subtype = '';
    }

    ## Check if type is excluded
    if ($MIMEExcs{$ctype} || $MIMEExcs{$type}) {
        return (&$ExcludedPartFunc($ctype));
    }

    ## Get entity URI base
    if (defined($fields->{'content-base'}) &&
            ($uribase = $fields->{'content-base'}[0])) {
        $uribase =~ s/['"\s]//g;
    } elsif (defined($fields->{'content-location'}) &&
                ($uribase = $fields->{'content-location'}[0])) {
        $uribase =~ s/['"\s]//g;
    }
    $uribase =~ s|(.*/).*|$1|  if $uribase;

    ## Load content-type filter
    if ( (!defined($filter = &load_filter($ctype)) || !defined(&$filter)) &&
         (!defined($filter = &load_filter("$type/*")) || !defined(&$filter)) &&
         (!$inaltArg &&
          (!defined($filter = &load_filter('*/*')) || !defined(&$filter)) &&
             $ctype !~ m^\bmessage/(?:rfc822|news)\b^i &&
             $type  !~ /\bmultipart\b/) ) {
        warn qq|Warning: Unrecognized content-type, "$ctype", |,
             qq|assuming "application/octet-stream"\n|;
        $filter = &load_filter('application/octet-stream');
    }

    ## Check for filter arguments
    $args = get_filter_args($ctype, "$type/*", $filter);

    ## Check encoding
    if (defined($fields->{'content-transfer-encoding'})) {
        $encoding = lc $fields->{'content-transfer-encoding'}[0];
        $encoding =~ s/\s//g;
        $decodefunc = &load_decoder($encoding);
    } else {
        $encoding = undef;
        $decodefunc = undef;
    }

    ## A filter is defined for given content-type
    if ($filter && defined(&$filter)) {
        ## decode data
        if (defined($decodefunc)) {
            if (defined(&$decodefunc)) {
                $decoded = &$decodefunc($$body);
                @array = &$filter($fields, \$decoded, 1, $args);
            } else {
                @array = &$filter($fields, $body,
                                  $decodefunc =~ /as-is/i, $args);
            }
        } else {
            @array = &$filter($fields, $body, 0, $args);
        }

        ## Setup return variables
        $ret = shift @array;                            # Return string
        push(@files, @array);                           # Derived files

    ## No filter defined for given content-type
    } else {
        ## If multipart, recursively process each part
        if ($type =~ /\bmultipart\b/i) {
            local(%Cid) = ( )  unless scalar(caller) eq 'readmail';
            my($isalt) = $subtype =~ /\balternative\b/i;

            ## Get boundary
            $boundary = "";
            if ($content =~ m/\bboundary\s*=\s*"([^"]*)"/i) {
                $boundary = $1;
            } else {
                ($boundary) = $content =~ m/\bboundary\s*=\s*(\S+)/i;
                $boundary =~ s/;$//;  # chop ';' if grabbed
            }

            ## If boundary defined, split body into parts
            if ($boundary =~ /\S/) {
                my $found = 0;
                my $have_end = 0;
                my $start_pos = 0;
                substr($$body, 0, 0) = "\n";
                substr($boundary, 0, 0) = "\n--";
                my $blen = length($boundary);
                my $bchkstr;

                while (($pos = index($$body, $boundary, $start_pos)) > -1) {
                    # have to check for case when boundary is a substring
                    #   of another boundary, yuck!
                    $bchkstr = substr($$body, $pos+$blen, 2);
                    unless ($bchkstr =~ /\A\r?\n/ || $bchkstr =~ /\A--/) {
                        # incomplete match, continue search
                        $start_pos = $pos+$blen;
                        next;
                    }
                    $found = 1;
                    push(@parts, substr($$body, 0, $pos));
                    $parts[$#parts] =~ s/^\r//;

                    # prune out part data just grabbed
                    substr($$body, 0, $pos+$blen) = "";

                    # check if hit end
                    if ($$body =~ /\A--/) {
                        $have_end = 1;
                        last;
                    }

                    # remove EOL at the beginning
                    $$body =~ s/\A\r?\n//;
                    $start_pos = 0;
                }
                if (!$have_end) {
                    warn qq/Warning: No end boundary delimiter found in /,
                         qq/message body\n/;
                    push(@parts, $$body);
                    $parts[$#parts] =~ s/^\r//;
                    $$body = "";
                }
                if ($found) {
                    # discard front-matter
                    shift(@parts);
                } else {
                    # no boundary separators in message!
                    warn qq/Warning: No boundary delimiters found in /,
                         qq/multipart body\n/;
                    if ($$body =~ m/\A\n[\w\-]+:\s/) {
                        # remove \n added above if part looks like it has
                        # headers.  we keep if it does not to avoid body
                        # data being parsed as a header below.
                        substr($$body, 0, 1) = "";
                    }
                    push(@parts, $$body);
                }

            ## Else treat body as one part
            } else {
                @parts = ($$body);
            }

            ## Process parts
            my(@entity) = ();
            my($cid, $href, $pctype);
            my %alt_exc = ( );
            my $have_alt_prefs = $isalt && scalar(@_MIMEAltPrefs);
            @parts = \(@parts);
            while (defined($part = shift(@parts))) {
                $href = { };
                $partfields = $href->{'fields'} = (MAILread_header($part))[0];
                $href->{'body'} = $part;
                $href->{'filtered'} = 0;
                $pctype = extract_ctype(
                    $partfields->{'content-type'}, $ctype);

                ## check alternative preferences
                if ($have_alt_prefs) {
                  next  if ($alt_exc{$pctype});
                  my $pos = $_MIMEAltPrefs{$pctype};
                  if (defined($pos)) {
                      for (++$pos; $pos <= $#_MIMEAltPrefs; ++$pos) {
                          $alt_exc{$_MIMEAltPrefs[$pos]} = 1;
                      }
                  }
                }

                ## only add to %Cid if not excluded
                if (!&MAILis_excluded($pctype)) {
                    if ($isalt) {
                        unshift(@entity, $href);
                    } else {
                        push(@entity, $href);
                    }
                    $cid = $partfields->{'content-id'}[0] ||
                           $partfields->{'message-id'}[0];
                    if (defined($cid)) {
                        $cid =~ s/[\s<>]//g;
                        $Cid{"cid:$cid"} = $href  if $cid =~ /\S/;
                    }
                    $cid = undef;
                    if (defined($partfields->{'content-location'}) &&
                            ($cid = $partfields->{'content-location'}[0])) {
                        my $partbase = $uribase;
                        $cid =~ s/['"\s]//g;
                        if (defined($partfields->{'content-base'})) {
                            $partbase = $partfields->{'content-base'}[0];
                        }
                        $cid = apply_base_url($partbase, $cid);
                        if ($cid =~ /\S/ && !$Cid{$cid}) {
                            $Cid{$cid} = $href;
                        }
                    }
                    if ($cid) {
                        $partfields->{'content-location'} = [ $cid ];
                    } elsif (!defined($partfields->{'content-base'})) {
                        $partfields->{'content-base'} = [ $uribase ];
                    }
                }
            }

            my($entity);
            ENTITY: foreach $entity (@entity) {
                next  if $entity->{'filtered'};

                ## If content-type not defined for part, then determine
                ## content-type based upon multipart subtype.
                $partfields = $entity->{'fields'};
                if (!defined($partfields->{'content-type'})) {
                    $partfields->{'content-type'} =
                      [ ($subtype =~ /digest/) ?
                            'message/rfc822' : 'text/plain' ];
                }

                ## Process part
                @array = MAILread_body(
                            $partfields,
                            $entity->{'body'},
                            $isalt);

                ## Only use last filterable part in alternate
                if ($isalt) {
                    $ret = shift @array;
                    if ($ret) {
                        push(@files, @array);
                        $entity->{'filtered'} = 1;
                        last ENTITY;
                    }
                } else {
                    if (!$array[0]) {
                        $array[0] = &$CantProcessPartFunc(
                                        $partfields->{'content-type'}[0]);
                    }
                    $ret .= shift @array;
                }
                push(@files, @array);
                $entity->{'filtered'} = 1;
            }

            ## Check if multipart/alternative, and no success
            if (!$ret && $isalt) {
                warn qq|Warning: No recognized part in multipart/alternative; |,
                     qq|will try to decode last part\n|;
                $entity = $entity[0];
                @array = &MAILread_body(
                            $entity->{'fields'},
                            $entity->{'body'});
                $ret = shift @array;
                if ($ret) {
                    push(@files, @array);
                } else {
                    $ret = &$UnrecognizedAltPartFunc();
                }
            }

        ## Else if message/rfc822 or message/news
        } elsif ($ctype =~ m^\bmessage/(?:rfc822|news)\b^i) {
            $partfields = (MAILread_header($body))[0];

            $ret = &$BeginEmbeddedMesgFunc();
            if ($FormatHeaderFunc && defined(&$FormatHeaderFunc)) {
                $ret .= &$FormatHeaderFunc($partfields);
            } else {
                warn "Warning: readmail: No message header formatting ",
                     "function defined\n";
            }
            @array = MAILread_body($partfields, $body);
            $ret .= shift @array ||
                        &$CantProcessPartFunc(
                            $partfields->{'content-type'}[0] || 'text/plain');
            $ret .= &$EndEmbeddedMesgFunc();

            push(@files, @array);

        ## Else cannot handle type
        } else {
            $ret = '';
        }
    }

    ($ret, @files);
}

##---------------------------------------------------------------------------##
##      MAILread_header reads (and strips) a mail message header from the
##      variable $mesg.  $mesg is a reference to the mail message in
##      a string.
##
##      $fields is a reference to a hash to put field values indexed by
##      field labels that have been converted to all lowercase.
##      Field values are array references to the values
##      for each field.
##
##      ($fields_hash_ref, $header_txt) = MAILread_header($mesg_data);
##
sub MAILread_header {
    my($mesg) = shift;

    my $fields = { };
    my $label = '';
    my $header = '';
    my($value, $tmp, $pos);

    ## Read a line at a time.
    for ($pos=0; $pos >= 0; ) {
        $pos = index($$mesg, "\n");
        if ($pos >= 0) {
            $tmp = substr($$mesg, 0, $pos+1);
            substr($$mesg, 0, $pos+1) = "";
            last  if $tmp =~ /^\r?$/;   # Done if blank line

            $header .= $tmp;
            chop $tmp;                  # Chop newline
            $tmp =~ s/\r$//;            # Delete <CR> characters
        } else {
            $tmp = $$mesg;
            $header .= $tmp;
        }

        ## Decode text if requested
        $tmp = &MAILdecode_1522_str($tmp,JUST_DECODE)  if $DecodeHeader;

        ## Check for continuation of a field
        if ($tmp =~ s/^\s//) {
            $fields->{$label}[-1] .= $tmp  if $label;
            next;
        }

        ## Separate head from field text
        if ($tmp =~ /^([^:\s]+):\s*([\s\S]*)$/) {
            ($label, $value) = (lc($1), $2);
            if ($fields->{$label}) {
                push(@{$fields->{$label}}, $value);
            } else {
                $fields->{$label} = [ $value ];
            }
        }
    }
    ($fields, $header);
}

##---------------------------------------------------------------------------##
##      MAILread_file_header reads (and strips) a mail message header
##      from the filehandle $handle.  The routine behaves in the
##      same manner as MAILread_header;
##
##      ($fields_hash, $header_text) = MAILread_file_header($filehandle);
##
sub MAILread_file_header {
    my($handle) = @_;
    my $label  = '';
    my $header = '';
    my $fields = { };
    local $/   = "\n";

    my($value, $tmp);
    while (($tmp = <$handle>) !~ /^[\r]?$/) {
        ## Save raw text
        $header .= $tmp;

        ## Delete eol characters
        $tmp =~ s/[\r\n]//g;

        ## Decode text if requested
        $tmp = &MAILdecode_1522_str($tmp,JUST_DECODE)  if $DecodeHeader;

        ## Check for continuation of a field
        if ($tmp =~ s/^\s//) {
            $fields->{$label}[-1] .= $tmp  if $label;
            next;
        }

        ## Separate head from field text
        if ($tmp =~ /^([^:\s]+):\s*([\s\S]*)$/) {
            ($label, $value) = (lc($1), $2);
            if (defined($fields->{$label})) {
                push(@{$fields->{$label}}, $value);
            } else {
                $fields->{$label} = [ $value ];
            }
        }
    }
    ($fields, $header);
}

##---------------------------------------------------------------------------##
##      MAILis_excluded() checks if specified content-type has been
##      specified to be excluded.
##
sub MAILis_excluded {
    my $ctype = lc($_[0]) || 'text/plain';
    if ($MIMEExcs{$ctype}) {
        return 1;
    }
    if ($ctype =~ m|([^/]+)/|) {
        return $MIMEExcs{$1};
    }
    0;
}

##---------------------------------------------------------------------------##
##      MAILhead_get_disposition gets the content disposition and
##      filename from $hfields, $hfields is a hash produced by the
##      MAILread_header and MAILread_file_header routines.
##
sub MAILhead_get_disposition {
    my($hfields) = shift;
    my($disp, $filename) = ('', '');
    local($_);

    if (defined($hfields->{'content-disposition'}) &&
            ($_ = $hfields->{'content-disposition'}->[0])) {
        ($disp) = /^\s*([^\s;]+)/;
        if (/filename="([^"]+)"/i) {
            $filename = $1;
        } elsif (/filename=(\S+)/i) {
            ($filename = $1) =~ s/;\s*$//g;
        }
    }
    if (!$filename && defined($_ = $hfields->{'content-type'}[0])) {
        if (/name="([^"]+)"/i) {
            $filename = $1;
        } elsif (/name=(\S+)/i) {
            ($filename = $1) =~ s/;\s*$//g;
        }
    }
    $filename = MAILdecode_1522_str($filename, DECODE_ALL);
    $filename =~ s%.*[/\\:]%%;  # Remove any path component
    $filename =~ s/^\s+//;      # Remove leading whitespace
    $filename =~ s/\s+$//;      # Remove trailing whitespace
    ($disp, $filename);
}

##---------------------------------------------------------------------------##
##      MAILparse_parameter_str(): parses a parameter/value string.
##      Support for RFC 2184 extensions exists.  The $hasmain flag tells
##      the method if there is an intial main value for the sting.  For
##      example:
##
##          text/plain; charset=us-ascii
##      ----^^^^^^^^^^
##
##      The "text/plain" part is not a parameter/value pair, but having
##      an initial value is common among some header fields that can have
##      parameter/value pairs (egs: Content-Type, Content-Disposition).
##
##      Return Value:
##          Reference to a hash.  Each key is the attribute name.
##          The special key, 'x-main', is the main value if the
##          $hasmain flag is set.
##
##          Each hash value is a hash reference with three keys:
##          'charset', 'lang', 'value'.  'charset' and 'lang' may be
##          undef if character set or language information is not
##          specified.
##
##      Example Usage:
##
##          $content_type_field = 'text/plain; charset=us-ascii';
##          $parms = MAILparse_parameter_str($content_type_field, 1);
##          $ctype = $parms->{'x-main'};
##          $mesg_body_charset = $parms->{'charset'}{'value'};
##
sub MAILparse_parameter_str {
    my $str     = shift;        # Input string
    my $hasmain = shift;        # Flag if there is a main value to extract

    require 'rfc822.pl';

    my $parm    = { };
    my(@toks)   = (rfc822::uncomment($str));
    my($tok, $name, $value, $charset, $lang, $part);

    $parm->{'x-main'} = shift @toks  if $hasmain;

    ## Loop thru token list
    while ($tok = shift @toks) {
        next if $tok eq ";";
        ($name, $value) = split(/=/, $tok, 2);
        ## Check if charset/lang specified
        if ($name =~ s/\*$//) {
            if ($value =~ s/^([^']*)'([^']*)'//) {
                ($charset, $lang) = ($1, $2);
            } else {
                ($charset, $lang) = (undef, undef);
            }
        }
        ## Check if parameter is only part
        if ($name =~ s/\*(\d+)$//) {
            $part = $1 - 1;     # we start at 0 internally
        } else {
            $part = 0;
        }
        ## Set values for parameter
        $name = lc $name;
        $parm->{$name} = {
            'charset'   => $charset,
            'lang'      => $lang,
        };
        ## Check if value is next token
        if ($value eq "") {
            ## If value next token, than it must be quoted
            $value = shift @toks;
            $value =~ s/^"//;  $value =~ s/"$//;  $value =~ s/\\//g;
        }
        $parm->{$name}{'vlist'}[$part] = $value;
    }

    ## Now we loop thru each parameter and define the final values from
    ## the parts
    foreach $name (keys %$parm) {
        next  if $name eq 'x-main';
        $parm->{$name}{'value'} = join("", @{$parm->{$name}{'vlist'}});
    }

    $parm;
}

##---------------------------------------------------------------------------##
##      MAILset_alternative_prefs() is used to set content-type
##      preferences for multipart/alternative entities.  The list
##      specified will supercede the prefered format as denoted by
##      the ording of parts in the entity.
##
##      A content-type listed earlier in the array will be prefered
##      over one later.  For example:
##
##        MAILset_alternative_prefs('text/plain', 'text/html');
##
##      States that if a multipart/alternative entity contains a
##      text/plain part and a text/html part, the text/plain part will
##      be prefered over the text/html part.
##
sub MAILset_alternative_prefs {
    @_MIMEAltPrefs = map { lc } @_;
    %_MIMEAltPrefs = ();
    my $i = 0;
    my $ctype;
    foreach $ctype (@_MIMEAltPrefs) {
        $_MIMEAltPrefs{$ctype} = $i++;
    }
}

##---------------------------------------------------------------------------##
##      MAILset_charset_aliases() is used to define name aliases for
##      charset names.
##
##      Example usage:
##        MAILset_charset_aliases( {
##          'iso-8859-1' =>  [ 'latin1', 'iso_8859_1', '8859-1' ],
##          'iso-8859-15' => [ 'latin9', 'iso_8859_15', '8859-15' ],
##        }, $override );
##
sub MAILset_charset_aliases {
    my $map = shift;
    my $override = shift;

    %MIMECharsetAliases = ()  if $override;
    my($charset, $aliases, $alias);
    while (($charset, $aliases) = each(%$map)) {
        $charset = lc $charset;
        foreach $alias (@$aliases) {
            $MIMECharsetAliases{lc $alias} = $charset;
        }
    }
}

##---------------------------------------------------------------------------##
##      MAILload_charset_converter() loads the charset converter function
##      associated with given charset name.
##
##      Example usage:
##        ($func, $real_charset) = MAILload_charset_converter($charset);
##
##      $func is the reference to the converter function, which may be
##      undef.  $real_charset is the real charset name that should be
##      used when invoking the function.
##
sub MAILload_charset_converter {
    my $charset = lc shift;
    $charset = $MIMECharsetAliases{$charset}  if $MIMECharsetAliases{$charset};
    my $func = load_charset($charset);
    if (!defined($func) || !defined(&$func)) {
        $func = load_charset('default');
    }
    ($func, $charset);
}

###############################################################################
##      Private Routines
###############################################################################

##---------------------------------------------------------------------------##
##      Default function for unable to process a part of a multipart
##      message.
##
sub cantProcessPart {
    my($ctype) = $_[0];
    warn "Warning: Could not process part with given Content-Type: ",
         "$ctype\n";
    "<br><tt>&lt;&lt;&lt; $ctype: Unrecognized &gt;&gt;&gt;</tt><br>\n";
}
##---------------------------------------------------------------------------##
##      Default function returning message for content-types excluded.
##
sub excludedPart {
    my($ctype) = $_[0];
    "<br><tt>&lt;&lt;&lt; $ctype: EXCLUDED &gt;&gt;&gt;</tt><br>\n";
}
##---------------------------------------------------------------------------##
##      Default function for unrecognizeable part in multipart/alternative.
##
sub unrecognizedAltPart {
    warn "Warning: No recognizable part in multipart/alternative\n";
    "<br><tt>&lt;&lt;&lt; multipart/alternative: ".
    "No recognizable part &gt;&gt;&gt;</tt><br>\n";
}
##---------------------------------------------------------------------------##
##      Default function for beggining of embedded message
##      (ie message/rfc822 or message/news).
##
sub beginEmbeddedMesg {
qq|<blockquote><small>---&nbsp;<i>Begin&nbsp;Message</i>&nbsp;---</small>\n|;
}
##---------------------------------------------------------------------------##
##      Default function for end of embedded message
##      (ie message/rfc822 or message/news).
##
sub endEmbeddedMesg {
qq|<small>---&nbsp;<i>End Message</i>&nbsp;---</small></blockquote>\n|;
}

##---------------------------------------------------------------------------##

sub load_charset {
    require $MIMECharSetConvertersSrc{$_[0]}
        if defined($MIMECharSetConvertersSrc{$_[0]}) &&
           $MIMECharSetConvertersSrc{$_[0]};
    $MIMECharSetConverters{$_[0]};
}
sub load_decoder {
    my $enc = lc shift; $enc =~ s/\s//;
    require $MIMEDecodersSrc{$enc}
        if defined($MIMEDecodersSrc{$enc}) &&
           $MIMEDecodersSrc{$enc};
    $MIMEDecoders{$enc};
}
sub load_filter {
    require $MIMEFiltersSrc{$_[0]}
        if defined($MIMEFiltersSrc{$_[0]}) &&
           $MIMEFiltersSrc{$_[0]};
    $MIMEFilters{$_[0]};
}
sub get_filter_args {
    my $args    = '';
    my $s;
    foreach $s (@_) {
        next  unless defined $s;
        $args = $MIMEFiltersArgs{$s};
        last  if defined($args) && ($args ne '');
    }
    $args;
}

##---------------------------------------------------------------------------##
##      extract_ctype() extracts the content-type specification from
##      the beginning of given string.
##
sub extract_ctype {
    if (!defined($_[0]) ||
          (ref($_[0]) && ($_[0][0] !~ /\S/)) ||
          ($_[0] !~ /\S/)) {
        return 'message/rfc822'
            if (defined($_[1]) && ($_[1] eq 'multipart/digest'));
        return 'text/plain';
    }
    if (ref($_[0])) {
        $_[0][0] =~ m|^\s*([\w\-\./]+)|;
        return lc($1);
    }
    $_[0] =~ m|^\s*([\w\-\./]+)|;
    lc($1);
}

##---------------------------------------------------------------------------##

sub apply_base_url {
    my($b, $u) = @_;
    return $u  if !defined($b) || $b !~ /\S/;

    my($ret);
    $u =~ s/^\s+//;
    if ($u =~ m%^$Url%o || $u =~ m/^#/) {
        ## Absolute URL or scroll link; do nothing
        $ret = $u;
    } else {
        ## Relative URL
        if ($u =~ /^\./) {
            ## "./---" or "../---": Need to remove and adjust base
            ## accordingly.
            $b =~ s/\/$//;
            my @a = split(/\//, $b);
            my $cnt = 0;
            while ( $cnt <= scalar(@a) &&
                    $u =~ s|^(\.{1,2})/|| ) { ++$cnt  if length($1) == 2; }
            splice(@a, -$cnt)  if $cnt > 0;
            $b = join('/', @a, "");

        } elsif ($u =~ m%^/%) {
            ## "/---": Just use hostname:port of base.
            $b =~ s%^(${Url}[^/]*)/.*%$1%o;
        }
        $ret = $b . $u;
    }
    $ret;
}

##---------------------------------------------------------------------------##

sub dump_header {
    my $fh      = shift;
    my $fields  = shift;
    my($key, $a, $value);
    foreach $key (sort keys %$fields) {
        $a = $fields->{$key};
        if (ref($a)) {
            foreach $value (@$a) {
                print $fh "$key: $value\n";
            }
        } else {
            print $fh "$key: $a\n";
        }
    }
}

##---------------------------------------------------------------------------##
1; # for require