[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / site_perl / 5.8.0 / mhutil.pl

##---------------------------------------------------------------------------##
##  File:
##	$Id: mhutil.pl,v 2.20 2002/10/20 02:53:00 ehood Exp $
##  Author:
##      Earl Hood       mhonarc@mhonarc.org
##  Description:
##      Utility routines for MHonArc
##---------------------------------------------------------------------------##
##    MHonArc -- Internet mail-to-HTML converter
##    Copyright (C) 1995-1999	Earl Hood, mhonarc@mhonarc.org
##
##    This program is free software; you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation; either version 2 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program; if not, write to the Free Software
##    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
##    02111-1307, USA
##---------------------------------------------------------------------------##

package mhonarc;

## RFC 2369 header fields to check for URLs
my %HFieldsList = (
    'list-archive'  	=> 1,
    'list-help'  	=> 1,
    'list-owner'  	=> 1,
    'list-post'  	=> 1,
    'list-subscribe'  	=> 1,
    'list-unsubscribe' 	=> 1,
);

## Header fields that contain addresses
my %HFieldsAddr = (
    'apparently-from'	=> 1,
    'apparently-to'	=> 1,
    'bcc'		=> 1,
    'cc'		=> 1,
    'dcc'		=> 1,
    'from'		=> 1,
    'mail-reply-to'	=> 1,
    'original-bcc'	=> 1,
    'original-cc'	=> 1,
    'original-from'	=> 1,
    'original-sender'	=> 1,
    'original-to'	=> 1,
    'reply-to'		=> 1,
    'resent-bcc'	=> 1,
    'resent-cc'		=> 1,
    'resent-from'	=> 1,
    'resent-sender'	=> 1,
    'resent-to'		=> 1,
    'return-path'	=> 1,
    'sender'		=> 1,
    'to'		=> 1,
    'x-envelope'	=> 1,
);


##---------------------------------------------------------------------------
##    Clip text to specified length.
##
sub clip_text {
    my $str      = \shift;  # Prevent unnecessary copy.
    my $len      = shift;   # Clip length
    my $is_html  = shift;   # If entity references should be considered
    my $has_tags = shift;   # If html tags should be stripped

    if (!$is_html) {
      return substr($$str, 0, $len);
    }

    my $text = "";
    my $subtext = "";
    my $html_len = length($$str);
    my($pos, $sublen, $erlen, $real_len);
    my $er_len = 0;
    
    for ( $pos=0, $sublen=$len; $pos < $html_len; ) {
	$subtext = substr($$str, $pos, $sublen);
	$pos += $sublen;

	# strip tags
	if ($has_tags) {
	    # Strip full tags
	    $subtext =~ s/<[^>]*>//g;
	    # Check if clipped part of a tag
	    if ($subtext =~ s/<[^>]*\Z//) {
		my $gt = index($$str, '>', $pos);
		$pos = ($gt < 0) ? $html_len : ($gt+1);
	    }
	}

	# check for clipped entity reference
	if (($pos < $html_len) && ($subtext =~ /\&[^;]*\Z/)) {
	    my $semi = index($$str, ';', $pos);
	    if ($semi < 0) {
		# malformed entity reference
		$subtext .= substr($$str, $pos);
		$pos = $html_len;
	    } else {
		$subtext .= substr($$str, $pos, $semi-$pos+1);
		$pos = $semi+1;
	    }
	}

	# compute entity reference lengths to determine "real" character
	# count and not raw character count.
	while ($subtext =~ /(\&[^;]+);/g) {
	    $er_len += length($1);
	}

	$text .= $subtext;

	# done if we have enough
	$real_len = length($text)-$er_len;
	if ($real_len >= $len) {
	    last;
	}
	$sublen = $len - (length($text)-$er_len);
    }
    $text;
}

##---------------------------------------------------------------------------
##	Get an e-mail address from (HTML) $str.
##
sub extract_email_address {
    return ''  unless defined $_[0];
    my $str = shift;

    if ($str =~ /($AddrExp)/o) {
	return $1;
    }
    if ($str =~ /<(\S+)>/) {
	return $1;
    }
    if ($str =~ s/\([^\)]+\)//) {
	$str =~ /\s*(\S+)\s*/;
	return $1;
    }
    $str =~ /\s*(\S+)\s*/;
    return $1;
}

##---------------------------------------------------------------------------
##	Get an e-mail name from $str.
##
sub extract_email_name {
    my($str) = shift;
    my($ret);

    if ($str =~ s/<(\S+)>//) {		# Check for: name <addr>
	$ret = $1;
	if ($str =~ /\S/) {
	    $ret = $str;
	} else {			# no name
	    $ret =~ s/@.*//;
	}
    } elsif ($str =~ /"([^"]+)"/) {		# Name in ""'s
	$ret = $1;
    } elsif ($str =~ /\(([^\)]+)\)/) {		# Name in ()'s
	$ret = $1;
    } else {					# Just address
	($ret = $str) =~ s/@.*//;
    }
    $ret =~ s/^["\s]+//g; $ret =~ s/["\s]+$//g;
    $ret;
}

##---------------------------------------------------------------------------
##	Routine to sort messages
##
sub sort_messages {
    my($nosort, $subsort, $authsort, $revsort) = @_;
    $nosort   = $NOSORT    if !defined($nosort);
    $subsort  = $SUBSORT   if !defined($subsort);
    $authsort = $AUTHSORT  if !defined($authsort);
    $revsort  = $REVSORT   if !defined($revsort);

    if ($nosort) {
	## Process order
	if ($revsort) {
	    return sort { $IndexNum{$b} <=> $IndexNum{$a} } keys %Subject;
	} else {
	    return sort { $IndexNum{$a} <=> $IndexNum{$b} } keys %Subject;
	}

    } elsif ($subsort) {
	## Subject order
	my(%sub, $idx, $sub);
	use locale;
	eval {
	    my $hs = scalar(%Subject);  $hs =~ s|^[^/]+/||;
	    keys(%sub) = $hs;
	};
	while (($idx, $sub) = each(%Subject)) {
	    $sub = lc $sub;
	    1 while $sub =~ s/$SubReplyRxp//io;
	    $sub =~ s/$SubArtRxp//io;
	    $sub{$idx} = $sub;
	}
	if ($revsort) {
	    return sort { ($sub{$a} cmp $sub{$b}) ||
			  (get_time_from_index($b) <=> get_time_from_index($a))
			} keys %Subject;
	} else {
	    return sort { ($sub{$a} cmp $sub{$b}) ||
			  (get_time_from_index($a) <=> get_time_from_index($b))
			} keys %Subject;
	}
	
    } elsif ($authsort) {
	## Author order
	my(%from, $idx, $from);
	use locale;
	eval {
	    my $hs = scalar(%From);  $hs =~ s|^[^/]+/||;
	    keys(%from) = $hs;
	};
	while (($idx, $from) = each(%From)) {
	    $from = lc extract_email_name($from);
	    $from{$idx} = $from;
	}
	if ($revsort) {
	    return sort { ($from{$a} cmp $from{$b}) ||
			  (get_time_from_index($b) <=> get_time_from_index($a))
			} keys %Subject;
	} else {
	    return sort { ($from{$a} cmp $from{$b}) ||
			  (get_time_from_index($a) <=> get_time_from_index($b))
			} keys %Subject;
	}

    } else {
	## Date order
	if ($revsort) {
	    return sort { (get_time_from_index($b) <=> get_time_from_index($a))
			  || ($IndexNum{$b} <=> $IndexNum{$a})
			} keys %Subject;
	} else {
	    return sort { (get_time_from_index($a) <=> get_time_from_index($b))
			  || ($IndexNum{$a} <=> $IndexNum{$b})
			} keys %Subject;
	}

    }
}

##---------------------------------------------------------------------------
##	Message-sort routines for sort().
##
sub increase_index {
    (&get_time_from_index($a) <=> &get_time_from_index($b)) ||
	($IndexNum{$a} <=> $IndexNum{$b});
}

##---------------------------------------------------------------------------
##	Routine for formating a message number for use in filenames or links.
##
sub fmt_msgnum {
    sprintf("%05d", $_[0]);
}

##---------------------------------------------------------------------------
##	Routine to get filename of a message number.
##
sub msgnum_filename {
    my($fmtstr) = "$MsgPrefix%05d.$HtmlExt";
    $fmtstr .= ".gz"  if $GzipLinks;
    sprintf($fmtstr, $_[0]);
}

##---------------------------------------------------------------------------
##	Routine to get filename of an index
##
sub get_filename_from_index {
    &msgnum_filename($IndexNum{$_[0]});
}

##---------------------------------------------------------------------------
##	Routine to get time component from index
##
sub get_time_from_index {
    (split(/$X/o, $_[0], 2))[0];
}

##---------------------------------------------------------------------------
##	Routine to get annotation of a message
##
sub get_note {
    my $index = shift;
    my $file = join($DIRSEP, get_note_dir(),
			     msgid_to_filename($Index2MsgId{$index}));
    if (!open(NOTEFILE, $file)) { return ""; }
    my $ret = join("", <NOTEFILE>);
    close NOTEFILE;
    $ret;
}

##---------------------------------------------------------------------------
##	Routine to determine if a message has an annotation
##
sub note_exists {
    my $index = shift;
    -e join($DIRSEP, get_note_dir(),
		     msgid_to_filename($Index2MsgId{$index}));
}

##---------------------------------------------------------------------------
##	Routine to get full pathname to annotation directory
##
sub get_note_dir {
    if (!OSis_absolute_path($NoteDir)) {
	return join($DIRSEP, $OUTDIR, $NoteDir);
    }
    $NoteDir;
}

##---------------------------------------------------------------------------
##	Routine to get lc author name from index
##
sub get_base_author {
    lc extract_email_name($From{$_[0]});
}

##---------------------------------------------------------------------------
##	Determine time from date.  Use %Zone for timezone offsets
##
sub get_time_from_date {
    my($mday, $mon, $yr, $hr, $min, $sec, $zone) = @_;
    my($time) = 0;

    $yr -= 1900  if $yr >= 1900;  # if given full 4 digit year
    $yr += 100   if $yr <= 37;    # in case of 2 digit years
    if (($yr < 70) || ($yr > 137)) {
	warn "Warning: Bad year (", $yr+1900, ") using current\n";
	$yr = (localtime(time))[5];
    }

    ## If $zone, grab gmt time, else grab local
    if ($zone) {
	$zone =~ tr/a-z/A-Z/;
	$time = &timegm($sec,$min,$hr,$mday,$mon,$yr);

	# try to modify time/date based on timezone
	OFFSET: {
	    # numeric timezone
	    if ($zone =~ /^[\+-]\d+$/) {
		$time -= &zone_offset_to_secs($zone);
		last OFFSET;
	    }
	    # Zone
	    if (defined($Zone{$zone})) {
		# timezone abbrev
		$time += &zone_offset_to_secs($Zone{$zone});
		last OFFSET;

	    }
	    # Zone[+-]DDDD
	    if ($zone =~ /^([A-Z]\w+)([\+-]\d+)$/) {
		$time -= &zone_offset_to_secs($2);
		if (defined($Zone{$1})) {
		    $time += &zone_offset_to_secs($Zone{$1});
		    last OFFSET;
		}
	    }
	    # undefined timezone
	    warn qq|Warning: Unrecognized time zone, "$zone"\n|;
	}

    } else {
	$time = &timelocal($sec,$min,$hr,$mday,$mon,$yr);
    }
    $time;
}

##---------------------------------------------------------------------------
##	Routine to check if time has expired.
##
sub expired_time {
    ($ExpireTime && (time - $_[0] > $ExpireTime)) ||
    ($_[0] < $ExpireDateTime);
}

##---------------------------------------------------------------------------
##      Get HTML tags for formatting message headers
##
sub get_header_tags {
    my($f) = shift;
    my($ftago, $ftagc, $tago, $tagc);
 
    ## Get user specified tags (this is one funcky looking code)
    $tag = (defined($HeadHeads{$f}) ?
            $HeadHeads{$f} : $HeadHeads{"-default-"});
    $ftag = (defined($HeadFields{$f}) ?
             $HeadFields{$f} : $HeadFields{"-default-"});
    if ($tag) { $tago = "<$tag>";  $tagc = "</$tag>"; }
    else { $tago = $tagc = ''; }
    if ($ftag) { $ftago = "<$ftag>";  $ftagc = "</$ftag>"; }
    else { $ftago = $ftagc = ''; }
 
    ($tago, $tagc, $ftago, $ftagc);
}

##---------------------------------------------------------------------------
##	Format message headers in HTML.
##	$html = htmlize_header($fields_hash_ref);
##
sub htmlize_header {
    my $fields = shift;
    my($key,
       $tago, $tagc,
       $ftago, $ftagc,
       $item,
       @array);
    my($tmp);

    my $mesg = "";
    my %hf = %$fields;
    foreach $item (@FieldOrder) {
	if ($item eq '-extra-') {
	    foreach $key (sort keys %hf) {
		next  if $FieldODefs{$key};
		next  if $key =~ /^x-mha-/;
		delete $hf{$key}, next  if &exclude_field($key);

		@array = @{$hf{$key}};
		foreach $tmp (@array) {
		    $tmp = $HFieldsList{$key} ? mlist_field_add_links($tmp) :
						&$MHeadCnvFunc($tmp);
		    $tmp = field_add_links($key, $tmp, $fields);
		    ($tago, $tagc, $ftago, $ftagc) = get_header_tags($key);
		    $mesg .= join('', $LABELBEG,
				  $tago, htmlize(ucfirst($key)), $tagc,
				  $LABELEND,
				  $FLDBEG, $ftago, $tmp, $ftagc, $FLDEND,
				  "\n");
		}
		delete $hf{$key};
	    }
	} else {
	    if (!&exclude_field($item) && $hf{$item}) {
		@array = @{$hf{$item}};
		foreach $tmp (@array) {
		    $tmp = $HFieldsList{$item} ? mlist_field_add_links($tmp) :
						 &$MHeadCnvFunc($tmp);
		    $tmp = field_add_links($item, $tmp, $fields);
		    ($tago, $tagc, $ftago, $ftagc) = &get_header_tags($item);
		    $mesg .= join('', $LABELBEG,
				  $tago, htmlize(ucfirst($item)), $tagc,
				  $LABELEND,
				  $FLDBEG, $ftago, $tmp, $ftagc, $FLDEND,
				  "\n");
		}
	    }
	    delete $hf{$item};
	}
    }
    if ($mesg) { $mesg = $FIELDSBEG . $mesg . $FIELDSEND; }
    $mesg;
}

##---------------------------------------------------------------------------

sub mlist_field_add_links {
    my $txt	= shift;
    my $ret	= "";
    local($_);
    foreach (split(/(<[^>]+>)/, $txt)) {
	if (/^<\w+:/) {
	    chop; substr($_, 0, 1) = "";
	    $ret .= qq|&lt;<a href="$_">$_</a>&gt;|;
	} else {
	    $ret .= &$MHeadCnvFunc($_);
	}
    }
    $ret;
}

##---------------------------------------------------------------------------
##	Routine to add mailto/news links to a message header string.
##
sub field_add_links {
    my $label = lc shift;
    my $fld_text = shift;
    my $fields	 = shift;

    LBLSW: {
	if ($HFieldsAddr{$label}) {
	    if (!$NOMAILTO) {
		$fld_text =~ s|([\!\%\w\.\-+=/]+@[\w\.\-]+)
			      |&mailUrl($1, $fields->{'x-mha-message-id'},
					    $fields->{'x-mha-subject'},
					    $fields->{'x-mha-from'});
			      |gex;
	    } else {
		$fld_text =~ s|([\!\%\w\.\-+=/]+@[\w\.\-]+)
			      |&htmlize(&rewrite_address($1))
			      |gex;
	    }
	    last LBLSW;
	}
	if ($label eq 'newsgroup') {
	    $fld_text = newsurl($fld_text)  unless $NONEWS;
	    last LBLSW;
	}
	last LBLSW;
    }
    $fld_text;
}


##---------------------------------------------------------------------------
##	Routine to add news links of newsgroups names
##
sub newsurl {
    my $str = shift;
    my $h = "";

    if ($str =~ s/^([^:]*:\s*)//) {
	$h = $1;
    }
    $str =~ s/\s//g;			# Strip whitespace
    my @groups = split(/,/, $str);	# Split groups
    foreach (@groups) {			# Make hyperlinks
	s|(.*)|<a href="news:$1">$1</a>|;
    }
    $h . join(', ', @groups);	# Rejoin string
}

##---------------------------------------------------------------------------
##	$html = mailUrl($email_addr, $msgid, $subject, $from);
##
sub mailUrl {
    my $eaddr = shift || '';
    my $msgid = shift || '';
    my $sub = shift || '';
    my $from = shift || '';

    local $_;
    my($url) = ($MAILTOURL);
    my($to) = (&urlize($eaddr));
    my($toname, $todomain) = map { urlize($_) } split(/@/,$eaddr,2);
    my($froml, $msgidl) = (&urlize($from), &urlize($msgid));
    my($fromaddrl) = (&extract_email_address($from));
    my($faddrnamel, $faddrdomainl) = map { urlize($_) } split(/@/,$fromaddrl,2);
    $fromaddrl = &urlize($fromaddrl);
    my($subjectl);

    # Add "Re:" to subject if not present
    if ($sub !~ /^$SubReplyRxp/io) {
	$subjectl = 'Re:%20' . &urlize($sub);
    } else {
	$subjectl = &urlize($sub);
    }
    $url =~ s/\$FROM\$/$froml/g;
    $url =~ s/\$FROMADDR\$/$fromaddrl/g;
    $url =~ s/\$FROMADDRNAME\$/$faddrnamel/g;
    $url =~ s/\$FROMADDRDOMAIN\$/$faddrdomainl/g;
    $url =~ s/\$MSGID\$/$msgidl/g;
    $url =~ s/\$SUBJECT\$/$subjectl/g;
    $url =~ s/\$SUBJECTNA\$/$subjectl/g;
    $url =~ s/\$TO\$/$to/g;
    $url =~ s/\$TOADDRNAME\$/$toname/g;
    $url =~ s/\$TOADDRDOMAIN\$/$todomain/g;
    $url =~ s/\$ADDR\$/$to/g;
    qq|<a href="$url">| . &htmlize(&rewrite_address($eaddr)) . q|</a>|;
}

##---------------------------------------------------------------------------##
##	Routine to parse variable definitions in a string.  The
##	function returns a list of variable/value pairs.  The format of
##	the string is similiar to attribute specification lists in
##	SGML, but NAMEs are any non-whitespace character.
##
sub parse_vardef_str {
    my($org) = shift;
    my($lower) = shift;
    my(%hash) = ();
    my($str, $q, $var, $value);

    ($str = $org) =~ s/^\s+//;
    while ($str =~ s/^([^=\s]+)\s*=\s*//) {
	$var = $1;
	if ($str =~ s/^(['"])//) {
	    $q = $1;
	    if (!($q eq "'" ? $str =~ s/^([^']*)'// :
			      $str =~ s/^([^"]*)"//)) {
		warn "Warning: Unclosed quote in: $org\n";
		return ();
	    }
	    $value = $1;

	} else {
	    if ($str =~ s/^(\S+)//) {
		$value = $1;
	    } else {
		warn "Warning: No value after $var in: $org\n";
		return ();
	    }
	}
	$str =~ s/^\s+//;
	$hash{$lower? lc($var): $var} = $value;
    }
    if ($str =~ /\S/) {
	warn "Warning: Trailing characters in: $org\n";
    }
    %hash;
}

##---------------------------------------------------------------------------##

sub msgid_to_filename {
    my $msgid = shift;
    if ($VMS) {
	$msgid =~ s/([^\w\-])/sprintf("=%02X",unpack("C",$1))/geo;
    } else {
	$msgid =~ s/([^\w.\-\@])/sprintf("=%02X",unpack("C",$1))/geo;
    }
    $msgid;
}

##---------------------------------------------------------------------------##
##	Check if new follow up list for a message is different from
##	old follow up list.
##
sub is_follow_ups_diff {
    my $f	= $Follow{$_[0]};
    my $o	= $FollowOld{$_[0]};
    if (defined($f) && defined($o)) {
	return 1  unless @$f == @$o;
	local $^W = 0;
	my $i;
	for ($i=0; $i < @$f; ++$i) {
	    return 1  if $f->[$i] ne $o->[$i];
	}
	return 0;
    }
    return (defined($f) || defined($o));
}

##---------------------------------------------------------------------------##
##	Retrieve icon URL for specified content-type.
##
sub get_icon_url {
    my $ctype = shift;
    my $icon = $Icons{$ctype};
    ICON: {
	last ICON  if defined $icon;
	if ($ctype =~ s|/.*||) {
	  $ctype .= '/*';
	  $icon = $Icons{$ctype};
	  last ICON  if defined $icon;
	}
	$icon = $Icons{'*/*'} || $Icons{'unknown'};
    }
    if (!defined($icon)) {
	return (undef, undef, undef);
    }
    if ($icon =~ s/\[(\d+)x(\d+)\]//) {
	return ($icon, $1, $2);
    }
    ($icon, undef, undef);
}

##---------------------------------------------------------------------------##

sub log_mesg {
    my $fh	= shift;
    my $doDate	= shift;

    if ($doDate) {
	my($sec,$min,$hour,$mday,$mon,$year) = localtime(time);
	print $fh sprintf("[%4d-%02d-%02d %02d:%02d:%02d] ",
			  $year+1900, $mon+1, $mday, $hour, $min, $sec);
    }
    print $fh @_;
}

##---------------------------------------------------------------------------##

sub dump_hash {
    my $fh = shift;
    my $h = shift;
    local $_;
    foreach (sort keys %$h) {
	print $fh "$_ => ", $h->{$_}, "\n";
    }
}

##---------------------------------------------------------------------------##
1;
Commit	Line	Data
86530b38 AT	1	##---------------------------------------------------------------------------##
	2	## File:
	3	## $Id: mhutil.pl,v 2.20 2002/10/20 02:53:00 ehood Exp $
	4	## Author:
	5	## Earl Hood mhonarc@mhonarc.org
	6	## Description:
	7	## Utility routines for MHonArc
	8	##---------------------------------------------------------------------------##
	9	## MHonArc -- Internet mail-to-HTML converter
	10	## Copyright (C) 1995-1999 Earl Hood, mhonarc@mhonarc.org
	11	##
	12	## This program is free software; you can redistribute it and/or modify
	13	## it under the terms of the GNU General Public License as published by
	14	## the Free Software Foundation; either version 2 of the License, or
	15	## (at your option) any later version.
	16	##
	17	## This program is distributed in the hope that it will be useful,
	18	## but WITHOUT ANY WARRANTY; without even the implied warranty of
	19	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	20	## GNU General Public License for more details.
	21	##
	22	## You should have received a copy of the GNU General Public License
	23	## along with this program; if not, write to the Free Software
	24	## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
	25	## 02111-1307, USA
	26	##---------------------------------------------------------------------------##
	27
	28	package mhonarc;
	29
	30	## RFC 2369 header fields to check for URLs
	31	my %HFieldsList = (
	32	'list-archive' => 1,
	33	'list-help' => 1,
	34	'list-owner' => 1,
	35	'list-post' => 1,
	36	'list-subscribe' => 1,
	37	'list-unsubscribe' => 1,
	38	);
	39
	40	## Header fields that contain addresses
	41	my %HFieldsAddr = (
	42	'apparently-from' => 1,
	43	'apparently-to' => 1,
	44	'bcc' => 1,
	45	'cc' => 1,
	46	'dcc' => 1,
	47	'from' => 1,
	48	'mail-reply-to' => 1,
	49	'original-bcc' => 1,
	50	'original-cc' => 1,
	51	'original-from' => 1,
	52	'original-sender' => 1,
	53	'original-to' => 1,
	54	'reply-to' => 1,
	55	'resent-bcc' => 1,
	56	'resent-cc' => 1,
	57	'resent-from' => 1,
	58	'resent-sender' => 1,
	59	'resent-to' => 1,
	60	'return-path' => 1,
	61	'sender' => 1,
	62	'to' => 1,
	63	'x-envelope' => 1,
	64	);
65
66
67	##---------------------------------------------------------------------------
68	## Clip text to specified length.
69	##
70	sub clip_text {
71	my $str = \shift; # Prevent unnecessary copy.
72	my $len = shift; # Clip length
73	my $is_html = shift; # If entity references should be considered
74	my $has_tags = shift; # If html tags should be stripped
75
76	if (!$is_html) {
77	return substr($$str, 0, $len);
78	}
79
80	my $text = "";
81	my $subtext = "";
82	my $html_len = length($$str);
83	my($pos, $sublen, $erlen, $real_len);
84	my $er_len = 0;
85
86	for ( $pos=0, $sublen=$len; $pos < $html_len; ) {
87	$subtext = substr($$str, $pos, $sublen);
88	$pos += $sublen;
89
90	# strip tags
91	if ($has_tags) {
92	# Strip full tags
93	$subtext =~ s/<[^>]*>//g;
94	# Check if clipped part of a tag
95	if ($subtext =~ s/<[^>]*\Z//) {
96	my $gt = index($$str, '>', $pos);
97	$pos = ($gt < 0) ? $html_len : ($gt+1);
98	}
99	}
100
101	# check for clipped entity reference
102	if (($pos < $html_len) && ($subtext =~ /\&[^;]*\Z/)) {
103	my $semi = index($$str, ';', $pos);
104	if ($semi < 0) {
105	# malformed entity reference
106	$subtext .= substr($$str, $pos);
107	$pos = $html_len;
108	} else {
109	$subtext .= substr($$str, $pos, $semi-$pos+1);
110	$pos = $semi+1;
111	}
112	}
113
114	# compute entity reference lengths to determine "real" character
115	# count and not raw character count.
116	while ($subtext =~ /(\&[^;]+);/g) {
117	$er_len += length($1);
118	}
119
120	$text .= $subtext;
121
122	# done if we have enough
123	$real_len = length($text)-$er_len;
124	if ($real_len >= $len) {
125	last;
126	}
127	$sublen = $len - (length($text)-$er_len);
128	}
129	$text;
130	}
131
132	##---------------------------------------------------------------------------
133	## Get an e-mail address from (HTML) $str.
134	##
135	sub extract_email_address {
136	return '' unless defined $_[0];
137	my $str = shift;
138
139	if ($str =~ /($AddrExp)/o) {
140	return $1;
141	}
142	if ($str =~ /<(\S+)>/) {
143	return $1;
144	}
145	if ($str =~ s/\([^\)]+\)//) {
146	$str =~ /\s(\S+)\s/;
147	return $1;
148	}
149	$str =~ /\s(\S+)\s/;
150	return $1;
151	}
152
153	##---------------------------------------------------------------------------
154	## Get an e-mail name from $str.
155	##
156	sub extract_email_name {
157	my($str) = shift;
158	my($ret);
159
160	if ($str =~ s/<(\S+)>//) { # Check for: name <addr>
161	$ret = $1;
162	if ($str =~ /\S/) {
163	$ret = $str;
164	} else { # no name
165	$ret =~ s/@.*//;
166	}
167	} elsif ($str =~ /"([^"]+)"/) { # Name in ""'s
168	$ret = $1;
169	} elsif ($str =~ /\(([^\)]+)\)/) { # Name in ()'s
170	$ret = $1;
171	} else { # Just address
172	($ret = $str) =~ s/@.*//;
173	}
174	$ret =~ s/^["\s]+//g; $ret =~ s/["\s]+$//g;
175	$ret;
176	}
177
178	##---------------------------------------------------------------------------
179	## Routine to sort messages
180	##
181	sub sort_messages {
182	my($nosort, $subsort, $authsort, $revsort) = @_;
183	$nosort = $NOSORT if !defined($nosort);
184	$subsort = $SUBSORT if !defined($subsort);
185	$authsort = $AUTHSORT if !defined($authsort);
186	$revsort = $REVSORT if !defined($revsort);
187
188	if ($nosort) {
189	## Process order
190	if ($revsort) {
191	return sort { $IndexNum{$b} <=> $IndexNum{$a} } keys %Subject;
192	} else {
193	return sort { $IndexNum{$a} <=> $IndexNum{$b} } keys %Subject;
194	}
195
196	} elsif ($subsort) {
197	## Subject order
198	my(%sub, $idx, $sub);
199	use locale;
200	eval {
201	my $hs = scalar(%Subject); $hs =~ s\|^[^/]+/\|\|;
202	keys(%sub) = $hs;
203	};
204	while (($idx, $sub) = each(%Subject)) {
205	$sub = lc $sub;
206	1 while $sub =~ s/$SubReplyRxp//io;
207	$sub =~ s/$SubArtRxp//io;
208	$sub{$idx} = $sub;
209	}
210	if ($revsort) {
211	return sort { ($sub{$a} cmp $sub{$b}) \|\|
212	(get_time_from_index($b) <=> get_time_from_index($a))
213	} keys %Subject;
214	} else {
215	return sort { ($sub{$a} cmp $sub{$b}) \|\|
216	(get_time_from_index($a) <=> get_time_from_index($b))
217	} keys %Subject;
218	}
219
220	} elsif ($authsort) {
221	## Author order
222	my(%from, $idx, $from);
223	use locale;
224	eval {
225	my $hs = scalar(%From); $hs =~ s\|^[^/]+/\|\|;
226	keys(%from) = $hs;
227	};
228	while (($idx, $from) = each(%From)) {
229	$from = lc extract_email_name($from);
230	$from{$idx} = $from;
231	}
232	if ($revsort) {
233	return sort { ($from{$a} cmp $from{$b}) \|\|
234	(get_time_from_index($b) <=> get_time_from_index($a))
235	} keys %Subject;
236	} else {
237	return sort { ($from{$a} cmp $from{$b}) \|\|
238	(get_time_from_index($a) <=> get_time_from_index($b))
239	} keys %Subject;
240	}
241
242	} else {
243	## Date order
244	if ($revsort) {
245	return sort { (get_time_from_index($b) <=> get_time_from_index($a))
246	\|\| ($IndexNum{$b} <=> $IndexNum{$a})
247	} keys %Subject;
248	} else {
249	return sort { (get_time_from_index($a) <=> get_time_from_index($b))
250	\|\| ($IndexNum{$a} <=> $IndexNum{$b})
251	} keys %Subject;
252	}
253
254	}
255	}
256
257	##---------------------------------------------------------------------------
258	## Message-sort routines for sort().
259	##
260	sub increase_index {
261	(&get_time_from_index($a) <=> &get_time_from_index($b)) \|\|
262	($IndexNum{$a} <=> $IndexNum{$b});
263	}
264
265	##---------------------------------------------------------------------------
266	## Routine for formating a message number for use in filenames or links.
267	##
268	sub fmt_msgnum {
269	sprintf("%05d", $_[0]);
270	}
271
272	##---------------------------------------------------------------------------
273	## Routine to get filename of a message number.
274	##
275	sub msgnum_filename {
276	my($fmtstr) = "$MsgPrefix%05d.$HtmlExt";
277	$fmtstr .= ".gz" if $GzipLinks;
278	sprintf($fmtstr, $_[0]);
279	}
280
281	##---------------------------------------------------------------------------
282	## Routine to get filename of an index
283	##
284	sub get_filename_from_index {
285	&msgnum_filename($IndexNum{$_[0]});
286	}
287
288	##---------------------------------------------------------------------------
289	## Routine to get time component from index
290	##
291	sub get_time_from_index {
292	(split(/$X/o, $_[0], 2))[0];
293	}
294
295	##---------------------------------------------------------------------------
296	## Routine to get annotation of a message
297	##
298	sub get_note {
299	my $index = shift;
300	my $file = join($DIRSEP, get_note_dir(),
301	msgid_to_filename($Index2MsgId{$index}));
302	if (!open(NOTEFILE, $file)) { return ""; }
303	my $ret = join("", <NOTEFILE>);
304	close NOTEFILE;
305	$ret;
306	}
307
308	##---------------------------------------------------------------------------
309	## Routine to determine if a message has an annotation
310	##
311	sub note_exists {
312	my $index = shift;
313	-e join($DIRSEP, get_note_dir(),
314	msgid_to_filename($Index2MsgId{$index}));
315	}
316
317	##---------------------------------------------------------------------------
318	## Routine to get full pathname to annotation directory
319	##
320	sub get_note_dir {
321	if (!OSis_absolute_path($NoteDir)) {
322	return join($DIRSEP, $OUTDIR, $NoteDir);
323	}
324	$NoteDir;
325	}
326
327	##---------------------------------------------------------------------------
328	## Routine to get lc author name from index
329	##
330	sub get_base_author {
331	lc extract_email_name($From{$_[0]});
332	}
333
334	##---------------------------------------------------------------------------
335	## Determine time from date. Use %Zone for timezone offsets
336	##
337	sub get_time_from_date {
338	my($mday, $mon, $yr, $hr, $min, $sec, $zone) = @_;
339	my($time) = 0;
340
341	$yr -= 1900 if $yr >= 1900; # if given full 4 digit year
342	$yr += 100 if $yr <= 37; # in case of 2 digit years
343	if (($yr < 70) \|\| ($yr > 137)) {
344	warn "Warning: Bad year (", $yr+1900, ") using current\n";
345	$yr = (localtime(time))[5];
346	}
347
348	## If $zone, grab gmt time, else grab local
349	if ($zone) {
350	$zone =~ tr/a-z/A-Z/;
351	$time = &timegm($sec,$min,$hr,$mday,$mon,$yr);
352
353	# try to modify time/date based on timezone
354	OFFSET: {
355	# numeric timezone
356	if ($zone =~ /^[\+-]\d+$/) {
357	$time -= &zone_offset_to_secs($zone);
358	last OFFSET;
359	}
360	# Zone
361	if (defined($Zone{$zone})) {
362	# timezone abbrev
363	$time += &zone_offset_to_secs($Zone{$zone});
364	last OFFSET;
365
366	}
367	# Zone[+-]DDDD
368	if ($zone =~ /^([A-Z]\w+)([\+-]\d+)$/) {
369	$time -= &zone_offset_to_secs($2);
370	if (defined($Zone{$1})) {
371	$time += &zone_offset_to_secs($Zone{$1});
372	last OFFSET;
373	}
374	}
375	# undefined timezone
376	warn qq\|Warning: Unrecognized time zone, "$zone"\n\|;
377	}
378
379	} else {
380	$time = &timelocal($sec,$min,$hr,$mday,$mon,$yr);
381	}
382	$time;
383	}
384
385	##---------------------------------------------------------------------------
386	## Routine to check if time has expired.
387	##
388	sub expired_time {
389	($ExpireTime && (time - $_[0] > $ExpireTime)) \|\|
390	($_[0] < $ExpireDateTime);
391	}
392
393	##---------------------------------------------------------------------------
394	## Get HTML tags for formatting message headers
395	##
396	sub get_header_tags {
397	my($f) = shift;
398	my($ftago, $ftagc, $tago, $tagc);
399
400	## Get user specified tags (this is one funcky looking code)
401	$tag = (defined($HeadHeads{$f}) ?
402	$HeadHeads{$f} : $HeadHeads{"-default-"});
403	$ftag = (defined($HeadFields{$f}) ?
404	$HeadFields{$f} : $HeadFields{"-default-"});
405	if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
406	else { $tago = $tagc = ''; }
407	if ($ftag) { $ftago = "<$ftag>"; $ftagc = "</$ftag>"; }
408	else { $ftago = $ftagc = ''; }
409
410	($tago, $tagc, $ftago, $ftagc);
411	}
412
413	##---------------------------------------------------------------------------
414	## Format message headers in HTML.
415	## $html = htmlize_header($fields_hash_ref);
416	##
417	sub htmlize_header {
418	my $fields = shift;
419	my($key,
420	$tago, $tagc,
421	$ftago, $ftagc,
422	$item,
423	@array);
424	my($tmp);
425
426	my $mesg = "";
427	my %hf = %$fields;
428	foreach $item (@FieldOrder) {
429	if ($item eq '-extra-') {
430	foreach $key (sort keys %hf) {
431	next if $FieldODefs{$key};
432	next if $key =~ /^x-mha-/;
433	delete $hf{$key}, next if &exclude_field($key);
434
435	@array = @{$hf{$key}};
436	foreach $tmp (@array) {
437	$tmp = $HFieldsList{$key} ? mlist_field_add_links($tmp) :
438	&$MHeadCnvFunc($tmp);
439	$tmp = field_add_links($key, $tmp, $fields);
440	($tago, $tagc, $ftago, $ftagc) = get_header_tags($key);
441	$mesg .= join('', $LABELBEG,
442	$tago, htmlize(ucfirst($key)), $tagc,
443	$LABELEND,
444	$FLDBEG, $ftago, $tmp, $ftagc, $FLDEND,
445	"\n");
446	}
447	delete $hf{$key};
448	}
449	} else {
450	if (!&exclude_field($item) && $hf{$item}) {
451	@array = @{$hf{$item}};
452	foreach $tmp (@array) {
453	$tmp = $HFieldsList{$item} ? mlist_field_add_links($tmp) :
454	&$MHeadCnvFunc($tmp);
455	$tmp = field_add_links($item, $tmp, $fields);
456	($tago, $tagc, $ftago, $ftagc) = &get_header_tags($item);
457	$mesg .= join('', $LABELBEG,
458	$tago, htmlize(ucfirst($item)), $tagc,
459	$LABELEND,
460	$FLDBEG, $ftago, $tmp, $ftagc, $FLDEND,
461	"\n");
462	}
463	}
464	delete $hf{$item};
465	}
466	}
467	if ($mesg) { $mesg = $FIELDSBEG . $mesg . $FIELDSEND; }
468	$mesg;
469	}
470
471	##---------------------------------------------------------------------------
472
473	sub mlist_field_add_links {
474	my $txt = shift;
475	my $ret = "";
476	local($_);
477	foreach (split(/(<[^>]+>)/, $txt)) {
478	if (/^<\w+:/) {
479	chop; substr($_, 0, 1) = "";
480	$ret .= qq\|<<a href="$_">$_</a>>\|;
481	} else {
482	$ret .= &$MHeadCnvFunc($_);
483	}
484	}
485	$ret;
486	}
487
488	##---------------------------------------------------------------------------
489	## Routine to add mailto/news links to a message header string.
490	##
491	sub field_add_links {
492	my $label = lc shift;
493	my $fld_text = shift;
494	my $fields = shift;
495
496	LBLSW: {
497	if ($HFieldsAddr{$label}) {
498	if (!$NOMAILTO) {
499	$fld_text =~ s\|([\!\%\w\.\-+=/]+@[\w\.\-]+)
500	\|&mailUrl($1, $fields->{'x-mha-message-id'},
501	$fields->{'x-mha-subject'},
502	$fields->{'x-mha-from'});
503	\|gex;
504	} else {
505	$fld_text =~ s\|([\!\%\w\.\-+=/]+@[\w\.\-]+)
506	\|&htmlize(&rewrite_address($1))
507	\|gex;
508	}
509	last LBLSW;
510	}
511	if ($label eq 'newsgroup') {
512	$fld_text = newsurl($fld_text) unless $NONEWS;
513	last LBLSW;
514	}
515	last LBLSW;
516	}
517	$fld_text;
518	}
519
520
521	##---------------------------------------------------------------------------
522	## Routine to add news links of newsgroups names
523	##
524	sub newsurl {
525	my $str = shift;
526	my $h = "";
527
528	if ($str =~ s/^([^:]:\s)//) {
529	$h = $1;
530	}
531	$str =~ s/\s//g; # Strip whitespace
532	my @groups = split(/,/, $str); # Split groups
533	foreach (@groups) { # Make hyperlinks
534	s\|(.*)\|<a href="news:$1">$1</a>\|;
535	}
536	$h . join(', ', @groups); # Rejoin string
537	}
538
539	##---------------------------------------------------------------------------
540	## $html = mailUrl($email_addr, $msgid, $subject, $from);
541	##
542	sub mailUrl {
543	my $eaddr = shift \|\| '';
544	my $msgid = shift \|\| '';
545	my $sub = shift \|\| '';
546	my $from = shift \|\| '';
547
548	local $_;
549	my($url) = ($MAILTOURL);
550	my($to) = (&urlize($eaddr));
551	my($toname, $todomain) = map { urlize($_) } split(/@/,$eaddr,2);
552	my($froml, $msgidl) = (&urlize($from), &urlize($msgid));
553	my($fromaddrl) = (&extract_email_address($from));
554	my($faddrnamel, $faddrdomainl) = map { urlize($_) } split(/@/,$fromaddrl,2);
555	$fromaddrl = &urlize($fromaddrl);
556	my($subjectl);
557
558	# Add "Re:" to subject if not present
559	if ($sub !~ /^$SubReplyRxp/io) {
560	$subjectl = 'Re:%20' . &urlize($sub);
561	} else {
562	$subjectl = &urlize($sub);
563	}
564	$url =~ s/\$FROM\$/$froml/g;
565	$url =~ s/\$FROMADDR\$/$fromaddrl/g;
566	$url =~ s/\$FROMADDRNAME\$/$faddrnamel/g;
567	$url =~ s/\$FROMADDRDOMAIN\$/$faddrdomainl/g;
568	$url =~ s/\$MSGID\$/$msgidl/g;
569	$url =~ s/\$SUBJECT\$/$subjectl/g;
570	$url =~ s/\$SUBJECTNA\$/$subjectl/g;
571	$url =~ s/\$TO\$/$to/g;
572	$url =~ s/\$TOADDRNAME\$/$toname/g;
573	$url =~ s/\$TOADDRDOMAIN\$/$todomain/g;
574	$url =~ s/\$ADDR\$/$to/g;
575	qq\|<a href="$url">\| . &htmlize(&rewrite_address($eaddr)) . q\|</a>\|;
576	}
577
578	##---------------------------------------------------------------------------##
579	## Routine to parse variable definitions in a string. The
580	## function returns a list of variable/value pairs. The format of
581	## the string is similiar to attribute specification lists in
582	## SGML, but NAMEs are any non-whitespace character.
583	##
584	sub parse_vardef_str {
585	my($org) = shift;
586	my($lower) = shift;
587	my(%hash) = ();
588	my($str, $q, $var, $value);
589
590	($str = $org) =~ s/^\s+//;
591	while ($str =~ s/^([^=\s]+)\s=\s//) {
592	$var = $1;
593	if ($str =~ s/^(['"])//) {
594	$q = $1;
595	if (!($q eq "'" ? $str =~ s/^([^']*)'// :
596	$str =~ s/^([^"]*)"//)) {
597	warn "Warning: Unclosed quote in: $org\n";
598	return ();
599	}
600	$value = $1;
601
602	} else {
603	if ($str =~ s/^(\S+)//) {
604	$value = $1;
605	} else {
606	warn "Warning: No value after $var in: $org\n";
607	return ();
608	}
609	}
610	$str =~ s/^\s+//;
611	$hash{$lower? lc($var): $var} = $value;
612	}
613	if ($str =~ /\S/) {
614	warn "Warning: Trailing characters in: $org\n";
615	}
616	%hash;
617	}
618
619	##---------------------------------------------------------------------------##
620
621	sub msgid_to_filename {
622	my $msgid = shift;
623	if ($VMS) {
624	$msgid =~ s/([^\w\-])/sprintf("=%02X",unpack("C",$1))/geo;
625	} else {
626	$msgid =~ s/([^\w.\-\@])/sprintf("=%02X",unpack("C",$1))/geo;
627	}
628	$msgid;
629	}
630
631	##---------------------------------------------------------------------------##
632	## Check if new follow up list for a message is different from
633	## old follow up list.
634	##
635	sub is_follow_ups_diff {
636	my $f = $Follow{$_[0]};
637	my $o = $FollowOld{$_[0]};
638	if (defined($f) && defined($o)) {
639	return 1 unless @$f == @$o;
640	local $^W = 0;
641	my $i;
642	for ($i=0; $i < @$f; ++$i) {
643	return 1 if $f->[$i] ne $o->[$i];
644	}
645	return 0;
646	}
647	return (defined($f) \|\| defined($o));
648	}
649
650	##---------------------------------------------------------------------------##
651	## Retrieve icon URL for specified content-type.
652	##
653	sub get_icon_url {
654	my $ctype = shift;
655	my $icon = $Icons{$ctype};
656	ICON: {
657	last ICON if defined $icon;
658	if ($ctype =~ s\|/.*\|\|) {
659	$ctype .= '/*';
660	$icon = $Icons{$ctype};
661	last ICON if defined $icon;
662	}
663	$icon = $Icons{'/'} \|\| $Icons{'unknown'};
664	}
665	if (!defined($icon)) {
666	return (undef, undef, undef);
667	}
668	if ($icon =~ s/\[(\d+)x(\d+)\]//) {
669	return ($icon, $1, $2);
670	}
671	($icon, undef, undef);
672	}
673
674	##---------------------------------------------------------------------------##
675
676	sub log_mesg {
677	my $fh = shift;
678	my $doDate = shift;
679
680	if ($doDate) {
681	my($sec,$min,$hour,$mday,$mon,$year) = localtime(time);
682	print $fh sprintf("[%4d-%02d-%02d %02d:%02d:%02d] ",
683	$year+1900, $mon+1, $mday, $hour, $min, $sec);
684	}
685	print $fh @_;
686	}
687
688	##---------------------------------------------------------------------------##
689
690	sub dump_hash {
691	my $fh = shift;
692	my $h = shift;
693	local $_;
694	foreach (sort keys %$h) {
695	print $fh "$_ => ", $h->{$_}, "\n";
696	}
697	}
698
699	##---------------------------------------------------------------------------##
700	1;