##---------------------------------------------------------------------------##
## $Id: mhutil.pl,v 2.20 2002/10/20 02:53:00 ehood Exp $
## Earl Hood mhonarc@mhonarc.org
## Utility routines for MHonArc
##---------------------------------------------------------------------------##
## MHonArc -- Internet mail-to-HTML converter
## Copyright (C) 1995-1999 Earl Hood, mhonarc@mhonarc.org
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
##---------------------------------------------------------------------------##
## RFC 2369 header fields to check for URLs
## Header fields that contain addresses
##---------------------------------------------------------------------------
## Clip text to specified length.
my $str = \
shift; # Prevent unnecessary copy.
my $len = shift; # Clip length
my $is_html = shift; # If entity references should be considered
my $has_tags = shift; # If html tags should be stripped
return substr($$str, 0, $len);
my $html_len = length($$str);
my($pos, $sublen, $erlen, $real_len);
for ( $pos=0, $sublen=$len; $pos < $html_len; ) {
$subtext = substr($$str, $pos, $sublen);
$subtext =~ s/<[^>]*>//g;
# Check if clipped part of a tag
if ($subtext =~ s/<[^>]*\Z//) {
my $gt = index($$str, '>', $pos);
$pos = ($gt < 0) ?
$html_len : ($gt+1);
# check for clipped entity reference
if (($pos < $html_len) && ($subtext =~ /\&[^;]*\Z/)) {
my $semi = index($$str, ';', $pos);
# malformed entity reference
$subtext .= substr($$str, $pos);
$subtext .= substr($$str, $pos, $semi-$pos+1);
# compute entity reference lengths to determine "real" character
# count and not raw character count.
while ($subtext =~ /(\&[^;]+);/g) {
$real_len = length($text)-$er_len;
$sublen = $len - (length($text)-$er_len);
##---------------------------------------------------------------------------
## Get an e-mail address from (HTML) $str.
sub extract_email_address
{
return '' unless defined $_[0];
if ($str =~ /($AddrExp)/o) {
if ($str =~ s/\([^\)]+\)//) {
##---------------------------------------------------------------------------
## Get an e-mail name from $str.
if ($str =~ s/<(\S+)>//) { # Check for: name <addr>
} elsif ($str =~ /"([^"]+)"/) { # Name in ""'s
} elsif ($str =~ /\(([^\)]+)\)/) { # Name in ()'s
($ret = $str) =~ s/@.*//;
$ret =~ s/^["\s]+//g; $ret =~ s/["\s]+$//g;
##---------------------------------------------------------------------------
## Routine to sort messages
my($nosort, $subsort, $authsort, $revsort) = @_;
$nosort = $NOSORT if !defined($nosort);
$subsort = $SUBSORT if !defined($subsort);
$authsort = $AUTHSORT if !defined($authsort);
$revsort = $REVSORT if !defined($revsort);
return sort { $IndexNum{$b} <=> $IndexNum{$a} } keys %Subject;
return sort { $IndexNum{$a} <=> $IndexNum{$b} } keys %Subject;
my $hs = scalar(%Subject); $hs =~ s
|^[^/]+/||;
while (($idx, $sub) = each(%Subject)) {
1 while $sub =~ s/$SubReplyRxp//io;
$sub =~ s/$SubArtRxp//io;
return sort { ($sub{$a} cmp $sub{$b}) ||
(get_time_from_index
($b) <=> get_time_from_index
($a))
return sort { ($sub{$a} cmp $sub{$b}) ||
(get_time_from_index
($a) <=> get_time_from_index
($b))
my $hs = scalar(%From); $hs =~ s
|^[^/]+/||;
while (($idx, $from) = each(%From)) {
$from = lc extract_email_name
($from);
return sort { ($from{$a} cmp $from{$b}) ||
(get_time_from_index
($b) <=> get_time_from_index
($a))
return sort { ($from{$a} cmp $from{$b}) ||
(get_time_from_index
($a) <=> get_time_from_index
($b))
return sort { (get_time_from_index
($b) <=> get_time_from_index
($a))
|| ($IndexNum{$b} <=> $IndexNum{$a})
return sort { (get_time_from_index
($a) <=> get_time_from_index
($b))
|| ($IndexNum{$a} <=> $IndexNum{$b})
##---------------------------------------------------------------------------
## Message-sort routines for sort().
(&get_time_from_index
($a) <=> &get_time_from_index
($b)) ||
($IndexNum{$a} <=> $IndexNum{$b});
##---------------------------------------------------------------------------
## Routine for formating a message number for use in filenames or links.
##---------------------------------------------------------------------------
## Routine to get filename of a message number.
my($fmtstr) = "$MsgPrefix%05d.$HtmlExt";
$fmtstr .= ".gz" if $GzipLinks;
##---------------------------------------------------------------------------
## Routine to get filename of an index
sub get_filename_from_index
{
&msgnum_filename
($IndexNum{$_[0]});
##---------------------------------------------------------------------------
## Routine to get time component from index
sub get_time_from_index
{
(split(/$X/o, $_[0], 2))[0];
##---------------------------------------------------------------------------
## Routine to get annotation of a message
my $file = join($DIRSEP, get_note_dir
(),
msgid_to_filename
($Index2MsgId{$index}));
if (!open(NOTEFILE
, $file)) { return ""; }
my $ret = join("", <NOTEFILE
>);
##---------------------------------------------------------------------------
## Routine to determine if a message has an annotation
-e
join($DIRSEP, get_note_dir
(),
msgid_to_filename
($Index2MsgId{$index}));
##---------------------------------------------------------------------------
## Routine to get full pathname to annotation directory
if (!OSis_absolute_path
($NoteDir)) {
return join($DIRSEP, $OUTDIR, $NoteDir);
##---------------------------------------------------------------------------
## Routine to get lc author name from index
lc extract_email_name
($From{$_[0]});
##---------------------------------------------------------------------------
## Determine time from date. Use %Zone for timezone offsets
my($mday, $mon, $yr, $hr, $min, $sec, $zone) = @_;
$yr -= 1900 if $yr >= 1900; # if given full 4 digit year
$yr += 100 if $yr <= 37; # in case of 2 digit years
if (($yr < 70) || ($yr > 137)) {
warn "Warning: Bad year (", $yr+1900, ") using current\n";
$yr = (localtime(time))[5];
## If $zone, grab gmt time, else grab local
$time = &timegm
($sec,$min,$hr,$mday,$mon,$yr);
# try to modify time/date based on timezone
if ($zone =~ /^[\+-]\d+$/) {
$time -= &zone_offset_to_secs
($zone);
if (defined($Zone{$zone})) {
$time += &zone_offset_to_secs
($Zone{$zone});
if ($zone =~ /^([A-Z]\w+)([\+-]\d+)$/) {
$time -= &zone_offset_to_secs
($2);
if (defined($Zone{$1})) {
$time += &zone_offset_to_secs
($Zone{$1});
warn qq|Warning
: Unrecognized
time zone
, "$zone"\n|;
$time = &timelocal
($sec,$min,$hr,$mday,$mon,$yr);
##---------------------------------------------------------------------------
## Routine to check if time has expired.
($ExpireTime && (time - $_[0] > $ExpireTime)) ||
($_[0] < $ExpireDateTime);
##---------------------------------------------------------------------------
## Get HTML tags for formatting message headers
my($ftago, $ftagc, $tago, $tagc);
## Get user specified tags (this is one funcky looking code)
$tag = (defined($HeadHeads{$f}) ?
$HeadHeads{$f} : $HeadHeads{"-default-"});
$ftag = (defined($HeadFields{$f}) ?
$HeadFields{$f} : $HeadFields{"-default-"});
if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
else { $tago = $tagc = ''; }
if ($ftag) { $ftago = "<$ftag>"; $ftagc = "</$ftag>"; }
else { $ftago = $ftagc = ''; }
($tago, $tagc, $ftago, $ftagc);
##---------------------------------------------------------------------------
## Format message headers in HTML.
## $html = htmlize_header($fields_hash_ref);
foreach $item (@FieldOrder) {
if ($item eq '-extra-') {
foreach $key (sort keys %hf) {
next if $FieldODefs{$key};
next if $key =~ /^x-mha-/;
delete $hf{$key}, next if &exclude_field
($key);
$tmp = $HFieldsList{$key} ? mlist_field_add_links
($tmp) :
$tmp = field_add_links
($key, $tmp, $fields);
($tago, $tagc, $ftago, $ftagc) = get_header_tags
($key);
$mesg .= join('', $LABELBEG,
$tago, htmlize
(ucfirst($key)), $tagc,
$FLDBEG, $ftago, $tmp, $ftagc, $FLDEND,
if (!&exclude_field
($item) && $hf{$item}) {
$tmp = $HFieldsList{$item} ? mlist_field_add_links
($tmp) :
$tmp = field_add_links
($item, $tmp, $fields);
($tago, $tagc, $ftago, $ftagc) = &get_header_tags
($item);
$mesg .= join('', $LABELBEG,
$tago, htmlize
(ucfirst($item)), $tagc,
$FLDBEG, $ftago, $tmp, $ftagc, $FLDEND,
if ($mesg) { $mesg = $FIELDSBEG . $mesg . $FIELDSEND; }
##---------------------------------------------------------------------------
sub mlist_field_add_links
{
foreach (split(/(<[^>]+>)/, $txt)) {
chop; substr($_, 0, 1) = "";
$ret .= qq|<<a href
="$_">$_</a
>>|;
$ret .= &$MHeadCnvFunc($_);
##---------------------------------------------------------------------------
## Routine to add mailto/news links to a message header string.
if ($HFieldsAddr{$label}) {
$fld_text =~ s
|([\
!\
%\w\
.\
-+=/]+@
[\w\
.\
-]+)
|&mailUrl
($1, $fields->{'x-mha-message-id'},
$fields->{'x-mha-subject'},
$fields->{'x-mha-from'});
$fld_text =~ s
|([\
!\
%\w\
.\
-+=/]+@
[\w\
.\
-]+)
|&htmlize
(&rewrite_address
($1))
if ($label eq 'newsgroup') {
$fld_text = newsurl
($fld_text) unless $NONEWS;
##---------------------------------------------------------------------------
## Routine to add news links of newsgroups names
if ($str =~ s/^([^:]*:\s*)//) {
$str =~ s/\s//g; # Strip whitespace
my @groups = split(/,/, $str); # Split groups
foreach (@groups) { # Make hyperlinks
s
|(.*)|<a href
="news:$1">$1</a
>|;
$h . join(', ', @groups); # Rejoin string
##---------------------------------------------------------------------------
## $html = mailUrl($email_addr, $msgid, $subject, $from);
my($to) = (&urlize
($eaddr));
my($toname, $todomain) = map { urlize
($_) } split(/@/,$eaddr,2);
my($froml, $msgidl) = (&urlize
($from), &urlize
($msgid));
my($fromaddrl) = (&extract_email_address
($from));
my($faddrnamel, $faddrdomainl) = map { urlize
($_) } split(/@/,$fromaddrl,2);
$fromaddrl = &urlize
($fromaddrl);
# Add "Re:" to subject if not present
if ($sub !~ /^$SubReplyRxp/io) {
$subjectl = 'Re:%20' . &urlize
($sub);
$subjectl = &urlize
($sub);
$url =~ s/\$FROM\$/$froml/g;
$url =~ s/\$FROMADDR\$/$fromaddrl/g;
$url =~ s/\$FROMADDRNAME\$/$faddrnamel/g;
$url =~ s/\$FROMADDRDOMAIN\$/$faddrdomainl/g;
$url =~ s/\$MSGID\$/$msgidl/g;
$url =~ s/\$SUBJECT\$/$subjectl/g;
$url =~ s/\$SUBJECTNA\$/$subjectl/g;
$url =~ s/\$TOADDRNAME\$/$toname/g;
$url =~ s/\$TOADDRDOMAIN\$/$todomain/g;
$url =~ s/\$ADDR\$/$to/g;
qq|<a href
="$url">| . &htmlize
(&rewrite_address
($eaddr)) . q
|</a
>|;
##---------------------------------------------------------------------------##
## Routine to parse variable definitions in a string. The
## function returns a list of variable/value pairs. The format of
## the string is similiar to attribute specification lists in
## SGML, but NAMEs are any non-whitespace character.
my($str, $q, $var, $value);
($str = $org) =~ s/^\s+//;
while ($str =~ s/^([^=\s]+)\s*=\s*//) {
if ($str =~ s/^(['"])//) {
if (!($q eq "'" ?
$str =~ s/^([^']*)'// :
$str =~ s/^([^"]*)"//)) {
warn "Warning: Unclosed quote in: $org\n";
if ($str =~ s/^(\S+)//) {
warn "Warning: No value after $var in: $org\n";
$hash{$lower?
lc($var): $var} = $value;
warn "Warning: Trailing characters in: $org\n";
##---------------------------------------------------------------------------##
$msgid =~ s/([^\w\-])/sprintf("=%02X",unpack("C",$1))/geo;
$msgid =~ s/([^\w.\-\@])/sprintf("=%02X",unpack("C",$1))/geo;
##---------------------------------------------------------------------------##
## Check if new follow up list for a message is different from
my $o = $FollowOld{$_[0]};
if (defined($f) && defined($o)) {
return 1 unless @
$f == @
$o;
for ($i=0; $i < @
$f; ++$i) {
return 1 if $f->[$i] ne $o->[$i];
return (defined($f) || defined($o));
##---------------------------------------------------------------------------##
## Retrieve icon URL for specified content-type.
my $icon = $Icons{$ctype};
last ICON
if defined $icon;
last ICON
if defined $icon;
$icon = $Icons{'*/*'} || $Icons{'unknown'};
return (undef, undef, undef);
if ($icon =~ s/\[(\d+)x(\d+)\]//) {
##---------------------------------------------------------------------------##
my($sec,$min,$hour,$mday,$mon,$year) = localtime(time);
print $fh sprintf("[%4d-%02d-%02d %02d:%02d:%02d] ",
$year+1900, $mon+1, $mday, $hour, $min, $sec);
##---------------------------------------------------------------------------##
foreach (sort keys %$h) {
print $fh "$_ => ", $h->{$_}, "\n";
##---------------------------------------------------------------------------##