##---------------------------------------------------------------------------##
## $Id: mhtxtsetext.pl,v 2.3 2001/08/25 20:01:31 ehood Exp $
## Library to convert text/setext to HTML. Adapted for use in MHonArc
## by ehood@medusa.acs.uci.edu, Sept 1994.
## Filter routine can be registered with the following:
## text/setext:m2h_text_setext'filter:mhtxtsetext.pl
## text/x-setext:m2h_text_setext'filter:mhtxtsetext.pl
##---------------------------------------------------------------------------##
# setext -> HTML converter
# $Id: mhtxtsetext.pl,v 2.3 2001/08/25 20:01:31 ehood Exp $
# Tony Sanders <sanders@earth.com>, June 1993
# header-tt passed untouched (XXX: use Subject: in next release)
# title-tt <H1>...</H1> (and <TITLE> if needed)
# subhead-tt <H2>...</H2> (and <TITLE> if needed)
# indent-tt reflows paragraphs
# underline-tt <I>...</I>
# hot-tt <A HREF="...">...</A> (see also href-tt)
# quote-tt <BLOCKQUOTE>...</BLOCKQUOTE>
# NIY bullet-tt <UL>...</UL>
# suppress-tt suppressed in output
# Additional typotags supported for HTML:
# isindex-tt .. <isindex>
# setext'html -- converts setext (.etx files) to HTML
# setext'title -- utility routine to convert setext titles and subheads to HTML
# I need to figure out how to allow HTML markup in the text while at the
# same time suppresing "unintentional" markup. For now < & > are HTML'ized.
# Define the translations supported
# $trans{'text/setext'} = "text/html:setext'html";
$FMT = 0; # in free flow text (normal HTML mode)
$PRE = 1; # in preformated text <PRE>...</PRE>
$QUOTE = 2; # in blockquote <BLOCKQUOTE>...</BLOCKQUOTE>
my(@data) = split(/\n/,$$body);
# first pass, process <HEAD> items and hypertext link information
for ($i = 0; $i <= $#data; $i++) {
$_ = $data[$i]; # $_ is default for m//
# <ISINDEX> must be inside <HEAD>...</HEAD>
do { $data[$i] = ".."; next; };
# locate HREF's: .. _href URL
/^\.\.\s+_([^\s]*)\s+(.*)\s*/ && do { $href{$1} = $2; next; };
# first title-tt or subhead-tt gets <TITLE>...</TITLE>
# &title also adds the <H#>...</H#> to the appropriate line
/^===/ && do { &title
("H1", $i); next; };
/^---/ && do { &title
("H2", $i); next; };
# second pass, handle remaining typotags
# process title information
/^\.\.\s+(<H.>)(.*)(<\/H
.>)/i
&& do {
&to_fmt
; $ret .= $1. &htmlize
($2). $3. "\n"; next; };
if ($curstate == $FMT && /^\s*$/) {
$ret .= "<P>\n" unless $fold++; next; }
if (/^>\s/) { &to_quote
; }
elsif (/^ [^ ]/) { &to_fmt
; }
s/^>\s*//; # fix quote-tt
s/^ ([^ ])/$1/; # fix indent-tt
s
#\*\*([^\*]*)\*\*#\376B\377$1\376/B\377#;
s
#~([^~]*)~#\376I\377$1\376/I\377#;
$h = $href{$1}; ($a = $1) =~ s
,_
, ,g
;
$h ?
qq'\376A HREF="$h"\377$a\376/A\377' : "\376I\377$a\376/I\377"; #e;
($a = $1) =~ s
,_
, ,g
; "\376I\377$a\376/I\377"; #e;
$ret .= &htmlize
($_). "\n";
return if $curstate == $FMT;
$ret .= "</PRE>\n" if $curstate == $PRE;
$ret .= "</PRE></BLOCKQUOTE>\n" if $curstate == $QUOTE; #XXX
return if $curstate == $PRE;
$ret .= "<PRE>\n" if $curstate == $FMT;
$ret .= "</PRE></BLOCKQUOTE><PRE>\n" if $curstate == $QUOTE; #XXX
return if $curstate == $QUOTE;
$ret .= "<BLOCKQUOTE><PRE>\n" if $curstate == $FMT; #XXX
$ret .= "</PRE><BLOCKQUOTE><PRE>\n" if $curstate == $PRE; #XXX
s/\&/\&\#38\;/g; s/\</\&\#60\;/g; s/\>/\&\#62\;/g;
s/\376/</g; s/\377/>/g; # convert back
$data[$i--] = ".."; $data[$i] =~ s/^\s*//;
# $ret .= "<TITLE>$data[$i]</TITLE>\n" unless $title++;
$data[$i] = ".. <$head>" . $data[$i] . "</$head>";