| 1 | ##---------------------------------------------------------------------------## |
| 2 | ## $Id: mhtxtsetext.pl,v 2.3 2001/08/25 20:01:31 ehood Exp $ |
| 3 | ## Library to convert text/setext to HTML. Adapted for use in MHonArc |
| 4 | ## by ehood@medusa.acs.uci.edu, Sept 1994. |
| 5 | ## Filter routine can be registered with the following: |
| 6 | ## <MIMEFILTERS> |
| 7 | ## text/setext:m2h_text_setext'filter:mhtxtsetext.pl |
| 8 | ## text/x-setext:m2h_text_setext'filter:mhtxtsetext.pl |
| 9 | ## </MIMEFILTERS> |
| 10 | ##---------------------------------------------------------------------------## |
| 11 | # setext -> HTML converter |
| 12 | # |
| 13 | # $Id: mhtxtsetext.pl,v 2.3 2001/08/25 20:01:31 ehood Exp $ |
| 14 | # |
| 15 | # Tony Sanders <sanders@earth.com>, June 1993 |
| 16 | # |
| 17 | # Status of typotags: |
| 18 | # header-tt passed untouched (XXX: use Subject: in next release) |
| 19 | # title-tt <H1>...</H1> (and <TITLE> if needed) |
| 20 | # subhead-tt <H2>...</H2> (and <TITLE> if needed) |
| 21 | # indent-tt reflows paragraphs |
| 22 | # |
| 23 | # bold-tt <B>...</B> |
| 24 | # italic-tt <I>...</I> |
| 25 | # underline-tt <I>...</I> |
| 26 | # hot-tt <A HREF="...">...</A> (see also href-tt) |
| 27 | # |
| 28 | # quote-tt <BLOCKQUOTE>...</BLOCKQUOTE> |
| 29 | # NIY bullet-tt <UL>...</UL> |
| 30 | # |
| 31 | # twobuck-tt ignored |
| 32 | # suppress-tt suppressed in output |
| 33 | # twodot-tt ignored |
| 34 | # |
| 35 | # Additional typotags supported for HTML: |
| 36 | # href-tt .. _text HREF |
| 37 | # isindex-tt .. <isindex> |
| 38 | # |
| 39 | # setext'html -- converts setext (.etx files) to HTML |
| 40 | # setext'title -- utility routine to convert setext titles and subheads to HTML |
| 41 | # |
| 42 | |
| 43 | # TODO:XXX |
| 44 | # I need to figure out how to allow HTML markup in the text while at the |
| 45 | # same time suppressing "unintentional" markup. For now < & > are HTML'ized. |
| 46 | |
| 47 | # Define the translations supported |
| 48 | # $trans{'text/setext'} = "text/html:setext'html"; |
| 49 | |
package m2h_text_setext;

# Parser states.  filter() and the to_* helpers keep the current one in
# $curstate:
#   $FMT   -- free-flowing text (normal HTML mode)
#   $PRE   -- preformatted text, inside <PRE>...</PRE>
#   $QUOTE -- quoted text, inside <BLOCKQUOTE>...</BLOCKQUOTE>
($FMT, $PRE, $QUOTE) = (0, 1, 2);
| 56 | |
##---------------------------------------------------------------------------##
##  filter(FIELDS, BODY) -- convert a text/setext body into an HTML fragment.
##
##    FIELDS  first argument passed by the caller; not used here.
##    BODY    reference to a scalar holding the setext text.
##
##  Returns a one-element list containing the generated HTML.  The HTML is
##  also left in the package variable $ret, which the to_* helpers append to.
##
##  The text is processed in two passes: the first records ".. _name URL"
##  link targets and rewrites title/subhead underlines (via title()); the
##  second emits HTML line by line, switching between the $FMT, $PRE and
##  $QUOTE states.
##
##  NOTE: the bytes \376 and \377 are used as stand-ins for '<' and '>'
##  until htmlize() runs, so those two bytes occurring in the input itself
##  also come out as '<' and '>'.
##
sub filter {
    my($fields, $body) = @_;

    # @data has to be a package variable (dynamically scoped), not a
    # lexical: title() rewrites the lines through the package @data, so a
    # my() array here would never see the headings it creates.  local()
    # also gives %href and $fold a clean start, so link targets and
    # blank-line state from a previous message cannot leak into this one.
    local(@data) = split(/\n/, $$body);
    local(%href);
    local($fold) = 0;

    $ret = '';

    # First pass: process <HEAD> items and hypertext link information.
    # A lexical index and line copy keep the caller's $_ and $i intact.
    for my $i (0 .. $#data) {
        my $line = $data[$i];

        # <ISINDEX> must be inside <HEAD>...</HEAD>; suppress it in the body.
        if ($line =~ /^\.\.\s+<isindex>/i) {
            $data[$i] = "..";
            next;
        }

        # Locate HREF's: ".. _name URL".  The URL is captured without
        # trailing whitespace.
        if ($line =~ /^\.\.\s+_(\S*)\s+(.*?)\s*$/) {
            $href{$1} = $2;
            next;
        }

        # title() turns the line above the underline into a
        # ".. <H#>...</H#>" marker and blanks the underline itself.
        if ($line =~ /^===/) { title("H1", $i); next; }
        if ($line =~ /^---/) { title("H2", $i); next; }
    }

    # Second pass: handle the remaining typotags.
    $curstate = $FMT;
    foreach my $text (@data) {
        # Heading markers left by title(): tags pass through, text is escaped.
        if ($text =~ /^\.\.\s+(<H.>)(.*)(<\/H.>)/i) {
            my($open, $inner, $close) = ($1, $2, $3);
            to_fmt();
            $ret .= $open . htmlize($inner) . $close . "\n";
            next;
        }

        # Every other ".." line is suppressed in the output.
        next if $text =~ /^\.\./;

        # Handle line breaks: a run of blank lines in free-flow text
        # produces a single <P>.
        if ($curstate == $FMT && $text =~ /^\s*$/) {
            $ret .= "<P>\n" unless $fold++;
            next;
        }
        $fold = 0;

        # State transitions.
        # NOTE(review): setext's indent-tt is conventionally two leading
        # spaces; the patterns here match a single one -- confirm against
        # upstream before changing.
        if    ($text =~ /^>\s/)   { to_quote(); }
        elsif ($text =~ /^ [^ ]/) { to_fmt();   }
        else                      { to_pre();   }

        $text =~ s/^>\s*//;         # strip quote-tt prefix
        $text =~ s/^ ([^ ])/$1/;    # strip indent-tt prefix

        # bold-tt (first occurrence on the line only)
        $text =~ s#\*\*([^\*]*)\*\*#\376B\377$1\376/B\377#;
        # italic-tt
        $text =~ s#~([^~]*)~#\376I\377$1\376/I\377#;
        # hot-tt: "word_" becomes a link when a target was recorded for
        # it, otherwise italics.  A double quote in the target is written
        # as %22 so it cannot terminate the HREF attribute.
        $text =~ s{\b(\S*)_\b}{
            my $word = $1;
            my $url  = $href{$word};
            (my $label = $word) =~ tr/_/ /;
            $url =~ s/"/%22/g if $url;
            $url ? qq'\376A HREF="$url"\377$label\376/A\377'
                 : "\376I\377$label\376/I\377";
        }e;
        # underline-tt
        $text =~ s{_(\S*)_}{
            (my $label = $1) =~ tr/_/ /;
            "\376I\377$label\376/I\377";
        }e;

        $ret .= htmlize($text) . "\n";
    }

    # Close whatever <PRE>/<BLOCKQUOTE> is still open.
    to_fmt();
    return ($ret);
}
| 116 | |
# to_fmt -- switch the parser back to free-flow text ($FMT).
# Appends the closing tags for the state being left to $ret; does nothing
# when the parser is already in $FMT.
sub to_fmt {
    if ($curstate == $FMT) { return; }

    if ($curstate == $PRE) {
        $ret .= "</PRE>\n";
    }
    if ($curstate == $QUOTE) {          #XXX
        $ret .= "</PRE></BLOCKQUOTE>\n";
    }

    $curstate = $FMT;
}
# to_pre -- switch the parser to preformatted text ($PRE).
# Appends to $ret the tags needed to leave the current state and open
# <PRE>; does nothing when the parser is already in $PRE.
sub to_pre {
    if ($curstate == $PRE) { return; }

    if ($curstate == $FMT) {
        $ret .= "<PRE>\n";
    }
    if ($curstate == $QUOTE) {          #XXX
        $ret .= "</PRE></BLOCKQUOTE><PRE>\n";
    }

    $curstate = $PRE;
}
# to_quote -- switch the parser to quoted text ($QUOTE).
# Appends to $ret the tags needed to leave the current state and open
# <BLOCKQUOTE><PRE>; does nothing when the parser is already in $QUOTE.
sub to_quote {
    if ($curstate == $QUOTE) { return; }

    if ($curstate == $FMT) {            #XXX
        $ret .= "<BLOCKQUOTE><PRE>\n";
    }
    if ($curstate == $PRE) {            #XXX
        $ret .= "</PRE><BLOCKQUOTE><PRE>\n";
    }

    $curstate = $QUOTE;
}
# htmlize(TEXT) -- return TEXT made safe for inclusion in HTML.
#
# '&', '<' and '>' are replaced by numeric character references, after
# which the placeholder bytes \376 and \377 are turned into a real '<'
# and '>'.
#
# NOTE: a \376 or \377 byte that was already present in TEXT is
# indistinguishable from a placeholder and is converted as well.
sub htmlize {
    my($text) = @_;

    # '&' goes first so the references produced below are not re-escaped.
    $text =~ s/&/&#38;/g;
    $text =~ s/</&#60;/g;
    $text =~ s/>/&#62;/g;

    # Convert the markup placeholders back into tag delimiters.
    $text =~ s/\376/</g;
    $text =~ s/\377/>/g;

    $text;
}
# title(HEAD, I) -- turn a setext title/subhead into a heading marker.
#
#   HEAD  HTML element name to wrap the heading in (e.g. "H1" or "H2").
#   I     index into the package array @data of the underline line.
#
# The underline $data[I] is replaced by ".." so it is suppressed later.
# The line above it has its leading whitespace removed and is rewritten
# as ".. <HEAD>text</HEAD>".
#
# When I is 0 there is no line above the underline; only the underline is
# suppressed.  (Indexing $data[-1] here would rewrite the LAST line of the
# document as a heading.)
sub title {
    my($head, $i) = @_;

    $data[$i] = "..";
    return if $i < 1;

    my $above = $i - 1;
    $data[$above] =~ s/^\s*//;
    # A <TITLE> element is deliberately not emitted (disabled upstream).
    $data[$above] = ".. <$head>" . $data[$above] . "</$head>";
}
| 147 | |
| 148 | 1; |