Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | ##---------------------------------------------------------------------------## |
2 | ## $Id: mhtxtsetext.pl,v 2.3 2001/08/25 20:01:31 ehood Exp $ | |
3 | ## Library to convert text/setext to HTML. Adapted for use in MHonArc | |
4 | ## by ehood@medusa.acs.uci.edu, Sept 1994. | |
5 | ## Filter routine can be registered with the following: | |
6 | ## <MIMEFILTERS> | |
7 | ## text/setext:m2h_text_setext'filter:mhtxtsetext.pl | |
8 | ## text/x-setext:m2h_text_setext'filter:mhtxtsetext.pl | |
9 | ## </MIMEFILTERS> | |
10 | ##---------------------------------------------------------------------------## | |
11 | # setext -> HTML converter | |
12 | # | |
13 | # $Id: mhtxtsetext.pl,v 2.3 2001/08/25 20:01:31 ehood Exp $ | |
14 | # | |
15 | # Tony Sanders <sanders@earth.com>, June 1993 | |
16 | # | |
17 | # Status of typotags: | |
18 | # header-tt passed untouched (XXX: use Subject: in next release) | |
19 | # title-tt <H1>...</H1> (and <TITLE> if needed) | |
20 | # subhead-tt <H2>...</H2> (and <TITLE> if needed) | |
21 | # indent-tt reflows paragraphs | |
22 | # | |
23 | # bold-tt <B>...</B> | |
24 | # italic-tt <I>...</I> | |
25 | # underline-tt <I>...</I> | |
26 | # hot-tt <A HREF="...">...</A> (see also href-tt) | |
27 | # | |
28 | # quote-tt <BLOCKQUOTE>...</BLOCKQUOTE> | |
29 | # NIY bullet-tt <UL>...</UL> | |
30 | # | |
31 | # twobuck-tt ignored | |
32 | # suppress-tt suppressed in output | |
33 | # twodot-tt ignored | |
34 | # | |
35 | # Additional typotags supported for HTML: | |
36 | # href-tt .. _text HREF | |
37 | # isindex-tt .. <isindex> | |
38 | # | |
39 | # setext'html -- converts setext (.etx files) to HTML | |
40 | # setext'title -- utility routine to convert setext titles and subheads to HTML | |
41 | # | |
42 | ||
43 | # TODO:XXX | |
44 | # I need to figure out how to allow HTML markup in the text while at the | |
44 | # same time suppressing "unintentional" markup. For now < & > are HTML'ized. | |
46 | ||
47 | # Define the translations supported | |
48 | # $trans{'text/setext'} = "text/html:setext'html"; | |
49 | ||
package m2h_text_setext;

# Parser states.  These are package variables (not constants) because
# filter() and the to_fmt/to_pre/to_quote routines compare $curstate
# against them by name.
#   $FMT   -- in free flow text (normal HTML mode)
#   $PRE   -- in preformatted text <PRE>...</PRE>
#   $QUOTE -- in blockquote <BLOCKQUOTE>...</BLOCKQUOTE>
($FMT, $PRE, $QUOTE) = (0, 1, 2);
56 | ||
##---------------------------------------------------------------------------
## filter -- convert a text/setext message body to HTML.
##
## Arguments:
##   $fields -- message header fields (not referenced by this routine)
##   $body   -- reference to a scalar holding the message body text
## Returns:
##   A one-element list containing the generated HTML.
##
## The body is processed in two passes: the first collects href-tt link
## targets and rewrites title/subhead lines via title(); the second walks
## the lines, switches between the FMT/PRE/QUOTE states, and applies the
## inline typotags.
##
sub filter {
    my($fields, $body) = @_;

    # Working state is dynamically scoped (local) rather than lexical:
    # title(), to_fmt(), to_pre() and to_quote() reach @data, $ret and
    # $curstate through the package symbol table, so a "my" variable here
    # would be invisible to them.  Localizing also keeps one message's
    # link targets and blank-line count from leaking into the next call.
    local(@data)     = split(/\n/, $$body);
    local($ret)      = '';
    local($curstate) = $FMT;
    local($fold)     = 0;
    local(%href);
    local($_);
    my($i);

    # htmlize() maps the bytes \376 and \377 back to "<" and ">".  Drop
    # any that arrive in the message itself so they cannot be used to
    # smuggle raw markup into the output.
    foreach (@data) { tr/\376\377//d; }

    # first pass, process <HEAD> items and hypertext link information
    for ($i = 0; $i <= $#data; $i++) {
        $_ = $data[$i];                 # $_ is default for m//

        # <ISINDEX> must be inside <HEAD>...</HEAD>; suppress the line
        if (/^\.\.\s+<isindex>/i) {
            $data[$i] = "..";
            next;
        }

        # locate HREF's: .. _href URL
        # (non-greedy capture so trailing whitespace, e.g. a CR, is dropped)
        if (/^\.\.\s+_([^\s]*)\s+(.*?)\s*$/) {
            my($name, $url) = ($1, $2);
            # The URL ends up inside HREF="..."; a literal quote would
            # terminate the attribute, so percent-encode it.
            $url =~ s/"/%22/g;
            $href{$name} = $url;
            next;
        }

        # title-tt / subhead-tt underlines: title() wraps the preceding
        # line in <H#>...</H#> and suppresses the underline itself
        if (/^===/) { title("H1", $i); next; }
        if (/^---/) { title("H2", $i); next; }
    }

    # second pass, handle remaining typotags
    foreach (@data) {
        # process title information produced by title()
        if (/^\.\.\s+(<H.>)(.*)(<\/H.>)/i) {
            my($open, $text, $close) = ($1, $2, $3);
            to_fmt();
            $ret .= $open . htmlize($text) . $close . "\n";
            next;
        }
        # every other ".." line is suppressed in the output
        next if /^\.\./;

        # handle line breaks: a run of blank lines in free-flow text
        # yields a single <P>
        if ($curstate == $FMT && /^\s*$/) {
            $ret .= "<P>\n" unless $fold++;
            next;
        }
        $fold = 0;

        # state transitions
        if    (/^>\s/)   { to_quote(); }
        elsif (/^ [^ ]/) { to_fmt(); }
        else             { to_pre(); }

        s/^>\s*//;              # fix quote-tt
        s/^ ([^ ])/$1/;         # fix indent-tt

        # bold-tt
        s#\*\*([^\*]*)\*\*#\376B\377$1\376/B\377#;
        # italic-tt
        s#~([^~]*)~#\376I\377$1\376/I\377#;
        # hot-tt: becomes a link when a matching href-tt was seen,
        # otherwise falls back to italics
        s{\b([^\s]*)_\b}{
            my $name = $1;
            my $url  = $href{$name};
            (my $label = $name) =~ s,_, ,g;
            $url ? qq{\376A HREF="$url"\377$label\376/A\377}
                 : "\376I\377$label\376/I\377";
        }e;
        # underline-tt
        s{_([^\s]*)_}{
            (my $label = $1) =~ s,_, ,g;
            "\376I\377$label\376/I\377";
        }e;

        $ret .= htmlize($_) . "\n";
    }

    # close any <PRE>/<BLOCKQUOTE> still open
    to_fmt();
    ($ret);
}
116 | ||
## to_fmt -- switch the parser to free-flow text ($FMT).
## Appends the markup needed to close the current <PRE> or quoted
## <BLOCKQUOTE><PRE> section to $ret.  Does nothing when the parser is
## already in the $FMT state.
sub to_fmt {
    return if $curstate == $FMT;
    if ($curstate == $PRE) {
        $ret .= "</PRE>\n";
    }
    if ($curstate == $QUOTE) {          #XXX
        $ret .= "</PRE></BLOCKQUOTE>\n";
    }
    $curstate = $FMT;
}
## to_pre -- switch the parser to preformatted text ($PRE).
## Appends "<PRE>" to $ret when coming from free-flow text, or closes the
## quoted section and reopens a plain <PRE> when coming from $QUOTE.
## Does nothing when the parser is already in the $PRE state.
sub to_pre {
    return if $curstate == $PRE;
    if ($curstate == $FMT) {
        $ret .= "<PRE>\n";
    }
    if ($curstate == $QUOTE) {          #XXX
        $ret .= "</PRE></BLOCKQUOTE><PRE>\n";
    }
    $curstate = $PRE;
}
## to_quote -- switch the parser to quoted text ($QUOTE).
## Quoted text is emitted as <PRE> nested inside <BLOCKQUOTE>; an open
## plain <PRE> section is closed first.  Does nothing when the parser is
## already in the $QUOTE state.
sub to_quote {
    return if $curstate == $QUOTE;
    if ($curstate == $FMT) {            #XXX
        $ret .= "<BLOCKQUOTE><PRE>\n";
    }
    if ($curstate == $PRE) {            #XXX
        $ret .= "</PRE><BLOCKQUOTE><PRE>\n";
    }
    $curstate = $QUOTE;
}
## htmlize -- make a line of text safe for HTML output.
## "&", "<" and ">" are replaced by their numeric character entities
## (&#38; &#60; &#62;).  Afterwards the placeholder bytes \376 and \377,
## which stand for markup generated by the converter itself, are turned
## into a real "<" and ">".
## Takes the text as its only argument and returns the converted copy.
sub htmlize {
    my($text) = @_;
    # ord('&') == 38, ord('<') == 60, ord('>') == 62
    $text =~ s/([&<>])/'&#' . ord($1) . ';'/ge;
    $text =~ tr/\376\377/<>/;           # convert back
    $text;
}
## title -- turn a setext title/subhead pair into a heading line.
##
## Arguments:
##   $head -- heading element name to use ("H1" or "H2")
##   $i    -- index in the package array @data of the underline line
##
## The underline line is replaced by ".." and the line before it is
## rewritten as ".. <$head>text</$head>", with leading whitespace removed
## from the text.  When the underline is the very first line ($i == 0)
## there is no text line to convert; only the underline is suppressed.
## (Without this guard, index -1 would rewrite the LAST line of @data.)
sub title {
    my($head, $i) = @_;

    $data[$i] = "..";
    return if $i < 1;

    my $text = $data[$i - 1];
    $text =~ s/^\s*//;
    # $ret .= "<TITLE>$text</TITLE>\n" unless $title++;
    $data[$i - 1] = ".. <$head>" . $text . "</$head>";
}
147 | ||
148 | 1; |