Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / site_perl / 5.8.0 / iso2022jp.pl
CommitLineData
86530b38
AT
1##---------------------------------------------------------------------------##
2## File:
3## $Id: iso2022jp.pl,v 1.8 2002/07/30 18:20:46 ehood Exp $
4## Author(s):
5## Earl Hood mhonarc@mhonarc.org
6## NIIBE Yutaka gniibe@mri.co.jp
7## Takashi P.KATOH p-katoh@shiratori.riec.tohoku.ac.jp
8## Description:
9## Library defines routine to process iso-2022-jp data.
10##---------------------------------------------------------------------------##
11## Copyright (C) 1995-2002
12## Earl Hood, mhonarc@mhonarc.org
13## NIIBE Yutaka, gniibe@mri.co.jp
14## Takashi P.KATOH, p-katoh@shiratori.riec.tohoku.ac.jp
15##
16## This program is free software; you can redistribute it and/or modify
17## it under the terms of the GNU General Public License as published by
18## the Free Software Foundation; either version 2 of the License, or
19## (at your option) any later version.
20##
21## This program is distributed in the hope that it will be useful,
22## but WITHOUT ANY WARRANTY; without even the implied warranty of
23## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24## GNU General Public License for more details.
25##
26## You should have received a copy of the GNU General Public License
27## along with this program; if not, write to the Free Software
28## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
29## 02111-1307, USA
30##---------------------------------------------------------------------------##
31
32package iso_2022_jp;
33
34$Url = '(http://|https://|ftp://|afs://|wais://|telnet://|ldap://' .
35 '|gopher://|news:|nntp:|mid:|cid:|mailto:|prospero:)';
36$UrlExp = $Url . q%[^\s\(\)\|<>"']*[^\.?!;,"'\|\[\]\(\)\s<>]%;
37$HUrlExp = $Url . q%[^\s\(\)\|<>"'\&]*[^\.?!;,"'\|\[\]\(\)\s<>\&]%;
38
39##---------------------------------------------------------------------------##
40## str2html(): Convert an iso-2022-jp string into HTML. Function
41## interface similiar as iso8859.pl function.
42##
43sub str2html { jp2022_to_html($_[0], 1); }
44
45##---------------------------------------------------------------------------##
46## Function to convert ISO-2022-JP data into HTML. Function is based
47## on the following RFCs:
48##
49## RFC-1468 I
50## J. Murai, M. Crispin, E. van der Poel, "Japanese Character
51## Encoding for Internet Messages", 06/04/1993. (Pages=6)
52##
53## RFC-1554 I
54## M. Ohta, K. Handa, "ISO-2022-JP-2: Multilingual Extension of
55## ISO-2022-JP", 12/23/1993. (Pages=6)
56##
57sub jp2022_to_html {
58 my($body) = shift;
59 my($nourl) = shift;
60 my(@lines) = split(/\r?\n/,$body);
61 my($ret, $ascii_text);
62 local($_);
63
64 $ret = "";
65 foreach (@lines) {
66 # a trick to process preceding ASCII text
67 $_ = "\033(B" . $_ unless /^\033/;
68
69 # Process Each Segment
70 while(1) {
71 if (s/^(\033\([BJ])//) { # Single Byte Segment
72 $ret .= $1;
73 while(1) {
74 if (s/^([^\033]+)//) { # ASCII plain text
75 $ascii_text = $1;
76
77 # Replace meta characters in ASCII plain text
78 $ascii_text =~ s%\&%\&amp;%g;
79 $ascii_text =~ s%<%\&lt;%g;
80 $ascii_text =~ s%>%\&gt;%g;
81 ## Convert URLs to hyperlinks
82 $ascii_text =~ s%($HUrlExp)%<a href="$1">$1</a>%gio
83 unless $nourl;
84
85 $ret .= $ascii_text;
86 } elsif (s/(\033\.[A-F])//) { # G2 Designate Sequence
87 $ret .= $1;
88 } elsif (s/(\033N[ -\7f])//) { # Single Shift Sequence
89 $ret .= $1;
90 } else {
91 last;
92 }
93 }
94 } elsif (s/^(\033\$[\@AB]|\033\$\([CD])//) { # Double Byte Segment
95 $ret .= $1;
96 while (1) {
97 if (s/^([!-~][!-~]+)//) { # Double Char plain text
98 $ret .= $1;
99 } elsif (s/(\033\.[A-F])//) { # G2 Designate Sequence
100 $ret .= $1;
101 } elsif (s/(\033N[ -\7f])//) { # Single Shift Sequence
102 $ret .= $1;
103 } else {
104 last;
105 }
106 }
107 } else {
108 # Something wrong in text
109 $ret .= $_;
110 last;
111 }
112 }
113
114 # remove a `trick'
115 $ret =~ s/^\033\(B//;
116
117 $ret .= "\n";
118 }
119
120 ($ret);
121}
122
123
124##---------------------------------------------------------------------------##
125## clip($str, $length, $is_html, $has_tags): Clip an iso-2022-jp string.
126##
127## The last argument $is_html specifies '&' should be treated
128## as HTML character or not.
129## (i.e., the length of '&amp;' will be 1 if $is_html).
130##
131sub clip { # &clip($str, 10, 1, 1);
132 my($str) = shift;
133 my($length) = shift;
134 my($is_html) = shift;
135 my($has_tags) = shift;
136 my($ret, $inascii);
137 local($_) = $str;
138
139 $ret = "";
140 # a trick to process preceding ASCII text
141 $_ = "\033(B" . $_ unless /^\033/;
142
143 # Process Each Segment
144 CLIP: while(1) {
145 if (s/^(\033\([BJ])//) { # Single Byte Segment
146 $inascii = 1;
147 $ret .= $1;
148 while(1) {
149 if (s/^([^\033])//) { # ASCII plain text
150 if ($is_html) {
151 if (($1 eq '<') && $has_tags) {
152 s/^[^>\033]*>//;
153 } else {
154 if ($1 eq '&') {
155 s/^([^\;]*\;)//;
156 $ret .= "&$1";
157 } else {
158 $ret .= $1;
159 }
160 $length--;
161 }
162 } else {
163 $ret .= $1;
164 $length--;
165 }
166 } elsif (s/(\033\.[A-F])//) { # G2 Designate Sequence
167 $ret .= $1;
168 } elsif (s/(\033N[ -\7f])//) { # Single Shift Sequence
169 $ret .= $1;
170 $length--;
171 } else {
172 last;
173 }
174 last CLIP if ($length <= 0);
175 }
176 } elsif (s/^(\033\$[\@AB]|\033\$\([CD])//) { # Double Byte Segment
177 $inascii = 0;
178 $ret .= $1;
179 while (1) {
180 if (s/^([!-~][!-~])//) { # Double Char plain text
181 $ret .= $1;
182 # The length of a double-byte-char is assumed 2.
183 # If we consider compatibility with UTF-8, it should be 1.
184 $length -= 2;
185 } elsif (s/(\033\.[A-F])//) { # G2 Designate Sequence
186 $ret .= $1;
187 } elsif (s/(\033N[ -\7f])//) { # Single Shift Sequence
188 $ret .= $1;
189 $length--;
190 } else {
191 last;
192 }
193 last CLIP if ($length <= 0);
194 }
195 } else {
196 # Something wrong in text
197 $ret .= $_;
198 last;
199 }
200 }
201
202 # remove a `trick'
203 $ret =~ s/^\033\(B//;
204
205 # Shuold we check the last \033\([BJ] sequence?
206 # (I believe it is too paranoid).
207 $ret .= "\033(B" unless $inascii;
208
209 ($ret);
210}
211##---------------------------------------------------------------------------##
2121;