Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / 5.8.0 / sun4-solaris / Encode / Alias.pm
CommitLineData
86530b38
AT
1package Encode::Alias;
2use strict;
3use Encode;
4our $VERSION = do { my @r = (q$Revision: 1.32 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
5our $DEBUG = 0;
6
7use base qw(Exporter);
8
9# Public, encouraged API is exported by default
10
11our @EXPORT =
12 qw (
13 define_alias
14 find_alias
15 );
16
17our @Alias; # ordered matching list
18our %Alias; # cached known aliases
19
20sub find_alias
21{
22 my $class = shift;
23 local $_ = shift;
24 unless (exists $Alias{$_})
25 {
26 $Alias{$_} = undef; # Recursion guard
27 for (my $i=0; $i < @Alias; $i += 2)
28 {
29 my $alias = $Alias[$i];
30 my $val = $Alias[$i+1];
31 my $new;
32 if (ref($alias) eq 'Regexp' && $_ =~ $alias)
33 {
34 $DEBUG and warn "eval $val";
35 $new = eval $val;
36 # $@ and warn "$val, $@";
37 }
38 elsif (ref($alias) eq 'CODE')
39 {
40 $DEBUG and warn "$alias", "->", "($val)";
41 $new = $alias->($val);
42 }
43 elsif (lc($_) eq lc($alias))
44 {
45 $new = $val;
46 }
47 if (defined($new))
48 {
49 next if $new eq $_; # avoid (direct) recursion on bugs
50 $DEBUG and warn "$alias, $new";
51 my $enc = (ref($new)) ? $new : Encode::find_encoding($new);
52 if ($enc)
53 {
54 $Alias{$_} = $enc;
55 last;
56 }
57 }
58 }
59 }
60 if ($DEBUG){
61 my $name;
62 if (my $e = $Alias{$_}){
63 $name = $e->name;
64 }else{
65 $name = "";
66 }
67 warn "find_alias($class, $_)->name = $name";
68 }
69 return $Alias{$_};
70}
71
72sub define_alias
73{
74 while (@_)
75 {
76 my ($alias,$name) = splice(@_,0,2);
77 unshift(@Alias, $alias => $name); # newer one has precedence
78 # clear %Alias cache to allow overrides
79 if (ref($alias)){
80 my @a = keys %Alias;
81 for my $k (@a){
82 if (ref($alias) eq 'Regexp' && $k =~ $alias)
83 {
84 $DEBUG and warn "delete \$Alias\{$k\}";
85 delete $Alias{$k};
86 }
87 elsif (ref($alias) eq 'CODE')
88 {
89 $DEBUG and warn "delete \$Alias\{$k\}";
90 delete $Alias{$alias->($name)};
91 }
92 }
93 }else{
94 $DEBUG and warn "delete \$Alias\{$alias\}";
95 delete $Alias{$alias};
96 }
97 }
98}
99
100# Allow latin-1 style names as well
101 # 0 1 2 3 4 5 6 7 8 9 10
102our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
103# Allow winlatin1 style names as well
104our %Winlatin2cp = (
105 'latin1' => 1252,
106 'latin2' => 1250,
107 'cyrillic' => 1251,
108 'greek' => 1253,
109 'turkish' => 1254,
110 'hebrew' => 1255,
111 'arabic' => 1256,
112 'baltic' => 1257,
113 'vietnamese' => 1258,
114 );
115
116init_aliases();
117
118sub undef_aliases{
119 @Alias = ();
120 %Alias = ();
121}
122
123sub init_aliases
124{
125 undef_aliases();
126
127 # Try all-lower-case version should all else fails
128 define_alias( qr/^(.*)$/ => '"\L$1"' );
129
130 # UTF/UCS stuff
131 define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
132 define_alias( qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"',
133 qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")',
134 qr/^iso-10646-1$/i => '"UCS-2BE"' );
135 define_alias( qr/^UTF(16|32)-?BE$/i => '"UTF-$1BE"',
136 qr/^UTF(16|32)-?LE$/i => '"UTF-$1LE"',
137 qr/^UTF(16|32)$/i => '"UTF-$1"',
138 );
139 # ASCII
140 define_alias(qr/^(?:US-?)ascii$/i => '"ascii"');
141 define_alias('C' => 'ascii');
142 define_alias(qr/\bISO[-_]?646[-_]?US$/i => '"ascii"');
143 # Allow variants of iso-8859-1 etc.
144 define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
145
146 # At least HP-UX has these.
147 define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' );
148
149 # More HP stuff.
150 define_alias( qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => '"${1}8"' );
151
152 # The Official name of ASCII.
153 define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );
154
155 # This is a font issue, not an encoding issue.
156 # (The currency symbol of the Latin 1 upper half
157 # has been redefined as the euro symbol.)
158 define_alias( qr/^(.+)\@euro$/i => '"$1"' );
159
160 define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i
161 => 'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef' );
162
163 define_alias( qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
164 hebrew|arabic|baltic|vietnamese)$/ix =>
165 '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' );
166
167 # Common names for non-latin prefered MIME names
168 define_alias( 'ascii' => 'US-ascii',
169 'cyrillic' => 'iso-8859-5',
170 'arabic' => 'iso-8859-6',
171 'greek' => 'iso-8859-7',
172 'hebrew' => 'iso-8859-8',
173 'thai' => 'iso-8859-11',
174 'tis620' => 'iso-8859-11',
175 );
176
177 # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
178 # And Microsoft has their own naming (again, surprisingly).
179 # And windows-* is registered in IANA!
180 define_alias( qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"');
181
182 # Sometimes seen with a leading zero.
183 # define_alias( qr/\bcp037\b/i => '"cp37"');
184
185 # Mac Mappings
186 # predefined in *.ucm; unneeded
187 # define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
188 define_alias( qr/^mac_(.*)$/i => '"mac$1"');
189 # Ououououou. gone. They are differente!
190 # define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
191
192 # Standardize on the dashed versions.
193 # define_alias( qr/\butf8$/i => 'utf-8' );
194 define_alias( qr/\bkoi8r$/i => 'koi8-r' );
195 define_alias( qr/\bkoi8u$/i => 'koi8-u' );
196
197 unless ($Encode::ON_EBCDIC){
198 # for Encode::CN
199 define_alias( qr/\beuc.*cn$/i => '"euc-cn"' );
200 define_alias( qr/\bcn.*euc$/i => '"euc-cn"' );
201 # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
202 # CP936 doesn't have vendor-addon for GBK, so they're identical.
203 define_alias( qr/^gbk$/i => '"cp936"');
204 # This fixes gb2312 vs. euc-cn confusion, practically
205 define_alias( qr/\bGB[-_ ]?2312(?:\D.*$|$)/i => '"euc-cn"' );
206 # for Encode::JP
207 define_alias( qr/\bjis$/i => '"7bit-jis"' );
208 define_alias( qr/\beuc.*jp$/i => '"euc-jp"' );
209 define_alias( qr/\bjp.*euc$/i => '"euc-jp"' );
210 define_alias( qr/\bujis$/i => '"euc-jp"' );
211 define_alias( qr/\bshift.*jis$/i => '"shiftjis"' );
212 define_alias( qr/\bsjis$/i => '"shiftjis"' );
213 # for Encode::KR
214 define_alias( qr/\beuc.*kr$/i => '"euc-kr"' );
215 define_alias( qr/\bkr.*euc$/i => '"euc-kr"' );
216 # This fixes ksc5601 vs. euc-kr confusion, practically
217 define_alias( qr/(?:x-)?uhc$/i => '"cp949"' );
218 define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' );
219 define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' );
220 # for Encode::TW
221 define_alias( qr/\bbig-?5$/i => '"big5-eten"' );
222 define_alias( qr/\bbig5-?et(?:en)$/i => '"big5-eten"' );
223 define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' );
224 define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' );
225 define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
226 }
227 # utf8 is blessed :)
228 define_alias( qr/^UTF-8$/i => '"utf8"',);
229 # At last, Map white space and _ to '-'
230 define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
231}
232
2331;
234__END__
235
236# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
237# TODO: HP-UX '15' encodings japanese15 korean15 roi15
238# TODO: Cyrillic encoding ISO-IR-111 (useful?)
239# TODO: Armenian encoding ARMSCII-8
240# TODO: Hebrew encoding ISO-8859-8-1
241# TODO: Thai encoding TCVN
242# TODO: Vietnamese encodings VPS
243# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
244# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
245# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
246# Kannada Khmer Korean Laotian Malayalam Mongolian
247# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
248
249=head1 NAME
250
251Encode::Alias - alias definitions to encodings
252
253=head1 SYNOPSIS
254
255 use Encode;
256 use Encode::Alias;
257 define_alias( newName => ENCODING);
258
259=head1 DESCRIPTION
260
261Allows newName to be used as an alias for ENCODING. ENCODING may be
262either the name of an encoding or an encoding object (as described
263in L<Encode>).
264
265Currently I<newName> can be specified in the following ways:
266
267=over 4
268
269=item As a simple string.
270
271=item As a qr// compiled regular expression, e.g.:
272
273 define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
274
275In this case, if I<ENCODING> is not a reference, it is C<eval>-ed
276in order to allow C<$1> etc. to be substituted. The example is one
277way to alias names as used in X11 fonts to the MIME names for the
278iso-8859-* family. Note the double quotes inside the single quotes.
279
280If you are using a regex here, you have to use the quotes as shown or
281it won't work. Also note that regex handling is tricky even for the
282experienced. Use it with caution.
283
284=item As a code reference, e.g.:
285
286 define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , '');
287
288In this case, C<$_> will be set to the name that is being looked up and
289I<ENCODING> is passed to the sub as its first argument. The example
290is another way to alias names as used in X11 fonts to the MIME names
291for the iso-8859-* family.
292
293=back
294
295=head2 Alias overloading
296
297You can override predefined aliases by simply applying define_alias().
298The new alias is always evaluated first, and when neccessary,
299define_alias() flushes the internal cache to make the new definition
300available.
301
302 # redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
303 # superset of SHIFT_JIS
304
305 define_alias( qr/shift.*jis$/i => '"cp932"' );
306 define_alias( qr/sjis$/i => '"cp932"' );
307
308If you want to zap all predefined aliases, you can use
309
310 Encode::Alias->undef_aliases;
311
312to do so. And
313
314 Encode::Alias->init_aliases;
315
316gets the factory settings back.
317
318=head1 SEE ALSO
319
320L<Encode>, L<Encode::Supported>
321
322=cut
323