Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | # |
2 | # Locale::Script - ISO codes for script identification (ISO 15924) | |
3 | # | |
4 | # $Id: Script.pm,v 2.2 2002/07/10 16:33:28 neilb Exp $ | |
5 | # | |
6 | ||
7 | package Locale::Script; | |
8 | use strict; | |
9 | require 5.002; | |
10 | ||
11 | require Exporter; | |
12 | use Carp; | |
13 | use Locale::Constants; | |
14 | ||
15 | ||
16 | #----------------------------------------------------------------------- | |
17 | # Public Global Variables | |
18 | #----------------------------------------------------------------------- | |
19 | use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); | |
20 | $VERSION = sprintf("%d.%02d", q$Revision: 2.2 $ =~ /(\d+)\.(\d+)/); | |
21 | @ISA = qw(Exporter); | |
22 | @EXPORT = qw(code2script script2code | |
23 | all_script_codes all_script_names | |
24 | script_code2code | |
25 | LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 LOCALE_CODE_NUMERIC); | |
26 | ||
27 | #----------------------------------------------------------------------- | |
28 | # Private Global Variables | |
29 | #----------------------------------------------------------------------- | |
30 | my $CODES = []; | |
31 | my $COUNTRIES = []; | |
32 | ||
33 | ||
34 | #======================================================================= | |
35 | # | |
36 | # code2script ( CODE [, CODESET ] ) | |
37 | # | |
38 | #======================================================================= | |
39 | sub code2script | |
40 | { | |
41 | my $code = shift; | |
42 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; | |
43 | ||
44 | ||
45 | return undef unless defined $code; | |
46 | ||
47 | #------------------------------------------------------------------- | |
48 | # Make sure the code is in the right form before we use it | |
49 | # to look up the corresponding script. | |
50 | # We have to sprintf because the codes are given as 3-digits, | |
51 | # with leading 0's. Eg 070 for Egyptian demotic. | |
52 | #------------------------------------------------------------------- | |
53 | if ($codeset == LOCALE_CODE_NUMERIC) | |
54 | { | |
55 | return undef if ($code =~ /\D/); | |
56 | $code = sprintf("%.3d", $code); | |
57 | } | |
58 | else | |
59 | { | |
60 | $code = lc($code); | |
61 | } | |
62 | ||
63 | if (exists $CODES->[$codeset]->{$code}) | |
64 | { | |
65 | return $CODES->[$codeset]->{$code}; | |
66 | } | |
67 | else | |
68 | { | |
69 | #--------------------------------------------------------------- | |
70 | # no such script code! | |
71 | #--------------------------------------------------------------- | |
72 | return undef; | |
73 | } | |
74 | } | |
75 | ||
76 | ||
77 | #======================================================================= | |
78 | # | |
79 | # script2code ( SCRIPT [, CODESET ] ) | |
80 | # | |
81 | #======================================================================= | |
82 | sub script2code | |
83 | { | |
84 | my $script = shift; | |
85 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; | |
86 | ||
87 | ||
88 | return undef unless defined $script; | |
89 | $script = lc($script); | |
90 | if (exists $COUNTRIES->[$codeset]->{$script}) | |
91 | { | |
92 | return $COUNTRIES->[$codeset]->{$script}; | |
93 | } | |
94 | else | |
95 | { | |
96 | #--------------------------------------------------------------- | |
97 | # no such script! | |
98 | #--------------------------------------------------------------- | |
99 | return undef; | |
100 | } | |
101 | } | |
102 | ||
103 | ||
104 | #======================================================================= | |
105 | # | |
106 | # script_code2code ( CODE, IN-CODESET, OUT-CODESET ) | |
107 | # | |
108 | #======================================================================= | |
109 | sub script_code2code | |
110 | { | |
111 | (@_ == 3) or croak "script_code2code() takes 3 arguments!"; | |
112 | ||
113 | my $code = shift; | |
114 | my $inset = shift; | |
115 | my $outset = shift; | |
116 | my $outcode; | |
117 | my $script; | |
118 | ||
119 | ||
120 | return undef if $inset == $outset; | |
121 | $script = code2script($code, $inset); | |
122 | return undef if not defined $script; | |
123 | $outcode = script2code($script, $outset); | |
124 | return $outcode; | |
125 | } | |
126 | ||
127 | ||
128 | #======================================================================= | |
129 | # | |
130 | # all_script_codes() | |
131 | # | |
132 | #======================================================================= | |
133 | sub all_script_codes | |
134 | { | |
135 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; | |
136 | ||
137 | return keys %{ $CODES->[$codeset] }; | |
138 | } | |
139 | ||
140 | ||
141 | #======================================================================= | |
142 | # | |
143 | # all_script_names() | |
144 | # | |
145 | #======================================================================= | |
146 | sub all_script_names | |
147 | { | |
148 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; | |
149 | ||
150 | return values %{ $CODES->[$codeset] }; | |
151 | } | |
152 | ||
153 | ||
154 | #======================================================================= | |
155 | # | |
156 | # initialisation code - stuff the DATA into the ALPHA2 hash | |
157 | # | |
158 | #======================================================================= | |
159 | { | |
160 | my ($alpha2, $alpha3, $numeric); | |
161 | my $script; | |
162 | ||
163 | ||
164 | while (<DATA>) | |
165 | { | |
166 | next unless /\S/; | |
167 | chop; | |
168 | ($alpha2, $alpha3, $numeric, $script) = split(/:/, $_, 4); | |
169 | ||
170 | $CODES->[LOCALE_CODE_ALPHA_2]->{$alpha2} = $script; | |
171 | $COUNTRIES->[LOCALE_CODE_ALPHA_2]->{"\L$script"} = $alpha2; | |
172 | ||
173 | if ($alpha3) | |
174 | { | |
175 | $CODES->[LOCALE_CODE_ALPHA_3]->{$alpha3} = $script; | |
176 | $COUNTRIES->[LOCALE_CODE_ALPHA_3]->{"\L$script"} = $alpha3; | |
177 | } | |
178 | ||
179 | if ($numeric) | |
180 | { | |
181 | $CODES->[LOCALE_CODE_NUMERIC]->{$numeric} = $script; | |
182 | $COUNTRIES->[LOCALE_CODE_NUMERIC]->{"\L$script"} = $numeric; | |
183 | } | |
184 | ||
185 | } | |
186 | ||
187 | close(DATA); | |
188 | } | |
189 | ||
190 | 1; | |
191 | ||
192 | __DATA__ | |
193 | am:ama:130:Aramaic | |
194 | ar:ara:160:Arabic | |
195 | av:ave:151:Avestan | |
196 | bh:bhm:300:Brahmi (Ashoka) | |
197 | bi:bid:372:Buhid | |
198 | bn:ben:325:Bengali | |
199 | bo:bod:330:Tibetan | |
200 | bp:bpm:285:Bopomofo | |
201 | br:brl:570:Braille | |
202 | bt:btk:365:Batak | |
203 | bu:bug:367:Buginese (Makassar) | |
204 | by:bys:550:Blissymbols | |
205 | ca:cam:358:Cham | |
206 | ch:chu:221:Old Church Slavonic | |
207 | ci:cir:291:Cirth | |
208 | cm:cmn:402:Cypro-Minoan | |
209 | co:cop:205:Coptic | |
210 | cp:cpr:403:Cypriote syllabary | |
211 | cy:cyr:220:Cyrillic | |
212 | ds:dsr:250:Deserel (Mormon) | |
213 | dv:dvn:315:Devanagari (Nagari) | |
214 | ed:egd:070:Egyptian demotic | |
215 | eg:egy:050:Egyptian hieroglyphs | |
216 | eh:egh:060:Egyptian hieratic | |
217 | el:ell:200:Greek | |
218 | eo:eos:210:Etruscan and Oscan | |
219 | et:eth:430:Ethiopic | |
220 | gl:glg:225:Glagolitic | |
221 | gm:gmu:310:Gurmukhi | |
222 | gt:gth:206:Gothic | |
223 | gu:guj:320:Gujarati | |
224 | ha:han:500:Han ideographs | |
225 | he:heb:125:Hebrew | |
226 | hg:hgl:420:Hangul | |
227 | hm:hmo:450:Pahawh Hmong | |
228 | ho:hoo:371:Hanunoo | |
229 | hr:hrg:410:Hiragana | |
230 | hu:hun:176:Old Hungarian runic | |
231 | hv:hvn:175:Kok Turki runic | |
232 | hy:hye:230:Armenian | |
233 | iv:ivl:610:Indus Valley | |
234 | ja:jap:930:(alias for Han + Hiragana + Katakana) | |
235 | jl:jlg:445:Cherokee syllabary | |
236 | jw:jwi:360:Javanese | |
237 | ka:kam:241:Georgian (Mxedruli) | |
238 | kh:khn:931:(alias for Hangul + Han) | |
239 | kk:kkn:411:Katakana | |
240 | km:khm:354:Khmer | |
241 | kn:kan:345:Kannada | |
242 | kr:krn:357:Karenni (Kayah Li) | |
243 | ks:kst:305:Kharoshthi | |
244 | kx:kax:240:Georgian (Xucuri) | |
245 | la:lat:217:Latin | |
246 | lf:laf:215:Latin (Fraktur variant) | |
247 | lg:lag:216:Latin (Gaelic variant) | |
248 | lo:lao:356:Lao | |
249 | lp:lpc:335:Lepcha (Rong) | |
250 | md:mda:140:Mandaean | |
251 | me:mer:100:Meroitic | |
252 | mh:may:090:Mayan hieroglyphs | |
253 | ml:mlm:347:Malayalam | |
254 | mn:mon:145:Mongolian | |
255 | my:mya:350:Burmese | |
256 | na:naa:400:Linear A | |
257 | nb:nbb:401:Linear B | |
258 | og:ogm:212:Ogham | |
259 | or:ory:327:Oriya | |
260 | os:osm:260:Osmanya | |
261 | ph:phx:115:Phoenician | |
262 | ph:pah:150:Pahlavi | |
263 | pl:pld:282:Pollard Phonetic | |
264 | pq:pqd:295:Klingon plQaD | |
265 | pr:prm:227:Old Permic | |
266 | ps:pst:600:Phaistos Disk | |
267 | rn:rnr:211:Runic (Germanic) | |
268 | rr:rro:620:Rongo-rongo | |
269 | sa:sar:110:South Arabian | |
270 | si:sin:348:Sinhala | |
271 | sj:syj:137:Syriac (Jacobite variant) | |
272 | sl:slb:440:Unified Canadian Aboriginal Syllabics | |
273 | sn:syn:136:Syriac (Nestorian variant) | |
274 | sw:sww:281:Shavian (Shaw) | |
275 | sy:syr:135:Syriac (Estrangelo) | |
276 | ta:tam:346:Tamil | |
277 | tb:tbw:373:Tagbanwa | |
278 | te:tel:340:Telugu | |
279 | tf:tfn:120:Tifnagh | |
280 | tg:tag:370:Tagalog | |
281 | th:tha:352:Thai | |
282 | tn:tna:170:Thaana | |
283 | tw:twr:290:Tengwar | |
284 | va:vai:470:Vai | |
285 | vs:vsp:280:Visible Speech | |
286 | xa:xas:000:Cuneiform, Sumero-Akkadian | |
287 | xf:xfa:105:Cuneiform, Old Persian | |
288 | xk:xkn:412:(alias for Hiragana + Katakana) | |
289 | xu:xug:106:Cuneiform, Ugaritic | |
290 | yi:yii:460:Yi | |
291 | zx:zxx:997:Unwritten language | |
292 | zy:zyy:998:Undetermined script | |
293 | zz:zzz:999:Uncoded script |