| 1 | # |
| 2 | # Locale::Script - ISO codes for script identification (ISO 15924) |
| 3 | # |
| 4 | # $Id: Script.pm,v 2.7 2004/06/10 21:19:34 neilb Exp $ |
| 5 | # |
| 6 | |
| 7 | package Locale::Script; |
| 8 | use strict; |
| 9 | require 5.002; |
| 10 | |
| 11 | require Exporter; |
| 12 | use Carp; |
| 13 | use Locale::Constants; |
| 14 | |
| 15 | |
| 16 | #----------------------------------------------------------------------- |
| 17 | # Public Global Variables |
| 18 | #----------------------------------------------------------------------- |
| 19 | use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); |
| 20 | $VERSION = sprintf("%d.%02d", q$Revision: 2.7 $ =~ /(\d+)\.(\d+)/); |
| 21 | @ISA = qw(Exporter); |
| 22 | @EXPORT = qw(code2script script2code |
| 23 | all_script_codes all_script_names |
| 24 | script_code2code |
| 25 | LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 LOCALE_CODE_NUMERIC); |
| 26 | |
| 27 | #----------------------------------------------------------------------- |
| 28 | # Private Global Variables |
| 29 | #----------------------------------------------------------------------- |
| 30 | my $CODES = []; |
| 31 | my $COUNTRIES = []; |
| 32 | |
| 33 | |
| 34 | #======================================================================= |
| 35 | # |
| 36 | # code2script ( CODE [, CODESET ] ) |
| 37 | # |
| 38 | #======================================================================= |
| 39 | sub code2script |
| 40 | { |
| 41 | my $code = shift; |
| 42 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; |
| 43 | |
| 44 | |
| 45 | return undef unless defined $code; |
| 46 | |
| 47 | #------------------------------------------------------------------- |
| 48 | # Make sure the code is in the right form before we use it |
| 49 | # to look up the corresponding script. |
| 50 | # We have to sprintf because the codes are given as 3-digits, |
| 51 | # with leading 0's. Eg 070 for Egyptian demotic. |
| 52 | #------------------------------------------------------------------- |
| 53 | if ($codeset == LOCALE_CODE_NUMERIC) |
| 54 | { |
| 55 | return undef if ($code =~ /\D/); |
| 56 | $code = sprintf("%.3d", $code); |
| 57 | } |
| 58 | else |
| 59 | { |
| 60 | $code = lc($code); |
| 61 | } |
| 62 | |
| 63 | if (exists $CODES->[$codeset]->{$code}) |
| 64 | { |
| 65 | return $CODES->[$codeset]->{$code}; |
| 66 | } |
| 67 | else |
| 68 | { |
| 69 | #--------------------------------------------------------------- |
| 70 | # no such script code! |
| 71 | #--------------------------------------------------------------- |
| 72 | return undef; |
| 73 | } |
| 74 | } |
| 75 | |
| 76 | |
| 77 | #======================================================================= |
| 78 | # |
| 79 | # script2code ( SCRIPT [, CODESET ] ) |
| 80 | # |
| 81 | #======================================================================= |
| 82 | sub script2code |
| 83 | { |
| 84 | my $script = shift; |
| 85 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; |
| 86 | |
| 87 | |
| 88 | return undef unless defined $script; |
| 89 | $script = lc($script); |
| 90 | if (exists $COUNTRIES->[$codeset]->{$script}) |
| 91 | { |
| 92 | return $COUNTRIES->[$codeset]->{$script}; |
| 93 | } |
| 94 | else |
| 95 | { |
| 96 | #--------------------------------------------------------------- |
| 97 | # no such script! |
| 98 | #--------------------------------------------------------------- |
| 99 | return undef; |
| 100 | } |
| 101 | } |
| 102 | |
| 103 | |
| 104 | #======================================================================= |
| 105 | # |
| 106 | # script_code2code ( CODE, IN-CODESET, OUT-CODESET ) |
| 107 | # |
| 108 | #======================================================================= |
| 109 | sub script_code2code |
| 110 | { |
| 111 | (@_ == 3) or croak "script_code2code() takes 3 arguments!"; |
| 112 | |
| 113 | my $code = shift; |
| 114 | my $inset = shift; |
| 115 | my $outset = shift; |
| 116 | my $outcode; |
| 117 | my $script; |
| 118 | |
| 119 | |
| 120 | return undef if $inset == $outset; |
| 121 | $script = code2script($code, $inset); |
| 122 | return undef if not defined $script; |
| 123 | $outcode = script2code($script, $outset); |
| 124 | return $outcode; |
| 125 | } |
| 126 | |
| 127 | |
| 128 | #======================================================================= |
| 129 | # |
| 130 | # all_script_codes() |
| 131 | # |
| 132 | #======================================================================= |
| 133 | sub all_script_codes |
| 134 | { |
| 135 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; |
| 136 | |
| 137 | return keys %{ $CODES->[$codeset] }; |
| 138 | } |
| 139 | |
| 140 | |
| 141 | #======================================================================= |
| 142 | # |
| 143 | # all_script_names() |
| 144 | # |
| 145 | #======================================================================= |
| 146 | sub all_script_names |
| 147 | { |
| 148 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; |
| 149 | |
| 150 | return values %{ $CODES->[$codeset] }; |
| 151 | } |
| 152 | |
| 153 | |
| 154 | #======================================================================= |
| 155 | # |
| 156 | # initialisation code - stuff the DATA into the ALPHA2 hash |
| 157 | # |
| 158 | #======================================================================= |
| 159 | { |
| 160 | my ($alpha2, $alpha3, $numeric); |
| 161 | my $script; |
| 162 | local $_; |
| 163 | |
| 164 | |
| 165 | while (<DATA>) |
| 166 | { |
| 167 | next unless /\S/; |
| 168 | chop; |
| 169 | ($alpha2, $alpha3, $numeric, $script) = split(/:/, $_, 4); |
| 170 | |
| 171 | $CODES->[LOCALE_CODE_ALPHA_2]->{$alpha2} = $script; |
| 172 | $COUNTRIES->[LOCALE_CODE_ALPHA_2]->{"\L$script"} = $alpha2; |
| 173 | |
| 174 | if ($alpha3) |
| 175 | { |
| 176 | $CODES->[LOCALE_CODE_ALPHA_3]->{$alpha3} = $script; |
| 177 | $COUNTRIES->[LOCALE_CODE_ALPHA_3]->{"\L$script"} = $alpha3; |
| 178 | } |
| 179 | |
| 180 | if ($numeric) |
| 181 | { |
| 182 | $CODES->[LOCALE_CODE_NUMERIC]->{$numeric} = $script; |
| 183 | $COUNTRIES->[LOCALE_CODE_NUMERIC]->{"\L$script"} = $numeric; |
| 184 | } |
| 185 | |
| 186 | } |
| 187 | |
| 188 | close(DATA); |
| 189 | } |
| 190 | |
| 191 | 1; |
| 192 | |
| 193 | __DATA__ |
| 194 | am:ama:130:Aramaic |
| 195 | ar:ara:160:Arabic |
| 196 | av:ave:151:Avestan |
| 197 | bh:bhm:300:Brahmi (Ashoka) |
| 198 | bi:bid:372:Buhid |
| 199 | bn:ben:325:Bengali |
| 200 | bo:bod:330:Tibetan |
| 201 | bp:bpm:285:Bopomofo |
| 202 | br:brl:570:Braille |
| 203 | bt:btk:365:Batak |
| 204 | bu:bug:367:Buginese (Makassar) |
| 205 | by:bys:550:Blissymbols |
| 206 | ca:cam:358:Cham |
| 207 | ch:chu:221:Old Church Slavonic |
| 208 | ci:cir:291:Cirth |
| 209 | cm:cmn:402:Cypro-Minoan |
| 210 | co:cop:205:Coptic |
| 211 | cp:cpr:403:Cypriote syllabary |
| 212 | cy:cyr:220:Cyrillic |
| 213 | ds:dsr:250:Deserel (Mormon) |
| 214 | dv:dvn:315:Devanagari (Nagari) |
| 215 | ed:egd:070:Egyptian demotic |
| 216 | eg:egy:050:Egyptian hieroglyphs |
| 217 | eh:egh:060:Egyptian hieratic |
| 218 | el:ell:200:Greek |
| 219 | eo:eos:210:Etruscan and Oscan |
| 220 | et:eth:430:Ethiopic |
| 221 | gl:glg:225:Glagolitic |
| 222 | gm:gmu:310:Gurmukhi |
| 223 | gt:gth:206:Gothic |
| 224 | gu:guj:320:Gujarati |
| 225 | ha:han:500:Han ideographs |
| 226 | he:heb:125:Hebrew |
| 227 | hg:hgl:420:Hangul |
| 228 | hm:hmo:450:Pahawh Hmong |
| 229 | ho:hoo:371:Hanunoo |
| 230 | hr:hrg:410:Hiragana |
| 231 | hu:hun:176:Old Hungarian runic |
| 232 | hv:hvn:175:Kok Turki runic |
| 233 | hy:hye:230:Armenian |
| 234 | iv:ivl:610:Indus Valley |
| 235 | ja:jap:930:(alias for Han + Hiragana + Katakana) |
| 236 | jl:jlg:445:Cherokee syllabary |
| 237 | jw:jwi:360:Javanese |
| 238 | ka:kam:241:Georgian (Mxedruli) |
| 239 | kh:khn:931:(alias for Hangul + Han) |
| 240 | kk:kkn:411:Katakana |
| 241 | km:khm:354:Khmer |
| 242 | kn:kan:345:Kannada |
| 243 | kr:krn:357:Karenni (Kayah Li) |
| 244 | ks:kst:305:Kharoshthi |
| 245 | kx:kax:240:Georgian (Xucuri) |
| 246 | la:lat:217:Latin |
| 247 | lf:laf:215:Latin (Fraktur variant) |
| 248 | lg:lag:216:Latin (Gaelic variant) |
| 249 | lo:lao:356:Lao |
| 250 | lp:lpc:335:Lepcha (Rong) |
| 251 | md:mda:140:Mandaean |
| 252 | me:mer:100:Meroitic |
| 253 | mh:may:090:Mayan hieroglyphs |
| 254 | ml:mlm:347:Malayalam |
| 255 | mn:mon:145:Mongolian |
| 256 | my:mya:350:Burmese |
| 257 | na:naa:400:Linear A |
| 258 | nb:nbb:401:Linear B |
| 259 | og:ogm:212:Ogham |
| 260 | or:ory:327:Oriya |
| 261 | os:osm:260:Osmanya |
| 262 | ph:phx:115:Phoenician |
| 263 | ph:pah:150:Pahlavi |
| 264 | pl:pld:282:Pollard Phonetic |
| 265 | pq:pqd:295:Klingon plQaD |
| 266 | pr:prm:227:Old Permic |
| 267 | ps:pst:600:Phaistos Disk |
| 268 | rn:rnr:211:Runic (Germanic) |
| 269 | rr:rro:620:Rongo-rongo |
| 270 | sa:sar:110:South Arabian |
| 271 | si:sin:348:Sinhala |
| 272 | sj:syj:137:Syriac (Jacobite variant) |
| 273 | sl:slb:440:Unified Canadian Aboriginal Syllabics |
| 274 | sn:syn:136:Syriac (Nestorian variant) |
| 275 | sw:sww:281:Shavian (Shaw) |
| 276 | sy:syr:135:Syriac (Estrangelo) |
| 277 | ta:tam:346:Tamil |
| 278 | tb:tbw:373:Tagbanwa |
| 279 | te:tel:340:Telugu |
| 280 | tf:tfn:120:Tifnagh |
| 281 | tg:tag:370:Tagalog |
| 282 | th:tha:352:Thai |
| 283 | tn:tna:170:Thaana |
| 284 | tw:twr:290:Tengwar |
| 285 | va:vai:470:Vai |
| 286 | vs:vsp:280:Visible Speech |
| 287 | xa:xas:000:Cuneiform, Sumero-Akkadian |
| 288 | xf:xfa:105:Cuneiform, Old Persian |
| 289 | xk:xkn:412:(alias for Hiragana + Katakana) |
| 290 | xu:xug:106:Cuneiform, Ugaritic |
| 291 | yi:yii:460:Yi |
| 292 | zx:zxx:997:Unwritten language |
| 293 | zy:zyy:998:Undetermined script |
| 294 | zz:zzz:999:Uncoded script |