Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | |
2 | require 5; | |
3 | package I18N::LangTags::List; | |
4 | # Time-stamp: "2004-10-06 23:26:21 ADT" | |
5 | use strict; | |
6 | use vars qw(%Name %Is_Disrec $Debug $VERSION); | |
7 | $VERSION = '0.35'; | |
8 | # POD at the end. | |
9 | ||
10 | #---------------------------------------------------------------------- | |
{
  # Load-time initialisation: fill %Name (tag => English name) and
  # %Is_Disrec (tag => 1 for tags not meant for general use) by parsing
  # this module's own POD, which is read through the DATA filehandle.
  #
  # Everything before the "=for woohah" marker is skipped.  After it:
  #   * a line holding "{tag} name" (optionally "[{tag} : name]") defines
  #     a tag; a leading "[" marks the tag as disrecommended;
  #   * a line holding 'Formerly "old"' (and no "{tag}") registers "old"
  #     as a disrecommended alias of the most recently named tag.
  # Dies if no tag at all could be read.
  my $seeking   = 1;
  my $count     = 0;
  my $last_name = '';

  # Read into a lexical rather than the implicit global $_: a bare
  # while(<FH>) does not localise $_, so loading this module from inside
  # a for/map that aliases $_ would overwrite the caller's data.
  while (defined(my $line = <I18N::LangTags::List::DATA>)) {
    if ($seeking) {
      $seeking = 0 if $line =~ m/=for woohah/;
    } elsif ( my ($disrec, $tag, $name) = $line =~
          m/(\[?)\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/
    ) {
      # drop trailing whitespace and list punctuation from the name
      $name =~ s/\s*[;\.]*\s*$//g;
      next unless $name;
      ++$count;
      print "<$tag> <$name>\n" if $Debug;
      $last_name = $name;

      # name() and is_decent() lowercase their argument before looking
      # it up, so a tag written in mixed case in the table ("zh-Hans")
      # must also be reachable under its lowercase form.  The spelling
      # as written is kept too, for code that reads %Name directly.
      foreach my $key ($tag, lc $tag) {
        $Name{$key} = $name;
        $Is_Disrec{$key} = 1 if $disrec;
      }
    } elsif ($line =~ m/[Ff]ormerly \"([-a-z0-9]+)\"/) {
      $Name{$1} = "$last_name (old tag)" if $last_name;
      $Is_Disrec{$1} = 1;
    }
  }
  die "No tags read??" unless $count;
}
36 | #---------------------------------------------------------------------- | |
37 | ||
# name( $langtag )
#
# Look up the English name for a language tag in %Name.  The tag is
# lowercased and stripped of surrounding whitespace first.  An "x-..."
# tag is also tried as "i-..." and vice versa.  If the full tag is not
# known, trailing "-subtag" pieces are removed one at a time until a
# known tag is reached; the removed pieces are then reported as
#   NAME (Subform "pieces")
# Returns nothing (undef / empty list) for a false argument or when no
# part of the tag is known.
sub name {
  my $wanted = lc($_[0] || return);
  $wanted =~ s/^\s+//s;
  $wanted =~ s/\s+$//s;

  # the same tag under the other private-use prefix, or '' if none
  my $twin = $wanted =~ m/^x-(.+)/ ? "i-$1"
           : $wanted =~ m/^i-(.+)/ ? "x-$1"
           :                         '';

  my $label   = '';   # name found so far
  my $trimmed = '';   # subtags removed so far, each with its leading "-"
  print "Input: {$wanted}\n" if $Debug;

  while (length $wanted) {
    $label = $Name{$wanted} and last;
    $label = $Name{$twin}   and last;

    # Remove the last subtag; if there is none left to remove we have
    # hit an unknown primary tag and must give up.
    $wanted =~ s/(-[a-z0-9]+)$//s or do {
      print "Aborting on: {$label}{$trimmed}\n" if $Debug;
      last;
    };
    print "Shaving off: $1 leaving $wanted\n" if $Debug;
    $trimmed = "$1$trimmed";

    # keep the twin tag in step with the main one
    $twin =~ s/(-[a-z0-9]+)$//s && $Debug && print " alt -> $twin\n";
  }
  print "Output: {$label}{$trimmed}\n" if $Debug;

  return unless $label;            # nothing matched
  return $label unless $trimmed;   # the whole tag matched

  $trimmed =~ s/^-//s;
  $trimmed =~ s/-$//s;
  return "$label (Subform \"$trimmed\")";
}
78 | ||
79 | #-------------------------------------------------------------------------- | |
80 | ||
# is_decent( $langtag )
#
# Rate a language tag (compared case-insensitively) against %Name and
# %Is_Disrec.  Returns
#   0  if the argument is false, is not a syntactically valid tag, is
#      just "i", "x" or "sgn" with nothing after it, or if the tag or
#      one of its leading parts is marked in %Is_Disrec before any
#      named part is found;
#   2  if the tag, or one of its leading parts, has an entry in %Name;
#   1  if the tag is well-formed but nothing about it is known.
sub is_decent {
  my $tag = lc($_[0] || return 0);

  # Anchored with \A and \z rather than ^ and $: "$" also matches just
  # before a trailing newline, which would let "aus\n" through the
  # syntax check and then miss every hash lookup below.
  return 0 unless
   $tag =~
    /\A(?:  # First subtag
         [xi] | [a-z]{2,3}
      )
      (?:  # Subtags thereafter
         -           # separator
         [a-z0-9]{1,8}  # subtag
      )*
    \z/x;

  # Cumulative leading parts: "a-b-c" gives ("a", "a-b", "a-b-c").
  my @supers = ();
  foreach my $bit (split('-', $tag)) {
    push @supers,
      scalar(@supers) ? ($supers[-1] . '-' . $bit) : $bit;
  }
  return 0 unless @supers;
  # "i", "x" and "sgn" are only prefixes, not tags in their own right
  shift @supers if $supers[0] =~ m<\A(?:i|x|sgn)\z>s;
  return 0 unless @supers;

  # The full tag is checked first, then its leading parts from shortest
  # to longest.  A named entry is reported as soon as it is found, so
  # that decent subforms of indecent tags are decent.
  foreach my $f ($tag, @supers) {
    return 0 if $Is_Disrec{$f};
    return 2 if $Name{$f};
  }
  return 1;
}
113 | ||
114 | #-------------------------------------------------------------------------- | |
115 | 1; | |
116 | ||
117 | __DATA__ | |
118 | ||
119 | =head1 NAME | |
120 | ||
121 | I18N::LangTags::List -- tags and names for human languages | |
122 | ||
123 | =head1 SYNOPSIS | |
124 | ||
125 | use I18N::LangTags::List; | |
126 | print "Parlez-vous... ", join(', ', | |
127 | I18N::LangTags::List::name('elx') || 'unknown_language', | |
128 | I18N::LangTags::List::name('ar-Kw') || 'unknown_language', | |
129 | I18N::LangTags::List::name('en') || 'unknown_language', | |
130 | I18N::LangTags::List::name('en-CA') || 'unknown_language', | |
131 | ), "?\n"; | |
132 | ||
133 | prints: | |
134 | ||
135 | Parlez-vous... Elamite, Kuwait Arabic, English, Canadian English? | |
136 | ||
137 | =head1 DESCRIPTION | |
138 | ||
139 | This module provides a function | |
140 | C<I18N::LangTags::List::name( I<langtag> )> that takes | |
141 | a language tag (see L<I18N::LangTags|I18N::LangTags>) | |
142 | and returns the best attempt at an English name for it, or | |
143 | undef if it can't make sense of the tag. | |
144 | ||
145 | The function I18N::LangTags::List::name(...) is not exported. | |
146 | ||
147 | This module also provides a function | |
148 | C<I18N::LangTags::List::is_decent( I<langtag> )> that returns true iff | |
149 | the language tag is syntactically valid and is for general use (like | |
150 | "fr" or "fr-ca", below). That is, it returns false for tags that are | |
151 | syntactically invalid and for tags, like "aus", that are listed in | |
152 | brackets below. This function is not exported. | |
153 | ||
154 | The map of tags-to-names that it uses is accessible as | |
155 | %I18N::LangTags::List::Name, and it's the same as the list | |
156 | that follows in this documentation, which should be useful | |
157 | to you even if you don't use this module. | |
158 | ||
159 | =head1 ABOUT LANGUAGE TAGS | |
160 | ||
161 | Internet language tags, as defined in RFC 3066, are a formalism | |
162 | for denoting human languages. The two-letter ISO 639-1 language | |
163 | codes are well known (as "en" for English), as are their forms | |
164 | when qualified by a country code ("en-US"). Less well-known are the | |
165 | arbitrary-length non-ISO codes (like "i-mingo"), and the | |
166 | recently (in 2001) introduced three-letter ISO-639-2 codes. | |
167 | ||
168 | Remember these important facts: | |
169 | ||
170 | =over | |
171 | ||
172 | =item * | |
173 | ||
174 | Language tags are not locale IDs. A locale ID is written with a "_" | |
175 | instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and | |
176 | I<means> something different than a language tag. A language tag | |
177 | denotes a language. A locale ID denotes a language I<as used in> | |
178 | a particular place, in combination with non-linguistic | |
179 | location-specific information such as what currency is used | |
180 | there. Locales I<also> often denote character set information, | |
181 | as in "en_US.ISO8859-1". | |
182 | ||
183 | =item * | |
184 | ||
185 | Language tags are not for computer languages. | |
186 | ||
187 | =item * | |
188 | ||
189 | "Dialect" is not a useful term, since there is no objective | |
190 | criterion for establishing when two language-forms are | |
191 | dialects of each other, or are separate languages. | |
192 | ||
193 | =item * | |
194 | ||
195 | Language tags are not case-sensitive. en-US, en-us, En-Us, etc., | |
196 | are all the same tag, and denote the same language. | |
197 | ||
198 | =item * | |
199 | ||
200 | Not every language tag really refers to a single language. Some | |
201 | language tags refer to conditions: i-default (system-message text | |
202 | in English plus maybe other languages), und (undetermined | |
203 | language). Others (notably lots of the three-letter codes) are | |
204 | bibliographic tags that classify whole groups of languages, as | |
205 | with cus "Cushitic (Other)" (i.e., a | |
206 | language that has been classed as Cushitic, but which has no more | |
207 | specific code) or the even less linguistically coherent | |
208 | sai for "South American Indian (Other)". Though useful in | |
209 | bibliography, B<SUCH TAGS ARE NOT | |
210 | FOR GENERAL USE>. For further guidance, email me. | |
211 | ||
212 | =item * | |
213 | ||
214 | Language tags are not country codes. In fact, they are often | |
215 | distinct codes, as with language tag ja for Japanese, and | |
216 | ISO 3166 country code C<.jp> for Japan. | |
217 | ||
218 | =back | |
219 | ||
220 | =head1 LIST OF LANGUAGES | |
221 | ||
222 | The first part of each item is the language tag, between | |
223 | {...}. It | |
224 | is followed by an English name for the language or language-group. | |
225 | Language tags that I judge to be not for general use, are bracketed. | |
226 | ||
227 | This list is in alphabetical order by English name of the language. | |
228 | ||
229 | =for reminder | |
230 | The name in the =item line MUST NOT have E<...>'s in it!! | |
231 | ||
232 | =for woohah START | |
233 | ||
234 | =over | |
235 | ||
236 | =item {ab} : Abkhazian | |
237 | ||
238 | eq Abkhaz | |
239 | ||
240 | =item {ace} : Achinese | |
241 | ||
242 | =item {ach} : Acoli | |
243 | ||
244 | =item {ada} : Adangme | |
245 | ||
246 | =item {ady} : Adyghe | |
247 | ||
248 | eq Adygei | |
249 | ||
250 | =item {aa} : Afar | |
251 | ||
252 | =item {afh} : Afrihili | |
253 | ||
254 | (Artificial) | |
255 | ||
256 | =item {af} : Afrikaans | |
257 | ||
258 | =item [{afa} : Afro-Asiatic (Other)] | |
259 | ||
260 | =item {ak} : Akan | |
261 | ||
262 | (Formerly "aka".) | |
263 | ||
264 | =item {akk} : Akkadian | |
265 | ||
266 | (Historical) | |
267 | ||
268 | =item {sq} : Albanian | |
269 | ||
270 | =item {ale} : Aleut | |
271 | ||
272 | =item [{alg} : Algonquian languages] | |
273 | ||
274 | NOT Algonquin! | |
275 | ||
276 | =item [{tut} : Altaic (Other)] | |
277 | ||
278 | =item {am} : Amharic | |
279 | ||
280 | NOT Aramaic! | |
281 | ||
282 | =item {i-ami} : Ami | |
283 | ||
284 | eq Amis. eq 'Amis. eq Pangca. | |
285 | ||
286 | =item [{apa} : Apache languages] | |
287 | ||
288 | =item {ar} : Arabic | |
289 | ||
290 | Many forms are mutually un-intelligible in spoken media. | |
291 | Notable forms: | |
292 | {ar-ae} UAE Arabic; | |
293 | {ar-bh} Bahrain Arabic; | |
294 | {ar-dz} Algerian Arabic; | |
295 | {ar-eg} Egyptian Arabic; | |
296 | {ar-iq} Iraqi Arabic; | |
297 | {ar-jo} Jordanian Arabic; | |
298 | {ar-kw} Kuwait Arabic; | |
299 | {ar-lb} Lebanese Arabic; | |
300 | {ar-ly} Libyan Arabic; | |
301 | {ar-ma} Moroccan Arabic; | |
302 | {ar-om} Omani Arabic; | |
303 | {ar-qa} Qatari Arabic; | |
304 | {ar-sa} Saudi Arabic; | |
305 | {ar-sy} Syrian Arabic; | |
306 | {ar-tn} Tunisian Arabic; | |
307 | {ar-ye} Yemen Arabic. | |
308 | ||
309 | =item {arc} : Aramaic | |
310 | ||
311 | NOT Amharic! NOT Samaritan Aramaic! | |
312 | ||
313 | =item {arp} : Arapaho | |
314 | ||
315 | =item {arn} : Araucanian | |
316 | ||
317 | =item {arw} : Arawak | |
318 | ||
319 | =item {hy} : Armenian | |
320 | ||
321 | =item {an} : Aragonese | |
322 | ||
323 | =item [{art} : Artificial (Other)] | |
324 | ||
325 | =item {ast} : Asturian | |
326 | ||
327 | eq Bable. | |
328 | ||
329 | =item {as} : Assamese | |
330 | ||
331 | =item [{ath} : Athapascan languages] | |
332 | ||
333 | eq Athabaskan. eq Athapaskan. eq Athabascan. | |
334 | ||
335 | =item [{aus} : Australian languages] | |
336 | ||
337 | =item [{map} : Austronesian (Other)] | |
338 | ||
339 | =item {av} : Avaric | |
340 | ||
341 | (Formerly "ava".) | |
342 | ||
343 | =item {ae} : Avestan | |
344 | ||
345 | eq Zend | |
346 | ||
347 | =item {awa} : Awadhi | |
348 | ||
349 | =item {ay} : Aymara | |
350 | ||
351 | =item {az} : Azerbaijani | |
352 | ||
353 | eq Azeri | |
354 | ||
355 | Notable forms: | |
356 | {az-Arab} Azerbaijani in Arabic script; | |
357 | {az-Cyrl} Azerbaijani in Cyrillic script; | |
358 | {az-Latn} Azerbaijani in Latin script. | |
359 | ||
360 | =item {ban} : Balinese | |
361 | ||
362 | =item [{bat} : Baltic (Other)] | |
363 | ||
364 | =item {bal} : Baluchi | |
365 | ||
366 | =item {bm} : Bambara | |
367 | ||
368 | (Formerly "bam".) | |
369 | ||
370 | =item [{bai} : Bamileke languages] | |
371 | ||
372 | =item {bad} : Banda | |
373 | ||
374 | =item [{bnt} : Bantu (Other)] | |
375 | ||
376 | =item {bas} : Basa | |
377 | ||
378 | =item {ba} : Bashkir | |
379 | ||
380 | =item {eu} : Basque | |
381 | ||
382 | =item {btk} : Batak (Indonesia) | |
383 | ||
384 | =item {bej} : Beja | |
385 | ||
386 | =item {be} : Belarusian | |
387 | ||
388 | eq Belarussian. eq Byelarussian. | |
389 | eq Belorussian. eq Byelorussian. | |
390 | eq White Russian. eq White Ruthenian. | |
391 | NOT Ruthenian! | |
392 | ||
393 | =item {bem} : Bemba | |
394 | ||
395 | =item {bn} : Bengali | |
396 | ||
397 | eq Bangla. | |
398 | ||
399 | =item [{ber} : Berber (Other)] | |
400 | ||
401 | =item {bho} : Bhojpuri | |
402 | ||
403 | =item {bh} : Bihari | |
404 | ||
405 | =item {bik} : Bikol | |
406 | ||
407 | =item {bin} : Bini | |
408 | ||
409 | =item {bi} : Bislama | |
410 | ||
411 | eq Bichelamar. | |
412 | ||
413 | =item {bs} : Bosnian | |
414 | ||
415 | =item {bra} : Braj | |
416 | ||
417 | =item {br} : Breton | |
418 | ||
419 | =item {bug} : Buginese | |
420 | ||
421 | =item {bg} : Bulgarian | |
422 | ||
423 | =item {i-bnn} : Bunun | |
424 | ||
425 | =item {bua} : Buriat | |
426 | ||
427 | =item {my} : Burmese | |
428 | ||
429 | =item {cad} : Caddo | |
430 | ||
431 | =item {car} : Carib | |
432 | ||
433 | =item {ca} : Catalan | |
434 | ||
435 | eq CatalE<aacute>n. eq Catalonian. | |
436 | ||
437 | =item [{cau} : Caucasian (Other)] | |
438 | ||
439 | =item {ceb} : Cebuano | |
440 | ||
441 | =item [{cel} : Celtic (Other)] | |
442 | ||
443 | Notable forms: | |
444 | {cel-gaulish} Gaulish (Historical) | |
445 | ||
446 | =item [{cai} : Central American Indian (Other)] | |
447 | ||
448 | =item {chg} : Chagatai | |
449 | ||
450 | (Historical?) | |
451 | ||
452 | =item [{cmc} : Chamic languages] | |
453 | ||
454 | =item {ch} : Chamorro | |
455 | ||
456 | =item {ce} : Chechen | |
457 | ||
458 | =item {chr} : Cherokee | |
459 | ||
460 | eq Tsalagi | |
461 | ||
462 | =item {chy} : Cheyenne | |
463 | ||
464 | =item {chb} : Chibcha | |
465 | ||
466 | (Historical) NOT Chibchan (which is a language family). | |
467 | ||
468 | =item {ny} : Chichewa | |
469 | ||
470 | eq Nyanja. eq Chinyanja. | |
471 | ||
472 | =item {zh} : Chinese | |
473 | ||
474 | Many forms are mutually un-intelligible in spoken media. | |
475 | Notable forms: | |
476 | {zh-Hans} Chinese, in simplified script; | |
477 | {zh-Hant} Chinese, in traditional script; | |
478 | {zh-tw} Taiwan Chinese; | |
479 | {zh-cn} PRC Chinese; | |
480 | {zh-sg} Singapore Chinese; | |
481 | {zh-mo} Macau Chinese; | |
482 | {zh-hk} Hong Kong Chinese; | |
483 | {zh-guoyu} Mandarin [Putonghua/Guoyu]; | |
484 | {zh-hakka} Hakka [formerly "i-hakka"]; | |
485 | {zh-min} Hokkien; | |
486 | {zh-min-nan} Southern Hokkien; | |
487 | {zh-wuu} Shanghaiese; | |
488 | {zh-xiang} Hunanese; | |
489 | {zh-gan} Gan; | |
490 | {zh-yue} Cantonese. | |
491 | ||
492 | =for etc | |
493 | {i-hakka} Hakka (old tag) | |
494 | ||
495 | =item {chn} : Chinook Jargon | |
496 | ||
497 | eq Chinook Wawa. | |
498 | ||
499 | =item {chp} : Chipewyan | |
500 | ||
501 | =item {cho} : Choctaw | |
502 | ||
503 | =item {cu} : Church Slavic | |
504 | ||
505 | eq Old Church Slavonic. | |
506 | ||
507 | =item {chk} : Chuukese | |
508 | ||
509 | eq Trukese. eq Chuuk. eq Truk. eq Ruk. | |
510 | ||
511 | =item {cv} : Chuvash | |
512 | ||
513 | =item {cop} : Coptic | |
514 | ||
515 | =item {kw} : Cornish | |
516 | ||
517 | =item {co} : Corsican | |
518 | ||
519 | eq Corse. | |
520 | ||
521 | =item {cr} : Cree | |
522 | ||
523 | NOT Creek! (Formerly "cre".) | |
524 | ||
525 | =item {mus} : Creek | |
526 | ||
527 | NOT Cree! | |
528 | ||
529 | =item [{cpe} : English-based Creoles and pidgins (Other)] | |
530 | ||
531 | =item [{cpf} : French-based Creoles and pidgins (Other)] | |
532 | ||
533 | =item [{cpp} : Portuguese-based Creoles and pidgins (Other)] | |
534 | ||
535 | =item [{crp} : Creoles and pidgins (Other)] | |
536 | ||
537 | =item {hr} : Croatian | |
538 | ||
539 | eq Croat. | |
540 | ||
541 | =item [{cus} : Cushitic (Other)] | |
542 | ||
543 | =item {cs} : Czech | |
544 | ||
545 | =item {dak} : Dakota | |
546 | ||
547 | eq Nakota. eq Lakota. | |
548 | ||
549 | =item {da} : Danish | |
550 | ||
551 | =item {dar} : Dargwa | |
552 | ||
553 | =item {day} : Dayak | |
554 | ||
555 | =item {i-default} : Default (Fallthru) Language | |
556 | ||
557 | Defined in RFC 2277, this is for tagging text | |
558 | (which must include English text, and might/should include text | |
559 | in other appropriate languages) that is emitted in a context | |
560 | where language-negotiation wasn't possible -- in SMTP mail failure | |
561 | messages, for example. | |
562 | ||
563 | =item {del} : Delaware | |
564 | ||
565 | =item {din} : Dinka | |
566 | ||
567 | =item {dv} : Divehi | |
568 | ||
569 | eq Maldivian. (Formerly "div".) | |
570 | ||
571 | =item {doi} : Dogri | |
572 | ||
573 | NOT Dogrib! | |
574 | ||
575 | =item {dgr} : Dogrib | |
576 | ||
577 | NOT Dogri! | |
578 | ||
579 | =item [{dra} : Dravidian (Other)] | |
580 | ||
581 | =item {dua} : Duala | |
582 | ||
583 | =item {nl} : Dutch | |
584 | ||
585 | eq Netherlander. Notable forms: | |
586 | {nl-nl} Netherlands Dutch; | |
587 | {nl-be} Belgian Dutch. | |
588 | ||
589 | =item {dum} : Middle Dutch (ca.1050-1350) | |
590 | ||
591 | (Historical) | |
592 | ||
593 | =item {dyu} : Dyula | |
594 | ||
595 | =item {dz} : Dzongkha | |
596 | ||
597 | =item {efi} : Efik | |
598 | ||
599 | =item {egy} : Ancient Egyptian | |
600 | ||
601 | (Historical) | |
602 | ||
603 | =item {eka} : Ekajuk | |
604 | ||
605 | =item {elx} : Elamite | |
606 | ||
607 | (Historical) | |
608 | ||
609 | =item {en} : English | |
610 | ||
611 | Notable forms: | |
612 | {en-au} Australian English; | |
613 | {en-bz} Belize English; | |
614 | {en-ca} Canadian English; | |
615 | {en-gb} UK English; | |
616 | {en-ie} Irish English; | |
617 | {en-jm} Jamaican English; | |
618 | {en-nz} New Zealand English; | |
619 | {en-ph} Philippine English; | |
620 | {en-tt} Trinidad English; | |
621 | {en-us} US English; | |
622 | {en-za} South African English; | |
623 | {en-zw} Zimbabwe English. | |
624 | ||
625 | =item {enm} : Middle English (1100-1500) | |
626 | ||
627 | (Historical) | |
628 | ||
629 | =item {ang} : Old English (ca.450-1100) | |
630 | ||
631 | eq Anglo-Saxon. (Historical) | |
632 | ||
633 | =item {i-enochian} : Enochian (Artificial) | |
634 | ||
635 | =item {myv} : Erzya | |
636 | ||
637 | =item {eo} : Esperanto | |
638 | ||
639 | (Artificial) | |
640 | ||
641 | =item {et} : Estonian | |
642 | ||
643 | =item {ee} : Ewe | |
644 | ||
645 | (Formerly "ewe".) | |
646 | ||
647 | =item {ewo} : Ewondo | |
648 | ||
649 | =item {fan} : Fang | |
650 | ||
651 | =item {fat} : Fanti | |
652 | ||
653 | =item {fo} : Faroese | |
654 | ||
655 | =item {fj} : Fijian | |
656 | ||
657 | =item {fi} : Finnish | |
658 | ||
659 | =item [{fiu} : Finno-Ugrian (Other)] | |
660 | ||
661 | eq Finno-Ugric. NOT Ugaritic! | |
662 | ||
663 | =item {fon} : Fon | |
664 | ||
665 | =item {fr} : French | |
666 | ||
667 | Notable forms: | |
668 | {fr-fr} France French; | |
669 | {fr-be} Belgian French; | |
670 | {fr-ca} Canadian French; | |
671 | {fr-ch} Swiss French; | |
672 | {fr-lu} Luxembourg French; | |
673 | {fr-mc} Monaco French. | |
674 | ||
675 | =item {frm} : Middle French (ca.1400-1600) | |
676 | ||
677 | (Historical) | |
678 | ||
679 | =item {fro} : Old French (842-ca.1400) | |
680 | ||
681 | (Historical) | |
682 | ||
683 | =item {fy} : Frisian | |
684 | ||
685 | =item {fur} : Friulian | |
686 | ||
687 | =item {ff} : Fulah | |
688 | ||
689 | (Formerly "ful".) | |
690 | ||
691 | =item {gaa} : Ga | |
692 | ||
693 | =item {gd} : Scots Gaelic | |
694 | ||
695 | NOT Scots! | |
696 | ||
697 | =item {gl} : Gallegan | |
698 | ||
699 | eq Galician | |
700 | ||
701 | =item {lg} : Ganda | |
702 | ||
703 | (Formerly "lug".) | |
704 | ||
705 | =item {gay} : Gayo | |
706 | ||
707 | =item {gba} : Gbaya | |
708 | ||
709 | =item {gez} : Geez | |
710 | ||
711 | eq Ge'ez | |
712 | ||
713 | =item {ka} : Georgian | |
714 | ||
715 | =item {de} : German | |
716 | ||
717 | Notable forms: | |
718 | {de-at} Austrian German; | |
719 | {de-be} Belgian German; | |
720 | {de-ch} Swiss German; | |
721 | {de-de} Germany German; | |
722 | {de-li} Liechtenstein German; | |
723 | {de-lu} Luxembourg German. | |
724 | ||
725 | =item {gmh} : Middle High German (ca.1050-1500) | |
726 | ||
727 | (Historical) | |
728 | ||
729 | =item {goh} : Old High German (ca.750-1050) | |
730 | ||
731 | (Historical) | |
732 | ||
733 | =item [{gem} : Germanic (Other)] | |
734 | ||
735 | =item {gil} : Gilbertese | |
736 | ||
737 | =item {gon} : Gondi | |
738 | ||
739 | =item {gor} : Gorontalo | |
740 | ||
741 | =item {got} : Gothic | |
742 | ||
743 | (Historical) | |
744 | ||
745 | =item {grb} : Grebo | |
746 | ||
747 | =item {grc} : Ancient Greek | |
748 | ||
749 | (Historical) (Until 15th century or so.) | |
750 | ||
751 | =item {el} : Modern Greek | |
752 | ||
753 | (Since 15th century or so.) | |
754 | ||
755 | =item {gn} : Guarani | |
756 | ||
757 | GuaranE<iacute> | |
758 | ||
759 | =item {gu} : Gujarati | |
760 | ||
761 | =item {gwi} : Gwich'in | |
762 | ||
763 | eq Gwichin | |
764 | ||
765 | =item {hai} : Haida | |
766 | ||
767 | =item {ht} : Haitian | |
768 | ||
769 | eq Haitian Creole | |
770 | ||
771 | =item {ha} : Hausa | |
772 | ||
773 | =item {haw} : Hawaiian | |
774 | ||
775 | Hawai'ian | |
776 | ||
777 | =item {he} : Hebrew | |
778 | ||
779 | (Formerly "iw".) | |
780 | ||
781 | =for etc | |
782 | {iw} Hebrew (old tag) | |
783 | ||
784 | =item {hz} : Herero | |
785 | ||
786 | =item {hil} : Hiligaynon | |
787 | ||
788 | =item {him} : Himachali | |
789 | ||
790 | =item {hi} : Hindi | |
791 | ||
792 | =item {ho} : Hiri Motu | |
793 | ||
794 | =item {hit} : Hittite | |
795 | ||
796 | (Historical) | |
797 | ||
798 | =item {hmn} : Hmong | |
799 | ||
800 | =item {hu} : Hungarian | |
801 | ||
802 | =item {hup} : Hupa | |
803 | ||
804 | =item {iba} : Iban | |
805 | ||
806 | =item {is} : Icelandic | |
807 | ||
808 | =item {io} : Ido | |
809 | ||
810 | (Artificial) | |
811 | ||
812 | =item {ig} : Igbo | |
813 | ||
814 | (Formerly "ibo".) | |
815 | ||
816 | =item {ijo} : Ijo | |
817 | ||
818 | =item {ilo} : Iloko | |
819 | ||
820 | =item [{inc} : Indic (Other)] | |
821 | ||
822 | =item [{ine} : Indo-European (Other)] | |
823 | ||
824 | =item {id} : Indonesian | |
825 | ||
826 | (Formerly "in".) | |
827 | ||
828 | =for etc | |
829 | {in} Indonesian (old tag) | |
830 | ||
831 | =item {inh} : Ingush | |
832 | ||
833 | =item {ia} : Interlingua (International Auxiliary Language Association) | |
834 | ||
835 | (Artificial) NOT Interlingue! | |
836 | ||
837 | =item {ie} : Interlingue | |
838 | ||
839 | (Artificial) NOT Interlingua! | |
840 | ||
841 | =item {iu} : Inuktitut | |
842 | ||
843 | A subform of "Eskimo". | |
844 | ||
845 | =item {ik} : Inupiaq | |
846 | ||
847 | A subform of "Eskimo". | |
848 | ||
849 | =item [{ira} : Iranian (Other)] | |
850 | ||
851 | =item {ga} : Irish | |
852 | ||
853 | =item {mga} : Middle Irish (900-1200) | |
854 | ||
855 | (Historical) | |
856 | ||
857 | =item {sga} : Old Irish (to 900) | |
858 | ||
859 | (Historical) | |
860 | ||
861 | =item [{iro} : Iroquoian languages] | |
862 | ||
863 | =item {it} : Italian | |
864 | ||
865 | Notable forms: | |
866 | {it-it} Italy Italian; | |
867 | {it-ch} Swiss Italian. | |
868 | ||
869 | =item {ja} : Japanese | |
870 | ||
871 | (NOT "jp"!) | |
872 | ||
873 | =item {jv} : Javanese | |
874 | ||
875 | (Formerly "jw" because of a typo.) | |
876 | ||
877 | =item {jrb} : Judeo-Arabic | |
878 | ||
879 | =item {jpr} : Judeo-Persian | |
880 | ||
881 | =item {kbd} : Kabardian | |
882 | ||
883 | =item {kab} : Kabyle | |
884 | ||
885 | =item {kac} : Kachin | |
886 | ||
887 | =item {kl} : Kalaallisut | |
888 | ||
889 | eq Greenlandic "Eskimo" | |
890 | ||
891 | =item {xal} : Kalmyk | |
892 | ||
893 | =item {kam} : Kamba | |
894 | ||
895 | =item {kn} : Kannada | |
896 | ||
897 | eq Kanarese. NOT Canadian! | |
898 | ||
899 | =item {kr} : Kanuri | |
900 | ||
901 | (Formerly "kau".) | |
902 | ||
903 | =item {krc} : Karachay-Balkar | |
904 | ||
905 | =item {kaa} : Kara-Kalpak | |
906 | ||
907 | =item {kar} : Karen | |
908 | ||
909 | =item {ks} : Kashmiri | |
910 | ||
911 | =item {csb} : Kashubian | |
912 | ||
913 | eq Kashub | |
914 | ||
915 | =item {kaw} : Kawi | |
916 | ||
917 | =item {kk} : Kazakh | |
918 | ||
919 | =item {kha} : Khasi | |
920 | ||
921 | =item {km} : Khmer | |
922 | ||
923 | eq Cambodian. eq Kampuchean. | |
924 | ||
925 | =item [{khi} : Khoisan (Other)] | |
926 | ||
927 | =item {kho} : Khotanese | |
928 | ||
929 | =item {ki} : Kikuyu | |
930 | ||
931 | eq Gikuyu. | |
932 | ||
933 | =item {kmb} : Kimbundu | |
934 | ||
935 | =item {rw} : Kinyarwanda | |
936 | ||
937 | =item {ky} : Kirghiz | |
938 | ||
939 | =item {i-klingon} : Klingon | |
940 | ||
941 | =item {kv} : Komi | |
942 | ||
943 | =item {kg} : Kongo | |
944 | ||
945 | (Formerly "kon".) | |
946 | ||
947 | =item {kok} : Konkani | |
948 | ||
949 | =item {ko} : Korean | |
950 | ||
951 | =item {kos} : Kosraean | |
952 | ||
953 | =item {kpe} : Kpelle | |
954 | ||
955 | =item {kro} : Kru | |
956 | ||
957 | =item {kj} : Kuanyama | |
958 | ||
959 | =item {kum} : Kumyk | |
960 | ||
961 | =item {ku} : Kurdish | |
962 | ||
963 | =item {kru} : Kurukh | |
964 | ||
965 | =item {kut} : Kutenai | |
966 | ||
967 | =item {lad} : Ladino | |
968 | ||
969 | eq Judeo-Spanish. NOT Ladin (a minority language in Italy). | |
970 | ||
971 | =item {lah} : Lahnda | |
972 | ||
973 | NOT Lamba! | |
974 | ||
975 | =item {lam} : Lamba | |
976 | ||
977 | NOT Lahnda! | |
978 | ||
979 | =item {lo} : Lao | |
980 | ||
981 | eq Laotian. | |
982 | ||
983 | =item {la} : Latin | |
984 | ||
985 | (Historical) NOT Ladin! NOT Ladino! | |
986 | ||
987 | =item {lv} : Latvian | |
988 | ||
989 | eq Lettish. | |
990 | ||
991 | =item {lb} : Letzeburgesch | |
992 | ||
993 | eq Luxemburgian, eq Luxemburger. (Formerly "i-lux".) | |
994 | ||
995 | =for etc | |
996 | {i-lux} Letzeburgesch (old tag) | |
997 | ||
998 | =item {lez} : Lezghian | |
999 | ||
1000 | =item {li} : Limburgish | |
1001 | ||
1002 | eq Limburger, eq Limburgan. NOT Letzeburgesch! | |
1003 | ||
1004 | =item {ln} : Lingala | |
1005 | ||
1006 | =item {lt} : Lithuanian | |
1007 | ||
1008 | =item {nds} : Low German | |
1009 | ||
1010 | eq Low Saxon. eq Low German. eq Low Saxon. | |
1011 | ||
1012 | =item {art-lojban} : Lojban (Artificial) | |
1013 | ||
1014 | =item {loz} : Lozi | |
1015 | ||
1016 | =item {lu} : Luba-Katanga | |
1017 | ||
1018 | (Formerly "lub".) | |
1019 | ||
1020 | =item {lua} : Luba-Lulua | |
1021 | ||
1022 | =item {lui} : Luiseno | |
1023 | ||
1024 | eq LuiseE<ntilde>o. | |
1025 | ||
1026 | =item {lun} : Lunda | |
1027 | ||
1028 | =item {luo} : Luo (Kenya and Tanzania) | |
1029 | ||
1030 | =item {lus} : Lushai | |
1031 | ||
1032 | =item {mk} : Macedonian | |
1033 | ||
1034 | eq the modern Slavic language spoken in what was Yugoslavia. | |
1035 | NOT the form of Greek spoken in Greek Macedonia! | |
1036 | ||
1037 | =item {mad} : Madurese | |
1038 | ||
1039 | =item {mag} : Magahi | |
1040 | ||
1041 | =item {mai} : Maithili | |
1042 | ||
1043 | =item {mak} : Makasar | |
1044 | ||
1045 | =item {mg} : Malagasy | |
1046 | ||
1047 | =item {ms} : Malay | |
1048 | ||
1049 | NOT Malayalam! | |
1050 | ||
1051 | =item {ml} : Malayalam | |
1052 | ||
1053 | NOT Malay! | |
1054 | ||
1055 | =item {mt} : Maltese | |
1056 | ||
1057 | =item {mnc} : Manchu | |
1058 | ||
1059 | =item {mdr} : Mandar | |
1060 | ||
1061 | NOT Mandarin! | |
1062 | ||
1063 | =item {man} : Mandingo | |
1064 | ||
1065 | =item {mni} : Manipuri | |
1066 | ||
1067 | eq Meithei. | |
1068 | ||
1069 | =item [{mno} : Manobo languages] | |
1070 | ||
1071 | =item {gv} : Manx | |
1072 | ||
1073 | =item {mi} : Maori | |
1074 | ||
1075 | NOT Mari! | |
1076 | ||
1077 | =item {mr} : Marathi | |
1078 | ||
1079 | =item {chm} : Mari | |
1080 | ||
1081 | NOT Maori! | |
1082 | ||
1083 | =item {mh} : Marshall | |
1084 | ||
1085 | eq Marshallese. | |
1086 | ||
1087 | =item {mwr} : Marwari | |
1088 | ||
1089 | =item {mas} : Masai | |
1090 | ||
1091 | =item [{myn} : Mayan languages] | |
1092 | ||
1093 | =item {men} : Mende | |
1094 | ||
1095 | =item {mic} : Micmac | |
1096 | ||
1097 | =item {min} : Minangkabau | |
1098 | ||
1099 | =item {i-mingo} : Mingo | |
1100 | ||
1101 | eq the Iroquoian language West Virginia Seneca. NOT New York Seneca! | |
1102 | ||
1103 | =item [{mis} : Miscellaneous languages] | |
1104 | ||
1105 | Don't use this. | |
1106 | ||
1107 | =item {moh} : Mohawk | |
1108 | ||
1109 | =item {mdf} : Moksha | |
1110 | ||
1111 | =item {mo} : Moldavian | |
1112 | ||
1113 | eq Moldovan. | |
1114 | ||
1115 | =item [{mkh} : Mon-Khmer (Other)] | |
1116 | ||
1117 | =item {lol} : Mongo | |
1118 | ||
1119 | =item {mn} : Mongolian | |
1120 | ||
1121 | eq Mongol. | |
1122 | ||
1123 | =item {mos} : Mossi | |
1124 | ||
1125 | =item [{mul} : Multiple languages] | |
1126 | ||
1127 | Not for normal use. | |
1128 | ||
1129 | =item [{mun} : Munda languages] | |
1130 | ||
1131 | =item {nah} : Nahuatl | |
1132 | ||
1133 | =item {nap} : Neapolitan | |
1134 | ||
1135 | =item {na} : Nauru | |
1136 | ||
1137 | =item {nv} : Navajo | |
1138 | ||
1139 | eq Navaho. (Formerly "i-navajo".) | |
1140 | ||
1141 | =for etc | |
1142 | {i-navajo} Navajo (old tag) | |
1143 | ||
1144 | =item {nd} : North Ndebele | |
1145 | ||
1146 | =item {nr} : South Ndebele | |
1147 | ||
1148 | =item {ng} : Ndonga | |
1149 | ||
1150 | =item {ne} : Nepali | |
1151 | ||
1152 | eq Nepalese. Notable forms: | |
1153 | {ne-np} Nepal Nepali; | |
1154 | {ne-in} India Nepali. | |
1155 | ||
1156 | =item {new} : Newari | |
1157 | ||
1158 | =item {nia} : Nias | |
1159 | ||
1160 | =item [{nic} : Niger-Kordofanian (Other)] | |
1161 | ||
1162 | =item [{ssa} : Nilo-Saharan (Other)] | |
1163 | ||
1164 | =item {niu} : Niuean | |
1165 | ||
1166 | =item {nog} : Nogai | |
1167 | ||
1168 | =item {non} : Old Norse | |
1169 | ||
1170 | (Historical) | |
1171 | ||
1172 | =item [{nai} : North American Indian] | |
1173 | ||
1174 | Do not use this. | |
1175 | ||
1176 | =item {no} : Norwegian | |
1177 | ||
1178 | Note the two following forms: | |
1179 | ||
1180 | =item {nb} : Norwegian Bokmal | |
1181 | ||
1182 | eq BokmE<aring>l, (A form of Norwegian.) (Formerly "no-bok".) | |
1183 | ||
1184 | =for etc | |
1185 | {no-bok} Norwegian Bokmal (old tag) | |
1186 | ||
1187 | =item {nn} : Norwegian Nynorsk | |
1188 | ||
1189 | (A form of Norwegian.) (Formerly "no-nyn".) | |
1190 | ||
1191 | =for etc | |
1192 | {no-nyn} Norwegian Nynorsk (old tag) | |
1193 | ||
1194 | =item [{nub} : Nubian languages] | |
1195 | ||
1196 | =item {nym} : Nyamwezi | |
1197 | ||
1198 | =item {nyn} : Nyankole | |
1199 | ||
1200 | =item {nyo} : Nyoro | |
1201 | ||
1202 | =item {nzi} : Nzima | |
1203 | ||
1204 | =item {oc} : Occitan (post 1500) | |
1205 | ||
1206 | eq ProvenE<ccedil>al, eq Provencal | |
1207 | ||
1208 | =item {oj} : Ojibwa | |
1209 | ||
1210 | eq Ojibwe. (Formerly "oji".) | |
1211 | ||
1212 | =item {or} : Oriya | |
1213 | ||
1214 | =item {om} : Oromo | |
1215 | ||
1216 | =item {osa} : Osage | |
1217 | ||
1218 | =item {os} : Ossetian; Ossetic | |
1219 | ||
1220 | =item [{oto} : Otomian languages] | |
1221 | ||
1222 | Group of languages collectively called "OtomE<iacute>". | |
1223 | ||
1224 | =item {pal} : Pahlavi | |
1225 | ||
1226 | eq Pahlevi | |
1227 | ||
1228 | =item {i-pwn} : Paiwan | |
1229 | ||
1230 | eq Pariwan | |
1231 | ||
1232 | =item {pau} : Palauan | |
1233 | ||
1234 | =item {pi} : Pali | |
1235 | ||
1236 | (Historical?) | |
1237 | ||
1238 | =item {pam} : Pampanga | |
1239 | ||
1240 | =item {pag} : Pangasinan | |
1241 | ||
1242 | =item {pa} : Panjabi | |
1243 | ||
1244 | eq Punjabi | |
1245 | ||
1246 | =item {pap} : Papiamento | |
1247 | ||
1248 | eq Papiamentu. | |
1249 | ||
1250 | =item [{paa} : Papuan (Other)] | |
1251 | ||
1252 | =item {fa} : Persian | |
1253 | ||
1254 | eq Farsi. eq Iranian. | |
1255 | ||
1256 | =item {peo} : Old Persian (ca.600-400 B.C.) | |
1257 | ||
1258 | =item [{phi} : Philippine (Other)] | |
1259 | ||
1260 | =item {phn} : Phoenician | |
1261 | ||
1262 | (Historical) | |
1263 | ||
1264 | =item {pon} : Pohnpeian | |
1265 | ||
1266 | NOT Pompeiian! | |
1267 | ||
1268 | =item {pl} : Polish | |
1269 | ||
1270 | =item {pt} : Portuguese | |
1271 | ||
1272 | eq Portugese. Notable forms: | |
1273 | {pt-pt} Portugal Portuguese; | |
1274 | {pt-br} Brazilian Portuguese. | |
1275 | ||
1276 | =item [{pra} : Prakrit languages] | |
1277 | ||
1278 | =item {pro} : Old Provencal (to 1500) | |
1279 | ||
1280 | eq Old ProvenE<ccedil>al. (Historical.) | |
1281 | ||
1282 | =item {ps} : Pushto | |
1283 | ||
1284 | eq Pashto. eq Pushtu. | |
1285 | ||
1286 | =item {qu} : Quechua | |
1287 | ||
1288 | eq Quecha. | |
1289 | ||
1290 | =item {rm} : Raeto-Romance | |
1291 | ||
1292 | eq Romansh. | |
1293 | ||
1294 | =item {raj} : Rajasthani | |
1295 | ||
1296 | =item {rap} : Rapanui | |
1297 | ||
1298 | =item {rar} : Rarotongan | |
1299 | ||
1300 | =item [{qaa - qtz} : Reserved for local use.] | |
1301 | ||
1302 | =item [{roa} : Romance (Other)] | |
1303 | ||
1304 | NOT Romanian! NOT Romany! NOT Romansh! | |
1305 | ||
1306 | =item {ro} : Romanian | |
1307 | ||
1308 | eq Rumanian. NOT Romany! | |
1309 | ||
1310 | =item {rom} : Romany | |
1311 | ||
1312 | eq Rom. NOT Romanian! | |
1313 | ||
1314 | =item {rn} : Rundi | |
1315 | ||
1316 | =item {ru} : Russian | |
1317 | ||
1318 | NOT White Russian! NOT Rusyn! | |
1319 | ||
1320 | =item [{sal} : Salishan languages] | |
1321 | ||
1322 | Large language group. | |
1323 | ||
1324 | =item {sam} : Samaritan Aramaic | |
1325 | ||
1326 | NOT Aramaic! | |
1327 | ||
1328 | =item {se} : Northern Sami | |
1329 | ||
1330 | eq Lappish. eq Lapp. eq (Northern) Saami. | |
1331 | ||
1332 | =item {sma} : Southern Sami | |
1333 | ||
1334 | =item {smn} : Inari Sami | |
1335 | ||
1336 | =item {smj} : Lule Sami | |
1337 | ||
1338 | =item {sms} : Skolt Sami | |
1339 | ||
1340 | =item [{smi} : Sami languages (Other)] | |
1341 | ||
1342 | =item {sm} : Samoan | |
1343 | ||
1344 | =item {sad} : Sandawe | |
1345 | ||
1346 | =item {sg} : Sango | |
1347 | ||
1348 | =item {sa} : Sanskrit | |
1349 | ||
1350 | (Historical) | |
1351 | ||
1352 | =item {sat} : Santali | |
1353 | ||
1354 | =item {sc} : Sardinian | |
1355 | ||
1356 | eq Sard. | |
1357 | ||
1358 | =item {sas} : Sasak | |
1359 | ||
1360 | =item {sco} : Scots | |
1361 | ||
1362 | NOT Scots Gaelic! | |
1363 | ||
1364 | =item {sel} : Selkup | |
1365 | ||
1366 | =item [{sem} : Semitic (Other)] | |
1367 | ||
1368 | =item {sr} : Serbian | |
1369 | ||
1370 | eq Serb. NOT Sorbian. | |
1371 | ||
1372 | Notable forms: | |
1373 | {sr-Cyrl} : Serbian in Cyrillic script; | |
1374 | {sr-Latn} : Serbian in Latin script. | |
1375 | ||
1376 | =item {srr} : Serer | |
1377 | ||
1378 | =item {shn} : Shan | |
1379 | ||
1380 | =item {sn} : Shona | |
1381 | ||
1382 | =item {sid} : Sidamo | |
1383 | ||
1384 | =item {sgn-...} : Sign Languages | |
1385 | ||
1386 | Always use with a subtag. Notable forms: | |
1387 | {sgn-gb} British Sign Language (BSL); | |
1388 | {sgn-ie} Irish Sign Language (ESL); | |
1389 | {sgn-ni} Nicaraguan Sign Language (ISN); | |
1390 | {sgn-us} American Sign Language (ASL). | |
1391 | ||
1392 | (And so on with other country codes as the subtag.) | |
1393 | ||
1394 | =item {bla} : Siksika | |
1395 | ||
1396 | eq Blackfoot. eq Pikanii. | |
1397 | ||
1398 | =item {sd} : Sindhi | |
1399 | ||
1400 | =item {si} : Sinhalese | |
1401 | ||
1402 | eq Sinhala. | |
1403 | ||
1404 | =item [{sit} : Sino-Tibetan (Other)] | |
1405 | ||
1406 | =item [{sio} : Siouan languages] | |
1407 | ||
1408 | =item {den} : Slave (Athapascan) | |
1409 | ||
1410 | ("Slavey" is a subform.) | |
1411 | ||
1412 | =item [{sla} : Slavic (Other)] | |
1413 | ||
1414 | =item {sk} : Slovak | |
1415 | ||
1416 | eq Slovakian. | |
1417 | ||
1418 | =item {sl} : Slovenian | |
1419 | ||
1420 | eq Slovene. | |
1421 | ||
1422 | =item {sog} : Sogdian | |
1423 | ||
1424 | =item {so} : Somali | |
1425 | ||
1426 | =item {son} : Songhai | |
1427 | ||
1428 | =item {snk} : Soninke | |
1429 | ||
1430 | =item {wen} : Sorbian languages | |
1431 | ||
1432 | eq Wendish. eq Sorb. eq Lusatian. eq Wend. NOT Venda! NOT Serbian! | |
1433 | ||
1434 | =item {nso} : Northern Sotho | |
1435 | ||
1436 | =item {st} : Southern Sotho | |
1437 | ||
1438 | eq Sutu. eq Sesotho. | |
1439 | ||
1440 | =item [{sai} : South American Indian (Other)] | |
1441 | ||
1442 | =item {es} : Spanish | |
1443 | ||
1444 | Notable forms: | |
1445 | {es-ar} Argentine Spanish; | |
1446 | {es-bo} Bolivian Spanish; | |
1447 | {es-cl} Chilean Spanish; | |
1448 | {es-co} Colombian Spanish; | |
1449 | {es-do} Dominican Spanish; | |
1450 | {es-ec} Ecuadorian Spanish; | |
1451 | {es-es} Spain Spanish; | |
1452 | {es-gt} Guatemalan Spanish; | |
1453 | {es-hn} Honduran Spanish; | |
1454 | {es-mx} Mexican Spanish; | |
1455 | {es-pa} Panamanian Spanish; | |
1456 | {es-pe} Peruvian Spanish; | |
1457 | {es-pr} Puerto Rican Spanish; | |
1458 | {es-py} Paraguay Spanish; | |
1459 | {es-sv} Salvadoran Spanish; | |
1460 | {es-us} US Spanish; | |
1461 | {es-uy} Uruguayan Spanish; | |
1462 | {es-ve} Venezuelan Spanish. | |
1463 | ||
1464 | =item {suk} : Sukuma | |
1465 | ||
1466 | =item {sux} : Sumerian | |
1467 | ||
1468 | (Historical) | |
1469 | ||
1470 | =item {su} : Sundanese | |
1471 | ||
1472 | =item {sus} : Susu | |
1473 | ||
1474 | =item {sw} : Swahili | |
1475 | ||
1476 | eq Kiswahili | |
1477 | ||
1478 | =item {ss} : Swati | |
1479 | ||
1480 | =item {sv} : Swedish | |
1481 | ||
1482 | Notable forms: | |
1483 | {sv-se} Sweden Swedish; | |
1484 | {sv-fi} Finland Swedish. | |
1485 | ||
1486 | =item {syr} : Syriac | |
1487 | ||
1488 | =item {tl} : Tagalog | |
1489 | ||
1490 | =item {ty} : Tahitian | |
1491 | ||
1492 | =item [{tai} : Tai (Other)] | |
1493 | ||
1494 | NOT Thai! | |
1495 | ||
1496 | =item {tg} : Tajik | |
1497 | ||
1498 | =item {tmh} : Tamashek | |
1499 | ||
1500 | =item {ta} : Tamil | |
1501 | ||
1502 | =item {i-tao} : Tao | |
1503 | ||
1504 | eq Yami. | |
1505 | ||
1506 | =item {tt} : Tatar | |
1507 | ||
1508 | =item {i-tay} : Tayal | |
1509 | ||
1510 | eq Atayal. eq Atayan. | |
1511 | ||
1512 | =item {te} : Telugu | |
1513 | ||
1514 | =item {ter} : Tereno | |
1515 | ||
1516 | =item {tet} : Tetum | |
1517 | ||
1518 | =item {th} : Thai | |
1519 | ||
1520 | NOT Tai! | |
1521 | ||
1522 | =item {bo} : Tibetan | |
1523 | ||
1524 | =item {tig} : Tigre | |
1525 | ||
1526 | =item {ti} : Tigrinya | |
1527 | ||
1528 | =item {tem} : Timne | |
1529 | ||
1530 | eq Themne. eq Timene. | |
1531 | ||
1532 | =item {tiv} : Tiv | |
1533 | ||
1534 | =item {tli} : Tlingit | |
1535 | ||
1536 | =item {tpi} : Tok Pisin | |
1537 | ||
1538 | =item {tkl} : Tokelau | |
1539 | ||
1540 | =item {tog} : Tonga (Nyasa) | |
1541 | ||
1542 | NOT Tsonga! | |
1543 | ||
1544 | =item {to} : Tonga (Tonga Islands) | |
1545 | ||
1546 | (Pronounced "Tong-a", not "Tong-ga") | |
1547 | ||
1548 | NOT Tsonga! | |
1549 | ||
1550 | =item {tsi} : Tsimshian | |
1551 | ||
1552 | eq Sm'algyax | |
1553 | ||
1554 | =item {ts} : Tsonga | |
1555 | ||
1556 | NOT Tonga! | |
1557 | ||
1558 | =item {i-tsu} : Tsou | |
1559 | ||
1560 | =item {tn} : Tswana | |
1561 | ||
1562 | eq Setswana. | |
1563 | ||
1564 | =item {tum} : Tumbuka | |
1565 | ||
1566 | =item [{tup} : Tupi languages] | |
1567 | ||
1568 | =item {tr} : Turkish | |
1569 | ||
1570 | (Typically in Roman script) | |
1571 | ||
1572 | =item {ota} : Ottoman Turkish (1500-1928) | |
1573 | ||
1574 | (Typically in Arabic script) (Historical) | |
1575 | ||
1576 | =item {crh} : Crimean Turkish | |
1577 | ||
1578 | eq Crimean Tatar | |
1579 | ||
1580 | =item {tk} : Turkmen | |
1581 | ||
1582 | eq Turkmeni. | |
1583 | ||
1584 | =item {tvl} : Tuvalu | |
1585 | ||
1586 | =item {tyv} : Tuvinian | |
1587 | ||
1588 | eq Tuvan. eq Tuvin. | |
1589 | ||
1590 | =item {tw} : Twi | |
1591 | ||
1592 | =item {udm} : Udmurt | |
1593 | ||
1594 | =item {uga} : Ugaritic | |
1595 | ||
1596 | NOT Ugric! | |
1597 | ||
1598 | =item {ug} : Uighur | |
1599 | ||
1600 | =item {uk} : Ukrainian | |
1601 | ||
1602 | =item {umb} : Umbundu | |
1603 | ||
1604 | =item {und} : Undetermined | |
1605 | ||
1606 | Not a tag for normal use. | |
1607 | ||
1608 | =item {ur} : Urdu | |
1609 | ||
1610 | =item {uz} : Uzbek | |
1611 | ||
1612 | eq E<Ouml>zbek | |
1613 | ||
1614 | Notable forms: | |
1615 | {uz-Cyrl} Uzbek in Cyrillic script; | |
1616 | {uz-Latn} Uzbek in Latin script. | |
1617 | ||
1618 | =item {vai} : Vai | |
1619 | ||
1620 | =item {ve} : Venda | |
1621 | ||
1622 | NOT Wendish! NOT Wend! NOT Avestan! (Formerly "ven".) | |
1623 | ||
1624 | =item {vi} : Vietnamese | |
1625 | ||
1626 | eq Viet. | |
1627 | ||
1628 | =item {vo} : Volapuk | |
1629 | ||
1630 | eq VolapE<uuml>k. (Artificial) | |
1631 | ||
1632 | =item {vot} : Votic | |
1633 | ||
1634 | eq Votian. eq Vod. | |
1635 | ||
1636 | =item [{wak} : Wakashan languages] | |
1637 | ||
1638 | =item {wa} : Walloon | |
1639 | ||
1640 | =item {wal} : Walamo | |
1641 | ||
1642 | eq Wolaytta. | |
1643 | ||
1644 | =item {war} : Waray | |
1645 | ||
1646 | Presumably the Philippine language Waray-Waray (SamareE<ntilde>o), | |
1647 | not the smaller Philippine language Waray Sorsogon, nor the extinct | |
1648 | Australian language Waray. | |
1649 | ||
1650 | =item {was} : Washo | |
1651 | ||
1652 | eq Washoe | |
1653 | ||
1654 | =item {cy} : Welsh | |
1655 | ||
1656 | =item {wo} : Wolof | |
1657 | ||
1658 | =item {x-...} : Unregistered (Semi-Private Use) | |
1659 | ||
1660 | "x-" is a prefix for language tags that are not registered with ISO | |
1661 | or IANA. For example: x-double-dutch. | |
1662 | ||
1663 | =item {xh} : Xhosa | |
1664 | ||
1665 | =item {sah} : Yakut | |
1666 | ||
1667 | =item {yao} : Yao | |
1668 | ||
1669 | (The Yao in Malawi?) | |
1670 | ||
1671 | =item {yap} : Yapese | |
1672 | ||
1673 | eq Yap | |
1674 | ||
1675 | =item {ii} : Sichuan Yi | |
1676 | ||
1677 | =item {yi} : Yiddish | |
1678 | ||
1679 | Formerly "ji". Usually in Hebrew script. | |
1680 | ||
1681 | Notable forms: | |
1682 | {yi-latn} Yiddish in Latin script | |
1683 | ||
1684 | =item {yo} : Yoruba | |
1685 | ||
1686 | =item [{ypk} : Yupik languages] | |
1687 | ||
1688 | Several "Eskimo" languages. | |
1689 | ||
1690 | =item {znd} : Zande | |
1691 | ||
1692 | =item [{zap} : Zapotec] | |
1693 | ||
1694 | (A group of languages.) | |
1695 | ||
1696 | =item {zen} : Zenaga | |
1697 | ||
1698 | NOT Zend. | |
1699 | ||
1700 | =item {za} : Zhuang | |
1701 | ||
1702 | =item {zu} : Zulu | |
1703 | ||
1704 | =item {zun} : Zuni | |
1705 | ||
1706 | eq ZuE<ntilde>i | |
1707 | ||
1708 | =back | |
1709 | ||
1710 | =for woohah END | |
1711 | ||
1712 | =head1 SEE ALSO | |
1713 | ||
1714 | L<I18N::LangTags|I18N::LangTags> and its "See Also" section. | |
1715 | ||
1716 | =head1 COPYRIGHT AND DISCLAIMER | |
1717 | ||
1718 | Copyright (c) 2001+ Sean M. Burke. All rights reserved. | |
1719 | ||
1720 | You can redistribute and/or | |
1721 | modify this document under the same terms as Perl itself. | |
1722 | ||
1723 | This document is provided in the hope that it will be | |
1724 | useful, but without any warranty; | |
1725 | without even the implied warranty of accuracy, authoritativeness, | |
1726 | completeness, merchantability, or fitness for a particular purpose. | |
1727 | ||
1728 | Email any corrections or questions to me. | |
1729 | ||
1730 | =head1 AUTHOR | |
1731 | ||
1732 | Sean M. Burke, sburkeE<64>cpan.org | |
1733 | ||
1734 | =cut | |
1735 | ||
1736 | ||
1737 | # To generate a list of just the two- and three-letter codes, run this standalone script: | |
1738 | ||
1739 | #!/usr/local/bin/perl -w | |
1740 | ||
1741 | require 5; # Time-stamp: "2001-03-13 21:53:39 MST" | |
1742 | # Sean M. Burke, sburke@cpan.org | |
1743 | # Generates the language_codes.txt file: fetches the ISO 639-2 code table at $url and prints one "code<TAB>English name" line per five-cell table row, sorted by lowercased English name. | |
1744 | use strict; | |
1745 | use LWP::Simple; | |
1746 | use HTML::TreeBuilder 3.10; | |
1747 | my $root = HTML::TreeBuilder->new(); | |
1748 | my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html'; | |
1749 | $root->parse(get($url) || die "Can't get $url"); | |
1750 | $root->eof(); | |
1751 | ||
1752 | my @codes; | |
1753 | ||
1754 | foreach my $tr ($root->find_by_tag_name('tr')) { | |
1755 | my @f = map $_->as_text(), $tr->content_list(); | |
1756 | # debug aid (disabled): print map("<$_> ", @f), "\n"; | |
1757 | next unless @f == 5; | |
1758 | pop @f; # drop the last cell (the French name); four cells remain | |
1759 | next if $f[-1] eq 'Language Name (English)'; # skip the table's header row | |
1760 | my $xx = splice(@f, 2,1); # remove the third cell (the two-letter code) from @f, leaving three cells: $f[0], $f[1], and the English name | |
1761 | $f[-1] =~ s/^\s+//; | |
1762 | $f[-1] =~ s/\s+$//; | |
1763 | if($xx =~ m/[a-zA-Z]/) { # a two-letter code exists: emit it with the English name | |
1764 | push @codes, [ lc($f[-1]), "$xx\t$f[-1]\n" ]; | |
1765 | } else { # no two-letter code: fall back to the three-letter code | |
1766 | if($f[0] eq $f[1]) { | |
1767 | push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ]; | |
1768 | } else { # first two cells differ (presumably two variants of the three-letter code; not expected): emit all cells flagged with "!!!!!!!!!!" | |
1769 | push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ]; | |
1770 | } | |
1771 | } | |
1772 | } | |
1773 | ||
1774 | print map $_->[1], sort {; $a->[0] cmp $b->[0] } @codes; | |
1775 | print "[ based on $url\n at ", scalar(localtime), "]\n", | |
1776 | "[Note: doesn't include IANA-registered codes.]\n"; | |
1777 | exit; | |
1778 | __END__ | |
1779 |