Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / 5.8.0 / I18N / LangTags / List.pm
CommitLineData
86530b38
AT
1
2require 5;
3package I18N::LangTags::List;
4# Time-stamp: "2002-02-02 20:13:58 MST"
5use strict;
6use vars qw(%Name $Debug $VERSION);
7$VERSION = '0.25';
8# POD at the end.
9
10#----------------------------------------------------------------------
{
  # Populate %Name by reading the language table out of this module's own
  # POD, which is stored in the __DATA__ section of this file.
  #
  # The table is delimited by two POD markers:
  #     =for woohah START    ... table entries ...    =for woohah END
  # Lines before START are skipped.  Scanning stops at END so that text
  # following the table (SEE ALSO, copyright, appended notes) can never be
  # mistaken for a table entry.
  #
  # A table entry is any line containing "{tag}" (letters, digits and
  # hyphens only), optionally followed by ":", followed by the English
  # name.  The name runs up to the first square bracket; trailing
  # whitespace, semicolons and periods are stripped.
  #
  # Dies if no entries at all were found.
  my $in_table = 0;
  my $count    = 0;

  while(<I18N::LangTags::List::DATA>) {
    unless($in_table) {
      # Still looking for the START marker; the marker line itself
      # is never parsed as an entry.
      $in_table = 1 if m/=for woohah/;
      next;
    }

    # End of the table: do not parse the rest of the document.
    last if m/=for\s+woohah\s+END\b/;

    my($tag, $name) = m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/
      or next;

    # Drop the trailing punctuation/whitespace (including the newline).
    $name =~ s/\s*[;\.]*\s*$//g;
    next unless $name;

    ++$count;
    print "<$tag> <$name>\n" if $Debug;
    $Name{$tag} = $name;
  }

  die "No tags read??" unless $count;
}
31#----------------------------------------------------------------------
32
# name( $langtag )
#
# Returns an English name for the given language tag, or nothing (undef
# in scalar context) if no name can be found or the argument is false.
#
# The tag is lowercased and stripped of surrounding whitespace, then
# looked up in %Name.  A tag starting with "x-" is also tried with an
# "i-" prefix, and vice versa.  If there is no match, the last
# "-subtag" is removed and the lookup is retried; the removed subtags
# are reported in the result as:   Name (Subform "removed-subtags")
sub name {
  return unless $_[0];

  my $tag = lc $_[0];
  for ($tag) { s/^\s+//s; s/\s+$//s; }

  # The same tag with the x-/i- prefix swapped, or '' if neither applies.
  my $alt = '';
  if    ($tag =~ m/^x-(.+)/) { $alt = "i-$1" }
  elsif ($tag =~ m/^i-(.+)/) { $alt = "x-$1" }

  my($name, $subform) = ('', '');
  print "Input: {$tag}\n" if $Debug;

  while(length $tag) {
    $name = $Name{$tag} || $Name{$alt};
    last if $name;

    unless($tag =~ s/(-[a-z0-9]+)$//s) {
      # Nothing left to remove: we are down to a primary tag
      # (or the tag contains characters we do not handle).
      print "Aborting on: {$name}{$subform}\n" if $Debug;
      last;
    }

    my $shaved = $1;
    print "Shaving off: $shaved leaving $tag\n" if $Debug;
    $subform = $shaved . $subform;

    # Keep the alternate form in step with the main tag.
    if($alt =~ s/(-[a-z0-9]+)$//s) {
      print " alt -> $alt\n" if $Debug;
    }
  }
  print "Output: {$name}{$subform}\n" if $Debug;

  return unless $name;            # Failure
  return $name unless $subform;   # Exact match

  $subform =~ s/^-//s;
  $subform =~ s/-$//s;
  return qq{$name (Subform "$subform")};
}
73
741;
75
76__DATA__
77
78=head1 NAME
79
80I18N::LangTags::List -- tags and names for human languages
81
82=head1 SYNOPSIS
83
84 use I18N::LangTags::List;
85 print "Parlez-vous... ", join(', ',
86 I18N::LangTags::List::name('elx') || 'unknown_language',
87 I18N::LangTags::List::name('ar-Kw') || 'unknown_language',
88 I18N::LangTags::List::name('en') || 'unknown_language',
89 I18N::LangTags::List::name('en-CA') || 'unknown_language',
90 ), "?\n";
91
92prints:
93
94 Parlez-vous... Elamite, Kuwait Arabic, English, Canadian English?
95
96=head1 DESCRIPTION
97
98This module provides a function
99C<I18N::LangTags::List::name( I<langtag> ) > that takes
100a language tag (see L<I18N::LangTags|I18N::LangTags>)
101and returns the best attempt at an English name for it, or
102undef if it can't make sense of the tag.
103
104The function I18N::LangTags::List::name(...) is not exported.
105
The map of tags-to-names that it uses is accessible as
107%I18N::LangTags::List::Name, and it's the same as the list
108that follows in this documentation, which should be useful
109to you even if you don't use this module.
110
111=head1 ABOUT LANGUAGE TAGS
112
113Internet language tags, as defined in RFC 3066, are a formalism
114for denoting human languages. The two-letter ISO 639-1 language
115codes are well known (as "en" for English), as are their forms
116when qualified by a country code ("en-US"). Less well-known are the
117arbitrary-length non-ISO codes (like "i-mingo"), and the
118recently (in 2001) introduced three-letter ISO-639-2 codes.
119
120Remember these important facts:
121
122=over
123
124=item *
125
126Language tags are not locale IDs. A locale ID is written with a "_"
127instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and
128I<means> something different than a language tag. A language tag
129denotes a language. A locale ID denotes a language I<as used in>
130a particular place, in combination with non-linguistic
131location-specific information such as what currency is used
132there. Locales I<also> often denote character set information,
133as in "en_US.ISO8859-1".
134
135=item *
136
137Language tags are not for computer languages.
138
139=item *
140
141"Dialect" is not a useful term, since there is no objective
142criterion for establishing when two language-forms are
dialects of each other, or are separate languages.
144
145=item *
146
147Language tags are not case-sensitive. en-US, en-us, En-Us, etc.,
148are all the same tag, and denote the same language.
149
150=item *
151
152Not every language tag really refers to a single language. Some
153language tags refer to conditions: i-default (system-message text
154in English plus maybe other languages), und (undetermined
155language). Others (notably lots of the three-letter codes) are
156bibliographic tags that classify whole groups of languages, as
157with cus "Cushitic (Other)" (i.e., a
language that has been classed as Cushitic, but which has no more
159specific code) or the even less linguistically coherent
160sai for "South American Indian (Other)". Though useful in
161bibliography, B<SUCH TAGS ARE NOT
162FOR GENERAL USE>. For further guidance, email me.
163
164=item *
165
166Language tags are not country codes. In fact, they are often
167distinct codes, as with language tag ja for Japanese, and
168ISO 3166 country code C<.jp> for Japan.
169
170=back
171
172=head1 LIST OF LANGUAGES
173
174The first part of each item is the language tag, between
175{...}. It
176is followed by an English name for the language or language-group.
177Language tags that I judge to be not for general use, are bracketed.
178
179This list is in alphabetical order by English name of the language.
180
181=for reminder
182 The name in the =item line MUST NOT have E<...>'s in it!!
183
184=for woohah START
185
186=over
187
188=item {ab} : Abkhazian
189
190eq Abkhaz
191
192=item {ace} : Achinese
193
194=item {ach} : Acoli
195
196=item {ada} : Adangme
197
198=item {aa} : Afar
199
200=item {afh} : Afrihili
201
202(Artificial)
203
204=item {af} : Afrikaans
205
206=item [{afa} : Afro-Asiatic (Other)]
207
208=item {aka} : Akan
209
210=item {akk} : Akkadian
211
212(Historical)
213
214=item {sq} : Albanian
215
216=item {ale} : Aleut
217
218=item [{alg} : Algonquian languages]
219
220NOT Algonquin!
221
222=item [{tut} : Altaic (Other)]
223
224=item {am} : Amharic
225
226NOT Aramaic!
227
228=item {i-ami} : Ami
229
230eq Amis. eq 'Amis. eq Pangca.
231
232=item [{apa} : Apache languages]
233
234=item {ar} : Arabic
235
236Many forms are mutually un-intelligible in spoken media.
237Notable forms:
238{ar-ae} UAE Arabic;
239{ar-bh} Bahrain Arabic;
240{ar-dz} Algerian Arabic;
241{ar-eg} Egyptian Arabic;
242{ar-iq} Iraqi Arabic;
243{ar-jo} Jordanian Arabic;
244{ar-kw} Kuwait Arabic;
245{ar-lb} Lebanese Arabic;
246{ar-ly} Libyan Arabic;
247{ar-ma} Moroccan Arabic;
248{ar-om} Omani Arabic;
249{ar-qa} Qatari Arabic;
{ar-sa} Saudi Arabic;
251{ar-sy} Syrian Arabic;
252{ar-tn} Tunisian Arabic;
253{ar-ye} Yemen Arabic.
254
255=item {arc} : Aramaic
256
257NOT Amharic! NOT Samaritan Aramaic!
258
259=item {arp} : Arapaho
260
261=item {arn} : Araucanian
262
263=item {arw} : Arawak
264
265=item {hy} : Armenian
266
267=item [{art} : Artificial (Other)]
268
269=item {as} : Assamese
270
271=item [{ath} : Athapascan languages]
272
273eq Athabaskan. eq Athapaskan. eq Athabascan.
274
275=item [{aus} : Australian languages]
276
277=item [{map} : Austronesian (Other)]
278
279=item {ava} : Avaric
280
281=item {ae} : Avestan
282
283eq Zend
284
285=item {awa} : Awadhi
286
287=item {ay} : Aymara
288
289=item {az} : Azerbaijani
290
291eq Azeri
292
293=item {ban} : Balinese
294
295=item [{bat} : Baltic (Other)]
296
297=item {bal} : Baluchi
298
299=item {bam} : Bambara
300
301=item [{bai} : Bamileke languages]
302
303=item {bad} : Banda
304
305=item [{bnt} : Bantu (Other)]
306
307=item {bas} : Basa
308
309=item {ba} : Bashkir
310
311=item {eu} : Basque
312
313=item {btk} : Batak (Indonesia)
314
315=item {bej} : Beja
316
317=item {be} : Belarusian
318
319eq Belarussian. eq Byelarussian.
320eq Belorussian. eq Byelorussian.
321eq White Russian. eq White Ruthenian.
322NOT Ruthenian!
323
324=item {bem} : Bemba
325
326=item {bn} : Bengali
327
328eq Bangla.
329
330=item [{ber} : Berber (Other)]
331
332=item {bho} : Bhojpuri
333
334=item {bh} : Bihari
335
336=item {bik} : Bikol
337
338=item {bin} : Bini
339
340=item {bi} : Bislama
341
342eq Bichelamar.
343
344=item {bs} : Bosnian
345
346=item {bra} : Braj
347
348=item {br} : Breton
349
350=item {bug} : Buginese
351
352=item {bg} : Bulgarian
353
354=item {i-bnn} : Bunun
355
356=item {bua} : Buriat
357
358=item {my} : Burmese
359
360=item {cad} : Caddo
361
362=item {car} : Carib
363
364=item {ca} : Catalan
365
366eq CatalE<aacute>n. eq Catalonian.
367
368=item [{cau} : Caucasian (Other)]
369
370=item {ceb} : Cebuano
371
372=item [{cel} : Celtic (Other)]
373
374Notable forms:
375{cel-gaulish} Gaulish (Historical)
376
377=item [{cai} : Central American Indian (Other)]
378
379=item {chg} : Chagatai
380
381(Historical?)
382
383=item [{cmc} : Chamic languages]
384
385=item {ch} : Chamorro
386
387=item {ce} : Chechen
388
389=item {chr} : Cherokee
390
391eq Tsalagi
392
393=item {chy} : Cheyenne
394
395=item {chb} : Chibcha
396
397(Historical) NOT Chibchan (which is a language family).
398
399=item {ny} : Chichewa
400
401eq Nyanja. eq Chinyanja.
402
403=item {zh} : Chinese
404
405Many forms are mutually un-intelligible in spoken media.
406Notable subforms:
407{zh-cn} PRC Chinese;
408{zh-hk} Hong Kong Chinese;
409{zh-mo} Macau Chinese;
410{zh-sg} Singapore Chinese;
411{zh-tw} Taiwan Chinese;
412{zh-guoyu} Mandarin [Putonghua/Guoyu];
413{zh-hakka} Hakka [formerly i-hakka];
414{zh-min} Hokkien;
415{zh-min-nan} Southern Hokkien;
416{zh-wuu} Shanghaiese;
417{zh-xiang} Hunanese;
418{zh-gan} Gan;
419{zh-yue} Cantonese.
420
421=for etc
422{i-hakka} Hakka (old tag)
423
424=item {chn} : Chinook Jargon
425
426eq Chinook Wawa.
427
428=item {chp} : Chipewyan
429
430=item {cho} : Choctaw
431
432=item {cu} : Church Slavic
433
434eq Old Church Slavonic.
435
436=item {chk} : Chuukese
437
438eq Trukese. eq Chuuk. eq Truk. eq Ruk.
439
440=item {cv} : Chuvash
441
442=item {cop} : Coptic
443
444=item {kw} : Cornish
445
446=item {co} : Corsican
447
448eq Corse.
449
450=item {cre} : Cree
451
452NOT Creek!
453
454=item {mus} : Creek
455
456NOT Cree!
457
458=item [{cpe} : English-based Creoles and pidgins (Other)]
459
460=item [{cpf} : French-based Creoles and pidgins (Other)]
461
462=item [{cpp} : Portuguese-based Creoles and pidgins (Other)]
463
464=item [{crp} : Creoles and pidgins (Other)]
465
466=item {hr} : Croatian
467
468eq Croat.
469
470=item [{cus} : Cushitic (Other)]
471
472=item {cs} : Czech
473
474=item {dak} : Dakota
475
eq Nakota. eq Lakota.
477
478=item {da} : Danish
479
480=item {day} : Dayak
481
482=item {i-default} : Default (Fallthru) Language
483
484Defined in RFC 2277, this is for tagging text
485(which must include English text, and might/should include text
486in other appropriate languages) that is emitted in a context
487where language-negotiation wasn't possible -- in SMTP mail failure
488messages, for example.
489
490=item {del} : Delaware
491
492=item {din} : Dinka
493
494=item {div} : Divehi
495
496=item {doi} : Dogri
497
498NOT Dogrib!
499
500=item {dgr} : Dogrib
501
502NOT Dogri!
503
504=item [{dra} : Dravidian (Other)]
505
506=item {dua} : Duala
507
508=item {nl} : Dutch
509
510eq Netherlander. Notable forms:
511{nl-nl} Netherlands Dutch;
512{nl-be} Belgian Dutch.
513
514=item {dum} : Middle Dutch (ca.1050-1350)
515
516(Historical)
517
518=item {dyu} : Dyula
519
520=item {dz} : Dzongkha
521
522=item {efi} : Efik
523
524=item {egy} : Ancient Egyptian
525
526(Historical)
527
528=item {eka} : Ekajuk
529
530=item {elx} : Elamite
531
532(Historical)
533
534=item {en} : English
535
536Notable forms:
537{en-au} Australian English;
538{en-bz} Belize English;
539{en-ca} Canadian English;
540{en-gb} UK English;
541{en-ie} Irish English;
542{en-jm} Jamaican English;
543{en-nz} New Zealand English;
544{en-ph} Philippine English;
545{en-tt} Trinidad English;
546{en-us} US English;
547{en-za} South African English;
548{en-zw} Zimbabwe English.
549
=item {enm} : Middle English (1100-1500)
551
552(Historical)
553
554=item {ang} : Old English (ca.450-1100)
555
556eq Anglo-Saxon. (Historical)
557
558=item {eo} : Esperanto
559
560(Artificial)
561
562=item {et} : Estonian
563
564=item {ewe} : Ewe
565
566=item {ewo} : Ewondo
567
568=item {fan} : Fang
569
570=item {fat} : Fanti
571
572=item {fo} : Faroese
573
574=item {fj} : Fijian
575
576=item {fi} : Finnish
577
578=item [{fiu} : Finno-Ugrian (Other)]
579
580eq Finno-Ugric. NOT Ugaritic!
581
582=item {fon} : Fon
583
584=item {fr} : French
585
586Notable forms:
587{fr-fr} France French;
588{fr-be} Belgian French;
589{fr-ca} Canadian French;
590{fr-ch} Swiss French;
591{fr-lu} Luxembourg French;
592{fr-mc} Monaco French.
593
594=item {frm} : Middle French (ca.1400-1600)
595
596(Historical)
597
598=item {fro} : Old French (842-ca.1400)
599
600(Historical)
601
602=item {fy} : Frisian
603
604=item {fur} : Friulian
605
606=item {ful} : Fulah
607
608=item {gaa} : Ga
609
610=item {gd} : Scots Gaelic
611
612NOT Scots!
613
614=item {gl} : Gallegan
615
616eq Galician
617
618=item {lug} : Ganda
619
620=item {gay} : Gayo
621
622=item {gba} : Gbaya
623
624=item {gez} : Geez
625
626eq Ge'ez
627
628=item {ka} : Georgian
629
630=item {de} : German
631
632Notable forms:
633{de-at} Austrian German;
634{de-be} Belgian German;
635{de-ch} Swiss German;
636{de-de} Germany German;
637{de-li} Liechtenstein German;
638{de-lu} Luxembourg German.
639
640=item {gmh} : Middle High German (ca.1050-1500)
641
642(Historical)
643
644=item {goh} : Old High German (ca.750-1050)
645
646(Historical)
647
648=item [{gem} : Germanic (Other)]
649
650=item {gil} : Gilbertese
651
652=item {gon} : Gondi
653
654=item {gor} : Gorontalo
655
656=item {got} : Gothic
657
658(Historical)
659
660=item {grb} : Grebo
661
662=item {grc} : Ancient Greek
663
664(Historical) (Until 15th century or so.)
665
666=item {el} : Modern Greek
667
668(Since 15th century or so.)
669
670=item {gn} : Guarani
671
672GuaranE<iacute>
673
674=item {gu} : Gujarati
675
676=item {gwi} : Gwich'in
677
678eq Gwichin
679
680=item {hai} : Haida
681
682=item {ha} : Hausa
683
684=item {haw} : Hawaiian
685
686Hawai'ian
687
688=item {he} : Hebrew
689
690(Formerly "iw".)
691
692=for etc
693{iw} Hebrew (old tag)
694
695=item {hz} : Herero
696
697=item {hil} : Hiligaynon
698
699=item {him} : Himachali
700
701=item {hi} : Hindi
702
703=item {ho} : Hiri Motu
704
705=item {hit} : Hittite
706
707(Historical)
708
709=item {hmn} : Hmong
710
711=item {hu} : Hungarian
712
713=item {hup} : Hupa
714
715=item {iba} : Iban
716
717=item {is} : Icelandic
718
719=item {ibo} : Igbo
720
721=item {ijo} : Ijo
722
723=item {ilo} : Iloko
724
725=item [{inc} : Indic (Other)]
726
727=item [{ine} : Indo-European (Other)]
728
729=item {id} : Indonesian
730
731(Formerly "in".)
732
733=for etc
734{in} Indonesian (old tag)
735
736=item {ia} : Interlingua (International Auxiliary Language Association)
737
738(Artificial) NOT Interlingue!
739
740=item {ie} : Interlingue
741
742(Artificial) NOT Interlingua!
743
744=item {iu} : Inuktitut
745
746A subform of "Eskimo".
747
748=item {ik} : Inupiaq
749
750A subform of "Eskimo".
751
752=item [{ira} : Iranian (Other)]
753
754=item {ga} : Irish
755
756=item {mga} : Middle Irish (900-1200)
757
758(Historical)
759
760=item {sga} : Old Irish (to 900)
761
762(Historical)
763
764=item [{iro} : Iroquoian languages]
765
766=item {it} : Italian
767
768Notable forms:
769{it-it} Italy Italian;
770{it-ch} Swiss Italian.
771
772=item {ja} : Japanese
773
774(NOT "jp"!)
775
776=item {jw} : Javanese
777
778=item {jrb} : Judeo-Arabic
779
780=item {jpr} : Judeo-Persian
781
782=item {kab} : Kabyle
783
784=item {kac} : Kachin
785
786=item {kl} : Kalaallisut
787
788eq Greenlandic "Eskimo"
789
790=item {kam} : Kamba
791
792=item {kn} : Kannada
793
794eq Kanarese. NOT Canadian!
795
796=item {kau} : Kanuri
797
798=item {kaa} : Kara-Kalpak
799
800=item {kar} : Karen
801
802=item {ks} : Kashmiri
803
804=item {kaw} : Kawi
805
806=item {kk} : Kazakh
807
808=item {kha} : Khasi
809
810=item {km} : Khmer
811
812eq Cambodian. eq Kampuchean.
813
814=item [{khi} : Khoisan (Other)]
815
816=item {kho} : Khotanese
817
818=item {ki} : Kikuyu
819
820eq Gikuyu.
821
822=item {kmb} : Kimbundu
823
824=item {rw} : Kinyarwanda
825
826=item {ky} : Kirghiz
827
828=item {i-klingon} : Klingon
829
830=item {kv} : Komi
831
832=item {kon} : Kongo
833
834=item {kok} : Konkani
835
836=item {ko} : Korean
837
838=item {kos} : Kosraean
839
840=item {kpe} : Kpelle
841
842=item {kro} : Kru
843
844=item {kj} : Kuanyama
845
846=item {kum} : Kumyk
847
848=item {ku} : Kurdish
849
850=item {kru} : Kurukh
851
852=item {kut} : Kutenai
853
854=item {lad} : Ladino
855
856eq Judeo-Spanish. NOT Ladin (a minority language in Italy).
857
858=item {lah} : Lahnda
859
860NOT Lamba!
861
862=item {lam} : Lamba
863
864NOT Lahnda!
865
866=item {lo} : Lao
867
868eq Laotian.
869
870=item {la} : Latin
871
872(Historical) NOT Ladin! NOT Ladino!
873
874=item {lv} : Latvian
875
876eq Lettish.
877
878=item {lb} : Letzeburgesch
879
880eq Luxemburgian, eq Luxemburger. (Formerly i-lux.)
881
882=for etc
883{i-lux} Letzeburgesch (old tag)
884
885=item {lez} : Lezghian
886
887=item {ln} : Lingala
888
889=item {lt} : Lithuanian
890
891=item {nds} : Low German
892
893eq Low Saxon. eq Low German. eq Low Saxon.
894
895=item {loz} : Lozi
896
897=item {lub} : Luba-Katanga
898
899=item {lua} : Luba-Lulua
900
901=item {lui} : Luiseno
902
903eq LuiseE<ntilde>o.
904
905=item {lun} : Lunda
906
907=item {luo} : Luo (Kenya and Tanzania)
908
909=item {lus} : Lushai
910
911=item {mk} : Macedonian
912
913eq the modern Slavic language spoken in what was Yugoslavia.
914NOT the form of Greek spoken in Greek Macedonia!
915
916=item {mad} : Madurese
917
918=item {mag} : Magahi
919
920=item {mai} : Maithili
921
922=item {mak} : Makasar
923
924=item {mg} : Malagasy
925
926=item {ms} : Malay
927
928NOT Malayalam!
929
930=item {ml} : Malayalam
931
932NOT Malay!
933
934=item {mt} : Maltese
935
936=item {mnc} : Manchu
937
938=item {mdr} : Mandar
939
940NOT Mandarin!
941
942=item {man} : Mandingo
943
944=item {mni} : Manipuri
945
946eq Meithei.
947
948=item [{mno} : Manobo languages]
949
950=item {gv} : Manx
951
952=item {mi} : Maori
953
954NOT Mari!
955
956=item {mr} : Marathi
957
958=item {chm} : Mari
959
960NOT Maori!
961
962=item {mh} : Marshall
963
964eq Marshallese.
965
966=item {mwr} : Marwari
967
968=item {mas} : Masai
969
970=item [{myn} : Mayan languages]
971
972=item {men} : Mende
973
974=item {mic} : Micmac
975
976=item {min} : Minangkabau
977
978=item {i-mingo} : Mingo
979
eq the Iroquoian language West Virginia Seneca. NOT New York Seneca!
981
982=item [{mis} : Miscellaneous languages]
983
984Don't use this.
985
986=item {moh} : Mohawk
987
988=item {mo} : Moldavian
989
990eq Moldovan.
991
992=item [{mkh} : Mon-Khmer (Other)]
993
994=item {lol} : Mongo
995
996=item {mn} : Mongolian
997
998eq Mongol.
999
1000=item {mos} : Mossi
1001
1002=item [{mul} : Multiple languages]
1003
1004Not for normal use.
1005
1006=item [{mun} : Munda languages]
1007
1008=item {nah} : Nahuatl
1009
1010=item {na} : Nauru
1011
1012=item {nv} : Navajo
1013
1014eq Navaho. (Formerly i-navajo.)
1015
1016=for etc
1017{i-navajo} Navajo (old tag)
1018
1019=item {nd} : North Ndebele
1020
1021=item {nr} : South Ndebele
1022
1023=item {ng} : Ndonga
1024
1025=item {ne} : Nepali
1026
1027eq Nepalese. Notable forms:
1028{ne-np} Nepal Nepali;
1029{ne-in} India Nepali.
1030
1031=item {new} : Newari
1032
1033=item {nia} : Nias
1034
1035=item [{nic} : Niger-Kordofanian (Other)]
1036
1037=item [{ssa} : Nilo-Saharan (Other)]
1038
1039=item {niu} : Niuean
1040
1041=item {non} : Old Norse
1042
1043(Historical)
1044
1045=item [{nai} : North American Indian]
1046
1047Do not use this.
1048
1049=item {se} : Northern Sami
1050
1051eq Lappish. eq Lapp. eq (Northern) Saami.
1052
1053=item {no} : Norwegian
1054
1055Note the two following forms:
1056
1057=item {nb} : Norwegian Bokmal
1058
1059eq BokmE<aring>l, (A form of Norwegian.) (Formerly no-bok.)
1060
1061=for etc
1062{no-bok} Norwegian Bokmal (old tag)
1063
1064=item {nn} : Norwegian Nynorsk
1065
1066(A form of Norwegian.) (Formerly no-nyn.)
1067
1068=for etc
1069{no-nyn} Norwegian Nynorsk (old tag)
1070
1071=item [{nub} : Nubian languages]
1072
1073=item {nym} : Nyamwezi
1074
1075=item {nyn} : Nyankole
1076
1077=item {nyo} : Nyoro
1078
1079=item {nzi} : Nzima
1080
1081=item {oc} : Occitan (post 1500)
1082
1083eq ProvenE<ccedil>al, eq Provencal
1084
1085=item {oji} : Ojibwa
1086
1087eq Ojibwe.
1088
1089=item {or} : Oriya
1090
1091=item {om} : Oromo
1092
1093=item {osa} : Osage
1094
1095=item {os} : Ossetian; Ossetic
1096
1097=item [{oto} : Otomian languages]
1098
1099Group of languages collectively called "OtomE<iacute>".
1100
1101=item {pal} : Pahlavi
1102
1103eq Pahlevi
1104
1105=item {i-pwn} : Paiwan
1106
1107eq Pariwan
1108
1109=item {pau} : Palauan
1110
1111=item {pi} : Pali
1112
1113(Historical?)
1114
1115=item {pam} : Pampanga
1116
1117=item {pag} : Pangasinan
1118
1119=item {pa} : Panjabi
1120
1121eq Punjabi
1122
1123=item {pap} : Papiamento
1124
1125eq Papiamentu.
1126
1127=item [{paa} : Papuan (Other)]
1128
1129=item {fa} : Persian
1130
1131eq Farsi. eq Iranian.
1132
1133=item {peo} : Old Persian (ca.600-400 B.C.)
1134
1135=item [{phi} : Philippine (Other)]
1136
1137=item {phn} : Phoenician
1138
1139(Historical)
1140
1141=item {pon} : Pohnpeian
1142
1143NOT Pompeiian!
1144
1145=item {pl} : Polish
1146
1147=item {pt} : Portuguese
1148
1149eq Portugese. Notable forms:
1150{pt-pt} Portugal Portuguese;
1151{pt-br} Brazilian Portuguese.
1152
1153=item [{pra} : Prakrit languages]
1154
1155=item {pro} : Old Provencal (to 1500)
1156
1157eq Old ProvenE<ccedil>al. (Historical.)
1158
1159=item {ps} : Pushto
1160
1161eq Pashto. eq Pushtu.
1162
1163=item {qu} : Quechua
1164
1165eq Quecha.
1166
1167=item {rm} : Raeto-Romance
1168
1169eq Romansh.
1170
1171=item {raj} : Rajasthani
1172
1173=item {rap} : Rapanui
1174
1175=item {rar} : Rarotongan
1176
1177=item [{qaa - qtz} : Reserved for local use.]
1178
1179=item [{roa} : Romance (Other)]
1180
1181NOT Romanian! NOT Romany! NOT Romansh!
1182
1183=item {ro} : Romanian
1184
1185eq Rumanian. NOT Romany!
1186
1187=item {rom} : Romany
1188
1189eq Rom. NOT Romanian!
1190
1191=item {rn} : Rundi
1192
1193=item {ru} : Russian
1194
1195NOT White Russian! NOT Rusyn!
1196
1197=item [{sal} : Salishan languages]
1198
1199Large language group.
1200
1201=item {sam} : Samaritan Aramaic
1202
1203NOT Aramaic!
1204
1205=item [{smi} : Sami languages (Other)]
1206
1207=item {sm} : Samoan
1208
1209=item {sad} : Sandawe
1210
1211=item {sg} : Sango
1212
1213=item {sa} : Sanskrit
1214
1215(Historical)
1216
1217=item {sat} : Santali
1218
1219=item {sc} : Sardinian
1220
1221eq Sard.
1222
1223=item {sas} : Sasak
1224
1225=item {sco} : Scots
1226
1227NOT Scots Gaelic!
1228
1229=item {sel} : Selkup
1230
1231=item [{sem} : Semitic (Other)]
1232
1233=item {sr} : Serbian
1234
1235eq Serb. NOT Sorbian.
1236
1237=item {srr} : Serer
1238
1239=item {shn} : Shan
1240
1241=item {sn} : Shona
1242
1243=item {sid} : Sidamo
1244
1245=item {sgn-...} : Sign Languages
1246
1247Always use with a subtag. Notable forms:
1248{sgn-gb} British Sign Language (BSL);
1249{sgn-ie} Irish Sign Language (ESL);
1250{sgn-ni} Nicaraguan Sign Language (ISN);
1251{sgn-us} American Sign Language (ASL).
1252
1253=item {bla} : Siksika
1254
1255eq Blackfoot. eq Pikanii.
1256
1257=item {sd} : Sindhi
1258
1259=item {si} : Sinhalese
1260
1261eq Sinhala.
1262
1263=item [{sit} : Sino-Tibetan (Other)]
1264
1265=item [{sio} : Siouan languages]
1266
1267=item {den} : Slave (Athapascan)
1268
1269("Slavey" is a subform.)
1270
1271=item [{sla} : Slavic (Other)]
1272
1273=item {sk} : Slovak
1274
1275eq Slovakian.
1276
1277=item {sl} : Slovenian
1278
1279eq Slovene.
1280
1281=item {sog} : Sogdian
1282
1283=item {so} : Somali
1284
1285=item {son} : Songhai
1286
1287=item {snk} : Soninke
1288
1289=item {wen} : Sorbian languages
1290
1291eq Wendish. eq Sorb. eq Lusatian. eq Wend. NOT Venda! NOT Serbian!
1292
1293=item {nso} : Northern Sotho
1294
1295=item {st} : Southern Sotho
1296
1297eq Sutu. eq Sesotho.
1298
1299=item [{sai} : South American Indian (Other)]
1300
1301=item {es} : Spanish
1302
1303Notable forms:
1304{es-ar} Argentine Spanish;
1305{es-bo} Bolivian Spanish;
1306{es-cl} Chilean Spanish;
1307{es-co} Colombian Spanish;
1308{es-do} Dominican Spanish;
1309{es-ec} Ecuadorian Spanish;
1310{es-es} Spain Spanish;
1311{es-gt} Guatemalan Spanish;
1312{es-hn} Honduran Spanish;
1313{es-mx} Mexican Spanish;
1314{es-pa} Panamanian Spanish;
1315{es-pe} Peruvian Spanish;
1316{es-pr} Puerto Rican Spanish;
1317{es-py} Paraguay Spanish;
1318{es-sv} Salvadoran Spanish;
1319{es-us} US Spanish;
1320{es-uy} Uruguayan Spanish;
1321{es-ve} Venezuelan Spanish.
1322
1323=item {suk} : Sukuma
1324
1325=item {sux} : Sumerian
1326
1327(Historical)
1328
1329=item {su} : Sundanese
1330
1331=item {sus} : Susu
1332
1333=item {sw} : Swahili
1334
1335eq Kiswahili
1336
1337=item {ss} : Swati
1338
1339=item {sv} : Swedish
1340
1341Notable forms:
1342{sv-se} Sweden Swedish;
1343{sv-fi} Finland Swedish.
1344
1345=item {syr} : Syriac
1346
1347=item {tl} : Tagalog
1348
1349=item {ty} : Tahitian
1350
1351=item [{tai} : Tai (Other)]
1352
1353NOT Thai!
1354
1355=item {tg} : Tajik
1356
1357=item {tmh} : Tamashek
1358
1359=item {ta} : Tamil
1360
1361=item {i-tao} : Tao
1362
1363eq Yami.
1364
1365=item {tt} : Tatar
1366
1367=item {i-tay} : Tayal
1368
1369eq Atayal. eq Atayan.
1370
1371=item {te} : Telugu
1372
1373=item {ter} : Tereno
1374
1375=item {tet} : Tetum
1376
1377=item {th} : Thai
1378
1379NOT Tai!
1380
1381=item {bo} : Tibetan
1382
1383=item {tig} : Tigre
1384
1385=item {ti} : Tigrinya
1386
1387=item {tem} : Timne
1388
1389eq Themne. eq Timene.
1390
1391=item {tiv} : Tiv
1392
1393=item {tli} : Tlingit
1394
1395=item {tpi} : Tok Pisin
1396
1397=item {tkl} : Tokelau
1398
1399=item {tog} : Tonga (Nyasa)
1400
1401NOT Tsonga!
1402
1403=item {to} : Tonga (Tonga Islands)
1404
1405(Pronounced "Tong-a", not "Tong-ga")
1406
1407NOT Tsonga!
1408
1409=item {tsi} : Tsimshian
1410
1411eq Sm'algyax
1412
1413=item {ts} : Tsonga
1414
1415NOT Tonga!
1416
1417=item {i-tsu} : Tsou
1418
1419=item {tn} : Tswana
1420
1421Same as Setswana.
1422
1423=item {tum} : Tumbuka
1424
1425=item {tr} : Turkish
1426
1427(Typically in Roman script)
1428
1429=item {ota} : Ottoman Turkish (1500-1928)
1430
1431(Typically in Arabic script) (Historical)
1432
1433=item {tk} : Turkmen
1434
1435eq Turkmeni.
1436
1437=item {tvl} : Tuvalu
1438
1439=item {tyv} : Tuvinian
1440
1441eq Tuvan. eq Tuvin.
1442
1443=item {tw} : Twi
1444
1445=item {uga} : Ugaritic
1446
1447NOT Ugric!
1448
1449=item {ug} : Uighur
1450
1451=item {uk} : Ukrainian
1452
1453=item {umb} : Umbundu
1454
1455=item {und} : Undetermined
1456
1457Not a tag for normal use.
1458
1459=item {ur} : Urdu
1460
1461=item {uz} : Uzbek
1462
1463eq E<Ouml>zbek
1464
1465=item {vai} : Vai
1466
1467=item {ven} : Venda
1468
1469NOT Wendish! NOT Wend! NOT Avestan!
1470
1471=item {vi} : Vietnamese
1472
1473eq Viet.
1474
1475=item {vo} : Volapuk
1476
1477eq VolapE<uuml>k. (Artificial)
1478
1479=item {vot} : Votic
1480
1481eq Votian. eq Vod.
1482
1483=item [{wak} : Wakashan languages]
1484
1485=item {wal} : Walamo
1486
1487eq Wolaytta.
1488
1489=item {war} : Waray
1490
1491Presumably the Philippine language Waray-Waray (SamareE<ntilde>o),
1492not the smaller Philippine language Waray Sorsogon, nor the extinct
1493Australian language Waray.
1494
1495=item {was} : Washo
1496
1497eq Washoe
1498
1499=item {cy} : Welsh
1500
1501=item {wo} : Wolof
1502
1503=item {x-...} : Unregistered (Semi-Private Use)
1504
1505"x-" is a prefix for language tags that are not registered with ISO
1506or IANA. Example, x-double-dutch
1507
1508=item {xh} : Xhosa
1509
1510=item {sah} : Yakut
1511
1512=item {yao} : Yao
1513
1514(The Yao in Malawi?)
1515
1516=item {yap} : Yapese
1517
1518eq Yap
1519
1520=item {yi} : Yiddish
1521
1522Formerly "ji". Sometimes in Roman script, sometimes in Hebrew script.
1523
1524=for etc
1525{ji} Yiddish (old tag)
1526
1527=item {yo} : Yoruba
1528
1529=item [{ypk} : Yupik languages]
1530
1531Several "Eskimo" languages.
1532
1533=item {znd} : Zande
1534
1535=item [{zap} : Zapotec]
1536
1537(A group of languages.)
1538
1539=item {zen} : Zenaga
1540
1541NOT Zend.
1542
1543=item {za} : Zhuang
1544
1545=item {zu} : Zulu
1546
1547=item {zun} : Zuni
1548
1549eq ZuE<ntilde>i
1550
1551=back
1552
1553=for woohah END
1554
1555=head1 SEE ALSO
1556
1557L<I18N::LangTags|I18N::LangTags> and its "See Also" section.
1558
1559=head1 COPYRIGHT AND DISCLAIMER
1560
1561Copyright (c) 2001,2002 Sean M. Burke. All rights reserved.
1562
1563You can redistribute and/or
1564modify this document under the same terms as Perl itself.
1565
1566This document is provided in the hope that it will be
1567useful, but without any warranty;
1568without even the implied warranty of accuracy, authoritativeness,
1569completeness, merchantability, or fitness for a particular purpose.
1570
1571Email any corrections or questions to me.
1572
1573=head1 AUTHOR
1574
1575Sean M. Burke, sburkeE<64>cpan.org
1576
1577=cut
1578
1579
1580# To generate a list of just the two and three-letter codes:
1581
#!/usr/local/bin/perl -w

# Time-stamp: "2001-03-13 21:53:39 MST"
# Sean M. Burke, sburke@cpan.org
#
# This program is for generating the language_codes.txt file.
# It fetches the ISO 639-2 code table from the Library of Congress
# site, takes one entry from each five-cell table row, and prints the
# entries sorted by lowercased English name.
require 5;
use strict;
use LWP::Simple;
use HTML::TreeBuilder 3.10;

my $url  = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html';
my $html = get($url) || die "Can't get $url";

my $root = HTML::TreeBuilder->new();
$root->parse($html);
$root->eof();

# Each element is a pair: [ sort key, finished output line ].
my @codes;

ROW: foreach my $tr ($root->find_by_tag_name('tr')) {
  my @cells = map { $_->as_text() } $tr->content_list();
  #print map("<$_> ", @cells), "\n";

  next ROW if @cells != 5;
  pop @cells;                                   # nix the French name
  next ROW if $cells[-1] eq 'Language Name (English)';   # a header line

  my $two_letter = splice(@cells, 2, 1);        # pull out the two-letter code

  # What is left: two three-letter codes, then the English name.
  for ($cells[-1]) { s/^\s+//; s/\s+$//; }
  my $english = $cells[-1];

  my $line;
  if ($two_letter =~ m/[a-zA-Z]/) {
    # there's a two-letter code for it
    $line = "$two_letter\t$english\n";
  }
  elsif ($cells[0] eq $cells[1]) {
    # no two-letter code: print the three-letter code
    $line = "$cells[1]\t$cells[2]\n";
  }
  else {
    # the two three-letter codes differ -- shouldn't happen
    $line = "@cells !!!!!!!!!!\n";
  }

  push @codes, [ lc($english), $line ];
}

my @sorted = sort { $a->[0] cmp $b->[0] } @codes;
print map { $_->[1] } @sorted;
print "[ based on $url\n at ", scalar(localtime), "]\n",
  "[Note: doesn't include IANA-registered codes.]\n";
exit;
__END__
1622