git.subgeniuskitty.com - OpenSPARC-T2-DV/.git/blame_incremental - tools/perl-5.8.0/lib/5.8.0/I18N/LangTags.pm

... / ...

Commit	Line	Data
	1
	2	# Time-stamp: "2002-02-02 20:43:03 MST"
	3	# Sean M. Burke <sburke@cpan.org>
	4
	5	require 5.000;
	6	package I18N::LangTags;
	7	use strict;
	8	use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION %Panic);
	9	require Exporter;
	10	@ISA = qw(Exporter);
	11	@EXPORT = qw();
	12	@EXPORT_OK = qw(is_language_tag same_language_tag
	13	extract_language_tags super_languages
	14	similarity_language_tag is_dialect_of
	15	locale2language_tag alternate_language_tags
	16	encode_language_tag panic_languages
	17	);
	18	%EXPORT_TAGS = ('ALL' => \@EXPORT_OK);
	19
	20	$VERSION = "0.27";
	21
	22	=head1 NAME
	23
	24	I18N::LangTags - functions for dealing with RFC3066-style language tags
	25
	26	=head1 SYNOPSIS
	27
	28	use I18N::LangTags qw(is_language_tag same_language_tag
	29	extract_language_tags super_languages
	30	similarity_language_tag is_dialect_of
	31	locale2language_tag alternate_language_tags
	32	encode_language_tag panic_languages
	33	);
	34
	35	...or whatever of those functions you want to import. Those are
	36	all the exportable functions -- you're free to import only some,
	37	or none at all. By default, none are imported. If you say:
	38
	39	use I18N::LangTags qw(:ALL)
	40
	41	...then all are exported. (This saves you from having to use
	42	something less obvious like C<use I18N::LangTags qw(/./)>.)
	43
	44	If you don't import any of these functions, assume a C<&I18N::LangTags::>
	45	in front of all the function names in the following examples.
	46
	47	=head1 DESCRIPTION
	48
	49	Language tags are a formalism, described in RFC 3066 (obsoleting
	50	1766), for declaring what language form (language and possibly
	51	dialect) a given chunk of information is in.
	52
	53	This library provides functions for common tasks involving language
	54	tags as they are needed in a variety of protocols and applications.
	55
	56	Please see the "See Also" references for a thorough explanation
	57	of how to correctly use language tags.
	58
	59	=over
	60
	61	=cut
	62
	63	###########################################################################
	64
	65	=item * the function is_language_tag($lang1)
	66
	67	Returns true iff $lang1 is a formally valid language tag.
	68
	69	is_language_tag("fr") is TRUE
	70	is_language_tag("x-jicarilla") is FALSE
	71	(Subtags can be 8 chars long at most -- 'jicarilla' is 9)
	72
	73	is_language_tag("sgn-US") is TRUE
	74	(That's American Sign Language)
	75
	76	is_language_tag("i-Klikitat") is TRUE
	77	(True without regard to the fact no one has actually
	78	registered Klikitat -- it's a formally valid tag)
	79
	80	is_language_tag("fr-patois") is TRUE
	81	(Formally valid -- altho descriptively weak!)
	82
	83	is_language_tag("Spanish") is FALSE
	84	is_language_tag("french-patois") is FALSE
	85	(No good -- first subtag has to match
	86	/^([xXiI]|[a-zA-Z]{2,3})$/ -- see RFC3066)
	87
	88	is_language_tag("x-borg-prot2532") is TRUE
	89	(Yes, subtags can contain digits, as of RFC3066)
	90
	91	=cut
	92
	# is_language_tag($tag)
	#
	# Returns 1 if $tag is a formally valid language tag, 0 otherwise.
	# "Formally valid" means: a first subtag that is either "x", "i", or
	# two to three letters, followed by zero or more "-"-separated subtags
	# of one to eight letters or digits each.  Matching is done without
	# regard to case.  A bare "i" or "x" is rejected.
	sub is_language_tag {

		## Changes in the language tagging standards may have to be reflected here.

		# An undefined value is never a tag; answer before lc() touches it
		# so that callers running under -w see no "uninitialized" warning.
		return 0 unless defined $_[0];

		my($tag) = lc($_[0]);

		return 0 if $tag eq "i" or $tag eq "x";
		 # Bad degenerate cases that the following
		 #  regexp would erroneously let pass

		# Anchored with \A and \z rather than ^ and $, because $ would
		# also match just before a trailing newline and so accept "fr\n".
		return $tag =~
			/\A(?:  # First subtag
				[xi] | [a-z]{2,3}
			)
			(?:  # Subtags thereafter
				-              # separator
				[a-z0-9]{1,8}  # subtag
			)*
			\z/xs ? 1 : 0;
	}
	113
	114	###########################################################################
	115
	116	=item * the function extract_language_tags($whatever)
	117
	118	Returns a list of whatever looks like formally valid language tags
	119	in $whatever. Not very smart, so don't get too creative with
	120	what you want to feed it.
	121
	122	extract_language_tags("fr, fr-ca, i-mingo")
	123	returns: ('fr', 'fr-ca', 'i-mingo')
	124
	125	extract_language_tags("It's like this: I'm in fr -- French!")
	126	returns: ('It', 'in', 'fr')
	127	(So don't just feed it any old thing.)
	128
	129	The output is untainted. If you don't know what tainting is,
	130	don't worry about it.
	131
	132	=cut
	133
	# extract_language_tags($whatever)
	#
	# Returns, in order of appearance, every substring of $whatever that
	# looks like a formally valid language tag.  Lone "i" and "x" matches
	# are discarded.  The returned strings come from a regexp capture and
	# are therefore untainted.  An undefined or empty argument yields an
	# empty list.
	sub extract_language_tags {

		## Changes in the language tagging standards may have to be reflected here.

		# The capture exists only to untaint the text.  It needs the /s
		# flag: without it "." stops at the first newline and everything
		# after the first line of the input would be silently ignored.
		my($text) =
			(defined $_[0] and $_[0] =~ m/(.+)/s)
			? $1 : ''
		;

		return grep(!m/^[ixIX]$/s,   # 'i' and 'x' aren't good tags
			$text =~
				m/
					\b
					(?:  # First subtag
						[iIxX] | [a-zA-Z]{2,3}
					)
					(?:  # Subtags thereafter
						-                 # separator
						[a-zA-Z0-9]{1,8}  # subtag
					)*
					\b
				/xsg
		);
	}
	158
	159	###########################################################################
	160
	161	=item * the function same_language_tag($lang1, $lang2)
	162
	163	Returns true iff $lang1 and $lang2 are acceptable variant tags
	164	representing the same language-form.
	165
	166	same_language_tag('x-kadara', 'i-kadara') is TRUE
	167	(The x/i- alternation doesn't matter)
	168	same_language_tag('X-KADARA', 'i-kadara') is TRUE
	169	(...and neither does case)
	170	same_language_tag('en', 'en-US') is FALSE
	171	(all-English is not the SAME as US English)
	172	same_language_tag('x-kadara', 'x-kadar') is FALSE
	173	(these are totally unrelated tags)
	174	same_language_tag('no-bok', 'nb') is TRUE
	175	(no-bok is a legacy tag for nb (Norwegian Bokmal))
	176
	177	C<same_language_tag> works by just seeing whether
	178	C<encode_language_tag($lang1)> is the same as
	179	C<encode_language_tag($lang2)>.
	180
	181	(Yes, I know this function is named a bit oddly. Call it historic
	182	reasons.)
	183
	184	=cut
	185
	# same_language_tag($lang1, $lang2)
	#
	# Returns 1 if both arguments are formally valid language tags whose
	# encodings (as produced by encode_language_tag) are identical, and 0
	# otherwise.
	sub same_language_tag {
		my $el1 = encode_language_tag($_[0]);
		return 0 unless defined $el1;
		 # Without this check, two invalid (or undefined) tags would both
		 #  encode to undef and would then wrongly compare as "the same".

		my $el2 = encode_language_tag($_[1]);
		return 0 unless defined $el2;
		 # Same answer as comparing against undef with eq, but without
		 #  the "uninitialized value" warning that would raise under -w.

		return $el1 eq $el2 ? 1 : 0;
	}
	195
	196	###########################################################################
	197
	198	=item * the function similarity_language_tag($lang1, $lang2)
	199
	200	Returns an integer representing the degree of similarity between
	201	tags $lang1 and $lang2 (the order of which does not matter), where
	202	similarity is the number of common elements on the left,
	203	without regard to case and to x/i- alternation.
	204
	205	similarity_language_tag('fr', 'fr-ca') is 1
	206	(one element in common)
	207	similarity_language_tag('fr-ca', 'fr-FR') is 1
	208	(one element in common)
	209
	210	similarity_language_tag('fr-CA-joual',
	211	'fr-CA-PEI') is 2
	212	similarity_language_tag('fr-CA-joual', 'fr-CA') is 2
	213	(two elements in common)
	214
	215	similarity_language_tag('x-kadara', 'i-kadara') is 1
	216	(x/i- doesn't matter)
	217
	218	similarity_language_tag('en', 'x-kadar') is 0
	219	similarity_language_tag('x-kadara', 'x-kadar') is 0
	220	(unrelated tags -- no similarity)
	221
	222	similarity_language_tag('i-cree-syllabic',
	223	'i-cherokee-syllabic') is 0
	224	(no B<leftmost> elements in common!)
	225
	226	=cut
	227
	# similarity_language_tag($lang1, $lang2)
	#
	# Counts how many leading "-"-separated elements the encodings of the
	# two tags have in common.  Returns undef when neither argument
	# encodes (i.e. neither is a valid tag) and 0 when only one does.
	sub similarity_language_tag {
		# encode_language_tag takes care of the whole
		#  no-nyn==nn, i-hakka==zh-hakka, etc, things
		my $first  = encode_language_tag($_[0]);
		my $second = encode_language_tag($_[1]);

		# NB: (i-sil-...)?  (i-sgn-...)?

		unless (defined $first) {
			return defined($second) ? 0 : undef;
		}
		return 0 unless defined $second;

		my @parts_a = split('-', $first);
		my @parts_b = split('-', $second);

		# Walk both element lists in step, stopping at the first
		# position where they differ or where the shorter one ends.
		my $last_shared_index = $#parts_a < $#parts_b ? $#parts_a : $#parts_b;
		my $common = 0;
		foreach my $i (0 .. $last_shared_index) {
			last if $parts_a[$i] ne $parts_b[$i];
			$common++;
		}
		return $common;
	}
	252
	253	###########################################################################
	254
	255	=item * the function is_dialect_of($lang1, $lang2)
	256
	257	Returns true iff language tag $lang1 represents a subform of
	258	language tag $lang2.
	259
	260	B<Get the order right! It doesn't work the other way around!>
	261
	262	is_dialect_of('en-US', 'en') is TRUE
	263	(American English IS a dialect of all-English)
	264
	265	is_dialect_of('fr-CA-joual', 'fr-CA') is TRUE
	266	is_dialect_of('fr-CA-joual', 'fr') is TRUE
	267	(Joual is a dialect of (a dialect of) French)
	268
	269	is_dialect_of('en', 'en-US') is FALSE
	270	(all-English is NOT a dialect of American English)
	271
	272	is_dialect_of('fr', 'en-CA') is FALSE
	273
	274	is_dialect_of('en', 'en' ) is TRUE
	275	is_dialect_of('en-US', 'en-US') is TRUE
	276	(B<Note:> these are degenerate cases)
	277
	278	is_dialect_of('i-mingo-tom', 'x-Mingo') is TRUE
	279	(the x/i thing doesn't matter, nor does case)
	280
	281	is_dialect_of('nn', 'no') is TRUE
	282	(because 'nn' (New Norse) is aliased to 'no-nyn',
	283	as a special legacy case, and 'no-nyn' is a
	284	subform of 'no' (Norwegian))
	285
	286	=cut
	287
	# is_dialect_of($lang1, $lang2)
	#
	# Returns 1 if the encoding of $lang1 equals the encoding of $lang2 or
	# extends it by further "-"-separated elements, and 0 if not.  Returns
	# undef when neither argument encodes, 0 when only one does.
	sub is_dialect_of {

		my $sub_form   = encode_language_tag($_[0]);
		my $super_form = encode_language_tag($_[1]);

		unless (defined $sub_form) {
			return defined($super_form) ? 0 : undef;
		}
		return 0 unless defined $super_form;

		return 1 if $sub_form eq $super_form;
		return 0 if length($sub_form) < length($super_form);

		# Terminate both with "-" so that only whole elements can match:
		# "~EN-" is a prefix of "~EN-US-" but not of "~ENX-".
		return index("$sub_form-", "$super_form-") == 0 ? 1 : 0;
	}
	304
	305	###########################################################################
	306
	307	=item * the function super_languages($lang1)
	308
	309	Returns a list of language tags that are superordinate tags to $lang1
	310	-- it gets this by removing subtags from the end of $lang1 until
	311	nothing (or just "i" or "x") is left.
	312
	313	super_languages("fr-CA-joual") is ("fr-CA", "fr")
	314
	315	super_languages("en-AU") is ("en")
	316
	317	super_languages("en") is empty-list, ()
	318
	319	super_languages("i-cherokee") is empty-list, ()
	320	...not ("i"), which would be illegal as well as pointless.
	321
	322	If $lang1 is not a valid language tag, returns empty-list in
	323	a list context, undef in a scalar context.
	324
	325	A notable and rather unavoidable problem with this method:
	326	"x-mingo-tom" has an "x" because the whole tag isn't an
	327	IANA-registered tag -- but super_languages('x-mingo-tom') is
	328	('x-mingo') -- which isn't really right, since 'i-mingo' is
	329	registered. But this module has no way of knowing that. (But note
	330	that same_language_tag('x-mingo', 'i-mingo') is TRUE.)
	331
	332	More importantly, you assume I<at your peril> that superordinates of
	333	$lang1 are mutually intelligible with $lang1. Consider this
	334	carefully.
	335
	336	=cut
	337
	# super_languages($lang1)
	#
	# Returns the superordinate tags of $lang1, longest first, obtained by
	# dropping subtags from the right.  A lone leading "i" or "x" is never
	# returned.  Returns an empty list (undef in scalar context) if $lang1
	# is undefined or not a formally valid tag.
	sub super_languages {
		my $tag = $_[0];
		return() unless defined($tag) && is_language_tag($tag);

		# a hack for those annoying new (2001) tags:
		$tag =~ s/^nb\b/no-bok/i; # yes, backwards
		$tag =~ s/^nn\b/no-nyn/i; # yes, backwards
		$tag =~ s/^[ix](-hakka\b)/zh$1/i; # goes the right way
		 # i-hakka-bork-bjork-bjark => zh-hakka-bork-bjork-bjark

		## Changes in the language tagging standards may have to be reflected here.

		# NB: (i-sil-...)?

		# Every proper prefix of the subtag list, shortest first; the
		# full tag itself is left out because the range stops one short.
		my @parts = split('-', $tag);
		my @ascending = map { join('-', @parts[0 .. $_]) } 0 .. $#parts - 1;

		# A bare "i" or "x" is not a language tag, so drop it.
		shift @ascending if @ascending && $ascending[0] =~ m<^[iIxX]$>s;

		return reverse @ascending;
	}
	363
	364	###########################################################################
	365
	366	=item * the function locale2language_tag($locale_identifier)
	367
	368	This takes a locale name (like "en", "en_US", or "en_US.ISO8859-1")
	369	and maps it to a language tag. If it's not mappable (as with,
	370	notably, "C" and "POSIX"), this returns empty-list in a list context,
	371	or undef in a scalar context.
	372
	373	locale2language_tag("en") is "en"
	374
	375	locale2language_tag("en_US") is "en-US"
	376
	377	locale2language_tag("en_US.ISO8859-1") is "en-US"
	378
	379	locale2language_tag("C") is undef or ()
	380
	381	locale2language_tag("POSIX") is undef or ()
	382
	383	locale2language_tag("POSIX") is undef or ()
	384
	385	I'm not totally sure that locale names map satisfactorily to language
	386	tags. Think REAL hard about how you use this. YOU HAVE BEEN WARNED.
	387
	388	The output is untainted. If you don't know what tainting is,
	389	don't worry about it.
	390
	391	=cut
	392
	# locale2language_tag($locale_identifier)
	#
	# Maps a locale name such as "en", "en_US", "en_US.ISO8859-1" or
	# "de_DE@euro" to a language tag ("en", "en-US", "en-US", "de-DE").
	# Returns an empty list (undef in scalar context) when the name cannot
	# be mapped, as with "C" and "POSIX".  The result comes from a regexp
	# capture and is therefore untainted.
	sub locale2language_tag {
		my $lang =
			(defined $_[0] and $_[0] =~ m/(.+)/)  # to make for an untainted result
			? $1 : ''
		;

		return $lang if is_language_tag($lang); # like "en"

		$lang =~ tr<_><->;  # "en_US" -> en-US

		# Strip a trailing codeset and/or modifier part:
		#  "en-US.ISO8859-1" -> en-US,  "it-IT.utf8@euro" -> it-IT,
		#  "de-DE@euro" -> de-DE
		$lang =~ s<[.\@][-_a-zA-Z0-9.\@]*\z><>s;

		return $lang if is_language_tag($lang);

		return;
	}
	408
	409	###########################################################################
	410
	411	=item * the function encode_language_tag($lang1)
	412
	413	This function, if given a language tag, returns an encoding of it such
	414	that:
	415
	416	* tags representing different languages never get the same encoding.
	417
	418	* tags representing the same language always get the same encoding.
	419
	420	* an encoding of a formally valid language tag always is a string
	421	value that is defined, has length, and is true if considered as a
	422	boolean.
	423
	424	Note that the encoding itself is B<not> a formally valid language tag.
	425	Note also that you cannot, currently, go from an encoding back to a
	426	language tag that it's an encoding of.
	427
	428	Note also that you B<must> consider the encoded value as atomic; i.e.,
	429	you should not consider it as anything but an opaque, unanalysable
	430	string value. (The internals of the encoding method may change in
	431	future versions, as the language tagging standard changes over time.)
	432
	433	C<encode_language_tag> returns undef if given anything other than a
	434	formally valid language tag.
	435
	436	The reason C<encode_language_tag> exists is because different language
	437	tags may represent the same language; this is normally treatable with
	438	C<same_language_tag>, but consider this situation:
	439
	440	You have a data file that expresses greetings in different languages.
	441	Its format is "[language tag]=[how to say 'Hello']", like:
	442
	443	en-US=Hiho
	444	fr=Bonjour
	445	i-mingo=Hau'
	446
	447	And suppose you write a program that reads that file and then runs as
	448	a daemon, answering client requests that specify a language tag and
	449	then expect the string that says how to greet in that language. So an
	450	interaction looks like:
	451
	452	greeting-client asks: fr
	453	greeting-server answers: Bonjour
	454
	455	So far so good. But suppose the way you're implementing this is:
	456
	457	my %greetings;
	458	die unless open(IN, "<in.dat");
	459	while(<IN>) {
	460	chomp;
	461	next unless /^([^=]+)=(.+)/s;
	462	my($lang, $expr) = ($1, $2);
	463	$greetings{$lang} = $expr;
	464	}
	465	close(IN);
	466
	467	at which point %greetings has the contents:
	468
	469	"en-US" => "Hiho"
	470	"fr" => "Bonjour"
	471	"i-mingo" => "Hau'"
	472
	473	And suppose then that you answer client requests for language $wanted
	474	by just looking up $greetings{$wanted}.
	475
	476	If the client asks for "fr", that will look up successfully in
	477	%greetings, to the value "Bonjour". And if the client asks for
	478	"i-mingo", that will look up successfully in %greetings, to the value
	479	"Hau'".
	480
	481	But if the client asks for "i-Mingo" or "x-mingo", or "Fr", then the
	482	lookup in %greetings fails. That's the Wrong Thing.
	483
	484	You could instead do lookups on $wanted with:
	485
	486	use I18N::LangTags qw(same_language_tag);
	487	my $response = '';
	488	foreach my $l2 (keys %greetings) {
	489	if(same_language_tag($wanted, $l2)) {
	490	$response = $greetings{$l2};
	491	last;
	492	}
	493	}
	494
	495	But that's rather inefficient. A better way to do it is to start your
	496	program with:
	497
	498	use I18N::LangTags qw(encode_language_tag);
	499	my %greetings;
	500	die unless open(IN, "<in.dat");
	501	while(<IN>) {
	502	chomp;
	503	next unless /^([^=]+)=(.+)/s;
	504	my($lang, $expr) = ($1, $2);
	505	$greetings{
	506	encode_language_tag($lang)
	507	} = $expr;
	508	}
	509	close(IN);
	510
	511	and then just answer client requests for language $wanted by just
	512	looking up
	513
	514	$greetings{encode_language_tag($wanted)}
	515
	516	And that does the Right Thing.
	517
	518	=cut
	519
	# encode_language_tag($lang1)
	#
	# Returns an opaque string that is the same for all tags this module
	# treats as naming the same language form: case is folded, a leading
	# "x-" or "i-" is dropped, and the known legacy spellings are mapped
	# onto one form.  Returns undef if the argument is false or is not a
	# formally valid language tag.
	sub encode_language_tag {
		# Only similarity_language_tag() is allowed to analyse encodings!

		## Changes in the language tagging standards may have to be reflected here.

		my($tag) = $_[0] || return undef;
		return undef unless is_language_tag($tag);

		# For the moment, these legacy variances are few enough that
		#  we can just handle them here with regexps.
		$tag =~ s/^iw\b/he/i; # Hebrew
		$tag =~ s/^in\b/id/i; # Indonesian
		$tag =~ s/^[ix]-lux\b/lb/i;  # Luxemburger
		$tag =~ s/^[ix]-navajo\b/nv/i;  # Navajo
		$tag =~ s/^ji\b/yi/i;  # Yiddish
		#
		# These go FROM the simplex to complex form, to get
		#  similarity-comparison right.  And that's okay, since
		#  similarity_language_tag is the only thing that
		#  analyzes our output.
		$tag =~ s/^[ix]-hakka\b/zh-hakka/i;  # Hakka
		$tag =~ s/^nb\b/no-bok/i;  # BACKWARDS for Bokmal
		$tag =~ s/^nn\b/no-nyn/i;  # BACKWARDS for Nynorsk

		$tag =~ s/^[xiXI]-//s;
		 # Just lop off any leading "x/i-"

		# The "~" prefix keeps the encoding from being mistaken for a
		# tag (it is not one) and makes it true in boolean context.
		return "~" . uc($tag);
	}
	549
	550	#--------------------------------------------------------------------------
	551
	552	=item * the function alternate_language_tags($lang1)
	553
	554	This function, if given a language tag, returns all language tags that
	555	are alternate forms of this language tag. (I.e., tags which refer to
	556	the same language.) This is meant to handle legacy tags caused by
	557	the minor changes in language tag standards over the years; and
	558	the x-/i- alternation is also dealt with.
	559
	560	Note that this function does I<not> try to equate new (and never-used,
	561	and unusable)
	562	ISO639-2 three-letter tags to old (and still in use) ISO639-1
	563	two-letter equivalents -- like "ara" -> "ar" -- because
	564	"ara" has I<never> been in use as an Internet language tag,
	565	and RFC 3066 stipulates that it never should be, since a shorter
	566	tag ("ar") exists.
	567
	568	Examples:
	569
	570	alternate_language_tags('no-bok') is ('nb')
	571	alternate_language_tags('nb') is ('no-bok')
	572	alternate_language_tags('he') is ('iw')
	573	alternate_language_tags('iw') is ('he')
	574	alternate_language_tags('i-hakka') is ('zh-hakka', 'x-hakka')
	575	alternate_language_tags('zh-hakka') is ('x-hakka', 'i-hakka')
	576	alternate_language_tags('en') is ()
	577	alternate_language_tags('x-mingo-tom') is ('i-mingo-tom')
	578	alternate_language_tags('x-klikitat') is ('i-klikitat')
	579	alternate_language_tags('i-klikitat') is ('x-klikitat')
	580
	581	This function returns empty-list if given anything other than a formally
	582	valid language tag.
	583
	584	=cut
	585
	# Maps a leading "i" to "x" and vice versa, preserving its case.
	my %alt = qw( i x   x i   I X   X I );

	# alternate_language_tags($lang1)
	#
	# Returns the other spellings of $lang1 that refer to the same language:
	# the legacy equivalents listed in the table below, plus the x-/i-
	# counterpart when the tag starts with "x-" or "i-".  Whatever follows
	# the matched prefix is carried over unchanged.  Returns an empty list
	# if $lang1 is not a formally valid language tag.
	sub alternate_language_tags {
		my $tag = $_[0];
		return() unless is_language_tag($tag);

		# For the moment, these legacy variances are few enough that
		#  we can just handle them here with a small table of
		#  prefix-pattern => replacement-prefixes pairs.
		# Order matters: only the first matching row is used.
		my @legacy = (
			'[ix]-hakka'  => [ 'zh-hakka' ],
			'zh-hakka'    => [ 'x-hakka', 'i-hakka' ],

			'he'          => [ 'iw' ],
			'iw'          => [ 'he' ],

			'in'          => [ 'id' ],
			'id'          => [ 'in' ],

			'[ix]-lux'    => [ 'lb' ],
			'lb'          => [ 'i-lux', 'x-lux' ],

			'[ix]-navajo' => [ 'nv' ],
			'nv'          => [ 'i-navajo', 'x-navajo' ],

			'yi'          => [ 'ji' ],
			'ji'          => [ 'yi' ],

			'nb'          => [ 'no-bok' ],
			'no-bok'      => [ 'nb' ],

			'nn'          => [ 'no-nyn' ],
			'no-nyn'      => [ 'nn' ],
		);

		my @em;  # push 'em real goood!

		for (my $row = 0; $row < @legacy; $row += 2) {
			my($prefix, $replacements) = @legacy[$row, $row + 1];
			next unless $tag =~ m/^$prefix\b(.*)/i;
			my $rest = $1;
			push @em, map { "$_$rest" } @$replacements;
			last;
		}

		# The x-/i- alternation applies on top of any legacy alias.
		push @em, $alt{$1} . $2 if $tag =~ /^([XIxi])(-.+)/;
		return @em;
	}
	624
	625	###########################################################################
	626
	{
		# Init %Panic: for each (lowercase) language tag, the ordered list
		# of other languages that panic_languages() offers as a fallback.
		#
		# The table below is a flat list of key => value pairs in which
		# either side may be a single tag or a reference to a list of
		# tags.  Every key of a pair gets every value of that pair
		# appended to its list, except that a tag is never listed as a
		# fallback for itself.

		my @panic = (  # MUST all be lowercase!
		 # Only large ("national") languages make it in this list.
		 #  If you, as a user, are so bizarre that the /only/ language
		 #  you claim to accept is Galician, then no, we won't do you
		 #  the favor of providing Catalan as a panic-fallback for
		 #  you.  Because if I start trying to add "little languages" in
		 #  here, I'll just go crazy.

		 # Scandinavian lgs.  All based on opinion and hearsay.
		 'sv' => [qw(nb no da nn)],
		 'da' => [qw(nb no sv nn)], # I guess
		 [qw(no nn nb)], [qw(no nn nb sv da)],
		 'is' => [qw(da sv no nb nn)],
		 'fo' => [qw(da is no nb nn sv)], # I guess

		 # I think this is about the extent of tolerable intelligibility
		 #  among large modern Romance languages.
		 'pt' => [qw(es ca it fr)], # Portuguese, Spanish, Catalan, Italian, French
		 'ca' => [qw(es pt it fr)],
		 'es' => [qw(ca it fr pt)],
		 'it' => [qw(es fr ca pt)],
		 'fr' => [qw(es it ca pt)],

		 # Also assume that speakers of the main Indian languages prefer
		 #  to read/hear Hindi over English
		 [qw(
		   as bn gu kn ks kok ml mni mr ne or pa sa sd te ta ur
		 )] => 'hi',
		   # Assamese, Bengali, Gujarati, [Hindi,] Kannada (Kanarese), Kashmiri,
		   # Konkani, Malayalam, Meithei (Manipuri), Marathi, Nepali, Oriya,
		   # Punjabi, Sanskrit, Sindhi, Telugu, Tamil, and Urdu.
		 'hi' => [qw(bn pa as or)],
		 # I welcome finer data for the other Indian languages.
		 #  E.g., what should Oriya's list be, besides just Hindi?

		 # And the panic languages for English is, of course, nil!

		 # My guesses at Slavic intelligibility:
		 ([qw(ru be uk)]) x 2,  # Russian, Belarusian, Ukrainian
		 'sr' => 'hr', 'hr' => 'sr', # Serb + Croat
		 'cs' => 'sk', 'sk' => 'cs', # Czech + Slovak

		 'ms' => 'id', 'id' => 'ms', # Malay + Indonesian

		 'et' => 'fi', 'fi' => 'et', # Estonian + Finnish

		 #?? 'lo' => 'th', 'th' => 'lo', # Lao + Thai

		);

		while(@panic) {
			# Take the next key/value pair off the front of the table.
			my($keys, $values) = splice(@panic, 0, 2);
			foreach my $k (ref($keys) ? @$keys : $keys) {
				foreach my $v (ref($values) ? @$values : $values) {
					push @{$Panic{$k} ||= []}, $v unless $k eq $v;
				}
			}
		}
	}
	689
	690	=item * the function @langs = panic_languages(@accept_languages)
	691
	692	This function takes a list of 0 or more language
	693	tags that constitute a given user's Accept-Language list, and
	694	returns a list of tags for I<other> (non-super)
	695	languages that are probably acceptable to the user, to be
	696	used I<if all else fails>.
	697
	698	For example, if a user accepts only 'ca' (Catalan) and
	699	'es' (Spanish), and the documents/interfaces you have
	700	available are just in German, Italian, and Chinese, then
	701	the user will most likely want the Italian one (and not
	702	the Chinese or German one!), instead of getting
	703	nothing. So C<panic_languages('ca', 'es')> returns
	704	a list containing 'it' (Italian).
	705
	706	English ('en') is I<always> in the return list (unless it was one of
	707	the input tags), but whether it's at the very end or not depends
	708	on the input languages. This function works by consulting
	709	an internal table that stipulates what common
	710	languages are "close" to each other.
	711
	712	A useful construct you might consider using is:
	713
	714	@fallbacks = super_languages(@accept_languages);
	715	push @fallbacks, panic_languages(
	716	@accept_languages, @fallbacks,
	717	);
	718
	719	=cut
	720
	# panic_languages(@accept_languages)
	#
	# Returns the fallback languages that %Panic lists for the given tags,
	# in table order and without duplicates, followed by 'en'.  Tags that
	# were themselves passed in are never returned.  False arguments and
	# repeated arguments are skipped; the %Panic lookup is done on the
	# lowercased tag.
	sub panic_languages {
		# When in panic or in doubt, run in circles, scream, and shout!
		my(@out, %seen);
		foreach my $t (@_) {
			next unless $t;
			next if $seen{$t}++; # so we don't return it or hit it again
			# push @out, super_languages($t); # nah, keep that separate
			my $fallbacks = $Panic{lc $t} or next;
			push @out, @$fallbacks;
		}
		# %seen already holds every input tag, so this one pass both
		# removes duplicates and filters out the caller's own languages.
		return grep !$seen{$_}++, @out, 'en';
	}
	732
	733	###########################################################################
	734	1;
	735	__END__
	736
	737	=back
	738
	739	=head1 ABOUT LOWERCASING
	740
	741	I've considered making all the above functions that output language
	742	tags return all those tags strictly in lowercase. Having all your
	743	language tags in lowercase does make some things easier. But you
	744	might as well just lowercase as you like, or call
	745	C<encode_language_tag($lang1)> where appropriate.
	746
	747	=head1 ABOUT UNICODE PLAINTEXT LANGUAGE TAGS
	748
	749	In some future version of I18N::LangTags, I plan to include support
	750	for RFC2482-style language tags -- which are basically just normal
	751	language tags with their ASCII characters shifted into Plane 14.
	752
	753	=head1 SEE ALSO
	754
	755	* L<I18N::LangTags::List|I18N::LangTags::List>
	756
	757	* RFC 3066, C<ftp://ftp.isi.edu/in-notes/rfc3066.txt>, "Tags for the
	758	Identification of Languages". (Obsoletes RFC 1766)
	759
	760	* RFC 2277, C<ftp://ftp.isi.edu/in-notes/rfc2277.txt>, "IETF Policy on
	761	Character Sets and Languages".
	762
	763	* RFC 2231, C<ftp://ftp.isi.edu/in-notes/rfc2231.txt>, "MIME Parameter
	764	Value and Encoded Word Extensions: Character Sets, Languages, and
	765	Continuations".
	766
	767	* RFC 2482, C<ftp://ftp.isi.edu/in-notes/rfc2482.txt>,
	768	"Language Tagging in Unicode Plain Text".
	769
	770	* Locale::Codes, in
	771	C<http://www.perl.com/CPAN/modules/by-module/Locale/>
	772
	773	* ISO 639, "Code for the representation of names of languages",
	774	C<http://www.indigo.ie/egt/standards/iso639/iso639-1-en.html>
	775
	776	* ISO 639-2, "Codes for the representation of names of languages",
	777	including three-letter codes,
	778	C<http://lcweb.loc.gov/standards/iso639-2/bibcodes.html>
	779
	780	* The IANA list of registered languages (hopefully up-to-date),
	781	C<ftp://ftp.isi.edu/in-notes/iana/assignments/languages/>
	782
	783	=head1 COPYRIGHT
	784
	785	Copyright (c) 1998-2001 Sean M. Burke. All rights reserved.
	786
	787	This library is free software; you can redistribute it and/or
	788	modify it under the same terms as Perl itself.
	789
	790	The programs and documentation in this dist are distributed in
	791	the hope that they will be useful, but without any warranty; without
	792	even the implied warranty of merchantability or fitness for a
	793	particular purpose.
	794
	795	=head1 AUTHOR
	796
	797	Sean M. Burke C<sburke@cpan.org>
	798
	799	=cut
	800