git.subgeniuskitty.com - OpenSPARC-T2-SAM/.git/blame_incremental - sam-t2/devtools/v8plus/man/man3/Encode::Supported.3

... / ...

Commit	Line	Data
	1	.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
	2	.\"
	3	.\" Standard preamble:
	4	.\" ========================================================================
	5	.de Sh \" Subsection heading
	6	.br
	7	.if t .Sp
	8	.ne 5
	9	.PP
	10	\fB\\$1\fR
	11	.PP
	12	..
	13	.de Sp \" Vertical space (when we can't use .PP)
	14	.if t .sp .5v
	15	.if n .sp
	16	..
	17	.de Vb \" Begin verbatim text
	18	.ft CW
	19	.nf
	20	.ne \\$1
	21	..
	22	.de Ve \" End verbatim text
	23	.ft R
	24	.fi
	25	..
	26	.\" Set up some character translations and predefined strings. \*(-- will
	27	.\" give an unbreakable dash, \(PI will give pi, \(L" will give a left
	28	.\" double quote, and \*(R" will give a right double quote. \| will give a
	29	.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
	30	.\" do unbreakable dashes and therefore won't be available. \(C` and \(C'
	31	.\" expand to `' in nroff, nothing in troff, for use with C<>.
	32	.tr \(W-\|\(bv\(Tr
	33	.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
	34	.ie n \{\
	35	. ds -- \(*W-
	36	. ds PI pi
	37	. if (\n(.H=4u)&(1m=24u) .ds -- \(W\h'-12u'\(W\h'-12u'-\" diablo 10 pitch
	38	. if (\n(.H=4u)&(1m=20u) .ds -- \(W\h'-12u'\(W\h'-8u'-\" diablo 12 pitch
	39	. ds L" ""
	40	. ds R" ""
	41	. ds C` ""
	42	. ds C' ""
	43	'br\}
	44	.el\{\
	45	. ds -- \\|\(em\\|
	46	. ds PI \(*p
	47	. ds L" ``
	48	. ds R" ''
	49	'br\}
	50	.\"
	51	.\" If the F register is turned on, we'll generate index entries on stderr for
	52	.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
	53	.\" entries marked with X<> in POD. Of course, you'll have to process the
	54	.\" output yourself in some meaningful fashion.
	55	.if \nF \{\
	56	. de IX
	57	. tm Index:\\$1\t\\n%\t"\\$2"
	58	..
	59	. nr % 0
	60	. rr F
	61	.\}
	62	.\"
	63	.\" For nroff, turn off justification. Always turn off hyphenation; it makes
	64	.\" way too many mistakes in technical documents.
	65	.hy 0
	66	.if n .na
	67	.\"
	68	.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
	69	.\" Fear. Run. Save yourself. No user-serviceable parts.
	70	. \" fudge factors for nroff and troff
	71	.if n \{\
	72	. ds #H 0
	73	. ds #V .8m
	74	. ds #F .3m
	75	. ds #[ \f1
	76	. ds #] \fP
	77	.\}
	78	.if t \{\
	79	. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
	80	. ds #V .6m
	81	. ds #F 0
	82	. ds #[ \&
	83	. ds #] \&
	84	.\}
	85	. \" simple accents for nroff and troff
	86	.if n \{\
	87	. ds ' \&
	88	. ds ` \&
	89	. ds ^ \&
	90	. ds , \&
	91	. ds ~ ~
	92	. ds /
	93	.\}
	94	.if t \{\
	95	. ds ' \\k:\h'-(\\n(.wu8/10-\(#H)'\'\h"\|\\n:u"
	96	. ds ` \\k:\h'-(\\n(.wu8/10-\(#H)'\`\h'\|\\n:u'
	97	. ds ^ \\k:\h'-(\\n(.wu10/11-\(#H)'^\h'\|\\n:u'
	98	. ds , \\k:\h'-(\\n(.wu*8/10)',\h'\|\\n:u'
	99	. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'\|\\n:u'
	100	. ds / \\k:\h'-(\\n(.wu8/10-\(#H)'\z\(sl\h'\|\\n:u'
	101	.\}
	102	. \" troff and (daisy-wheel) nroff accents
	103	.ds : \\k:\h'-(\\n(.wu8/10-\(#H+.1m+\(#F)'\v'-\(#V'\z.\h'.2m+\(#F'.\h'\|\\n:u'\v'\(#V'
	104	.ds 8 \h'\(#H'\(b\h'-\*(#H'
	105	.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\(#H)/2u'\v'-.3n'\(#[\z\(de\v'.3n'\h'\|\\n:u'\*(#]
	106	.ds d- \h'\(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\(#H'
	107	.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'\|\\n:u'
	108	.ds th \(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u2/3)'\s-1o\s+1\*(#]
	109	.ds Th \(#[\s+2I\s-2\h'-\w'I'u3/5'\v'-.3m'o\v'.3m'\*(#]
	110	.ds ae a\h'-(\w'a'u*4/10)'e
	111	.ds Ae A\h'-(\w'A'u*4/10)'E
	112	. \" corrections for vroff
	113	.if v .ds ~ \\k:\h'-(\\n(.wu9/10-\(#H)'\s-2\u~\d\s+2\h'\|\\n:u'
	114	.if v .ds ^ \\k:\h'-(\\n(.wu10/11-\(#H)'\v'-.4m'^\v'.4m'\h'\|\\n:u'
	115	. \" for low resolution devices (crt and lpr)
	116	.if \n(.H>23 .if \n(.V>19 \
	117	\{\
	118	. ds : e
	119	. ds 8 ss
	120	. ds o a
	121	. ds d- d\h'-1'\(ga
	122	. ds D- D\h'-1'\(hy
	123	. ds th \o'bp'
	124	. ds Th \o'LP'
	125	. ds ae ae
	126	. ds Ae AE
	127	.\}
	128	.rm #[ #] #H #V #F C
	129	.\" ========================================================================
	130	.\"
	131	.IX Title "Encode::Supported 3"
	132	.TH Encode::Supported 3 "2001-09-21" "perl v5.8.8" "Perl Programmers Reference Guide"
	133	.SH "NAME"
	134	Encode::Supported \-\- Encodings supported by Encode
	135	.SH "DESCRIPTION"
	136	.IX Header "DESCRIPTION"
	137	.Sh "Encoding Names"
	138	.IX Subsection "Encoding Names"
	139	Encoding names are case insensitive. White space in names
	140	is ignored. In addition, an encoding may have aliases.
	141	Each encoding has one \(L"canonical\(R" name. The \(L"canonical\(R"
	142	name is chosen from the names of the encoding by picking
	143	the first in the following sequence (with a few exceptions).
	144	.IP "\(bu" 4
	145	The name used by the Perl community. That includes 'utf8' and 'ascii'.
	146	Unlike aliases, canonical names directly reach the method so such
	147	frequently used words like 'utf8' don't need to do alias lookups.
	148	.IP "\(bu" 4
	149	The \s-1MIME\s0 name as defined in \s-1IETF\s0 RFCs. This includes all \(L"iso\-\(R"s.
	150	.IP "\(bu" 4
	151	The name in the \s-1IANA\s0 registry.
	152	.IP "\(bu" 4
	153	The name used by the organization that defined it.
	154	.PP
	155	In case \fIde jure\fR canonical names differ from that of the Encode
	156	module, they are always aliased if they are ever implemented. So you can
	157	safely tell if a given encoding is implemented or not just by passing
	158	the canonical name.
	159	.PP
	160	Because of all the alias issues, and because in the general case
	161	encodings have state, \(L"Encode\(R" uses an encoding object internally
	162	once an operation is in progress.
	163	.SH "Supported Encodings"
	164	.IX Header "Supported Encodings"
	165	As of Perl 5.8.0, at least the following encodings are recognized.
	166	Note that unless otherwise specified, they are all case insensitive
	167	(via alias) and all occurrences of spaces are replaced with '\-'.
	168	In other words, \(L"\s-1ISO\s0 8859 1\(R" and \(L"iso\-8859\-1\(R" are identical.
	169	.PP
	170	Encodings are categorized and implemented in several different modules
	171	but you don't have to \f(CW\(C`use Encode::XX\(C'\fR to make them available for
	172	most cases. Encode.pm will automatically load those modules on demand.
	173	.Sh "Built-in Encodings"
	174	.IX Subsection "Built-in Encodings"
	175	The following encodings are always available.
	176	.PP
	177	.Vb 8
	178	\& Canonical Aliases Comments & References
	179	\& ----------------------------------------------------------------
	180	\& ascii US-ascii ISO-646-US [ECMA]
	181	\& ascii-ctrl Special Encoding
	182	\& iso-8859-1 latin1 [ISO]
	183	\& null Special Encoding
	184	\& utf8 UTF-8 [RFC2279]
	185	\& ----------------------------------------------------------------
	186	.Ve
	187	.PP
	188	\&\fInull\fR and \fIascii-ctrl\fR are special. \(L"null\(R" fails for all characters
	189	so when you set fallback mode to \s-1PERLQQ\s0, \s-1HTMLCREF\s0 or \s-1XMLCREF\s0, \s-1ALL\s0
	190	\&\s-1CHARACTERS\s0 will fall back to character references. Ditto for
	191	\&\(L"ascii\-ctrl\(R" except for control characters. For fallback modes, see
	192	Encode.
	193	.Sh "Encode::Unicode \*(-- other Unicode encodings"
	194	.IX Subsection "Encode::Unicode other Unicode encodings"
	195	Unicode coding schemes other than native utf8 are supported by
	196	Encode::Unicode, which will be autoloaded on demand.
	197	.PP
	198	.Vb 11
	199	\& ----------------------------------------------------------------
	200	\& UCS-2BE UCS-2, iso-10646-1 [IANA, UC]
	201	\& UCS-2LE [UC]
	202	\& UTF-16 [UC]
	203	\& UTF-16BE [UC]
	204	\& UTF-16LE [UC]
	205	\& UTF-32 [UC]
	206	\& UTF-32BE UCS-4 [UC]
	207	\& UTF-32LE [UC]
	208	\& UTF-7 [RFC2152]
	209	\& ----------------------------------------------------------------
	210	.Ve
	211	.PP
	212	To find how (UCS\-2|UTF\-(16|32))(LE|BE)? differ from one another,
	213	see Encode::Unicode.
	214	.PP
	215	\&\s-1UTF\-7\s0 is a special encoding which \(L"re\-encodes\(R" \s-1UTF\-16BE\s0 into a 7\-bit
	216	encoding. It is implemented separately by Encode::Unicode::UTF7.
	217	.Sh "Encode::Byte \*(-- Extended \s-1ASCII\s0"
	218	.IX Subsection "Encode::Byte Extended ASCII"
	219	Encode::Byte implements most single-byte encodings except for
	220	Symbols and \s-1EBCDIC\s0. The following encodings are based on single-byte
	221	encodings implemented as extended \s-1ASCII\s0. Most of them map
	222	\&\ex80\-\exff (upper half) to non-ASCII characters.
	223	.IP "\s-1ISO\-8859\s0 and corresponding vendor mappings" 4
	224	.IX Item "ISO-8859 and corresponding vendor mappings"
	225	Since there are so many, they are presented in table format with
	226	languages and corresponding encoding names by vendors. Note that
	227	the table is sorted in order of \s-1ISO\-8859\s0 and the corresponding vendor
	228	mappings are slightly different from that of \s-1ISO\s0. See
	229	<http://czyborra.com/charsets/iso8859.html> for details.
	230	.Sp
	231	.Vb 32
	232	\& Lang/Regions ISO/Other Std. DOS Windows Macintosh Others
	233	\& ----------------------------------------------------------------
	234	\& N. America (ASCII) cp437 AdobeStandardEncoding
	235	\& cp863 (DOSCanadaF)
	236	\& W. Europe iso-8859-1 cp850 cp1252 MacRoman nextstep
	237	\& hp-roman8
	238	\& cp860 (DOSPortuguese)
	239	\& Cntrl. Europe iso-8859-2 cp852 cp1250 MacCentralEurRoman
	240	\& MacCroatian
	241	\& MacRomanian
	242	\& MacRumanian
	243	\& Latin3[1] iso-8859-3
	244	\& Latin4[2] iso-8859-4
	245	\& Cyrillics iso-8859-5 cp855 cp1251 MacCyrillic
	246	\& (See also next section) cp866 MacUkrainian
	247	\& Arabic iso-8859-6 cp864 cp1256 MacArabic
	248	\& cp1006 MacFarsi
	249	\& Greek iso-8859-7 cp737 cp1253 MacGreek
	250	\& cp869 (DOSGreek2)
	251	\& Hebrew iso-8859-8 cp862 cp1255 MacHebrew
	252	\& Turkish iso-8859-9 cp857 cp1254 MacTurkish
	253	\& Nordics iso-8859-10 cp865
	254	\& cp861 MacIcelandic
	255	\& MacSami
	256	\& Thai iso-8859-11[3] cp874 MacThai
	257	\& (iso-8859-12 is nonexistent. Reserved for Indics?)
	258	\& Baltics iso-8859-13 cp775 cp1257
	259	\& Celtics iso-8859-14
	260	\& Latin9 [4] iso-8859-15
	261	\& Latin10 iso-8859-16
	262	\& Vietnamese viscii cp1258 MacVietnamese
	263	\& ----------------------------------------------------------------
	264	.Ve
	265	.Sp
	266	.Vb 5
	267	\& [1] Esperanto, Maltese, and Turkish. Turkish is now on 8859-9.
	268	\& [2] Baltics. Now on 8859-10, except for Latvian.
	269	\& [3] TIS 620 + Non-Breaking Space (0xA0 / U+00A0)
	270	\& [4] Nicknamed Latin0; the Euro sign as well as French and Finnish
	271	\& letters that are missing from 8859-1 were added.
	272	.Ve
	273	.Sp
	274	All cp* are also available as ibm\-, ms\-, and windows\-* . See also
	275	<http://czyborra.com/charsets/codepages.html>.
	276	.Sp
	277	Macintosh encodings don't seem to be registered in such entities as
	278	\&\s-1IANA\s0. \(L"Canonical\(R" names in Encode are based upon Apple's Tech Note
	279	1150. See <http://developer.apple.com/technotes/tn/tn1150.html>
	280	for details.
	281	.IP "\s-1KOI8\s0 \- De Facto Standard for the Cyrillic world" 4
	282	.IX Item "KOI8 - De Facto Standard for the Cyrillic world"
	283	Though \s-1ISO\-8859\s0 does have \s-1ISO\-8859\-5\s0, the \s-1KOI8\s0 series is far more
	284	popular in the Net. Encode comes with the following \s-1KOI\s0 charsets.
	285	For gory details, see <http://czyborra.com/charsets/cyrillic.html>
	286	.Sp
	287	.Vb 5
	288	\& ----------------------------------------------------------------
	289	\& koi8-f
	290	\& koi8-r cp878 [RFC1489]
	291	\& koi8-u [RFC2319]
	292	\& ----------------------------------------------------------------
	293	.Ve
	294	.IP "gsm0338 \- Hentai Latin 1" 4
	295	.IX Item "gsm0338 - Hentai Latin 1"
	296	\&\s-1GSM0338\s0 is for \s-1GSM\s0 handsets. Though it shares alphanumerals with
	297	\&\s-1ASCII\s0, control character ranges and other parts are mapped very
	298	differently, mainly to store Greek characters. There are also escape
	299	sequences (starting with 0x1B) to cover e.g. the Euro sign. Some
	300	special cases like a trailing 0x00 byte or a lone 0x1B byte are not
	301	well-defined and \fIdecode()\fR will return an empty string for them.
	302	One possible workaround is
	303	.Sp
	304	.Vb 3
	305	\& $gsm =~ s/\ex00\ez/\ex00\ex00/;
	306	\& $uni = decode("gsm0338", $gsm);
	307	\& $uni .= "\exA0" if $gsm =~ /\ex1B\ez/;
	308	.Ve
	309	.Sp
	310	Note that the Encode implementation of \s-1GSM0338\s0 does not implement the
	311	reuse of Latin capital letters as Greek capital letters (for example,
	312	the 0x5A is U+005A (\s-1LATIN\s0 \s-1CAPITAL\s0 \s-1LETTER\s0 Z), not U+0396 (\s-1GREEK\s0 \s-1CAPITAL\s0
	313	\&\s-1LETTER\s0 \s-1ZETA\s0).
	314	.Sp
	315	The \s-1GSM0338\s0 is also covered in Encode::Byte even though it is not
	316	an \(L"extended \s-1ASCII\s0\(R" encoding.
	317	.Sh "\s-1CJK:\s0 Chinese, Japanese, Korean (Multibyte)"
	318	.IX Subsection "CJK: Chinese, Japanese, Korean (Multibyte)"
	319	Note that Vietnamese is listed above. Also read \(L"Encoding vs Charset\(R"
	320	below. Also note that these are implemented in distinct modules by
	321	countries, due to the size concerns (simplified Chinese is mapped
	322	to '\s-1CN\s0', continental China, while traditional Chinese is mapped to
	323	\&'\s-1TW\s0', Taiwan). Please refer to their respective documentation pages.
	324	.IP "Encode::CN \*(-- Continental China" 4
	325	.IX Item "Encode::CN Continental China"
	326	.Vb 9
	327	\& Standard DOS/Win Macintosh Comment/Reference
	328	\& ----------------------------------------------------------------
	329	\& euc-cn [1] MacChineseSimp
	330	\& (gbk) cp936 [2]
	331	\& gb12345-raw { GB12345 without CES }
	332	\& gb2312-raw { GB2312 without CES }
	333	\& hz
	334	\& iso-ir-165
	335	\& ----------------------------------------------------------------
	336	.Ve
	337	.Sp
	338	.Vb 2
	339	\& [1] GB2312 is aliased to this. See L<Microsoft-related naming mess>
	340	\& [2] gbk is aliased to this. See L<Microsoft-related naming mess>
	341	.Ve
	342	.IP "Encode::JP \*(-- Japan" 4
	343	.IX Item "Encode::JP Japan"
	344	.Vb 11
	345	\& Standard DOS/Win Macintosh Comment/Reference
	346	\& ----------------------------------------------------------------
	347	\& euc-jp
	348	\& shiftjis cp932 macJapanese
	349	\& 7bit-jis
	350	\& iso-2022-jp [RFC1468]
	351	\& iso-2022-jp-1 [RFC2237]
	352	\& jis0201-raw { JIS X 0201 (roman + halfwidth kana) without CES }
	353	\& jis0208-raw { JIS X 0208 (Kanji + fullwidth kana) without CES }
	354	\& jis0212-raw { JIS X 0212 (Extended Kanji) without CES }
	355	\& ----------------------------------------------------------------
	356	.Ve
	357	.IP "Encode::KR \*(-- Korea" 4
	358	.IX Item "Encode::KR Korea"
	359	.Vb 8
	360	\& Standard DOS/Win Macintosh Comment/Reference
	361	\& ----------------------------------------------------------------
	362	\& euc-kr MacKorean [RFC1557]
	363	\& cp949 [1]
	364	\& iso-2022-kr [RFC1557]
	365	\& johab [KS X 1001:1998, Annex 3]
	366	\& ksc5601-raw { KSC5601 without CES }
	367	\& ----------------------------------------------------------------
	368	.Ve
	369	.Sp
	370	.Vb 2
	371	\& [1] ks_c_5601-1987, (x-)?windows-949, and uhc are aliased to this.
	372	\& See below.
	373	.Ve
	374	.IP "Encode::TW \*(-- Taiwan" 4
	375	.IX Item "Encode::TW Taiwan"
	376	.Vb 5
	377	\& Standard DOS/Win Macintosh Comment/Reference
	378	\& ----------------------------------------------------------------
	379	\& big5-eten cp950 MacChineseTrad {big5 aliased to big5-eten}
	380	\& big5-hkscs
	381	\& ----------------------------------------------------------------
	382	.Ve
	383	.IP "Encode::HanExtra \*(-- More Chinese via \s-1CPAN\s0" 4
	384	.IX Item "Encode::HanExtra More Chinese via CPAN"
	385	Due to the size concerns, additional Chinese encodings below are
	386	distributed separately on \s-1CPAN\s0, under the name Encode::HanExtra.
	387	.Sp
	388	.Vb 8
	389	\& Standard DOS/Win Macintosh Comment/Reference
	390	\& ----------------------------------------------------------------
	391	\& big5ext CMEX's Big5e Extension
	392	\& big5plus CMEX's Big5+ Extension
	393	\& cccii Chinese Character Code for Information Interchange
	394	\& euc-tw EUC (Extended Unix Character)
	395	\& gb18030 GBK with Traditional Characters
	396	\& ----------------------------------------------------------------
	397	.Ve
	398	.IP "Encode::JIS2K \*(-- \s-1JIS\s0 X 0213 encodings via \s-1CPAN\s0" 4
	399	.IX Item "Encode::JIS2K JIS X 0213 encodings via CPAN"
	400	Due to size concerns, additional Japanese encodings below are
	401	distributed separately on \s-1CPAN\s0, under the name Encode::JIS2K.
	402	.Sp
	403	.Vb 8
	404	\& Standard DOS/Win Macintosh Comment/Reference
	405	\& ----------------------------------------------------------------
	406	\& euc-jisx0213
	407	\& shiftjisx0213
	408	\& iso-2022-jp-3
	409	\& jis0213-1-raw
	410	\& jis0213-2-raw
	411	\& ----------------------------------------------------------------
	412	.Ve
	413	.Sh "Miscellaneous encodings"
	414	.IX Subsection "Miscellaneous encodings"
	415	.IP "Encode::EBCDIC" 4
	416	.IX Item "Encode::EBCDIC"
	417	See perlebcdic for details.
	418	.Sp
	419	.Vb 8
	420	\& ----------------------------------------------------------------
	421	\& cp37
	422	\& cp500
	423	\& cp875
	424	\& cp1026
	425	\& cp1047
	426	\& posix-bc
	427	\& ----------------------------------------------------------------
	428	.Ve
	429	.IP "Encode::Symbols" 4
	430	.IX Item "Encode::Symbols"
	431	For symbols and dingbats.
	432	.Sp
	433	.Vb 7
	434	\& ----------------------------------------------------------------
	435	\& symbol
	436	\& dingbats
	437	\& MacDingbats
	438	\& AdobeZdingbat
	439	\& AdobeSymbol
	440	\& ----------------------------------------------------------------
	441	.Ve
	442	.IP "Encode::MIME::Header" 4
	443	.IX Item "Encode::MIME::Header"
	444	Strictly speaking, \s-1MIME\s0 header encoding documented in \s-1RFC\s0 2047 is more
	445	of encapsulation than encoding. However, their support in the modern
	446	world is imperative so they are supported.
	447	.Sp
	448	.Vb 5
	449	\& ----------------------------------------------------------------
	450	\& MIME-Header [RFC2047]
	451	\& MIME-B [RFC2047]
	452	\& MIME-Q [RFC2047]
	453	\& ----------------------------------------------------------------
	454	.Ve
	455	.IP "Encode::Guess" 4
	456	.IX Item "Encode::Guess"
	457	This one is not a name of encoding but a utility that lets you pick up
	458	the most appropriate encoding for a data out of given \fIsuspects\fR. See
	459	Encode::Guess for details.
	460	.SH "Unsupported encodings"
	461	.IX Header "Unsupported encodings"
	462	The following encodings are not supported as yet; some because they
	463	are rarely used, some because of technical difficulties. They may
	464	be supported by external modules via \s-1CPAN\s0 in the future, however.
	465	.IP "\s-1ISO\-2022\-JP\-2\s0 [\s-1RFC1554\s0]" 4
	466	.IX Item "ISO-2022-JP-2 [RFC1554]"
	467	Not very popular yet. Needs Unicode Database or equivalent to
	468	implement \fIencode()\fR (because it includes \s-1JIS\s0 X 0208/0212, \s-1KSC5601\s0, and
	469	\&\s-1GB2312\s0 simultaneously, whose code points in Unicode overlap. So you
	470	need to lookup the database to determine to what character set a given
	471	Unicode character should belong).
	472	.IP "\s-1ISO\-2022\-CN\s0 [\s-1RFC1922\s0]" 4
	473	.IX Item "ISO-2022-CN [RFC1922]"
	474	Not very popular. Needs \s-1CNS\s0 11643\-1 and \-2 which are not available in
	475	this module. \s-1CNS\s0 11643 is supported (via euc\-tw) in Encode::HanExtra.
	476	Autrijus Tang may add support for this encoding in his module in future.
	477	.IP "Various HP-UX encodings" 4
	478	.IX Item "Various HP-UX encodings"
	479	The following are unsupported due to the lack of mapping data.
	480	.Sp
	481	.Vb 2
	482	\& '8' - arabic8, greek8, hebrew8, kana8, thai8, and turkish8
	483	\& '15' - japanese15, korean15, and roi15
	484	.Ve
	485	.IP "Cyrillic encoding \s-1ISO\-IR\-111\s0" 4
	486	.IX Item "Cyrillic encoding ISO-IR-111"
	487	Anton Tagunov doubts its usefulness.
	488	.IP "\s-1ISO\-8859\-8\-1\s0 [Hebrew]" 4
	489	.IX Item "ISO-8859-8-1 [Hebrew]"
	490	None of the Encode team knows Hebrew well enough (\s-1ISO\-8859\-8\s0, cp1255 and
	491	MacHebrew are supported because and just because there were mappings
	492	available at <http://www.unicode.org/>). Contributions welcome.
	493	.IP "\s-1ISIRI\s0 3342, Iran System, \s-1ISIRI\s0 2900 [Farsi]" 4
	494	.IX Item "ISIRI 3342, Iran System, ISIRI 2900 [Farsi]"
	495	Ditto.
	496	.IP "Thai encoding \s-1TCVN\s0" 4
	497	.IX Item "Thai encoding TCVN"
	498	Ditto.
	499	.IP "Vietnamese encodings \s-1VPS\s0" 4
	500	.IX Item "Vietnamese encodings VPS"
	501	Though Jungshik Shin has reported that Mozilla supports this encoding,
	502	it was too late before 5.8.0 for us to add it. In the future, it
	503	may be available via a separate module. See
	504	<http://lxr.mozilla.org/seamonkey/source/intl/uconv/ucvlatin/vps.uf>
	505	and
	506	<http://lxr.mozilla.org/seamonkey/source/intl/uconv/ucvlatin/vps.ut>
	507	if you are interested in helping us.
	508	.IP "Various Mac encodings" 4
	509	.IX Item "Various Mac encodings"
	510	The following are unsupported due to the lack of mapping data.
	511	.Sp
	512	.Vb 5
	513	\& MacArmenian, MacBengali, MacBurmese, MacEthiopic
	514	\& MacExtArabic, MacGeorgian, MacKannada, MacKhmer
	515	\& MacLaotian, MacMalayalam, MacMongolian, MacOriya
	516	\& MacSinhalese, MacTamil, MacTelugu, MacTibetan
	517	\& MacVietnamese
	518	.Ve
	519	.Sp
	520	The rest which are already available are based upon the vendor mappings
	521	at <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> .
	522	.IP "(Mac) Indic encodings" 4
	523	.IX Item "(Mac) Indic encodings"
	524	The maps for the following are available at <http://www.unicode.org/>
	525	but remain unsupported because those encodings need an algorithmic
	526	approach, currently unsupported by \fIenc2xs\fR:
	527	.Sp
	528	.Vb 3
	529	\& MacDevanagari
	530	\& MacGurmukhi
	531	\& MacGujarati
	532	.Ve
	533	.Sp
	534	For details, please see \f(CW\(C`Unicode mapping issues and notes:\(C'\fR at
	535	<http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/DEVANAGA.TXT> .
	536	.Sp
	537	I believe this issue is prevalent not only for Mac Indics but also in
	538	other Indic encodings, but the above were the only Indic encodings
	539	maps that I could find at <http://www.unicode.org/> .
	540	.SH "Encoding vs. Charset \*(-- terminology"
	541	.IX Header "Encoding vs. Charset terminology"
	542	We are used to using the term (character) \fIencoding\fR and \fIcharacter
	543	set\fR interchangeably. But just as confusing the terms byte and
	544	character is dangerous and the terms should be differentiated when
	545	needed, we need to differentiate \fIencoding\fR and \fIcharacter set\fR.
	546	.PP
	547	To understand that, here is a description of how we make computers
	548	grok our characters.
	549	.IP "\(bu" 4
	550	First we start with which characters to include. We call this
	551	collection of characters \fIcharacter repertoire\fR.
	552	.IP "\(bu" 4
	553	Then we have to give each character a unique \s-1ID\s0 so your computer can
	554	tell the difference between 'a' and 'A'. This itemized character
	555	repertoire is now a \fIcharacter set\fR.
	556	.IP "\(bu" 4
	557	If your computer can grok the character set without further
	558	processing, you can go ahead and use it. This is called a \fIcoded
	559	character set\fR (\s-1CCS\s0) or \fIraw character encoding\fR. \s-1ASCII\s0 is used this
	560	way for most cases.
	561	.IP "\(bu" 4
	562	But in many cases, especially multi-byte \s-1CJK\s0 encodings, you have to
	563	tweak a little more. Your network connection may not accept any data
	564	with the Most Significant Bit set, and your computer may not be able to
	565	tell if a given byte is a whole character or just half of it. So you
	566	have to \fIencode\fR the character set to use it.
	567	.Sp
	568	A \fIcharacter encoding scheme\fR (\s-1CES\s0) determines how to encode a given
	569	character set, or a set of multiple character sets. 7bit \s-1ISO\-2022\s0 is
	570	an example of a \s-1CES\s0. You switch between character sets via \fIescape
	571	sequences\fR.
	572	.PP
	573	Technically, or mathematically, speaking, a character set encoded in
	574	such a \s-1CES\s0 that maps character by character may form a \s-1CCS\s0. \s-1EUC\s0 is such
	575	an example. The \s-1CES\s0 of \s-1EUC\s0 is as follows:
	576	.IP "\(bu" 4
	577	Map \s-1ASCII\s0 unchanged.
	578	.IP "\(bu" 4
	579	Map such a character set that consists of 94 or 96 powered by N
	580	members by adding 0x80 to each byte.
	581	.IP "\(bu" 4
	582	You can also use 0x8e and 0x8f to indicate that the following sequence of
	583	characters belongs to yet another character set. To each following byte
	584	is added the value 0x80.
	585	.PP
	586	By carefully looking at the encoded byte sequence, you can find that the
	587	byte sequence conforms a unique number. In that sense, \s-1EUC\s0 is a \s-1CCS\s0
	588	generated by a \s-1CES\s0 above from up to four \s-1CCS\s0 (complicated?). \s-1UTF\-8\s0
	589	falls into this category. See \(L"\s-1UTF\-8\s0\(R" in perlunicode to find out how
	590	\&\s-1UTF\-8\s0 maps Unicode to a byte sequence.
	591	.PP
	592	You may also have found out by now why 7bit \s-1ISO\-2022\s0 cannot comprise
	593	a \s-1CCS\s0. If you look at a byte sequence \ex21\ex21, you can't tell if
	594	it is two !'s or \s-1IDEOGRAPHIC\s0 \s-1SPACE\s0. \s-1EUC\s0 maps the latter to \exA1\exA1
	595	so you have no trouble differentiating between \(L"!!\(R" and \(L"\ \(R".
	596	.SH "Encoding Classification (by Anton Tagunov and Dan Kogai)"
	597	.IX Header "Encoding Classification (by Anton Tagunov and Dan Kogai)"
	598	This section tries to classify the supported encodings by their
	599	applicability for information exchange over the Internet and to
	600	choose the most suitable aliases to name them in the context of
	601	such communication.
	602	.IP "\(bu" 4
	603	To (en|de)code encodings marked by \f(CW\(C`(**)\(C'\fR, you need
	604	\&\f(CW\(C`Encode::HanExtra\(C'\fR, available from \s-1CPAN\s0.
	605	.PP
	606	Encoding names
	607	.PP
	608	.Vb 3
	609	\& US-ASCII UTF-8 ISO-8859-* KOI8-R
	610	\& Shift_JIS EUC-JP ISO-2022-JP ISO-2022-JP-1
	611	\& EUC-KR Big5 GB2312
	612	.Ve
	613	.PP
	614	are registered with \s-1IANA\s0 as preferred \s-1MIME\s0 names and may
	615	be used over the Internet.
	616	.PP
	617	\&\f(CW\(C`Shift_JIS\(C'\fR has been officialized by \s-1JIS\s0 X 0208:1997.
	618	\&\(L"Microsoft\-related naming mess\(R" gives details.
	619	.PP
	620	\&\f(CW\(C`GB2312\(C'\fR is the \s-1IANA\s0 name for \f(CW\(C`EUC\-CN\(C'\fR.
	621	See \(L"Microsoft\-related naming mess\(R" for details.
	622	.PP
	623	\&\f(CW\(C`GB_2312\-80\(C'\fR \fIraw\fR encoding is available as \f(CW\(C`gb2312\-raw\(C'\fR
	624	with Encode. See Encode::CN for details.
	625	.PP
	626	.Vb 2
	627	\& EUC-CN
	628	\& KOI8-U [RFC2319]
	629	.Ve
	630	.PP
	631	have not been registered with \s-1IANA\s0 (as of March 2002) but
	632	seem to be supported by major web browsers.
	633	The \s-1IANA\s0 name for \f(CW\(C`EUC\-CN\(C'\fR is \f(CW\(C`GB2312\(C'\fR.
	634	.PP
	635	.Vb 1
	636	\& KS_C_5601-1987
	637	.Ve
	638	.PP
	639	is heavily misused.
	640	See \(L"Microsoft\-related naming mess\(R" for details.
	641	.PP
	642	\&\f(CW\(C`KS_C_5601\-1987\(C'\fR \fIraw\fR encoding is available as \f(CW\(C`ksc5601\-raw\(C'\fR
	643	with Encode. See Encode::KR for details.
	644	.PP
	645	.Vb 1
	646	\& UTF-16 UTF-16BE UTF-16LE
	647	.Ve
	648	.PP
	649	are IANA-registered \f(CW\(C`charset\(C'\fRs. See [\s-1RFC\s0 2781] for details.
	650	Jungshik Shin reports that \s-1UTF\-16\s0 with a \s-1BOM\s0 is well accepted
	651	by \s-1MS\s0 \s-1IE\s0 5/6 and \s-1NS\s0 4/6. Beware however that
	652	.IP "\(bu" 4
	653	\&\f(CW\(C`UTF\-16\(C'\fR support in any software you're going to be
	654	using/interoperating with has probably been less tested
	655	than \f(CW\(C`UTF\-8\(C'\fR support
	656	.IP "\(bu" 4
	657	\&\f(CW\(C`UTF\-8\(C'\fR coded data seamlessly passes traditional
	658	command piping (\f(CW\(C`cat\(C'\fR, \f(CW\(C`more\(C'\fR, etc.) while \f(CW\(C`UTF\-16\(C'\fR coded
	659	data is likely to cause confusion (with its zero bytes,
	660	for example)
	661	.IP "\(bu" 4
	662	it is beyond the power of words to describe the way \s-1HTML\s0 browsers
	663	encode non\-\f(CW\(C`ASCII\(C'\fR form data. To get a general impression, visit
	664	<http://ppewww.ph.gla.ac.uk/~flavell/charset/form\-i18n.html>.
	665	While encoding of form data has stabilized for \f(CW\(C`UTF\-8\(C'\fR encoded pages
	666	(at least \s-1IE\s0 5/6, \s-1NS\s0 6, and Opera 6 behave consistently), be sure to
	667	expect fun (and cross-browser discrepancies) with \f(CW\(C`UTF\-16\(C'\fR encoded
	668	pages!
	669	.PP
	670	The rule of thumb is to use \f(CW\(C`UTF\-8\(C'\fR unless you know what
	671	you're doing and unless you really benefit from using \f(CW\(C`UTF\-16\(C'\fR.
	672	.PP
	673	.Vb 5
	674	\& ISO-IR-165 [RFC1345]
	675	\& VISCII
	676	\& GB 12345
	677	\& GB 18030 (**) (see links below)
	678	\& EUC-TW (**)
	679	.Ve
	680	.PP
	681	are totally valid encodings but not registered at \s-1IANA\s0.
	682	The names under which they are listed here are probably the
	683	most widely-known names for these encodings and are recommended
	684	names.
	685	.PP
	686	.Vb 1
	687	\& BIG5PLUS (**)
	688	.Ve
	689	.PP
	690	is a proprietary name.
	691	.Sh "Microsoft-related naming mess"
	692	.IX Subsection "Microsoft-related naming mess"
	693	Microsoft products misuse the following names:
	694	.IP "\s-1KS_C_5601\-1987\s0" 4
	695	.IX Item "KS_C_5601-1987"
	696	Microsoft extension to \f(CW\(C`EUC\-KR\(C'\fR.
	697	.Sp
	698	Proper names: \f(CW\(C`CP949\(C'\fR, \f(CW\(C`UHC\(C'\fR, \f(CW\(C`x\-windows\-949\(C'\fR (as used by Mozilla).
	699	.Sp
	700	See <http://lists.w3.org/Archives/Public/ietf\-charsets/2001AprJun/0033.html>
	701	for details.
	702	.Sp
	703	Encode aliases \f(CW\(C`KS_C_5601\-1987\(C'\fR to \f(CW\(C`cp949\(C'\fR to reflect this common
	704	misusage. \fIRaw\fR \f(CW\(C`KS_C_5601\-1987\(C'\fR encoding is available as
	705	\&\f(CW\(C`ksc5601\-raw\(C'\fR.
	706	.Sp
	707	See Encode::KR for details.
	708	.IP "\s-1GB2312\s0" 4
	709	.IX Item "GB2312"
	710	Microsoft extension to \f(CW\(C`EUC\-CN\(C'\fR.
	711	.Sp
	712	Proper names: \f(CW\(C`CP936\(C'\fR, \f(CW\(C`GBK\(C'\fR.
	713	.Sp
	714	\&\f(CW\(C`GB2312\(C'\fR has been registered in the \f(CW\(C`EUC\-CN\(C'\fR meaning at
	715	\&\s-1IANA\s0. This has partially repaired the situation: Microsoft's
	716	\&\f(CW\(C`GB2312\(C'\fR has become a superset of the official \f(CW\(C`GB2312\(C'\fR.
	717	.Sp
	718	Encode aliases \f(CW\(C`GB2312\(C'\fR to \f(CW\(C`euc\-cn\(C'\fR in full agreement with
	719	\&\s-1IANA\s0 registration. \f(CW\(C`cp936\(C'\fR is supported separately.
	720	\&\fIRaw\fR \f(CW\(C`GB_2312\-80\(C'\fR encoding is available as \f(CW\(C`gb2312\-raw\(C'\fR.
	721	.Sp
	722	See Encode::CN for details.
	723	.IP "Big5" 4
	724	.IX Item "Big5"
	725	Microsoft extension to \f(CW\(C`Big5\(C'\fR.
	726	.Sp
	727	Proper name: \f(CW\(C`CP950\(C'\fR.
	728	.Sp
	729	Encode separately supports \f(CW\(C`Big5\(C'\fR and \f(CW\(C`cp950\(C'\fR.
	730	.IP "Shift_JIS" 4
	731	.IX Item "Shift_JIS"
	732	Microsoft's understanding of \f(CW\(C`Shift_JIS\(C'\fR.
	733	.Sp
	734	\&\s-1JIS\s0 has not endorsed the full Microsoft standard however.
	735	The official \f(CW\(C`Shift_JIS\(C'\fR includes only \s-1JIS\s0 X 0201 and \s-1JIS\s0 X 0208
	736	character sets, while Microsoft has always used \f(CW\(C`Shift_JIS\(C'\fR
	737	to encode a wider character repertoire. See \f(CW\(C`IANA\(C'\fR registration for
	738	\&\f(CW\(C`Windows\-31J\(C'\fR.
	739	.Sp
	740	As a historical predecessor, Microsoft's variant
	741	probably has more rights for the name, though it may be objected
	742	that Microsoft shouldn't have used \s-1JIS\s0 as part of the name
	743	in the first place.
	744	.Sp
	745	Unambiguous name: \f(CW\(C`CP932\(C'\fR. \f(CW\(C`IANA\(C'\fR name (also used by Mozilla, and
	746	provided as an alias by Encode): \f(CW\(C`Windows\-31J\(C'\fR.
	747	.Sp
	748	Encode separately supports \f(CW\*(C`Shift_JIS\*(C'\fR and \f(CW\*(C`cp932\*(C'\fR.
	749	.SH "Glossary"
	750	.IX Header "Glossary"
	751	.IP "character repertoire" 4
	752	.IX Item "character repertoire"
	753	A collection of unique characters. A \fIcharacter\fR set in the strictest
	754	sense. At this stage, characters are not numbered.
	755	.IP "coded character set (\s-1CCS\s0)" 4
	756	.IX Item "coded character set (CCS)"
	757	A character set that is mapped in a way computers can use directly.
	758	Many character encodings, including \s-1EUC\s0, fall in this category.
	759	.IP "character encoding scheme (\s-1CES\s0)" 4
	760	.IX Item "character encoding scheme (CES)"
	761	An algorithm to map a character set to a byte sequence. You don't
	762	have to be able to tell which character set a given byte sequence
	763	belongs to. 7\-bit \s-1ISO\-2022\s0 is a \s-1CES\s0 but it cannot be a \s-1CCS\s0. \s-1EUC\s0 is an
	764	example of being both a \s-1CCS\s0 and \s-1CES\s0.
	765	.IP "charset (in \s-1MIME\s0 context)" 4
	766	.IX Item "charset (in MIME context)"
	767	has long been used in the meaning of \f(CW\*(C`encoding\*(C'\fR, \s-1CES\s0.
	768	.Sp
	769	While the word combination \f(CW\*(C`character set\*(C'\fR has lost this meaning
	770	in \s-1MIME\s0 context since [\s-1RFC\s0 2130], the \f(CW\*(C`charset\*(C'\fR abbreviation has
	771	retained it. This is how [\s-1RFC\s0 2277] and [\s-1RFC\s0 2278] bless \f(CW\*(C`charset\*(C'\fR:
	772	.Sp
	773	.Vb 7
	774	\& This document uses the term "charset" to mean a set of rules for
	775	\& mapping from a sequence of octets to a sequence of characters, such
	776	\& as the combination of a coded character set and a character encoding
	777	\& scheme; this is also what is used as an identifier in MIME "charset="
	778	\& parameters, and registered in the IANA charset registry ... (Note
	779	\& that this is NOT a term used by other standards bodies, such as ISO).
	780	\& [RFC 2277]
	781	.Ve
	782	.IP "\s-1EUC\s0" 4
	783	.IX Item "EUC"
	784	Extended Unix Code. See \s-1ISO\-2022\s0.
	785	.IP "\s-1ISO\-2022\s0" 4
	786	.IX Item "ISO-2022"
	787	A \s-1CES\s0 that was carefully designed to coexist with \s-1ASCII\s0. There are a 7
	788	bit version and an 8 bit version.
	789	.Sp
	790	The 7 bit version switches character set via escape sequence so it
	791	cannot form a \s-1CCS\s0. Since this is more difficult to handle in programs
	792	than the 8 bit version, the 7 bit version is not very popular except for
	793	iso\-2022\-jp, the \fIde facto\fR standard \s-1CES\s0 for e\-mails.
	794	.Sp
	795	The 8 bit version can form a \s-1CCS\s0. \s-1EUC\s0 and \s-1ISO\-8859\s0 are two examples
	796	thereof. Pre\-5.6 perl could use them as string literals.
	797	.IP "\s-1UCS\s0" 4
	798	.IX Item "UCS"
	799	Short for \fIUniversal Character Set\fR. When you say just \s-1UCS\s0, it means
	800	\&\fIUnicode\fR.
	801	.IP "\s-1UCS\-2\s0" 4
	802	.IX Item "UCS-2"
	803	\&\s-1ISO/IEC\s0 10646 encoding form: Universal Character Set coded in two
	804	octets.
	805	.IP "Unicode" 4
	806	.IX Item "Unicode"
	807	A character set that aims to include all character repertoires of the
	808	world. Many character sets in various national as well as industrial
	809	standards have become, in a way, just subsets of Unicode.
	810	.IP "\s-1UTF\s0" 4
	811	.IX Item "UTF"
	812	Short for \fIUnicode Transformation Format\fR. Determines how to map a
	813	Unicode character into a byte sequence.
	814	.IP "\s-1UTF\-16\s0" 4
	815	.IX Item "UTF-16"
	816	A \s-1UTF\s0 in 16\-bit encoding. Can either be in big endian or little
	817	endian. The big endian version is called \s-1UTF\-16BE\s0 (equal to \s-1UCS\-2\s0 +
	818	surrogate support) and the little endian version is called \s-1UTF\-16LE\s0.
	819	.SH "See Also"
	820	.IX Header "See Also"
	821	Encode,
	822	Encode::Byte,
	823	Encode::CN, Encode::JP, Encode::KR, Encode::TW,
	824	Encode::EBCDIC, Encode::Symbol,
	825	Encode::MIME::Header, Encode::Guess
	826	.SH "References"
	827	.IX Header "References"
	828	.IP "\s-1ECMA\s0" 4
	829	.IX Item "ECMA"
	830	European Computer Manufacturers Association
	831	<http://www.ecma.ch>
	832	.RS 4
	833	.ie n .IP "\s-1ECMA\-035\s0 (eq ""ISO\-2022"")" 4
	834	.el .IP "\s-1ECMA\-035\s0 (eq \f(CWISO\-2022\fR)" 4
	835	.IX Item "ECMA-035 (eq ISO-2022)"
	836	<http://www.ecma.ch/ecma1/STAND/ECMA\-035.HTM>
	837	.Sp
	838	The specification of \s-1ISO\-2022\s0 is available from the link above.
	839	.RE
	840	.RS 4
	841	.RE
	842	.IP "\s-1IANA\s0" 4
	843	.IX Item "IANA"
	844	Internet Assigned Numbers Authority
	845	<http://www.iana.org/>
	846	.RS 4
	847	.IP "Assigned Charset Names by \s-1IANA\s0" 4
	848	.IX Item "Assigned Charset Names by IANA"
	849	<http://www.iana.org/assignments/character\-sets>
	850	.Sp
	851	Most of the \f(CW\*(C`canonical names\*(C'\fR in Encode derive from this list
	852	so you can directly apply the string you have extracted from the \s-1MIME\s0
	853	headers of mail messages and web pages.
	854	.RE
	855	.RS 4
	856	.RE
	857	.IP "\s-1ISO\s0" 4
	858	.IX Item "ISO"
	859	International Organization for Standardization
	860	<http://www.iso.ch/>
	861	.IP "\s-1RFC\s0" 4
	862	.IX Item "RFC"
	863	Request For Comments \*(-- need I say more?
	864	<http://www.rfc\-editor.org/>, <http://www.rfc.net/>,
	865	<http://www.faqs.org/rfcs/>
	866	.IP "\s-1UC\s0" 4
	867	.IX Item "UC"
	868	Unicode Consortium
	869	<http://www.unicode.org/>
	870	.RS 4
	871	.IP "Unicode Glossary" 4
	872	.IX Item "Unicode Glossary"
	873	<http://www.unicode.org/glossary/>
	874	.Sp
	875	The glossary of this document is based upon this site.
	876	.RE
	877	.RS 4
	878	.RE
	879	.Sh "Other Notable Sites"
	880	.IX Subsection "Other Notable Sites"
	881	.IP "czyborra.com" 4
	882	.IX Item "czyborra.com"
	883	<http://czyborra.com/>
	884	.Sp
	885	Contains a lot of useful information, especially gory details of \s-1ISO\s0
	886	vs. vendor mappings.
	887	.IP "\s-1CJK\s0.inf" 4
	888	.IX Item "CJK.inf"
	889	<http://www.oreilly.com/people/authors/lunde/cjk_inf.html>
	890	.Sp
	891	Somewhat obsolete (last update in 1996), but still useful. Also try
	892	.Sp
	893	<ftp://ftp.oreilly.com/pub/examples/nutshell/cjkv/pdf/GB18030_Summary.pdf>
	894	.Sp
	895	You will find brief info on \f(CW\*(C`EUC\-CN\*(C'\fR, \f(CW\*(C`GBK\*(C'\fR and mostly on \f(CW\*(C`GB 18030\*(C'\fR.
	896	.IP "Jungshik Shin's Hangul \s-1FAQ\s0" 4
	897	.IX Item "Jungshik Shin's Hangul FAQ"
	898	<http://jshin.net/faq>
	899	.Sp
	900	And especially its subject 8.
	901	.Sp
	902	<http://jshin.net/faq/qa8.html>
	903	.Sp
	904	A comprehensive overview of the Korean (\f(CW\*(C`KS *\*(C'\fR) standards.
	905	.ie n .IP "debian.org: ""Introduction to i18n""" 4
	906	.el .IP "debian.org: ``Introduction to i18n''" 4
	907	.IX Item "debian.org: Introduction to i18n"
	908	A brief description for most of the mentioned \s-1CJK\s0 encodings is
	909	contained in
	910	<http://www.debian.org/doc/manuals/intro\-i18n/ch\-codes.en.html>
	911	.Sh "Offline sources"
	912	.IX Subsection "Offline sources"
	913	.ie n .IP """CJKV Information Processing"" by Ken Lunde" 4
	914	.el .IP "\f(CWCJKV Information Processing\fR by Ken Lunde" 4
	915	.IX Item "CJKV Information Processing by Ken Lunde"
	916	\&\s-1CJKV\s0 Information Processing
	917	1999 O'Reilly & Associates, \s-1ISBN\s0 : 1\-56592\-224\-7
	918	.Sp
	919	The modern successor of \f(CW\*(C`CJK.inf\*(C'\fR.
	920	.Sp
	921	Features a comprehensive coverage of \s-1CJKV\s0 character sets and
	922	encodings along with many other issues faced by anyone trying
	923	to better support \s-1CJKV\s0 languages/scripts in all the areas of
	924	information processing.
	925	.Sp
	926	To purchase this book, visit
	927	<http://www.oreilly.com/catalog/cjkvinfo/>
	928	or your favourite bookstore.