.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
.\" ========================================================================
.de Sh \" Subsection heading
.de Sp \" Vertical space (when we can't use .PP)
.de Vb \" Begin verbatim text
.de Ve \" End verbatim text
.\" Set up some character translations and predefined strings. \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote. | will give a
.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
.\" expand to `' in nroff, nothing in troff, for use with C<>.
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
.\" If the F register is turned on, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
.\" entries marked with X<> in POD. Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
. tm Index:\\$1\t\\n%\t"\\$2"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear. Run. Save yourself. No user-serviceable parts.
. \" fudge factors for nroff and troff
. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
. \" simple accents for nroff and troff
. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
. \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
. \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
. \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
.\" ========================================================================
.TH PERLCN 1 "2006-01-07" "perl v5.8.8" "Perl Programmers Reference Guide"
perlcn \- ¼òÌåÖÐÎÄ Perl Ö¸ÄÏ
´Ó 5.8.0 °æ¿ªÊ¼, Perl ¾ß±¸ÁËÍêÉÆµÄ Unicode (ͳһÂë) Ö§Ô®,
Ò²Á¬´øÖ§Ô®ÁËÐí¶àÀ¶¡ÓïϵÒÔÍâµÄ±àÂ뷽ʽ; \s-1CJK\s0 (ÖÐÈÕº«) ±ãÊÇÆäÖеÄÒ»²¿·Ý.
Unicode Êǹú¼ÊÐԵıê×¼, ÊÔͼº¸ÇÊÀ½çÉÏËùÓеÄ×Ö·û: Î÷·½ÊÀ½ç, ¶«·½ÊÀ½ç,
ÒÔ¼°Á½Õß¼äµÄÒ»ÇÐ (Ï£À°ÎÄ, ÐðÀûÑÇÎÄ, ÑÇÀ²®ÎÄ, Ï£²®À´ÎÄ, Ó¡¶ÈÎÄ,
Ó¡µØ°²ÎÄ, µÈµÈ). ËüÒ²ÈÝÄÉÁ˶àÖÖ×÷ҵϵͳÓëƽ̨ (Èç \s-1PC\s0 ¼°Âó½ðËþ).
Perl ±¾ÉíÒÔ Unicode ½øÐвÙ×÷. Õâ±íʾ Perl ÄÚ²¿µÄ×Ö·û´®Êý¾Ý¿ÉÓà Unicode
±íʾ; Perl µÄº¯Ê½ÓëËã·û (ÀýÈçÕý¹æ±íʾʽ±È¶Ô) Ò²ÄÜ¶Ô Unicode ½øÐвÙ×÷.
ÔÚÊäÈë¼°Êä³öʱ, ΪÁË´¦ÀíÒÔ Unicode ֮ǰµÄ±àÂ뷽ʽ´æ·ÅµÄÊý¾Ý, Perl
ÌṩÁË Encode Õâ¸öÄ£¿é, ¿ÉÒÔÈÃÄãÇáÒ׵ضÁÈ¡¼°Ð´Èë¾ÉÓеıàÂëÊý¾Ý.
Encode ÑÓÉìÄ£¿éÖ§Ô®ÏÂÁмòÌåÖÐÎĵıàÂ뷽ʽ ('gb2312' ±íʾ 'euc\-cn'):
\& euc-cn Unix ÑÓÉì×Ö·û¼¯, Ò²¾ÍÊÇË׳ƵĹú±êÂë
\& gb2312-raw δ¾´¦ÀíµÄ (µÍ±ÈÌØ) GB2312 ×Ö·û±í
\& gb12345 δ¾´¦ÀíµÄÖйúÓ÷±ÌåÖÐÎıàÂë
\& iso-ir-165 GB2312 + GB6345 + GB8565 + ÐÂÔö×Ö·û
\& cp936 ×ÖÂëÒ³ 936, Ò²¿ÉÒÔÓà 'GBK' (À©³ä¹ú±êÂë) Ö¸Ã÷
\& hz 7 ±ÈÌØÒݳöʽ GB2312 ±àÂë
¾ÙÀýÀ´Ëµ, ½« EUC-CN ±àÂëµÄµµ°¸×ª³É Unicode, ìóÐè¼üÈëÏÂÁÐÖ¸Áî:
\& perl -Mencoding=euc-cn,STDOUT,utf8 -pe1 < file.euc-cn > file.utf8
Perl Ò²ÄÚ¸½ÁË \*(L"piconv\*(R", Ò»Ö§ÍêÈ«ÒÔ Perl д³ÉµÄ×Ö·ûת»»¹¤¾ß³ÌÐò, Ó÷¨ÈçÏÂ:
\& piconv -f euc-cn -t utf8 < file.euc-cn > file.utf8
\& piconv -f utf8 -t euc-cn < file.utf8 > file.euc-cn
ÁíÍâ, ÀûÓà encoding Ä£¿é, Äã¿ÉÒÔÇáÒ×д³öÒÔ×Ö·ûΪµ¥Î»µÄ³ÌÐòÂë, ÈçÏÂËùʾ:
\& # Æô¶¯ euc-cn ×Ö´®½âÎö; ±ê×¼Êä³öÈë¼°±ê×¼´íÎó¶¼ÉèΪ euc-cn ±àÂë
\& use encoding 'euc-cn', STDIN => 'euc-cn', STDOUT => 'euc-cn';
\& print length("ÂæÍÕ"); # 2 (Ë«ÒýºÅ±íʾ×Ö·û)
\& print length('ÂæÍÕ'); # 4 (µ¥ÒýºÅ±íʾ×Ö½Ú)
\& print index("×»×»½Ì»å", "»×»½"); # -1 (²»°üº¬´Ë×Ó×Ö·û´®)
\& print index('×»×»½Ì»å', '»×»½'); # 1 (´ÓµÚ¶þ¸ö×Ö½Ú¿ªÊ¼)
ÔÚ×îºóÒ»ÁÐÀý×ÓÀï, \*(L"×»\*(R" µÄµÚ¶þ¸ö×Ö½ÚÓë \*(L"×»\*(R" µÄµÚÒ»¸ö×Ö½Ú½áºÏ³É EUC-CN
ÂëµÄ \*(L"»×\*(R"; \*(L"×»\*(R" µÄµÚ¶þ¸ö×Ö½ÚÔòÓë \*(L"½Ì\*(R" µÄµÚÒ»¸ö×Ö½Ú½áºÏ³É \*(L"»½\*(R".
Õâ½â¾öÁËÒÔÇ° EUC-CN Âë±È¶Ô´¦ÀíÉϳ£¼ûµÄÎÊÌâ.
.IX Subsection "¶îÍâµÄÖÐÎıàÂë"
Èç¹ûÐèÒª¸ü¶àµÄÖÐÎıàÂë, ¿ÉÒÔ´Ó \s-1CPAN\s0 (<http://www.cpan.org/>) ÏÂÔØ
Encode::HanExtra Ä£¿é. ËüÄ¿Ç°ÌṩÏÂÁбàÂ뷽ʽ:
\& gb18030 À©³ä¹ýµÄ¹ú±êÂë, °üº¬·±ÌåÖÐÎÄ
ÁíÍâ, Encode::HanConvert Ä£¿éÔòÌṩÁ˼ò·±×ª»»ÓõÄÁ½ÖÖ±àÂë:
\& big5-simp Big5 ·±ÌåÖÐÎÄÓë Unicode ¼òÌåÖÐÎÄ»¥×ª
\& gbk-trad GBK ¼òÌåÖÐÎÄÓë Unicode ·±ÌåÖÐÎÄ»¥×ª
ÈôÏëÔÚ \s-1GBK\s0 Óë Big5 Ö®¼ä»¥×ª, Çë²Î¿¼¸ÃÄ£¿éÄÚ¸½µÄ b2g.pl Óë g2b.pl Á½Ö§³ÌÐò,
\& use Encode::HanConvert;
\& $euc_cn = big5_to_gb($big5); # ´Ó Big5 תΪ GBK
\& $big5 = gb_to_big5($euc_cn); # ´Ó GBK תΪ Big5
.IX Subsection "½øÒ»²½µÄÐÅÏ¢"
Çë²Î¿¼ Perl ÄÚ¸½µÄ´óÁ¿ËµÃ÷Îļþ (²»ÐÒÈ«ÊÇÓÃÓ¢ÎÄдµÄ), À´Ñ§Ï°¸ü¶à¹ØÓÚ
Perl µÄ֪ʶ, ÒÔ¼° Unicode µÄʹÓ÷½Ê½. ²»¹ý, ÍⲿµÄ×ÊÔ´Ï൱·á¸»:
.Sh "Ìṩ Perl ×ÊÔ´µÄÍøÖ·"
.IX Subsection "Ìṩ Perl ×ÊÔ´µÄÍøÖ·"
.IP "<http://www.perl.com/>" 4
.IX Item "<http://www.perl.com/>"
Perl µÄÊ×Ò³ (ÓÉÅ·À³Àñ¹«Ë¾Î¬»¤)
.IP "<http://www.cpan.org/>" 4
.IX Item "<http://www.cpan.org/>"
Perl ×ۺϵä²ØÍø (Comprehensive Perl Archive Network)
.IP "<http://lists.perl.org/>" 4
.IX Item "<http://lists.perl.org/>"
.IX Subsection "ѧϰ Perl µÄÍøÖ·"
.IP "<http://www.oreilly.com.cn/html/perl.html>" 4
.IX Item "<http://www.oreilly.com.cn/html/perl.html>"
¼òÌåÖÐÎÄ°æµÄÅ·À³Àñ Perl Êé½å
.IX Subsection "Perl ʹÓÃÕß¼¯»á"
.IP "<http://www.pm.org/groups/asia.shtml#China>" 4
.IX Item "<http://www.pm.org/groups/asia.shtml#China>"
.IX Subsection "Unicode Ïà¹ØÍøÖ·"
.IP "<http://www.unicode.org/>" 4
.IX Item "<http://www.unicode.org/>"
Unicode ѧÊõѧ»á (Unicode ±ê×¼µÄÖƶ¨Õß)
.IP "<http://www.cl.cam.ac.uk/%7Emgk25/unicode.html>" 4
.IX Item "<http://www.cl.cam.ac.uk/%7Emgk25/unicode.html>"
Unix/Linux É쵀 \s-1UTF\-8\s0 ¼° Unicode ´ð¿ÍÎÊ
Encode, Encode::CN, encoding, perluniintro, perlunicode
Jarkko Hietaniemi <jhi@iki.fi>
Autrijus Tang (ÌÆ×Úºº) <autrijus@autrijus.org>