Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | package Encode::JP; | # Japanese encodings for Encode; this file only wires the pieces together.
2 | BEGIN { | | # Compile-time guard: it runs before the `use` lines below are processed.
3 | if (ord("A") == 193) { | | # "A" has code 193 on EBCDIC platforms rather than ASCII's 65.
4 | die "Encode::JP not supported on EBCDIC\n"; | | # The trailing "\n" stops Perl appending file/line to the message.
5 | } | |
6 | } | |
7 | use Encode; | |
8 | our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; | | # Digit groups of the RCS Revision keyword, formatted "%d" then "%02d" per extra group: "2.1" becomes "2.01".
9 | ||
10 | use XSLoader; | |
11 | XSLoader::load(__PACKAGE__,$VERSION); | | # Load this package's compiled (XS) half, passing $VERSION along.
12 | ||
13 | use Encode::JP::JIS7; | | # NOTE(review): presumably supplies the 7bit-jis / iso-2022-jp(-1) encodings -- confirm.
14 | ||
15 | 1; | | # A module file must end with a true value so require/use succeeds.
16 | __END__ | |
17 | ||
18 | =head1 NAME | |
19 | ||
20 | Encode::JP - Japanese Encodings | |
21 | ||
22 | =head1 SYNOPSIS | |
23 | ||
24 | use Encode qw/encode decode/; | |
25 | $euc_jp = encode("euc-jp", $utf8); # loads Encode::JP implicitly | |
26 | $utf8 = decode("euc-jp", $euc_jp); # ditto | |
27 | ||
28 | =head1 ABSTRACT | |
29 | ||
30 | This module implements Japanese charset encodings. Encodings | |
31 | supported are as follows. | |
32 | ||
33 | Canonical Alias Description | |
34 | -------------------------------------------------------------------- | |
35 | euc-jp /\beuc.*jp$/i EUC (Extended Unix Code) | |
36 | /\bjp.*euc/i | |
37 | /\bujis$/i | |
38 | shiftjis /\bshift.*jis$/i Shift JIS (aka MS Kanji) | |
39 | /\bsjis$/i | |
40 | 7bit-jis /\bjis$/i 7bit JIS | |
41 | iso-2022-jp ISO-2022-JP [RFC1468] | |
42 | = 7bit JIS with all Halfwidth Kana | |
43 | converted to Fullwidth | |
44 | iso-2022-jp-1 ISO-2022-JP-1 [RFC2237] | |
45 | = ISO-2022-JP with JIS X 0212-1990 | |
46 | support. See below | |
47 | MacJapanese Shift JIS + Apple vendor mappings | |
48 | cp932 /\bwindows-31j$/i Code Page 932 | |
49 | = Shift JIS + MS/IBM vendor mappings | |
50 | jis0201-raw JIS0201, raw format | |
51 | jis0208-raw JIS0208, raw format | |
52 | jis0212-raw JIS0212, raw format | |
53 | -------------------------------------------------------------------- | |
54 | ||
55 | =head1 DESCRIPTION | |
56 | ||
57 | To find out how to use this module in detail, see L<Encode>. | |
58 | ||
59 | =head1 Note on ISO-2022-JP(-1)? | |
60 | ||
61 | ISO-2022-JP-1 (RFC2237) is a superset of ISO-2022-JP (RFC1468) which | |
62 | adds support for JIS X 0212-1990. That means you can use the same | |
63 | code to decode to utf8 but not vice versa. | |
64 | ||
65 | $utf8 = decode('iso-2022-jp-1', $stream); | |
66 | ||
67 | and | |
68 | ||
69 | $utf8 = decode('iso-2022-jp', $stream); | |
70 | ||
71 | yield the same result but | |
72 | ||
73 | $with_0212 = encode('iso-2022-jp-1', $utf8); | |
74 | ||
75 | is now different from | |
76 | ||
77 | $without_0212 = encode('iso-2022-jp', $utf8 ); | |
78 | ||
79 | In the latter case, characters that map to 0212 are first converted | |
80 | to U+3013 (0xA2AE in EUC-JP; a white square also known as 'Tofu' or | |
81 | 'geta mark') then fed to the encoding engine. U+FFFD is not used, | |
82 | in order to preserve text layout as much as possible. | |
83 | ||
84 | =head1 BUGS | |
85 | ||
86 | The ASCII region (0x00-0x7f) is preserved for all encodings, even | |
87 | though this conflicts with mappings by the Unicode Consortium. See | |
88 | ||
89 | L<http://www.debian.or.jp/~kubota/unicode-symbols.html.en> | |
90 | ||
91 | to find out why it is implemented that way. | |
92 | ||
93 | =head1 SEE ALSO | |
94 | ||
95 | L<Encode> | |
96 | ||
97 | =cut |