package Encode::JP;

# Entry point for the Japanese encodings of Encode.  Loading this file
# enables strictures, refuses to run on EBCDIC platforms, loads the XS
# object for this package via XSLoader, and then loads Encode::JP::JIS7.
use strict;
use warnings;

# Bail out at compile time on EBCDIC platforms, detected by "A" having
# the code point 193 (0xC1) instead of the ASCII value 65.  The trailing
# newline keeps file/line information out of the error message.
BEGIN {
    if ( ord("A") == 193 ) {
        die "Encode::JP not supported on EBCDIC\n";
    }
}

use Encode;

# Derive $VERSION from the RCS "Revision" keyword: collect every run of
# digits, print the first as-is and each following one zero-padded to two
# digits (so revision 2.1 becomes "2.01").  Note that q$...$ uses "$" as
# the quote delimiter.  The assignment is kept on a single line on purpose,
# because tools such as ExtUtils::MakeMaker extract the version by
# evaluating the one line that assigns to $VERSION.
our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };

# Load the compiled (XS) part of this package, passing $VERSION along so
# the loader can check it against the compiled object.
use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );

use Encode::JP::JIS7;

1;
__END__
| 17 | |
| 18 | =head1 NAME |
| 19 | |
| 20 | Encode::JP - Japanese Encodings |
| 21 | |
| 22 | =head1 SYNOPSIS |
| 23 | |
| 24 | use Encode qw/encode decode/; |
| 25 | $euc_jp = encode("euc-jp", $utf8); # loads Encode::JP implicitly |
| 26 | $utf8 = decode("euc-jp", $euc_jp); # ditto |
| 27 | |
| 28 | =head1 ABSTRACT |
| 29 | |
| 30 | This module implements Japanese charset encodings. Encodings |
| 31 | supported are as follows. |
| 32 | |
| 33 | Canonical Alias Description |
| 34 | -------------------------------------------------------------------- |
| 35 | euc-jp /\beuc.*jp$/i EUC (Extended Unix Character) |
| 36 | /\bjp.*euc/i |
| 37 | /\bujis$/i |
| 38 | shiftjis /\bshift.*jis$/i Shift JIS (aka MS Kanji) |
| 39 | /\bsjis$/i |
| 40 | 7bit-jis /\bjis$/i 7bit JIS |
| 41 | iso-2022-jp ISO-2022-JP [RFC1468] |
| 42 | = 7bit JIS with all Halfwidth Kana |
| 43 | converted to Fullwidth |
| 44 | iso-2022-jp-1 ISO-2022-JP-1 [RFC2237] |
| 45 | = ISO-2022-JP with JIS X 0212-1990 |
| 46 | support. See below |
| 47 | MacJapanese Shift JIS + Apple vendor mappings |
| 48 | cp932 /\bwindows-31j$/i Code Page 932 |
| 49 | = Shift JIS + MS/IBM vendor mappings |
| 50 | jis0201-raw JIS0201, raw format |
| 51 | jis0208-raw JIS0208, raw format |
| 52 | jis0212-raw JIS0212, raw format |
| 53 | -------------------------------------------------------------------- |
| 54 | |
| 55 | =head1 DESCRIPTION |
| 56 | |
| 57 | To find out how to use this module in detail, see L<Encode>. |
| 58 | |
| 59 | =head1 Note on ISO-2022-JP(-1)? |
| 60 | |
| 61 | ISO-2022-JP-1 (RFC2237) is a superset of ISO-2022-JP (RFC1468) which |
| 62 | adds support for JIS X 0212-1990. That means you can use the same |
| 63 | code to decode to utf8 but not vice versa. |
| 64 | |
| 65 | $utf8 = decode('iso-2022-jp-1', $stream); |
| 66 | |
| 67 | and |
| 68 | |
| 69 | $utf8 = decode('iso-2022-jp', $stream); |
| 70 | |
| 71 | yield the same result but |
| 72 | |
| 73 | $with_0212 = encode('iso-2022-jp-1', $utf8); |
| 74 | |
| 75 | is now different from |
| 76 | |
| 77 | $without_0212 = encode('iso-2022-jp', $utf8 ); |
| 78 | |
| 79 | In the latter case, characters that map to 0212 are first converted |
| 80 | to U+3013 (0xA2AE in EUC-JP; a white square also known as 'Tofu' or |
| 81 | 'geta mark') then fed to the encoding engine. U+FFFD is not used, |
| 82 | in order to preserve text layout as much as possible. |
| 83 | |
| 84 | =head1 BUGS |
| 85 | |
| 86 | The ASCII region (0x00-0x7f) is preserved for all encodings, even |
| 87 | though this conflicts with mappings by the Unicode Consortium. See |
| 88 | |
| 89 | L<http://www.debian.or.jp/~kubota/unicode-symbols.html.en> |
| 90 | |
| 91 | to find out why it is implemented that way. |
| 92 | |
| 93 | =head1 SEE ALSO |
| 94 | |
| 95 | L<Encode> |
| 96 | |
| 97 | =cut |