| 1 | package Encode::CN::HZ; |
| 2 | |
| 3 | use strict; |
| 4 | |
| 5 | use vars qw($VERSION); |
| 6 | $VERSION = do { my @r = (q$Revision: 1.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; |
| 7 | |
| 8 | use Encode (); |
| 9 | |
| 10 | use base qw(Encode::Encoding); |
| 11 | __PACKAGE__->Define('hz'); |
| 12 | |
| 13 | # HZ is only escaped GB, so we implement it with the |
| 14 | # GB2312(raw) encoding here. Cf. RFCs 1842 & 1843. |
| 15 | |
| 16 | |
| 17 | |
| 18 | sub needs_lines { 1 } |
| 19 | |
| 20 | sub perlio_ok { |
| 21 | return 0; # for the time being |
| 22 | } |
| 23 | |
| 24 | sub decode |
| 25 | { |
| 26 | my ($obj,$str,$chk) = @_; |
| 27 | my $gb = Encode::find_encoding('gb2312-raw'); |
| 28 | |
| 29 | $str =~ s{~ # starting tilde |
| 30 | (?: |
| 31 | (~) # another tilde - escaped (set $1) |
| 32 | | # or |
| 33 | \n # \n - output nothing |
| 34 | | # or |
| 35 | \{ # opening brace of GB data |
| 36 | ( # set $2 to any number of... |
| 37 | (?: |
| 38 | [^~] # non-tilde GB character |
| 39 | | # or |
| 40 | ~(?!\}) # tilde not followed by a closing brace |
| 41 | )* |
| 42 | ) |
| 43 | ~\} # closing brace of GB data |
| 44 | | # XXX: invalid escape - maybe die on $chk? |
| 45 | ) |
| 46 | }{ |
| 47 | (defined $1) ? '~' # two tildes make one tilde |
| 48 | : |
| 49 | (defined $2) ? $gb->decode($2, $chk) # decode the characters |
| 50 | : |
| 51 | '' # ~\n and invalid escape = '' |
| 52 | }egx; |
| 53 | |
| 54 | return $str; |
| 55 | } |
| 56 | |
| 57 | sub encode |
| 58 | { |
| 59 | my ($obj,$str,$chk) = @_; |
| 60 | my ($out, $in_gb); |
| 61 | my $gb = Encode::find_encoding('gb2312-raw'); |
| 62 | |
| 63 | $str =~ s/~/~~/g; |
| 64 | |
| 65 | # XXX: Since CHECK and partial decoding has not been implemented yet, |
| 66 | # we'll use a very crude way to test for GB2312ness. |
| 67 | |
| 68 | for my $index (0 .. length($str) - 1) { |
| 69 | no warnings 'utf8'; |
| 70 | |
| 71 | my $char = substr($str, $index, 1); |
| 72 | # try to encode this character |
| 73 | # with CHECK on so it stops at proper place. |
| 74 | # also note that the assignement was braced in eval |
| 75 | # -- dankogai |
| 76 | my $try; |
| 77 | eval{ $try = $gb->encode($char, 1) }; |
| 78 | |
| 79 | if (defined($try)) { # is a GB character: |
| 80 | if ($in_gb) { |
| 81 | $out .= $try; # in GB mode - just append it |
| 82 | } |
| 83 | else { |
| 84 | $in_gb = 1; # enter GB mode, then append it |
| 85 | $out .= "~{$try"; |
| 86 | } |
| 87 | } # not a GB character: |
| 88 | elsif ($in_gb) { |
| 89 | $in_gb = 0; # leave GB mode, then append it |
| 90 | $out .= "~}$char"; |
| 91 | } |
| 92 | else { |
| 93 | $out .= $char; # not in GB mode - just append it |
| 94 | } |
| 95 | } |
| 96 | |
| 97 | $out .= '~}' if $in_gb; # add closing brace if needed |
| 98 | |
| 99 | return $out; |
| 100 | } |
| 101 | |
| 102 | 1; |
| 103 | __END__ |
| 104 | |
| 105 | |
| 106 | =head1 NAME |
| 107 | |
| 108 | Encode::CN::HZ -- internally used by Encode::CN |
| 109 | |
| 110 | =cut |