Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | package Encode::CN::HZ; |
2 | ||
3 | use strict; | |
4 | ||
5 | use vars qw($VERSION); | |
6 | $VERSION = do { my @r = (q$Revision: 1.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; | |
7 | ||
8 | use Encode (); | |
9 | ||
10 | use base qw(Encode::Encoding); | |
11 | __PACKAGE__->Define('hz'); | |
12 | ||
13 | # HZ is only escaped GB, so we implement it with the | |
14 | # GB2312(raw) encoding here. Cf. RFCs 1842 & 1843. | |
15 | ||
16 | ||
17 | ||
18 | sub needs_lines { 1 } | |
19 | ||
20 | sub perlio_ok { | |
21 | return 0; # for the time being | |
22 | } | |
23 | ||
24 | sub decode | |
25 | { | |
26 | my ($obj,$str,$chk) = @_; | |
27 | my $gb = Encode::find_encoding('gb2312-raw'); | |
28 | ||
29 | $str =~ s{~ # starting tilde | |
30 | (?: | |
31 | (~) # another tilde - escaped (set $1) | |
32 | | # or | |
33 | \n # \n - output nothing | |
34 | | # or | |
35 | \{ # opening brace of GB data | |
36 | ( # set $2 to any number of... | |
37 | (?: | |
38 | [^~] # non-tilde GB character | |
39 | | # or | |
40 | ~(?!\}) # tilde not followed by a closing brace | |
41 | )* | |
42 | ) | |
43 | ~\} # closing brace of GB data | |
44 | | # XXX: invalid escape - maybe die on $chk? | |
45 | ) | |
46 | }{ | |
47 | (defined $1) ? '~' # two tildes make one tilde | |
48 | : | |
49 | (defined $2) ? $gb->decode($2, $chk) # decode the characters | |
50 | : | |
51 | '' # ~\n and invalid escape = '' | |
52 | }egx; | |
53 | ||
54 | return $str; | |
55 | } | |
56 | ||
57 | sub encode | |
58 | { | |
59 | my ($obj,$str,$chk) = @_; | |
60 | my ($out, $in_gb); | |
61 | my $gb = Encode::find_encoding('gb2312-raw'); | |
62 | ||
63 | $str =~ s/~/~~/g; | |
64 | ||
65 | # XXX: Since CHECK and partial decoding has not been implemented yet, | |
66 | # we'll use a very crude way to test for GB2312ness. | |
67 | ||
68 | for my $index (0 .. length($str) - 1) { | |
69 | no warnings 'utf8'; | |
70 | ||
71 | my $char = substr($str, $index, 1); | |
72 | # try to encode this character | |
73 | # with CHECK on so it stops at proper place. | |
74 | # also note that the assignement was braced in eval | |
75 | # -- dankogai | |
76 | my $try; | |
77 | eval{ $try = $gb->encode($char, 1) }; | |
78 | ||
79 | if (defined($try)) { # is a GB character: | |
80 | if ($in_gb) { | |
81 | $out .= $try; # in GB mode - just append it | |
82 | } | |
83 | else { | |
84 | $in_gb = 1; # enter GB mode, then append it | |
85 | $out .= "~{$try"; | |
86 | } | |
87 | } # not a GB character: | |
88 | elsif ($in_gb) { | |
89 | $in_gb = 0; # leave GB mode, then append it | |
90 | $out .= "~}$char"; | |
91 | } | |
92 | else { | |
93 | $out .= $char; # not in GB mode - just append it | |
94 | } | |
95 | } | |
96 | ||
97 | $out .= '~}' if $in_gb; # add closing brace if needed | |
98 | ||
99 | return $out; | |
100 | } | |
101 | ||
102 | 1; | |
103 | __END__ | |
104 | ||
105 | ||
106 | =head1 NAME | |
107 | ||
108 | Encode::CN::HZ -- internally used by Encode::CN | |
109 | ||
110 | =cut |