Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | # |
2 | # $Id: Encoder.pm,v 2.0 2004/05/16 20:55:17 dankogai Exp $ | |
3 | # | |
package Encode::Encoder;
use strict;
use warnings;

# Build the version string from the RCS Revision keyword: the first number
# is printed as-is, every following number zero-padded to two digits.
our $VERSION = do {
    my @digits = ( q$Revision: 2.0 $ =~ /\d+/g );
    sprintf '%d.' . '%02d' x $#digits, @digits;
};

require Exporter;
our @ISA       = ('Exporter');
our @EXPORT_OK = ('encoder');    # encoder() is exported only on request

# Filled in by perl with the fully-qualified name of the method that
# triggered AUTOLOAD.
our $AUTOLOAD;

# Constant debug switch; when true, AUTOLOAD and DESTROY emit warnings.
sub DEBUG () { 0 }

use Encode qw(encode decode find_encoding from_to);
use Carp;
17 | ||
# Constructor: CLASS->new([$data [, $encname]]).
#
# $data is stored unchanged.  When $encname is supplied it must be known
# to find_encoding() (otherwise we croak), and the name reported by the
# encoding object is what gets stored.  When $encname is omitted (or
# false), the encoding becomes 'utf8' if Encode::is_utf8($data) is true
# and '' otherwise.
#
# Returns a hash-based object with the keys {data} and {encoding},
# blessed into the invoking class.
sub new {
    my ( $class, $data, $encname ) = @_;

    my $canonical;
    if ($encname) {
        my $enc = find_encoding($encname)
          or croak __PACKAGE__, ": unknown encoding: $encname";
        $canonical = $enc->name;
    }
    else {
        $canonical = Encode::is_utf8($data) ? 'utf8' : '';
    }

    return bless { data => $data, encoding => $canonical }, $class;
}
33 | ||
# Functional shortcut for the constructor (listed in @EXPORT_OK):
# hands every argument straight to __PACKAGE__->new and returns its result.
sub encoder {
    return __PACKAGE__->new(@_);
}
35 | ||
# Accessor for the instance data.
#   $e->data($value)  stores $value (when it is defined) and returns $value;
#   $e->data          returns the stored data.
# An undefined argument is indistinguishable from no argument, so it
# reads the current data rather than clearing it.
sub data {
    my ( $self, $data ) = @_;

    return $self->{data} unless defined $data;

    $self->{data} = $data;
    return $data;
}
45 | ||
# Accessor for the instance encoding.
#   $e->encoding($name)  looks $name up with find_encoding(), stores the
#                        name reported by the encoding object and
#                        returns the object itself;
#   $e->encoding         returns the stored encoding name.
# A false $name (undef, '', 0) is treated as "no argument".
# Confesses when $name is not a known encoding.
sub encoding {
    my ( $self, $encname ) = @_;

    return $self->{encoding} unless $encname;

    my $enc = find_encoding($encname)
      or confess __PACKAGE__, ": unknown encoding: $encname";
    $self->{encoding} = $enc->name;
    return $self;
}
57 | ||
# Decode the instance data in place.
#   $e->bytes([$encname])
# The source encoding is $encname when given (and true), otherwise the
# instance encoding.  The data is replaced by the result of the encoding
# object's decode() called with a CHECK argument of 1, and the instance
# encoding is reset to ''.
# Confesses when the source encoding is unknown; returns the object.
sub bytes {
    my ( $self, $encname ) = @_;

    my $source = $encname || $self->{encoding};
    my $enc    = find_encoding($source)
      or confess __PACKAGE__, ": unknown encoding: $source";

    $self->{data}     = $enc->decode( $self->{data}, 1 );
    $self->{encoding} = '';
    return $self;
}
67 | ||
# Defined explicitly so that object destruction is never dispatched to
# AUTOLOAD.  Warns with the object itself, but only when DEBUG is true.
sub DESTROY {
    warn $_[0] if DEBUG;
}
71 | ||
# Catch-all for undefined methods: the method name is taken as the name
# of an encoding and the instance data is converted to it.
#
#   - Confesses when invoked on something that is not a reference
#     (e.g. as a class method) or when the name is not a known encoding.
#   - When the instance already has a (true) encoding, the data is
#     transcoded in place with from_to(); otherwise it is encoded with
#     the target encoding object's encode().  Both are called with a
#     CHECK argument of 1.
#   - Afterwards the instance encoding is the name reported by the target
#     encoding object.
#
# Returns the object itself so that calls can be chained.
sub AUTOLOAD {
    my $self = shift;
    confess "$self is not an object" unless ref $self;

    # Strip the fully-qualified portion, keeping only the method name.
    ( my $method = $AUTOLOAD ) =~ s/.*://;

    my $target = find_encoding($method)
      or confess __PACKAGE__, ": unknown encoding: $method";
    my $target_name = $target->name;

    DEBUG and warn $self->{encoding}, " => ", $target_name;

    if ( $self->{encoding} ) {
        from_to( $self->{data}, $self->{encoding}, $target_name, 1 );
    }
    else {
        $self->{data} = $target->encode( $self->{data}, 1 );
    }

    $self->{encoding} = $target_name;
    return $self;
}
89 | ||
# Load bytes.pm without importing it, so bytes::length() is available
# while byte semantics stay switched off for this file.
use bytes ();

# Operator overloading:
#   ""  (stringify)  yields the instance data;
#   0+  (numify)     yields the length of the instance data in bytes.
# fallback => 1 allows perl to derive other operators from these two.
use overload
  '""' => sub {
    my ($self) = @_;
    return $self->{data};
  },
  '0+' => sub {
    my ($self) = @_;
    return bytes::length( $self->{data} );
  },
  fallback => 1;
95 | ||
96 | 1; | |
97 | __END__ | |
98 | ||
99 | =head1 NAME | |
100 | ||
101 | Encode::Encoder -- Object Oriented Encoder | |
102 | ||
103 | =head1 SYNOPSIS | |
104 | ||
105 | use Encode::Encoder; | |
106 | # Encode::encode("ISO-8859-1", $data); | |
107 | Encode::Encoder->new($data)->iso_8859_1; # OOP way | |
108 | # shortcut | |
109 | use Encode::Encoder qw(encoder); | |
110 | encoder($data)->iso_8859_1; | |
111 | # you can stack them! | |
112 | encoder($data)->iso_8859_1->base64; # provided base64() is defined | |
113 | # you can use it as a decoder as well | |
114 | encoder($base64)->bytes('base64')->latin1; | |
115 | # stringified | |
116 | print encoder($data)->utf8->latin1; # prints the string in latin1 | |
117 | # numified | |
118 | encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data) | |
119 | ||
120 | =head1 ABSTRACT | |
121 | ||
122 | B<Encode::Encoder> allows you to use Encode in an object-oriented | |
123 | style. This is not only more intuitive than a functional approach, | |
124 | but also handier when you want to stack encodings. Suppose you want | |
125 | your UTF-8 string converted to Latin1 then Base64: you can simply say | |
126 | ||
127 | my $base64 = encoder($utf8)->latin1->base64; | |
128 | ||
129 | instead of | |
130 | ||
131 | my $latin1 = encode("latin1", $utf8); | |
132 | my $base64 = encode_base64($latin1); | |
133 | ||
134 | or the lazier and more convoluted | |
135 | ||
136 | my $base64 = encode_base64(encode("latin1", $utf8)); | |
137 | ||
138 | =head1 DESCRIPTION | |
139 | ||
140 | Here is how to use this module. | |
141 | ||
142 | =over 4 | |
143 | ||
144 | =item * | |
145 | ||
146 | There are at least two instance variables stored in a hash reference, | |
147 | {data} and {encoding}. | |
148 | ||
149 | =item * | |
150 | ||
151 | When a method that is not predefined is called, its name is taken as | |
152 | the name of an encoding and the instance I<data> is encoded with that | |
153 | I<encoding>. If successful, the instance I<encoding> is set accordingly. | |
154 | ||
155 | =item * | |
156 | ||
157 | You can retrieve the result via -E<gt>data but usually you don't have to | |
158 | because the stringify operator ("") is overridden to do exactly that. | |
159 | ||
160 | =back | |
161 | ||
162 | =head2 Predefined Methods | |
163 | ||
164 | This module predefines the methods below: | |
165 | ||
166 | =over 4 | |
167 | ||
168 | =item $e = Encode::Encoder-E<gt>new([$data, $encoding]); | |
169 | ||
170 | returns an encoder object. Its data is initialized with $data if | |
171 | present, and its encoding is set to $encoding if present. | |
172 | ||
173 | When $encoding is omitted, it defaults to utf8 if $data is already in | |
174 | utf8 or "" (empty string) otherwise. | |
175 | ||
176 | =item encoder() | |
177 | ||
178 | is an alias of Encode::Encoder-E<gt>new(). This one is exported on demand. | |
179 | ||
180 | =item $e-E<gt>data([$data]) | |
181 | ||
182 | When $data is defined, sets the instance data to $data and returns | |
183 | $data. Otherwise, the current instance data is returned. | |
184 | ||
185 | =item $e-E<gt>encoding([$encoding]) | |
186 | ||
187 | When $encoding is present, sets the instance encoding to $encoding and | |
188 | returns the object itself. Otherwise, the current instance encoding is | |
189 | returned. | |
190 | ||
191 | =item $e-E<gt>bytes([$encoding]) | |
192 | ||
193 | decodes instance data from $encoding, or the instance encoding if | |
194 | omitted. If the conversion is successful, the instance encoding | |
195 | will be set to "". | |
196 | ||
197 | The name I<bytes> was deliberately picked to avoid namespace tainting | |
198 | -- this module may be used as a base class so method names that appear | |
199 | in Encode::Encoding are avoided. | |
200 | ||
201 | =back | |
202 | ||
203 | =head2 Example: base64 transcoder | |
204 | ||
205 | This module is designed to work with L<Encode::Encoding>. | |
206 | To make the Base64 transcoder example above really work, you could | |
207 | write a module like this: | |
208 | ||
209 | package Encode::Base64; | |
210 | use base 'Encode::Encoding'; | |
211 | __PACKAGE__->Define('base64'); | |
212 | use MIME::Base64; | |
213 | sub encode{ | |
214 | my ($obj, $data) = @_; | |
215 | return encode_base64($data); | |
216 | } | |
217 | sub decode{ | |
218 | my ($obj, $data) = @_; | |
219 | return decode_base64($data); | |
220 | } | |
221 | 1; | |
222 | __END__ | |
223 | ||
224 | And your caller module would be something like this: | |
225 | ||
226 | use Encode::Encoder; | |
227 | use Encode::Base64; | |
228 | ||
229 | # now you can really do the following | |
230 | ||
231 | encoder($data)->iso_8859_1->base64; | |
232 | encoder($base64)->bytes('base64')->latin1; | |
233 | ||
234 | =head2 Operator Overloading | |
235 | ||
236 | This module overloads two operators, stringify ("") and numify (0+). | |
237 | ||
238 | Stringify dumps the data inside the object. | |
239 | ||
240 | Numify returns the number of bytes in the instance data. | |
241 | ||
242 | They come in handy when you want to print or find the size of data. | |
243 | ||
244 | =head1 SEE ALSO | |
245 | ||
246 | L<Encode>, | |
247 | L<Encode::Encoding> | |
248 | ||
249 | =cut |