# $Id: Encoder.pm,v 2.0 2004/05/16 20:55:17 dankogai Exp $
our $VERSION = do { my @r = (q
$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x
$#r, @r };
our @EXPORT_OK = qw
( encoder
);
use Encode
qw(encode decode find_encoding from_to);
my ($class, $data, $encname) = @_;
$encname = Encode
::is_utf8
($data) ?
'utf8' : '';
my $obj = find_encoding
($encname)
or croak __PACKAGE__
, ": unknown encoding: $encname";
sub encoder
{ __PACKAGE__
->new(@_) }
my ($self, $encname) = @_;
my $obj = find_encoding
($encname)
or confess __PACKAGE__
, ": unknown encoding: $encname";
$self->{encoding
} = $obj->name;
my ($self, $encname) = @_;
$encname ||= $self->{encoding
};
my $obj = find_encoding
($encname)
or confess __PACKAGE__
, ": unknown encoding: $encname";
$self->{data
} = $obj->decode($self->{data
}, 1);
sub DESTROY
{ # defined so it won't autoload.
or confess
"$self is not an object";
$myname =~ s/.*://; # strip fully-qualified portion
my $obj = find_encoding
($myname)
or confess __PACKAGE__
, ": unknown encoding: $myname";
DEBUG
and warn $self->{encoding
}, " => ", $obj->name;
from_to
($self->{data
}, $self->{encoding
}, $obj->name, 1);
$self->{data
} = $obj->encode($self->{data
}, 1);
$self->{encoding
} = $obj->name;
q
("") => sub { $_[0]->{data
} },
q
(0+) => sub { use bytes
(); bytes
::length($_[0]->{data
}) },
Encode::Encoder -- Object Oriented Encoder
# Encode::encode("ISO-8859-1", $data);
Encode::Encoder->new($data)->iso_8859_1; # OOP way
use Encode::Encoder qw(encoder);
encoder($data)->iso_8859_1;
encoder($data)->iso_8859_1->base64; # provided base64() is defined
# you can use it as a decoder as well
encoder($base64)->bytes('base64')->latin1;
print encoder($data)->utf8->latin1; # prints the string in latin1
encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data)
B<Encode::Encoder> allows you to use Encode in an object-oriented
style. This is not only more intuitive than a functional approach,
but also handier when you want to stack encodings. Suppose you want
your UTF-8 string converted to Latin1 then Base64: you can simply say
my $base64 = encoder($utf8)->latin1->base64;
my $latin1 = encode("latin1", $utf8);
my $base64 = encode_base64($utf8);
or the lazier and more convoluted
my $base64 = encode_base64(encode("latin1", $utf8));
Here is how to use this module.
There are at least two instance variables stored in a hash reference,
When there is no method, it takes the method name as the name of the
encoding and encodes the instance I<data> with I<encoding>. If successful,
the instance I<encoding> is set accordingly.
You can retrieve the result via -E<gt>data but usually you don't have to
because the stringify operator ("") is overridden to do exactly that.
=head2 Predefined Methods
This module predefines the methods below:
=item $e = Encode::Encoder-E<gt>new([$data, $encoding]);
returns an encoder object. Its data is initialized with $data if
present, and its encoding is set to $encoding if present.
When $encoding is omitted, it defaults to utf8 if $data is already in
utf8 or "" (empty string) otherwise.
is an alias of Encode::Encoder-E<gt>new(). This one is exported on demand.
=item $e-E<gt>data([$data])
When $data is present, sets the instance data to $data and returns the
object itself. Otherwise, the current instance data is returned.
=item $e-E<gt>encoding([$encoding])
When $encoding is present, sets the instance encoding to $encoding and
returns the object itself. Otherwise, the current instance encoding is
=item $e-E<gt>bytes([$encoding])
decodes instance data from $encoding, or the instance encoding if
omitted. If the conversion is successful, the instance encoding
The name I<bytes> was deliberately picked to avoid namespace tainting
-- this module may be used as a base class so method names that appear
in Encode::Encoding are avoided.
=head2 Example: base64 transcoder
This module is designed to work with L<Encode::Encoding>.
To make the Base64 transcoder example above really work, you could
write a module like this:
use base 'Encode::Encoding';
__PACKAGE__->Define('base64');
return encode_base64($data);
return decode_base64($data);
And your caller module would be something like this:
# now you can really do the following
encoder($data)->iso_8859_1->base64;
encoder($base64)->bytes('base64')->latin1;
=head2 Operator Overloading
This module overloads two operators, stringify ("") and numify (0+).
Stringify dumps the data inside the object.
Numify returns the number of bytes in the instance data.
They come in handy when you want to print or find the size of data.