Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / 5.8.0 / sun4-solaris / Encode / Encoding.pm
CommitLineData
86530b38
AT
1package Encode::Encoding;
2# Base class for classes which implement encodings
3use strict;
4our $VERSION = do { my @r = (q$Revision: 1.30 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
5
6require Encode;
7
# Register an encoding with Encode under its canonical name.
#
# Invocant: either a class name or an existing encoding object.
#   - class name: a new hash-based object holding the canonical name under
#     the 'Name' key is blessed into that class and registered;
#   - object: it is registered as-is.
# Arguments: the canonical name, then any further arguments (aliases, going
# by the SYNOPSIS), all handed on to Encode::define_encoding().
# Returns whatever Encode::define_encoding() returns.
sub Define {
    my $invocant  = shift;
    my $canonical = shift;

    # Only a plain class name needs a fresh instance.
    my $encoding = ref $invocant
        ? $invocant
        : bless( { Name => $canonical }, $invocant );

    return Encode::define_encoding( $encoding, $canonical, @_ );
}
16
# Accessor: returns the value stored under the object's 'Name' key
# (the canonical encoding name set by Define()).
sub name {
    my ($self) = @_;
    return $self->{'Name'};
}
# Placeholder for encodings that keep state: the default simply hands back
# the invocant itself.
sub new_sequence {
    my ($self) = @_;
    return $self;
}
19
# Default answer to "does this encoding need line buffering under PerlIO?":
# always 0 (false). Encodings that do need it override this method.
sub needs_lines {
    return 0;
}
21
# Reports whether PerlIO::encoding can be loaded.
# Returns 1 when the require succeeds, 0 when it dies.
sub perlio_ok {
    eval { require PerlIO::encoding };
    return 0 if $@;    # the require died
    return 1;
}
26
27# Temporary legacy methods
# Legacy alias: forwards all arguments to the invocant's decode() method
# and returns its result.
sub toUnicode {
    my $self = shift;
    return $self->decode(@_);
}
# Legacy alias: forwards all arguments to the invocant's encode() method
# and returns its result.
sub fromUnicode {
    my $self = shift;
    return $self->encode(@_);
}
30
#
# Subclasses must override encode() and decode(); the defaults below just croak.
#
34
# Default encode(): a placeholder that always fails.
#
# Concrete encoding classes are expected to supply their own encode().
# Reaching this one croaks with "<class>->encode() not defined!", where
# <class> is the class of the invocant (or the invocant itself when it is
# not a reference, i.e. a class-method call).
sub encode {
    require Carp;
    my $obj   = shift;
    my $class = ref($obj) || $obj;

    # Parenthesised call: Carp is only loaded at run time, so Carp::croak
    # may not be a known sub when this file is compiled, and the paren-less
    # list-operator form would then fail to parse.
    Carp::croak( $class . "->encode() not defined!" );
}
41
# Default decode(): a placeholder that always fails.
#
# Concrete encoding classes are expected to supply their own decode().
# Reaching this one croaks with "<class>->decode() not defined!", where
# <class> is the class of the invocant (or the invocant itself when it is
# not a reference, i.e. a class-method call).
sub decode {
    require Carp;
    my $obj   = shift;
    my $class = ref($obj) || $obj;

    # The message names decode() (it previously said encode(), which sent
    # readers looking at the wrong method). Parenthesised call because Carp
    # is only loaded at run time and croak may be unknown at compile time.
    Carp::croak( $class . "->decode() not defined!" );
}
48
# Explicit no-op destructor: nothing to clean up for the base class.
sub DESTROY {
    return;
}
50
511;
52__END__
53
54=head1 NAME
55
56Encode::Encoding - Encode Implementation Base Class
57
58=head1 SYNOPSIS
59
60 package Encode::MyEncoding;
61 use base qw(Encode::Encoding);
62
63 __PACKAGE__->Define(qw(myCanonical myAlias));
64
65=head1 DESCRIPTION
66
67As mentioned in L<Encode>, encodings are (in the current
68implementation at least) defined as objects. The mapping of encoding
69name to object is via the C<%Encode::Encoding> hash. Though you can
70directly manipulate this hash, it is strongly encouraged to use this
71base class module and add encode() and decode() methods.
72
73=head2 Methods you should implement
74
75You are strongly encouraged to implement methods below, at least
76either encode() or decode().
77
78=over 4
79
80=item -E<gt>encode($string [,$check])
81
82MUST return the octet sequence representing I<$string>.
83
84=over 2
85
86=item *
87
88If I<$check> is true, it SHOULD modify I<$string> in place to remove
89the converted part (i.e. the whole string unless there is an error).
90If perlio_ok() is true, SHOULD becomes MUST.
91
92=item *
93
94If an error occurs, it SHOULD return the octet sequence for the
95fragment of string that has been converted and modify $string in-place
96to remove the converted part leaving it starting with the problem
97fragment. If perlio_ok() is true, SHOULD becomes MUST.
98
99=item *
100
If I<$check> is false then C<encode> MUST make a "best effort" to
convert the string - for example, by using a replacement character.
103
104=back
105
106=item -E<gt>decode($octets [,$check])
107
108MUST return the string that I<$octets> represents.
109
110=over 2
111
112=item *
113
114If I<$check> is true, it SHOULD modify I<$octets> in place to remove
115the converted part (i.e. the whole sequence unless there is an
116error). If perlio_ok() is true, SHOULD becomes MUST.
117
118=item *
119
120If an error occurs, it SHOULD return the fragment of string that has
121been converted and modify $octets in-place to remove the converted
122part leaving it starting with the problem fragment. If perlio_ok() is
123true, SHOULD becomes MUST.
124
125=item *
126
127If I<$check> is false then C<decode> should make a "best effort" to
128convert the string - for example by using Unicode's "\x{FFFD}" as a
129replacement character.
130
=back

=back
132
=head2 Other methods defined in Encode::Encoding
134
You do not need to override the methods shown below unless your encoding requires different behaviour.
136
137=over 4
138
139=item -E<gt>name
140
141Predefined As:
142
143 sub name { return shift->{'Name'} }
144
145MUST return the string representing the canonical name of the encoding.
146
147=item -E<gt>new_sequence
148
149Predefined As:
150
151 sub new_sequence { return $_[0] }
152
153This is a placeholder for encodings with state. It should return an
154object which implements this interface. All current implementations
155return the original object.
156
157=item -E<gt>perlio_ok()
158
159Predefined As:
160
161 sub perlio_ok {
162 eval{ require PerlIO::encoding };
163 return $@ ? 0 : 1;
164 }
165
If your encoding does not support PerlIO for some reason, just define:
167
168 sub perlio_ok { 0 }
169
170=item -E<gt>needs_lines()
171
172Predefined As:
173
174 sub needs_lines { 0 };
175
176If your encoding can work with PerlIO but needs line buffering, you
177MUST define this method so it returns true. 7bit ISO-2022 encodings
178are one example that needs this. When this method is missing, false
179is assumed.
180
181=back
182
183=head2 Example: Encode::ROT13
184
185 package Encode::ROT13;
186 use strict;
187 use base qw(Encode::Encoding);
188
189 __PACKAGE__->Define('rot13');
190
191 sub encode($$;$){
192 my ($obj, $str, $chk) = @_;
193 $str =~ tr/A-Za-z/N-ZA-Mn-za-m/;
194 $_[1] = '' if $chk; # this is what in-place edit means
195 return $str;
196 }
197
198 # Jr pna or ynml yvxr guvf;
199 *decode = \&encode;
200
201 1;
202
=head1 Why the heck is the Encode API different?
204
205It should be noted that the I<$check> behaviour is different from the
206outer public API. The logic is that the "unchecked" case is useful
207when the encoding is part of a stream which may be reporting errors
208(e.g. STDERR). In such cases, it is desirable to get everything
209through somehow without causing additional errors which obscure the
210original one. Also, the encoding is best placed to know what the
211correct replacement character is, so if that is the desired behaviour
212then letting low level code do it is the most efficient.
213
214By contrast, if I<$check> is true, the scheme above allows the
215encoding to do as much as it can and tell the layer above how much
216that was. What is lacking at present is a mechanism to report what
217went wrong. The most likely interface will be an additional method
218call to the object, or perhaps (to avoid forcing per-stream objects
219on otherwise stateless encodings) an additional parameter.
220
221It is also highly desirable that encoding classes inherit from
222C<Encode::Encoding> as a base class. This allows that class to define
223additional behaviour for all encoding objects.
224
225 package Encode::MyEncoding;
226 use base qw(Encode::Encoding);
227
228 __PACKAGE__->Define(qw(myCanonical myAlias));
229
The C<Define> call above creates an object with
C<< bless {Name => ...}, $class >> and passes it to
C<Encode::define_encoding>. Such classes inherit their C<name> method from
C<Encode::Encoding>.
233
234=head2 Compiled Encodings
235
236For the sake of speed and efficiency, most of the encodings are now
237supported via a I<compiled form>: XS modules generated from UCM
238files. Encode provides the enc2xs tool to achieve that. Please see
239L<enc2xs> for more details.
240
241=head1 SEE ALSO
242
243L<perlmod>, L<enc2xs>
244
245=begin future
246
247=over 4
248
249=item Scheme 1
250
251The fixup routine gets passed the remaining fragment of string being
252processed. It modifies it in place to remove bytes/characters it can
253understand and returns a string used to represent them. For example:
254
    sub fixup {
        my $ch = substr($_[0],0,1,'');
        return sprintf('\x{%02X}', ord($ch));
    }
259
260This scheme is close to how the underlying C code for Encode works,
261but gives the fixup routine very little context.
262
263=item Scheme 2
264
265The fixup routine gets passed the original string, an index into
266it of the problem area, and the output string so far. It appends
267what it wants to the output string and returns a new index into the
268original string. For example:
269
    sub fixup {
        # my ($s,$i,$d) = @_;
        my $ch = substr($_[0],$_[1],1);
        $_[2] .= sprintf('\x{%02X}', ord($ch));
        return $_[1]+1;
    }
276
277This scheme gives maximal control to the fixup routine but is more
278complicated to code, and may require that the internals of Encode be tweaked to
279keep the original string intact.
280
281=item Other Schemes
282
283Hybrids of the above.
284
285Multiple return values rather than in-place modifications.
286
287Index into the string could be C<pos($str)> allowing C<s/\G...//>.
288
289=back
290
291=end future
292
293=cut