Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | package open; |
2 | use warnings; | |
3 | use Carp; | |
4 | $open::hint_bits = 0x20000; # HINT_LOCALIZE_HH | |
5 | ||
6 | our $VERSION = '1.05'; | |
7 | ||
8 | require 5.008001; # for PerlIO::get_layers() | |
9 | ||
10 | my $locale_encoding; | |
11 | ||
12 | sub _get_encname { | |
13 | return ($1, Encode::resolve_alias($1)) if $_[0] =~ /^:?encoding\((.+)\)$/; | |
14 | return; | |
15 | } | |
16 | ||
17 | sub _drop_oldenc { | |
18 | # If by the time we arrive here there already is at the top of the | |
19 | # perlio layer stack an encoding identical to what we would like | |
20 | # to push via this open pragma, we will pop away the old encoding | |
21 | # (+utf8) so that we can push ourselves in place (this is easier | |
22 | # than ignoring pushing ourselves because of the way how ${^OPEN} | |
23 | # works). So we are looking for something like | |
24 | # | |
25 | # stdio encoding(xxx) utf8 | |
26 | # | |
27 | # in the existing layer stack, and in the new stack chunk for | |
28 | # | |
29 | # :encoding(xxx) | |
30 | # | |
31 | # If we find a match, we pop the old stack (once, since | |
32 | # the utf8 is just a flag on the encoding layer) | |
33 | my ($h, @new) = @_; | |
34 | return unless @new >= 1 && $new[-1] =~ /^:encoding\(.+\)$/; | |
35 | my @old = PerlIO::get_layers($h); | |
36 | return unless @old >= 3 && | |
37 | $old[-1] eq 'utf8' && | |
38 | $old[-2] =~ /^encoding\(.+\)$/; | |
39 | require Encode; | |
40 | my ($loname, $lcname) = _get_encname($old[-2]); | |
41 | unless (defined $lcname) { # Should we trust get_layers()? | |
42 | require Carp; | |
43 | Carp::croak("open: Unknown encoding '$loname'"); | |
44 | } | |
45 | my ($voname, $vcname) = _get_encname($new[-1]); | |
46 | unless (defined $vcname) { | |
47 | require Carp; | |
48 | Carp::croak("open: Unknown encoding '$voname'"); | |
49 | } | |
50 | if ($lcname eq $vcname) { | |
51 | binmode($h, ":pop"); # utf8 is part of the encoding layer | |
52 | } | |
53 | } | |
54 | ||
55 | sub import { | |
56 | my ($class,@args) = @_; | |
57 | croak("open: needs explicit list of PerlIO layers") unless @args; | |
58 | my $std; | |
59 | $^H |= $open::hint_bits; | |
60 | my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1); | |
61 | while (@args) { | |
62 | my $type = shift(@args); | |
63 | my $dscp; | |
64 | if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) { | |
65 | $type = 'IO'; | |
66 | $dscp = ":$1"; | |
67 | } elsif ($type eq ':std') { | |
68 | $std = 1; | |
69 | next; | |
70 | } else { | |
71 | $dscp = shift(@args) || ''; | |
72 | } | |
73 | my @val; | |
74 | foreach my $layer (split(/\s+/,$dscp)) { | |
75 | $layer =~ s/^://; | |
76 | if ($layer eq 'locale') { | |
77 | require Encode; | |
78 | require encoding; | |
79 | $locale_encoding = encoding::_get_locale_encoding() | |
80 | unless defined $locale_encoding; | |
81 | (warnings::warnif("layer", "Cannot figure out an encoding to use"), last) | |
82 | unless defined $locale_encoding; | |
83 | if ($locale_encoding =~ /^utf-?8$/i) { | |
84 | $layer = "utf8"; | |
85 | } else { | |
86 | $layer = "encoding($locale_encoding)"; | |
87 | } | |
88 | $std = 1; | |
89 | } else { | |
90 | my $target = $layer; # the layer name itself | |
91 | $target =~ s/^(\w+)\(.+\)$/$1/; # strip parameters | |
92 | ||
93 | unless(PerlIO::Layer::->find($target,1)) { | |
94 | warnings::warnif("layer", "Unknown PerlIO layer '$target'"); | |
95 | } | |
96 | } | |
97 | push(@val,":$layer"); | |
98 | if ($layer =~ /^(crlf|raw)$/) { | |
99 | $^H{"open_$type"} = $layer; | |
100 | } | |
101 | } | |
102 | if ($type eq 'IN') { | |
103 | _drop_oldenc(*STDIN, @val); | |
104 | $in = join(' ', @val); | |
105 | } | |
106 | elsif ($type eq 'OUT') { | |
107 | _drop_oldenc(*STDOUT, @val); | |
108 | $out = join(' ', @val); | |
109 | } | |
110 | elsif ($type eq 'IO') { | |
111 | _drop_oldenc(*STDIN, @val); | |
112 | _drop_oldenc(*STDOUT, @val); | |
113 | $in = $out = join(' ', @val); | |
114 | } | |
115 | else { | |
116 | croak "Unknown PerlIO layer class '$type'"; | |
117 | } | |
118 | } | |
119 | ${^OPEN} = join("\0", $in, $out); | |
120 | if ($std) { | |
121 | if ($in) { | |
122 | if ($in =~ /:utf8\b/) { | |
123 | binmode(STDIN, ":utf8"); | |
124 | } elsif ($in =~ /(\w+\(.+\))/) { | |
125 | binmode(STDIN, ":$1"); | |
126 | } | |
127 | } | |
128 | if ($out) { | |
129 | if ($out =~ /:utf8\b/) { | |
130 | binmode(STDOUT, ":utf8"); | |
131 | binmode(STDERR, ":utf8"); | |
132 | } elsif ($out =~ /(\w+\(.+\))/) { | |
133 | binmode(STDOUT, ":$1"); | |
134 | binmode(STDERR, ":$1"); | |
135 | } | |
136 | } | |
137 | } | |
138 | } | |
139 | ||
140 | 1; | |
141 | __END__ | |
142 | ||
143 | =head1 NAME | |
144 | ||
145 | open - perl pragma to set default PerlIO layers for input and output | |
146 | ||
147 | =head1 SYNOPSIS | |
148 | ||
149 | use open IN => ":crlf", OUT => ":bytes"; | |
150 | use open OUT => ':utf8'; | |
151 | use open IO => ":encoding(iso-8859-7)"; | |
152 | ||
153 | use open IO => ':locale'; | |
154 | ||
155 | use open ':utf8'; | |
156 | use open ':locale'; | |
157 | use open ':encoding(iso-8859-7)'; | |
158 | ||
159 | use open ':std'; | |
160 | ||
161 | =head1 DESCRIPTION | |
162 | ||
163 | Full-fledged support for I/O layers is now implemented provided | |
164 | Perl is configured to use PerlIO as its IO system (which is now the | |
165 | default). | |
166 | ||
167 | The C<open> pragma serves as one of the interfaces to declare default | |
168 | "layers" (also known as "disciplines") for all I/O. Any two-argument | |
169 | open(), readpipe() (aka qx//) and similar operators found within the | |
170 | lexical scope of this pragma will use the declared defaults. | |
171 | Even three-argument opens may be affected by this pragma | |
172 | when they don't specify IO layers in MODE. | |
173 | ||
174 | With the C<IN> subpragma you can declare the default layers | |
175 | of input streams, and with the C<OUT> subpragma you can declare | |
176 | the default layers of output streams. With the C<IO> subpragma | |
177 | you can control both input and output streams simultaneously. | |
178 | ||
179 | If you have a legacy encoding, you can use the C<:encoding(...)> tag. | |
180 | ||
181 | If you want to set your encoding layers based on your | |
182 | locale environment variables, you can use the C<:locale> tag. | |
183 | For example: | |
184 | ||
185 | $ENV{LANG} = 'ru_RU.KOI8-R'; | |
186 | # the :locale will probe the locale environment variables like LANG | |
187 | use open OUT => ':locale'; | |
188 | open(O, ">koi8"); | |
189 | print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1 | |
190 | close O; | |
191 | open(I, "<koi8"); | |
192 | printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1 | |
193 | close I; | |
194 | ||
195 | These are equivalent | |
196 | ||
197 | use open ':utf8'; | |
198 | use open IO => ':utf8'; | |
199 | ||
200 | as are these | |
201 | ||
202 | use open ':locale'; | |
203 | use open IO => ':locale'; | |
204 | ||
205 | and these | |
206 | ||
207 | use open ':encoding(iso-8859-7)'; | |
208 | use open IO => ':encoding(iso-8859-7)'; | |
209 | ||
210 | The matching of encoding names is loose: case does not matter, and | |
211 | many encodings have several aliases. See L<Encode::Supported> for | |
212 | details and the list of supported locales. | |
213 | ||
214 | Note that C<:utf8> PerlIO layer must always be specified exactly like | |
215 | that, it is not subject to the loose matching of encoding names. | |
216 | ||
217 | When open() is given an explicit list of layers (with the three-arg | |
218 | syntax), they override the list declared using this pragma. | |
219 | ||
220 | The C<:std> subpragma on its own has no effect, but if combined with | |
221 | the C<:utf8> or C<:encoding> subpragmas, it converts the standard | |
222 | filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected | |
223 | for input/output handles. For example, if both input and out are | |
224 | chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and | |
225 | STDERR are also in C<:utf8>. On the other hand, if only output is | |
226 | chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the | |
227 | STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma | |
228 | implicitly turns on C<:std>. | |
229 | ||
230 | The logic of C<:locale> is described in full in L<encoding>, | |
231 | but in short it is first trying nl_langinfo(CODESET) and then | |
232 | guessing from the LC_ALL and LANG locale environment variables. | |
233 | ||
234 | Directory handles may also support PerlIO layers in the future. | |
235 | ||
236 | =head1 NONPERLIO FUNCTIONALITY | |
237 | ||
238 | If Perl is not built to use PerlIO as its IO system then only the two | |
239 | pseudo-layers C<:bytes> and C<:crlf> are available. | |
240 | ||
241 | The C<:bytes> layer corresponds to "binary mode" and the C<:crlf> | |
242 | layer corresponds to "text mode" on platforms that distinguish | |
243 | between the two modes when opening files (which is many DOS-like | |
244 | platforms, including Windows). These two layers are no-ops on | |
245 | platforms where binmode() is a no-op, but perform their functions | |
246 | everywhere if PerlIO is enabled. | |
247 | ||
248 | =head1 IMPLEMENTATION DETAILS | |
249 | ||
250 | There is a class method in C<PerlIO::Layer> C<find> which is | |
251 | implemented as XS code. It is called by C<import> to validate the | |
252 | layers: | |
253 | ||
254 | PerlIO::Layer::->find("perlio") | |
255 | ||
256 | The return value (if defined) is a Perl object, of class | |
257 | C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As | |
258 | yet there is nothing useful you can do with the object at the perl | |
259 | level. | |
260 | ||
261 | =head1 SEE ALSO | |
262 | ||
263 | L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>, | |
264 | L<encoding> | |
265 | ||
266 | =cut |