Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | .\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "PERLOPENTUT 1" | |
132 | .TH PERLOPENTUT 1 "2002-06-08" "perl v5.8.0" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | perlopentut \- tutorial on opening things in Perl | |
135 | .SH "DESCRIPTION" | |
136 | .IX Header "DESCRIPTION" | |
137 | Perl has two simple, built-in ways to open files: the shell way for | |
138 | convenience, and the C way for precision. The choice is yours. | |
139 | .SH "Open A\*` la shell" | |
140 | .IX Header "Open A` la shell" | |
141 | Perl's \f(CW\*(C`open\*(C'\fR function was designed to mimic the way command-line | |
142 | redirection in the shell works. Here are some basic examples | |
143 | from the shell: | |
144 | .PP | |
145 | .Vb 6 | |
146 | \& $ myprogram file1 file2 file3 | |
147 | \& $ myprogram < inputfile | |
148 | \& $ myprogram > outputfile | |
149 | \& $ myprogram >> outputfile | |
150 | \& $ myprogram | otherprogram | |
151 | \& $ otherprogram | myprogram | |
152 | .Ve | |
153 | .PP | |
154 | And here are some more advanced examples: | |
155 | .PP | |
156 | .Vb 4 | |
157 | \& $ otherprogram | myprogram f1 - f2 | |
158 | \& $ otherprogram 2>&1 | myprogram - | |
159 | \& $ myprogram <&3 | |
160 | \& $ myprogram >&4 | |
161 | .Ve | |
162 | .PP | |
163 | Programmers accustomed to constructs like those above can take comfort | |
164 | in learning that Perl directly supports these familiar constructs using | |
165 | virtually the same syntax as the shell. | |
166 | .Sh "Simple Opens" | |
167 | .IX Subsection "Simple Opens" | |
168 | The \f(CW\*(C`open\*(C'\fR function takes two arguments: the first is a filehandle, | |
169 | and the second is a single string comprising both what to open and how | |
170 | to open it. \f(CW\*(C`open\*(C'\fR returns true when it works, and when it fails, | |
171 | returns a false value and sets the special variable $! to reflect | |
172 | the system error. If the filehandle was previously opened, it will | |
173 | be implicitly closed first. | |
174 | .PP | |
175 | For example: | |
176 | .PP | |
177 | .Vb 4 | |
178 | \& open(INFO, "datafile") || die("can't open datafile: $!"); | |
179 | \& open(INFO, "< datafile") || die("can't open datafile: $!"); | |
180 | \& open(RESULTS,"> runstats") || die("can't open runstats: $!"); | |
181 | \& open(LOG, ">> logfile ") || die("can't open logfile: $!"); | |
182 | .Ve | |
183 | .PP | |
184 | If you prefer the low-punctuation version, you could write that this way: | |
185 | .PP | |
186 | .Vb 3 | |
187 | \& open INFO, "< datafile" or die "can't open datafile: $!"; | |
188 | \& open RESULTS,"> runstats" or die "can't open runstats: $!"; | |
189 | \& open LOG, ">> logfile " or die "can't open logfile: $!"; | |
190 | .Ve | |
191 | .PP | |
192 | A few things to notice. First, the leading less-than is optional. | |
193 | If omitted, Perl assumes that you want to open the file for reading. | |
194 | .PP | |
195 | The other important thing to notice is that, just as in the shell, | |
196 | any white space before or after the filename is ignored. This is good, | |
197 | because you wouldn't want these to do different things: | |
198 | .PP | |
199 | .Vb 3 | |
200 | \& open INFO, "<datafile" | |
201 | \& open INFO, "< datafile" | |
202 | \& open INFO, "<  datafile" | |
203 | .Ve | |
204 | .PP | |
205 | Ignoring surrounding whitespace also helps for when you read a filename in | |
206 | from a different file, and forget to trim it before opening: | |
207 | .PP | |
208 | .Vb 2 | |
209 | \& $filename = <INFO>; # oops, \en still there | |
210 | \& open(EXTRA, "< $filename") || die "can't open $filename: $!"; | |
211 | .Ve | |
212 | .PP | |
213 | This is not a bug, but a feature. Because \f(CW\*(C`open\*(C'\fR mimics the shell in | |
214 | its style of using redirection arrows to specify how to open the file, it | |
215 | also does so with respect to extra white space around the filename itself | |
216 | as well. For accessing files with naughty names, see | |
217 | \&\*(L"Dispelling the Dweomer\*(R". | |
218 | .Sh "Pipe Opens" | |
219 | .IX Subsection "Pipe Opens" | |
220 | In C, when you want to open a file using the standard I/O library, | |
221 | you use the \f(CW\*(C`fopen\*(C'\fR function, but when opening a pipe, you use the | |
222 | \&\f(CW\*(C`popen\*(C'\fR function. But in the shell, you just use a different redirection | |
223 | character. That's also the case for Perl. The \f(CW\*(C`open\*(C'\fR call | |
224 | remains the same\*(--just its argument differs. | |
225 | .PP | |
226 | If the leading character is a pipe symbol, \f(CW\*(C`open\*(C'\fR starts up a new | |
227 | command and opens a write-only filehandle leading into that command. | |
228 | This lets you write into that handle and have what you write show up on | |
229 | that command's standard input. For example: | |
230 | .PP | |
231 | .Vb 3 | |
232 | \& open(PRINTER, "| lpr -Plp1") || die "can't run lpr: $!"; | |
233 | \& print PRINTER "stuff\en"; | |
234 | \& close(PRINTER) || die "can't close lpr: $!"; | |
235 | .Ve | |
236 | .PP | |
237 | If the trailing character is a pipe, you start up a new command and open a | |
238 | read-only filehandle leading out of that command. This lets whatever that | |
239 | command writes to its standard output show up on your handle for reading. | |
240 | For example: | |
241 | .PP | |
242 | .Vb 3 | |
243 | \& open(NET, "netstat -i -n |") || die "can't run netstat: $!"; | |
244 | \& while (<NET>) { } # do something with input | |
245 | \& close(NET) || die "can't close netstat: $!"; | |
246 | .Ve | |
247 | .PP | |
248 | What happens if you try to open a pipe to or from a non-existent | |
249 | command? If possible, Perl will detect the failure and set \f(CW$!\fR as | |
250 | usual. But if the command contains special shell characters, such as | |
251 | \&\f(CW\*(C`>\*(C'\fR or \f(CW\*(C`*\*(C'\fR, called 'metacharacters', Perl does not execute the | |
252 | command directly. Instead, Perl runs the shell, which then tries to | |
253 | run the command. This means that it's the shell that gets the error | |
254 | indication. In such a case, the \f(CW\*(C`open\*(C'\fR call will only indicate | |
255 | failure if Perl can't even run the shell. See \*(L"How can I capture \s-1STDERR\s0 from an external command?\*(R" in perlfaq8 to see how to cope with | |
256 | this. There's also an explanation in perlipc. | |
257 | .PP | |
258 | If you would like to open a bidirectional pipe, the IPC::Open2 | |
259 | library will handle this for you. Check out | |
260 | \&\*(L"Bidirectional Communication with Another Process\*(R" in perlipc. | |
261 | .Sh "The Minus File" | |
262 | .IX Subsection "The Minus File" | |
263 | Again following the lead of the standard shell utilities, Perl's | |
264 | \&\f(CW\*(C`open\*(C'\fR function treats a file whose name is a single minus, \*(L"\-\*(R", in a | |
265 | special way. If you open minus for reading, it really means to access | |
266 | the standard input. If you open minus for writing, it really means to | |
267 | access the standard output. | |
268 | .PP | |
269 | If minus can be used as the default input or default output, what happens | |
270 | if you open a pipe into or out of minus? What's the default command it | |
271 | would run? The same script as you're currently running! This is actually | |
272 | a stealth \f(CW\*(C`fork\*(C'\fR hidden inside an \f(CW\*(C`open\*(C'\fR call. See | |
273 | \&\*(L"Safe Pipe Opens\*(R" in perlipc for details. | |
274 | .Sh "Mixing Reads and Writes" | |
275 | .IX Subsection "Mixing Reads and Writes" | |
276 | It is possible to specify both read and write access. All you do is | |
277 | add a \*(L"+\*(R" symbol in front of the redirection. But as in the shell, | |
278 | using a less-than on a file never creates a new file; it only opens an | |
279 | existing one. On the other hand, using a greater-than always clobbers | |
280 | (truncates to zero length) an existing file, or creates a brand-new one | |
281 | if there isn't an old one. Adding a \*(L"+\*(R" for read-write doesn't affect | |
282 | whether it only works on existing files or always clobbers existing ones. | |
283 | .PP | |
284 | .Vb 2 | |
285 | \& open(WTMP, "+< /usr/adm/wtmp") | |
286 | \& || die "can't open /usr/adm/wtmp: $!"; | |
287 | .Ve | |
288 | .PP | |
289 | .Vb 2 | |
290 | \& open(SCREEN, "+> /tmp/lkscreen") | |
291 | \& || die "can't open /tmp/lkscreen: $!"; | |
292 | .Ve | |
293 | .PP | |
294 | .Vb 2 | |
295 | \& open(LOGFILE, "+>> /tmp/applog") | |
296 | \& || die "can't open /tmp/applog: $!"; | |
297 | .Ve | |
298 | .PP | |
299 | The first one won't create a new file, and the second one will always | |
300 | clobber an old one. The third one will create a new file if necessary | |
301 | and not clobber an old one, and it will allow you to read at any point | |
302 | in the file, but all writes will always go to the end. In short, | |
303 | the first case is substantially more common than the second and third | |
304 | cases, which are almost always wrong. (If you know C, the plus in | |
305 | Perl's \f(CW\*(C`open\*(C'\fR is historically derived from the one in C's fopen(3S), | |
306 | which it ultimately calls.) | |
307 | .PP | |
308 | In fact, when it comes to updating a file, unless you're working on | |
309 | a binary file as in the \s-1WTMP\s0 case above, you probably don't want to | |
310 | use this approach for updating. Instead, Perl's \fB\-i\fR flag comes to | |
311 | the rescue. The following command takes all the C, \*(C+, or yacc source | |
312 | or header files and changes all their foo's to bar's, leaving | |
313 | the old version in the original file name with a \*(L".orig\*(R" tacked | |
314 | on the end: | |
315 | .PP | |
316 | .Vb 1 | |
317 | \& $ perl -i.orig -pe 's/\ebfoo\eb/bar/g' *.[Cchy] | |
318 | .Ve | |
319 | .PP | |
320 | This is a short cut for some renaming games that are really | |
321 | the best way to update textfiles. See the second question in | |
322 | perlfaq5 for more details. | |
323 | .Sh "Filters" | |
324 | .IX Subsection "Filters" | |
325 | One of the most common uses for \f(CW\*(C`open\*(C'\fR is one you never | |
326 | even notice. When you process the \s-1ARGV\s0 filehandle using | |
327 | \&\f(CW\*(C`<ARGV>\*(C'\fR, Perl actually does an implicit open | |
328 | on each file in \f(CW@ARGV\fR. Thus a program called like this: | |
329 | .PP | |
330 | .Vb 1 | |
331 | \& $ myprogram file1 file2 file3 | |
332 | .Ve | |
333 | .PP | |
334 | Can have all its files opened and processed one at a time | |
335 | using a construct no more complex than: | |
336 | .PP | |
337 | .Vb 3 | |
338 | \& while (<>) { | |
339 | \& # do something with $_ | |
340 | \& } | |
341 | .Ve | |
342 | .PP | |
343 | If \f(CW@ARGV\fR is empty when the loop first begins, Perl pretends you've opened | |
344 | up minus, that is, the standard input. In fact, \f(CW$ARGV\fR, the currently | |
345 | open file during \f(CW\*(C`<ARGV>\*(C'\fR processing, is even set to \*(L"\-\*(R" | |
346 | in these circumstances. | |
347 | .PP | |
348 | You are welcome to pre-process your \f(CW@ARGV\fR before starting the loop to | |
349 | make sure it's to your liking. One reason to do this might be to remove | |
350 | command options beginning with a minus. While you can always roll the | |
351 | simple ones by hand, the Getopts modules are good for this. | |
352 | .PP | |
353 | .Vb 1 | |
354 | \& use Getopt::Std; | |
355 | .Ve | |
356 | .PP | |
357 | .Vb 2 | |
358 | \& # -v, -D, -o ARG, sets $opt_v, $opt_D, $opt_o | |
359 | \& getopts("vDo:"); | |
360 | .Ve | |
361 | .PP | |
362 | .Vb 2 | |
363 | \& # -v, -D, -o ARG, sets $args{v}, $args{D}, $args{o} | |
364 | \& getopts("vDo:", \e%args); | |
365 | .Ve | |
366 | .PP | |
367 | Or the standard Getopt::Long module to permit named arguments: | |
368 | .PP | |
369 | .Vb 5 | |
370 | \& use Getopt::Long; | |
371 | \& GetOptions( "verbose" => \e$verbose, # --verbose | |
372 | \& "Debug" => \e$debug, # --Debug | |
373 | \& "output=s" => \e$output ); | |
374 | \& # --output=somestring or --output somestring | |
375 | .Ve | |
376 | .PP | |
377 | Another reason for preprocessing arguments is to make an empty | |
378 | argument list default to all files: | |
379 | .PP | |
380 | .Vb 1 | |
381 | \& @ARGV = glob("*") unless @ARGV; | |
382 | .Ve | |
383 | .PP | |
384 | You could even filter out all but plain, text files. This is a bit | |
385 | silent, of course, and you might prefer to mention them on the way. | |
386 | .PP | |
387 | .Vb 1 | |
388 | \& @ARGV = grep { -f && -T } @ARGV; | |
389 | .Ve | |
390 | .PP | |
391 | If you're using the \fB\-n\fR or \fB\-p\fR command-line options, you | |
392 | should put changes to \f(CW@ARGV\fR in a \f(CW\*(C`BEGIN{}\*(C'\fR block. | |
393 | .PP | |
394 | Remember that a normal \f(CW\*(C`open\*(C'\fR has special properties, in that it might | |
395 | call fopen(3S) or it might call popen(3S), depending on what its | |
396 | argument looks like; that's why it's sometimes called \*(L"magic open\*(R". | |
397 | Here's an example: | |
398 | .PP | |
399 | .Vb 3 | |
400 | \& $pwdinfo = `domainname` =~ /^(\e(none\e))?$/ | |
401 | \& ? '< /etc/passwd' | |
402 | \& : 'ypcat passwd |'; | |
403 | .Ve | |
404 | .PP | |
405 | .Vb 2 | |
406 | \& open(PWD, $pwdinfo) | |
407 | \& or die "can't open $pwdinfo: $!"; | |
408 | .Ve | |
409 | .PP | |
410 | This sort of thing also comes into play in filter processing. Because | |
411 | \&\f(CW\*(C`<ARGV>\*(C'\fR processing employs the normal, shell-style Perl \f(CW\*(C`open\*(C'\fR, | |
412 | it respects all the special things we've already seen: | |
413 | .PP | |
414 | .Vb 1 | |
415 | \& $ myprogram f1 "cmd1|" - f2 "cmd2|" f3 < tmpfile | |
416 | .Ve | |
417 | .PP | |
418 | That program will read from the file \fIf1\fR, the process \fIcmd1\fR, standard | |
419 | input (\fItmpfile\fR in this case), the \fIf2\fR file, the \fIcmd2\fR command, | |
420 | and finally the \fIf3\fR file. | |
421 | .PP | |
422 | Yes, this also means that if you have a file named \*(L"\-\*(R" (and so on) in | |
423 | your directory, that they won't be processed as literal files by \f(CW\*(C`open\*(C'\fR. | |
424 | You'll need to pass them as \*(L"./\-\*(R" much as you would for the \fIrm\fR program. | |
425 | Or you could use \f(CW\*(C`sysopen\*(C'\fR as described below. | |
426 | .PP | |
427 | One of the more interesting applications is to change files of a certain | |
428 | name into pipes. For example, to autoprocess gzipped or compressed | |
429 | files by decompressing them with \fIgzip\fR: | |
430 | .PP | |
431 | .Vb 1 | |
432 | \& @ARGV = map { /\e.(gz|Z)$/ ? "gzip -dc $_ |" : $_ } @ARGV; | |
433 | .Ve | |
434 | .PP | |
435 | Or, if you have the \fI\s-1GET\s0\fR program installed from \s-1LWP\s0, | |
436 | you can fetch URLs before processing them: | |
437 | .PP | |
438 | .Vb 1 | |
439 | \& @ARGV = map { m#^\ew+://# ? "GET $_ |" : $_ } @ARGV; | |
440 | .Ve | |
441 | .PP | |
442 | It's not for nothing that this is called magic \f(CW\*(C`<ARGV>\*(C'\fR. | |
443 | Pretty nifty, eh? | |
444 | .SH "Open A\*` la C" | |
445 | .IX Header "Open A` la C" | |
446 | If you want the convenience of the shell, then Perl's \f(CW\*(C`open\*(C'\fR is | |
447 | definitely the way to go. On the other hand, if you want finer precision | |
448 | than C's simplistic fopen(3S) provides, then you should look to Perl's | |
449 | \&\f(CW\*(C`sysopen\*(C'\fR, which is a direct hook into the \fIopen\fR\|(2) system call. | |
450 | That does mean it's a bit more involved, but that's the price of | |
451 | precision. | |
452 | .PP | |
453 | \&\f(CW\*(C`sysopen\*(C'\fR takes 3 (or 4) arguments. | |
454 | .PP | |
455 | .Vb 1 | |
456 | \& sysopen HANDLE, PATH, FLAGS, [MASK] | |
457 | .Ve | |
458 | .PP | |
459 | The \s-1HANDLE\s0 argument is a filehandle just as with \f(CW\*(C`open\*(C'\fR. The \s-1PATH\s0 is | |
460 | a literal path, one that doesn't pay attention to any greater-thans or | |
461 | less-thans or pipes or minuses, nor ignore white space. If it's there, | |
462 | it's part of the path. The \s-1FLAGS\s0 argument contains one or more values | |
463 | derived from the Fcntl module that have been or'd together using the | |
464 | bitwise \*(L"|\*(R" operator. The final argument, the \s-1MASK\s0, is optional; if | |
465 | present, it is combined with the user's current umask for the creation | |
466 | mode of the file. You should usually omit this. | |
467 | .PP | |
468 | Although the traditional values of read\-only, write\-only, and read-write | |
469 | are 0, 1, and 2 respectively, this is known not to hold true on some | |
470 | systems. Instead, it's best to load in the appropriate constants first | |
471 | from the Fcntl module, which supplies the following standard flags: | |
472 | .PP | |
473 | .Vb 8 | |
474 | \& O_RDONLY Read only | |
475 | \& O_WRONLY Write only | |
476 | \& O_RDWR Read and write | |
477 | \& O_CREAT Create the file if it doesn't exist | |
478 | \& O_EXCL Fail if the file already exists | |
479 | \& O_APPEND Append to the file | |
480 | \& O_TRUNC Truncate the file | |
481 | \& O_NONBLOCK Non-blocking access | |
482 | .Ve | |
483 | .PP | |
484 | Less common flags that are sometimes available on some operating | |
485 | systems include \f(CW\*(C`O_BINARY\*(C'\fR, \f(CW\*(C`O_TEXT\*(C'\fR, \f(CW\*(C`O_SHLOCK\*(C'\fR, \f(CW\*(C`O_EXLOCK\*(C'\fR, | |
486 | \&\f(CW\*(C`O_DEFER\*(C'\fR, \f(CW\*(C`O_SYNC\*(C'\fR, \f(CW\*(C`O_ASYNC\*(C'\fR, \f(CW\*(C`O_DSYNC\*(C'\fR, \f(CW\*(C`O_RSYNC\*(C'\fR, | |
487 | \&\f(CW\*(C`O_NOCTTY\*(C'\fR, \f(CW\*(C`O_NDELAY\*(C'\fR and \f(CW\*(C`O_LARGEFILE\*(C'\fR. Consult your \fIopen\fR\|(2) | |
488 | manpage or its local equivalent for details. (Note: starting from | |
489 | Perl release 5.6 the O_LARGEFILE flag, if available, is automatically | |
490 | added to the \fIsysopen()\fR flags because large files are the default.) | |
491 | .PP | |
492 | Here's how to use \f(CW\*(C`sysopen\*(C'\fR to emulate the simple \f(CW\*(C`open\*(C'\fR calls we had | |
493 | before. We'll omit the \f(CW\*(C`|| die $!\*(C'\fR checks for clarity, but make sure | |
494 | you always check the return values in real code. These aren't quite | |
495 | the same, since \f(CW\*(C`open\*(C'\fR will trim leading and trailing white space, | |
496 | but you'll get the idea: | |
497 | .PP | |
498 | To open a file for reading: | |
499 | .PP | |
500 | .Vb 2 | |
501 | \& open(FH, "< $path"); | |
502 | \& sysopen(FH, $path, O_RDONLY); | |
503 | .Ve | |
504 | .PP | |
505 | To open a file for writing, creating a new file if needed or else truncating | |
506 | an old file: | |
507 | .PP | |
508 | .Vb 2 | |
509 | \& open(FH, "> $path"); | |
510 | \& sysopen(FH, $path, O_WRONLY | O_TRUNC | O_CREAT); | |
511 | .Ve | |
512 | .PP | |
513 | To open a file for appending, creating one if necessary: | |
514 | .PP | |
515 | .Vb 2 | |
516 | \& open(FH, ">> $path"); | |
517 | \& sysopen(FH, $path, O_WRONLY | O_APPEND | O_CREAT); | |
518 | .Ve | |
519 | .PP | |
520 | To open a file for update, where the file must already exist: | |
521 | .PP | |
522 | .Vb 2 | |
523 | \& open(FH, "+< $path"); | |
524 | \& sysopen(FH, $path, O_RDWR); | |
525 | .Ve | |
526 | .PP | |
527 | And here are things you can do with \f(CW\*(C`sysopen\*(C'\fR that you cannot do with | |
528 | a regular \f(CW\*(C`open\*(C'\fR. As you see, it's just a matter of controlling the | |
529 | flags in the third argument. | |
530 | .PP | |
531 | To open a file for writing, creating a new file which must not previously | |
532 | exist: | |
533 | .PP | |
534 | .Vb 1 | |
535 | \& sysopen(FH, $path, O_WRONLY | O_EXCL | O_CREAT); | |
536 | .Ve | |
537 | .PP | |
538 | To open a file for appending, where that file must already exist: | |
539 | .PP | |
540 | .Vb 1 | |
541 | \& sysopen(FH, $path, O_WRONLY | O_APPEND); | |
542 | .Ve | |
543 | .PP | |
544 | To open a file for update, creating a new file if necessary: | |
545 | .PP | |
546 | .Vb 1 | |
547 | \& sysopen(FH, $path, O_RDWR | O_CREAT); | |
548 | .Ve | |
549 | .PP | |
550 | To open a file for update, where that file must not already exist: | |
551 | .PP | |
552 | .Vb 1 | |
553 | \& sysopen(FH, $path, O_RDWR | O_EXCL | O_CREAT); | |
554 | .Ve | |
555 | .PP | |
556 | To open a file without blocking, creating one if necessary: | |
557 | .PP | |
558 | .Vb 1 | |
559 | \& sysopen(FH, $path, O_WRONLY | O_NONBLOCK | O_CREAT); | |
560 | .Ve | |
561 | .Sh "Permissions A\*` la mode" | |
562 | .IX Subsection "Permissions A` la mode" | |
563 | If you omit the \s-1MASK\s0 argument to \f(CW\*(C`sysopen\*(C'\fR, Perl uses the octal value | |
564 | 0666. The normal \s-1MASK\s0 to use for executables and directories should | |
565 | be 0777, and for anything else, 0666. | |
566 | .PP | |
567 | Why so permissive? Well, it isn't really. The \s-1MASK\s0 will be modified | |
568 | by your process's current \f(CW\*(C`umask\*(C'\fR. A umask is a number representing | |
569 | \&\fIdisabled\fR permissions bits; that is, bits that will not be turned on | |
570 | in the created files' permissions field. | |
571 | .PP | |
572 | For example, if your \f(CW\*(C`umask\*(C'\fR were 027, then the 020 part would | |
573 | disable the group from writing, and the 007 part would disable others | |
574 | from reading, writing, or executing. Under these conditions, passing | |
575 | \&\f(CW\*(C`sysopen\*(C'\fR 0666 would create a file with mode 0640, since \f(CW\*(C`0666 &~ 027\*(C'\fR | |
576 | is 0640. | |
577 | .PP | |
578 | You should seldom use the \s-1MASK\s0 argument to \f(CW\*(C`sysopen()\*(C'\fR. That takes | |
579 | away the user's freedom to choose what permission new files will have. | |
580 | Denying choice is almost always a bad thing. One exception would be for | |
581 | cases where sensitive or private data is being stored, such as with mail | |
582 | folders, cookie files, and internal temporary files. | |
583 | .SH "Obscure Open Tricks" | |
584 | .IX Header "Obscure Open Tricks" | |
585 | .Sh "Re-Opening Files (dups)" | |
586 | .IX Subsection "Re-Opening Files (dups)" | |
587 | Sometimes you already have a filehandle open, and want to make another | |
588 | handle that's a duplicate of the first one. In the shell, we place an | |
589 | ampersand in front of a file descriptor number when doing redirections. | |
590 | For example, \f(CW\*(C`2>&1\*(C'\fR makes descriptor 2 (that's \s-1STDERR\s0 in Perl) | |
591 | be redirected into descriptor 1 (which is usually Perl's \s-1STDOUT\s0). | |
592 | The same is essentially true in Perl: a filename that begins with an | |
593 | ampersand is treated instead as a file descriptor if a number, or as a | |
594 | filehandle if a string. | |
595 | .PP | |
596 | .Vb 2 | |
597 | \& open(SAVEOUT, ">&SAVEERR") || die "couldn't dup SAVEERR: $!"; | |
598 | \& open(MHCONTEXT, "<&4") || die "couldn't dup fd4: $!"; | |
599 | .Ve | |
600 | .PP | |
601 | That means that if a function is expecting a filename, but you don't | |
602 | want to give it a filename because you already have the file open, you | |
603 | can just pass the filehandle with a leading ampersand. It's best to | |
604 | use a fully qualified handle though, just in case the function happens | |
605 | to be in a different package: | |
606 | .PP | |
607 | .Vb 1 | |
608 | \& somefunction("&main::LOGFILE"); | |
609 | .Ve | |
610 | .PP | |
611 | This way if \fIsomefunction()\fR is planning on opening its argument, it can | |
612 | just use the already opened handle. This differs from passing a handle, | |
613 | because with a handle, you don't open the file. Here you have something | |
614 | you can pass to open. | |
615 | .PP | |
616 | If you have one of those tricky, newfangled I/O objects that the \*(C+ | |
617 | folks are raving about, then this doesn't work because those aren't a | |
618 | proper filehandle in the native Perl sense. You'll have to use \fIfileno()\fR | |
619 | to pull out the proper descriptor number, assuming you can: | |
620 | .PP | |
621 | .Vb 4 | |
622 | \& use IO::Socket; | |
623 | \& $handle = IO::Socket::INET->new("www.perl.com:80"); | |
624 | \& $fd = $handle->fileno; | |
625 | \& somefunction("&$fd"); # not an indirect function call | |
626 | .Ve | |
627 | .PP | |
628 | It can be easier (and certainly will be faster) just to use real | |
629 | filehandles though: | |
630 | .PP | |
631 | .Vb 4 | |
632 | \& use IO::Socket; | |
633 | \& local *REMOTE = IO::Socket::INET->new("www.perl.com:80"); | |
634 | \& die "can't connect" unless defined(fileno(REMOTE)); | |
635 | \& somefunction("&main::REMOTE"); | |
636 | .Ve | |
637 | .PP | |
638 | If the filehandle or descriptor number is preceded not just with a simple | |
639 | \&\*(L"&\*(R" but rather with a \*(L"&=\*(R" combination, then Perl will not create a | |
640 | completely new descriptor opened to the same place using the \fIdup\fR\|(2) | |
641 | system call. Instead, it will just make something of an alias to the | |
642 | existing one using the fdopen(3S) library call. This is slightly more | |
643 | parsimonious of systems resources, although this is less a concern | |
644 | these days. Here's an example of that: | |
645 | .PP | |
646 | .Vb 2 | |
647 | \& $fd = $ENV{"MHCONTEXTFD"}; | |
648 | \& open(MHCONTEXT, "<&=$fd") or die "couldn't fdopen $fd: $!"; | |
649 | .Ve | |
650 | .PP | |
651 | If you're using magic \f(CW\*(C`<ARGV>\*(C'\fR, you could even pass in as a | |
652 | command line argument in \f(CW@ARGV\fR something like \f(CW"<&=$MHCONTEXTFD"\fR, | |
653 | but we've never seen anyone actually do this. | |
654 | .Sh "Dispelling the Dweomer" | |
655 | .IX Subsection "Dispelling the Dweomer" | |
656 | Perl is more of a DWIMmer language than something like Java\*(--where \s-1DWIM\s0 | |
657 | is an acronym for \*(L"do what I mean\*(R". But this principle sometimes leads | |
658 | to more hidden magic than one knows what to do with. In this way, Perl | |
659 | is also filled with \fIdweomer\fR, an obscure word meaning an enchantment. | |
660 | Sometimes, Perl's DWIMmer is just too much like dweomer for comfort. | |
661 | .PP | |
662 | If magic \f(CW\*(C`open\*(C'\fR is a bit too magical for you, you don't have to turn | |
663 | to \f(CW\*(C`sysopen\*(C'\fR. To open a file with arbitrary weird characters in | |
664 | it, it's necessary to protect any leading and trailing whitespace. | |
665 | Leading whitespace is protected by inserting a \f(CW"./"\fR in front of a | |
666 | filename that starts with whitespace. Trailing whitespace is protected | |
667 | by appending an \s-1ASCII\s0 \s-1NUL\s0 byte (\f(CW"\e0"\fR) at the end of the string. | |
668 | .PP | |
669 | .Vb 2 | |
670 | \& $file =~ s#^(\es)#./$1#; | |
671 | \& open(FH, "< $file\e0") || die "can't open $file: $!"; | |
672 | .Ve | |
673 | .PP | |
674 | This assumes, of course, that your system considers dot the current | |
675 | working directory, slash the directory separator, and disallows \s-1ASCII\s0 | |
676 | NULs within a valid filename. Most systems follow these conventions, | |
677 | including all \s-1POSIX\s0 systems as well as proprietary Microsoft systems. | |
678 | The only vaguely popular system that doesn't work this way is the | |
679 | proprietary Macintosh system, which uses a colon where the rest of us | |
680 | use a slash. Maybe \f(CW\*(C`sysopen\*(C'\fR isn't such a bad idea after all. | |
681 | .PP | |
682 | If you want to use \f(CW\*(C`<ARGV>\*(C'\fR processing in a totally boring | |
683 | and non-magical way, you could do this first: | |
684 | .PP | |
685 | .Vb 10 | |
686 | \& # "Sam sat on the ground and put his head in his hands. | |
687 | \& # 'I wish I had never come here, and I don't want to see | |
688 | \& # no more magic,' he said, and fell silent." | |
689 | \& for (@ARGV) { | |
690 | \& s#^([^./])#./$1#; | |
691 | \& $_ .= "\e0"; | |
692 | \& } | |
693 | \& while (<>) { | |
694 | \& # now process $_ | |
695 | \& } | |
696 | .Ve | |
697 | .PP | |
698 | But be warned that users will not appreciate being unable to use \*(L"\-\*(R" | |
699 | to mean standard input, per the standard convention. | |
700 | .Sh "Paths as Opens" | |
701 | .IX Subsection "Paths as Opens" | |
702 | You've probably noticed how Perl's \f(CW\*(C`warn\*(C'\fR and \f(CW\*(C`die\*(C'\fR functions can | |
703 | produce messages like: | |
704 | .PP | |
705 | .Vb 1 | |
706 | \& Some warning at scriptname line 29, <FH> line 7. | |
707 | .Ve | |
708 | .PP | |
709 | That's because you opened a filehandle \s-1FH\s0, and had read in seven records | |
710 | from it. But what was the name of the file, not the handle? | |
711 | .PP | |
712 | If you aren't running with \f(CW\*(C`strict refs\*(C'\fR, or if you've turned them off | |
713 | temporarily, then all you have to do is this: | |
714 | .PP | |
715 | .Vb 4 | |
716 | \& open($path, "< $path") || die "can't open $path: $!"; | |
717 | \& while (<$path>) { | |
718 | \& # whatever | |
719 | \& } | |
720 | .Ve | |
721 | .PP | |
722 | Since you're using the pathname of the file as its handle, | |
723 | you'll get warnings more like | |
724 | .PP | |
725 | .Vb 1 | |
726 | \& Some warning at scriptname line 29, </etc/motd> line 7. | |
727 | .Ve | |
728 | .Sh "Single Argument Open" | |
729 | .IX Subsection "Single Argument Open" | |
730 | Remember how we said that Perl's open took two arguments? That was a | |
731 | passive prevarication. You see, it can also take just one argument. | |
732 | If and only if the variable is a global variable, not a lexical, you | |
733 | can pass \f(CW\*(C`open\*(C'\fR just one argument, the filehandle, and it will | |
734 | get the path from the global scalar variable of the same name. | |
735 | .PP | |
736 | .Vb 5 | |
737 | \& $FILE = "/etc/motd"; | |
738 | \& open FILE or die "can't open $FILE: $!"; | |
739 | \& while (<FILE>) { | |
740 | \& # whatever | |
741 | \& } | |
742 | .Ve | |
743 | .PP | |
744 | Why is this here? Someone has to cater to the hysterical porpoises. | |
745 | It's something that's been in Perl since the very beginning, if not | |
746 | before. | |
747 | .Sh "Playing with \s-1STDIN\s0 and \s-1STDOUT\s0" | |
748 | .IX Subsection "Playing with STDIN and STDOUT" | |
749 | One clever move with \s-1STDOUT\s0 is to explicitly close it when you're done | |
750 | with the program. | |
751 | .PP | |
752 | .Vb 1 | |
753 | \& END { close(STDOUT) || die "can't close stdout: $!" } | |
754 | .Ve | |
755 | .PP | |
756 | If you don't do this, and your program fills up the disk partition due | |
757 | to a command line redirection, it won't report the error or exit with a | |
758 | failure status. | |
759 | .PP | |
760 | You don't have to accept the \s-1STDIN\s0 and \s-1STDOUT\s0 you were given. You are | |
761 | welcome to reopen them if you'd like. | |
762 | .PP | |
763 | .Vb 2 | |
764 | \& open(STDIN, "< datafile") | |
765 | \& || die "can't open datafile: $!"; | |
766 | .Ve | |
767 | .PP | |
768 | .Vb 2 | |
769 | \& open(STDOUT, "> output") | |
770 | \& || die "can't open output: $!"; | |
771 | .Ve | |
772 | .PP | |
773 | And then these can be read directly or passed on to subprocesses. | |
774 | This makes it look as though the program were initially invoked | |
775 | with those redirections from the command line. | |
776 | .PP | |
777 | It's probably more interesting to connect these to pipes. For example: | |
778 | .PP | |
779 | .Vb 3 | |
780 | \& $pager = $ENV{PAGER} || "(less || more)"; | |
781 | \& open(STDOUT, "| $pager") | |
782 | \& || die "can't fork a pager: $!"; | |
783 | .Ve | |
784 | .PP | |
785 | This makes it appear as though your program were called with its stdout | |
786 | already piped into your pager. You can also use this kind of thing | |
787 | in conjunction with an implicit fork to yourself. You might do this | |
788 | if you would rather handle the post processing in your own program, | |
789 | just in a different process: | |
790 | .PP | |
791 | .Vb 4 | |
792 | \& head(100); | |
793 | \& while (<>) { | |
794 | \& print; | |
795 | \& } | |
796 | .Ve | |
797 | .PP | |
798 | .Vb 10 | |
799 | \& sub head { | |
800 | \& my $lines = shift || 20; | |
801 | \& return unless $pid = open(STDOUT, "|-"); | |
802 | \& die "cannot fork: $!" unless defined $pid; | |
803 | \& while (<STDIN>) { | |
804 | \& print; | |
805 | \& last if --$lines < 0; | |
806 | \& } | |
807 | \& exit; | |
808 | \& } | |
809 | .Ve | |
810 | .PP | |
811 | This technique can be applied to repeatedly push as many filters on your | |
812 | output stream as you wish. | |
813 | .SH "Other I/O Issues" | |
814 | .IX Header "Other I/O Issues" | |
815 | These topics aren't really arguments related to \f(CW\*(C`open\*(C'\fR or \f(CW\*(C`sysopen\*(C'\fR, | |
816 | but they do affect what you do with your open files. | |
817 | .Sh "Opening Non-File Files" | |
818 | .IX Subsection "Opening Non-File Files" | |
819 | When is a file not a file? Well, you could say when it exists but | |
820 | isn't a plain file. We'll check whether it's a symbolic link first, | |
821 | just in case. | |
822 | .PP | |
823 | .Vb 3 | |
824 | \& if (-l $file || ! -f _) { | |
825 | \& print "$file is not a plain file\en"; | |
826 | \& } | |
827 | .Ve | |
828 | .PP | |
829 | What other kinds of files are there than, well, files? Directories, | |
830 | symbolic links, named pipes, Unix-domain sockets, and block and character | |
831 | devices. Those are all files, too\*(--just not \fIplain\fR files. This isn't | |
832 | the same issue as being a text file. Not all text files are plain files. | |
833 | Not all plain files are text files. That's why there are separate \f(CW\*(C`\-f\*(C'\fR | |
834 | and \f(CW\*(C`\-T\*(C'\fR file tests. | |
835 | .PP | |
836 | To open a directory, you should use the \f(CW\*(C`opendir\*(C'\fR function, then | |
837 | process it with \f(CW\*(C`readdir\*(C'\fR, carefully restoring the directory | |
838 | name if necessary: | |
839 | .PP | |
840 | .Vb 5 | |
841 | \& opendir(DIR, $dirname) or die "can't opendir $dirname: $!"; | |
842 | \& while (defined($file = readdir(DIR))) { | |
843 | \& # do something with "$dirname/$file" | |
844 | \& } | |
845 | \& closedir(DIR); | |
846 | .Ve | |
847 | .PP | |
848 | If you want to process directories recursively, it's better to use the | |
849 | File::Find module. For example, this prints out all files recursively, | |
850 | and adds a slash to their names if the file is a directory. | |
851 | .PP | |
852 | .Vb 3 | |
853 | \& @ARGV = qw(.) unless @ARGV; | |
854 | \& use File::Find; | |
855 | \& find sub { print $File::Find::name, -d && '/', "\en" }, @ARGV; | |
856 | .Ve | |
857 | .PP | |
858 | This finds all bogus symbolic links beneath a particular directory: | |
859 | .PP | |
860 | .Vb 1 | |
861 | \& find sub { print "$File::Find::name\en" if -l && !-e }, $dir; | |
862 | .Ve | |
863 | .PP | |
864 | As you see, with symbolic links, you can just pretend that it is | |
865 | what it points to. Or, if you want to know \fIwhat\fR it points to, then | |
866 | \&\f(CW\*(C`readlink\*(C'\fR is called for: | |
867 | .PP | |
868 | .Vb 7 | |
869 | \& if (-l $file) { | |
870 | \& if (defined($whither = readlink($file))) { | |
871 | \& print "$file points to $whither\en"; | |
872 | \& } else { | |
873 | \& print "$file points nowhere: $!\en"; | |
874 | \& } | |
875 | \& } | |
876 | .Ve | |
877 | .PP | |
878 | Named pipes are a different matter. You pretend they're regular files, | |
879 | but their opens will normally block until there is both a reader and | |
880 | a writer. You can read more about them in \*(L"Named Pipes\*(R" in perlipc. | |
881 | Unix-domain sockets are rather different beasts as well; they're | |
882 | described in \*(L"Unix\-Domain \s-1TCP\s0 Clients and Servers\*(R" in perlipc. | |
883 | .PP | |
884 | When it comes to opening devices, it can be easy and it can be tricky. | |
885 | We'll assume that if you're opening up a block device, you know what | |
886 | you're doing. The character devices are more interesting. These are | |
887 | typically used for modems, mice, and some kinds of printers. This is | |
888 | described in \*(L"How do I read and write the serial port?\*(R" in perlfaq8. | |
889 | It's often enough to open them carefully: | |
890 | .PP | |
891 | .Vb 5 | |
892 | \& sysopen(TTYIN, "/dev/ttyS1", O_RDWR | O_NDELAY | O_NOCTTY) | |
893 | \& # (O_NOCTTY no longer needed on POSIX systems) | |
894 | \& or die "can't open /dev/ttyS1: $!"; | |
895 | \& open(TTYOUT, "+>&TTYIN") | |
896 | \& or die "can't dup TTYIN: $!"; | |
897 | .Ve | |
898 | .PP | |
899 | .Vb 1 | |
900 | \& $ofh = select(TTYOUT); $| = 1; select($ofh); | |
901 | .Ve | |
902 | .PP | |
903 | .Vb 2 | |
904 | \& print TTYOUT "+++at\e015"; | |
905 | \& $answer = <TTYIN>; | |
906 | .Ve | |
907 | .PP | |
908 | With descriptors that you haven't opened using \f(CW\*(C`sysopen\*(C'\fR, such as a | |
909 | socket, you can set them to be non-blocking using \f(CW\*(C`fcntl\*(C'\fR: | |
910 | .PP | |
911 | .Vb 3 | |
912 | \& use Fcntl; | |
913 | \& fcntl(Connection, F_SETFL, O_NONBLOCK) | |
914 | \& or die "can't set non blocking: $!"; | |
915 | .Ve | |
916 | .PP | |
917 | Rather than losing yourself in a morass of twisting, turning \f(CW\*(C`ioctl\*(C'\fRs, | |
918 | all dissimilar, if you're going to manipulate ttys, it's best to | |
919 | make calls out to the \fIstty\fR\|(1) program if you have it, or else use the | |
920 | portable \s-1POSIX\s0 interface. To figure this all out, you'll need to read the | |
921 | \&\fItermios\fR\|(3) manpage, which describes the \s-1POSIX\s0 interface to tty devices, | |
922 | and then \s-1POSIX\s0, which describes Perl's interface to \s-1POSIX\s0. There are | |
923 | also some high-level modules on \s-1CPAN\s0 that can help you with these games. | |
924 | Check out Term::ReadKey and Term::ReadLine. | |
925 | .PP | |
926 | What else can you open? To open a connection using sockets, you won't use | |
927 | one of Perl's two open functions. See | |
928 | \&\*(L"Sockets: Client/Server Communication\*(R" in perlipc for that. Here's an | |
929 | example. Once you have it, you can use \s-1FH\s0 as a bidirectional filehandle. | |
930 | .PP | |
931 | .Vb 2 | |
932 | \& use IO::Socket; | |
933 | \& local *FH = IO::Socket::INET->new("www.perl.com:80"); | |
934 | .Ve | |
935 | .PP | |
936 | For opening up a \s-1URL\s0, the \s-1LWP\s0 modules from \s-1CPAN\s0 are just what | |
937 | the doctor ordered. There's no filehandle interface, but | |
938 | it's still easy to get the contents of a document: | |
939 | .PP | |
940 | .Vb 2 | |
941 | \& use LWP::Simple; | |
942 | \& $doc = get('http://www.linpro.no/lwp/'); | |
943 | .Ve | |
944 | .Sh "Binary Files" | |
945 | .IX Subsection "Binary Files" | |
946 | On certain legacy systems with what could charitably be called terminally | |
947 | convoluted (some would say broken) I/O models, a file isn't a file\*(--at | |
948 | least, not with respect to the C standard I/O library. On these old | |
949 | systems whose libraries (but not kernels) distinguish between text and | |
950 | binary streams, to get files to behave properly you'll have to bend over | |
951 | backwards to avoid nasty problems. On such infelicitous systems, sockets | |
952 | and pipes are already opened in binary mode, and there is currently no | |
953 | way to turn that off. With files, you have more options. | |
954 | .PP | |
955 | Another option is to use the \f(CW\*(C`binmode\*(C'\fR function on the appropriate | |
956 | handles before doing regular I/O on them: | |
957 | .PP | |
958 | .Vb 3 | |
959 | \& binmode(STDIN); | |
960 | \& binmode(STDOUT); | |
961 | \& while (<STDIN>) { print } | |
962 | .Ve | |
963 | .PP | |
964 | Passing \f(CW\*(C`sysopen\*(C'\fR a non-standard flag option will also open the file in | |
965 | binary mode on those systems that support it. This is the equivalent of | |
966 | opening the file normally, then calling \f(CW\*(C`binmode\*(C'\fR on the handle. | |
967 | .PP | |
968 | .Vb 2 | |
969 | \& sysopen(BINDAT, "records.data", O_RDWR | O_BINARY) | |
970 | \& || die "can't open records.data: $!"; | |
971 | .Ve | |
972 | .PP | |
973 | Now you can use \f(CW\*(C`read\*(C'\fR and \f(CW\*(C`print\*(C'\fR on that handle without worrying | |
974 | about the system's non-standard I/O library breaking your data. It's not | |
975 | a pretty picture, but then, legacy systems seldom are. \s-1CP/M\s0 will be | |
976 | with us until the end of days, and after. | |
977 | .PP | |
978 | On systems with exotic I/O systems, it turns out that, astonishingly | |
979 | enough, even unbuffered I/O using \f(CW\*(C`sysread\*(C'\fR and \f(CW\*(C`syswrite\*(C'\fR might do | |
980 | sneaky data mutilation behind your back. | |
981 | .PP | |
982 | .Vb 3 | |
983 | \& while (sysread(WHENCE, $buf, 1024)) { | |
984 | \& syswrite(WHITHER, $buf, length($buf)); | |
985 | \& } | |
986 | .Ve | |
987 | .PP | |
988 | Depending on the vicissitudes of your runtime system, even these calls | |
989 | may need \f(CW\*(C`binmode\*(C'\fR or \f(CW\*(C`O_BINARY\*(C'\fR first. Systems known to be free of | |
990 | such difficulties include Unix, the Mac \s-1OS\s0, Plan 9, and Inferno. | |
991 | .Sh "File Locking" | |
992 | .IX Subsection "File Locking" | |
993 | In a multitasking environment, you may need to be careful not to collide | |
994 | with other processes who want to do I/O on the same files as others | |
995 | are working on. You'll often need shared or exclusive locks | |
996 | on files for reading and writing respectively. You might just | |
997 | pretend that only exclusive locks exist. | |
998 | .PP | |
999 | Never use the existence of a file \f(CW\*(C`\-e $file\*(C'\fR as a locking indication, | |
1000 | because there is a race condition between the test for the existence of | |
1001 | the file and its creation. Atomicity is critical. | |
1002 | .PP | |
1003 | Perl's most portable locking interface is via the \f(CW\*(C`flock\*(C'\fR function, | |
1004 | whose simplicity is emulated on systems that don't directly support it, | |
1005 | such as SysV or WindowsNT. The underlying semantics may affect how | |
1006 | it all works, so you should learn how \f(CW\*(C`flock\*(C'\fR is implemented on your | |
1007 | system's port of Perl. | |
1008 | .PP | |
1009 | File locking \fIdoes not\fR lock out another process that would like to | |
1010 | do I/O. A file lock only locks out others trying to get a lock, not | |
1011 | processes trying to do I/O. Because locks are advisory, if one process | |
1012 | uses locking and another doesn't, all bets are off. | |
1013 | .PP | |
1014 | By default, the \f(CW\*(C`flock\*(C'\fR call will block until a lock is granted. | |
1015 | A request for a shared lock will be granted as soon as there is no | |
1016 | exclusive locker. A request for an exclusive lock will be granted as | |
1017 | soon as there is no locker of any kind. Locks are on file descriptors, | |
1018 | not file names. You can't lock a file until you open it, and you can't | |
1019 | hold on to a lock once the file has been closed. | |
1020 | .PP | |
1021 | Here's how to get a blocking shared lock on a file, typically used | |
1022 | for reading: | |
1023 | .PP | |
1024 | .Vb 5 | |
1025 | \& use 5.004; | |
1026 | \& use Fcntl qw(:DEFAULT :flock); | |
1027 | \& open(FH, "< filename") or die "can't open filename: $!"; | |
1028 | \& flock(FH, LOCK_SH) or die "can't lock filename: $!"; | |
1029 | \& # now read from FH | |
1030 | .Ve | |
1031 | .PP | |
1032 | You can get a non-blocking lock by using \f(CW\*(C`LOCK_NB\*(C'\fR. | |
1033 | .PP | |
1034 | .Vb 2 | |
1035 | \& flock(FH, LOCK_SH | LOCK_NB) | |
1036 | \& or die "can't lock filename: $!"; | |
1037 | .Ve | |
1038 | .PP | |
1039 | This can be useful for producing more user-friendly behaviour by warning | |
1040 | if you're going to be blocking: | |
1041 | .PP | |
1042 | .Vb 10 | |
1043 | \& use 5.004; | |
1044 | \& use Fcntl qw(:DEFAULT :flock); | |
1045 | \& open(FH, "< filename") or die "can't open filename: $!"; | |
1046 | \& unless (flock(FH, LOCK_SH | LOCK_NB)) { | |
1047 | \& $| = 1; | |
1048 | \& print "Waiting for lock..."; | |
1049 | \& flock(FH, LOCK_SH) or die "can't lock filename: $!"; | |
1050 | \& print "got it.\en" | |
1051 | \& } | |
1052 | \& # now read from FH | |
1053 | .Ve | |
1054 | .PP | |
1055 | To get an exclusive lock, typically used for writing, you have to be | |
1056 | careful. We \f(CW\*(C`sysopen\*(C'\fR the file so it can be locked before it gets | |
1057 | emptied. You can get a nonblocking version using \f(CW\*(C`LOCK_EX | LOCK_NB\*(C'\fR. | |
1058 | .PP | |
1059 | .Vb 9 | |
1060 | \& use 5.004; | |
1061 | \& use Fcntl qw(:DEFAULT :flock); | |
1062 | \& sysopen(FH, "filename", O_WRONLY | O_CREAT) | |
1063 | \& or die "can't open filename: $!"; | |
1064 | \& flock(FH, LOCK_EX) | |
1065 | \& or die "can't lock filename: $!"; | |
1066 | \& truncate(FH, 0) | |
1067 | \& or die "can't truncate filename: $!"; | |
1068 | \& # now write to FH | |
1069 | .Ve | |
1070 | .PP | |
1071 | Finally, due to the uncounted millions who cannot be dissuaded from | |
1072 | wasting cycles on useless vanity devices called hit counters, here's | |
1073 | how to increment a number in a file safely: | |
1074 | .PP | |
1075 | .Vb 1 | |
1076 | \& use Fcntl qw(:DEFAULT :flock); | |
1077 | .Ve | |
1078 | .PP | |
1079 | .Vb 6 | |
1080 | \& sysopen(FH, "numfile", O_RDWR | O_CREAT) | |
1081 | \& or die "can't open numfile: $!"; | |
1082 | \& # autoflush FH | |
1083 | \& $ofh = select(FH); $| = 1; select ($ofh); | |
1084 | \& flock(FH, LOCK_EX) | |
1085 | \& or die "can't write-lock numfile: $!"; | |
1086 | .Ve | |
1087 | .PP | |
1088 | .Vb 5 | |
1089 | \& $num = <FH> || 0; | |
1090 | \& seek(FH, 0, 0) | |
1091 | \& or die "can't rewind numfile : $!"; | |
1092 | \& print FH $num+1, "\en" | |
1093 | \& or die "can't write numfile: $!"; | |
1094 | .Ve | |
1095 | .PP | |
1096 | .Vb 4 | |
1097 | \& truncate(FH, tell(FH)) | |
1098 | \& or die "can't truncate numfile: $!"; | |
1099 | \& close(FH) | |
1100 | \& or die "can't close numfile: $!"; | |
1101 | .Ve | |
1102 | .Sh "\s-1IO\s0 Layers" | |
1103 | .IX Subsection "IO Layers" | |
1104 | In Perl 5.8.0 a new I/O framework called \*(L"PerlIO\*(R" was introduced. | |
1105 | This is a new \*(L"plumbing\*(R" for all the I/O happening in Perl; for the | |
1106 | most part everything will work just as it did, but PerlIO also brought in | |
1107 | some new features, like the capability to think of I/O as \*(L"layers\*(R". | |
1108 | One I/O layer may in addition to just moving the data also do | |
1109 | transformations on the data. Such transformations may include | |
1110 | compression and decompression, encryption and decryption, and transforming | |
1111 | between various character encodings. | |
1112 | .PP | |
1113 | Full discussion about the features of PerlIO is out of scope for this | |
1114 | tutorial, but here is how to recognize the layers being used: | |
1115 | .IP "\(bu" 4 | |
1116 | The three\-(or more)\-argument form of \f(CW\*(C`open()\*(C'\fR is being used and the | |
1117 | second argument contains something else in addition to the usual | |
1118 | \&\f(CW'<'\fR, \f(CW'>'\fR, \f(CW'>>'\fR, \f(CW'|'\fR and their variants, | |
1119 | for example: | |
1120 | .Sp | |
1121 | .Vb 1 | |
1122 | \& open(my $fh, "<:utf8", $fn); | |
1123 | .Ve | |
1124 | .IP "\(bu" 4 | |
1125 | The two-argument form of \f(CW\*(C`binmode()\*(C'\fR is being used, for example: | |
1126 | .Sp | |
1127 | .Vb 1 | |
1128 | \& binmode($fh, ":encoding(utf16)"); | |
1129 | .Ve | |
1130 | .PP | |
1131 | For more detailed discussion about PerlIO see perlio; | |
1132 | for more detailed discussion about Unicode and I/O see perluniintro. | |
1133 | .SH "SEE ALSO" | |
1134 | .IX Header "SEE ALSO" | |
1135 | The \f(CW\*(C`open\*(C'\fR and \f(CW\*(C`sysopen\*(C'\fR functions in \fIperlfunc\fR\|(1); | |
1136 | the standard \fIopen\fR\|(2), \fIdup\fR\|(2), \fIfopen\fR\|(3), and \fIfdopen\fR\|(3) manpages; | |
1137 | the \s-1POSIX\s0 documentation. | |
1138 | .SH "AUTHOR and COPYRIGHT" | |
1139 | .IX Header "AUTHOR and COPYRIGHT" | |
1140 | Copyright 1998 Tom Christiansen. | |
1141 | .PP | |
1142 | This documentation is free; you can redistribute it and/or modify it | |
1143 | under the same terms as Perl itself. | |
1144 | .PP | |
1145 | Irrespective of its distribution, all code examples in these files are | |
1146 | hereby placed into the public domain. You are permitted and | |
1147 | encouraged to use this code in your own programs for fun or for profit | |
1148 | as you see fit. A simple comment in the code giving credit would be | |
1149 | courteous but is not required. | |
1150 | .SH "HISTORY" | |
1151 | .IX Header "HISTORY" | |
1152 | First release: Sat Jan 9 08:09:11 \s-1MST\s0 1999 |