| 1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
| 2 | .\" |
| 3 | .\" Standard preamble: |
| 4 | .\" ======================================================================== |
| 5 | .de Sh \" Subsection heading |
| 6 | .br |
| 7 | .if t .Sp |
| 8 | .ne 5 |
| 9 | .PP |
| 10 | \fB\\$1\fR |
| 11 | .PP |
| 12 | .. |
| 13 | .de Sp \" Vertical space (when we can't use .PP) |
| 14 | .if t .sp .5v |
| 15 | .if n .sp |
| 16 | .. |
| 17 | .de Vb \" Begin verbatim text |
| 18 | .ft CW |
| 19 | .nf |
| 20 | .ne \\$1 |
| 21 | .. |
| 22 | .de Ve \" End verbatim text |
| 23 | .ft R |
| 24 | .fi |
| 25 | .. |
| 26 | .\" Set up some character translations and predefined strings. \*(-- will |
| 27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left |
| 28 | .\" double quote, and \*(R" will give a right double quote. | will give a |
| 29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to |
| 30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' |
| 31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. |
| 32 | .tr \(*W-|\(bv\*(Tr |
| 33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' |
| 34 | .ie n \{\ |
| 35 | . ds -- \(*W- |
| 36 | . ds PI pi |
| 37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch |
| 38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch |
| 39 | . ds L" "" |
| 40 | . ds R" "" |
| 41 | . ds C` "" |
| 42 | . ds C' "" |
| 43 | 'br\} |
| 44 | .el\{\ |
| 45 | . ds -- \|\(em\| |
| 46 | . ds PI \(*p |
| 47 | . ds L" `` |
| 48 | . ds R" '' |
| 49 | 'br\} |
| 50 | .\" |
| 51 | .\" If the F register is turned on, we'll generate index entries on stderr for |
| 52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index |
| 53 | .\" entries marked with X<> in POD. Of course, you'll have to process the |
| 54 | .\" output yourself in some meaningful fashion. |
| 55 | .if \nF \{\ |
| 56 | . de IX |
| 57 | . tm Index:\\$1\t\\n%\t"\\$2" |
| 58 | .. |
| 59 | . nr % 0 |
| 60 | . rr F |
| 61 | .\} |
| 62 | .\" |
| 63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes |
| 64 | .\" way too many mistakes in technical documents. |
| 65 | .hy 0 |
| 66 | .if n .na |
| 67 | .\" |
| 68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). |
| 69 | .\" Fear. Run. Save yourself. No user-serviceable parts. |
| 70 | . \" fudge factors for nroff and troff |
| 71 | .if n \{\ |
| 72 | . ds #H 0 |
| 73 | . ds #V .8m |
| 74 | . ds #F .3m |
| 75 | . ds #[ \f1 |
| 76 | . ds #] \fP |
| 77 | .\} |
| 78 | .if t \{\ |
| 79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) |
| 80 | . ds #V .6m |
| 81 | . ds #F 0 |
| 82 | . ds #[ \& |
| 83 | . ds #] \& |
| 84 | .\} |
| 85 | . \" simple accents for nroff and troff |
| 86 | .if n \{\ |
| 87 | . ds ' \& |
| 88 | . ds ` \& |
| 89 | . ds ^ \& |
| 90 | . ds , \& |
| 91 | . ds ~ ~ |
| 92 | . ds / |
| 93 | .\} |
| 94 | .if t \{\ |
| 95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" |
| 96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' |
| 97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' |
| 98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' |
| 99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' |
| 100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' |
| 101 | .\} |
| 102 | . \" troff and (daisy-wheel) nroff accents |
| 103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' |
| 104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' |
| 105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] |
| 106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' |
| 107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' |
| 108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] |
| 109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] |
| 110 | .ds ae a\h'-(\w'a'u*4/10)'e |
| 111 | .ds Ae A\h'-(\w'A'u*4/10)'E |
| 112 | . \" corrections for vroff |
| 113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' |
| 114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' |
| 115 | . \" for low resolution devices (crt and lpr) |
| 116 | .if \n(.H>23 .if \n(.V>19 \ |
| 117 | \{\ |
| 118 | . ds : e |
| 119 | . ds 8 ss |
| 120 | . ds o a |
| 121 | . ds d- d\h'-1'\(ga |
| 122 | . ds D- D\h'-1'\(hy |
| 123 | . ds th \o'bp' |
| 124 | . ds Th \o'LP' |
| 125 | . ds ae ae |
| 126 | . ds Ae AE |
| 127 | .\} |
| 128 | .rm #[ #] #H #V #F C |
| 129 | .\" ======================================================================== |
| 130 | .\" |
| 131 | .IX Title "PERLOPENTUT 1" |
| 132 | .TH PERLOPENTUT 1 "2006-01-07" "perl v5.8.8" "Perl Programmers Reference Guide" |
| 133 | .SH "NAME" |
| 134 | perlopentut \- tutorial on opening things in Perl |
| 135 | .SH "DESCRIPTION" |
| 136 | .IX Header "DESCRIPTION" |
| 137 | Perl has two simple, built-in ways to open files: the shell way for |
| 138 | convenience, and the C way for precision. The shell way also has 2\- and |
| 139 | 3\-argument forms, which have different semantics for handling the filename. |
| 140 | The choice is yours. |
| 141 | .SH "Open A\*` la shell" |
| 142 | .IX Header "Open A` la shell" |
| 143 | Perl's \f(CW\*(C`open\*(C'\fR function was designed to mimic the way command-line |
| 144 | redirection in the shell works. Here are some basic examples |
| 145 | from the shell: |
| 146 | .PP |
| 147 | .Vb 6 |
| 148 | \& $ myprogram file1 file2 file3 |
| 149 | \& $ myprogram < inputfile |
| 150 | \& $ myprogram > outputfile |
| 151 | \& $ myprogram >> outputfile |
| 152 | \& $ myprogram | otherprogram |
| 153 | \& $ otherprogram | myprogram |
| 154 | .Ve |
| 155 | .PP |
| 156 | And here are some more advanced examples: |
| 157 | .PP |
| 158 | .Vb 4 |
| 159 | \& $ otherprogram | myprogram f1 - f2 |
| 160 | \& $ otherprogram 2>&1 | myprogram - |
| 161 | \& $ myprogram <&3 |
| 162 | \& $ myprogram >&4 |
| 163 | .Ve |
| 164 | .PP |
| 165 | Programmers accustomed to constructs like those above can take comfort |
| 166 | in learning that Perl directly supports these familiar constructs using |
| 167 | virtually the same syntax as the shell. |
| 168 | .Sh "Simple Opens" |
| 169 | .IX Subsection "Simple Opens" |
| 170 | The \f(CW\*(C`open\*(C'\fR function takes two arguments: the first is a filehandle, |
| 171 | and the second is a single string comprising both what to open and how |
| 172 | to open it. \f(CW\*(C`open\*(C'\fR returns true when it works, and when it fails, |
| 173 | returns a false value and sets the special variable \f(CW$!\fR to reflect |
| 174 | the system error. If the filehandle was previously opened, it will |
| 175 | be implicitly closed first. |
| 176 | .PP |
| 177 | For example: |
| 178 | .PP |
| 179 | .Vb 4 |
| 180 | \& open(INFO, "datafile") || die("can't open datafile: $!"); |
| 181 | \& open(INFO, "< datafile") || die("can't open datafile: $!"); |
| 182 | \& open(RESULTS,"> runstats") || die("can't open runstats: $!"); |
| 183 | \& open(LOG, ">> logfile ") || die("can't open logfile: $!"); |
| 184 | .Ve |
| 185 | .PP |
| 186 | If you prefer the low-punctuation version, you could write that this way: |
| 187 | .PP |
| 188 | .Vb 3 |
| 189 | \& open INFO, "< datafile" or die "can't open datafile: $!"; |
| 190 | \& open RESULTS,"> runstats" or die "can't open runstats: $!"; |
| 191 | \& open LOG, ">> logfile " or die "can't open logfile: $!"; |
| 192 | .Ve |
| 193 | .PP |
| 194 | A few things to notice. First, the leading less-than is optional. |
| 195 | If omitted, Perl assumes that you want to open the file for reading. |
| 196 | .PP |
| 197 | Note also that the first example uses the \f(CW\*(C`||\*(C'\fR logical operator, and the |
| 198 | second uses \f(CW\*(C`or\*(C'\fR, which has lower precedence. Using \f(CW\*(C`||\*(C'\fR in the latter |
| 199 | examples would effectively mean |
| 200 | .PP |
| 201 | .Vb 1 |
| 202 | \& open INFO, ( "< datafile" || die "can't open datafile: $!" ); |
| 203 | .Ve |
| 204 | .PP |
| 205 | which is definitely not what you want. |
| 206 | .PP |
| 207 | The other important thing to notice is that, just as in the shell, |
| 208 | any whitespace before or after the filename is ignored. This is good, |
| 209 | because you wouldn't want these to do different things: |
| 210 | .PP |
| 211 | .Vb 3 |
| 212 | \& open INFO, "<datafile" |
| 213 | \& open INFO, "< datafile" |
| 214 | \& open INFO, "<  datafile" |
| 215 | .Ve |
| 216 | .PP |
| 217 | Ignoring surrounding whitespace also helps for when you read a filename |
| 218 | in from a different file, and forget to trim it before opening: |
| 219 | .PP |
| 220 | .Vb 2 |
| 221 | \& $filename = <INFO>; # oops, \en still there |
| 222 | \& open(EXTRA, "< $filename") || die "can't open $filename: $!"; |
| 223 | .Ve |
| 224 | .PP |
| 225 | This is not a bug, but a feature. Because \f(CW\*(C`open\*(C'\fR mimics the shell in |
| 226 | its style of using redirection arrows to specify how to open the file, it |
| 227 | also does so with respect to extra whitespace around the filename itself |
| 228 | as well. For accessing files with naughty names, see |
| 229 | \&\*(L"Dispelling the Dweomer\*(R". |
| 230 | .PP |
| 231 | There is also a 3\-argument version of \f(CW\*(C`open\*(C'\fR, which lets you put the |
| 232 | special redirection characters into their own argument: |
| 233 | .PP |
| 234 | .Vb 1 |
| 235 | \& open( INFO, ">", $datafile ) || die "Can't create $datafile: $!"; |
| 236 | .Ve |
| 237 | .PP |
| 238 | In this case, the filename to open is the actual string in \f(CW$datafile\fR, |
| 239 | so you don't have to worry about \f(CW$datafile\fR containing characters |
| 240 | that might influence the open mode, or whitespace at the beginning of |
| 241 | the filename that would be absorbed in the 2\-argument version. Also, |
| 242 | any reduction of unnecessary string interpolation is a good thing. |
| 243 | .Sh "Indirect Filehandles" |
| 244 | .IX Subsection "Indirect Filehandles" |
| 245 | \&\f(CW\*(C`open\*(C'\fR's first argument can be a reference to a filehandle. As of |
| 246 | perl 5.6.0, if the argument is uninitialized, Perl will automatically |
| 247 | create a filehandle and put a reference to it in the first argument, |
| 248 | like so: |
| 249 | .PP |
| 250 | .Vb 5 |
| 251 | \& open( my $in, $infile ) or die "Couldn't read $infile: $!"; |
| 252 | \& while ( <$in> ) { |
| 253 | \& # do something with $_ |
| 254 | \& } |
| 255 | \& close $in; |
| 256 | .Ve |
| 257 | .PP |
| 258 | Indirect filehandles make namespace management easier. Since filehandles |
| 259 | are global to the current package, two subroutines trying to open |
| 260 | \&\f(CW\*(C`INFILE\*(C'\fR will clash. With two functions opening indirect filehandles |
| 261 | like \f(CW\*(C`my $infile\*(C'\fR, there's no clash and no need to worry about future |
| 262 | conflicts. |
| 263 | .PP |
| 264 | Another convenient behavior is that an indirect filehandle automatically |
| 265 | closes when it goes out of scope or when you undefine it: |
| 266 | .PP |
| 267 | .Vb 4 |
| 268 | \& sub firstline { |
| 269 | \& open( my $in, shift ) && return scalar <$in>; |
| 270 | \& # no close() required |
| 271 | \& } |
| 272 | .Ve |
| 273 | .Sh "Pipe Opens" |
| 274 | .IX Subsection "Pipe Opens" |
| 275 | In C, when you want to open a file using the standard I/O library, |
| 276 | you use the \f(CW\*(C`fopen\*(C'\fR function, but when opening a pipe, you use the |
| 277 | \&\f(CW\*(C`popen\*(C'\fR function. But in the shell, you just use a different redirection |
| 278 | character. That's also the case for Perl. The \f(CW\*(C`open\*(C'\fR call |
| 279 | remains the same\*(--just its argument differs. |
| 280 | .PP |
| 281 | If the leading character is a pipe symbol, \f(CW\*(C`open\*(C'\fR starts up a new |
| 282 | command and opens a write-only filehandle leading into that command. |
| 283 | This lets you write into that handle and have what you write show up on |
| 284 | that command's standard input. For example: |
| 285 | .PP |
| 286 | .Vb 3 |
| 287 | \& open(PRINTER, "| lpr -Plp1") || die "can't run lpr: $!"; |
| 288 | \& print PRINTER "stuff\en"; |
| 289 | \& close(PRINTER) || die "can't close lpr: $!"; |
| 290 | .Ve |
| 291 | .PP |
| 292 | If the trailing character is a pipe, you start up a new command and open a |
| 293 | read-only filehandle leading out of that command. This lets whatever that |
| 294 | command writes to its standard output show up on your handle for reading. |
| 295 | For example: |
| 296 | .PP |
| 297 | .Vb 3 |
| 298 | \& open(NET, "netstat -i -n |") || die "can't fork netstat: $!"; |
| 299 | \& while (<NET>) { } # do something with input |
| 300 | \& close(NET) || die "can't close netstat: $!"; |
| 301 | .Ve |
| 302 | .PP |
| 303 | What happens if you try to open a pipe to or from a non-existent |
| 304 | command? If possible, Perl will detect the failure and set \f(CW$!\fR as |
| 305 | usual. But if the command contains special shell characters, such as |
| 306 | \&\f(CW\*(C`>\*(C'\fR or \f(CW\*(C`*\*(C'\fR, called 'metacharacters', Perl does not execute the |
| 307 | command directly. Instead, Perl runs the shell, which then tries to |
| 308 | run the command. This means that it's the shell that gets the error |
| 309 | indication. In such a case, the \f(CW\*(C`open\*(C'\fR call will only indicate |
| 310 | failure if Perl can't even run the shell. See \*(L"How can I capture \s-1STDERR\s0 from an external command?\*(R" in perlfaq8 to see how to cope with |
| 311 | this. There's also an explanation in perlipc. |
| 312 | .PP |
| 313 | If you would like to open a bidirectional pipe, the IPC::Open2 |
| 314 | library will handle this for you. Check out |
| 315 | \&\*(L"Bidirectional Communication with Another Process\*(R" in perlipc. |
| 316 | .Sh "The Minus File" |
| 317 | .IX Subsection "The Minus File" |
| 318 | Again following the lead of the standard shell utilities, Perl's |
| 319 | \&\f(CW\*(C`open\*(C'\fR function treats a file whose name is a single minus, \*(L"\-\*(R", in a |
| 320 | special way. If you open minus for reading, it really means to access |
| 321 | the standard input. If you open minus for writing, it really means to |
| 322 | access the standard output. |
| 323 | .PP |
| 324 | If minus can be used as the default input or default output, what happens |
| 325 | if you open a pipe into or out of minus? What's the default command it |
| 326 | would run? The same script as you're currently running! This is actually |
| 327 | a stealth \f(CW\*(C`fork\*(C'\fR hidden inside an \f(CW\*(C`open\*(C'\fR call. See |
| 328 | \&\*(L"Safe Pipe Opens\*(R" in perlipc for details. |
| 329 | .Sh "Mixing Reads and Writes" |
| 330 | .IX Subsection "Mixing Reads and Writes" |
| 331 | It is possible to specify both read and write access. All you do is |
| 332 | add a \*(L"+\*(R" symbol in front of the redirection. But as in the shell, |
| 333 | using a less-than on a file never creates a new file; it only opens an |
| 334 | existing one. On the other hand, using a greater-than always clobbers |
| 335 | (truncates to zero length) an existing file, or creates a brand-new one |
| 336 | if there isn't an old one. Adding a \*(L"+\*(R" for read-write doesn't affect |
| 337 | whether it only works on existing files or always clobbers existing ones. |
| 338 | .PP |
| 339 | .Vb 2 |
| 340 | \& open(WTMP, "+< /usr/adm/wtmp") |
| 341 | \& || die "can't open /usr/adm/wtmp: $!"; |
| 342 | .Ve |
| 343 | .PP |
| 344 | .Vb 2 |
| 345 | \& open(SCREEN, "+> lkscreen") |
| 346 | \& || die "can't open lkscreen: $!"; |
| 347 | .Ve |
| 348 | .PP |
| 349 | .Vb 2 |
| 350 | \& open(LOGFILE, "+>> /var/log/applog") |
| 351 | \& || die "can't open /var/log/applog: $!"; |
| 352 | .Ve |
| 353 | .PP |
| 354 | The first one won't create a new file, and the second one will always |
| 355 | clobber an old one. The third one will create a new file if necessary |
| 356 | and not clobber an old one, and it will allow you to read at any point |
| 357 | in the file, but all writes will always go to the end. In short, |
| 358 | the first case is substantially more common than the second and third |
| 359 | cases, which are almost always wrong. (If you know C, the plus in |
| 360 | Perl's \f(CW\*(C`open\*(C'\fR is historically derived from the one in C's fopen(3S), |
| 361 | which it ultimately calls.) |
| 362 | .PP |
| 363 | In fact, when it comes to updating a file, unless you're working on |
| 364 | a binary file as in the \s-1WTMP\s0 case above, you probably don't want to |
| 365 | use this approach for updating. Instead, Perl's \fB\-i\fR flag comes to |
| 366 | the rescue. The following command takes all the C, \*(C+, or yacc source |
| 367 | or header files and changes all their foo's to bar's, leaving |
| 368 | the old version in the original filename with a \*(L".orig\*(R" tacked |
| 369 | on the end: |
| 370 | .PP |
| 371 | .Vb 1 |
| 372 | \& $ perl -i.orig -pe 's/\ebfoo\eb/bar/g' *.[Cchy] |
| 373 | .Ve |
| 374 | .PP |
| 375 | This is a short cut for some renaming games that are really |
| 376 | the best way to update textfiles. See the second question in |
| 377 | perlfaq5 for more details. |
| 378 | .Sh "Filters" |
| 379 | .IX Subsection "Filters" |
| 380 | One of the most common uses for \f(CW\*(C`open\*(C'\fR is one you never |
| 381 | even notice. When you process the \s-1ARGV\s0 filehandle using |
| 382 | \&\f(CW\*(C`<ARGV>\*(C'\fR, Perl actually does an implicit open |
| 383 | on each file in \f(CW@ARGV\fR. Thus a program called like this: |
| 384 | .PP |
| 385 | .Vb 1 |
| 386 | \& $ myprogram file1 file2 file3 |
| 387 | .Ve |
| 388 | .PP |
| 389 | can have all its files opened and processed one at a time |
| 390 | using a construct no more complex than: |
| 391 | .PP |
| 392 | .Vb 3 |
| 393 | \& while (<>) { |
| 394 | \& # do something with $_ |
| 395 | \& } |
| 396 | .Ve |
| 397 | .PP |
| 398 | If \f(CW@ARGV\fR is empty when the loop first begins, Perl pretends you've opened |
| 399 | up minus, that is, the standard input. In fact, \f(CW$ARGV\fR, the currently |
| 400 | open file during \f(CW\*(C`<ARGV>\*(C'\fR processing, is even set to \*(L"\-\*(R" |
| 401 | in these circumstances. |
| 402 | .PP |
| 403 | You are welcome to pre-process your \f(CW@ARGV\fR before starting the loop to |
| 404 | make sure it's to your liking. One reason to do this might be to remove |
| 405 | command options beginning with a minus. While you can always roll the |
| 406 | simple ones by hand, the Getopt modules are good for this: |
| 407 | .PP |
| 408 | .Vb 1 |
| 409 | \& use Getopt::Std; |
| 410 | .Ve |
| 411 | .PP |
| 412 | .Vb 2 |
| 413 | \& # -v, -D, -o ARG, sets $opt_v, $opt_D, $opt_o |
| 414 | \& getopts("vDo:"); |
| 415 | .Ve |
| 416 | .PP |
| 417 | .Vb 2 |
| 418 | \& # -v, -D, -o ARG, sets $args{v}, $args{D}, $args{o} |
| 419 | \& getopts("vDo:", \e%args); |
| 420 | .Ve |
| 421 | .PP |
| 422 | Or the standard Getopt::Long module to permit named arguments: |
| 423 | .PP |
| 424 | .Vb 5 |
| 425 | \& use Getopt::Long; |
| 426 | \& GetOptions( "verbose" => \e$verbose, # --verbose |
| 427 | \& "Debug" => \e$debug, # --Debug |
| 428 | \& "output=s" => \e$output ); |
| 429 | \& # --output=somestring or --output somestring |
| 430 | .Ve |
| 431 | .PP |
| 432 | Another reason for preprocessing arguments is to make an empty |
| 433 | argument list default to all files: |
| 434 | .PP |
| 435 | .Vb 1 |
| 436 | \& @ARGV = glob("*") unless @ARGV; |
| 437 | .Ve |
| 438 | .PP |
| 439 | You could even filter out all but plain text files. This is a bit |
| 440 | silent, of course, and you might prefer to mention them on the way. |
| 441 | .PP |
| 442 | .Vb 1 |
| 443 | \& @ARGV = grep { -f && -T } @ARGV; |
| 444 | .Ve |
| 445 | .PP |
| 446 | If you're using the \fB\-n\fR or \fB\-p\fR command-line options, you |
| 447 | should put changes to \f(CW@ARGV\fR in a \f(CW\*(C`BEGIN{}\*(C'\fR block. |
| 448 | .PP |
| 449 | Remember that a normal \f(CW\*(C`open\*(C'\fR has special properties, in that it might |
| 450 | call fopen(3S) or it might call popen(3S), depending on what its |
| 451 | argument looks like; that's why it's sometimes called \*(L"magic open\*(R". |
| 452 | Here's an example: |
| 453 | .PP |
| 454 | .Vb 3 |
| 455 | \& $pwdinfo = `domainname` =~ /^(\e(none\e))?$/ |
| 456 | \& ? '< /etc/passwd' |
| 457 | \& : 'ypcat passwd |'; |
| 458 | .Ve |
| 459 | .PP |
| 460 | .Vb 2 |
| 461 | \& open(PWD, $pwdinfo) |
| 462 | \& or die "can't open $pwdinfo: $!"; |
| 463 | .Ve |
| 464 | .PP |
| 465 | This sort of thing also comes into play in filter processing. Because |
| 466 | \&\f(CW\*(C`<ARGV>\*(C'\fR processing employs the normal, shell-style Perl \f(CW\*(C`open\*(C'\fR, |
| 467 | it respects all the special things we've already seen: |
| 468 | .PP |
| 469 | .Vb 1 |
| 470 | \& $ myprogram f1 "cmd1|" - f2 "cmd2|" f3 < tmpfile |
| 471 | .Ve |
| 472 | .PP |
| 473 | That program will read from the file \fIf1\fR, the process \fIcmd1\fR, standard |
| 474 | input (\fItmpfile\fR in this case), the \fIf2\fR file, the \fIcmd2\fR command, |
| 475 | and finally the \fIf3\fR file. |
| 476 | .PP |
| 477 | Yes, this also means that if you have files named \*(L"\-\*(R" (and so on) in |
| 478 | your directory, they won't be processed as literal files by \f(CW\*(C`open\*(C'\fR. |
| 479 | You'll need to pass them as \*(L"./\-\*(R", much as you would for the \fIrm\fR program, |
| 480 | or you could use \f(CW\*(C`sysopen\*(C'\fR as described below. |
| 481 | .PP |
| 482 | One of the more interesting applications is to change files of a certain |
| 483 | name into pipes. For example, to autoprocess gzipped or compressed |
| 484 | files by decompressing them with \fIgzip\fR: |
| 485 | .PP |
| 486 | .Vb 1 |
| 487 | \& @ARGV = map { /\e.(gz|Z)$/ ? "gzip -dc $_ |" : $_ } @ARGV; |
| 488 | .Ve |
| 489 | .PP |
| 490 | Or, if you have the \fI\s-1GET\s0\fR program installed from \s-1LWP\s0, |
| 491 | you can fetch URLs before processing them: |
| 492 | .PP |
| 493 | .Vb 1 |
| 494 | \& @ARGV = map { m#^\ew+://# ? "GET $_ |" : $_ } @ARGV; |
| 495 | .Ve |
| 496 | .PP |
| 497 | It's not for nothing that this is called magic \f(CW\*(C`<ARGV>\*(C'\fR. |
| 498 | Pretty nifty, eh? |
| 499 | .SH "Open A\*` la C" |
| 500 | .IX Header "Open A` la C" |
| 501 | If you want the convenience of the shell, then Perl's \f(CW\*(C`open\*(C'\fR is |
| 502 | definitely the way to go. On the other hand, if you want finer precision |
| 503 | than C's simplistic fopen(3S) provides you should look to Perl's |
| 504 | \&\f(CW\*(C`sysopen\*(C'\fR, which is a direct hook into the \fIopen\fR\|(2) system call. |
| 505 | That does mean it's a bit more involved, but that's the price of |
| 506 | precision. |
| 507 | .PP |
| 508 | \&\f(CW\*(C`sysopen\*(C'\fR takes 3 (or 4) arguments. |
| 509 | .PP |
| 510 | .Vb 1 |
| 511 | \& sysopen HANDLE, PATH, FLAGS, [MASK] |
| 512 | .Ve |
| 513 | .PP |
| 514 | The \s-1HANDLE\s0 argument is a filehandle just as with \f(CW\*(C`open\*(C'\fR. The \s-1PATH\s0 is |
| 515 | a literal path, one that doesn't pay attention to any greater-thans or |
| 516 | less-thans or pipes or minuses, nor ignore whitespace. If it's there, |
| 517 | it's part of the path. The \s-1FLAGS\s0 argument contains one or more values |
| 518 | derived from the Fcntl module that have been or'd together using the |
| 519 | bitwise \*(L"|\*(R" operator. The final argument, the \s-1MASK\s0, is optional; if |
| 520 | present, it is combined with the user's current umask for the creation |
| 521 | mode of the file. You should usually omit this. |
| 522 | .PP |
| 523 | Although the traditional values of read\-only, write\-only, and read-write |
| 524 | are 0, 1, and 2 respectively, this is known not to hold true on some |
| 525 | systems. Instead, it's best to load in the appropriate constants first |
| 526 | from the Fcntl module, which supplies the following standard flags: |
| 527 | .PP |
| 528 | .Vb 8 |
| 529 | \& O_RDONLY Read only |
| 530 | \& O_WRONLY Write only |
| 531 | \& O_RDWR Read and write |
| 532 | \& O_CREAT Create the file if it doesn't exist |
| 533 | \& O_EXCL Fail if the file already exists |
| 534 | \& O_APPEND Append to the file |
| 535 | \& O_TRUNC Truncate the file |
| 536 | \& O_NONBLOCK Non-blocking access |
| 537 | .Ve |
| 538 | .PP |
| 539 | Less common flags that are sometimes available on some operating |
| 540 | systems include \f(CW\*(C`O_BINARY\*(C'\fR, \f(CW\*(C`O_TEXT\*(C'\fR, \f(CW\*(C`O_SHLOCK\*(C'\fR, \f(CW\*(C`O_EXLOCK\*(C'\fR, |
| 541 | \&\f(CW\*(C`O_DEFER\*(C'\fR, \f(CW\*(C`O_SYNC\*(C'\fR, \f(CW\*(C`O_ASYNC\*(C'\fR, \f(CW\*(C`O_DSYNC\*(C'\fR, \f(CW\*(C`O_RSYNC\*(C'\fR, |
| 542 | \&\f(CW\*(C`O_NOCTTY\*(C'\fR, \f(CW\*(C`O_NDELAY\*(C'\fR and \f(CW\*(C`O_LARGEFILE\*(C'\fR. Consult your \fIopen\fR\|(2) |
| 543 | manpage or its local equivalent for details. (Note: starting from |
| 544 | Perl release 5.6 the \f(CW\*(C`O_LARGEFILE\*(C'\fR flag, if available, is automatically |
| 545 | added to the \fIsysopen()\fR flags because large files are the default.) |
| 546 | .PP |
| 547 | Here's how to use \f(CW\*(C`sysopen\*(C'\fR to emulate the simple \f(CW\*(C`open\*(C'\fR calls we had |
| 548 | before. We'll omit the \f(CW\*(C`|| die $!\*(C'\fR checks for clarity, but make sure |
| 549 | you always check the return values in real code. These aren't quite |
| 550 | the same, since \f(CW\*(C`open\*(C'\fR will trim leading and trailing whitespace, |
| 551 | but you'll get the idea. |
| 552 | .PP |
| 553 | To open a file for reading: |
| 554 | .PP |
| 555 | .Vb 2 |
| 556 | \& open(FH, "< $path"); |
| 557 | \& sysopen(FH, $path, O_RDONLY); |
| 558 | .Ve |
| 559 | .PP |
| 560 | To open a file for writing, creating a new file if needed or else truncating |
| 561 | an old file: |
| 562 | .PP |
| 563 | .Vb 2 |
| 564 | \& open(FH, "> $path"); |
| 565 | \& sysopen(FH, $path, O_WRONLY | O_TRUNC | O_CREAT); |
| 566 | .Ve |
| 567 | .PP |
| 568 | To open a file for appending, creating one if necessary: |
| 569 | .PP |
| 570 | .Vb 2 |
| 571 | \& open(FH, ">> $path"); |
| 572 | \& sysopen(FH, $path, O_WRONLY | O_APPEND | O_CREAT); |
| 573 | .Ve |
| 574 | .PP |
| 575 | To open a file for update, where the file must already exist: |
| 576 | .PP |
| 577 | .Vb 2 |
| 578 | \& open(FH, "+< $path"); |
| 579 | \& sysopen(FH, $path, O_RDWR); |
| 580 | .Ve |
| 581 | .PP |
| 582 | And here are things you can do with \f(CW\*(C`sysopen\*(C'\fR that you cannot do with |
| 583 | a regular \f(CW\*(C`open\*(C'\fR. As you'll see, it's just a matter of controlling the |
| 584 | flags in the third argument. |
| 585 | .PP |
| 586 | To open a file for writing, creating a new file which must not previously |
| 587 | exist: |
| 588 | .PP |
| 589 | .Vb 1 |
| 590 | \& sysopen(FH, $path, O_WRONLY | O_EXCL | O_CREAT); |
| 591 | .Ve |
| 592 | .PP |
| 593 | To open a file for appending, where that file must already exist: |
| 594 | .PP |
| 595 | .Vb 1 |
| 596 | \& sysopen(FH, $path, O_WRONLY | O_APPEND); |
| 597 | .Ve |
| 598 | .PP |
| 599 | To open a file for update, creating a new file if necessary: |
| 600 | .PP |
| 601 | .Vb 1 |
| 602 | \& sysopen(FH, $path, O_RDWR | O_CREAT); |
| 603 | .Ve |
| 604 | .PP |
| 605 | To open a file for update, where that file must not already exist: |
| 606 | .PP |
| 607 | .Vb 1 |
| 608 | \& sysopen(FH, $path, O_RDWR | O_EXCL | O_CREAT); |
| 609 | .Ve |
| 610 | .PP |
| 611 | To open a file without blocking, creating one if necessary: |
| 612 | .PP |
| 613 | .Vb 1 |
| 614 | \& sysopen(FH, $path, O_WRONLY | O_NONBLOCK | O_CREAT); |
| 615 | .Ve |
| 616 | .Sh "Permissions A\*` la mode" |
| 617 | .IX Subsection "Permissions A` la mode" |
| 618 | If you omit the \s-1MASK\s0 argument to \f(CW\*(C`sysopen\*(C'\fR, Perl uses the octal value |
| 619 | 0666. The normal \s-1MASK\s0 to use for executables and directories should |
| 620 | be 0777, and for anything else, 0666. |
| 621 | .PP |
| 622 | Why so permissive? Well, it isn't really. The \s-1MASK\s0 will be modified |
| 623 | by your process's current \f(CW\*(C`umask\*(C'\fR. A umask is a number representing |
| 624 | \&\fIdisabled\fR permissions bits; that is, bits that will not be turned on |
| 625 | in the created files' permissions field. |
| 626 | .PP |
| 627 | For example, if your \f(CW\*(C`umask\*(C'\fR were 027, then the 020 part would |
| 628 | disable the group from writing, and the 007 part would disable others |
| 629 | from reading, writing, or executing. Under these conditions, passing |
| 630 | \&\f(CW\*(C`sysopen\*(C'\fR 0666 would create a file with mode 0640, since \f(CW\*(C`0666 & ~027\*(C'\fR |
| 631 | is 0640. |
| 632 | .PP |
| 633 | You should seldom use the \s-1MASK\s0 argument to \f(CW\*(C`sysopen()\*(C'\fR. That takes |
| 634 | away the user's freedom to choose what permission new files will have. |
| 635 | Denying choice is almost always a bad thing. One exception would be for |
| 636 | cases where sensitive or private data is being stored, such as with mail |
| 637 | folders, cookie files, and internal temporary files. |
| 638 | .SH "Obscure Open Tricks" |
| 639 | .IX Header "Obscure Open Tricks" |
| 640 | .Sh "Re-Opening Files (dups)" |
| 641 | .IX Subsection "Re-Opening Files (dups)" |
| 642 | Sometimes you already have a filehandle open, and want to make another |
| 643 | handle that's a duplicate of the first one. In the shell, we place an |
| 644 | ampersand in front of a file descriptor number when doing redirections. |
| 645 | For example, \f(CW\*(C`2>&1\*(C'\fR makes descriptor 2 (that's \s-1STDERR\s0 in Perl) |
| 646 | be redirected into descriptor 1 (which is usually Perl's \s-1STDOUT\s0). |
| 647 | The same is essentially true in Perl: a filename that begins with an |
| 648 | ampersand is treated instead as a file descriptor if a number, or as a |
| 649 | filehandle if a string. |
| 650 | .PP |
| 651 | .Vb 2 |
| 652 | \& open(SAVEOUT, ">&SAVEERR") || die "couldn't dup SAVEERR: $!"; |
| 653 | \& open(MHCONTEXT, "<&4") || die "couldn't dup fd4: $!"; |
| 654 | .Ve |
| 655 | .PP |
| 656 | That means that if a function is expecting a filename, but you don't |
| 657 | want to give it a filename because you already have the file open, you |
| 658 | can just pass the filehandle with a leading ampersand. It's best to |
| 659 | use a fully qualified handle though, just in case the function happens |
| 660 | to be in a different package: |
| 661 | .PP |
| 662 | .Vb 1 |
| 663 | \& somefunction("&main::LOGFILE"); |
| 664 | .Ve |
| 665 | .PP |
| 666 | This way if \fIsomefunction()\fR is planning on opening its argument, it can |
| 667 | just use the already opened handle. This differs from passing a handle, |
| 668 | because with a handle, you don't open the file. Here you have something |
| 669 | you can pass to open. |
| 670 | .PP |
| 671 | If you have one of those tricky, newfangled I/O objects that the \*(C+ |
| 672 | folks are raving about, then this doesn't work because those aren't |
| 673 | proper filehandles in the native Perl sense. You'll have to use \fIfileno()\fR |
| 674 | to pull out the proper descriptor number, assuming you can: |
| 675 | .PP |
| 676 | .Vb 4 |
| 677 | \& use IO::Socket; |
| 678 | \& $handle = IO::Socket::INET->new("www.perl.com:80"); |
| 679 | \& $fd = $handle->fileno; |
| 680 | \& somefunction("&$fd"); # not an indirect function call |
| 681 | .Ve |
| 682 | .PP |
| 683 | It can be easier (and certainly will be faster) just to use real |
| 684 | filehandles though: |
| 685 | .PP |
| 686 | .Vb 4 |
| 687 | \& use IO::Socket; |
| 688 | \& local *REMOTE = IO::Socket::INET->new("www.perl.com:80"); |
| 689 | \& die "can't connect" unless defined(fileno(REMOTE)); |
| 690 | \& somefunction("&main::REMOTE"); |
| 691 | .Ve |
| 692 | .PP |
| 693 | If the filehandle or descriptor number is preceded not just with a simple |
| 694 | \&\*(L"&\*(R" but rather with a \*(L"&=\*(R" combination, then Perl will not create a |
| 695 | completely new descriptor opened to the same place using the \fIdup\fR\|(2) |
| 696 | system call. Instead, it will just make something of an alias to the |
| 697 | existing one using the fdopen(3S) library call. This is slightly more |
| 698 | parsimonious of system resources, although this is less a concern |
| 699 | these days. Here's an example of that: |
| 700 | .PP |
| 701 | .Vb 2 |
| 702 | \& $fd = $ENV{"MHCONTEXTFD"}; |
| 703 | \& open(MHCONTEXT, "<&=$fd") or die "couldn't fdopen $fd: $!"; |
| 704 | .Ve |
| 705 | .PP |
| 706 | If you're using magic \f(CW\*(C`<ARGV>\*(C'\fR, you could even pass in as a |
| 707 | command line argument in \f(CW@ARGV\fR something like \f(CW"<&=$MHCONTEXTFD"\fR, |
| 708 | but we've never seen anyone actually do this. |
| 709 | .Sh "Dispelling the Dweomer" |
| 710 | .IX Subsection "Dispelling the Dweomer" |
| 711 | Perl is more of a DWIMmer language than something like Java\*(--where \s-1DWIM\s0 |
| 712 | is an acronym for \*(L"do what I mean\*(R". But this principle sometimes leads |
| 713 | to more hidden magic than one knows what to do with. In this way, Perl |
| 714 | is also filled with \fIdweomer\fR, an obscure word meaning an enchantment. |
| 715 | Sometimes, Perl's DWIMmer is just too much like dweomer for comfort. |
| 716 | .PP |
| 717 | If magic \f(CW\*(C`open\*(C'\fR is a bit too magical for you, you don't have to turn |
| 718 | to \f(CW\*(C`sysopen\*(C'\fR. To open a file with arbitrary weird characters in |
| 719 | it, it's necessary to protect any leading and trailing whitespace. |
| 720 | Leading whitespace is protected by inserting a \f(CW"./"\fR in front of a |
| 721 | filename that starts with whitespace. Trailing whitespace is protected |
| 722 | by appending an \s-1ASCII\s0 \s-1NUL\s0 byte (\f(CW"\e0"\fR) at the end of the string. |
| 723 | .PP |
| 724 | .Vb 2 |
| 725 | \& $file =~ s#^(\es)#./$1#; |
| 726 | \& open(FH, "< $file\e0") || die "can't open $file: $!"; |
| 727 | .Ve |
| 728 | .PP |
| 729 | This assumes, of course, that your system considers dot the current |
| 730 | working directory, slash the directory separator, and disallows \s-1ASCII\s0 |
| 731 | NULs within a valid filename. Most systems follow these conventions, |
| 732 | including all \s-1POSIX\s0 systems as well as proprietary Microsoft systems. |
| 733 | The only vaguely popular system that doesn't work this way is the |
| 734 | \&\*(L"Classic\*(R" Macintosh system, which uses a colon where the rest of us |
| 735 | use a slash. Maybe \f(CW\*(C`sysopen\*(C'\fR isn't such a bad idea after all. |
| 736 | .PP |
| 737 | If you want to use \f(CW\*(C`<ARGV>\*(C'\fR processing in a totally boring |
| 738 | and non-magical way, you could do this first: |
| 739 | .PP |
| 740 | .Vb 10 |
| 741 | \& # "Sam sat on the ground and put his head in his hands. |
| 742 | \& # 'I wish I had never come here, and I don't want to see |
| 743 | \& # no more magic,' he said, and fell silent." |
| 744 | \& for (@ARGV) { |
| 745 | \& s#^([^./])#./$1#; |
| 746 | \& $_ .= "\e0"; |
| 747 | \& } |
| 748 | \& while (<>) { |
| 749 | \& # now process $_ |
| 750 | \& } |
| 751 | .Ve |
| 752 | .PP |
| 753 | But be warned that users will not appreciate being unable to use \*(L"\-\*(R" |
| 754 | to mean standard input, per the standard convention. |
| 755 | .Sh "Paths as Opens" |
| 756 | .IX Subsection "Paths as Opens" |
| 757 | You've probably noticed how Perl's \f(CW\*(C`warn\*(C'\fR and \f(CW\*(C`die\*(C'\fR functions can |
| 758 | produce messages like: |
| 759 | .PP |
| 760 | .Vb 1 |
| 761 | \& Some warning at scriptname line 29, <FH> line 7. |
| 762 | .Ve |
| 763 | .PP |
| 764 | That's because you opened a filehandle \s-1FH\s0, and had read in seven records |
| 765 | from it. But what was the name of the file, rather than the handle? |
| 766 | .PP |
| 767 | If you aren't running with \f(CW\*(C`strict refs\*(C'\fR, or if you've turned them off |
| 768 | temporarily, then all you have to do is this: |
| 769 | .PP |
| 770 | .Vb 4 |
| 771 | \& open($path, "< $path") || die "can't open $path: $!"; |
| 772 | \& while (<$path>) { |
| 773 | \& # whatever |
| 774 | \& } |
| 775 | .Ve |
| 776 | .PP |
| 777 | Since you're using the pathname of the file as its handle, |
| 778 | you'll get warnings more like |
| 779 | .PP |
| 780 | .Vb 1 |
| 781 | \& Some warning at scriptname line 29, </etc/motd> line 7. |
| 782 | .Ve |
| 783 | .Sh "Single Argument Open" |
| 784 | .IX Subsection "Single Argument Open" |
| 785 | Remember how we said that Perl's open took two arguments? That was a |
| 786 | passive prevarication. You see, it can also take just one argument. |
| 787 | If and only if the variable is a global variable, not a lexical, you |
| 788 | can pass \f(CW\*(C`open\*(C'\fR just one argument, the filehandle, and it will |
| 789 | get the path from the global scalar variable of the same name. |
| 790 | .PP |
| 791 | .Vb 5 |
| 792 | \& $FILE = "/etc/motd"; |
| 793 | \& open FILE or die "can't open $FILE: $!"; |
| 794 | \& while (<FILE>) { |
| 795 | \& # whatever |
| 796 | \& } |
| 797 | .Ve |
| 798 | .PP |
| 799 | Why is this here? Someone has to cater to the hysterical porpoises. |
| 800 | It's something that's been in Perl since the very beginning, if not |
| 801 | before. |
| 802 | .Sh "Playing with \s-1STDIN\s0 and \s-1STDOUT\s0" |
| 803 | .IX Subsection "Playing with STDIN and STDOUT" |
| 804 | One clever move with \s-1STDOUT\s0 is to explicitly close it when you're done |
| 805 | with the program. |
| 806 | .PP |
| 807 | .Vb 1 |
| 808 | \& END { close(STDOUT) || die "can't close stdout: $!" } |
| 809 | .Ve |
| 810 | .PP |
| 811 | If you don't do this, and your program fills up the disk partition due |
| 812 | to a command line redirection, it won't report the error or exit with a |
| 813 | failure status. |
| 814 | .PP |
| 815 | You don't have to accept the \s-1STDIN\s0 and \s-1STDOUT\s0 you were given. You are |
| 816 | welcome to reopen them if you'd like. |
| 817 | .PP |
| 818 | .Vb 2 |
| 819 | \& open(STDIN, "< datafile") |
| 820 | \& || die "can't open datafile: $!"; |
| 821 | .Ve |
| 822 | .PP |
| 823 | .Vb 2 |
| 824 | \& open(STDOUT, "> output") |
| 825 | \& || die "can't open output: $!"; |
| 826 | .Ve |
| 827 | .PP |
| 828 | And then these can be accessed directly or passed on to subprocesses. |
| 829 | This makes it look as though the program were initially invoked |
| 830 | with those redirections from the command line. |
| 831 | .PP |
| 832 | It's probably more interesting to connect these to pipes. For example: |
| 833 | .PP |
| 834 | .Vb 3 |
| 835 | \& $pager = $ENV{PAGER} || "(less || more)"; |
| 836 | \& open(STDOUT, "| $pager") |
| 837 | \& || die "can't fork a pager: $!"; |
| 838 | .Ve |
| 839 | .PP |
| 840 | This makes it appear as though your program were called with its stdout |
| 841 | already piped into your pager. You can also use this kind of thing |
| 842 | in conjunction with an implicit fork to yourself. You might do this |
| 843 | if you would rather handle the post processing in your own program, |
| 844 | just in a different process: |
| 845 | .PP |
| 846 | .Vb 4 |
| 847 | \& head(100); |
| 848 | \& while (<>) { |
| 849 | \& print; |
| 850 | \& } |
| 851 | .Ve |
| 852 | .PP |
| 853 | .Vb 10 |
| 854 | \& sub head { |
| 855 | \& my $lines = shift || 20; |
| 856 | \& return if $pid = open(STDOUT, "|-"); # return if parent |
| 857 | \& die "cannot fork: $!" unless defined $pid; |
| 858 | \& while (<STDIN>) { |
| 859 | \& last if --$lines < 0; |
| 860 | \& print; |
| 861 | \& } |
| 862 | \& exit; |
| 863 | \& } |
| 864 | .Ve |
| 865 | .PP |
| 866 | This technique can be applied to repeatedly push as many filters on your |
| 867 | output stream as you wish. |
| 868 | .SH "Other I/O Issues" |
| 869 | .IX Header "Other I/O Issues" |
| 870 | These topics aren't really arguments related to \f(CW\*(C`open\*(C'\fR or \f(CW\*(C`sysopen\*(C'\fR, |
| 871 | but they do affect what you do with your open files. |
| 872 | .Sh "Opening Non-File Files" |
| 873 | .IX Subsection "Opening Non-File Files" |
| 874 | When is a file not a file? Well, you could say when it exists but |
| 875 | isn't a plain file. We'll check whether it's a symbolic link first, |
| 876 | just in case. |
| 877 | .PP |
| 878 | .Vb 3 |
| 879 | \& if (-l $file || ! -f _) { |
| 880 | \& print "$file is not a plain file\en"; |
| 881 | \& } |
| 882 | .Ve |
| 883 | .PP |
| 884 | What other kinds of files are there than, well, files? Directories, |
| 885 | symbolic links, named pipes, Unix-domain sockets, and block and character |
| 886 | devices. Those are all files, too\*(--just not \fIplain\fR files. This isn't |
| 887 | the same issue as being a text file. Not all text files are plain files. |
| 888 | Not all plain files are text files. That's why there are separate \f(CW\*(C`\-f\*(C'\fR |
| 889 | and \f(CW\*(C`\-T\*(C'\fR file tests. |
| 890 | .PP |
| 891 | To open a directory, you should use the \f(CW\*(C`opendir\*(C'\fR function, then |
| 892 | process it with \f(CW\*(C`readdir\*(C'\fR, carefully restoring the directory |
| 893 | name if necessary: |
| 894 | .PP |
| 895 | .Vb 5 |
| 896 | \& opendir(DIR, $dirname) or die "can't opendir $dirname: $!"; |
| 897 | \& while (defined($file = readdir(DIR))) { |
| 898 | \& # do something with "$dirname/$file" |
| 899 | \& } |
| 900 | \& closedir(DIR); |
| 901 | .Ve |
| 902 | .PP |
| 903 | If you want to process directories recursively, it's better to use the |
| 904 | File::Find module. For example, this prints out all files recursively |
| 905 | and adds a slash to their names if the file is a directory. |
| 906 | .PP |
| 907 | .Vb 3 |
| 908 | \& @ARGV = qw(.) unless @ARGV; |
| 909 | \& use File::Find; |
| 910 | \& find sub { print $File::Find::name, -d && '/', "\en" }, @ARGV; |
| 911 | .Ve |
| 912 | .PP |
| 913 | This finds all bogus symbolic links beneath a particular directory: |
| 914 | .PP |
| 915 | .Vb 1 |
| 916 | \& find sub { print "$File::Find::name\en" if -l && !-e }, $dir; |
| 917 | .Ve |
| 918 | .PP |
| 919 | As you see, with symbolic links, you can just pretend that it is |
| 920 | what it points to. Or, if you want to know \fIwhat\fR it points to, then |
| 921 | \&\f(CW\*(C`readlink\*(C'\fR is called for: |
| 922 | .PP |
| 923 | .Vb 7 |
| 924 | \& if (-l $file) { |
| 925 | \& if (defined($whither = readlink($file))) { |
| 926 | \& print "$file points to $whither\en"; |
| 927 | \& } else { |
| 928 | \& print "$file points nowhere: $!\en"; |
| 929 | \& } |
| 930 | \& } |
| 931 | .Ve |
| 932 | .Sh "Opening Named Pipes" |
| 933 | .IX Subsection "Opening Named Pipes" |
| 934 | Named pipes are a different matter. You pretend they're regular files, |
| 935 | but their opens will normally block until there is both a reader and |
| 936 | a writer. You can read more about them in \*(L"Named Pipes\*(R" in perlipc. |
| 937 | Unix-domain sockets are rather different beasts as well; they're |
| 938 | described in \*(L"Unix\-Domain \s-1TCP\s0 Clients and Servers\*(R" in perlipc. |
| 939 | .PP |
| 940 | When it comes to opening devices, it can be easy and it can be tricky. |
| 941 | We'll assume that if you're opening up a block device, you know what |
| 942 | you're doing. The character devices are more interesting. These are |
| 943 | typically used for modems, mice, and some kinds of printers. This is |
| 944 | described in \*(L"How do I read and write the serial port?\*(R" in perlfaq8. |
| 945 | It's often enough to open them carefully: |
| 946 | .PP |
| 947 | .Vb 5 |
| 948 | \& sysopen(TTYIN, "/dev/ttyS1", O_RDWR | O_NDELAY | O_NOCTTY) |
| 949 | \& # (O_NOCTTY no longer needed on POSIX systems) |
| 950 | \& or die "can't open /dev/ttyS1: $!"; |
| 951 | \& open(TTYOUT, "+>&TTYIN") |
| 952 | \& or die "can't dup TTYIN: $!"; |
| 953 | .Ve |
| 954 | .PP |
| 955 | .Vb 1 |
| 956 | \& $ofh = select(TTYOUT); $| = 1; select($ofh); |
| 957 | .Ve |
| 958 | .PP |
| 959 | .Vb 2 |
| 960 | \& print TTYOUT "+++at\e015"; |
| 961 | \& $answer = <TTYIN>; |
| 962 | .Ve |
| 963 | .PP |
| 964 | With descriptors that you haven't opened using \f(CW\*(C`sysopen\*(C'\fR, such as |
| 965 | sockets, you can set them to be non-blocking using \f(CW\*(C`fcntl\*(C'\fR: |
| 966 | .PP |
| 967 | .Vb 5 |
| 968 | \& use Fcntl; |
| 969 | \& my $old_flags = fcntl($handle, F_GETFL, 0) |
| 970 | \& or die "can't get flags: $!"; |
| 971 | \& fcntl($handle, F_SETFL, $old_flags | O_NONBLOCK) |
| 972 | \& or die "can't set non blocking: $!"; |
| 973 | .Ve |
| 974 | .PP |
| 975 | Rather than losing yourself in a morass of twisting, turning \f(CW\*(C`ioctl\*(C'\fRs, |
| 976 | all dissimilar, if you're going to manipulate ttys, it's best to |
| 977 | make calls out to the \fIstty\fR\|(1) program if you have it, or else use the |
| 978 | portable \s-1POSIX\s0 interface. To figure this all out, you'll need to read the |
| 979 | \&\fItermios\fR\|(3) manpage, which describes the \s-1POSIX\s0 interface to tty devices, |
| 980 | and then \s-1POSIX\s0, which describes Perl's interface to \s-1POSIX\s0. There are |
| 981 | also some high-level modules on \s-1CPAN\s0 that can help you with these games. |
| 982 | Check out Term::ReadKey and Term::ReadLine. |
| 983 | .Sh "Opening Sockets" |
| 984 | .IX Subsection "Opening Sockets" |
| 985 | What else can you open? To open a connection using sockets, you won't use |
| 986 | one of Perl's two open functions. See |
| 987 | \&\*(L"Sockets: Client/Server Communication\*(R" in perlipc for that. Here's an |
| 988 | example. Once you have it, you can use \s-1FH\s0 as a bidirectional filehandle. |
| 989 | .PP |
| 990 | .Vb 2 |
| 991 | \& use IO::Socket; |
| 992 | \& local *FH = IO::Socket::INET->new("www.perl.com:80"); |
| 993 | .Ve |
| 994 | .PP |
| 995 | For opening up a \s-1URL\s0, the \s-1LWP\s0 modules from \s-1CPAN\s0 are just what |
| 996 | the doctor ordered. There's no filehandle interface, but |
| 997 | it's still easy to get the contents of a document: |
| 998 | .PP |
| 999 | .Vb 2 |
| 1000 | \& use LWP::Simple; |
| 1001 | \& $doc = get('http://www.linpro.no/lwp/'); |
| 1002 | .Ve |
| 1003 | .Sh "Binary Files" |
| 1004 | .IX Subsection "Binary Files" |
| 1005 | On certain legacy systems with what could charitably be called terminally |
| 1006 | convoluted (some would say broken) I/O models, a file isn't a file\*(--at |
| 1007 | least, not with respect to the C standard I/O library. On these old |
| 1008 | systems whose libraries (but not kernels) distinguish between text and |
| 1009 | binary streams, to get files to behave properly you'll have to bend over |
| 1010 | backwards to avoid nasty problems. On such infelicitous systems, sockets |
| 1011 | and pipes are already opened in binary mode, and there is currently no |
| 1012 | way to turn that off. With files, you have more options. |
| 1013 | .PP |
| 1014 | One option is to use the \f(CW\*(C`binmode\*(C'\fR function on the appropriate |
| 1015 | handles before doing regular I/O on them: |
| 1016 | .PP |
| 1017 | .Vb 3 |
| 1018 | \& binmode(STDIN); |
| 1019 | \& binmode(STDOUT); |
| 1020 | \& while (<STDIN>) { print } |
| 1021 | .Ve |
| 1022 | .PP |
| 1023 | Passing \f(CW\*(C`sysopen\*(C'\fR a non-standard flag option will also open the file in |
| 1024 | binary mode on those systems that support it. This is the equivalent of |
| 1025 | opening the file normally, then calling \f(CW\*(C`binmode\*(C'\fR on the handle. |
| 1026 | .PP |
| 1027 | .Vb 2 |
| 1028 | \& sysopen(BINDAT, "records.data", O_RDWR | O_BINARY) |
| 1029 | \& || die "can't open records.data: $!"; |
| 1030 | .Ve |
| 1031 | .PP |
| 1032 | Now you can use \f(CW\*(C`read\*(C'\fR and \f(CW\*(C`print\*(C'\fR on that handle without worrying |
| 1033 | about the non-standard system I/O library breaking your data. It's not |
| 1034 | a pretty picture, but then, legacy systems seldom are. \s-1CP/M\s0 will be |
| 1035 | with us until the end of days, and after. |
| 1036 | .PP |
| 1037 | On systems with exotic I/O systems, it turns out that, astonishingly |
| 1038 | enough, even unbuffered I/O using \f(CW\*(C`sysread\*(C'\fR and \f(CW\*(C`syswrite\*(C'\fR might do |
| 1039 | sneaky data mutilation behind your back. |
| 1040 | .PP |
| 1041 | .Vb 3 |
| 1042 | \& while (sysread(WHENCE, $buf, 1024)) { |
| 1043 | \& syswrite(WHITHER, $buf, length($buf)); |
| 1044 | \& } |
| 1045 | .Ve |
| 1046 | .PP |
| 1047 | Depending on the vicissitudes of your runtime system, even these calls |
| 1048 | may need \f(CW\*(C`binmode\*(C'\fR or \f(CW\*(C`O_BINARY\*(C'\fR first. Systems known to be free of |
| 1049 | such difficulties include Unix, the Mac \s-1OS\s0, Plan 9, and Inferno. |
| 1050 | .Sh "File Locking" |
| 1051 | .IX Subsection "File Locking" |
| 1052 | In a multitasking environment, you may need to be careful not to collide |
| 1053 | with other processes that want to do I/O on the same files as you |
| 1054 | are working on. You'll often need shared or exclusive locks |
| 1055 | on files for reading and writing respectively. You might just |
| 1056 | pretend that only exclusive locks exist. |
| 1057 | .PP |
| 1058 | Never use the existence of a file \f(CW\*(C`\-e $file\*(C'\fR as a locking indication, |
| 1059 | because there is a race condition between the test for the existence of |
| 1060 | the file and its creation. It's possible for another process to create |
| 1061 | a file in the slice of time between your existence check and your attempt |
| 1062 | to create the file. Atomicity is critical. |
| 1063 | .PP |
| 1064 | Perl's most portable locking interface is via the \f(CW\*(C`flock\*(C'\fR function, |
| 1065 | whose simplicity is emulated on systems that don't directly support it |
| 1066 | such as SysV or Windows. The underlying semantics may affect how |
| 1067 | it all works, so you should learn how \f(CW\*(C`flock\*(C'\fR is implemented on your |
| 1068 | system's port of Perl. |
| 1069 | .PP |
| 1070 | File locking \fIdoes not\fR lock out another process that would like to |
| 1071 | do I/O. A file lock only locks out others trying to get a lock, not |
| 1072 | processes trying to do I/O. Because locks are advisory, if one process |
| 1073 | uses locking and another doesn't, all bets are off. |
| 1074 | .PP |
| 1075 | By default, the \f(CW\*(C`flock\*(C'\fR call will block until a lock is granted. |
| 1076 | A request for a shared lock will be granted as soon as there is no |
| 1077 | exclusive locker. A request for an exclusive lock will be granted as |
| 1078 | soon as there is no locker of any kind. Locks are on file descriptors, |
| 1079 | not file names. You can't lock a file until you open it, and you can't |
| 1080 | hold on to a lock once the file has been closed. |
| 1081 | .PP |
| 1082 | Here's how to get a blocking shared lock on a file, typically used |
| 1083 | for reading: |
| 1084 | .PP |
| 1085 | .Vb 5 |
| 1086 | \& use 5.004; |
| 1087 | \& use Fcntl qw(:DEFAULT :flock); |
| 1088 | \& open(FH, "< filename") or die "can't open filename: $!"; |
| 1089 | \& flock(FH, LOCK_SH) or die "can't lock filename: $!"; |
| 1090 | \& # now read from FH |
| 1091 | .Ve |
| 1092 | .PP |
| 1093 | You can get a non-blocking lock by using \f(CW\*(C`LOCK_NB\*(C'\fR. |
| 1094 | .PP |
| 1095 | .Vb 2 |
| 1096 | \& flock(FH, LOCK_SH | LOCK_NB) |
| 1097 | \& or die "can't lock filename: $!"; |
| 1098 | .Ve |
| 1099 | .PP |
| 1100 | This can be useful for producing more user-friendly behaviour by warning |
| 1101 | if you're going to be blocking: |
| 1102 | .PP |
| 1103 | .Vb 10 |
| 1104 | \& use 5.004; |
| 1105 | \& use Fcntl qw(:DEFAULT :flock); |
| 1106 | \& open(FH, "< filename") or die "can't open filename: $!"; |
| 1107 | \& unless (flock(FH, LOCK_SH | LOCK_NB)) { |
| 1108 | \& $| = 1; |
| 1109 | \& print "Waiting for lock..."; |
| 1110 | \& flock(FH, LOCK_SH) or die "can't lock filename: $!"; |
| 1111 | \& print "got it.\en" |
| 1112 | \& } |
| 1113 | \& # now read from FH |
| 1114 | .Ve |
| 1115 | .PP |
| 1116 | To get an exclusive lock, typically used for writing, you have to be |
| 1117 | careful. We \f(CW\*(C`sysopen\*(C'\fR the file so it can be locked before it gets |
| 1118 | emptied. You can get a nonblocking version using \f(CW\*(C`LOCK_EX | LOCK_NB\*(C'\fR. |
| 1119 | .PP |
| 1120 | .Vb 9 |
| 1121 | \& use 5.004; |
| 1122 | \& use Fcntl qw(:DEFAULT :flock); |
| 1123 | \& sysopen(FH, "filename", O_WRONLY | O_CREAT) |
| 1124 | \& or die "can't open filename: $!"; |
| 1125 | \& flock(FH, LOCK_EX) |
| 1126 | \& or die "can't lock filename: $!"; |
| 1127 | \& truncate(FH, 0) |
| 1128 | \& or die "can't truncate filename: $!"; |
| 1129 | \& # now write to FH |
| 1130 | .Ve |
| 1131 | .PP |
| 1132 | Finally, due to the uncounted millions who cannot be dissuaded from |
| 1133 | wasting cycles on useless vanity devices called hit counters, here's |
| 1134 | how to increment a number in a file safely: |
| 1135 | .PP |
| 1136 | .Vb 1 |
| 1137 | \& use Fcntl qw(:DEFAULT :flock); |
| 1138 | .Ve |
| 1139 | .PP |
| 1140 | .Vb 6 |
| 1141 | \& sysopen(FH, "numfile", O_RDWR | O_CREAT) |
| 1142 | \& or die "can't open numfile: $!"; |
| 1143 | \& # autoflush FH |
| 1144 | \& $ofh = select(FH); $| = 1; select ($ofh); |
| 1145 | \& flock(FH, LOCK_EX) |
| 1146 | \& or die "can't write-lock numfile: $!"; |
| 1147 | .Ve |
| 1148 | .PP |
| 1149 | .Vb 5 |
| 1150 | \& $num = <FH> || 0; |
| 1151 | \& seek(FH, 0, 0) |
| 1152 | \& or die "can't rewind numfile : $!"; |
| 1153 | \& print FH $num+1, "\en" |
| 1154 | \& or die "can't write numfile: $!"; |
| 1155 | .Ve |
| 1156 | .PP |
| 1157 | .Vb 4 |
| 1158 | \& truncate(FH, tell(FH)) |
| 1159 | \& or die "can't truncate numfile: $!"; |
| 1160 | \& close(FH) |
| 1161 | \& or die "can't close numfile: $!"; |
| 1162 | .Ve |
| 1163 | .Sh "\s-1IO\s0 Layers" |
| 1164 | .IX Subsection "IO Layers" |
| 1165 | In Perl 5.8.0 a new I/O framework called \*(L"PerlIO\*(R" was introduced. |
| 1166 | This is a new \*(L"plumbing\*(R" for all the I/O happening in Perl; for the |
| 1167 | most part everything will work just as it did, but PerlIO also brought |
| 1168 | in some new features such as the ability to think of I/O as \*(L"layers\*(R". |
| 1169 | One I/O layer may in addition to just moving the data also do |
| 1170 | transformations on the data. Such transformations may include |
| 1171 | compression and decompression, encryption and decryption, and transforming |
| 1172 | between various character encodings. |
| 1173 | .PP |
| 1174 | Full discussion about the features of PerlIO is out of scope for this |
| 1175 | tutorial, but here is how to recognize the layers being used: |
| 1176 | .IP "\(bu" 4 |
| 1177 | The three\-(or more)\-argument form of \f(CW\*(C`open\*(C'\fR is being used and the |
| 1178 | second argument contains something else in addition to the usual |
| 1179 | \&\f(CW'<'\fR, \f(CW'>'\fR, \f(CW'>>'\fR, \f(CW'|'\fR and their variants, |
| 1180 | for example: |
| 1181 | .Sp |
| 1182 | .Vb 1 |
| 1183 | \& open(my $fh, "<:utf8", $fn); |
| 1184 | .Ve |
| 1185 | .IP "\(bu" 4 |
| 1186 | The two-argument form of \f(CW\*(C`binmode\*(C'\fR is being used, for example: |
| 1187 | .Sp |
| 1188 | .Vb 1 |
| 1189 | \& binmode($fh, ":encoding(utf16)"); |
| 1190 | .Ve |
| 1191 | .PP |
| 1192 | For more detailed discussion about PerlIO see PerlIO; |
| 1193 | for more detailed discussion about Unicode and I/O see perluniintro. |
| 1194 | .SH "SEE ALSO" |
| 1195 | .IX Header "SEE ALSO" |
| 1196 | The \f(CW\*(C`open\*(C'\fR and \f(CW\*(C`sysopen\*(C'\fR functions in \fIperlfunc\fR\|(1); |
| 1197 | the system \fIopen\fR\|(2), \fIdup\fR\|(2), \fIfopen\fR\|(3), and \fIfdopen\fR\|(3) manpages; |
| 1198 | the \s-1POSIX\s0 documentation. |
| 1199 | .SH "AUTHOR and COPYRIGHT" |
| 1200 | .IX Header "AUTHOR and COPYRIGHT" |
| 1201 | Copyright 1998 Tom Christiansen. |
| 1202 | .PP |
| 1203 | This documentation is free; you can redistribute it and/or modify it |
| 1204 | under the same terms as Perl itself. |
| 1205 | .PP |
| 1206 | Irrespective of its distribution, all code examples in these files are |
| 1207 | hereby placed into the public domain. You are permitted and |
| 1208 | encouraged to use this code in your own programs for fun or for profit |
| 1209 | as you see fit. A simple comment in the code giving credit would be |
| 1210 | courteous but is not required. |
| 1211 | .SH "HISTORY" |
| 1212 | .IX Header "HISTORY" |
| 1213 | First release: Sat Jan 9 08:09:11 \s-1MST\s0 1999 |