Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | .\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "PERLOP 1" | |
132 | .TH PERLOP 1 "2002-06-08" "perl v5.8.0" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | perlop \- Perl operators and precedence | |
135 | .SH "SYNOPSIS" | |
136 | .IX Header "SYNOPSIS" | |
137 | Perl operators have the following associativity and precedence, | |
138 | listed from highest precedence to lowest. Operators borrowed from | |
139 | C keep the same precedence relationship with each other, even where | |
140 | C's precedence is slightly screwy. (This makes learning Perl easier | |
141 | for C folks.) With very few exceptions, these all operate on scalar | |
142 | values only, not array values. | |
143 | .PP | |
144 | .Vb 24 | |
145 | \& left terms and list operators (leftward) | |
146 | \& left -> | |
147 | \& nonassoc ++ -- | |
148 | \& right ** | |
149 | \& right ! ~ \e and unary + and - | |
150 | \& left =~ !~ | |
151 | \& left * / % x | |
152 | \& left + - . | |
153 | \& left << >> | |
154 | \& nonassoc named unary operators | |
155 | \& nonassoc < > <= >= lt gt le ge | |
156 | \& nonassoc == != <=> eq ne cmp | |
157 | \& left & | |
158 | \& left | ^ | |
159 | \& left && | |
160 | \& left || | |
161 | \& nonassoc .. ... | |
162 | \& right ?: | |
163 | \& right = += -= *= etc. | |
164 | \& left , => | |
165 | \& nonassoc list operators (rightward) | |
166 | \& right not | |
167 | \& left and | |
168 | \& left or xor | |
169 | .Ve | |
170 | .PP | |
171 | In the following sections, these operators are covered in precedence order. | |
172 | .PP | |
173 | Many operators can be overloaded for objects. See overload. | |
174 | .SH "DESCRIPTION" | |
175 | .IX Header "DESCRIPTION" | |
176 | .Sh "Terms and List Operators (Leftward)" | |
177 | .IX Subsection "Terms and List Operators (Leftward)" | |
178 | A \s-1TERM\s0 has the highest precedence in Perl. They include variables, | |
179 | quote and quote-like operators, any expression in parentheses, | |
180 | and any function whose arguments are parenthesized. Actually, there | |
181 | aren't really functions in this sense, just list operators and unary | |
182 | operators behaving as functions because you put parentheses around | |
183 | the arguments. These are all documented in perlfunc. | |
184 | .PP | |
185 | If any list operator (\fIprint()\fR, etc.) or any unary operator (\fIchdir()\fR, etc.) | |
186 | is followed by a left parenthesis as the next token, the operator and | |
187 | arguments within parentheses are taken to be of highest precedence, | |
188 | just like a normal function call. | |
189 | .PP | |
190 | In the absence of parentheses, the precedence of list operators such as | |
191 | \&\f(CW\*(C`print\*(C'\fR, \f(CW\*(C`sort\*(C'\fR, or \f(CW\*(C`chmod\*(C'\fR is either very high or very low depending on | |
192 | whether you are looking at the left side or the right side of the operator. | |
193 | For example, in | |
194 | .PP | |
195 | .Vb 2 | |
196 | \& @ary = (1, 3, sort 4, 2); | |
197 | \& print @ary; # prints 1324 | |
198 | .Ve | |
199 | .PP | |
200 | the commas on the right of the sort are evaluated before the sort, | |
201 | but the commas on the left are evaluated after. In other words, | |
202 | list operators tend to gobble up all arguments that follow, and | |
203 | then act like a simple \s-1TERM\s0 with regard to the preceding expression. | |
204 | Be careful with parentheses: | |
205 | .PP | |
206 | .Vb 3 | |
207 | \& # These evaluate exit before doing the print: | |
208 | \& print($foo, exit); # Obviously not what you want. | |
209 | \& print $foo, exit; # Nor is this. | |
210 | .Ve | |
211 | .PP | |
212 | .Vb 4 | |
213 | \& # These do the print before evaluating exit: | |
214 | \& (print $foo), exit; # This is what you want. | |
215 | \& print($foo), exit; # Or this. | |
216 | \& print ($foo), exit; # Or even this. | |
217 | .Ve | |
218 | .PP | |
219 | Also note that | |
220 | .PP | |
221 | .Vb 1 | |
222 | \& print ($foo & 255) + 1, "\en"; | |
223 | .Ve | |
224 | .PP | |
225 | probably doesn't do what you expect at first glance. See | |
226 | \&\*(L"Named Unary Operators\*(R" for more discussion of this. | |
227 | .PP | |
228 | Also parsed as terms are the \f(CW\*(C`do {}\*(C'\fR and \f(CW\*(C`eval {}\*(C'\fR constructs, as | |
229 | well as subroutine and method calls, and the anonymous | |
230 | constructors \f(CW\*(C`[]\*(C'\fR and \f(CW\*(C`{}\*(C'\fR. | |
231 | .PP | |
232 | See also \*(L"Quote and Quote-like Operators\*(R" toward the end of this section, | |
233 | as well as \*(L"I/O Operators\*(R". | |
234 | .Sh "The Arrow Operator" | |
235 | .IX Subsection "The Arrow Operator" | |
236 | "\f(CW\*(C`\->\*(C'\fR" is an infix dereference operator, just as it is in C | |
237 | and \*(C+. If the right side is either a \f(CW\*(C`[...]\*(C'\fR, \f(CW\*(C`{...}\*(C'\fR, or a | |
238 | \&\f(CW\*(C`(...)\*(C'\fR subscript, then the left side must be either a hard or | |
239 | symbolic reference to an array, a hash, or a subroutine respectively. | |
240 | (Or technically speaking, a location capable of holding a hard | |
241 | reference, if it's an array or hash reference being used for | |
242 | assignment.) See perlreftut and perlref. | |
243 | .PP | |
244 | Otherwise, the right side is a method name or a simple scalar | |
245 | variable containing either the method name or a subroutine reference, | |
246 | and the left side must be either an object (a blessed reference) | |
247 | or a class name (that is, a package name). See perlobj. | |
248 | .Sh "Auto-increment and Auto-decrement" | |
249 | .IX Subsection "Auto-increment and Auto-decrement" | |
250 | \&\*(L"++\*(R" and \*(L"\-\-\*(R" work as in C. That is, if placed before a variable, they | |
251 | increment or decrement the variable before returning the value, and if | |
252 | placed after, increment or decrement the variable after returning the value. | |
253 | .PP | |
254 | The auto-increment operator has a little extra builtin magic to it. If | |
255 | you increment a variable that is numeric, or that has ever been used in | |
256 | a numeric context, you get a normal increment. If, however, the | |
257 | variable has been used in only string contexts since it was set, and | |
258 | has a value that is not the empty string and matches the pattern | |
259 | \&\f(CW\*(C`/^[a\-zA\-Z]*[0\-9]*\ez/\*(C'\fR, the increment is done as a string, preserving each | |
260 | character within its range, with carry: | |
261 | .PP | |
262 | .Vb 4 | |
263 | \& print ++($foo = '99'); # prints '100' | |
264 | \& print ++($foo = 'a0'); # prints 'a1' | |
265 | \& print ++($foo = 'Az'); # prints 'Ba' | |
266 | \& print ++($foo = 'zz'); # prints 'aaa' | |
267 | .Ve | |
268 | .PP | |
269 | The auto-decrement operator is not magical. | |
270 | .Sh "Exponentiation" | |
271 | .IX Subsection "Exponentiation" | |
272 | Binary \*(L"**\*(R" is the exponentiation operator. It binds even more | |
273 | tightly than unary minus, so \-2**4 is \-(2**4), not (\-2)**4. (This is | |
274 | implemented using C's \fIpow\fR\|(3) function, which actually works on doubles | |
275 | internally.) | |
276 | .Sh "Symbolic Unary Operators" | |
277 | .IX Subsection "Symbolic Unary Operators" | |
278 | Unary \*(L"!\*(R" performs logical negation, i.e., \*(L"not\*(R". See also \f(CW\*(C`not\*(C'\fR for a lower | |
279 | precedence version of this. | |
280 | .PP | |
281 | Unary \*(L"\-\*(R" performs arithmetic negation if the operand is numeric. If | |
282 | the operand is an identifier, a string consisting of a minus sign | |
283 | concatenated with the identifier is returned. Otherwise, if the string | |
284 | starts with a plus or minus, a string starting with the opposite sign | |
285 | is returned. One effect of these rules is that \f(CW\*(C`\-bareword\*(C'\fR is equivalent | |
286 | to \f(CW"\-bareword"\fR. | |
287 | .PP | |
288 | Unary \*(L"~\*(R" performs bitwise negation, i.e., 1's complement. For | |
289 | example, \f(CW\*(C`0666 & ~027\*(C'\fR is 0640. (See also \*(L"Integer Arithmetic\*(R" and | |
290 | \&\*(L"Bitwise String Operators\*(R".) Note that the width of the result is | |
291 | platform\-dependent: ~0 is 32 bits wide on a 32\-bit platform, but 64 | |
292 | bits wide on a 64\-bit platform, so if you are expecting a certain bit | |
293 | width, remember to use the & operator to mask off the excess bits. | |
294 | .PP | |
295 | Unary \*(L"+\*(R" has no effect whatsoever, even on strings. It is useful | |
296 | syntactically for separating a function name from a parenthesized expression | |
297 | that would otherwise be interpreted as the complete list of function | |
298 | arguments. (See examples above under \*(L"Terms and List Operators (Leftward)\*(R".) | |
299 | .PP | |
300 | Unary \*(L"\e\*(R" creates a reference to whatever follows it. See perlreftut | |
301 | and perlref. Do not confuse this behavior with the behavior of | |
302 | backslash within a string, although both forms do convey the notion | |
303 | of protecting the next thing from interpolation. | |
304 | .Sh "Binding Operators" | |
305 | .IX Subsection "Binding Operators" | |
306 | Binary \*(L"=~\*(R" binds a scalar expression to a pattern match. Certain operations | |
307 | search or modify the string \f(CW$_\fR by default. This operator makes that kind | |
308 | of operation work on some other string. The right argument is a search | |
309 | pattern, substitution, or transliteration. The left argument is what is | |
310 | supposed to be searched, substituted, or transliterated instead of the default | |
311 | \&\f(CW$_\fR. When used in scalar context, the return value generally indicates the | |
312 | success of the operation. Behavior in list context depends on the particular | |
313 | operator. See \*(L"Regexp Quote-Like Operators\*(R" for details. | |
314 | .PP | |
315 | If the right argument is an expression rather than a search pattern, | |
316 | substitution, or transliteration, it is interpreted as a search pattern at run | |
317 | time. This can be less efficient than an explicit search, because the | |
318 | pattern must be compiled every time the expression is evaluated. | |
319 | .PP | |
320 | Binary \*(L"!~\*(R" is just like \*(L"=~\*(R" except the return value is negated in | |
321 | the logical sense. | |
322 | .Sh "Multiplicative Operators" | |
323 | .IX Subsection "Multiplicative Operators" | |
324 | Binary \*(L"*\*(R" multiplies two numbers. | |
325 | .PP | |
326 | Binary \*(L"/\*(R" divides two numbers. | |
327 | .PP | |
328 | Binary \*(L"%\*(R" computes the modulus of two numbers. Given integer | |
329 | operands \f(CW$a\fR and \f(CW$b\fR: If \f(CW$b\fR is positive, then \f(CW\*(C`$a % $b\*(C'\fR is | |
330 | \&\f(CW$a\fR minus the largest multiple of \f(CW$b\fR that is not greater than | |
331 | \&\f(CW$a\fR. If \f(CW$b\fR is negative, then \f(CW\*(C`$a % $b\*(C'\fR is \f(CW$a\fR minus the | |
332 | smallest multiple of \f(CW$b\fR that is not less than \f(CW$a\fR (i.e. the | |
333 | result will be less than or equal to zero). | |
334 | Note that when \f(CW\*(C`use integer\*(C'\fR is in scope, \*(L"%\*(R" gives you direct access | |
335 | to the modulus operator as implemented by your C compiler. This | |
336 | operator is not as well defined for negative operands, but it will | |
337 | execute faster. | |
338 | .PP | |
339 | Binary \*(L"x\*(R" is the repetition operator. In scalar context or if the left | |
340 | operand is not enclosed in parentheses, it returns a string consisting | |
341 | of the left operand repeated the number of times specified by the right | |
342 | operand. In list context, if the left operand is enclosed in | |
343 | parentheses, it repeats the list. | |
344 | .PP | |
345 | .Vb 1 | |
346 | \& print '-' x 80; # print row of dashes | |
347 | .Ve | |
348 | .PP | |
349 | .Vb 1 | |
350 | \& print "\et" x ($tab/8), ' ' x ($tab%8); # tab over | |
351 | .Ve | |
352 | .PP | |
353 | .Vb 2 | |
354 | \& @ones = (1) x 80; # a list of 80 1's | |
355 | \& @ones = (5) x @ones; # set all elements to 5 | |
356 | .Ve | |
357 | .Sh "Additive Operators" | |
358 | .IX Subsection "Additive Operators" | |
359 | Binary \*(L"+\*(R" returns the sum of two numbers. | |
360 | .PP | |
361 | Binary \*(L"\-\*(R" returns the difference of two numbers. | |
362 | .PP | |
363 | Binary \*(L".\*(R" concatenates two strings. | |
364 | .Sh "Shift Operators" | |
365 | .IX Subsection "Shift Operators" | |
366 | Binary \*(L"<<\*(R" returns the value of its left argument shifted left by the | |
367 | number of bits specified by the right argument. Arguments should be | |
368 | integers. (See also \*(L"Integer Arithmetic\*(R".) | |
369 | .PP | |
370 | Binary \*(L">>\*(R" returns the value of its left argument shifted right by | |
371 | the number of bits specified by the right argument. Arguments should | |
372 | be integers. (See also \*(L"Integer Arithmetic\*(R".) | |
373 | .PP | |
374 | Note that both \*(L"<<\*(R" and \*(L">>\*(R" in Perl are implemented directly using | |
375 | \&\*(L"<<\*(R" and \*(L">>\*(R" in C. If \f(CW\*(C`use integer\*(C'\fR (see \*(L"Integer Arithmetic\*(R") is | |
376 | in force then signed C integers are used, else unsigned C integers are | |
377 | used. Either way, the implementation isn't going to generate results | |
378 | larger than the size of the integer type Perl was built with (32 bits | |
379 | or 64 bits). | |
380 | .PP | |
381 | The result of overflowing the range of the integers is undefined | |
382 | because it is undefined also in C. In other words, using 32\-bit | |
383 | integers, \f(CW\*(C`1 << 32\*(C'\fR is undefined. Shifting by a negative number | |
384 | of bits is also undefined. | |
385 | .Sh "Named Unary Operators" | |
386 | .IX Subsection "Named Unary Operators" | |
387 | The various named unary operators are treated as functions with one | |
388 | argument, with optional parentheses. These include the filetest | |
389 | operators, like \f(CW\*(C`\-f\*(C'\fR, \f(CW\*(C`\-M\*(C'\fR, etc. See perlfunc. | |
390 | .PP | |
391 | If any list operator (\fIprint()\fR, etc.) or any unary operator (\fIchdir()\fR, etc.) | |
392 | is followed by a left parenthesis as the next token, the operator and | |
393 | arguments within parentheses are taken to be of highest precedence, | |
394 | just like a normal function call. For example, | |
395 | because named unary operators are higher precedence than ||: | |
396 | .PP | |
397 | .Vb 4 | |
398 | \& chdir $foo || die; # (chdir $foo) || die | |
399 | \& chdir($foo) || die; # (chdir $foo) || die | |
400 | \& chdir ($foo) || die; # (chdir $foo) || die | |
401 | \& chdir +($foo) || die; # (chdir $foo) || die | |
402 | .Ve | |
403 | .PP | |
404 | but, because * is higher precedence than named operators: | |
405 | .PP | |
406 | .Vb 4 | |
407 | \& chdir $foo * 20; # chdir ($foo * 20) | |
408 | \& chdir($foo) * 20; # (chdir $foo) * 20 | |
409 | \& chdir ($foo) * 20; # (chdir $foo) * 20 | |
410 | \& chdir +($foo) * 20; # chdir ($foo * 20) | |
411 | .Ve | |
412 | .PP | |
413 | .Vb 4 | |
414 | \& rand 10 * 20; # rand (10 * 20) | |
415 | \& rand(10) * 20; # (rand 10) * 20 | |
416 | \& rand (10) * 20; # (rand 10) * 20 | |
417 | \& rand +(10) * 20; # rand (10 * 20) | |
418 | .Ve | |
419 | .PP | |
420 | See also \*(L"Terms and List Operators (Leftward)\*(R". | |
421 | .Sh "Relational Operators" | |
422 | .IX Subsection "Relational Operators" | |
423 | Binary \*(L"<\*(R" returns true if the left argument is numerically less than | |
424 | the right argument. | |
425 | .PP | |
426 | Binary \*(L">\*(R" returns true if the left argument is numerically greater | |
427 | than the right argument. | |
428 | .PP | |
429 | Binary \*(L"<=\*(R" returns true if the left argument is numerically less than | |
430 | or equal to the right argument. | |
431 | .PP | |
432 | Binary \*(L">=\*(R" returns true if the left argument is numerically greater | |
433 | than or equal to the right argument. | |
434 | .PP | |
435 | Binary \*(L"lt\*(R" returns true if the left argument is stringwise less than | |
436 | the right argument. | |
437 | .PP | |
438 | Binary \*(L"gt\*(R" returns true if the left argument is stringwise greater | |
439 | than the right argument. | |
440 | .PP | |
441 | Binary \*(L"le\*(R" returns true if the left argument is stringwise less than | |
442 | or equal to the right argument. | |
443 | .PP | |
444 | Binary \*(L"ge\*(R" returns true if the left argument is stringwise greater | |
445 | than or equal to the right argument. | |
446 | .Sh "Equality Operators" | |
447 | .IX Subsection "Equality Operators" | |
448 | Binary \*(L"==\*(R" returns true if the left argument is numerically equal to | |
449 | the right argument. | |
450 | .PP | |
451 | Binary \*(L"!=\*(R" returns true if the left argument is numerically not equal | |
452 | to the right argument. | |
453 | .PP | |
454 | Binary \*(L"<=>\*(R" returns \-1, 0, or 1 depending on whether the left | |
455 | argument is numerically less than, equal to, or greater than the right | |
456 | argument. If your platform supports NaNs (not\-a\-numbers) as numeric | |
457 | values, using them with \*(L"<=>\*(R" returns undef. NaN is not \*(L"<\*(R", \*(L"==\*(R", \*(L">\*(R", | |
458 | \&\*(L"<=\*(R" or \*(L">=\*(R" anything (even NaN), so those 5 return false. NaN != NaN | |
459 | returns true, as does NaN != anything else. If your platform doesn't | |
460 | support NaNs then NaN is just a string with numeric value 0. | |
461 | .PP | |
462 | .Vb 2 | |
463 | \& perl -le '$a = NaN; print "No NaN support here" if $a == $a' | |
464 | \& perl -le '$a = NaN; print "NaN support here" if $a != $a' | |
465 | .Ve | |
466 | .PP | |
467 | Binary \*(L"eq\*(R" returns true if the left argument is stringwise equal to | |
468 | the right argument. | |
469 | .PP | |
470 | Binary \*(L"ne\*(R" returns true if the left argument is stringwise not equal | |
471 | to the right argument. | |
472 | .PP | |
473 | Binary \*(L"cmp\*(R" returns \-1, 0, or 1 depending on whether the left | |
474 | argument is stringwise less than, equal to, or greater than the right | |
475 | argument. | |
476 | .PP | |
477 | \&\*(L"lt\*(R", \*(L"le\*(R", \*(L"ge\*(R", \*(L"gt\*(R" and \*(L"cmp\*(R" use the collation (sort) order specified | |
478 | by the current locale if \f(CW\*(C`use locale\*(C'\fR is in effect. See perllocale. | |
479 | .Sh "Bitwise And" | |
480 | .IX Subsection "Bitwise And" | |
481 | Binary \*(L"&\*(R" returns its operands ANDed together bit by bit. | |
482 | (See also \*(L"Integer Arithmetic\*(R" and \*(L"Bitwise String Operators\*(R".) | |
483 | .Sh "Bitwise Or and Exclusive Or" | |
484 | .IX Subsection "Bitwise Or and Exclusive Or" | |
485 | Binary \*(L"|\*(R" returns its operands ORed together bit by bit. | |
486 | (See also \*(L"Integer Arithmetic\*(R" and \*(L"Bitwise String Operators\*(R".) | |
487 | .PP | |
488 | Binary \*(L"^\*(R" returns its operands XORed together bit by bit. | |
489 | (See also \*(L"Integer Arithmetic\*(R" and \*(L"Bitwise String Operators\*(R".) | |
490 | .Sh "C\-style Logical And" | |
491 | .IX Subsection "C-style Logical And" | |
492 | Binary \*(L"&&\*(R" performs a short-circuit logical \s-1AND\s0 operation. That is, | |
493 | if the left operand is false, the right operand is not even evaluated. | |
494 | Scalar or list context propagates down to the right operand if it | |
495 | is evaluated. | |
496 | .Sh "C\-style Logical Or" | |
497 | .IX Subsection "C-style Logical Or" | |
498 | Binary \*(L"||\*(R" performs a short-circuit logical \s-1OR\s0 operation. That is, | |
499 | if the left operand is true, the right operand is not even evaluated. | |
500 | Scalar or list context propagates down to the right operand if it | |
501 | is evaluated. | |
502 | .PP | |
503 | The \f(CW\*(C`||\*(C'\fR and \f(CW\*(C`&&\*(C'\fR operators differ from C's in that, rather than returning | |
504 | 0 or 1, they return the last value evaluated. Thus, a reasonably portable | |
505 | way to find out the home directory (assuming it's not \*(L"0\*(R") might be: | |
506 | .PP | |
507 | .Vb 2 | |
508 | \& $home = $ENV{'HOME'} || $ENV{'LOGDIR'} || | |
509 | \& (getpwuid($<))[7] || die "You're homeless!\en"; | |
510 | .Ve | |
511 | .PP | |
512 | In particular, this means that you shouldn't use this | |
513 | for selecting between two aggregates for assignment: | |
514 | .PP | |
515 | .Vb 3 | |
516 | \& @a = @b || @c; # this is wrong | |
517 | \& @a = scalar(@b) || @c; # really meant this | |
518 | \& @a = @b ? @b : @c; # this works fine, though | |
519 | .Ve | |
520 | .PP | |
521 | As more readable alternatives to \f(CW\*(C`&&\*(C'\fR and \f(CW\*(C`||\*(C'\fR when used for | |
522 | control flow, Perl provides \f(CW\*(C`and\*(C'\fR and \f(CW\*(C`or\*(C'\fR operators (see below). | |
523 | The short-circuit behavior is identical. The precedence of \*(L"and\*(R" and | |
524 | \&\*(L"or\*(R" is much lower, however, so that you can safely use them after a | |
525 | list operator without the need for parentheses: | |
526 | .PP | |
527 | .Vb 2 | |
528 | \& unlink "alpha", "beta", "gamma" | |
529 | \& or gripe(), next LINE; | |
530 | .Ve | |
531 | .PP | |
532 | With the C\-style operators that would have been written like this: | |
533 | .PP | |
534 | .Vb 2 | |
535 | \& unlink("alpha", "beta", "gamma") | |
536 | \& || (gripe(), next LINE); | |
537 | .Ve | |
538 | .PP | |
539 | Using \*(L"or\*(R" for assignment is unlikely to do what you want; see below. | |
540 | .Sh "Range Operators" | |
541 | .IX Subsection "Range Operators" | |
542 | Binary \*(L"..\*(R" is the range operator, which is really two different | |
543 | operators depending on the context. In list context, it returns a | |
544 | list of values counting (up by ones) from the left value to the right | |
545 | value. If the left value is greater than the right value then it | |
546 | returns the empty list. The range operator is useful for writing | |
547 | \&\f(CW\*(C`foreach (1..10)\*(C'\fR loops and for doing slice operations on arrays. In | |
548 | the current implementation, no temporary array is created when the | |
549 | range operator is used as the expression in \f(CW\*(C`foreach\*(C'\fR loops, but older | |
550 | versions of Perl might burn a lot of memory when you write something | |
551 | like this: | |
552 | .PP | |
553 | .Vb 3 | |
554 | \& for (1 .. 1_000_000) { | |
555 | \& # code | |
556 | \& } | |
557 | .Ve | |
558 | .PP | |
559 | The range operator also works on strings, using the magical auto\-increment, | |
560 | see below. | |
561 | .PP | |
562 | In scalar context, \*(L"..\*(R" returns a boolean value. The operator is | |
563 | bistable, like a flip\-flop, and emulates the line-range (comma) operator | |
564 | of \fBsed\fR, \fBawk\fR, and various editors. Each \*(L"..\*(R" operator maintains its | |
565 | own boolean state. It is false as long as its left operand is false. | |
566 | Once the left operand is true, the range operator stays true until the | |
567 | right operand is true, \fI\s-1AFTER\s0\fR which the range operator becomes false | |
568 | again. It doesn't become false till the next time the range operator is | |
569 | evaluated. It can test the right operand and become false on the same | |
570 | evaluation it became true (as in \fBawk\fR), but it still returns true once. | |
571 | If you don't want it to test the right operand till the next | |
572 | evaluation, as in \fBsed\fR, just use three dots (\*(L"...\*(R") instead of | |
573 | two. In all other regards, \*(L"...\*(R" behaves just like \*(L"..\*(R" does. | |
574 | .PP | |
575 | The right operand is not evaluated while the operator is in the | |
576 | \&\*(L"false\*(R" state, and the left operand is not evaluated while the | |
577 | operator is in the \*(L"true\*(R" state. The precedence is a little lower | |
578 | than || and &&. The value returned is either the empty string for | |
579 | false, or a sequence number (beginning with 1) for true. The | |
580 | sequence number is reset for each range encountered. The final | |
581 | sequence number in a range has the string \*(L"E0\*(R" appended to it, which | |
582 | doesn't affect its numeric value, but gives you something to search | |
583 | for if you want to exclude the endpoint. You can exclude the | |
584 | beginning point by waiting for the sequence number to be greater | |
585 | than 1. If either operand of scalar \*(L"..\*(R" is a constant expression, | |
586 | that operand is implicitly compared to the \f(CW$.\fR variable, the | |
587 | current line number. Examples: | |
588 | .PP | |
589 | As a scalar operator: | |
590 | .PP | |
591 | .Vb 3 | |
592 | \& if (101 .. 200) { print; } # print 2nd hundred lines | |
593 | \& next line if (1 .. /^$/); # skip header lines | |
594 | \& s/^/> / if (/^$/ .. eof()); # quote body | |
595 | .Ve | |
596 | .PP | |
597 | .Vb 8 | |
598 | \& # parse mail messages | |
599 | \& while (<>) { | |
600 | \& $in_header = 1 .. /^$/; | |
601 | \& $in_body = /^$/ .. eof(); | |
602 | \& # do something based on those | |
603 | \& } continue { | |
604 | \& close ARGV if eof; # reset $. each file | |
605 | \& } | |
606 | .Ve | |
607 | .PP | |
608 | As a list operator: | |
609 | .PP | |
610 | .Vb 3 | |
611 | \& for (101 .. 200) { print; } # print $_ 100 times | |
612 | \& @foo = @foo[0 .. $#foo]; # an expensive no-op | |
613 | \& @foo = @foo[$#foo-4 .. $#foo]; # slice last 5 items | |
614 | .Ve | |
615 | .PP | |
616 | The range operator (in list context) makes use of the magical | |
617 | auto-increment algorithm if the operands are strings. You | |
618 | can say | |
619 | .PP | |
620 | .Vb 1 | |
621 | \& @alphabet = ('A' .. 'Z'); | |
622 | .Ve | |
623 | .PP | |
624 | to get all normal letters of the English alphabet, or | |
625 | .PP | |
626 | .Vb 1 | |
627 | \& $hexdigit = (0 .. 9, 'a' .. 'f')[$num & 15]; | |
628 | .Ve | |
629 | .PP | |
630 | to get a hexadecimal digit, or | |
631 | .PP | |
632 | .Vb 1 | |
633 | \& @z2 = ('01' .. '31'); print $z2[$mday]; | |
634 | .Ve | |
635 | .PP | |
636 | to get dates with leading zeros. If the final value specified is not | |
637 | in the sequence that the magical increment would produce, the sequence | |
638 | goes until the next value would be longer than the final value | |
639 | specified. | |
640 | .Sh "Conditional Operator" | |
641 | .IX Subsection "Conditional Operator" | |
642 | Ternary \*(L"?:\*(R" is the conditional operator, just as in C. It works much | |
643 | like an if\-then\-else. If the argument before the ? is true, the | |
644 | argument before the : is returned, otherwise the argument after the : | |
645 | is returned. For example: | |
646 | .PP | |
647 | .Vb 2 | |
648 | \& printf "I have %d dog%s.\en", $n, | |
649 | \& ($n == 1) ? '' : "s"; | |
650 | .Ve | |
651 | .PP | |
652 | Scalar or list context propagates downward into the 2nd | |
653 | or 3rd argument, whichever is selected. | |
654 | .PP | |
655 | .Vb 3 | |
656 | \& $a = $ok ? $b : $c; # get a scalar | |
657 | \& @a = $ok ? @b : @c; # get an array | |
658 | \& $a = $ok ? @b : @c; # oops, that's just a count! | |
659 | .Ve | |
660 | .PP | |
661 | The operator may be assigned to if both the 2nd and 3rd arguments are | |
662 | legal lvalues (meaning that you can assign to them): | |
663 | .PP | |
664 | .Vb 1 | |
665 | \& ($a_or_b ? $a : $b) = $c; | |
666 | .Ve | |
667 | .PP | |
668 | Because this operator produces an assignable result, using assignments | |
669 | without parentheses will get you in trouble. For example, this: | |
670 | .PP | |
671 | .Vb 1 | |
672 | \& $a % 2 ? $a += 10 : $a += 2 | |
673 | .Ve | |
674 | .PP | |
675 | Really means this: | |
676 | .PP | |
677 | .Vb 1 | |
678 | \& (($a % 2) ? ($a += 10) : $a) += 2 | |
679 | .Ve | |
680 | .PP | |
681 | Rather than this: | |
682 | .PP | |
683 | .Vb 1 | |
684 | \& ($a % 2) ? ($a += 10) : ($a += 2) | |
685 | .Ve | |
686 | .PP | |
687 | That should probably be written more simply as: | |
688 | .PP | |
689 | .Vb 1 | |
690 | \& $a += ($a % 2) ? 10 : 2; | |
691 | .Ve | |
692 | .Sh "Assignment Operators" | |
693 | .IX Subsection "Assignment Operators" | |
694 | \&\*(L"=\*(R" is the ordinary assignment operator. | |
695 | .PP | |
696 | Assignment operators work as in C. That is, | |
697 | .PP | |
698 | .Vb 1 | |
699 | \& $a += 2; | |
700 | .Ve | |
701 | .PP | |
702 | is equivalent to | |
703 | .PP | |
704 | .Vb 1 | |
705 | \& $a = $a + 2; | |
706 | .Ve | |
707 | .PP | |
708 | although without duplicating any side effects that dereferencing the lvalue | |
709 | might trigger, such as from \fItie()\fR. Other assignment operators work similarly. | |
710 | The following are recognized: | |
711 | .PP | |
712 | .Vb 4 | |
713 | \& **= += *= &= <<= &&= | |
714 | \& -= /= |= >>= ||= | |
715 | \& .= %= ^= | |
716 | \& x= | |
717 | .Ve | |
718 | .PP | |
719 | Although these are grouped by family, they all have the precedence | |
720 | of assignment. | |
721 | .PP | |
722 | Unlike in C, the scalar assignment operator produces a valid lvalue. | |
723 | Modifying an assignment is equivalent to doing the assignment and | |
724 | then modifying the variable that was assigned to. This is useful | |
725 | for modifying a copy of something, like this: | |
726 | .PP | |
727 | .Vb 1 | |
728 | \& ($tmp = $global) =~ tr [A-Z] [a-z]; | |
729 | .Ve | |
730 | .PP | |
731 | Likewise, | |
732 | .PP | |
733 | .Vb 1 | |
734 | \& ($a += 2) *= 3; | |
735 | .Ve | |
736 | .PP | |
737 | is equivalent to | |
738 | .PP | |
739 | .Vb 2 | |
740 | \& $a += 2; | |
741 | \& $a *= 3; | |
742 | .Ve | |
743 | .PP | |
744 | Similarly, a list assignment in list context produces the list of | |
745 | lvalues assigned to, and a list assignment in scalar context returns | |
746 | the number of elements produced by the expression on the right hand | |
747 | side of the assignment. | |
748 | .Sh "Comma Operator" | |
749 | .IX Subsection "Comma Operator" | |
750 | Binary \*(L",\*(R" is the comma operator. In scalar context it evaluates | |
751 | its left argument, throws that value away, then evaluates its right | |
752 | argument and returns that value. This is just like C's comma operator. | |
753 | .PP | |
754 | In list context, it's just the list argument separator, and inserts | |
755 | both its arguments into the list. | |
756 | .PP | |
757 | The => digraph is mostly just a synonym for the comma operator. It's useful for | |
758 | documenting arguments that come in pairs. As of release 5.001, it also forces | |
759 | any word to the left of it to be interpreted as a string. | |
760 | .Sh "List Operators (Rightward)" | |
761 | .IX Subsection "List Operators (Rightward)" | |
762 | On the right side of a list operator, the comma has very low precedence, | |
763 | such that it controls all comma-separated expressions found there. | |
764 | The only operators with lower precedence are the logical operators | |
765 | \&\*(L"and\*(R", \*(L"or\*(R", and \*(L"not\*(R", which may be used to evaluate calls to list | |
766 | operators without the need for extra parentheses: | |
767 | .PP | |
768 | .Vb 2 | |
769 | \& open HANDLE, "filename" | |
770 | \& or die "Can't open: $!\en"; | |
771 | .Ve | |
772 | .PP | |
773 | See also discussion of list operators in \*(L"Terms and List Operators (Leftward)\*(R". | |
774 | .Sh "Logical Not" | |
775 | .IX Subsection "Logical Not" | |
776 | Unary \*(L"not\*(R" returns the logical negation of the expression to its right. | |
777 | It's the equivalent of \*(L"!\*(R" except for the very low precedence. | |
778 | .Sh "Logical And" | |
779 | .IX Subsection "Logical And" | |
780 | Binary \*(L"and\*(R" returns the logical conjunction of the two surrounding | |
781 | expressions. It's equivalent to && except for the very low | |
782 | precedence. This means that it short\-circuits: i.e., the right | |
783 | expression is evaluated only if the left expression is true. | |
784 | .Sh "Logical or and Exclusive Or" | |
785 | .IX Subsection "Logical or and Exclusive Or" | |
786 | Binary \*(L"or\*(R" returns the logical disjunction of the two surrounding | |
787 | expressions. It's equivalent to || except for the very low precedence. | |
788 | This makes it useful for control flow: | |
789 | .PP | |
790 | .Vb 1 | |
791 | \& print FH $data or die "Can't write to FH: $!"; | |
792 | .Ve | |
793 | .PP | |
794 | This means that it short\-circuits: i.e., the right expression is evaluated | |
795 | only if the left expression is false. Due to its precedence, you should | |
796 | probably avoid using this for assignment, only for control flow. | |
797 | .PP | |
798 | .Vb 3 | |
799 | \& $a = $b or $c; # bug: this is wrong | |
800 | \& ($a = $b) or $c; # really means this | |
801 | \& $a = $b || $c; # better written this way | |
802 | .Ve | |
803 | .PP | |
804 | However, when it's a list-context assignment and you're trying to use | |
805 | \&\*(L"||\*(R" for control flow, you probably need \*(L"or\*(R" so that the assignment | |
806 | takes higher precedence. | |
807 | .PP | |
808 | .Vb 2 | |
809 | \& @info = stat($file) || die; # oops, scalar sense of stat! | |
810 | \& @info = stat($file) or die; # better, now @info gets its due | |
811 | .Ve | |
812 | .PP | |
813 | Then again, you could always use parentheses. | |
814 | .PP | |
815 | Binary \*(L"xor\*(R" returns the exclusive-OR of the two surrounding expressions. | |
816 | It cannot short circuit, of course. | |
817 | .Sh "C Operators Missing From Perl" | |
818 | .IX Subsection "C Operators Missing From Perl" | |
819 | Here is what C has that Perl doesn't: | |
820 | .IP "unary &" 8 | |
821 | .IX Item "unary &" | |
822 | Address-of operator. (But see the \*(L"\e\*(R" operator for taking a reference.) | |
823 | .IP "unary *" 8 | |
824 | .IX Item "unary *" | |
825 | Dereference-address operator. (Perl's prefix dereferencing | |
826 | operators are typed: $, @, %, and &.) | |
827 | .IP "(\s-1TYPE\s0)" 8 | |
828 | .IX Item "(TYPE)" | |
829 | Type-casting operator. | |
830 | .Sh "Quote and Quote-like Operators" | |
831 | .IX Subsection "Quote and Quote-like Operators" | |
832 | While we usually think of quotes as literal values, in Perl they | |
833 | function as operators, providing various kinds of interpolating and | |
834 | pattern matching capabilities. Perl provides customary quote characters | |
835 | for these behaviors, but also provides a way for you to choose your | |
836 | quote character for any of them. In the following table, a \f(CW\*(C`{}\*(C'\fR represents | |
837 | any pair of delimiters you choose. | |
838 | .PP | |
839 | .Vb 10 | |
840 | \& Customary Generic Meaning Interpolates | |
841 | \& '' q{} Literal no | |
842 | \& "" qq{} Literal yes | |
843 | \& `` qx{} Command yes* | |
844 | \& qw{} Word list no | |
845 | \& // m{} Pattern match yes* | |
846 | \& qr{} Pattern yes* | |
847 | \& s{}{} Substitution yes* | |
848 | \& tr{}{} Transliteration no (but see below) | |
849 | \& <<EOF here-doc yes* | |
850 | .Ve | |
851 | .PP | |
852 | .Vb 1 | |
853 | \& * unless the delimiter is ''. | |
854 | .Ve | |
855 | .PP | |
856 | Non-bracketing delimiters use the same character fore and aft, but the four | |
857 | sorts of brackets (round, angle, square, curly) will all nest, which means | |
858 | that | |
859 | .PP | |
860 | .Vb 1 | |
861 | \& q{foo{bar}baz} | |
862 | .Ve | |
863 | .PP | |
864 | is the same as | |
865 | .PP | |
866 | .Vb 1 | |
867 | \& 'foo{bar}baz' | |
868 | .Ve | |
869 | .PP | |
870 | Note, however, that this does not always work for quoting Perl code: | |
871 | .PP | |
872 | .Vb 1 | |
873 | \& $s = q{ if($a eq "}") ... }; # WRONG | |
874 | .Ve | |
875 | .PP | |
876 | is a syntax error. The \f(CW\*(C`Text::Balanced\*(C'\fR module (from \s-1CPAN\s0, and | |
877 | starting from Perl 5.8 part of the standard distribution) is able | |
878 | to do this properly. | |
879 | .PP | |
880 | There can be whitespace between the operator and the quoting | |
881 | characters, except when \f(CW\*(C`#\*(C'\fR is being used as the quoting character. | |
882 | \&\f(CW\*(C`q#foo#\*(C'\fR is parsed as the string \f(CW\*(C`foo\*(C'\fR, while \f(CW\*(C`q #foo#\*(C'\fR is the | |
883 | operator \f(CW\*(C`q\*(C'\fR followed by a comment. Its argument will be taken | |
884 | from the next line. This allows you to write: | |
885 | .PP | |
886 | .Vb 2 | |
887 | \& s {foo} # Replace foo | |
888 | \& {bar} # with bar. | |
889 | .Ve | |
890 | .PP | |
891 | The following escape sequences are available in constructs that interpolate | |
892 | and in transliterations. | |
893 | .PP | |
894 | .Vb 12 | |
895 | \& \et tab (HT, TAB) | |
896 | \& \en newline (NL) | |
897 | \& \er return (CR) | |
898 | \& \ef form feed (FF) | |
899 | \& \eb backspace (BS) | |
900 | \& \ea alarm (bell) (BEL) | |
901 | \& \ee escape (ESC) | |
902 | \& \e033 octal char (ESC) | |
903 | \& \ex1b hex char (ESC) | |
904 | \& \ex{263a} wide hex char (SMILEY) | |
905 | \& \ec[ control char (ESC) | |
906 | \& \eN{name} named Unicode character | |
907 | .Ve | |
908 | .PP | |
909 | The following escape sequences are available in constructs that interpolate | |
910 | but not in transliterations. | |
911 | .PP | |
912 | .Vb 6 | |
913 | \& \el lowercase next char | |
914 | \& \eu uppercase next char | |
915 | \& \eL lowercase till \eE | |
916 | \& \eU uppercase till \eE | |
917 | \& \eE end case modification | |
918 | \& \eQ quote non-word characters till \eE | |
919 | .Ve | |
920 | .PP | |
921 | If \f(CW\*(C`use locale\*(C'\fR is in effect, the case map used by \f(CW\*(C`\el\*(C'\fR, \f(CW\*(C`\eL\*(C'\fR, | |
922 | \&\f(CW\*(C`\eu\*(C'\fR and \f(CW\*(C`\eU\*(C'\fR is taken from the current locale. See perllocale. | |
923 | If Unicode (for example, \f(CW\*(C`\eN{}\*(C'\fR or wide hex characters of 0x100 or | |
924 | beyond) is being used, the case map used by \f(CW\*(C`\el\*(C'\fR, \f(CW\*(C`\eL\*(C'\fR, \f(CW\*(C`\eu\*(C'\fR and | |
925 | \&\f(CW\*(C`\eU\*(C'\fR is as defined by Unicode. For documentation of \f(CW\*(C`\eN{name}\*(C'\fR, | |
926 | see charnames. | |
927 | .PP | |
928 | All systems use the virtual \f(CW"\en"\fR to represent a line terminator, | |
929 | called a \*(L"newline\*(R". There is no such thing as an unvarying, physical | |
930 | newline character. It is only an illusion that the operating system, | |
931 | device drivers, C libraries, and Perl all conspire to preserve. Not all | |
932 | systems read \f(CW"\er"\fR as \s-1ASCII\s0 \s-1CR\s0 and \f(CW"\en"\fR as \s-1ASCII\s0 \s-1LF\s0. For example, | |
933 | on a Mac, these are reversed, and on systems without a line terminator, | |
934 | printing \f(CW"\en"\fR may emit no actual data. In general, use \f(CW"\en"\fR when | |
935 | you mean a \*(L"newline\*(R" for your system, but use the literal \s-1ASCII\s0 when you | |
936 | need an exact character. For example, most networking protocols expect | |
937 | and prefer a \s-1CR+LF\s0 (\f(CW"\e015\e012"\fR or \f(CW"\ecM\ecJ"\fR) for line terminators, | |
938 | and although they often accept just \f(CW"\e012"\fR, they seldom tolerate just | |
939 | \&\f(CW"\e015"\fR. If you get in the habit of using \f(CW"\en"\fR for networking, | |
940 | you may be burned some day. | |
941 | .PP | |
942 | For constructs that do interpolate, variables beginning with \*(L"\f(CW$\fR\*(R" | |
943 | or \*(L"\f(CW@\fR\*(R" are interpolated. Subscripted variables such as \f(CW$a[3]\fR or | |
944 | \&\f(CW\*(C`$href\->{key}[0]\*(C'\fR are also interpolated, as are array and hash slices. | |
945 | But method calls such as \f(CW\*(C`$obj\->meth\*(C'\fR are not. | |
946 | .PP | |
947 | Interpolating an array or slice interpolates the elements in order, | |
948 | separated by the value of \f(CW$"\fR, so is equivalent to interpolating | |
949 | \&\f(CW\*(C`join $", @array\*(C'\fR. \*(L"Punctuation\*(R" arrays such as \f(CW\*(C`@+\*(C'\fR are only | |
950 | interpolated if the name is enclosed in braces \f(CW\*(C`@{+}\*(C'\fR. | |
951 | .PP | |
952 | You cannot include a literal \f(CW\*(C`$\*(C'\fR or \f(CW\*(C`@\*(C'\fR within a \f(CW\*(C`\eQ\*(C'\fR sequence. | |
953 | An unescaped \f(CW\*(C`$\*(C'\fR or \f(CW\*(C`@\*(C'\fR interpolates the corresponding variable, | |
954 | while escaping will cause the literal string \f(CW\*(C`\e$\*(C'\fR to be inserted. | |
955 | You'll need to write something like \f(CW\*(C`m/\eQuser\eE\e@\eQhost/\*(C'\fR. | |
956 | .PP | |
957 | Patterns are subject to an additional level of interpretation as a | |
958 | regular expression. This is done as a second pass, after variables are | |
959 | interpolated, so that regular expressions may be incorporated into the | |
960 | pattern from the variables. If this is not what you want, use \f(CW\*(C`\eQ\*(C'\fR to | |
961 | interpolate a variable literally. | |
962 | .PP | |
963 | Apart from the behavior described above, Perl does not expand | |
964 | multiple levels of interpolation. In particular, contrary to the | |
965 | expectations of shell programmers, back-quotes do \fI\s-1NOT\s0\fR interpolate | |
966 | within double quotes, nor do single quotes impede evaluation of | |
967 | variables when used within double quotes. | |
968 | .Sh "Regexp Quote-Like Operators" | |
969 | .IX Subsection "Regexp Quote-Like Operators" | |
970 | Here are the quote-like operators that apply to pattern | |
971 | matching and related activities. | |
972 | .IP "?PATTERN?" 8 | |
973 | .IX Item "?PATTERN?" | |
974 | This is just like the \f(CW\*(C`/pattern/\*(C'\fR search, except that it matches only | |
975 | once between calls to the \fIreset()\fR operator. This is a useful | |
976 | optimization when you want to see only the first occurrence of | |
977 | something in each file of a set of files, for instance. Only \f(CW\*(C`??\*(C'\fR | |
978 | patterns local to the current package are reset. | |
979 | .Sp | |
980 | .Vb 7 | |
981 | \& while (<>) { | |
982 | \& if (?^$?) { | |
983 | \& # blank line between header and body | |
984 | \& } | |
985 | \& } continue { | |
986 | \& reset if eof; # clear ?? status for next file | |
987 | \& } | |
988 | .Ve | |
989 | .Sp | |
990 | This usage is vaguely deprecated, which means it just might possibly | |
991 | be removed in some distant future version of Perl, perhaps somewhere | |
992 | around the year 2168. | |
993 | .IP "m/PATTERN/cgimosx" 8 | |
994 | .IX Item "m/PATTERN/cgimosx" | |
995 | .PD 0 | |
996 | .IP "/PATTERN/cgimosx" 8 | |
997 | .IX Item "/PATTERN/cgimosx" | |
998 | .PD | |
999 | Searches a string for a pattern match, and in scalar context returns | |
1000 | true if it succeeds, false if it fails. If no string is specified | |
1001 | via the \f(CW\*(C`=~\*(C'\fR or \f(CW\*(C`!~\*(C'\fR operator, the \f(CW$_\fR string is searched. (The | |
1002 | string specified with \f(CW\*(C`=~\*(C'\fR need not be an lvalue\*(--it may be the | |
1003 | result of an expression evaluation, but remember the \f(CW\*(C`=~\*(C'\fR binds | |
1004 | rather tightly.) See also perlre. See perllocale for | |
1005 | discussion of additional considerations that apply when \f(CW\*(C`use locale\*(C'\fR | |
1006 | is in effect. | |
1007 | .Sp | |
1008 | Options are: | |
1009 | .Sp | |
1010 | .Vb 7 | |
1011 | \& c Do not reset search position on a failed match when /g is in effect. | |
1012 | \& g Match globally, i.e., find all occurrences. | |
1013 | \& i Do case-insensitive pattern matching. | |
1014 | \& m Treat string as multiple lines. | |
1015 | \& o Compile pattern only once. | |
1016 | \& s Treat string as single line. | |
1017 | \& x Use extended regular expressions. | |
1018 | .Ve | |
1019 | .Sp | |
1020 | If \*(L"/\*(R" is the delimiter then the initial \f(CW\*(C`m\*(C'\fR is optional. With the \f(CW\*(C`m\*(C'\fR | |
1021 | you can use any pair of non\-alphanumeric, non-whitespace characters | |
1022 | as delimiters. This is particularly useful for matching path names | |
1023 | that contain \*(L"/\*(R", to avoid \s-1LTS\s0 (leaning toothpick syndrome). If \*(L"?\*(R" is | |
1024 | the delimiter, then the match-only-once rule of \f(CW\*(C`?PATTERN?\*(C'\fR applies. | |
1025 | If \*(L"'\*(R" is the delimiter, no interpolation is performed on the \s-1PATTERN\s0. | |
1026 | .Sp | |
1027 | \&\s-1PATTERN\s0 may contain variables, which will be interpolated (and the | |
1028 | pattern recompiled) every time the pattern search is evaluated, except | |
1029 | for when the delimiter is a single quote. (Note that \f(CW$(\fR, \f(CW$)\fR, and | |
1030 | \&\f(CW$|\fR are not interpolated because they look like end-of-string tests.) | |
1031 | If you want such a pattern to be compiled only once, add a \f(CW\*(C`/o\*(C'\fR after | |
1032 | the trailing delimiter. This avoids expensive run-time recompilations, | |
1033 | and is useful when the value you are interpolating won't change over | |
1034 | the life of the script. However, mentioning \f(CW\*(C`/o\*(C'\fR constitutes a promise | |
1035 | that you won't change the variables in the pattern. If you change them, | |
1036 | Perl won't even notice. See also \*(L"qr/STRING/imosx\*(R". | |
1037 | .Sp | |
1038 | If the \s-1PATTERN\s0 evaluates to the empty string, the last | |
1039 | \&\fIsuccessfully\fR matched regular expression is used instead. In this | |
1040 | case, only the \f(CW\*(C`g\*(C'\fR and \f(CW\*(C`c\*(C'\fR flags on the empty pattern are honoured \- | |
1041 | the other flags are taken from the original pattern. If no match has | |
1042 | previously succeeded, this will (silently) act instead as a genuine | |
1043 | empty pattern (which will always match). | |
1044 | .Sp | |
1045 | If the \f(CW\*(C`/g\*(C'\fR option is not used, \f(CW\*(C`m//\*(C'\fR in list context returns a | |
1046 | list consisting of the subexpressions matched by the parentheses in the | |
1047 | pattern, i.e., (\f(CW$1\fR, \f(CW$2\fR, \f(CW$3\fR...). (Note that here \f(CW$1\fR etc. are | |
1048 | also set, and that this differs from Perl 4's behavior.) When there are | |
1049 | no parentheses in the pattern, the return value is the list \f(CW\*(C`(1)\*(C'\fR for | |
1050 | success. With or without parentheses, an empty list is returned upon | |
1051 | failure. | |
1052 | .Sp | |
1053 | Examples: | |
1054 | .Sp | |
1055 | .Vb 2 | |
1056 | \& open(TTY, '/dev/tty'); | |
1057 | \& <TTY> =~ /^y/i && foo(); # do foo if desired | |
1058 | .Ve | |
1059 | .Sp | |
1060 | .Vb 1 | |
1061 | \& if (/Version: *([0-9.]*)/) { $version = $1; } | |
1062 | .Ve | |
1063 | .Sp | |
1064 | .Vb 1 | |
1065 | \& next if m#^/usr/spool/uucp#; | |
1066 | .Ve | |
1067 | .Sp | |
1068 | .Vb 5 | |
1069 | \& # poor man's grep | |
1070 | \& $arg = shift; | |
1071 | \& while (<>) { | |
1072 | \& print if /$arg/o; # compile only once | |
1073 | \& } | |
1074 | .Ve | |
1075 | .Sp | |
1076 | .Vb 1 | |
1077 | \& if (($F1, $F2, $Etc) = ($foo =~ /^(\eS+)\es+(\eS+)\es*(.*)/)) | |
1078 | .Ve | |
1079 | .Sp | |
1080 | This last example splits \f(CW$foo\fR into the first two words and the | |
1081 | remainder of the line, and assigns those three fields to \f(CW$F1\fR, \f(CW$F2\fR, and | |
1082 | \&\f(CW$Etc\fR. The conditional is true if any variables were assigned, i.e., if | |
1083 | the pattern matched. | |
1084 | .Sp | |
1085 | The \f(CW\*(C`/g\*(C'\fR modifier specifies global pattern matching\*(--that is, | |
1086 | matching as many times as possible within the string. How it behaves | |
1087 | depends on the context. In list context, it returns a list of the | |
1088 | substrings matched by any capturing parentheses in the regular | |
1089 | expression. If there are no parentheses, it returns a list of all | |
1090 | the matched strings, as if there were parentheses around the whole | |
1091 | pattern. | |
1092 | .Sp | |
1093 | In scalar context, each execution of \f(CW\*(C`m//g\*(C'\fR finds the next match, | |
1094 | returning true if it matches, and false if there is no further match. | |
1095 | The position after the last match can be read or set using the \fIpos()\fR | |
1096 | function; see \*(L"pos\*(R" in perlfunc. A failed match normally resets the | |
1097 | search position to the beginning of the string, but you can avoid that | |
1098 | by adding the \f(CW\*(C`/c\*(C'\fR modifier (e.g. \f(CW\*(C`m//gc\*(C'\fR). Modifying the target | |
1099 | string also resets the search position. | |
1100 | .Sp | |
1101 | You can intermix \f(CW\*(C`m//g\*(C'\fR matches with \f(CW\*(C`m/\eG.../g\*(C'\fR, where \f(CW\*(C`\eG\*(C'\fR is a | |
1102 | zero-width assertion that matches the exact position where the previous | |
1103 | \&\f(CW\*(C`m//g\*(C'\fR, if any, left off. Without the \f(CW\*(C`/g\*(C'\fR modifier, the \f(CW\*(C`\eG\*(C'\fR assertion | |
1104 | still anchors at \fIpos()\fR, but the match is of course only attempted once. | |
1105 | Using \f(CW\*(C`\eG\*(C'\fR without \f(CW\*(C`/g\*(C'\fR on a target string that has not previously had a | |
1106 | \&\f(CW\*(C`/g\*(C'\fR match applied to it is the same as using the \f(CW\*(C`\eA\*(C'\fR assertion to match | |
1107 | the beginning of the string. Note also that, currently, \f(CW\*(C`\eG\*(C'\fR is only | |
1108 | properly supported when anchored at the very beginning of the pattern. | |
1109 | .Sp | |
1110 | Examples: | |
1111 | .Sp | |
1112 | .Vb 2 | |
1113 | \& # list context | |
1114 | \& ($one,$five,$fifteen) = (`uptime` =~ /(\ed+\e.\ed+)/g); | |
1115 | .Ve | |
1116 | .Sp | |
1117 | .Vb 8 | |
1118 | \& # scalar context | |
1119 | \& $/ = ""; | |
1120 | \& while (defined($paragraph = <>)) { | |
1121 | \& while ($paragraph =~ /[a-z]['")]*[.!?]+['")]*\es/g) { | |
1122 | \& $sentences++; | |
1123 | \& } | |
1124 | \& } | |
1125 | \& print "$sentences\en"; | |
1126 | .Ve | |
1127 | .Sp | |
1128 | .Vb 11 | |
1129 | \& # using m//gc with \eG | |
1130 | \& $_ = "ppooqppqq"; | |
1131 | \& while ($i++ < 2) { | |
1132 | \& print "1: '"; | |
1133 | \& print $1 while /(o)/gc; print "', pos=", pos, "\en"; | |
1134 | \& print "2: '"; | |
1135 | \& print $1 if /\eG(q)/gc; print "', pos=", pos, "\en"; | |
1136 | \& print "3: '"; | |
1137 | \& print $1 while /(p)/gc; print "', pos=", pos, "\en"; | |
1138 | \& } | |
1139 | \& print "Final: '$1', pos=",pos,"\en" if /\eG(.)/; | |
1140 | .Ve | |
1141 | .Sp | |
1142 | The last example should print: | |
1143 | .Sp | |
1144 | .Vb 7 | |
1145 | \& 1: 'oo', pos=4 | |
1146 | \& 2: 'q', pos=5 | |
1147 | \& 3: 'pp', pos=7 | |
1148 | \& 1: '', pos=7 | |
1149 | \& 2: 'q', pos=8 | |
1150 | \& 3: '', pos=8 | |
1151 | \& Final: 'q', pos=8 | |
1152 | .Ve | |
1153 | .Sp | |
1154 | Notice that the final match matched \f(CW\*(C`q\*(C'\fR instead of \f(CW\*(C`p\*(C'\fR, which a match | |
1155 | without the \f(CW\*(C`\eG\*(C'\fR anchor would have done. Also note that the final match | |
1156 | did not update \f(CW\*(C`pos\*(C'\fR \*(-- \f(CW\*(C`pos\*(C'\fR is only updated on a \f(CW\*(C`/g\*(C'\fR match. If the | |
1157 | final match did indeed match \f(CW\*(C`p\*(C'\fR, it's a good bet that you're running an | |
1158 | older (pre\-5.6.0) Perl. | |
1159 | .Sp | |
1160 | A useful idiom for \f(CW\*(C`lex\*(C'\fR\-like scanners is \f(CW\*(C`/\eG.../gc\*(C'\fR. You can | |
1161 | combine several regexps like this to process a string part\-by\-part, | |
1162 | doing different actions depending on which regexp matched. Each | |
1163 | regexp tries to match where the previous one leaves off. | |
1164 | .Sp | |
1165 | .Vb 14 | |
1166 | \& $_ = <<'EOL'; | |
1167 | \& $url = new URI::URL "http://www/"; die if $url eq "xXx"; | |
1168 | \& EOL | |
1169 | \& LOOP: | |
1170 | \& { | |
1171 | \& print(" digits"), redo LOOP if /\eG\ed+\eb[,.;]?\es*/gc; | |
1172 | \& print(" lowercase"), redo LOOP if /\eG[a-z]+\eb[,.;]?\es*/gc; | |
1173 | \& print(" UPPERCASE"), redo LOOP if /\eG[A-Z]+\eb[,.;]?\es*/gc; | |
1174 | \& print(" Capitalized"), redo LOOP if /\eG[A-Z][a-z]+\eb[,.;]?\es*/gc; | |
1175 | \& print(" MiXeD"), redo LOOP if /\eG[A-Za-z]+\eb[,.;]?\es*/gc; | |
1176 | \& print(" alphanumeric"), redo LOOP if /\eG[A-Za-z0-9]+\eb[,.;]?\es*/gc; | |
1177 | \& print(" line-noise"), redo LOOP if /\eG[^A-Za-z0-9]+/gc; | |
1178 | \& print ". That's all!\en"; | |
1179 | \& } | |
1180 | .Ve | |
1181 | .Sp | |
1182 | Here is the output (split into several lines): | |
1183 | .Sp | |
1184 | .Vb 4 | |
1185 | \& line-noise lowercase line-noise lowercase UPPERCASE line-noise | |
1186 | \& UPPERCASE line-noise lowercase line-noise lowercase line-noise | |
1187 | \& lowercase lowercase line-noise lowercase lowercase line-noise | |
1188 | \& MiXeD line-noise. That's all! | |
1189 | .Ve | |
1190 | .IP "q/STRING/" 8 | |
1191 | .IX Item "q/STRING/" | |
1192 | .PD 0 | |
1193 | .ie n .IP "'STRING'" 8 | |
1194 | .el .IP "\f(CW'STRING'\fR" 8 | |
1195 | .IX Item "'STRING'" | |
1196 | .PD | |
1197 | A single\-quoted, literal string. A backslash represents a backslash | |
1198 | unless followed by the delimiter or another backslash, in which case | |
1199 | the delimiter or backslash is interpolated. | |
1200 | .Sp | |
1201 | .Vb 3 | |
1202 | \& $foo = q!I said, "You said, 'She said it.'"!; | |
1203 | \& $bar = q('This is it.'); | |
1204 | \& $baz = '\en'; # a two-character string | |
1205 | .Ve | |
1206 | .IP "qq/STRING/" 8 | |
1207 | .IX Item "qq/STRING/" | |
1208 | .PD 0 | |
1209 | .ie n .IP """\s-1STRING\s0""" 8 | |
1210 | .el .IP "``\s-1STRING\s0''" 8 | |
1211 | .IX Item "STRING" | |
1212 | .PD | |
1213 | A double\-quoted, interpolated string. | |
1214 | .Sp | |
1215 | .Vb 4 | |
1216 | \& $_ .= qq | |
1217 | \& (*** The previous line contains the naughty word "$1".\en) | |
1218 | \& if /\eb(tcl|java|python)\eb/i; # :-) | |
1219 | \& $baz = "\en"; # a one-character string | |
1220 | .Ve | |
1221 | .IP "qr/STRING/imosx" 8 | |
1222 | .IX Item "qr/STRING/imosx" | |
1223 | This operator quotes (and possibly compiles) its \fI\s-1STRING\s0\fR as a regular | |
1224 | expression. \fI\s-1STRING\s0\fR is interpolated the same way as \fI\s-1PATTERN\s0\fR | |
1225 | in \f(CW\*(C`m/PATTERN/\*(C'\fR. If \*(L"'\*(R" is used as the delimiter, no interpolation | |
1226 | is done. Returns a Perl value which may be used instead of the | |
1227 | corresponding \f(CW\*(C`/STRING/imosx\*(C'\fR expression. | |
1228 | .Sp | |
1229 | For example, | |
1230 | .Sp | |
1231 | .Vb 2 | |
1232 | \& $rex = qr/my.STRING/is; | |
1233 | \& s/$rex/foo/; | |
1234 | .Ve | |
1235 | .Sp | |
1236 | is equivalent to | |
1237 | .Sp | |
1238 | .Vb 1 | |
1239 | \& s/my.STRING/foo/is; | |
1240 | .Ve | |
1241 | .Sp | |
1242 | The result may be used as a subpattern in a match: | |
1243 | .Sp | |
1244 | .Vb 4 | |
1245 | \& $re = qr/$pattern/; | |
1246 | \& $string =~ /foo${re}bar/; # can be interpolated in other patterns | |
1247 | \& $string =~ $re; # or used standalone | |
1248 | \& $string =~ /$re/; # or this way | |
1249 | .Ve | |
1250 | .Sp | |
1251 | Since Perl may compile the pattern at the moment of execution of the \fIqr()\fR | |
1252 | operator, using \fIqr()\fR may have speed advantages in some situations, | |
1253 | notably if the result of \fIqr()\fR is used standalone: | |
1254 | .Sp | |
1255 | .Vb 11 | |
1256 | \& sub match { | |
1257 | \& my $patterns = shift; | |
1258 | \& my @compiled = map qr/$_/i, @$patterns; | |
1259 | \& grep { | |
1260 | \& my $success = 0; | |
1261 | \& foreach my $pat (@compiled) { | |
1262 | \& $success = 1, last if /$pat/; | |
1263 | \& } | |
1264 | \& $success; | |
1265 | \& } @_; | |
1266 | \& } | |
1267 | .Ve | |
1268 | .Sp | |
1269 | Precompilation of the pattern into an internal representation at | |
1270 | the moment of \fIqr()\fR avoids a need to recompile the pattern every | |
1271 | time a match \f(CW\*(C`/$pat/\*(C'\fR is attempted. (Perl has many other internal | |
1272 | optimizations, but none would be triggered in the above example if | |
1273 | we did not use the \fIqr()\fR operator.) | |
1274 | .Sp | |
1275 | Options are: | |
1276 | .Sp | |
1277 | .Vb 5 | |
1278 | \& i Do case-insensitive pattern matching. | |
1279 | \& m Treat string as multiple lines. | |
1280 | \& o Compile pattern only once. | |
1281 | \& s Treat string as single line. | |
1282 | \& x Use extended regular expressions. | |
1283 | .Ve | |
1284 | .Sp | |
1285 | See perlre for additional information on valid syntax for \s-1STRING\s0, and | |
1286 | for a detailed look at the semantics of regular expressions. | |
1287 | .IP "qx/STRING/" 8 | |
1288 | .IX Item "qx/STRING/" | |
1289 | .PD 0 | |
1290 | .IP "`STRING`" 8 | |
1291 | .IX Item "`STRING`" | |
1292 | .PD | |
1293 | A string which is (possibly) interpolated and then executed as a | |
1294 | system command with \f(CW\*(C`/bin/sh\*(C'\fR or its equivalent. Shell wildcards, | |
1295 | pipes, and redirections will be honored. The collected standard | |
1296 | output of the command is returned; standard error is unaffected. In | |
1297 | scalar context, it comes back as a single (potentially multi\-line) | |
1298 | string, or undef if the command failed. In list context, returns a | |
1299 | list of lines (however you've defined lines with $/ or | |
1300 | \&\f(CW$INPUT_RECORD_SEPARATOR\fR), or an empty list if the command failed. | |
1301 | .Sp | |
1302 | Because backticks do not affect standard error, use shell file descriptor | |
1303 | syntax (assuming the shell supports this) if you care to address this. | |
1304 | To capture a command's \s-1STDERR\s0 and \s-1STDOUT\s0 together: | |
1305 | .Sp | |
1306 | .Vb 1 | |
1307 | \& $output = `cmd 2>&1`; | |
1308 | .Ve | |
1309 | .Sp | |
1310 | To capture a command's \s-1STDOUT\s0 but discard its \s-1STDERR:\s0 | |
1311 | .Sp | |
1312 | .Vb 1 | |
1313 | \& $output = `cmd 2>/dev/null`; | |
1314 | .Ve | |
1315 | .Sp | |
1316 | To capture a command's \s-1STDERR\s0 but discard its \s-1STDOUT\s0 (ordering is | |
1317 | important here): | |
1318 | .Sp | |
1319 | .Vb 1 | |
1320 | \& $output = `cmd 2>&1 1>/dev/null`; | |
1321 | .Ve | |
1322 | .Sp | |
1323 | To exchange a command's \s-1STDOUT\s0 and \s-1STDERR\s0 in order to capture the \s-1STDERR\s0 | |
1324 | but leave its \s-1STDOUT\s0 to come out the old \s-1STDERR:\s0 | |
1325 | .Sp | |
1326 | .Vb 1 | |
1327 | \& $output = `cmd 3>&1 1>&2 2>&3 3>&-`; | |
1328 | .Ve | |
1329 | .Sp | |
1330 | To read both a command's \s-1STDOUT\s0 and its \s-1STDERR\s0 separately, it's easiest | |
1331 | and safest to redirect them separately to files, and then read from those | |
1332 | files when the program is done: | |
1333 | .Sp | |
1334 | .Vb 1 | |
1335 | \& system("program args 1>/tmp/program.stdout 2>/tmp/program.stderr"); | |
1336 | .Ve | |
1337 | .Sp | |
1338 | Using single-quote as a delimiter protects the command from Perl's | |
1339 | double-quote interpolation, passing it on to the shell instead: | |
1340 | .Sp | |
1341 | .Vb 2 | |
1342 | \& $perl_info = qx(ps $$); # that's Perl's $$ | |
1343 | \& $shell_info = qx'ps $$'; # that's the new shell's $$ | |
1344 | .Ve | |
1345 | .Sp | |
1346 | How that string gets evaluated is entirely subject to the command | |
1347 | interpreter on your system. On most platforms, you will have to protect | |
1348 | shell metacharacters if you want them treated literally. This is in | |
1349 | practice difficult to do, as it's unclear how to escape which characters. | |
1350 | See perlsec for a clean and safe example of a manual \fIfork()\fR and \fIexec()\fR | |
1351 | to emulate backticks safely. | |
1352 | .Sp | |
1353 | On some platforms (notably DOS-like ones), the shell may not be | |
1354 | capable of dealing with multiline commands, so putting newlines in | |
1355 | the string may not get you what you want. You may be able to evaluate | |
1356 | multiple commands in a single line by separating them with the command | |
1357 | separator character, if your shell supports that (e.g. \f(CW\*(C`;\*(C'\fR on many Unix | |
1358 | shells; \f(CW\*(C`&\*(C'\fR on the Windows \s-1NT\s0 \f(CW\*(C`cmd\*(C'\fR shell). | |
1359 | .Sp | |
1360 | Beginning with v5.6.0, Perl will attempt to flush all files opened for | |
1361 | output before starting the child process, but this may not be supported | |
1362 | on some platforms (see perlport). To be safe, you may need to set | |
1363 | \&\f(CW$|\fR ($AUTOFLUSH in English) or call the \f(CW\*(C`autoflush()\*(C'\fR method of | |
1364 | \&\f(CW\*(C`IO::Handle\*(C'\fR on any open handles. | |
1365 | .Sp | |
1366 | Beware that some command shells may place restrictions on the length | |
1367 | of the command line. You must ensure your strings don't exceed this | |
1368 | limit after any necessary interpolations. See the platform-specific | |
1369 | release notes for more details about your particular environment. | |
1370 | .Sp | |
1371 | Using this operator can lead to programs that are difficult to port, | |
1372 | because the shell commands called vary between systems, and may in | |
1373 | fact not be present at all. As one example, the \f(CW\*(C`type\*(C'\fR command under | |
1374 | the \s-1POSIX\s0 shell is very different from the \f(CW\*(C`type\*(C'\fR command under \s-1DOS\s0. | |
1375 | That doesn't mean you should go out of your way to avoid backticks | |
1376 | when they're the right way to get something done. Perl was made to be | |
1377 | a glue language, and one of the things it glues together is commands. | |
1378 | Just understand what you're getting yourself into. | |
1379 | .Sp | |
1380 | See \*(L"I/O Operators\*(R" for more discussion. | |
1381 | .IP "qw/STRING/" 8 | |
1382 | .IX Item "qw/STRING/" | |
1383 | Evaluates to a list of the words extracted out of \s-1STRING\s0, using embedded | |
1384 | whitespace as the word delimiters. It can be understood as being roughly | |
1385 | equivalent to: | |
1386 | .Sp | |
1387 | .Vb 1 | |
1388 | \& split(' ', q/STRING/); | |
1389 | .Ve | |
1390 | .Sp | |
1391 | the difference being that it generates a real list at compile time. So | |
1392 | this expression: | |
1393 | .Sp | |
1394 | .Vb 1 | |
1395 | \& qw(foo bar baz) | |
1396 | .Ve | |
1397 | .Sp | |
1398 | is semantically equivalent to the list: | |
1399 | .Sp | |
1400 | .Vb 1 | |
1401 | \& 'foo', 'bar', 'baz' | |
1402 | .Ve | |
1403 | .Sp | |
1404 | Some frequently seen examples: | |
1405 | .Sp | |
1406 | .Vb 2 | |
1407 | \& use POSIX qw( setlocale localeconv ) | |
1408 | \& @EXPORT = qw( foo bar baz ); | |
1409 | .Ve | |
1410 | .Sp | |
1411 | A common mistake is to try to separate the words with comma or to | |
1412 | put comments into a multi-line \f(CW\*(C`qw\*(C'\fR\-string. For this reason, the | |
1413 | \&\f(CW\*(C`use warnings\*(C'\fR pragma and the \fB\-w\fR switch (that is, the \f(CW$^W\fR variable) | |
1414 | produce warnings if the \s-1STRING\s0 contains the \*(L",\*(R" or the \*(L"#\*(R" character. | |
1415 | .IP "s/PATTERN/REPLACEMENT/egimosx" 8 | |
1416 | .IX Item "s/PATTERN/REPLACEMENT/egimosx" | |
1417 | Searches a string for a pattern, and if found, replaces that pattern | |
1418 | with the replacement text and returns the number of substitutions | |
1419 | made. Otherwise it returns false (specifically, the empty string). | |
1420 | .Sp | |
1421 | If no string is specified via the \f(CW\*(C`=~\*(C'\fR or \f(CW\*(C`!~\*(C'\fR operator, the \f(CW$_\fR | |
1422 | variable is searched and modified. (The string specified with \f(CW\*(C`=~\*(C'\fR must | |
1423 | be a scalar variable, an array element, a hash element, or an assignment | |
1424 | to one of those, i.e., an lvalue.) | |
1425 | .Sp | |
1426 | If the delimiter chosen is a single quote, no interpolation is | |
1427 | done on either the \s-1PATTERN\s0 or the \s-1REPLACEMENT\s0. Otherwise, if the | |
1428 | \&\s-1PATTERN\s0 contains a $ that looks like a variable rather than an | |
1429 | end-of-string test, the variable will be interpolated into the pattern | |
1430 | at run\-time. If you want the pattern compiled only once the first time | |
1431 | the variable is interpolated, use the \f(CW\*(C`/o\*(C'\fR option. If the pattern | |
1432 | evaluates to the empty string, the last successfully executed regular | |
1433 | expression is used instead. See perlre for further explanation on these. | |
1434 | See perllocale for discussion of additional considerations that apply | |
1435 | when \f(CW\*(C`use locale\*(C'\fR is in effect. | |
1436 | .Sp | |
1437 | Options are: | |
1438 | .Sp | |
1439 | .Vb 7 | |
1440 | \& e Evaluate the right side as an expression. | |
1441 | \& g Replace globally, i.e., all occurrences. | |
1442 | \& i Do case-insensitive pattern matching. | |
1443 | \& m Treat string as multiple lines. | |
1444 | \& o Compile pattern only once. | |
1445 | \& s Treat string as single line. | |
1446 | \& x Use extended regular expressions. | |
1447 | .Ve | |
1448 | .Sp | |
1449 | Any non\-alphanumeric, non-whitespace delimiter may replace the | |
1450 | slashes. If single quotes are used, no interpretation is done on the | |
1451 | replacement string (the \f(CW\*(C`/e\*(C'\fR modifier overrides this, however). Unlike | |
1452 | Perl 4, Perl 5 treats backticks as normal delimiters; the replacement | |
1453 | text is not evaluated as a command. If the | |
1454 | \&\s-1PATTERN\s0 is delimited by bracketing quotes, the \s-1REPLACEMENT\s0 has its own | |
1455 | pair of quotes, which may or may not be bracketing quotes, e.g., | |
1456 | \&\f(CW\*(C`s(foo)(bar)\*(C'\fR or \f(CW\*(C`s<foo>/bar/\*(C'\fR. A \f(CW\*(C`/e\*(C'\fR will cause the | |
1457 | replacement portion to be treated as a full-fledged Perl expression | |
1458 | and evaluated right then and there. It is, however, syntax checked at | |
1459 | compile\-time. A second \f(CW\*(C`e\*(C'\fR modifier will cause the replacement portion | |
1460 | to be \f(CW\*(C`eval\*(C'\fRed before being run as a Perl expression. | |
1461 | .Sp | |
1462 | Examples: | |
1463 | .Sp | |
1464 | .Vb 1 | |
1465 | \& s/\ebgreen\eb/mauve/g; # don't change wintergreen | |
1466 | .Ve | |
1467 | .Sp | |
1468 | .Vb 1 | |
1469 | \& $path =~ s|/usr/bin|/usr/local/bin|; | |
1470 | .Ve | |
1471 | .Sp | |
1472 | .Vb 1 | |
1473 | \& s/Login: $foo/Login: $bar/; # run-time pattern | |
1474 | .Ve | |
1475 | .Sp | |
1476 | .Vb 1 | |
1477 | \& ($foo = $bar) =~ s/this/that/; # copy first, then change | |
1478 | .Ve | |
1479 | .Sp | |
1480 | .Vb 1 | |
1481 | \& $count = ($paragraph =~ s/Mister\eb/Mr./g); # get change-count | |
1482 | .Ve | |
1483 | .Sp | |
1484 | .Vb 4 | |
1485 | \& $_ = 'abc123xyz'; | |
1486 | \& s/\ed+/$&*2/e; # yields 'abc246xyz' | |
1487 | \& s/\ed+/sprintf("%5d",$&)/e; # yields 'abc 246xyz' | |
1488 | \& s/\ew/$& x 2/eg; # yields 'aabbcc 224466xxyyzz' | |
1489 | .Ve | |
1490 | .Sp | |
1491 | .Vb 3 | |
1492 | \& s/%(.)/$percent{$1}/g; # change percent escapes; no /e | |
1493 | \& s/%(.)/$percent{$1} || $&/ge; # expr now, so /e | |
1494 | \& s/^=(\ew+)/&pod($1)/ge; # use function call | |
1495 | .Ve | |
1496 | .Sp | |
1497 | .Vb 3 | |
1498 | \& # expand variables in $_, but dynamics only, using | |
1499 | \& # symbolic dereferencing | |
1500 | \& s/\e$(\ew+)/${$1}/g; | |
1501 | .Ve | |
1502 | .Sp | |
1503 | .Vb 2 | |
1504 | \& # Add one to the value of any numbers in the string | |
1505 | \& s/(\ed+)/1 + $1/eg; | |
1506 | .Ve | |
1507 | .Sp | |
1508 | .Vb 4 | |
1509 | \& # This will expand any embedded scalar variable | |
1510 | \& # (including lexicals) in $_ : First $1 is interpolated | |
1511 | \& # to the variable name, and then evaluated | |
1512 | \& s/(\e$\ew+)/$1/eeg; | |
1513 | .Ve | |
1514 | .Sp | |
1515 | .Vb 6 | |
1516 | \& # Delete (most) C comments. | |
1517 | \& $program =~ s { | |
1518 | \& /\e* # Match the opening delimiter. | |
1519 | \& .*? # Match a minimal number of characters. | |
1520 | \& \e*/ # Match the closing delimiter. | |
1521 | \& } []gsx; | |
1522 | .Ve | |
1523 | .Sp | |
1524 | .Vb 1 | |
1525 | \& s/^\es*(.*?)\es*$/$1/; # trim white space in $_, expensively | |
1526 | .Ve | |
1527 | .Sp | |
1528 | .Vb 4 | |
1529 | \& for ($variable) { # trim white space in $variable, cheap | |
1530 | \& s/^\es+//; | |
1531 | \& s/\es+$//; | |
1532 | \& } | |
1533 | .Ve | |
1534 | .Sp | |
1535 | .Vb 1 | |
1536 | \& s/([^ ]*) *([^ ]*)/$2 $1/; # reverse 1st two fields | |
1537 | .Ve | |
1538 | .Sp | |
1539 | Note the use of $ instead of \e in the last example. Unlike | |
1540 | \&\fBsed\fR, we use the \e<\fIdigit\fR> form in only the left hand side. | |
1541 | Anywhere else it's $<\fIdigit\fR>. | |
1542 | .Sp | |
1543 | Occasionally, you can't use just a \f(CW\*(C`/g\*(C'\fR to get all the changes | |
1544 | to occur that you might want. Here are two common cases: | |
1545 | .Sp | |
1546 | .Vb 2 | |
1547 | \& # put commas in the right places in an integer | |
1548 | \& 1 while s/(\ed)(\ed\ed\ed)(?!\ed)/$1,$2/g; | |
1549 | .Ve | |
1550 | .Sp | |
1551 | .Vb 2 | |
1552 | \& # expand tabs to 8-column spacing | |
1553 | \& 1 while s/\et+/' ' x (length($&)*8 - length($`)%8)/e; | |
1554 | .Ve | |
1555 | .IP "tr/SEARCHLIST/REPLACEMENTLIST/cds" 8 | |
1556 | .IX Item "tr/SEARCHLIST/REPLACEMENTLIST/cds" | |
1557 | .PD 0 | |
1558 | .IP "y/SEARCHLIST/REPLACEMENTLIST/cds" 8 | |
1559 | .IX Item "y/SEARCHLIST/REPLACEMENTLIST/cds" | |
1560 | .PD | |
1561 | Transliterates all occurrences of the characters found in the search list | |
1562 | with the corresponding character in the replacement list. It returns | |
1563 | the number of characters replaced or deleted. If no string is | |
1564 | specified via the =~ or !~ operator, the \f(CW$_\fR string is transliterated. (The | |
1565 | string specified with =~ must be a scalar variable, an array element, a | |
1566 | hash element, or an assignment to one of those, i.e., an lvalue.) | |
1567 | .Sp | |
1568 | A character range may be specified with a hyphen, so \f(CW\*(C`tr/A\-J/0\-9/\*(C'\fR | |
1569 | does the same replacement as \f(CW\*(C`tr/ACEGIBDFHJ/0246813579/\*(C'\fR. | |
1570 | For \fBsed\fR devotees, \f(CW\*(C`y\*(C'\fR is provided as a synonym for \f(CW\*(C`tr\*(C'\fR. If the | |
1571 | \&\s-1SEARCHLIST\s0 is delimited by bracketing quotes, the \s-1REPLACEMENTLIST\s0 has | |
1572 | its own pair of quotes, which may or may not be bracketing quotes, | |
1573 | e.g., \f(CW\*(C`tr[A\-Z][a\-z]\*(C'\fR or \f(CW\*(C`tr(+\e\-*/)/ABCD/\*(C'\fR. | |
1574 | .Sp | |
1575 | Note that \f(CW\*(C`tr\*(C'\fR does \fBnot\fR do regular expression character classes | |
1576 | such as \f(CW\*(C`\ed\*(C'\fR or \f(CW\*(C`[:lower:]\*(C'\fR. The \f(CW\*(C`tr\*(C'\fR operator is not equivalent to | |
1577 | the \fItr\fR\|(1) utility. If you want to map strings between lower/upper | |
1578 | cases, see \*(L"lc\*(R" in perlfunc and \*(L"uc\*(R" in perlfunc, and in general consider | |
1579 | using the \f(CW\*(C`s\*(C'\fR operator if you need regular expressions. | |
1580 | .Sp | |
1581 | Note also that the whole range idea is rather unportable between | |
1582 | character sets\*(--and even within character sets they may cause results | |
1583 | you probably didn't expect. A sound principle is to use only ranges | |
1584 | that begin from and end at either alphabets of equal case (a\-e, A\-E), | |
1585 | or digits (0\-4). Anything else is unsafe. If in doubt, spell out the | |
1586 | character sets in full. | |
1587 | .Sp | |
1588 | Options: | |
1589 | .Sp | |
1590 | .Vb 3 | |
1591 | \& c Complement the SEARCHLIST. | |
1592 | \& d Delete found but unreplaced characters. | |
1593 | \& s Squash duplicate replaced characters. | |
1594 | .Ve | |
1595 | .Sp | |
1596 | If the \f(CW\*(C`/c\*(C'\fR modifier is specified, the \s-1SEARCHLIST\s0 character set | |
1597 | is complemented. If the \f(CW\*(C`/d\*(C'\fR modifier is specified, any characters | |
1598 | specified by \s-1SEARCHLIST\s0 not found in \s-1REPLACEMENTLIST\s0 are deleted. | |
1599 | (Note that this is slightly more flexible than the behavior of some | |
1600 | \&\fBtr\fR programs, which delete anything they find in the \s-1SEARCHLIST\s0, | |
1601 | period.) If the \f(CW\*(C`/s\*(C'\fR modifier is specified, sequences of characters | |
1602 | that were transliterated to the same character are squashed down | |
1603 | to a single instance of the character. | |
1604 | .Sp | |
1605 | If the \f(CW\*(C`/d\*(C'\fR modifier is used, the \s-1REPLACEMENTLIST\s0 is always interpreted | |
1606 | exactly as specified. Otherwise, if the \s-1REPLACEMENTLIST\s0 is shorter | |
1607 | than the \s-1SEARCHLIST\s0, the final character is replicated till it is long | |
1608 | enough. If the \s-1REPLACEMENTLIST\s0 is empty, the \s-1SEARCHLIST\s0 is replicated. | |
1609 | This latter is useful for counting characters in a class or for | |
1610 | squashing character sequences in a class. | |
1611 | .Sp | |
1612 | Examples: | |
1613 | .Sp | |
1614 | .Vb 1 | |
1615 | \& $ARGV[1] =~ tr/A-Z/a-z/; # canonicalize to lower case | |
1616 | .Ve | |
1617 | .Sp | |
1618 | .Vb 1 | |
1619 | \& $cnt = tr/*/*/; # count the stars in $_ | |
1620 | .Ve | |
1621 | .Sp | |
1622 | .Vb 1 | |
1623 | \& $cnt = $sky =~ tr/*/*/; # count the stars in $sky | |
1624 | .Ve | |
1625 | .Sp | |
1626 | .Vb 1 | |
1627 | \& $cnt = tr/0-9//; # count the digits in $_ | |
1628 | .Ve | |
1629 | .Sp | |
1630 | .Vb 1 | |
1631 | \& tr/a-zA-Z//s; # bookkeeper -> bokeper | |
1632 | .Ve | |
1633 | .Sp | |
1634 | .Vb 1 | |
1635 | \& ($HOST = $host) =~ tr/a-z/A-Z/; | |
1636 | .Ve | |
1637 | .Sp | |
1638 | .Vb 1 | |
1639 | \& tr/a-zA-Z/ /cs; # change non-alphas to single space | |
1640 | .Ve | |
1641 | .Sp | |
1642 | .Vb 2 | |
1643 | \& tr [\e200-\e377] | |
1644 | \& [\e000-\e177]; # delete 8th bit | |
1645 | .Ve | |
1646 | .Sp | |
1647 | If multiple transliterations are given for a character, only the | |
1648 | first one is used: | |
1649 | .Sp | |
1650 | .Vb 1 | |
1651 | \& tr/AAA/XYZ/ | |
1652 | .Ve | |
1653 | .Sp | |
1654 | will transliterate any A to X. | |
1655 | .Sp | |
1656 | Because the transliteration table is built at compile time, neither | |
1657 | the \s-1SEARCHLIST\s0 nor the \s-1REPLACEMENTLIST\s0 are subjected to double quote | |
1658 | interpolation. That means that if you want to use variables, you | |
1659 | must use an \fIeval()\fR: | |
1660 | .Sp | |
1661 | .Vb 2 | |
1662 | \& eval "tr/$oldlist/$newlist/"; | |
1663 | \& die $@ if $@; | |
1664 | .Ve | |
1665 | .Sp | |
1666 | .Vb 1 | |
1667 | \& eval "tr/$oldlist/$newlist/, 1" or die $@; | |
1668 | .Ve | |
1669 | .IP "<<\s-1EOF\s0" 8 | |
1670 | .IX Item "<<EOF" | |
1671 | A line-oriented form of quoting is based on the shell \*(L"here\-document\*(R" | |
1672 | syntax. Following a \f(CW\*(C`<<\*(C'\fR you specify a string to terminate | |
1673 | the quoted material, and all lines following the current line down to | |
1674 | the terminating string are the value of the item. The terminating | |
1675 | string may be either an identifier (a word), or some quoted text. If | |
1676 | quoted, the type of quotes you use determines the treatment of the | |
1677 | text, just as in regular quoting. An unquoted identifier works like | |
1678 | double quotes. There must be no space between the \f(CW\*(C`<<\*(C'\fR and | |
1679 | the identifier, unless the identifier is quoted. (If you put a space it | |
1680 | will be treated as a null identifier, which is valid, and matches the first | |
1681 | empty line.) The terminating string must appear by itself (unquoted and | |
1682 | with no surrounding whitespace) on the terminating line. | |
1683 | .Sp | |
1684 | .Vb 3 | |
1685 | \& print <<EOF; | |
1686 | \& The price is $Price. | |
1687 | \& EOF | |
1688 | .Ve | |
1689 | .Sp | |
1690 | .Vb 3 | |
1691 | \& print << "EOF"; # same as above | |
1692 | \& The price is $Price. | |
1693 | \& EOF | |
1694 | .Ve | |
1695 | .Sp | |
1696 | .Vb 4 | |
1697 | \& print << `EOC`; # execute commands | |
1698 | \& echo hi there | |
1699 | \& echo lo there | |
1700 | \& EOC | |
1701 | .Ve | |
1702 | .Sp | |
1703 | .Vb 5 | |
1704 | \& print <<"foo", <<"bar"; # you can stack them | |
1705 | \& I said foo. | |
1706 | \& foo | |
1707 | \& I said bar. | |
1708 | \& bar | |
1709 | .Ve | |
1710 | .Sp | |
1711 | .Vb 6 | |
1712 | \& myfunc(<< "THIS", 23, <<'THAT'); | |
1713 | \& Here's a line | |
1714 | \& or two. | |
1715 | \& THIS | |
1716 | \& and here's another. | |
1717 | \& THAT | |
1718 | .Ve | |
1719 | .Sp | |
1720 | Just don't forget that you have to put a semicolon on the end | |
1721 | to finish the statement, as Perl doesn't know you're not going to | |
1722 | try to do this: | |
1723 | .Sp | |
1724 | .Vb 4 | |
1725 | \& print <<ABC | |
1726 | \& 179231 | |
1727 | \& ABC | |
1728 | \& + 20; | |
1729 | .Ve | |
1730 | .Sp | |
1731 | If you want your here-docs to be indented with the | |
1732 | rest of the code, you'll need to remove leading whitespace | |
1733 | from each line manually: | |
1734 | .Sp | |
1735 | .Vb 4 | |
1736 | \& ($quote = <<'FINIS') =~ s/^\es+//gm; | |
1737 | \& The Road goes ever on and on, | |
1738 | \& down from the door where it began. | |
1739 | \& FINIS | |
1740 | .Ve | |
1741 | .Sp | |
1742 | If you use a here-doc within a delimited construct, such as in \f(CW\*(C`s///eg\*(C'\fR, | |
1743 | the quoted material must come on the lines following the final delimiter. | |
1744 | So instead of | |
1745 | .Sp | |
1746 | .Vb 4 | |
1747 | \& s/this/<<E . 'that' | |
1748 | \& the other | |
1749 | \& E | |
1750 | \& . 'more '/eg; | |
1751 | .Ve | |
1752 | .Sp | |
1753 | you have to write | |
1754 | .Sp | |
1755 | .Vb 4 | |
1756 | \& s/this/<<E . 'that' | |
1757 | \& . 'more '/eg; | |
1758 | \& the other | |
1759 | \& E | |
1760 | .Ve | |
1761 | .Sp | |
1762 | If the terminating identifier is on the last line of the program, you | |
1763 | must be sure there is a newline after it; otherwise, Perl will give the | |
1764 | warning \fBCan't find string terminator \*(L"\s-1END\s0\*(R" anywhere before \s-1EOF\s0...\fR. | |
1765 | .Sp | |
1766 | Additionally, the quoting rules for the identifier are not related to | |
1767 | Perl's quoting rules \*(-- \f(CW\*(C`q()\*(C'\fR, \f(CW\*(C`qq()\*(C'\fR, and the like are not supported | |
1768 | in place of \f(CW''\fR and \f(CW""\fR, and the only interpolation is for backslashing | |
1769 | the quoting character: | |
1770 | .Sp | |
1771 | .Vb 3 | |
1772 | \& print << "abc\e"def"; | |
1773 | \& testing... | |
1774 | \& abc"def | |
1775 | .Ve | |
1776 | .Sp | |
1777 | Finally, quoted strings cannot span multiple lines. The general rule is | |
1778 | that the identifier must be a string literal. Stick with that, and you | |
1779 | should be safe. | |
1780 | .Sh "Gory details of parsing quoted constructs" | |
1781 | .IX Subsection "Gory details of parsing quoted constructs" | |
1782 | When presented with something that might have several different | |
1783 | interpretations, Perl uses the \fB\s-1DWIM\s0\fR (that's \*(L"Do What I Mean\*(R") | |
1784 | principle to pick the most probable interpretation. This strategy | |
1785 | is so successful that Perl programmers often do not suspect the | |
1786 | ambivalence of what they write. But from time to time, Perl's | |
1787 | notions differ substantially from what the author honestly meant. | |
1788 | .PP | |
1789 | This section hopes to clarify how Perl handles quoted constructs. | |
1790 | Although the most common reason to learn this is to unravel labyrinthine | |
1791 | regular expressions, because the initial steps of parsing are the | |
1792 | same for all quoting operators, they are all discussed together. | |
1793 | .PP | |
1794 | The most important Perl parsing rule is the first one discussed | |
1795 | below: when processing a quoted construct, Perl first finds the end | |
1796 | of that construct, then interprets its contents. If you understand | |
1797 | this rule, you may skip the rest of this section on the first | |
1798 | reading. The other rules are likely to contradict the user's | |
1799 | expectations much less frequently than this first one. | |
1800 | .PP | |
1801 | Some passes discussed below are performed concurrently, but because | |
1802 | their results are the same, we consider them individually. For different | |
1803 | quoting constructs, Perl performs different numbers of passes, from | |
1804 | one to five, but these passes are always performed in the same order. | |
1805 | .IP "Finding the end" 4 | |
1806 | .IX Item "Finding the end" | |
1807 | The first pass is finding the end of the quoted construct, whether | |
1808 | it be a multicharacter delimiter \f(CW"\enEOF\en"\fR in the \f(CW\*(C`<<EOF\*(C'\fR | |
1809 | construct, a \f(CW\*(C`/\*(C'\fR that terminates a \f(CW\*(C`qq//\*(C'\fR construct, a \f(CW\*(C`]\*(C'\fR which | |
1810 | terminates a \f(CW\*(C`qq[]\*(C'\fR construct, or a \f(CW\*(C`>\*(C'\fR which terminates a | |
1811 | fileglob started with \f(CW\*(C`<\*(C'\fR. | |
1812 | .Sp | |
1813 | When searching for single-character non-pairing delimiters, such | |
1814 | as \f(CW\*(C`/\*(C'\fR, combinations of \f(CW\*(C`\e\e\*(C'\fR and \f(CW\*(C`\e/\*(C'\fR are skipped. However, | |
1815 | when searching for a single-character pairing delimiter like \f(CW\*(C`[\*(C'\fR, | |
1816 | combinations of \f(CW\*(C`\e\e\*(C'\fR, \f(CW\*(C`\e]\*(C'\fR, and \f(CW\*(C`\e[\*(C'\fR are all skipped, and nested | |
1817 | \&\f(CW\*(C`[\*(C'\fR, \f(CW\*(C`]\*(C'\fR are skipped as well. When searching for multicharacter | |
1818 | delimiters, nothing is skipped. | |
1819 | .Sp | |
1820 | For constructs with three-part delimiters (\f(CW\*(C`s///\*(C'\fR, \f(CW\*(C`y///\*(C'\fR, and | |
1821 | \&\f(CW\*(C`tr///\*(C'\fR), the search is repeated once more. | |
1822 | .Sp | |
1823 | During this search no attention is paid to the semantics of the construct. | |
1824 | Thus: | |
1825 | .Sp | |
1826 | .Vb 1 | |
1827 | \& "$hash{"$foo/$bar"}" | |
1828 | .Ve | |
1829 | .Sp | |
1830 | or: | |
1831 | .Sp | |
1832 | .Vb 3 | |
1833 | \& m/ | |
1834 | \& bar # NOT a comment, this slash / terminated m//! | |
1835 | \& /x | |
1836 | .Ve | |
1837 | .Sp | |
1838 | do not form legal quoted expressions. The quoted part ends on the | |
1839 | first \f(CW\*(C`"\*(C'\fR and \f(CW\*(C`/\*(C'\fR, and the rest happens to be a syntax error. | |
1840 | Because the slash that terminated \f(CW\*(C`m//\*(C'\fR was followed by a \f(CW\*(C`SPACE\*(C'\fR, | |
1841 | the example above is not \f(CW\*(C`m//x\*(C'\fR, but rather \f(CW\*(C`m//\*(C'\fR with no \f(CW\*(C`/x\*(C'\fR | |
1842 | modifier. So the embedded \f(CW\*(C`#\*(C'\fR is interpreted as a literal \f(CW\*(C`#\*(C'\fR. | |
1843 | .IP "Removal of backslashes before delimiters" 4 | |
1844 | .IX Item "Removal of backslashes before delimiters" | |
1845 | During the second pass, text between the starting and ending | |
1846 | delimiters is copied to a safe location, and the \f(CW\*(C`\e\*(C'\fR is removed | |
1847 | from combinations consisting of \f(CW\*(C`\e\*(C'\fR and delimiter\*(--or delimiters, | |
1848 | meaning both starting and ending delimiters, should these differ. | |
1849 | This removal does not happen for multi-character delimiters. | |
1850 | Note that the combination \f(CW\*(C`\e\e\*(C'\fR is left intact, just as it was. | |
1851 | .Sp | |
1852 | Starting from this step no information about the delimiters is | |
1853 | used in parsing. | |
1854 | .IP "Interpolation" 4 | |
1855 | .IX Item "Interpolation" | |
1856 | The next step is interpolation in the text obtained, which is now | |
1857 | delimiter\-independent. There are four different cases. | |
1858 | .RS 4 | |
1859 | .ie n .IP """<<'EOF'""\fR, \f(CW""m''""\fR, \f(CW""s'''""\fR, \f(CW""tr///""\fR, \f(CW""y///""" 4 | |
1860 | .el .IP "\f(CW<<'EOF'\fR, \f(CWm''\fR, \f(CWs'''\fR, \f(CWtr///\fR, \f(CWy///\fR" 4 | |
1861 | .IX Item "<<'EOF', m'', s''', tr///, y///" | |
1862 | No interpolation is performed. | |
1863 | .ie n .IP "''\fR, \f(CW""q//""" 4 | |
1864 | .el .IP "\f(CW''\fR, \f(CWq//\fR" 4 | |
1865 | .IX Item "'', q//" | |
1866 | The only interpolation is removal of \f(CW\*(C`\e\*(C'\fR from pairs \f(CW\*(C`\e\e\*(C'\fR. | |
1867 | .ie n .IP """""\fR, \f(CW``\fR, \f(CW""qq//""\fR, \f(CW""qx//""\fR, \f(CW""<file*glob>""" 4 | |
1868 | .el .IP "\f(CW``''\fR, \f(CW``\fR, \f(CWqq//\fR, \f(CWqx//\fR, \f(CW<file*glob>\fR" 4 | |
1869 | .IX Item """"", ``, qq//, qx//, <file*glob>" | |
1870 | \&\f(CW\*(C`\eQ\*(C'\fR, \f(CW\*(C`\eU\*(C'\fR, \f(CW\*(C`\eu\*(C'\fR, \f(CW\*(C`\eL\*(C'\fR, \f(CW\*(C`\el\*(C'\fR (possibly paired with \f(CW\*(C`\eE\*(C'\fR) are | |
1871 | converted to corresponding Perl constructs. Thus, \f(CW"$foo\eQbaz$bar"\fR | |
1872 | is converted to \f(CW\*(C`$foo . (quotemeta("baz" . $bar))\*(C'\fR internally. | |
1873 | The other combinations are replaced with appropriate expansions. | |
1874 | .Sp | |
1875 | Let it be stressed that \fIwhatever falls between \f(CI\*(C`\eQ\*(C'\fI and \f(CI\*(C`\eE\*(C'\fI\fR | |
1876 | is interpolated in the usual way. Something like \f(CW"\eQ\e\eE"\fR has | |
1877 | no \f(CW\*(C`\eE\*(C'\fR inside. Instead, it has \f(CW\*(C`\eQ\*(C'\fR, \f(CW\*(C`\e\e\*(C'\fR, and \f(CW\*(C`E\*(C'\fR, so the | |
1878 | result is the same as for \f(CW"\e\e\e\eE"\fR. As a general rule, backslashes | |
1879 | between \f(CW\*(C`\eQ\*(C'\fR and \f(CW\*(C`\eE\*(C'\fR may lead to counterintuitive results. So, | |
1880 | \&\f(CW"\eQ\et\eE"\fR is converted to \f(CW\*(C`quotemeta("\et")\*(C'\fR, which is the same | |
1881 | as \f(CW"\e\e\et"\fR (since \s-1TAB\s0 is not alphanumeric). Note also that: | |
1882 | .Sp | |
1883 | .Vb 2 | |
1884 | \& $str = '\et'; | |
1885 | \& return "\eQ$str"; | |
1886 | .Ve | |
1887 | .Sp | |
1888 | may be closer to the conjectural \fIintention\fR of the writer of \f(CW"\eQ\et\eE"\fR. | |
1889 | .Sp | |
1890 | Interpolated scalars and arrays are converted internally to the \f(CW\*(C`join\*(C'\fR and | |
1891 | \&\f(CW\*(C`.\*(C'\fR catenation operations. Thus, \f(CW"$foo XXX '@arr'"\fR becomes: | |
1892 | .Sp | |
1893 | .Vb 1 | |
1894 | \& $foo . " XXX '" . (join $", @arr) . "'"; | |
1895 | .Ve | |
1896 | .Sp | |
1897 | All operations above are performed simultaneously, left to right. | |
1898 | .Sp | |
1899 | Because the result of \f(CW"\eQ STRING \eE"\fR has all metacharacters | |
1900 | quoted, there is no way to insert a literal \f(CW\*(C`$\*(C'\fR or \f(CW\*(C`@\*(C'\fR inside a | |
1901 | \&\f(CW\*(C`\eQ\eE\*(C'\fR pair. If protected by \f(CW\*(C`\e\*(C'\fR, \f(CW\*(C`$\*(C'\fR will be quoted to become | |
1902 | \&\f(CW"\e\e\e$"\fR; if not, it is interpreted as the start of an interpolated | |
1903 | scalar. | |
1904 | .Sp | |
1905 | Note also that the interpolation code needs to make a decision on | |
1906 | where the interpolated scalar ends. For instance, whether | |
1907 | \&\f(CW"a $b \-> {c}"\fR really means: | |
1908 | .Sp | |
1909 | .Vb 1 | |
1910 | \& "a " . $b . " -> {c}"; | |
1911 | .Ve | |
1912 | .Sp | |
1913 | or: | |
1914 | .Sp | |
1915 | .Vb 1 | |
1916 | \& "a " . $b -> {c}; | |
1917 | .Ve | |
1918 | .Sp | |
1919 | Most of the time, the choice is the longest possible text that does not | |
1920 | include spaces between components and which contains matching braces or | |
1921 | brackets. Because the outcome may be determined by voting based | |
1922 | on heuristic estimators, the result is not strictly predictable. | |
1923 | Fortunately, it's usually correct for ambiguous cases. | |
1924 | .ie n .IP """?RE?""\fR, \f(CW""/RE/""\fR, \f(CW""m/RE/""\fR, \f(CW""s/RE/foo/""," 4 | |
1925 | .el .IP "\f(CW?RE?\fR, \f(CW/RE/\fR, \f(CWm/RE/\fR, \f(CWs/RE/foo/\fR," 4 | |
1926 | .IX Item "?RE?, /RE/, m/RE/, s/RE/foo/," | |
1927 | Processing of \f(CW\*(C`\eQ\*(C'\fR, \f(CW\*(C`\eU\*(C'\fR, \f(CW\*(C`\eu\*(C'\fR, \f(CW\*(C`\eL\*(C'\fR, \f(CW\*(C`\el\*(C'\fR, and interpolation | |
1928 | happens (almost) as with \f(CW\*(C`qq//\*(C'\fR constructs, but the substitution | |
1929 | of \f(CW\*(C`\e\*(C'\fR followed by RE-special chars (including \f(CW\*(C`\e\*(C'\fR) is not | |
1930 | performed. Moreover, inside \f(CW\*(C`(?{BLOCK})\*(C'\fR, \f(CW\*(C`(?# comment )\*(C'\fR, and | |
1931 | a \f(CW\*(C`#\*(C'\fR\-comment in a \f(CW\*(C`//x\*(C'\fR\-regular expression, no processing is | |
1932 | performed whatsoever. This is the first step at which the presence | |
1933 | of the \f(CW\*(C`//x\*(C'\fR modifier is relevant. | |
1934 | .Sp | |
1935 | Interpolation has several quirks: \f(CW$|\fR, \f(CW$(\fR, and \f(CW$)\fR are not | |
1936 | interpolated, and constructs \f(CW$var[SOMETHING]\fR are voted (by several | |
1937 | different estimators) to be either an array element or \f(CW$var\fR | |
1938 | followed by an \s-1RE\s0 alternative. This is where the notation | |
1939 | \&\f(CW\*(C`${arr[$bar]}\*(C'\fR comes in handy: \f(CW\*(C`/${arr[0\-9]}/\*(C'\fR is interpreted as | |
1940 | array element \f(CW\*(C`\-9\*(C'\fR, not as a regular expression from the variable | |
1941 | \&\f(CW$arr\fR followed by a digit, which would be the interpretation of | |
1942 | \&\f(CW\*(C`/$arr[0\-9]/\*(C'\fR. Since voting among different estimators may occur, | |
1943 | the result is not predictable. | |
1944 | .Sp | |
1945 | It is at this step that \f(CW\*(C`\e1\*(C'\fR is begrudgingly converted to \f(CW$1\fR in | |
1946 | the replacement text of \f(CW\*(C`s///\*(C'\fR to correct the incorrigible | |
1947 | \&\fIsed\fR hackers who haven't picked up the saner idiom yet. A warning | |
1948 | is emitted if the \f(CW\*(C`use warnings\*(C'\fR pragma or the \fB\-w\fR command-line flag | |
1949 | (that is, the \f(CW$^W\fR variable) was set. | |
1950 | .Sp | |
1951 | The lack of processing of \f(CW\*(C`\e\e\*(C'\fR creates specific restrictions on | |
1952 | the post-processed text. If the delimiter is \f(CW\*(C`/\*(C'\fR, one cannot get | |
1953 | the combination \f(CW\*(C`\e/\*(C'\fR into the result of this step. \f(CW\*(C`/\*(C'\fR will | |
1954 | finish the regular expression, \f(CW\*(C`\e/\*(C'\fR will be stripped to \f(CW\*(C`/\*(C'\fR on | |
1955 | the previous step, and \f(CW\*(C`\e\e/\*(C'\fR will be left as is. Because \f(CW\*(C`/\*(C'\fR is | |
1956 | equivalent to \f(CW\*(C`\e/\*(C'\fR inside a regular expression, this does not | |
1957 | matter unless the delimiter happens to be a character special to the | |
1958 | \&\s-1RE\s0 engine, such as in \f(CW\*(C`s*foo*bar*\*(C'\fR, \f(CW\*(C`m[foo]\*(C'\fR, or \f(CW\*(C`?foo?\*(C'\fR; or an | |
1959 | alphanumeric char, as in: | |
1960 | .Sp | |
1961 | .Vb 1 | |
1962 | \& m m ^ a \es* b mmx; | |
1963 | .Ve | |
1964 | .Sp | |
1965 | In the \s-1RE\s0 above, which is intentionally obfuscated for illustration, the | |
1966 | delimiter is \f(CW\*(C`m\*(C'\fR, the modifier is \f(CW\*(C`mx\*(C'\fR, and after backslash-removal the | |
1967 | \&\s-1RE\s0 is the same as for \f(CW\*(C`m/ ^ a \es* b /mx\*(C'\fR. There's more than one | |
1968 | reason you're encouraged to restrict your delimiters to non\-alphanumeric, | |
1969 | non-whitespace choices. | |
1970 | .RE | |
1971 | .RS 4 | |
1972 | .Sp | |
1973 | This step is the last one for all constructs except regular expressions, | |
1974 | which are processed further. | |
1975 | .RE | |
1976 | .IP "Interpolation of regular expressions" 4 | |
1977 | .IX Item "Interpolation of regular expressions" | |
1978 | Previous steps were performed during the compilation of Perl code, | |
1979 | but this one happens at run time\*(--although it may be optimized to | |
1980 | be calculated at compile time if appropriate. After the preprocessing | |
1981 | described above, and possibly after evaluation if catenation, | |
1982 | joining, casing translation, or metaquoting are involved, the | |
1983 | resulting \fIstring\fR is passed to the \s-1RE\s0 engine for compilation. | |
1984 | .Sp | |
1985 | Whatever happens in the \s-1RE\s0 engine might be better discussed in perlre, | |
1986 | but for the sake of continuity, we shall do so here. | |
1987 | .Sp | |
1988 | This is another step where the presence of the \f(CW\*(C`//x\*(C'\fR modifier is | |
1989 | relevant. The \s-1RE\s0 engine scans the string from left to right and | |
1990 | converts it to a finite automaton. | |
1991 | .Sp | |
1992 | Backslashed characters are either replaced with corresponding | |
1993 | literal strings (as with \f(CW\*(C`\e{\*(C'\fR), or else they generate special nodes | |
1994 | in the finite automaton (as with \f(CW\*(C`\eb\*(C'\fR). Characters special to the | |
1995 | \&\s-1RE\s0 engine (such as \f(CW\*(C`|\*(C'\fR) generate corresponding nodes or groups of | |
1996 | nodes. \f(CW\*(C`(?#...)\*(C'\fR comments are ignored. All the rest is either | |
1997 | converted to literal strings to match, or else is ignored (as are | |
1998 | whitespace and \f(CW\*(C`#\*(C'\fR\-style comments if \f(CW\*(C`//x\*(C'\fR is present). | |
1999 | .Sp | |
2000 | Parsing of the bracketed character class construct, \f(CW\*(C`[...]\*(C'\fR, is | |
2001 | rather different than the rule used for the rest of the pattern. | |
2002 | The terminator of this construct is found using the same rules as | |
2003 | for finding the terminator of a \f(CW\*(C`{}\*(C'\fR\-delimited construct, the only | |
2004 | exception being that \f(CW\*(C`]\*(C'\fR immediately following \f(CW\*(C`[\*(C'\fR is treated as | |
2005 | though preceded by a backslash. Similarly, the terminator of | |
2006 | \&\f(CW\*(C`(?{...})\*(C'\fR is found using the same rules as for finding the | |
2007 | terminator of a \f(CW\*(C`{}\*(C'\fR\-delimited construct. | |
2008 | .Sp | |
2009 | It is possible to inspect both the string given to the \s-1RE\s0 engine and the | |
2010 | resulting finite automaton. See the arguments \f(CW\*(C`debug\*(C'\fR/\f(CW\*(C`debugcolor\*(C'\fR | |
2011 | in the \f(CW\*(C`use re\*(C'\fR pragma, as well as Perl's \fB\-Dr\fR command-line | |
2012 | switch documented in \*(L"Command Switches\*(R" in perlrun. | |
2013 | .IP "Optimization of regular expressions" 4 | |
2014 | .IX Item "Optimization of regular expressions" | |
2015 | This step is listed for completeness only. Since it does not change | |
2016 | semantics, details of this step are not documented and are subject | |
2017 | to change without notice. This step is performed over the finite | |
2018 | automaton that was generated during the previous pass. | |
2019 | .Sp | |
2020 | It is at this stage that \f(CW\*(C`split()\*(C'\fR silently optimizes \f(CW\*(C`/^/\*(C'\fR to | |
2021 | mean \f(CW\*(C`/^/m\*(C'\fR. | |
2022 | .Sh "I/O Operators" | |
2023 | .IX Subsection "I/O Operators" | |
2024 | There are several I/O operators you should know about. | |
2025 | .PP | |
2026 | A string enclosed by backticks (grave accents) first undergoes | |
2027 | double-quote interpolation. It is then interpreted as an external | |
2028 | command, and the output of that command is the value of the | |
2029 | backtick string, like in a shell. In scalar context, a single string | |
2030 | consisting of all output is returned. In list context, a list of | |
2031 | values is returned, one per line of output. (You can set \f(CW$/\fR to use | |
2032 | a different line terminator.) The command is executed each time the | |
2033 | pseudo-literal is evaluated. The status value of the command is | |
2034 | returned in \f(CW$?\fR (see perlvar for the interpretation of \f(CW$?\fR). | |
2035 | Unlike in \fBcsh\fR, no translation is done on the return data\*(--newlines | |
2036 | remain newlines. Unlike in any of the shells, single quotes do not | |
2037 | hide variable names in the command from interpretation. To pass a | |
2038 | literal dollar-sign through to the shell you need to hide it with a | |
2039 | backslash. The generalized form of backticks is \f(CW\*(C`qx//\*(C'\fR. (Because | |
2040 | backticks always undergo shell expansion as well, see perlsec for | |
2041 | security concerns.) | |
2042 | .PP | |
2043 | In scalar context, evaluating a filehandle in angle brackets yields | |
2044 | the next line from that file (the newline, if any, included), or | |
2045 | \&\f(CW\*(C`undef\*(C'\fR at end-of-file or on error. When \f(CW$/\fR is set to \f(CW\*(C`undef\*(C'\fR | |
2046 | (sometimes known as file-slurp mode) and the file is empty, it | |
2047 | returns \f(CW''\fR the first time, followed by \f(CW\*(C`undef\*(C'\fR subsequently. | |
2048 | .PP | |
2049 | Ordinarily you must assign the returned value to a variable, but | |
2050 | there is one situation where an automatic assignment happens. If | |
2051 | and only if the input symbol is the only thing inside the conditional | |
2052 | of a \f(CW\*(C`while\*(C'\fR statement (even if disguised as a \f(CW\*(C`for(;;)\*(C'\fR loop), | |
2053 | the value is automatically assigned to the global variable \f(CW$_\fR, | |
2054 | destroying whatever was there previously. (This may seem like an | |
2055 | odd thing to you, but you'll use the construct in almost every Perl | |
2056 | script you write.) The \f(CW$_\fR variable is not implicitly localized. | |
2057 | You'll have to put a \f(CW\*(C`local $_;\*(C'\fR before the loop if you want that | |
2058 | to happen. | |
2059 | .PP | |
2060 | The following lines are equivalent: | |
2061 | .PP | |
2062 | .Vb 7 | |
2063 | \& while (defined($_ = <STDIN>)) { print; } | |
2064 | \& while ($_ = <STDIN>) { print; } | |
2065 | \& while (<STDIN>) { print; } | |
2066 | \& for (;<STDIN>;) { print; } | |
2067 | \& print while defined($_ = <STDIN>); | |
2068 | \& print while ($_ = <STDIN>); | |
2069 | \& print while <STDIN>; | |
2070 | .Ve | |
2071 | .PP | |
2072 | This also behaves similarly, but avoids \f(CW$_\fR: | |
2073 | .PP | |
2074 | .Vb 1 | |
2075 | \& while (my $line = <STDIN>) { print $line } | |
2076 | .Ve | |
2077 | .PP | |
2078 | In these loop constructs, the assigned value (whether assignment | |
2079 | is automatic or explicit) is then tested to see whether it is | |
2080 | defined. The defined test avoids problems where a line has a string | |
2081 | value that would be treated as false by Perl, for example a "" or | |
2082 | a "0" with no trailing newline. If you really mean for such values | |
2083 | to terminate the loop, they should be tested for explicitly: | |
2084 | .PP | |
2085 | .Vb 2 | |
2086 | \& while (($_ = <STDIN>) ne '0') { ... } | |
2087 | \& while (<STDIN>) { last unless $_; ... } | |
2088 | .Ve | |
2089 | .PP | |
2090 | In other boolean contexts, \f(CW\*(C`<\f(CIfilehandle\f(CW>\*(C'\fR without an | |
2091 | explicit \f(CW\*(C`defined\*(C'\fR test or comparison elicits a warning if the | |
2092 | \&\f(CW\*(C`use warnings\*(C'\fR pragma or the \fB\-w\fR | |
2093 | command-line switch (the \f(CW$^W\fR variable) is in effect. | |
2094 | .PP | |
2095 | The filehandles \s-1STDIN\s0, \s-1STDOUT\s0, and \s-1STDERR\s0 are predefined. (The | |
2096 | filehandles \f(CW\*(C`stdin\*(C'\fR, \f(CW\*(C`stdout\*(C'\fR, and \f(CW\*(C`stderr\*(C'\fR will also work except | |
2097 | in packages, where they would be interpreted as local identifiers | |
2098 | rather than global.) Additional filehandles may be created with | |
2099 | the \fIopen()\fR function, amongst others. See perlopentut and | |
2100 | \&\*(L"open\*(R" in perlfunc for details on this. | |
2101 | .PP | |
2102 | If a <\s-1FILEHANDLE\s0> is used in a context that is looking for | |
2103 | a list, a list comprising all input lines is returned, one line per | |
2104 | list element. It's easy to grow to a rather large data space this | |
2105 | way, so use with care. | |
2106 | .PP | |
2107 | <\s-1FILEHANDLE\s0> may also be spelled \f(CW\*(C`readline(*FILEHANDLE)\*(C'\fR. | |
2108 | See \*(L"readline\*(R" in perlfunc. | |
2109 | .PP | |
2110 | The null filehandle <> is special: it can be used to emulate the | |
2111 | behavior of \fBsed\fR and \fBawk\fR. Input from <> comes either from | |
2112 | standard input, or from each file listed on the command line. Here's | |
2113 | how it works: the first time <> is evaluated, the \f(CW@ARGV\fR array is | |
2114 | checked, and if it is empty, \f(CW$ARGV[0]\fR is set to \*(L"\-\*(R", which when opened | |
2115 | gives you standard input. The \f(CW@ARGV\fR array is then processed as a list | |
2116 | of filenames. The loop | |
2117 | .PP | |
2118 | .Vb 3 | |
2119 | \& while (<>) { | |
2120 | \& ... # code for each line | |
2121 | \& } | |
2122 | .Ve | |
2123 | .PP | |
2124 | is equivalent to the following Perl-like pseudo code: | |
2125 | .PP | |
2126 | .Vb 7 | |
2127 | \& unshift(@ARGV, '-') unless @ARGV; | |
2128 | \& while ($ARGV = shift) { | |
2129 | \& open(ARGV, $ARGV); | |
2130 | \& while (<ARGV>) { | |
2131 | \& ... # code for each line | |
2132 | \& } | |
2133 | \& } | |
2134 | .Ve | |
2135 | .PP | |
2136 | except that it isn't so cumbersome to say, and will actually work. | |
2137 | It really does shift the \f(CW@ARGV\fR array and put the current filename | |
2138 | into the \f(CW$ARGV\fR variable. It also uses filehandle \fI\s-1ARGV\s0\fR | |
2139 | internally\*(--<> is just a synonym for <\s-1ARGV\s0>, which | |
2140 | is magical. (The pseudo code above doesn't work because it treats | |
2141 | <\s-1ARGV\s0> as non\-magical.) | |
2142 | .PP | |
2143 | You can modify \f(CW@ARGV\fR before the first <> as long as the array ends up | |
2144 | containing the list of filenames you really want. Line numbers (\f(CW$.\fR) | |
2145 | continue as though the input were one big happy file. See the example | |
2146 | in \*(L"eof\*(R" in perlfunc for how to reset line numbers on each file. | |
2147 | .PP | |
2148 | If you want to set \f(CW@ARGV\fR to your own list of files, go right ahead. | |
2149 | This sets \f(CW@ARGV\fR to all plain text files if no \f(CW@ARGV\fR was given: | |
2150 | .PP | |
2151 | .Vb 1 | |
2152 | \& @ARGV = grep { -f && -T } glob('*') unless @ARGV; | |
2153 | .Ve | |
2154 | .PP | |
2155 | You can even set them to pipe commands. For example, this automatically | |
2156 | filters compressed arguments through \fBgzip\fR: | |
2157 | .PP | |
2158 | .Vb 1 | |
2159 | \& @ARGV = map { /\e.(gz|Z)$/ ? "gzip -dc < $_ |" : $_ } @ARGV; | |
2160 | .Ve | |
2161 | .PP | |
2162 | If you want to pass switches into your script, you can use one of the | |
2163 | Getopts modules or put a loop on the front like this: | |
2164 | .PP | |
2165 | .Vb 7 | |
2166 | \& while ($_ = $ARGV[0], /^-/) { | |
2167 | \& shift; | |
2168 | \& last if /^--$/; | |
2169 | \& if (/^-D(.*)/) { $debug = $1 } | |
2170 | \& if (/^-v/) { $verbose++ } | |
2171 | \& # ... # other switches | |
2172 | \& } | |
2173 | .Ve | |
2174 | .PP | |
2175 | .Vb 3 | |
2176 | \& while (<>) { | |
2177 | \& # ... # code for each line | |
2178 | \& } | |
2179 | .Ve | |
2180 | .PP | |
2181 | The <> symbol will return \f(CW\*(C`undef\*(C'\fR for end-of-file only once. | |
2182 | If you call it again after this, it will assume you are processing another | |
2183 | \&\f(CW@ARGV\fR list, and if you haven't set \f(CW@ARGV\fR, will read input from \s-1STDIN\s0. | |
2184 | .PP | |
2185 | If what the angle brackets contain is a simple scalar variable (e.g., | |
2186 | <$foo>), then that variable contains the name of the | |
2187 | filehandle to input from, or its typeglob, or a reference to the | |
2188 | same. For example: | |
2189 | .PP | |
2190 | .Vb 2 | |
2191 | \& $fh = \e*STDIN; | |
2192 | \& $line = <$fh>; | |
2193 | .Ve | |
2194 | .PP | |
2195 | If what's within the angle brackets is neither a filehandle nor a simple | |
2196 | scalar variable containing a filehandle name, typeglob, or typeglob | |
2197 | reference, it is interpreted as a filename pattern to be globbed, and | |
2198 | either a list of filenames or the next filename in the list is returned, | |
2199 | depending on context. This distinction is determined on syntactic | |
2200 | grounds alone. That means \f(CW\*(C`<$x>\*(C'\fR is always a \fIreadline()\fR from | |
2201 | an indirect handle, but \f(CW\*(C`<$hash{key}>\*(C'\fR is always a \fIglob()\fR. | |
2202 | That's because \f(CW$x\fR is a simple scalar variable, but \f(CW$hash{key}\fR is | |
2203 | not\*(--it's a hash element. | |
2204 | .PP | |
2205 | One level of double-quote interpretation is done first, but you can't | |
2206 | say \f(CW\*(C`<$foo>\*(C'\fR because that's an indirect filehandle as explained | |
2207 | in the previous paragraph. (In older versions of Perl, programmers | |
2208 | would insert curly brackets to force interpretation as a filename glob: | |
2209 | \&\f(CW\*(C`<${foo}>\*(C'\fR. These days, it's considered cleaner to call the | |
2210 | internal function directly as \f(CW\*(C`glob($foo)\*(C'\fR, which is probably the right | |
2211 | way to have done it in the first place.) For example: | |
2212 | .PP | |
2213 | .Vb 3 | |
2214 | \& while (<*.c>) { | |
2215 | \& chmod 0644, $_; | |
2216 | \& } | |
2217 | .Ve | |
2218 | .PP | |
2219 | is roughly equivalent to: | |
2220 | .PP | |
2221 | .Vb 5 | |
2222 | \& open(FOO, "echo *.c | tr -s ' \et\er\ef' '\e\e012\e\e012\e\e012\e\e012'|"); | |
2223 | \& while (<FOO>) { | |
2224 | \& chomp; | |
2225 | \& chmod 0644, $_; | |
2226 | \& } | |
2227 | .Ve | |
2228 | .PP | |
2229 | except that the globbing is actually done internally using the standard | |
2230 | \&\f(CW\*(C`File::Glob\*(C'\fR extension. Of course, the shortest way to do the above is: | |
2231 | .PP | |
2232 | .Vb 1 | |
2233 | \& chmod 0644, <*.c>; | |
2234 | .Ve | |
2235 | .PP | |
2236 | A (file)glob evaluates its (embedded) argument only when it is | |
2237 | starting a new list. All values must be read before it will start | |
2238 | over. In list context, this isn't important because you automatically | |
2239 | get them all anyway. However, in scalar context the operator returns | |
2240 | the next value each time it's called, or \f(CW\*(C`undef\*(C'\fR when the list has | |
2241 | run out. As with filehandle reads, an automatic \f(CW\*(C`defined\*(C'\fR is | |
2242 | generated when the glob occurs in the test part of a \f(CW\*(C`while\*(C'\fR, | |
2243 | because legal glob returns (e.g. a file called \fI0\fR) would otherwise | |
2244 | terminate the loop. Again, \f(CW\*(C`undef\*(C'\fR is returned only once. So if | |
2245 | you're expecting a single value from a glob, it is much better to | |
2246 | say | |
2247 | .PP | |
2248 | .Vb 1 | |
2249 | \& ($file) = <blurch*>; | |
2250 | .Ve | |
2251 | .PP | |
2252 | than | |
2253 | .PP | |
2254 | .Vb 1 | |
2255 | \& $file = <blurch*>; | |
2256 | .Ve | |
2257 | .PP | |
2258 | because the latter will alternate between returning a filename and | |
2259 | returning false. | |
2260 | .PP | |
2261 | If you're trying to do variable interpolation, it's definitely better | |
2262 | to use the \fIglob()\fR function, because the older notation can cause people | |
2263 | to become confused with the indirect filehandle notation. | |
2264 | .PP | |
2265 | .Vb 2 | |
2266 | \& @files = glob("$dir/*.[ch]"); | |
2267 | \& @files = glob($files[$i]); | |
2268 | .Ve | |
2269 | .Sh "Constant Folding" | |
2270 | .IX Subsection "Constant Folding" | |
2271 | Like C, Perl does a certain amount of expression evaluation at | |
2272 | compile time whenever it determines that all arguments to an | |
2273 | operator are static and have no side effects. In particular, string | |
2274 | concatenation happens at compile time between literals that don't do | |
2275 | variable substitution. Backslash interpolation also happens at | |
2276 | compile time. You can say | |
2277 | .PP | |
2278 | .Vb 2 | |
2279 | \& 'Now is the time for all' . "\en" . | |
2280 | \& 'good men to come to.' | |
2281 | .Ve | |
2282 | .PP | |
2283 | and this all reduces to one string internally. Likewise, if | |
2284 | you say | |
2285 | .PP | |
2286 | .Vb 3 | |
2287 | \& foreach $file (@filenames) { | |
2288 | \& if (-s $file > 5 + 100 * 2**16) { } | |
2289 | \& } | |
2290 | .Ve | |
2291 | .PP | |
2292 | the compiler will precompute the number which that expression | |
2293 | represents so that the interpreter won't have to. | |
2294 | .Sh "Bitwise String Operators" | |
2295 | .IX Subsection "Bitwise String Operators" | |
2296 | Bitstrings of any size may be manipulated by the bitwise operators | |
2297 | (\f(CW\*(C`~ | & ^\*(C'\fR). | |
2298 | .PP | |
2299 | If the operands to a binary bitwise op are strings of different | |
2300 | sizes, \fB|\fR and \fB^\fR ops act as though the shorter operand had | |
2301 | additional zero bits on the right, while the \fB&\fR op acts as though | |
2302 | the longer operand were truncated to the length of the shorter. | |
2303 | The granularity for such extension or truncation is one or more | |
2304 | bytes. | |
2305 | .PP | |
2306 | .Vb 5 | |
2307 | \& # ASCII-based examples | |
2308 | \& print "j p \en" ^ " a h"; # prints "JAPH\en" | |
2309 | \& print "JA" | " ph\en"; # prints "japh\en" | |
2310 | \& print "japh\enJunk" & '_____'; # prints "JAPH\en"; | |
2311 | \& print 'p N$' ^ " E<H\en"; # prints "Perl\en"; | |
2312 | .Ve | |
2313 | .PP | |
2314 | If you are intending to manipulate bitstrings, be certain that | |
2315 | you're supplying bitstrings: If an operand is a number, that will imply | |
2316 | a \fBnumeric\fR bitwise operation. You may explicitly show which type of | |
2317 | operation you intend by using \f(CW""\fR or \f(CW\*(C`0+\*(C'\fR, as in the examples below. | |
2318 | .PP | |
2319 | .Vb 4 | |
2320 | \& $foo = 150 | 105 ; # yields 255 (0x96 | 0x69 is 0xFF) | |
2321 | \& $foo = '150' | 105 ; # yields 255 | |
2322 | \& $foo = 150 | '105'; # yields 255 | |
2323 | \& $foo = '150' | '105'; # yields string '155' (under ASCII) | |
2324 | .Ve | |
2325 | .PP | |
2326 | .Vb 2 | |
2327 | \& $baz = 0+$foo & 0+$bar; # both ops explicitly numeric | |
2328 | \& $biz = "$foo" ^ "$bar"; # both ops explicitly stringy | |
2329 | .Ve | |
2330 | .PP | |
2331 | See \*(L"vec\*(R" in perlfunc for information on how to manipulate individual bits | |
2332 | in a bit vector. | |
2333 | .Sh "Integer Arithmetic" | |
2334 | .IX Subsection "Integer Arithmetic" | |
2335 | By default, Perl assumes that it must do most of its arithmetic in | |
2336 | floating point. But by saying | |
2337 | .PP | |
2338 | .Vb 1 | |
2339 | \& use integer; | |
2340 | .Ve | |
2341 | .PP | |
2342 | you may tell the compiler that it's okay to use integer operations | |
2343 | (if it feels like it) from here to the end of the enclosing \s-1BLOCK\s0. | |
2344 | An inner \s-1BLOCK\s0 may countermand this by saying | |
2345 | .PP | |
2346 | .Vb 1 | |
2347 | \& no integer; | |
2348 | .Ve | |
2349 | .PP | |
2350 | which lasts until the end of that \s-1BLOCK\s0. Note that this doesn't | |
2351 | mean everything is only an integer, merely that Perl may use integer | |
2352 | operations if it is so inclined. For example, even under \f(CW\*(C`use | |
2353 | integer\*(C'\fR, if you take the \f(CWsqrt(2)\fR, you'll still get \f(CW1.4142135623731\fR | |
2354 | or so. | |
2355 | .PP | |
2356 | Used on numbers, the bitwise operators (\*(L"&\*(R", \*(L"|\*(R", \*(L"^\*(R", \*(L"~\*(R", \*(L"<<\*(R", | |
2357 | and \*(L">>\*(R") always produce integral results. (But see also | |
2358 | \&\*(L"Bitwise String Operators\*(R".) However, \f(CW\*(C`use integer\*(C'\fR still has meaning for | |
2359 | them. By default, their results are interpreted as unsigned integers, but | |
2360 | if \f(CW\*(C`use integer\*(C'\fR is in effect, their results are interpreted | |
2361 | as signed integers. For example, \f(CW\*(C`~0\*(C'\fR usually evaluates to a large | |
2362 | integral value. However, \f(CW\*(C`use integer; ~0\*(C'\fR is \f(CW\*(C`\-1\*(C'\fR on twos-complement | |
2363 | machines. | |
2364 | .Sh "Floating-point Arithmetic" | |
2365 | .IX Subsection "Floating-point Arithmetic" | |
2366 | While \f(CW\*(C`use integer\*(C'\fR provides integer-only arithmetic, there is no | |
2367 | analogous mechanism to provide automatic rounding or truncation to a | |
2368 | certain number of decimal places. For rounding to a certain number | |
2369 | of digits, \fIsprintf()\fR or \fIprintf()\fR is usually the easiest route. | |
2370 | See perlfaq4. | |
2371 | .PP | |
2372 | Floating-point numbers are only approximations to what a mathematician | |
2373 | would call real numbers. There are infinitely more reals than floats, | |
2374 | so some corners must be cut. For example: | |
2375 | .PP | |
2376 | .Vb 2 | |
2377 | \& printf "%.20g\en", 123456789123456789; | |
2378 | \& # produces 123456789123456784 | |
2379 | .Ve | |
2380 | .PP | |
2381 | Testing for exact floating-point equality or inequality is | |
2382 | not a good idea. Here's a (relatively expensive) work-around to compare | |
2383 | whether two floating-point numbers are equal to a particular number of | |
2384 | decimal places. See Knuth, volume \s-1II\s0, for a more robust treatment of | |
2385 | this topic. | |
2386 | .PP | |
2387 | .Vb 7 | |
2388 | \& sub fp_equal { | |
2389 | \& my ($X, $Y, $POINTS) = @_; | |
2390 | \& my ($tX, $tY); | |
2391 | \& $tX = sprintf("%.${POINTS}g", $X); | |
2392 | \& $tY = sprintf("%.${POINTS}g", $Y); | |
2393 | \& return $tX eq $tY; | |
2394 | \& } | |
2395 | .Ve | |
2396 | .PP | |
2397 | The \s-1POSIX\s0 module (part of the standard perl distribution) implements | |
2398 | \&\fIceil()\fR, \fIfloor()\fR, and other mathematical and trigonometric functions. | |
2399 | The Math::Complex module (part of the standard perl distribution) | |
2400 | defines mathematical functions that work on both the reals and the | |
2401 | imaginary numbers. Math::Complex is not as efficient as \s-1POSIX\s0, but | |
2402 | \&\s-1POSIX\s0 can't work with complex numbers. | |
2403 | .PP | |
2404 | Rounding in financial applications can have serious implications, and | |
2405 | the rounding method used should be specified precisely. In these | |
2406 | cases, it probably pays not to trust whichever system rounding is | |
2407 | being used by Perl, but to instead implement the rounding function you | |
2408 | need yourself. | |
2409 | .Sh "Bigger Numbers" | |
2410 | .IX Subsection "Bigger Numbers" | |
2411 | The standard Math::BigInt and Math::BigFloat modules provide | |
2412 | variable-precision arithmetic and overloaded operators, although | |
2413 | they're currently pretty slow. At the cost of some space and | |
2414 | considerable speed, they avoid the normal pitfalls associated with | |
2415 | limited-precision representations. | |
2416 | .PP | |
2417 | .Vb 3 | |
2418 | \& use Math::BigInt; | |
2419 | \& $x = Math::BigInt->new('123456789123456789'); | |
2420 | \& print $x * $x; | |
2421 | .Ve | |
2422 | .PP | |
2423 | .Vb 1 | |
2424 | \& # prints +15241578780673678515622620750190521 | |
2425 | .Ve | |
2426 | .PP | |
2427 | There are several modules that let you calculate with unlimited (bound | |
2428 | only by memory and cpu\-time) or fixed precision. There are also | |
2429 | some non-standard modules that provide faster implementations via | |
2430 | external C libraries. | |
2431 | .PP | |
2432 | Here is a short, but incomplete summary: | |
2433 | .PP | |
2434 | .Vb 11 | |
2435 | \& Math::Fraction big, unlimited fractions like 9973 / 12967 | |
2436 | \& Math::String treat string sequences like numbers | |
2437 | \& Math::FixedPrecision calculate with a fixed precision | |
2438 | \& Math::Currency for currency calculations | |
2439 | \& Bit::Vector manipulate bit vectors fast (uses C) | |
2440 | \& Math::BigIntFast Bit::Vector wrapper for big numbers | |
2441 | \& Math::Pari provides access to the Pari C library | |
2442 | \& Math::BigInteger uses an external C library | |
2443 | \& Math::Cephes uses external Cephes C library (no big numbers) | |
2444 | \& Math::Cephes::Fraction fractions via the Cephes library | |
2445 | \& Math::GMP another one using an external C library | |
2446 | .Ve | |
2447 | .PP | |
2448 | Choose wisely. |