Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "PERLOP 1" | |
132 | .TH PERLOP 1 "2006-01-07" "perl v5.8.8" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | .IX Xref "operator" | |
135 | perlop \- Perl operators and precedence | |
136 | .SH "DESCRIPTION" | |
137 | .IX Header "DESCRIPTION" | |
138 | .Sh "Operator Precedence and Associativity" | |
139 | .IX Xref "operator, precedence precedence associativity" | |
140 | .IX Subsection "Operator Precedence and Associativity" | |
141 | Operator precedence and associativity work in Perl more or less like | |
142 | they do in mathematics. | |
143 | .PP | |
144 | \&\fIOperator precedence\fR means some operators are evaluated before | |
145 | others. For example, in \f(CW\*(C`2 + 4 * 5\*(C'\fR, the multiplication has higher | |
146 | precedence so \f(CW\*(C`4 * 5\*(C'\fR is evaluated first yielding \f(CW\*(C`2 + 20 == | |
147 | 22\*(C'\fR and not \f(CW\*(C`6 * 5 == 30\*(C'\fR. | |
148 | .PP | |
149 | \&\fIOperator associativity\fR defines what happens if a sequence of the | |
150 | same operators is used one after another: whether the evaluator will | |
151 | evaluate the left operations first or the right. For example, in \f(CW\*(C`8 | |
152 | \&\- 4 \- 2\*(C'\fR, subtraction is left associative so Perl evaluates the | |
153 | expression left to right. \f(CW\*(C`8 \- 4\*(C'\fR is evaluated first making the | |
154 | expression \f(CW\*(C`4 \- 2 == 2\*(C'\fR and not \f(CW\*(C`8 \- 2 == 6\*(C'\fR. | |
155 | .PP | |
156 | Perl operators have the following associativity and precedence, | |
157 | listed from highest precedence to lowest. Operators borrowed from | |
158 | C keep the same precedence relationship with each other, even where | |
159 | C's precedence is slightly screwy. (This makes learning Perl easier | |
160 | for C folks.) With very few exceptions, these all operate on scalar | |
161 | values only, not array values. | |
162 | .PP | |
163 | .Vb 24 | |
164 | \& left terms and list operators (leftward) | |
165 | \& left -> | |
166 | \& nonassoc ++ -- | |
167 | \& right ** | |
168 | \& right ! ~ \e and unary + and - | |
169 | \& left =~ !~ | |
170 | \& left * / % x | |
171 | \& left + - . | |
172 | \& left << >> | |
173 | \& nonassoc named unary operators | |
174 | \& nonassoc < > <= >= lt gt le ge | |
175 | \& nonassoc == != <=> eq ne cmp | |
176 | \& left & | |
177 | \& left | ^ | |
178 | \& left && | |
179 | \& left || | |
180 | \& nonassoc .. ... | |
181 | \& right ?: | |
182 | \& right = += -= *= etc. | |
183 | \& left , => | |
184 | \& nonassoc list operators (rightward) | |
185 | \& right not | |
186 | \& left and | |
187 | \& left or xor | |
188 | .Ve | |
189 | .PP | |
190 | In the following sections, these operators are covered in precedence order. | |
191 | .PP | |
192 | Many operators can be overloaded for objects. See overload. | |
193 | .Sh "Terms and List Operators (Leftward)" | |
194 | .IX Xref "list operator operator, list term" | |
195 | .IX Subsection "Terms and List Operators (Leftward)" | |
196 | A \s-1TERM\s0 has the highest precedence in Perl. Terms include variables, | |
197 | quote and quote-like operators, any expression in parentheses, | |
198 | and any function whose arguments are parenthesized. Actually, there | |
199 | aren't really functions in this sense, just list operators and unary | |
200 | operators behaving as functions because you put parentheses around | |
201 | the arguments. These are all documented in perlfunc. | |
202 | .PP | |
203 | If any list operator (\fIprint()\fR, etc.) or any unary operator (\fIchdir()\fR, etc.) | |
204 | is followed by a left parenthesis as the next token, the operator and | |
205 | arguments within parentheses are taken to be of highest precedence, | |
206 | just like a normal function call. | |
207 | .PP | |
208 | In the absence of parentheses, the precedence of list operators such as | |
209 | \&\f(CW\*(C`print\*(C'\fR, \f(CW\*(C`sort\*(C'\fR, or \f(CW\*(C`chmod\*(C'\fR is either very high or very low depending on | |
210 | whether you are looking at the left side or the right side of the operator. | |
211 | For example, in | |
212 | .PP | |
213 | .Vb 2 | |
214 | \& @ary = (1, 3, sort 4, 2); | |
215 | \& print @ary; # prints 1324 | |
216 | .Ve | |
217 | .PP | |
218 | the commas on the right of the sort are evaluated before the sort, | |
219 | but the commas on the left are evaluated after. In other words, | |
220 | list operators tend to gobble up all arguments that follow, and | |
221 | then act like a simple \s-1TERM\s0 with regard to the preceding expression. | |
222 | Be careful with parentheses: | |
223 | .PP | |
224 | .Vb 3 | |
225 | \& # These evaluate exit before doing the print: | |
226 | \& print($foo, exit); # Obviously not what you want. | |
227 | \& print $foo, exit; # Nor is this. | |
228 | .Ve | |
229 | .PP | |
230 | .Vb 4 | |
231 | \& # These do the print before evaluating exit: | |
232 | \& (print $foo), exit; # This is what you want. | |
233 | \& print($foo), exit; # Or this. | |
234 | \& print ($foo), exit; # Or even this. | |
235 | .Ve | |
236 | .PP | |
237 | Also note that | |
238 | .PP | |
239 | .Vb 1 | |
240 | \& print ($foo & 255) + 1, "\en"; | |
241 | .Ve | |
242 | .PP | |
243 | probably doesn't do what you expect at first glance. The parentheses | |
244 | enclose the argument list for \f(CW\*(C`print\*(C'\fR which is evaluated (printing | |
245 | the result of \f(CW\*(C`$foo & 255\*(C'\fR). Then one is added to the return value | |
246 | of \f(CW\*(C`print\*(C'\fR (usually 1). The result is something like this: | |
247 | .PP | |
248 | .Vb 1 | |
249 | \& 1 + 1, "\en"; # Obviously not what you meant. | |
250 | .Ve | |
251 | .PP | |
252 | To do what you meant properly, you must write: | |
253 | .PP | |
254 | .Vb 1 | |
255 | \& print(($foo & 255) + 1, "\en"); | |
256 | .Ve | |
257 | .PP | |
258 | See \*(L"Named Unary Operators\*(R" for more discussion of this. | |
259 | .PP | |
260 | Also parsed as terms are the \f(CW\*(C`do {}\*(C'\fR and \f(CW\*(C`eval {}\*(C'\fR constructs, as | |
261 | well as subroutine and method calls, and the anonymous | |
262 | constructors \f(CW\*(C`[]\*(C'\fR and \f(CW\*(C`{}\*(C'\fR. | |
263 | .PP | |
264 | See also \*(L"Quote and Quote-like Operators\*(R" toward the end of this section, | |
265 | as well as \*(L"I/O Operators\*(R". | |
266 | .Sh "The Arrow Operator" | |
267 | .IX Xref "arrow dereference ->" | |
268 | .IX Subsection "The Arrow Operator" | |
269 | "\f(CW\*(C`\->\*(C'\fR" is an infix dereference operator, just as it is in C | |
270 | and \*(C+. If the right side is either a \f(CW\*(C`[...]\*(C'\fR, \f(CW\*(C`{...}\*(C'\fR, or a | |
271 | \&\f(CW\*(C`(...)\*(C'\fR subscript, then the left side must be either a hard or | |
272 | symbolic reference to an array, a hash, or a subroutine respectively. | |
273 | (Or technically speaking, a location capable of holding a hard | |
274 | reference, if it's an array or hash reference being used for | |
275 | assignment.) See perlreftut and perlref. | |
276 | .PP | |
277 | Otherwise, the right side is a method name or a simple scalar | |
278 | variable containing either the method name or a subroutine reference, | |
279 | and the left side must be either an object (a blessed reference) | |
280 | or a class name (that is, a package name). See perlobj. | |
281 | .Sh "Auto-increment and Auto-decrement" | |
282 | .IX Xref "increment auto-increment ++ decrement auto-decrement --" | |
283 | .IX Subsection "Auto-increment and Auto-decrement" | |
284 | \&\*(L"++\*(R" and \*(L"\-\-\*(R" work as in C. That is, if placed before a variable, | |
285 | they increment or decrement the variable by one before returning the | |
286 | value, and if placed after, increment or decrement after returning the | |
287 | value. | |
288 | .PP | |
289 | .Vb 3 | |
290 | \& $i = 0; $j = 0; | |
291 | \& print $i++; # prints 0 | |
292 | \& print ++$j; # prints 1 | |
293 | .Ve | |
294 | .PP | |
295 | Note that just as in C, Perl doesn't define \fBwhen\fR the variable is | |
296 | incremented or decremented. You just know it will be done sometime | |
297 | before or after the value is returned. This also means that modifying | |
298 | a variable twice in the same statement will lead to undefined behaviour. | |
299 | Avoid statements like: | |
300 | .PP | |
301 | .Vb 2 | |
302 | \& $i = $i ++; | |
303 | \& print ++ $i + $i ++; | |
304 | .Ve | |
305 | .PP | |
306 | Perl will not guarantee what the result of the above statements is. | |
307 | .PP | |
308 | The auto-increment operator has a little extra builtin magic to it. If | |
309 | you increment a variable that is numeric, or that has ever been used in | |
310 | a numeric context, you get a normal increment. If, however, the | |
311 | variable has been used in only string contexts since it was set, and | |
312 | has a value that is not the empty string and matches the pattern | |
313 | \&\f(CW\*(C`/^[a\-zA\-Z]*[0\-9]*\ez/\*(C'\fR, the increment is done as a string, preserving each | |
314 | character within its range, with carry: | |
315 | .PP | |
316 | .Vb 4 | |
317 | \& print ++($foo = '99'); # prints '100' | |
318 | \& print ++($foo = 'a0'); # prints 'a1' | |
319 | \& print ++($foo = 'Az'); # prints 'Ba' | |
320 | \& print ++($foo = 'zz'); # prints 'aaa' | |
321 | .Ve | |
322 | .PP | |
323 | \&\f(CW\*(C`undef\*(C'\fR is always treated as numeric, and in particular is changed | |
324 | to \f(CW0\fR before incrementing (so that a post-increment of an undef value | |
325 | will return \f(CW0\fR rather than \f(CW\*(C`undef\*(C'\fR). | |
326 | .PP | |
327 | The auto-decrement operator is not magical. | |
328 | .Sh "Exponentiation" | |
329 | .IX Xref "** exponentiation power" | |
330 | .IX Subsection "Exponentiation" | |
331 | Binary \*(L"**\*(R" is the exponentiation operator. It binds even more | |
332 | tightly than unary minus, so \-2**4 is \-(2**4), not (\-2)**4. (This is | |
333 | implemented using C's \fIpow\fR\|(3) function, which actually works on doubles | |
334 | internally.) | |
335 | .Sh "Symbolic Unary Operators" | |
336 | .IX Xref "unary operator operator, unary" | |
337 | .IX Subsection "Symbolic Unary Operators" | |
338 | Unary \*(L"!\*(R" performs logical negation, i.e., \*(L"not\*(R". See also \f(CW\*(C`not\*(C'\fR for a lower | |
339 | precedence version of this. | |
340 | .IX Xref "!" | |
341 | .PP | |
342 | Unary \*(L"\-\*(R" performs arithmetic negation if the operand is numeric. If | |
343 | the operand is an identifier, a string consisting of a minus sign | |
344 | concatenated with the identifier is returned. Otherwise, if the string | |
345 | starts with a plus or minus, a string starting with the opposite sign | |
346 | is returned. One effect of these rules is that \-bareword is equivalent | |
347 | to the string \*(L"\-bareword\*(R". If, however, the string begins with a | |
348 | non-alphabetic character (excluding \*(L"+\*(R" or \*(L"\-\*(R"), Perl will attempt to convert | |
349 | the string to a numeric and the arithmetic negation is performed. If the | |
350 | string cannot be cleanly converted to a numeric, Perl will give the warning | |
351 | \&\fBArgument \*(L"the string\*(R" isn't numeric in negation (\-) at ...\fR. | |
352 | .IX Xref "- negation, arithmetic" | |
353 | .PP | |
354 | Unary \*(L"~\*(R" performs bitwise negation, i.e., 1's complement. For | |
355 | example, \f(CW\*(C`0666 & ~027\*(C'\fR is 0640. (See also \*(L"Integer Arithmetic\*(R" and | |
356 | \&\*(L"Bitwise String Operators\*(R".) Note that the width of the result is | |
357 | platform\-dependent: ~0 is 32 bits wide on a 32\-bit platform, but 64 | |
358 | bits wide on a 64\-bit platform, so if you are expecting a certain bit | |
359 | width, remember to use the & operator to mask off the excess bits. | |
360 | .IX Xref "~ negation, binary" | |
361 | .PP | |
362 | Unary \*(L"+\*(R" has no effect whatsoever, even on strings. It is useful | |
363 | syntactically for separating a function name from a parenthesized expression | |
364 | that would otherwise be interpreted as the complete list of function | |
365 | arguments. (See examples above under \*(L"Terms and List Operators (Leftward)\*(R".) | |
366 | .IX Xref "+" | |
367 | .PP | |
368 | Unary \*(L"\e\*(R" creates a reference to whatever follows it. See perlreftut | |
369 | and perlref. Do not confuse this behavior with the behavior of | |
370 | backslash within a string, although both forms do convey the notion | |
371 | of protecting the next thing from interpolation. | |
372 | .IX Xref "\ reference backslash" | |
373 | .Sh "Binding Operators" | |
374 | .IX Xref "binding operator, binding =~ !~" | |
375 | .IX Subsection "Binding Operators" | |
376 | Binary \*(L"=~\*(R" binds a scalar expression to a pattern match. Certain operations | |
377 | search or modify the string \f(CW$_\fR by default. This operator makes that kind | |
378 | of operation work on some other string. The right argument is a search | |
379 | pattern, substitution, or transliteration. The left argument is what is | |
380 | supposed to be searched, substituted, or transliterated instead of the default | |
381 | \&\f(CW$_\fR. When used in scalar context, the return value generally indicates the | |
382 | success of the operation. Behavior in list context depends on the particular | |
383 | operator. See \*(L"Regexp Quote-Like Operators\*(R" for details and | |
384 | perlretut for examples using these operators. | |
385 | .PP | |
386 | If the right argument is an expression rather than a search pattern, | |
387 | substitution, or transliteration, it is interpreted as a search pattern at run | |
388 | time. | |
389 | .PP | |
390 | Binary \*(L"!~\*(R" is just like \*(L"=~\*(R" except the return value is negated in | |
391 | the logical sense. | |
392 | .Sh "Multiplicative Operators" | |
393 | .IX Xref "operator, multiplicative" | |
394 | .IX Subsection "Multiplicative Operators" | |
395 | Binary \*(L"*\*(R" multiplies two numbers. | |
396 | .IX Xref "*" | |
397 | .PP | |
398 | Binary \*(L"/\*(R" divides two numbers. | |
399 | .IX Xref "slash" | |
400 | .PP | |
401 | Binary \*(L"%\*(R" computes the modulus of two numbers. Given integer | |
402 | operands \f(CW$a\fR and \f(CW$b\fR: If \f(CW$b\fR is positive, then \f(CW\*(C`$a % $b\*(C'\fR is | |
403 | \&\f(CW$a\fR minus the largest multiple of \f(CW$b\fR that is not greater than | |
404 | \&\f(CW$a\fR. If \f(CW$b\fR is negative, then \f(CW\*(C`$a % $b\*(C'\fR is \f(CW$a\fR minus the | |
405 | smallest multiple of \f(CW$b\fR that is not less than \f(CW$a\fR (i.e. the | |
406 | result will be less than or equal to zero). | |
407 | Note that when \f(CW\*(C`use integer\*(C'\fR is in scope, \*(L"%\*(R" gives you direct access | |
408 | to the modulus operator as implemented by your C compiler. This | |
409 | operator is not as well defined for negative operands, but it will | |
410 | execute faster. | |
411 | .IX Xref "% remainder modulus mod" | |
412 | .PP | |
413 | Binary \*(L"x\*(R" is the repetition operator. In scalar context or if the left | |
414 | operand is not enclosed in parentheses, it returns a string consisting | |
415 | of the left operand repeated the number of times specified by the right | |
416 | operand. In list context, if the left operand is enclosed in | |
417 | parentheses or is a list formed by \f(CW\*(C`qw/STRING/\*(C'\fR, it repeats the list. | |
418 | If the right operand is zero or negative, it returns an empty string | |
419 | or an empty list, depending on the context. | |
420 | .IX Xref "x" | |
421 | .PP | |
422 | .Vb 1 | |
423 | \& print '-' x 80; # print row of dashes | |
424 | .Ve | |
425 | .PP | |
426 | .Vb 1 | |
427 | \& print "\et" x ($tab/8), ' ' x ($tab%8); # tab over | |
428 | .Ve | |
429 | .PP | |
430 | .Vb 2 | |
431 | \& @ones = (1) x 80; # a list of 80 1's | |
432 | \& @ones = (5) x @ones; # set all elements to 5 | |
433 | .Ve | |
434 | .Sh "Additive Operators" | |
435 | .IX Xref "operator, additive" | |
436 | .IX Subsection "Additive Operators" | |
437 | Binary \*(L"+\*(R" returns the sum of two numbers. | |
438 | .IX Xref "+" | |
439 | .PP | |
440 | Binary \*(L"\-\*(R" returns the difference of two numbers. | |
441 | .IX Xref "-" | |
442 | .PP | |
443 | Binary \*(L".\*(R" concatenates two strings. | |
444 | .IX Xref "string, concatenation concatenation cat concat concatenate ." | |
445 | .Sh "Shift Operators" | |
446 | .IX Xref "shift operator operator, shift << >> right shift left shift bitwise shift shl shr shift, right shift, left" | |
447 | .IX Subsection "Shift Operators" | |
448 | Binary \*(L"<<\*(R" returns the value of its left argument shifted left by the | |
449 | number of bits specified by the right argument. Arguments should be | |
450 | integers. (See also \*(L"Integer Arithmetic\*(R".) | |
451 | .PP | |
452 | Binary \*(L">>\*(R" returns the value of its left argument shifted right by | |
453 | the number of bits specified by the right argument. Arguments should | |
454 | be integers. (See also \*(L"Integer Arithmetic\*(R".) | |
455 | .PP | |
456 | Note that both \*(L"<<\*(R" and \*(L">>\*(R" in Perl are implemented directly using | |
457 | \&\*(L"<<\*(R" and \*(L">>\*(R" in C. If \f(CW\*(C`use integer\*(C'\fR (see \*(L"Integer Arithmetic\*(R") is | |
458 | in force then signed C integers are used, else unsigned C integers are | |
459 | used. Either way, the implementation isn't going to generate results | |
460 | larger than the size of the integer type Perl was built with (32 bits | |
461 | or 64 bits). | |
462 | .PP | |
463 | The result of overflowing the range of the integers is undefined | |
464 | because it is undefined also in C. In other words, using 32\-bit | |
465 | integers, \f(CW\*(C`1 << 32\*(C'\fR is undefined. Shifting by a negative number | |
466 | of bits is also undefined. | |
467 | .Sh "Named Unary Operators" | |
468 | .IX Xref "operator, named unary" | |
469 | .IX Subsection "Named Unary Operators" | |
470 | The various named unary operators are treated as functions with one | |
471 | argument, with optional parentheses. | |
472 | .PP | |
473 | If any list operator (\fIprint()\fR, etc.) or any unary operator (\fIchdir()\fR, etc.) | |
474 | is followed by a left parenthesis as the next token, the operator and | |
475 | arguments within parentheses are taken to be of highest precedence, | |
476 | just like a normal function call. For example, | |
477 | because named unary operators are higher precedence than ||: | |
478 | .PP | |
479 | .Vb 4 | |
480 | \& chdir $foo || die; # (chdir $foo) || die | |
481 | \& chdir($foo) || die; # (chdir $foo) || die | |
482 | \& chdir ($foo) || die; # (chdir $foo) || die | |
483 | \& chdir +($foo) || die; # (chdir $foo) || die | |
484 | .Ve | |
485 | .PP | |
486 | but, because * is higher precedence than named unary operators: | |
487 | .PP | |
488 | .Vb 4 | |
489 | \& chdir $foo * 20; # chdir ($foo * 20) | |
490 | \& chdir($foo) * 20; # (chdir $foo) * 20 | |
491 | \& chdir ($foo) * 20; # (chdir $foo) * 20 | |
492 | \& chdir +($foo) * 20; # chdir ($foo * 20) | |
493 | .Ve | |
494 | .PP | |
495 | .Vb 4 | |
496 | \& rand 10 * 20; # rand (10 * 20) | |
497 | \& rand(10) * 20; # (rand 10) * 20 | |
498 | \& rand (10) * 20; # (rand 10) * 20 | |
499 | \& rand +(10) * 20; # rand (10 * 20) | |
500 | .Ve | |
501 | .PP | |
502 | Regarding precedence, the filetest operators, like \f(CW\*(C`\-f\*(C'\fR, \f(CW\*(C`\-M\*(C'\fR, etc. are | |
503 | treated like named unary operators, but they don't follow this functional | |
504 | parenthesis rule. That means, for example, that \f(CW\*(C`\-f($file).".bak"\*(C'\fR is | |
505 | equivalent to \f(CW\*(C`\-f "$file.bak"\*(C'\fR. | |
506 | .IX Xref "-X filetest operator, filetest" | |
507 | .PP | |
508 | See also \*(L"Terms and List Operators (Leftward)\*(R". | |
509 | .Sh "Relational Operators" | |
510 | .IX Xref "relational operator operator, relational" | |
511 | .IX Subsection "Relational Operators" | |
512 | Binary \*(L"<\*(R" returns true if the left argument is numerically less than | |
513 | the right argument. | |
514 | .IX Xref "<" | |
515 | .PP | |
516 | Binary \*(L">\*(R" returns true if the left argument is numerically greater | |
517 | than the right argument. | |
518 | .IX Xref ">" | |
519 | .PP | |
520 | Binary \*(L"<=\*(R" returns true if the left argument is numerically less than | |
521 | or equal to the right argument. | |
522 | .IX Xref "<=" | |
523 | .PP | |
524 | Binary \*(L">=\*(R" returns true if the left argument is numerically greater | |
525 | than or equal to the right argument. | |
526 | .IX Xref ">=" | |
527 | .PP | |
528 | Binary \*(L"lt\*(R" returns true if the left argument is stringwise less than | |
529 | the right argument. | |
530 | .IX Xref "lt" | |
531 | .PP | |
532 | Binary \*(L"gt\*(R" returns true if the left argument is stringwise greater | |
533 | than the right argument. | |
534 | .IX Xref "gt" | |
535 | .PP | |
536 | Binary \*(L"le\*(R" returns true if the left argument is stringwise less than | |
537 | or equal to the right argument. | |
538 | .IX Xref "le" | |
539 | .PP | |
540 | Binary \*(L"ge\*(R" returns true if the left argument is stringwise greater | |
541 | than or equal to the right argument. | |
542 | .IX Xref "ge" | |
543 | .Sh "Equality Operators" | |
544 | .IX Xref "equality equal equals operator, equality" | |
545 | .IX Subsection "Equality Operators" | |
546 | Binary \*(L"==\*(R" returns true if the left argument is numerically equal to | |
547 | the right argument. | |
548 | .IX Xref "==" | |
549 | .PP | |
550 | Binary \*(L"!=\*(R" returns true if the left argument is numerically not equal | |
551 | to the right argument. | |
552 | .IX Xref "!=" | |
553 | .PP | |
554 | Binary \*(L"<=>\*(R" returns \-1, 0, or 1 depending on whether the left | |
555 | argument is numerically less than, equal to, or greater than the right | |
556 | argument. If your platform supports NaNs (not\-a\-numbers) as numeric | |
557 | values, using them with \*(L"<=>\*(R" returns undef. NaN is not \*(L"<\*(R", \*(L"==\*(R", \*(L">\*(R", | |
558 | \&\*(L"<=\*(R" or \*(L">=\*(R" anything (even NaN), so those 5 return false. NaN != NaN | |
559 | returns true, as does NaN != anything else. If your platform doesn't | |
560 | support NaNs then NaN is just a string with numeric value 0. | |
561 | .IX Xref "<=> spaceship" | |
562 | .PP | |
563 | .Vb 2 | |
564 | \& perl -le '$a = "NaN"; print "No NaN support here" if $a == $a' | |
565 | \& perl -le '$a = "NaN"; print "NaN support here" if $a != $a' | |
566 | .Ve | |
567 | .PP | |
568 | Binary \*(L"eq\*(R" returns true if the left argument is stringwise equal to | |
569 | the right argument. | |
570 | .IX Xref "eq" | |
571 | .PP | |
572 | Binary \*(L"ne\*(R" returns true if the left argument is stringwise not equal | |
573 | to the right argument. | |
574 | .IX Xref "ne" | |
575 | .PP | |
576 | Binary \*(L"cmp\*(R" returns \-1, 0, or 1 depending on whether the left | |
577 | argument is stringwise less than, equal to, or greater than the right | |
578 | argument. | |
579 | .IX Xref "cmp" | |
580 | .PP | |
581 | \&\*(L"lt\*(R", \*(L"le\*(R", \*(L"ge\*(R", \*(L"gt\*(R" and \*(L"cmp\*(R" use the collation (sort) order specified | |
582 | by the current locale if \f(CW\*(C`use locale\*(C'\fR is in effect. See perllocale. | |
583 | .Sh "Bitwise And" | |
584 | .IX Xref "operator, bitwise, and bitwise and &" | |
585 | .IX Subsection "Bitwise And" | |
586 | Binary \*(L"&\*(R" returns its operands ANDed together bit by bit. | |
587 | (See also \*(L"Integer Arithmetic\*(R" and \*(L"Bitwise String Operators\*(R".) | |
588 | .PP | |
589 | Note that \*(L"&\*(R" has lower priority than relational operators, so for example | |
590 | the brackets are essential in a test like | |
591 | .PP | |
592 | .Vb 1 | |
593 | \& print "Even\en" if ($x & 1) == 0; | |
594 | .Ve | |
595 | .Sh "Bitwise Or and Exclusive Or" | |
596 | .IX Xref "operator, bitwise, or bitwise or | operator, bitwise, xor bitwise xor ^" | |
597 | .IX Subsection "Bitwise Or and Exclusive Or" | |
598 | Binary \*(L"|\*(R" returns its operands ORed together bit by bit. | |
599 | (See also \*(L"Integer Arithmetic\*(R" and \*(L"Bitwise String Operators\*(R".) | |
600 | .PP | |
601 | Binary \*(L"^\*(R" returns its operands XORed together bit by bit. | |
602 | (See also \*(L"Integer Arithmetic\*(R" and \*(L"Bitwise String Operators\*(R".) | |
603 | .PP | |
604 | Note that \*(L"|\*(R" and \*(L"^\*(R" have lower priority than relational operators, so | |
605 | for example the brackets are essential in a test like | |
606 | .PP | |
607 | .Vb 1 | |
608 | \& print "false\en" if (8 | 2) != 10; | |
609 | .Ve | |
610 | .Sh "C\-style Logical And" | |
611 | .IX Xref "&& logical and operator, logical, and" | |
612 | .IX Subsection "C-style Logical And" | |
613 | Binary \*(L"&&\*(R" performs a short-circuit logical \s-1AND\s0 operation. That is, | |
614 | if the left operand is false, the right operand is not even evaluated. | |
615 | Scalar or list context propagates down to the right operand if it | |
616 | is evaluated. | |
617 | .Sh "C\-style Logical Or" | |
618 | .IX Xref "|| operator, logical, or" | |
619 | .IX Subsection "C-style Logical Or" | |
620 | Binary \*(L"||\*(R" performs a short-circuit logical \s-1OR\s0 operation. That is, | |
621 | if the left operand is true, the right operand is not even evaluated. | |
622 | Scalar or list context propagates down to the right operand if it | |
623 | is evaluated. | |
624 | .PP | |
625 | The \f(CW\*(C`||\*(C'\fR and \f(CW\*(C`&&\*(C'\fR operators return the last value evaluated | |
626 | (unlike C's \f(CW\*(C`||\*(C'\fR and \f(CW\*(C`&&\*(C'\fR, which return 0 or 1). Thus, a reasonably | |
627 | portable way to find out the home directory might be: | |
628 | .PP | |
629 | .Vb 2 | |
630 | \& $home = $ENV{'HOME'} || $ENV{'LOGDIR'} || | |
631 | \& (getpwuid($<))[7] || die "You're homeless!\en"; | |
632 | .Ve | |
633 | .PP | |
634 | In particular, this means that you shouldn't use this | |
635 | for selecting between two aggregates for assignment: | |
636 | .PP | |
637 | .Vb 3 | |
638 | \& @a = @b || @c; # this is wrong | |
639 | \& @a = scalar(@b) || @c; # really meant this | |
640 | \& @a = @b ? @b : @c; # this works fine, though | |
641 | .Ve | |
642 | .PP | |
643 | As more readable alternatives to \f(CW\*(C`&&\*(C'\fR and \f(CW\*(C`||\*(C'\fR when used for | |
644 | control flow, Perl provides \f(CW\*(C`and\*(C'\fR and \f(CW\*(C`or\*(C'\fR operators (see below). | |
645 | The short-circuit behavior is identical. The precedence of \*(L"and\*(R" and | |
646 | \&\*(L"or\*(R" is much lower, however, so that you can safely use them after a | |
647 | list operator without the need for parentheses: | |
648 | .PP | |
649 | .Vb 2 | |
650 | \& unlink "alpha", "beta", "gamma" | |
651 | \& or gripe(), next LINE; | |
652 | .Ve | |
653 | .PP | |
654 | With the C\-style operators that would have been written like this: | |
655 | .PP | |
656 | .Vb 2 | |
657 | \& unlink("alpha", "beta", "gamma") | |
658 | \& || (gripe(), next LINE); | |
659 | .Ve | |
660 | .PP | |
661 | Using \*(L"or\*(R" for assignment is unlikely to do what you want; see below. | |
662 | .Sh "Range Operators" | |
663 | .IX Xref "operator, range range .. ..." | |
664 | .IX Subsection "Range Operators" | |
665 | Binary \*(L"..\*(R" is the range operator, which is really two different | |
666 | operators depending on the context. In list context, it returns a | |
667 | list of values counting (up by ones) from the left value to the right | |
668 | value. If the left value is greater than the right value then it | |
669 | returns the empty list. The range operator is useful for writing | |
670 | \&\f(CW\*(C`foreach (1..10)\*(C'\fR loops and for doing slice operations on arrays. In | |
671 | the current implementation, no temporary array is created when the | |
672 | range operator is used as the expression in \f(CW\*(C`foreach\*(C'\fR loops, but older | |
673 | versions of Perl might burn a lot of memory when you write something | |
674 | like this: | |
675 | .PP | |
676 | .Vb 3 | |
677 | \& for (1 .. 1_000_000) { | |
678 | \& # code | |
679 | \& } | |
680 | .Ve | |
681 | .PP | |
682 | The range operator also works on strings, using the magical auto\-increment; | |
683 | see below. | |
684 | .PP | |
685 | In scalar context, \*(L"..\*(R" returns a boolean value. The operator is | |
686 | bistable, like a flip\-flop, and emulates the line-range (comma) operator | |
687 | of \fBsed\fR, \fBawk\fR, and various editors. Each \*(L"..\*(R" operator maintains its | |
688 | own boolean state. It is false as long as its left operand is false. | |
689 | Once the left operand is true, the range operator stays true until the | |
690 | right operand is true, \fI\s-1AFTER\s0\fR which the range operator becomes false | |
691 | again. It doesn't become false till the next time the range operator is | |
692 | evaluated. It can test the right operand and become false on the same | |
693 | evaluation it became true (as in \fBawk\fR), but it still returns true once. | |
694 | If you don't want it to test the right operand till the next | |
695 | evaluation, as in \fBsed\fR, just use three dots (\*(L"...\*(R") instead of | |
696 | two. In all other regards, \*(L"...\*(R" behaves just like \*(L"..\*(R" does. | |
697 | .PP | |
698 | The right operand is not evaluated while the operator is in the | |
699 | \&\*(L"false\*(R" state, and the left operand is not evaluated while the | |
700 | operator is in the \*(L"true\*(R" state. The precedence is a little lower | |
701 | than || and &&. The value returned is either the empty string for | |
702 | false, or a sequence number (beginning with 1) for true. The | |
703 | sequence number is reset for each range encountered. The final | |
704 | sequence number in a range has the string \*(L"E0\*(R" appended to it, which | |
705 | doesn't affect its numeric value, but gives you something to search | |
706 | for if you want to exclude the endpoint. You can exclude the | |
707 | beginning point by waiting for the sequence number to be greater | |
708 | than 1. | |
709 | .PP | |
710 | If either operand of scalar \*(L"..\*(R" is a constant expression, | |
711 | that operand is considered true if it is equal (\f(CW\*(C`==\*(C'\fR) to the current | |
712 | input line number (the \f(CW$.\fR variable). | |
713 | .PP | |
714 | To be pedantic, the comparison is actually \f(CW\*(C`int(EXPR) == int(EXPR)\*(C'\fR, | |
715 | but that is only an issue if you use a floating point expression; when | |
716 | implicitly using \f(CW$.\fR as described in the previous paragraph, the | |
717 | comparison is \f(CW\*(C`int(EXPR) == int($.)\*(C'\fR which is only an issue when \f(CW$.\fR | |
718 | is set to a floating point value and you are not reading from a file. | |
719 | Furthermore, \f(CW"span" .. "spat"\fR or \f(CW\*(C`2.18 .. 3.14\*(C'\fR will not do what | |
720 | you want in scalar context because each of the operands is evaluated | |
721 | using their integer representation. | |
722 | .PP | |
723 | Examples: | |
724 | .PP | |
725 | As a scalar operator: | |
726 | .PP | |
727 | .Vb 2 | |
728 | \& if (101 .. 200) { print; } # print 2nd hundred lines, short for | |
729 | \& # if ($. == 101 .. $. == 200) ... | |
730 | .Ve | |
731 | .PP | |
732 | .Vb 3 | |
733 | \& next LINE if (1 .. /^$/); # skip header lines, short for | |
734 | \& # ... if ($. == 1 .. /^$/); | |
735 | \& # (typically in a loop labeled LINE) | |
736 | .Ve | |
737 | .PP | |
738 | .Vb 1 | |
739 | \& s/^/> / if (/^$/ .. eof()); # quote body | |
740 | .Ve | |
741 | .PP | |
742 | .Vb 12 | |
743 | \& # parse mail messages | |
744 | \& while (<>) { | |
745 | \& $in_header = 1 .. /^$/; | |
746 | \& $in_body = /^$/ .. eof; | |
747 | \& if ($in_header) { | |
748 | \& # ... | |
749 | \& } else { # in body | |
750 | \& # ... | |
751 | \& } | |
752 | \& } continue { | |
753 | \& close ARGV if eof; # reset $. each file | |
754 | \& } | |
755 | .Ve | |
756 | .PP | |
757 | Here's a simple example to illustrate the difference between | |
758 | the two range operators: | |
759 | .PP | |
760 | .Vb 4 | |
761 | \& @lines = (" - Foo", | |
762 | \& "01 - Bar", | |
763 | \& "1 - Baz", | |
764 | \& " - Quux"); | |
765 | .Ve | |
766 | .PP | |
767 | .Vb 5 | |
768 | \& foreach (@lines) { | |
769 | \& if (/0/ .. /1/) { | |
770 | \& print "$_\en"; | |
771 | \& } | |
772 | \& } | |
773 | .Ve | |
774 | .PP | |
775 | This program will print only the line containing \*(L"Bar\*(R". If | |
776 | the range operator is changed to \f(CW\*(C`...\*(C'\fR, it will also print the | |
777 | \&\*(L"Baz\*(R" line. | |
778 | .PP | |
779 | And now some examples as a list operator: | |
780 | .PP | |
781 | .Vb 3 | |
782 | \& for (101 .. 200) { print; } # print $_ 100 times | |
783 | \& @foo = @foo[0 .. $#foo]; # an expensive no-op | |
784 | \& @foo = @foo[$#foo-4 .. $#foo]; # slice last 5 items | |
785 | .Ve | |
786 | .PP | |
787 | The range operator (in list context) makes use of the magical | |
788 | auto-increment algorithm if the operands are strings. You | |
789 | can say | |
790 | .PP | |
791 | .Vb 1 | |
792 | \& @alphabet = ('A' .. 'Z'); | |
793 | .Ve | |
794 | .PP | |
795 | to get all normal letters of the English alphabet, or | |
796 | .PP | |
797 | .Vb 1 | |
798 | \& $hexdigit = (0 .. 9, 'a' .. 'f')[$num & 15]; | |
799 | .Ve | |
800 | .PP | |
801 | to get a hexadecimal digit, or | |
802 | .PP | |
803 | .Vb 1 | |
804 | \& @z2 = ('01' .. '31'); print $z2[$mday]; | |
805 | .Ve | |
806 | .PP | |
807 | to get dates with leading zeros. If the final value specified is not | |
808 | in the sequence that the magical increment would produce, the sequence | |
809 | goes until the next value would be longer than the final value | |
810 | specified. | |
811 | .PP | |
812 | Because each operand is evaluated in integer form, \f(CW\*(C`2.18 .. 3.14\*(C'\fR will | |
813 | return two elements in list context. | |
814 | .PP | |
815 | .Vb 1 | |
816 | \& @list = (2.18 .. 3.14); # same as @list = (2 .. 3); | |
817 | .Ve | |
818 | .Sh "Conditional Operator" | |
819 | .IX Xref "operator, conditional operator, ternary ternary ?:" | |
820 | .IX Subsection "Conditional Operator" | |
821 | Ternary \*(L"?:\*(R" is the conditional operator, just as in C. It works much | |
822 | like an if\-then\-else. If the argument before the ? is true, the | |
823 | argument before the : is returned, otherwise the argument after the : | |
824 | is returned. For example: | |
825 | .PP | |
826 | .Vb 2 | |
827 | \& printf "I have %d dog%s.\en", $n, | |
828 | \& ($n == 1) ? '' : "s"; | |
829 | .Ve | |
830 | .PP | |
831 | Scalar or list context propagates downward into the 2nd | |
832 | or 3rd argument, whichever is selected. | |
833 | .PP | |
834 | .Vb 3 | |
835 | \& $a = $ok ? $b : $c; # get a scalar | |
836 | \& @a = $ok ? @b : @c; # get an array | |
837 | \& $a = $ok ? @b : @c; # oops, that's just a count! | |
838 | .Ve | |
839 | .PP | |
840 | The operator may be assigned to if both the 2nd and 3rd arguments are | |
841 | legal lvalues (meaning that you can assign to them): | |
842 | .PP | |
843 | .Vb 1 | |
844 | \& ($a_or_b ? $a : $b) = $c; | |
845 | .Ve | |
846 | .PP | |
847 | Because this operator produces an assignable result, using assignments | |
848 | without parentheses will get you in trouble. For example, this: | |
849 | .PP | |
850 | .Vb 1 | |
851 | \& $a % 2 ? $a += 10 : $a += 2 | |
852 | .Ve | |
853 | .PP | |
854 | Really means this: | |
855 | .PP | |
856 | .Vb 1 | |
857 | \& (($a % 2) ? ($a += 10) : $a) += 2 | |
858 | .Ve | |
859 | .PP | |
860 | Rather than this: | |
861 | .PP | |
862 | .Vb 1 | |
863 | \& ($a % 2) ? ($a += 10) : ($a += 2) | |
864 | .Ve | |
865 | .PP | |
866 | That should probably be written more simply as: | |
867 | .PP | |
868 | .Vb 1 | |
869 | \& $a += ($a % 2) ? 10 : 2; | |
870 | .Ve | |
871 | .Sh "Assignment Operators" | |
872 | .IX Xref "assignment operator, assignment = **= += *= &= <<= &&= -= = |= >>= ||= .= %= ^= x=" | |
873 | .IX Subsection "Assignment Operators" | |
874 | \&\*(L"=\*(R" is the ordinary assignment operator. | |
875 | .PP | |
876 | Assignment operators work as in C. That is, | |
877 | .PP | |
878 | .Vb 1 | |
879 | \& $a += 2; | |
880 | .Ve | |
881 | .PP | |
882 | is equivalent to | |
883 | .PP | |
884 | .Vb 1 | |
885 | \& $a = $a + 2; | |
886 | .Ve | |
887 | .PP | |
888 | although without duplicating any side effects that dereferencing the lvalue | |
889 | might trigger, such as from \fItie()\fR. Other assignment operators work similarly. | |
890 | The following are recognized: | |
891 | .PP | |
892 | .Vb 4 | |
893 | \& **= += *= &= <<= &&= | |
894 | \& -= /= |= >>= ||= | |
895 | \& .= %= ^= | |
896 | \& x= | |
897 | .Ve | |
898 | .PP | |
899 | Although these are grouped by family, they all have the precedence | |
900 | of assignment. | |
901 | .PP | |
902 | Unlike in C, the scalar assignment operator produces a valid lvalue. | |
903 | Modifying an assignment is equivalent to doing the assignment and | |
904 | then modifying the variable that was assigned to. This is useful | |
905 | for modifying a copy of something, like this: | |
906 | .PP | |
907 | .Vb 1 | |
908 | \& ($tmp = $global) =~ tr [A-Z] [a-z]; | |
909 | .Ve | |
910 | .PP | |
911 | Likewise, | |
912 | .PP | |
913 | .Vb 1 | |
914 | \& ($a += 2) *= 3; | |
915 | .Ve | |
916 | .PP | |
917 | is equivalent to | |
918 | .PP | |
919 | .Vb 2 | |
920 | \& $a += 2; | |
921 | \& $a *= 3; | |
922 | .Ve | |
923 | .PP | |
924 | Similarly, a list assignment in list context produces the list of | |
925 | lvalues assigned to, and a list assignment in scalar context returns | |
926 | the number of elements produced by the expression on the right hand | |
927 | side of the assignment. | |
928 | .Sh "Comma Operator" | |
929 | .IX Xref "comma operator, comma ," | |
930 | .IX Subsection "Comma Operator" | |
931 | Binary \*(L",\*(R" is the comma operator. In scalar context it evaluates | |
932 | its left argument, throws that value away, then evaluates its right | |
933 | argument and returns that value. This is just like C's comma operator. | |
934 | .PP | |
935 | In list context, it's just the list argument separator, and inserts | |
936 | both its arguments into the list. | |
937 | .PP | |
938 | The \f(CW\*(C`=>\*(C'\fR operator is a synonym for the comma, but forces any word | |
939 | (consisting entirely of word characters) to its left to be interpreted | |
940 | as a string (as of 5.001). This includes words that might otherwise be | |
941 | considered a constant or function call. | |
942 | .PP | |
943 | .Vb 1 | |
944 | \& use constant FOO => "something"; | |
945 | .Ve | |
946 | .PP | |
947 | .Vb 1 | |
948 | \& my %h = ( FOO => 23 ); | |
949 | .Ve | |
950 | .PP | |
951 | is equivalent to: | |
952 | .PP | |
953 | .Vb 1 | |
954 | \& my %h = ("FOO", 23); | |
955 | .Ve | |
956 | .PP | |
957 | It is \fI\s-1NOT\s0\fR: | |
958 | .PP | |
959 | .Vb 1 | |
960 | \& my %h = ("something", 23); | |
961 | .Ve | |
962 | .PP | |
963 | If the argument on the left is not a word, it is first interpreted as | |
964 | an expression, and then the string value of that is used. | |
965 | .PP | |
966 | The \f(CW\*(C`=>\*(C'\fR operator is helpful in documenting the correspondence | |
967 | between keys and values in hashes, and other paired elements in lists. | |
968 | .PP | |
969 | .Vb 2 | |
970 | \& %hash = ( $key => $value ); | |
971 | \& login( $username => $password ); | |
972 | .Ve | |
973 | .Sh "List Operators (Rightward)" | |
974 | .IX Xref "operator, list, rightward list operator" | |
975 | .IX Subsection "List Operators (Rightward)" | |
976 | On its right side, a list operator has very low precedence, | |
977 | such that it controls all comma-separated expressions found there. | |
978 | The only operators with lower precedence are the logical operators | |
979 | \&\*(L"and\*(R", \*(L"or\*(R", and \*(L"not\*(R", which may be used to evaluate calls to list | |
980 | operators without the need for extra parentheses: | |
981 | .PP | |
982 | .Vb 2 | |
983 | \& open HANDLE, "filename" | |
984 | \& or die "Can't open: $!\en"; | |
985 | .Ve | |
986 | .PP | |
987 | See also discussion of list operators in \*(L"Terms and List Operators (Leftward)\*(R". | |
988 | .Sh "Logical Not" | |
989 | .IX Xref "operator, logical, not not" | |
990 | .IX Subsection "Logical Not" | |
991 | Unary \*(L"not\*(R" returns the logical negation of the expression to its right. | |
992 | It's the equivalent of \*(L"!\*(R" except for the very low precedence. | |
993 | .Sh "Logical And" | |
994 | .IX Xref "operator, logical, and and" | |
995 | .IX Subsection "Logical And" | |
996 | Binary \*(L"and\*(R" returns the logical conjunction of the two surrounding | |
997 | expressions. It's equivalent to && except for the very low | |
998 | precedence. This means that it short\-circuits: i.e., the right | |
999 | expression is evaluated only if the left expression is true. | |
1000 | .Sh "Logical or and Exclusive Or" | |
1001 | .IX Xref "operator, logical, or operator, logical, xor operator, logical, err operator, logical, defined or operator, logical, exclusive or or xor err" | |
1002 | .IX Subsection "Logical or and Exclusive Or" | |
1003 | Binary \*(L"or\*(R" returns the logical disjunction of the two surrounding | |
1004 | expressions. It's equivalent to || except for the very low precedence. | |
1005 | This makes it useful for control flow: | |
1006 | .PP | |
1007 | .Vb 1 | |
1008 | \& print FH $data or die "Can't write to FH: $!"; | |
1009 | .Ve | |
1010 | .PP | |
1011 | This means that it short\-circuits: i.e., the right expression is evaluated | |
1012 | only if the left expression is false. Due to its precedence, you should | |
1013 | probably avoid using this for assignment, only for control flow. | |
1014 | .PP | |
1015 | .Vb 3 | |
1016 | \& $a = $b or $c; # bug: this is wrong | |
1017 | \& ($a = $b) or $c; # really means this | |
1018 | \& $a = $b || $c; # better written this way | |
1019 | .Ve | |
1020 | .PP | |
1021 | However, when it's a list-context assignment and you're trying to use | |
1022 | \&\*(L"||\*(R" for control flow, you probably need \*(L"or\*(R" so that the assignment | |
1023 | takes higher precedence. | |
1024 | .PP | |
1025 | .Vb 2 | |
1026 | \& @info = stat($file) || die; # oops, scalar sense of stat! | |
1027 | \& @info = stat($file) or die; # better, now @info gets its due | |
1028 | .Ve | |
1029 | .PP | |
1030 | Then again, you could always use parentheses. | |
1031 | .PP | |
1032 | Binary \*(L"xor\*(R" returns the exclusive-OR of the two surrounding expressions. | |
1033 | It cannot short circuit, of course. | |
1034 | .Sh "C Operators Missing From Perl" | |
1035 | .IX Xref "operator, missing from perl & * typecasting (TYPE)" | |
1036 | .IX Subsection "C Operators Missing From Perl" | |
1037 | Here is what C has that Perl doesn't: | |
1038 | .IP "unary &" 8 | |
1039 | .IX Item "unary &" | |
1040 | Address-of operator. (But see the \*(L"\e\*(R" operator for taking a reference.) | |
1041 | .IP "unary *" 8 | |
1042 | .IX Item "unary *" | |
1043 | Dereference-address operator. (Perl's prefix dereferencing | |
1044 | operators are typed: $, @, %, and &.) | |
1045 | .IP "(\s-1TYPE\s0)" 8 | |
1046 | .IX Item "(TYPE)" | |
1047 | Type-casting operator. | |
1048 | .Sh "Quote and Quote-like Operators" | |
1049 | .IX Xref "operator, quote operator, quote-like q qq qx qw m qr s tr ' '' "" """" ` `` << escape sequence escape" | |
1050 | .IX Subsection "Quote and Quote-like Operators" | |
1051 | While we usually think of quotes as literal values, in Perl they | |
1052 | function as operators, providing various kinds of interpolating and | |
1053 | pattern matching capabilities. Perl provides customary quote characters | |
1054 | for these behaviors, but also provides a way for you to choose your | |
1055 | quote character for any of them. In the following table, a \f(CW\*(C`{}\*(C'\fR represents | |
1056 | any pair of delimiters you choose. | |
1057 | .PP | |
1058 | .Vb 10 | |
1059 | \& Customary Generic Meaning Interpolates | |
1060 | \& '' q{} Literal no | |
1061 | \& "" qq{} Literal yes | |
1062 | \& `` qx{} Command yes* | |
1063 | \& qw{} Word list no | |
1064 | \& // m{} Pattern match yes* | |
1065 | \& qr{} Pattern yes* | |
1066 | \& s{}{} Substitution yes* | |
1067 | \& tr{}{} Transliteration no (but see below) | |
1068 | \& <<EOF here-doc yes* | |
1069 | .Ve | |
1070 | .PP | |
1071 | .Vb 1 | |
1072 | \& * unless the delimiter is ''. | |
1073 | .Ve | |
1074 | .PP | |
1075 | Non-bracketing delimiters use the same character fore and aft, but the four | |
1076 | sorts of brackets (round, angle, square, curly) will all nest, which means | |
1077 | that | |
1078 | .PP | |
1079 | .Vb 1 | |
1080 | \& q{foo{bar}baz} | |
1081 | .Ve | |
1082 | .PP | |
1083 | is the same as | |
1084 | .PP | |
1085 | .Vb 1 | |
1086 | \& 'foo{bar}baz' | |
1087 | .Ve | |
1088 | .PP | |
1089 | Note, however, that this does not always work for quoting Perl code: | |
1090 | .PP | |
1091 | .Vb 1 | |
1092 | \& $s = q{ if($a eq "}") ... }; # WRONG | |
1093 | .Ve | |
1094 | .PP | |
1095 | is a syntax error. The \f(CW\*(C`Text::Balanced\*(C'\fR module (from \s-1CPAN\s0, and | |
1096 | starting from Perl 5.8 part of the standard distribution) is able | |
1097 | to do this properly. | |
1098 | .PP | |
1099 | There can be whitespace between the operator and the quoting | |
1100 | characters, except when \f(CW\*(C`#\*(C'\fR is being used as the quoting character. | |
1101 | \&\f(CW\*(C`q#foo#\*(C'\fR is parsed as the string \f(CW\*(C`foo\*(C'\fR, while \f(CW\*(C`q #foo#\*(C'\fR is the | |
1102 | operator \f(CW\*(C`q\*(C'\fR followed by a comment. Its argument will be taken | |
1103 | from the next line. This allows you to write: | |
1104 | .PP | |
1105 | .Vb 2 | |
1106 | \& s {foo} # Replace foo | |
1107 | \& {bar} # with bar. | |
1108 | .Ve | |
1109 | .PP | |
1110 | The following escape sequences are available in constructs that interpolate | |
1111 | and in transliterations. | |
1112 | .IX Xref "\t \n \r \f \b \a \e \x \0 \c \N" | |
1113 | .PP | |
1114 | .Vb 12 | |
1115 | \& \et tab (HT, TAB) | |
1116 | \& \en newline (NL) | |
1117 | \& \er return (CR) | |
1118 | \& \ef form feed (FF) | |
1119 | \& \eb backspace (BS) | |
1120 | \& \ea alarm (bell) (BEL) | |
1121 | \& \ee escape (ESC) | |
1122 | \& \e033 octal char (ESC) | |
1123 | \& \ex1b hex char (ESC) | |
1124 | \& \ex{263a} wide hex char (SMILEY) | |
1125 | \& \ec[ control char (ESC) | |
1126 | \& \eN{name} named Unicode character | |
1127 | .Ve | |
1128 | .PP | |
1129 | \&\fB\s-1NOTE\s0\fR: Unlike C and other languages, Perl has no \ev escape sequence for | |
1130 | the vertical tab (\s-1VT\s0 \- \s-1ASCII\s0 11). | |
1131 | .PP | |
1132 | The following escape sequences are available in constructs that interpolate | |
1133 | but not in transliterations. | |
1134 | .IX Xref "\l \u \L \U \E \Q" | |
1135 | .PP | |
1136 | .Vb 6 | |
1137 | \& \el lowercase next char | |
1138 | \& \eu uppercase next char | |
1139 | \& \eL lowercase till \eE | |
1140 | \& \eU uppercase till \eE | |
1141 | \& \eE end case modification | |
1142 | \& \eQ quote non-word characters till \eE | |
1143 | .Ve | |
1144 | .PP | |
1145 | If \f(CW\*(C`use locale\*(C'\fR is in effect, the case map used by \f(CW\*(C`\el\*(C'\fR, \f(CW\*(C`\eL\*(C'\fR, | |
1146 | \&\f(CW\*(C`\eu\*(C'\fR and \f(CW\*(C`\eU\*(C'\fR is taken from the current locale. See perllocale. | |
1147 | If Unicode (for example, \f(CW\*(C`\eN{}\*(C'\fR or wide hex characters of 0x100 or | |
1148 | beyond) is being used, the case map used by \f(CW\*(C`\el\*(C'\fR, \f(CW\*(C`\eL\*(C'\fR, \f(CW\*(C`\eu\*(C'\fR and | |
1149 | \&\f(CW\*(C`\eU\*(C'\fR is as defined by Unicode. For documentation of \f(CW\*(C`\eN{name}\*(C'\fR, | |
1150 | see charnames. | |
1151 | .PP | |
1152 | All systems use the virtual \f(CW"\en"\fR to represent a line terminator, | |
1153 | called a \*(L"newline\*(R". There is no such thing as an unvarying, physical | |
1154 | newline character. It is only an illusion that the operating system, | |
1155 | device drivers, C libraries, and Perl all conspire to preserve. Not all | |
1156 | systems read \f(CW"\er"\fR as \s-1ASCII\s0 \s-1CR\s0 and \f(CW"\en"\fR as \s-1ASCII\s0 \s-1LF\s0. For example, | |
1157 | on a Mac, these are reversed, and on systems without a line terminator, | |
1158 | printing \f(CW"\en"\fR may emit no actual data. In general, use \f(CW"\en"\fR when | |
1159 | you mean a \*(L"newline\*(R" for your system, but use the literal \s-1ASCII\s0 when you | |
1160 | need an exact character. For example, most networking protocols expect | |
1161 | and prefer a \s-1CR+LF\s0 (\f(CW"\e015\e012"\fR or \f(CW"\ecM\ecJ"\fR) for line terminators, | |
1162 | and although they often accept just \f(CW"\e012"\fR, they seldom tolerate just | |
1163 | \&\f(CW"\e015"\fR. If you get in the habit of using \f(CW"\en"\fR for networking, | |
1164 | you may be burned some day. | |
1165 | .IX Xref "newline line terminator eol end of line \n \r \r\n" | |
1166 | .PP | |
1167 | For constructs that do interpolate, variables beginning with \*(L"\f(CW\*(C`$\*(C'\fR\*(R" | |
1168 | or \*(L"\f(CW\*(C`@\*(C'\fR\*(R" are interpolated. Subscripted variables such as \f(CW$a[3]\fR or | |
1169 | \&\f(CW\*(C`$href\->{key}[0]\*(C'\fR are also interpolated, as are array and hash slices. | |
1170 | But method calls such as \f(CW\*(C`$obj\->meth\*(C'\fR are not. | |
1171 | .PP | |
1172 | Interpolating an array or slice interpolates the elements in order, | |
1173 | separated by the value of \f(CW$"\fR, so is equivalent to interpolating | |
1174 | \&\f(CW\*(C`join $", @array\*(C'\fR. \*(L"Punctuation\*(R" arrays such as \f(CW\*(C`@+\*(C'\fR are only | |
1175 | interpolated if the name is enclosed in braces \f(CW\*(C`@{+}\*(C'\fR. | |
1176 | .PP | |
1177 | You cannot include a literal \f(CW\*(C`$\*(C'\fR or \f(CW\*(C`@\*(C'\fR within a \f(CW\*(C`\eQ\*(C'\fR sequence. | |
1178 | An unescaped \f(CW\*(C`$\*(C'\fR or \f(CW\*(C`@\*(C'\fR interpolates the corresponding variable, | |
1179 | while escaping will cause the literal string \f(CW\*(C`\e$\*(C'\fR to be inserted. | |
1180 | You'll need to write something like \f(CW\*(C`m/\eQuser\eE\e@\eQhost/\*(C'\fR. | |
1181 | .PP | |
1182 | Patterns are subject to an additional level of interpretation as a | |
1183 | regular expression. This is done as a second pass, after variables are | |
1184 | interpolated, so that regular expressions may be incorporated into the | |
1185 | pattern from the variables. If this is not what you want, use \f(CW\*(C`\eQ\*(C'\fR to | |
1186 | interpolate a variable literally. | |
1187 | .PP | |
1188 | Apart from the behavior described above, Perl does not expand | |
1189 | multiple levels of interpolation. In particular, contrary to the | |
1190 | expectations of shell programmers, back-quotes do \fI\s-1NOT\s0\fR interpolate | |
1191 | within double quotes, nor do single quotes impede evaluation of | |
1192 | variables when used within double quotes. | |
1193 | .Sh "Regexp Quote-Like Operators" | |
1194 | .IX Xref "operator, regexp" | |
1195 | .IX Subsection "Regexp Quote-Like Operators" | |
1196 | Here are the quote-like operators that apply to pattern | |
1197 | matching and related activities. | |
1198 | .IP "?PATTERN?" 8 | |
1199 | .IX Xref "?" | |
1200 | .IX Item "?PATTERN?" | |
1201 | This is just like the \f(CW\*(C`/pattern/\*(C'\fR search, except that it matches only | |
1202 | once between calls to the \fIreset()\fR operator. This is a useful | |
1203 | optimization when you want to see only the first occurrence of | |
1204 | something in each file of a set of files, for instance. Only \f(CW\*(C`??\*(C'\fR | |
1205 | patterns local to the current package are reset. | |
1206 | .Sp | |
1207 | .Vb 7 | |
1208 | \& while (<>) { | |
1209 | \& if (?^$?) { | |
1210 | \& # blank line between header and body | |
1211 | \& } | |
1212 | \& } continue { | |
1213 | \& reset if eof; # clear ?? status for next file | |
1214 | \& } | |
1215 | .Ve | |
1216 | .Sp | |
1217 | This usage is vaguely deprecated, which means it just might possibly | |
1218 | be removed in some distant future version of Perl, perhaps somewhere | |
1219 | around the year 2168. | |
1220 | .IP "m/PATTERN/cgimosx" 8 | |
1221 | .IX Xref "m operator, match regexp, options regexp regex, options regex c i m o s x" | |
1222 | .IX Item "m/PATTERN/cgimosx" | |
1223 | .PD 0 | |
1224 | .IP "/PATTERN/cgimosx" 8 | |
1225 | .IX Item "/PATTERN/cgimosx" | |
1226 | .PD | |
1227 | Searches a string for a pattern match, and in scalar context returns | |
1228 | true if it succeeds, false if it fails. If no string is specified | |
1229 | via the \f(CW\*(C`=~\*(C'\fR or \f(CW\*(C`!~\*(C'\fR operator, the \f(CW$_\fR string is searched. (The | |
1230 | string specified with \f(CW\*(C`=~\*(C'\fR need not be an lvalue\*(--it may be the | |
1231 | result of an expression evaluation, but remember the \f(CW\*(C`=~\*(C'\fR binds | |
1232 | rather tightly.) See also perlre. See perllocale for | |
1233 | discussion of additional considerations that apply when \f(CW\*(C`use locale\*(C'\fR | |
1234 | is in effect. | |
1235 | .Sp | |
1236 | Options are: | |
1237 | .Sp | |
1238 | .Vb 7 | |
1239 | \& c Do not reset search position on a failed match when /g is in effect. | |
1240 | \& g Match globally, i.e., find all occurrences. | |
1241 | \& i Do case-insensitive pattern matching. | |
1242 | \& m Treat string as multiple lines. | |
1243 | \& o Compile pattern only once. | |
1244 | \& s Treat string as single line. | |
1245 | \& x Use extended regular expressions. | |
1246 | .Ve | |
1247 | .Sp | |
1248 | If \*(L"/\*(R" is the delimiter then the initial \f(CW\*(C`m\*(C'\fR is optional. With the \f(CW\*(C`m\*(C'\fR | |
1249 | you can use any pair of non\-alphanumeric, non-whitespace characters | |
1250 | as delimiters. This is particularly useful for matching path names | |
1251 | that contain \*(L"/\*(R", to avoid \s-1LTS\s0 (leaning toothpick syndrome). If \*(L"?\*(R" is | |
1252 | the delimiter, then the match-only-once rule of \f(CW\*(C`?PATTERN?\*(C'\fR applies. | |
1253 | If \*(L"'\*(R" is the delimiter, no interpolation is performed on the \s-1PATTERN\s0. | |
1254 | .Sp | |
1255 | \&\s-1PATTERN\s0 may contain variables, which will be interpolated (and the | |
1256 | pattern recompiled) every time the pattern search is evaluated, except | |
1257 | for when the delimiter is a single quote. (Note that \f(CW$(\fR, \f(CW$)\fR, and | |
1258 | \&\f(CW$|\fR are not interpolated because they look like end-of-string tests.) | |
1259 | If you want such a pattern to be compiled only once, add a \f(CW\*(C`/o\*(C'\fR after | |
1260 | the trailing delimiter. This avoids expensive run-time recompilations, | |
1261 | and is useful when the value you are interpolating won't change over | |
1262 | the life of the script. However, mentioning \f(CW\*(C`/o\*(C'\fR constitutes a promise | |
1263 | that you won't change the variables in the pattern. If you change them, | |
1264 | Perl won't even notice. See also \*(L"qr/STRING/imosx\*(R". | |
1265 | .Sp | |
1266 | If the \s-1PATTERN\s0 evaluates to the empty string, the last | |
1267 | \&\fIsuccessfully\fR matched regular expression is used instead. In this | |
1268 | case, only the \f(CW\*(C`g\*(C'\fR and \f(CW\*(C`c\*(C'\fR flags on the empty pattern are honoured \- | |
1269 | the other flags are taken from the original pattern. If no match has | |
1270 | previously succeeded, this will (silently) act instead as a genuine | |
1271 | empty pattern (which will always match). | |
1272 | .Sp | |
1273 | If the \f(CW\*(C`/g\*(C'\fR option is not used, \f(CW\*(C`m//\*(C'\fR in list context returns a | |
1274 | list consisting of the subexpressions matched by the parentheses in the | |
1275 | pattern, i.e., (\f(CW$1\fR, \f(CW$2\fR, \f(CW$3\fR...). (Note that here \f(CW$1\fR etc. are | |
1276 | also set, and that this differs from Perl 4's behavior.) When there are | |
1277 | no parentheses in the pattern, the return value is the list \f(CW\*(C`(1)\*(C'\fR for | |
1278 | success. With or without parentheses, an empty list is returned upon | |
1279 | failure. | |
1280 | .Sp | |
1281 | Examples: | |
1282 | .Sp | |
1283 | .Vb 2 | |
1284 | \& open(TTY, '/dev/tty'); | |
1285 | \& <TTY> =~ /^y/i && foo(); # do foo if desired | |
1286 | .Ve | |
1287 | .Sp | |
1288 | .Vb 1 | |
1289 | \& if (/Version: *([0-9.]*)/) { $version = $1; } | |
1290 | .Ve | |
1291 | .Sp | |
1292 | .Vb 1 | |
1293 | \& next if m#^/usr/spool/uucp#; | |
1294 | .Ve | |
1295 | .Sp | |
1296 | .Vb 5 | |
1297 | \& # poor man's grep | |
1298 | \& $arg = shift; | |
1299 | \& while (<>) { | |
1300 | \& print if /$arg/o; # compile only once | |
1301 | \& } | |
1302 | .Ve | |
1303 | .Sp | |
1304 | .Vb 1 | |
1305 | \& if (($F1, $F2, $Etc) = ($foo =~ /^(\eS+)\es+(\eS+)\es*(.*)/)) | |
1306 | .Ve | |
1307 | .Sp | |
1308 | This last example splits \f(CW$foo\fR into the first two words and the | |
1309 | remainder of the line, and assigns those three fields to \f(CW$F1\fR, \f(CW$F2\fR, and | |
1310 | \&\f(CW$Etc\fR. The conditional is true if any variables were assigned, i.e., if | |
1311 | the pattern matched. | |
1312 | .Sp | |
1313 | The \f(CW\*(C`/g\*(C'\fR modifier specifies global pattern matching\*(--that is, | |
1314 | matching as many times as possible within the string. How it behaves | |
1315 | depends on the context. In list context, it returns a list of the | |
1316 | substrings matched by any capturing parentheses in the regular | |
1317 | expression. If there are no parentheses, it returns a list of all | |
1318 | the matched strings, as if there were parentheses around the whole | |
1319 | pattern. | |
1320 | .Sp | |
1321 | In scalar context, each execution of \f(CW\*(C`m//g\*(C'\fR finds the next match, | |
1322 | returning true if it matches, and false if there is no further match. | |
1323 | The position after the last match can be read or set using the \fIpos()\fR | |
1324 | function; see \*(L"pos\*(R" in perlfunc. A failed match normally resets the | |
1325 | search position to the beginning of the string, but you can avoid that | |
1326 | by adding the \f(CW\*(C`/c\*(C'\fR modifier (e.g. \f(CW\*(C`m//gc\*(C'\fR). Modifying the target | |
1327 | string also resets the search position. | |
1328 | .Sp | |
1329 | You can intermix \f(CW\*(C`m//g\*(C'\fR matches with \f(CW\*(C`m/\eG.../g\*(C'\fR, where \f(CW\*(C`\eG\*(C'\fR is a | |
1330 | zero-width assertion that matches the exact position where the previous | |
1331 | \&\f(CW\*(C`m//g\*(C'\fR, if any, left off. Without the \f(CW\*(C`/g\*(C'\fR modifier, the \f(CW\*(C`\eG\*(C'\fR assertion | |
1332 | still anchors at \fIpos()\fR, but the match is of course only attempted once. | |
1333 | Using \f(CW\*(C`\eG\*(C'\fR without \f(CW\*(C`/g\*(C'\fR on a target string that has not previously had a | |
1334 | \&\f(CW\*(C`/g\*(C'\fR match applied to it is the same as using the \f(CW\*(C`\eA\*(C'\fR assertion to match | |
1335 | the beginning of the string. Note also that, currently, \f(CW\*(C`\eG\*(C'\fR is only | |
1336 | properly supported when anchored at the very beginning of the pattern. | |
1337 | .Sp | |
1338 | Examples: | |
1339 | .Sp | |
1340 | .Vb 2 | |
1341 | \& # list context | |
1342 | \& ($one,$five,$fifteen) = (`uptime` =~ /(\ed+\e.\ed+)/g); | |
1343 | .Ve | |
1344 | .Sp | |
1345 | .Vb 8 | |
1346 | \& # scalar context | |
1347 | \& $/ = ""; | |
1348 | \& while (defined($paragraph = <>)) { | |
1349 | \& while ($paragraph =~ /[a-z]['")]*[.!?]+['")]*\es/g) { | |
1350 | \& $sentences++; | |
1351 | \& } | |
1352 | \& } | |
1353 | \& print "$sentences\en"; | |
1354 | .Ve | |
1355 | .Sp | |
1356 | .Vb 11 | |
1357 | \& # using m//gc with \eG | |
1358 | \& $_ = "ppooqppqq"; | |
1359 | \& while ($i++ < 2) { | |
1360 | \& print "1: '"; | |
1361 | \& print $1 while /(o)/gc; print "', pos=", pos, "\en"; | |
1362 | \& print "2: '"; | |
1363 | \& print $1 if /\eG(q)/gc; print "', pos=", pos, "\en"; | |
1364 | \& print "3: '"; | |
1365 | \& print $1 while /(p)/gc; print "', pos=", pos, "\en"; | |
1366 | \& } | |
1367 | \& print "Final: '$1', pos=",pos,"\en" if /\eG(.)/; | |
1368 | .Ve | |
1369 | .Sp | |
1370 | The last example should print: | |
1371 | .Sp | |
1372 | .Vb 7 | |
1373 | \& 1: 'oo', pos=4 | |
1374 | \& 2: 'q', pos=5 | |
1375 | \& 3: 'pp', pos=7 | |
1376 | \& 1: '', pos=7 | |
1377 | \& 2: 'q', pos=8 | |
1378 | \& 3: '', pos=8 | |
1379 | \& Final: 'q', pos=8 | |
1380 | .Ve | |
1381 | .Sp | |
1382 | Notice that the final match matched \f(CW\*(C`q\*(C'\fR instead of \f(CW\*(C`p\*(C'\fR, which a match | |
1383 | without the \f(CW\*(C`\eG\*(C'\fR anchor would have done. Also note that the final match | |
1384 | did not update \f(CW\*(C`pos\*(C'\fR \*(-- \f(CW\*(C`pos\*(C'\fR is only updated on a \f(CW\*(C`/g\*(C'\fR match. If the | |
1385 | final match did indeed match \f(CW\*(C`p\*(C'\fR, it's a good bet that you're running an | |
1386 | older (pre\-5.6.0) Perl. | |
1387 | .Sp | |
1388 | A useful idiom for \f(CW\*(C`lex\*(C'\fR\-like scanners is \f(CW\*(C`/\eG.../gc\*(C'\fR. You can | |
1389 | combine several regexps like this to process a string part\-by\-part, | |
1390 | doing different actions depending on which regexp matched. Each | |
1391 | regexp tries to match where the previous one leaves off. | |
1392 | .Sp | |
1393 | .Vb 14 | |
1394 | \& $_ = <<'EOL'; | |
1395 | \& $url = new URI::URL "http://www/"; die if $url eq "xXx"; | |
1396 | \& EOL | |
1397 | \& LOOP: | |
1398 | \& { | |
1399 | \& print(" digits"), redo LOOP if /\eG\ed+\eb[,.;]?\es*/gc; | |
1400 | \& print(" lowercase"), redo LOOP if /\eG[a-z]+\eb[,.;]?\es*/gc; | |
1401 | \& print(" UPPERCASE"), redo LOOP if /\eG[A-Z]+\eb[,.;]?\es*/gc; | |
1402 | \& print(" Capitalized"), redo LOOP if /\eG[A-Z][a-z]+\eb[,.;]?\es*/gc; | |
1403 | \& print(" MiXeD"), redo LOOP if /\eG[A-Za-z]+\eb[,.;]?\es*/gc; | |
1404 | \& print(" alphanumeric"), redo LOOP if /\eG[A-Za-z0-9]+\eb[,.;]?\es*/gc; | |
1405 | \& print(" line-noise"), redo LOOP if /\eG[^A-Za-z0-9]+/gc; | |
1406 | \& print ". That's all!\en"; | |
1407 | \& } | |
1408 | .Ve | |
1409 | .Sp | |
1410 | Here is the output (split into several lines): | |
1411 | .Sp | |
1412 | .Vb 4 | |
1413 | \& line-noise lowercase line-noise lowercase UPPERCASE line-noise | |
1414 | \& UPPERCASE line-noise lowercase line-noise lowercase line-noise | |
1415 | \& lowercase lowercase line-noise lowercase lowercase line-noise | |
1416 | \& MiXeD line-noise. That's all! | |
1417 | .Ve | |
1418 | .IP "q/STRING/" 8 | |
1419 | .IX Xref "q quote, single ' ''" | |
1420 | .IX Item "q/STRING/" | |
1421 | .PD 0 | |
1422 | .ie n .IP "'STRING'" 8 | |
1423 | .el .IP "\f(CW'STRING'\fR" 8 | |
1424 | .IX Item "'STRING'" | |
1425 | .PD | |
1426 | A single\-quoted, literal string. A backslash represents a backslash | |
1427 | unless followed by the delimiter or another backslash, in which case | |
1428 | the delimiter or backslash is interpolated. | |
1429 | .Sp | |
1430 | .Vb 3 | |
1431 | \& $foo = q!I said, "You said, 'She said it.'"!; | |
1432 | \& $bar = q('This is it.'); | |
1433 | \& $baz = '\en'; # a two-character string | |
1434 | .Ve | |
1435 | .IP "qq/STRING/" 8 | |
1436 | .IX Xref "qq quote, double "" """"" | |
1437 | .IX Item "qq/STRING/" | |
1438 | .PD 0 | |
1439 | .ie n .IP """\s-1STRING\s0""" 8 | |
1440 | .el .IP "``\s-1STRING\s0''" 8 | |
1441 | .IX Item "STRING" | |
1442 | .PD | |
1443 | A double\-quoted, interpolated string. | |
1444 | .Sp | |
1445 | .Vb 4 | |
1446 | \& $_ .= qq | |
1447 | \& (*** The previous line contains the naughty word "$1".\en) | |
1448 | \& if /\eb(tcl|java|python)\eb/i; # :-) | |
1449 | \& $baz = "\en"; # a one-character string | |
1450 | .Ve | |
1451 | .IP "qr/STRING/imosx" 8 | |
1452 | .IX Xref "qr i m o s x" | |
1453 | .IX Item "qr/STRING/imosx" | |
1454 | This operator quotes (and possibly compiles) its \fI\s-1STRING\s0\fR as a regular | |
1455 | expression. \fI\s-1STRING\s0\fR is interpolated the same way as \fI\s-1PATTERN\s0\fR | |
1456 | in \f(CW\*(C`m/PATTERN/\*(C'\fR. If \*(L"'\*(R" is used as the delimiter, no interpolation | |
1457 | is done. Returns a Perl value which may be used instead of the | |
1458 | corresponding \f(CW\*(C`/STRING/imosx\*(C'\fR expression. | |
1459 | .Sp | |
1460 | For example, | |
1461 | .Sp | |
1462 | .Vb 2 | |
1463 | \& $rex = qr/my.STRING/is; | |
1464 | \& s/$rex/foo/; | |
1465 | .Ve | |
1466 | .Sp | |
1467 | is equivalent to | |
1468 | .Sp | |
1469 | .Vb 1 | |
1470 | \& s/my.STRING/foo/is; | |
1471 | .Ve | |
1472 | .Sp | |
1473 | The result may be used as a subpattern in a match: | |
1474 | .Sp | |
1475 | .Vb 4 | |
1476 | \& $re = qr/$pattern/; | |
1477 | \& $string =~ /foo${re}bar/; # can be interpolated in other patterns | |
1478 | \& $string =~ $re; # or used standalone | |
1479 | \& $string =~ /$re/; # or this way | |
1480 | .Ve | |
1481 | .Sp | |
1482 | Since Perl may compile the pattern at the moment of execution of the \fIqr()\fR | |
1483 | operator, using \fIqr()\fR may have speed advantages in some situations, | |
1484 | notably if the result of \fIqr()\fR is used standalone: | |
1485 | .Sp | |
1486 | .Vb 11 | |
1487 | \& sub match { | |
1488 | \& my $patterns = shift; | |
1489 | \& my @compiled = map qr/$_/i, @$patterns; | |
1490 | \& grep { | |
1491 | \& my $success = 0; | |
1492 | \& foreach my $pat (@compiled) { | |
1493 | \& $success = 1, last if /$pat/; | |
1494 | \& } | |
1495 | \& $success; | |
1496 | \& } @_; | |
1497 | \& } | |
1498 | .Ve | |
1499 | .Sp | |
1500 | Precompilation of the pattern into an internal representation at | |
1501 | the moment of \fIqr()\fR avoids a need to recompile the pattern every | |
1502 | time a match \f(CW\*(C`/$pat/\*(C'\fR is attempted. (Perl has many other internal | |
1503 | optimizations, but none would be triggered in the above example if | |
1504 | we did not use the \fIqr()\fR operator.) | |
1505 | .Sp | |
1506 | Options are: | |
1507 | .Sp | |
1508 | .Vb 5 | |
1509 | \& i Do case-insensitive pattern matching. | |
1510 | \& m Treat string as multiple lines. | |
1511 | \& o Compile pattern only once. | |
1512 | \& s Treat string as single line. | |
1513 | \& x Use extended regular expressions. | |
1514 | .Ve | |
1515 | .Sp | |
1516 | See perlre for additional information on valid syntax for \s-1STRING\s0, and | |
1517 | for a detailed look at the semantics of regular expressions. | |
1518 | .IP "qx/STRING/" 8 | |
1519 | .IX Xref "qx ` `` backtick" | |
1520 | .IX Item "qx/STRING/" | |
1521 | .PD 0 | |
1522 | .IP "`STRING`" 8 | |
1523 | .IX Item "`STRING`" | |
1524 | .PD | |
1525 | A string which is (possibly) interpolated and then executed as a | |
1526 | system command with \f(CW\*(C`/bin/sh\*(C'\fR or its equivalent. Shell wildcards, | |
1527 | pipes, and redirections will be honored. The collected standard | |
1528 | output of the command is returned; standard error is unaffected. In | |
1529 | scalar context, it comes back as a single (potentially multi\-line) | |
1530 | string, or undef if the command failed. In list context, returns a | |
1531 | list of lines (however you've defined lines with $/ or | |
1532 | \&\f(CW$INPUT_RECORD_SEPARATOR\fR), or an empty list if the command failed. | |
1533 | .Sp | |
1534 | Because backticks do not affect standard error, use shell file descriptor | |
1535 | syntax (assuming the shell supports this) if you care to address this. | |
1536 | To capture a command's \s-1STDERR\s0 and \s-1STDOUT\s0 together: | |
1537 | .Sp | |
1538 | .Vb 1 | |
1539 | \& $output = `cmd 2>&1`; | |
1540 | .Ve | |
1541 | .Sp | |
1542 | To capture a command's \s-1STDOUT\s0 but discard its \s-1STDERR:\s0 | |
1543 | .Sp | |
1544 | .Vb 1 | |
1545 | \& $output = `cmd 2>/dev/null`; | |
1546 | .Ve | |
1547 | .Sp | |
1548 | To capture a command's \s-1STDERR\s0 but discard its \s-1STDOUT\s0 (ordering is | |
1549 | important here): | |
1550 | .Sp | |
1551 | .Vb 1 | |
1552 | \& $output = `cmd 2>&1 1>/dev/null`; | |
1553 | .Ve | |
1554 | .Sp | |
1555 | To exchange a command's \s-1STDOUT\s0 and \s-1STDERR\s0 in order to capture the \s-1STDERR\s0 | |
1556 | but leave its \s-1STDOUT\s0 to come out the old \s-1STDERR:\s0 | |
1557 | .Sp | |
1558 | .Vb 1 | |
1559 | \& $output = `cmd 3>&1 1>&2 2>&3 3>&-`; | |
1560 | .Ve | |
1561 | .Sp | |
1562 | To read both a command's \s-1STDOUT\s0 and its \s-1STDERR\s0 separately, it's easiest | |
1563 | to redirect them separately to files, and then read from those files | |
1564 | when the program is done: | |
1565 | .Sp | |
1566 | .Vb 1 | |
1567 | \& system("program args 1>program.stdout 2>program.stderr"); | |
1568 | .Ve | |
1569 | .Sp | |
1570 | Using single-quote as a delimiter protects the command from Perl's | |
1571 | double-quote interpolation, passing it on to the shell instead: | |
1572 | .Sp | |
1573 | .Vb 2 | |
1574 | \& $perl_info = qx(ps $$); # that's Perl's $$ | |
1575 | \& $shell_info = qx'ps $$'; # that's the new shell's $$ | |
1576 | .Ve | |
1577 | .Sp | |
1578 | How that string gets evaluated is entirely subject to the command | |
1579 | interpreter on your system. On most platforms, you will have to protect | |
1580 | shell metacharacters if you want them treated literally. This is in | |
1581 | practice difficult to do, as it's unclear how to escape which characters. | |
1582 | See perlsec for a clean and safe example of a manual \fIfork()\fR and \fIexec()\fR | |
1583 | to emulate backticks safely. | |
1584 | .Sp | |
1585 | On some platforms (notably DOS-like ones), the shell may not be | |
1586 | capable of dealing with multiline commands, so putting newlines in | |
1587 | the string may not get you what you want. You may be able to evaluate | |
1588 | multiple commands in a single line by separating them with the command | |
1589 | separator character, if your shell supports that (e.g. \f(CW\*(C`;\*(C'\fR on many Unix | |
1590 | shells; \f(CW\*(C`&\*(C'\fR on the Windows \s-1NT\s0 \f(CW\*(C`cmd\*(C'\fR shell). | |
1591 | .Sp | |
1592 | Beginning with v5.6.0, Perl will attempt to flush all files opened for | |
1593 | output before starting the child process, but this may not be supported | |
1594 | on some platforms (see perlport). To be safe, you may need to set | |
1595 | \&\f(CW$|\fR ($AUTOFLUSH in English) or call the \f(CW\*(C`autoflush()\*(C'\fR method of | |
1596 | \&\f(CW\*(C`IO::Handle\*(C'\fR on any open handles. | |
1597 | .Sp | |
1598 | Beware that some command shells may place restrictions on the length | |
1599 | of the command line. You must ensure your strings don't exceed this | |
1600 | limit after any necessary interpolations. See the platform-specific | |
1601 | release notes for more details about your particular environment. | |
1602 | .Sp | |
1603 | Using this operator can lead to programs that are difficult to port, | |
1604 | because the shell commands called vary between systems, and may in | |
1605 | fact not be present at all. As one example, the \f(CW\*(C`type\*(C'\fR command under | |
1606 | the \s-1POSIX\s0 shell is very different from the \f(CW\*(C`type\*(C'\fR command under \s-1DOS\s0. | |
1607 | That doesn't mean you should go out of your way to avoid backticks | |
1608 | when they're the right way to get something done. Perl was made to be | |
1609 | a glue language, and one of the things it glues together is commands. | |
1610 | Just understand what you're getting yourself into. | |
1611 | .Sp | |
1612 | See \*(L"I/O Operators\*(R" for more discussion. | |
1613 | .IP "qw/STRING/" 8 | |
1614 | .IX Xref "qw quote, list quote, words" | |
1615 | .IX Item "qw/STRING/" | |
1616 | Evaluates to a list of the words extracted out of \s-1STRING\s0, using embedded | |
1617 | whitespace as the word delimiters. It can be understood as being roughly | |
1618 | equivalent to: | |
1619 | .Sp | |
1620 | .Vb 1 | |
1621 | \& split(' ', q/STRING/); | |
1622 | .Ve | |
1623 | .Sp | |
1624 | the differences being that it generates a real list at compile time, and | |
1625 | in scalar context it returns the last element in the list. So | |
1626 | this expression: | |
1627 | .Sp | |
1628 | .Vb 1 | |
1629 | \& qw(foo bar baz) | |
1630 | .Ve | |
1631 | .Sp | |
1632 | is semantically equivalent to the list: | |
1633 | .Sp | |
1634 | .Vb 1 | |
1635 | \& 'foo', 'bar', 'baz' | |
1636 | .Ve | |
1637 | .Sp | |
1638 | Some frequently seen examples: | |
1639 | .Sp | |
1640 | .Vb 2 | |
1641 | \& use POSIX qw( setlocale localeconv ); | |
1642 | \& @EXPORT = qw( foo bar baz ); | |
1643 | .Ve | |
1644 | .Sp | |
1645 | A common mistake is to try to separate the words with commas or to | |
1646 | put comments into a multi-line \f(CW\*(C`qw\*(C'\fR\-string. For this reason, the | |
1647 | \&\f(CW\*(C`use warnings\*(C'\fR pragma and the \fB\-w\fR switch (that is, the \f(CW$^W\fR variable) | |
1648 | produce warnings if the \s-1STRING\s0 contains the \*(L",\*(R" or the \*(L"#\*(R" character. | |
1649 | .IP "s/PATTERN/REPLACEMENT/egimosx" 8 | |
1650 | .IX Xref "substitute substitution replace regexp, replace regexp, substitute e g i m o s x" | |
1651 | .IX Item "s/PATTERN/REPLACEMENT/egimosx" | |
1652 | Searches a string for a pattern, and if found, replaces that pattern | |
1653 | with the replacement text and returns the number of substitutions | |
1654 | made. Otherwise it returns false (specifically, the empty string). | |
1655 | .Sp | |
1656 | If no string is specified via the \f(CW\*(C`=~\*(C'\fR or \f(CW\*(C`!~\*(C'\fR operator, the \f(CW$_\fR | |
1657 | variable is searched and modified. (The string specified with \f(CW\*(C`=~\*(C'\fR must | |
1658 | be a scalar variable, an array element, a hash element, or an assignment | |
1659 | to one of those, i.e., an lvalue.) | |
1660 | .Sp | |
1661 | If the delimiter chosen is a single quote, no interpolation is | |
1662 | done on either the \s-1PATTERN\s0 or the \s-1REPLACEMENT\s0. Otherwise, if the | |
1663 | \&\s-1PATTERN\s0 contains a $ that looks like a variable rather than an | |
1664 | end-of-string test, the variable will be interpolated into the pattern | |
1665 | at run\-time. If you want the pattern compiled only once the first time | |
1666 | the variable is interpolated, use the \f(CW\*(C`/o\*(C'\fR option. If the pattern | |
1667 | evaluates to the empty string, the last successfully executed regular | |
1668 | expression is used instead. See perlre for further explanation on these. | |
1669 | See perllocale for discussion of additional considerations that apply | |
1670 | when \f(CW\*(C`use locale\*(C'\fR is in effect. | |
1671 | .Sp | |
1672 | Options are: | |
1673 | .Sp | |
1674 | .Vb 7 | |
1675 | \& e Evaluate the right side as an expression. | |
1676 | \& g Replace globally, i.e., all occurrences. | |
1677 | \& i Do case-insensitive pattern matching. | |
1678 | \& m Treat string as multiple lines. | |
1679 | \& o Compile pattern only once. | |
1680 | \& s Treat string as single line. | |
1681 | \& x Use extended regular expressions. | |
1682 | .Ve | |
1683 | .Sp | |
1684 | Any non\-alphanumeric, non-whitespace delimiter may replace the | |
1685 | slashes. If single quotes are used, no interpretation is done on the | |
1686 | replacement string (the \f(CW\*(C`/e\*(C'\fR modifier overrides this, however). Unlike | |
1687 | Perl 4, Perl 5 treats backticks as normal delimiters; the replacement | |
1688 | text is not evaluated as a command. If the | |
1689 | \&\s-1PATTERN\s0 is delimited by bracketing quotes, the \s-1REPLACEMENT\s0 has its own | |
1690 | pair of quotes, which may or may not be bracketing quotes, e.g., | |
1691 | \&\f(CW\*(C`s(foo)(bar)\*(C'\fR or \f(CW\*(C`s<foo>/bar/\*(C'\fR. A \f(CW\*(C`/e\*(C'\fR will cause the | |
1692 | replacement portion to be treated as a full-fledged Perl expression | |
1693 | and evaluated right then and there. It is, however, syntax checked at | |
1694 | compile\-time. A second \f(CW\*(C`e\*(C'\fR modifier will cause the replacement portion | |
1695 | to be \f(CW\*(C`eval\*(C'\fRed before being run as a Perl expression. | |
1696 | .Sp | |
1697 | Examples: | |
1698 | .Sp | |
1699 | .Vb 1 | |
1700 | \& s/\ebgreen\eb/mauve/g; # don't change wintergreen | |
1701 | .Ve | |
1702 | .Sp | |
1703 | .Vb 1 | |
1704 | \& $path =~ s|/usr/bin|/usr/local/bin|; | |
1705 | .Ve | |
1706 | .Sp | |
1707 | .Vb 1 | |
1708 | \& s/Login: $foo/Login: $bar/; # run-time pattern | |
1709 | .Ve | |
1710 | .Sp | |
1711 | .Vb 1 | |
1712 | \& ($foo = $bar) =~ s/this/that/; # copy first, then change | |
1713 | .Ve | |
1714 | .Sp | |
1715 | .Vb 1 | |
1716 | \& $count = ($paragraph =~ s/Mister\eb/Mr./g); # get change-count | |
1717 | .Ve | |
1718 | .Sp | |
1719 | .Vb 4 | |
1720 | \& $_ = 'abc123xyz'; | |
1721 | \& s/\ed+/$&*2/e; # yields 'abc246xyz' | |
1722 | \& s/\ed+/sprintf("%5d",$&)/e; # yields 'abc 246xyz' | |
1723 | \& s/\ew/$& x 2/eg; # yields 'aabbcc 224466xxyyzz' | |
1724 | .Ve | |
1725 | .Sp | |
1726 | .Vb 3 | |
1727 | \& s/%(.)/$percent{$1}/g; # change percent escapes; no /e | |
1728 | \& s/%(.)/$percent{$1} || $&/ge; # expr now, so /e | |
1729 | \& s/^=(\ew+)/&pod($1)/ge; # use function call | |
1730 | .Ve | |
1731 | .Sp | |
1732 | .Vb 3 | |
1733 | \& # expand variables in $_, but dynamics only, using | |
1734 | \& # symbolic dereferencing | |
1735 | \& s/\e$(\ew+)/${$1}/g; | |
1736 | .Ve | |
1737 | .Sp | |
1738 | .Vb 2 | |
1739 | \& # Add one to the value of any numbers in the string | |
1740 | \& s/(\ed+)/1 + $1/eg; | |
1741 | .Ve | |
1742 | .Sp | |
1743 | .Vb 4 | |
1744 | \& # This will expand any embedded scalar variable | |
1745 | \& # (including lexicals) in $_ : First $1 is interpolated | |
1746 | \& # to the variable name, and then evaluated | |
1747 | \& s/(\e$\ew+)/$1/eeg; | |
1748 | .Ve | |
1749 | .Sp | |
1750 | .Vb 6 | |
1751 | \& # Delete (most) C comments. | |
1752 | \& $program =~ s { | |
1753 | \& /\e* # Match the opening delimiter. | |
1754 | \& .*? # Match a minimal number of characters. | |
1755 | \& \e*/ # Match the closing delimiter. | |
1756 | \& } []gsx; | |
1757 | .Ve | |
1758 | .Sp | |
1759 | .Vb 1 | |
1760 | \& s/^\es*(.*?)\es*$/$1/; # trim whitespace in $_, expensively | |
1761 | .Ve | |
1762 | .Sp | |
1763 | .Vb 4 | |
1764 | \& for ($variable) { # trim whitespace in $variable, cheap | |
1765 | \& s/^\es+//; | |
1766 | \& s/\es+$//; | |
1767 | \& } | |
1768 | .Ve | |
1769 | .Sp | |
1770 | .Vb 1 | |
1771 | \& s/([^ ]*) *([^ ]*)/$2 $1/; # reverse 1st two fields | |
1772 | .Ve | |
1773 | .Sp | |
1774 | Note the use of $ instead of \e in the last example. Unlike | |
1775 | \&\fBsed\fR, we use the \e<\fIdigit\fR> form in only the left hand side. | |
1776 | Anywhere else it's $<\fIdigit\fR>. | |
1777 | .Sp | |
1778 | Occasionally, you can't use just a \f(CW\*(C`/g\*(C'\fR to get all the changes | |
1779 | to occur that you might want. Here are two common cases: | |
1780 | .Sp | |
1781 | .Vb 2 | |
1782 | \& # put commas in the right places in an integer | |
1783 | \& 1 while s/(\ed)(\ed\ed\ed)(?!\ed)/$1,$2/g; | |
1784 | .Ve | |
1785 | .Sp | |
1786 | .Vb 2 | |
1787 | \& # expand tabs to 8-column spacing | |
1788 | \& 1 while s/\et+/' ' x (length($&)*8 - length($`)%8)/e; | |
1789 | .Ve | |
1790 | .IP "tr/SEARCHLIST/REPLACEMENTLIST/cds" 8 | |
1791 | .IX Xref "tr y transliterate c d s" | |
1792 | .IX Item "tr/SEARCHLIST/REPLACEMENTLIST/cds" | |
1793 | .PD 0 | |
1794 | .IP "y/SEARCHLIST/REPLACEMENTLIST/cds" 8 | |
1795 | .IX Item "y/SEARCHLIST/REPLACEMENTLIST/cds" | |
1796 | .PD | |
1797 | Transliterates all occurrences of the characters found in the search list | |
1798 | with the corresponding character in the replacement list. It returns | |
1799 | the number of characters replaced or deleted. If no string is | |
1800 | specified via the =~ or !~ operator, the \f(CW$_\fR string is transliterated. (The | |
1801 | string specified with =~ must be a scalar variable, an array element, a | |
1802 | hash element, or an assignment to one of those, i.e., an lvalue.) | |
1803 | .Sp | |
1804 | A character range may be specified with a hyphen, so \f(CW\*(C`tr/A\-J/0\-9/\*(C'\fR | |
1805 | does the same replacement as \f(CW\*(C`tr/ACEGIBDFHJ/0246813579/\*(C'\fR. | |
1806 | For \fBsed\fR devotees, \f(CW\*(C`y\*(C'\fR is provided as a synonym for \f(CW\*(C`tr\*(C'\fR. If the | |
1807 | \&\s-1SEARCHLIST\s0 is delimited by bracketing quotes, the \s-1REPLACEMENTLIST\s0 has | |
1808 | its own pair of quotes, which may or may not be bracketing quotes, | |
1809 | e.g., \f(CW\*(C`tr[A\-Z][a\-z]\*(C'\fR or \f(CW\*(C`tr(+\e\-*/)/ABCD/\*(C'\fR. | |
1810 | .Sp | |
1811 | Note that \f(CW\*(C`tr\*(C'\fR does \fBnot\fR do regular expression character classes | |
1812 | such as \f(CW\*(C`\ed\*(C'\fR or \f(CW\*(C`[:lower:]\*(C'\fR. The \f(CW\*(C`tr\*(C'\fR operator is not equivalent to | |
1813 | the \fItr\fR\|(1) utility. If you want to map strings between lower/upper | |
1814 | cases, see \*(L"lc\*(R" in perlfunc and \*(L"uc\*(R" in perlfunc, and in general consider | |
1815 | using the \f(CW\*(C`s\*(C'\fR operator if you need regular expressions. | |
1816 | .Sp | |
1817 | Note also that the whole range idea is rather unportable between | |
1818 | character sets\*(--and even within character sets ranges may cause results | |
1819 | you probably didn't expect. A sound principle is to use only ranges | |
1820 | that begin from and end at either alphabets of equal case (a\-e, A\-E), | |
1821 | or digits (0\-4). Anything else is unsafe. If in doubt, spell out the | |
1822 | character sets in full. | |
1823 | .Sp | |
1824 | Options: | |
1825 | .Sp | |
1826 | .Vb 3 | |
1827 | \& c Complement the SEARCHLIST. | |
1828 | \& d Delete found but unreplaced characters. | |
1829 | \& s Squash duplicate replaced characters. | |
1830 | .Ve | |
1831 | .Sp | |
1832 | If the \f(CW\*(C`/c\*(C'\fR modifier is specified, the \s-1SEARCHLIST\s0 character set | |
1833 | is complemented. If the \f(CW\*(C`/d\*(C'\fR modifier is specified, any characters | |
1834 | specified by \s-1SEARCHLIST\s0 not found in \s-1REPLACEMENTLIST\s0 are deleted. | |
1835 | (Note that this is slightly more flexible than the behavior of some | |
1836 | \&\fBtr\fR programs, which delete anything they find in the \s-1SEARCHLIST\s0, | |
1837 | period.) If the \f(CW\*(C`/s\*(C'\fR modifier is specified, sequences of characters | |
1838 | that were transliterated to the same character are squashed down | |
1839 | to a single instance of the character. | |
1840 | .Sp | |
1841 | If the \f(CW\*(C`/d\*(C'\fR modifier is used, the \s-1REPLACEMENTLIST\s0 is always interpreted | |
1842 | exactly as specified. Otherwise, if the \s-1REPLACEMENTLIST\s0 is shorter | |
1843 | than the \s-1SEARCHLIST\s0, the final character is replicated till it is long | |
1844 | enough. If the \s-1REPLACEMENTLIST\s0 is empty, the \s-1SEARCHLIST\s0 is replicated. | |
1845 | This latter is useful for counting characters in a class or for | |
1846 | squashing character sequences in a class. | |
1847 | .Sp | |
1848 | Examples: | |
1849 | .Sp | |
1850 | .Vb 1 | |
1851 | \& $ARGV[1] =~ tr/A-Z/a-z/; # canonicalize to lower case | |
1852 | .Ve | |
1853 | .Sp | |
1854 | .Vb 1 | |
1855 | \& $cnt = tr/*/*/; # count the stars in $_ | |
1856 | .Ve | |
1857 | .Sp | |
1858 | .Vb 1 | |
1859 | \& $cnt = $sky =~ tr/*/*/; # count the stars in $sky | |
1860 | .Ve | |
1861 | .Sp | |
1862 | .Vb 1 | |
1863 | \& $cnt = tr/0-9//; # count the digits in $_ | |
1864 | .Ve | |
1865 | .Sp | |
1866 | .Vb 1 | |
1867 | \& tr/a-zA-Z//s; # bookkeeper -> bokeper | |
1868 | .Ve | |
1869 | .Sp | |
1870 | .Vb 1 | |
1871 | \& ($HOST = $host) =~ tr/a-z/A-Z/; | |
1872 | .Ve | |
1873 | .Sp | |
1874 | .Vb 1 | |
1875 | \& tr/a-zA-Z/ /cs; # change non-alphas to single space | |
1876 | .Ve | |
1877 | .Sp | |
1878 | .Vb 2 | |
1879 | \& tr [\e200-\e377] | |
1880 | \& [\e000-\e177]; # delete 8th bit | |
1881 | .Ve | |
1882 | .Sp | |
1883 | If multiple transliterations are given for a character, only the | |
1884 | first one is used: | |
1885 | .Sp | |
1886 | .Vb 1 | |
1887 | \& tr/AAA/XYZ/ | |
1888 | .Ve | |
1889 | .Sp | |
1890 | will transliterate any A to X. | |
1891 | .Sp | |
1892 | Because the transliteration table is built at compile time, neither | |
1893 | the \s-1SEARCHLIST\s0 nor the \s-1REPLACEMENTLIST\s0 is subjected to double quote | |
1894 | interpolation. That means that if you want to use variables, you | |
1895 | must use an \fIeval()\fR: | |
1896 | .Sp | |
1897 | .Vb 2 | |
1898 | \& eval "tr/$oldlist/$newlist/"; | |
1899 | \& die $@ if $@; | |
1900 | .Ve | |
1901 | .Sp | |
1902 | .Vb 1 | |
1903 | \& eval "tr/$oldlist/$newlist/, 1" or die $@; | |
1904 | .Ve | |
1905 | .IP "<<\s-1EOF\s0" 8 | |
1906 | .IX Xref "here-doc heredoc here-document <<" | |
1907 | .IX Item "<<EOF" | |
1908 | A line-oriented form of quoting is based on the shell \*(L"here\-document\*(R" | |
1909 | syntax. Following a \f(CW\*(C`<<\*(C'\fR you specify a string to terminate | |
1910 | the quoted material, and all lines following the current line down to | |
1911 | the terminating string are the value of the item. The terminating | |
1912 | string may be either an identifier (a word), or some quoted text. If | |
1913 | quoted, the type of quotes you use determines the treatment of the | |
1914 | text, just as in regular quoting. An unquoted identifier works like | |
1915 | double quotes. There must be no space between the \f(CW\*(C`<<\*(C'\fR and | |
1916 | the identifier, unless the identifier is quoted. (If you put a space it | |
1917 | will be treated as a null identifier, which is valid, and matches the first | |
1918 | empty line.) The terminating string must appear by itself (unquoted and | |
1919 | with no surrounding whitespace) on the terminating line. | |
1920 | .Sp | |
1921 | .Vb 3 | |
1922 | \& print <<EOF; | |
1923 | \& The price is $Price. | |
1924 | \& EOF | |
1925 | .Ve | |
1926 | .Sp | |
1927 | .Vb 3 | |
1928 | \& print << "EOF"; # same as above | |
1929 | \& The price is $Price. | |
1930 | \& EOF | |
1931 | .Ve | |
1932 | .Sp | |
1933 | .Vb 4 | |
1934 | \& print << `EOC`; # execute commands | |
1935 | \& echo hi there | |
1936 | \& echo lo there | |
1937 | \& EOC | |
1938 | .Ve | |
1939 | .Sp | |
1940 | .Vb 5 | |
1941 | \& print <<"foo", <<"bar"; # you can stack them | |
1942 | \& I said foo. | |
1943 | \& foo | |
1944 | \& I said bar. | |
1945 | \& bar | |
1946 | .Ve | |
1947 | .Sp | |
1948 | .Vb 6 | |
1949 | \& myfunc(<< "THIS", 23, <<'THAT'); | |
1950 | \& Here's a line | |
1951 | \& or two. | |
1952 | \& THIS | |
1953 | \& and here's another. | |
1954 | \& THAT | |
1955 | .Ve | |
1956 | .Sp | |
1957 | Just don't forget that you have to put a semicolon on the end | |
1958 | to finish the statement, as Perl doesn't know you're not going to | |
1959 | try to do this: | |
1960 | .Sp | |
1961 | .Vb 4 | |
1962 | \& print <<ABC | |
1963 | \& 179231 | |
1964 | \& ABC | |
1965 | \& + 20; | |
1966 | .Ve | |
1967 | .Sp | |
1968 | If you want your here-docs to be indented with the | |
1969 | rest of the code, you'll need to remove leading whitespace | |
1970 | from each line manually: | |
1971 | .Sp | |
1972 | .Vb 4 | |
1973 | \& ($quote = <<'FINIS') =~ s/^\es+//gm; | |
1974 | \& The Road goes ever on and on, | |
1975 | \& down from the door where it began. | |
1976 | \& FINIS | |
1977 | .Ve | |
1978 | .Sp | |
1979 | If you use a here-doc within a delimited construct, such as in \f(CW\*(C`s///eg\*(C'\fR, | |
1980 | the quoted material must come on the lines following the final delimiter. | |
1981 | So instead of | |
1982 | .Sp | |
1983 | .Vb 4 | |
1984 | \& s/this/<<E . 'that' | |
1985 | \& the other | |
1986 | \& E | |
1987 | \& . 'more '/eg; | |
1988 | .Ve | |
1989 | .Sp | |
1990 | you have to write | |
1991 | .Sp | |
1992 | .Vb 4 | |
1993 | \& s/this/<<E . 'that' | |
1994 | \& . 'more '/eg; | |
1995 | \& the other | |
1996 | \& E | |
1997 | .Ve | |
1998 | .Sp | |
1999 | If the terminating identifier is on the last line of the program, you | |
2000 | must be sure there is a newline after it; otherwise, Perl will give the | |
2001 | warning \fBCan't find string terminator \*(L"\s-1END\s0\*(R" anywhere before \s-1EOF\s0...\fR. | |
2002 | .Sp | |
2003 | Additionally, the quoting rules for the identifier are not related to | |
2004 | Perl's quoting rules \*(-- \f(CW\*(C`q()\*(C'\fR, \f(CW\*(C`qq()\*(C'\fR, and the like are not supported | |
2005 | in place of \f(CW''\fR and \f(CW""\fR, and the only interpolation is for backslashing | |
2006 | the quoting character: | |
2007 | .Sp | |
2008 | .Vb 3 | |
2009 | \& print << "abc\e"def"; | |
2010 | \& testing... | |
2011 | \& abc"def | |
2012 | .Ve | |
2013 | .Sp | |
2014 | Finally, quoted strings cannot span multiple lines. The general rule is | |
2015 | that the identifier must be a string literal. Stick with that, and you | |
2016 | should be safe. | |
2017 | .Sh "Gory details of parsing quoted constructs" | |
2018 | .IX Xref "quote, gory details" | |
2019 | .IX Subsection "Gory details of parsing quoted constructs" | |
2020 | When presented with something that might have several different | |
2021 | interpretations, Perl uses the \fB\s-1DWIM\s0\fR (that's \*(L"Do What I Mean\*(R") | |
2022 | principle to pick the most probable interpretation. This strategy | |
2023 | is so successful that Perl programmers often do not suspect the | |
2024 | ambivalence of what they write. But from time to time, Perl's | |
2025 | notions differ substantially from what the author honestly meant. | |
2026 | .PP | |
2027 | This section hopes to clarify how Perl handles quoted constructs. | |
2028 | Although the most common reason to learn this is to unravel labyrinthine | |
2029 | regular expressions, because the initial steps of parsing are the | |
2030 | same for all quoting operators, they are all discussed together. | |
2031 | .PP | |
2032 | The most important Perl parsing rule is the first one discussed | |
2033 | below: when processing a quoted construct, Perl first finds the end | |
2034 | of that construct, then interprets its contents. If you understand | |
2035 | this rule, you may skip the rest of this section on the first | |
2036 | reading. The other rules are likely to contradict the user's | |
2037 | expectations much less frequently than this first one. | |
2038 | .PP | |
2039 | Some passes discussed below are performed concurrently, but because | |
2040 | their results are the same, we consider them individually. For different | |
2041 | quoting constructs, Perl performs different numbers of passes, from | |
2042 | one to five, but these passes are always performed in the same order. | |
2043 | .IP "Finding the end" 4 | |
2044 | .IX Item "Finding the end" | |
2045 | The first pass is finding the end of the quoted construct, whether | |
2046 | it be a multicharacter delimiter \f(CW"\enEOF\en"\fR in the \f(CW\*(C`<<EOF\*(C'\fR | |
2047 | construct, a \f(CW\*(C`/\*(C'\fR that terminates a \f(CW\*(C`qq//\*(C'\fR construct, a \f(CW\*(C`]\*(C'\fR which | |
2048 | terminates a \f(CW\*(C`qq[]\*(C'\fR construct, or a \f(CW\*(C`>\*(C'\fR which terminates a | |
2049 | fileglob started with \f(CW\*(C`<\*(C'\fR. | |
2050 | .Sp | |
2051 | When searching for single-character non-pairing delimiters, such | |
2052 | as \f(CW\*(C`/\*(C'\fR, combinations of \f(CW\*(C`\e\e\*(C'\fR and \f(CW\*(C`\e/\*(C'\fR are skipped. However, | |
2053 | when searching for a single-character pairing delimiter like \f(CW\*(C`[\*(C'\fR, | |
2054 | combinations of \f(CW\*(C`\e\e\*(C'\fR, \f(CW\*(C`\e]\*(C'\fR, and \f(CW\*(C`\e[\*(C'\fR are all skipped, and nested | |
2055 | \&\f(CW\*(C`[\*(C'\fR, \f(CW\*(C`]\*(C'\fR are skipped as well. When searching for multicharacter | |
2056 | delimiters, nothing is skipped. | |
2057 | .Sp | |
2058 | For constructs with three-part delimiters (\f(CW\*(C`s///\*(C'\fR, \f(CW\*(C`y///\*(C'\fR, and | |
2059 | \&\f(CW\*(C`tr///\*(C'\fR), the search is repeated once more. | |
2060 | .Sp | |
2061 | During this search no attention is paid to the semantics of the construct. | |
2062 | Thus: | |
2063 | .Sp | |
2064 | .Vb 1 | |
2065 | \& "$hash{"$foo/$bar"}" | |
2066 | .Ve | |
2067 | .Sp | |
2068 | or: | |
2069 | .Sp | |
2070 | .Vb 3 | |
2071 | \& m/ | |
2072 | \& bar # NOT a comment, this slash / terminated m//! | |
2073 | \& /x | |
2074 | .Ve | |
2075 | .Sp | |
2076 | do not form legal quoted expressions. The quoted part ends on the | |
2077 | first \f(CW\*(C`"\*(C'\fR and \f(CW\*(C`/\*(C'\fR, and the rest happens to be a syntax error. | |
2078 | Because the slash that terminated \f(CW\*(C`m//\*(C'\fR was followed by a \f(CW\*(C`SPACE\*(C'\fR, | |
2079 | the example above is not \f(CW\*(C`m//x\*(C'\fR, but rather \f(CW\*(C`m//\*(C'\fR with no \f(CW\*(C`/x\*(C'\fR | |
2080 | modifier. So the embedded \f(CW\*(C`#\*(C'\fR is interpreted as a literal \f(CW\*(C`#\*(C'\fR. | |
2081 | .Sp | |
2082 | Also no attention is paid to \f(CW\*(C`\ec\e\*(C'\fR during this search. | |
2083 | Thus the second \f(CW\*(C`\e\*(C'\fR in \f(CW\*(C`qq/\ec\e/\*(C'\fR is interpreted as a part of \f(CW\*(C`\e/\*(C'\fR, | |
2084 | and the following \f(CW\*(C`/\*(C'\fR is not recognized as a delimiter. | |
2085 | Instead, use \f(CW\*(C`\e034\*(C'\fR or \f(CW\*(C`\ex1c\*(C'\fR at the end of quoted constructs. | |
2086 | .IP "Removal of backslashes before delimiters" 4 | |
2087 | .IX Item "Removal of backslashes before delimiters" | |
2088 | During the second pass, text between the starting and ending | |
2089 | delimiters is copied to a safe location, and the \f(CW\*(C`\e\*(C'\fR is removed | |
2090 | from combinations consisting of \f(CW\*(C`\e\*(C'\fR and delimiter\*(--or delimiters, | |
2091 | meaning both starting and ending delimiters will be handled, should these differ. | |
2092 | This removal does not happen for multi-character delimiters. | |
2093 | Note that the combination \f(CW\*(C`\e\e\*(C'\fR is left intact, just as it was. | |
2094 | .Sp | |
2095 | Starting from this step no information about the delimiters is | |
2096 | used in parsing. | |
2097 | .IP "Interpolation" 4 | |
2098 | .IX Xref "interpolation" | |
2099 | .IX Item "Interpolation" | |
2100 | The next step is interpolation in the text obtained, which is now | |
2101 | delimiter\-independent. There are four different cases. | |
2102 | .RS 4 | |
2103 | .ie n .IP """<<'EOF'""\fR, \f(CW""m''""\fR, \f(CW""s'''""\fR, \f(CW""tr///""\fR, \f(CW""y///""" 4 | |
2104 | .el .IP "\f(CW<<'EOF'\fR, \f(CWm''\fR, \f(CWs'''\fR, \f(CWtr///\fR, \f(CWy///\fR" 4 | |
2105 | .IX Item "<<'EOF', m'', s''', tr///, y///" | |
2106 | No interpolation is performed. | |
2107 | .ie n .IP "''\fR, \f(CW""q//""" 4 | |
2108 | .el .IP "\f(CW''\fR, \f(CWq//\fR" 4 | |
2109 | .IX Item "'', q//" | |
2110 | The only interpolation is removal of \f(CW\*(C`\e\*(C'\fR from pairs \f(CW\*(C`\e\e\*(C'\fR. | |
2111 | .ie n .IP """""\fR, \f(CW``\fR, \f(CW""qq//""\fR, \f(CW""qx//""\fR, \f(CW""<file*glob>""" 4 | |
2112 | .el .IP "\f(CW``''\fR, \f(CW``\fR, \f(CWqq//\fR, \f(CWqx//\fR, \f(CW<file*glob>\fR" 4 | |
2113 | .IX Item """"", ``, qq//, qx//, <file*glob>" | |
2114 | \&\f(CW\*(C`\eQ\*(C'\fR, \f(CW\*(C`\eU\*(C'\fR, \f(CW\*(C`\eu\*(C'\fR, \f(CW\*(C`\eL\*(C'\fR, \f(CW\*(C`\el\*(C'\fR (possibly paired with \f(CW\*(C`\eE\*(C'\fR) are | |
2115 | converted to corresponding Perl constructs. Thus, \f(CW"$foo\eQbaz$bar"\fR | |
2116 | is converted to \f(CW\*(C`$foo . (quotemeta("baz" . $bar))\*(C'\fR internally. | |
2117 | The other combinations are replaced with appropriate expansions. | |
2118 | .Sp | |
2119 | Let it be stressed that \fIwhatever falls between \f(CI\*(C`\eQ\*(C'\fI and \f(CI\*(C`\eE\*(C'\fI\fR | |
2120 | is interpolated in the usual way. Something like \f(CW"\eQ\e\eE"\fR has | |
2121 | no \f(CW\*(C`\eE\*(C'\fR inside. Instead, it has \f(CW\*(C`\eQ\*(C'\fR, \f(CW\*(C`\e\e\*(C'\fR, and \f(CW\*(C`E\*(C'\fR, so the | |
2122 | result is the same as for \f(CW"\e\e\e\eE"\fR. As a general rule, backslashes | |
2123 | between \f(CW\*(C`\eQ\*(C'\fR and \f(CW\*(C`\eE\*(C'\fR may lead to counterintuitive results. So, | |
2124 | \&\f(CW"\eQ\et\eE"\fR is converted to \f(CW\*(C`quotemeta("\et")\*(C'\fR, which is the same | |
2125 | as \f(CW"\e\e\et"\fR (since \s-1TAB\s0 is not alphanumeric). Note also that: | |
2126 | .Sp | |
2127 | .Vb 2 | |
2128 | \& $str = '\et'; | |
2129 | \& return "\eQ$str"; | |
2130 | .Ve | |
2131 | .Sp | |
2132 | may be closer to the conjectural \fIintention\fR of the writer of \f(CW"\eQ\et\eE"\fR. | |
2133 | .Sp | |
2134 | Interpolated scalars and arrays are converted internally to the \f(CW\*(C`join\*(C'\fR and | |
2135 | \&\f(CW\*(C`.\*(C'\fR catenation operations. Thus, \f(CW"$foo XXX '@arr'"\fR becomes: | |
2136 | .Sp | |
2137 | .Vb 1 | |
2138 | \& $foo . " XXX '" . (join $", @arr) . "'"; | |
2139 | .Ve | |
2140 | .Sp | |
2141 | All operations above are performed simultaneously, left to right. | |
2142 | .Sp | |
2143 | Because the result of \f(CW"\eQ STRING \eE"\fR has all metacharacters | |
2144 | quoted, there is no way to insert a literal \f(CW\*(C`$\*(C'\fR or \f(CW\*(C`@\*(C'\fR inside a | |
2145 | \&\f(CW\*(C`\eQ\eE\*(C'\fR pair. If protected by \f(CW\*(C`\e\*(C'\fR, \f(CW\*(C`$\*(C'\fR will be quoted to become | |
2146 | \&\f(CW"\e\e\e$"\fR; if not, it is interpreted as the start of an interpolated | |
2147 | scalar. | |
2148 | .Sp | |
2149 | Note also that the interpolation code needs to make a decision on | |
2150 | where the interpolated scalar ends. For instance, whether | |
2151 | \&\f(CW"a $b \-> {c}"\fR really means: | |
2152 | .Sp | |
2153 | .Vb 1 | |
2154 | \& "a " . $b . " -> {c}"; | |
2155 | .Ve | |
2156 | .Sp | |
2157 | or: | |
2158 | .Sp | |
2159 | .Vb 1 | |
2160 | \& "a " . $b -> {c}; | |
2161 | .Ve | |
2162 | .Sp | |
2163 | Most of the time, the decision is to take the longest possible text that does not include | |
2164 | spaces between components and which contains matching braces or | |
2165 | brackets. Because the outcome may be determined by voting based | |
2166 | on heuristic estimators, the result is not strictly predictable. | |
2167 | Fortunately, it's usually correct for ambiguous cases. | |
2168 | .ie n .IP """?RE?""\fR, \f(CW""/RE/""\fR, \f(CW""m/RE/""\fR, \f(CW""s/RE/foo/""" 4 | |
2169 | .el .IP "\f(CW?RE?\fR, \f(CW/RE/\fR, \f(CWm/RE/\fR, \f(CWs/RE/foo/\fR" 4 | |
2170 | .IX Item "?RE?, /RE/, m/RE/, s/RE/foo/" | |
2171 | Processing of \f(CW\*(C`\eQ\*(C'\fR, \f(CW\*(C`\eU\*(C'\fR, \f(CW\*(C`\eu\*(C'\fR, \f(CW\*(C`\eL\*(C'\fR, \f(CW\*(C`\el\*(C'\fR, and interpolation | |
2172 | happens (almost) as with \f(CW\*(C`qq//\*(C'\fR constructs, but the substitution | |
2173 | of \f(CW\*(C`\e\*(C'\fR followed by RE-special chars (including \f(CW\*(C`\e\*(C'\fR) is not | |
2174 | performed. Moreover, inside \f(CW\*(C`(?{BLOCK})\*(C'\fR, \f(CW\*(C`(?# comment )\*(C'\fR, and | |
2175 | a \f(CW\*(C`#\*(C'\fR\-comment in a \f(CW\*(C`//x\*(C'\fR\-regular expression, no processing is | |
2176 | performed whatsoever. This is the first step at which the presence | |
2177 | of the \f(CW\*(C`//x\*(C'\fR modifier is relevant. | |
2178 | .Sp | |
2179 | Interpolation has several quirks: \f(CW$|\fR, \f(CW$(\fR, and \f(CW$)\fR are not | |
2180 | interpolated, and constructs \f(CW$var[SOMETHING]\fR are voted (by several | |
2181 | different estimators) to be either an array element or \f(CW$var\fR | |
2182 | followed by an \s-1RE\s0 alternative. This is where the notation | |
2183 | \&\f(CW\*(C`${arr[$bar]}\*(C'\fR comes in handy: \f(CW\*(C`/${arr[0\-9]}/\*(C'\fR is interpreted as | |
2184 | array element \f(CW\*(C`\-9\*(C'\fR, not as a regular expression from the variable | |
2185 | \&\f(CW$arr\fR followed by a digit, which would be the interpretation of | |
2186 | \&\f(CW\*(C`/$arr[0\-9]/\*(C'\fR. Since voting among different estimators may occur, | |
2187 | the result is not predictable. | |
2188 | .Sp | |
2189 | It is at this step that \f(CW\*(C`\e1\*(C'\fR is begrudgingly converted to \f(CW$1\fR in | |
2190 | the replacement text of \f(CW\*(C`s///\*(C'\fR to correct the incorrigible | |
2191 | \&\fIsed\fR hackers who haven't picked up the saner idiom yet. A warning | |
2192 | is emitted if the \f(CW\*(C`use warnings\*(C'\fR pragma or the \fB\-w\fR command-line flag | |
2193 | (that is, the \f(CW$^W\fR variable) was set. | |
2194 | .Sp | |
2195 | The lack of processing of \f(CW\*(C`\e\e\*(C'\fR creates specific restrictions on | |
2196 | the post-processed text. If the delimiter is \f(CW\*(C`/\*(C'\fR, one cannot get | |
2197 | the combination \f(CW\*(C`\e/\*(C'\fR into the result of this step. \f(CW\*(C`/\*(C'\fR will | |
2198 | finish the regular expression, \f(CW\*(C`\e/\*(C'\fR will be stripped to \f(CW\*(C`/\*(C'\fR on | |
2199 | the previous step, and \f(CW\*(C`\e\e/\*(C'\fR will be left as is. Because \f(CW\*(C`/\*(C'\fR is | |
2200 | equivalent to \f(CW\*(C`\e/\*(C'\fR inside a regular expression, this does not | |
2201 | matter unless the delimiter happens to be a character special to the | |
2202 | \&\s-1RE\s0 engine, such as in \f(CW\*(C`s*foo*bar*\*(C'\fR, \f(CW\*(C`m[foo]\*(C'\fR, or \f(CW\*(C`?foo?\*(C'\fR; or an | |
2203 | alphanumeric char, as in: | |
2204 | .Sp | |
2205 | .Vb 1 | |
2206 | \& m m ^ a \es* b mmx; | |
2207 | .Ve | |
2208 | .Sp | |
2209 | In the \s-1RE\s0 above, which is intentionally obfuscated for illustration, the | |
2210 | delimiter is \f(CW\*(C`m\*(C'\fR, the modifier is \f(CW\*(C`mx\*(C'\fR, and after backslash-removal the | |
2211 | \&\s-1RE\s0 is the same as for \f(CW\*(C`m/ ^ a \es* b /mx\*(C'\fR. There's more than one | |
2212 | reason you're encouraged to restrict your delimiters to non\-alphanumeric, | |
2213 | non-whitespace choices. | |
2214 | .RE | |
2215 | .RS 4 | |
2216 | .Sp | |
2217 | This step is the last one for all constructs except regular expressions, | |
2218 | which are processed further. | |
2219 | .RE | |
2220 | .IP "Interpolation of regular expressions" 4 | |
2221 | .IX Xref "regexp, interpolation" | |
2222 | .IX Item "Interpolation of regular expressions" | |
2223 | Previous steps were performed during the compilation of Perl code, | |
2224 | but this one happens at run time\*(--although it may be optimized to | |
2225 | be calculated at compile time if appropriate. After the preprocessing | |
2226 | described above, and possibly after evaluation if catenation, | |
2227 | joining, casing translation, or metaquoting are involved, the | |
2228 | resulting \fIstring\fR is passed to the \s-1RE\s0 engine for compilation. | |
2229 | .Sp | |
2230 | Whatever happens in the \s-1RE\s0 engine might be better discussed in perlre, | |
2231 | but for the sake of continuity, we shall do so here. | |
2232 | .Sp | |
2233 | This is another step where the presence of the \f(CW\*(C`//x\*(C'\fR modifier is | |
2234 | relevant. The \s-1RE\s0 engine scans the string from left to right and | |
2235 | converts it to a finite automaton. | |
2236 | .Sp | |
2237 | Backslashed characters are either replaced with corresponding | |
2238 | literal strings (as with \f(CW\*(C`\e{\*(C'\fR), or else they generate special nodes | |
2239 | in the finite automaton (as with \f(CW\*(C`\eb\*(C'\fR). Characters special to the | |
2240 | \&\s-1RE\s0 engine (such as \f(CW\*(C`|\*(C'\fR) generate corresponding nodes or groups of | |
2241 | nodes. \f(CW\*(C`(?#...)\*(C'\fR comments are ignored. All the rest is either | |
2242 | converted to literal strings to match, or else is ignored (as is | |
2243 | whitespace and \f(CW\*(C`#\*(C'\fR\-style comments if \f(CW\*(C`//x\*(C'\fR is present). | |
2244 | .Sp | |
2245 | Parsing of the bracketed character class construct, \f(CW\*(C`[...]\*(C'\fR, is | |
2246 | rather different than the rule used for the rest of the pattern. | |
2247 | The terminator of this construct is found using the same rules as | |
2248 | for finding the terminator of a \f(CW\*(C`{}\*(C'\fR\-delimited construct, the only | |
2249 | exception being that \f(CW\*(C`]\*(C'\fR immediately following \f(CW\*(C`[\*(C'\fR is treated as | |
2250 | though preceded by a backslash. Similarly, the terminator of | |
2251 | \&\f(CW\*(C`(?{...})\*(C'\fR is found using the same rules as for finding the | |
2252 | terminator of a \f(CW\*(C`{}\*(C'\fR\-delimited construct. | |
2253 | .Sp | |
2254 | It is possible to inspect both the string given to the \s-1RE\s0 engine and the | |
2255 | resulting finite automaton. See the arguments \f(CW\*(C`debug\*(C'\fR/\f(CW\*(C`debugcolor\*(C'\fR | |
2256 | in the \f(CW\*(C`use re\*(C'\fR pragma, as well as Perl's \fB\-Dr\fR command-line | |
2257 | switch documented in \*(L"Command Switches\*(R" in perlrun. | |
2258 | .IP "Optimization of regular expressions" 4 | |
2259 | .IX Xref "regexp, optimization" | |
2260 | .IX Item "Optimization of regular expressions" | |
2261 | This step is listed for completeness only. Since it does not change | |
2262 | semantics, details of this step are not documented and are subject | |
2263 | to change without notice. This step is performed over the finite | |
2264 | automaton that was generated during the previous pass. | |
2265 | .Sp | |
2266 | It is at this stage that \f(CW\*(C`split()\*(C'\fR silently optimizes \f(CW\*(C`/^/\*(C'\fR to | |
2267 | mean \f(CW\*(C`/^/m\*(C'\fR. | |
2268 | .Sh "I/O Operators" | |
2269 | .IX Xref "operator, i o operator, io io while filehandle <> @ARGV" | |
2270 | .IX Subsection "I/O Operators" | |
2271 | There are several I/O operators you should know about. | |
2272 | .PP | |
2273 | A string enclosed by backticks (grave accents) first undergoes | |
2274 | double-quote interpolation. It is then interpreted as an external | |
2275 | command, and the output of that command is the value of the | |
2276 | backtick string, like in a shell. In scalar context, a single string | |
2277 | consisting of all output is returned. In list context, a list of | |
2278 | values is returned, one per line of output. (You can set \f(CW$/\fR to use | |
2279 | a different line terminator.) The command is executed each time the | |
2280 | pseudo-literal is evaluated. The status value of the command is | |
2281 | returned in \f(CW$?\fR (see perlvar for the interpretation of \f(CW$?\fR). | |
2282 | Unlike in \fBcsh\fR, no translation is done on the return data\*(--newlines | |
2283 | remain newlines. Unlike in any of the shells, single quotes do not | |
2284 | hide variable names in the command from interpretation. To pass a | |
2285 | literal dollar-sign through to the shell you need to hide it with a | |
2286 | backslash. The generalized form of backticks is \f(CW\*(C`qx//\*(C'\fR. (Because | |
2287 | backticks always undergo shell expansion as well, see perlsec for | |
2288 | security concerns.) | |
2289 | .IX Xref "qx ` `` backtick glob" | |
2290 | .PP | |
2291 | In scalar context, evaluating a filehandle in angle brackets yields | |
2292 | the next line from that file (the newline, if any, included), or | |
2293 | \&\f(CW\*(C`undef\*(C'\fR at end-of-file or on error. When \f(CW$/\fR is set to \f(CW\*(C`undef\*(C'\fR | |
2294 | (sometimes known as file-slurp mode) and the file is empty, it | |
2295 | returns \f(CW''\fR the first time, followed by \f(CW\*(C`undef\*(C'\fR subsequently. | |
2296 | .PP | |
2297 | Ordinarily you must assign the returned value to a variable, but | |
2298 | there is one situation where an automatic assignment happens. If | |
2299 | and only if the input symbol is the only thing inside the conditional | |
2300 | of a \f(CW\*(C`while\*(C'\fR statement (even if disguised as a \f(CW\*(C`for(;;)\*(C'\fR loop), | |
2301 | the value is automatically assigned to the global variable \f(CW$_\fR, | |
2302 | destroying whatever was there previously. (This may seem like an | |
2303 | odd thing to you, but you'll use the construct in almost every Perl | |
2304 | script you write.) The \f(CW$_\fR variable is not implicitly localized. | |
2305 | You'll have to put a \f(CW\*(C`local $_;\*(C'\fR before the loop if you want that | |
2306 | to happen. | |
2307 | .PP | |
2308 | The following lines are equivalent: | |
2309 | .PP | |
2310 | .Vb 7 | |
2311 | \& while (defined($_ = <STDIN>)) { print; } | |
2312 | \& while ($_ = <STDIN>) { print; } | |
2313 | \& while (<STDIN>) { print; } | |
2314 | \& for (;<STDIN>;) { print; } | |
2315 | \& print while defined($_ = <STDIN>); | |
2316 | \& print while ($_ = <STDIN>); | |
2317 | \& print while <STDIN>; | |
2318 | .Ve | |
2319 | .PP | |
2320 | This also behaves similarly, but avoids \f(CW$_\fR: | |
2321 | .PP | |
2322 | .Vb 1 | |
2323 | \& while (my $line = <STDIN>) { print $line } | |
2324 | .Ve | |
2325 | .PP | |
2326 | In these loop constructs, the assigned value (whether assignment | |
2327 | is automatic or explicit) is then tested to see whether it is | |
2328 | defined. The defined test avoids problems where a line has a string | |
2329 | value that would be treated as false by Perl, for example a \f(CW""\fR or | |
2330 | a \f(CW"0"\fR with no trailing newline. If you really mean for such values | |
2331 | to terminate the loop, they should be tested for explicitly: | |
2332 | .PP | |
2333 | .Vb 2 | |
2334 | \& while (($_ = <STDIN>) ne '0') { ... } | |
2335 | \& while (<STDIN>) { last unless $_; ... } | |
2336 | .Ve | |
2337 | .PP | |
2338 | In other boolean contexts, \f(CW\*(C`<\f(CIfilehandle\f(CW>\*(C'\fR without an | |
2339 | explicit \f(CW\*(C`defined\*(C'\fR test or comparison elicits a warning if the | |
2340 | \&\f(CW\*(C`use warnings\*(C'\fR pragma or the \fB\-w\fR | |
2341 | command-line switch (the \f(CW$^W\fR variable) is in effect. | |
2342 | .PP | |
2343 | The filehandles \s-1STDIN\s0, \s-1STDOUT\s0, and \s-1STDERR\s0 are predefined. (The | |
2344 | filehandles \f(CW\*(C`stdin\*(C'\fR, \f(CW\*(C`stdout\*(C'\fR, and \f(CW\*(C`stderr\*(C'\fR will also work except | |
2345 | in packages, where they would be interpreted as local identifiers | |
2346 | rather than global.) Additional filehandles may be created with | |
2347 | the \fIopen()\fR function, amongst others. See perlopentut and | |
2348 | \&\*(L"open\*(R" in perlfunc for details on this. | |
2349 | .IX Xref "stdin stdout stderr" | |
2350 | .PP | |
2351 | If a <\s-1FILEHANDLE\s0> is used in a context that is looking for | |
2352 | a list, a list comprising all input lines is returned, one line per | |
2353 | list element. It's easy to grow to a rather large data space this | |
2354 | way, so use with care. | |
2355 | .PP | |
2356 | <\s-1FILEHANDLE\s0> may also be spelled \f(CW\*(C`readline(*FILEHANDLE)\*(C'\fR. | |
2357 | See \*(L"readline\*(R" in perlfunc. | |
2358 | .PP | |
2359 | The null filehandle <> is special: it can be used to emulate the | |
2360 | behavior of \fBsed\fR and \fBawk\fR. Input from <> comes either from | |
2361 | standard input, or from each file listed on the command line. Here's | |
2362 | how it works: the first time <> is evaluated, the \f(CW@ARGV\fR array is | |
2363 | checked, and if it is empty, \f(CW$ARGV[0]\fR is set to \*(L"\-\*(R", which when opened | |
2364 | gives you standard input. The \f(CW@ARGV\fR array is then processed as a list | |
2365 | of filenames. The loop | |
2366 | .PP | |
2367 | .Vb 3 | |
2368 | \& while (<>) { | |
2369 | \& ... # code for each line | |
2370 | \& } | |
2371 | .Ve | |
2372 | .PP | |
2373 | is equivalent to the following Perl-like pseudo code: | |
2374 | .PP | |
2375 | .Vb 7 | |
2376 | \& unshift(@ARGV, '-') unless @ARGV; | |
2377 | \& while ($ARGV = shift) { | |
2378 | \& open(ARGV, $ARGV); | |
2379 | \& while (<ARGV>) { | |
2380 | \& ... # code for each line | |
2381 | \& } | |
2382 | \& } | |
2383 | .Ve | |
2384 | .PP | |
2385 | except that it isn't so cumbersome to say, and will actually work. | |
2386 | It really does shift the \f(CW@ARGV\fR array and put the current filename | |
2387 | into the \f(CW$ARGV\fR variable. It also uses filehandle \fI\s-1ARGV\s0\fR | |
2388 | internally\-\-<> is just a synonym for <\s-1ARGV\s0>, which | |
2389 | is magical. (The pseudo code above doesn't work because it treats | |
2390 | <\s-1ARGV\s0> as non\-magical.) | |
2391 | .PP | |
2392 | You can modify \f(CW@ARGV\fR before the first <> as long as the array ends up | |
2393 | containing the list of filenames you really want. Line numbers (\f(CW$.\fR) | |
2394 | continue as though the input were one big happy file. See the example | |
2395 | in \*(L"eof\*(R" in perlfunc for how to reset line numbers on each file. | |
2396 | .PP | |
2397 | If you want to set \f(CW@ARGV\fR to your own list of files, go right ahead. | |
2398 | This sets \f(CW@ARGV\fR to all plain text files if no \f(CW@ARGV\fR was given: | |
2399 | .PP | |
2400 | .Vb 1 | |
2401 | \& @ARGV = grep { -f && -T } glob('*') unless @ARGV; | |
2402 | .Ve | |
2403 | .PP | |
2404 | You can even set them to pipe commands. For example, this automatically | |
2405 | filters compressed arguments through \fBgzip\fR: | |
2406 | .PP | |
2407 | .Vb 1 | |
2408 | \& @ARGV = map { /\e.(gz|Z)$/ ? "gzip -dc < $_ |" : $_ } @ARGV; | |
2409 | .Ve | |
2410 | .PP | |
2411 | If you want to pass switches into your script, you can use one of the | |
2412 | Getopts modules or put a loop on the front like this: | |
2413 | .PP | |
2414 | .Vb 7 | |
2415 | \& while ($_ = $ARGV[0], /^-/) { | |
2416 | \& shift; | |
2417 | \& last if /^--$/; | |
2418 | \& if (/^-D(.*)/) { $debug = $1 } | |
2419 | \& if (/^-v/) { $verbose++ } | |
2420 | \& # ... # other switches | |
2421 | \& } | |
2422 | .Ve | |
2423 | .PP | |
2424 | .Vb 3 | |
2425 | \& while (<>) { | |
2426 | \& # ... # code for each line | |
2427 | \& } | |
2428 | .Ve | |
2429 | .PP | |
2430 | The <> symbol will return \f(CW\*(C`undef\*(C'\fR for end-of-file only once. | |
2431 | If you call it again after this, it will assume you are processing another | |
2432 | \&\f(CW@ARGV\fR list, and if you haven't set \f(CW@ARGV\fR, will read input from \s-1STDIN\s0. | |
2433 | .PP | |
2434 | If what the angle brackets contain is a simple scalar variable (e.g., | |
2435 | <$foo>), then that variable contains the name of the | |
2436 | filehandle to input from, or its typeglob, or a reference to the | |
2437 | same. For example: | |
2438 | .PP | |
2439 | .Vb 2 | |
2440 | \& $fh = \e*STDIN; | |
2441 | \& $line = <$fh>; | |
2442 | .Ve | |
2443 | .PP | |
2444 | If what's within the angle brackets is neither a filehandle nor a simple | |
2445 | scalar variable containing a filehandle name, typeglob, or typeglob | |
2446 | reference, it is interpreted as a filename pattern to be globbed, and | |
2447 | either a list of filenames or the next filename in the list is returned, | |
2448 | depending on context. This distinction is determined on syntactic | |
2449 | grounds alone. That means \f(CW\*(C`<$x>\*(C'\fR is always a \fIreadline()\fR from | |
2450 | an indirect handle, but \f(CW\*(C`<$hash{key}>\*(C'\fR is always a \fIglob()\fR. | |
2451 | That's because \f(CW$x\fR is a simple scalar variable, but \f(CW$hash{key}\fR is | |
2452 | not\*(--it's a hash element. Even \f(CW\*(C`<$x >\*(C'\fR (note the extra space) | |
2453 | is treated as \f(CW\*(C`glob("$x ")\*(C'\fR, not \f(CW\*(C`readline($x)\*(C'\fR. | |
2454 | .PP | |
2455 | One level of double-quote interpretation is done first, but you can't | |
2456 | say \f(CW\*(C`<$foo>\*(C'\fR because that's an indirect filehandle as explained | |
2457 | in the previous paragraph. (In older versions of Perl, programmers | |
2458 | would insert curly brackets to force interpretation as a filename glob: | |
2459 | \&\f(CW\*(C`<${foo}>\*(C'\fR. These days, it's considered cleaner to call the | |
2460 | internal function directly as \f(CW\*(C`glob($foo)\*(C'\fR, which is probably the right | |
2461 | way to have done it in the first place.) For example: | |
2462 | .PP | |
2463 | .Vb 3 | |
2464 | \& while (<*.c>) { | |
2465 | \& chmod 0644, $_; | |
2466 | \& } | |
2467 | .Ve | |
2468 | .PP | |
2469 | is roughly equivalent to: | |
2470 | .PP | |
2471 | .Vb 5 | |
2472 | \& open(FOO, "echo *.c | tr -s ' \et\er\ef' '\e\e012\e\e012\e\e012\e\e012'|"); | |
2473 | \& while (<FOO>) { | |
2474 | \& chomp; | |
2475 | \& chmod 0644, $_; | |
2476 | \& } | |
2477 | .Ve | |
2478 | .PP | |
2479 | except that the globbing is actually done internally using the standard | |
2480 | \&\f(CW\*(C`File::Glob\*(C'\fR extension. Of course, the shortest way to do the above is: | |
2481 | .PP | |
2482 | .Vb 1 | |
2483 | \& chmod 0644, <*.c>; | |
2484 | .Ve | |
2485 | .PP | |
2486 | A (file)glob evaluates its (embedded) argument only when it is | |
2487 | starting a new list. All values must be read before it will start | |
2488 | over. In list context, this isn't important because you automatically | |
2489 | get them all anyway. However, in scalar context the operator returns | |
2490 | the next value each time it's called, or \f(CW\*(C`undef\*(C'\fR when the list has | |
2491 | run out. As with filehandle reads, an automatic \f(CW\*(C`defined\*(C'\fR is | |
2492 | generated when the glob occurs in the test part of a \f(CW\*(C`while\*(C'\fR, | |
2493 | because legal glob returns (e.g. a file called \fI0\fR) would otherwise | |
2494 | terminate the loop. Again, \f(CW\*(C`undef\*(C'\fR is returned only once. So if | |
2495 | you're expecting a single value from a glob, it is much better to | |
2496 | say | |
2497 | .PP | |
2498 | .Vb 1 | |
2499 | \& ($file) = <blurch*>; | |
2500 | .Ve | |
2501 | .PP | |
2502 | than | |
2503 | .PP | |
2504 | .Vb 1 | |
2505 | \& $file = <blurch*>; | |
2506 | .Ve | |
2507 | .PP | |
2508 | because the latter will alternate between returning a filename and | |
2509 | returning false. | |
2510 | .PP | |
2511 | If you're trying to do variable interpolation, it's definitely better | |
2512 | to use the \fIglob()\fR function, because the older notation can cause people | |
2513 | to become confused with the indirect filehandle notation. | |
2514 | .PP | |
2515 | .Vb 2 | |
2516 | \& @files = glob("$dir/*.[ch]"); | |
2517 | \& @files = glob($files[$i]); | |
2518 | .Ve | |
2519 | .Sh "Constant Folding" | |
2520 | .IX Xref "constant folding folding" | |
2521 | .IX Subsection "Constant Folding" | |
2522 | Like C, Perl does a certain amount of expression evaluation at | |
2523 | compile time whenever it determines that all arguments to an | |
2524 | operator are static and have no side effects. In particular, string | |
2525 | concatenation happens at compile time between literals that don't do | |
2526 | variable substitution. Backslash interpolation also happens at | |
2527 | compile time. You can say | |
2528 | .PP | |
2529 | .Vb 2 | |
2530 | \& 'Now is the time for all' . "\en" . | |
2531 | \& 'good men to come to.' | |
2532 | .Ve | |
2533 | .PP | |
2534 | and this all reduces to one string internally. Likewise, if | |
2535 | you say | |
2536 | .PP | |
2537 | .Vb 3 | |
2538 | \& foreach $file (@filenames) { | |
2539 | \& if (-s $file > 5 + 100 * 2**16) { } | |
2540 | \& } | |
2541 | .Ve | |
2542 | .PP | |
2543 | the compiler will precompute the number which that expression | |
2544 | represents so that the interpreter won't have to. | |
2545 | .Sh "No-ops" | |
2546 | .IX Xref "no-op nop" | |
2547 | .IX Subsection "No-ops" | |
2548 | Perl doesn't officially have a no-op operator, but the bare constants | |
2549 | \&\f(CW0\fR and \f(CW1\fR are special-cased to not produce a warning in a void | |
2550 | context, so you can for example safely do | |
2551 | .PP | |
2552 | .Vb 1 | |
2553 | \& 1 while foo(); | |
2554 | .Ve | |
2555 | .Sh "Bitwise String Operators" | |
2556 | .IX Xref "operator, bitwise, string" | |
2557 | .IX Subsection "Bitwise String Operators" | |
2558 | Bitstrings of any size may be manipulated by the bitwise operators | |
2559 | (\f(CW\*(C`~ | & ^\*(C'\fR). | |
2560 | .PP | |
2561 | If the operands to a binary bitwise op are strings of different | |
2562 | sizes, \fB|\fR and \fB^\fR ops act as though the shorter operand had | |
2563 | additional zero bits on the right, while the \fB&\fR op acts as though | |
2564 | the longer operand were truncated to the length of the shorter. | |
2565 | The granularity for such extension or truncation is one or more | |
2566 | bytes. | |
2567 | .PP | |
2568 | .Vb 5 | |
2569 | \& # ASCII-based examples | |
2570 | \& print "j p \en" ^ " a h"; # prints "JAPH\en" | |
2571 | \& print "JA" | " ph\en"; # prints "japh\en" | |
2572 | \& print "japh\enJunk" & '_____'; # prints "JAPH\en"; | |
2573 | \& print 'p N$' ^ " E<H\en"; # prints "Perl\en"; | |
2574 | .Ve | |
2575 | .PP | |
2576 | If you are intending to manipulate bitstrings, be certain that | |
2577 | you're supplying bitstrings: If an operand is a number, that will imply | |
2578 | a \fBnumeric\fR bitwise operation. You may explicitly show which type of | |
2579 | operation you intend by using \f(CW""\fR or \f(CW\*(C`0+\*(C'\fR, as in the examples below. | |
2580 | .PP | |
2581 | .Vb 4 | |
2582 | \& $foo = 150 | 105; # yields 255 (0x96 | 0x69 is 0xFF) | |
2583 | \& $foo = '150' | 105; # yields 255 | |
2584 | \& $foo = 150 | '105'; # yields 255 | |
2585 | \& $foo = '150' | '105'; # yields string '155' (under ASCII) | |
2586 | .Ve | |
2587 | .PP | |
2588 | .Vb 2 | |
2589 | \& $baz = 0+$foo & 0+$bar; # both ops explicitly numeric | |
2590 | \& $biz = "$foo" ^ "$bar"; # both ops explicitly stringy | |
2591 | .Ve | |
2592 | .PP | |
2593 | See \*(L"vec\*(R" in perlfunc for information on how to manipulate individual bits | |
2594 | in a bit vector. | |
2595 | .Sh "Integer Arithmetic" | |
2596 | .IX Xref "integer" | |
2597 | .IX Subsection "Integer Arithmetic" | |
2598 | By default, Perl assumes that it must do most of its arithmetic in | |
2599 | floating point. But by saying | |
2600 | .PP | |
2601 | .Vb 1 | |
2602 | \& use integer; | |
2603 | .Ve | |
2604 | .PP | |
2605 | you may tell the compiler that it's okay to use integer operations | |
2606 | (if it feels like it) from here to the end of the enclosing \s-1BLOCK\s0. | |
2607 | An inner \s-1BLOCK\s0 may countermand this by saying | |
2608 | .PP | |
2609 | .Vb 1 | |
2610 | \& no integer; | |
2611 | .Ve | |
2612 | .PP | |
2613 | which lasts until the end of that \s-1BLOCK\s0. Note that this doesn't | |
2614 | mean everything is only an integer, merely that Perl may use integer | |
2615 | operations if it is so inclined. For example, even under \f(CW\*(C`use | |
2616 | integer\*(C'\fR, if you take the \f(CWsqrt(2)\fR, you'll still get \f(CW1.4142135623731\fR | |
2617 | or so. | |
2618 | .PP | |
2619 | Used on numbers, the bitwise operators (\*(L"&\*(R", \*(L"|\*(R", \*(L"^\*(R", \*(L"~\*(R", \*(L"<<\*(R", | |
2620 | and \*(L">>\*(R") always produce integral results. (But see also | |
2621 | \&\*(L"Bitwise String Operators\*(R".) However, \f(CW\*(C`use integer\*(C'\fR still has meaning for | |
2622 | them. By default, their results are interpreted as unsigned integers, but | |
2623 | if \f(CW\*(C`use integer\*(C'\fR is in effect, their results are interpreted | |
2624 | as signed integers. For example, \f(CW\*(C`~0\*(C'\fR usually evaluates to a large | |
2625 | integral value. However, \f(CW\*(C`use integer; ~0\*(C'\fR is \f(CW\*(C`\-1\*(C'\fR on two's-complement | |
2626 | machines. | |
2627 | .Sh "Floating-point Arithmetic" | |
2628 | .IX Xref "floating-point floating point float real" | |
2629 | .IX Subsection "Floating-point Arithmetic" | |
2630 | While \f(CW\*(C`use integer\*(C'\fR provides integer-only arithmetic, there is no | |
2631 | analogous mechanism to provide automatic rounding or truncation to a | |
2632 | certain number of decimal places. For rounding to a certain number | |
2633 | of digits, \fIsprintf()\fR or \fIprintf()\fR is usually the easiest route. | |
2634 | See perlfaq4. | |
2635 | .PP | |
2636 | Floating-point numbers are only approximations to what a mathematician | |
2637 | would call real numbers. There are infinitely more reals than floats, | |
2638 | so some corners must be cut. For example: | |
2639 | .PP | |
2640 | .Vb 2 | |
2641 | \& printf "%.20g\en", 123456789123456789; | |
2642 | \& # produces 123456789123456784 | |
2643 | .Ve | |
2644 | .PP | |
2645 | Testing for exact floating-point equality or inequality is | |
2646 | not a good idea. Here's a (relatively expensive) work-around to compare | |
2647 | whether two floating-point numbers are equal to a particular number of | |
2648 | significant digits. See Knuth, volume \s-1II\s0, for a more robust treatment of | |
2649 | this topic. | |
2650 | .PP | |
2651 | .Vb 7 | |
2652 | \& sub fp_equal { | |
2653 | \& my ($X, $Y, $POINTS) = @_; | |
2654 | \& my ($tX, $tY); | |
2655 | \& $tX = sprintf("%.${POINTS}g", $X); | |
2656 | \& $tY = sprintf("%.${POINTS}g", $Y); | |
2657 | \& return $tX eq $tY; | |
2658 | \& } | |
2659 | .Ve | |
2660 | .PP | |
2661 | The \s-1POSIX\s0 module (part of the standard perl distribution) implements | |
2662 | \&\fIceil()\fR, \fIfloor()\fR, and other mathematical and trigonometric functions. | |
2663 | The Math::Complex module (part of the standard perl distribution) | |
2664 | defines mathematical functions that work on both the reals and the | |
2665 | imaginary numbers. Math::Complex is not as efficient as \s-1POSIX\s0, but | |
2666 | \&\s-1POSIX\s0 can't work with complex numbers. | |
2667 | .PP | |
2668 | Rounding in financial applications can have serious implications, and | |
2669 | the rounding method used should be specified precisely. In these | |
2670 | cases, it probably pays not to trust whichever system rounding is | |
2671 | being used by Perl, but to instead implement the rounding function you | |
2672 | need yourself. | |
2673 | .Sh "Bigger Numbers" | |
2674 | .IX Xref "number, arbitrary precision" | |
2675 | .IX Subsection "Bigger Numbers" | |
2676 | The standard Math::BigInt and Math::BigFloat modules provide | |
2677 | variable-precision arithmetic and overloaded operators, although | |
2678 | they're currently pretty slow. At the cost of some space and | |
2679 | considerable speed, they avoid the normal pitfalls associated with | |
2680 | limited-precision representations. | |
2681 | .PP | |
2682 | .Vb 3 | |
2683 | \& use Math::BigInt; | |
2684 | \& $x = Math::BigInt->new('123456789123456789'); | |
2685 | \& print $x * $x; | |
2686 | .Ve | |
2687 | .PP | |
2688 | .Vb 1 | |
2689 | \& # prints +15241578780673678515622620750190521 | |
2690 | .Ve | |
2691 | .PP | |
2692 | There are several modules that let you calculate with unlimited (bound | |
2693 | only by memory and cpu\-time) or fixed precision. There are also | |
2694 | some non-standard modules that provide faster implementations via | |
2695 | external C libraries. | |
2696 | .PP | |
2697 | Here is a short, but incomplete, summary: | |
2698 | .PP | |
2699 | .Vb 11 | |
2700 | \& Math::Fraction big, unlimited fractions like 9973 / 12967 | |
2701 | \& Math::String treat string sequences like numbers | |
2702 | \& Math::FixedPrecision calculate with a fixed precision | |
2703 | \& Math::Currency for currency calculations | |
2704 | \& Bit::Vector manipulate bit vectors fast (uses C) | |
2705 | \& Math::BigIntFast Bit::Vector wrapper for big numbers | |
2706 | \& Math::Pari provides access to the Pari C library | |
2707 | \& Math::BigInteger uses an external C library | |
2708 | \& Math::Cephes uses external Cephes C library (no big numbers) | |
2709 | \& Math::Cephes::Fraction fractions via the Cephes library | |
2710 | \& Math::GMP another one using an external C library | |
2711 | .Ve | |
2712 | .PP | |
2713 | Choose wisely. |