Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "PERLDATA 1" | |
132 | .TH PERLDATA 1 "2006-01-07" "perl v5.8.8" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | perldata \- Perl data types | |
135 | .SH "DESCRIPTION" | |
136 | .IX Header "DESCRIPTION" | |
137 | .Sh "Variable names" | |
138 | .IX Xref "variable, name variable name data type type" | |
139 | .IX Subsection "Variable names" | |
140 | Perl has three built-in data types: scalars, arrays of scalars, and | |
141 | associative arrays of scalars, known as \*(L"hashes\*(R". A scalar is a | |
142 | single string (of any size, limited only by the available memory), | |
143 | number, or a reference to something (which will be discussed | |
144 | in perlref). Normal arrays are ordered lists of scalars indexed | |
145 | by number, starting with 0. Hashes are unordered collections of scalar | |
146 | values indexed by their associated string key. | |
147 | .PP | |
148 | Values are usually referred to by name, or through a named reference. | |
149 | The first character of the name tells you to what sort of data | |
150 | structure it refers. The rest of the name tells you the particular | |
151 | value to which it refers. Usually this name is a single \fIidentifier\fR, | |
152 | that is, a string beginning with a letter or underscore, and | |
153 | containing letters, underscores, and digits. In some cases, it may | |
154 | be a chain of identifiers, separated by \f(CW\*(C`::\*(C'\fR (or by the slightly | |
155 | archaic \f(CW\*(C`'\*(C'\fR); all but the last are interpreted as names of packages, | |
156 | to locate the namespace in which to look up the final identifier | |
157 | (see \*(L"Packages\*(R" in perlmod for details). It's possible to substitute | |
158 | for a simple identifier, an expression that produces a reference | |
159 | to the value at runtime. This is described in more detail below | |
160 | and in perlref. | |
161 | .IX Xref "identifier" | |
162 | .PP | |
163 | Perl also has its own built-in variables whose names don't follow | |
164 | these rules. They have strange names so they don't accidentally | |
165 | collide with one of your normal variables. Strings that match | |
166 | parenthesized parts of a regular expression are saved under names | |
167 | containing only digits after the \f(CW\*(C`$\*(C'\fR (see perlop and perlre). | |
168 | In addition, several special variables that provide windows into | |
169 | the inner working of Perl have names containing punctuation characters | |
170 | and control characters. These are documented in perlvar. | |
171 | .IX Xref "variable, built-in" | |
172 | .PP | |
173 | Scalar values are always named with '$', even when referring to a | |
174 | scalar that is part of an array or a hash. The '$' symbol works | |
175 | semantically like the English word \*(L"the\*(R" in that it indicates a | |
176 | single value is expected. | |
177 | .IX Xref "scalar" | |
178 | .PP | |
179 | .Vb 4 | |
180 | \& $days # the simple scalar value "days" | |
181 | \& $days[28] # the 29th element of array @days | |
182 | \& $days{'Feb'} # the 'Feb' value from hash %days | |
183 | \& $#days # the last index of array @days | |
184 | .Ve | |
185 | .PP | |
186 | Entire arrays (and slices of arrays and hashes) are denoted by '@', | |
187 | which works much like the word \*(L"these\*(R" or \*(L"those\*(R" does in English, | |
188 | in that it indicates multiple values are expected. | |
189 | .IX Xref "array" | |
190 | .PP | |
191 | .Vb 3 | |
192 | \& @days # ($days[0], $days[1],... $days[n]) | |
193 | \& @days[3,4,5] # same as ($days[3],$days[4],$days[5]) | |
194 | \& @days{'a','c'} # same as ($days{'a'},$days{'c'}) | |
195 | .Ve | |
196 | .PP | |
197 | Entire hashes are denoted by '%': | |
198 | .IX Xref "hash" | |
199 | .PP | |
200 | .Vb 1 | |
201 | \& %days # (key1, val1, key2, val2 ...) | |
202 | .Ve | |
203 | .PP | |
204 | In addition, subroutines are named with an initial '&', though this | |
205 | is optional when unambiguous, just as the word \*(L"do\*(R" is often redundant | |
206 | in English. Symbol table entries can be named with an initial '*', | |
207 | but you don't really care about that yet (if ever :\-). | |
208 | .PP | |
209 | Every variable type has its own namespace, as do several | |
210 | non-variable identifiers. This means that you can, without fear | |
211 | of conflict, use the same name for a scalar variable, an array, or | |
212 | a hash\*(--or, for that matter, for a filehandle, a directory handle, a | |
213 | subroutine name, a format name, or a label. This means that \f(CW$foo\fR | |
214 | and \f(CW@foo\fR are two different variables. It also means that \f(CW$foo[1]\fR | |
215 | is a part of \f(CW@foo\fR, not a part of \f(CW$foo\fR. This may seem a bit weird, | |
216 | but that's okay, because it is weird. | |
217 | .IX Xref "namespace" | |
218 | .PP | |
219 | Because variable references always start with '$', '@', or '%', the | |
220 | \&\*(L"reserved\*(R" words aren't in fact reserved with respect to variable | |
221 | names. They \fIare\fR reserved with respect to labels and filehandles, | |
222 | however, which don't have an initial special character. You can't | |
223 | have a filehandle named \*(L"log\*(R", for instance. Hint: you could say | |
224 | \&\f(CW\*(C`open(LOG,'logfile')\*(C'\fR rather than \f(CW\*(C`open(log,'logfile')\*(C'\fR. Using | |
225 | uppercase filehandles also improves readability and protects you | |
226 | from conflict with future reserved words. Case \fIis\fR significant\-\-\*(L"\s-1FOO\s0\*(R", | |
227 | \&\*(L"Foo\*(R", and \*(L"foo\*(R" are all different names. Names that start with a | |
228 | letter or underscore may also contain digits and underscores. | |
229 | .IX Xref "identifier, case sensitivity case" | |
230 | .PP | |
231 | It is possible to replace such an alphanumeric name with an expression | |
232 | that returns a reference to the appropriate type. For a description | |
233 | of this, see perlref. | |
234 | .PP | |
235 | Names that start with a digit may contain only more digits. Names | |
236 | that do not start with a letter, underscore, digit or a caret (i.e. | |
237 | a control character) are limited to one character, e.g., \f(CW$%\fR or | |
238 | \&\f(CW$$\fR. (Most of these one character names have a predefined | |
239 | significance to Perl. For instance, \f(CW$$\fR is the current process | |
240 | id.) | |
241 | .Sh "Context" | |
242 | .IX Xref "context scalar context list context" | |
243 | .IX Subsection "Context" | |
244 | The interpretation of operations and values in Perl sometimes depends | |
245 | on the requirements of the context around the operation or value. | |
246 | There are two major contexts: list and scalar. Certain operations | |
247 | return list values in contexts wanting a list, and scalar values | |
248 | otherwise. If this is true of an operation it will be mentioned in | |
249 | the documentation for that operation. In other words, Perl overloads | |
250 | certain operations based on whether the expected return value is | |
251 | singular or plural. Some words in English work this way, like \*(L"fish\*(R" | |
252 | and \*(L"sheep\*(R". | |
253 | .PP | |
254 | In a reciprocal fashion, an operation provides either a scalar or a | |
255 | list context to each of its arguments. For example, if you say | |
256 | .PP | |
257 | .Vb 1 | |
258 | \& int( <STDIN> ) | |
259 | .Ve | |
260 | .PP | |
261 | the integer operation provides scalar context for the <> | |
262 | operator, which responds by reading one line from \s-1STDIN\s0 and passing it | |
263 | back to the integer operation, which will then find the integer value | |
264 | of that line and return that. If, on the other hand, you say | |
265 | .PP | |
266 | .Vb 1 | |
267 | \& sort( <STDIN> ) | |
268 | .Ve | |
269 | .PP | |
270 | then the sort operation provides list context for <>, which | |
271 | will proceed to read every line available up to the end of file, and | |
272 | pass that list of lines back to the sort routine, which will then | |
273 | sort those lines and return them as a list to whatever the context | |
274 | of the sort was. | |
275 | .PP | |
276 | Assignment is a little bit special in that it uses its left argument | |
277 | to determine the context for the right argument. Assignment to a | |
278 | scalar evaluates the right-hand side in scalar context, while | |
279 | assignment to an array or hash evaluates the right-hand side in list | |
280 | context. Assignment to a list (or slice, which is just a list | |
281 | anyway) also evaluates the right-hand side in list context. | |
282 | .PP | |
283 | When you use the \f(CW\*(C`use warnings\*(C'\fR pragma or Perl's \fB\-w\fR command-line | |
284 | option, you may see warnings | |
285 | about useless uses of constants or functions in \*(L"void context\*(R". | |
286 | Void context just means the value has been discarded, such as a | |
287 | statement containing only \f(CW\*(C`"fred";\*(C'\fR or \f(CW\*(C`getpwuid(0);\*(C'\fR. It still | |
288 | counts as scalar context for functions that care whether or not | |
289 | they're being called in list context. | |
290 | .PP | |
291 | User-defined subroutines may choose to care whether they are being | |
292 | called in a void, scalar, or list context. Most subroutines do not | |
293 | need to bother, though. That's because both scalars and lists are | |
294 | automatically interpolated into lists. See \*(L"wantarray\*(R" in perlfunc | |
295 | for how you would dynamically discern your function's calling | |
296 | context. | |
297 | .Sh "Scalar values" | |
298 | .IX Xref "scalar number string reference" | |
299 | .IX Subsection "Scalar values" | |
300 | All data in Perl is a scalar, an array of scalars, or a hash of | |
301 | scalars. A scalar may contain one single value in any of three | |
302 | different flavors: a number, a string, or a reference. In general, | |
303 | conversion from one form to another is transparent. Although a | |
304 | scalar may not directly hold multiple values, it may contain a | |
305 | reference to an array or hash which in turn contains multiple values. | |
306 | .PP | |
307 | Scalars aren't necessarily one thing or another. There's no place | |
308 | to declare a scalar variable to be of type \*(L"string\*(R", type \*(L"number\*(R", | |
309 | type \*(L"reference\*(R", or anything else. Because of the automatic | |
310 | conversion of scalars, operations that return scalars don't need | |
311 | to care (and in fact, cannot care) whether their caller is looking | |
312 | for a string, a number, or a reference. Perl is a contextually | |
313 | polymorphic language whose scalars can be strings, numbers, or | |
314 | references (which includes objects). Although strings and numbers | |
315 | are considered pretty much the same thing for nearly all purposes, | |
316 | references are strongly\-typed, uncastable pointers with builtin | |
317 | reference-counting and destructor invocation. | |
318 | .PP | |
319 | A scalar value is interpreted as \s-1TRUE\s0 in the Boolean sense if it is not | |
320 | the null string or the number 0 (or its string equivalent, \*(L"0\*(R"). The | |
321 | Boolean context is just a special kind of scalar context where no | |
322 | conversion to a string or a number is ever performed. | |
323 | .IX Xref "boolean bool true false truth" | |
324 | .PP | |
325 | There are actually two varieties of null strings (sometimes referred | |
326 | to as \*(L"empty\*(R" strings), a defined one and an undefined one. The | |
327 | defined version is just a string of length zero, such as \f(CW""\fR. | |
328 | The undefined version is the value that indicates that there is | |
329 | no real value for something, such as when there was an error, or | |
330 | at end of file, or when you refer to an uninitialized variable or | |
331 | element of an array or hash. Although in early versions of Perl, | |
332 | an undefined scalar could become defined when first used in a | |
333 | place expecting a defined value, this no longer happens except for | |
334 | rare cases of autovivification as explained in perlref. You can | |
335 | use the \fIdefined()\fR operator to determine whether a scalar value is | |
336 | defined (this has no meaning on arrays or hashes), and the \fIundef()\fR | |
337 | operator to produce an undefined value. | |
338 | .IX Xref "defined undefined undef null string, null" | |
339 | .PP | |
340 | To find out whether a given string is a valid non-zero number, it's | |
341 | sometimes enough to test it against both numeric 0 and also lexical | |
342 | \&\*(L"0\*(R" (although this will cause noises if warnings are on). That's | |
343 | because strings that aren't numbers count as 0, just as they do in \fBawk\fR: | |
344 | .PP | |
345 | .Vb 3 | |
346 | \& if ($str == 0 && $str ne "0") { | |
347 | \& warn "That doesn't look like a number"; | |
348 | \& } | |
349 | .Ve | |
350 | .PP | |
351 | That method may be best because otherwise you won't treat \s-1IEEE\s0 | |
352 | notations like \f(CW\*(C`NaN\*(C'\fR or \f(CW\*(C`Infinity\*(C'\fR properly. At other times, you | |
353 | might prefer to determine whether string data can be used numerically | |
354 | by calling the \fIPOSIX::strtod()\fR function or by inspecting your string | |
355 | with a regular expression (as documented in perlre). | |
356 | .PP | |
357 | .Vb 8 | |
358 | \& warn "has nondigits" if /\eD/; | |
359 | \& warn "not a natural number" unless /^\ed+$/; # rejects -3 | |
360 | \& warn "not an integer" unless /^-?\ed+$/; # rejects +3 | |
361 | \& warn "not an integer" unless /^[+-]?\ed+$/; | |
362 | \& warn "not a decimal number" unless /^-?\ed+\e.?\ed*$/; # rejects .2 | |
363 | \& warn "not a decimal number" unless /^-?(?:\ed+(?:\e.\ed*)?|\e.\ed+)$/; | |
364 | \& warn "not a C float" | |
365 | \& unless /^([+-]?)(?=\ed|\e.\ed)\ed*(\e.\ed*)?([Ee]([+-]?\ed+))?$/; | |
366 | .Ve | |
367 | .PP | |
368 | The length of an array is a scalar value. You might expect to find the length | |
369 | of array \f(CW@days\fR by evaluating \f(CW$#days\fR, as in \fBcsh\fR. However, this | |
370 | isn't the length of the array; it's the subscript of the last element, | |
371 | which is a different value since there is ordinarily a 0th element. | |
372 | Assigning to \f(CW$#days\fR actually changes the length of the array. | |
373 | Shortening an array this way destroys intervening values. Lengthening | |
374 | an array that was previously shortened does not recover values | |
375 | that were in those elements. (It used to do so in Perl 4, but we | |
376 | had to break this to make sure destructors were called when expected.) | |
377 | .IX Xref "$# array, length" | |
378 | .PP | |
379 | You can also gain some minuscule measure of efficiency by pre-extending | |
380 | an array that is going to get big. You can also extend an array | |
381 | by assigning to an element that is off the end of the array. You | |
382 | can truncate an array down to nothing by assigning the null list | |
383 | () to it. The following are equivalent: | |
384 | .PP | |
385 | .Vb 2 | |
386 | \& @whatever = (); | |
387 | \& $#whatever = -1; | |
388 | .Ve | |
389 | .PP | |
390 | If you evaluate an array in scalar context, it returns the length | |
391 | of the array. (Note that this is not true of lists, which return | |
392 | the last value, like the C comma operator, nor of built-in functions, | |
393 | which return whatever they feel like returning.) The following is | |
394 | always true: | |
395 | .IX Xref "array, length" | |
396 | .PP | |
397 | .Vb 1 | |
398 | \& scalar(@whatever) == $#whatever - $[ + 1; | |
399 | .Ve | |
400 | .PP | |
401 | Version 5 of Perl changed the semantics of \f(CW$[\fR: files that don't set | |
402 | the value of \f(CW$[\fR no longer need to worry about whether another | |
403 | file changed its value. (In other words, use of \f(CW$[\fR is deprecated.) | |
404 | So in general you can assume that | |
405 | .IX Xref "$[" | |
406 | .PP | |
407 | .Vb 1 | |
408 | \& scalar(@whatever) == $#whatever + 1; | |
409 | .Ve | |
410 | .PP | |
411 | Some programmers choose to use an explicit conversion so as to | |
412 | leave nothing to doubt: | |
413 | .PP | |
414 | .Vb 1 | |
415 | \& $element_count = scalar(@whatever); | |
416 | .Ve | |
417 | .PP | |
418 | If you evaluate a hash in scalar context, it returns false if the | |
419 | hash is empty. If there are any key/value pairs, it returns true; | |
420 | more precisely, the value returned is a string consisting of the | |
421 | number of used buckets and the number of allocated buckets, separated | |
422 | by a slash. This is pretty much useful only to find out whether | |
423 | Perl's internal hashing algorithm is performing poorly on your data | |
424 | set. For example, you stick 10,000 things in a hash, but evaluating | |
425 | \&\f(CW%HASH\fR in scalar context reveals \f(CW"1/16"\fR, which means only one out | |
426 | of sixteen buckets has been touched, and presumably contains all | |
427 | 10,000 of your items. This isn't supposed to happen. | |
428 | .IX Xref "hash, scalar context hash, bucket bucket" | |
429 | .PP | |
430 | You can preallocate space for a hash by assigning to the \fIkeys()\fR function. | |
431 | This rounds up the allocated buckets to the next power of two: | |
432 | .PP | |
433 | .Vb 1 | |
434 | \& keys(%users) = 1000; # allocate 1024 buckets | |
435 | .Ve | |
436 | .Sh "Scalar value constructors" | |
437 | .IX Xref "scalar, literal scalar, constant" | |
438 | .IX Subsection "Scalar value constructors" | |
439 | Numeric literals are specified in any of the following floating point or | |
440 | integer formats: | |
441 | .PP | |
442 | .Vb 9 | |
443 | \& 12345 | |
444 | \& 12345.67 | |
445 | \& .23E-10 # a very small number | |
446 | \& 3.14_15_92 # a very important number | |
447 | \& 4_294_967_296 # underscore for legibility | |
448 | \& 0xff # hex | |
449 | \& 0xdead_beef # more hex | |
450 | \& 0377 # octal (only numbers, begins with 0) | |
451 | \& 0b011011 # binary | |
452 | .Ve | |
453 | .PP | |
454 | You are allowed to use underscores (underbars) in numeric literals | |
455 | between digits for legibility. You could, for example, group binary | |
456 | digits by threes (as for a Unix-style mode argument such as 0b110_100_100) | |
457 | or by fours (to represent nibbles, as in 0b1010_0110) or in other groups. | |
458 | .IX Xref "number, literal" | |
459 | .PP | |
460 | String literals are usually delimited by either single or double | |
461 | quotes. They work much like quotes in the standard Unix shells: | |
462 | double-quoted string literals are subject to backslash and variable | |
463 | substitution; single-quoted strings are not (except for \f(CW\*(C`\e'\*(C'\fR and | |
464 | \&\f(CW\*(C`\e\e\*(C'\fR). The usual C\-style backslash rules apply for making | |
465 | characters such as newline, tab, etc., as well as some more exotic | |
466 | forms. See \*(L"Quote and Quote-like Operators\*(R" in perlop for a list. | |
467 | .IX Xref "string, literal" | |
468 | .PP | |
469 | Hexadecimal, octal, or binary representations in string literals | |
470 | (e.g. '0xff') are not automatically converted to their integer | |
471 | representation. The \fIhex()\fR and \fIoct()\fR functions make these conversions | |
472 | for you. See \*(L"hex\*(R" in perlfunc and \*(L"oct\*(R" in perlfunc for more details. | |
473 | .PP | |
474 | You can also embed newlines directly in your strings, i.e., they can end | |
475 | on a different line than they begin. This is nice, but if you forget | |
476 | your trailing quote, the error will not be reported until Perl finds | |
477 | another line containing the quote character, which may be much further | |
478 | on in the script. Variable substitution inside strings is limited to | |
479 | scalar variables, arrays, and array or hash slices. (In other words, | |
480 | names beginning with $ or @, followed by an optional bracketed | |
481 | expression as a subscript.) The following code segment prints out "The | |
482 | price is $\&100." | |
483 | .IX Xref "interpolation" | |
484 | .PP | |
485 | .Vb 2 | |
486 | \& $Price = '$100'; # not interpolated | |
487 | \& print "The price is $Price.\en"; # interpolated | |
488 | .Ve | |
489 | .PP | |
490 | There is no double interpolation in Perl, so the \f(CW$100\fR is left as is. | |
491 | .PP | |
492 | As in some shells, you can enclose the variable name in braces to | |
493 | disambiguate it from following alphanumerics (and underscores). | |
494 | You must also do | |
495 | this when interpolating a variable into a string to separate the | |
496 | variable name from a following double-colon or an apostrophe, since | |
497 | these would be otherwise treated as a package separator: | |
498 | .IX Xref "interpolation" | |
499 | .PP | |
500 | .Vb 3 | |
501 | \& $who = "Larry"; | |
502 | \& print PASSWD "${who}::0:0:Superuser:/:/bin/perl\en"; | |
503 | \& print "We use ${who}speak when ${who}'s here.\en"; | |
504 | .Ve | |
505 | .PP | |
506 | Without the braces, Perl would have looked for a \f(CW$whospeak\fR, a | |
507 | \&\f(CW$who::0\fR, and a \f(CW$who's\fR variable. The last two would be the | |
508 | \&\f(CW$0\fR and the \f(CW$s\fR variables in the (presumably) non-existent package | |
509 | \&\f(CW\*(C`who\*(C'\fR. | |
510 | .PP | |
511 | In fact, an identifier within such curlies is forced to be a string, | |
512 | as is any simple identifier within a hash subscript. Neither need | |
513 | quoting. Our earlier example, \f(CW$days{'Feb'}\fR can be written as | |
514 | \&\f(CW$days{Feb}\fR and the quotes will be assumed automatically. But | |
515 | anything more complicated in the subscript will be interpreted as an | |
516 | expression. This means for example that \f(CW\*(C`$version{2.0}++\*(C'\fR is | |
517 | equivalent to \f(CW\*(C`$version{2}++\*(C'\fR, not to \f(CW\*(C`$version{'2.0'}++\*(C'\fR. | |
518 | .PP | |
519 | \fIVersion Strings\fR | |
520 | .IX Xref "version string vstring v-string" | |
521 | .IX Subsection "Version Strings" | |
522 | .PP | |
523 | \&\fBNote:\fR Version Strings (v\-strings) have been deprecated. They will | |
524 | not be available after Perl 5.8. The marginal benefits of v\-strings | |
525 | were greatly outweighed by the potential for Surprise and Confusion. | |
526 | .PP | |
527 | A literal of the form \f(CW\*(C`v1.20.300.4000\*(C'\fR is parsed as a string composed | |
528 | of characters with the specified ordinals. This form, known as | |
529 | v\-strings, provides an alternative, more readable way to construct | |
530 | strings, rather than use the somewhat less readable interpolation form | |
531 | \&\f(CW"\ex{1}\ex{14}\ex{12c}\ex{fa0}"\fR. This is useful for representing | |
532 | Unicode strings, and for comparing version \*(L"numbers\*(R" using the string | |
533 | comparison operators, \f(CW\*(C`cmp\*(C'\fR, \f(CW\*(C`gt\*(C'\fR, \f(CW\*(C`lt\*(C'\fR etc. If there are two or | |
534 | more dots in the literal, the leading \f(CW\*(C`v\*(C'\fR may be omitted. | |
535 | .PP | |
536 | .Vb 3 | |
537 | \& print v9786; # prints UTF-8 encoded SMILEY, "\ex{263a}" | |
538 | \& print v102.111.111; # prints "foo" | |
539 | \& print 102.111.111; # same | |
540 | .Ve | |
541 | .PP | |
542 | Such literals are accepted by both \f(CW\*(C`require\*(C'\fR and \f(CW\*(C`use\*(C'\fR for | |
543 | doing a version check. The \f(CW$^V\fR special variable also contains the | |
544 | running Perl interpreter's version in this form. See \*(L"$^V\*(R" in perlvar. | |
545 | Note that using the v\-strings for IPv4 addresses is not portable unless | |
546 | you also use the \fIinet_aton()\fR/\fIinet_ntoa()\fR routines of the Socket package. | |
547 | .PP | |
548 | Note that since Perl 5.8.1 the single-number v\-strings (like \f(CW\*(C`v65\*(C'\fR) | |
549 | are not v\-strings before the \f(CW\*(C`=>\*(C'\fR operator (which is usually used | |
550 | to separate a hash key from a hash value); instead they are interpreted | |
551 | as literal strings ('v65'). They were v\-strings from Perl 5.6.0 to | |
552 | Perl 5.8.0, but that caused more confusion and breakage than good. | |
553 | Multi-number v\-strings like \f(CW\*(C`v65.66\*(C'\fR and \f(CW65.66.67\fR continue to | |
554 | be v\-strings always. | |
555 | .PP | |
556 | \fISpecial Literals\fR | |
557 | .IX Xref "special literal __END__ __DATA__ END DATA end data ^D ^Z" | |
558 | .IX Subsection "Special Literals" | |
559 | .PP | |
560 | The special literals _\|_FILE_\|_, _\|_LINE_\|_, and _\|_PACKAGE_\|_ | |
561 | represent the current filename, line number, and package name at that | |
562 | point in your program. They may be used only as separate tokens; they | |
563 | will not be interpolated into strings. If there is no current package | |
564 | (due to an empty \f(CW\*(C`package;\*(C'\fR directive), _\|_PACKAGE_\|_ is the undefined | |
565 | value. | |
566 | .IX Xref "__FILE__ __LINE__ __PACKAGE__ line file package" | |
567 | .PP | |
568 | The two control characters ^D and ^Z, and the tokens _\|_END_\|_ and _\|_DATA_\|_ | |
569 | may be used to indicate the logical end of the script before the actual | |
570 | end of file. Any following text is ignored. | |
571 | .PP | |
572 | Text after _\|_DATA_\|_ may be read via the filehandle \f(CW\*(C`PACKNAME::DATA\*(C'\fR, | |
573 | where \f(CW\*(C`PACKNAME\*(C'\fR is the package that was current when the _\|_DATA_\|_ | |
574 | token was encountered. The filehandle is left open pointing to the | |
575 | contents after _\|_DATA_\|_. It is the program's responsibility to | |
576 | \&\f(CW\*(C`close DATA\*(C'\fR when it is done reading from it. For compatibility with | |
577 | older scripts written before _\|_DATA_\|_ was introduced, _\|_END_\|_ behaves | |
578 | like _\|_DATA_\|_ in the toplevel script (but not in files loaded with | |
579 | \&\f(CW\*(C`require\*(C'\fR or \f(CW\*(C`do\*(C'\fR) and leaves the remaining contents of the | |
580 | file accessible via \f(CW\*(C`main::DATA\*(C'\fR. | |
581 | .PP | |
582 | See SelfLoader for more description of _\|_DATA_\|_, and | |
583 | an example of its use. Note that you cannot read from the \s-1DATA\s0 | |
584 | filehandle in a \s-1BEGIN\s0 block: the \s-1BEGIN\s0 block is executed as soon | |
585 | as it is seen (during compilation), at which point the corresponding | |
586 | _\|_DATA_\|_ (or _\|_END_\|_) token has not yet been seen. | |
587 | .PP | |
588 | \fIBarewords\fR | |
589 | .IX Xref "bareword" | |
590 | .IX Subsection "Barewords" | |
591 | .PP | |
592 | A word that has no other interpretation in the grammar will | |
593 | be treated as if it were a quoted string. These are known as | |
594 | \&\*(L"barewords\*(R". As with filehandles and labels, a bareword that consists | |
595 | entirely of lowercase letters risks conflict with future reserved | |
596 | words, and if you use the \f(CW\*(C`use warnings\*(C'\fR pragma or the \fB\-w\fR switch, | |
597 | Perl will warn you about any | |
598 | such words. Some people may wish to outlaw barewords entirely. If you | |
599 | say | |
600 | .PP | |
601 | .Vb 1 | |
602 | \& use strict 'subs'; | |
603 | .Ve | |
604 | .PP | |
605 | then any bareword that would \s-1NOT\s0 be interpreted as a subroutine call | |
606 | produces a compile-time error instead. The restriction lasts to the | |
607 | end of the enclosing block. An inner block may countermand this | |
608 | by saying \f(CW\*(C`no strict 'subs'\*(C'\fR. | |
609 | .PP | |
610 | \fIArray Joining Delimiter\fR | |
611 | .IX Xref "array, interpolation interpolation, array $""" | |
612 | .IX Subsection "Array Joining Delimiter" | |
613 | .PP | |
614 | Arrays and slices are interpolated into double-quoted strings | |
615 | by joining the elements with the delimiter specified in the \f(CW$"\fR | |
616 | variable (\f(CW$LIST_SEPARATOR\fR if \*(L"use English;\*(R" is specified), | |
617 | space by default. The following are equivalent: | |
618 | .PP | |
619 | .Vb 2 | |
620 | \& $temp = join($", @ARGV); | |
621 | \& system "echo $temp"; | |
622 | .Ve | |
623 | .PP | |
624 | .Vb 1 | |
625 | \& system "echo @ARGV"; | |
626 | .Ve | |
627 | .PP | |
628 | Within search patterns (which also undergo double-quotish substitution) | |
629 | there is an unfortunate ambiguity: Is \f(CW\*(C`/$foo[bar]/\*(C'\fR to be interpreted as | |
630 | \&\f(CW\*(C`/${foo}[bar]/\*(C'\fR (where \f(CW\*(C`[bar]\*(C'\fR is a character class for the regular | |
631 | expression) or as \f(CW\*(C`/${foo[bar]}/\*(C'\fR (where \f(CW\*(C`[bar]\*(C'\fR is the subscript to array | |
632 | \&\f(CW@foo\fR)? If \f(CW@foo\fR doesn't otherwise exist, then it's obviously a | |
633 | character class. If \f(CW@foo\fR exists, Perl takes a good guess about \f(CW\*(C`[bar]\*(C'\fR, | |
634 | and is almost always right. If it does guess wrong, or if you're just | |
635 | plain paranoid, you can force the correct interpretation with curly | |
636 | braces as above. | |
637 | .PP | |
638 | If you're looking for the information on how to use here\-documents, | |
639 | which used to be here, that's been moved to | |
640 | \&\*(L"Quote and Quote-like Operators\*(R" in perlop. | |
641 | .Sh "List value constructors" | |
642 | .IX Xref "list" | |
643 | .IX Subsection "List value constructors" | |
644 | List values are denoted by separating individual values by commas | |
645 | (and enclosing the list in parentheses where precedence requires it): | |
646 | .PP | |
647 | .Vb 1 | |
648 | \& (LIST) | |
649 | .Ve | |
650 | .PP | |
651 | In a context not requiring a list value, the value of what appears | |
652 | to be a list literal is simply the value of the final element, as | |
653 | with the C comma operator. For example, | |
654 | .PP | |
655 | .Vb 1 | |
656 | \& @foo = ('cc', '-E', $bar); | |
657 | .Ve | |
658 | .PP | |
659 | assigns the entire list value to array \f(CW@foo\fR, but | |
660 | .PP | |
661 | .Vb 1 | |
662 | \& $foo = ('cc', '-E', $bar); | |
663 | .Ve | |
664 | .PP | |
665 | assigns the value of variable \f(CW$bar\fR to the scalar variable \f(CW$foo\fR. | |
666 | Note that the value of an actual array in scalar context is the | |
667 | length of the array; the following assigns the value 3 to \f(CW$foo\fR: | |
668 | .PP | |
669 | .Vb 2 | |
670 | \& @foo = ('cc', '-E', $bar); | |
671 | \& $foo = @foo; # $foo gets 3 | |
672 | .Ve | |
673 | .PP | |
674 | You may have an optional comma before the closing parenthesis of a | |
675 | list literal, so that you can say: | |
676 | .PP | |
677 | .Vb 5 | |
678 | \& @foo = ( | |
679 | \& 1, | |
680 | \& 2, | |
681 | \& 3, | |
682 | \& ); | |
683 | .Ve | |
684 | .PP | |
685 | To use a here-document to assign an array, one line per element, | |
686 | you might use an approach like this: | |
687 | .PP | |
688 | .Vb 7 | |
689 | \& @sauces = <<End_Lines =~ m/(\eS.*\eS)/g; | |
690 | \& normal tomato | |
691 | \& spicy tomato | |
692 | \& green chile | |
693 | \& pesto | |
694 | \& white wine | |
695 | \& End_Lines | |
696 | .Ve | |
697 | .PP | |
698 | LISTs do automatic interpolation of sublists. That is, when a \s-1LIST\s0 is | |
699 | evaluated, each element of the list is evaluated in list context, and | |
700 | the resulting list value is interpolated into \s-1LIST\s0 just as if each | |
701 | individual element were a member of \s-1LIST\s0. Thus arrays and hashes lose their | |
702 | identity in a LIST\*(--the list | |
703 | .PP | |
704 | .Vb 1 | |
705 | \& (@foo,@bar,&SomeSub,%glarch) | |
706 | .Ve | |
707 | .PP | |
708 | contains all the elements of \f(CW@foo\fR followed by all the elements of \f(CW@bar\fR, | |
709 | followed by all the elements returned by the subroutine named SomeSub | |
710 | called in list context, followed by the key/value pairs of \f(CW%glarch\fR. | |
711 | To make a list reference that does \fI\s-1NOT\s0\fR interpolate, see perlref. | |
712 | .PP | |
713 | The null list is represented by (). Interpolating it in a list | |
714 | has no effect. Thus ((),(),()) is equivalent to (). Similarly, | |
715 | interpolating an array with no elements is the same as if no | |
716 | array had been interpolated at that point. | |
717 | .PP | |
718 | This interpolation combines with the facts that the opening | |
719 | and closing parentheses are optional (except when necessary for | |
720 | precedence) and lists may end with an optional comma to mean that | |
721 | multiple commas within lists are legal syntax. The list \f(CW\*(C`1,,3\*(C'\fR is a | |
722 | concatenation of two lists, \f(CW\*(C`1,\*(C'\fR and \f(CW3\fR, the first of which ends | |
723 | with that optional comma. \f(CW\*(C`1,,3\*(C'\fR is \f(CW\*(C`(1,),(3)\*(C'\fR is \f(CW\*(C`1,3\*(C'\fR. (And | |
724 | similarly for \f(CW\*(C`1,,,3\*(C'\fR is \f(CW\*(C`(1,),(,),3\*(C'\fR is \f(CW\*(C`1,3\*(C'\fR and so on.) Not that | |
725 | we'd advise you to use this obfuscation. | |
726 | .PP | |
727 | A list value may also be subscripted like a normal array. You must | |
728 | put the list in parentheses to avoid ambiguity. For example: | |
729 | .PP | |
730 | .Vb 2 | |
731 | \& # Stat returns list value. | |
732 | \& $time = (stat($file))[8]; | |
733 | .Ve | |
734 | .PP | |
735 | .Vb 2 | |
736 | \& # SYNTAX ERROR HERE. | |
737 | \& $time = stat($file)[8]; # OOPS, FORGOT PARENTHESES | |
738 | .Ve | |
739 | .PP | |
740 | .Vb 2 | |
741 | \& # Find a hex digit. | |
742 | \& $hexdigit = ('a','b','c','d','e','f')[$digit-10]; | |
743 | .Ve | |
744 | .PP | |
745 | .Vb 2 | |
746 | \& # A "reverse comma operator". | |
747 | \& return (pop(@foo),pop(@foo))[0]; | |
748 | .Ve | |
749 | .PP | |
750 | Lists may be assigned to only when each element of the list | |
751 | is itself legal to assign to: | |
752 | .PP | |
753 | .Vb 1 | |
754 | \& ($a, $b, $c) = (1, 2, 3); | |
755 | .Ve | |
756 | .PP | |
757 | .Vb 1 | |
758 | \& ($map{'red'}, $map{'blue'}, $map{'green'}) = (0x00f, 0x0f0, 0xf00); | |
759 | .Ve | |
760 | .PP | |
761 | An exception to this is that you may assign to \f(CW\*(C`undef\*(C'\fR in a list. | |
762 | This is useful for throwing away some of the return values of a | |
763 | function: | |
764 | .PP | |
765 | .Vb 1 | |
766 | \& ($dev, $ino, undef, undef, $uid, $gid) = stat($file); | |
767 | .Ve | |
768 | .PP | |
769 | List assignment in scalar context returns the number of elements | |
770 | produced by the expression on the right side of the assignment: | |
771 | .PP | |
772 | .Vb 2 | |
773 | \& $x = (($foo,$bar) = (3,2,1)); # set $x to 3, not 2 | |
774 | \& $x = (($foo,$bar) = f()); # set $x to f()'s return count | |
775 | .Ve | |
776 | .PP | |
777 | This is handy when you want to do a list assignment in a Boolean | |
778 | context, because most list functions return a null list when finished, | |
779 | which when assigned produces a 0, which is interpreted as \s-1FALSE\s0. | |
780 | .PP | |
781 | It's also the source of a useful idiom for executing a function or | |
782 | performing an operation in list context and then counting the number of | |
783 | return values, by assigning to an empty list and then using that | |
784 | assignment in scalar context. For example, this code: | |
785 | .PP | |
786 | .Vb 1 | |
787 | \& $count = () = $string =~ /\ed+/g; | |
788 | .Ve | |
789 | .PP | |
790 | will place into \f(CW$count\fR the number of digit groups found in \f(CW$string\fR. | |
791 | This happens because the pattern match is in list context (since it | |
792 | is being assigned to the empty list), and will therefore return a list | |
793 | of all matching parts of the string. The list assignment in scalar | |
794 | context will translate that into the number of elements (here, the | |
795 | number of times the pattern matched) and assign that to \f(CW$count\fR. Note | |
796 | that simply using | |
797 | .PP | |
798 | .Vb 1 | |
799 | \& $count = $string =~ /\ed+/g; | |
800 | .Ve | |
801 | .PP | |
802 | would not have worked, since a pattern match in scalar context will | |
803 | only return true or false, rather than a count of matches. | |
804 | .PP | |
805 | The final element of a list assignment may be an array or a hash: | |
806 | .PP | |
807 | .Vb 2 | |
808 | \& ($a, $b, @rest) = split; | |
809 | \& my($a, $b, %rest) = @_; | |
810 | .Ve | |
811 | .PP | |
812 | You can actually put an array or hash anywhere in the list, but the first one | |
813 | in the list will soak up all the values, and anything after it will become | |
814 | undefined. This may be useful in a \fImy()\fR or \fIlocal()\fR. | |
815 | .PP | |
816 | A hash can be initialized using a literal list holding pairs of | |
817 | items to be interpreted as a key and a value: | |
818 | .PP | |
819 | .Vb 2 | |
820 | \& # same as map assignment above | |
821 | \& %map = ('red',0x00f,'blue',0x0f0,'green',0xf00); | |
822 | .Ve | |
823 | .PP | |
824 | While literal lists and named arrays are often interchangeable, that's | |
825 | not the case for hashes. Just because you can subscript a list value like | |
826 | a normal array does not mean that you can subscript a list value as a | |
827 | hash. Likewise, hashes included as parts of other lists (including | |
828 | parameter lists and return lists from functions) always flatten out into | |
829 | key/value pairs. That's why it's good to use references sometimes. | |
830 | .PP | |
831 | It is often more readable to use the \f(CW\*(C`=>\*(C'\fR operator between key/value | |
832 | pairs. The \f(CW\*(C`=>\*(C'\fR operator is mostly just a more visually distinctive | |
833 | synonym for a comma, but it also arranges for its left-hand operand to be | |
834 | interpreted as a string \*(-- if it's a bareword that would be a legal simple | |
835 | identifier (\f(CW\*(C`=>\*(C'\fR doesn't quote compound identifiers that contain | |
836 | double colons). This makes it nice for initializing hashes: | |
837 | .PP | |
838 | .Vb 5 | |
839 | \& %map = ( | |
840 | \& red => 0x00f, | |
841 | \& blue => 0x0f0, | |
842 | \& green => 0xf00, | |
843 | \& ); | |
844 | .Ve | |
845 | .PP | |
846 | or for initializing hash references to be used as records: | |
847 | .PP | |
848 | .Vb 5 | |
849 | \& $rec = { | |
850 | \& witch => 'Mable the Merciless', | |
851 | \& cat => 'Fluffy the Ferocious', | |
852 | \& date => '10/31/1776', | |
853 | \& }; | |
854 | .Ve | |
855 | .PP | |
856 | or for using call-by-named-parameter to complicated functions: | |
857 | .PP | |
858 | .Vb 7 | |
859 | \& $field = $query->radio_group( | |
860 | \& name => 'group_name', | |
861 | \& values => ['eenie','meenie','minie'], | |
862 | \& default => 'meenie', | |
863 | \& linebreak => 'true', | |
864 | \& labels => \e%labels | |
865 | \& ); | |
866 | .Ve | |
867 | .PP | |
868 | Note that just because a hash is initialized in that order doesn't | |
869 | mean that it comes out in that order. See \*(L"sort\*(R" in perlfunc for examples | |
870 | of how to arrange for an output ordering. | |
871 | .Sh "Subscripts" | |
872 | .IX Subsection "Subscripts" | |
873 | An array is subscripted by specifying a dollar sign (\f(CW\*(C`$\*(C'\fR), then the | |
874 | name of the array (without the leading \f(CW\*(C`@\*(C'\fR), then the subscript inside | |
875 | square brackets. For example: | |
876 | .PP | |
877 | .Vb 2 | |
878 | \& @myarray = (5, 50, 500, 5000); | |
879 | \& print "Element Number 2 is", $myarray[2], "\en"; | |
880 | .Ve | |
881 | .PP | |
882 | The array indices start with 0. A negative subscript retrieves its | |
883 | value from the end. In our example, \f(CW$myarray[\-1]\fR would have been | |
884 | 5000, and \f(CW$myarray[\-2]\fR would have been 500. | |
885 | .PP | |
886 | Hash subscripts are similar, only instead of square brackets curly brackets | |
887 | are used. For example: | |
888 | .PP | |
889 | .Vb 7 | |
890 | \& %scientists = | |
891 | \& ( | |
892 | \& "Newton" => "Isaac", | |
893 | \& "Einstein" => "Albert", | |
894 | \& "Darwin" => "Charles", | |
895 | \& "Feynman" => "Richard", | |
896 | \& ); | |
897 | .Ve | |
898 | .PP | |
899 | .Vb 1 | |
900 | \& print "Darwin's First Name is ", $scientists{"Darwin"}, "\en"; | |
901 | .Ve | |
902 | .Sh "Slices" | |
903 | .IX Xref "slice array, slice hash, slice" | |
904 | .IX Subsection "Slices" | |
905 | A common way to access an array or a hash is one scalar element at a | |
906 | time. You can also subscript a list to get a single element from it. | |
907 | .PP | |
908 | .Vb 3 | |
909 | \& $whoami = $ENV{"USER"}; # one element from the hash | |
910 | \& $parent = $ISA[0]; # one element from the array | |
911 | \& $dir = (getpwnam("daemon"))[7]; # likewise, but with list | |
912 | .Ve | |
913 | .PP | |
914 | A slice accesses several elements of a list, an array, or a hash | |
915 | simultaneously using a list of subscripts. It's more convenient | |
916 | than writing out the individual elements as a list of separate | |
917 | scalar values. | |
918 | .PP | |
919 | .Vb 4 | |
920 | \& ($him, $her) = @folks[0,-1]; # array slice | |
921 | \& @them = @folks[0 .. 3]; # array slice | |
922 | \& ($who, $home) = @ENV{"USER", "HOME"}; # hash slice | |
923 | \& ($uid, $dir) = (getpwnam("daemon"))[2,7]; # list slice | |
924 | .Ve | |
925 | .PP | |
926 | Since you can assign to a list of variables, you can also assign to | |
927 | an array or hash slice. | |
928 | .PP | |
929 | .Vb 4 | |
930 | \& @days[3..5] = qw/Wed Thu Fri/; | |
931 | \& @colors{'red','blue','green'} | |
932 | \& = (0xff0000, 0x0000ff, 0x00ff00); | |
933 | \& @folks[0, -1] = @folks[-1, 0]; | |
934 | .Ve | |
935 | .PP | |
936 | The previous assignments are exactly equivalent to | |
937 | .PP | |
938 | .Vb 4 | |
939 | \& ($days[3], $days[4], $days[5]) = qw/Wed Thu Fri/; | |
940 | \& ($colors{'red'}, $colors{'blue'}, $colors{'green'}) | |
941 | \& = (0xff0000, 0x0000ff, 0x00ff00); | |
942 | \& ($folks[0], $folks[-1]) = ($folks[-1], $folks[0]); | |
943 | .Ve | |
944 | .PP | |
945 | Since changing a slice changes the original array or hash that it's | |
946 | slicing, a \f(CW\*(C`foreach\*(C'\fR construct will alter some\*(--or even all\*(--of the | |
947 | values of the array or hash. | |
948 | .PP | |
949 | .Vb 1 | |
950 | \& foreach (@array[ 4 .. 10 ]) { s/peter/paul/ } | |
951 | .Ve | |
952 | .PP | |
953 | .Vb 5 | |
954 | \& foreach (@hash{qw[key1 key2]}) { | |
955 | \& s/^\es+//; # trim leading whitespace | |
956 | \& s/\es+$//; # trim trailing whitespace | |
957 | \& s/(\ew+)/\eu\eL$1/g; # "titlecase" words | |
958 | \& } | |
959 | .Ve | |
960 | .PP | |
961 | A slice of an empty list is still an empty list. Thus: | |
962 | .PP | |
963 | .Vb 3 | |
964 | \& @a = ()[1,0]; # @a has no elements | |
965 | \& @b = (@a)[0,1]; # @b has no elements | |
966 | \& @c = (0,1)[2,3]; # @c has no elements | |
967 | .Ve | |
968 | .PP | |
969 | But: | |
970 | .PP | |
971 | .Vb 2 | |
972 | \& @a = (1)[1,0]; # @a has two elements | |
973 | \& @b = (1,undef)[1,0,2]; # @b has three elements | |
974 | .Ve | |
975 | .PP | |
976 | This makes it easy to write loops that terminate when a null list | |
977 | is returned: | |
978 | .PP | |
979 | .Vb 3 | |
980 | \& while ( ($home, $user) = (getpwent)[7,0]) { | |
981 | \& printf "%-8s %s\en", $user, $home; | |
982 | \& } | |
983 | .Ve | |
984 | .PP | |
985 | As noted earlier in this document, the scalar sense of list assignment | |
986 | is the number of elements on the right-hand side of the assignment. | |
987 | The null list contains no elements, so when the password file is | |
988 | exhausted, the result is 0, not 2. | |
989 | .PP | |
990 | If you're confused about why you use an '@' there on a hash slice | |
991 | instead of a '%', think of it like this. The type of bracket (square | |
992 | or curly) governs whether it's an array or a hash being looked at. | |
993 | On the other hand, the leading symbol ('$' or '@') on the array or | |
994 | hash indicates whether you are getting back a singular value (a | |
995 | scalar) or a plural one (a list). | |
996 | .Sh "Typeglobs and Filehandles" | |
997 | .IX Xref "typeglob filehandle *" | |
998 | .IX Subsection "Typeglobs and Filehandles" | |
999 | Perl uses an internal type called a \fItypeglob\fR to hold an entire | |
1000 | symbol table entry. The type prefix of a typeglob is a \f(CW\*(C`*\*(C'\fR, because | |
1001 | it represents all types. This used to be the preferred way to | |
1002 | pass arrays and hashes by reference into a function, but now that | |
1003 | we have real references, this is seldom needed. | |
1004 | .PP | |
1005 | The main use of typeglobs in modern Perl is to create symbol table aliases. | |
1006 | This assignment: | |
1007 | .PP | |
1008 | .Vb 1 | |
1009 | \& *this = *that; | |
1010 | .Ve | |
1011 | .PP | |
1012 | makes \f(CW$this\fR an alias for \f(CW$that\fR, \f(CW@this\fR an alias for \f(CW@that\fR, \f(CW%this\fR an alias | |
1013 | for \f(CW%that\fR, &this an alias for &that, etc. Much safer is to use a reference. | |
1014 | This: | |
1015 | .PP | |
1016 | .Vb 1 | |
1017 | \& local *Here::blue = \e$There::green; | |
1018 | .Ve | |
1019 | .PP | |
1020 | temporarily makes \f(CW$Here::blue\fR an alias for \f(CW$There::green\fR, but doesn't | |
1021 | make \f(CW@Here::blue\fR an alias for \f(CW@There::green\fR, or \f(CW%Here::blue\fR an alias for | |
1022 | \&\f(CW%There::green\fR, etc. See \*(L"Symbol Tables\*(R" in perlmod for more examples | |
1023 | of this. Strange though this may seem, this is the basis for the whole | |
1024 | module import/export system. | |
1025 | .PP | |
1026 | Another use for typeglobs is to pass filehandles into a function or | |
1027 | to create new filehandles. If you need to use a typeglob to save away | |
1028 | a filehandle, do it this way: | |
1029 | .PP | |
1030 | .Vb 1 | |
1031 | \& $fh = *STDOUT; | |
1032 | .Ve | |
1033 | .PP | |
1034 | or perhaps as a real reference, like this: | |
1035 | .PP | |
1036 | .Vb 1 | |
1037 | \& $fh = \e*STDOUT; | |
1038 | .Ve | |
1039 | .PP | |
1040 | See perlsub for examples of using these as indirect filehandles | |
1041 | in functions. | |
1042 | .PP | |
1043 | Typeglobs are also a way to create a local filehandle using the \fIlocal()\fR | |
1044 | operator. These last until their block is exited, but may be passed back. | |
1045 | For example: | |
1046 | .PP | |
1047 | .Vb 7 | |
1048 | \& sub newopen { | |
1049 | \& my $path = shift; | |
1050 | \& local *FH; # not my! | |
1051 | \& open (FH, $path) or return undef; | |
1052 | \& return *FH; | |
1053 | \& } | |
1054 | \& $fh = newopen('/etc/passwd'); | |
1055 | .Ve | |
1056 | .PP | |
1057 | Now that we have the \f(CW*foo{THING}\fR notation, typeglobs aren't used as much | |
1058 | for filehandle manipulations, although they're still needed to pass brand | |
1059 | new file and directory handles into or out of functions. That's because | |
1060 | \&\f(CW*HANDLE{IO}\fR only works if \s-1HANDLE\s0 has already been used as a handle. | |
1061 | In other words, \f(CW*FH\fR must be used to create new symbol table entries; | |
1062 | \&\f(CW*foo{THING}\fR cannot. When in doubt, use \f(CW*FH\fR. | |
1063 | .PP | |
1064 | All functions that are capable of creating filehandles (\fIopen()\fR, | |
1065 | \&\fIopendir()\fR, \fIpipe()\fR, \fIsocketpair()\fR, \fIsysopen()\fR, \fIsocket()\fR, and \fIaccept()\fR) | |
1066 | automatically create an anonymous filehandle if the handle passed to | |
1067 | them is an uninitialized scalar variable. This allows the constructs | |
1068 | such as \f(CW\*(C`open(my $fh, ...)\*(C'\fR and \f(CW\*(C`open(local $fh,...)\*(C'\fR to be used to | |
1069 | create filehandles that will conveniently be closed automatically when | |
1070 | the scope ends, provided there are no other references to them. This | |
1071 | largely eliminates the need for typeglobs when opening filehandles | |
1072 | that must be passed around, as in the following example: | |
1073 | .PP | |
1074 | .Vb 5 | |
1075 | \& sub myopen { | |
1076 | \& open my $fh, "@_" | |
1077 | \& or die "Can't open '@_': $!"; | |
1078 | \& return $fh; | |
1079 | \& } | |
1080 | .Ve | |
1081 | .PP | |
1082 | .Vb 5 | |
1083 | \& { | |
1084 | \& my $f = myopen("</etc/motd"); | |
1085 | \& print <$f>; | |
1086 | \& # $f implicitly closed here | |
1087 | \& } | |
1088 | .Ve | |
1089 | .PP | |
1090 | Note that if an initialized scalar variable is used instead, the | |
1091 | result is different: \f(CW\*(C`my $fh='zzz'; open($fh, ...)\*(C'\fR is equivalent | |
1092 | to \f(CW\*(C`open( *{'zzz'}, ...)\*(C'\fR. | |
1093 | \&\f(CW\*(C`use strict 'refs'\*(C'\fR forbids such practice. | |
1094 | .PP | |
1095 | Another way to create anonymous filehandles is with the Symbol | |
1096 | module or with the IO::Handle module and its ilk. These modules | |
1097 | have the advantage of not hiding different types of the same name | |
1098 | during the \fIlocal()\fR. See the bottom of \*(L"\fIopen()\fR\*(R" in perlfunc for an | |
1099 | example. | |
1100 | .SH "SEE ALSO" | |
1101 | .IX Header "SEE ALSO" | |
1102 | See perlvar for a description of Perl's built-in variables and | |
1103 | a discussion of legal variable names. See perlref, perlsub, | |
1104 | and \*(L"Symbol Tables\*(R" in perlmod for more discussion on typeglobs and | |
1105 | the \f(CW*foo{THING}\fR syntax. |