Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | .\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "PERLSUB 1" | |
132 | .TH PERLSUB 1 "2002-06-08" "perl v5.8.0" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | perlsub \- Perl subroutines | |
135 | .SH "SYNOPSIS" | |
136 | .IX Header "SYNOPSIS" | |
137 | To declare subroutines: | |
138 | .PP | |
139 | .Vb 4 | |
140 | \& sub NAME; # A "forward" declaration. | |
141 | \& sub NAME(PROTO); # ditto, but with prototypes | |
142 | \& sub NAME : ATTRS; # with attributes | |
143 | \& sub NAME(PROTO) : ATTRS; # with attributes and prototypes | |
144 | .Ve | |
145 | .PP | |
146 | .Vb 4 | |
147 | \& sub NAME BLOCK # A declaration and a definition. | |
148 | \& sub NAME(PROTO) BLOCK # ditto, but with prototypes | |
149 | \& sub NAME : ATTRS BLOCK # with attributes | |
150 | \& sub NAME(PROTO) : ATTRS BLOCK # with prototypes and attributes | |
151 | .Ve | |
152 | .PP | |
153 | To define an anonymous subroutine at runtime: | |
154 | .PP | |
155 | .Vb 4 | |
156 | \& $subref = sub BLOCK; # no proto | |
157 | \& $subref = sub (PROTO) BLOCK; # with proto | |
158 | \& $subref = sub : ATTRS BLOCK; # with attributes | |
159 | \& $subref = sub (PROTO) : ATTRS BLOCK; # with proto and attributes | |
160 | .Ve | |
161 | .PP | |
162 | To import subroutines: | |
163 | .PP | |
164 | .Vb 1 | |
165 | \& use MODULE qw(NAME1 NAME2 NAME3); | |
166 | .Ve | |
167 | .PP | |
168 | To call subroutines: | |
169 | .PP | |
170 | .Vb 4 | |
171 | \& NAME(LIST); # & is optional with parentheses. | |
172 | \& NAME LIST; # Parentheses optional if predeclared/imported. | |
173 | \& &NAME(LIST); # Circumvent prototypes. | |
174 | \& &NAME; # Makes current @_ visible to called subroutine. | |
175 | .Ve | |
176 | .SH "DESCRIPTION" | |
177 | .IX Header "DESCRIPTION" | |
178 | Like many languages, Perl provides for user-defined subroutines. | |
179 | These may be located anywhere in the main program, loaded in from | |
180 | other files via the \f(CW\*(C`do\*(C'\fR, \f(CW\*(C`require\*(C'\fR, or \f(CW\*(C`use\*(C'\fR keywords, or | |
181 | generated on the fly using \f(CW\*(C`eval\*(C'\fR or anonymous subroutines. | |
182 | You can even call a function indirectly using a variable containing | |
183 | its name or a \s-1CODE\s0 reference. | |
184 | .PP | |
185 | The Perl model for function call and return values is simple: all | |
186 | functions are passed as parameters one single flat list of scalars, and | |
187 | all functions likewise return to their caller one single flat list of | |
188 | scalars. Any arrays or hashes in these call and return lists will | |
189 | collapse, losing their identities\*(--but you may always use | |
190 | pass-by-reference instead to avoid this. Both call and return lists may | |
191 | contain as many or as few scalar elements as you'd like. (Often a | |
192 | function without an explicit return statement is called a subroutine, but | |
193 | there's really no difference from Perl's perspective.) | |
194 | .PP | |
195 | Any arguments passed in show up in the array \f(CW@_\fR. Therefore, if | |
196 | you called a function with two arguments, those would be stored in | |
197 | \&\f(CW$_[0]\fR and \f(CW$_[1]\fR. The array \f(CW@_\fR is a local array, but its | |
198 | elements are aliases for the actual scalar parameters. In particular, | |
199 | if an element \f(CW$_[0]\fR is updated, the corresponding argument is | |
200 | updated (or an error occurs if it is not updatable). If an argument | |
201 | is an array or hash element which did not exist when the function | |
202 | was called, that element is created only when (and if) it is modified | |
203 | or a reference to it is taken. (Some earlier versions of Perl | |
204 | created the element whether or not the element was assigned to.) | |
205 | Assigning to the whole array \f(CW@_\fR removes that aliasing, and does | |
206 | not update any arguments. | |
207 | .PP | |
208 | The return value of a subroutine is the value of the last expression | |
209 | evaluated. More explicitly, a \f(CW\*(C`return\*(C'\fR statement may be used to exit the | |
210 | subroutine, optionally specifying the returned value, which will be | |
211 | evaluated in the appropriate context (list, scalar, or void) depending | |
212 | on the context of the subroutine call. If you specify no return value, | |
213 | the subroutine returns an empty list in list context, the undefined | |
214 | value in scalar context, or nothing in void context. If you return | |
215 | one or more aggregates (arrays and hashes), these will be flattened | |
216 | together into one large indistinguishable list. | |
217 | .PP | |
218 | Perl does not have named formal parameters. In practice all you | |
219 | do is assign to a \f(CW\*(C`my()\*(C'\fR list of these. Variables that aren't | |
220 | declared to be private are global variables. For gory details | |
221 | on creating private variables, see \*(L"Private Variables via \fImy()\fR\*(R" | |
222 | and \*(L"Temporary Values via \fIlocal()\fR\*(R". To create protected | |
223 | environments for a set of functions in a separate package (and | |
224 | probably a separate file), see \*(L"Packages\*(R" in perlmod. | |
225 | .PP | |
226 | Example: | |
227 | .PP | |
228 | .Vb 8 | |
229 | \& sub max { | |
230 | \& my $max = shift(@_); | |
231 | \& foreach $foo (@_) { | |
232 | \& $max = $foo if $max < $foo; | |
233 | \& } | |
234 | \& return $max; | |
235 | \& } | |
236 | \& $bestday = max($mon,$tue,$wed,$thu,$fri); | |
237 | .Ve | |
238 | .PP | |
239 | Example: | |
240 | .PP | |
241 | .Vb 2 | |
242 | \& # get a line, combining continuation lines | |
243 | \& # that start with whitespace | |
244 | .Ve | |
245 | .PP | |
246 | .Vb 12 | |
247 | \& sub get_line { | |
248 | \& $thisline = $lookahead; # global variables! | |
249 | \& LINE: while (defined($lookahead = <STDIN>)) { | |
250 | \& if ($lookahead =~ /^[ \et]/) { | |
251 | \& $thisline .= $lookahead; | |
252 | \& } | |
253 | \& else { | |
254 | \& last LINE; | |
255 | \& } | |
256 | \& } | |
257 | \& return $thisline; | |
258 | \& } | |
259 | .Ve | |
260 | .PP | |
261 | .Vb 4 | |
262 | \& $lookahead = <STDIN>; # get first line | |
263 | \& while (defined($line = get_line())) { | |
264 | \& ... | |
265 | \& } | |
266 | .Ve | |
267 | .PP | |
268 | Assigning to a list of private variables to name your arguments: | |
269 | .PP | |
270 | .Vb 4 | |
271 | \& sub maybeset { | |
272 | \& my($key, $value) = @_; | |
273 | \& $Foo{$key} = $value unless $Foo{$key}; | |
274 | \& } | |
275 | .Ve | |
276 | .PP | |
277 | Because the assignment copies the values, this also has the effect | |
278 | of turning call-by-reference into call\-by\-value. Otherwise a | |
279 | function is free to do in-place modifications of \f(CW@_\fR and change | |
280 | its caller's values. | |
281 | .PP | |
282 | .Vb 4 | |
283 | \& upcase_in($v1, $v2); # this changes $v1 and $v2 | |
284 | \& sub upcase_in { | |
285 | \& for (@_) { tr/a-z/A-Z/ } | |
286 | \& } | |
287 | .Ve | |
288 | .PP | |
289 | You aren't allowed to modify constants in this way, of course. If an | |
290 | argument were actually literal and you tried to change it, you'd take a | |
291 | (presumably fatal) exception. For example, this won't work: | |
292 | .PP | |
293 | .Vb 1 | |
294 | \& upcase_in("frederick"); | |
295 | .Ve | |
296 | .PP | |
297 | It would be much safer if the \f(CW\*(C`upcase_in()\*(C'\fR function | |
298 | were written to return a copy of its parameters instead | |
299 | of changing them in place: | |
300 | .PP | |
301 | .Vb 7 | |
302 | \& ($v3, $v4) = upcase($v1, $v2); # this doesn't change $v1 and $v2 | |
303 | \& sub upcase { | |
304 | \& return unless defined wantarray; # void context, do nothing | |
305 | \& my @parms = @_; | |
306 | \& for (@parms) { tr/a-z/A-Z/ } | |
307 | \& return wantarray ? @parms : $parms[0]; | |
308 | \& } | |
309 | .Ve | |
310 | .PP | |
311 | Notice how this (unprototyped) function doesn't care whether it was | |
312 | passed real scalars or arrays. Perl sees all arguments as one big, | |
313 | long, flat parameter list in \f(CW@_\fR. This is one area where | |
314 | Perl's simple argument-passing style shines. The \f(CW\*(C`upcase()\*(C'\fR | |
315 | function would work perfectly well without changing the \f(CW\*(C`upcase()\*(C'\fR | |
316 | definition even if we fed it things like this: | |
317 | .PP | |
318 | .Vb 2 | |
319 | \& @newlist = upcase(@list1, @list2); | |
320 | \& @newlist = upcase( split /:/, $var ); | |
321 | .Ve | |
322 | .PP | |
323 | Do not, however, be tempted to do this: | |
324 | .PP | |
325 | .Vb 1 | |
326 | \& (@a, @b) = upcase(@list1, @list2); | |
327 | .Ve | |
328 | .PP | |
329 | Like the flattened incoming parameter list, the return list is also | |
330 | flattened on return. So all you have managed to do here is store | |
331 | everything in \f(CW@a\fR and make \f(CW@b\fR empty. See | |
332 | \&\*(L"Pass by Reference\*(R" for alternatives. | |
333 | .PP | |
334 | A subroutine may be called using an explicit \f(CW\*(C`&\*(C'\fR prefix. The | |
335 | \&\f(CW\*(C`&\*(C'\fR is optional in modern Perl, as are parentheses if the | |
336 | subroutine has been predeclared. The \f(CW\*(C`&\*(C'\fR is \fInot\fR optional | |
337 | when just naming the subroutine, such as when it's used as | |
338 | an argument to \fIdefined()\fR or \fIundef()\fR. Nor is it optional when you | |
339 | want to do an indirect subroutine call with a subroutine name or | |
340 | reference using the \f(CW\*(C`&$subref()\*(C'\fR or \f(CW\*(C`&{$subref}()\*(C'\fR constructs, | |
341 | although the \f(CW\*(C`$subref\->()\*(C'\fR notation solves that problem. | |
342 | See perlref for more about all that. | |
343 | .PP | |
344 | Subroutines may be called recursively. If a subroutine is called | |
345 | using the \f(CW\*(C`&\*(C'\fR form, the argument list is optional, and if omitted, | |
346 | no \f(CW@_\fR array is set up for the subroutine: the \f(CW@_\fR array at the | |
347 | time of the call is visible to the subroutine instead. This is an | |
348 | efficiency mechanism that new users may wish to avoid. | |
349 | .PP | |
350 | .Vb 2 | |
351 | \& &foo(1,2,3); # pass three arguments | |
352 | \& foo(1,2,3); # the same | |
353 | .Ve | |
354 | .PP | |
355 | .Vb 2 | |
356 | \& foo(); # pass a null list | |
357 | \& &foo(); # the same | |
358 | .Ve | |
359 | .PP | |
360 | .Vb 2 | |
361 | \& &foo; # foo() get current args, like foo(@_) !! | |
362 | \& foo; # like foo() IFF sub foo predeclared, else "foo" | |
363 | .Ve | |
364 | .PP | |
365 | Not only does the \f(CW\*(C`&\*(C'\fR form make the argument list optional, it also | |
366 | disables any prototype checking on arguments you do provide. This | |
367 | is partly for historical reasons, and partly for having a convenient way | |
368 | to cheat if you know what you're doing. See Prototypes below. | |
369 | .PP | |
370 | Functions whose names are in all upper case are reserved to the Perl | |
371 | core, as are modules whose names are in all lower case. A | |
372 | function in all capitals is a loosely-held convention meaning it | |
373 | will be called indirectly by the run-time system itself, usually | |
374 | due to a triggered event. Functions that do special, pre-defined | |
375 | things include \f(CW\*(C`BEGIN\*(C'\fR, \f(CW\*(C`CHECK\*(C'\fR, \f(CW\*(C`INIT\*(C'\fR, \f(CW\*(C`END\*(C'\fR, \f(CW\*(C`AUTOLOAD\*(C'\fR, | |
376 | \&\f(CW\*(C`CLONE\*(C'\fR and \f(CW\*(C`DESTROY\*(C'\fR\-\-plus all functions mentioned in perltie. | |
377 | .Sh "Private Variables via \fImy()\fP" | |
378 | .IX Subsection "Private Variables via my()" | |
379 | Synopsis: | |
380 | .PP | |
381 | .Vb 5 | |
382 | \& my $foo; # declare $foo lexically local | |
383 | \& my (@wid, %get); # declare list of variables local | |
384 | \& my $foo = "flurp"; # declare $foo lexical, and init it | |
385 | \& my @oof = @bar; # declare @oof lexical, and init it | |
386 | \& my $x : Foo = $y; # similar, with an attribute applied | |
387 | .Ve | |
388 | .PP | |
389 | \&\fB\s-1WARNING\s0\fR: The use of attribute lists on \f(CW\*(C`my\*(C'\fR declarations is still | |
390 | evolving. The current semantics and interface are subject to change. | |
391 | See attributes and Attribute::Handlers. | |
392 | .PP | |
393 | The \f(CW\*(C`my\*(C'\fR operator declares the listed variables to be lexically | |
394 | confined to the enclosing block, conditional (\f(CW\*(C`if/unless/elsif/else\*(C'\fR), | |
395 | loop (\f(CW\*(C`for/foreach/while/until/continue\*(C'\fR), subroutine, \f(CW\*(C`eval\*(C'\fR, | |
396 | or \f(CW\*(C`do/require/use\*(C'\fR'd file. If more than one value is listed, the | |
397 | list must be placed in parentheses. All listed elements must be | |
398 | legal lvalues. Only alphanumeric identifiers may be lexically | |
399 | scoped\*(--magical built-ins like \f(CW$/\fR must currently be \f(CW\*(C`local\*(C'\fRized | |
400 | with \f(CW\*(C`local\*(C'\fR instead. | |
401 | .PP | |
402 | Unlike dynamic variables created by the \f(CW\*(C`local\*(C'\fR operator, lexical | |
403 | variables declared with \f(CW\*(C`my\*(C'\fR are totally hidden from the outside | |
404 | world, including any called subroutines. This is true if it's the | |
405 | same subroutine called from itself or elsewhere\*(--every call gets | |
406 | its own copy. | |
407 | .PP | |
408 | This doesn't mean that a \f(CW\*(C`my\*(C'\fR variable declared in a statically | |
409 | enclosing lexical scope would be invisible. Only dynamic scopes | |
410 | are cut off. For example, the \f(CW\*(C`bumpx()\*(C'\fR function below has access | |
411 | to the lexical \f(CW$x\fR variable because both the \f(CW\*(C`my\*(C'\fR and the \f(CW\*(C`sub\*(C'\fR | |
412 | occurred at the same scope, presumably file scope. | |
413 | .PP | |
414 | .Vb 2 | |
415 | \& my $x = 10; | |
416 | \& sub bumpx { $x++ } | |
417 | .Ve | |
418 | .PP | |
419 | An \f(CW\*(C`eval()\*(C'\fR, however, can see lexical variables of the scope it is | |
420 | being evaluated in, so long as the names aren't hidden by declarations within | |
421 | the \f(CW\*(C`eval()\*(C'\fR itself. See perlref. | |
422 | .PP | |
423 | The parameter list to \fImy()\fR may be assigned to if desired, which allows you | |
424 | to initialize your variables. (If no initializer is given for a | |
425 | particular variable, it is created with the undefined value.) Commonly | |
426 | this is used to name input parameters to a subroutine. Examples: | |
427 | .PP | |
428 | .Vb 4 | |
429 | \& $arg = "fred"; # "global" variable | |
430 | \& $n = cube_root(27); | |
431 | \& print "$arg thinks the root is $n\en"; | |
432 | \& fred thinks the root is 3 | |
433 | .Ve | |
434 | .PP | |
435 | .Vb 5 | |
436 | \& sub cube_root { | |
437 | \& my $arg = shift; # name doesn't matter | |
438 | \& $arg **= 1/3; | |
439 | \& return $arg; | |
440 | \& } | |
441 | .Ve | |
442 | .PP | |
443 | The \f(CW\*(C`my\*(C'\fR is simply a modifier on something you might assign to. So when | |
444 | you do assign to variables in its argument list, \f(CW\*(C`my\*(C'\fR doesn't | |
445 | change whether those variables are viewed as a scalar or an array. So | |
446 | .PP | |
447 | .Vb 2 | |
448 | \& my ($foo) = <STDIN>; # WRONG? | |
449 | \& my @FOO = <STDIN>; | |
450 | .Ve | |
451 | .PP | |
452 | both supply a list context to the right-hand side, while | |
453 | .PP | |
454 | .Vb 1 | |
455 | \& my $foo = <STDIN>; | |
456 | .Ve | |
457 | .PP | |
458 | supplies a scalar context. But the following declares only one variable: | |
459 | .PP | |
460 | .Vb 1 | |
461 | \& my $foo, $bar = 1; # WRONG | |
462 | .Ve | |
463 | .PP | |
464 | That has the same effect as | |
465 | .PP | |
466 | .Vb 2 | |
467 | \& my $foo; | |
468 | \& $bar = 1; | |
469 | .Ve | |
470 | .PP | |
471 | The declared variable is not introduced (is not visible) until after | |
472 | the current statement. Thus, | |
473 | .PP | |
474 | .Vb 1 | |
475 | \& my $x = $x; | |
476 | .Ve | |
477 | .PP | |
478 | can be used to initialize a new \f(CW$x\fR with the value of the old \f(CW$x\fR, and | |
479 | the expression | |
480 | .PP | |
481 | .Vb 1 | |
482 | \& my $x = 123 and $x == 123 | |
483 | .Ve | |
484 | .PP | |
485 | is false unless the old \f(CW$x\fR happened to have the value \f(CW123\fR. | |
486 | .PP | |
487 | Lexical scopes of control structures are not bounded precisely by the | |
488 | braces that delimit their controlled blocks; control expressions are | |
489 | part of that scope, too. Thus in the loop | |
490 | .PP | |
491 | .Vb 5 | |
492 | \& while (my $line = <>) { | |
493 | \& $line = lc $line; | |
494 | \& } continue { | |
495 | \& print $line; | |
496 | \& } | |
497 | .Ve | |
498 | .PP | |
499 | the scope of \f(CW$line\fR extends from its declaration throughout the rest of | |
500 | the loop construct (including the \f(CW\*(C`continue\*(C'\fR clause), but not beyond | |
501 | it. Similarly, in the conditional | |
502 | .PP | |
503 | .Vb 8 | |
504 | \& if ((my $answer = <STDIN>) =~ /^yes$/i) { | |
505 | \& user_agrees(); | |
506 | \& } elsif ($answer =~ /^no$/i) { | |
507 | \& user_disagrees(); | |
508 | \& } else { | |
509 | \& chomp $answer; | |
510 | \& die "'$answer' is neither 'yes' nor 'no'"; | |
511 | \& } | |
512 | .Ve | |
513 | .PP | |
514 | the scope of \f(CW$answer\fR extends from its declaration through the rest | |
515 | of that conditional, including any \f(CW\*(C`elsif\*(C'\fR and \f(CW\*(C`else\*(C'\fR clauses, | |
516 | but not beyond it. | |
517 | .PP | |
518 | \&\fB\s-1NOTE:\s0\fR The behaviour of a \f(CW\*(C`my\*(C'\fR statement modified with a statement | |
519 | modifier conditional or loop construct (e.g. \f(CW\*(C`my $x if ...\*(C'\fR) is | |
520 | \&\fBundefined\fR. The value of the \f(CW\*(C`my\*(C'\fR variable may be \f(CW\*(C`undef\*(C'\fR, any | |
521 | previously assigned value, or possibly anything else. Don't rely on | |
522 | it. Future versions of perl might do something different from the | |
523 | version of perl you try it out on. Here be dragons. | |
524 | .PP | |
525 | The \f(CW\*(C`foreach\*(C'\fR loop defaults to scoping its index variable dynamically | |
526 | in the manner of \f(CW\*(C`local\*(C'\fR. However, if the index variable is | |
527 | prefixed with the keyword \f(CW\*(C`my\*(C'\fR, or if there is already a lexical | |
528 | by that name in scope, then a new lexical is created instead. Thus | |
529 | in the loop | |
530 | .PP | |
531 | .Vb 3 | |
532 | \& for my $i (1, 2, 3) { | |
533 | \& some_function(); | |
534 | \& } | |
535 | .Ve | |
536 | .PP | |
537 | the scope of \f(CW$i\fR extends to the end of the loop, but not beyond it, | |
538 | rendering the value of \f(CW$i\fR inaccessible within \f(CW\*(C`some_function()\*(C'\fR. | |
539 | .PP | |
540 | Some users may wish to encourage the use of lexically scoped variables. | |
541 | As an aid to catching implicit uses of package variables, | |
542 | which are always global, if you say | |
543 | .PP | |
544 | .Vb 1 | |
545 | \& use strict 'vars'; | |
546 | .Ve | |
547 | .PP | |
548 | then any variable mentioned from there to the end of the enclosing | |
549 | block must either refer to a lexical variable, be predeclared via | |
550 | \&\f(CW\*(C`our\*(C'\fR or \f(CW\*(C`use vars\*(C'\fR, or else must be fully qualified with the package name. | |
551 | A compilation error results otherwise. An inner block may countermand | |
552 | this with \f(CW\*(C`no strict 'vars'\*(C'\fR. | |
553 | .PP | |
554 | A \f(CW\*(C`my\*(C'\fR has both a compile-time and a run-time effect. At compile | |
555 | time, the compiler takes notice of it. The principal usefulness | |
556 | of this is to quiet \f(CW\*(C`use strict 'vars'\*(C'\fR, but it is also essential | |
557 | for generation of closures as detailed in perlref. Actual | |
558 | initialization is delayed until run time, though, so it gets executed | |
559 | at the appropriate time, such as each time through a loop, for | |
560 | example. | |
561 | .PP | |
562 | Variables declared with \f(CW\*(C`my\*(C'\fR are not part of any package and are therefore | |
563 | never fully qualified with the package name. In particular, you're not | |
564 | allowed to try to make a package variable (or other global) lexical: | |
565 | .PP | |
566 | .Vb 2 | |
567 | \& my $pack::var; # ERROR! Illegal syntax | |
568 | \& my $_; # also illegal (currently) | |
569 | .Ve | |
570 | .PP | |
571 | In fact, dynamic variables (also known as package or global variables) | |
572 | are still accessible using the fully qualified \f(CW\*(C`::\*(C'\fR notation even while a | |
573 | lexical of the same name is also visible: | |
574 | .PP | |
575 | .Vb 4 | |
576 | \& package main; | |
577 | \& local $x = 10; | |
578 | \& my $x = 20; | |
579 | \& print "$x and $::x\en"; | |
580 | .Ve | |
581 | .PP | |
582 | That will print out \f(CW20\fR and \f(CW10\fR. | |
583 | .PP | |
584 | You may declare \f(CW\*(C`my\*(C'\fR variables at the outermost scope of a file | |
585 | to hide any such identifiers from the world outside that file. This | |
586 | is similar in spirit to C's static variables when they are used at | |
587 | the file level. To do this with a subroutine requires the use of | |
588 | a closure (an anonymous function that accesses enclosing lexicals). | |
589 | If you want to create a private subroutine that cannot be called | |
590 | from outside that block, you can declare a lexical variable containing | |
591 | an anonymous sub reference: | |
592 | .PP | |
593 | .Vb 3 | |
594 | \& my $secret_version = '1.001-beta'; | |
595 | \& my $secret_sub = sub { print $secret_version }; | |
596 | \& &$secret_sub(); | |
597 | .Ve | |
598 | .PP | |
599 | As long as the reference is never returned by any function within the | |
600 | module, no outside module can see the subroutine, because its name is not in | |
601 | any package's symbol table. Remember that it's not \fI\s-1REALLY\s0\fR called | |
602 | \&\f(CW$some_pack::secret_version\fR or anything; it's just \f(CW$secret_version\fR, | |
603 | unqualified and unqualifiable. | |
604 | .PP | |
605 | This does not work with object methods, however; all object methods | |
606 | have to be in the symbol table of some package to be found. See | |
607 | \&\*(L"Function Templates\*(R" in perlref for something of a work-around to | |
608 | this. | |
609 | .Sh "Persistent Private Variables" | |
610 | .IX Subsection "Persistent Private Variables" | |
611 | Just because a lexical variable is lexically (also called statically) | |
612 | scoped to its enclosing block, \f(CW\*(C`eval\*(C'\fR, or \f(CW\*(C`do\*(C'\fR \s-1FILE\s0, this doesn't mean that | |
613 | within a function it works like a C static. It normally works more | |
614 | like a C auto, but with implicit garbage collection. | |
615 | .PP | |
616 | Unlike local variables in C or \*(C+, Perl's lexical variables don't | |
617 | necessarily get recycled just because their scope has exited. | |
618 | If something more permanent is still aware of the lexical, it will | |
619 | stick around. So long as something else references a lexical, that | |
620 | lexical won't be freed\*(--which is as it should be. You wouldn't want | |
621 | memory being freed until you were done using it, or kept around once you | |
622 | were done. Automatic garbage collection takes care of this for you. | |
623 | .PP | |
624 | This means that you can pass back or save away references to lexical | |
625 | variables, whereas to return a pointer to a C auto is a grave error. | |
626 | It also gives us a way to simulate C's function statics. Here's a | |
627 | mechanism for giving a function private variables with both lexical | |
628 | scoping and a static lifetime. If you do want to create something like | |
629 | C's static variables, just enclose the whole function in an extra block, | |
630 | and put the static variable outside the function but in the block. | |
631 | .PP | |
632 | .Vb 8 | |
633 | \& { | |
634 | \& my $secret_val = 0; | |
635 | \& sub gimme_another { | |
636 | \& return ++$secret_val; | |
637 | \& } | |
638 | \& } | |
639 | \& # $secret_val now becomes unreachable by the outside | |
640 | \& # world, but retains its value between calls to gimme_another | |
641 | .Ve | |
642 | .PP | |
643 | If this function is being sourced in from a separate file | |
644 | via \f(CW\*(C`require\*(C'\fR or \f(CW\*(C`use\*(C'\fR, then this is probably just fine. If it's | |
645 | all in the main program, you'll need to arrange for the \f(CW\*(C`my\*(C'\fR | |
646 | to be executed early, either by putting the whole block above | |
647 | your main program, or more likely, placing merely a \f(CW\*(C`BEGIN\*(C'\fR | |
648 | sub around it to make sure it gets executed before your program | |
649 | starts to run: | |
650 | .PP | |
651 | .Vb 6 | |
652 | \& sub BEGIN { | |
653 | \& my $secret_val = 0; | |
654 | \& sub gimme_another { | |
655 | \& return ++$secret_val; | |
656 | \& } | |
657 | \& } | |
658 | .Ve | |
659 | .PP | |
660 | See \*(L"Package Constructors and Destructors\*(R" in perlmod about the | |
661 | special triggered functions, \f(CW\*(C`BEGIN\*(C'\fR, \f(CW\*(C`CHECK\*(C'\fR, \f(CW\*(C`INIT\*(C'\fR and \f(CW\*(C`END\*(C'\fR. | |
662 | .PP | |
663 | If declared at the outermost scope (the file scope), then lexicals | |
664 | work somewhat like C's file statics. They are available to all | |
665 | functions in that same file declared below them, but are inaccessible | |
666 | from outside that file. This strategy is sometimes used in modules | |
667 | to create private variables that the whole module can see. | |
668 | .Sh "Temporary Values via \fIlocal()\fP" | |
669 | .IX Subsection "Temporary Values via local()" | |
670 | \&\fB\s-1WARNING\s0\fR: In general, you should be using \f(CW\*(C`my\*(C'\fR instead of \f(CW\*(C`local\*(C'\fR, because | |
671 | it's faster and safer. Exceptions to this include the global punctuation | |
672 | variables, filehandles and formats, and direct manipulation of the Perl | |
673 | symbol table itself. Format variables often use \f(CW\*(C`local\*(C'\fR though, as do | |
674 | other variables whose current value must be visible to called | |
675 | subroutines. | |
676 | .PP | |
677 | Synopsis: | |
678 | .PP | |
679 | .Vb 4 | |
680 | \& local $foo; # declare $foo dynamically local | |
681 | \& local (@wid, %get); # declare list of variables local | |
682 | \& local $foo = "flurp"; # declare $foo dynamic, and init it | |
683 | \& local @oof = @bar; # declare @oof dynamic, and init it | |
684 | .Ve | |
685 | .PP | |
686 | .Vb 5 | |
687 | \& local *FH; # localize $FH, @FH, %FH, &FH ... | |
688 | \& local *merlyn = *randal; # now $merlyn is really $randal, plus | |
689 | \& # @merlyn is really @randal, etc | |
690 | \& local *merlyn = 'randal'; # SAME THING: promote 'randal' to *randal | |
691 | \& local *merlyn = \e$randal; # just alias $merlyn, not @merlyn etc | |
692 | .Ve | |
693 | .PP | |
694 | A \f(CW\*(C`local\*(C'\fR modifies its listed variables to be \*(L"local\*(R" to the | |
695 | enclosing block, \f(CW\*(C`eval\*(C'\fR, or \f(CW\*(C`do FILE\*(C'\fR\-\-and to \fIany subroutine | |
696 | called from within that block\fR. A \f(CW\*(C`local\*(C'\fR just gives temporary | |
697 | values to global (meaning package) variables. It does \fInot\fR create | |
698 | a local variable. This is known as dynamic scoping. Lexical scoping | |
699 | is done with \f(CW\*(C`my\*(C'\fR, which works more like C's auto declarations. | |
700 | .PP | |
701 | If more than one variable is given to \f(CW\*(C`local\*(C'\fR, they must be placed in | |
702 | parentheses. All listed elements must be legal lvalues. This operator works | |
703 | by saving the current values of those variables in its argument list on a | |
704 | hidden stack and restoring them upon exiting the block, subroutine, or | |
705 | eval. This means that called subroutines can also reference the local | |
706 | variable, but not the global one. The argument list may be assigned to if | |
707 | desired, which allows you to initialize your local variables. (If no | |
708 | initializer is given for a particular variable, it is created with an | |
709 | undefined value.) Commonly this is used to name the parameters to a | |
710 | subroutine. Examples: | |
711 | .PP | |
712 | .Vb 5 | |
713 | \& for $i ( 0 .. 9 ) { | |
714 | \& $digits{$i} = $i; | |
715 | \& } | |
716 | \& # assume this function uses global %digits hash | |
717 | \& parse_num(); | |
718 | .Ve | |
719 | .PP | |
720 | .Vb 7 | |
721 | \& # now temporarily add to %digits hash | |
722 | \& if ($base12) { | |
723 | \& # (NOTE: not claiming this is efficient!) | |
724 | \& local %digits = (%digits, 't' => 10, 'e' => 11); | |
725 | \& parse_num(); # parse_num gets this new %digits! | |
726 | \& } | |
727 | \& # old %digits restored here | |
728 | .Ve | |
729 | .PP | |
730 | Because \f(CW\*(C`local\*(C'\fR is a run-time operator, it gets executed each time | |
731 | through a loop. In releases of Perl previous to 5.0, this used more stack | |
732 | storage each time until the loop was exited. Perl now reclaims the space | |
733 | each time through, but it's still more efficient to declare your variables | |
734 | outside the loop. | |
735 | .PP | |
736 | A \f(CW\*(C`local\*(C'\fR is simply a modifier on an lvalue expression. When you assign to | |
737 | a \f(CW\*(C`local\*(C'\fRized variable, the \f(CW\*(C`local\*(C'\fR doesn't change whether its list is viewed | |
738 | as a scalar or an array. So | |
739 | .PP | |
740 | .Vb 2 | |
741 | \& local($foo) = <STDIN>; | |
742 | \& local @FOO = <STDIN>; | |
743 | .Ve | |
744 | .PP | |
745 | both supply a list context to the right-hand side, while | |
746 | .PP | |
747 | .Vb 1 | |
748 | \& local $foo = <STDIN>; | |
749 | .Ve | |
750 | .PP | |
751 | supplies a scalar context. | |
752 | .PP | |
753 | A note about \f(CW\*(C`local()\*(C'\fR and composite types is in order. Something | |
754 | like \f(CW\*(C`local(%foo)\*(C'\fR works by temporarily placing a brand new hash in | |
755 | the symbol table. The old hash is left alone, but is hidden \*(L"behind\*(R" | |
756 | the new one. | |
757 | .PP | |
758 | This means the old variable is completely invisible via the symbol | |
759 | table (i.e. the hash entry in the \f(CW*foo\fR typeglob) for the duration | |
760 | of the dynamic scope within which the \f(CW\*(C`local()\*(C'\fR was seen. This | |
761 | has the effect of allowing one to temporarily occlude any magic on | |
762 | composite types. For instance, this will briefly alter a tied | |
763 | hash to some other implementation: | |
764 | .PP | |
765 | .Vb 12 | |
766 | \& tie %ahash, 'APackage'; | |
767 | \& [...] | |
768 | \& { | |
769 | \& local %ahash; | |
770 | \& tie %ahash, 'BPackage'; | |
771 | \& [..called code will see %ahash tied to 'BPackage'..] | |
772 | \& { | |
773 | \& local %ahash; | |
774 | \& [..%ahash is a normal (untied) hash here..] | |
775 | \& } | |
776 | \& } | |
777 | \& [..%ahash back to its initial tied self again..] | |
778 | .Ve | |
779 | .PP | |
780 | \&\fB\s-1WARNING\s0\fR: The code example above does not currently work as described. | |
781 | This will be fixed in a future release of Perl; in the meantime, avoid | |
782 | code that relies on any particular behaviour of localising tied arrays | |
783 | or hashes (localising individual elements is still okay). | |
784 | See \*(L"Localising Tied Arrays and Hashes Is Broken\*(R" in perldelta for more | |
785 | details. | |
786 | .PP | |
787 | As another example, a custom implementation of \f(CW%ENV\fR might look | |
788 | like this: | |
789 | .PP | |
790 | .Vb 6 | |
791 | \& { | |
792 | \& local %ENV; | |
793 | \& tie %ENV, 'MyOwnEnv'; | |
794 | \& [..do your own fancy %ENV manipulation here..] | |
795 | \& } | |
796 | \& [..normal %ENV behavior here..] | |
797 | .Ve | |
798 | .PP | |
799 | It's also worth taking a moment to explain what happens when you | |
800 | \&\f(CW\*(C`local\*(C'\fRize a member of a composite type (i.e. an array or hash element). | |
801 | In this case, the element is \f(CW\*(C`local\*(C'\fRized \fIby name\fR. This means that | |
802 | when the scope of the \f(CW\*(C`local()\*(C'\fR ends, the saved value will be | |
803 | restored to the hash element whose key was named in the \f(CW\*(C`local()\*(C'\fR, or | |
804 | the array element whose index was named in the \f(CW\*(C`local()\*(C'\fR. If that | |
805 | element was deleted while the \f(CW\*(C`local()\*(C'\fR was in effect (e.g. by a | |
806 | \&\f(CW\*(C`delete()\*(C'\fR from a hash or a \f(CW\*(C`shift()\*(C'\fR of an array), it will spring | |
807 | back into existence, possibly extending an array and filling in the | |
808 | skipped elements with \f(CW\*(C`undef\*(C'\fR. For instance, if you say | |
809 | .PP | |
810 | .Vb 17 | |
811 | \& %hash = ( 'This' => 'is', 'a' => 'test' ); | |
812 | \& @ary = ( 0..5 ); | |
813 | \& { | |
814 | \& local($ary[5]) = 6; | |
815 | \& local($hash{'a'}) = 'drill'; | |
816 | \& while (my $e = pop(@ary)) { | |
817 | \& print "$e . . .\en"; | |
818 | \& last unless $e > 3; | |
819 | \& } | |
820 | \& if (@ary) { | |
821 | \& $hash{'only a'} = 'test'; | |
822 | \& delete $hash{'a'}; | |
823 | \& } | |
824 | \& } | |
825 | \& print join(' ', map { "$_ $hash{$_}" } sort keys %hash),".\en"; | |
826 | \& print "The array has ",scalar(@ary)," elements: ", | |
827 | \& join(', ', map { defined $_ ? $_ : 'undef' } @ary),"\en"; | |
828 | .Ve | |
829 | .PP | |
830 | Perl will print | |
831 | .PP | |
832 | .Vb 5 | |
833 | \& 6 . . . | |
834 | \& 4 . . . | |
835 | \& 3 . . . | |
836 | \& This is a test only a test. | |
837 | \& The array has 6 elements: 0, 1, 2, undef, undef, 5 | |
838 | .Ve | |
839 | .PP | |
840 | The behavior of \fIlocal()\fR on non-existent members of composite | |
841 | types is subject to change in future. | |
842 | .Sh "Lvalue subroutines" | |
843 | .IX Subsection "Lvalue subroutines" | |
844 | \&\fB\s-1WARNING\s0\fR: Lvalue subroutines are still experimental and the | |
845 | implementation may change in future versions of Perl. | |
846 | .PP | |
847 | It is possible to return a modifiable value from a subroutine. | |
848 | To do this, you have to declare the subroutine to return an lvalue. | |
849 | .PP | |
850 | .Vb 8 | |
851 | \& my $val; | |
852 | \& sub canmod : lvalue { | |
853 | \& # return $val; this doesn't work, don't say "return" | |
854 | \& $val; | |
855 | \& } | |
856 | \& sub nomod { | |
857 | \& $val; | |
858 | \& } | |
859 | .Ve | |
860 | .PP | |
861 | .Vb 2 | |
862 | \& canmod() = 5; # assigns to $val | |
863 | \& nomod() = 5; # ERROR | |
864 | .Ve | |
865 | .PP | |
866 | The scalar/list context for the subroutine and for the right-hand | |
867 | side of assignment is determined as if the subroutine call is replaced | |
868 | by a scalar. For example, consider: | |
869 | .PP | |
870 | .Vb 1 | |
871 | \& data(2,3) = get_data(3,4); | |
872 | .Ve | |
873 | .PP | |
874 | Both subroutines here are called in a scalar context, while in: | |
875 | .PP | |
876 | .Vb 1 | |
877 | \& (data(2,3)) = get_data(3,4); | |
878 | .Ve | |
879 | .PP | |
880 | and in: | |
881 | .PP | |
882 | .Vb 1 | |
883 | \& (data(2),data(3)) = get_data(3,4); | |
884 | .Ve | |
885 | .PP | |
886 | all the subroutines are called in a list context. | |
887 | .IP "Lvalue subroutines are \s-1EXPERIMENTAL\s0" 4 | |
888 | .IX Item "Lvalue subroutines are EXPERIMENTAL" | |
889 | They appear to be convenient, but there are several reasons to be | |
890 | circumspect. | |
891 | .Sp | |
892 | You can't use the return keyword; you must pass out the value before | |
893 | falling out of subroutine scope (see the comment in the example above). This | |
894 | is usually not a problem, but it disallows an explicit return out of a | |
895 | deeply nested loop, which is sometimes a nice way out. | |
896 | .Sp | |
897 | They violate encapsulation. A normal mutator can check the supplied | |
898 | argument before setting the attribute it is protecting; an lvalue | |
899 | subroutine never gets that chance. Consider: | |
900 | .Sp | |
901 | .Vb 1 | |
902 | \& my $some_array_ref = []; # protected by mutators ?? | |
903 | .Ve | |
904 | .Sp | |
905 | .Vb 9 | |
906 | \& sub set_arr { # normal mutator | |
907 | \& my $val = shift; | |
908 | \& die("expected array, you supplied ", ref $val) | |
909 | \& unless ref $val eq 'ARRAY'; | |
910 | \& $some_array_ref = $val; | |
911 | \& } | |
912 | \& sub set_arr_lv : lvalue { # lvalue mutator | |
913 | \& $some_array_ref; | |
914 | \& } | |
915 | .Ve | |
916 | .Sp | |
917 | .Vb 2 | |
918 | \& # set_arr_lv cannot stop this ! | |
919 | \& set_arr_lv() = { a => 1 }; | |
920 | .Ve | |
921 | .Sh "Passing Symbol Table Entries (typeglobs)" | |
922 | .IX Subsection "Passing Symbol Table Entries (typeglobs)" | |
923 | \&\fB\s-1WARNING\s0\fR: The mechanism described in this section was originally | |
924 | the only way to simulate pass-by-reference in older versions of | |
925 | Perl. While it still works fine in modern versions, the new reference | |
926 | mechanism is generally easier to work with. See below. | |
927 | .PP | |
928 | Sometimes you don't want to pass the value of an array to a subroutine | |
929 | but rather the name of it, so that the subroutine can modify the global | |
930 | copy of it rather than working with a local copy. In perl you can | |
931 | refer to all objects of a particular name by prefixing the name | |
932 | with a star: \f(CW*foo\fR. This is often known as a \*(L"typeglob\*(R", because the | |
933 | star on the front can be thought of as a wildcard match for all the | |
934 | funny prefix characters on variables and subroutines and such. | |
935 | .PP | |
936 | When evaluated, the typeglob produces a scalar value that represents | |
937 | all the objects of that name, including any filehandle, format, or | |
938 | subroutine. When assigned to, it causes the name mentioned to refer to | |
939 | whatever \f(CW\*(C`*\*(C'\fR value was assigned to it. Example: | |
940 | .PP | |
941 | .Vb 8 | |
942 | \& sub doubleary { | |
943 | \& local(*someary) = @_; | |
944 | \& foreach $elem (@someary) { | |
945 | \& $elem *= 2; | |
946 | \& } | |
947 | \& } | |
948 | \& doubleary(*foo); | |
949 | \& doubleary(*bar); | |
950 | .Ve | |
951 | .PP | |
952 | Scalars are already passed by reference, so you can modify | |
953 | scalar arguments without using this mechanism by referring explicitly | |
954 | to \f(CW$_[0]\fR etc. You can modify all the elements of an array by passing | |
955 | all the elements as scalars, but you have to use the \f(CW\*(C`*\*(C'\fR mechanism (or | |
956 | the equivalent reference mechanism) to \f(CW\*(C`push\*(C'\fR, \f(CW\*(C`pop\*(C'\fR, or change the size of | |
957 | an array. It will certainly be faster to pass the typeglob (or reference). | |
958 | .PP | |
959 | Even if you don't want to modify an array, this mechanism is useful for | |
960 | passing multiple arrays in a single \s-1LIST\s0, because normally the \s-1LIST\s0 | |
961 | mechanism will merge all the array values so that you can't extract out | |
962 | the individual arrays. For more on typeglobs, see | |
963 | \&\*(L"Typeglobs and Filehandles\*(R" in perldata. | |
964 | .Sh "When to Still Use \fIlocal()\fP" | |
965 | .IX Subsection "When to Still Use local()" | |
966 | Despite the existence of \f(CW\*(C`my\*(C'\fR, there are three places where the | |
967 | \&\f(CW\*(C`local\*(C'\fR operator still shines. In fact, in these three places, you | |
968 | \&\fImust\fR use \f(CW\*(C`local\*(C'\fR instead of \f(CW\*(C`my\*(C'\fR. | |
969 | .IP "1." 4 | |
970 | You need to give a global variable a temporary value, especially \f(CW$_\fR. | |
971 | .Sp | |
972 | The global variables, like \f(CW@ARGV\fR or the punctuation variables, must be | |
973 | \&\f(CW\*(C`local\*(C'\fRized with \f(CW\*(C`local()\*(C'\fR. This block reads in \fI/etc/motd\fR, and splits | |
974 | it up into chunks separated by lines of equal signs, which are placed | |
975 | in \f(CW@Fields\fR. | |
976 | .Sp | |
977 | .Vb 6 | |
978 | \& { | |
979 | \& local @ARGV = ("/etc/motd"); | |
980 | \& local $/ = undef; | |
981 | \& local $_ = <>; | |
982 | \& @Fields = split /^\es*=+\es*$/; | |
983 | \& } | |
984 | .Ve | |
985 | .Sp | |
986 | In particular, it's important to \f(CW\*(C`local\*(C'\fRize \f(CW$_\fR in any routine that assigns | |
987 | to it. Look out for implicit assignments in \f(CW\*(C`while\*(C'\fR conditionals. | |
988 | .IP "2." 4 | |
989 | You need to create a local file or directory handle or a local function. | |
990 | .Sp | |
991 | A function that needs a filehandle of its own must use | |
992 | \&\f(CW\*(C`local()\*(C'\fR on a complete typeglob. This can be used to create new symbol | |
993 | table entries: | |
994 | .Sp | |
995 | .Vb 6 | |
996 | \& sub ioqueue { | |
997 | \& local (*READER, *WRITER); # not my! | |
998 | \& pipe (READER, WRITER) or die "pipe: $!"; | |
999 | \& return (*READER, *WRITER); | |
1000 | \& } | |
1001 | \& ($head, $tail) = ioqueue(); | |
1002 | .Ve | |
1003 | .Sp | |
1004 | See the Symbol module for a way to create anonymous symbol table | |
1005 | entries. | |
1006 | .Sp | |
1007 | Because assignment of a reference to a typeglob creates an alias, this | |
1008 | can be used to create what is effectively a local function, or at least, | |
1009 | a local alias. | |
1010 | .Sp | |
1011 | .Vb 6 | |
1012 | \& { | |
1013 | \& local *grow = \e&shrink; # only until this block exits | |
1014 | \& grow(); # really calls shrink() | |
1015 | \& move(); # if move() grow()s, it shrink()s too | |
1016 | \& } | |
1017 | \& grow(); # get the real grow() again | |
1018 | .Ve | |
1019 | .Sp | |
1020 | See \*(L"Function Templates\*(R" in perlref for more about manipulating | |
1021 | functions by name in this way. | |
1022 | .IP "3." 4 | |
1023 | You want to temporarily change just one element of an array or hash. | |
1024 | .Sp | |
1025 | You can \f(CW\*(C`local\*(C'\fRize just one element of an aggregate. Usually this | |
1026 | is done on dynamics: | |
1027 | .Sp | |
1028 | .Vb 5 | |
1029 | \& { | |
1030 | \& local $SIG{INT} = 'IGNORE'; | |
1031 | \& funct(); # uninterruptible | |
1032 | \& } | |
1033 | \& # interruptibility automatically restored here | |
1034 | .Ve | |
1035 | .Sp | |
1036 | But it also works on lexically declared aggregates. Prior to 5.005, | |
1037 | this operation could on occasion misbehave. | |
1038 | .Sh "Pass by Reference" | |
1039 | .IX Subsection "Pass by Reference" | |
1040 | If you want to pass more than one array or hash into a function\*(--or | |
1041 | return them from it\*(--and have them maintain their integrity, then | |
1042 | you're going to have to use an explicit pass\-by\-reference. Before you | |
1043 | do that, you need to understand references as detailed in perlref. | |
1044 | This section may not make much sense to you otherwise. | |
1045 | .PP | |
1046 | Here are a few simple examples. First, let's pass in several arrays | |
1047 | to a function and have it \f(CW\*(C`pop\*(C'\fR all of them, returning a new list | |
1048 | of all their former last elements: | |
1049 | .PP | |
1050 | .Vb 1 | |
1051 | \& @tailings = popmany ( \e@a, \e@b, \e@c, \e@d ); | |
1052 | .Ve | |
1053 | .PP | |
1054 | .Vb 8 | |
1055 | \& sub popmany { | |
1056 | \& my $aref; | |
1057 | \& my @retlist = (); | |
1058 | \& foreach $aref ( @_ ) { | |
1059 | \& push @retlist, pop @$aref; | |
1060 | \& } | |
1061 | \& return @retlist; | |
1062 | \& } | |
1063 | .Ve | |
1064 | .PP | |
1065 | Here's how you might write a function that returns a | |
1066 | list of keys occurring in all the hashes passed to it: | |
1067 | .PP | |
1068 | .Vb 10 | |
1069 | \& @common = inter( \e%foo, \e%bar, \e%joe ); | |
1070 | \& sub inter { | |
1071 | \& my ($k, $href, %seen); # locals | |
1072 | \& foreach $href (@_) { | |
1073 | \& while ( $k = each %$href ) { | |
1074 | \& $seen{$k}++; | |
1075 | \& } | |
1076 | \& } | |
1077 | \& return grep { $seen{$_} == @_ } keys %seen; | |
1078 | \& } | |
1079 | .Ve | |
1080 | .PP | |
1081 | So far, we're using just the normal list return mechanism. | |
1082 | What happens if you want to pass or return a hash? Well, | |
1083 | if you're using only one of them, or you don't mind them | |
1084 | concatenating, then the normal calling convention is ok, although | |
1085 | a little expensive. | |
1086 | .PP | |
1087 | Where people get into trouble is here: | |
1088 | .PP | |
1089 | .Vb 3 | |
1090 | \& (@a, @b) = func(@c, @d); | |
1091 | \&or | |
1092 | \& (%a, %b) = func(%c, %d); | |
1093 | .Ve | |
1094 | .PP | |
1095 | That syntax simply won't work. It sets just \f(CW@a\fR or \f(CW%a\fR and | |
1096 | clears the \f(CW@b\fR or \f(CW%b\fR. Plus the function didn't get passed | |
1097 | into two separate arrays or hashes: it got one long list in \f(CW@_\fR, | |
1098 | as always. | |
1099 | .PP | |
1100 | If you can arrange for everyone to deal with this through references, it's | |
1101 | cleaner code, although not so nice to look at. Here's a function that | |
1102 | takes two array references as arguments, returning the two array elements | |
1103 | in order of how many elements they have in them: | |
1104 | .PP | |
1105 | .Vb 10 | |
1106 | \& ($aref, $bref) = func(\e@c, \e@d); | |
1107 | \& print "@$aref has more than @$bref\en"; | |
1108 | \& sub func { | |
1109 | \& my ($cref, $dref) = @_; | |
1110 | \& if (@$cref > @$dref) { | |
1111 | \& return ($cref, $dref); | |
1112 | \& } else { | |
1113 | \& return ($dref, $cref); | |
1114 | \& } | |
1115 | \& } | |
1116 | .Ve | |
1117 | .PP | |
1118 | It turns out that you can actually do this also: | |
1119 | .PP | |
1120 | .Vb 10 | |
1121 | \& (*a, *b) = func(\e@c, \e@d); | |
1122 | \& print "@a has more than @b\en"; | |
1123 | \& sub func { | |
1124 | \& local (*c, *d) = @_; | |
1125 | \& if (@c > @d) { | |
1126 | \& return (\e@c, \e@d); | |
1127 | \& } else { | |
1128 | \& return (\e@d, \e@c); | |
1129 | \& } | |
1130 | \& } | |
1131 | .Ve | |
1132 | .PP | |
1133 | Here we're using the typeglobs to do symbol table aliasing. It's | |
1134 | a tad subtle, though, and also won't work if you're using \f(CW\*(C`my\*(C'\fR | |
1135 | variables, because only globals (even in disguise as \f(CW\*(C`local\*(C'\fRs) | |
1136 | are in the symbol table. | |
1137 | .PP | |
1138 | If you're passing around filehandles, you could usually just use the bare | |
1139 | typeglob, like \f(CW*STDOUT\fR, but typeglob references work, too. | |
1140 | For example: | |
1141 | .PP | |
1142 | .Vb 5 | |
1143 | \& splutter(\e*STDOUT); | |
1144 | \& sub splutter { | |
1145 | \& my $fh = shift; | |
1146 | \& print $fh "her um well a hmmm\en"; | |
1147 | \& } | |
1148 | .Ve | |
1149 | .PP | |
1150 | .Vb 5 | |
1151 | \& $rec = get_rec(\e*STDIN); | |
1152 | \& sub get_rec { | |
1153 | \& my $fh = shift; | |
1154 | \& return scalar <$fh>; | |
1155 | \& } | |
1156 | .Ve | |
1157 | .PP | |
1158 | If you're planning on generating new filehandles, you could do this. | |
1159 | Note that you pass back just the bare *FH, not a reference to it. | |
1160 | .PP | |
1161 | .Vb 5 | |
1162 | \& sub openit { | |
1163 | \& my $path = shift; | |
1164 | \& local *FH; | |
1165 | \& return open (FH, $path) ? *FH : undef; | |
1166 | \& } | |
1167 | .Ve | |
1168 | .Sh "Prototypes" | |
1169 | .IX Subsection "Prototypes" | |
1170 | Perl supports a very limited kind of compile-time argument checking | |
1171 | using function prototyping. If you declare | |
1172 | .PP | |
1173 | .Vb 1 | |
1174 | \& sub mypush (\e@@) | |
1175 | .Ve | |
1176 | .PP | |
1177 | then \f(CW\*(C`mypush()\*(C'\fR takes arguments exactly like \f(CW\*(C`push()\*(C'\fR does. The | |
1178 | function declaration must be visible at compile time. The prototype | |
1179 | affects only interpretation of new-style calls to the function, | |
1180 | where new-style is defined as not using the \f(CW\*(C`&\*(C'\fR character. In | |
1181 | other words, if you call it like a built-in function, then it behaves | |
1182 | like a built-in function. If you call it like an old-fashioned | |
1183 | subroutine, then it behaves like an old-fashioned subroutine. It | |
1184 | naturally falls out from this rule that prototypes have no influence | |
1185 | on subroutine references like \f(CW\*(C`\e&foo\*(C'\fR or on indirect subroutine | |
1186 | calls like \f(CW\*(C`&{$subref}\*(C'\fR or \f(CW\*(C`$subref\->()\*(C'\fR. | |
1187 | .PP | |
1188 | Method calls are not influenced by prototypes either, because the | |
1189 | function to be called is indeterminate at compile time, since | |
1190 | the exact code called depends on inheritance. | |
1191 | .PP | |
1192 | Because the intent of this feature is primarily to let you define | |
1193 | subroutines that work like built-in functions, here are prototypes | |
1194 | for some other functions that parse almost exactly like the | |
1195 | corresponding built\-in. | |
1196 | .PP | |
1197 | .Vb 1 | |
1198 | \& Declared as Called as | |
1199 | .Ve | |
1200 | .PP | |
1201 | .Vb 14 | |
1202 | \& sub mylink ($$) mylink $old, $new | |
1203 | \& sub myvec ($$$) myvec $var, $offset, 1 | |
1204 | \& sub myindex ($$;$) myindex &getstring, "substr" | |
1205 | \& sub mysyswrite ($$$;$) mysyswrite $buf, 0, length($buf) - $off, $off | |
1206 | \& sub myreverse (@) myreverse $a, $b, $c | |
1207 | \& sub myjoin ($@) myjoin ":", $a, $b, $c | |
1208 | \& sub mypop (\e@) mypop @array | |
1209 | \& sub mysplice (\e@$$@) mysplice @array, @array, 0, @pushme | |
1210 | \& sub mykeys (\e%) mykeys %{$hashref} | |
1211 | \& sub myopen (*;$) myopen HANDLE, $name | |
1212 | \& sub mypipe (**) mypipe READHANDLE, WRITEHANDLE | |
1213 | \& sub mygrep (&@) mygrep { /foo/ } $a, $b, $c | |
1214 | \& sub myrand ($) myrand 42 | |
1215 | \& sub mytime () mytime | |
1216 | .Ve | |
1217 | .PP | |
1218 | Any backslashed prototype character represents an actual argument | |
1219 | that absolutely must start with that character. The value passed | |
1220 | as part of \f(CW@_\fR will be a reference to the actual argument given | |
1221 | in the subroutine call, obtained by applying \f(CW\*(C`\e\*(C'\fR to that argument. | |
1222 | .PP | |
1223 | You can also backslash several argument types simultaneously by using | |
1224 | the \f(CW\*(C`\e[]\*(C'\fR notation: | |
1225 | .PP | |
1226 | .Vb 1 | |
1227 | \& sub myref (\e[$@%&*]) | |
1228 | .Ve | |
1229 | .PP | |
1230 | will allow calling \fImyref()\fR as | |
1231 | .PP | |
1232 | .Vb 5 | |
1233 | \& myref $var | |
1234 | \& myref @array | |
1235 | \& myref %hash | |
1236 | \& myref &sub | |
1237 | \& myref *glob | |
1238 | .Ve | |
1239 | .PP | |
1240 | and the first argument of \fImyref()\fR will be a reference to | |
1241 | a scalar, an array, a hash, a code, or a glob. | |
1242 | .PP | |
1243 | Unbackslashed prototype characters have special meanings. Any | |
1244 | unbackslashed \f(CW\*(C`@\*(C'\fR or \f(CW\*(C`%\*(C'\fR eats all remaining arguments, and forces | |
1245 | list context. An argument represented by \f(CW\*(C`$\*(C'\fR forces scalar context. An | |
1246 | \&\f(CW\*(C`&\*(C'\fR requires an anonymous subroutine, which, if passed as the first | |
1247 | argument, does not require the \f(CW\*(C`sub\*(C'\fR keyword or a subsequent comma. | |
1248 | .PP | |
1249 | A \f(CW\*(C`*\*(C'\fR allows the subroutine to accept a bareword, constant, scalar expression, | |
1250 | typeglob, or a reference to a typeglob in that slot. The value will be | |
1251 | available to the subroutine either as a simple scalar, or (in the latter | |
1252 | two cases) as a reference to the typeglob. If you wish to always convert | |
1253 | such arguments to a typeglob reference, use \fISymbol::qualify_to_ref()\fR as | |
1254 | follows: | |
1255 | .PP | |
1256 | .Vb 1 | |
1257 | \& use Symbol 'qualify_to_ref'; | |
1258 | .Ve | |
1259 | .PP | |
1260 | .Vb 4 | |
1261 | \& sub foo (*) { | |
1262 | \& my $fh = qualify_to_ref(shift, caller); | |
1263 | \& ... | |
1264 | \& } | |
1265 | .Ve | |
1266 | .PP | |
1267 | A semicolon separates mandatory arguments from optional arguments. | |
1268 | It is redundant before \f(CW\*(C`@\*(C'\fR or \f(CW\*(C`%\*(C'\fR, which gobble up everything else. | |
1269 | .PP | |
1270 | Note how the last three examples in the table above are treated | |
1271 | specially by the parser. \f(CW\*(C`mygrep()\*(C'\fR is parsed as a true list | |
1272 | operator, \f(CW\*(C`myrand()\*(C'\fR is parsed as a true unary operator with unary | |
1273 | precedence the same as \f(CW\*(C`rand()\*(C'\fR, and \f(CW\*(C`mytime()\*(C'\fR is truly without | |
1274 | arguments, just like \f(CW\*(C`time()\*(C'\fR. That is, if you say | |
1275 | .PP | |
1276 | .Vb 1 | |
1277 | \& mytime +2; | |
1278 | .Ve | |
1279 | .PP | |
1280 | you'll get \f(CW\*(C`mytime() + 2\*(C'\fR, not \f(CWmytime(2)\fR, which is how it would be parsed | |
1281 | without a prototype. | |
1282 | .PP | |
1283 | The interesting thing about \f(CW\*(C`&\*(C'\fR is that you can generate new syntax with it, | |
1284 | provided it's in the initial position: | |
1285 | .PP | |
1286 | .Vb 9 | |
1287 | \& sub try (&@) { | |
1288 | \& my($try,$catch) = @_; | |
1289 | \& eval { &$try }; | |
1290 | \& if ($@) { | |
1291 | \& local $_ = $@; | |
1292 | \& &$catch; | |
1293 | \& } | |
1294 | \& } | |
1295 | \& sub catch (&) { $_[0] } | |
1296 | .Ve | |
1297 | .PP | |
1298 | .Vb 5 | |
1299 | \& try { | |
1300 | \& die "phooey"; | |
1301 | \& } catch { | |
1302 | \& /phooey/ and print "unphooey\en"; | |
1303 | \& }; | |
1304 | .Ve | |
1305 | .PP | |
1306 | That prints \f(CW"unphooey"\fR. (Yes, there are still unresolved | |
1307 | issues having to do with visibility of \f(CW@_\fR. I'm ignoring that | |
1308 | question for the moment. (But note that if we make \f(CW@_\fR lexically | |
1309 | scoped, those anonymous subroutines can act like closures... (Gee, | |
1310 | is this sounding a little Lispish? (Never mind.)))) | |
1311 | .PP | |
1312 | And here's a reimplementation of the Perl \f(CW\*(C`grep\*(C'\fR operator: | |
1313 | .PP | |
1314 | .Vb 8 | |
1315 | \& sub mygrep (&@) { | |
1316 | \& my $code = shift; | |
1317 | \& my @result; | |
1318 | \& foreach $_ (@_) { | |
1319 | \& push(@result, $_) if &$code; | |
1320 | \& } | |
1321 | \& @result; | |
1322 | \& } | |
1323 | .Ve | |
1324 | .PP | |
1325 | Some folks would prefer full alphanumeric prototypes. Alphanumerics have | |
1326 | been intentionally left out of prototypes for the express purpose of | |
1327 | someday in the future adding named, formal parameters. The current | |
1328 | mechanism's main goal is to let module writers provide better diagnostics | |
1329 | for module users. Larry feels the notation quite understandable to Perl | |
1330 | programmers, and that it will not intrude greatly upon the meat of the | |
1331 | module, nor make it harder to read. The line noise is visually | |
1332 | encapsulated into a small pill that's easy to swallow. | |
1333 | .PP | |
1334 | If you try to use an alphanumeric sequence in a prototype you will | |
1335 | generate an optional warning \- \*(L"Illegal character in prototype...\*(R". | |
1336 | Unfortunately earlier versions of Perl allowed the prototype to be | |
1337 | used as long as its prefix was a valid prototype. The warning may be | |
1338 | upgraded to a fatal error in a future version of Perl once the | |
1339 | majority of offending code is fixed. | |
1340 | .PP | |
1341 | It's probably best to prototype new functions, not retrofit prototyping | |
1342 | into older ones. That's because you must be especially careful about | |
1343 | silent impositions of differing list versus scalar contexts. For example, | |
1344 | if you decide that a function should take just one parameter, like this: | |
1345 | .PP | |
1346 | .Vb 4 | |
1347 | \& sub func ($) { | |
1348 | \& my $n = shift; | |
1349 | \& print "you gave me $n\en"; | |
1350 | \& } | |
1351 | .Ve | |
1352 | .PP | |
1353 | and someone has been calling it with an array or expression | |
1354 | returning a list: | |
1355 | .PP | |
1356 | .Vb 2 | |
1357 | \& func(@foo); | |
1358 | \& func( split /:/ ); | |
1359 | .Ve | |
1360 | .PP | |
1361 | Then you've just supplied an automatic \f(CW\*(C`scalar\*(C'\fR in front of their | |
1362 | argument, which can be more than a bit surprising. The old \f(CW@foo\fR | |
1363 | which used to hold one thing doesn't get passed in. Instead, | |
1364 | \&\f(CW\*(C`func()\*(C'\fR now gets passed in a \f(CW1\fR; that is, the number of elements | |
1365 | in \f(CW@foo\fR. And the \f(CW\*(C`split\*(C'\fR gets called in scalar context so it | |
1366 | starts scribbling on your \f(CW@_\fR parameter list. Ouch! | |
1367 | .PP | |
1368 | This is all very powerful, of course, and should be used only in moderation | |
1369 | to make the world a better place. | |
1370 | .Sh "Constant Functions" | |
1371 | .IX Subsection "Constant Functions" | |
1372 | Functions with a prototype of \f(CW\*(C`()\*(C'\fR are potential candidates for | |
1373 | inlining. If the result after optimization and constant folding | |
1374 | is either a constant or a lexically-scoped scalar which has no other | |
1375 | references, then it will be used in place of function calls made | |
1376 | without \f(CW\*(C`&\*(C'\fR. Calls made using \f(CW\*(C`&\*(C'\fR are never inlined. (See | |
1377 | \&\fIconstant.pm\fR for an easy way to declare most constants.) | |
1378 | .PP | |
1379 | The following functions would all be inlined: | |
1380 | .PP | |
1381 | .Vb 5 | |
1382 | \& sub pi () { 3.14159 } # Not exact, but close. | |
1383 | \& sub PI () { 4 * atan2 1, 1 } # As good as it gets, | |
1384 | \& # and it's inlined, too! | |
1385 | \& sub ST_DEV () { 0 } | |
1386 | \& sub ST_INO () { 1 } | |
1387 | .Ve | |
1388 | .PP | |
1389 | .Vb 3 | |
1390 | \& sub FLAG_FOO () { 1 << 8 } | |
1391 | \& sub FLAG_BAR () { 1 << 9 } | |
1392 | \& sub FLAG_MASK () { FLAG_FOO | FLAG_BAR } | |
1393 | .Ve | |
1394 | .PP | |
1395 | .Vb 9 | |
1396 | \& sub OPT_BAZ () { not (0x1B58 & FLAG_MASK) } | |
1397 | \& sub BAZ_VAL () { | |
1398 | \& if (OPT_BAZ) { | |
1399 | \& return 23; | |
1400 | \& } | |
1401 | \& else { | |
1402 | \& return 42; | |
1403 | \& } | |
1404 | \& } | |
1405 | .Ve | |
1406 | .PP | |
1407 | .Vb 6 | |
1408 | \& sub N () { int(BAZ_VAL) / 3 } | |
1409 | \& BEGIN { | |
1410 | \& my $prod = 1; | |
1411 | \& for (1..N) { $prod *= $_ } | |
1412 | \& sub N_FACTORIAL () { $prod } | |
1413 | \& } | |
1414 | .Ve | |
1415 | .PP | |
1416 | If you redefine a subroutine that was eligible for inlining, you'll get | |
1417 | a mandatory warning. (You can use this warning to tell whether or not a | |
1418 | particular subroutine is considered constant.) The warning is | |
1419 | considered severe enough not to be optional because previously compiled | |
1420 | invocations of the function will still be using the old value of the | |
1421 | function. If you need to be able to redefine the subroutine, you need to | |
1422 | ensure that it isn't inlined, either by dropping the \f(CW\*(C`()\*(C'\fR prototype | |
1423 | (which changes calling semantics, so beware) or by thwarting the | |
1424 | inlining mechanism in some other way, such as | |
1425 | .PP | |
1426 | .Vb 3 | |
1427 | \& sub not_inlined () { | |
1428 | \& 23 if $]; | |
1429 | \& } | |
1430 | .Ve | |
1431 | .Sh "Overriding Built-in Functions" | |
1432 | .IX Subsection "Overriding Built-in Functions" | |
1433 | Many built-in functions may be overridden, though this should be tried | |
1434 | only occasionally and for good reason. Typically this might be | |
1435 | done by a package attempting to emulate missing built-in functionality | |
1436 | on a non-Unix system. | |
1437 | .PP | |
1438 | Overriding may be done only by importing the name from a | |
1439 | module\*(--ordinary predeclaration isn't good enough. However, the | |
1440 | \&\f(CW\*(C`use subs\*(C'\fR pragma lets you, in effect, predeclare subs | |
1441 | via the import syntax, and these names may then override built-in ones: | |
1442 | .PP | |
1443 | .Vb 3 | |
1444 | \& use subs 'chdir', 'chroot', 'chmod', 'chown'; | |
1445 | \& chdir $somewhere; | |
1446 | \& sub chdir { ... } | |
1447 | .Ve | |
1448 | .PP | |
1449 | To unambiguously refer to the built-in form, precede the | |
1450 | built-in name with the special package qualifier \f(CW\*(C`CORE::\*(C'\fR. For example, | |
1451 | saying \f(CW\*(C`CORE::open()\*(C'\fR always refers to the built-in \f(CW\*(C`open()\*(C'\fR, even | |
1452 | if the current package has imported some other subroutine called | |
1453 | \&\f(CW\*(C`&open()\*(C'\fR from elsewhere. Even though it looks like a regular | |
1454 | function call, it isn't: you can't take a reference to it, such as | |
1455 | the incorrect \f(CW\*(C`\e&CORE::open\*(C'\fR might appear to produce. | |
1456 | .PP | |
1457 | Library modules should not in general export built-in names like \f(CW\*(C`open\*(C'\fR | |
1458 | or \f(CW\*(C`chdir\*(C'\fR as part of their default \f(CW@EXPORT\fR list, because these may | |
1459 | sneak into someone else's namespace and change the semantics unexpectedly. | |
1460 | Instead, if the module adds that name to \f(CW@EXPORT_OK\fR, then it's | |
1461 | possible for a user to import the name explicitly, but not implicitly. | |
1462 | That is, they could say | |
1463 | .PP | |
1464 | .Vb 1 | |
1465 | \& use Module 'open'; | |
1466 | .Ve | |
1467 | .PP | |
1468 | and it would import the \f(CW\*(C`open\*(C'\fR override. But if they said | |
1469 | .PP | |
1470 | .Vb 1 | |
1471 | \& use Module; | |
1472 | .Ve | |
1473 | .PP | |
1474 | they would get the default imports without overrides. | |
1475 | .PP | |
1476 | The foregoing mechanism for overriding built-ins is restricted, quite | |
1477 | deliberately, to the package that requests the import. There is a second | |
1478 | method that is sometimes applicable when you wish to override a built-in | |
1479 | everywhere, without regard to namespace boundaries. This is achieved by | |
1480 | importing a sub into the special namespace \f(CW\*(C`CORE::GLOBAL::\*(C'\fR. Here is an | |
1481 | example that quite brazenly replaces the \f(CW\*(C`glob\*(C'\fR operator with something | |
1482 | that understands regular expressions. | |
1483 | .PP | |
1484 | .Vb 4 | |
1485 | \& package REGlob; | |
1486 | \& require Exporter; | |
1487 | \& @ISA = 'Exporter'; | |
1488 | \& @EXPORT_OK = 'glob'; | |
1489 | .Ve | |
1490 | .PP | |
1491 | .Vb 7 | |
1492 | \& sub import { | |
1493 | \& my $pkg = shift; | |
1494 | \& return unless @_; | |
1495 | \& my $sym = shift; | |
1496 | \& my $where = ($sym =~ s/^GLOBAL_// ? 'CORE::GLOBAL' : caller(0)); | |
1497 | \& $pkg->export($where, $sym, @_); | |
1498 | \& } | |
1499 | .Ve | |
1500 | .PP | |
1501 | .Vb 11 | |
1502 | \& sub glob { | |
1503 | \& my $pat = shift; | |
1504 | \& my @got; | |
1505 | \& local *D; | |
1506 | \& if (opendir D, '.') { | |
1507 | \& @got = grep /$pat/, readdir D; | |
1508 | \& closedir D; | |
1509 | \& } | |
1510 | \& return @got; | |
1511 | \& } | |
1512 | \& 1; | |
1513 | .Ve | |
1514 | .PP | |
1515 | And here's how it could be (ab)used: | |
1516 | .PP | |
1517 | .Vb 4 | |
1518 | \& #use REGlob 'GLOBAL_glob'; # override glob() in ALL namespaces | |
1519 | \& package Foo; | |
1520 | \& use REGlob 'glob'; # override glob() in Foo:: only | |
1521 | \& print for <^[a-z_]+\e.pm\e$>; # show all pragmatic modules | |
1522 | .Ve | |
1523 | .PP | |
1524 | The initial comment shows a contrived, even dangerous example. | |
1525 | By overriding \f(CW\*(C`glob\*(C'\fR globally, you would be forcing the new (and | |
1526 | subversive) behavior for the \f(CW\*(C`glob\*(C'\fR operator for \fIevery\fR namespace, | |
1527 | without the complete cognizance or cooperation of the modules that own | |
1528 | those namespaces. Naturally, this should be done with extreme caution\*(--if | |
1529 | it must be done at all. | |
1530 | .PP | |
1531 | The \f(CW\*(C`REGlob\*(C'\fR example above does not implement all the support needed to | |
1532 | cleanly override perl's \f(CW\*(C`glob\*(C'\fR operator. The built-in \f(CW\*(C`glob\*(C'\fR has | |
1533 | different behaviors depending on whether it appears in a scalar or list | |
1534 | context, but our \f(CW\*(C`REGlob\*(C'\fR doesn't.  Indeed, many perl built-ins have such | |
1535 | context-sensitive behaviors, and these must be adequately supported by | |
1536 | a properly written override. For a fully functional example of overriding | |
1537 | \&\f(CW\*(C`glob\*(C'\fR, study the implementation of \f(CW\*(C`File::DosGlob\*(C'\fR in the standard | |
1538 | library. | |
1539 | .PP | |
1540 | When you override a built\-in, your replacement should be consistent (if | |
1541 | possible) with the built-in native syntax. You can achieve this by using | |
1542 | a suitable prototype. To get the prototype of an overridable built\-in, | |
1543 | use the \f(CW\*(C`prototype\*(C'\fR function with an argument of \f(CW"CORE::builtin_name"\fR | |
1544 | (see \*(L"prototype\*(R" in perlfunc). | |
1545 | .PP | |
1546 | Note however that some built-ins can't have their syntax expressed by a | |
1547 | prototype (such as \f(CW\*(C`system\*(C'\fR or \f(CW\*(C`chomp\*(C'\fR). If you override them you won't | |
1548 | be able to fully mimic their original syntax. | |
1549 | .PP | |
1550 | The built-ins \f(CW\*(C`do\*(C'\fR, \f(CW\*(C`require\*(C'\fR and \f(CW\*(C`glob\*(C'\fR can also be overridden, but due | |
1551 | to special magic, their original syntax is preserved, and you don't have | |
1552 | to define a prototype for their replacements. (You can't override the | |
1553 | \&\f(CW\*(C`do BLOCK\*(C'\fR syntax, though.) | |
1554 | .PP | |
1555 | \&\f(CW\*(C`require\*(C'\fR has special additional dark magic: if you invoke your | |
1556 | \&\f(CW\*(C`require\*(C'\fR replacement as \f(CW\*(C`require Foo::Bar\*(C'\fR, it will actually receive | |
1557 | the argument \f(CW"Foo/Bar.pm"\fR in \f(CW@_\fR. See \*(L"require\*(R" in perlfunc. | |
1558 | .PP | |
1559 | And, as you'll have noticed from the previous example, if you override | |
1560 | \&\f(CW\*(C`glob\*(C'\fR, the \f(CW\*(C`<*>\*(C'\fR glob operator is overridden as well. | |
1561 | .PP | |
1562 | In a similar fashion, overriding the \f(CW\*(C`readline\*(C'\fR function also overrides | |
1563 | the equivalent I/O operator \f(CW\*(C`<FILEHANDLE>\*(C'\fR. | |
1564 | .PP | |
1565 | Finally, some built-ins (e.g. \f(CW\*(C`exists\*(C'\fR or \f(CW\*(C`grep\*(C'\fR) can't be overridden. | |
1566 | .Sh "Autoloading" | |
1567 | .IX Subsection "Autoloading" | |
1568 | If you call a subroutine that is undefined, you would ordinarily | |
1569 | get an immediate, fatal error complaining that the subroutine doesn't | |
1570 | exist. (Likewise for subroutines being used as methods, when the | |
1571 | method doesn't exist in any base class of the class's package.) | |
1572 | However, if an \f(CW\*(C`AUTOLOAD\*(C'\fR subroutine is defined in the package or | |
1573 | packages used to locate the original subroutine, then that | |
1574 | \&\f(CW\*(C`AUTOLOAD\*(C'\fR subroutine is called with the arguments that would have | |
1575 | been passed to the original subroutine. The fully qualified name | |
1576 | of the original subroutine magically appears in the global \f(CW$AUTOLOAD\fR | |
1577 | variable of the same package as the \f(CW\*(C`AUTOLOAD\*(C'\fR routine. The name | |
1578 | is not passed as an ordinary argument because, er, well, just | |
1579 | because, that's why... | |
1580 | .PP | |
1581 | Many \f(CW\*(C`AUTOLOAD\*(C'\fR routines load in a definition for the requested | |
1582 | subroutine using \fIeval()\fR, then execute that subroutine using a special | |
1583 | form of \fIgoto()\fR that erases the stack frame of the \f(CW\*(C`AUTOLOAD\*(C'\fR routine | |
1584 | without a trace. (See the source to the standard module documented | |
1585 | in AutoLoader, for example.) But an \f(CW\*(C`AUTOLOAD\*(C'\fR routine can | |
1586 | also just emulate the routine and never define it. For example, | |
1587 | let's pretend that a function that wasn't defined should just invoke | |
1588 | \&\f(CW\*(C`system\*(C'\fR with those arguments. All you'd do is: | |
1589 | .PP | |
1590 | .Vb 8 | |
1591 | \& sub AUTOLOAD { | |
1592 | \& my $program = $AUTOLOAD; | |
1593 | \& $program =~ s/.*:://; | |
1594 | \& system($program, @_); | |
1595 | \& } | |
1596 | \& date(); | |
1597 | \& who('am', 'i'); | |
1598 | \& ls('-l'); | |
1599 | .Ve | |
1600 | .PP | |
1601 | In fact, if you predeclare functions you want to call that way, you don't | |
1602 | even need parentheses: | |
1603 | .PP | |
1604 | .Vb 4 | |
1605 | \& use subs qw(date who ls); | |
1606 | \& date; | |
1607 | \& who "am", "i"; | |
1608 | \& ls -l; | |
1609 | .Ve | |
1610 | .PP | |
1611 | A more complete example of this is the standard Shell module, which | |
1612 | can treat undefined subroutine calls as calls to external programs. | |
1613 | .PP | |
1614 | Mechanisms are available to help module writers split their modules | |
1615 | into autoloadable files. See the standard AutoLoader module | |
1616 | described in AutoLoader and in AutoSplit, the standard | |
1617 | SelfLoader module in SelfLoader, and the document on adding C | |
1618 | functions to Perl code in perlxs. | |
1619 | .Sh "Subroutine Attributes" | |
1620 | .IX Subsection "Subroutine Attributes" | |
1621 | A subroutine declaration or definition may have a list of attributes | |
1622 | associated with it. If such an attribute list is present, it is | |
1623 | broken up at space or colon boundaries and treated as though a | |
1624 | \&\f(CW\*(C`use attributes\*(C'\fR had been seen. See attributes for details | |
1625 | about what attributes are currently supported. | |
1626 | Unlike the limitation with the obsolescent \f(CW\*(C`use attrs\*(C'\fR, the | |
1627 | \&\f(CW\*(C`sub : ATTRLIST\*(C'\fR syntax works to associate the attributes with | |
1628 | a pre\-declaration, and not just with a subroutine definition. | |
1629 | .PP | |
1630 | The attributes must be valid as simple identifier names (without any | |
1631 | punctuation other than the '_' character). They may have a parameter | |
1632 | list appended, which is only checked for whether its parentheses ('(',')') | |
1633 | nest properly. | |
1634 | .PP | |
1635 | Examples of valid syntax (even though the attributes are unknown): | |
1636 | .PP | |
1637 | .Vb 3 | |
1638 | \& sub fnord (&\e%) : switch(10,foo(7,3)) : expensive ; | |
1639 | \& sub plugh () : Ugly('\e(") :Bad ; | |
1640 | \& sub xyzzy : _5x5 { ... } | |
1641 | .Ve | |
1642 | .PP | |
1643 | Examples of invalid syntax: | |
1644 | .PP | |
1645 | .Vb 5 | |
1646 | \& sub fnord : switch(10,foo() ; # ()-string not balanced | |
1647 | \& sub snoid : Ugly('(') ; # ()-string not balanced | |
1648 | \& sub xyzzy : 5x5 ; # "5x5" not a valid identifier | |
1649 | \& sub plugh : Y2::north ; # "Y2::north" not a simple identifier | |
1650 | \& sub snurt : foo + bar ; # "+" not a colon or space | |
1651 | .Ve | |
1652 | .PP | |
1653 | The attribute list is passed as a list of constant strings to the code | |
1654 | which associates them with the subroutine. In particular, the second example | |
1655 | of valid syntax above currently looks like this in terms of how it's | |
1656 | parsed and invoked: | |
1657 | .PP | |
1658 | .Vb 1 | |
1659 | \& use attributes __PACKAGE__, \e&plugh, q[Ugly('\e(")], 'Bad'; | |
1660 | .Ve | |
1661 | .PP | |
1662 | For further details on attribute lists and their manipulation, | |
1663 | see attributes and Attribute::Handlers. | |
1664 | .SH "SEE ALSO" | |
1665 | .IX Header "SEE ALSO" | |
1666 | See \*(L"Function Templates\*(R" in perlref for more about references and closures. | |
1667 | See perlxs if you'd like to learn about calling C subroutines from Perl. | |
1668 | See perlembed if you'd like to learn about calling Perl subroutines from C. | |
1669 | See perlmod to learn about bundling up your functions in separate files. | |
1670 | See perlmodlib to learn what library modules come standard on your system. | |
1671 | See perltoot to learn how to make object method calls. |