Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "PERLREFTUT 1" | |
132 | .TH PERLREFTUT 1 "2006-01-07" "perl v5.8.8" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | perlreftut \- Mark's very short tutorial about references | |
135 | .SH "DESCRIPTION" | |
136 | .IX Header "DESCRIPTION" | |
137 | One of the most important new features in Perl 5 was the capability to | |
138 | manage complicated data structures like multidimensional arrays and | |
139 | nested hashes. To enable these, Perl 5 introduced a feature called | |
140 | `references', and using references is the key to managing complicated, | |
141 | structured data in Perl. Unfortunately, there's a lot of funny syntax | |
142 | to learn, and the main manual page can be hard to follow. The manual | |
143 | is quite complete, and sometimes people find that a problem, because | |
144 | it can be hard to tell what is important and what isn't. | |
145 | .PP | |
146 | Fortunately, you only need to know 10% of what's in the main page to get | |
147 | 90% of the benefit. This page will show you that 10%. | |
148 | .SH "Who Needs Complicated Data Structures?" | |
149 | .IX Header "Who Needs Complicated Data Structures?" | |
150 | One problem that came up all the time in Perl 4 was how to represent a | |
151 | hash whose values were lists. Perl 4 had hashes, of course, but the | |
152 | values had to be scalars; they couldn't be lists. | |
153 | .PP | |
154 | Why would you want a hash of lists? Let's take a simple example: You | |
155 | have a file of city and country names, like this: | |
156 | .PP | |
157 | .Vb 6 | |
158 | \& Chicago, USA | |
159 | \& Frankfurt, Germany | |
160 | \& Berlin, Germany | |
161 | \& Washington, USA | |
162 | \& Helsinki, Finland | |
163 | \& New York, USA | |
164 | .Ve | |
165 | .PP | |
166 | and you want to produce an output like this, with each country mentioned | |
167 | once, and then an alphabetical list of the cities in that country: | |
168 | .PP | |
169 | .Vb 3 | |
170 | \& Finland: Helsinki. | |
171 | \& Germany: Berlin, Frankfurt. | |
172 | \& USA: Chicago, New York, Washington. | |
173 | .Ve | |
174 | .PP | |
175 | The natural way to do this is to have a hash whose keys are country | |
176 | names. Associated with each country name key is a list of the cities in | |
177 | that country. Each time you read a line of input, split it into a country | |
178 | and a city, look up the list of cities already known to be in that | |
179 | country, and append the new city to the list. When you're done reading | |
180 | the input, iterate over the hash as usual, sorting each list of cities | |
181 | before you print it out. | |
182 | .PP | |
183 | If hash values can't be lists, you lose. In Perl 4, hash values can't | |
184 | be lists; they can only be strings. You lose. You'd probably have to | |
185 | combine all the cities into a single string somehow, and then when | |
186 | time came to write the output, you'd have to break the string into a | |
187 | list, sort the list, and turn it back into a string. This is messy | |
188 | and error\-prone. And it's frustrating, because Perl already has | |
189 | perfectly good lists that would solve the problem if only you could | |
190 | use them. | |
191 | .SH "The Solution" | |
192 | .IX Header "The Solution" | |
193 | By the time Perl 5 rolled around, we were already stuck with this | |
194 | design: Hash values must be scalars. The solution to this is | |
195 | references. | |
196 | .PP | |
197 | A reference is a scalar value that \fIrefers to\fR an entire array or an | |
198 | entire hash (or to just about anything else). Names are one kind of | |
199 | reference that you're already familiar with. Think of the President | |
200 | of the United States: a messy, inconvenient bag of blood and bones. | |
201 | But to talk about him, or to represent him in a computer program, all | |
202 | you need is the easy, convenient scalar string \*(L"George Bush\*(R". | |
203 | .PP | |
204 | References in Perl are like names for arrays and hashes. They're | |
205 | Perl's private, internal names, so you can be sure they're | |
206 | unambiguous. Unlike \*(L"George Bush\*(R", a reference only refers to one | |
207 | thing, and you always know what it refers to. If you have a reference | |
208 | to an array, you can recover the entire array from it. If you have a | |
209 | reference to a hash, you can recover the entire hash. But the | |
210 | reference is still an easy, compact scalar value. | |
211 | .PP | |
212 | You can't have a hash whose values are arrays; hash values can only be | |
213 | scalars. We're stuck with that. But a single reference can refer to | |
214 | an entire array, and references are scalars, so you can have a hash of | |
215 | references to arrays, and it'll act a lot like a hash of arrays, and | |
216 | it'll be just as useful as a hash of arrays. | |
217 | .PP | |
218 | We'll come back to this city-country problem later, after we've seen | |
219 | some syntax for managing references. | |
220 | .SH "Syntax" | |
221 | .IX Header "Syntax" | |
222 | There are just two ways to make a reference, and just two ways to use | |
223 | it once you have it. | |
224 | .Sh "Making References" | |
225 | .IX Subsection "Making References" | |
226 | \fI\f(BIMake Rule 1\fI\fR | |
227 | .IX Subsection "Make Rule 1" | |
228 | .PP | |
229 | If you put a \f(CW\*(C`\e\*(C'\fR in front of a variable, you get a | |
230 | reference to that variable. | |
231 | .PP | |
232 | .Vb 3 | |
233 | \& $aref = \e@array; # $aref now holds a reference to @array | |
234 | \& $href = \e%hash; # $href now holds a reference to %hash | |
235 | \& $sref = \e$scalar; # $sref now holds a reference to $scalar | |
236 | .Ve | |
237 | .PP | |
238 | Once the reference is stored in a variable like \f(CW$aref\fR or \f(CW$href\fR, you | |
239 | can copy it or store it just the same as any other scalar value: | |
240 | .PP | |
241 | .Vb 3 | |
242 | \& $xy = $aref; # $xy now holds a reference to @array | |
243 | \& $p[3] = $href; # $p[3] now holds a reference to %hash | |
244 | \& $z = $p[3]; # $z now holds a reference to %hash | |
245 | .Ve | |
246 | .PP | |
247 | These examples show how to make references to variables with names. | |
248 | Sometimes you want to make an array or a hash that doesn't have a | |
249 | name. This is analogous to the way you like to be able to use the | |
250 | string \f(CW"\en"\fR or the number 80 without having to store it in a named | |
251 | variable first. | |
252 | .PP | |
253 | \&\fBMake Rule 2\fR | |
254 | .PP | |
255 | \&\f(CW\*(C`[ ITEMS ]\*(C'\fR makes a new, anonymous array, and returns a reference to | |
256 | that array. \f(CW\*(C`{ ITEMS }\*(C'\fR makes a new, anonymous hash, and returns a | |
257 | reference to that hash. | |
258 | .PP | |
259 | .Vb 2 | |
260 | \& $aref = [ 1, "foo", undef, 13 ]; | |
261 | \& # $aref now holds a reference to an array | |
262 | .Ve | |
263 | .PP | |
264 | .Vb 2 | |
265 | \& $href = { APR => 4, AUG => 8 }; | |
266 | \& # $href now holds a reference to a hash | |
267 | .Ve | |
268 | .PP | |
269 | The references you get from rule 2 are the same kind of | |
270 | references that you get from rule 1: | |
271 | .PP | |
272 | .Vb 2 | |
273 | \& # This: | |
274 | \& $aref = [ 1, 2, 3 ]; | |
275 | .Ve | |
276 | .PP | |
277 | .Vb 3 | |
278 | \& # Does the same as this: | |
279 | \& @array = (1, 2, 3); | |
280 | \& $aref = \e@array; | |
281 | .Ve | |
282 | .PP | |
283 | The first line is an abbreviation for the following two lines, except | |
284 | that it doesn't create the superfluous array variable \f(CW@array\fR. | |
285 | .PP | |
286 | If you write just \f(CW\*(C`[]\*(C'\fR, you get a new, empty anonymous array. | |
287 | If you write just \f(CW\*(C`{}\*(C'\fR, you get a new, empty anonymous hash. | |
288 | .Sh "Using References" | |
289 | .IX Subsection "Using References" | |
290 | What can you do with a reference once you have it? It's a scalar | |
291 | value, and we've seen that you can store it as a scalar and get it back | |
292 | again just like any scalar. There are just two more ways to use it: | |
293 | .PP | |
294 | \fI\f(BIUse Rule 1\fI\fR | |
295 | .IX Subsection "Use Rule 1" | |
296 | .PP | |
297 | You can always use an array reference, in curly braces, in place of | |
298 | the name of an array. For example, \f(CW\*(C`@{$aref}\*(C'\fR instead of \f(CW@array\fR. | |
299 | .PP | |
300 | Here are some examples of that: | |
301 | .PP | |
302 | Arrays: | |
303 | .PP | |
304 | .Vb 4 | |
305 | \& @a @{$aref} An array | |
306 | \& reverse @a reverse @{$aref} Reverse the array | |
307 | \& $a[3] ${$aref}[3] An element of the array | |
308 | \& $a[3] = 17; ${$aref}[3] = 17 Assigning an element | |
309 | .Ve | |
310 | .PP | |
311 | On each line are two expressions that do the same thing. The | |
312 | left-hand versions operate on the array \f(CW@a\fR. The right-hand | |
313 | versions operate on the array that is referred to by \f(CW$aref\fR. Once | |
314 | they find the array they're operating on, both versions do the same | |
315 | things to the arrays. | |
316 | .PP | |
317 | Using a hash reference is \fIexactly\fR the same: | |
318 | .PP | |
319 | .Vb 4 | |
320 | \& %h %{$href} A hash | |
321 | \& keys %h keys %{$href} Get the keys from the hash | |
322 | \& $h{'red'} ${$href}{'red'} An element of the hash | |
323 | \& $h{'red'} = 17 ${$href}{'red'} = 17 Assigning an element | |
324 | .Ve | |
325 | .PP | |
326 | Whatever you want to do with a reference, \fBUse Rule 1\fR tells you how | |
327 | to do it. You just write the Perl code that you would have written | |
328 | for doing the same thing to a regular array or hash, and then replace | |
329 | the array or hash name with \f(CW\*(C`{$reference}\*(C'\fR. \*(L"How do I loop over an | |
330 | array when all I have is a reference?\*(R" Well, to loop over an array, you | |
331 | would write | |
332 | .PP | |
333 | .Vb 3 | |
334 | \& for my $element (@array) { | |
335 | \& ... | |
336 | \& } | |
337 | .Ve | |
338 | .PP | |
339 | so replace the array name, \f(CW@array\fR, with the reference: | |
340 | .PP | |
341 | .Vb 3 | |
342 | \& for my $element (@{$aref}) { | |
343 | \& ... | |
344 | \& } | |
345 | .Ve | |
346 | .PP | |
347 | \&\*(L"How do I print out the contents of a hash when all I have is a | |
348 | reference?\*(R" First write the code for printing out a hash: | |
349 | .PP | |
350 | .Vb 3 | |
351 | \& for my $key (keys %hash) { | |
352 | \& print "$key => $hash{$key}\en"; | |
353 | \& } | |
354 | .Ve | |
355 | .PP | |
356 | And then replace the hash name with the reference: | |
357 | .PP | |
358 | .Vb 3 | |
359 | \& for my $key (keys %{$href}) { | |
360 | \& print "$key => ${$href}{$key}\en"; | |
361 | \& } | |
362 | .Ve | |
363 | .PP | |
364 | \fI\f(BIUse Rule 2\fI\fR | |
365 | .IX Subsection "Use Rule 2" | |
366 | .PP | |
367 | \&\fBUse Rule 1\fR is all you really need, because it tells you how to do | |
368 | absolutely everything you ever need to do with references. But the | |
369 | most common thing to do with an array or a hash is to extract a single | |
370 | element, and the \fBUse Rule 1\fR notation is cumbersome. So there is an | |
371 | abbreviation. | |
372 | .PP | |
373 | \&\f(CW\*(C`${$aref}[3]\*(C'\fR is too hard to read, so you can write \f(CW\*(C`$aref\->[3]\*(C'\fR | |
374 | instead. | |
375 | .PP | |
376 | \&\f(CW\*(C`${$href}{red}\*(C'\fR is too hard to read, so you can write | |
377 | \&\f(CW\*(C`$href\->{red}\*(C'\fR instead. | |
378 | .PP | |
379 | If \f(CW$aref\fR holds a reference to an array, then \f(CW\*(C`$aref\->[3]\*(C'\fR is | |
380 | the fourth element of the array. Don't confuse this with \f(CW$aref[3]\fR, | |
381 | which is the fourth element of a totally different array, one | |
382 | deceptively named \f(CW@aref\fR. \f(CW$aref\fR and \f(CW@aref\fR are unrelated the | |
383 | same way that \f(CW$item\fR and \f(CW@item\fR are. | |
384 | .PP | |
385 | Similarly, \f(CW\*(C`$href\->{'red'}\*(C'\fR is part of the hash referred to by | |
386 | the scalar variable \f(CW$href\fR, perhaps even one with no name. | |
387 | \&\f(CW$href{'red'}\fR is part of the deceptively named \f(CW%href\fR hash. It's | |
388 | easy to leave out the \f(CW\*(C`\->\*(C'\fR, and if you do, you'll get | |
389 | bizarre results when your program gets array and hash elements out of | |
390 | totally unexpected hashes and arrays that weren't the ones you wanted | |
391 | to use. | |
392 | .Sh "An Example" | |
393 | .IX Subsection "An Example" | |
394 | Let's see a quick example of how all this is useful. | |
395 | .PP | |
396 | First, remember that \f(CW\*(C`[1, 2, 3]\*(C'\fR makes an anonymous array containing | |
397 | \&\f(CW\*(C`(1, 2, 3)\*(C'\fR, and gives you a reference to that array. | |
398 | .PP | |
399 | Now think about | |
400 | .PP | |
401 | .Vb 4 | |
402 | \& @a = ( [1, 2, 3], | |
403 | \& [4, 5, 6], | |
404 | \& [7, 8, 9] | |
405 | \& ); | |
406 | .Ve | |
407 | .PP | |
408 | @a is an array with three elements, and each one is a reference to | |
409 | another array. | |
410 | .PP | |
411 | \&\f(CW$a[1]\fR is one of these references. It refers to an array, the array | |
412 | containing \f(CW\*(C`(4, 5, 6)\*(C'\fR, and because it is a reference to an array, | |
413 | \&\fBUse Rule 2\fR says that we can write \f(CW$a[1]\->[2]\fR to get the | |
414 | third element from that array. \f(CW$a[1]\->[2]\fR is the 6. | |
415 | Similarly, \f(CW$a[0]\->[1]\fR is the 2. What we have here is like a | |
416 | two-dimensional array; you can write \f(CW$a[ROW]\->[COLUMN]\fR to get | |
417 | or set the element in any row and any column of the array. | |
418 | .PP | |
419 | The notation still looks a little cumbersome, so there's one more | |
420 | abbreviation: | |
421 | .Sh "Arrow Rule" | |
422 | .IX Subsection "Arrow Rule" | |
423 | In between two \fBsubscripts\fR, the arrow is optional. | |
424 | .PP | |
425 | Instead of \f(CW$a[1]\->[2]\fR, we can write \f(CW$a[1][2]\fR; it means the | |
426 | same thing. Instead of \f(CW\*(C`$a[0]\->[1] = 23\*(C'\fR, we can write | |
427 | \&\f(CW\*(C`$a[0][1] = 23\*(C'\fR; it means the same thing. | |
428 | .PP | |
429 | Now it really looks like two-dimensional arrays! | |
430 | .PP | |
431 | You can see why the arrows are important. Without them, we would have | |
432 | had to write \f(CW\*(C`${$a[1]}[2]\*(C'\fR instead of \f(CW$a[1][2]\fR. For | |
433 | three-dimensional arrays, they let us write \f(CW$x[2][3][5]\fR instead of | |
434 | the unreadable \f(CW\*(C`${${$x[2]}[3]}[5]\*(C'\fR. | |
435 | .SH "Solution" | |
436 | .IX Header "Solution" | |
437 | Here's the answer to the problem I posed earlier, of reformatting a | |
438 | file of city and country names. | |
439 | .PP | |
440 | .Vb 1 | |
441 | \& 1 my %table; | |
442 | .Ve | |
443 | .PP | |
444 | .Vb 6 | |
445 | \& 2 while (<>) { | |
446 | \& 3 chomp; | |
447 | \& 4 my ($city, $country) = split /, /; | |
448 | \& 5 $table{$country} = [] unless exists $table{$country}; | |
449 | \& 6 push @{$table{$country}}, $city; | |
450 | \& 7 } | |
451 | .Ve | |
452 | .PP | |
453 | .Vb 6 | |
454 | \& 8 foreach $country (sort keys %table) { | |
455 | \& 9 print "$country: "; | |
456 | \& 10 my @cities = @{$table{$country}}; | |
457 | \& 11 print join ', ', sort @cities; | |
458 | \& 12 print ".\en"; | |
459 | \& 13 } | |
460 | .Ve | |
461 | .PP | |
462 | The program has two pieces: Lines 2\-7 read the input and build a data | |
463 | structure, and lines 8\-13 analyze the data and print out the report. | |
464 | We're going to have a hash, \f(CW%table\fR, whose keys are country names, | |
465 | and whose values are references to arrays of city names. The data | |
466 | structure will look like this: | |
467 | .PP | |
468 | .Vb 14 | |
469 | \& %table | |
470 | \& +-------+---+ | |
471 | \& | | | +-----------+--------+ | |
472 | \& |Germany| *---->| Frankfurt | Berlin | | |
473 | \& | | | +-----------+--------+ | |
474 | \& +-------+---+ | |
475 | \& | | | +----------+ | |
476 | \& |Finland| *---->| Helsinki | | |
477 | \& | | | +----------+ | |
478 | \& +-------+---+ | |
479 | \& | | | +---------+------------+----------+ | |
480 | \& | USA | *---->| Chicago | Washington | New York | | |
481 | \& | | | +---------+------------+----------+ | |
482 | \& +-------+---+ | |
483 | .Ve | |
484 | .PP | |
485 | We'll look at output first. Supposing we already have this structure, | |
486 | how do we print it out? | |
487 | .PP | |
488 | .Vb 6 | |
489 | \& 8 foreach $country (sort keys %table) { | |
490 | \& 9 print "$country: "; | |
491 | \& 10 my @cities = @{$table{$country}}; | |
492 | \& 11 print join ', ', sort @cities; | |
493 | \& 12 print ".\en"; | |
494 | \& 13 } | |
495 | .Ve | |
496 | .PP | |
497 | \&\f(CW%table\fR is an | |
498 | ordinary hash, and we get a list of keys from it, sort the keys, and | |
499 | loop over the keys as usual. The only use of references is in line 10. | |
500 | \&\f(CW$table{$country}\fR looks up the key \f(CW$country\fR in the hash | |
501 | and gets the value, which is a reference to an array of cities in that country. | |
502 | \&\fBUse Rule 1\fR says that | |
503 | we can recover the array by saying | |
504 | \&\f(CW\*(C`@{$table{$country}}\*(C'\fR. Line 10 is just like | |
505 | .PP | |
506 | .Vb 1 | |
507 | \& @cities = @array; | |
508 | .Ve | |
509 | .PP | |
510 | except that the name \f(CW\*(C`array\*(C'\fR has been replaced by the reference | |
511 | \&\f(CW\*(C`{$table{$country}}\*(C'\fR. The \f(CW\*(C`@\*(C'\fR tells Perl to get the entire array. | |
512 | Having gotten the list of cities, we sort it, join it, and print it | |
513 | out as usual. | |
514 | .PP | |
515 | Lines 2\-7 are responsible for building the structure in the first | |
516 | place. Here they are again: | |
517 | .PP | |
518 | .Vb 6 | |
519 | \& 2 while (<>) { | |
520 | \& 3 chomp; | |
521 | \& 4 my ($city, $country) = split /, /; | |
522 | \& 5 $table{$country} = [] unless exists $table{$country}; | |
523 | \& 6 push @{$table{$country}}, $city; | |
524 | \& 7 } | |
525 | .Ve | |
526 | .PP | |
527 | Lines 2\-4 acquire a city and country name. Line 5 looks to see if the | |
528 | country is already present as a key in the hash. If it's not, the | |
529 | program uses the \f(CW\*(C`[]\*(C'\fR notation (\fBMake Rule 2\fR) to manufacture a new, | |
530 | empty anonymous array of cities, and installs a reference to it into | |
531 | the hash under the appropriate key. | |
532 | .PP | |
533 | Line 6 installs the city name into the appropriate array. | |
534 | \&\f(CW$table{$country}\fR now holds a reference to the array of cities seen | |
535 | in that country so far. Line 6 is exactly like | |
536 | .PP | |
537 | .Vb 1 | |
538 | \& push @array, $city; | |
539 | .Ve | |
540 | .PP | |
541 | except that the name \f(CW\*(C`array\*(C'\fR has been replaced by the reference | |
542 | \&\f(CW\*(C`{$table{$country}}\*(C'\fR. The \f(CW\*(C`push\*(C'\fR adds a city name to the end of the | |
543 | referred-to array. | |
544 | .PP | |
545 | There's one fine point I skipped. Line 5 is unnecessary, and we can | |
546 | get rid of it. | |
547 | .PP | |
548 | .Vb 6 | |
549 | \& 2 while (<>) { | |
550 | \& 3 chomp; | |
551 | \& 4 my ($city, $country) = split /, /; | |
552 | \& 5 #### $table{$country} = [] unless exists $table{$country}; | |
553 | \& 6 push @{$table{$country}}, $city; | |
554 | \& 7 } | |
555 | .Ve | |
556 | .PP | |
557 | If there's already an entry in \f(CW%table\fR for the current \f(CW$country\fR, | |
558 | then nothing is different. Line 6 will locate the value in | |
559 | \&\f(CW$table{$country}\fR, which is a reference to an array, and push | |
560 | \&\f(CW$city\fR into the array. But | |
561 | what does it do when | |
562 | \&\f(CW$country\fR holds a key, say \f(CW\*(C`Greece\*(C'\fR, that is not yet in \f(CW%table\fR? | |
563 | .PP | |
564 | This is Perl, so it does the exact right thing. It sees that you want | |
565 | to push \f(CW\*(C`Athens\*(C'\fR onto an array that doesn't exist, so it helpfully | |
566 | makes a new, empty, anonymous array for you, installs it into | |
567 | \&\f(CW%table\fR, and then pushes \f(CW\*(C`Athens\*(C'\fR onto it. This is called | |
568 | `autovivification'\-\-bringing things to life automatically. Perl saw | |
569 | that the key wasn't in the hash, so it created a new hash entry | |
570 | automatically. Perl saw that you wanted to use the hash value as an | |
571 | array, so it created a new empty array and installed a reference to it | |
572 | in the hash automatically. And as usual, Perl made the array one | |
573 | element longer to hold the new city name. | |
574 | .SH "The Rest" | |
575 | .IX Header "The Rest" | |
576 | I promised to give you 90% of the benefit with 10% of the details, and | |
577 | that means I left out 90% of the details. Now that you have an | |
578 | overview of the important parts, it should be easier to read the | |
579 | perlref manual page, which discusses 100% of the details. | |
580 | .PP | |
581 | Some of the highlights of perlref: | |
582 | .IP "\(bu" 4 | |
583 | You can make references to anything, including scalars, functions, and | |
584 | other references. | |
585 | .IP "\(bu" 4 | |
586 | In \fBUse Rule 1\fR, you can omit the curly brackets whenever the thing | |
587 | inside them is an atomic scalar variable like \f(CW$aref\fR. For example, | |
588 | \&\f(CW@$aref\fR is the same as \f(CW\*(C`@{$aref}\*(C'\fR, and \f(CW$$aref[1]\fR is the same as | |
589 | \&\f(CW\*(C`${$aref}[1]\*(C'\fR. If you're just starting out, you may want to adopt | |
590 | the habit of always including the curly brackets. | |
591 | .IP "\(bu" 4 | |
592 | This doesn't copy the underlying array: | |
593 | .Sp | |
594 | .Vb 1 | |
595 | \& $aref2 = $aref1; | |
596 | .Ve | |
597 | .Sp | |
598 | You get two references to the same array. If you modify | |
599 | \&\f(CW\*(C`$aref1\->[23]\*(C'\fR and then look at | |
600 | \&\f(CW\*(C`$aref2\->[23]\*(C'\fR you'll see the change. | |
601 | .Sp | |
602 | To copy the array, use | |
603 | .Sp | |
604 | .Vb 1 | |
605 | \& $aref2 = [@{$aref1}]; | |
606 | .Ve | |
607 | .Sp | |
608 | This uses \f(CW\*(C`[...]\*(C'\fR notation to create a new anonymous array, and | |
609 | \&\f(CW$aref2\fR is assigned a reference to the new array. The new array is | |
610 | initialized with the contents of the array referred to by \f(CW$aref1\fR. | |
611 | .Sp | |
612 | Similarly, to copy an anonymous hash, you can use | |
613 | .Sp | |
614 | .Vb 1 | |
615 | \& $href2 = {%{$href1}}; | |
616 | .Ve | |
617 | .IP "\(bu" 4 | |
618 | To see if a variable contains a reference, use the \f(CW\*(C`ref\*(C'\fR function. It | |
619 | returns true if its argument is a reference. Actually it's a little | |
620 | better than that: It returns \f(CW\*(C`HASH\*(C'\fR for hash references and \f(CW\*(C`ARRAY\*(C'\fR | |
621 | for array references. | |
622 | .IP "\(bu" 4 | |
623 | If you try to use a reference like a string, you get strings like | |
624 | .Sp | |
625 | .Vb 1 | |
626 | \& ARRAY(0x80f5dec) or HASH(0x826afc0) | |
627 | .Ve | |
628 | .Sp | |
629 | If you ever see a string that looks like this, you'll know you | |
630 | printed out a reference by mistake. | |
631 | .Sp | |
632 | A side effect of this representation is that you can use \f(CW\*(C`eq\*(C'\fR to see | |
633 | if two references refer to the same thing. (But you should usually use | |
634 | \&\f(CW\*(C`==\*(C'\fR instead because it's much faster.) | |
635 | .IP "\(bu" 4 | |
636 | You can use a string as if it were a reference. If you use the string | |
637 | \&\f(CW"foo"\fR as an array reference, it's taken to be a reference to the | |
638 | array \f(CW@foo\fR. This is called a \fIsoft reference\fR or \fIsymbolic | |
639 | reference\fR. The declaration \f(CW\*(C`use strict 'refs'\*(C'\fR disables this | |
640 | feature, which can cause all sorts of trouble if you use it by accident. | |
641 | .PP | |
642 | You might prefer to go on to perllol instead of perlref; it | |
643 | discusses lists of lists and multidimensional arrays in detail. After | |
644 | that, you should move on to perldsc; it's a Data Structure Cookbook | |
645 | that shows recipes for using and printing out arrays of hashes, hashes | |
646 | of arrays, and other kinds of data. | |
647 | .SH "Summary" | |
648 | .IX Header "Summary" | |
649 | Everyone needs compound data structures, and in Perl the way you get | |
650 | them is with references. There are four important rules for managing | |
651 | references: Two for making references and two for using them. Once | |
652 | you know these rules you can do most of the important things you need | |
653 | to do with references. | |
654 | .SH "Credits" | |
655 | .IX Header "Credits" | |
656 | Author: Mark Jason Dominus, Plover Systems (\f(CW\*(C`mjd\-perl\-ref+@plover.com\*(C'\fR) | |
657 | .PP | |
658 | This article originally appeared in \fIThe Perl Journal\fR | |
659 | ( http://www.tpj.com/ ) volume 3, #2. Reprinted with permission. | |
660 | .PP | |
661 | The original title was \fIUnderstand References Today\fR. | |
662 | .Sh "Distribution Conditions" | |
663 | .IX Subsection "Distribution Conditions" | |
664 | Copyright 1998 The Perl Journal. | |
665 | .PP | |
666 | This documentation is free; you can redistribute it and/or modify it | |
667 | under the same terms as Perl itself. | |
668 | .PP | |
669 | Irrespective of its distribution, all code examples in these files are | |
670 | hereby placed into the public domain. You are permitted and | |
671 | encouraged to use this code in your own programs for fun or for profit | |
672 | as you see fit. A simple comment in the code giving credit would be | |
673 | courteous but is not required. |