Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "PERLFAQ4 1" | |
132 | .TH PERLFAQ4 1 "2006-01-07" "perl v5.8.8" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | perlfaq4 \- Data Manipulation ($Revision: 1.73 $, $Date: 2005/12/31 00:54:37 $) | |
135 | .SH "DESCRIPTION" | |
136 | .IX Header "DESCRIPTION" | |
137 | This section of the \s-1FAQ\s0 answers questions related to manipulating | |
138 | numbers, dates, strings, arrays, hashes, and miscellaneous data issues. | |
139 | .SH "Data: Numbers" | |
140 | .IX Header "Data: Numbers" | |
141 | .Sh "Why am I getting long decimals (eg, 19.9499999999999) instead of the numbers I should be getting (eg, 19.95)?" | |
142 | .IX Subsection "Why am I getting long decimals (eg, 19.9499999999999) instead of the numbers I should be getting (eg, 19.95)?" | |
143 | Internally, your computer represents floating-point numbers | |
144 | in binary. Digital (as in powers of two) computers cannot | |
145 | store all numbers exactly. Some real numbers lose precision | |
146 | in the process. This is a problem with how computers store | |
147 | numbers and affects all computer languages, not just Perl. | |
148 | .PP | |
149 | perlnumber shows the gory details of number | |
150 | representations and conversions. | |
151 | .PP | |
152 | To limit the number of decimal places in your numbers, you | |
153 | can use the printf or sprintf function. See | |
154 | \&\*(L"Floating Point Arithmetic\*(R" in perlop for more details. | |
155 | .PP | |
156 | .Vb 1 | |
157 | \& printf "%.2f", 10/3; | |
158 | .Ve | |
159 | .PP | |
160 | .Vb 1 | |
161 | \& my $number = sprintf "%.2f", 10/3; | |
162 | .Ve | |
163 | .Sh "Why is \fIint()\fP broken?" | |
164 | .IX Subsection "Why is int() broken?" | |
165 | Your \fIint()\fR is most probably working just fine. It's the numbers that | |
166 | aren't quite what you think. | |
167 | .PP | |
168 | First, see the above item \*(L"Why am I getting long decimals | |
169 | (eg, 19.9499999999999) instead of the numbers I should be getting | |
170 | (eg, 19.95)?\*(R". | |
171 | .PP | |
172 | For example, this | |
173 | .PP | |
174 | .Vb 1 | |
175 | \& print int(0.6/0.2-2), "\en"; | |
176 | .Ve | |
177 | .PP | |
178 | will in most computers print 0, not 1, because even such simple | |
179 | numbers as 0.6 and 0.2 cannot be represented exactly by floating-point | |
180 | numbers. What you think of in the above as 'three' is really more like | |
181 | 2.9999999999999995559. | |
182 | .Sh "Why isn't my octal data interpreted correctly?" | |
183 | .IX Subsection "Why isn't my octal data interpreted correctly?" | |
184 | Perl only understands octal and hex numbers as such when they occur as | |
185 | literals in your program. Octal literals in perl must start with a | |
186 | leading \*(L"0\*(R" and hexadecimal literals must start with a leading \*(L"0x\*(R". | |
187 | If they are read in from somewhere and assigned, no automatic | |
188 | conversion takes place. You must explicitly use \fIoct()\fR or \fIhex()\fR if you | |
189 | want the values converted to decimal. \fIoct()\fR interprets hex (\*(L"0x350\*(R"), | |
190 | octal (\*(L"0350\*(R" or even without the leading \*(L"0\*(R", like \*(L"377\*(R") and binary | |
191 | (\*(L"0b1010\*(R") numbers, while \fIhex()\fR only converts hexadecimal ones, with | |
192 | or without a leading \*(L"0x\*(R", like \*(L"0x255\*(R", \*(L"3A\*(R", \*(L"ff\*(R", or \*(L"deadbeef\*(R". | |
193 | The inverse mapping from decimal to octal can be done with either the | |
194 | \&\*(L"%o\*(R" or \*(L"%O\*(R" \fIsprintf()\fR formats. | |
195 | .PP | |
196 | This problem shows up most often when people try using \fIchmod()\fR, \fImkdir()\fR, | |
197 | \&\fIumask()\fR, or \fIsysopen()\fR, which by widespread tradition typically take | |
198 | permissions in octal. | |
199 | .PP | |
200 | .Vb 2 | |
201 | \& chmod(644, $file); # WRONG | |
202 | \& chmod(0644, $file); # right | |
203 | .Ve | |
204 | .PP | |
205 | Note the mistake in the first line was specifying the decimal literal | |
206 | 644, rather than the intended octal literal 0644. The problem can | |
207 | be seen with: | |
208 | .PP | |
209 | .Vb 1 | |
210 | \& printf("%#o",644); # prints 01204 | |
211 | .Ve | |
212 | .PP | |
213 | Surely you had not intended \f(CW\*(C`chmod(01204, $file);\*(C'\fR \- did you? If you | |
214 | want to use numeric literals as arguments to \fIchmod()\fR et al. then please | |
215 | try to express them as octal constants, that is with a leading zero and | |
216 | with the following digits restricted to the set 0..7. | |
217 | .Sh "Does Perl have a \fIround()\fP function? What about \fIceil()\fP and \fIfloor()\fP? Trig functions?" | |
218 | .IX Subsection "Does Perl have a round() function? What about ceil() and floor()? Trig functions?" | |
219 | Remember that \fIint()\fR merely truncates toward 0. For rounding to a | |
220 | certain number of digits, \fIsprintf()\fR or \fIprintf()\fR is usually the easiest | |
221 | route. | |
222 | .PP | |
223 | .Vb 1 | |
224 | \& printf("%.3f", 3.1415926535); # prints 3.142 | |
225 | .Ve | |
226 | .PP | |
227 | The \s-1POSIX\s0 module (part of the standard Perl distribution) implements | |
228 | \&\fIceil()\fR, \fIfloor()\fR, and a number of other mathematical and trigonometric | |
229 | functions. | |
230 | .PP | |
231 | .Vb 3 | |
232 | \& use POSIX; | |
233 | \& $ceil = ceil(3.5); # 4 | |
234 | \& $floor = floor(3.5); # 3 | |
235 | .Ve | |
236 | .PP | |
237 | In 5.000 to 5.003 perls, trigonometry was done in the Math::Complex | |
238 | module. With 5.004, the Math::Trig module (part of the standard Perl | |
239 | distribution) implements the trigonometric functions. Internally it | |
240 | uses the Math::Complex module and some functions can break out from | |
241 | the real axis into the complex plane, for example the inverse sine of | |
242 | 2. | |
243 | .PP | |
244 | Rounding in financial applications can have serious implications, and | |
245 | the rounding method used should be specified precisely. In these | |
246 | cases, it probably pays not to trust whichever system rounding is | |
247 | being used by Perl, but to instead implement the rounding function you | |
248 | need yourself. | |
249 | .PP | |
250 | To see why, notice how you'll still have an issue on half-way-point | |
251 | alternation: | |
252 | .PP | |
253 | .Vb 1 | |
254 | \& for ($i = 0; $i < 1.01; $i += 0.05) { printf "%.1f ",$i} | |
255 | .Ve | |
256 | .PP | |
257 | .Vb 2 | |
258 | \& 0.0 0.1 0.1 0.2 0.2 0.2 0.3 0.3 0.4 0.4 0.5 0.5 0.6 0.7 0.7 | |
259 | \& 0.8 0.8 0.9 0.9 1.0 1.0 | |
260 | .Ve | |
261 | .PP | |
262 | Don't blame Perl. It's the same as in C. \s-1IEEE\s0 says we have to do this. | |
263 | Perl numbers whose absolute values are integers under 2**31 (on 32 bit | |
264 | machines) will work pretty much like mathematical integers. Other numbers | |
265 | are not guaranteed. | |
266 | .Sh "How do I convert between numeric representations/bases/radixes?" | |
267 | .IX Subsection "How do I convert between numeric representations/bases/radixes?" | |
268 | As always with Perl there is more than one way to do it. Below | |
269 | are a few examples of approaches to making common conversions | |
270 | between number representations. This is intended to be representative | |
271 | rather than exhaustive. | |
272 | .PP | |
273 | Some of the examples below use the Bit::Vector module from \s-1CPAN\s0. | |
274 | The reason you might choose Bit::Vector over the perl built in | |
275 | functions is that it works with numbers of \s-1ANY\s0 size, that it is | |
276 | optimized for speed on some operations, and for at least some | |
277 | programmers the notation might be familiar. | |
278 | .IP "How do I convert hexadecimal into decimal" 4 | |
279 | .IX Item "How do I convert hexadecimal into decimal" | |
280 | Using perl's built in conversion of 0x notation: | |
281 | .Sp | |
282 | .Vb 1 | |
283 | \& $dec = 0xDEADBEEF; | |
284 | .Ve | |
285 | .Sp | |
286 | Using the hex function: | |
287 | .Sp | |
288 | .Vb 1 | |
289 | \& $dec = hex("DEADBEEF"); | |
290 | .Ve | |
291 | .Sp | |
292 | Using pack: | |
293 | .Sp | |
294 | .Vb 1 | |
295 | \& $dec = unpack("N", pack("H8", substr("0" x 8 . "DEADBEEF", -8))); | |
296 | .Ve | |
297 | .Sp | |
298 | Using the \s-1CPAN\s0 module Bit::Vector: | |
299 | .Sp | |
300 | .Vb 3 | |
301 | \& use Bit::Vector; | |
302 | \& $vec = Bit::Vector->new_Hex(32, "DEADBEEF"); | |
303 | \& $dec = $vec->to_Dec(); | |
304 | .Ve | |
305 | .IP "How do I convert from decimal to hexadecimal" 4 | |
306 | .IX Item "How do I convert from decimal to hexadecimal" | |
307 | Using sprintf: | |
308 | .Sp | |
309 | .Vb 2 | |
310 | \& $hex = sprintf("%X", 3735928559); # upper case A-F | |
311 | \& $hex = sprintf("%x", 3735928559); # lower case a-f | |
312 | .Ve | |
313 | .Sp | |
314 | Using unpack: | |
315 | .Sp | |
316 | .Vb 1 | |
317 | \& $hex = unpack("H*", pack("N", 3735928559)); | |
318 | .Ve | |
319 | .Sp | |
320 | Using Bit::Vector: | |
321 | .Sp | |
322 | .Vb 3 | |
323 | \& use Bit::Vector; | |
324 | \& $vec = Bit::Vector->new_Dec(32, -559038737); | |
325 | \& $hex = $vec->to_Hex(); | |
326 | .Ve | |
327 | .Sp | |
328 | And Bit::Vector supports odd bit counts: | |
329 | .Sp | |
330 | .Vb 4 | |
331 | \& use Bit::Vector; | |
332 | \& $vec = Bit::Vector->new_Dec(33, 3735928559); | |
333 | \& $vec->Resize(32); # suppress leading 0 if unwanted | |
334 | \& $hex = $vec->to_Hex(); | |
335 | .Ve | |
336 | .IP "How do I convert from octal to decimal" 4 | |
337 | .IX Item "How do I convert from octal to decimal" | |
338 | Using Perl's built in conversion of numbers with leading zeros: | |
339 | .Sp | |
340 | .Vb 1 | |
341 | \& $dec = 033653337357; # note the leading 0! | |
342 | .Ve | |
343 | .Sp | |
344 | Using the oct function: | |
345 | .Sp | |
346 | .Vb 1 | |
347 | \& $dec = oct("33653337357"); | |
348 | .Ve | |
349 | .Sp | |
350 | Using Bit::Vector: | |
351 | .Sp | |
352 | .Vb 4 | |
353 | \& use Bit::Vector; | |
354 | \& $vec = Bit::Vector->new(32); | |
355 | \& $vec->Chunk_List_Store(3, split(//, reverse "33653337357")); | |
356 | \& $dec = $vec->to_Dec(); | |
357 | .Ve | |
358 | .IP "How do I convert from decimal to octal" 4 | |
359 | .IX Item "How do I convert from decimal to octal" | |
360 | Using sprintf: | |
361 | .Sp | |
362 | .Vb 1 | |
363 | \& $oct = sprintf("%o", 3735928559); | |
364 | .Ve | |
365 | .Sp | |
366 | Using Bit::Vector: | |
367 | .Sp | |
368 | .Vb 3 | |
369 | \& use Bit::Vector; | |
370 | \& $vec = Bit::Vector->new_Dec(32, -559038737); | |
371 | \& $oct = reverse join('', $vec->Chunk_List_Read(3)); | |
372 | .Ve | |
373 | .IP "How do I convert from binary to decimal" 4 | |
374 | .IX Item "How do I convert from binary to decimal" | |
375 | Perl 5.6 lets you write binary numbers directly with | |
376 | the 0b notation: | |
377 | .Sp | |
378 | .Vb 1 | |
379 | \& $number = 0b10110110; | |
380 | .Ve | |
381 | .Sp | |
382 | Using oct: | |
383 | .Sp | |
384 | .Vb 2 | |
385 | \& my $input = "10110110"; | |
386 | \& $decimal = oct( "0b$input" ); | |
387 | .Ve | |
388 | .Sp | |
389 | Using pack and ord: | |
390 | .Sp | |
391 | .Vb 1 | |
392 | \& $decimal = ord(pack('B8', '10110110')); | |
393 | .Ve | |
394 | .Sp | |
395 | Using pack and unpack for larger strings: | |
396 | .Sp | |
397 | .Vb 3 | |
398 | \& $int = unpack("N", pack("B32", | |
399 | \& substr("0" x 32 . "11110101011011011111011101111", -32))); | |
400 | \& $dec = sprintf("%d", $int); | |
401 | .Ve | |
402 | .Sp | |
403 | .Vb 1 | |
404 | \& # substr() is used to left pad a 32 character string with zeros. | |
405 | .Ve | |
406 | .Sp | |
407 | Using Bit::Vector: | |
408 | .Sp | |
409 | .Vb 2 | |
410 | \& $vec = Bit::Vector->new_Bin(32, "11011110101011011011111011101111"); | |
411 | \& $dec = $vec->to_Dec(); | |
412 | .Ve | |
413 | .IP "How do I convert from decimal to binary" 4 | |
414 | .IX Item "How do I convert from decimal to binary" | |
415 | Using sprintf (perl 5.6+): | |
416 | .Sp | |
417 | .Vb 1 | |
418 | \& $bin = sprintf("%b", 3735928559); | |
419 | .Ve | |
420 | .Sp | |
421 | Using unpack: | |
422 | .Sp | |
423 | .Vb 1 | |
424 | \& $bin = unpack("B*", pack("N", 3735928559)); | |
425 | .Ve | |
426 | .Sp | |
427 | Using Bit::Vector: | |
428 | .Sp | |
429 | .Vb 3 | |
430 | \& use Bit::Vector; | |
431 | \& $vec = Bit::Vector->new_Dec(32, -559038737); | |
432 | \& $bin = $vec->to_Bin(); | |
433 | .Ve | |
434 | .Sp | |
435 | The remaining transformations (e.g. hex \-> oct, bin \-> hex, etc.) | |
436 | are left as an exercise to the inclined reader. | |
437 | .Sh "Why doesn't & work the way I want it to?" | |
438 | .IX Subsection "Why doesn't & work the way I want it to?" | |
439 | The behavior of binary arithmetic operators depends on whether they're | |
440 | used on numbers or strings. The operators treat a string as a series | |
441 | of bits and work with that (the string \f(CW"3"\fR is the bit pattern | |
442 | \&\f(CW00110011\fR). The operators work with the binary form of a number | |
443 | (the number \f(CW3\fR is treated as the bit pattern \f(CW00000011\fR). | |
444 | .PP | |
445 | So, saying \f(CW\*(C`11 & 3\*(C'\fR performs the \*(L"and\*(R" operation on numbers (yielding | |
446 | \&\f(CW3\fR). Saying \f(CW"11" & "3"\fR performs the \*(L"and\*(R" operation on strings | |
447 | (yielding \f(CW"1"\fR). | |
448 | .PP | |
449 | Most problems with \f(CW\*(C`&\*(C'\fR and \f(CW\*(C`|\*(C'\fR arise because the programmer thinks | |
450 | they have a number but really it's a string. The rest arise because | |
451 | the programmer says: | |
452 | .PP | |
453 | .Vb 3 | |
454 | \& if ("\e020\e020" & "\e101\e101") { | |
455 | \& # ... | |
456 | \& } | |
457 | .Ve | |
458 | .PP | |
459 | but a string consisting of two null bytes (the result of \f(CW\*(C`"\e020\e020" | |
460 | & "\e101\e101"\*(C'\fR) is not a false value in Perl. You need: | |
461 | .PP | |
462 | .Vb 3 | |
463 | \& if ( ("\e020\e020" & "\e101\e101") !~ /[^\e000]/) { | |
464 | \& # ... | |
465 | \& } | |
466 | .Ve | |
467 | .Sh "How do I multiply matrices?" | |
468 | .IX Subsection "How do I multiply matrices?" | |
469 | Use the Math::Matrix or Math::MatrixReal modules (available from \s-1CPAN\s0) | |
470 | or the \s-1PDL\s0 extension (also available from \s-1CPAN\s0). | |
471 | .Sh "How do I perform an operation on a series of integers?" | |
472 | .IX Subsection "How do I perform an operation on a series of integers?" | |
473 | To call a function on each element in an array, and collect the | |
474 | results, use: | |
475 | .PP | |
476 | .Vb 1 | |
477 | \& @results = map { my_func($_) } @array; | |
478 | .Ve | |
479 | .PP | |
480 | For example: | |
481 | .PP | |
482 | .Vb 1 | |
483 | \& @triple = map { 3 * $_ } @single; | |
484 | .Ve | |
485 | .PP | |
486 | To call a function on each element of an array, but ignore the | |
487 | results: | |
488 | .PP | |
489 | .Vb 3 | |
490 | \& foreach $iterator (@array) { | |
491 | \& some_func($iterator); | |
492 | \& } | |
493 | .Ve | |
494 | .PP | |
495 | To call a function on each integer in a (small) range, you \fBcan\fR use: | |
496 | .PP | |
497 | .Vb 1 | |
498 | \& @results = map { some_func($_) } (5 .. 25); | |
499 | .Ve | |
500 | .PP | |
501 | but you should be aware that the \f(CW\*(C`..\*(C'\fR operator creates an array of | |
502 | all integers in the range. This can take a lot of memory for large | |
503 | ranges. Instead use: | |
504 | .PP | |
505 | .Vb 4 | |
506 | \& @results = (); | |
507 | \& for ($i=5; $i < 500_005; $i++) { | |
508 | \& push(@results, some_func($i)); | |
509 | \& } | |
510 | .Ve | |
511 | .PP | |
512 | This situation has been fixed in Perl5.005. Use of \f(CW\*(C`..\*(C'\fR in a \f(CW\*(C`for\*(C'\fR | |
513 | loop will iterate over the range, without creating the entire range. | |
514 | .PP | |
515 | .Vb 3 | |
516 | \& for my $i (5 .. 500_005) { | |
517 | \& push(@results, some_func($i)); | |
518 | \& } | |
519 | .Ve | |
520 | .PP | |
521 | will not create a list of 500,000 integers. | |
522 | .Sh "How can I output Roman numerals?" | |
523 | .IX Subsection "How can I output Roman numerals?" | |
524 | Get the http://www.cpan.org/modules/by\-module/Roman module. | |
525 | .Sh "Why aren't my random numbers random?" | |
526 | .IX Subsection "Why aren't my random numbers random?" | |
527 | If you're using a version of Perl before 5.004, you must call \f(CW\*(C`srand\*(C'\fR | |
528 | once at the start of your program to seed the random number generator. | |
529 | .PP | |
530 | .Vb 1 | |
531 | \& BEGIN { srand() if $] < 5.004 } | |
532 | .Ve | |
533 | .PP | |
534 | 5.004 and later automatically call \f(CW\*(C`srand\*(C'\fR at the beginning. Don't | |
535 | call \f(CW\*(C`srand\*(C'\fR more than once\-\-\-you make your numbers less random, rather | |
536 | than more. | |
537 | .PP | |
538 | Computers are good at being predictable and bad at being random | |
539 | (despite appearances caused by bugs in your programs :\-). The | |
540 | \&\fIrandom\fR article in the \*(L"Far More Than You Ever Wanted To Know\*(R" | |
541 | collection in http://www.cpan.org/misc/olddoc/FMTEYEWTK.tgz , courtesy of | |
542 | Tom Phoenix, talks more about this. John von Neumann said, \*(L"Anyone | |
543 | who attempts to generate random numbers by deterministic means is, of | |
544 | course, living in a state of sin.\*(R" | |
545 | .PP | |
546 | If you want numbers that are more random than \f(CW\*(C`rand\*(C'\fR with \f(CW\*(C`srand\*(C'\fR | |
547 | provides, you should also check out the Math::TrulyRandom module from | |
548 | \&\s-1CPAN\s0. It uses the imperfections in your system's timer to generate | |
549 | random numbers, but this takes quite a while. If you want a better | |
550 | pseudorandom generator than comes with your operating system, look at | |
551 | \&\*(L"Numerical Recipes in C\*(R" at http://www.nr.com/ . | |
552 | .Sh "How do I get a random number between X and Y?" | |
553 | .IX Subsection "How do I get a random number between X and Y?" | |
554 | \&\f(CW\*(C`rand($x)\*(C'\fR returns a number such that | |
555 | \&\f(CW\*(C`0 <= rand($x) < $x\*(C'\fR. Thus what you want to have perl | |
556 | figure out is a random number in the range from 0 to the | |
557 | difference between your \fIX\fR and \fIY\fR. | |
558 | .PP | |
559 | That is, to get a number between 10 and 15, inclusive, you | |
560 | want a random number between 0 and 5 that you can then add | |
561 | to 10. | |
562 | .PP | |
563 | .Vb 1 | |
564 | \& my $number = 10 + int rand( 15-10+1 ); | |
565 | .Ve | |
566 | .PP | |
567 | Hence you derive the following simple function to abstract | |
568 | that. It selects a random integer between the two given | |
569 | integers (inclusive). For example: \f(CW\*(C`random_int_in(50,120)\*(C'\fR. | |
570 | .PP | |
571 | .Vb 7 | |
572 | \& sub random_int_in ($$) { | |
573 | \& my($min, $max) = @_; | |
574 | \& # Assumes that the two arguments are integers themselves! | |
575 | \& return $min if $min == $max; | |
576 | \& ($min, $max) = ($max, $min) if $min > $max; | |
577 | \& return $min + int rand(1 + $max - $min); | |
578 | \& } | |
579 | .Ve | |
580 | .SH "Data: Dates" | |
581 | .IX Header "Data: Dates" | |
582 | .Sh "How do I find the day or week of the year?" | |
583 | .IX Subsection "How do I find the day or week of the year?" | |
584 | The localtime function returns the day of the year. Without an | |
585 | argument localtime uses the current time. | |
586 | .PP | |
587 | .Vb 1 | |
588 | \& $day_of_year = (localtime)[7]; | |
589 | .Ve | |
590 | .PP | |
591 | The \s-1POSIX\s0 module can also format a date as the day of the year or | |
592 | week of the year. | |
593 | .PP | |
594 | .Vb 3 | |
595 | \& use POSIX qw/strftime/; | |
596 | \& my $day_of_year = strftime "%j", localtime; | |
597 | \& my $week_of_year = strftime "%W", localtime; | |
598 | .Ve | |
599 | .PP | |
600 | To get the day or week of the year for any date, use the Time::Local module to get | |
601 | a time in epoch seconds for the argument to localtime. | |
602 | .PP | |
603 | .Vb 4 | |
604 | \& use POSIX qw/strftime/; | |
605 | \& use Time::Local; | |
606 | \& my $week_of_year = strftime "%W", | |
607 | \& localtime( timelocal( 0, 0, 0, 18, 11, 1987 ) ); | |
608 | .Ve | |
609 | .PP | |
610 | The Date::Calc module provides two functions to calculate these. | |
611 | .PP | |
612 | .Vb 3 | |
613 | \& use Date::Calc qw( Day_of_Year Week_of_Year ); | |
614 | \& my $day_of_year = Day_of_Year( 1987, 12, 18 ); | |
615 | \& my $week_of_year = Week_of_Year( 1987, 12, 18 ); | |
616 | .Ve | |
617 | .Sh "How do I find the current century or millennium?" | |
618 | .IX Subsection "How do I find the current century or millennium?" | |
619 | Use the following simple functions: | |
620 | .PP | |
621 | .Vb 3 | |
622 | \& sub get_century { | |
623 | \& return int((((localtime(shift || time))[5] + 1999))/100); | |
624 | \& } | |
625 | .Ve | |
626 | .PP | |
627 | .Vb 3 | |
628 | \& sub get_millennium { | |
629 | \& return 1+int((((localtime(shift || time))[5] + 1899))/1000); | |
630 | \& } | |
631 | .Ve | |
632 | .PP | |
633 | On some systems, the \s-1POSIX\s0 module's \fIstrftime()\fR function has | |
634 | been extended in a non-standard way to use a \f(CW%C\fR format, | |
635 | which they sometimes claim is the \*(L"century\*(R". It isn't, | |
636 | because on most such systems, this is only the first two | |
637 | digits of the four-digit year, and thus cannot be used to | |
638 | reliably determine the current century or millennium. | |
639 | .Sh "How can I compare two dates and find the difference?" | |
640 | .IX Subsection "How can I compare two dates and find the difference?" | |
641 | (contributed by brian d foy) | |
642 | .PP | |
643 | You could just store all your dates as a number and then subtract. Life | |
644 | isn't always that simple though. If you want to work with formatted | |
645 | dates, the Date::Manip, Date::Calc, or DateTime modules can help you. | |
646 | .Sh "How can I take a string and turn it into epoch seconds?" | |
647 | .IX Subsection "How can I take a string and turn it into epoch seconds?" | |
648 | If it's a regular enough string that it always has the same format, | |
649 | you can split it up and pass the parts to \f(CW\*(C`timelocal\*(C'\fR in the standard | |
650 | Time::Local module. Otherwise, you should look into the Date::Calc | |
651 | and Date::Manip modules from \s-1CPAN\s0. | |
652 | .Sh "How can I find the Julian Day?" | |
653 | .IX Subsection "How can I find the Julian Day?" | |
654 | (contributed by brian d foy and Dave Cross) | |
655 | .PP | |
656 | You can use the Time::JulianDay module available on \s-1CPAN\s0. Ensure that | |
657 | you really want to find a Julian day, though, as many people have | |
658 | different ideas about Julian days. See | |
659 | http://www.hermetic.ch/cal_stud/jdn.htm for instance. | |
660 | .PP | |
661 | You can also try the DateTime module, which can convert a date/time | |
662 | to a Julian Day. | |
663 | .PP | |
664 | .Vb 2 | |
665 | \& $ perl -MDateTime -le'print DateTime->today->jd' | |
666 | \& 2453401.5 | |
667 | .Ve | |
668 | .PP | |
669 | Or the modified Julian Day | |
670 | .PP | |
671 | .Vb 2 | |
672 | \& $ perl -MDateTime -le'print DateTime->today->mjd' | |
673 | \& 53401 | |
674 | .Ve | |
675 | .PP | |
676 | Or even the day of the year (which is what some people think of as a | |
677 | Julian day) | |
678 | .PP | |
679 | .Vb 2 | |
680 | \& $ perl -MDateTime -le'print DateTime->today->doy' | |
681 | \& 31 | |
682 | .Ve | |
683 | .Sh "How do I find yesterday's date?" | |
684 | .IX Subsection "How do I find yesterday's date?" | |
685 | (contributed by brian d foy) | |
686 | .PP | |
687 | Use one of the Date modules. The \f(CW\*(C`DateTime\*(C'\fR module makes it simple, and | |
688 | gives you the same time of day, only the day before. | |
689 | .PP | |
690 | .Vb 1 | |
691 | \& use DateTime; | |
692 | .Ve | |
693 | .PP | |
694 | .Vb 1 | |
695 | \& my $yesterday = DateTime->now->subtract( days => 1 ); | |
696 | .Ve | |
697 | .PP | |
698 | .Vb 1 | |
699 | \& print "Yesterday was $yesterday\en"; | |
700 | .Ve | |
701 | .PP | |
702 | You can also use the \f(CW\*(C`Date::Calc\*(C'\fR module using its Today_and_Now | |
703 | function. | |
704 | .PP | |
705 | .Vb 1 | |
706 | \& use Date::Calc qw( Today_and_Now Add_Delta_DHMS ); | |
707 | .Ve | |
708 | .PP | |
709 | .Vb 1 | |
710 | \& my @date_time = Add_Delta_DHMS( Today_and_Now(), -1, 0, 0, 0 ); | |
711 | .Ve | |
712 | .PP | |
713 | .Vb 1 | |
714 | \& print "@date_time\en"; | |
715 | .Ve | |
716 | .PP | |
717 | Most people try to use the time rather than the calendar to figure out | |
718 | dates, but that assumes that days are twenty-four hours each. For | |
719 | most people, there are two days a year when they aren't: the switch to | |
720 | and from summer time throws this off. Let the modules do the work. | |
721 | .Sh "Does Perl have a Year 2000 problem? Is Perl Y2K compliant?" | |
722 | .IX Subsection "Does Perl have a Year 2000 problem? Is Perl Y2K compliant?" | |
723 | Short answer: No, Perl does not have a Year 2000 problem. Yes, Perl is | |
724 | Y2K compliant (whatever that means). The programmers you've hired to | |
725 | use it, however, probably are not. | |
726 | .PP | |
727 | Long answer: The question belies a true understanding of the issue. | |
728 | Perl is just as Y2K compliant as your pencil\*(--no more, and no less. | |
729 | Can you use your pencil to write a non\-Y2K\-compliant memo? Of course | |
730 | you can. Is that the pencil's fault? Of course it isn't. | |
731 | .PP | |
732 | The date and time functions supplied with Perl (gmtime and localtime) | |
733 | supply adequate information to determine the year well beyond 2000 | |
734 | (2038 is when trouble strikes for 32\-bit machines). The year returned | |
735 | by these functions when used in a list context is the year minus 1900. | |
736 | For years between 1910 and 1999 this \fIhappens\fR to be a 2\-digit decimal | |
737 | number. To avoid the year 2000 problem simply do not treat the year as | |
738 | a 2\-digit number. It isn't. | |
739 | .PP | |
740 | When \fIgmtime()\fR and \fIlocaltime()\fR are used in scalar context they return | |
741 | a timestamp string that contains a fully-expanded year. For example, | |
742 | \&\f(CW\*(C`$timestamp = gmtime(1005613200)\*(C'\fR sets \f(CW$timestamp\fR to \*(L"Tue Nov 13 01:00:00 | |
743 | 2001\*(R". There's no year 2000 problem here. | |
744 | .PP | |
745 | That doesn't mean that Perl can't be used to create non\-Y2K compliant | |
746 | programs. It can. But so can your pencil. It's the fault of the user, | |
747 | not the language. At the risk of inflaming the \s-1NRA:\s0 \*(L"Perl doesn't | |
748 | break Y2K, people do.\*(R" See http://www.perl.org/about/y2k.html for | |
749 | a longer exposition. | |
750 | .SH "Data: Strings" | |
751 | .IX Header "Data: Strings" | |
752 | .Sh "How do I validate input?" | |
753 | .IX Subsection "How do I validate input?" | |
754 | (contributed by brian d foy) | |
755 | .PP | |
756 | There are many ways to ensure that values are what you expect or | |
757 | want to accept. Besides the specific examples that we cover in the | |
758 | perlfaq, you can also look at the modules with \*(L"Assert\*(R" and \*(L"Validate\*(R" | |
759 | in their names, along with other modules such as \f(CW\*(C`Regexp::Common\*(C'\fR. | |
760 | .PP | |
761 | Some modules have validation for particular types of input, such | |
762 | as \f(CW\*(C`Business::ISBN\*(C'\fR, \f(CW\*(C`Business::CreditCard\*(C'\fR, \f(CW\*(C`Email::Valid\*(C'\fR, | |
763 | and \f(CW\*(C`Data::Validate::IP\*(C'\fR. | |
764 | .Sh "How do I unescape a string?" | |
765 | .IX Subsection "How do I unescape a string?" | |
766 | It depends on just what you mean by \*(L"escape\*(R". \s-1URL\s0 escapes are dealt | |
767 | with in perlfaq9. Shell escapes with the backslash (\f(CW\*(C`\e\*(C'\fR) | |
768 | character are removed with | |
769 | .PP | |
770 | .Vb 1 | |
771 | \& s/\e\e(.)/$1/g; | |
772 | .Ve | |
773 | .PP | |
774 | This won't expand \f(CW"\en"\fR or \f(CW"\et"\fR or any other special escapes. | |
775 | .Sh "How do I remove consecutive pairs of characters?" | |
776 | .IX Subsection "How do I remove consecutive pairs of characters?" | |
777 | (contributed by brian d foy) | |
778 | .PP | |
779 | You can use the substitution operator to find pairs of characters (or | |
780 | runs of characters) and replace them with a single instance. In this | |
781 | substitution, we find a character in \f(CW\*(C`(.)\*(C'\fR. The memory parentheses | |
782 | store the matched character in the back-reference \f(CW\*(C`\e1\*(C'\fR and we use | |
783 | that to require that the same thing immediately follow it. We replace | |
784 | that part of the string with the character in \f(CW$1\fR. | |
785 | .PP | |
786 | .Vb 1 | |
787 | \& s/(.)\e1/$1/g; | |
788 | .Ve | |
789 | .PP | |
790 | We can also use the transliteration operator, \f(CW\*(C`tr///\*(C'\fR. In this | |
791 | example, the search list side of our \f(CW\*(C`tr///\*(C'\fR contains nothing, but | |
792 | the \f(CW\*(C`c\*(C'\fR option complements that so it contains everything. The | |
793 | replacement list also contains nothing, so the transliteration is | |
794 | almost a no-op since it won't do any replacements (or more exactly, | |
795 | replace the character with itself). However, the \f(CW\*(C`s\*(C'\fR option squashes | |
796 | duplicated and consecutive characters in the string so a character | |
797 | does not show up next to itself. | |
798 | .PP | |
799 | .Vb 2 | |
800 | \& my $str = 'Haarlem'; # in the Netherlands | |
801 | \& $str =~ tr///cs; # Now Harlem, like in New York | |
802 | .Ve | |
803 | .Sh "How do I expand function calls in a string?" | |
804 | .IX Subsection "How do I expand function calls in a string?" | |
805 | (contributed by brian d foy) | |
806 | .PP | |
807 | This is documented in perlref, and although it's not the easiest | |
808 | thing to read, it does work. In each of these examples, we call the | |
809 | function inside the braces used to dereference a reference. If we | |
810 | have more than one return value, we can construct and dereference an | |
811 | anonymous array. In this case, we call the function in list context. | |
812 | .PP | |
813 | .Vb 1 | |
814 | \& print "The time values are @{ [localtime] }.\en"; | |
815 | .Ve | |
816 | .PP | |
817 | If we want to call the function in scalar context, we have to do a bit | |
818 | more work. We can really have any code we like inside the braces, so | |
819 | we simply have to end with the scalar reference, although how you do | |
820 | that is up to you, and you can use code inside the braces. | |
821 | .PP | |
822 | .Vb 1 | |
823 | \& print "The time is ${\e(scalar localtime)}.\en" | |
824 | .Ve | |
825 | .PP | |
826 | .Vb 1 | |
827 | \& print "The time is ${ my $x = localtime; \e$x }.\en"; | |
828 | .Ve | |
829 | .PP | |
830 | If your function already returns a reference, you don't need to create | |
831 | the reference yourself. | |
832 | .PP | |
833 | .Vb 1 | |
834 | \& sub timestamp { my $t = localtime; \e$t } | |
835 | .Ve | |
836 | .PP | |
837 | .Vb 1 | |
838 | \& print "The time is ${ timestamp() }.\en"; | |
839 | .Ve | |
840 | .PP | |
841 | The \f(CW\*(C`Interpolation\*(C'\fR module can also do a lot of magic for you. You can | |
842 | specify a variable name, in this case \f(CW\*(C`E\*(C'\fR, to set up a tied hash that | |
843 | does the interpolation for you. It has several other methods to do this | |
844 | as well. | |
845 | .PP | |
846 | .Vb 2 | |
847 | \& use Interpolation E => 'eval'; | |
848 | \& print "The time values are $E{localtime()}.\en"; | |
849 | .Ve | |
850 | .PP | |
851 | In most cases, it is probably easier to simply use string concatenation, | |
852 | which also forces scalar context. | |
853 | .PP | |
854 | .Vb 1 | |
855 | \& print "The time is " . localtime . ".\en"; | |
856 | .Ve | |
857 | .Sh "How do I find matching/nesting anything?" | |
858 | .IX Subsection "How do I find matching/nesting anything?" | |
859 | This isn't something that can be done in one regular expression, no | |
860 | matter how complicated. To find something between two single | |
861 | characters, a pattern like \f(CW\*(C`/x([^x]*)x/\*(C'\fR will get the intervening | |
862 | bits in \f(CW$1\fR. For multiple-character delimiters, something more like | |
863 | \&\f(CW\*(C`/alpha(.*?)omega/\*(C'\fR would be needed. But none of these deals with | |
864 | nested patterns. For balanced expressions using \f(CW\*(C`(\*(C'\fR, \f(CW\*(C`{\*(C'\fR, \f(CW\*(C`[\*(C'\fR or | |
865 | \&\f(CW\*(C`<\*(C'\fR as delimiters, use the \s-1CPAN\s0 module Regexp::Common, or see | |
866 | \&\*(L"(??{ code })\*(R" in perlre. For other cases, you'll have to write a | |
867 | parser. | |
868 | .PP | |
869 | If you are serious about writing a parser, there are a number of | |
870 | modules or oddities that will make your life a lot easier. There are | |
871 | the \s-1CPAN\s0 modules Parse::RecDescent, Parse::Yapp, and Text::Balanced; | |
872 | and the byacc program. Starting from perl 5.8, Text::Balanced is | |
873 | part of the standard distribution. | |
874 | .PP | |
875 | One simple destructive, inside-out approach that you might try is to | |
876 | pull out the smallest nesting parts one at a time: | |
877 | .PP | |
878 | .Vb 3 | |
879 | \& while (s/BEGIN((?:(?!BEGIN)(?!END).)*)END//gs) { | |
880 | \& # do something with $1 | |
881 | \& } | |
882 | .Ve | |
883 | .PP | |
884 | A more complicated and sneaky approach is to make Perl's regular | |
885 | expression engine do it for you. This is courtesy Dean Inada, and | |
886 | rather has the nature of an Obfuscated Perl Contest entry, but it | |
887 | really does work: | |
888 | .PP | |
889 | .Vb 3 | |
890 | \& # $_ contains the string to parse | |
891 | \& # BEGIN and END are the opening and closing markers for the | |
892 | \& # nested text. | |
893 | .Ve | |
894 | .PP | |
895 | .Vb 5 | |
896 | \& @( = ('(',''); | |
897 | \& @) = (')',''); | |
898 | \& ($re=$_)=~s/((BEGIN)|(END)|.)/$)[!$3]\eQ$1\eE$([!$2]/gs; | |
899 | \& @$ = (eval{/$re/},$@!~/unmatched/i); | |
900 | \& print join("\en",@$[0..$#$]) if( $$[-1] ); | |
901 | .Ve | |
902 | .Sh "How do I reverse a string?" | |
903 | .IX Subsection "How do I reverse a string?" | |
904 | Use \fIreverse()\fR in scalar context, as documented in | |
905 | \&\*(L"reverse\*(R" in perlfunc. | |
906 | .PP | |
907 | .Vb 1 | |
908 | \& $reversed = reverse $string; | |
909 | .Ve | |
910 | .Sh "How do I expand tabs in a string?" | |
911 | .IX Subsection "How do I expand tabs in a string?" | |
912 | You can do it yourself: | |
913 | .PP | |
914 | .Vb 1 | |
915 | \& 1 while $string =~ s/\et+/' ' x (length($&) * 8 - length($`) % 8)/e; | |
916 | .Ve | |
917 | .PP | |
918 | Or you can just use the Text::Tabs module (part of the standard Perl | |
919 | distribution). | |
920 | .PP | |
921 | .Vb 2 | |
922 | \& use Text::Tabs; | |
923 | \& @expanded_lines = expand(@lines_with_tabs); | |
924 | .Ve | |
925 | .Sh "How do I reformat a paragraph?" | |
926 | .IX Subsection "How do I reformat a paragraph?" | |
927 | Use Text::Wrap (part of the standard Perl distribution): | |
928 | .PP | |
929 | .Vb 2 | |
930 | \& use Text::Wrap; | |
931 | \& print wrap("\et", ' ', @paragraphs); | |
932 | .Ve | |
933 | .PP | |
934 | The paragraphs you give to Text::Wrap should not contain embedded | |
935 | newlines. Text::Wrap doesn't justify the lines (flush\-right). | |
936 | .PP | |
937 | Or use the \s-1CPAN\s0 module Text::Autoformat. Formatting files can be easily | |
938 | done by making a shell alias, like so: | |
939 | .PP | |
940 | .Vb 2 | |
941 | \& alias fmt="perl -i -MText::Autoformat -n0777 \e | |
942 | \& -e 'print autoformat $_, {all=>1}' $*" | |
943 | .Ve | |
944 | .PP | |
945 | See the documentation for Text::Autoformat to appreciate its many | |
946 | capabilities. | |
947 | .Sh "How can I access or change N characters of a string?" | |
948 | .IX Subsection "How can I access or change N characters of a string?" | |
949 | You can access the first characters of a string with \fIsubstr()\fR. | |
950 | To get the first character, for example, start at position 0 | |
951 | and grab the string of length 1. | |
952 | .PP | |
953 | .Vb 2 | |
954 | \& $string = "Just another Perl Hacker"; | |
955 | \& $first_char = substr( $string, 0, 1 ); # 'J' | |
956 | .Ve | |
957 | .PP | |
958 | To change part of a string, you can use the optional fourth | |
959 | argument which is the replacement string. | |
960 | .PP | |
961 | .Vb 1 | |
962 | \& substr( $string, 13, 4, "Perl 5.8.0" ); | |
963 | .Ve | |
964 | .PP | |
965 | You can also use \fIsubstr()\fR as an lvalue. | |
966 | .PP | |
967 | .Vb 1 | |
968 | \& substr( $string, 13, 4 ) = "Perl 5.8.0"; | |
969 | .Ve | |
970 | .Sh "How do I change the Nth occurrence of something?" | |
971 | .IX Subsection "How do I change the Nth occurrence of something?" | |
972 | You have to keep track of N yourself. For example, let's say you want | |
973 | to change the fifth occurrence of \f(CW"whoever"\fR or \f(CW"whomever"\fR into | |
974 | \&\f(CW"whosoever"\fR or \f(CW"whomsoever"\fR, case insensitively. These | |
975 | all assume that \f(CW$_\fR contains the string to be altered. | |
976 | .PP | |
977 | .Vb 6 | |
978 | \& $count = 0; | |
979 | \& s{((whom?)ever)}{ | |
980 | \& ++$count == 5 # is it the 5th? | |
981 | \& ? "${2}soever" # yes, swap | |
982 | \& : $1 # renege and leave it there | |
983 | \& }ige; | |
984 | .Ve | |
985 | .PP | |
986 | In the more general case, you can use the \f(CW\*(C`/g\*(C'\fR modifier in a \f(CW\*(C`while\*(C'\fR | |
987 | loop, keeping count of matches. | |
988 | .PP | |
989 | .Vb 8 | |
990 | \& $WANT = 3; | |
991 | \& $count = 0; | |
992 | \& $_ = "One fish two fish red fish blue fish"; | |
993 | \& while (/(\ew+)\es+fish\eb/gi) { | |
994 | \& if (++$count == $WANT) { | |
995 | \& print "The third fish is a $1 one.\en"; | |
996 | \& } | |
997 | \& } | |
998 | .Ve | |
999 | .PP | |
1000 | That prints out: \f(CW"The third fish is a red one."\fR You can also use a | |
1001 | repetition count and repeated pattern like this: | |
1002 | .PP | |
1003 | .Vb 1 | |
1004 | \& /(?:\ew+\es+fish\es+){2}(\ew+)\es+fish/i; | |
1005 | .Ve | |
1006 | .Sh "How can I count the number of occurrences of a substring within a string?" | |
1007 | .IX Subsection "How can I count the number of occurrences of a substring within a string?" | |
1008 | There are a number of ways, with varying efficiency. If you want a | |
1009 | count of a certain single character (X) within a string, you can use the | |
1010 | \&\f(CW\*(C`tr///\*(C'\fR function like so: | |
1011 | .PP | |
1012 | .Vb 3 | |
1013 | \& $string = "ThisXlineXhasXsomeXx'sXinXit"; | |
1014 | \& $count = ($string =~ tr/X//); | |
1015 | \& print "There are $count X characters in the string"; | |
1016 | .Ve | |
1017 | .PP | |
1018 | This is fine if you are just looking for a single character. However, | |
1019 | if you are trying to count multiple character substrings within a | |
1020 | larger string, \f(CW\*(C`tr///\*(C'\fR won't work. What you can do is wrap a \fIwhile()\fR | |
1021 | loop around a global pattern match. For example, let's count negative | |
1022 | integers: | |
1023 | .PP | |
1024 | .Vb 3 | |
1025 | \& $string = "-9 55 48 -2 23 -76 4 14 -44"; | |
1026 | \& while ($string =~ /-\ed+/g) { $count++ } | |
1027 | \& print "There are $count negative numbers in the string"; | |
1028 | .Ve | |
1029 | .PP | |
1030 | Another version uses a global match in list context, then assigns the | |
1031 | result to a scalar, producing a count of the number of matches. | |
1032 | .PP | |
1033 | .Vb 1 | |
1034 | \& $count = () = $string =~ /-\ed+/g; | |
1035 | .Ve | |
1036 | .Sh "How do I capitalize all the words on one line?" | |
1037 | .IX Subsection "How do I capitalize all the words on one line?" | |
1038 | To make the first letter of each word upper case: | |
1039 | .PP | |
1040 | .Vb 1 | |
1041 | \& $line =~ s/\eb(\ew)/\eU$1/g; | |
1042 | .Ve | |
1043 | .PP | |
1044 | This has the strange effect of turning "\f(CW\*(C`don't do it\*(C'\fR" into "\f(CW\*(C`Don'T | |
1045 | Do It\*(C'\fR". Sometimes you might want this. Other times you might need a | |
1046 | more thorough solution (Suggested by brian d foy): | |
1047 | .PP | |
1048 | .Vb 7 | |
1049 | \& $string =~ s/ ( | |
1050 | \& (^\ew) #at the beginning of the line | |
1051 | \& | # or | |
1052 | \& (\es\ew) #preceded by whitespace | |
1053 | \& ) | |
1054 | \& /\eU$1/xg; | |
1055 | \& $string =~ s/([\ew']+)/\eu\eL$1/g; | |
1056 | .Ve | |
1057 | .PP | |
1058 | To make the whole line upper case: | |
1059 | .PP | |
1060 | .Vb 1 | |
1061 | \& $line = uc($line); | |
1062 | .Ve | |
1063 | .PP | |
1064 | To force each word to be lower case, with the first letter upper case: | |
1065 | .PP | |
1066 | .Vb 1 | |
1067 | \& $line =~ s/(\ew+)/\eu\eL$1/g; | |
1068 | .Ve | |
1069 | .PP | |
1070 | You can (and probably should) enable locale awareness of those | |
1071 | characters by placing a \f(CW\*(C`use locale\*(C'\fR pragma in your program. | |
1072 | See perllocale for endless details on locales. | |
1073 | .PP | |
1074 | This is sometimes referred to as putting something into \*(L"title | |
1075 | case\*(R", but that's not quite accurate. Consider the proper | |
1076 | capitalization of the movie \fIDr. Strangelove or: How I Learned to | |
1077 | Stop Worrying and Love the Bomb\fR, for example. | |
1078 | .PP | |
1079 | Damian Conway's Text::Autoformat module provides some smart | |
1080 | case transformations: | |
1081 | .PP | |
1082 | .Vb 3 | |
1083 | \& use Text::Autoformat; | |
1084 | \& my $x = "Dr. Strangelove or: How I Learned to Stop ". | |
1085 | \& "Worrying and Love the Bomb"; | |
1086 | .Ve | |
1087 | .PP | |
1088 | .Vb 5 | |
1089 | \& print $x, "\en"; | |
1090 | \& for my $style (qw( sentence title highlight )) | |
1091 | \& { | |
1092 | \& print autoformat($x, { case => $style }), "\en"; | |
1093 | \& } | |
1094 | .Ve | |
1095 | .Sh "How can I split a [character] delimited string except when inside [character]?" | |
1096 | .IX Subsection "How can I split a [character] delimited string except when inside [character]?" | |
1097 | Several modules can handle this sort of parsing\-\-\-Text::Balanced, | |
1098 | Text::CSV, Text::CSV_XS, and Text::ParseWords, among others. | |
1099 | .PP | |
1100 | Take the example case of trying to split a string that is | |
1101 | comma-separated into its different fields. You can't use \f(CW\*(C`split(/,/)\*(C'\fR | |
1102 | because you shouldn't split if the comma is inside quotes. For | |
1103 | example, take a data line like this: | |
1104 | .PP | |
1105 | .Vb 1 | |
1106 | \& SAR001,"","Cimetrix, Inc","Bob Smith","CAM",N,8,1,0,7,"Error, Core Dumped" | |
1107 | .Ve | |
1108 | .PP | |
1109 | Due to the restriction of the quotes, this is a fairly complex | |
1110 | problem. Thankfully, we have Jeffrey Friedl, author of | |
1111 | \&\fIMastering Regular Expressions\fR, to handle these for us. He | |
1112 | suggests (assuming your string is contained in \f(CW$text\fR): | |
1113 | .PP | |
1114 | .Vb 7 | |
1115 | \& @new = (); | |
1116 | \& push(@new, $+) while $text =~ m{ | |
1117 | \& "([^\e"\e\e]*(?:\e\e.[^\e"\e\e]*)*)",? # groups the phrase inside the quotes | |
1118 | \& | ([^,]+),? | |
1119 | \& | , | |
1120 | \& }gx; | |
1121 | \& push(@new, undef) if substr($text,-1,1) eq ','; | |
1122 | .Ve | |
1123 | .PP | |
1124 | If you want to represent quotation marks inside a | |
1125 | quotation-mark-delimited field, escape them with backslashes (e.g., | |
1126 | \&\f(CW"like \e"this\e""\fR). | |
1127 | .PP | |
1128 | Alternatively, the Text::ParseWords module (part of the standard Perl | |
1129 | distribution) lets you say: | |
1130 | .PP | |
1131 | .Vb 2 | |
1132 | \& use Text::ParseWords; | |
1133 | \& @new = quotewords(",", 0, $text); | |
1134 | .Ve | |
1135 | .PP | |
1136 | There's also a Text::CSV (Comma\-Separated Values) module on \s-1CPAN\s0. | |
1137 | .Sh "How do I strip blank space from the beginning/end of a string?" | |
1138 | .IX Subsection "How do I strip blank space from the beginning/end of a string?" | |
1139 | (contributed by brian d foy) | |
1140 | .PP | |
1141 | A substitution can do this for you. For a single line, you want to | |
1142 | replace all the leading or trailing whitespace with nothing. You | |
1143 | can do that with a pair of substitutions. | |
1144 | .PP | |
1145 | .Vb 2 | |
1146 | \& s/^\es+//; | |
1147 | \& s/\es+$//; | |
1148 | .Ve | |
1149 | .PP | |
1150 | You can also write that as a single substitution, although it turns | |
1151 | out the combined statement is slower than the separate ones. That | |
1152 | might not matter to you, though. | |
1153 | .PP | |
1154 | .Vb 1 | |
1155 | \& s/^\es+|\es+$//g; | |
1156 | .Ve | |
1157 | .PP | |
1158 | In this regular expression, the alternation matches either at the | |
1159 | beginning or the end of the string since the anchors have a higher | |
1160 | precedence than the alternation. With the \f(CW\*(C`/g\*(C'\fR flag, the substitution | |
1161 | makes all possible matches, so it gets both. Remember, the trailing | |
1162 | newline matches the \f(CW\*(C`\es+\*(C'\fR, and the \f(CW\*(C`$\*(C'\fR anchor can match to the | |
1163 | physical end of the string, so the newline disappears too. Just add | |
1164 | the newline to the output, which has the added benefit of preserving | |
1165 | \&\*(L"blank\*(R" (consisting entirely of whitespace) lines which the \f(CW\*(C`^\es+\*(C'\fR | |
1166 | would remove all by itself. | |
1167 | .PP | |
1168 | .Vb 5 | |
1169 | \& while( <> ) | |
1170 | \& { | |
1171 | \& s/^\es+|\es+$//g; | |
1172 | \& print "$_\en"; | |
1173 | \& } | |
1174 | .Ve | |
1175 | .PP | |
1176 | For a multi-line string, you can apply the regular expression | |
1177 | to each logical line in the string by adding the \f(CW\*(C`/m\*(C'\fR flag (for | |
1178 | \&\*(L"multi\-line\*(R"). With the \f(CW\*(C`/m\*(C'\fR flag, the \f(CW\*(C`$\*(C'\fR matches \fIbefore\fR an | |
1179 | embedded newline, so it doesn't remove it. It still removes the | |
1180 | newline at the end of the string. | |
1181 | .PP | |
1182 | .Vb 1 | |
1183 | \& $string =~ s/^\es+|\es+$//gm; | |
1184 | .Ve | |
1185 | .PP | |
1186 | Remember that lines consisting entirely of whitespace will disappear, | |
1187 | since the first part of the alternation can match the entire string | |
1188 | and replace it with nothing. If you need to keep embedded blank lines, | |
1189 | you have to do a little more work. Instead of matching any whitespace | |
1190 | (since that includes a newline), just match the other whitespace. | |
1191 | .PP | |
1192 | .Vb 1 | |
1193 | \& $string =~ s/^[\et\ef ]+|[\et\ef ]+$//mg; | |
1194 | .Ve | |
1195 | .Sh "How do I pad a string with blanks or pad a number with zeroes?" | |
1196 | .IX Subsection "How do I pad a string with blanks or pad a number with zeroes?" | |
1197 | In the following examples, \f(CW$pad_len\fR is the length to which you wish | |
1198 | to pad the string, \f(CW$text\fR or \f(CW$num\fR contains the string to be padded, | |
1199 | and \f(CW$pad_char\fR contains the padding character. You can use a single | |
1200 | character string constant instead of the \f(CW$pad_char\fR variable if you | |
1201 | know what it is in advance. And in the same way you can use an integer in | |
1202 | place of \f(CW$pad_len\fR if you know the pad length in advance. | |
1203 | .PP | |
1204 | The simplest method uses the \f(CW\*(C`sprintf\*(C'\fR function. It can pad on the left | |
1205 | or right with blanks and on the left with zeroes and it will not | |
1206 | truncate the result. The \f(CW\*(C`pack\*(C'\fR function can only pad strings on the | |
1207 | right with blanks and it will truncate the result to a maximum length of | |
1208 | \&\f(CW$pad_len\fR. | |
1209 | .PP | |
1210 | .Vb 3 | |
1211 | \& # Left padding a string with blanks (no truncation): | |
1212 | \& $padded = sprintf("%${pad_len}s", $text); | |
1213 | \& $padded = sprintf("%*s", $pad_len, $text); # same thing | |
1214 | .Ve | |
1215 | .PP | |
1216 | .Vb 3 | |
1217 | \& # Right padding a string with blanks (no truncation): | |
1218 | \& $padded = sprintf("%-${pad_len}s", $text); | |
1219 | \& $padded = sprintf("%-*s", $pad_len, $text); # same thing | |
1220 | .Ve | |
1221 | .PP | |
1222 | .Vb 3 | |
1223 | \& # Left padding a number with 0 (no truncation): | |
1224 | \& $padded = sprintf("%0${pad_len}d", $num); | |
1225 | \& $padded = sprintf("%0*d", $pad_len, $num); # same thing | |
1226 | .Ve | |
1227 | .PP | |
1228 | .Vb 2 | |
1229 | \& # Right padding a string with blanks using pack (will truncate): | |
1230 | \& $padded = pack("A$pad_len",$text); | |
1231 | .Ve | |
1232 | .PP | |
1233 | If you need to pad with a character other than blank or zero you can use | |
1234 | one of the following methods. They all generate a pad string with the | |
1235 | \&\f(CW\*(C`x\*(C'\fR operator and combine that with \f(CW$text\fR. These methods do | |
1236 | not truncate \f(CW$text\fR. | |
1237 | .PP | |
1238 | Left and right padding with any character, creating a new string: | |
1239 | .PP | |
1240 | .Vb 2 | |
1241 | \& $padded = $pad_char x ( $pad_len - length( $text ) ) . $text; | |
1242 | \& $padded = $text . $pad_char x ( $pad_len - length( $text ) ); | |
1243 | .Ve | |
1244 | .PP | |
1245 | Left and right padding with any character, modifying \f(CW$text\fR directly: | |
1246 | .PP | |
1247 | .Vb 2 | |
1248 | \& substr( $text, 0, 0 ) = $pad_char x ( $pad_len - length( $text ) ); | |
1249 | \& $text .= $pad_char x ( $pad_len - length( $text ) ); | |
1250 | .Ve | |
1251 | .Sh "How do I extract selected columns from a string?" | |
1252 | .IX Subsection "How do I extract selected columns from a string?" | |
1253 | Use \fIsubstr()\fR or \fIunpack()\fR, both documented in perlfunc. | |
1254 | If you prefer thinking in terms of columns instead of widths, | |
1255 | you can use this kind of thing: | |
1256 | .PP | |
1257 | .Vb 3 | |
1258 | \& # determine the unpack format needed to split Linux ps output | |
1259 | \& # arguments are cut columns | |
1260 | \& my $fmt = cut2fmt(8, 14, 20, 26, 30, 34, 41, 47, 59, 63, 67, 72); | |
1261 | .Ve | |
1262 | .PP | |
1263 | .Vb 11 | |
1264 | \& sub cut2fmt { | |
1265 | \& my(@positions) = @_; | |
1266 | \& my $template = ''; | |
1267 | \& my $lastpos = 1; | |
1268 | \& for my $place (@positions) { | |
1269 | \& $template .= "A" . ($place - $lastpos) . " "; | |
1270 | \& $lastpos = $place; | |
1271 | \& } | |
1272 | \& $template .= "A*"; | |
1273 | \& return $template; | |
1274 | \& } | |
1275 | .Ve | |
1276 | .Sh "How do I find the soundex value of a string?" | |
1277 | .IX Subsection "How do I find the soundex value of a string?" | |
1278 | (contributed by brian d foy) | |
1279 | .PP | |
1280 | You can use the Text::Soundex module. If you want to do fuzzy or close | |
1281 | matching, you might also try the String::Approx, Text::Metaphone, | |
1282 | and Text::DoubleMetaphone modules. | |
1283 | .Sh "How can I expand variables in text strings?" | |
1284 | .IX Subsection "How can I expand variables in text strings?" | |
1285 | Let's assume that you have a string that contains placeholder | |
1286 | variables. | |
1287 | .PP | |
1288 | .Vb 1 | |
1289 | \& $text = 'this has a $foo in it and a $bar'; | |
1290 | .Ve | |
1291 | .PP | |
1292 | You can use a substitution with a double evaluation. The | |
1293 | first /e turns \f(CW$1\fR into \f(CW$foo\fR, and the second /e turns | |
1294 | \&\f(CW$foo\fR into its value. You may want to wrap this in an | |
1295 | \&\f(CW\*(C`eval\*(C'\fR: if you try to get the value of an undeclared variable | |
1296 | while running under \f(CW\*(C`use strict\*(C'\fR, you get a fatal error. | |
1297 | .PP | |
1298 | .Vb 2 | |
1299 | \& eval { $text =~ s/(\e$\ew+)/$1/eeg }; | |
1300 | \& die if $@; | |
1301 | .Ve | |
1302 | .PP | |
1303 | It's probably better in the general case to treat those | |
1304 | variables as entries in some special hash. For example: | |
1305 | .PP | |
1306 | .Vb 5 | |
1307 | \& %user_defs = ( | |
1308 | \& foo => 23, | |
1309 | \& bar => 19, | |
1310 | \& ); | |
1311 | \& $text =~ s/\e$(\ew+)/$user_defs{$1}/g; | |
1312 | .Ve | |
1313 | .ie n .Sh "What's wrong with always quoting ""$vars""?" | |
1314 | .el .Sh "What's wrong with always quoting ``$vars''?" | |
1315 | .IX Subsection "What's wrong with always quoting $vars?" | |
1316 | The problem is that those double-quotes force stringification\*(-- | |
1317 | coercing numbers and references into strings\*(--even when you | |
1318 | don't want them to be strings. Think of it this way: double-quote | |
1319 | expansion is used to produce new strings. If you already | |
1320 | have a string, why do you need more? | |
1321 | .PP | |
1322 | If you get used to writing odd things like these: | |
1323 | .PP | |
1324 | .Vb 3 | |
1325 | \& print "$var"; # BAD | |
1326 | \& $new = "$old"; # BAD | |
1327 | \& somefunc("$var"); # BAD | |
1328 | .Ve | |
1329 | .PP | |
1330 | You'll be in trouble. Those should (in 99.8% of the cases) be | |
1331 | the simpler and more direct: | |
1332 | .PP | |
1333 | .Vb 3 | |
1334 | \& print $var; | |
1335 | \& $new = $old; | |
1336 | \& somefunc($var); | |
1337 | .Ve | |
1338 | .PP | |
1339 | Otherwise, besides slowing you down, you're going to break code when | |
1340 | the thing in the scalar is actually neither a string nor a number, but | |
1341 | a reference: | |
1342 | .PP | |
1343 | .Vb 5 | |
1344 | \& func(\e@array); | |
1345 | \& sub func { | |
1346 | \& my $aref = shift; | |
1347 | \& my $oref = "$aref"; # WRONG | |
1348 | \& } | |
1349 | .Ve | |
1350 | .PP | |
1351 | You can also get into subtle problems on those few operations in Perl | |
1352 | that actually do care about the difference between a string and a | |
1353 | number, such as the magical \f(CW\*(C`++\*(C'\fR autoincrement operator or the | |
1354 | \&\fIsyscall()\fR function. | |
1355 | .PP | |
1356 | Stringification also destroys arrays. | |
1357 | .PP | |
1358 | .Vb 3 | |
1359 | \& @lines = `command`; | |
1360 | \& print "@lines"; # WRONG - extra blanks | |
1361 | \& print @lines; # right | |
1362 | .Ve | |
1363 | .Sh "Why don't my <<\s-1HERE\s0 documents work?" | |
1364 | .IX Subsection "Why don't my <<HERE documents work?" | |
1365 | Check for these three things: | |
1366 | .IP "There must be no space after the << part." 4 | |
1367 | .IX Item "There must be no space after the << part." | |
1368 | .PD 0 | |
1369 | .IP "There (probably) should be a semicolon at the end." 4 | |
1370 | .IX Item "There (probably) should be a semicolon at the end." | |
1371 | .IP "You can't (easily) have any space in front of the tag." 4 | |
1372 | .IX Item "You can't (easily) have any space in front of the tag." | |
1373 | .PD | |
1374 | .PP | |
1375 | If you want to indent the text in the here document, you | |
1376 | can do this: | |
1377 | .PP | |
1378 | .Vb 5 | |
1379 | \& # all in one | |
1380 | \& ($VAR = <<HERE_TARGET) =~ s/^\es+//gm; | |
1381 | \& your text | |
1382 | \& goes here | |
1383 | \& HERE_TARGET | |
1384 | .Ve | |
1385 | .PP | |
1386 | But the \s-1HERE_TARGET\s0 must still be flush against the margin. | |
1387 | If you want that indented also, you'll have to quote | |
1388 | in the indentation. | |
1389 | .PP | |
1390 | .Vb 7 | |
1391 | \& ($quote = <<' FINIS') =~ s/^\es+//gm; | |
1392 | \& ...we will have peace, when you and all your works have | |
1393 | \& perished--and the works of your dark master to whom you | |
1394 | \& would deliver us. You are a liar, Saruman, and a corrupter | |
1395 | \& of men's hearts. --Theoden in /usr/src/perl/taint.c | |
1396 | \& FINIS | |
1397 | \& $quote =~ s/\es+--/\en--/; | |
1398 | .Ve | |
1399 | .PP | |
1400 | A nice general-purpose fixer-upper function for indented here documents | |
1401 | follows. It expects to be called with a here document as its argument. | |
1402 | It looks to see whether each line begins with a common substring, and | |
1403 | if so, strips that substring off. Otherwise, it takes the amount of leading | |
1404 | whitespace found on the first line and removes that much off each | |
1405 | subsequent line. | |
1406 | .PP | |
1407 | .Vb 11 | |
1408 | \& sub fix { | |
1409 | \& local $_ = shift; | |
1410 | \& my ($white, $leader); # common whitespace and common leading string | |
1411 | \& if (/^\es*(?:([^\ew\es]+)(\es*).*\en)(?:\es*\e1\e2?.*\en)+$/) { | |
1412 | \& ($white, $leader) = ($2, quotemeta($1)); | |
1413 | \& } else { | |
1414 | \& ($white, $leader) = (/^(\es+)/, ''); | |
1415 | \& } | |
1416 | \& s/^\es*?$leader(?:$white)?//gm; | |
1417 | \& return $_; | |
1418 | \& } | |
1419 | .Ve | |
1420 | .PP | |
1421 | This works with leading special strings, dynamically determined: | |
1422 | .PP | |
1423 | .Vb 10 | |
1424 | \& $remember_the_main = fix<<' MAIN_INTERPRETER_LOOP'; | |
1425 | \& @@@ int | |
1426 | \& @@@ runops() { | |
1427 | \& @@@ SAVEI32(runlevel); | |
1428 | \& @@@ runlevel++; | |
1429 | \& @@@ while ( op = (*op->op_ppaddr)() ); | |
1430 | \& @@@ TAINT_NOT; | |
1431 | \& @@@ return 0; | |
1432 | \& @@@ } | |
1433 | \& MAIN_INTERPRETER_LOOP | |
1434 | .Ve | |
1435 | .PP | |
1436 | Or with a fixed amount of leading whitespace, with remaining | |
1437 | indentation correctly preserved: | |
1438 | .PP | |
1439 | .Vb 9 | |
1440 | \& $poem = fix<<EVER_ON_AND_ON; | |
1441 | \& Now far ahead the Road has gone, | |
1442 | \& And I must follow, if I can, | |
1443 | \& Pursuing it with eager feet, | |
1444 | \& Until it joins some larger way | |
1445 | \& Where many paths and errands meet. | |
1446 | \& And whither then? I cannot say. | |
1447 | \& --Bilbo in /usr/src/perl/pp_ctl.c | |
1448 | \& EVER_ON_AND_ON | |
1449 | .Ve | |
1450 | .SH "Data: Arrays" | |
1451 | .IX Header "Data: Arrays" | |
1452 | .Sh "What is the difference between a list and an array?" | |
1453 | .IX Subsection "What is the difference between a list and an array?" | |
1454 | An array has a changeable length. A list does not. An array is something | |
1455 | you can push or pop, while a list is a set of values. Some people make | |
1456 | the distinction that a list is a value while an array is a variable. | |
1457 | Subroutines are passed and return lists, you put things into list | |
1458 | context, you initialize arrays with lists, and you \fIforeach()\fR across | |
1459 | a list. \f(CW\*(C`@\*(C'\fR variables are arrays, anonymous arrays are arrays, arrays | |
1460 | in scalar context behave like the number of elements in them, subroutines | |
1461 | access their arguments through the array \f(CW@_\fR, and push/pop/shift only work | |
1462 | on arrays. | |
1463 | .PP | |
1464 | As a side note, there's no such thing as a list in scalar context. | |
1465 | When you say | |
1466 | .PP | |
1467 | .Vb 1 | |
1468 | \& $scalar = (2, 5, 7, 9); | |
1469 | .Ve | |
1470 | .PP | |
1471 | you're using the comma operator in scalar context, so it uses the scalar | |
1472 | comma operator. There never was a list there at all! This causes the | |
1473 | last value to be returned: 9. | |
1474 | .ie n .Sh "What is the difference between $array[1] and @array[1]?" | |
1475 | .el .Sh "What is the difference between \f(CW$array\fP[1] and \f(CW@array\fP[1]?" | |
1476 | .IX Subsection "What is the difference between $array[1] and @array[1]?" | |
1477 | The former is a scalar value; the latter an array slice, making | |
1478 | it a list with one (scalar) value. You should use $ when you want a | |
1479 | scalar value (most of the time) and @ when you want a list with one | |
1480 | scalar value in it (very, very rarely; nearly never, in fact). | |
1481 | .PP | |
1482 | Sometimes it doesn't make a difference, but sometimes it does. | |
1483 | For example, compare: | |
1484 | .PP | |
1485 | .Vb 1 | |
1486 | \& $good[0] = `some program that outputs several lines`; | |
1487 | .Ve | |
1488 | .PP | |
1489 | with | |
1490 | .PP | |
1491 | .Vb 1 | |
1492 | \& @bad[0] = `same program that outputs several lines`; | |
1493 | .Ve | |
1494 | .PP | |
1495 | The \f(CW\*(C`use warnings\*(C'\fR pragma and the \fB\-w\fR flag will warn you about these | |
1496 | matters. | |
1497 | .Sh "How can I remove duplicate elements from a list or array?" | |
1498 | .IX Subsection "How can I remove duplicate elements from a list or array?" | |
1499 | (contributed by brian d foy) | |
1500 | .PP | |
1501 | Use a hash. When you think the words \*(L"unique\*(R" or \*(L"duplicated\*(R", think | |
1502 | \&\*(L"hash keys\*(R". | |
1503 | .PP | |
1504 | If you don't care about the order of the elements, you could just | |
1505 | create the hash then extract the keys. It's not important how you | |
1506 | create that hash: just that you use \f(CW\*(C`keys\*(C'\fR to get the unique | |
1507 | elements. | |
1508 | .PP | |
1509 | .Vb 3 | |
1510 | \& my %hash = map { $_, 1 } @array; | |
1511 | \& # or a hash slice: @hash{ @array } = (); | |
1512 | \& # or a foreach: $hash{$_} = 1 foreach ( @array ); | |
1513 | .Ve | |
1514 | .PP | |
1515 | .Vb 1 | |
1516 | \& my @unique = keys %hash; | |
1517 | .Ve | |
1518 | .PP | |
1519 | You can also go through each element and skip the ones you've seen | |
1520 | before. Use a hash to keep track. The first time the loop sees an | |
1521 | element, that element has no key in \f(CW%Seen\fR. The \f(CW\*(C`next\*(C'\fR statement | |
1522 | creates the key and immediately uses its value, which is \f(CW\*(C`undef\*(C'\fR, so | |
1523 | the loop continues to the \f(CW\*(C`push\*(C'\fR and increments the value for that | |
1524 | key. The next time the loop sees that same element, its key exists in | |
1525 | the hash \fIand\fR the value for that key is true (since it's not 0 or | |
1526 | undef), so the next skips that iteration and the loop goes to the next | |
1527 | element. | |
1528 | .PP | |
1529 | .Vb 2 | |
1530 | \& my @unique = (); | |
1531 | \& my %seen = (); | |
1532 | .Ve | |
1533 | .PP | |
1534 | .Vb 5 | |
1535 | \& foreach my $elem ( @array ) | |
1536 | \& { | |
1537 | \& next if $seen{ $elem }++; | |
1538 | \& push @unique, $elem; | |
1539 | \& } | |
1540 | .Ve | |
1541 | .PP | |
1542 | You can write this more briefly using a grep, which does the | |
1543 | same thing. | |
1544 | .PP | |
1545 | .Vb 2 | |
1546 | \& my %seen = (); | |
1547 | \& my @unique = grep { ! $seen{ $_ }++ } @array; | |
1548 | .Ve | |
1549 | .Sh "How can I tell whether a certain element is contained in a list or array?" | |
1550 | .IX Subsection "How can I tell whether a certain element is contained in a list or array?" | |
1551 | (portions of this answer contributed by Anno Siegel) | |
1552 | .PP | |
1553 | Hearing the word \*(L"in\*(R" is an \fIin\fRdication that you probably should have | |
1554 | used a hash, not a list or array, to store your data. Hashes are | |
1555 | designed to answer this question quickly and efficiently. Arrays aren't. | |
1556 | .PP | |
1557 | That being said, there are several ways to approach this. If you | |
1558 | are going to make this query many times over arbitrary string values, | |
1559 | the fastest way is probably to invert the original array and maintain a | |
1560 | hash whose keys are the first array's values. | |
1561 | .PP | |
1562 | .Vb 3 | |
1563 | \& @blues = qw/azure cerulean teal turquoise lapis-lazuli/; | |
1564 | \& %is_blue = (); | |
1565 | \& for (@blues) { $is_blue{$_} = 1 } | |
1566 | .Ve | |
1567 | .PP | |
1568 | Now you can check whether \f(CW$is_blue\fR{$some_color}. It might have been a | |
1569 | good idea to keep the blues all in a hash in the first place. | |
1570 | .PP | |
1571 | If the values are all small integers, you could use a simple indexed | |
1572 | array. This kind of an array will take up less space: | |
1573 | .PP | |
1574 | .Vb 4 | |
1575 | \& @primes = (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31); | |
1576 | \& @is_tiny_prime = (); | |
1577 | \& for (@primes) { $is_tiny_prime[$_] = 1 } | |
1578 | \& # or simply @is_tiny_prime[@primes] = (1) x @primes; | |
1579 | .Ve | |
1580 | .PP | |
1581 | Now you check whether \f(CW$is_tiny_prime\fR[$some_number]. | |
1582 | .PP | |
1583 | If the values in question are integers instead of strings, you can save | |
1584 | quite a lot of space by using bit strings instead: | |
1585 | .PP | |
1586 | .Vb 3 | |
1587 | \& @articles = ( 1..10, 150..2000, 2017 ); | |
1588 | \& undef $read; | |
1589 | \& for (@articles) { vec($read,$_,1) = 1 } | |
1590 | .Ve | |
1591 | .PP | |
1592 | Now check whether \f(CW\*(C`vec($read,$n,1)\*(C'\fR is true for some \f(CW$n\fR. | |
1593 | .PP | |
1594 | These methods guarantee fast individual tests but require a re-organization | |
1595 | of the original list or array. They only pay off if you have to test | |
1596 | multiple values against the same array. | |
1597 | .PP | |
1598 | If you are testing only once, the standard module List::Util exports | |
1599 | the function \f(CW\*(C`first\*(C'\fR for this purpose. It works by stopping once it | |
1600 | finds the element. It's written in C for speed, and its Perl equivalent | |
1601 | looks like this subroutine: | |
1602 | .PP | |
1603 | .Vb 7 | |
1604 | \& sub first (&@) { | |
1605 | \& my $code = shift; | |
1606 | \& foreach (@_) { | |
1607 | \& return $_ if &{$code}(); | |
1608 | \& } | |
1609 | \& undef; | |
1610 | \& } | |
1611 | .Ve | |
1612 | .PP | |
1613 | If speed is of little concern, the common idiom uses grep in scalar context | |
1614 | (which returns the number of items that passed its condition) to traverse the | |
1615 | entire list. This does have the benefit of telling you how many matches it | |
1616 | found, though. | |
1617 | .PP | |
1618 | .Vb 1 | |
1619 | \& my $is_there = grep $_ eq $whatever, @array; | |
1620 | .Ve | |
1621 | .PP | |
1622 | If you want to actually extract the matching elements, simply use grep in | |
1623 | list context. | |
1624 | .PP | |
1625 | .Vb 1 | |
1626 | \& my @matches = grep $_ eq $whatever, @array; | |
1627 | .Ve | |
1628 | .Sh "How do I compute the difference of two arrays? How do I compute the intersection of two arrays?" | |
1629 | .IX Subsection "How do I compute the difference of two arrays? How do I compute the intersection of two arrays?" | |
1630 | Use a hash. Here's code to do both and more. It assumes that | |
1631 | each element is unique in a given array: | |
1632 | .PP | |
1633 | .Vb 7 | |
1634 | \& @union = @intersection = @difference = (); | |
1635 | \& %count = (); | |
1636 | \& foreach $element (@array1, @array2) { $count{$element}++ } | |
1637 | \& foreach $element (keys %count) { | |
1638 | \& push @union, $element; | |
1639 | \& push @{ $count{$element} > 1 ? \e@intersection : \e@difference }, $element; | |
1640 | \& } | |
1641 | .Ve | |
1642 | .PP | |
1643 | Note that this is the \fIsymmetric difference\fR, that is, all elements in | |
1644 | either A or in B but not in both. Think of it as an xor operation. | |
1645 | .Sh "How do I test whether two arrays or hashes are equal?" | |
1646 | .IX Subsection "How do I test whether two arrays or hashes are equal?" | |
1647 | The following code works for single-level arrays. It uses a stringwise | |
1648 | comparison, and does not distinguish defined versus undefined empty | |
1649 | strings. Modify if you have other needs. | |
1650 | .PP | |
1651 | .Vb 1 | |
1652 | \& $are_equal = compare_arrays(\e@frogs, \e@toads); | |
1653 | .Ve | |
1654 | .PP | |
1655 | .Vb 9 | |
1656 | \& sub compare_arrays { | |
1657 | \& my ($first, $second) = @_; | |
1658 | \& no warnings; # silence spurious -w undef complaints | |
1659 | \& return 0 unless @$first == @$second; | |
1660 | \& for (my $i = 0; $i < @$first; $i++) { | |
1661 | \& return 0 if $first->[$i] ne $second->[$i]; | |
1662 | \& } | |
1663 | \& return 1; | |
1664 | \& } | |
1665 | .Ve | |
1666 | .PP | |
1667 | For multilevel structures, you may wish to use an approach more | |
1668 | like this one. It uses the \s-1CPAN\s0 module FreezeThaw: | |
1669 | .PP | |
1670 | .Vb 2 | |
1671 | \& use FreezeThaw qw(cmpStr); | |
1672 | \& @a = @b = ( "this", "that", [ "more", "stuff" ] ); | |
1673 | .Ve | |
1674 | .PP | |
1675 | .Vb 4 | |
1676 | \& printf "a and b contain %s arrays\en", | |
1677 | \& cmpStr(\e@a, \e@b) == 0 | |
1678 | \& ? "the same" | |
1679 | \& : "different"; | |
1680 | .Ve | |
1681 | .PP | |
1682 | This approach also works for comparing hashes. Here | |
1683 | we'll demonstrate two different answers: | |
1684 | .PP | |
1685 | .Vb 1 | |
1686 | \& use FreezeThaw qw(cmpStr cmpStrHard); | |
1687 | .Ve | |
1688 | .PP | |
1689 | .Vb 3 | |
1690 | \& %a = %b = ( "this" => "that", "extra" => [ "more", "stuff" ] ); | |
1691 | \& $a{EXTRA} = \e%b; | |
1692 | \& $b{EXTRA} = \e%a; | |
1693 | .Ve | |
1694 | .PP | |
1695 | .Vb 2 | |
1696 | \& printf "a and b contain %s hashes\en", | |
1697 | \& cmpStr(\e%a, \e%b) == 0 ? "the same" : "different"; | |
1698 | .Ve | |
1699 | .PP | |
1700 | .Vb 2 | |
1701 | \& printf "a and b contain %s hashes\en", | |
1702 | \& cmpStrHard(\e%a, \e%b) == 0 ? "the same" : "different"; | |
1703 | .Ve | |
1704 | .PP | |
1705 | The first reports that both of the hashes contain the same data, | |
1706 | while the second reports that they do not. Which you prefer is left as | |
1707 | an exercise to the reader. | |
1708 | .Sh "How do I find the first array element for which a condition is true?" | |
1709 | .IX Subsection "How do I find the first array element for which a condition is true?" | |
1710 | To find the first array element which satisfies a condition, you can | |
1711 | use the \fIfirst()\fR function in the List::Util module, which comes with | |
1712 | Perl 5.8. This example finds the first element that contains \*(L"Perl\*(R". | |
1713 | .PP | |
1714 | .Vb 1 | |
1715 | \& use List::Util qw(first); | |
1716 | .Ve | |
1717 | .PP | |
1718 | .Vb 1 | |
1719 | \& my $element = first { /Perl/ } @array; | |
1720 | .Ve | |
1721 | .PP | |
1722 | If you cannot use List::Util, you can make your own loop to do the | |
1723 | same thing. Once you find the element, you stop the loop with last. | |
1724 | .PP | |
1725 | .Vb 5 | |
1726 | \& my $found; | |
1727 | \& foreach ( @array ) | |
1728 | \& { | |
1729 | \& if( /Perl/ ) { $found = $_; last } | |
1730 | \& } | |
1731 | .Ve | |
1732 | .PP | |
1733 | If you want the array index, you can iterate through the indices | |
1734 | and check the array element at each index until you find one | |
1735 | that satisfies the condition. | |
1736 | .PP | |
1737 | .Vb 10 | |
1738 | \& my( $found, $index ) = ( undef, -1 ); | |
1739 | \& for( $i = 0; $i < @array; $i++ ) | |
1740 | \& { | |
1741 | \& if( $array[$i] =~ /Perl/ ) | |
1742 | \& { | |
1743 | \& $found = $array[$i]; | |
1744 | \& $index = $i; | |
1745 | \& last; | |
1746 | \& } | |
1747 | \& } | |
1748 | .Ve | |
1749 | .Sh "How do I handle linked lists?" | |
1750 | .IX Subsection "How do I handle linked lists?" | |
1751 | In general, you usually don't need a linked list in Perl, since with | |
1752 | regular arrays, you can push and pop or shift and unshift at either end, | |
1753 | or you can use splice to add and/or remove an arbitrary number of elements at | |
1754 | arbitrary points. Both pop and shift are O(1) operations on Perl's | |
1755 | dynamic arrays. In the absence of shifts and pops, push in general | |
1756 | needs to reallocate on the order of every log(N) times, and unshift will | |
1757 | need to copy pointers each time. | |
1758 | .PP | |
1759 | If you really, really wanted, you could use structures as described in | |
1760 | perldsc or perltoot and do just what the algorithm book tells you | |
1761 | to do. For example, imagine a list node like this: | |
1762 | .PP | |
1763 | .Vb 4 | |
1764 | \& $node = { | |
1765 | \& VALUE => 42, | |
1766 | \& LINK => undef, | |
1767 | \& }; | |
1768 | .Ve | |
1769 | .PP | |
1770 | You could walk the list this way: | |
1771 | .PP | |
1772 | .Vb 5 | |
1773 | \& print "List: "; | |
1774 | \& for ($node = $head; $node; $node = $node->{LINK}) { | |
1775 | \& print $node->{VALUE}, " "; | |
1776 | \& } | |
1777 | \& print "\en"; | |
1778 | .Ve | |
1779 | .PP | |
1780 | You could add to the list this way: | |
1781 | .PP | |
1782 | .Vb 5 | |
1783 | \& my ($head, $tail); | |
1784 | \& $tail = append($head, 1); # grow a new head | |
1785 | \& for $value ( 2 .. 10 ) { | |
1786 | \& $tail = append($tail, $value); | |
1787 | \& } | |
1788 | .Ve | |
1789 | .PP | |
1790 | .Vb 11 | |
1791 | \& sub append { | |
1792 | \& my($list, $value) = @_; | |
1793 | \& my $node = { VALUE => $value }; | |
1794 | \& if ($list) { | |
1795 | \& $node->{LINK} = $list->{LINK}; | |
1796 | \& $list->{LINK} = $node; | |
1797 | \& } else { | |
1798 | \& $_[0] = $node; # replace caller's version | |
1799 | \& } | |
1800 | \& return $node; | |
1801 | \& } | |
1802 | .Ve | |
1803 | .PP | |
1804 | But again, Perl's built-in arrays are virtually always good enough. | |
1805 | .Sh "How do I handle circular lists?" | |
1806 | .IX Subsection "How do I handle circular lists?" | |
1807 | Circular lists could be handled in the traditional fashion with linked | |
1808 | lists, or you could just do something like this with an array: | |
1809 | .PP | |
1810 | .Vb 2 | |
1811 | \& unshift(@array, pop(@array)); # the last shall be first | |
1812 | \& push(@array, shift(@array)); # and vice versa | |
1813 | .Ve | |
1814 | .Sh "How do I shuffle an array randomly?" | |
1815 | .IX Subsection "How do I shuffle an array randomly?" | |
1816 | If you either have Perl 5.8.0 or later installed, or if you have | |
1817 | Scalar-List-Utils 1.03 or later installed, you can say: | |
1818 | .PP | |
1819 | .Vb 1 | |
1820 | \& use List::Util 'shuffle'; | |
1821 | .Ve | |
1822 | .PP | |
1823 | .Vb 1 | |
1824 | \& @shuffled = shuffle(@list); | |
1825 | .Ve | |
1826 | .PP | |
1827 | If not, you can use a Fisher-Yates shuffle. | |
1828 | .PP | |
1829 | .Vb 8 | |
1830 | \& sub fisher_yates_shuffle { | |
1831 | \& my $deck = shift; # $deck is a reference to an array | |
1832 | \& my $i = @$deck; | |
1833 | \& while (--$i) { | |
1834 | \& my $j = int rand ($i+1); | |
1835 | \& @$deck[$i,$j] = @$deck[$j,$i]; | |
1836 | \& } | |
1837 | \& } | |
1838 | .Ve | |
1839 | .PP | |
1840 | .Vb 5 | |
1841 | \& # shuffle my mpeg collection | |
1842 | \& # | |
1843 | \& my @mpeg = <audio/*/*.mp3>; | |
1844 | \& fisher_yates_shuffle( \e@mpeg ); # randomize @mpeg in place | |
1845 | \& print @mpeg; | |
1846 | .Ve | |
1847 | .PP | |
1848 | Note that the above implementation shuffles an array in place, | |
1849 | unlike the \fIList::Util::shuffle()\fR which takes a list and returns | |
1850 | a new shuffled list. | |
1851 | .PP | |
1852 | You've probably seen shuffling algorithms that work using splice, | |
1853 | randomly picking another element to swap the current element with: | |
1854 | .PP | |
1855 | .Vb 6 | |
1856 | \& srand; | |
1857 | \& @new = (); | |
1858 | \& @old = 1 .. 10; # just a demo | |
1859 | \& while (@old) { | |
1860 | \& push(@new, splice(@old, rand @old, 1)); | |
1861 | \& } | |
1862 | .Ve | |
1863 | .PP | |
1864 | This is bad because splice is already O(N), and since you do it N times, | |
1865 | you just invented a quadratic algorithm; that is, O(N**2). This does | |
1866 | not scale, although Perl is so efficient that you probably won't notice | |
1867 | this until you have rather largish arrays. | |
1868 | .Sh "How do I process/modify each element of an array?" | |
1869 | .IX Subsection "How do I process/modify each element of an array?" | |
1870 | Use \f(CW\*(C`for\*(C'\fR/\f(CW\*(C`foreach\*(C'\fR: | |
1871 | .PP | |
1872 | .Vb 4 | |
1873 | \& for (@lines) { | |
1874 | \& s/foo/bar/; # change that word | |
1875 | \& tr/XZ/ZX/; # swap those letters | |
1876 | \& } | |
1877 | .Ve | |
1878 | .PP | |
1879 | Here's another; let's compute spherical volumes: | |
1880 | .PP | |
1881 | .Vb 4 | |
1882 | \& for (@volumes = @radii) { # @volumes has changed parts | |
1883 | \& $_ **= 3; | |
1884 | \& $_ *= (4/3) * 3.14159; # this will be constant folded | |
1885 | \& } | |
1886 | .Ve | |
1887 | .PP | |
1888 | which can also be done with \fImap()\fR which is made to transform | |
1889 | one list into another: | |
1890 | .PP | |
1891 | .Vb 1 | |
1892 | \& @volumes = map {$_ ** 3 * (4/3) * 3.14159} @radii; | |
1893 | .Ve | |
1894 | .PP | |
1895 | If you want to do the same thing to modify the values of the | |
1896 | hash, you can use the \f(CW\*(C`values\*(C'\fR function. As of Perl 5.6 | |
1897 | the values are not copied, so if you modify \f(CW$orbit\fR (in this | |
1898 | case), you modify the value. | |
1899 | .PP | |
1900 | .Vb 3 | |
1901 | \& for $orbit ( values %orbits ) { | |
1902 | \& ($orbit **= 3) *= (4/3) * 3.14159; | |
1903 | \& } | |
1904 | .Ve | |
1905 | .PP | |
1906 | Prior to perl 5.6 \f(CW\*(C`values\*(C'\fR returned copies of the values, | |
1907 | so older perl code often contains constructions such as | |
1908 | \&\f(CW@orbits{keys %orbits}\fR instead of \f(CW\*(C`values %orbits\*(C'\fR where | |
1909 | the hash is to be modified. | |
1910 | .Sh "How do I select a random element from an array?" | |
1911 | .IX Subsection "How do I select a random element from an array?" | |
1912 | Use the \fIrand()\fR function (see \*(L"rand\*(R" in perlfunc): | |
1913 | .PP | |
1914 | .Vb 2 | |
1915 | \& $index = rand @array; | |
1916 | \& $element = $array[$index]; | |
1917 | .Ve | |
1918 | .PP | |
1919 | Or, simply: | |
1920 | \f(CW\*(C`my $element = $array[ rand @array ];\*(C'\fR | |
1921 | .Sh "How do I permute N elements of a list?" | |
1922 | .IX Subsection "How do I permute N elements of a list?" | |
1923 | Use the List::Permutor module on \s-1CPAN\s0. If the list is | |
1924 | actually an array, try the Algorithm::Permute module (also | |
1925 | on \s-1CPAN\s0). It's written in \s-1XS\s0 code and is very efficient. | |
1926 | .PP | |
1927 | .Vb 6 | |
1928 | \& use Algorithm::Permute; | |
1929 | \& my @array = 'a'..'d'; | |
1930 | \& my $p_iterator = Algorithm::Permute->new ( \e@array ); | |
1931 | \& while (my @perm = $p_iterator->next) { | |
1932 | \& print "next permutation: (@perm)\en"; | |
1933 | \& } | |
1934 | .Ve | |
1935 | .PP | |
1936 | For even faster execution, you could do: | |
1937 | .PP | |
1938 | .Vb 5 | |
1939 | \& use Algorithm::Permute; | |
1940 | \& my @array = 'a'..'d'; | |
1941 | \& Algorithm::Permute::permute { | |
1942 | \& print "next permutation: (@array)\en"; | |
1943 | \& } @array; | |
1944 | .Ve | |
1945 | .PP | |
1946 | Here's a little program that generates all permutations of | |
1947 | all the words on each line of input. The algorithm embodied | |
1948 | in the \fIpermute()\fR function is discussed in Volume 4 (still | |
1949 | unpublished) of Knuth's \fIThe Art of Computer Programming\fR | |
1950 | and will work on any list: | |
1951 | .PP | |
1952 | .Vb 2 | |
1953 | \& #!/usr/bin/perl -n | |
1954 | \& # Fischer-Krause ordered permutation generator | |
1955 | .Ve | |
1956 | .PP | |
1957 | .Vb 12 | |
1958 | \& sub permute (&@) { | |
1959 | \& my $code = shift; | |
1960 | \& my @idx = 0..$#_; | |
1961 | \& while ( $code->(@_[@idx]) ) { | |
1962 | \& my $p = $#idx; | |
1963 | \& --$p while $idx[$p-1] > $idx[$p]; | |
1964 | \& my $q = $p or return; | |
1965 | \& push @idx, reverse splice @idx, $p; | |
1966 | \& ++$q while $idx[$p-1] > $idx[$q]; | |
1967 | \& @idx[$p-1,$q]=@idx[$q,$p-1]; | |
1968 | \& } | |
1969 | \& } | |
1970 | .Ve | |
1971 | .PP | |
1972 | .Vb 1 | |
1973 | \& permute {print"@_\en"} split; | |
1974 | .Ve | |
1975 | .Sh "How do I sort an array by (anything)?" | |
1976 | .IX Subsection "How do I sort an array by (anything)?" | |
1977 | Supply a comparison function to \fIsort()\fR (described in \*(L"sort\*(R" in perlfunc): | |
1978 | .PP | |
1979 | .Vb 1 | |
1980 | \& @list = sort { $a <=> $b } @list; | |
1981 | .Ve | |
1982 | .PP | |
1983 | The default sort function is cmp, string comparison, which would | |
1984 | sort \f(CW\*(C`(1, 2, 10)\*(C'\fR into \f(CW\*(C`(1, 10, 2)\*(C'\fR. \f(CW\*(C`<=>\*(C'\fR, used above, is | |
1985 | the numerical comparison operator. | |
1986 | .PP | |
1987 | If you have a complicated function needed to pull out the part you | |
1988 | want to sort on, then don't do it inside the sort function. Pull it | |
1989 | out first, because the sort \s-1BLOCK\s0 can be called many times for the | |
1990 | same element. Here's an example of how to pull out the first word | |
1991 | after the first number on each item, and then sort those words | |
1992 | case\-insensitively. | |
1993 | .PP | |
1994 | .Vb 6 | |
1995 | \& @idx = (); | |
1996 | \& for (@data) { | |
1997 | \& ($item) = /\ed+\es*(\eS+)/; | |
1998 | \& push @idx, uc($item); | |
1999 | \& } | |
2000 | \& @sorted = @data[ sort { $idx[$a] cmp $idx[$b] } 0 .. $#idx ]; | |
2001 | .Ve | |
2002 | .PP | |
2003 | which could also be written this way, using a trick | |
2004 | that's come to be known as the Schwartzian Transform: | |
2005 | .PP | |
2006 | .Vb 3 | |
2007 | \& @sorted = map { $_->[0] } | |
2008 | \& sort { $a->[1] cmp $b->[1] } | |
2009 | \& map { [ $_, uc( (/\ed+\es*(\eS+)/)[0]) ] } @data; | |
2010 | .Ve | |
2011 | .PP | |
2012 | If you need to sort on several fields, the following paradigm is useful. | |
2013 | .PP | |
2014 | .Vb 4 | |
2015 | \& @sorted = sort { field1($a) <=> field1($b) || | |
2016 | \& field2($a) cmp field2($b) || | |
2017 | \& field3($a) cmp field3($b) | |
2018 | \& } @data; | |
2019 | .Ve | |
2020 | .PP | |
2021 | This can be conveniently combined with precalculation of keys as given | |
2022 | above. | |
2023 | .PP | |
2024 | See the \fIsort\fR article in the \*(L"Far More Than You Ever Wanted | |
2025 | To Know\*(R" collection in http://www.cpan.org/misc/olddoc/FMTEYEWTK.tgz for | |
2026 | more about this approach. | |
2027 | .PP | |
2028 | See also the question below on sorting hashes. | |
2029 | .Sh "How do I manipulate arrays of bits?" | |
2030 | .IX Subsection "How do I manipulate arrays of bits?" | |
2031 | Use \fIpack()\fR and \fIunpack()\fR, or else \fIvec()\fR and the bitwise operations. | |
2032 | .PP | |
2033 | For example, this sets \f(CW$vec\fR to have bit N set if \f(CW$ints\fR[N] was set: | |
2034 | .PP | |
2035 | .Vb 2 | |
2036 | \& $vec = ''; | |
2037 | \& foreach(@ints) { vec($vec,$_,1) = 1 } | |
2038 | .Ve | |
2039 | .PP | |
2040 | Here's how, given a vector in \f(CW$vec\fR, you can | |
2041 | get those bits into your \f(CW@ints\fR array: | |
2042 | .PP | |
2043 | .Vb 28 | |
2044 | \& sub bitvec_to_list { | |
2045 | \& my $vec = shift; | |
2046 | \& my @ints; | |
2047 | \& # Find null-byte density then select best algorithm | |
2048 | \& if ($vec =~ tr/\e0// / length $vec > 0.95) { | |
2049 | \& use integer; | |
2050 | \& my $i; | |
2051 | \& # This method is faster with mostly null-bytes | |
2052 | \& while($vec =~ /[^\e0]/g ) { | |
2053 | \& $i = -9 + 8 * pos $vec; | |
2054 | \& push @ints, $i if vec($vec, ++$i, 1); | |
2055 | \& push @ints, $i if vec($vec, ++$i, 1); | |
2056 | \& push @ints, $i if vec($vec, ++$i, 1); | |
2057 | \& push @ints, $i if vec($vec, ++$i, 1); | |
2058 | \& push @ints, $i if vec($vec, ++$i, 1); | |
2059 | \& push @ints, $i if vec($vec, ++$i, 1); | |
2060 | \& push @ints, $i if vec($vec, ++$i, 1); | |
2061 | \& push @ints, $i if vec($vec, ++$i, 1); | |
2062 | \& } | |
2063 | \& } else { | |
2064 | \& # This method is a fast general algorithm | |
2065 | \& use integer; | |
2066 | \& my $bits = unpack "b*", $vec; | |
2067 | \& push @ints, 0 if $bits =~ s/^(\ed)// && $1; | |
2068 | \& push @ints, pos $bits while($bits =~ /1/g); | |
2069 | \& } | |
2070 | \& return \e@ints; | |
2071 | \& } | |
2072 | .Ve | |
2073 | .PP | |
2074 | This method gets faster the more sparse the bit vector is. | |
2075 | (Courtesy of Tim Bunce and Winfried Koenig.) | |
2076 | .PP | |
2077 | You can make the while loop a lot shorter with this suggestion | |
2078 | from Benjamin Goldberg: | |
2079 | .PP | |
2080 | .Vb 3 | |
2081 | \& while($vec =~ /[^\e0]+/g ) { | |
2082 | \& push @ints, grep vec($vec, $_, 1), $-[0] * 8 .. $+[0] * 8; | |
2083 | \& } | |
2084 | .Ve | |
2085 | .PP | |
2086 | Or use the \s-1CPAN\s0 module Bit::Vector: | |
2087 | .PP | |
2088 | .Vb 3 | |
2089 | \& $vector = Bit::Vector->new($num_of_bits); | |
2090 | \& $vector->Index_List_Store(@ints); | |
2091 | \& @ints = $vector->Index_List_Read(); | |
2092 | .Ve | |
2093 | .PP | |
2094 | Bit::Vector provides efficient methods for bit vectors, sets of small integers | |
2095 | and \*(L"big int\*(R" math. | |
2096 | .PP | |
2097 | Here's a more extensive illustration using \fIvec()\fR: | |
2098 | .PP | |
2099 | .Vb 7 | |
2100 | \& # vec demo | |
2101 | \& $vector = "\exff\ex0f\exef\exfe"; | |
2102 | \& print "Ilya's string \e\exff\e\ex0f\e\exef\e\exfe represents the number ", | |
2103 | \& unpack("N", $vector), "\en"; | |
2104 | \& $is_set = vec($vector, 23, 1); | |
2105 | \& print "Its 23rd bit is ", $is_set ? "set" : "clear", ".\en"; | |
2106 | \& pvec($vector); | |
2107 | .Ve | |
2108 | .PP | |
2109 | .Vb 3 | |
2110 | \& set_vec(1,1,1); | |
2111 | \& set_vec(3,1,1); | |
2112 | \& set_vec(23,1,1); | |
2113 | .Ve | |
2114 | .PP | |
2115 | .Vb 6 | |
2116 | \& set_vec(3,1,3); | |
2117 | \& set_vec(3,2,3); | |
2118 | \& set_vec(3,4,3); | |
2119 | \& set_vec(3,4,7); | |
2120 | \& set_vec(3,8,3); | |
2121 | \& set_vec(3,8,7); | |
2122 | .Ve | |
2123 | .PP | |
2124 | .Vb 2 | |
2125 | \& set_vec(0,32,17); | |
2126 | \& set_vec(1,32,17); | |
2127 | .Ve | |
2128 | .PP | |
2129 | .Vb 7 | |
2130 | \& sub set_vec { | |
2131 | \& my ($offset, $width, $value) = @_; | |
2132 | \& my $vector = ''; | |
2133 | \& vec($vector, $offset, $width) = $value; | |
2134 | \& print "offset=$offset width=$width value=$value\en"; | |
2135 | \& pvec($vector); | |
2136 | \& } | |
2137 | .Ve | |
2138 | .PP | |
2139 | .Vb 5 | |
2140 | \& sub pvec { | |
2141 | \& my $vector = shift; | |
2142 | \& my $bits = unpack("b*", $vector); | |
2143 | \& my $i = 0; | |
2144 | \& my $BASE = 8; | |
2145 | .Ve | |
2146 | .PP | |
2147 | .Vb 4 | |
2148 | \& print "vector length in bytes: ", length($vector), "\en"; | |
2149 | \& @bytes = unpack("A8" x length($vector), $bits); | |
2150 | \& print "bits are: @bytes\en\en"; | |
2151 | \& } | |
2152 | .Ve | |
2153 | .Sh "Why does \fIdefined()\fP return true on empty arrays and hashes?" | |
2154 | .IX Subsection "Why does defined() return true on empty arrays and hashes?" | |
2155 | The short story is that you should probably only use defined on scalars or | |
2156 | functions, not on aggregates (arrays and hashes). See \*(L"defined\*(R" in perlfunc | |
2157 | in the 5.004 release or later of Perl for more detail. | |
2158 | .SH "Data: Hashes (Associative Arrays)" | |
2159 | .IX Header "Data: Hashes (Associative Arrays)" | |
2160 | .Sh "How do I process an entire hash?" | |
2161 | .IX Subsection "How do I process an entire hash?" | |
2162 | Use the \fIeach()\fR function (see \*(L"each\*(R" in perlfunc) if you don't care | |
2163 | whether it's sorted: | |
2164 | .PP | |
2165 | .Vb 3 | |
2166 | \& while ( ($key, $value) = each %hash) { | |
2167 | \& print "$key = $value\en"; | |
2168 | \& } | |
2169 | .Ve | |
2170 | .PP | |
2171 | If you want it sorted, you'll have to use \fIforeach()\fR on the result of | |
2172 | sorting the keys as shown in an earlier question. | |
2173 | .Sh "What happens if I add or remove keys from a hash while iterating over it?" | |
2174 | .IX Subsection "What happens if I add or remove keys from a hash while iterating over it?" | |
2175 | (contributed by brian d foy) | |
2176 | .PP | |
2177 | The easy answer is \*(L"Don't do that!\*(R" | |
2178 | .PP | |
2179 | If you iterate through the hash with \fIeach()\fR, you can delete the key | |
2180 | most recently returned without worrying about it. If you delete or add | |
2181 | other keys, the iterator may skip or double up on them since perl | |
2182 | may rearrange the hash table. See the | |
2183 | entry for \f(CW\*(C`each()\*(C'\fR in perlfunc. | |
2184 | .Sh "How do I look up a hash element by value?" | |
2185 | .IX Subsection "How do I look up a hash element by value?" | |
2186 | Create a reverse hash: | |
2187 | .PP | |
2188 | .Vb 2 | |
2189 | \& %by_value = reverse %by_key; | |
2190 | \& $key = $by_value{$value}; | |
2191 | .Ve | |
2192 | .PP | |
2193 | That's not particularly efficient. It would be more space-efficient | |
2194 | to use: | |
2195 | .PP | |
2196 | .Vb 3 | |
2197 | \& while (($key, $value) = each %by_key) { | |
2198 | \& $by_value{$value} = $key; | |
2199 | \& } | |
2200 | .Ve | |
2201 | .PP | |
2202 | If your hash could have repeated values, the methods above will only find | |
2203 | one of the associated keys. This may or may not worry you. If it does | |
2204 | worry you, you can always reverse the hash into a hash of arrays instead: | |
2205 | .PP | |
2206 | .Vb 3 | |
2207 | \& while (($key, $value) = each %by_key) { | |
2208 | \& push @{$key_list_by_value{$value}}, $key; | |
2209 | \& } | |
2210 | .Ve | |
2211 | .Sh "How can I know how many entries are in a hash?" | |
2212 | .IX Subsection "How can I know how many entries are in a hash?" | |
2213 | If you mean how many keys, then all you have to do is | |
2214 | use the \fIkeys()\fR function in a scalar context: | |
2215 | .PP | |
2216 | .Vb 1 | |
2217 | \& $num_keys = keys %hash; | |
2218 | .Ve | |
2219 | .PP | |
2220 | The \fIkeys()\fR function also resets the iterator, which means that you may | |
2221 | see strange results if you use this between uses of other hash operators | |
2222 | such as \fIeach()\fR. | |
2223 | .Sh "How do I sort a hash (optionally by value instead of key)?" | |
2224 | .IX Subsection "How do I sort a hash (optionally by value instead of key)?" | |
2225 | (contributed by brian d foy) | |
2226 | .PP | |
2227 | To sort a hash, start with the keys. In this example, we give the list of | |
2228 | keys to the sort function which then compares them ASCIIbetically (which | |
2229 | might be affected by your locale settings). The output list has the keys | |
2230 | in ASCIIbetical order. Once we have the keys, we can go through them to | |
2231 | create a report which lists the keys in ASCIIbetical order. | |
2232 | .PP | |
2233 | .Vb 1 | |
2234 | \& my @keys = sort { $a cmp $b } keys %hash; | |
2235 | .Ve | |
2236 | .PP | |
2237 | .Vb 4 | |
2238 | \& foreach my $key ( @keys ) | |
2239 | \& { | |
2240 | \& printf "%-20s %6d\en", $key, $hash{$key}; | |
2241 | \& } | |
2242 | .Ve | |
2243 | .PP | |
2244 | We could get more fancy in the \f(CW\*(C`sort()\*(C'\fR block though. Instead of | |
2245 | comparing the keys, we can compute a value with them and use that | |
2246 | value as the comparison. | |
2247 | .PP | |
2248 | For instance, to make our report order case\-insensitive, we use | |
2249 | the \f(CW\*(C`\eL\*(C'\fR sequence in a double-quoted string to make everything | |
2250 | lowercase. The \f(CW\*(C`sort()\*(C'\fR block then compares the lowercased | |
2251 | values to determine in which order to put the keys. | |
2252 | .PP | |
2253 | .Vb 1 | |
2254 | \& my @keys = sort { "\eL$a" cmp "\eL$b" } keys %hash; | |
2255 | .Ve | |
2256 | .PP | |
2257 | Note: if the computation is expensive or the hash has many elements, | |
2258 | you may want to look at the Schwartzian Transform to cache the | |
2259 | computation results. | |
2260 | .PP | |
2261 | If we want to sort by the hash value instead, we use the hash key | |
2262 | to look it up. We still get out a list of keys, but this time they | |
2263 | are ordered by their value. | |
2264 | .PP | |
2265 | .Vb 1 | |
2266 | \& my @keys = sort { $hash{$a} <=> $hash{$b} } keys %hash; | |
2267 | .Ve | |
2268 | .PP | |
2269 | From there we can get more complex. If the hash values are the same, | |
2270 | we can provide a secondary sort on the hash key. | |
2271 | .PP | |
2272 | .Vb 5 | |
2273 | \& my @keys = sort { | |
2274 | \& $hash{$a} <=> $hash{$b} | |
2275 | \& or | |
2276 | \& "\eL$a" cmp "\eL$b" | |
2277 | \& } keys %hash; | |
2278 | .Ve | |
2279 | .Sh "How can I always keep my hash sorted?" | |
2280 | .IX Subsection "How can I always keep my hash sorted?" | |
2281 | You can look into using the DB_File module and \fItie()\fR using the | |
2282 | \&\f(CW$DB_BTREE\fR hash bindings as documented in \*(L"In Memory Databases\*(R" in DB_File. | |
2283 | The Tie::IxHash module from \s-1CPAN\s0 might also be instructive. | |
2284 | .ie n .Sh "What's the difference between ""delete"" and ""undef"" with hashes?" | |
2285 | .el .Sh "What's the difference between ``delete'' and ``undef'' with hashes?" | |
2286 | .IX Subsection "What's the difference between delete and undef with hashes?" | |
2287 | Hashes contain pairs of scalars: the first is the key, the | |
2288 | second is the value. The key will be coerced to a string, | |
2289 | although the value can be any kind of scalar: string, | |
2290 | number, or reference. If a key \f(CW$key\fR is present in | |
2291 | \&\f(CW%hash\fR, \f(CW\*(C`exists($hash{$key})\*(C'\fR will return true. The value | |
2292 | for a given key can be \f(CW\*(C`undef\*(C'\fR, in which case | |
2293 | \&\f(CW$hash{$key}\fR will be \f(CW\*(C`undef\*(C'\fR while \f(CW\*(C`exists $hash{$key}\*(C'\fR | |
2294 | will return true. This corresponds to (\f(CW$key\fR, \f(CW\*(C`undef\*(C'\fR) | |
2295 | being in the hash. | |
2296 | .PP | |
2297 | Pictures help... here's the \f(CW%hash\fR table: | |
2298 | .PP | |
2299 | .Vb 7 | |
2300 | \& keys values | |
2301 | \& +------+------+ | |
2302 | \& | a | 3 | | |
2303 | \& | x | 7 | | |
2304 | \& | d | 0 | | |
2305 | \& | e | 2 | | |
2306 | \& +------+------+ | |
2307 | .Ve | |
2308 | .PP | |
2309 | And these conditions hold: | |
2310 | .PP | |
2311 | .Vb 6 | |
2312 | \& $hash{'a'} is true | |
2313 | \& $hash{'d'} is false | |
2314 | \& defined $hash{'d'} is true | |
2315 | \& defined $hash{'a'} is true | |
2316 | \& exists $hash{'a'} is true (Perl5 only) | |
2317 | \& grep ($_ eq 'a', keys %hash) is true | |
2318 | .Ve | |
2319 | .PP | |
2320 | If you now say | |
2321 | .PP | |
2322 | .Vb 1 | |
2323 | \& undef $hash{'a'} | |
2324 | .Ve | |
2325 | .PP | |
2326 | your table now reads: | |
2327 | .PP | |
2328 | .Vb 7 | |
2329 | \& keys values | |
2330 | \& +------+------+ | |
2331 | \& | a | undef| | |
2332 | \& | x | 7 | | |
2333 | \& | d | 0 | | |
2334 | \& | e | 2 | | |
2335 | \& +------+------+ | |
2336 | .Ve | |
2337 | .PP | |
2338 | and these conditions now hold; changes in caps: | |
2339 | .PP | |
2340 | .Vb 6 | |
2341 | \& $hash{'a'} is FALSE | |
2342 | \& $hash{'d'} is false | |
2343 | \& defined $hash{'d'} is true | |
2344 | \& defined $hash{'a'} is FALSE | |
2345 | \& exists $hash{'a'} is true (Perl5 only) | |
2346 | \& grep ($_ eq 'a', keys %hash) is true | |
2347 | .Ve | |
2348 | .PP | |
2349 | Notice the last two: you have an undef value, but a defined key! | |
2350 | .PP | |
2351 | Now, consider this: | |
2352 | .PP | |
2353 | .Vb 1 | |
2354 | \& delete $hash{'a'} | |
2355 | .Ve | |
2356 | .PP | |
2357 | your table now reads: | |
2358 | .PP | |
2359 | .Vb 6 | |
2360 | \& keys values | |
2361 | \& +------+------+ | |
2362 | \& | x | 7 | | |
2363 | \& | d | 0 | | |
2364 | \& | e | 2 | | |
2365 | \& +------+------+ | |
2366 | .Ve | |
2367 | .PP | |
2368 | and these conditions now hold; changes in caps: | |
2369 | .PP | |
2370 | .Vb 6 | |
2371 | \& $hash{'a'} is false | |
2372 | \& $hash{'d'} is false | |
2373 | \& defined $hash{'d'} is true | |
2374 | \& defined $hash{'a'} is false | |
2375 | \& exists $hash{'a'} is FALSE (Perl5 only) | |
2376 | \& grep ($_ eq 'a', keys %hash) is FALSE | |
2377 | .Ve | |
2378 | .PP | |
2379 | See, the whole entry is gone! | |
2380 | .Sh "Why don't my tied hashes make the defined/exists distinction?" | |
2381 | .IX Subsection "Why don't my tied hashes make the defined/exists distinction?" | |
2382 | This depends on the tied hash's implementation of \s-1\fIEXISTS\s0()\fR. | |
2383 | For example, there isn't the concept of undef with hashes | |
2384 | that are tied to DBM* files. It also means that \fIexists()\fR and | |
2385 | \&\fIdefined()\fR do the same thing with a DBM* file, and what they | |
2386 | end up doing is not what they do with ordinary hashes. | |
2387 | .Sh "How do I reset an \fIeach()\fP operation part-way through?" | |
2388 | .IX Subsection "How do I reset an each() operation part-way through?" | |
2389 | Using \f(CW\*(C`keys %hash\*(C'\fR in scalar context returns the number of keys in | |
2390 | the hash \fIand\fR resets the iterator associated with the hash. You may | |
2391 | need to do this if you use \f(CW\*(C`last\*(C'\fR to exit a loop early so that when you | |
2392 | re-enter it, the hash iterator has been reset. | |
2393 | .Sh "How can I get the unique keys from two hashes?" | |
2394 | .IX Subsection "How can I get the unique keys from two hashes?" | |
2395 | First you extract the keys from the hashes into lists, then solve | |
2396 | the \*(L"removing duplicates\*(R" problem described above. For example: | |
2397 | .PP | |
2398 | .Vb 5 | |
2399 | \& %seen = (); | |
2400 | \& for $element (keys(%foo), keys(%bar)) { | |
2401 | \& $seen{$element}++; | |
2402 | \& } | |
2403 | \& @uniq = keys %seen; | |
2404 | .Ve | |
2405 | .PP | |
2406 | Or more succinctly: | |
2407 | .PP | |
2408 | .Vb 1 | |
2409 | \& @uniq = keys %{{%foo,%bar}}; | |
2410 | .Ve | |
2411 | .PP | |
2412 | Or if you really want to save space: | |
2413 | .PP | |
2414 | .Vb 8 | |
2415 | \& %seen = (); | |
2416 | \& while (defined ($key = each %foo)) { | |
2417 | \& $seen{$key}++; | |
2418 | \& } | |
2419 | \& while (defined ($key = each %bar)) { | |
2420 | \& $seen{$key}++; | |
2421 | \& } | |
2422 | \& @uniq = keys %seen; | |
2423 | .Ve | |
2424 | .Sh "How can I store a multidimensional array in a \s-1DBM\s0 file?" | |
2425 | .IX Subsection "How can I store a multidimensional array in a DBM file?" | |
2426 | Either stringify the structure yourself (no fun), or else | |
2427 | get the \s-1MLDBM\s0 (which uses Data::Dumper) module from \s-1CPAN\s0 and layer | |
2428 | it on top of either DB_File or GDBM_File. | |
2429 | .Sh "How can I make my hash remember the order I put elements into it?" | |
2430 | .IX Subsection "How can I make my hash remember the order I put elements into it?" | |
2431 | Use the Tie::IxHash module from \s-1CPAN\s0. | |
2432 | .PP | |
2433 | .Vb 7 | |
2434 | \& use Tie::IxHash; | |
2435 | \& tie my %myhash, 'Tie::IxHash'; | |
2436 | \& for (my $i=0; $i<20; $i++) { | |
2437 | \& $myhash{$i} = 2*$i; | |
2438 | \& } | |
2439 | \& my @keys = keys %myhash; | |
2440 | \& # @keys = (0,1,2,3,...) | |
2441 | .Ve | |
2442 | .Sh "Why does passing a subroutine an undefined element in a hash create it?" | |
2443 | .IX Subsection "Why does passing a subroutine an undefined element in a hash create it?" | |
2444 | If you say something like: | |
2445 | .PP | |
2446 | .Vb 1 | |
2447 | \& somefunc($hash{"nonesuch key here"}); | |
2448 | .Ve | |
2449 | .PP | |
2450 | Then that element \*(L"autovivifies\*(R"; that is, it springs into existence | |
2451 | whether you store something there or not. That's because functions | |
2452 | get scalars passed in by reference. If \fIsomefunc()\fR modifies \f(CW$_[0]\fR, | |
2453 | it has to be ready to write it back into the caller's version. | |
2454 | .PP | |
2455 | This has been fixed as of Perl5.004. | |
2456 | .PP | |
2457 | Normally, merely accessing a key's value for a nonexistent key does | |
2458 | \&\fInot\fR cause that key to be forever there. This is different than | |
2459 | awk's behavior. | |
2460 | .Sh "How can I make the Perl equivalent of a C structure/\*(C+ class/hash or array of hashes or arrays?" | |
2461 | .IX Subsection "How can I make the Perl equivalent of a C structure/ class/hash or array of hashes or arrays?" | |
2462 | Usually a hash ref, perhaps like this: | |
2463 | .PP | |
2464 | .Vb 8 | |
2465 | \& $record = { | |
2466 | \& NAME => "Jason", | |
2467 | \& EMPNO => 132, | |
2468 | \& TITLE => "deputy peon", | |
2469 | \& AGE => 23, | |
2470 | \& SALARY => 37_000, | |
2471 | \& PALS => [ "Norbert", "Rhys", "Phineas"], | |
2472 | \& }; | |
2473 | .Ve | |
2474 | .PP | |
2475 | References are documented in perlref and perlreftut. | |
2476 | Examples of complex data structures are given in perldsc and | |
2477 | perllol. Examples of structures and object-oriented classes are | |
2478 | in perltoot. | |
2479 | .Sh "How can I use a reference as a hash key?" | |
2480 | .IX Subsection "How can I use a reference as a hash key?" | |
2481 | (contributed by brian d foy) | |
2482 | .PP | |
2483 | Hash keys are strings, so you can't really use a reference as the key. | |
2484 | When you try to do that, perl turns the reference into its stringified | |
2485 | form (for instance, \f(CW\*(C`HASH(0xDEADBEEF)\*(C'\fR). From there you can't get back | |
2486 | the reference from the stringified form, at least without doing some | |
2487 | extra work on your own. Also remember that hash keys must be unique, but | |
2488 | two different variables can store the same reference (and those variables | |
2489 | can change later). | |
2490 | .PP | |
2491 | The Tie::RefHash module, which is distributed with perl, might be what | |
2492 | you want. It handles that extra work. | |
2493 | .SH "Data: Misc" | |
2494 | .IX Header "Data: Misc" | |
2495 | .Sh "How do I handle binary data correctly?" | |
2496 | .IX Subsection "How do I handle binary data correctly?" | |
2497 | Perl is binary clean, so this shouldn't be a problem. For example, | |
2498 | this works fine (assuming the files are found): | |
2499 | .PP | |
2500 | .Vb 3 | |
2501 | \& if (`cat /vmunix` =~ /gzip/) { | |
2502 | \& print "Your kernel is GNU-zip enabled!\en"; | |
2503 | \& } | |
2504 | .Ve | |
2505 | .PP | |
2506 | On less elegant (read: Byzantine) systems, however, you have | |
2507 | to play tedious games with \*(L"text\*(R" versus \*(L"binary\*(R" files. See | |
2508 | \&\*(L"binmode\*(R" in perlfunc or perlopentut. | |
2509 | .PP | |
2510 | If you're concerned about 8\-bit textual data, then see perllocale. | |
2511 | .PP | |
2512 | If you want to deal with multibyte characters, however, there are | |
2513 | some gotchas. See the section on Regular Expressions. | |
2514 | .Sh "How do I determine whether a scalar is a number/whole/integer/float?" | |
2515 | .IX Subsection "How do I determine whether a scalar is a number/whole/integer/float?" | |
2516 | Assuming that you don't care about \s-1IEEE\s0 notations like \*(L"NaN\*(R" or | |
2517 | \&\*(L"Infinity\*(R", you probably just want to use a regular expression. | |
2518 | .PP | |
2519 | .Vb 8 | |
2520 | \& if (/\eD/) { print "has nondigits\en" } | |
2521 | \& if (/^\ed+$/) { print "is a whole number\en" } | |
2522 | \& if (/^-?\ed+$/) { print "is an integer\en" } | |
2523 | \& if (/^[+-]?\ed+$/) { print "is a +/- integer\en" } | |
2524 | \& if (/^-?\ed+\e.?\ed*$/) { print "is a real number\en" } | |
2525 | \& if (/^-?(?:\ed+(?:\e.\ed*)?|\e.\ed+)$/) { print "is a decimal number\en" } | |
2526 | \& if (/^([+-]?)(?=\ed|\e.\ed)\ed*(\e.\ed*)?([Ee]([+-]?\ed+))?$/) | |
2527 | \& { print "a C float\en" } | |
2528 | .Ve | |
2529 | .PP | |
2530 | There are also some commonly used modules for the task. | |
2531 | Scalar::Util (distributed with 5.8) provides access to perl's | |
2532 | internal function \f(CW\*(C`looks_like_number\*(C'\fR for determining | |
2533 | whether a variable looks like a number. Data::Types | |
2534 | exports functions that validate data types using both the | |
2535 | above and other regular expressions. Thirdly, there is | |
2536 | \&\f(CW\*(C`Regexp::Common\*(C'\fR which has regular expressions to match | |
2537 | various types of numbers. Those three modules are available | |
2538 | from the \s-1CPAN\s0. | |
2539 | .PP | |
2540 | If you're on a \s-1POSIX\s0 system, Perl supports the \f(CW\*(C`POSIX::strtod\*(C'\fR | |
2541 | function. Its semantics are somewhat cumbersome, so here's a \f(CW\*(C`getnum\*(C'\fR | |
2542 | wrapper function for more convenient access. This function takes | |
2543 | a string and returns the number it found, or \f(CW\*(C`undef\*(C'\fR for input that | |
2544 | isn't a C float. The \f(CW\*(C`is_numeric\*(C'\fR function is a front end to \f(CW\*(C`getnum\*(C'\fR | |
2545 | if you just want to say, \*(L"Is this a float?\*(R" | |
2546 | .PP | |
2547 | .Vb 13 | |
2548 | \& sub getnum { | |
2549 | \& use POSIX qw(strtod); | |
2550 | \& my $str = shift; | |
2551 | \& $str =~ s/^\es+//; | |
2552 | \& $str =~ s/\es+$//; | |
2553 | \& $! = 0; | |
2554 | \& my($num, $unparsed) = strtod($str); | |
2555 | \& if (($str eq '') || ($unparsed != 0) || $!) { | |
2556 | \& return undef; | |
2557 | \& } else { | |
2558 | \& return $num; | |
2559 | \& } | |
2560 | \& } | |
2561 | .Ve | |
2562 | .PP | |
2563 | .Vb 1 | |
2564 | \& sub is_numeric { defined getnum($_[0]) } | |
2565 | .Ve | |
2566 | .PP | |
2567 | Or you could check out the String::Scanf module on the \s-1CPAN\s0 | |
2568 | instead. The \s-1POSIX\s0 module (part of the standard Perl distribution) provides | |
2569 | the \f(CW\*(C`strtod\*(C'\fR and \f(CW\*(C`strtol\*(C'\fR functions for converting strings to doubles and longs, | |
2570 | respectively. | |
2571 | .Sh "How do I keep persistent data across program calls?" | |
2572 | .IX Subsection "How do I keep persistent data across program calls?" | |
2573 | For some specific applications, you can use one of the \s-1DBM\s0 modules. | |
2574 | See AnyDBM_File. More generically, you should consult the FreezeThaw | |
2575 | or Storable modules from \s-1CPAN\s0. Starting from Perl 5.8 Storable is part | |
2576 | of the standard distribution. Here's one example using Storable's \f(CW\*(C`store\*(C'\fR | |
2577 | and \f(CW\*(C`retrieve\*(C'\fR functions: | |
2578 | .PP | |
2579 | .Vb 2 | |
2580 | \& use Storable; | |
2581 | \& store(\e%hash, "filename"); | |
2582 | .Ve | |
2583 | .PP | |
2584 | .Vb 3 | |
2585 | \& # later on... | |
2586 | \& $href = retrieve("filename"); # by ref | |
2587 | \& %hash = %{ retrieve("filename") }; # direct to hash | |
2588 | .Ve | |
2589 | .Sh "How do I print out or copy a recursive data structure?" | |
2590 | .IX Subsection "How do I print out or copy a recursive data structure?" | |
2591 | The Data::Dumper module on \s-1CPAN\s0 (or the 5.005 release of Perl) is great | |
2592 | for printing out data structures. The Storable module on \s-1CPAN\s0 (or the | |
2593 | 5.8 release of Perl), provides a function called \f(CW\*(C`dclone\*(C'\fR that recursively | |
2594 | copies its argument. | |
2595 | .PP | |
2596 | .Vb 2 | |
2597 | \& use Storable qw(dclone); | |
2598 | \& $r2 = dclone($r1); | |
2599 | .Ve | |
2600 | .PP | |
2601 | Where \f(CW$r1\fR can be a reference to any kind of data structure you'd like. | |
2602 | It will be deeply copied. Because \f(CW\*(C`dclone\*(C'\fR takes and returns references, | |
2603 | you'd have to add extra punctuation if you had a hash of arrays that | |
2604 | you wanted to copy. | |
2605 | .PP | |
2606 | .Vb 1 | |
2607 | \& %newhash = %{ dclone(\e%oldhash) }; | |
2608 | .Ve | |
2609 | .Sh "How do I define methods for every class/object?" | |
2610 | .IX Subsection "How do I define methods for every class/object?" | |
2611 | Use the \s-1UNIVERSAL\s0 class (see \s-1UNIVERSAL\s0). | |
2612 | .Sh "How do I verify a credit card checksum?" | |
2613 | .IX Subsection "How do I verify a credit card checksum?" | |
2614 | Get the Business::CreditCard module from \s-1CPAN\s0. | |
2615 | .Sh "How do I pack arrays of doubles or floats for \s-1XS\s0 code?" | |
2616 | .IX Subsection "How do I pack arrays of doubles or floats for XS code?" | |
2617 | The kgbpack.c code in the \s-1PGPLOT\s0 module on \s-1CPAN\s0 does just this. | |
2618 | If you're doing a lot of float or double processing, consider using | |
2619 | the \s-1PDL\s0 module from \s-1CPAN\s0 instead\*(--it makes number-crunching easy. | |
2620 | .SH "AUTHOR AND COPYRIGHT" | |
2621 | .IX Header "AUTHOR AND COPYRIGHT" | |
2622 | Copyright (c) 1997\-2006 Tom Christiansen, Nathan Torkington, and | |
2623 | other authors as noted. All rights reserved. | |
2624 | .PP | |
2625 | This documentation is free; you can redistribute it and/or modify it | |
2626 | under the same terms as Perl itself. | |
2627 | .PP | |
2628 | Irrespective of its distribution, all code examples in this file | |
2629 | are hereby placed into the public domain. You are permitted and | |
2630 | encouraged to use this code in your own programs for fun | |
2631 | or for profit as you see fit. A simple comment in the code giving | |
2632 | credit would be courteous but is not required. |