Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "Memoize 3" | |
132 | .TH Memoize 3 "2001-09-21" "perl v5.8.8" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | Memoize \- Make functions faster by trading space for time | |
135 | .SH "SYNOPSIS" | |
136 | .IX Header "SYNOPSIS" | |
137 | .Vb 4 | |
138 | \& # This is the documentation for Memoize 1.01 | |
139 | \& use Memoize; | |
140 | \& memoize('slow_function'); | |
141 | \& slow_function(arguments); # Is faster than it was before | |
142 | .Ve | |
143 | .PP | |
144 | This is normally all you need to know. However, many options are available: | |
145 | .PP | |
146 | .Vb 1 | |
147 | \& memoize(function, options...); | |
148 | .Ve | |
149 | .PP | |
150 | Options include: | |
151 | .PP | |
152 | .Vb 2 | |
153 | \& NORMALIZER => function | |
154 | \& INSTALL => new_name | |
155 | .Ve | |
156 | .PP | |
157 | .Vb 4 | |
158 | \& SCALAR_CACHE => 'MEMORY' | |
159 | \& SCALAR_CACHE => ['HASH', \e%cache_hash ] | |
160 | \& SCALAR_CACHE => 'FAULT' | |
161 | \& SCALAR_CACHE => 'MERGE' | |
162 | .Ve | |
163 | .PP | |
164 | .Vb 4 | |
165 | \& LIST_CACHE => 'MEMORY' | |
166 | \& LIST_CACHE => ['HASH', \e%cache_hash ] | |
167 | \& LIST_CACHE => 'FAULT' | |
168 | \& LIST_CACHE => 'MERGE' | |
169 | .Ve | |
170 | .SH "DESCRIPTION" | |
171 | .IX Header "DESCRIPTION" | |
172 | `Memoizing' a function makes it faster by trading space for time. It | |
173 | does this by caching the return values of the function in a table. | |
174 | If you call the function again with the same arguments, \f(CW\*(C`memoize\*(C'\fR | |
175 | jumps in and gives you the value out of the table, instead of letting | |
176 | the function compute the value all over again. | |
177 | .PP | |
178 | Here is an extreme example. Consider the Fibonacci sequence, defined | |
179 | by the following function: | |
180 | .PP | |
181 | .Vb 6 | |
182 | \& # Compute Fibonacci numbers | |
183 | \& sub fib { | |
184 | \& my $n = shift; | |
185 | \& return $n if $n < 2; | |
186 | \& fib($n-1) + fib($n-2); | |
187 | \& } | |
188 | .Ve | |
189 | .PP | |
190 | This function is very slow. Why? To compute fib(14), it first wants | |
191 | to compute fib(13) and fib(12), and add the results. But to compute | |
192 | fib(13), it first has to compute fib(12) and fib(11), and then it | |
193 | comes back and computes fib(12) all over again even though the answer | |
194 | is the same. And both of the times that it wants to compute fib(12), | |
195 | it has to compute fib(11) from scratch, and then it has to do it | |
196 | again each time it wants to compute fib(13). This function does so | |
197 | much recomputing of old results that it takes a really long time to | |
198 | run\-\-\-fib(14) makes 1,200 extra recursive calls to itself, to compute | |
199 | and recompute things that it already computed. | |
200 | .PP | |
201 | This function is a good candidate for memoization. If you memoize the | |
202 | `fib' function above, it will compute fib(14) exactly once, the first | |
203 | time it needs to, and then save the result in a table. Then if you | |
204 | ask for fib(14) again, it gives you the result out of the table. | |
205 | While computing fib(14), instead of computing fib(12) twice, it does | |
206 | it once; the second time it needs the value it gets it from the table. | |
207 | It doesn't compute fib(11) four times; it computes it once, getting it | |
208 | from the table the next three times. Instead of making 1,200 | |
209 | recursive calls to `fib', it makes 15. This makes the function about | |
210 | 150 times faster. | |
211 | .PP | |
212 | You could do the memoization yourself, by rewriting the function, like | |
213 | this: | |
214 | .PP | |
215 | .Vb 9 | |
216 | \& # Compute Fibonacci numbers, memoized version | |
217 | \& { my @fib; | |
218 | \& sub fib { | |
219 | \& my $n = shift; | |
220 | \& return $fib[$n] if defined $fib[$n]; | |
221 | \& return $fib[$n] = $n if $n < 2; | |
222 | \& $fib[$n] = fib($n-1) + fib($n-2); | |
223 | \& } | |
224 | \& } | |
225 | .Ve | |
226 | .PP | |
227 | Or you could use this module, like this: | |
228 | .PP | |
229 | .Vb 2 | |
230 | \& use Memoize; | |
231 | \& memoize('fib'); | |
232 | .Ve | |
233 | .PP | |
234 | .Vb 1 | |
235 | \& # Rest of the fib function just like the original version. | |
236 | .Ve | |
237 | .PP | |
238 | This makes it easy to turn memoizing on and off. | |
239 | .PP | |
240 | Here's an even simpler example: I wrote a simple ray tracer; the | |
241 | program would look in a certain direction, figure out what it was | |
242 | looking at, and then convert the `color' value (typically a string | |
243 | like `red') of that object to a red, green, and blue pixel value, like | |
244 | this: | |
245 | .PP | |
246 | .Vb 6 | |
247 | \& for ($direction = 0; $direction < 300; $direction++) { | |
248 | \& # Figure out which object is in direction $direction | |
249 | \& $color = $object->{color}; | |
250 | \& ($r, $g, $b) = @{&ColorToRGB($color)}; | |
251 | \& ... | |
252 | \& } | |
253 | .Ve | |
254 | .PP | |
255 | Since there are relatively few objects in a picture, there are only a | |
256 | few colors, which get looked up over and over again. Memoizing | |
257 | \&\f(CW\*(C`ColorToRGB\*(C'\fR sped up the program by several percent. | |
258 | .SH "DETAILS" | |
259 | .IX Header "DETAILS" | |
260 | This module exports exactly one function, \f(CW\*(C`memoize\*(C'\fR. The rest of the | |
261 | functions in this package are None of Your Business. | |
262 | .PP | |
263 | You should say | |
264 | .PP | |
265 | .Vb 1 | |
266 | \& memoize(function) | |
267 | .Ve | |
268 | .PP | |
269 | where \f(CW\*(C`function\*(C'\fR is the name of the function you want to memoize, or | |
270 | a reference to it. \f(CW\*(C`memoize\*(C'\fR returns a reference to the new, | |
271 | memoized version of the function, or \f(CW\*(C`undef\*(C'\fR on a non-fatal error. | |
272 | At present, there are no non-fatal errors, but there might be some in | |
273 | the future. | |
274 | .PP | |
275 | If \f(CW\*(C`function\*(C'\fR was the name of a function, then \f(CW\*(C`memoize\*(C'\fR hides the | |
276 | old version and installs the new memoized version under the old name, | |
277 | so that \f(CW\*(C`&function(...)\*(C'\fR actually invokes the memoized version. | |
278 | .SH "OPTIONS" | |
279 | .IX Header "OPTIONS" | |
280 | There are some optional options you can pass to \f(CW\*(C`memoize\*(C'\fR to change | |
281 | the way it behaves a little. To supply options, invoke \f(CW\*(C`memoize\*(C'\fR | |
282 | like this: | |
283 | .PP | |
284 | .Vb 5 | |
285 | \& memoize(function, NORMALIZER => function, | |
286 | \& INSTALL => newname, | |
287 | \& SCALAR_CACHE => option, | |
288 | \& LIST_CACHE => option | |
289 | \& ); | |
290 | .Ve | |
291 | .PP | |
292 | Each of these options is optional; you can include some, all, or none | |
293 | of them. | |
294 | .Sh "\s-1INSTALL\s0" | |
295 | .IX Subsection "INSTALL" | |
296 | If you supply a function name with \f(CW\*(C`INSTALL\*(C'\fR, memoize will install | |
297 | the new, memoized version of the function under the name you give. | |
298 | For example, | |
299 | .PP | |
300 | .Vb 1 | |
301 | \& memoize('fib', INSTALL => 'fastfib') | |
302 | .Ve | |
303 | .PP | |
304 | installs the memoized version of \f(CW\*(C`fib\*(C'\fR as \f(CW\*(C`fastfib\*(C'\fR; without the | |
305 | \&\f(CW\*(C`INSTALL\*(C'\fR option it would have replaced the old \f(CW\*(C`fib\*(C'\fR with the | |
306 | memoized version. | |
307 | .PP | |
308 | To prevent \f(CW\*(C`memoize\*(C'\fR from installing the memoized version anywhere, use | |
309 | \&\f(CW\*(C`INSTALL => undef\*(C'\fR. | |
310 | .Sh "\s-1NORMALIZER\s0" | |
311 | .IX Subsection "NORMALIZER" | |
312 | Suppose your function looks like this: | |
313 | .PP | |
314 | .Vb 6 | |
315 | \& # Typical call: f('aha!', A => 11, B => 12); | |
316 | \& sub f { | |
317 | \& my $a = shift; | |
318 | \& my %hash = @_; | |
319 | \& $hash{B} ||= 2; # B defaults to 2 | |
320 | \& $hash{C} ||= 7; # C defaults to 7 | |
321 | .Ve | |
322 | .PP | |
323 | .Vb 2 | |
324 | \& # Do something with $a, %hash | |
325 | \& } | |
326 | .Ve | |
327 | .PP | |
328 | Now, the following calls to your function are all completely equivalent: | |
329 | .PP | |
330 | .Vb 6 | |
331 | \& f(OUCH); | |
332 | \& f(OUCH, B => 2); | |
333 | \& f(OUCH, C => 7); | |
334 | \& f(OUCH, B => 2, C => 7); | |
335 | \& f(OUCH, C => 7, B => 2); | |
336 | \& (etc.) | |
337 | .Ve | |
338 | .PP | |
339 | However, unless you tell \f(CW\*(C`Memoize\*(C'\fR that these calls are equivalent, | |
340 | it will not know that, and it will compute the values for these | |
341 | invocations of your function separately, and store them separately. | |
342 | .PP | |
343 | To prevent this, supply a \f(CW\*(C`NORMALIZER\*(C'\fR function that turns the | |
344 | program arguments into a string in a way that equivalent arguments | |
345 | turn into the same string. A \f(CW\*(C`NORMALIZER\*(C'\fR function for \f(CW\*(C`f\*(C'\fR above | |
346 | might look like this: | |
347 | .PP | |
348 | .Vb 5 | |
349 | \& sub normalize_f { | |
350 | \& my $a = shift; | |
351 | \& my %hash = @_; | |
352 | \& $hash{B} ||= 2; | |
353 | \& $hash{C} ||= 7; | |
354 | .Ve | |
355 | .PP | |
356 | .Vb 2 | |
357 | \& join(',', $a, map ($_ => $hash{$_}) sort keys %hash); | |
358 | \& } | |
359 | .Ve | |
360 | .PP | |
361 | Each of the argument lists above comes out of the \f(CW\*(C`normalize_f\*(C'\fR | |
362 | function looking exactly the same, like this: | |
363 | .PP | |
364 | .Vb 1 | |
365 | \& OUCH,B,2,C,7 | |
366 | .Ve | |
367 | .PP | |
368 | You would tell \f(CW\*(C`Memoize\*(C'\fR to use this normalizer this way: | |
369 | .PP | |
370 | .Vb 1 | |
371 | \& memoize('f', NORMALIZER => 'normalize_f'); | |
372 | .Ve | |
373 | .PP | |
374 | \&\f(CW\*(C`memoize\*(C'\fR knows that if the normalized version of the arguments is | |
375 | the same for two argument lists, then it can safely look up the value | |
376 | that it computed for one argument list and return it as the result of | |
377 | calling the function with the other argument list, even if the | |
378 | argument lists look different. | |
379 | .PP | |
380 | The default normalizer just concatenates the arguments with character | |
381 | 28 in between. (In \s-1ASCII\s0, this is called \s-1FS\s0 or control\-\e.) This | |
382 | always works correctly for functions with only one string argument, | |
383 | and also when the arguments never contain character 28. However, it | |
384 | can confuse certain argument lists: | |
385 | .PP | |
386 | .Vb 3 | |
387 | \& normalizer("a\e034", "b") | |
388 | \& normalizer("a", "\e034b") | |
389 | \& normalizer("a\e034\e034b") | |
390 | .Ve | |
391 | .PP | |
392 | for example. | |
393 | .PP | |
394 | Since hash keys are strings, the default normalizer will not | |
395 | distinguish between \f(CW\*(C`undef\*(C'\fR and the empty string. It also won't work | |
396 | when the function's arguments are references. For example, consider a | |
397 | function \f(CW\*(C`g\*(C'\fR which gets two arguments: A number, and a reference to | |
398 | an array of numbers: | |
399 | .PP | |
400 | .Vb 1 | |
401 | \& g(13, [1,2,3,4,5,6,7]); | |
402 | .Ve | |
403 | .PP | |
404 | The default normalizer will turn this into something like | |
405 | \&\f(CW"13\e034ARRAY(0x436c1f)"\fR. That would be all right, except that a | |
406 | subsequent array of numbers might be stored at a different location | |
407 | even though it contains the same data. If this happens, \f(CW\*(C`Memoize\*(C'\fR | |
408 | will think that the arguments are different, even though they are | |
409 | equivalent. In this case, a normalizer like this is appropriate: | |
410 | .PP | |
411 | .Vb 1 | |
412 | \& sub normalize { join ' ', $_[0], @{$_[1]} } | |
413 | .Ve | |
414 | .PP | |
415 | For the example above, this produces the key \*(L"13 1 2 3 4 5 6 7\*(R". | |
416 | .PP | |
417 | Another use for normalizers is when the function depends on data other | |
418 | than those in its arguments. Suppose you have a function which | |
419 | returns a value which depends on the current hour of the day: | |
420 | .PP | |
421 | .Vb 10 | |
422 | \& sub on_duty { | |
423 | \& my ($problem_type) = @_; | |
424 | \& my $hour = (localtime)[2]; | |
425 | \& open my $fh, "$DIR/$problem_type" or die...; | |
426 | \& my $line; | |
427 | \& while ($hour-- > 0) | |
428 | \& $line = <$fh>; | |
429 | \& } | |
430 | \& return $line; | |
431 | \& } | |
432 | .Ve | |
433 | .PP | |
434 | At 10:23, this function generates the 10th line of a data file; at | |
435 | 3:45 \s-1PM\s0 it generates the 15th line instead. By default, \f(CW\*(C`Memoize\*(C'\fR | |
436 | will only see the \f(CW$problem_type\fR argument. To fix this, include the | |
437 | current hour in the normalizer: | |
438 | .PP | |
439 | .Vb 1 | |
440 | \& sub normalize { join ' ', (localtime)[2], @_ } | |
441 | .Ve | |
442 | .PP | |
443 | The calling context of the function (scalar or list context) is | |
444 | propagated to the normalizer. This means that if the memoized | |
445 | function will treat its arguments differently in list context than it | |
446 | would in scalar context, you can have the normalizer function select | |
447 | its behavior based on the results of \f(CW\*(C`wantarray\*(C'\fR. Even if called in | |
448 | a list context, a normalizer should still return a single string. | |
449 | .ie n .Sh """SCALAR_CACHE""\fP, \f(CW""LIST_CACHE""" | |
450 | .el .Sh "\f(CWSCALAR_CACHE\fP, \f(CWLIST_CACHE\fP" | |
451 | .IX Subsection "SCALAR_CACHE, LIST_CACHE" | |
452 | Normally, \f(CW\*(C`Memoize\*(C'\fR caches your function's return values into an | |
453 | ordinary Perl hash variable. However, you might like to have the | |
454 | values cached on the disk, so that they persist from one run of your | |
455 | program to the next, or you might like to associate some other | |
456 | interesting semantics with the cached values. | |
457 | .PP | |
458 | There's a slight complication under the hood of \f(CW\*(C`Memoize\*(C'\fR: There are | |
459 | actually \fItwo\fR caches, one for scalar values and one for list values. | |
460 | When your function is called in scalar context, its return value is | |
461 | cached in one hash, and when your function is called in list context, | |
462 | its value is cached in the other hash. You can control the caching | |
463 | behavior of both contexts independently with these options. | |
464 | .PP | |
465 | The argument to \f(CW\*(C`LIST_CACHE\*(C'\fR or \f(CW\*(C`SCALAR_CACHE\*(C'\fR must either be one of | |
466 | the following four strings: | |
467 | .PP | |
468 | .Vb 4 | |
469 | \& MEMORY | |
470 | \& FAULT | |
471 | \& MERGE | |
472 | \& HASH | |
473 | .Ve | |
474 | .PP | |
475 | or else it must be a reference to a list whose first element is one of | |
476 | these four strings, such as \f(CW\*(C`[HASH, arguments...]\*(C'\fR. | |
477 | .ie n .IP """MEMORY""" 4 | |
478 | .el .IP "\f(CWMEMORY\fR" 4 | |
479 | .IX Item "MEMORY" | |
480 | \&\f(CW\*(C`MEMORY\*(C'\fR means that return values from the function will be cached in | |
481 | an ordinary Perl hash variable. The hash variable will not persist | |
482 | after the program exits. This is the default. | |
483 | .ie n .IP """HASH""" 4 | |
484 | .el .IP "\f(CWHASH\fR" 4 | |
485 | .IX Item "HASH" | |
486 | \&\f(CW\*(C`HASH\*(C'\fR allows you to specify that a particular hash that you supply | |
487 | will be used as the cache. You can tie this hash beforehand to give | |
488 | it any behavior you want. | |
489 | .Sp | |
490 | A tied hash can have any semantics at all. It is typically tied to an | |
491 | on-disk database, so that cached values are stored in the database and | |
492 | retrieved from it again when needed, and the disk file typically | |
493 | persists after your program has exited. See \f(CW\*(C`perltie\*(C'\fR for more | |
494 | complete details about \f(CW\*(C`tie\*(C'\fR. | |
495 | .Sp | |
496 | A typical example is: | |
497 | .Sp | |
498 | .Vb 3 | |
499 | \& use DB_File; | |
500 | \& tie my %cache => 'DB_File', $filename, O_RDWR|O_CREAT, 0666; | |
501 | \& memoize 'function', SCALAR_CACHE => [HASH => \e%cache]; | |
502 | .Ve | |
503 | .Sp | |
504 | This has the effect of storing the cache in a \f(CW\*(C`DB_File\*(C'\fR database | |
505 | whose name is in \f(CW$filename\fR. The cache will persist after the | |
506 | program has exited. Next time the program runs, it will find the | |
507 | cache already populated from the previous run of the program. Or you | |
508 | can forcibly populate the cache by constructing a batch program that | |
509 | runs in the background and populates the cache file. Then when you | |
510 | come to run your real program the memoized function will be fast | |
511 | because all its results have been precomputed. | |
512 | .ie n .IP """TIE""" 4 | |
513 | .el .IP "\f(CWTIE\fR" 4 | |
514 | .IX Item "TIE" | |
515 | This option is no longer supported. It is still documented only to | |
516 | aid in the debugging of old programs that use it. Old programs should | |
517 | be converted to use the \f(CW\*(C`HASH\*(C'\fR option instead. | |
518 | .Sp | |
519 | .Vb 1 | |
520 | \& memoize ... [TIE, PACKAGE, ARGS...] | |
521 | .Ve | |
522 | .Sp | |
523 | is merely a shortcut for | |
524 | .Sp | |
525 | .Vb 5 | |
526 | \& require PACKAGE; | |
527 | \& { my %cache; | |
528 | \& tie %cache, PACKAGE, ARGS...; | |
529 | \& } | |
530 | \& memoize ... [HASH => \e%cache]; | |
531 | .Ve | |
532 | .ie n .IP """FAULT""" 4 | |
533 | .el .IP "\f(CWFAULT\fR" 4 | |
534 | .IX Item "FAULT" | |
535 | \&\f(CW\*(C`FAULT\*(C'\fR means that you never expect to call the function in scalar | |
536 | (or list) context, and that if \f(CW\*(C`Memoize\*(C'\fR detects such a call, it | |
537 | should abort the program. The error message is one of | |
538 | .Sp | |
539 | .Vb 2 | |
540 | \& `foo' function called in forbidden list context at line ... | |
541 | \& `foo' function called in forbidden scalar context at line ... | |
542 | .Ve | |
543 | .ie n .IP """MERGE""" 4 | |
544 | .el .IP "\f(CWMERGE\fR" 4 | |
545 | .IX Item "MERGE" | |
546 | \&\f(CW\*(C`MERGE\*(C'\fR normally means the function does not distinguish between list | |
547 | and sclar context, and that return values in both contexts should be | |
548 | stored together. \f(CW\*(C`LIST_CACHE => MERGE\*(C'\fR means that list context | |
549 | return values should be stored in the same hash that is used for | |
550 | scalar context returns, and \f(CW\*(C`SCALAR_CACHE => MERGE\*(C'\fR means the | |
551 | same, mutatis mutandis. It is an error to specify \f(CW\*(C`MERGE\*(C'\fR for both, | |
552 | but it probably does something useful. | |
553 | .Sp | |
554 | Consider this function: | |
555 | .Sp | |
556 | .Vb 1 | |
557 | \& sub pi { 3; } | |
558 | .Ve | |
559 | .Sp | |
560 | Normally, the following code will result in two calls to \f(CW\*(C`pi\*(C'\fR: | |
561 | .Sp | |
562 | .Vb 3 | |
563 | \& $x = pi(); | |
564 | \& ($y) = pi(); | |
565 | \& $z = pi(); | |
566 | .Ve | |
567 | .Sp | |
568 | The first call caches the value \f(CW3\fR in the scalar cache; the second | |
569 | caches the list \f(CW\*(C`(3)\*(C'\fR in the list cache. The third call doesn't call | |
570 | the real \f(CW\*(C`pi\*(C'\fR function; it gets the value from the scalar cache. | |
571 | .Sp | |
572 | Obviously, the second call to \f(CW\*(C`pi\*(C'\fR is a waste of time, and storing | |
573 | its return value is a waste of space. Specifying \f(CW\*(C`LIST_CACHE => | |
574 | MERGE\*(C'\fR will make \f(CW\*(C`memoize\*(C'\fR use the same cache for scalar and list | |
575 | context return values, so that the second call uses the scalar cache | |
576 | that was populated by the first call. \f(CW\*(C`pi\*(C'\fR ends up being called only | |
577 | once, and both subsequent calls return \f(CW3\fR from the cache, regardless | |
578 | of the calling context. | |
579 | .Sp | |
580 | Another use for \f(CW\*(C`MERGE\*(C'\fR is when you want both kinds of return values | |
581 | stored in the same disk file; this saves you from having to deal with | |
582 | two disk files instead of one. You can use a normalizer function to | |
583 | keep the two sets of return values separate. For example: | |
584 | .Sp | |
585 | .Vb 1 | |
586 | \& tie my %cache => 'MLDBM', 'DB_File', $filename, ...; | |
587 | .Ve | |
588 | .Sp | |
589 | .Vb 5 | |
590 | \& memoize 'myfunc', | |
591 | \& NORMALIZER => 'n', | |
592 | \& SCALAR_CACHE => [HASH => \e%cache], | |
593 | \& LIST_CACHE => MERGE, | |
594 | \& ; | |
595 | .Ve | |
596 | .Sp | |
597 | .Vb 5 | |
598 | \& sub n { | |
599 | \& my $context = wantarray() ? 'L' : 'S'; | |
600 | \& # ... now compute the hash key from the arguments ... | |
601 | \& $hashkey = "$context:$hashkey"; | |
602 | \& } | |
603 | .Ve | |
604 | .Sp | |
605 | This normalizer function will store scalar context return values in | |
606 | the disk file under keys that begin with \f(CW\*(C`S:\*(C'\fR, and list context | |
607 | return values under keys that begin with \f(CW\*(C`L:\*(C'\fR. | |
608 | .SH "OTHER FACILITIES" | |
609 | .IX Header "OTHER FACILITIES" | |
610 | .ie n .Sh """unmemoize""" | |
611 | .el .Sh "\f(CWunmemoize\fP" | |
612 | .IX Subsection "unmemoize" | |
613 | There's an \f(CW\*(C`unmemoize\*(C'\fR function that you can import if you want to. | |
614 | Why would you want to? Here's an example: Suppose you have your cache | |
615 | tied to a \s-1DBM\s0 file, and you want to make sure that the cache is | |
616 | written out to disk if someone interrupts the program. If the program | |
617 | exits normally, this will happen anyway, but if someone types | |
618 | control-C or something then the program will terminate immediately | |
619 | without synchronizing the database. So what you can do instead is | |
620 | .PP | |
621 | .Vb 1 | |
622 | \& $SIG{INT} = sub { unmemoize 'function' }; | |
623 | .Ve | |
624 | .PP | |
625 | \&\f(CW\*(C`unmemoize\*(C'\fR accepts a reference to, or the name of a previously | |
626 | memoized function, and undoes whatever it did to provide the memoized | |
627 | version in the first place, including making the name refer to the | |
628 | unmemoized version if appropriate. It returns a reference to the | |
629 | unmemoized version of the function. | |
630 | .PP | |
631 | If you ask it to unmemoize a function that was never memoized, it | |
632 | croaks. | |
633 | .ie n .Sh """flush_cache""" | |
634 | .el .Sh "\f(CWflush_cache\fP" | |
635 | .IX Subsection "flush_cache" | |
636 | \&\f(CW\*(C`flush_cache(function)\*(C'\fR will flush out the caches, discarding \fIall\fR | |
637 | the cached data. The argument may be a function name or a reference | |
638 | to a function. For finer control over when data is discarded or | |
639 | expired, see the documentation for \f(CW\*(C`Memoize::Expire\*(C'\fR, included in | |
640 | this package. | |
641 | .PP | |
642 | Note that if the cache is a tied hash, \f(CW\*(C`flush_cache\*(C'\fR will attempt to | |
643 | invoke the \f(CW\*(C`CLEAR\*(C'\fR method on the hash. If there is no \f(CW\*(C`CLEAR\*(C'\fR | |
644 | method, this will cause a run-time error. | |
645 | .PP | |
646 | An alternative approach to cache flushing is to use the \f(CW\*(C`HASH\*(C'\fR option | |
647 | (see above) to request that \f(CW\*(C`Memoize\*(C'\fR use a particular hash variable | |
648 | as its cache. Then you can examine or modify the hash at any time in | |
649 | any way you desire. You may flush the cache by using \f(CW\*(C`%hash = ()\*(C'\fR. | |
650 | .SH "CAVEATS" | |
651 | .IX Header "CAVEATS" | |
652 | Memoization is not a cure\-all: | |
653 | .IP "\(bu" 4 | |
654 | Do not memoize a function whose behavior depends on program | |
655 | state other than its own arguments, such as global variables, the time | |
656 | of day, or file input. These functions will not produce correct | |
657 | results when memoized. For a particularly easy example: | |
658 | .Sp | |
659 | .Vb 3 | |
660 | \& sub f { | |
661 | \& time; | |
662 | \& } | |
663 | .Ve | |
664 | .Sp | |
665 | This function takes no arguments, and as far as \f(CW\*(C`Memoize\*(C'\fR is | |
666 | concerned, it always returns the same result. \f(CW\*(C`Memoize\*(C'\fR is wrong, of | |
667 | course, and the memoized version of this function will call \f(CW\*(C`time\*(C'\fR once | |
668 | to get the current time, and it will return that same time | |
669 | every time you call it after that. | |
670 | .IP "\(bu" 4 | |
671 | Do not memoize a function with side effects. | |
672 | .Sp | |
673 | .Vb 5 | |
674 | \& sub f { | |
675 | \& my ($a, $b) = @_; | |
676 | \& my $s = $a + $b; | |
677 | \& print "$a + $b = $s.\en"; | |
678 | \& } | |
679 | .Ve | |
680 | .Sp | |
681 | This function accepts two arguments, adds them, and prints their sum. | |
682 | Its return value is the numuber of characters it printed, but you | |
683 | probably didn't care about that. But \f(CW\*(C`Memoize\*(C'\fR doesn't understand | |
684 | that. If you memoize this function, you will get the result you | |
685 | expect the first time you ask it to print the sum of 2 and 3, but | |
686 | subsequent calls will return 1 (the return value of | |
687 | \&\f(CW\*(C`print\*(C'\fR) without actually printing anything. | |
688 | .IP "\(bu" 4 | |
689 | Do not memoize a function that returns a data structure that is | |
690 | modified by its caller. | |
691 | .Sp | |
692 | Consider these functions: \f(CW\*(C`getusers\*(C'\fR returns a list of users somehow, | |
693 | and then \f(CW\*(C`main\*(C'\fR throws away the first user on the list and prints the | |
694 | rest: | |
695 | .Sp | |
696 | .Vb 7 | |
697 | \& sub main { | |
698 | \& my $userlist = getusers(); | |
699 | \& shift @$userlist; | |
700 | \& foreach $u (@$userlist) { | |
701 | \& print "User $u\en"; | |
702 | \& } | |
703 | \& } | |
704 | .Ve | |
705 | .Sp | |
706 | .Vb 5 | |
707 | \& sub getusers { | |
708 | \& my @users; | |
709 | \& # Do something to get a list of users; | |
710 | \& \e@users; # Return reference to list. | |
711 | \& } | |
712 | .Ve | |
713 | .Sp | |
714 | If you memoize \f(CW\*(C`getusers\*(C'\fR here, it will work right exactly once. The | |
715 | reference to the users list will be stored in the memo table. \f(CW\*(C`main\*(C'\fR | |
716 | will discard the first element from the referenced list. The next | |
717 | time you invoke \f(CW\*(C`main\*(C'\fR, \f(CW\*(C`Memoize\*(C'\fR will not call \f(CW\*(C`getusers\*(C'\fR; it will | |
718 | just return the same reference to the same list it got last time. But | |
719 | this time the list has already had its head removed; \f(CW\*(C`main\*(C'\fR will | |
720 | erroneously remove another element from it. The list will get shorter | |
721 | and shorter every time you call \f(CW\*(C`main\*(C'\fR. | |
722 | .Sp | |
723 | Similarly, this: | |
724 | .Sp | |
725 | .Vb 3 | |
726 | \& $u1 = getusers(); | |
727 | \& $u2 = getusers(); | |
728 | \& pop @$u1; | |
729 | .Ve | |
730 | .Sp | |
731 | will modify \f(CW$u2\fR as well as \f(CW$u1\fR, because both variables are references | |
732 | to the same array. Had \f(CW\*(C`getusers\*(C'\fR not been memoized, \f(CW$u1\fR and \f(CW$u2\fR | |
733 | would have referred to different arrays. | |
734 | .IP "\(bu" 4 | |
735 | Do not memoize a very simple function. | |
736 | .Sp | |
737 | Recently someone mentioned to me that the Memoize module made his | |
738 | program run slower instead of faster. It turned out that he was | |
739 | memoizing the following function: | |
740 | .Sp | |
741 | .Vb 3 | |
742 | \& sub square { | |
743 | \& $_[0] * $_[0]; | |
744 | \& } | |
745 | .Ve | |
746 | .Sp | |
747 | I pointed out that \f(CW\*(C`Memoize\*(C'\fR uses a hash, and that looking up a | |
748 | number in the hash is necessarily going to take a lot longer than a | |
749 | single multiplication. There really is no way to speed up the | |
750 | \&\f(CW\*(C`square\*(C'\fR function. | |
751 | .Sp | |
752 | Memoization is not magical. | |
753 | .SH "PERSISTENT CACHE SUPPORT" | |
754 | .IX Header "PERSISTENT CACHE SUPPORT" | |
755 | You can tie the cache tables to any sort of tied hash that you want | |
756 | to, as long as it supports \f(CW\*(C`TIEHASH\*(C'\fR, \f(CW\*(C`FETCH\*(C'\fR, \f(CW\*(C`STORE\*(C'\fR, and | |
757 | \&\f(CW\*(C`EXISTS\*(C'\fR. For example, | |
758 | .PP | |
759 | .Vb 2 | |
760 | \& tie my %cache => 'GDBM_File', $filename, O_RDWR|O_CREAT, 0666; | |
761 | \& memoize 'function', SCALAR_CACHE => [HASH => \e%cache]; | |
762 | .Ve | |
763 | .PP | |
764 | works just fine. For some storage methods, you need a little glue. | |
765 | .PP | |
766 | \&\f(CW\*(C`SDBM_File\*(C'\fR doesn't supply an \f(CW\*(C`EXISTS\*(C'\fR method, so included in this | |
767 | package is a glue module called \f(CW\*(C`Memoize::SDBM_File\*(C'\fR which does | |
768 | provide one. Use this instead of plain \f(CW\*(C`SDBM_File\*(C'\fR to store your | |
769 | cache table on disk in an \f(CW\*(C`SDBM_File\*(C'\fR database: | |
770 | .PP | |
771 | .Vb 2 | |
772 | \& tie my %cache => 'Memoize::SDBM_File', $filename, O_RDWR|O_CREAT, 0666; | |
773 | \& memoize 'function', SCALAR_CACHE => [HASH => \e%cache]; | |
774 | .Ve | |
775 | .PP | |
776 | \&\f(CW\*(C`NDBM_File\*(C'\fR has the same problem and the same solution. (Use | |
777 | \&\f(CW\*(C`Memoize::NDBM_File instead of plain NDBM_File.\*(C'\fR) | |
778 | .PP | |
779 | \&\f(CW\*(C`Storable\*(C'\fR isn't a tied hash class at all. You can use it to store a | |
780 | hash to disk and retrieve it again, but you can't modify the hash while | |
781 | it's on the disk. So if you want to store your cache table in a | |
782 | \&\f(CW\*(C`Storable\*(C'\fR database, use \f(CW\*(C`Memoize::Storable\*(C'\fR, which puts a hashlike | |
783 | front-end onto \f(CW\*(C`Storable\*(C'\fR. The hash table is actually kept in | |
784 | memory, and is loaded from your \f(CW\*(C`Storable\*(C'\fR file at the time you | |
785 | memoize the function, and stored back at the time you unmemoize the | |
786 | function (or when your program exits): | |
787 | .PP | |
788 | .Vb 2 | |
789 | \& tie my %cache => 'Memoize::Storable', $filename; | |
790 | \& memoize 'function', SCALAR_CACHE => [HASH => \e%cache]; | |
791 | .Ve | |
792 | .PP | |
793 | .Vb 2 | |
794 | \& tie my %cache => 'Memoize::Storable', $filename, 'nstore'; | |
795 | \& memoize 'function', SCALAR_CACHE => [HASH => \e%cache]; | |
796 | .Ve | |
797 | .PP | |
798 | Include the `nstore' option to have the \f(CW\*(C`Storable\*(C'\fR database written | |
799 | in `network order'. (See Storable for more details about this.) | |
800 | .PP | |
801 | The \f(CW\*(C`flush_cache()\*(C'\fR function will raise a run-time error unless the | |
802 | tied package provides a \f(CW\*(C`CLEAR\*(C'\fR method. | |
803 | .SH "EXPIRATION SUPPORT" | |
804 | .IX Header "EXPIRATION SUPPORT" | |
805 | See Memoize::Expire, which is a plug-in module that adds expiration | |
806 | functionality to Memoize. If you don't like the kinds of policies | |
807 | that Memoize::Expire implements, it is easy to write your own plug-in | |
808 | module to implement whatever policy you desire. Memoize comes with | |
809 | several examples. An expiration manager that implements a \s-1LRU\s0 policy | |
810 | is available on \s-1CPAN\s0 as Memoize::ExpireLRU. | |
811 | .SH "BUGS" | |
812 | .IX Header "BUGS" | |
813 | The test suite is much better, but always needs improvement. | |
814 | .PP | |
815 | There is some problem with the way \f(CW\*(C`goto &f\*(C'\fR works under threaded | |
816 | Perl, perhaps because of the lexical scoping of \f(CW@_\fR. This is a bug | |
817 | in Perl, and until it is resolved, memoized functions will see a | |
818 | slightly different \f(CW\*(C`caller()\*(C'\fR and will perform a little more slowly | |
819 | on threaded perls than unthreaded perls. | |
820 | .PP | |
821 | Some versions of \f(CW\*(C`DB_File\*(C'\fR won't let you store data under a key of | |
822 | length 0. That means that if you have a function \f(CW\*(C`f\*(C'\fR which you | |
823 | memoized and the cache is in a \f(CW\*(C`DB_File\*(C'\fR database, then the value of | |
824 | \&\f(CW\*(C`f()\*(C'\fR (\f(CW\*(C`f\*(C'\fR called with no arguments) will not be memoized. If this | |
825 | is a big problem, you can supply a normalizer function that prepends | |
826 | \&\f(CW"x"\fR to every key. | |
827 | .SH "MAILING LIST" | |
828 | .IX Header "MAILING LIST" | |
829 | To join a very low-traffic mailing list for announcements about | |
830 | \&\f(CW\*(C`Memoize\*(C'\fR, send an empty note to \f(CW\*(C`mjd\-perl\-memoize\-request@plover.com\*(C'\fR. | |
831 | .SH "AUTHOR" | |
832 | .IX Header "AUTHOR" | |
833 | Mark-Jason Dominus (\f(CW\*(C`mjd\-perl\-memoize+@plover.com\*(C'\fR), Plover Systems co. | |
834 | .PP | |
835 | See the \f(CW\*(C`Memoize.pm\*(C'\fR Page at http://www.plover.com/~mjd/perl/Memoize/ | |
836 | for news and upgrades. Near this page, at | |
837 | http://www.plover.com/~mjd/perl/MiniMemoize/ there is an article about | |
838 | memoization and about the internals of Memoize that appeared in The | |
839 | Perl Journal, issue #13. (This article is also included in the | |
840 | Memoize distribution as `article.html'.) | |
841 | .PP | |
842 | My upcoming book will discuss memoization (and many other fascinating | |
843 | topics) in tremendous detail. It will be published by Morgan Kaufmann | |
844 | in 2002, possibly under the title \fIPerl Advanced Techniques | |
845 | Handbook\fR. It will also be available on-line for free. For more | |
846 | information, visit http://perl.plover.com/book/ . | |
847 | .PP | |
848 | To join a mailing list for announcements about \f(CW\*(C`Memoize\*(C'\fR, send an | |
849 | empty message to \f(CW\*(C`mjd\-perl\-memoize\-request@plover.com\*(C'\fR. This mailing | |
850 | list is for announcements only and has extremely low traffic\-\-\-about | |
851 | two messages per year. | |
852 | .SH "COPYRIGHT AND LICENSE" | |
853 | .IX Header "COPYRIGHT AND LICENSE" | |
854 | Copyright 1998, 1999, 2000, 2001 by Mark Jason Dominus | |
855 | .PP | |
856 | This library is free software; you may redistribute it and/or modify | |
857 | it under the same terms as Perl itself. | |
858 | .SH "THANK YOU" | |
859 | .IX Header "THANK YOU" | |
860 | Many thanks to Jonathan Roy for bug reports and suggestions, to | |
861 | Michael Schwern for other bug reports and patches, to Mike Cariaso for | |
862 | helping me to figure out the Right Thing to Do About Expiration, to | |
863 | Joshua Gerth, Joshua Chamas, Jonathan Roy (again), Mark D. Anderson, | |
864 | and Andrew Johnson for more suggestions about expiration, to Brent | |
865 | Powers for the Memoize::ExpireLRU module, to Ariel Scolnicov for | |
866 | delightful messages about the Fibonacci function, to Dion Almaer for | |
867 | thought-provoking suggestions about the default normalizer, to Walt | |
868 | Mankowski and Kurt Starsinic for much help investigating problems | |
869 | under threaded Perl, to Alex Dudkevich for reporting the bug in | |
870 | prototyped functions and for checking my patch, to Tony Bass for many | |
871 | helpful suggestions, to Jonathan Roy (again) for finding a use for | |
872 | \&\f(CW\*(C`unmemoize()\*(C'\fR, to Philippe Verdret for enlightening discussion of | |
873 | \&\f(CW\*(C`Hook::PrePostCall\*(C'\fR, to Nat Torkington for advice I ignored, to Chris | |
874 | Nandor for portability advice, to Randal Schwartz for suggesting the | |
875 | \&'\f(CW\*(C`flush_cache\*(C'\fR function, and to Jenda Krynicky for being a light in | |
876 | the world. | |
877 | .PP | |
878 | Special thanks to Jarkko Hietaniemi, the 5.8.0 pumpking, for including | |
879 | this module in the core and for his patient and helpful guidance | |
880 | during the integration process. |