Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | # |
2 | # Copyright (c) 1995-2000, Raphael Manfredi | |
3 | # | |
4 | # You may redistribute only under the same terms as Perl 5, as specified | |
5 | # in the README file that comes with the distribution. | |
6 | # | |
7 | ||
8 | require DynaLoader; | |
9 | require Exporter; | |
10 | package Storable; @ISA = qw(Exporter DynaLoader); | |
11 | ||
12 | @EXPORT = qw(store retrieve); | |
13 | @EXPORT_OK = qw( | |
14 | nstore store_fd nstore_fd fd_retrieve | |
15 | freeze nfreeze thaw | |
16 | dclone | |
17 | retrieve_fd | |
18 | lock_store lock_nstore lock_retrieve | |
19 | ); | |
20 | ||
21 | use AutoLoader; | |
22 | use vars qw($canonical $forgive_me $VERSION); | |
23 | ||
24 | $VERSION = '2.04'; | |
25 | *AUTOLOAD = \&AutoLoader::AUTOLOAD; # Grrr... | |
26 | ||
27 | # | |
28 | # Use of Log::Agent is optional | |
29 | # | |
30 | ||
31 | eval "use Log::Agent"; | |
32 | ||
33 | require Carp; | |
34 | ||
35 | # | |
36 | # They might miss :flock in Fcntl | |
37 | # | |
38 | ||
BEGIN {
    # Use the real flock constants when this perl's Fcntl can be loaded and
    # advertises the :flock export tag.
    my $has_flock_tag = eval { require Fcntl; 1 }
        && exists $Fcntl::EXPORT_TAGS{'flock'};

    if ($has_flock_tag) {
        Fcntl->import(':flock');
    }
    else {
        # Otherwise define only the two constants this module relies on.
        eval q{
            sub LOCK_SH () {1}
            sub LOCK_EX () {2}
        };
    }
}
49 | ||
50 | # Can't Autoload cleanly as this clashes 8.3 with &retrieve | |
# Backward-compatible alias: hands its arguments straight to fd_retrieve.
sub retrieve_fd {
    return fd_retrieve(@_);
}
52 | ||
53 | # By default restricted hashes are downgraded on earlier perls. | |
54 | ||
# Both forward-compatibility switches default to true; the POD sections
# "restricted hashes" and "files from future versions of Storable" describe
# what each one controls.
($Storable::downgrade_restricted, $Storable::accept_future_minor) = (1, 1);

# Load the compiled half of the module.
Storable->bootstrap;
58 | 1; | |
59 | __END__ | |
60 | # | |
61 | # Use of Log::Agent is optional. If it hasn't imported these subs then | |
62 | # Autoloader will kindly supply our fallback implementation. | |
63 | # | |
64 | ||
# Fallback logcroak, autoloaded when Log::Agent did not import one:
# raises the message through Carp::croak.
sub logcroak {
    my @message = @_;
    return Carp::croak(@message);
}
68 | ||
# Fallback logcarp, autoloaded when Log::Agent did not import one:
# emits the message as a warning through Carp::carp.
sub logcarp {
    my @message = @_;
    return Carp::carp(@message);
}
72 | ||
73 | # | |
74 | # Determine whether locking is possible, but only when needed. | |
75 | # | |
76 | ||
sub CAN_FLOCK; my $CAN_FLOCK; sub CAN_FLOCK {
    # Config is consulted lazily, and only until a defined answer has been
    # cached in $CAN_FLOCK.
    unless (defined $CAN_FLOCK) {
        require Config;
        Config->import;
        # Any one of these build-time capabilities is enough.
        $CAN_FLOCK = $Config{'d_flock'}
            || $Config{'d_fcntl_can_lock'}
            || $Config{'d_lockf'};
    }
    return $CAN_FLOCK;
}
85 | ||
# Print, on the currently selected output handle, the magic(5) entries that
# let file(1) recognise Storable data files.
sub show_file_magic {
    my $magic_entries = <<'EOM';
#
# To recognize the data files of the Perl module Storable,
# the following lines need to be added to the local magic(5) file,
# usually either /usr/share/misc/magic or /etc/magic.
#
0 string perl-store perl Storable(v0.6) data
>4 byte >0 (net-order %d)
>>4 byte &01 (network-ordered)
>>4 byte =3 (major 1)
>>4 byte =2 (major 1)

0 string pst0 perl Storable(v0.7) data
>4 byte >0
>>4 byte &01 (network-ordered)
>>4 byte =5 (major 2)
>>4 byte =4 (major 2)
>>5 byte >0 (minor %d)
EOM
    print $magic_entries;
}
107 | ||
# Decode the leading bytes of a Storable image.
#
# Takes the header as a string. Returns nothing when the header is undefined
# or not longer than 11 bytes; dies on the old "perl-store" (version 0)
# signature. Otherwise returns a hash reference with major, minor and
# netorder, plus file => 1 when the "pst0" file signature was present. For
# native-order images it also carries byteorder, intsize, longsize and
# ptrsize, and nvsize when the format is 2.2 or later.
sub read_magic {
    my $header = shift;
    return unless defined $header and length $header > 11;

    my $info;
    die "Can't deal with version 0 headers" if $header =~ s/^perl-store//;
    $info->{file} = 1 if $header =~ s/^pst0//;

    # Without a file signature the data is assumed to be an in-memory image.
    my ($version_byte, $minor, $bytelen) = unpack "C3", $header;

    # Low bit of the first byte flags network order; the rest is the major.
    my $net_order = $version_byte & 1;
    my $major     = $version_byte >> 1;
    $info->{major}    = $major;
    $info->{minor}    = $minor;
    $info->{netorder} = $net_order;

    return $info if $net_order;

    if ($major < 2) {
        # Major-1 headers carry no minor byte: what was read as the minor is
        # really the byte-order length. Pad the header by one byte so the
        # fixed offsets below work for both layouts.
        delete $info->{minor};
        $header  = '.' . $header;
        $bytelen = $minor;
    }

    my @native_fields = unpack "x3 A$bytelen C3", $header;
    @$info{qw(byteorder intsize longsize ptrsize)} = @native_fields;

    if ($major >= 2 and $minor >= 2) {
        $info->{nvsize} = unpack "x6 x$bytelen C", $header;
    }

    return $info;
}
142 | ||
143 | # | |
144 | # store | |
145 | # | |
146 | # Store target object hierarchy, identified by a reference to its root. | |
147 | # The stored object tree may later be retrieved to memory via retrieve. | |
148 | # Returns undef if an I/O error occurred, in which case the file is | |
149 | # removed. | |
150 | # | |
sub store {
    # Native byte order; the trailing 0 is _store's $use_locking flag.
    my @call_args = (\&pstore, @_, 0);
    return _store(@call_args);
}
154 | ||
155 | # | |
156 | # nstore | |
157 | # | |
158 | # Same as store, but in network order. | |
159 | # | |
sub nstore {
    # Network byte order; the trailing 0 is _store's $use_locking flag.
    my @call_args = (\&net_pstore, @_, 0);
    return _store(@call_args);
}
163 | ||
164 | # | |
165 | # lock_store | |
166 | # | |
167 | # Same as store, but flock the file first (advisory locking). | |
168 | # | |
sub lock_store {
    # Native byte order; the trailing 1 asks _store to lock the file.
    my @call_args = (\&pstore, @_, 1);
    return _store(@call_args);
}
172 | ||
173 | # | |
174 | # lock_nstore | |
175 | # | |
176 | # Same as nstore, but flock the file first (advisory locking). | |
177 | # | |
sub lock_nstore {
    # Network byte order; the trailing 1 asks _store to lock the file.
    my @call_args = (\&net_pstore, @_, 1);
    return _store(@call_args);
}
181 | ||
182 | # Internal store to file routine | |
# Internal store-to-file routine shared by store, nstore, lock_store and
# lock_nstore.
#
# Arguments: $xsptr       code reference to the serializer, called as
#                         $xsptr->(*FILE, $self)
#            $self        reference to the root of the data to store
#            $file        name of the file to write
#            $use_locking true to take an exclusive flock before writing
#
# Returns the serializer's value when it is true, undef otherwise (including
# when locking was requested but is not available). The file is removed when
# serialization or close fails. Any exception raised by the serializer is
# re-raised through logcroak.
sub _store {
    my $xsptr = shift;
    my $self = shift;
    my ($file, $use_locking) = @_;
    logcroak("not a reference") unless ref($self);
    logcroak("wrong argument number") unless @_ == 2;   # No @foo in arglist
    local *FILE;
    if ($use_locking) {
        # Three-argument open: the file name is never parsed for mode or
        # pipe characters. Append mode so that nothing is clobbered before
        # the lock is held.
        open(FILE, '>>', $file) || logcroak("can't write into $file: $!");
        unless (CAN_FLOCK()) {
            logcarp("Storable::lock_store: fcntl/flock emulation broken on $^O");
            return undef;
        }
        flock(FILE, LOCK_EX) ||
            logcroak("can't get exclusive lock on $file: $!");
        truncate FILE, 0;
        # Unlocking will happen when FILE is closed
    } else {
        open(FILE, '>', $file) || logcroak("can't create $file: $!");
    }
    binmode FILE;               # Archaic systems...
    my $da = $@;                # Don't mess if called from exception handler
    my $ret;
    # Call C routine nstore or pstore, depending on network order
    eval { $ret = &$xsptr(*FILE, $self) };
    # A failed close means the image may be incomplete: report failure.
    close(FILE) or $ret = undef;
    if ($@ || !defined $ret) {
        unlink($file) or warn "Can't unlink $file: $!\n";
    }
    if ($@) {
        # Re-raise every exception. Only plain strings get their trailing
        # ".\n" turned into "," (so the croak location reads naturally);
        # exception references are passed through untouched.
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak($@);
    }
    $@ = $da;
    return $ret ? $ret : undef;
}
214 | ||
215 | # | |
216 | # store_fd | |
217 | # | |
218 | # Same as store, but perform on an already opened file descriptor instead. | |
219 | # Returns undef if an I/O error occurred. | |
220 | # | |
sub store_fd {
    # Native byte order, written to the caller's already opened handle.
    my @call_args = (\&pstore, @_);
    return _store_fd(@call_args);
}
224 | ||
225 | # | |
226 | # nstore_fd | |
227 | # | |
228 | # Same as store_fd, but in network order. | |
229 | # | |
sub nstore_fd {
    # Network byte order, written to the caller's already opened handle.
    # Arguments are forwarded untouched; _store_fd validates them.
    return _store_fd(\&net_pstore, @_);
}
234 | ||
235 | # Internal store routine on opened file descriptor | |
# Internal store routine working on an already opened file handle; shared by
# store_fd and nstore_fd.
#
# Arguments: $xsptr  code reference to the serializer, called as
#                    $xsptr->($file, $self)
#            $self   reference to the root of the data to store
#            $file   handle to write to
#
# Returns the serializer's value when it is true, undef otherwise. Any
# exception raised by the serializer is re-raised through logcroak.
sub _store_fd {
    my $xsptr = shift;
    my $self = shift;
    my ($file) = @_;
    logcroak("not a reference") unless ref($self);
    logcroak("too many arguments") unless @_ == 1;  # No @foo in arglist
    my $fd = fileno($file);
    logcroak("not a valid file descriptor") unless defined $fd;
    my $da = $@;                # Don't mess if called from exception handler
    my $ret;
    # Call C routine nstore or pstore, depending on network order
    eval { $ret = &$xsptr($file, $self) };
    if ($@) {
        # Re-raise every exception. Only plain strings get their trailing
        # ".\n" turned into ","; exception references pass through untouched.
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak($@);
    }
    local $\; print $file '';   # Autoflush the file if wanted
    $@ = $da;
    return $ret ? $ret : undef;
}
253 | ||
254 | # | |
255 | # freeze | |
256 | # | |
257 | # Store object and its hierarchy in memory and return a scalar | |
258 | # containing the result. | |
259 | # | |
sub freeze {
    # Native byte order, serialized to an in-memory scalar.
    my @call_args = (\&mstore, @_);
    return _freeze(@call_args);
}
263 | ||
264 | # | |
265 | # nfreeze | |
266 | # | |
267 | # Same as freeze but in network order. | |
268 | # | |
sub nfreeze {
    # Network byte order, serialized to an in-memory scalar.
    my @call_args = (\&net_mstore, @_);
    return _freeze(@call_args);
}
272 | ||
273 | # Internal freeze routine | |
# Internal freeze routine shared by freeze and nfreeze.
#
# Arguments: $xsptr  code reference to the serializer, called as
#                    $xsptr->($self)
#            $self   reference to the root of the data to serialize
#
# Returns the serializer's value when it is true, undef otherwise. Any
# exception raised by the serializer is re-raised through logcroak.
sub _freeze {
    my $xsptr = shift;
    my $self = shift;
    logcroak("not a reference") unless ref($self);
    logcroak("too many arguments") unless @_ == 0;  # No @foo in arglist
    my $da = $@;                # Don't mess if called from exception handler
    my $ret;
    # Call C routine mstore or net_mstore, depending on network order
    eval { $ret = &$xsptr($self) };
    if ($@) {
        # Re-raise every exception. Only plain strings get their trailing
        # ".\n" turned into ","; exception references pass through untouched.
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak($@);
    }
    $@ = $da;
    return $ret ? $ret : undef;
}
287 | ||
288 | # | |
289 | # retrieve | |
290 | # | |
291 | # Retrieve object hierarchy from disk, returning a reference to the root | |
292 | # object of that tree. | |
293 | # | |
sub retrieve {
    # Only the first argument (the file name) is used; 0 means no locking.
    my $file = $_[0];
    return _retrieve($file, 0);
}
297 | ||
298 | # | |
299 | # lock_retrieve | |
300 | # | |
301 | # Same as retrieve, but with advisory locking. | |
302 | # | |
sub lock_retrieve {
    # Only the first argument (the file name) is used; 1 requests a lock.
    my $file = $_[0];
    return _retrieve($file, 1);
}
306 | ||
307 | # Internal retrieve routine | |
# Internal retrieve routine shared by retrieve and lock_retrieve.
#
# Arguments: $file        name of the file to read
#            $use_locking true to take a shared flock before reading
#
# Returns whatever pretrieve() produced for the file, or undef when locking
# was requested but is not available. Croaks when the file cannot be opened
# or locked, and re-raises any exception from pretrieve() through logcroak.
sub _retrieve {
    my ($file, $use_locking) = @_;
    local *FILE;
    # Three-argument open: a name such as "cmd |" or ">file" is treated as a
    # plain file name, never as a pipe or an output mode.
    open(FILE, '<', $file) || logcroak("can't open $file: $!");
    binmode FILE;               # Archaic systems...
    my $self;
    my $da = $@;                # Could be from exception handler
    if ($use_locking) {
        unless (CAN_FLOCK()) {
            logcarp("Storable::lock_retrieve: fcntl/flock emulation broken on $^O");
            return undef;
        }
        flock(FILE, LOCK_SH) || logcroak("can't get shared lock on $file: $!");
        # Unlocking will happen when FILE is closed
    }
    eval { $self = pretrieve(*FILE) };      # Call C routine
    close(FILE);
    if ($@) {
        # Re-raise every exception. Only plain strings get their trailing
        # ".\n" turned into ","; exception references pass through untouched.
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak($@);
    }
    $@ = $da;
    return $self;
}
329 | ||
330 | # | |
331 | # fd_retrieve | |
332 | # | |
333 | # Same as retrieve, but perform from an already opened file descriptor instead. | |
334 | # | |
# Retrieve from an already opened file handle.
#
# Argument: $file  handle to read from
#
# Returns whatever pretrieve() produced. Croaks when the handle has no file
# descriptor, and re-raises any exception from pretrieve() through logcroak.
sub fd_retrieve {
    my ($file) = @_;
    my $fd = fileno($file);
    logcroak("not a valid file descriptor") unless defined $fd;
    my $self;
    my $da = $@;                # Could be from exception handler
    eval { $self = pretrieve($file) };      # Call C routine
    if ($@) {
        # Re-raise every exception. Only plain strings get their trailing
        # ".\n" turned into ","; exception references pass through untouched.
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak($@);
    }
    $@ = $da;
    return $self;
}
346 | ||
347 | # | |
348 | # thaw | |
349 | # | |
350 | # Recreate objects in memory from an existing frozen image created | |
351 | # by freeze. If the frozen image passed is undef, return undef. | |
352 | # | |
# Rebuild a data structure from a frozen image.
#
# Argument: $frozen  scalar holding the image
#
# Returns undef when $frozen is undef, otherwise whatever mretrieve()
# produced. Any exception from mretrieve() is re-raised through logcroak.
sub thaw {
    my ($frozen) = @_;
    return undef unless defined $frozen;
    my $self;
    my $da = $@;                # Could be from exception handler
    eval { $self = mretrieve($frozen) };    # Call C routine
    if ($@) {
        # Re-raise every exception. Only plain strings get their trailing
        # ".\n" turned into ","; exception references pass through untouched.
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak($@);
    }
    $@ = $da;
    return $self;
}
363 | ||
364 | =head1 NAME | |
365 | ||
366 | Storable - persistence for Perl data structures | |
367 | ||
368 | =head1 SYNOPSIS | |
369 | ||
370 | use Storable; | |
371 | store \%table, 'file'; | |
372 | $hashref = retrieve('file'); | |
373 | ||
374 | use Storable qw(nstore store_fd nstore_fd freeze thaw dclone); | |
375 | ||
376 | # Network order | |
377 | nstore \%table, 'file'; | |
378 | $hashref = retrieve('file'); # There is NO nretrieve() | |
379 | ||
380 | # Storing to and retrieving from an already opened file | |
381 | store_fd \@array, \*STDOUT; | |
382 | nstore_fd \%table, \*STDOUT; | |
383 | $aryref = fd_retrieve(\*SOCKET); | |
384 | $hashref = fd_retrieve(\*SOCKET); | |
385 | ||
386 | # Serializing to memory | |
387 | $serialized = freeze \%table; | |
388 | %table_clone = %{ thaw($serialized) }; | |
389 | ||
390 | # Deep (recursive) cloning | |
391 | $cloneref = dclone($ref); | |
392 | ||
393 | # Advisory locking | |
394 | use Storable qw(lock_store lock_nstore lock_retrieve); | |
395 | lock_store \%table, 'file'; | |
396 | lock_nstore \%table, 'file'; | |
397 | $hashref = lock_retrieve('file'); | |
398 | ||
399 | =head1 DESCRIPTION | |
400 | ||
401 | The Storable package brings persistence to your Perl data structures | |
402 | containing SCALAR, ARRAY, HASH or REF objects, i.e. anything that can be | |
403 | conveniently stored to disk and retrieved at a later time. | |
404 | ||
405 | It can be used in the regular procedural way by calling C<store> with | |
406 | a reference to the object to be stored, along with the file name where | |
407 | the image should be written. | |
408 | ||
409 | The routine returns C<undef> for I/O problems or other internal error, | |
410 | a true value otherwise. Serious errors are propagated as a C<die> exception. | |
411 | ||
412 | To retrieve data stored to disk, use C<retrieve> with a file name. | |
413 | The objects stored into that file are recreated into memory for you, | |
414 | and a I<reference> to the root object is returned. In case an I/O error | |
415 | occurs while reading, C<undef> is returned instead. Other serious | |
416 | errors are propagated via C<die>. | |
417 | ||
418 | Since storage is performed recursively, you might want to stuff references | |
419 | to objects that share a lot of common data into a single array or hash | |
420 | table, and then store that object. That way, when you retrieve back the | |
421 | whole thing, the objects will continue to share what they originally shared. | |
422 | ||
423 | At the cost of a slight header overhead, you may store to an already | |
424 | opened file descriptor using the C<store_fd> routine, and retrieve | |
425 | from a file via C<fd_retrieve>. Those names aren't imported by default, | |
426 | so you will have to do that explicitly if you need those routines. | |
427 | The file descriptor you supply must be already opened, for read | |
428 | if you're going to retrieve and for write if you wish to store. | |
429 | ||
430 | store_fd(\%table, *STDOUT) || die "can't store to stdout\n"; | |
431 | $hashref = fd_retrieve(*STDIN); | |
432 | ||
433 | You can also store data in network order to allow easy sharing across | |
434 | multiple platforms, or when storing on a socket known to be remotely | |
435 | connected. The routines to call have an initial C<n> prefix for I<network>, | |
436 | as in C<nstore> and C<nstore_fd>. At retrieval time, your data will be | |
437 | correctly restored so you don't have to know whether you're restoring | |
438 | from native or network ordered data. Double values are stored stringified | |
439 | to ensure portability as well, at the slight risk of losing some precision | |
440 | in the last decimals. | |
441 | ||
442 | When using C<fd_retrieve>, objects are retrieved in sequence, one | |
443 | object (i.e. one recursive tree) per associated C<store_fd>. | |
444 | ||
445 | If you're more from the object-oriented camp, you can inherit from | |
446 | Storable and directly store your objects by invoking C<store> as | |
447 | a method. The fact that the root of the to-be-stored tree is a | |
448 | blessed reference (i.e. an object) is special-cased so that the | |
449 | retrieve does not provide a reference to that object but rather the | |
450 | blessed object reference itself. (Otherwise, you'd get a reference | |
451 | to that blessed object). | |
452 | ||
453 | =head1 MEMORY STORE | |
454 | ||
455 | The Storable engine can also store data into a Perl scalar instead, to | |
456 | later retrieve them. This is mainly used to freeze a complex structure in | |
457 | some safe compact memory place (where it can possibly be sent to another | |
458 | process via some IPC, since freezing the structure also serializes it in | |
459 | effect). Later on, and maybe somewhere else, you can thaw the Perl scalar | |
460 | out and recreate the original complex structure in memory. | |
461 | ||
462 | Surprisingly, the routines to be called are named C<freeze> and C<thaw>. | |
463 | If you wish to send out the frozen scalar to another machine, use | |
464 | C<nfreeze> instead to get a portable image. | |
465 | ||
466 | Note that freezing an object structure and immediately thawing it | |
467 | actually achieves a deep cloning of that structure: | |
468 | ||
469 | dclone(.) = thaw(freeze(.)) | |
470 | ||
471 | Storable provides you with a C<dclone> interface which does not create | |
472 | that intermediary scalar but instead freezes the structure in some | |
473 | internal memory space and then immediately thaws it out. | |
474 | ||
475 | =head1 ADVISORY LOCKING | |
476 | ||
477 | The C<lock_store> and C<lock_nstore> routines are equivalent to | |
478 | C<store> and C<nstore>, except that they get an exclusive lock on | |
479 | the file before writing. Likewise, C<lock_retrieve> does the same | |
480 | as C<retrieve>, but also gets a shared lock on the file before reading. | |
481 | ||
482 | As with any advisory locking scheme, the protection only works if you | |
483 | systematically use C<lock_store> and C<lock_retrieve>. If one side of | |
484 | your application uses C<store> whilst the other uses C<lock_retrieve>, | |
485 | you will get no protection at all. | |
486 | ||
487 | The internal advisory locking is implemented using Perl's flock() | |
488 | routine. If your system does not support any form of flock(), or if | |
489 | you share your files across NFS, you might wish to use other forms | |
490 | of locking by using modules such as LockFile::Simple which lock a | |
491 | file using a filesystem entry, instead of locking the file descriptor. | |
492 | ||
493 | =head1 SPEED | |
494 | ||
495 | The heart of Storable is written in C for decent speed. Extra low-level | |
496 | optimizations have been made when manipulating perl internals, to | |
497 | sacrifice encapsulation for the benefit of greater speed. | |
498 | ||
499 | =head1 CANONICAL REPRESENTATION | |
500 | ||
501 | Normally, Storable stores elements of hashes in the order they are | |
502 | stored internally by Perl, i.e. pseudo-randomly. If you set | |
503 | C<$Storable::canonical> to some C<TRUE> value, Storable will store | |
504 | hashes with the elements sorted by their key. This allows you to | |
505 | compare data structures by comparing their frozen representations (or | |
506 | even the compressed frozen representations), which can be useful for | |
507 | creating lookup tables for complicated queries. | |
508 | ||
509 | Canonical order does not imply network order; those are two orthogonal | |
510 | settings. | |
511 | ||
512 | =head1 FORWARD COMPATIBILITY | |
513 | ||
514 | This release of Storable can be used on a newer version of Perl to | |
515 | serialize data which is not supported by earlier Perls. By default, | |
516 | Storable will attempt to do the right thing, by C<croak()>ing if it | |
517 | encounters data that it cannot deserialize. However, the defaults | |
518 | can be changed as follows: | |
519 | ||
520 | =over 4 | |
521 | ||
522 | =item utf8 data | |
523 | ||
524 | Perl 5.6 added support for Unicode characters with code points > 255, | |
525 | and Perl 5.8 has full support for Unicode characters in hash keys. | |
526 | Perl internally encodes strings with these characters using utf8, and | |
527 | Storable serializes them as utf8. By default, if an older version of | |
528 | Perl encounters a utf8 value it cannot represent, it will C<croak()>. | |
529 | To change this behaviour so that Storable deserializes utf8 encoded | |
530 | values as the string of bytes (effectively dropping the I<is_utf8> flag) | |
531 | set C<$Storable::drop_utf8> to some C<TRUE> value. This is a form of | |
532 | data loss, because with C<$drop_utf8> true, it becomes impossible to tell | |
533 | whether the original data was the Unicode string, or a series of bytes | |
534 | that happen to be valid utf8. | |
535 | ||
536 | =item restricted hashes | |
537 | ||
538 | Perl 5.8 adds support for restricted hashes, which have keys | |
539 | restricted to a given set, and can have values locked to be read only. | |
540 | By default, when Storable encounters a restricted hash on a perl | |
541 | that doesn't support them, it will deserialize it as a normal hash, | |
542 | silently discarding any placeholder keys and leaving the keys and | |
543 | all values unlocked. To make Storable C<croak()> instead, set | |
544 | C<$Storable::downgrade_restricted> to a C<FALSE> value. To restore | |
545 | the default set it back to some C<TRUE> value. | |
546 | ||
547 | =item files from future versions of Storable | |
548 | ||
549 | Earlier versions of Storable would immediately croak if they encountered | |
550 | a file with a higher internal version number than the reading Storable | |
551 | knew about. Internal version numbers are increased each time new data | |
552 | types (such as restricted hashes) are added to the vocabulary of the file | |
553 | format. This meant that a newer Storable module had no way of writing a | |
554 | file readable by an older Storable, even if the writer didn't store newer | |
555 | data types. | |
556 | ||
557 | This version of Storable will defer croaking until it encounters a data | |
558 | type in the file that it does not recognize. This means that it will | |
559 | continue to read files generated by newer Storable modules which are careful | |
560 | in what they write out, making it easier to upgrade Storable modules in a | |
561 | mixed environment. | |
562 | ||
563 | The old behaviour of immediate croaking can be re-instated by setting | |
564 | C<$Storable::accept_future_minor> to some C<FALSE> value. | |
565 | ||
566 | =back | |
567 | ||
568 | All these variables have no effect on a newer Perl which supports the | |
569 | relevant feature. | |
570 | ||
571 | =head1 ERROR REPORTING | |
572 | ||
573 | Storable uses the "exception" paradigm, in that it does not try to work around | |
574 | failures: if something bad happens, an exception is generated from the | |
575 | caller's perspective (see L<Carp> and C<croak()>). Use eval {} to trap | |
576 | those exceptions. | |
577 | ||
578 | When Storable croaks, it tries to report the error via the C<logcroak()> | |
579 | routine from the C<Log::Agent> package, if it is available. | |
580 | ||
581 | Normal errors are reported by having store() or retrieve() return C<undef>. | |
582 | Such errors are usually I/O errors (or truncated stream errors at retrieval). | |
583 | ||
584 | =head1 WIZARDS ONLY | |
585 | ||
586 | =head2 Hooks | |
587 | ||
588 | Any class may define hooks that will be called during the serialization | |
589 | and deserialization process on objects that are instances of that class. | |
590 | Those hooks can redefine the way serialization is performed (and therefore, | |
591 | how the symmetrical deserialization should be conducted). | |
592 | ||
593 | Since we said earlier: | |
594 | ||
595 | dclone(.) = thaw(freeze(.)) | |
596 | ||
597 | everything we say about hooks should also hold for deep cloning. However, | |
598 | hooks get to know whether the operation is a mere serialization, or a cloning. | |
599 | ||
600 | Therefore, when serializing hooks are involved, | |
601 | ||
602 | dclone(.) <> thaw(freeze(.)) | |
603 | ||
604 | Well, you could keep them in sync, but there's no guarantee it will always | |
605 | hold on classes somebody else wrote. Besides, there is little to gain in | |
606 | doing so: a serializing hook could keep only one attribute of an object, | |
607 | which is probably not what should happen during a deep cloning of that | |
608 | same object. | |
609 | ||
610 | Here is the hooking interface: | |
611 | ||
612 | =over 4 | |
613 | ||
614 | =item C<STORABLE_freeze> I<obj>, I<cloning> | |
615 | ||
616 | The serializing hook, called on the object during serialization. It can be | |
617 | inherited, or defined in the class itself, like any other method. | |
618 | ||
619 | Arguments: I<obj> is the object to serialize, I<cloning> is a flag indicating | |
620 | whether we're in a dclone() or a regular serialization via store() or freeze(). | |
621 | ||
622 | Returned value: A LIST C<($serialized, $ref1, $ref2, ...)> where $serialized | |
623 | is the serialized form to be used, and the optional $ref1, $ref2, etc... are | |
624 | extra references that you wish to let the Storable engine serialize. | |
625 | ||
626 | At deserialization time, you will be given back the same LIST, but all the | |
627 | extra references will be pointing into the deserialized structure. | |
628 | ||
629 | The B<first time> the hook is hit in a serialization flow, you may have it | |
630 | return an empty list. That will signal the Storable engine to further | |
631 | discard that hook for this class and to therefore revert to the default | |
632 | serialization of the underlying Perl data. The hook will again be normally | |
633 | processed in the next serialization. | |
634 | ||
635 | Unless you know better, serializing hook should always say: | |
636 | ||
637 | sub STORABLE_freeze { | |
638 | my ($self, $cloning) = @_; | |
639 | return if $cloning; # Regular default serialization | |
640 | .... | |
641 | } | |
642 | ||
643 | in order to keep reasonable dclone() semantics. | |
644 | ||
645 | =item C<STORABLE_thaw> I<obj>, I<cloning>, I<serialized>, ... | |
646 | ||
647 | The deserializing hook called on the object during deserialization. | |
648 | But wait: if we're deserializing, there's no object yet... right? | |
649 | ||
650 | Wrong: the Storable engine creates an empty one for you. If you know Eiffel, | |
651 | you can view C<STORABLE_thaw> as an alternate creation routine. | |
652 | ||
653 | This means the hook can be inherited like any other method, and that | |
654 | I<obj> is your blessed reference for this particular instance. | |
655 | ||
656 | The other arguments should look familiar if you know C<STORABLE_freeze>: | |
657 | I<cloning> is true when we're part of a deep clone operation, I<serialized> | |
658 | is the serialized string you returned to the engine in C<STORABLE_freeze>, | |
659 | and there may be an optional list of references, in the same order you gave | |
660 | them at serialization time, pointing to the deserialized objects (which | |
661 | have been processed courtesy of the Storable engine). | |
662 | ||
663 | When the Storable engine does not find any C<STORABLE_thaw> hook routine, | |
664 | it tries to load the class by requiring the package dynamically (using | |
665 | the blessed package name), and then re-attempts the lookup. If at that | |
666 | time the hook cannot be located, the engine croaks. Note that this mechanism | |
667 | will fail if you define several classes in the same file, but L<perlmod> | |
668 | warned you. | |
669 | ||
670 | It is up to you to use this information to populate I<obj> the way you want. | |
671 | ||
672 | Returned value: none. | |
673 | ||
674 | =back | |
675 | ||
676 | =head2 Predicates | |
677 | ||
678 | Predicates are not exportable. They must be called by explicitly prefixing | |
679 | them with the Storable package name. | |
680 | ||
681 | =over 4 | |
682 | ||
683 | =item C<Storable::last_op_in_netorder> | |
684 | ||
685 | The C<Storable::last_op_in_netorder()> predicate will tell you whether | |
686 | network order was used in the last store or retrieve operation. If you | |
687 | don't know how to use this, just forget about it. | |
688 | ||
689 | =item C<Storable::is_storing> | |
690 | ||
691 | Returns true if within a store operation (via STORABLE_freeze hook). | |
692 | ||
693 | =item C<Storable::is_retrieving> | |
694 | ||
695 | Returns true if within a retrieve operation (via STORABLE_thaw hook). | |
696 | ||
697 | =back | |
698 | ||
699 | =head2 Recursion | |
700 | ||
701 | With hooks comes the ability to recurse back to the Storable engine. | |
702 | Indeed, hooks are regular Perl code, and Storable is convenient when | |
703 | it comes to serializing and deserializing things, so why not use it | |
704 | to handle the serialization string? | |
705 | ||
706 | There are a few things you need to know, however: | |
707 | ||
708 | =over 4 | |
709 | ||
710 | =item * | |
711 | ||
712 | You can create endless loops if the things you serialize via freeze() | |
713 | (for instance) point back to the object we're trying to serialize in | |
714 | the hook. | |
715 | ||
716 | =item * | |
717 | ||
718 | Shared references among objects will not stay shared: if we're serializing | |
719 | the list of objects [A, C] where both objects A and C refer to the SAME object | |
720 | B, and if there is a serializing hook in A that says freeze(B), then when | |
721 | deserializing, we'll get [A', C'] where A' refers to B', but C' refers to D, | |
722 | a deep clone of B'. The topology was not preserved. | |
723 | ||
724 | =back | |
725 | ||
726 | That's why C<STORABLE_freeze> lets you provide a list of references | |
727 | to serialize. The engine guarantees that those will be serialized in the | |
728 | same context as the other objects, and therefore that shared objects will | |
729 | stay shared. | |
730 | ||
731 | In the above [A, C] example, the C<STORABLE_freeze> hook could return: | |
732 | ||
733 | ("something", $self->{B}) | |
734 | ||
735 | and the B part would be serialized by the engine. In C<STORABLE_thaw>, you | |
736 | would get back the reference to the B' object, deserialized for you. | |
737 | ||
738 | Therefore, recursion should normally be avoided, but is nonetheless supported. | |
739 | ||
740 | =head2 Deep Cloning | |
741 | ||
742 | There is a Clone module available on CPAN which implements deep cloning | |
743 | natively, i.e. without freezing to memory and thawing the result. It is | |
744 | aimed to replace Storable's dclone() some day. However, it does not currently | |
745 | support Storable hooks to redefine the way deep cloning is performed. | |
746 | ||
747 | =head1 Storable magic | |
748 | ||
749 | Yes, there's a lot of that :-) But more precisely, in UNIX systems | |
750 | there's a utility called C<file>, which recognizes data files based on | |
751 | their contents (usually their first few bytes). For this to work, | |
752 | a certain file called F<magic> needs to be taught about the I<signature> | |
753 | of the data. Where that configuration file lives depends on the UNIX | |
754 | flavour; often it's something like F</usr/share/misc/magic> or | |
755 | F</etc/magic>. Your system administrator needs to do the updating of | |
756 | the F<magic> file. The necessary signature information is output to | |
757 | STDOUT by invoking Storable::show_file_magic(). Note that the GNU | |
758 | implementation of the C<file> utility, version 3.38 or later, | |
759 | is expected to contain support for recognising Storable files | |
760 | out-of-the-box, in addition to other kinds of Perl files. | |
761 | ||
762 | =head1 EXAMPLES | |
763 | ||
764 | Here are some code samples showing a possible usage of Storable: | |
765 | ||
766 | use Storable qw(store retrieve freeze thaw dclone); | |
767 | ||
768 | %color = ('Blue' => 0.1, 'Red' => 0.8, 'Black' => 0, 'White' => 1); | |
769 | ||
770 | store(\%color, '/tmp/colors') or die "Can't store %color in /tmp/colors!\n"; | |
771 | ||
772 | $colref = retrieve('/tmp/colors'); | |
773 | die "Unable to retrieve from /tmp/colors!\n" unless defined $colref; | |
774 | printf "Blue is still %lf\n", $colref->{'Blue'}; | |
775 | ||
776 | $colref2 = dclone(\%color); | |
777 | ||
778 | $str = freeze(\%color); | |
779 | printf "Serialization of %%color is %d bytes long.\n", length($str); | |
780 | $colref3 = thaw($str); | |
781 | ||
782 | which prints (on my machine): | |
783 | ||
784 | Blue is still 0.100000 | |
785 | Serialization of %color is 102 bytes long. | |
786 | ||
787 | =head1 WARNING | |
788 | ||
789 | If you're using references as keys within your hash tables, you're bound | |
790 | to be disappointed when retrieving your data. Indeed, Perl stringifies | |
791 | references used as hash table keys. If you later wish to access the | |
792 | items via another reference stringification (i.e. using the same | |
793 | reference that was used for the key originally to record the value into | |
794 | the hash table), it will work because both references stringify to the | |
795 | same string. | |
796 | ||
797 | It won't work across a sequence of C<store> and C<retrieve> operations, | |
798 | however, because the addresses in the retrieved objects, which are | |
799 | part of the stringified references, will probably differ from the | |
800 | original addresses. The topology of your structure is preserved, | |
801 | but not hidden semantics like those. | |
802 | ||
803 | On platforms where it matters, be sure to call C<binmode()> on the | |
804 | descriptors that you pass to Storable functions. | |
805 | ||
806 | Storing data canonically that contains large hashes can be | |
807 | significantly slower than storing the same data normally, as | |
808 | temporary arrays to hold the keys for each hash have to be allocated, | |
809 | populated, sorted and freed. Some tests have shown a halving of the | |
810 | speed of storing -- the exact penalty will depend on the complexity of | |
811 | your data. There is no slowdown on retrieval. | |
812 | ||
813 | =head1 BUGS | |
814 | ||
815 | You can't store GLOB, CODE, FORMLINE, etc.... If you can define | |
816 | semantics for those operations, feel free to enhance Storable so that | |
817 | it can deal with them. | |
818 | ||
819 | The store functions will C<croak> if they run into such references | |
820 | unless you set C<$Storable::forgive_me> to some C<TRUE> value. In that | |
821 | case, the fatal message is turned into a warning and some | |
822 | meaningless string is stored instead. | |
823 | ||
824 | Setting C<$Storable::canonical> may not yield frozen strings that | |
825 | compare equal due to possible stringification of numbers. When the | |
826 | string version of a scalar exists, it is the form stored; therefore, | |
827 | if you happen to use your numbers as strings between two freezing | |
828 | operations on the same data structures, you will get different | |
829 | results. | |
830 | ||
831 | When storing doubles in network order, their value is stored as text. | |
832 | However, you should also not expect non-numeric floating-point values | |
833 | such as infinity and "not a number" to pass successfully through a | |
834 | nstore()/retrieve() pair. | |
835 | ||
836 | As Storable neither knows nor cares about character sets (although it | |
837 | does know that characters may be more than eight bits wide), any difference | |
838 | in the interpretation of character codes between a host and a target | |
839 | system is your problem. In particular, if host and target use different | |
840 | code points to represent the characters used in the text representation | |
841 | of floating-point numbers, you will not be able to exchange | |
842 | floating-point data, even with nstore(). | |
843 | ||
844 | C<Storable::drop_utf8> is a blunt tool. There is no facility either to | |
845 | return B<all> strings as utf8 sequences, or to attempt to convert utf8 | |
846 | data back to 8 bit and C<croak()> if the conversion fails. | |
847 | ||
848 | Prior to Storable 2.01, no distinction was made between signed and | |
849 | unsigned integers on storing. By default Storable prefers to store a | |
850 | scalar's string representation (if it has one) so this would only cause | |
851 | problems when storing large unsigned integers that had never been converted | |
852 | to string or floating point. In other words values that had been generated | |
853 | by integer operations such as logic ops and then not used in any string or | |
854 | arithmetic context before storing. | |
855 | ||
856 | =head2 64 bit data in perl 5.6.0 and 5.6.1 | |
857 | ||
858 | This section only applies to you if you have existing data written out | |
859 | by Storable 2.02 or earlier on perl 5.6.0 or 5.6.1 on Unix or Linux which | |
860 | has been configured with 64 bit integer support (not the default). | |
861 | If you got a precompiled perl, rather than running Configure to build | |
862 | your own perl from source, then it almost certainly does not affect you, | |
863 | and you can stop reading now (unless you're curious). If you're using perl | |
864 | on Windows it does not affect you. | |
865 | ||
866 | Storable writes a file header which contains the sizes of various C | |
867 | language types for the C compiler that built Storable (when not writing in | |
868 | network order), and will refuse to load files written by a Storable not | |
869 | on the same (or compatible) architecture. This check and a check on | |
870 | machine byteorder are needed because the sizes of various fields in the file | |
871 | are given by the sizes of the C language types, and so files written on | |
872 | different architectures are incompatible. This is done for increased speed. | |
873 | (When writing in network order, all fields are written out as standard | |
874 | lengths, which allows full interworking, but takes longer to read and write) | |
875 | ||
876 | Perl 5.6.x introduced the ability to optionally configure the perl interpreter | |
877 | to use C's C<long long> type to allow scalars to store 64 bit integers on 32 | |
878 | bit systems. However, due to the way the Perl configuration system | |
879 | generated the C configuration files on non-Windows platforms, and the way | |
880 | Storable generates its header, nothing in the Storable file header reflected | |
881 | whether the perl writing was using 32 or 64 bit integers, despite the fact | |
882 | that Storable was storing some data differently in the file. Hence Storable | |
883 | running on perl with 64 bit integers will read the header from a file | |
884 | written by a 32 bit perl, not realise that the data is actually in a subtly | |
885 | incompatible format, and then go horribly wrong (possibly crashing) if it | |
886 | encountered a stored integer. This is a design failure. | |
887 | ||
888 | Storable has now been changed to write out and read in a file header with | |
889 | information about the size of integers. It's impossible to detect whether | |
890 | an old file being read in was written with 32 or 64 bit integers (they have | |
891 | the same header) so it's impossible to automatically switch to a correct | |
892 | backwards compatibility mode. Hence this Storable defaults to the new, | |
893 | correct behaviour. | |
894 | ||
895 | What this means is that if you have data written by Storable 1.x running | |
896 | on perl 5.6.0 or 5.6.1 configured with 64 bit integers on Unix or Linux | |
897 | then by default this Storable will refuse to read it, giving the error | |
898 | I<Byte order is not compatible>. If you have such data then you | |
899 | should set C<$Storable::interwork_56_64bit> to a true value to make this | |
900 | Storable read and write files with the old header. You should also | |
901 | migrate your data, or any older perl you are communicating with, to this | |
902 | current version of Storable. | |
903 | ||
904 | If you don't have data written with the specific configuration of perl described | |
905 | above, then you do not need to and should not do anything. Don't set the flag - | |
906 | not only will Storable on an identically configured perl refuse to load them, | |
907 | but Storable on a differently configured perl will load them believing them | |
908 | to be correct for it, and then may well fail or crash part way through | |
909 | reading them. | |
910 | ||
911 | =head1 CREDITS | |
912 | ||
913 | Thank you to (in chronological order): | |
914 | ||
915 | Jarkko Hietaniemi <jhi@iki.fi> | |
916 | Ulrich Pfeifer <pfeifer@charly.informatik.uni-dortmund.de> | |
917 | Benjamin A. Holzman <bah@ecnvantage.com> | |
918 | Andrew Ford <A.Ford@ford-mason.co.uk> | |
919 | Gisle Aas <gisle@aas.no> | |
920 | Jeff Gresham <gresham_jeffrey@jpmorgan.com> | |
921 | Murray Nesbitt <murray@activestate.com> | |
922 | Marc Lehmann <pcg@opengroup.org> | |
923 | Justin Banks <justinb@wamnet.com> | |
924 | Jarkko Hietaniemi <jhi@iki.fi> (AGAIN, as perl 5.7.0 Pumpkin!) | |
925 | Salvador Ortiz Garcia <sog@msg.com.mx> | |
926 | Dominic Dunlop <domo@computer.org> | |
927 | Erik Haugan <erik@solbors.no> | |
928 | ||
929 | for their bug reports, suggestions and contributions. | |
930 | ||
931 | Benjamin Holzman contributed the tied variable support, Andrew Ford | |
932 | contributed the canonical order for hashes, and Gisle Aas fixed | |
933 | a few misunderstandings of mine regarding the perl internals, | |
934 | and optimized the emission of "tags" in the output streams by | |
935 | simply counting the objects instead of tagging them (leading to | |
936 | a binary incompatibility for the Storable image starting at version | |
937 | 0.6--older images are, of course, still properly understood). | |
938 | Murray Nesbitt made Storable thread-safe. Marc Lehmann added overloading | |
939 | and references to tied items support. | |
940 | ||
941 | =head1 AUTHOR | |
942 | ||
943 | Storable was written by Raphael Manfredi F<E<lt>Raphael_Manfredi@pobox.comE<gt>>. | |
944 | Maintenance is now done by the perl5-porters F<E<lt>perl5-porters@perl.orgE<gt>>. | |
945 | ||
946 | Please e-mail us with problems, bug fixes, comments and complaints, | |
947 | although if you have compliments you should send them to Raphael. | |
948 | Please don't e-mail Raphael with problems, as he no longer works on | |
949 | Storable, and your message will be delayed while he forwards it to us. | |
950 | ||
951 | =head1 SEE ALSO | |
952 | ||
953 | L<Clone>. | |
954 | ||
955 | =cut |