Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | package Data::Dump; |
2 | ||
3 | use strict; | |
4 | use vars qw(@EXPORT_OK $VERSION $DEBUG); | |
5 | ||
6 | require Exporter; | |
7 | *import = \&Exporter::import; | |
8 | @EXPORT_OK=qw(dump pp); | |
9 | ||
10 | $VERSION = "1.02"; # $Date: 2003/12/18 09:27:35 $ | |
11 | $DEBUG = 0; | |
12 | ||
13 | use overload (); | |
14 | use vars qw(%seen %refcnt @dump @fixup %require); | |
15 | ||
16 | my %is_perl_keyword = map { $_ => 1 } | |
17 | qw( __FILE__ __LINE__ __PACKAGE__ __DATA__ __END__ AUTOLOAD BEGIN CORE | |
18 | DESTROY END EQ GE GT INIT LE LT NE abs accept alarm and atan2 bind | |
19 | binmode bless caller chdir chmod chomp chop chown chr chroot close | |
20 | closedir cmp connect continue cos crypt dbmclose dbmopen defined | |
21 | delete die do dump each else elsif endgrent endhostent endnetent | |
22 | endprotoent endpwent endservent eof eq eval exec exists exit exp fcntl | |
23 | fileno flock for foreach fork format formline ge getc getgrent | |
24 | getgrgid getgrnam gethostbyaddr gethostbyname gethostent getlogin | |
25 | getnetbyaddr getnetbyname getnetent getpeername getpgrp getppid | |
26 | getpriority getprotobyname getprotobynumber getprotoent getpwent | |
27 | getpwnam getpwuid getservbyname getservbyport getservent getsockname | |
28 | getsockopt glob gmtime goto grep gt hex if index int ioctl join keys | |
29 | kill last lc lcfirst le length link listen local localtime lock log | |
30 | lstat lt m map mkdir msgctl msgget msgrcv msgsnd my ne next no not oct | |
31 | open opendir or ord pack package pipe pop pos print printf prototype | |
32 | push q qq qr quotemeta qw qx rand read readdir readline readlink | |
33 | readpipe recv redo ref rename require reset return reverse rewinddir | |
34 | rindex rmdir s scalar seek seekdir select semctl semget semop send | |
35 | setgrent sethostent setnetent setpgrp setpriority setprotoent setpwent | |
36 | setservent setsockopt shift shmctl shmget shmread shmwrite shutdown | |
37 | sin sleep socket socketpair sort splice split sprintf sqrt srand stat | |
38 | study sub substr symlink syscall sysopen sysread sysseek system | |
39 | syswrite tell telldir tie tied time times tr truncate uc ucfirst umask | |
40 | undef unless unlink unpack unshift untie until use utime values vec | |
41 | wait waitpid wantarray warn while write x xor y); | |
42 | ||
43 | ||
44 | sub dump | |
45 | { | |
46 | local %seen; | |
47 | local %refcnt; | |
48 | local %require; | |
49 | local @fixup; | |
50 | ||
51 | my $name = "a"; | |
52 | my @dump; | |
53 | ||
54 | for (@_) { | |
55 | my $val = _dump($_, $name, []); | |
56 | push(@dump, [$name, $val]); | |
57 | } continue { | |
58 | $name++; | |
59 | } | |
60 | ||
61 | my $out = ""; | |
62 | if (%require) { | |
63 | for (sort keys %require) { | |
64 | $out .= "require $_;\n"; | |
65 | } | |
66 | } | |
67 | if (%refcnt) { | |
68 | # output all those with refcounts first | |
69 | for (@dump) { | |
70 | my $name = $_->[0]; | |
71 | if ($refcnt{$name}) { | |
72 | $out .= "my \$$name = $_->[1];\n"; | |
73 | undef $_->[1]; | |
74 | } | |
75 | } | |
76 | for (@fixup) { | |
77 | $out .= "$_;\n"; | |
78 | } | |
79 | } | |
80 | ||
81 | my $paren = (@dump != 1); | |
82 | $out .= "(" if $paren; | |
83 | $out .= format_list($paren, | |
84 | map {defined($_->[1]) ? $_->[1] : "\$".$_->[0]} | |
85 | @dump | |
86 | ); | |
87 | $out .= ")" if $paren; | |
88 | ||
89 | if (%refcnt || %require) { | |
90 | $out .= ";\n"; | |
91 | $out =~ s/^/ /gm; # indent | |
92 | $out = "do {\n$out}"; | |
93 | } | |
94 | ||
95 | #use Data::Dumper; print Dumper(\%refcnt); | |
96 | #use Data::Dumper; print Dumper(\%seen); | |
97 | ||
98 | print STDERR "$out\n" unless defined wantarray; | |
99 | $out; | |
100 | } | |
101 | ||
102 | *pp = \&dump; | |
103 | ||
104 | sub _dump | |
105 | { | |
106 | my $ref = ref $_[0]; | |
107 | my $rval = $ref ? $_[0] : \$_[0]; | |
108 | shift; | |
109 | ||
110 | my($name, $idx) = @_; | |
111 | ||
112 | my($class, $type, $id); | |
113 | if (overload::StrVal($rval) =~ /^(?:([^=]+)=)?([A-Z]+)\(0x([^\)]+)\)$/) { | |
114 | $class = $1; | |
115 | $type = $2; | |
116 | $id = $3; | |
117 | } else { | |
118 | die "Can't parse " . overload::StrVal($rval); | |
119 | } | |
120 | warn "$name-(@$idx) $class $type $id ($ref)" if $DEBUG; | |
121 | ||
122 | if (my $s = $seen{$id}) { | |
123 | my($sname, $sidx) = @$s; | |
124 | $refcnt{$sname}++; | |
125 | my $sref = fullname($sname, $sidx, | |
126 | ($ref && $type eq "SCALAR")); | |
127 | warn "SEEN: [$name/@$idx] => [$sname/@$sidx] ($ref,$sref)" if $DEBUG; | |
128 | return $sref unless $sname eq $name; | |
129 | $refcnt{$name}++; | |
130 | push(@fixup, fullname($name,$idx)." = $sref"); | |
131 | return "'fix'"; | |
132 | } | |
133 | $seen{$id} = [$name, $idx]; | |
134 | ||
135 | my $out; | |
136 | if ($type eq "SCALAR" || $type eq "REF") { | |
137 | if ($ref) { | |
138 | if ($class && $class eq "Regexp") { | |
139 | my $v = "$rval"; | |
140 | ||
141 | my $mod = ""; | |
142 | if ($v =~ /^\(\?([msix-]+):([\x00-\xFF]*)\)\z/) { | |
143 | $mod = $1; | |
144 | $v = $2; | |
145 | $mod =~ s/-.*//; | |
146 | } | |
147 | ||
148 | my $sep = '/'; | |
149 | my $sep_count = ($v =~ tr/\///); | |
150 | if ($sep_count) { | |
151 | # see if we can find a better one | |
152 | for ('|', ',', ':', '#') { | |
153 | my $c = eval "\$v =~ tr/\Q$_\E//"; | |
154 | #print "SEP $_ $c $sep_count\n"; | |
155 | if ($c < $sep_count) { | |
156 | $sep = $_; | |
157 | $sep_count = $c; | |
158 | last if $sep_count == 0; | |
159 | } | |
160 | } | |
161 | } | |
162 | $v =~ s/\Q$sep\E/\\$sep/g; | |
163 | ||
164 | $out = "qr$sep$v$sep$mod"; | |
165 | undef($class); | |
166 | } | |
167 | else { | |
168 | delete $seen{$id}; # will be seen again shortly | |
169 | my $val = _dump($$rval, $name, [@$idx, "\$"]); | |
170 | $out = $class ? "do{\\(my \$o = $val)}" : "\\$val"; | |
171 | } | |
172 | } else { | |
173 | if (!defined $$rval) { | |
174 | $out = "undef"; | |
175 | } | |
176 | elsif ($$rval =~ /^-?[1-9]\d{0,8}$/ || $$rval eq "0") { | |
177 | if (length $$rval > 4) { | |
178 | # Separate thousands by _ to make it more readable | |
179 | $out = reverse $$rval; | |
180 | $out =~ s/(\d\d\d)(?=\d)/$1_/g; | |
181 | $out = reverse $out; | |
182 | } else { | |
183 | $out = $$rval; | |
184 | } | |
185 | } | |
186 | else { | |
187 | $out = quote($$rval); | |
188 | } | |
189 | if ($class && !@$idx) { | |
190 | # Top is an object, not a reference to one as perl needs | |
191 | $refcnt{$name}++; | |
192 | my $obj = fullname($name, $idx); | |
193 | my $cl = quote($class); | |
194 | push(@fixup, "bless \\$obj, $cl"); | |
195 | } | |
196 | } | |
197 | } | |
198 | elsif ($type eq "GLOB") { | |
199 | if ($ref) { | |
200 | delete $seen{$id}; | |
201 | my $val = _dump($$rval, $name, [@$idx, "*"]); | |
202 | $out = "\\$val"; | |
203 | if ($out =~ /^\\\*Symbol::/) { | |
204 | $require{Symbol}++; | |
205 | $out = "Symbol::gensym()"; | |
206 | } | |
207 | } else { | |
208 | my $val = "$$rval"; | |
209 | $out = "$$rval"; | |
210 | ||
211 | for my $k (qw(SCALAR ARRAY HASH)) { | |
212 | my $gval = *$$rval{$k}; | |
213 | next unless defined $gval; | |
214 | next if $k eq "SCALAR" && ! defined $$gval; # always there | |
215 | my $f = scalar @fixup; | |
216 | push(@fixup, "RESERVED"); # overwritten after _dump() below | |
217 | $gval = _dump($gval, $name, [@$idx, "*{$k}"]); | |
218 | $refcnt{$name}++; | |
219 | my $gname = fullname($name, $idx); | |
220 | $fixup[$f] = "$gname = $gval"; #XXX indent $gval | |
221 | } | |
222 | } | |
223 | } | |
224 | elsif ($type eq "ARRAY") { | |
225 | my @vals; | |
226 | my $i = 0; | |
227 | for (@$rval) { | |
228 | push(@vals, _dump($_, $name, [@$idx, "[$i]"])); | |
229 | $i++; | |
230 | } | |
231 | $out = "[" . format_list(1, @vals) . "]"; | |
232 | } | |
233 | elsif ($type eq "HASH") { | |
234 | my(@keys, @vals); | |
235 | ||
236 | # statistics to determine variation in key lengths | |
237 | my $kstat_max = 0; | |
238 | my $kstat_sum = 0; | |
239 | my $kstat_sum2 = 0; | |
240 | ||
241 | my @orig_keys = keys %$rval; | |
242 | my $text_keys = 0; | |
243 | for (@orig_keys) { | |
244 | $text_keys++, last unless $_ eq "0" || /^[-+]?[1-9]\d*(?:.\d+)?\z/; | |
245 | } | |
246 | ||
247 | if ($text_keys) { | |
248 | @orig_keys = sort @orig_keys; | |
249 | } | |
250 | else { | |
251 | @orig_keys = sort { $a <=> $b } @orig_keys; | |
252 | } | |
253 | ||
254 | for my $key (@orig_keys) { | |
255 | my $val = \$rval->{$key}; | |
256 | $key = quote($key) if $is_perl_keyword{$key} || | |
257 | !($key =~ /^[a-zA-Z_]\w{0,19}\z/ || | |
258 | $key =~ /^-?[1-9]\d{0,8}\z/ | |
259 | ); | |
260 | ||
261 | $kstat_max = length($key) if length($key) > $kstat_max; | |
262 | $kstat_sum += length($key); | |
263 | $kstat_sum2 += length($key)*length($key); | |
264 | ||
265 | push(@keys, $key); | |
266 | push(@vals, _dump($$val, $name, [@$idx, "{$key}"])); | |
267 | } | |
268 | my $nl = ""; | |
269 | my $klen_pad = 0; | |
270 | my $tmp = "@keys @vals"; | |
271 | if (length($tmp) > 60 || $tmp =~ /\n/) { | |
272 | $nl = "\n"; | |
273 | ||
274 | # Determine what padding to add | |
275 | if ($kstat_max < 4) { | |
276 | $klen_pad = $kstat_max; | |
277 | } | |
278 | elsif (@keys >= 2) { | |
279 | my $n = @keys; | |
280 | my $avg = $kstat_sum/$n; | |
281 | my $stddev = sqrt(($kstat_sum2 - $n * $avg * $avg) / ($n - 1)); | |
282 | ||
283 | # I am not actually very happy with this heuristics | |
284 | if ($stddev / $kstat_max < 0.25) { | |
285 | $klen_pad = $kstat_max; | |
286 | } | |
287 | if ($DEBUG) { | |
288 | push(@keys, "__S"); | |
289 | push(@vals, sprintf("%.2f (%d/%.1f/%.1f)", | |
290 | $stddev / $kstat_max, | |
291 | $kstat_max, $avg, $stddev)); | |
292 | } | |
293 | } | |
294 | } | |
295 | $out = "{$nl"; | |
296 | while (@keys) { | |
297 | my $key = shift @keys; | |
298 | my $val = shift @vals; | |
299 | my $pad = " " x ($klen_pad + 6); | |
300 | $val =~ s/\n/\n$pad/gm; | |
301 | $key = " $key" . " " x ($klen_pad - length($key)) if $nl; | |
302 | $out .= " $key => $val,$nl"; | |
303 | } | |
304 | $out =~ s/,$/ / unless $nl; | |
305 | $out .= "}"; | |
306 | } | |
307 | elsif ($type eq "CODE") { | |
308 | $out = 'sub { "???" }'; | |
309 | } | |
310 | else { | |
311 | warn "Can't handle $type data"; | |
312 | $out = "'#$type#'"; | |
313 | } | |
314 | ||
315 | if ($class && $ref) { | |
316 | $out = "bless($out, " . quote($class) . ")"; | |
317 | } | |
318 | return $out; | |
319 | } | |
320 | ||
321 | sub fullname | |
322 | { | |
323 | my($name, $idx, $ref) = @_; | |
324 | substr($name, 0, 0) = "\$"; | |
325 | ||
326 | my @i = @$idx; # need copy in order to not modify @$idx | |
327 | if ($ref && @i && $i[0] eq "\$") { | |
328 | shift(@i); # remove one deref | |
329 | $ref = 0; | |
330 | } | |
331 | while (@i && $i[0] eq "\$") { | |
332 | shift @i; | |
333 | $name = "\$$name"; | |
334 | } | |
335 | ||
336 | my $last_was_index; | |
337 | for my $i (@i) { | |
338 | if ($i eq "*" || $i eq "\$") { | |
339 | $last_was_index = 0; | |
340 | $name = "$i\{$name}"; | |
341 | } elsif ($i =~ s/^\*//) { | |
342 | $name .= $i; | |
343 | $last_was_index++; | |
344 | } else { | |
345 | $name .= "->" unless $last_was_index++; | |
346 | $name .= $i; | |
347 | } | |
348 | } | |
349 | $name = "\\$name" if $ref; | |
350 | $name; | |
351 | } | |
352 | ||
353 | sub format_list | |
354 | { | |
355 | my $paren = shift; | |
356 | my $indent_lim = $paren ? 0 : 1; | |
357 | my $tmp = "@_"; | |
358 | if (@_ > $indent_lim && (length($tmp) > 60 || $tmp =~ /\n/)) { | |
359 | my @elem = @_; | |
360 | for (@elem) { s/^/ /gm; } # indent | |
361 | return "\n" . join(",\n", @elem, ""); | |
362 | } else { | |
363 | return join(", ", @_) | |
364 | } | |
365 | } | |
366 | ||
367 | my %esc = ( | |
368 | "\a" => "\\a", | |
369 | "\b" => "\\b", | |
370 | "\t" => "\\t", | |
371 | "\n" => "\\n", | |
372 | "\f" => "\\f", | |
373 | "\r" => "\\r", | |
374 | "\e" => "\\e", | |
375 | ); | |
376 | ||
377 | # put a string value in double quotes | |
378 | sub quote { | |
379 | local($_) = $_[0]; | |
380 | if (length($_) > 20) { | |
381 | # Check for repeated string | |
382 | if (/^(.{1,5}?)(\1*)$/s) { | |
383 | my $base = quote($1); | |
384 | my $repeat = length($2)/length($1) + 1; | |
385 | return "($base x $repeat)"; | |
386 | } | |
387 | } | |
388 | # If there are many '"' we might want to use qq() instead | |
389 | s/([\\\"\@\$])/\\$1/g; | |
390 | return qq("$_") unless /[^\040-\176]/; # fast exit | |
391 | ||
392 | my $high = $_[1]; | |
393 | s/([\a\b\t\n\f\r\e])/$esc{$1}/g; | |
394 | ||
395 | # no need for 3 digits in escape for these | |
396 | s/([\0-\037])(?!\d)/'\\'.sprintf('%o',ord($1))/eg; | |
397 | ||
398 | if ($high) { | |
399 | s/([\0-\037\177])/'\\'.sprintf('%03o',ord($1))/eg; | |
400 | if ($high eq "iso8859") { | |
401 | s/[\200-\240]/'\\'.sprintf('%o',ord($1))/eg; | |
402 | } elsif ($high eq "utf8") { | |
403 | # use utf8; | |
404 | # $str =~ s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge; | |
405 | } | |
406 | } else { | |
407 | s/([\0-\037\177-\377])/'\\'.sprintf('%03o',ord($1))/eg; | |
408 | } | |
409 | ||
410 | if (length($_) > 40 && length($_) > (length($_[0]) * 2)) { | |
411 | # too much binary data, better to represent as a hex string? | |
412 | ||
413 | # Base64 is more compact than hex when string is longer than | |
414 | # 17 bytes (not counting any require statement needed). | |
415 | # But on the other hand, hex is much more readable. | |
416 | if (length($_[0]) > 50 && eval { require MIME::Base64 }) { | |
417 | # XXX Perhaps we should just use unpack("u",...) instead. | |
418 | $require{"MIME::Base64"}++; | |
419 | return "MIME::Base64::decode(\"" . | |
420 | MIME::Base64::encode($_[0],"") . | |
421 | "\")"; | |
422 | } | |
423 | return "pack(\"H*\",\"" . unpack("H*", $_[0]) . "\")"; | |
424 | } | |
425 | ||
426 | return qq("$_"); | |
427 | } | |
428 | ||
429 | 1; | |
430 | ||
431 | __END__ | |
432 | ||
433 | =head1 NAME | |
434 | ||
435 | Data::Dump - Pretty printing of data structures | |
436 | ||
437 | =head1 SYNOPSIS | |
438 | ||
439 | use Data::Dump qw(dump); | |
440 | ||
441 | $str = dump(@list) | |
442 | @copy_of_list = eval $str; | |
443 | ||
444 | =head1 DESCRIPTION | |
445 | ||
446 | This module provides a single function called dump() that takes a list | |
447 | of values as its argument and produces a string as its result. The string | |
448 | contains Perl code that, when C<eval>ed, produces a deep copy of the | |
449 | original arguments. The string is formatted for easy reading. | |
450 | ||
451 | If dump() is called in a void context, then the dump is printed on | |
452 | STDERR instead of being returned. | |
453 | ||
454 | If you don't like importing a function that overrides Perl's | |
455 | not-so-useful builtin, then you can also import the same function as | |
456 | pp(), mnemonic for "pretty-print". | |
457 | ||
458 | =head1 HISTORY | |
459 | ||
460 | The C<Data::Dump> module grew out of frustration with Sarathy's | |
461 | in-most-cases-excellent C<Data::Dumper>. Basic ideas and some code are shared | |
462 | with Sarathy's module. | |
463 | ||
464 | The C<Data::Dump> module provides a much simpler interface than | |
465 | C<Data::Dumper>. No OO interface is available and there are no | |
466 | configuration options to worry about (yet :-). The other benefit is | |
467 | that the dump produced does not try to set any variables. It only | |
468 | returns what is needed to produce a copy of the arguments. This means | |
469 | that C<dump("foo")> simply returns C<"foo">, and C<dump(1..5)> simply | |
470 | returns C<(1, 2, 3, 4, 5)>. | |
471 | ||
472 | =head1 SEE ALSO | |
473 | ||
474 | L<Data::Dumper>, L<Storable> | |
475 | ||
476 | =head1 AUTHORS | |
477 | ||
478 | The C<Data::Dump> module is written by Gisle Aas <gisle@aas.no>, based | |
479 | on C<Data::Dumper> by Gurusamy Sarathy <gsar@umich.edu>. | |
480 | ||
481 | Copyright 1998-2000,2003 Gisle Aas. | |
482 | Copyright 1996-1998 Gurusamy Sarathy. | |
483 | ||
484 | This library is free software; you can redistribute it and/or | |
485 | modify it under the same terms as Perl itself. | |
486 | ||
487 | =cut |