| 1 | package B::Concise; |
| 2 | # Copyright (C) 2000, 2001 Stephen McCamant. All rights reserved. |
| 3 | # This program is free software; you can redistribute and/or modify it |
| 4 | # under the same terms as Perl itself. |
| 5 | |
| 6 | use strict; |
| 7 | use warnings; |
| 8 | |
| 9 | use Exporter (); |
| 10 | |
| 11 | our $VERSION = "0.52"; |
| 12 | our @ISA = qw(Exporter); |
| 13 | our @EXPORT_OK = qw(set_style add_callback); |
| 14 | |
| 15 | use B qw(class ppname main_start main_root main_cv cstring svref_2object |
| 16 | SVf_IOK SVf_NOK SVf_POK OPf_KIDS); |
| 17 | |
| 18 | my %style = |
| 19 | ("terse" => |
| 20 | ["(?(#label =>\n)?)(*( )*)#class (#addr) #name (?([#targ])?) " |
| 21 | . "#svclass~(?((#svaddr))?)~#svval~(?(label \"#coplabel\")?)\n", |
| 22 | "(*( )*)goto #class (#addr)\n", |
| 23 | "#class pp_#name"], |
| 24 | "concise" => |
| 25 | ["#hyphseq2 (*( (x( ;)x))*)<#classsym> " |
| 26 | . "#exname#arg(?([#targarglife])?)~#flags(?(/#private)?)(x(;~->#next)x)\n", |
| 27 | " (*( )*) goto #seq\n", |
| 28 | "(?(<#seq>)?)#exname#arg(?([#targarglife])?)"], |
| 29 | "linenoise" => |
| 30 | ["(x(;(*( )*))x)#noise#arg(?([#targarg])?)(x( ;\n)x)", |
| 31 | "gt_#seq ", |
| 32 | "(?(#seq)?)#noise#arg(?([#targarg])?)"], |
| 33 | "debug" => |
| 34 | ["#class (#addr)\n\top_next\t\t#nextaddr\n\top_sibling\t#sibaddr\n\t" |
| 35 | . "op_ppaddr\tPL_ppaddr[OP_#NAME]\n\top_type\t\t#typenum\n\top_seq\t\t" |
| 36 | . "#seqnum\n\top_flags\t#flagval\n\top_private\t#privval\n" |
| 37 | . "(?(\top_first\t#firstaddr\n)?)(?(\top_last\t\t#lastaddr\n)?)" |
| 38 | . "(?(\top_sv\t\t#svaddr\n)?)", |
| 39 | " GOTO #addr\n", |
| 40 | "#addr"], |
| 41 | "env" => [$ENV{B_CONCISE_FORMAT}, $ENV{B_CONCISE_GOTO_FORMAT}, |
| 42 | $ENV{B_CONCISE_TREE_FORMAT}], |
| 43 | ); |
| 44 | |
| 45 | my($format, $gotofmt, $treefmt); |
| 46 | my $curcv; |
| 47 | my($seq_base, $cop_seq_base); |
| 48 | my @callbacks; |
| 49 | |
| 50 | sub set_style { |
| 51 | ($format, $gotofmt, $treefmt) = @_; |
| 52 | } |
| 53 | |
| 54 | sub add_callback { |
| 55 | push @callbacks, @_; |
| 56 | } |
| 57 | |
| 58 | sub concise_cv { |
| 59 | my ($order, $cvref) = @_; |
| 60 | my $cv = svref_2object($cvref); |
| 61 | $curcv = $cv; |
| 62 | if ($order eq "exec") { |
| 63 | walk_exec($cv->START); |
| 64 | } elsif ($order eq "basic") { |
| 65 | walk_topdown($cv->ROOT, sub { $_[0]->concise($_[1]) }, 0); |
| 66 | } else { |
| 67 | print tree($cv->ROOT, 0) |
| 68 | } |
| 69 | } |
| 70 | |
| 71 | my $start_sym = "\e(0"; # "\cN" sometimes also works |
| 72 | my $end_sym = "\e(B"; # "\cO" respectively |
| 73 | |
| 74 | my @tree_decorations = |
| 75 | ([" ", "--", "+-", "|-", "| ", "`-", "-", 1], |
| 76 | [" ", "-", "+", "+", "|", "`", "", 0], |
| 77 | [" ", map("$start_sym$_$end_sym", "qq", "wq", "tq", "x ", "mq", "q"), 1], |
| 78 | [" ", map("$start_sym$_$end_sym", "q", "w", "t", "x", "m"), "", 0], |
| 79 | ); |
| 80 | my $tree_style = 0; |
| 81 | |
| 82 | my $base = 36; |
| 83 | my $big_endian = 1; |
| 84 | |
| 85 | my $order = "basic"; |
| 86 | |
| 87 | set_style(@{$style{concise}}); |
| 88 | |
| 89 | sub compile { |
| 90 | my @options = grep(/^-/, @_); |
| 91 | my @args = grep(!/^-/, @_); |
| 92 | my $do_main = 0; |
| 93 | for my $o (@options) { |
| 94 | if ($o eq "-basic") { |
| 95 | $order = "basic"; |
| 96 | } elsif ($o eq "-exec") { |
| 97 | $order = "exec"; |
| 98 | } elsif ($o eq "-tree") { |
| 99 | $order = "tree"; |
| 100 | } elsif ($o eq "-compact") { |
| 101 | $tree_style |= 1; |
| 102 | } elsif ($o eq "-loose") { |
| 103 | $tree_style &= ~1; |
| 104 | } elsif ($o eq "-vt") { |
| 105 | $tree_style |= 2; |
| 106 | } elsif ($o eq "-ascii") { |
| 107 | $tree_style &= ~2; |
| 108 | } elsif ($o eq "-main") { |
| 109 | $do_main = 1; |
| 110 | } elsif ($o =~ /^-base(\d+)$/) { |
| 111 | $base = $1; |
| 112 | } elsif ($o eq "-bigendian") { |
| 113 | $big_endian = 1; |
| 114 | } elsif ($o eq "-littleendian") { |
| 115 | $big_endian = 0; |
| 116 | } elsif (exists $style{substr($o, 1)}) { |
| 117 | set_style(@{$style{substr($o, 1)}}); |
| 118 | } else { |
| 119 | warn "Option $o unrecognized"; |
| 120 | } |
| 121 | } |
| 122 | if (@args) { |
| 123 | return sub { |
| 124 | for my $objname (@args) { |
| 125 | $objname = "main::" . $objname unless $objname =~ /::/; |
| 126 | eval "concise_cv(\$order, \\&$objname)"; |
| 127 | die "concise_cv($order, \\&$objname) failed: $@" if $@; |
| 128 | } |
| 129 | } |
| 130 | } |
| 131 | if (!@args or $do_main) { |
| 132 | if ($order eq "exec") { |
| 133 | return sub { return if class(main_start) eq "NULL"; |
| 134 | $curcv = main_cv; |
| 135 | walk_exec(main_start) } |
| 136 | } elsif ($order eq "tree") { |
| 137 | return sub { return if class(main_root) eq "NULL"; |
| 138 | $curcv = main_cv; |
| 139 | print tree(main_root, 0) } |
| 140 | } elsif ($order eq "basic") { |
| 141 | return sub { return if class(main_root) eq "NULL"; |
| 142 | $curcv = main_cv; |
| 143 | walk_topdown(main_root, |
| 144 | sub { $_[0]->concise($_[1]) }, 0); } |
| 145 | } |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | my %labels; |
| 150 | my $lastnext; |
| 151 | |
| 152 | my %opclass = ('OP' => "0", 'UNOP' => "1", 'BINOP' => "2", 'LOGOP' => "|", |
| 153 | 'LISTOP' => "@", 'PMOP' => "/", 'SVOP' => "\$", 'GVOP' => "*", |
| 154 | 'PVOP' => '"', 'LOOP' => "{", 'COP' => ";", 'PADOP' => "#"); |
| 155 | |
| 156 | no warnings 'qw'; # "Possible attempt to put comments..." |
| 157 | my @linenoise = |
| 158 | qw'# () sc ( @? 1 $* gv *{ m$ m@ m% m? p/ *$ $ $# & a& pt \\ s\\ rf bl |
| 159 | ` *? <> ?? ?/ r/ c/ // qr s/ /c y/ = @= C sC Cp sp df un BM po +1 +I |
| 160 | -1 -I 1+ I+ 1- I- ** * i* / i/ %$ i% x + i+ - i- . " << >> < i< |
| 161 | > i> <= i, >= i. == i= != i! <? i? s< s> s, s. s= s! s? b& b^ b| -0 -i |
| 162 | ! ~ a2 si cs rd sr e^ lg sq in %x %o ab le ss ve ix ri sf FL od ch cy |
| 163 | uf lf uc lc qm @ [f [ @[ eh vl ky dl ex % ${ @{ uk pk st jn ) )[ a@ |
| 164 | a% sl +] -] [- [+ so rv GS GW MS MW .. f. .f && || ^^ ?: &= |= -> s{ s} |
| 165 | v} ca wa di rs ;; ; ;d }{ { } {} f{ it {l l} rt }l }n }r dm }g }e ^o |
| 166 | ^c ^| ^# um bm t~ u~ ~d DB db ^s se ^g ^r {w }w pf pr ^O ^K ^R ^W ^d ^v |
| 167 | ^e ^t ^k t. fc ic fl .s .p .b .c .l .a .h g1 s1 g2 s2 ?. l? -R -W -X -r |
| 168 | -w -x -e -o -O -z -s -M -A -C -S -c -b -f -d -p -l -u -g -k -t -T -B cd |
| 169 | co cr u. cm ut r. l@ s@ r@ mD uD oD rD tD sD wD cD f$ w$ p$ sh e$ k$ g3 |
| 170 | g4 s4 g5 s5 T@ C@ L@ G@ A@ S@ Hg Hc Hr Hw Mg Mc Ms Mr Sg Sc So rq do {e |
| 171 | e} {t t} g6 G6 6e g7 G7 7e g8 G8 8e g9 G9 9e 6s 7s 8s 9s 6E 7E 8E 9E Pn |
| 172 | Pu GP SP EP Gn Gg GG SG EG g0 c$ lk t$ ;s n>'; |
| 173 | |
| 174 | my $chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; |
| 175 | |
| 176 | sub op_flags { |
| 177 | my($x) = @_; |
| 178 | my(@v); |
| 179 | push @v, "v" if ($x & 3) == 1; |
| 180 | push @v, "s" if ($x & 3) == 2; |
| 181 | push @v, "l" if ($x & 3) == 3; |
| 182 | push @v, "K" if $x & 4; |
| 183 | push @v, "P" if $x & 8; |
| 184 | push @v, "R" if $x & 16; |
| 185 | push @v, "M" if $x & 32; |
| 186 | push @v, "S" if $x & 64; |
| 187 | push @v, "*" if $x & 128; |
| 188 | return join("", @v); |
| 189 | } |
| 190 | |
| 191 | sub base_n { |
| 192 | my $x = shift; |
| 193 | return "-" . base_n(-$x) if $x < 0; |
| 194 | my $str = ""; |
| 195 | do { $str .= substr($chars, $x % $base, 1) } while $x = int($x / $base); |
| 196 | $str = reverse $str if $big_endian; |
| 197 | return $str; |
| 198 | } |
| 199 | |
| 200 | sub seq { return $_[0]->seq ? base_n($_[0]->seq - $seq_base) : "-" } |
| 201 | |
| 202 | sub walk_topdown { |
| 203 | my($op, $sub, $level) = @_; |
| 204 | $sub->($op, $level); |
| 205 | if ($op->flags & OPf_KIDS) { |
| 206 | for (my $kid = $op->first; $$kid; $kid = $kid->sibling) { |
| 207 | walk_topdown($kid, $sub, $level + 1); |
| 208 | } |
| 209 | } |
| 210 | if (class($op) eq "PMOP" and $ {$op->pmreplroot} |
| 211 | and $op->pmreplroot->isa("B::OP")) { |
| 212 | walk_topdown($op->pmreplroot, $sub, $level + 1); |
| 213 | } |
| 214 | } |
| 215 | |
| 216 | sub walklines { |
| 217 | my($ar, $level) = @_; |
| 218 | for my $l (@$ar) { |
| 219 | if (ref($l) eq "ARRAY") { |
| 220 | walklines($l, $level + 1); |
| 221 | } else { |
| 222 | $l->concise($level); |
| 223 | } |
| 224 | } |
| 225 | } |
| 226 | |
| 227 | sub walk_exec { |
| 228 | my($top, $level) = @_; |
| 229 | my %opsseen; |
| 230 | my @lines; |
| 231 | my @todo = ([$top, \@lines]); |
| 232 | while (@todo and my($op, $targ) = @{shift @todo}) { |
| 233 | for (; $$op; $op = $op->next) { |
| 234 | last if $opsseen{$$op}++; |
| 235 | push @$targ, $op; |
| 236 | my $name = $op->name; |
| 237 | if ($name |
| 238 | =~ /^(or|and|(map|grep)while|entertry|range|cond_expr)$/) { |
| 239 | my $ar = []; |
| 240 | push @$targ, $ar; |
| 241 | push @todo, [$op->other, $ar]; |
| 242 | } elsif ($name eq "subst" and $ {$op->pmreplstart}) { |
| 243 | my $ar = []; |
| 244 | push @$targ, $ar; |
| 245 | push @todo, [$op->pmreplstart, $ar]; |
| 246 | } elsif ($name =~ /^enter(loop|iter)$/) { |
| 247 | $labels{$op->nextop->seq} = "NEXT"; |
| 248 | $labels{$op->lastop->seq} = "LAST"; |
| 249 | $labels{$op->redoop->seq} = "REDO"; |
| 250 | } |
| 251 | } |
| 252 | } |
| 253 | walklines(\@lines, 0); |
| 254 | } |
| 255 | |
| 256 | sub fmt_line { |
| 257 | my($hr, $fmt, $level) = @_; |
| 258 | my $text = $fmt; |
| 259 | $text =~ s/\(\?\(([^\#]*?)\#(\w+)([^\#]*?)\)\?\)/ |
| 260 | $hr->{$2} ? $1.$hr->{$2}.$3 : ""/eg; |
| 261 | $text =~ s/\(x\((.*?);(.*?)\)x\)/$order eq "exec" ? $1 : $2/egs; |
| 262 | $text =~ s/\(\*\(([^;]*?)\)\*\)/$1 x $level/egs; |
| 263 | $text =~ s/\(\*\((.*?);(.*?)\)\*\)/$1 x ($level - 1) . $2 x ($level>0)/egs; |
| 264 | $text =~ s/#([a-zA-Z]+)(\d+)/sprintf("%-$2s", $hr->{$1})/eg; |
| 265 | $text =~ s/#([a-zA-Z]+)/$hr->{$1}/eg; |
| 266 | $text =~ s/[ \t]*~+[ \t]*/ /g; |
| 267 | return $text; |
| 268 | } |
| 269 | |
| 270 | my %priv; |
| 271 | $priv{$_}{128} = "LVINTRO" |
| 272 | for ("pos", "substr", "vec", "threadsv", "gvsv", "rv2sv", "rv2hv", "rv2gv", |
| 273 | "rv2av", "rv2arylen", "aelem", "helem", "aslice", "hslice", "padsv", |
| 274 | "padav", "padhv"); |
| 275 | $priv{$_}{64} = "REFC" for ("leave", "leavesub", "leavesublv", "leavewrite"); |
| 276 | $priv{"aassign"}{64} = "COMMON"; |
| 277 | $priv{"aassign"}{32} = "PHASH"; |
| 278 | $priv{"sassign"}{64} = "BKWARD"; |
| 279 | $priv{$_}{64} = "RTIME" for ("match", "subst", "substcont"); |
| 280 | @{$priv{"trans"}}{1,2,4,8,16,64} = ("<UTF", ">UTF", "IDENT", "SQUASH", "DEL", |
| 281 | "COMPL", "GROWS"); |
| 282 | $priv{"repeat"}{64} = "DOLIST"; |
| 283 | $priv{"leaveloop"}{64} = "CONT"; |
| 284 | @{$priv{$_}}{32,64,96} = ("DREFAV", "DREFHV", "DREFSV") |
| 285 | for ("entersub", map("rv2${_}v", "a", "s", "h", "g"), "aelem", "helem"); |
| 286 | $priv{"entersub"}{16} = "DBG"; |
| 287 | $priv{"entersub"}{32} = "TARG"; |
| 288 | @{$priv{$_}}{4,8,128} = ("INARGS","AMPER","NO()") for ("entersub", "rv2cv"); |
| 289 | $priv{"gv"}{32} = "EARLYCV"; |
| 290 | $priv{"aelem"}{16} = $priv{"helem"}{16} = "LVDEFER"; |
| 291 | $priv{$_}{16} = "OURINTR" for ("gvsv", "rv2sv", "rv2av", "rv2hv", "r2gv"); |
| 292 | $priv{$_}{16} = "TARGMY" |
| 293 | for (map(($_,"s$_"),"chop", "chomp"), |
| 294 | map(($_,"i_$_"), "postinc", "postdec", "multiply", "divide", "modulo", |
| 295 | "add", "subtract", "negate"), "pow", "concat", "stringify", |
| 296 | "left_shift", "right_shift", "bit_and", "bit_xor", "bit_or", |
| 297 | "complement", "atan2", "sin", "cos", "rand", "exp", "log", "sqrt", |
| 298 | "int", "hex", "oct", "abs", "length", "index", "rindex", "sprintf", |
| 299 | "ord", "chr", "crypt", "quotemeta", "join", "push", "unshift", "flock", |
| 300 | "chdir", "chown", "chroot", "unlink", "chmod", "utime", "rename", |
| 301 | "link", "symlink", "mkdir", "rmdir", "wait", "waitpid", "system", |
| 302 | "exec", "kill", "getppid", "getpgrp", "setpgrp", "getpriority", |
| 303 | "setpriority", "time", "sleep"); |
| 304 | @{$priv{"const"}}{8,16,32,64,128} = ("STRICT","ENTERED", '$[', "BARE", "WARN"); |
| 305 | $priv{"flip"}{64} = $priv{"flop"}{64} = "LINENUM"; |
| 306 | $priv{"list"}{64} = "GUESSED"; |
| 307 | $priv{"delete"}{64} = "SLICE"; |
| 308 | $priv{"exists"}{64} = "SUB"; |
| 309 | $priv{$_}{64} = "LOCALE" |
| 310 | for ("sort", "prtf", "sprintf", "slt", "sle", "seq", "sne", "sgt", "sge", |
| 311 | "scmp", "lc", "uc", "lcfirst", "ucfirst"); |
| 312 | @{$priv{"sort"}}{1,2,4} = ("NUM", "INT", "REV"); |
| 313 | $priv{"threadsv"}{64} = "SVREFd"; |
| 314 | $priv{$_}{16} = "INBIN" for ("open", "backtick"); |
| 315 | $priv{$_}{32} = "INCR" for ("open", "backtick"); |
| 316 | $priv{$_}{64} = "OUTBIN" for ("open", "backtick"); |
| 317 | $priv{$_}{128} = "OUTCR" for ("open", "backtick"); |
| 318 | $priv{"exit"}{128} = "VMS"; |
| 319 | |
| 320 | sub private_flags { |
| 321 | my($name, $x) = @_; |
| 322 | my @s; |
| 323 | for my $flag (128, 96, 64, 32, 16, 8, 4, 2, 1) { |
| 324 | if ($priv{$name}{$flag} and $x & $flag and $x >= $flag) { |
| 325 | $x -= $flag; |
| 326 | push @s, $priv{$name}{$flag}; |
| 327 | } |
| 328 | } |
| 329 | push @s, $x if $x; |
| 330 | return join(",", @s); |
| 331 | } |
| 332 | |
| 333 | sub concise_op { |
| 334 | my ($op, $level, $format) = @_; |
| 335 | my %h; |
| 336 | $h{exname} = $h{name} = $op->name; |
| 337 | $h{NAME} = uc $h{name}; |
| 338 | $h{class} = class($op); |
| 339 | $h{extarg} = $h{targ} = $op->targ; |
| 340 | $h{extarg} = "" unless $h{extarg}; |
| 341 | if ($h{name} eq "null" and $h{targ}) { |
| 342 | $h{exname} = "ex-" . substr(ppname($h{targ}), 3); |
| 343 | $h{extarg} = ""; |
| 344 | } elsif ($h{targ}) { |
| 345 | my $padname = (($curcv->PADLIST->ARRAY)[0]->ARRAY)[$h{targ}]; |
| 346 | if (defined $padname and class($padname) ne "SPECIAL") { |
| 347 | $h{targarg} = $padname->PVX; |
| 348 | my $intro = $padname->NVX - $cop_seq_base; |
| 349 | my $finish = int($padname->IVX) - $cop_seq_base; |
| 350 | $finish = "end" if $finish == 999999999 - $cop_seq_base; |
| 351 | $h{targarglife} = "$h{targarg}:$intro,$finish"; |
| 352 | } else { |
| 353 | $h{targarglife} = $h{targarg} = "t" . $h{targ}; |
| 354 | } |
| 355 | } |
| 356 | $h{arg} = ""; |
| 357 | $h{svclass} = $h{svaddr} = $h{svval} = ""; |
| 358 | if ($h{class} eq "PMOP") { |
| 359 | my $precomp = $op->precomp; |
| 360 | if (defined $precomp) { |
| 361 | # Escape literal control sequences |
| 362 | for ($precomp) { |
| 363 | s/\t/\\t/g; s/\n/\\n/g; s/\r/\\r/g; |
| 364 | # How can we do the below portably? |
| 365 | #s/([\0-\037\177-\377])/"\\".sprintf("%03o", ord($1))/eg; |
| 366 | } |
| 367 | $precomp = "/$precomp/"; |
| 368 | } |
| 369 | else { $precomp = ""; } |
| 370 | my $pmreplroot = $op->pmreplroot; |
| 371 | my $pmreplstart; |
| 372 | if ($$pmreplroot && $pmreplroot->isa("B::GV")) { |
| 373 | # with C<@stash_array = split(/pat/, str);>, |
| 374 | # *stash_array is stored in pmreplroot. |
| 375 | $h{arg} = "($precomp => \@" . $pmreplroot->NAME . ")"; |
| 376 | } elsif ($ {$op->pmreplstart}) { |
| 377 | undef $lastnext; |
| 378 | $pmreplstart = "replstart->" . seq($op->pmreplstart); |
| 379 | $h{arg} = "(" . join(" ", $precomp, $pmreplstart) . ")"; |
| 380 | } else { |
| 381 | $h{arg} = "($precomp)"; |
| 382 | } |
| 383 | } elsif ($h{class} eq "PVOP" and $h{name} ne "trans") { |
| 384 | $h{arg} = '("' . $op->pv . '")'; |
| 385 | $h{svval} = '"' . $op->pv . '"'; |
| 386 | } elsif ($h{class} eq "COP") { |
| 387 | my $label = $op->label; |
| 388 | $h{coplabel} = $label; |
| 389 | $label = $label ? "$label: " : ""; |
| 390 | my $loc = $op->file; |
| 391 | $loc =~ s[.*/][]; |
| 392 | $loc .= ":" . $op->line; |
| 393 | my($stash, $cseq) = ($op->stash->NAME, $op->cop_seq - $cop_seq_base); |
| 394 | my $arybase = $op->arybase; |
| 395 | $arybase = $arybase ? ' $[=' . $arybase : ""; |
| 396 | $h{arg} = "($label$stash $cseq $loc$arybase)"; |
| 397 | } elsif ($h{class} eq "LOOP") { |
| 398 | $h{arg} = "(next->" . seq($op->nextop) . " last->" . seq($op->lastop) |
| 399 | . " redo->" . seq($op->redoop) . ")"; |
| 400 | } elsif ($h{class} eq "LOGOP") { |
| 401 | undef $lastnext; |
| 402 | $h{arg} = "(other->" . seq($op->other) . ")"; |
| 403 | } elsif ($h{class} eq "SVOP") { |
| 404 | my $sv = $op->sv; |
| 405 | $h{svclass} = class($sv); |
| 406 | $h{svaddr} = sprintf("%#x", $$sv); |
| 407 | if ($h{svclass} eq "GV") { |
| 408 | my $gv = $sv; |
| 409 | my $stash = $gv->STASH->NAME; |
| 410 | if ($stash eq "main") { |
| 411 | $stash = ""; |
| 412 | } else { |
| 413 | $stash = $stash . "::"; |
| 414 | } |
| 415 | $h{arg} = "(*$stash" . $gv->SAFENAME . ")"; |
| 416 | $h{svval} = "*$stash" . $gv->SAFENAME; |
| 417 | } else { |
| 418 | while (class($sv) eq "RV") { |
| 419 | $h{svval} .= "\\"; |
| 420 | $sv = $sv->RV; |
| 421 | } |
| 422 | if (class($sv) eq "SPECIAL") { |
| 423 | $h{svval} = ["Null", "sv_undef", "sv_yes", "sv_no"]->[$$sv]; |
| 424 | } elsif ($sv->FLAGS & SVf_NOK) { |
| 425 | $h{svval} = $sv->NV; |
| 426 | } elsif ($sv->FLAGS & SVf_IOK) { |
| 427 | $h{svval} = $sv->IV; |
| 428 | } elsif ($sv->FLAGS & SVf_POK) { |
| 429 | $h{svval} = cstring($sv->PV); |
| 430 | } |
| 431 | $h{arg} = "($h{svclass} $h{svval})"; |
| 432 | } |
| 433 | } |
| 434 | $h{seq} = $h{hyphseq} = seq($op); |
| 435 | $h{seq} = "" if $h{seq} eq "-"; |
| 436 | $h{seqnum} = $op->seq; |
| 437 | $h{next} = $op->next; |
| 438 | $h{next} = (class($h{next}) eq "NULL") ? "(end)" : seq($h{next}); |
| 439 | $h{nextaddr} = sprintf("%#x", $ {$op->next}); |
| 440 | $h{sibaddr} = sprintf("%#x", $ {$op->sibling}); |
| 441 | $h{firstaddr} = sprintf("%#x", $ {$op->first}) if $op->can("first"); |
| 442 | $h{lastaddr} = sprintf("%#x", $ {$op->last}) if $op->can("last"); |
| 443 | |
| 444 | $h{classsym} = $opclass{$h{class}}; |
| 445 | $h{flagval} = $op->flags; |
| 446 | $h{flags} = op_flags($op->flags); |
| 447 | $h{privval} = $op->private; |
| 448 | $h{private} = private_flags($h{name}, $op->private); |
| 449 | $h{addr} = sprintf("%#x", $$op); |
| 450 | $h{label} = $labels{$op->seq}; |
| 451 | $h{typenum} = $op->type; |
| 452 | $h{noise} = $linenoise[$op->type]; |
| 453 | $_->(\%h, $op, \$format, \$level) for @callbacks; |
| 454 | return fmt_line(\%h, $format, $level); |
| 455 | } |
| 456 | |
| 457 | sub B::OP::concise { |
| 458 | my($op, $level) = @_; |
| 459 | if ($order eq "exec" and $lastnext and $$lastnext != $$op) { |
| 460 | my $h = {"seq" => seq($lastnext), "class" => class($lastnext), |
| 461 | "addr" => sprintf("%#x", $$lastnext)}; |
| 462 | print fmt_line($h, $gotofmt, $level+1); |
| 463 | } |
| 464 | $lastnext = $op->next; |
| 465 | print concise_op($op, $level, $format); |
| 466 | } |
| 467 | |
| 468 | sub tree { |
| 469 | my $op = shift; |
| 470 | my $level = shift; |
| 471 | my $style = $tree_decorations[$tree_style]; |
| 472 | my($space, $single, $kids, $kid, $nokid, $last, $lead, $size) = @$style; |
| 473 | my $name = concise_op($op, $level, $treefmt); |
| 474 | if (not $op->flags & OPf_KIDS) { |
| 475 | return $name . "\n"; |
| 476 | } |
| 477 | my @lines; |
| 478 | for (my $kid = $op->first; $$kid; $kid = $kid->sibling) { |
| 479 | push @lines, tree($kid, $level+1); |
| 480 | } |
| 481 | my $i; |
| 482 | for ($i = $#lines; substr($lines[$i], 0, 1) eq " "; $i--) { |
| 483 | $lines[$i] = $space . $lines[$i]; |
| 484 | } |
| 485 | if ($i > 0) { |
| 486 | $lines[$i] = $last . $lines[$i]; |
| 487 | while ($i-- > 1) { |
| 488 | if (substr($lines[$i], 0, 1) eq " ") { |
| 489 | $lines[$i] = $nokid . $lines[$i]; |
| 490 | } else { |
| 491 | $lines[$i] = $kid . $lines[$i]; |
| 492 | } |
| 493 | } |
| 494 | $lines[$i] = $kids . $lines[$i]; |
| 495 | } else { |
| 496 | $lines[0] = $single . $lines[0]; |
| 497 | } |
| 498 | return("$name$lead" . shift @lines, |
| 499 | map(" " x (length($name)+$size) . $_, @lines)); |
| 500 | } |
| 501 | |
| 502 | # *** Warning: fragile kludge ahead *** |
| 503 | # Because the B::* modules run in the same interpreter as the code |
| 504 | # they're compiling, their presence tends to distort the view we have |
| 505 | # of the code we're looking at. In particular, perl gives sequence |
| 506 | # numbers to both OPs in general and COPs in particular. If the |
| 507 | # program we're looking at were run on its own, these numbers would |
| 508 | # start at 1. Because all of B::Concise and all the modules it uses |
| 509 | # are compiled first, though, by the time we get to the user's program |
| 510 | # the sequence numbers are alreay at pretty high numbers, which would |
| 511 | # be distracting if you're trying to tell OPs apart. Therefore we'd |
| 512 | # like to subtract an offset from all the sequence numbers we display, |
| 513 | # to restore the simpler view of the world. The trick is to know what |
| 514 | # that offset will be, when we're still compiling B::Concise! If we |
| 515 | # hardcoded a value, it would have to change every time B::Concise or |
| 516 | # other modules we use do. To help a little, what we do here is |
| 517 | # compile a little code at the end of the module, and compute the base |
| 518 | # sequence number for the user's program as being a small offset |
| 519 | # later, so all we have to worry about are changes in the offset. |
| 520 | |
| 521 | # When you say "perl -MO=Concise -e '$a'", the output should look like: |
| 522 | |
| 523 | # 4 <@> leave[t1] vKP/REFC ->(end) |
| 524 | # 1 <0> enter ->2 |
| 525 | #^ smallest OP sequence number should be 1 |
| 526 | # 2 <;> nextstate(main 1 -e:1) v ->3 |
| 527 | # ^ smallest COP sequence number should be 1 |
| 528 | # - <1> ex-rv2sv vK/1 ->4 |
| 529 | # 3 <$> gvsv(*a) s ->4 |
| 530 | |
| 531 | # If either of the marked numbers there aren't 1, it means you need to |
| 532 | # update the corresponding magic number in the next two lines. |
| 533 | # Remember, these need to stay the last things in the module. |
| 534 | |
| 535 | # Why these are different for MacOS? Does it matter? |
| 536 | my $cop_seq_mnum = $^O eq 'MacOS' ? 12 : 11; |
| 537 | my $seq_mnum = $^O eq 'MacOS' ? 100 : 84; |
| 538 | $cop_seq_base = svref_2object(eval 'sub{0;}')->START->cop_seq + $cop_seq_mnum; |
| 539 | $seq_base = svref_2object(eval 'sub{}')->START->seq + $seq_mnum; |
| 540 | |
| 541 | 1; |
| 542 | |
| 543 | __END__ |
| 544 | |
| 545 | =head1 NAME |
| 546 | |
| 547 | B::Concise - Walk Perl syntax tree, printing concise info about ops |
| 548 | |
| 549 | =head1 SYNOPSIS |
| 550 | |
| 551 | perl -MO=Concise[,OPTIONS] foo.pl |
| 552 | |
| 553 | use B::Concise qw(set_style add_callback); |
| 554 | |
| 555 | =head1 DESCRIPTION |
| 556 | |
| 557 | This compiler backend prints the internal OPs of a Perl program's syntax |
| 558 | tree in one of several space-efficient text formats suitable for debugging |
| 559 | the inner workings of perl or other compiler backends. It can print OPs in |
| 560 | the order they appear in the OP tree, in the order they will execute, or |
| 561 | in a text approximation to their tree structure, and the format of the |
| 562 | information displyed is customizable. Its function is similar to that of |
| 563 | perl's B<-Dx> debugging flag or the B<B::Terse> module, but it is more |
| 564 | sophisticated and flexible. |
| 565 | |
| 566 | =head1 EXAMPLE |
| 567 | |
| 568 | Here's is a short example of output, using the default formatting |
| 569 | conventions : |
| 570 | |
| 571 | % perl -MO=Concise -e '$a = $b + 42' |
| 572 | 8 <@> leave[t1] vKP/REFC ->(end) |
| 573 | 1 <0> enter ->2 |
| 574 | 2 <;> nextstate(main 1 -e:1) v ->3 |
| 575 | 7 <2> sassign vKS/2 ->8 |
| 576 | 5 <2> add[t1] sK/2 ->6 |
| 577 | - <1> ex-rv2sv sK/1 ->4 |
| 578 | 3 <$> gvsv(*b) s ->4 |
| 579 | 4 <$> const(IV 42) s ->5 |
| 580 | - <1> ex-rv2sv sKRM*/1 ->7 |
| 581 | 6 <$> gvsv(*a) s ->7 |
| 582 | |
| 583 | Each line corresponds to an operator. Null ops appear as C<ex-opname>, |
| 584 | where I<opname> is the op that has been optimized away by perl. |
| 585 | |
| 586 | The number on the first row indicates the op's sequence number. It's |
| 587 | given in base 36 by default. |
| 588 | |
| 589 | The symbol between angle brackets indicates the op's type : for example, |
| 590 | <2> is a BINOP, <@> a LISTOP, etc. (see L</"OP class abbreviations">). |
| 591 | |
| 592 | The opname may be followed by op-specific information in parentheses |
| 593 | (e.g. C<gvsv(*b)>), and by targ information in brackets (e.g. |
| 594 | C<leave[t1]>). |
| 595 | |
| 596 | Next come the op flags. The common flags are listed below |
| 597 | (L</"OP flags abbreviations">). The private flags follow, separated |
| 598 | by a slash. For example, C<vKP/REFC> means that the leave op has |
| 599 | public flags OPf_WANT_VOID, OPf_KIDS, and OPf_PARENS, and the private |
| 600 | flag OPpREFCOUNTED. |
| 601 | |
| 602 | Finally an arrow points to the sequence number of the next op. |
| 603 | |
| 604 | =head1 OPTIONS |
| 605 | |
| 606 | Arguments that don't start with a hyphen are taken to be the names of |
| 607 | subroutines to print the OPs of; if no such functions are specified, the |
| 608 | main body of the program (outside any subroutines, and not including use'd |
| 609 | or require'd files) is printed. |
| 610 | |
| 611 | =over 4 |
| 612 | |
| 613 | =item B<-basic> |
| 614 | |
| 615 | Print OPs in the order they appear in the OP tree (a preorder |
| 616 | traversal, starting at the root). The indentation of each OP shows its |
| 617 | level in the tree. This mode is the default, so the flag is included |
| 618 | simply for completeness. |
| 619 | |
| 620 | =item B<-exec> |
| 621 | |
| 622 | Print OPs in the order they would normally execute (for the majority |
| 623 | of constructs this is a postorder traversal of the tree, ending at the |
| 624 | root). In most cases the OP that usually follows a given OP will |
| 625 | appear directly below it; alternate paths are shown by indentation. In |
| 626 | cases like loops when control jumps out of a linear path, a 'goto' |
| 627 | line is generated. |
| 628 | |
| 629 | =item B<-tree> |
| 630 | |
| 631 | Print OPs in a text approximation of a tree, with the root of the tree |
| 632 | at the left and 'left-to-right' order of children transformed into |
| 633 | 'top-to-bottom'. Because this mode grows both to the right and down, |
| 634 | it isn't suitable for large programs (unless you have a very wide |
| 635 | terminal). |
| 636 | |
| 637 | =item B<-compact> |
| 638 | |
| 639 | Use a tree format in which the minimum amount of space is used for the |
| 640 | lines connecting nodes (one character in most cases). This squeezes out |
| 641 | a few precious columns of screen real estate. |
| 642 | |
| 643 | =item B<-loose> |
| 644 | |
| 645 | Use a tree format that uses longer edges to separate OP nodes. This format |
| 646 | tends to look better than the compact one, especially in ASCII, and is |
| 647 | the default. |
| 648 | |
| 649 | =item B<-vt> |
| 650 | |
| 651 | Use tree connecting characters drawn from the VT100 line-drawing set. |
| 652 | This looks better if your terminal supports it. |
| 653 | |
| 654 | =item B<-ascii> |
| 655 | |
| 656 | Draw the tree with standard ASCII characters like C<+> and C<|>. These don't |
| 657 | look as clean as the VT100 characters, but they'll work with almost any |
| 658 | terminal (or the horizontal scrolling mode of less(1)) and are suitable |
| 659 | for text documentation or email. This is the default. |
| 660 | |
| 661 | =item B<-main> |
| 662 | |
| 663 | Include the main program in the output, even if subroutines were also |
| 664 | specified. |
| 665 | |
| 666 | =item B<-base>I<n> |
| 667 | |
| 668 | Print OP sequence numbers in base I<n>. If I<n> is greater than 10, the |
| 669 | digit for 11 will be 'a', and so on. If I<n> is greater than 36, the digit |
| 670 | for 37 will be 'A', and so on until 62. Values greater than 62 are not |
| 671 | currently supported. The default is 36. |
| 672 | |
| 673 | =item B<-bigendian> |
| 674 | |
| 675 | Print sequence numbers with the most significant digit first. This is the |
| 676 | usual convention for Arabic numerals, and the default. |
| 677 | |
| 678 | =item B<-littleendian> |
| 679 | |
| 680 | Print seqence numbers with the least significant digit first. |
| 681 | |
| 682 | =item B<-concise> |
| 683 | |
| 684 | Use the author's favorite set of formatting conventions. This is the |
| 685 | default, of course. |
| 686 | |
| 687 | =item B<-terse> |
| 688 | |
| 689 | Use formatting conventions that emulate the ouput of B<B::Terse>. The |
| 690 | basic mode is almost indistinguishable from the real B<B::Terse>, and the |
| 691 | exec mode looks very similar, but is in a more logical order and lacks |
| 692 | curly brackets. B<B::Terse> doesn't have a tree mode, so the tree mode |
| 693 | is only vaguely reminiscient of B<B::Terse>. |
| 694 | |
| 695 | =item B<-linenoise> |
| 696 | |
| 697 | Use formatting conventions in which the name of each OP, rather than being |
| 698 | written out in full, is represented by a one- or two-character abbreviation. |
| 699 | This is mainly a joke. |
| 700 | |
| 701 | =item B<-debug> |
| 702 | |
| 703 | Use formatting conventions reminiscient of B<B::Debug>; these aren't |
| 704 | very concise at all. |
| 705 | |
| 706 | =item B<-env> |
| 707 | |
| 708 | Use formatting conventions read from the environment variables |
| 709 | C<B_CONCISE_FORMAT>, C<B_CONCISE_GOTO_FORMAT>, and C<B_CONCISE_TREE_FORMAT>. |
| 710 | |
| 711 | =back |
| 712 | |
| 713 | =head1 FORMATTING SPECIFICATIONS |
| 714 | |
| 715 | For each general style ('concise', 'terse', 'linenoise', etc.) there are |
| 716 | three specifications: one of how OPs should appear in the basic or exec |
| 717 | modes, one of how 'goto' lines should appear (these occur in the exec |
| 718 | mode only), and one of how nodes should appear in tree mode. Each has the |
| 719 | same format, described below. Any text that doesn't match a special |
| 720 | pattern is copied verbatim. |
| 721 | |
| 722 | =over 4 |
| 723 | |
| 724 | =item B<(x(>I<exec_text>B<;>I<basic_text>B<)x)> |
| 725 | |
| 726 | Generates I<exec_text> in exec mode, or I<basic_text> in basic mode. |
| 727 | |
| 728 | =item B<(*(>I<text>B<)*)> |
| 729 | |
| 730 | Generates one copy of I<text> for each indentation level. |
| 731 | |
| 732 | =item B<(*(>I<text1>B<;>I<text2>B<)*)> |
| 733 | |
| 734 | Generates one fewer copies of I<text1> than the indentation level, followed |
| 735 | by one copy of I<text2> if the indentation level is more than 0. |
| 736 | |
| 737 | =item B<(?(>I<text1>B<#>I<var>I<Text2>B<)?)> |
| 738 | |
| 739 | If the value of I<var> is true (not empty or zero), generates the |
| 740 | value of I<var> surrounded by I<text1> and I<Text2>, otherwise |
| 741 | nothing. |
| 742 | |
| 743 | =item B<#>I<var> |
| 744 | |
| 745 | Generates the value of the variable I<var>. |
| 746 | |
| 747 | =item B<#>I<var>I<N> |
| 748 | |
| 749 | Generates the value of I<var>, left jutified to fill I<N> spaces. |
| 750 | |
| 751 | =item B<~> |
| 752 | |
| 753 | Any number of tildes and surrounding whitespace will be collapsed to |
| 754 | a single space. |
| 755 | |
| 756 | =back |
| 757 | |
| 758 | The following variables are recognized: |
| 759 | |
| 760 | =over 4 |
| 761 | |
| 762 | =item B<#addr> |
| 763 | |
| 764 | The address of the OP, in hexidecimal. |
| 765 | |
| 766 | =item B<#arg> |
| 767 | |
| 768 | The OP-specific information of the OP (such as the SV for an SVOP, the |
| 769 | non-local exit pointers for a LOOP, etc.) enclosed in paretheses. |
| 770 | |
| 771 | =item B<#class> |
| 772 | |
| 773 | The B-determined class of the OP, in all caps. |
| 774 | |
| 775 | =item B<#classsym> |
| 776 | |
| 777 | A single symbol abbreviating the class of the OP. |
| 778 | |
| 779 | =item B<#coplabel> |
| 780 | |
| 781 | The label of the statement or block the OP is the start of, if any. |
| 782 | |
| 783 | =item B<#exname> |
| 784 | |
| 785 | The name of the OP, or 'ex-foo' if the OP is a null that used to be a foo. |
| 786 | |
| 787 | =item B<#extarg> |
| 788 | |
| 789 | The target of the OP, or nothing for a nulled OP. |
| 790 | |
| 791 | =item B<#firstaddr> |
| 792 | |
| 793 | The address of the OP's first child, in hexidecimal. |
| 794 | |
| 795 | =item B<#flags> |
| 796 | |
| 797 | The OP's flags, abbreviated as a series of symbols. |
| 798 | |
| 799 | =item B<#flagval> |
| 800 | |
| 801 | The numeric value of the OP's flags. |
| 802 | |
| 803 | =item B<#hyphseq> |
| 804 | |
| 805 | The sequence number of the OP, or a hyphen if it doesn't have one. |
| 806 | |
| 807 | =item B<#label> |
| 808 | |
| 809 | 'NEXT', 'LAST', or 'REDO' if the OP is a target of one of those in exec |
| 810 | mode, or empty otherwise. |
| 811 | |
| 812 | =item B<#lastaddr> |
| 813 | |
| 814 | The address of the OP's last child, in hexidecimal. |
| 815 | |
| 816 | =item B<#name> |
| 817 | |
| 818 | The OP's name. |
| 819 | |
| 820 | =item B<#NAME> |
| 821 | |
| 822 | The OP's name, in all caps. |
| 823 | |
| 824 | =item B<#next> |
| 825 | |
| 826 | The sequence number of the OP's next OP. |
| 827 | |
| 828 | =item B<#nextaddr> |
| 829 | |
| 830 | The address of the OP's next OP, in hexidecimal. |
| 831 | |
| 832 | =item B<#noise> |
| 833 | |
| 834 | The two-character abbreviation for the OP's name. |
| 835 | |
| 836 | =item B<#private> |
| 837 | |
| 838 | The OP's private flags, rendered with abbreviated names if possible. |
| 839 | |
| 840 | =item B<#privval> |
| 841 | |
| 842 | The numeric value of the OP's private flags. |
| 843 | |
| 844 | =item B<#seq> |
| 845 | |
| 846 | The sequence number of the OP. |
| 847 | |
| 848 | =item B<#seqnum> |
| 849 | |
| 850 | The real sequence number of the OP, as a regular number and not adjusted |
| 851 | to be relative to the start of the real program. (This will generally be |
| 852 | a fairly large number because all of B<B::Concise> is compiled before |
| 853 | your program is). |
| 854 | |
| 855 | =item B<#sibaddr> |
| 856 | |
| 857 | The address of the OP's next youngest sibling, in hexidecimal. |
| 858 | |
| 859 | =item B<#svaddr> |
| 860 | |
| 861 | The address of the OP's SV, if it has an SV, in hexidecimal. |
| 862 | |
| 863 | =item B<#svclass> |
| 864 | |
| 865 | The class of the OP's SV, if it has one, in all caps (e.g., 'IV'). |
| 866 | |
| 867 | =item B<#svval> |
| 868 | |
| 869 | The value of the OP's SV, if it has one, in a short human-readable format. |
| 870 | |
| 871 | =item B<#targ> |
| 872 | |
| 873 | The numeric value of the OP's targ. |
| 874 | |
| 875 | =item B<#targarg> |
| 876 | |
| 877 | The name of the variable the OP's targ refers to, if any, otherwise the |
| 878 | letter t followed by the OP's targ in decimal. |
| 879 | |
| 880 | =item B<#targarglife> |
| 881 | |
| 882 | Same as B<#targarg>, but followed by the COP sequence numbers that delimit |
| 883 | the variable's lifetime (or 'end' for a variable in an open scope) for a |
| 884 | variable. |
| 885 | |
| 886 | =item B<#typenum> |
| 887 | |
| 888 | The numeric value of the OP's type, in decimal. |
| 889 | |
| 890 | =back |
| 891 | |
| 892 | =head1 ABBREVIATIONS |
| 893 | |
| 894 | =head2 OP flags abbreviations |
| 895 | |
| 896 | v OPf_WANT_VOID Want nothing (void context) |
| 897 | s OPf_WANT_SCALAR Want single value (scalar context) |
| 898 | l OPf_WANT_LIST Want list of any length (list context) |
| 899 | K OPf_KIDS There is a firstborn child. |
| 900 | P OPf_PARENS This operator was parenthesized. |
| 901 | (Or block needs explicit scope entry.) |
| 902 | R OPf_REF Certified reference. |
| 903 | (Return container, not containee). |
| 904 | M OPf_MOD Will modify (lvalue). |
| 905 | S OPf_STACKED Some arg is arriving on the stack. |
| 906 | * OPf_SPECIAL Do something weird for this op (see op.h) |
| 907 | |
| 908 | =head2 OP class abbreviations |
| 909 | |
| 910 | 0 OP (aka BASEOP) An OP with no children |
| 911 | 1 UNOP An OP with one child |
| 912 | 2 BINOP An OP with two children |
| 913 | | LOGOP A control branch OP |
| 914 | @ LISTOP An OP that could have lots of children |
| 915 | / PMOP An OP with a regular expression |
| 916 | $ SVOP An OP with an SV |
| 917 | " PVOP An OP with a string |
| 918 | { LOOP An OP that holds pointers for a loop |
| 919 | ; COP An OP that marks the start of a statement |
| 920 | # PADOP An OP with a GV on the pad |
| 921 | |
| 922 | =head1 Using B::Concise outside of the O framework |
| 923 | |
| 924 | It is possible to extend B<B::Concise> by using it outside of the B<O> |
| 925 | framework and providing new styles and new variables. |
| 926 | |
| 927 | use B::Concise qw(set_style add_callback); |
| 928 | set_style($format, $gotofmt, $treefmt); |
| 929 | add_callback |
| 930 | ( |
| 931 | sub |
| 932 | { |
| 933 | my ($h, $op, $level, $format) = @_; |
| 934 | $h->{variable} = some_func($op); |
| 935 | } |
| 936 | ); |
| 937 | B::Concise::compile(@options)->(); |
| 938 | |
| 939 | You can specify a style by calling the B<set_style> subroutine. If you |
| 940 | have a new variable in your style, or you want to change the value of an |
| 941 | existing variable, you will need to add a callback to specify the value |
| 942 | for that variable. |
| 943 | |
| 944 | This is done by calling B<add_callback> passing references to any |
| 945 | callback subroutines. The subroutines are called in the same order as |
| 946 | they are added. Each subroutine is passed four parameters. These are a |
| 947 | reference to a hash, the keys of which are the names of the variables |
| 948 | and the values of which are their values, the op, the level and the |
| 949 | format. |
| 950 | |
| 951 | To define your own variables, simply add them to the hash, or change |
| 952 | existing values if you need to. The level and format are passed in as |
| 953 | references to scalars, but it is unlikely that they will need to be |
| 954 | changed or even used. |
| 955 | |
| 956 | To see the output, call the subroutine returned by B<compile> in the |
| 957 | same way that B<O> does. |
| 958 | |
| 959 | =head1 AUTHOR |
| 960 | |
| 961 | Stephen McCamant, C<smcc@CSUA.Berkeley.EDU> |
| 962 | |
| 963 | =cut |