| 1 | package B::Xref; |
| 2 | |
| 3 | our $VERSION = '1.01'; |
| 4 | |
| 5 | =head1 NAME |
| 6 | |
| 7 | B::Xref - Generates cross reference reports for Perl programs |
| 8 | |
| 9 | =head1 SYNOPSIS |
| 10 | |
| 11 | perl -MO=Xref[,OPTIONS] foo.pl |
| 12 | |
| 13 | =head1 DESCRIPTION |
| 14 | |
| 15 | The B::Xref module is used to generate a cross reference listing of all |
| 16 | definitions and uses of variables, subroutines and formats in a Perl program. |
| 17 | It is implemented as a backend for the Perl compiler. |
| 18 | |
| 19 | The report generated is in the following format: |
| 20 | |
| 21 | File filename1 |
| 22 | Subroutine subname1 |
| 23 | Package package1 |
| 24 | object1 line numbers |
| 25 | object2 line numbers |
| 26 | ... |
| 27 | Package package2 |
| 28 | ... |
| 29 | |
| 30 | Each B<File> section reports on a single file. Each B<Subroutine> section |
| 31 | reports on a single subroutine apart from the special cases |
| 32 | "(definitions)" and "(main)". These report, respectively, on subroutine |
| 33 | definitions found by the initial symbol table walk and on the main part of |
| 34 | the program or module external to all subroutines. |
| 35 | |
| 36 | The report is then grouped by the B<Package> of each variable, |
| 37 | subroutine or format with the special case "(lexicals)" meaning |
| 38 | lexical variables. Each B<object> name (implicitly qualified by its |
| 39 | containing B<Package>) includes its type character(s) at the beginning |
| 40 | where possible. Lexical variables are easier to track and even |
| 41 | included dereferencing information where possible. |
| 42 | |
| 43 | The C<line numbers> are a comma separated list of line numbers (some |
| 44 | preceded by code letters) where that object is used in some way. |
| 45 | Simple uses aren't preceded by a code letter. Introductions (such as |
| 46 | where a lexical is first defined with C<my>) are indicated with the |
| 47 | letter "i". Subroutine and method calls are indicated by the character |
| 48 | "&". Subroutine definitions are indicated by "s" and format |
| 49 | definitions by "f". |
| 50 | |
| 51 | =head1 OPTIONS |
| 52 | |
| 53 | Option words are separated by commas (not whitespace) and follow the |
| 54 | usual conventions of compiler backend options. |
| 55 | |
| 56 | =over 8 |
| 57 | |
| 58 | =item C<-oFILENAME> |
| 59 | |
| 60 | Directs output to C<FILENAME> instead of standard output. |
| 61 | |
| 62 | =item C<-r> |
| 63 | |
| 64 | Raw output. Instead of producing a human-readable report, outputs a line |
| 65 | in machine-readable form for each definition/use of a variable/sub/format. |
| 66 | |
| 67 | =item C<-d> |
| 68 | |
| 69 | Don't output the "(definitions)" sections. |
| 70 | |
| 71 | =item C<-D[tO]> |
| 72 | |
| 73 | (Internal) debug options, probably only useful if C<-r> included. |
| 74 | The C<t> option prints the object on the top of the stack as it's |
| 75 | being tracked. The C<O> option prints each operator as it's being |
| 76 | processed in the execution order of the program. |
| 77 | |
| 78 | =back |
| 79 | |
| 80 | =head1 BUGS |
| 81 | |
| 82 | Non-lexical variables are quite difficult to track through a program. |
| 83 | Sometimes the type of a non-lexical variable's use is impossible to |
| 84 | determine. Introductions of non-lexical non-scalars don't seem to be |
| 85 | reported properly. |
| 86 | |
| 87 | =head1 AUTHOR |
| 88 | |
| 89 | Malcolm Beattie, mbeattie@sable.ox.ac.uk. |
| 90 | |
| 91 | =cut |
| 92 | |
| 93 | use strict; |
| 94 | use Config; |
| 95 | use B qw(peekop class comppadlist main_start svref_2object walksymtable |
| 96 | OPpLVAL_INTRO SVf_POK OPpOUR_INTRO cstring |
| 97 | ); |
| 98 | |
| 99 | sub UNKNOWN { ["?", "?", "?"] } |
| 100 | |
| 101 | my @pad; # lexicals in current pad |
| 102 | # as ["(lexical)", type, name] |
| 103 | my %done; # keyed by $$op: set when each $op is done |
| 104 | my $top = UNKNOWN; # shadows top element of stack as |
| 105 | # [pack, type, name] (pack can be "(lexical)") |
| 106 | my $file; # shadows current filename |
| 107 | my $line; # shadows current line number |
| 108 | my $subname; # shadows current sub name |
| 109 | my %table; # Multi-level hash to record all uses etc. |
| 110 | my @todo = (); # List of CVs that need processing |
| 111 | |
| 112 | my %code = (intro => "i", used => "", |
| 113 | subdef => "s", subused => "&", |
| 114 | formdef => "f", meth => "->"); |
| 115 | |
| 116 | |
| 117 | # Options |
| 118 | my ($debug_op, $debug_top, $nodefs, $raw); |
| 119 | |
| 120 | sub process { |
| 121 | my ($var, $event) = @_; |
| 122 | my ($pack, $type, $name) = @$var; |
| 123 | if ($type eq "*") { |
| 124 | if ($event eq "used") { |
| 125 | return; |
| 126 | } elsif ($event eq "subused") { |
| 127 | $type = "&"; |
| 128 | } |
| 129 | } |
| 130 | $type =~ s/(.)\*$/$1/g; |
| 131 | if ($raw) { |
| 132 | printf "%-16s %-12s %5d %-12s %4s %-16s %s\n", |
| 133 | $file, $subname, $line, $pack, $type, $name, $event; |
| 134 | } else { |
| 135 | # Wheee |
| 136 | push(@{$table{$file}->{$subname}->{$pack}->{$type.$name}->{$event}}, |
| 137 | $line); |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | sub load_pad { |
| 142 | my $padlist = shift; |
| 143 | my ($namelistav, $vallistav, @namelist, $ix); |
| 144 | @pad = (); |
| 145 | return if class($padlist) eq "SPECIAL"; |
| 146 | ($namelistav,$vallistav) = $padlist->ARRAY; |
| 147 | @namelist = $namelistav->ARRAY; |
| 148 | for ($ix = 1; $ix < @namelist; $ix++) { |
| 149 | my $namesv = $namelist[$ix]; |
| 150 | next if class($namesv) eq "SPECIAL"; |
| 151 | my ($type, $name) = $namesv->PV =~ /^(.)([^\0]*)(\0.*)?$/; |
| 152 | $pad[$ix] = ["(lexical)", $type || '?', $name || '?']; |
| 153 | } |
| 154 | if ($Config{useithreads}) { |
| 155 | my (@vallist); |
| 156 | @vallist = $vallistav->ARRAY; |
| 157 | for ($ix = 1; $ix < @vallist; $ix++) { |
| 158 | my $valsv = $vallist[$ix]; |
| 159 | next unless class($valsv) eq "GV"; |
| 160 | # these pad GVs don't have corresponding names, so same @pad |
| 161 | # array can be used without collisions |
| 162 | $pad[$ix] = [$valsv->STASH->NAME, "*", $valsv->NAME]; |
| 163 | } |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | sub xref { |
| 168 | my $start = shift; |
| 169 | my $op; |
| 170 | for ($op = $start; $$op; $op = $op->next) { |
| 171 | last if $done{$$op}++; |
| 172 | warn sprintf("top = [%s, %s, %s]\n", @$top) if $debug_top; |
| 173 | warn peekop($op), "\n" if $debug_op; |
| 174 | my $opname = $op->name; |
| 175 | if ($opname =~ /^(or|and|mapwhile|grepwhile|range|cond_expr)$/) { |
| 176 | xref($op->other); |
| 177 | } elsif ($opname eq "match" || $opname eq "subst") { |
| 178 | xref($op->pmreplstart); |
| 179 | } elsif ($opname eq "substcont") { |
| 180 | xref($op->other->pmreplstart); |
| 181 | $op = $op->other; |
| 182 | redo; |
| 183 | } elsif ($opname eq "enterloop") { |
| 184 | xref($op->redoop); |
| 185 | xref($op->nextop); |
| 186 | xref($op->lastop); |
| 187 | } elsif ($opname eq "subst") { |
| 188 | xref($op->pmreplstart); |
| 189 | } else { |
| 190 | no strict 'refs'; |
| 191 | my $ppname = "pp_$opname"; |
| 192 | &$ppname($op) if defined(&$ppname); |
| 193 | } |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | sub xref_cv { |
| 198 | my $cv = shift; |
| 199 | my $pack = $cv->GV->STASH->NAME; |
| 200 | $subname = ($pack eq "main" ? "" : "$pack\::") . $cv->GV->NAME; |
| 201 | load_pad($cv->PADLIST); |
| 202 | xref($cv->START); |
| 203 | $subname = "(main)"; |
| 204 | } |
| 205 | |
| 206 | sub xref_object { |
| 207 | my $cvref = shift; |
| 208 | xref_cv(svref_2object($cvref)); |
| 209 | } |
| 210 | |
| 211 | sub xref_main { |
| 212 | $subname = "(main)"; |
| 213 | load_pad(comppadlist); |
| 214 | xref(main_start); |
| 215 | while (@todo) { |
| 216 | xref_cv(shift @todo); |
| 217 | } |
| 218 | } |
| 219 | |
| 220 | sub pp_nextstate { |
| 221 | my $op = shift; |
| 222 | $file = $op->file; |
| 223 | $line = $op->line; |
| 224 | $top = UNKNOWN; |
| 225 | } |
| 226 | |
| 227 | sub pp_padsv { |
| 228 | my $op = shift; |
| 229 | $top = $pad[$op->targ]; |
| 230 | process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used"); |
| 231 | } |
| 232 | |
| 233 | sub pp_padav { pp_padsv(@_) } |
| 234 | sub pp_padhv { pp_padsv(@_) } |
| 235 | |
| 236 | sub deref { |
| 237 | my ($op, $var, $as) = @_; |
| 238 | $var->[1] = $as . $var->[1]; |
| 239 | process($var, $op->private & OPpOUR_INTRO ? "intro" : "used"); |
| 240 | } |
| 241 | |
| 242 | sub pp_rv2cv { deref(shift, $top, "&"); } |
| 243 | sub pp_rv2hv { deref(shift, $top, "%"); } |
| 244 | sub pp_rv2sv { deref(shift, $top, "\$"); } |
| 245 | sub pp_rv2av { deref(shift, $top, "\@"); } |
| 246 | sub pp_rv2gv { deref(shift, $top, "*"); } |
| 247 | |
| 248 | sub pp_gvsv { |
| 249 | my $op = shift; |
| 250 | my $gv; |
| 251 | if ($Config{useithreads}) { |
| 252 | $top = $pad[$op->padix]; |
| 253 | $top = UNKNOWN unless $top; |
| 254 | $top->[1] = '$'; |
| 255 | } |
| 256 | else { |
| 257 | $gv = $op->gv; |
| 258 | $top = [$gv->STASH->NAME, '$', $gv->SAFENAME]; |
| 259 | } |
| 260 | process($top, $op->private & OPpLVAL_INTRO || |
| 261 | $op->private & OPpOUR_INTRO ? "intro" : "used"); |
| 262 | } |
| 263 | |
| 264 | sub pp_gv { |
| 265 | my $op = shift; |
| 266 | my $gv; |
| 267 | if ($Config{useithreads}) { |
| 268 | $top = $pad[$op->padix]; |
| 269 | $top = UNKNOWN unless $top; |
| 270 | $top->[1] = '*'; |
| 271 | } |
| 272 | else { |
| 273 | $gv = $op->gv; |
| 274 | $top = [$gv->STASH->NAME, "*", $gv->SAFENAME]; |
| 275 | } |
| 276 | process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used"); |
| 277 | } |
| 278 | |
| 279 | sub pp_const { |
| 280 | my $op = shift; |
| 281 | my $sv = $op->sv; |
| 282 | # constant could be in the pad (under useithreads) |
| 283 | if ($$sv) { |
| 284 | $top = ["?", "", |
| 285 | (class($sv) ne "SPECIAL" && $sv->FLAGS & SVf_POK) |
| 286 | ? cstring($sv->PV) : "?"]; |
| 287 | } |
| 288 | else { |
| 289 | $top = $pad[$op->targ]; |
| 290 | $top = UNKNOWN unless $top; |
| 291 | } |
| 292 | } |
| 293 | |
| 294 | sub pp_method { |
| 295 | my $op = shift; |
| 296 | $top = ["(method)", "->".$top->[1], $top->[2]]; |
| 297 | } |
| 298 | |
| 299 | sub pp_entersub { |
| 300 | my $op = shift; |
| 301 | if ($top->[1] eq "m") { |
| 302 | process($top, "meth"); |
| 303 | } else { |
| 304 | process($top, "subused"); |
| 305 | } |
| 306 | $top = UNKNOWN; |
| 307 | } |
| 308 | |
| 309 | # |
| 310 | # Stuff for cross referencing definitions of variables and subs |
| 311 | # |
| 312 | |
| 313 | sub B::GV::xref { |
| 314 | my $gv = shift; |
| 315 | my $cv = $gv->CV; |
| 316 | if ($$cv) { |
| 317 | #return if $done{$$cv}++; |
| 318 | $file = $gv->FILE; |
| 319 | $line = $gv->LINE; |
| 320 | process([$gv->STASH->NAME, "&", $gv->NAME], "subdef"); |
| 321 | push(@todo, $cv); |
| 322 | } |
| 323 | my $form = $gv->FORM; |
| 324 | if ($$form) { |
| 325 | return if $done{$$form}++; |
| 326 | $file = $gv->FILE; |
| 327 | $line = $gv->LINE; |
| 328 | process([$gv->STASH->NAME, "", $gv->NAME], "formdef"); |
| 329 | } |
| 330 | } |
| 331 | |
| 332 | sub xref_definitions { |
| 333 | my ($pack, %exclude); |
| 334 | return if $nodefs; |
| 335 | $subname = "(definitions)"; |
| 336 | foreach $pack (qw(B O AutoLoader DynaLoader XSLoader Config DB VMS |
| 337 | strict vars FileHandle Exporter Carp PerlIO::Layer |
| 338 | attributes utf8 warnings)) { |
| 339 | $exclude{$pack."::"} = 1; |
| 340 | } |
| 341 | no strict qw(vars refs); |
| 342 | walksymtable(\%{"main::"}, "xref", sub { !defined($exclude{$_[0]}) }); |
| 343 | } |
| 344 | |
| 345 | sub output { |
| 346 | return if $raw; |
| 347 | my ($file, $subname, $pack, $name, $ev, $perfile, $persubname, |
| 348 | $perpack, $pername, $perev); |
| 349 | foreach $file (sort(keys(%table))) { |
| 350 | $perfile = $table{$file}; |
| 351 | print "File $file\n"; |
| 352 | foreach $subname (sort(keys(%$perfile))) { |
| 353 | $persubname = $perfile->{$subname}; |
| 354 | print " Subroutine $subname\n"; |
| 355 | foreach $pack (sort(keys(%$persubname))) { |
| 356 | $perpack = $persubname->{$pack}; |
| 357 | print " Package $pack\n"; |
| 358 | foreach $name (sort(keys(%$perpack))) { |
| 359 | $pername = $perpack->{$name}; |
| 360 | my @lines; |
| 361 | foreach $ev (qw(intro formdef subdef meth subused used)) { |
| 362 | $perev = $pername->{$ev}; |
| 363 | if (defined($perev) && @$perev) { |
| 364 | my $code = $code{$ev}; |
| 365 | push(@lines, map("$code$_", @$perev)); |
| 366 | } |
| 367 | } |
| 368 | printf " %-16s %s\n", $name, join(", ", @lines); |
| 369 | } |
| 370 | } |
| 371 | } |
| 372 | } |
| 373 | } |
| 374 | |
| 375 | sub compile { |
| 376 | my @options = @_; |
| 377 | my ($option, $opt, $arg); |
| 378 | OPTION: |
| 379 | while ($option = shift @options) { |
| 380 | if ($option =~ /^-(.)(.*)/) { |
| 381 | $opt = $1; |
| 382 | $arg = $2; |
| 383 | } else { |
| 384 | unshift @options, $option; |
| 385 | last OPTION; |
| 386 | } |
| 387 | if ($opt eq "-" && $arg eq "-") { |
| 388 | shift @options; |
| 389 | last OPTION; |
| 390 | } elsif ($opt eq "o") { |
| 391 | $arg ||= shift @options; |
| 392 | open(STDOUT, ">$arg") or return "$arg: $!\n"; |
| 393 | } elsif ($opt eq "d") { |
| 394 | $nodefs = 1; |
| 395 | } elsif ($opt eq "r") { |
| 396 | $raw = 1; |
| 397 | } elsif ($opt eq "D") { |
| 398 | $arg ||= shift @options; |
| 399 | foreach $arg (split(//, $arg)) { |
| 400 | if ($arg eq "o") { |
| 401 | B->debug(1); |
| 402 | } elsif ($arg eq "O") { |
| 403 | $debug_op = 1; |
| 404 | } elsif ($arg eq "t") { |
| 405 | $debug_top = 1; |
| 406 | } |
| 407 | } |
| 408 | } |
| 409 | } |
| 410 | if (@options) { |
| 411 | return sub { |
| 412 | my $objname; |
| 413 | xref_definitions(); |
| 414 | foreach $objname (@options) { |
| 415 | $objname = "main::$objname" unless $objname =~ /::/; |
| 416 | eval "xref_object(\\&$objname)"; |
| 417 | die "xref_object(\\&$objname) failed: $@" if $@; |
| 418 | } |
| 419 | output(); |
| 420 | } |
| 421 | } else { |
| 422 | return sub { |
| 423 | xref_definitions(); |
| 424 | xref_main(); |
| 425 | output(); |
| 426 | } |
| 427 | } |
| 428 | } |
| 429 | |
| 430 | 1; |