| 1 | package Filter::Simple; |
| 2 | |
| 3 | use Text::Balanced ':ALL'; |
| 4 | |
| 5 | use vars qw{ $VERSION @EXPORT }; |
| 6 | |
| 7 | $VERSION = '0.78'; |
| 8 | |
| 9 | use Filter::Util::Call; |
| 10 | use Carp; |
| 11 | |
| 12 | @EXPORT = qw( FILTER FILTER_ONLY ); |
| 13 | |
| 14 | |
| 15 | sub import { |
| 16 | if (@_>1) { shift; goto &FILTER } |
| 17 | else { *{caller()."::$_"} = \&$_ foreach @EXPORT } |
| 18 | } |
| 19 | |
| 20 | sub FILTER (&;$) { |
| 21 | my $caller = caller; |
| 22 | my ($filter, $terminator) = @_; |
| 23 | local $SIG{__WARN__} = sub{}; |
| 24 | *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator); |
| 25 | *{"${caller}::unimport"} = gen_filter_unimport($caller); |
| 26 | } |
| 27 | |
| 28 | sub fail { |
| 29 | croak "FILTER_ONLY: ", @_; |
| 30 | } |
| 31 | |
| 32 | my $exql = sub { |
| 33 | my @bits = extract_quotelike $_[0], qr//; |
| 34 | return unless $bits[0]; |
| 35 | return \@bits; |
| 36 | }; |
| 37 | |
| 38 | my $ws = qr/\s+/; |
| 39 | my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/; |
| 40 | my $EOP = qr/\n\n|\Z/; |
| 41 | my $CUT = qr/\n=cut.*$EOP/; |
| 42 | my $pod_or_DATA = qr/ |
| 43 | ^=(?:head[1-4]|item) .*? $CUT |
| 44 | | ^=pod .*? $CUT |
| 45 | | ^=for .*? $EOP |
| 46 | | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP |
| 47 | | ^__(DATA|END)__\r?\n.* |
| 48 | /smx; |
| 49 | |
| 50 | my %extractor_for = ( |
| 51 | quotelike => [ $ws, $id, { MATCH => \&extract_quotelike } ], |
| 52 | regex => [ $ws, $pod_or_DATA, $id, $exql ], |
| 53 | string => [ $ws, $pod_or_DATA, $id, $exql ], |
| 54 | code => [ $ws, { DONT_MATCH => $pod_or_DATA }, |
| 55 | $id, { DONT_MATCH => \&extract_quotelike } ], |
| 56 | executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ], |
| 57 | all => [ { MATCH => qr/(?s:.*)/ } ], |
| 58 | ); |
| 59 | |
| 60 | my %selector_for = ( |
| 61 | all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} }, |
| 62 | executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} }, |
| 63 | quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} }, |
| 64 | regex => sub { my ($t)=@_; |
| 65 | sub{ref() or return $_; |
| 66 | my ($ql,undef,$pre,$op,$ld,$pat) = @$_; |
| 67 | return $_->[0] unless $op =~ /^(qr|m|s)/ |
| 68 | || !$op && ($ld eq '/' || $ld eq '?'); |
| 69 | $_ = $pat; |
| 70 | $t->(@_); |
| 71 | $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/; |
| 72 | return "$pre$ql"; |
| 73 | }; |
| 74 | }, |
| 75 | string => sub { my ($t)=@_; |
| 76 | sub{ref() or return $_; |
| 77 | local *args = \@_; |
| 78 | my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10]; |
| 79 | return $_->[0] if $op =~ /^(qr|m)/ |
| 80 | || !$op && ($ld1 eq '/' || $ld1 eq '?'); |
| 81 | if (!$op || $op eq 'tr' || $op eq 'y') { |
| 82 | local *_ = \$str1; |
| 83 | $t->(@args); |
| 84 | } |
| 85 | if ($op =~ /^(tr|y|s)/) { |
| 86 | local *_ = \$str2; |
| 87 | $t->(@args); |
| 88 | } |
| 89 | my $result = "$pre$op$ld1$str1$rd1"; |
| 90 | $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}> |
| 91 | $result .= "$str2$rd2$flg"; |
| 92 | return $result; |
| 93 | }; |
| 94 | }, |
| 95 | ); |
| 96 | |
| 97 | |
| 98 | sub gen_std_filter_for { |
| 99 | my ($type, $transform) = @_; |
| 100 | return sub { my (@pieces, $instr); |
| 101 | $DB::single=1; |
| 102 | for (extract_multiple($_,$extractor_for{$type})) { |
| 103 | if (ref()) { push @pieces, $_; $instr=0 } |
| 104 | elsif ($instr) { $pieces[-1] .= $_ } |
| 105 | else { push @pieces, $_; $instr=1 } |
| 106 | } |
| 107 | if ($type eq 'code') { |
| 108 | my $count = 0; |
| 109 | local $placeholder = qr/\Q$;\E(?:\C{4})\Q$;\E/; |
| 110 | my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/; |
| 111 | $_ = join "", |
| 112 | map { ref $_ ? $;.pack('N',$count++).$; : $_ } |
| 113 | @pieces; |
| 114 | @pieces = grep { ref $_ } @pieces; |
| 115 | $transform->(@_); |
| 116 | s/$extractor/${$pieces[unpack('N',$1)]}/g; |
| 117 | } |
| 118 | else { |
| 119 | my $selector = $selector_for{$type}->($transform); |
| 120 | $_ = join "", map $selector->(@_), @pieces; |
| 121 | } |
| 122 | } |
| 123 | }; |
| 124 | |
| 125 | sub FILTER_ONLY { |
| 126 | my $caller = caller; |
| 127 | while (@_ > 1) { |
| 128 | my ($what, $how) = splice(@_, 0, 2); |
| 129 | fail "Unknown selector: $what" |
| 130 | unless exists $extractor_for{$what}; |
| 131 | fail "Filter for $what is not a subroutine reference" |
| 132 | unless ref $how eq 'CODE'; |
| 133 | push @transforms, gen_std_filter_for($what,$how); |
| 134 | } |
| 135 | my $terminator = shift; |
| 136 | |
| 137 | my $multitransform = sub { |
| 138 | foreach my $transform ( @transforms ) { |
| 139 | $transform->(@_); |
| 140 | } |
| 141 | }; |
| 142 | no warnings 'redefine'; |
| 143 | *{"${caller}::import"} = |
| 144 | gen_filter_import($caller,$multitransform,$terminator); |
| 145 | *{"${caller}::unimport"} = gen_filter_unimport($caller); |
| 146 | } |
| 147 | |
| 148 | my $ows = qr/(?:[ \t]+|#[^\n]*)*/; |
| 149 | |
| 150 | sub gen_filter_import { |
| 151 | my ($class, $filter, $terminator) = @_; |
| 152 | my %terminator; |
| 153 | my $prev_import = *{$class."::import"}{CODE}; |
| 154 | return sub { |
| 155 | my ($imported_class, @args) = @_; |
| 156 | my $def_terminator = |
| 157 | qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)\r?$/; |
| 158 | if (!defined $terminator) { |
| 159 | $terminator{terminator} = $def_terminator; |
| 160 | } |
| 161 | elsif (!ref $terminator || ref $terminator eq 'Regexp') { |
| 162 | $terminator{terminator} = $terminator; |
| 163 | } |
| 164 | elsif (ref $terminator ne 'HASH') { |
| 165 | croak "Terminator must be specified as scalar or hash ref" |
| 166 | } |
| 167 | elsif (!exists $terminator->{terminator}) { |
| 168 | $terminator{terminator} = $def_terminator; |
| 169 | } |
| 170 | filter_add( |
| 171 | sub { |
| 172 | my ($status, $lastline); |
| 173 | my $count = 0; |
| 174 | my $data = ""; |
| 175 | while ($status = filter_read()) { |
| 176 | return $status if $status < 0; |
| 177 | if ($terminator{terminator} && |
| 178 | m/$terminator{terminator}/) { |
| 179 | $lastline = $_; |
| 180 | last; |
| 181 | } |
| 182 | $data .= $_; |
| 183 | $count++; |
| 184 | $_ = ""; |
| 185 | } |
| 186 | $_ = $data; |
| 187 | $filter->($imported_class, @args) unless $status < 0; |
| 188 | if (defined $lastline) { |
| 189 | if (defined $terminator{becomes}) { |
| 190 | $_ .= $terminator{becomes}; |
| 191 | } |
| 192 | elsif ($lastline =~ $def_terminator) { |
| 193 | $_ .= $lastline; |
| 194 | } |
| 195 | } |
| 196 | return $count; |
| 197 | } |
| 198 | ); |
| 199 | if ($prev_import) { |
| 200 | goto &$prev_import; |
| 201 | } |
| 202 | elsif ($class->isa('Exporter')) { |
| 203 | $class->export_to_level(1,@_); |
| 204 | } |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | sub gen_filter_unimport { |
| 209 | my ($class) = @_; |
| 210 | my $prev_unimport = *{$class."::unimport"}{CODE}; |
| 211 | return sub { |
| 212 | filter_del(); |
| 213 | goto &$prev_unimport if $prev_unimport; |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | 1; |
| 218 | |
| 219 | __END__ |
| 220 | |
| 221 | =head1 NAME |
| 222 | |
| 223 | Filter::Simple - Simplified source filtering |
| 224 | |
| 225 | |
| 226 | =head1 SYNOPSIS |
| 227 | |
| 228 | # in MyFilter.pm: |
| 229 | |
| 230 | package MyFilter; |
| 231 | |
| 232 | use Filter::Simple; |
| 233 | |
| 234 | FILTER { ... }; |
| 235 | |
| 236 | # or just: |
| 237 | # |
| 238 | # use Filter::Simple sub { ... }; |
| 239 | |
| 240 | # in user's code: |
| 241 | |
| 242 | use MyFilter; |
| 243 | |
| 244 | # this code is filtered |
| 245 | |
| 246 | no MyFilter; |
| 247 | |
| 248 | # this code is not |
| 249 | |
| 250 | |
| 251 | =head1 DESCRIPTION |
| 252 | |
| 253 | =head2 The Problem |
| 254 | |
| 255 | Source filtering is an immensely powerful feature of recent versions of Perl. |
| 256 | It allows one to extend the language itself (e.g. the Switch module), to |
| 257 | simplify the language (e.g. Language::Pythonesque), or to completely recast the |
| 258 | language (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use |
| 259 | the full power of Perl as its own, recursively applied, macro language. |
| 260 | |
| 261 | The excellent Filter::Util::Call module (by Paul Marquess) provides a |
| 262 | usable Perl interface to source filtering, but it is often too powerful |
| 263 | and not nearly as simple as it could be. |
| 264 | |
| 265 | To use the module it is necessary to do the following: |
| 266 | |
| 267 | =over 4 |
| 268 | |
| 269 | =item 1. |
| 270 | |
| 271 | Download, build, and install the Filter::Util::Call module. |
| 272 | (If you have Perl 5.7.1 or later, this is already done for you.) |
| 273 | |
| 274 | =item 2. |
| 275 | |
| 276 | Set up a module that does a C<use Filter::Util::Call>. |
| 277 | |
| 278 | =item 3. |
| 279 | |
| 280 | Within that module, create an C<import> subroutine. |
| 281 | |
| 282 | =item 4. |
| 283 | |
| 284 | Within the C<import> subroutine do a call to C<filter_add>, passing |
| 285 | it either a subroutine reference. |
| 286 | |
| 287 | =item 5. |
| 288 | |
| 289 | Within the subroutine reference, call C<filter_read> or C<filter_read_exact> |
| 290 | to "prime" $_ with source code data from the source file that will |
| 291 | C<use> your module. Check the status value returned to see if any |
| 292 | source code was actually read in. |
| 293 | |
| 294 | =item 6. |
| 295 | |
| 296 | Process the contents of $_ to change the source code in the desired manner. |
| 297 | |
| 298 | =item 7. |
| 299 | |
| 300 | Return the status value. |
| 301 | |
| 302 | =item 8. |
| 303 | |
| 304 | If the act of unimporting your module (via a C<no>) should cause source |
| 305 | code filtering to cease, create an C<unimport> subroutine, and have it call |
| 306 | C<filter_del>. Make sure that the call to C<filter_read> or |
| 307 | C<filter_read_exact> in step 5 will not accidentally read past the |
| 308 | C<no>. Effectively this limits source code filters to line-by-line |
| 309 | operation, unless the C<import> subroutine does some fancy |
| 310 | pre-pre-parsing of the source code it's filtering. |
| 311 | |
| 312 | =back |
| 313 | |
| 314 | For example, here is a minimal source code filter in a module named |
| 315 | BANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG> |
| 316 | to the sequence C<die 'BANG' if $BANG> in any piece of code following a |
| 317 | C<use BANG;> statement (until the next C<no BANG;> statement, if any): |
| 318 | |
| 319 | package BANG; |
| 320 | |
| 321 | use Filter::Util::Call ; |
| 322 | |
| 323 | sub import { |
| 324 | filter_add( sub { |
| 325 | my $caller = caller; |
| 326 | my ($status, $no_seen, $data); |
| 327 | while ($status = filter_read()) { |
| 328 | if (/^\s*no\s+$caller\s*;\s*?$/) { |
| 329 | $no_seen=1; |
| 330 | last; |
| 331 | } |
| 332 | $data .= $_; |
| 333 | $_ = ""; |
| 334 | } |
| 335 | $_ = $data; |
| 336 | s/BANG\s+BANG/die 'BANG' if \$BANG/g |
| 337 | unless $status < 0; |
| 338 | $_ .= "no $class;\n" if $no_seen; |
| 339 | return 1; |
| 340 | }) |
| 341 | } |
| 342 | |
| 343 | sub unimport { |
| 344 | filter_del(); |
| 345 | } |
| 346 | |
| 347 | 1 ; |
| 348 | |
| 349 | This level of sophistication puts filtering out of the reach of |
| 350 | many programmers. |
| 351 | |
| 352 | |
| 353 | =head2 A Solution |
| 354 | |
| 355 | The Filter::Simple module provides a simplified interface to |
| 356 | Filter::Util::Call; one that is sufficient for most common cases. |
| 357 | |
| 358 | Instead of the above process, with Filter::Simple the task of setting up |
| 359 | a source code filter is reduced to: |
| 360 | |
| 361 | =over 4 |
| 362 | |
| 363 | =item 1. |
| 364 | |
| 365 | Download and install the Filter::Simple module. |
| 366 | (If you have Perl 5.7.1 or later, this is already done for you.) |
| 367 | |
| 368 | =item 2. |
| 369 | |
| 370 | Set up a module that does a C<use Filter::Simple> and then |
| 371 | calls C<FILTER { ... }>. |
| 372 | |
| 373 | =item 3. |
| 374 | |
| 375 | Within the anonymous subroutine or block that is passed to |
| 376 | C<FILTER>, process the contents of $_ to change the source code in |
| 377 | the desired manner. |
| 378 | |
| 379 | =back |
| 380 | |
| 381 | In other words, the previous example, would become: |
| 382 | |
| 383 | package BANG; |
| 384 | use Filter::Simple; |
| 385 | |
| 386 | FILTER { |
| 387 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
| 388 | }; |
| 389 | |
| 390 | 1 ; |
| 391 | |
| 392 | Note that the source code is passed as a single string, so any regex that |
| 393 | uses C<^> or C<$> to detect line boundaries will need the C</m> flag. |
| 394 | |
| 395 | =head2 Disabling or changing <no> behaviour |
| 396 | |
| 397 | By default, the installed filter only filters up to a line consisting of one of |
| 398 | the three standard source "terminators": |
| 399 | |
| 400 | no ModuleName; # optional comment |
| 401 | |
| 402 | or: |
| 403 | |
| 404 | __END__ |
| 405 | |
| 406 | or: |
| 407 | |
| 408 | __DATA__ |
| 409 | |
| 410 | but this can be altered by passing a second argument to C<use Filter::Simple> |
| 411 | or C<FILTER> (just remember: there's I<no> comma after the initial block when |
| 412 | you use C<FILTER>). |
| 413 | |
| 414 | That second argument may be either a C<qr>'d regular expression (which is then |
| 415 | used to match the terminator line), or a defined false value (which indicates |
| 416 | that no terminator line should be looked for), or a reference to a hash |
| 417 | (in which case the terminator is the value associated with the key |
| 418 | C<'terminator'>. |
| 419 | |
| 420 | For example, to cause the previous filter to filter only up to a line of the |
| 421 | form: |
| 422 | |
| 423 | GNAB esu; |
| 424 | |
| 425 | you would write: |
| 426 | |
| 427 | package BANG; |
| 428 | use Filter::Simple; |
| 429 | |
| 430 | FILTER { |
| 431 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
| 432 | } |
| 433 | qr/^\s*GNAB\s+esu\s*;\s*?$/; |
| 434 | |
| 435 | or: |
| 436 | |
| 437 | FILTER { |
| 438 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
| 439 | } |
| 440 | { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ }; |
| 441 | |
| 442 | and to prevent the filter's being turned off in any way: |
| 443 | |
| 444 | package BANG; |
| 445 | use Filter::Simple; |
| 446 | |
| 447 | FILTER { |
| 448 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
| 449 | } |
| 450 | ""; # or: 0 |
| 451 | |
| 452 | or: |
| 453 | |
| 454 | FILTER { |
| 455 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
| 456 | } |
| 457 | { terminator => "" }; |
| 458 | |
| 459 | B<Note that, no matter what you set the terminator pattern to, |
| 460 | the actual terminator itself I<must> be contained on a single source line.> |
| 461 | |
| 462 | |
| 463 | =head2 All-in-one interface |
| 464 | |
| 465 | Separating the loading of Filter::Simple: |
| 466 | |
| 467 | use Filter::Simple; |
| 468 | |
| 469 | from the setting up of the filtering: |
| 470 | |
| 471 | FILTER { ... }; |
| 472 | |
| 473 | is useful because it allows other code (typically parser support code |
| 474 | or caching variables) to be defined before the filter is invoked. |
| 475 | However, there is often no need for such a separation. |
| 476 | |
| 477 | In those cases, it is easier to just append the filtering subroutine and |
| 478 | any terminator specification directly to the C<use> statement that loads |
| 479 | Filter::Simple, like so: |
| 480 | |
| 481 | use Filter::Simple sub { |
| 482 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
| 483 | }; |
| 484 | |
| 485 | This is exactly the same as: |
| 486 | |
| 487 | use Filter::Simple; |
| 488 | BEGIN { |
| 489 | Filter::Simple::FILTER { |
| 490 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
| 491 | }; |
| 492 | } |
| 493 | |
| 494 | except that the C<FILTER> subroutine is not exported by Filter::Simple. |
| 495 | |
| 496 | |
| 497 | =head2 Filtering only specific components of source code |
| 498 | |
| 499 | One of the problems with a filter like: |
| 500 | |
| 501 | use Filter::Simple; |
| 502 | |
| 503 | FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g }; |
| 504 | |
| 505 | is that it indiscriminately applies the specified transformation to |
| 506 | the entire text of your source program. So something like: |
| 507 | |
| 508 | warn 'BANG BANG, YOU'RE DEAD'; |
| 509 | BANG BANG; |
| 510 | |
| 511 | will become: |
| 512 | |
| 513 | warn 'die 'BANG' if $BANG, YOU'RE DEAD'; |
| 514 | die 'BANG' if $BANG; |
| 515 | |
| 516 | It is very common when filtering source to only want to apply the filter |
| 517 | to the non-character-string parts of the code, or alternatively to I<only> |
| 518 | the character strings. |
| 519 | |
| 520 | Filter::Simple supports this type of filtering by automatically |
| 521 | exporting the C<FILTER_ONLY> subroutine. |
| 522 | |
| 523 | C<FILTER_ONLY> takes a sequence of specifiers that install separate |
| 524 | (and possibly multiple) filters that act on only parts of the source code. |
| 525 | For example: |
| 526 | |
| 527 | use Filter::Simple; |
| 528 | |
| 529 | FILTER_ONLY |
| 530 | code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g }, |
| 531 | quotelike => sub { s/BANG\s+BANG/CHITTY CHITTY/g }; |
| 532 | |
| 533 | The C<"code"> subroutine will only be used to filter parts of the source |
| 534 | code that are not quotelikes, POD, or C<__DATA__>. The C<quotelike> |
| 535 | subroutine only filters Perl quotelikes (including here documents). |
| 536 | |
| 537 | The full list of alternatives is: |
| 538 | |
| 539 | =over |
| 540 | |
| 541 | =item C<"code"> |
| 542 | |
| 543 | Filters only those sections of the source code that are not quotelikes, POD, or |
| 544 | C<__DATA__>. |
| 545 | |
| 546 | =item C<"executable"> |
| 547 | |
| 548 | Filters only those sections of the source code that are not POD or C<__DATA__>. |
| 549 | |
| 550 | =item C<"quotelike"> |
| 551 | |
| 552 | Filters only Perl quotelikes (as interpreted by |
| 553 | C<&Text::Balanced::extract_quotelike>). |
| 554 | |
| 555 | =item C<"string"> |
| 556 | |
| 557 | Filters only the string literal parts of a Perl quotelike (i.e. the |
| 558 | contents of a string literal, either half of a C<tr///>, the second |
| 559 | half of an C<s///>). |
| 560 | |
| 561 | =item C<"regex"> |
| 562 | |
| 563 | Filters only the pattern literal parts of a Perl quotelike (i.e. the |
| 564 | contents of a C<qr//> or an C<m//>, the first half of an C<s///>). |
| 565 | |
| 566 | =item C<"all"> |
| 567 | |
| 568 | Filters everything. Identical in effect to C<FILTER>. |
| 569 | |
| 570 | =back |
| 571 | |
| 572 | Except for C<< FILTER_ONLY code => sub {...} >>, each of |
| 573 | the component filters is called repeatedly, once for each component |
| 574 | found in the source code. |
| 575 | |
| 576 | Note that you can also apply two or more of the same type of filter in |
| 577 | a single C<FILTER_ONLY>. For example, here's a simple |
| 578 | macro-preprocessor that is only applied within regexes, |
| 579 | with a final debugging pass that prints the resulting source code: |
| 580 | |
| 581 | use Regexp::Common; |
| 582 | FILTER_ONLY |
| 583 | regex => sub { s/!\[/[^/g }, |
| 584 | regex => sub { s/%d/$RE{num}{int}/g }, |
| 585 | regex => sub { s/%f/$RE{num}{real}/g }, |
| 586 | all => sub { print if $::DEBUG }; |
| 587 | |
| 588 | |
| 589 | |
| 590 | =head2 Filtering only the code parts of source code |
| 591 | |
| 592 | Most source code ceases to be grammatically correct when it is broken up |
| 593 | into the pieces between string literals and regexes. So the C<'code'> |
| 594 | component filter behaves slightly differently from the other partial filters |
| 595 | described in the previous section. |
| 596 | |
| 597 | Rather than calling the specified processor on each individual piece of |
| 598 | code (i.e. on the bits between quotelikes), the C<'code'> partial filter |
| 599 | operates on the entire source code, but with the quotelike bits |
| 600 | "blanked out". |
| 601 | |
| 602 | That is, a C<'code'> filter I<replaces> each quoted string, quotelike, |
| 603 | regex, POD, and __DATA__ section with a placeholder. The |
| 604 | delimiters of this placeholder are the contents of the C<$;> variable |
| 605 | at the time the filter is applied (normally C<"\034">). The remaining |
| 606 | four bytes are a unique identifier for the component being replaced. |
| 607 | |
| 608 | This approach makes it comparatively easy to write code preprocessors |
| 609 | without worrying about the form or contents of strings, regexes, etc. |
| 610 | For convenience, during a C<'code'> filtering operation, Filter::Simple |
| 611 | provides a package variable (C<$Filter::Simple::placeholder>) that contains |
| 612 | a pre-compiled regex that matches any placeholder. Placeholders can be |
| 613 | moved and re-ordered within the source code as needed. |
| 614 | |
| 615 | Once the filtering has been applied, the original strings, regexes, |
| 616 | POD, etc. are re-inserted into the code, by replacing each |
| 617 | placeholder with the corresponding original component. |
| 618 | |
| 619 | For example, the following filter detects concatentated pairs of |
| 620 | strings/quotelikes and reverses the order in which they are |
| 621 | concatenated: |
| 622 | |
| 623 | package DemoRevCat; |
| 624 | use Filter::Simple; |
| 625 | |
| 626 | FILTER_ONLY code => sub { my $ph = $Filter::Simple::placeholder; |
| 627 | s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx |
| 628 | }; |
| 629 | |
| 630 | Thus, the following code: |
| 631 | |
| 632 | use DemoRevCat; |
| 633 | |
| 634 | my $str = "abc" . q(def); |
| 635 | |
| 636 | print "$str\n"; |
| 637 | |
| 638 | would become: |
| 639 | |
| 640 | my $str = q(def)."abc"; |
| 641 | |
| 642 | print "$str\n"; |
| 643 | |
| 644 | and hence print: |
| 645 | |
| 646 | defabc |
| 647 | |
| 648 | |
| 649 | =head2 Using Filter::Simple with an explicit C<import> subroutine |
| 650 | |
| 651 | Filter::Simple generates a special C<import> subroutine for |
| 652 | your module (see L<"How it works">) which would normally replace any |
| 653 | C<import> subroutine you might have explicitly declared. |
| 654 | |
| 655 | However, Filter::Simple is smart enough to notice your existing |
| 656 | C<import> and Do The Right Thing with it. |
| 657 | That is, if you explicitly define an C<import> subroutine in a package |
| 658 | that's using Filter::Simple, that C<import> subroutine will still |
| 659 | be invoked immediately after any filter you install. |
| 660 | |
| 661 | The only thing you have to remember is that the C<import> subroutine |
| 662 | I<must> be declared I<before> the filter is installed. If you use C<FILTER> |
| 663 | to install the filter: |
| 664 | |
| 665 | package Filter::TurnItUpTo11; |
| 666 | |
| 667 | use Filter::Simple; |
| 668 | |
| 669 | FILTER { s/(\w+)/\U$1/ }; |
| 670 | |
| 671 | that will almost never be a problem, but if you install a filtering |
| 672 | subroutine by passing it directly to the C<use Filter::Simple> |
| 673 | statement: |
| 674 | |
| 675 | package Filter::TurnItUpTo11; |
| 676 | |
| 677 | use Filter::Simple sub{ s/(\w+)/\U$1/ }; |
| 678 | |
| 679 | then you must make sure that your C<import> subroutine appears before |
| 680 | that C<use> statement. |
| 681 | |
| 682 | |
| 683 | =head2 Using Filter::Simple and Exporter together |
| 684 | |
| 685 | Likewise, Filter::Simple is also smart enough |
| 686 | to Do The Right Thing if you use Exporter: |
| 687 | |
| 688 | package Switch; |
| 689 | use base Exporter; |
| 690 | use Filter::Simple; |
| 691 | |
| 692 | @EXPORT = qw(switch case); |
| 693 | @EXPORT_OK = qw(given when); |
| 694 | |
| 695 | FILTER { $_ = magic_Perl_filter($_) } |
| 696 | |
| 697 | Immediately after the filter has been applied to the source, |
| 698 | Filter::Simple will pass control to Exporter, so it can do its magic too. |
| 699 | |
| 700 | Of course, here too, Filter::Simple has to know you're using Exporter |
| 701 | before it applies the filter. That's almost never a problem, but if you're |
| 702 | nervous about it, you can guarantee that things will work correctly by |
| 703 | ensuring that your C<use base Exporter> always precedes your |
| 704 | C<use Filter::Simple>. |
| 705 | |
| 706 | |
| 707 | =head2 How it works |
| 708 | |
| 709 | The Filter::Simple module exports into the package that calls C<FILTER> |
| 710 | (or C<use>s it directly) -- such as package "BANG" in the above example -- |
| 711 | two automagically constructed |
| 712 | subroutines -- C<import> and C<unimport> -- which take care of all the |
| 713 | nasty details. |
| 714 | |
| 715 | In addition, the generated C<import> subroutine passes its own argument |
| 716 | list to the filtering subroutine, so the BANG.pm filter could easily |
| 717 | be made parametric: |
| 718 | |
| 719 | package BANG; |
| 720 | |
| 721 | use Filter::Simple; |
| 722 | |
| 723 | FILTER { |
| 724 | my ($die_msg, $var_name) = @_; |
| 725 | s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g; |
| 726 | }; |
| 727 | |
| 728 | # and in some user code: |
| 729 | |
| 730 | use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM |
| 731 | |
| 732 | |
| 733 | The specified filtering subroutine is called every time a C<use BANG> is |
| 734 | encountered, and passed all the source code following that call, up to |
| 735 | either the next C<no BANG;> (or whatever terminator you've set) or the |
| 736 | end of the source file, whichever occurs first. By default, any C<no |
| 737 | BANG;> call must appear by itself on a separate line, or it is ignored. |
| 738 | |
| 739 | |
| 740 | =head1 AUTHOR |
| 741 | |
| 742 | Damian Conway (damian@conway.org) |
| 743 | |
| 744 | =head1 COPYRIGHT |
| 745 | |
| 746 | Copyright (c) 2000-2001, Damian Conway. All Rights Reserved. |
| 747 | This module is free software. It may be used, redistributed |
| 748 | and/or modified under the same terms as Perl itself. |