Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / 5.8.0 / Filter / Simple.pm
CommitLineData
86530b38
AT
1package Filter::Simple;
2
3use Text::Balanced ':ALL';
4
5use vars qw{ $VERSION @EXPORT };
6
7$VERSION = '0.78';
8
9use Filter::Util::Call;
10use Carp;
11
12@EXPORT = qw( FILTER FILTER_ONLY );
13
14
15sub import {
16 if (@_>1) { shift; goto &FILTER }
17 else { *{caller()."::$_"} = \&$_ foreach @EXPORT }
18}
19
20sub FILTER (&;$) {
21 my $caller = caller;
22 my ($filter, $terminator) = @_;
23 local $SIG{__WARN__} = sub{};
24 *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator);
25 *{"${caller}::unimport"} = gen_filter_unimport($caller);
26}
27
28sub fail {
29 croak "FILTER_ONLY: ", @_;
30}
31
32my $exql = sub {
33 my @bits = extract_quotelike $_[0], qr//;
34 return unless $bits[0];
35 return \@bits;
36};
37
38my $ws = qr/\s+/;
39my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/;
40my $EOP = qr/\n\n|\Z/;
41my $CUT = qr/\n=cut.*$EOP/;
42my $pod_or_DATA = qr/
43 ^=(?:head[1-4]|item) .*? $CUT
44 | ^=pod .*? $CUT
45 | ^=for .*? $EOP
46 | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP
47 | ^__(DATA|END)__\r?\n.*
48 /smx;
49
50my %extractor_for = (
51 quotelike => [ $ws, $id, { MATCH => \&extract_quotelike } ],
52 regex => [ $ws, $pod_or_DATA, $id, $exql ],
53 string => [ $ws, $pod_or_DATA, $id, $exql ],
54 code => [ $ws, { DONT_MATCH => $pod_or_DATA },
55 $id, { DONT_MATCH => \&extract_quotelike } ],
56 executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ],
57 all => [ { MATCH => qr/(?s:.*)/ } ],
58);
59
60my %selector_for = (
61 all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} },
62 executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} },
63 quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} },
64 regex => sub { my ($t)=@_;
65 sub{ref() or return $_;
66 my ($ql,undef,$pre,$op,$ld,$pat) = @$_;
67 return $_->[0] unless $op =~ /^(qr|m|s)/
68 || !$op && ($ld eq '/' || $ld eq '?');
69 $_ = $pat;
70 $t->(@_);
71 $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/;
72 return "$pre$ql";
73 };
74 },
75 string => sub { my ($t)=@_;
76 sub{ref() or return $_;
77 local *args = \@_;
78 my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10];
79 return $_->[0] if $op =~ /^(qr|m)/
80 || !$op && ($ld1 eq '/' || $ld1 eq '?');
81 if (!$op || $op eq 'tr' || $op eq 'y') {
82 local *_ = \$str1;
83 $t->(@args);
84 }
85 if ($op =~ /^(tr|y|s)/) {
86 local *_ = \$str2;
87 $t->(@args);
88 }
89 my $result = "$pre$op$ld1$str1$rd1";
90 $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}>
91 $result .= "$str2$rd2$flg";
92 return $result;
93 };
94 },
95);
96
97
98sub gen_std_filter_for {
99 my ($type, $transform) = @_;
100 return sub { my (@pieces, $instr);
101 $DB::single=1;
102 for (extract_multiple($_,$extractor_for{$type})) {
103 if (ref()) { push @pieces, $_; $instr=0 }
104 elsif ($instr) { $pieces[-1] .= $_ }
105 else { push @pieces, $_; $instr=1 }
106 }
107 if ($type eq 'code') {
108 my $count = 0;
109 local $placeholder = qr/\Q$;\E(?:\C{4})\Q$;\E/;
110 my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/;
111 $_ = join "",
112 map { ref $_ ? $;.pack('N',$count++).$; : $_ }
113 @pieces;
114 @pieces = grep { ref $_ } @pieces;
115 $transform->(@_);
116 s/$extractor/${$pieces[unpack('N',$1)]}/g;
117 }
118 else {
119 my $selector = $selector_for{$type}->($transform);
120 $_ = join "", map $selector->(@_), @pieces;
121 }
122 }
123};
124
125sub FILTER_ONLY {
126 my $caller = caller;
127 while (@_ > 1) {
128 my ($what, $how) = splice(@_, 0, 2);
129 fail "Unknown selector: $what"
130 unless exists $extractor_for{$what};
131 fail "Filter for $what is not a subroutine reference"
132 unless ref $how eq 'CODE';
133 push @transforms, gen_std_filter_for($what,$how);
134 }
135 my $terminator = shift;
136
137 my $multitransform = sub {
138 foreach my $transform ( @transforms ) {
139 $transform->(@_);
140 }
141 };
142 no warnings 'redefine';
143 *{"${caller}::import"} =
144 gen_filter_import($caller,$multitransform,$terminator);
145 *{"${caller}::unimport"} = gen_filter_unimport($caller);
146}
147
148my $ows = qr/(?:[ \t]+|#[^\n]*)*/;
149
150sub gen_filter_import {
151 my ($class, $filter, $terminator) = @_;
152 my %terminator;
153 my $prev_import = *{$class."::import"}{CODE};
154 return sub {
155 my ($imported_class, @args) = @_;
156 my $def_terminator =
157 qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)\r?$/;
158 if (!defined $terminator) {
159 $terminator{terminator} = $def_terminator;
160 }
161 elsif (!ref $terminator || ref $terminator eq 'Regexp') {
162 $terminator{terminator} = $terminator;
163 }
164 elsif (ref $terminator ne 'HASH') {
165 croak "Terminator must be specified as scalar or hash ref"
166 }
167 elsif (!exists $terminator->{terminator}) {
168 $terminator{terminator} = $def_terminator;
169 }
170 filter_add(
171 sub {
172 my ($status, $lastline);
173 my $count = 0;
174 my $data = "";
175 while ($status = filter_read()) {
176 return $status if $status < 0;
177 if ($terminator{terminator} &&
178 m/$terminator{terminator}/) {
179 $lastline = $_;
180 last;
181 }
182 $data .= $_;
183 $count++;
184 $_ = "";
185 }
186 $_ = $data;
187 $filter->($imported_class, @args) unless $status < 0;
188 if (defined $lastline) {
189 if (defined $terminator{becomes}) {
190 $_ .= $terminator{becomes};
191 }
192 elsif ($lastline =~ $def_terminator) {
193 $_ .= $lastline;
194 }
195 }
196 return $count;
197 }
198 );
199 if ($prev_import) {
200 goto &$prev_import;
201 }
202 elsif ($class->isa('Exporter')) {
203 $class->export_to_level(1,@_);
204 }
205 }
206}
207
208sub gen_filter_unimport {
209 my ($class) = @_;
210 my $prev_unimport = *{$class."::unimport"}{CODE};
211 return sub {
212 filter_del();
213 goto &$prev_unimport if $prev_unimport;
214 }
215}
216
2171;
218
219__END__
220
221=head1 NAME
222
223Filter::Simple - Simplified source filtering
224
225
226=head1 SYNOPSIS
227
228 # in MyFilter.pm:
229
230 package MyFilter;
231
232 use Filter::Simple;
233
234 FILTER { ... };
235
236 # or just:
237 #
238 # use Filter::Simple sub { ... };
239
240 # in user's code:
241
242 use MyFilter;
243
244 # this code is filtered
245
246 no MyFilter;
247
248 # this code is not
249
250
251=head1 DESCRIPTION
252
253=head2 The Problem
254
255Source filtering is an immensely powerful feature of recent versions of Perl.
256It allows one to extend the language itself (e.g. the Switch module), to
257simplify the language (e.g. Language::Pythonesque), or to completely recast the
258language (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use
259the full power of Perl as its own, recursively applied, macro language.
260
261The excellent Filter::Util::Call module (by Paul Marquess) provides a
262usable Perl interface to source filtering, but it is often too powerful
263and not nearly as simple as it could be.
264
265To use the module it is necessary to do the following:
266
267=over 4
268
269=item 1.
270
271Download, build, and install the Filter::Util::Call module.
272(If you have Perl 5.7.1 or later, this is already done for you.)
273
274=item 2.
275
276Set up a module that does a C<use Filter::Util::Call>.
277
278=item 3.
279
280Within that module, create an C<import> subroutine.
281
282=item 4.
283
284Within the C<import> subroutine do a call to C<filter_add>, passing
285it either a subroutine reference.
286
287=item 5.
288
289Within the subroutine reference, call C<filter_read> or C<filter_read_exact>
290to "prime" $_ with source code data from the source file that will
291C<use> your module. Check the status value returned to see if any
292source code was actually read in.
293
294=item 6.
295
296Process the contents of $_ to change the source code in the desired manner.
297
298=item 7.
299
300Return the status value.
301
302=item 8.
303
304If the act of unimporting your module (via a C<no>) should cause source
305code filtering to cease, create an C<unimport> subroutine, and have it call
306C<filter_del>. Make sure that the call to C<filter_read> or
307C<filter_read_exact> in step 5 will not accidentally read past the
308C<no>. Effectively this limits source code filters to line-by-line
309operation, unless the C<import> subroutine does some fancy
310pre-pre-parsing of the source code it's filtering.
311
312=back
313
314For example, here is a minimal source code filter in a module named
315BANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG>
316to the sequence C<die 'BANG' if $BANG> in any piece of code following a
317C<use BANG;> statement (until the next C<no BANG;> statement, if any):
318
319 package BANG;
320
321 use Filter::Util::Call ;
322
323 sub import {
324 filter_add( sub {
325 my $caller = caller;
326 my ($status, $no_seen, $data);
327 while ($status = filter_read()) {
328 if (/^\s*no\s+$caller\s*;\s*?$/) {
329 $no_seen=1;
330 last;
331 }
332 $data .= $_;
333 $_ = "";
334 }
335 $_ = $data;
336 s/BANG\s+BANG/die 'BANG' if \$BANG/g
337 unless $status < 0;
338 $_ .= "no $class;\n" if $no_seen;
339 return 1;
340 })
341 }
342
343 sub unimport {
344 filter_del();
345 }
346
347 1 ;
348
349This level of sophistication puts filtering out of the reach of
350many programmers.
351
352
353=head2 A Solution
354
355The Filter::Simple module provides a simplified interface to
356Filter::Util::Call; one that is sufficient for most common cases.
357
358Instead of the above process, with Filter::Simple the task of setting up
359a source code filter is reduced to:
360
361=over 4
362
363=item 1.
364
365Download and install the Filter::Simple module.
366(If you have Perl 5.7.1 or later, this is already done for you.)
367
368=item 2.
369
370Set up a module that does a C<use Filter::Simple> and then
371calls C<FILTER { ... }>.
372
373=item 3.
374
375Within the anonymous subroutine or block that is passed to
376C<FILTER>, process the contents of $_ to change the source code in
377the desired manner.
378
379=back
380
381In other words, the previous example, would become:
382
383 package BANG;
384 use Filter::Simple;
385
386 FILTER {
387 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
388 };
389
390 1 ;
391
392Note that the source code is passed as a single string, so any regex that
393uses C<^> or C<$> to detect line boundaries will need the C</m> flag.
394
395=head2 Disabling or changing <no> behaviour
396
397By default, the installed filter only filters up to a line consisting of one of
398the three standard source "terminators":
399
400 no ModuleName; # optional comment
401
402or:
403
404 __END__
405
406or:
407
408 __DATA__
409
410but this can be altered by passing a second argument to C<use Filter::Simple>
411or C<FILTER> (just remember: there's I<no> comma after the initial block when
412you use C<FILTER>).
413
414That second argument may be either a C<qr>'d regular expression (which is then
415used to match the terminator line), or a defined false value (which indicates
416that no terminator line should be looked for), or a reference to a hash
417(in which case the terminator is the value associated with the key
418C<'terminator'>.
419
420For example, to cause the previous filter to filter only up to a line of the
421form:
422
423 GNAB esu;
424
425you would write:
426
427 package BANG;
428 use Filter::Simple;
429
430 FILTER {
431 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
432 }
433 qr/^\s*GNAB\s+esu\s*;\s*?$/;
434
435or:
436
437 FILTER {
438 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
439 }
440 { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ };
441
442and to prevent the filter's being turned off in any way:
443
444 package BANG;
445 use Filter::Simple;
446
447 FILTER {
448 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
449 }
450 ""; # or: 0
451
452or:
453
454 FILTER {
455 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
456 }
457 { terminator => "" };
458
459B<Note that, no matter what you set the terminator pattern to,
460the actual terminator itself I<must> be contained on a single source line.>
461
462
463=head2 All-in-one interface
464
465Separating the loading of Filter::Simple:
466
467 use Filter::Simple;
468
469from the setting up of the filtering:
470
471 FILTER { ... };
472
473is useful because it allows other code (typically parser support code
474or caching variables) to be defined before the filter is invoked.
475However, there is often no need for such a separation.
476
477In those cases, it is easier to just append the filtering subroutine and
478any terminator specification directly to the C<use> statement that loads
479Filter::Simple, like so:
480
481 use Filter::Simple sub {
482 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
483 };
484
485This is exactly the same as:
486
487 use Filter::Simple;
488 BEGIN {
489 Filter::Simple::FILTER {
490 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
491 };
492 }
493
494except that the C<FILTER> subroutine is not exported by Filter::Simple.
495
496
497=head2 Filtering only specific components of source code
498
499One of the problems with a filter like:
500
501 use Filter::Simple;
502
503 FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g };
504
505is that it indiscriminately applies the specified transformation to
506the entire text of your source program. So something like:
507
508 warn 'BANG BANG, YOU'RE DEAD';
509 BANG BANG;
510
511will become:
512
513 warn 'die 'BANG' if $BANG, YOU'RE DEAD';
514 die 'BANG' if $BANG;
515
516It is very common when filtering source to only want to apply the filter
517to the non-character-string parts of the code, or alternatively to I<only>
518the character strings.
519
520Filter::Simple supports this type of filtering by automatically
521exporting the C<FILTER_ONLY> subroutine.
522
523C<FILTER_ONLY> takes a sequence of specifiers that install separate
524(and possibly multiple) filters that act on only parts of the source code.
525For example:
526
527 use Filter::Simple;
528
529 FILTER_ONLY
530 code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g },
531 quotelike => sub { s/BANG\s+BANG/CHITTY CHITTY/g };
532
533The C<"code"> subroutine will only be used to filter parts of the source
534code that are not quotelikes, POD, or C<__DATA__>. The C<quotelike>
535subroutine only filters Perl quotelikes (including here documents).
536
537The full list of alternatives is:
538
539=over
540
541=item C<"code">
542
543Filters only those sections of the source code that are not quotelikes, POD, or
544C<__DATA__>.
545
546=item C<"executable">
547
548Filters only those sections of the source code that are not POD or C<__DATA__>.
549
550=item C<"quotelike">
551
552Filters only Perl quotelikes (as interpreted by
553C<&Text::Balanced::extract_quotelike>).
554
555=item C<"string">
556
557Filters only the string literal parts of a Perl quotelike (i.e. the
558contents of a string literal, either half of a C<tr///>, the second
559half of an C<s///>).
560
561=item C<"regex">
562
563Filters only the pattern literal parts of a Perl quotelike (i.e. the
564contents of a C<qr//> or an C<m//>, the first half of an C<s///>).
565
566=item C<"all">
567
568Filters everything. Identical in effect to C<FILTER>.
569
570=back
571
572Except for C<< FILTER_ONLY code => sub {...} >>, each of
573the component filters is called repeatedly, once for each component
574found in the source code.
575
576Note that you can also apply two or more of the same type of filter in
577a single C<FILTER_ONLY>. For example, here's a simple
578macro-preprocessor that is only applied within regexes,
579with a final debugging pass that prints the resulting source code:
580
581 use Regexp::Common;
582 FILTER_ONLY
583 regex => sub { s/!\[/[^/g },
584 regex => sub { s/%d/$RE{num}{int}/g },
585 regex => sub { s/%f/$RE{num}{real}/g },
586 all => sub { print if $::DEBUG };
587
588
589
590=head2 Filtering only the code parts of source code
591
592Most source code ceases to be grammatically correct when it is broken up
593into the pieces between string literals and regexes. So the C<'code'>
594component filter behaves slightly differently from the other partial filters
595described in the previous section.
596
597Rather than calling the specified processor on each individual piece of
598code (i.e. on the bits between quotelikes), the C<'code'> partial filter
599operates on the entire source code, but with the quotelike bits
600"blanked out".
601
602That is, a C<'code'> filter I<replaces> each quoted string, quotelike,
603regex, POD, and __DATA__ section with a placeholder. The
604delimiters of this placeholder are the contents of the C<$;> variable
605at the time the filter is applied (normally C<"\034">). The remaining
606four bytes are a unique identifier for the component being replaced.
607
608This approach makes it comparatively easy to write code preprocessors
609without worrying about the form or contents of strings, regexes, etc.
610For convenience, during a C<'code'> filtering operation, Filter::Simple
611provides a package variable (C<$Filter::Simple::placeholder>) that contains
612a pre-compiled regex that matches any placeholder. Placeholders can be
613moved and re-ordered within the source code as needed.
614
615Once the filtering has been applied, the original strings, regexes,
616POD, etc. are re-inserted into the code, by replacing each
617placeholder with the corresponding original component.
618
619For example, the following filter detects concatentated pairs of
620strings/quotelikes and reverses the order in which they are
621concatenated:
622
623 package DemoRevCat;
624 use Filter::Simple;
625
626 FILTER_ONLY code => sub { my $ph = $Filter::Simple::placeholder;
627 s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx
628 };
629
630Thus, the following code:
631
632 use DemoRevCat;
633
634 my $str = "abc" . q(def);
635
636 print "$str\n";
637
638would become:
639
640 my $str = q(def)."abc";
641
642 print "$str\n";
643
644and hence print:
645
646 defabc
647
648
649=head2 Using Filter::Simple with an explicit C<import> subroutine
650
651Filter::Simple generates a special C<import> subroutine for
652your module (see L<"How it works">) which would normally replace any
653C<import> subroutine you might have explicitly declared.
654
655However, Filter::Simple is smart enough to notice your existing
656C<import> and Do The Right Thing with it.
657That is, if you explicitly define an C<import> subroutine in a package
658that's using Filter::Simple, that C<import> subroutine will still
659be invoked immediately after any filter you install.
660
661The only thing you have to remember is that the C<import> subroutine
662I<must> be declared I<before> the filter is installed. If you use C<FILTER>
663to install the filter:
664
665 package Filter::TurnItUpTo11;
666
667 use Filter::Simple;
668
669 FILTER { s/(\w+)/\U$1/ };
670
671that will almost never be a problem, but if you install a filtering
672subroutine by passing it directly to the C<use Filter::Simple>
673statement:
674
675 package Filter::TurnItUpTo11;
676
677 use Filter::Simple sub{ s/(\w+)/\U$1/ };
678
679then you must make sure that your C<import> subroutine appears before
680that C<use> statement.
681
682
683=head2 Using Filter::Simple and Exporter together
684
685Likewise, Filter::Simple is also smart enough
686to Do The Right Thing if you use Exporter:
687
688 package Switch;
689 use base Exporter;
690 use Filter::Simple;
691
692 @EXPORT = qw(switch case);
693 @EXPORT_OK = qw(given when);
694
695 FILTER { $_ = magic_Perl_filter($_) }
696
697Immediately after the filter has been applied to the source,
698Filter::Simple will pass control to Exporter, so it can do its magic too.
699
700Of course, here too, Filter::Simple has to know you're using Exporter
701before it applies the filter. That's almost never a problem, but if you're
702nervous about it, you can guarantee that things will work correctly by
703ensuring that your C<use base Exporter> always precedes your
704C<use Filter::Simple>.
705
706
707=head2 How it works
708
709The Filter::Simple module exports into the package that calls C<FILTER>
710(or C<use>s it directly) -- such as package "BANG" in the above example --
711two automagically constructed
712subroutines -- C<import> and C<unimport> -- which take care of all the
713nasty details.
714
715In addition, the generated C<import> subroutine passes its own argument
716list to the filtering subroutine, so the BANG.pm filter could easily
717be made parametric:
718
719 package BANG;
720
721 use Filter::Simple;
722
723 FILTER {
724 my ($die_msg, $var_name) = @_;
725 s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g;
726 };
727
728 # and in some user code:
729
730 use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM
731
732
733The specified filtering subroutine is called every time a C<use BANG> is
734encountered, and passed all the source code following that call, up to
735either the next C<no BANG;> (or whatever terminator you've set) or the
736end of the source file, whichever occurs first. By default, any C<no
737BANG;> call must appear by itself on a separate line, or it is ignored.
738
739
740=head1 AUTHOR
741
742Damian Conway (damian@conway.org)
743
744=head1 COPYRIGHT
745
746 Copyright (c) 2000-2001, Damian Conway. All Rights Reserved.
747 This module is free software. It may be used, redistributed
748 and/or modified under the same terms as Perl itself.