Commit | Line | Data |
---|---|---|
86530b38 AT |
package HTML::Filter;

# Require every variable to be declared and forbid symbolic references.
use strict;
# The package globals are declared with "use vars" (not "our") so that the
# module keeps compiling on perls older than 5.6.
use vars qw(@ISA $VERSION);

# HTML::Filter inherits from HTML::Parser; the parser drives the callback
# methods defined below.
require HTML::Parser;
@ISA=qw(HTML::Parser);

# Version string built from the revision keyword: major, then the minor
# number zero-padded to two digits ("2.09" for revision 2.9).
$VERSION = sprintf("%d.%02d", q$Revision: 2.9 $ =~ /(\d+)\.(\d+)/);
7 | ||
# Callback method: wraps its argument in "<!" and ">" and hands the
# resulting markup to output().  Returns whatever output() returns.
sub declaration
{
    my ($self, $decl) = @_;
    return $self->output("<!$decl>");
}
# Callback method: forwards its second argument to output() untouched
# (presumably the original source text of the construct -- confirm against
# the HTML::Parser callback documentation).  Returns what output() returns.
sub process
{
    my $self = shift;
    # After the shift the value to emit sits at index 1; pass the @_ element
    # itself rather than a copy.
    return $self->output($_[1]);
}
# Callback method: wraps its argument in "<!--" and "-->" and hands the
# resulting markup to output().  Returns whatever output() returns.
sub comment
{
    my ($self, $body) = @_;
    return $self->output("<!--$body-->");
}
# Callback method for start tags: forwards its fourth argument to output()
# untouched (presumably the tag's original text -- confirm against the
# HTML::Parser callback documentation).  Returns what output() returns.
sub start
{
    my $self = shift;
    # After the shift the value to emit sits at index 3; pass the @_ element
    # itself rather than a copy.
    return $self->output($_[3]);
}
# Callback method for end tags: forwards its second argument to output()
# untouched (presumably the tag's original text -- confirm against the
# HTML::Parser callback documentation).  Returns what output() returns.
sub end
{
    my $self = shift;
    # After the shift the value to emit sits at index 1; pass the @_ element
    # itself rather than a copy.
    return $self->output($_[1]);
}
# Callback method for text: forwards its only argument to output()
# untouched.  Returns whatever output() returns.
sub text
{
    my $self = shift;
    # Pass the @_ element itself rather than a copy.
    return $self->output($_[0]);
}
14 | ||
# Sink used by all the callback methods: prints the given text to the
# currently selected filehandle and returns print's result.  Subclasses can
# override this method to send the text somewhere else.
sub output
{
    my ($self, $html) = @_;
    return print($html);
}
16 | ||
17 | 1; | |
18 | ||
19 | __END__ | |
20 | ||
21 | =head1 NAME | |
22 | ||
23 | HTML::Filter - Filter HTML text through the parser | |
24 | ||
25 | =head1 NOTE | |
26 | ||
27 | This module is deprecated. C<HTML::Parser> now provides the | |
28 | functionality of C<HTML::Filter> much more efficiently with the | |
29 | C<default> handler. | |
30 | ||
31 | =head1 SYNOPSIS | |
32 | ||
33 | require HTML::Filter; | |
34 | $p = HTML::Filter->new->parse_file("index.html"); | |
35 | ||
36 | =head1 DESCRIPTION | |
37 | ||
38 | C<HTML::Filter> is an HTML parser that by default prints the | |
39 | original text of each HTML element (a slow version of cat(1) basically). | |
40 | The callback methods may be overridden to modify the filtering for some | |
41 | HTML elements, and you can override the output() method, which is called to | |
42 | print the HTML text. | |
43 | ||
44 | C<HTML::Filter> is a subclass of C<HTML::Parser>. This means that | |
45 | the document should be given to the parser by calling the $p->parse() | |
46 | or $p->parse_file() methods. | |
47 | ||
48 | =head1 EXAMPLES | |
49 | ||
50 | The first example is a filter that will remove all comments from an | |
51 | HTML file. This is achieved by simply overriding the comment method | |
52 | to do nothing. | |
53 | ||
54 | package CommentStripper; | |
55 | require HTML::Filter; | |
56 | @ISA=qw(HTML::Filter); | |
57 | sub comment { } # ignore comments | |
58 | ||
59 | The second example shows a filter that will remove any E<lt>TABLE>s | |
60 | found in the HTML file. We specialize the start() and end() methods | |
61 | to count table tags and then make output not happen when inside a | |
62 | table. | |
63 | ||
64 | package TableStripper; | |
65 | require HTML::Filter; | |
66 | @ISA=qw(HTML::Filter); | |
67 | sub start | |
68 | { | |
69 | my $self = shift; | |
70 | $self->{table_seen}++ if $_[0] eq "table"; | |
71 | $self->SUPER::start(@_); | |
72 | } | |
73 | ||
74 | sub end | |
75 | { | |
76 | my $self = shift; | |
77 | $self->SUPER::end(@_); | |
78 | $self->{table_seen}-- if $_[0] eq "table"; | |
79 | } | |
80 | ||
81 | sub output | |
82 | { | |
83 | my $self = shift; | |
84 | unless ($self->{table_seen}) { | |
85 | $self->SUPER::output(@_); | |
86 | } | |
87 | } | |
88 | ||
89 | If you want to collect the parsed text internally you might want to do | |
90 | something like this: | |
91 | ||
92 | package FilterIntoString; | |
93 | require HTML::Filter; | |
94 | @ISA=qw(HTML::Filter); | |
95 | sub output { push(@{$_[0]->{fhtml}}, $_[1]) } | |
96 | sub filtered_html { join("", @{$_[0]->{fhtml}}) } | |
97 | ||
98 | =head1 SEE ALSO | |
99 | ||
100 | L<HTML::Parser> | |
101 | ||
102 | =head1 COPYRIGHT | |
103 | ||
104 | Copyright 1997-1999 Gisle Aas. | |
105 | ||
106 | This library is free software; you can redistribute it and/or | |
107 | modify it under the same terms as Perl itself. | |
108 | ||
109 | =cut |