Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / site_perl / 5.8.0 / sun4-solaris / HTML / PullParser.pm
CommitLineData
86530b38
AT
1package HTML::PullParser;
2
3# $Id: PullParser.pm,v 2.6 2001/04/02 23:26:18 gisle Exp $
4
5require HTML::Parser;
6@ISA=qw(HTML::Parser);
7$VERSION = sprintf("%d.%02d", q$Revision: 2.6 $ =~ /(\d+)\.(\d+)/);
8
9use strict;
10use Carp ();
11
# Constructor.
#
# Options consumed here:
#   start, end, text, declaration, comment, process, default
#       - argspec for each event that should produce tokens; at least
#         one must be given.
#   file - a file name, or something that can be read() from.
#   doc  - the document itself, as a string or a reference to one.
# Exactly one of 'file' and 'doc' must be given.  Every remaining option
# is handed to the parent class constructor, with api_version forced to 3.
#
# Croaks on invalid option combinations.  Returns the new parser, or
# nothing (undef in scalar context) if a named file can't be opened.
sub new
{
    my ($class, %cnf) = @_;

    # Lift the per-event argspecs out of the option hash so they are not
    # passed on to the parent constructor.
    my %argspec;
    foreach my $event (qw(start end text declaration comment process default)) {
        my $spec = delete $cnf{$event};
        $argspec{$event} = $spec if defined $spec;
    }
    unless (%argspec) {
        Carp::croak("Info not collected for any events");
    }

    # Exactly one input source has to be named.
    my $file = delete $cnf{file};
    my $doc  = delete $cnf{doc};
    my $have_file = defined($file);
    my $have_doc  = defined($doc);
    if ($have_file && $have_doc) {
        Carp::croak("Can't parse from both 'doc' and 'file' at the same time");
    }
    if (!$have_file && !$have_doc) {
        Carp::croak("No 'doc' or 'file' given to parse from");
    }

    # Create object
    $cnf{api_version} = 3;
    my $self = $class->SUPER::new(%cnf);

    # Every requested event appends its token to one shared queue.  The
    # parent's handler() is called directly because this class overrides
    # handler() to croak.
    my $accum = [];
    $self->{pullparser_accum} = $accum;
    foreach my $event (keys %argspec) {
        $self->SUPER::handler($event => $accum, $argspec{$event});
    }

    if ($have_doc) {
        # Always keep a reference, so get_token can treat both forms alike.
        $self->{pullparser_str_pos} = 0;
        $self->{pullparser_str_ref} = ref($doc) ? $doc : \$doc;
        return $self;
    }

    # Anything that is neither a reference nor a bare glob is taken to be
    # a file name and opened for reading.
    my $is_handle = ref($file) || ref(\$file) eq "GLOB";
    unless ($is_handle) {
        require IO::File;
        $file = IO::File->new($file, "r") || return;
    }
    $self->{pullparser_file} = $file;

    return $self;
}
56
57
# Handlers are installed once by the constructor (which calls the parent
# class's handler() directly), so any attempt to set or query them through
# this class is refused.  Always croaks.
sub handler
{
    my $complaint = "Can't set handlers for HTML::PullParser";
    Carp::croak($complaint);
}
62
63
# Return the next queued token, feeding the parser more input (at most
# 512 bytes/characters per step) until a token is available or the input
# is exhausted.  Returns undef once the queue is empty and end of input
# has been reached.
sub get_token
{
    my $self = shift;

    until (@{ $self->{pullparser_accum} } || $self->{pullparser_eof}) {
        my $fh      = $self->{pullparser_file};
        my $doc_ref = $fh ? undef : $self->{pullparser_str_ref};

        if ($fh) {
            # Input comes from a file handle: read the next block.
            my $buf;
            my $got = read($fh, $buf, 512);
            if ($got) {
                $self->parse($buf);
                next;
            }

            # Nothing more to read: flush the parser and drop the handle
            # (it is not closed here).
            $self->eof;
            $self->{pullparser_eof}++;
            delete $self->{pullparser_file};
        }
        elsif ($doc_ref) {
            # Input comes from a scalar: hand over the next slice.
            my $start = $self->{pullparser_str_pos};
            my $piece = substr($$doc_ref, $start, 512);
            $self->parse($piece);

            my $next_pos = $start + length($piece);
            if ($next_pos >= length($$doc_ref)) {
                # Whole document consumed.
                $self->eof;
                $self->{pullparser_eof}++;
                delete $self->{pullparser_str_ref};
                delete $self->{pullparser_str_pos};
            }
            else {
                $self->{pullparser_str_pos} = $next_pos;
            }
        }
        else {
            # Neither a file nor a document and not yet at EOF: the
            # object is in a state the constructor never produces.
            die;
        }
    }

    return shift @{ $self->{pullparser_accum} };
}
101
102
# Push one or more tokens back onto the front of the queue, so that
# get_token hands them out again in the order given.  Returns the parser
# object itself.
sub unget_token
{
    my ($self, @tokens) = @_;
    unshift @{ $self->{pullparser_accum} }, @tokens;
    return $self;
}
109
1101;
111
112
113__END__
114
115=head1 NAME
116
117HTML::PullParser - Alternative HTML::Parser interface
118
119=head1 SYNOPSIS
120
121 use HTML::PullParser;
122
123 $p = HTML::PullParser->new(file => "index.html",
124 start => 'event, tagname, @attr',
125 end => 'event, tagname',
126 ignore_elements => [qw(script style)],
127 ) || die "Can't open: $!";
128 while (my $token = $p->get_token) {
129 #...do something with $token
130 }
131
132=head1 DESCRIPTION
133
134The HTML::PullParser is an alternative interface to the HTML::Parser class.
135It basically turns the HTML::Parser inside out. You associate a file
136(or any IO::Handle object or string) with the parser at construction time and
137then repeatedly call $parser->get_token to obtain the tags and text
138found in the parsed document.
139
140The following methods are provided:
141
142=over 4
143
144=item $p = HTML::PullParser->new( file => $file, %options )
145
146=item $p = HTML::PullParser->new( doc => \$doc, %options )
147
148A C<HTML::PullParser> can be made to parse from either a file or a
149literal document based on whether the C<file> or C<doc> option is
150passed to the parser's constructor.
151
152The C<file> passed in can either be a file name or a file handle
153object. If a file name is passed, and it can't be opened for reading,
154then the constructor will return an undefined value and $! will tell
155you why it failed. Otherwise the argument is taken to be some object
156that the C<HTML::PullParser> can read() from when it needs more data.
157The stream will be read() until EOF, but not closed.
158
159A C<doc> can be passed plain or as a reference
160to a scalar. If a reference is passed then the value of this scalar
161should not be changed before all tokens have been extracted.
162
Next the information to be returned for the different token types must
be set up. This is done by simply associating an argspec (as defined
in L<HTML::Parser>) with the events you have an interest in. For
instance, if you want C<start> tokens to be reported as the string
C<'S'> followed by the tagname and the attributes you might pass a
C<start> option like this:
169
 $p = HTML::PullParser->new( doc   => $doc_to_parse,
                             start => '"S", tagname, @attr',
                             end   => '"E", tagname',
                           );
174
Finally, other C<HTML::Parser> options, like C<ignore_tags> and
C<unbroken_text>, can be passed in. Note that you should not use the
I<event>_h options to set up parser handlers.
178
179=item $token = $p->get_token
180
This method will return the next I<token> found in the HTML document,
or C<undef> at the end of the document. The token is usually returned
as an array reference. The content of this array matches the argspec
set up during C<HTML::PullParser> construction.
185
186=item $p->unget_token($token,...)
187
188If you find out you have read too many tokens you can push them back,
189so that they are returned again the next time $p->get_token is called.
190
191=head1 EXAMPLES
192
The 'eg/hform' script shows how we might parse the form section of
HTML documents using HTML::PullParser.
195
196=head1 SEE ALSO
197
198L<HTML::Parser>, L<HTML::TokeParser>
199
200=head1 COPYRIGHT
201
202Copyright 1998-2001 Gisle Aas.
203
204This library is free software; you can redistribute it and/or
205modify it under the same terms as Perl itself.
206
207=cut