Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / site_perl / 5.8.0 / sun4-solaris / HTML / HeadParser.pm
CommitLineData
86530b38
AT
1package HTML::HeadParser;
2
3=head1 NAME
4
5HTML::HeadParser - Parse <HEAD> section of a HTML document
6
7=head1 SYNOPSIS
8
9 require HTML::HeadParser;
10 $p = HTML::HeadParser->new;
11 $p->parse($text) and print "not finished";
12
13 $p->header('Title') # to access <title>....</title>
14 $p->header('Content-Base') # to access <base href="http://...">
15 $p->header('Foo') # to access <meta http-equiv="Foo" content="...">
16
17=head1 DESCRIPTION
18
19The I<HTML::HeadParser> is a specialized (and lightweight)
20I<HTML::Parser> that will only parse the E<lt>HEAD>...E<lt>/HEAD>
21section of an HTML document. The parse() method
22will return a FALSE value as soon as some E<lt>BODY> element or body
23text are found, and should not be called again after this.
24
25The I<HTML::HeadParser> keeps a reference to a header object, and the
26parser will update this header object as the various elements of the
27E<lt>HEAD> section of the HTML document are recognized. The following
28header fields are affected:
29
30=over 4
31
32=item Content-Base:
33
34The I<Content-Base> header is initialized from the E<lt>base
35href="..."> element.
36
37=item Title:
38
39The I<Title> header is initialized from the E<lt>title>...E<lt>/title>
40element.
41
42=item Isindex:
43
44The I<Isindex> header will be added if there is a E<lt>isindex>
45element in the E<lt>head>. The header value is initialized from the
46I<prompt> attribute if it is present. If no I<prompt> attribute is
47given it will have '?' as the value.
48
49=item X-Meta-Foo:
50
51All E<lt>meta> elements will initialize headers with the prefix
52"C<X-Meta->" on the name. If the E<lt>meta> element contains a
53C<http-equiv> attribute, then it will be honored as the header name.
54
55=back
56
57=head1 METHODS
58
59The following methods (in addition to those provided by the
60superclass) are available:
61
62=over 4
63
64=cut
65
66
67require HTML::Parser;
68@ISA = qw(HTML::Parser);
69
70use HTML::Entities ();
71
72use strict;
73use vars qw($VERSION $DEBUG);
74#$DEBUG = 1;
75$VERSION = sprintf("%d.%02d", q$Revision: 2.17 $ =~ /(\d+)\.(\d+)/);
76
77=item $hp = HTML::HeadParser->new( [$header] )
78
79The object constructor. The optional $header argument should be a
80reference to an object that implement the header() and push_header()
81methods as defined by the I<HTTP::Headers> class. Normally it will be
82of some class that isa or delegates to the I<HTTP::Headers> class.
83
84If no $header is given I<HTML::HeadParser> will create an
85I<HTTP::Header> object by itself (initially empty).
86
87=cut
88
89sub new
90{
91 my($class, $header) = @_;
92 unless ($header) {
93 require HTTP::Headers;
94 $header = HTTP::Headers->new;
95 }
96
97 my $self = $class->SUPER::new(api_version => 2,
98 ignore_elements => [qw(script style)],
99 );
100 $self->{'header'} = $header;
101 $self->{'tag'} = ''; # name of active element that takes textual content
102 $self->{'text'} = ''; # the accumulated text associated with the element
103 $self;
104}
105
106=item $hp->header;
107
108Returns a reference to the header object.
109
110=item $hp->header( $key )
111
112Returns a header value. It is just a shorter way to write
113C<$hp-E<gt>header-E<gt>header($key)>.
114
115=cut
116
117sub header
118{
119 my $self = shift;
120 return $self->{'header'} unless @_;
121 $self->{'header'}->header(@_);
122}
123
124sub as_string # legacy
125{
126 my $self = shift;
127 $self->{'header'}->as_string;
128}
129
130sub flush_text # internal
131{
132 my $self = shift;
133 my $tag = $self->{'tag'};
134 my $text = $self->{'text'};
135 $text =~ s/^\s+//;
136 $text =~ s/\s+$//;
137 $text =~ s/\s+/ /g;
138 print "FLUSH $tag => '$text'\n" if $DEBUG;
139 if ($tag eq 'title') {
140 HTML::Entities::decode($text);
141 $self->{'header'}->header(Title => $text);
142 }
143 $self->{'tag'} = $self->{'text'} = '';
144}
145
146# This is an quote from the HTML3.2 DTD which shows which elements
147# that might be present in a <HEAD>...</HEAD>. Also note that the
148# <HEAD> tags themselves might be missing:
149#
150# <!ENTITY % head.content "TITLE & ISINDEX? & BASE? & STYLE? &
151# SCRIPT* & META* & LINK*">
152#
153# <!ELEMENT HEAD O O (%head.content)>
154
155
156sub start
157{
158 my($self, $tag, $attr) = @_; # $attr is reference to a HASH
159 print "START[$tag]\n" if $DEBUG;
160 $self->flush_text if $self->{'tag'};
161 if ($tag eq 'meta') {
162 my $key = $attr->{'http-equiv'};
163 if (!defined($key) || !length($key)) {
164 return unless $attr->{'name'};
165 $key = "X-Meta-\u$attr->{'name'}";
166 }
167 $self->{'header'}->push_header($key => $attr->{content});
168 } elsif ($tag eq 'base') {
169 return unless exists $attr->{href};
170 $self->{'header'}->header('Content-Base' => $attr->{href});
171 } elsif ($tag eq 'isindex') {
172 # This is a non-standard header. Perhaps we should just ignore
173 # this element
174 $self->{'header'}->header(Isindex => $attr->{prompt} || '?');
175 } elsif ($tag =~ /^(?:title|script|style)$/) {
176 # Just remember tag. Initialize header when we see the end tag.
177 $self->{'tag'} = $tag;
178 } elsif ($tag eq 'link') {
179 return unless exists $attr->{href};
180 # <link href="http:..." rel="xxx" rev="xxx" title="xxx">
181 my $h_val = "<" . delete($attr->{href}) . ">";
182 for (sort keys %{$attr}) {
183 $h_val .= qq(; $_="$attr->{$_}");
184 }
185 $self->{'header'}->push_header(Link => $h_val);
186 } elsif ($tag eq 'head' || $tag eq 'html') {
187 # ignore
188 } else {
189 # stop parsing
190 $self->eof;
191 }
192}
193
194sub end
195{
196 my($self, $tag) = @_;
197 print "END[$tag]\n" if $DEBUG;
198 $self->flush_text if $self->{'tag'};
199 $self->eof if $tag eq 'head';
200}
201
202sub text
203{
204 my($self, $text) = @_;
205 print "TEXT[$text]\n" if $DEBUG;
206 my $tag = $self->{tag};
207 if (!$tag && $text =~ /\S/) {
208 # Normal text means start of body
209 $self->eof;
210 return;
211 }
212 return if $tag ne 'title';
213 $self->{'text'} .= $text;
214}
215
2161;
217
218__END__
219
220=head1 EXAMPLE
221
222 $h = HTTP::Headers->new;
223 $p = HTML::HeadParser->new($h);
224 $p->parse(<<EOT);
225 <title>Stupid example</title>
226 <base href="http://www.linpro.no/lwp/">
227 Normal text starts here.
228 EOT
229 undef $p;
230 print $h->title; # should print "Stupid example"
231
232=head1 SEE ALSO
233
234L<HTML::Parser>, L<HTTP::Headers>
235
236The I<HTTP::Headers> class is distributed as part of the I<libwww-perl>
237package.
238
239=head1 COPYRIGHT
240
241Copyright 1996-2001 Gisle Aas. All rights reserved.
242
243This library is free software; you can redistribute it and/or
244modify it under the same terms as Perl itself.
245
246=cut
247