[OpenSPARC-T2-DV] / tools / perl-5.8.0 / man / man3 / HTML::TokeParser.3

.\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sh \" Subsection heading
.br
.if t .Sp
.ne 5
.PP
\fB\\$1\fR
.PP
..
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  | will give a
.\" real vertical bar.  \*(C+ will give a nicer C++.  Capital omega is used to
.\" do unbreakable dashes and therefore won't be available.  \*(C` and \*(C'
.\" expand to `' in nroff, nothing in troff, for use with C<>.
.tr \(*W-|\(bv\*(Tr
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
'br\}
.\"
.\" If the F register is turned on, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.if \nF \{\
.    de IX
.    tm Index:\\$1\t\\n%\t"\\$2"
..
.    nr % 0
.    rr F
.\}
.\"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.hy 0
.if n .na
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "HTML::TokeParser 3"
.TH HTML::TokeParser 3 "2001-04-10" "perl v5.8.0" "User Contributed Perl Documentation"
.SH "NAME"
HTML::TokeParser \- Alternative HTML::Parser interface
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
.Vb 5
\& require HTML::TokeParser;
\& $p = HTML::TokeParser->new("index.html") || die "Can't open: $!";
\& while (my $token = $p->get_token) {
\&     #...
\& }
.Ve
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
The \f(CW\*(C`HTML::TokeParser\*(C'\fR is an alternative interface to the
\&\f(CW\*(C`HTML::Parser\*(C'\fR class.  It is an \f(CW\*(C`HTML::PullParser\*(C'\fR subclass.
.PP
The following methods are available:
.ie n .IP "$p = HTML::TokeParser\->new( $file_or_doc );" 4
.el .IP "$p = HTML::TokeParser\->new( \f(CW$file_or_doc\fR );" 4
.IX Item "$p = HTML::TokeParser->new( $file_or_doc );"
The object constructor argument is either a file name, a file handle
object, or the complete document to be parsed.
.Sp
If the argument is a plain scalar, then it is taken as the name of a
file to be opened and parsed.  If the file can't be opened for
reading, then the constructor will return an undefined value and $!
will tell you why it failed.
.Sp
If the argument is a reference to a plain scalar, then this scalar is
taken to be the literal document to parse.  The value of this
scalar should not be changed before all tokens have been extracted.
.Sp
Otherwise the argument is taken to be some object that the
\&\f(CW\*(C`HTML::TokeParser\*(C'\fR can \fIread()\fR from when it needs more data.  Typically
it will be a filehandle of some kind.  The stream will be \fIread()\fR until
\&\s-1EOF\s0, but not closed.
.IP "$p\->get_token" 4
.IX Item "$p->get_token"
This method will return the next \fItoken\fR found in the \s-1HTML\s0 document,
or \f(CW\*(C`undef\*(C'\fR at the end of the document.  The token is returned as an
array reference.  The first element of the array will be a (mostly)
single character string denoting the type of this token: \*(L"S\*(R" for start
tag, \*(L"E\*(R" for end tag, \*(L"T\*(R" for text, \*(L"C\*(R" for comment, \*(L"D\*(R" for
declaration, and \*(L"\s-1PI\s0\*(R" for processing instructions.  The rest of the array
is the same as the arguments passed to the corresponding HTML::Parser
v2 compatible callbacks (see HTML::Parser).  In summary, returned
tokens look like this:
.Sp
.Vb 6
\&  ["S",  $tag, $attr, $attrseq, $text]
\&  ["E",  $tag, $text]
\&  ["T",  $text, $is_data]
\&  ["C",  $text]
\&  ["D",  $text]
\&  ["PI", $token0, $text]
.Ve
.Sp
where \f(CW$attr\fR is a hash reference, \f(CW$attrseq\fR is an array reference and
the rest are plain scalars.
.IP "$p\->unget_token($token,...)" 4
.IX Item "$p->unget_token($token,...)"
If you find out you have read too many tokens you can push them back,
so that they are returned the next time \f(CW$p\fR\->get_token is called.
.IP "$p\->get_tag( [$tag, ...] )" 4
.IX Item "$p->get_tag( [$tag, ...] )"
This method returns the next start or end tag (skipping any other
tokens), or \f(CW\*(C`undef\*(C'\fR if there are no more tags in the document.  If
one or more arguments are given, then we skip tokens until one of the
specified tag types is found.  For example:
.Sp
.Vb 1
\&   $p->get_tag("font", "/font");
.Ve
.Sp
will find the next start or end tag for a font\-element.
.Sp
The tag information is returned as an array reference in the same form
as for \f(CW$p\fR\->get_token above, but the type code (first element) is
missing. A start tag will be returned like this:
.Sp
.Vb 1
\&  [$tag, $attr, $attrseq, $text]
.Ve
.Sp
The tagname of an end tag is prefixed with \*(L"/\*(R", i.e. an end tag is
returned like this:
.Sp
.Vb 1
\&  ["/$tag", $text]
.Ve
.IP "$p\->get_text( [$endtag] )" 4
.IX Item "$p->get_text( [$endtag] )"
This method returns all text found at the current position. It will
return a zero length string if the next token is not text.  The
optional \f(CW$endtag\fR argument specifies that any text occurring before the
given tag is to be returned.  Any entities will be converted to their
corresponding characters.
.Sp
The \f(CW$p\fR\->{textify} attribute is a hash that defines how certain tags can
be treated as text.  If the name of a start tag matches a key in this
hash then this tag is converted to text.  The hash value is used to
specify which tag attribute to obtain the text from.  If this tag
attribute is missing, then the upper case name of the tag enclosed in
brackets is returned, e.g. \*(L"[\s-1IMG\s0]\*(R".  The hash value can also be a
subroutine reference.  In this case the routine is called with the
start tag token content as its argument and the return value is treated
as the text.
.Sp
The default \f(CW$p\fR\->{textify} value is:
.Sp
.Vb 1
\&  {img => "alt", applet => "alt"}
.Ve
.Sp
This means that <\s-1IMG\s0> and <\s-1APPLET\s0> tags are treated as text, and that
the text to substitute can be found in the \s-1ALT\s0 attribute.
.IP "$p\->get_trimmed_text( [$endtag] )" 4
.IX Item "$p->get_trimmed_text( [$endtag] )"
Same as \f(CW$p\fR\->get_text above, but will collapse any sequences of white
space to a single space character.  Leading and trailing white space is
removed.
.SH "EXAMPLES"
.IX Header "EXAMPLES"
This example extracts all links from a document.  It will print one
line for each link, containing the \s-1URL\s0 and the textual description
between the <A>...</A> tags:
.PP
.Vb 2
\&  use HTML::TokeParser;
\&  $p = HTML::TokeParser->new(shift||"index.html");
.Ve
.PP
.Vb 5
\&  while (my $token = $p->get_tag("a")) {
\&      my $url = $token->[1]{href} || "-";
\&      my $text = $p->get_trimmed_text("/a");
\&      print "$url\et$text\en";
\&  }
.Ve
.PP
This example extracts the <\s-1TITLE\s0> from the document:
.PP
.Vb 6
\&  use HTML::TokeParser;
\&  $p = HTML::TokeParser->new(shift||"index.html");
\&  if ($p->get_tag("title")) {
\&      my $title = $p->get_trimmed_text;
\&      print "Title: $title\en";
\&  }
.Ve
.SH "SEE ALSO"
.IX Header "SEE ALSO"
HTML::PullParser, HTML::Parser
.SH "COPYRIGHT"
.IX Header "COPYRIGHT"
Copyright 1998\-2001 Gisle Aas.
.PP
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
Commit	Line	Data
86530b38 AT	1	.\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13
	2	.\"
	3	.\" Standard preamble:
	4	.\" ========================================================================
	5	.de Sh \" Subsection heading
	6	.br
	7	.if t .Sp
	8	.ne 5
	9	.PP
	10	\fB\\$1\fR
	11	.PP
	12	..
	13	.de Sp \" Vertical space (when we can't use .PP)
	14	.if t .sp .5v
	15	.if n .sp
	16	..
	17	.de Vb \" Begin verbatim text
	18	.ft CW
	19	.nf
	20	.ne \\$1
	21	..
	22	.de Ve \" End verbatim text
	23	.ft R
	24	.fi
	25	..
	26	.\" Set up some character translations and predefined strings. \*(-- will
	27	.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
	28	.\" double quote, and \*(R" will give a right double quote. | will give a
	29	.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
	30	.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
	31	.\" expand to `' in nroff, nothing in troff, for use with C<>.
	32	.tr \(*W-|\(bv\*(Tr
	33	.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
	34	.ie n \{\
	35	. ds -- \(*W-
	36	. ds PI pi
	37	. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
	38	. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
	39	. ds L" ""
	40	. ds R" ""
	41	. ds C` ""
	42	. ds C' ""
	43	'br\}
	44	.el\{\
	45	. ds -- \|\(em\|
	46	. ds PI \(*p
	47	. ds L" ``
	48	. ds R" ''
	49	'br\}
	50	.\"
	51	.\" If the F register is turned on, we'll generate index entries on stderr for
	52	.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
	53	.\" entries marked with X<> in POD. Of course, you'll have to process the
	54	.\" output yourself in some meaningful fashion.
	55	.if \nF \{\
	56	. de IX
	57	. tm Index:\\$1\t\\n%\t"\\$2"
	58	..
	59	. nr % 0
	60	. rr F
	61	.\}
	62	.\"
	63	.\" For nroff, turn off justification. Always turn off hyphenation; it makes
	64	.\" way too many mistakes in technical documents.
65	.hy 0
66	.if n .na
67	.\"
68	.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
69	.\" Fear. Run. Save yourself. No user-serviceable parts.
70	. \" fudge factors for nroff and troff
71	.if n \{\
72	. ds #H 0
73	. ds #V .8m
74	. ds #F .3m
75	. ds #[ \f1
76	. ds #] \fP
77	.\}
78	.if t \{\
79	. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
80	. ds #V .6m
81	. ds #F 0
82	. ds #[ \&
83	. ds #] \&
84	.\}
85	. \" simple accents for nroff and troff
86	.if n \{\
87	. ds ' \&
88	. ds ` \&
89	. ds ^ \&
90	. ds , \&
91	. ds ~ ~
92	. ds /
93	.\}
94	.if t \{\
95	. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
96	. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
97	. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
98	. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
99	. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
100	. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
101	.\}
102	. \" troff and (daisy-wheel) nroff accents
103	.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
104	.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
105	.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
106	.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
107	.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
108	.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
109	.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
110	.ds ae a\h'-(\w'a'u*4/10)'e
111	.ds Ae A\h'-(\w'A'u*4/10)'E
112	. \" corrections for vroff
113	.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
114	.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
115	. \" for low resolution devices (crt and lpr)
116	.if \n(.H>23 .if \n(.V>19 \
117	\{\
118	. ds : e
119	. ds 8 ss
120	. ds o a
121	. ds d- d\h'-1'\(ga
122	. ds D- D\h'-1'\(hy
123	. ds th \o'bp'
124	. ds Th \o'LP'
125	. ds ae ae
126	. ds Ae AE
127	.\}
128	.rm #[ #] #H #V #F C
129	.\" ========================================================================
130	.\"
131	.IX Title "HTML::TokeParser 3"
132	.TH HTML::TokeParser 3 "2001-04-10" "perl v5.8.0" "User Contributed Perl Documentation"
133	.SH "NAME"
134	HTML::TokeParser \- Alternative HTML::Parser interface
135	.SH "SYNOPSIS"
136	.IX Header "SYNOPSIS"
137	.Vb 5
138	\& require HTML::TokeParser;
139	\& $p = HTML::TokeParser->new("index.html") || die "Can't open: $!";
140	\& while (my $token = $p->get_token) {
141	\& #...
142	\& }
143	.Ve
144	.SH "DESCRIPTION"
145	.IX Header "DESCRIPTION"
146	The \f(CW\*(C`HTML::TokeParser\*(C'\fR is an alternative interface to the
147	\&\f(CW\*(C`HTML::Parser\*(C'\fR class. It is an \f(CW\*(C`HTML::PullParser\*(C'\fR subclass.
148	.PP
149	The following methods are available:
150	.ie n .IP "$p = HTML::TokeParser\->new( $file_or_doc );" 4
151	.el .IP "$p = HTML::TokeParser\->new( \f(CW$file_or_doc\fR );" 4
152	.IX Item "$p = HTML::TokeParser->new( $file_or_doc );"
153	The object constructor argument is either a file name, a file handle
154	object, or the complete document to be parsed.
155	.Sp
156	If the argument is a plain scalar, then it is taken as the name of a
157	file to be opened and parsed. If the file can't be opened for
158	reading, then the constructor will return an undefined value and $!
159	will tell you why it failed.
160	.Sp
161	If the argument is a reference to a plain scalar, then this scalar is
162	taken to be the literal document to parse. The value of this
163	scalar should not be changed before all tokens have been extracted.
164	.Sp
165	Otherwise the argument is taken to be some object that the
166	\&\f(CW\*(C`HTML::TokeParser\*(C'\fR can \fIread()\fR from when it needs more data. Typically
167	it will be a filehandle of some kind. The stream will be \fIread()\fR until
168	\&\s-1EOF\s0, but not closed.
169	.IP "$p\->get_token" 4
170	.IX Item "$p->get_token"
171	This method will return the next \fItoken\fR found in the \s-1HTML\s0 document,
172	or \f(CW\*(C`undef\*(C'\fR at the end of the document. The token is returned as an
173	array reference. The first element of the array will be a (mostly)
174	single character string denoting the type of this token: \*(L"S\*(R" for start
175	tag, \*(L"E\*(R" for end tag, \*(L"T\*(R" for text, \*(L"C\*(R" for comment, \*(L"D\*(R" for
176	declaration, and \*(L"\s-1PI\s0\*(R" for processing instructions. The rest of the array
177	is the same as the arguments passed to the corresponding HTML::Parser
178	v2 compatible callbacks (see HTML::Parser). In summary, returned
179	tokens look like this:
180	.Sp
181	.Vb 6
182	\& ["S", $tag, $attr, $attrseq, $text]
183	\& ["E", $tag, $text]
184	\& ["T", $text, $is_data]
185	\& ["C", $text]
186	\& ["D", $text]
187	\& ["PI", $token0, $text]
188	.Ve
189	.Sp
190	where \f(CW$attr\fR is a hash reference, \f(CW$attrseq\fR is an array reference and
191	the rest are plain scalars.
192	.IP "$p\->unget_token($token,...)" 4
193	.IX Item "$p->unget_token($token,...)"
194	If you find out you have read too many tokens you can push them back,
195	so that they are returned the next time \f(CW$p\fR\->get_token is called.
196	.IP "$p\->get_tag( [$tag, ...] )" 4
197	.IX Item "$p->get_tag( [$tag, ...] )"
198	This method returns the next start or end tag (skipping any other
199	tokens), or \f(CW\*(C`undef\*(C'\fR if there are no more tags in the document. If
200	one or more arguments are given, then we skip tokens until one of the
201	specified tag types is found. For example:
202	.Sp
203	.Vb 1
204	\& $p->get_tag("font", "/font");
205	.Ve
206	.Sp
207	will find the next start or end tag for a font\-element.
208	.Sp
209	The tag information is returned as an array reference in the same form
210	as for \f(CW$p\fR\->get_token above, but the type code (first element) is
211	missing. A start tag will be returned like this:
212	.Sp
213	.Vb 1
214	\& [$tag, $attr, $attrseq, $text]
215	.Ve
216	.Sp
217	The tagname of an end tag is prefixed with \*(L"/\*(R", i.e. an end tag is
218	returned like this:
219	.Sp
220	.Vb 1
221	\& ["/$tag", $text]
222	.Ve
223	.IP "$p\->get_text( [$endtag] )" 4
224	.IX Item "$p->get_text( [$endtag] )"
225	This method returns all text found at the current position. It will
226	return a zero length string if the next token is not text. The
227	optional \f(CW$endtag\fR argument specifies that any text occurring before the
228	given tag is to be returned. Any entities will be converted to their
229	corresponding characters.
230	.Sp
231	The \f(CW$p\fR\->{textify} attribute is a hash that defines how certain tags can
232	be treated as text. If the name of a start tag matches a key in this
233	hash then this tag is converted to text. The hash value is used to
234	specify which tag attribute to obtain the text from. If this tag
235	attribute is missing, then the upper case name of the tag enclosed in
236	brackets is returned, e.g. \*(L"[\s-1IMG\s0]\*(R". The hash value can also be a
237	subroutine reference. In this case the routine is called with the
238	start tag token content as its argument and the return value is treated
239	as the text.
240	.Sp
241	The default \f(CW$p\fR\->{textify} value is:
242	.Sp
243	.Vb 1
244	\& {img => "alt", applet => "alt"}
245	.Ve
246	.Sp
247	This means that <\s-1IMG\s0> and <\s-1APPLET\s0> tags are treated as text, and that
248	the text to substitute can be found in the \s-1ALT\s0 attribute.
249	.IP "$p\->get_trimmed_text( [$endtag] )" 4
250	.IX Item "$p->get_trimmed_text( [$endtag] )"
251	Same as \f(CW$p\fR\->get_text above, but will collapse any sequences of white
252	space to a single space character. Leading and trailing white space is
253	removed.
254	.SH "EXAMPLES"
255	.IX Header "EXAMPLES"
256	This example extracts all links from a document. It will print one
257	line for each link, containing the \s-1URL\s0 and the textual description
258	between the <A>...</A> tags:
259	.PP
260	.Vb 2
261	\& use HTML::TokeParser;
262	\& $p = HTML::TokeParser->new(shift||"index.html");
263	.Ve
264	.PP
265	.Vb 5
266	\& while (my $token = $p->get_tag("a")) {
267	\& my $url = $token->[1]{href} || "-";
268	\& my $text = $p->get_trimmed_text("/a");
269	\& print "$url\et$text\en";
270	\& }
271	.Ve
272	.PP
273	This example extracts the <\s-1TITLE\s0> from the document:
274	.PP
275	.Vb 6
276	\& use HTML::TokeParser;
277	\& $p = HTML::TokeParser->new(shift||"index.html");
278	\& if ($p->get_tag("title")) {
279	\& my $title = $p->get_trimmed_text;
280	\& print "Title: $title\en";
281	\& }
282	.Ve
283	.SH "SEE ALSO"
284	.IX Header "SEE ALSO"
285	HTML::PullParser, HTML::Parser
286	.SH "COPYRIGHT"
287	.IX Header "COPYRIGHT"
288	Copyright 1998\-2001 Gisle Aas.
289	.PP
290	This library is free software; you can redistribute it and/or
291	modify it under the same terms as Perl itself.