git.subgeniuskitty.com - OpenSPARC-T2-DV/.git/blame_incremental - tools/perl-5.8.0/man/man3/HTML::PullParser.3

... / ...

Commit	Line	Data
	1	.\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13
	2	.\"
	3	.\" Standard preamble:
	4	.\" ========================================================================
	5	.de Sh \" Subsection heading
	6	.br
	7	.if t .Sp
	8	.ne 5
	9	.PP
	10	\fB\\$1\fR
	11	.PP
	12	..
	13	.de Sp \" Vertical space (when we can't use .PP)
	14	.if t .sp .5v
	15	.if n .sp
	16	..
	17	.de Vb \" Begin verbatim text
	18	.ft CW
	19	.nf
	20	.ne \\$1
	21	..
	22	.de Ve \" End verbatim text
	23	.ft R
	24	.fi
	25	..
	26	.\" Set up some character translations and predefined strings. \*(-- will
	27	.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
	28	.\" double quote, and \*(R" will give a right double quote. | will give a
	29	.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
	30	.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
	31	.\" expand to `' in nroff, nothing in troff, for use with C<>.
	32	.tr \(*W-|\(bv\*(Tr
	33	.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
	34	.ie n \{\
	35	. ds -- \(*W-
	36	. ds PI pi
	37	. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
	38	. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
	39	. ds L" ""
	40	. ds R" ""
	41	. ds C` ""
	42	. ds C' ""
	43	'br\}
	44	.el\{\
	45	. ds -- \|\(em\|
	46	. ds PI \(*p
	47	. ds L" ``
	48	. ds R" ''
	49	'br\}
	50	.\"
	51	.\" If the F register is turned on, we'll generate index entries on stderr for
	52	.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
	53	.\" entries marked with X<> in POD. Of course, you'll have to process the
	54	.\" output yourself in some meaningful fashion.
	55	.if \nF \{\
	56	. de IX
	57	. tm Index:\\$1\t\\n%\t"\\$2"
	58	..
	59	. nr % 0
	60	. rr F
	61	.\}
	62	.\"
	63	.\" For nroff, turn off justification. Always turn off hyphenation; it makes
	64	.\" way too many mistakes in technical documents.
	65	.hy 0
	66	.if n .na
	67	.\"
	68	.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
	69	.\" Fear. Run. Save yourself. No user-serviceable parts.
	70	. \" fudge factors for nroff and troff
	71	.if n \{\
	72	. ds #H 0
	73	. ds #V .8m
	74	. ds #F .3m
	75	. ds #[ \f1
	76	. ds #] \fP
	77	.\}
	78	.if t \{\
	79	. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
	80	. ds #V .6m
	81	. ds #F 0
	82	. ds #[ \&
	83	. ds #] \&
	84	.\}
	85	. \" simple accents for nroff and troff
	86	.if n \{\
	87	. ds ' \&
	88	. ds ` \&
	89	. ds ^ \&
	90	. ds , \&
	91	. ds ~ ~
	92	. ds /
	93	.\}
	94	.if t \{\
	95	. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
	96	. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
	97	. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
	98	. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
	99	. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
	100	. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
	101	.\}
	102	. \" troff and (daisy-wheel) nroff accents
	103	.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
	104	.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
	105	.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
	106	.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
	107	.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
	108	.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
	109	.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
	110	.ds ae a\h'-(\w'a'u*4/10)'e
	111	.ds Ae A\h'-(\w'A'u*4/10)'E
	112	. \" corrections for vroff
	113	.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
	114	.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
	115	. \" for low resolution devices (crt and lpr)
	116	.if \n(.H>23 .if \n(.V>19 \
	117	\{\
	118	. ds : e
	119	. ds 8 ss
	120	. ds o a
	121	. ds d- d\h'-1'\(ga
	122	. ds D- D\h'-1'\(hy
	123	. ds th \o'bp'
	124	. ds Th \o'LP'
	125	. ds ae ae
	126	. ds Ae AE
	127	.\}
	128	.rm #[ #] #H #V #F C
	129	.\" ========================================================================
	130	.\"
	131	.IX Title "HTML::PullParser 3"
	132	.TH HTML::PullParser 3 "2001-04-02" "perl v5.8.0" "User Contributed Perl Documentation"
	133	.SH "NAME"
	134	HTML::PullParser \- Alternative HTML::Parser interface
	135	.SH "SYNOPSIS"
	136	.IX Header "SYNOPSIS"
	137	.Vb 1
	138	\& use HTML::PullParser;
	139	.Ve
	140	.PP
	141	.Vb 8
	142	\& $p = HTML::PullParser->new(file => "index.html",
	143	\& start => 'event, tagname, @attr',
	144	\& end => 'event, tagname',
	145	\& ignore_elements => [qw(script style)],
	146	\& ) || die "Can't open: $!";
	147	\& while (my $token = $p->get_token) {
	148	\& #...do something with $token
	149	\& }
	150	.Ve
	151	.SH "DESCRIPTION"
	152	.IX Header "DESCRIPTION"
	153	The HTML::PullParser is an alternative interface to the HTML::Parser class.
	154	It basically turns the HTML::Parser inside out. You associate a file
	155	(or any IO::Handle object or string) with the parser at construction time and
	156	then repeatedly call \f(CW$parser\fR\->get_token to obtain the tags and text
	157	found in the parsed document.
	158	.PP
	159	The following methods are provided:
	160	.ie n .IP "$p = HTML::PullParser\->new( file => $file\fR, \f(CW%options )" 4
	161	.el .IP "$p = HTML::PullParser\->new( file => \f(CW$file\fR, \f(CW%options\fR )" 4
	162	.IX Item "$p = HTML::PullParser->new( file => $file, %options )"
	163	.PD 0
	164	.ie n .IP "$p = HTML::PullParser\->new( doc => \e$doc, %options )" 4
	165	.el .IP "$p = HTML::PullParser\->new( doc => \e$doc, \f(CW%options\fR )" 4
	166	.IX Item "$p = HTML::PullParser->new( doc => $doc, %options )"
	167	.PD
	168	A \f(CW\*(C`HTML::PullParser\*(C'\fR can be made to parse from either a file or a
	169	literal document based on whether the \f(CW\*(C`file\*(C'\fR or \f(CW\*(C`doc\*(C'\fR option is
	170	passed to the parser's constructor.
	171	.Sp
	172	The \f(CW\*(C`file\*(C'\fR passed in can either be a file name or a file handle
	173	object. If a file name is passed, and it can't be opened for reading,
	174	then the constructor will return an undefined value and $! will tell
	175	you why it failed. Otherwise the argument is taken to be some object
	176	that the \f(CW\*(C`HTML::PullParser\*(C'\fR can \fIread()\fR from when it needs more data.
	177	The stream will be \fIread()\fR until \s-1EOF\s0, but not closed.
	178	.Sp
	179	A \f(CW\*(C`doc\*(C'\fR can be passed plain or as a reference
	180	to a scalar. If a reference is passed then the value of this scalar
	181	should not be changed before all tokens have been extracted.
	182	.Sp
	183	Next the information to be returned for the different token types must
	184	be set up. This is done by simply associating an argspec (as defined
	185	in HTML::Parser) with the events you have an interest in. For
	186	instance, if you want \f(CW\*(C`start\*(C'\fR tokens to be reported as the string
	187	\&\f(CW'S'\fR followed by the tagname and the attributes you might pass an
	188	\&\f(CW\*(C`start\*(C'\fR\-option like this:
	189	.Sp
	190	.Vb 4
	191	\& $p = HTML::PullParser->new( doc => $doc_to_parse,
	192	\& start => '"S", tagname, @attr',
	193	\& end => '"E", tagname',
	194	\& );
	195	.Ve
	196	.Sp
	197	Finally, other \f(CW\*(C`HTML::Parser\*(C'\fR options, like \f(CW\*(C`ignore_tags\*(C'\fR and
	198	\&\f(CW\*(C`unbroken_text\*(C'\fR, can be passed in. Note that you should not use the
	199	\&\fIevent\fR_h options to set up parser handlers.
	200	.ie n .IP "$token = $p\->get_token" 4
	201	.el .IP "$token = \f(CW$p\fR\->get_token" 4
	202	.IX Item "$token = $p->get_token"
	203	This method will return the next \fItoken\fR found in the \s-1HTML\s0 document,
	204	or \f(CW\*(C`undef\*(C'\fR at the end of the document. The token is usually returned
	205	as an array reference. The content of this array matches the argspec
	206	set up during \f(CW\*(C`HTML::PullParser\*(C'\fR construction.
	207	.IP "$p\->unget_token($token,...)" 4
	208	.IX Item "$p->unget_token($token,...)"
	209	If you find out you have read too many tokens you can push them back,
	210	so that they are returned again the next time \f(CW$p\fR\->get_token is called.
	211	.SH "EXAMPLES"
	212	.IX Header "EXAMPLES"
	213	The 'eg/hform' script shows how we might parse the form section of
	214	HTML documents using HTML::PullParser.
	215	.SH "SEE ALSO"
	216	.IX Header "SEE ALSO"
	217	HTML::Parser, HTML::TokeParser
	218	.SH "COPYRIGHT"
	219	.IX Header "COPYRIGHT"
	220	Copyright 1998\-2001 Gisle Aas.
	221	.Sp
	222	This library is free software; you can redistribute it and/or
	223	modify it under the same terms as Perl itself.