Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / man / man3 / HTML::LinkExtor.3
CommitLineData
86530b38
AT
1.\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13
2.\"
3.\" Standard preamble:
4.\" ========================================================================
5.de Sh \" Subsection heading
6.br
7.if t .Sp
8.ne 5
9.PP
10\fB\\$1\fR
11.PP
12..
13.de Sp \" Vertical space (when we can't use .PP)
14.if t .sp .5v
15.if n .sp
16..
17.de Vb \" Begin verbatim text
18.ft CW
19.nf
20.ne \\$1
21..
22.de Ve \" End verbatim text
23.ft R
24.fi
25..
26.\" Set up some character translations and predefined strings. \*(-- will
27.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
28.\" double quote, and \*(R" will give a right double quote. | will give a
29.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
30.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
31.\" expand to `' in nroff, nothing in troff, for use with C<>.
32.tr \(*W-|\(bv\*(Tr
33.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
34.ie n \{\
35. ds -- \(*W-
36. ds PI pi
37. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
38. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
39. ds L" ""
40. ds R" ""
41. ds C` ""
42. ds C' ""
43'br\}
44.el\{\
45. ds -- \|\(em\|
46. ds PI \(*p
47. ds L" ``
48. ds R" ''
49'br\}
50.\"
51.\" If the F register is turned on, we'll generate index entries on stderr for
52.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
53.\" entries marked with X<> in POD. Of course, you'll have to process the
54.\" output yourself in some meaningful fashion.
55.if \nF \{\
56. de IX
57. tm Index:\\$1\t\\n%\t"\\$2"
58..
59. nr % 0
60. rr F
61.\}
62.\"
63.\" For nroff, turn off justification. Always turn off hyphenation; it makes
64.\" way too many mistakes in technical documents.
65.hy 0
66.if n .na
67.\"
68.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
69.\" Fear. Run. Save yourself. No user-serviceable parts.
70. \" fudge factors for nroff and troff
71.if n \{\
72. ds #H 0
73. ds #V .8m
74. ds #F .3m
75. ds #[ \f1
76. ds #] \fP
77.\}
78.if t \{\
79. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
80. ds #V .6m
81. ds #F 0
82. ds #[ \&
83. ds #] \&
84.\}
85. \" simple accents for nroff and troff
86.if n \{\
87. ds ' \&
88. ds ` \&
89. ds ^ \&
90. ds , \&
91. ds ~ ~
92. ds /
93.\}
94.if t \{\
95. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
96. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
97. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
98. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
99. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
100. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
101.\}
102. \" troff and (daisy-wheel) nroff accents
103.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
104.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
105.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
106.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
107.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
108.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
109.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
110.ds ae a\h'-(\w'a'u*4/10)'e
111.ds Ae A\h'-(\w'A'u*4/10)'E
112. \" corrections for vroff
113.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
114.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
115. \" for low resolution devices (crt and lpr)
116.if \n(.H>23 .if \n(.V>19 \
117\{\
118. ds : e
119. ds 8 ss
120. ds o a
121. ds d- d\h'-1'\(ga
122. ds D- D\h'-1'\(hy
123. ds th \o'bp'
124. ds Th \o'LP'
125. ds ae ae
126. ds Ae AE
127.\}
128.rm #[ #] #H #V #F C
129.\" ========================================================================
130.\"
131.IX Title "HTML::LinkExtor 3"
132.TH HTML::LinkExtor 3 "2001-04-10" "perl v5.8.0" "User Contributed Perl Documentation"
133.SH "NAME"
134HTML::LinkExtor \- Extract links from an HTML document
135.SH "SYNOPSIS"
136.IX Header "SYNOPSIS"
137.Vb 7
138\& require HTML::LinkExtor;
139\& $p = HTML::LinkExtor->new(\e&cb, "http://www.perl.org/");
140\& sub cb {
141\& my($tag, %links) = @_;
142\& print "$tag @{[%links]}\en";
143\& }
144\& $p->parse_file("index.html");
145.Ve
146.SH "DESCRIPTION"
147.IX Header "DESCRIPTION"
148\&\fIHTML::LinkExtor\fR is an \s-1HTML\s0 parser that extracts links from an
149\&\s-1HTML\s0 document. The \fIHTML::LinkExtor\fR is a subclass of
150\&\fIHTML::Parser\fR. This means that the document should be given to the
151parser by calling the \f(CW$p\fR\->\fIparse()\fR or \f(CW$p\fR\->\fIparse_file()\fR methods.
152.ie n .IP "$p = HTML::LinkExtor\->new([$callback[, $base]])" 4
153.el .IP "$p = HTML::LinkExtor\->new([$callback[, \f(CW$base\fR]])" 4
154.IX Item "$p = HTML::LinkExtor->new([$callback[, $base]])"
155The constructor takes two optional arguments. The first is a reference
156to a callback routine. It will be called as links are found. If a
157callback is not provided, then links are just accumulated internally
158and can be retrieved by calling the \f(CW$p\fR\->\fIlinks()\fR method.
159.Sp
160The \f(CW$base\fR argument is an optional base \s-1URL\s0 used to absolutize all URLs found.
161You need to have the \fI\s-1URI\s0\fR module installed if you provide \f(CW$base\fR.
162.Sp
163The callback is called with the lowercase tag name as first argument,
164and then all link attributes as separate key/value pairs. All
165non-link attributes are removed.
166.IP "$p\->links" 4
167.IX Item "$p->links"
168Returns a list of all links found in the document. The returned
169values will be anonymous arrays with the following elements:
170.Sp
171.Vb 1
172\& [$tag, $attr1 => $url1, $attr2 => $url2,...]
173.Ve
174.Sp
175The \f(CW$p\fR\->links method will also truncate the internal link list. This
176means that if the method is called twice without any parsing
177between them, the second call will return an empty list.
178.Sp
179Also note that \f(CW$p\fR\->links will always be empty if a callback routine
180was provided when the \fIHTML::LinkExtor\fR was created.
181.SH "EXAMPLE"
182.IX Header "EXAMPLE"
183This is an example showing how you can extract links from a document
184received using \s-1LWP:\s0
185.PP
186.Vb 3
187\& use LWP::UserAgent;
188\& use HTML::LinkExtor;
189\& use URI::URL;
190.Ve
191.PP
192.Vb 2
193\& $url = "http://www.perl.org/"; # for instance
194\& $ua = LWP::UserAgent->new;
195.Ve
196.PP
197.Vb 7
198\& # Set up a callback that collects image links
199\& my @imgs = ();
200\& sub callback {
201\& my($tag, %attr) = @_;
202\& return if $tag ne 'img'; # we only look closer at <img ...>
203\& push(@imgs, values %attr);
204\& }
205.Ve
206.PP
207.Vb 3
208\& # Make the parser. Unfortunately, we don't know the base yet
209\& # (it might be different from $url)
210\& $p = HTML::LinkExtor->new(\e&callback);
211.Ve
212.PP
213.Vb 3
214\& # Request document and parse it as it arrives
215\& $res = $ua->request(HTTP::Request->new(GET => $url),
216\& sub {$p->parse($_[0])});
217.Ve
218.PP
219.Vb 3
220\& # Expand all image URLs to absolute ones
221\& my $base = $res->base;
222\& @imgs = map { $_ = url($_, $base)->abs; } @imgs;
223.Ve
224.PP
225.Vb 2
226\& # Print them out
227\& print join("\en", @imgs), "\en";
228.Ve
229.SH "SEE ALSO"
230.IX Header "SEE ALSO"
231HTML::Parser, HTML::Tagset, \s-1LWP\s0, \s-1URI::URL\s0
232.SH "COPYRIGHT"
233.IX Header "COPYRIGHT"
234Copyright 1996\-2001 Gisle Aas.
235.PP
236This library is free software; you can redistribute it and/or
237modify it under the same terms as Perl itself.