Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / man / man3 / HTML::Tagset.3
CommitLineData
86530b38
AT
1.\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13
2.\"
3.\" Standard preamble:
4.\" ========================================================================
5.de Sh \" Subsection heading
6.br
7.if t .Sp
8.ne 5
9.PP
10\fB\\$1\fR
11.PP
12..
13.de Sp \" Vertical space (when we can't use .PP)
14.if t .sp .5v
15.if n .sp
16..
17.de Vb \" Begin verbatim text
18.ft CW
19.nf
20.ne \\$1
21..
22.de Ve \" End verbatim text
23.ft R
24.fi
25..
26.\" Set up some character translations and predefined strings. \*(-- will
27.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
28.\" double quote, and \*(R" will give a right double quote. | will give a
29.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
30.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
31.\" expand to `' in nroff, nothing in troff, for use with C<>.
32.tr \(*W-|\(bv\*(Tr
33.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
34.ie n \{\
35. ds -- \(*W-
36. ds PI pi
37. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
38. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
39. ds L" ""
40. ds R" ""
41. ds C` ""
42. ds C' ""
43'br\}
44.el\{\
45. ds -- \|\(em\|
46. ds PI \(*p
47. ds L" ``
48. ds R" ''
49'br\}
50.\"
51.\" If the F register is turned on, we'll generate index entries on stderr for
52.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
53.\" entries marked with X<> in POD. Of course, you'll have to process the
54.\" output yourself in some meaningful fashion.
55.if \nF \{\
56. de IX
57. tm Index:\\$1\t\\n%\t"\\$2"
58..
59. nr % 0
60. rr F
61.\}
62.\"
63.\" For nroff, turn off justification. Always turn off hyphenation; it makes
64.\" way too many mistakes in technical documents.
65.hy 0
66.if n .na
67.\"
68.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
69.\" Fear. Run. Save yourself. No user-serviceable parts.
70. \" fudge factors for nroff and troff
71.if n \{\
72. ds #H 0
73. ds #V .8m
74. ds #F .3m
75. ds #[ \f1
76. ds #] \fP
77.\}
78.if t \{\
79. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
80. ds #V .6m
81. ds #F 0
82. ds #[ \&
83. ds #] \&
84.\}
85. \" simple accents for nroff and troff
86.if n \{\
87. ds ' \&
88. ds ` \&
89. ds ^ \&
90. ds , \&
91. ds ~ ~
92. ds /
93.\}
94.if t \{\
95. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
96. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
97. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
98. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
99. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
100. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
101.\}
102. \" troff and (daisy-wheel) nroff accents
103.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
104.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
105.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
106.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
107.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
108.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
109.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
110.ds ae a\h'-(\w'a'u*4/10)'e
111.ds Ae A\h'-(\w'A'u*4/10)'E
112. \" corrections for vroff
113.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
114.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
115. \" for low resolution devices (crt and lpr)
116.if \n(.H>23 .if \n(.V>19 \
117\{\
118. ds : e
119. ds 8 ss
120. ds o a
121. ds d- d\h'-1'\(ga
122. ds D- D\h'-1'\(hy
123. ds th \o'bp'
124. ds Th \o'LP'
125. ds ae ae
126. ds Ae AE
127.\}
128.rm #[ #] #H #V #F C
129.\" ========================================================================
130.\"
131.IX Title "Tagset 3"
132.TH Tagset 3 "2000-10-20" "perl v5.8.0" "User Contributed Perl Documentation"
133.SH "NAME"
134HTML::Tagset \- data tables useful in parsing HTML
135.SH "SYNOPSIS"
136.IX Header "SYNOPSIS"
137.Vb 3
138\& use HTML::Tagset;
139\& # Then use any of the items in the HTML::Tagset package
140\& # as need arises
141.Ve
142.SH "DESCRIPTION"
143.IX Header "DESCRIPTION"
144This module contains several data tables useful in various kinds of
145\&\s-1HTML\s0 parsing operations.
146.PP
147Note that all tag names used are lowercase.
148.PP
149In the following documentation, a \*(L"hashset\*(R" is a hash being used as a
150set \*(-- the hash conveys that its keys are there, and the actual values
151associated with the keys are not significant. (But what values are
152there, are always true.)
153.ie n .IP "hashset %HTML::Tagset::emptyElement" 4
154.el .IP "hashset \f(CW%HTML::Tagset::emptyElement\fR" 4
155.IX Item "hashset %HTML::Tagset::emptyElement"
156This hashset has as keys the tag-names (GIs) of elements that cannot
157have content. (For example, \*(L"base\*(R", \*(L"br\*(R", \*(L"hr\*(R".) So
158\&\f(CW$HTML::Tagset::emptyElement{'hr'}\fR exists and is true.
159\&\f(CW$HTML::Tagset::emptyElement{'dl'}\fR does not exist, and so is not true.
160.ie n .IP "hashset %HTML::Tagset::optionalEndTag" 4
161.el .IP "hashset \f(CW%HTML::Tagset::optionalEndTag\fR" 4
162.IX Item "hashset %HTML::Tagset::optionalEndTag"
163This hashset lists tag-names for elements that can have content, but whose
164end-tags are generally, \*(L"safely\*(R", omissible. Example:
165\&\f(CW$HTML::Tagset::optionalEndTag{'li'}\fR exists and is true.
166.ie n .IP "hash %HTML::Tagset::linkElements" 4
167.el .IP "hash \f(CW%HTML::Tagset::linkElements\fR" 4
168.IX Item "hash %HTML::Tagset::linkElements"
169Keys in this hash are tagnames for elements that might contain
170links, and the value for each is a reference to an array of the names
171of attributes whose values can be links.
172.ie n .IP "hash %HTML::Tagset::boolean_attr" 4
173.el .IP "hash \f(CW%HTML::Tagset::boolean_attr\fR" 4
174.IX Item "hash %HTML::Tagset::boolean_attr"
175This hash (not hashset) lists what attributes of what elements can be
176printed without showing the value (for example, the \*(L"noshade\*(R" attribute
177of \*(L"hr\*(R" elements). For elements with only one such attribute, its value
178is simply that attribute name. For elements with many such attributes,
179the value is a reference to a hashset containing all such attributes.
180.ie n .IP "hashset %HTML::Tagset::isPhraseMarkup" 4
181.el .IP "hashset \f(CW%HTML::Tagset::isPhraseMarkup\fR" 4
182.IX Item "hashset %HTML::Tagset::isPhraseMarkup"
183This hashset contains all phrasal-level elements.
184.ie n .IP "hashset %HTML::Tagset::is_Possible_Strict_P_Content" 4
185.el .IP "hashset \f(CW%HTML::Tagset::is_Possible_Strict_P_Content\fR" 4
186.IX Item "hashset %HTML::Tagset::is_Possible_Strict_P_Content"
187This hashset contains all phrasal-level elements that can be content of a
188P element, for a strict model of \s-1HTML\s0.
189.ie n .IP "hashset %HTML::Tagset::isHeadElement" 4
190.el .IP "hashset \f(CW%HTML::Tagset::isHeadElement\fR" 4
191.IX Item "hashset %HTML::Tagset::isHeadElement"
192This hashset contains all elements that should be
193present only in the 'head' element of an \s-1HTML\s0 document.
194.ie n .IP "hashset %HTML::Tagset::isList" 4
195.el .IP "hashset \f(CW%HTML::Tagset::isList\fR" 4
196.IX Item "hashset %HTML::Tagset::isList"
197This hashset contains all elements that can contain \*(L"li\*(R" elements.
198.ie n .IP "hashset %HTML::Tagset::isTableElement" 4
199.el .IP "hashset \f(CW%HTML::Tagset::isTableElement\fR" 4
200.IX Item "hashset %HTML::Tagset::isTableElement"
201This hashset contains all elements that are to be found only in/under
202a \*(L"table\*(R" element.
203.ie n .IP "hashset %HTML::Tagset::isFormElement" 4
204.el .IP "hashset \f(CW%HTML::Tagset::isFormElement\fR" 4
205.IX Item "hashset %HTML::Tagset::isFormElement"
206This hashset contains all elements that are to be found only in/under
207a \*(L"form\*(R" element.
208.ie n .IP "hashset %HTML::Tagset::isBodyMarkup" 4
209.el .IP "hashset \f(CW%HTML::Tagset::isBodyMarkup\fR" 4
210.IX Item "hashset %HTML::Tagset::isBodyMarkup"
211This hashset contains all elements that are to be found only in/under
212the \*(L"body\*(R" element of an \s-1HTML\s0 document.
213.ie n .IP "hashset %HTML::Tagset::isHeadOrBodyElement" 4
214.el .IP "hashset \f(CW%HTML::Tagset::isHeadOrBodyElement\fR" 4
215.IX Item "hashset %HTML::Tagset::isHeadOrBodyElement"
216This hashset includes all elements that I notice can fall either in
217the head or in the body.
218.ie n .IP "hashset %HTML::Tagset::isKnown" 4
219.el .IP "hashset \f(CW%HTML::Tagset::isKnown\fR" 4
220.IX Item "hashset %HTML::Tagset::isKnown"
221This hashset lists all known \s-1HTML\s0 elements.
222.ie n .IP "hashset %HTML::Tagset::canTighten" 4
223.el .IP "hashset \f(CW%HTML::Tagset::canTighten\fR" 4
224.IX Item "hashset %HTML::Tagset::canTighten"
225This hashset lists elements that might have ignorable whitespace as
226children or siblings.
227.ie n .IP "array @HTML::Tagset::p_closure_barriers" 4
228.el .IP "array \f(CW@HTML::Tagset::p_closure_barriers\fR" 4
229.IX Item "array @HTML::Tagset::p_closure_barriers"
230This array has a meaning that I have only seen a need for in
231\&\f(CW\*(C`HTML::TreeBuilder\*(C'\fR, but I include it here on the off chance that someone
232might find it of use:
233.Sp
234When we see a "<p>" token, we go looking up the lineage for a p
235element we might have to minimize. At first sight, we might say that
236if there's a p anywhere in the lineage of this new p, it should be
237closed. But that's wrong. Consider this document:
238.Sp
239.Vb 17
240\& <html>
241\& <head>
242\& <title>foo</title>
243\& </head>
244\& <body>
245\& <p>foo
246\& <table>
247\& <tr>
248\& <td>
249\& foo
250\& <p>bar
251\& </td>
252\& </tr>
253\& </table>
254\& </p>
255\& </body>
256\& </html>
257.Ve
258.Sp
259The second p is quite legally inside a much higher p.
260.Sp
261My formalization of the reason why this is legal, but this:
262.Sp
263.Vb 1
264\& <p>foo<p>bar</p></p>
265.Ve
266.Sp
267isn't, is that something about the table constitutes a \*(L"barrier\*(R" to
268the application of the rule about what p must minimize.
269.Sp
270So \f(CW@HTML::Tagset::p_closure_barriers\fR is the list of all such
271barrier\-tags.
272.ie n .IP "hashset %isCDATA_Parent" 4
273.el .IP "hashset \f(CW%isCDATA_Parent\fR" 4
274.IX Item "hashset %isCDATA_Parent"
275This hashset includes all elements whose content is \s-1CDATA\s0.
276.SH "CAVEATS"
277.IX Header "CAVEATS"
278You may find it useful to alter the behavior of modules (like
279\&\f(CW\*(C`HTML::Element\*(C'\fR or \f(CW\*(C`HTML::TreeBuilder\*(C'\fR) that use \f(CW\*(C`HTML::Tagset\*(C'\fR's
280data tables by altering the data tables themselves. You are welcome
281to try, but be careful; and be aware that different modules may or may
282not react differently to the data tables being changed.
283.PP
284Note that it may be inappropriate to use these tables for \fIproducing\fR
285\&\s-1HTML\s0 \*(-- for example, \f(CW%isHeadOrBodyElement\fR lists the tagnames
286for all elements that can appear either in the head or in the body,
287such as \*(L"script\*(R". That doesn't mean that I am saying your code that
288produces \s-1HTML\s0 should feel free to put script elements in either place!
289If you are producing programs that spit out \s-1HTML\s0, you should be
290\&\fIintimately\fR familiar with the DTDs for \s-1HTML\s0 or \s-1XHTML\s0 (available at
291\&\f(CW\*(C`http://www.w3.org/\*(C'\fR), and you should slavishly obey them, not
292the data tables in this document.
293.SH "SEE ALSO"
294.IX Header "SEE ALSO"
295HTML::Element, HTML::TreeBuilder, HTML::LinkExtor
296.SH "COPYRIGHT"
297.IX Header "COPYRIGHT"
298Copyright 1995\-2000 Gisle Aas; copyright 2000 Sean M. Burke.
299.PP
300This library is free software; you can redistribute it and/or
301modify it under the same terms as Perl itself.
302.SH "AUTHOR"
303.IX Header "AUTHOR"
304Current maintainer: Sean M. Burke, <sburke@cpan.org>
305.PP
306Most of the code/data in this module was adapted from code written by
307Gisle Aas <gisle@aas.no> for \f(CW\*(C`HTML::Element\*(C'\fR,
308\&\f(CW\*(C`HTML::TreeBuilder\*(C'\fR, and \f(CW\*(C`HTML::LinkExtor\*(C'\fR.