Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "Parser 3" | |
132 | .TH Parser 3 "2003-08-18" "perl v5.8.8" "User Contributed Perl Documentation" | |
133 | .SH "NAME" | |
134 | XML::Parser \- A perl module for parsing XML documents | |
135 | .SH "SYNOPSIS" | |
136 | .IX Header "SYNOPSIS" | |
137 | .Vb 1 | |
138 | \& use XML::Parser; | |
139 | .Ve | |
140 | .PP | |
141 | .Vb 3 | |
142 | \& $p1 = new XML::Parser(Style => 'Debug'); | |
143 | \& $p1->parsefile('REC-xml-19980210.xml'); | |
144 | \& $p1->parse('<foo id="me">Hello World</foo>'); | |
145 | .Ve | |
146 | .PP | |
147 | .Vb 5 | |
148 | \& # Alternative | |
149 | \& $p2 = new XML::Parser(Handlers => {Start => \e&handle_start, | |
150 | \& End => \e&handle_end, | |
151 | \& Char => \e&handle_char}); | |
152 | \& $p2->parse($socket); | |
153 | .Ve | |
154 | .PP | |
155 | .Vb 2 | |
156 | \& # Another alternative | |
157 | \& $p3 = new XML::Parser(ErrorContext => 2); | |
158 | .Ve | |
159 | .PP | |
160 | .Vb 2 | |
161 | \& $p3->setHandlers(Char => \e&text, | |
162 | \& Default => \e&other); | |
163 | .Ve | |
164 | .PP | |
165 | .Vb 3 | |
166 | \& open(FOO, 'xmlgenerator |'); | |
167 | \& $p3->parse(*FOO, ProtocolEncoding => 'ISO-8859-1'); | |
168 | \& close(FOO); | |
169 | .Ve | |
170 | .PP | |
171 | .Vb 1 | |
172 | \& $p3->parsefile('junk.xml', ErrorContext => 3); | |
173 | .Ve | |
174 | .SH "DESCRIPTION" | |
175 | .IX Header "DESCRIPTION" | |
176 | This module provides ways to parse \s-1XML\s0 documents. It is built on top of | |
177 | XML::Parser::Expat, which is a lower level interface to James Clark's | |
178 | expat library. Each call to one of the parsing methods creates a new | |
179 | instance of XML::Parser::Expat which is then used to parse the document. | |
180 | Expat options may be provided when the XML::Parser object is created. | |
181 | These options are then passed on to the Expat object on each parse call. | |
182 | They can also be given as extra arguments to the parse methods, in which | |
183 | case they override options given at XML::Parser creation time. | |
184 | .PP | |
185 | The behavior of the parser is controlled either by the \f(CW\*(C`\*(L"Style\*(R"\*(C'\fR and/or | |
186 | \&\f(CW\*(C`\*(L"Handlers\*(R"\*(C'\fR options, or by the \*(L"setHandlers\*(R" method. These all provide | |
187 | mechanisms for XML::Parser to set the handlers needed by XML::Parser::Expat. | |
188 | If neither \f(CW\*(C`Style\*(C'\fR nor \f(CW\*(C`Handlers\*(C'\fR is specified, then parsing just | |
189 | checks the document for being well\-formed. | |
190 | .PP | |
191 | When underlying handlers get called, they receive as their first parameter | |
192 | the \fIExpat\fR object, not the Parser object. | |
193 | .SH "METHODS" | |
194 | .IX Header "METHODS" | |
195 | .IP "new" 4 | |
196 | .IX Item "new" | |
197 | This is a class method, the constructor for XML::Parser. Options are passed | |
198 | as keyword value pairs. Recognized options are: | |
199 | .RS 4 | |
200 | .IP "* Style" 4 | |
201 | .IX Item "Style" | |
202 | This option provides an easy way to create a given style of parser. The | |
203 | built in styles are: \*(L"Debug\*(R", \*(L"Subs\*(R", \*(L"Tree\*(R", \*(L"Objects\*(R", | |
204 | and \*(L"Stream\*(R". These are all defined in separate packages under | |
205 | \&\f(CW\*(C`XML::Parser::Style::*\*(C'\fR, and you can find further documentation for | |
206 | each style both below, and in those packages. | |
207 | .Sp | |
208 | Custom styles can be provided by giving a full package name containing | |
209 | at least one '::'. This package should then have subs defined for each | |
210 | handler it wishes to have installed. See \*(L"\s-1STYLES\s0\*(R" below | |
211 | for a discussion of each built in style. | |
212 | .IP "* Handlers" 4 | |
213 | .IX Item "Handlers" | |
214 | When provided, this option should be an anonymous hash containing as | |
215 | keys the type of handler and as values a sub reference to handle that | |
216 | type of event. All the handlers get passed as their 1st parameter the | |
217 | instance of expat that is parsing the document. Further details on | |
218 | handlers can be found in \*(L"\s-1HANDLERS\s0\*(R". Any handler set here | |
219 | overrides the corresponding handler set with the Style option. | |
220 | .IP "* Pkg" 4 | |
221 | .IX Item "Pkg" | |
222 | Some styles will refer to subs defined in this package. If not provided, | |
223 | it defaults to the package which called the constructor. | |
224 | .IP "* ErrorContext" 4 | |
225 | .IX Item "ErrorContext" | |
226 | This is an Expat option. When this option is defined, errors are reported | |
227 | in context. The value should be the number of lines to show on either side | |
228 | of the line in which the error occurred. | |
229 | .IP "* ProtocolEncoding" 4 | |
230 | .IX Item "ProtocolEncoding" | |
231 | This is an Expat option. This sets the protocol encoding name. It defaults | |
232 | to none. The built-in encodings are: \f(CW\*(C`UTF\-8\*(C'\fR, \f(CW\*(C`ISO\-8859\-1\*(C'\fR, \f(CW\*(C`UTF\-16\*(C'\fR, and | |
233 | \&\f(CW\*(C`US\-ASCII\*(C'\fR. Other encodings may be used if they have encoding maps in one | |
234 | of the directories in the \f(CW@Encoding_Path\fR list. Check \*(L"\s-1ENCODINGS\s0\*(R" for | |
235 | more information on encoding maps. Setting the protocol encoding overrides | |
236 | any encoding in the \s-1XML\s0 declaration. | |
237 | .IP "* Namespaces" 4 | |
238 | .IX Item "Namespaces" | |
239 | This is an Expat option. If this is set to a true value, then namespace | |
240 | processing is done during the parse. See \*(L"Namespaces\*(R" in XML::Parser::Expat | |
241 | for further discussion of namespace processing. | |
242 | .IP "* NoExpand" 4 | |
243 | .IX Item "NoExpand" | |
244 | This is an Expat option. Normally, the parser will try to expand references | |
245 | to entities defined in the internal subset. If this option is set to a true | |
246 | value, and a default handler is also set, then the default handler will be | |
247 | called when an entity reference is seen in text. This has no effect if a | |
248 | default handler has not been registered, and it has no effect on the expansion | |
249 | of entity references inside attribute values. | |
250 | .IP "* Stream_Delimiter" 4 | |
251 | .IX Item "Stream_Delimiter" | |
252 | This is an Expat option. It takes a string value. When this string is found | |
253 | alone on a line while parsing from a stream, then the parse is ended as if it | |
254 | saw an end of file. The intended use is with a stream of \s-1XML\s0 documents in a | |
255 | \&\s-1MIME\s0 multipart format. The string should not contain a trailing newline. | |
256 | .IP "* ParseParamEnt" 4 | |
257 | .IX Item "ParseParamEnt" | |
258 | This is an Expat option. Unless standalone is set to \*(L"yes\*(R" in the \s-1XML\s0 | |
259 | declaration, setting this to a true value allows the external \s-1DTD\s0 to be read, | |
260 | and parameter entities to be parsed and expanded. | |
261 | .IP "* NoLWP" 4 | |
262 | .IX Item "NoLWP" | |
263 | This option has no effect if the ExternEnt or ExternEntFin handlers are | |
264 | directly set. Otherwise, if true, it forces the use of a file based external | |
265 | entity handler. | |
266 | .IP "* Non-Expat-Options" 4 | |
267 | .IX Item "Non-Expat-Options" | |
268 | If provided, this should be an anonymous hash whose keys are options that | |
269 | shouldn't be passed to Expat. This should only be of concern to those | |
270 | subclassing XML::Parser. | |
271 | .RE | |
272 | .RS 4 | |
273 | .RE | |
274 | .IP "setHandlers(\s-1TYPE\s0, \s-1HANDLER\s0 [, \s-1TYPE\s0, \s-1HANDLER\s0 [...]])" 4 | |
275 | .IX Item "setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]])" | |
276 | This method registers handlers for various parser events. It overrides any | |
277 | previous handlers registered through the Style or Handlers options or through | |
278 | earlier calls to setHandlers. By providing a false or undefined value as | |
279 | the handler, the existing handler can be unset. | |
280 | .Sp | |
281 | This method returns a list of type, handler pairs corresponding to the | |
282 | input. The handlers returned are the ones that were in effect prior to | |
283 | the call. | |
284 | .Sp | |
285 | See a description of the handler types in \*(L"\s-1HANDLERS\s0\*(R". | |
286 | .IP "parse(\s-1SOURCE\s0 [, \s-1OPT\s0 => \s-1OPT_VALUE\s0 [...]])" 4 | |
287 | .IX Item "parse(SOURCE [, OPT => OPT_VALUE [...]])" | |
288 | The \s-1SOURCE\s0 parameter should either be a string containing the whole \s-1XML\s0 | |
289 | document, or it should be an open IO::Handle. Constructor options to | |
290 | XML::Parser::Expat given as keyword-value pairs may follow the \s-1SOURCE\s0 | |
291 | parameter. These override, for this call, any options or attributes passed | |
292 | through from the XML::Parser instance. | |
293 | .Sp | |
294 | A die call is thrown if a parse error occurs. Otherwise it will return 1 | |
295 | or whatever is returned from the \fBFinal\fR handler, if one is installed. | |
296 | In other words, what parse may return depends on the style. | |
297 | .IP "parsestring" 4 | |
298 | .IX Item "parsestring" | |
299 | This is just an alias for parse for backwards compatibility. | |
300 | .IP "parsefile(\s-1FILE\s0 [, \s-1OPT\s0 => \s-1OPT_VALUE\s0 [...]])" 4 | |
301 | .IX Item "parsefile(FILE [, OPT => OPT_VALUE [...]])" | |
302 | Open \s-1FILE\s0 for reading, then call parse with the open handle. The file | |
303 | is closed no matter how parse returns. Returns what parse returns. | |
304 | .IP "parse_start([ \s-1OPT\s0 => \s-1OPT_VALUE\s0 [...]])" 4 | |
305 | .IX Item "parse_start([ OPT => OPT_VALUE [...]])" | |
306 | Create and return a new instance of XML::Parser::ExpatNB. Constructor | |
307 | options may be provided. If an init handler has been provided, it is | |
308 | called before returning the ExpatNB object. Documents are parsed by | |
309 | making incremental calls to the parse_more method of this object, which | |
310 | takes a string. A single call to the parse_done method of this object, | |
311 | which takes no arguments, indicates that the document is finished. | |
312 | .Sp | |
313 | If there is a final handler installed, it is executed by the parse_done | |
314 | method before returning and the parse_done method returns whatever is | |
315 | returned by the final handler. | |
316 | .SH "HANDLERS" | |
317 | .IX Header "HANDLERS" | |
318 | Expat is an event based parser. As the parser recognizes parts of the | |
319 | document (say the start or end tag for an \s-1XML\s0 element), then any handlers | |
320 | registered for that type of an event are called with suitable parameters. | |
321 | All handlers receive an instance of XML::Parser::Expat as their first | |
322 | argument. See \*(L"\s-1METHODS\s0\*(R" in XML::Parser::Expat for a discussion of the | |
323 | methods that can be called on this object. | |
324 | .Sh "Init (Expat)" | |
325 | .IX Subsection "Init (Expat)" | |
326 | This is called just before the parsing of the document starts. | |
327 | .Sh "Final (Expat)" | |
328 | .IX Subsection "Final (Expat)" | |
329 | This is called just after parsing has finished, but only if no errors | |
330 | occurred during the parse. Parse returns what this returns. | |
331 | .Sh "Start (Expat, Element [, Attr, Val [,...]])" | |
332 | .IX Subsection "Start (Expat, Element [, Attr, Val [,...]])" | |
333 | This event is generated when an \s-1XML\s0 start tag is recognized. Element is the | |
334 | name of the \s-1XML\s0 element type that is opened with the start tag. The Attr & | |
335 | Val pairs are generated for each attribute in the start tag. | |
336 | .Sh "End (Expat, Element)" | |
337 | .IX Subsection "End (Expat, Element)" | |
338 | This event is generated when an \s-1XML\s0 end tag is recognized. Note that | |
339 | an \s-1XML\s0 empty tag (<foo/>) generates both a start and an end event. | |
340 | .Sh "Char (Expat, String)" | |
341 | .IX Subsection "Char (Expat, String)" | |
342 | This event is generated when non-markup is recognized. The non-markup | |
343 | sequence of characters is in String. A single non-markup sequence of | |
344 | characters may generate multiple calls to this handler. Whatever the | |
345 | encoding of the string in the original document, this is given to the | |
346 | handler in \s-1UTF\-8\s0. | |
347 | .Sh "Proc (Expat, Target, Data)" | |
348 | .IX Subsection "Proc (Expat, Target, Data)" | |
349 | This event is generated when a processing instruction is recognized. | |
350 | .Sh "Comment (Expat, Data)" | |
351 | .IX Subsection "Comment (Expat, Data)" | |
352 | This event is generated when a comment is recognized. | |
353 | .Sh "CdataStart (Expat)" | |
354 | .IX Subsection "CdataStart (Expat)" | |
355 | This is called at the start of a \s-1CDATA\s0 section. | |
356 | .Sh "CdataEnd (Expat)" | |
357 | .IX Subsection "CdataEnd (Expat)" | |
358 | This is called at the end of a \s-1CDATA\s0 section. | |
359 | .Sh "Default (Expat, String)" | |
360 | .IX Subsection "Default (Expat, String)" | |
361 | This is called for any characters that don't have a registered handler. | |
362 | This includes both characters that are part of markup for which no | |
363 | events are generated (markup declarations) and characters that | |
364 | could generate events, but for which no handler has been registered. | |
365 | .PP | |
366 | Whatever the encoding in the original document, the string is returned to | |
367 | the handler in \s-1UTF\-8\s0. | |
368 | .Sh "Unparsed (Expat, Entity, Base, Sysid, Pubid, Notation)" | |
369 | .IX Subsection "Unparsed (Expat, Entity, Base, Sysid, Pubid, Notation)" | |
370 | This is called for a declaration of an unparsed entity. Entity is the name | |
371 | of the entity. Base is the base to be used for resolving a relative \s-1URI\s0. | |
372 | Sysid is the system id. Pubid is the public id. Notation is the notation | |
373 | name. Base and Pubid may be undefined. | |
374 | .Sh "Notation (Expat, Notation, Base, Sysid, Pubid)" | |
375 | .IX Subsection "Notation (Expat, Notation, Base, Sysid, Pubid)" | |
376 | This is called for a declaration of notation. Notation is the notation name. | |
377 | Base is the base to be used for resolving a relative \s-1URI\s0. Sysid is the system | |
378 | id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined. | |
379 | .Sh "ExternEnt (Expat, Base, Sysid, Pubid)" | |
380 | .IX Subsection "ExternEnt (Expat, Base, Sysid, Pubid)" | |
381 | This is called when an external entity is referenced. Base is the base to be | |
382 | used for resolving a relative \s-1URI\s0. Sysid is the system id. Pubid is the public | |
383 | id. Base and Pubid may be undefined. | |
384 | .PP | |
385 | This handler should either return a string, which represents the contents of | |
386 | the external entity, or return an open filehandle that can be read to obtain | |
387 | the contents of the external entity, or return undef, which indicates the | |
388 | external entity couldn't be found and will generate a parse error. | |
389 | .PP | |
390 | If an open filehandle is returned, it must be returned as either a glob | |
391 | (*FOO) or as a reference to a glob (e.g. an instance of IO::Handle). | |
392 | .PP | |
393 | A default handler is installed for this event. The default handler is | |
394 | XML::Parser::lwp_ext_ent_handler unless the NoLWP option was provided with | |
395 | a true value, otherwise XML::Parser::file_ext_ent_handler is the default | |
396 | handler for external entities. Even without the NoLWP option, if the | |
397 | \&\s-1URI\s0 or \s-1LWP\s0 modules are missing, the file based handler ends up being used | |
398 | after giving a warning on the first external entity reference. | |
399 | .PP | |
400 | The \s-1LWP\s0 external entity handler will use proxies defined in the environment | |
401 | (http_proxy, ftp_proxy, etc.). | |
402 | .PP | |
403 | Please note that the \s-1LWP\s0 external entity handler reads the entire | |
404 | entity into a string and returns it, whereas the file handler opens a | |
405 | filehandle. | |
406 | .PP | |
407 | Also note that the file external entity handler will likely choke on | |
408 | absolute URIs or file names that don't fit the conventions of the local | |
409 | operating system. | |
410 | .PP | |
411 | The expat base method can be used to set a basename for | |
412 | relative pathnames. If no basename is given, or if the basename is itself | |
413 | a relative name, then it is relative to the current working directory. | |
414 | .Sh "ExternEntFin (Expat)" | |
415 | .IX Subsection "ExternEntFin (Expat)" | |
416 | This is called after parsing an external entity. It's not called unless | |
417 | an ExternEnt handler is also set. There is a default handler installed | |
418 | that pairs with the default ExternEnt handler. | |
419 | .PP | |
420 | If you're going to install your own ExternEnt handler, then you should | |
421 | set (or unset) this handler too. | |
422 | .Sh "Entity (Expat, Name, Val, Sysid, Pubid, Ndata, IsParam)" | |
423 | .IX Subsection "Entity (Expat, Name, Val, Sysid, Pubid, Ndata, IsParam)" | |
424 | This is called when an entity is declared. For internal entities, the Val | |
425 | parameter will contain the value and the remaining three parameters will be | |
426 | undefined. For external entities, the Val parameter will be undefined, the | |
427 | Sysid parameter will have the system id, the Pubid parameter will have the | |
428 | public id if it was provided (it will be undefined otherwise), the Ndata | |
429 | parameter will contain the notation for unparsed entities. If this is a | |
430 | parameter entity declaration, then the IsParam parameter is true. | |
431 | .PP | |
432 | Note that this handler and the Unparsed handler above overlap. If both are | |
433 | set, then this handler will not be called for unparsed entities. | |
434 | .Sh "Element (Expat, Name, Model)" | |
435 | .IX Subsection "Element (Expat, Name, Model)" | |
436 | The element handler is called when an element declaration is found. Name | |
437 | is the element name, and Model is the content model as an XML::Parser::Content | |
438 | object. See \*(L"XML::Parser::ContentModel Methods\*(R" in XML::Parser::Expat | |
439 | for methods available for this class. | |
440 | .Sh "Attlist (Expat, Elname, Attname, Type, Default, Fixed)" | |
441 | .IX Subsection "Attlist (Expat, Elname, Attname, Type, Default, Fixed)" | |
442 | This handler is called for each attribute in an \s-1ATTLIST\s0 declaration. | |
443 | So an \s-1ATTLIST\s0 declaration that has multiple attributes will generate multiple | |
444 | calls to this handler. The Elname parameter is the name of the element with | |
445 | which the attribute is being associated. The Attname parameter is the name | |
446 | of the attribute. Type is the attribute type, given as a string. Default is | |
447 | the default value, which will either be \*(L"#REQUIRED\*(R", \*(L"#IMPLIED\*(R" or a quoted | |
448 | string (i.e. the returned string will begin and end with a quote character). | |
449 | If Fixed is true, then this is a fixed attribute. | |
450 | .Sh "Doctype (Expat, Name, Sysid, Pubid, Internal)" | |
451 | .IX Subsection "Doctype (Expat, Name, Sysid, Pubid, Internal)" | |
452 | This handler is called for \s-1DOCTYPE\s0 declarations. Name is the document type | |
453 | name. Sysid is the system id of the document type, if it was provided, | |
454 | otherwise it's undefined. Pubid is the public id of the document type, | |
455 | which will be undefined if no public id was given. Internal is the internal | |
456 | subset, given as a string. If there was no internal subset, it will be | |
457 | undefined. Internal will contain all whitespace, comments, processing | |
458 | instructions, and declarations seen in the internal subset. The declarations | |
459 | will be there whether or not they have been processed by another handler | |
460 | (except for unparsed entities processed by the Unparsed handler). However, | |
461 | comments and processing instructions will not appear if they've been processed | |
462 | by their respective handlers. | |
463 | .Sh "* DoctypeFin (Parser)" | |
464 | .IX Subsection "* DoctypeFin (Parser)" | |
465 | This handler is called after parsing of the \s-1DOCTYPE\s0 declaration has finished, | |
466 | including any internal or external \s-1DTD\s0 declarations. | |
467 | .Sh "XMLDecl (Expat, Version, Encoding, Standalone)" | |
468 | .IX Subsection "XMLDecl (Expat, Version, Encoding, Standalone)" | |
469 | This handler is called for \s-1XML\s0 declarations. Version is a string containing | |
470 | the version. Encoding is either undefined or contains an encoding string. | |
471 | Standalone will be either true, false, or undefined if the standalone attribute | |
472 | is yes, no, or not present, respectively. | |
473 | .SH "STYLES" | |
474 | .IX Header "STYLES" | |
475 | .Sh "Debug" | |
476 | .IX Subsection "Debug" | |
477 | This just prints out the document in outline form. Nothing special is | |
478 | returned by parse. | |
479 | .Sh "Subs" | |
480 | .IX Subsection "Subs" | |
481 | Each time an element starts, a sub by that name in the package specified | |
482 | by the Pkg option is called with the same parameters that the Start | |
483 | handler gets called with. | |
484 | .PP | |
485 | Each time an element ends, a sub with that name appended with an underscore | |
486 | (\*(L"_\*(R"), is called with the same parameters that the End handler gets called | |
487 | with. | |
488 | .PP | |
489 | Nothing special is returned by parse. | |
490 | .Sh "Tree" | |
491 | .IX Subsection "Tree" | |
492 | Parse will return a parse tree for the document. Each node in the tree | |
493 | takes the form of a tag, content pair. Text nodes are represented with | |
494 | a pseudo-tag of \*(L"0\*(R" and the string that is their content. For elements, | |
495 | the content is an array reference. The first item in the array is a | |
496 | (possibly empty) hash reference containing attributes. The remainder of | |
497 | the array is a sequence of tag-content pairs representing the content | |
498 | of the element. | |
499 | .PP | |
500 | So for example the result of parsing: | |
501 | .PP | |
502 | .Vb 1 | |
503 | \& <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> | |
504 | .Ve | |
505 | .PP | |
506 | would be: | |
507 | .PP | |
508 | .Vb 7 | |
509 | \& Tag Content | |
510 | \& ================================================================== | |
511 | \& [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], | |
512 | \& bar, [ {}, 0, "Howdy", ref, [{}]], | |
513 | \& 0, "do" | |
514 | \& ] | |
515 | \& ] | |
516 | .Ve | |
517 | .PP | |
518 | The root element \*(L"foo\*(R" has 3 children: a \*(L"head\*(R" element, a \*(L"bar\*(R" | |
519 | element and the text \*(L"do\*(R". After the empty attribute hash, these are | |
520 | represented in its contents by 3 tag-content pairs. | |
521 | .Sh "Objects" | |
522 | .IX Subsection "Objects" | |
523 | This is similar to the Tree style, except that a hash object is created for | |
524 | each element. The corresponding object will be in the class whose name | |
525 | is created by appending \*(L"::\*(R" and the element name to the package set with | |
526 | the Pkg option. Non-markup text will be in the ::Characters class. The | |
527 | contents of the corresponding object will be in an anonymous array that | |
528 | is the value of the Kids property for that object. | |
529 | .Sh "Stream" | |
530 | .IX Subsection "Stream" | |
531 | This style also uses the Pkg package. If none of the subs that this | |
532 | style looks for is there, then the effect of parsing with this style is | |
533 | to print a canonical copy of the document without comments or declarations. | |
534 | All the subs receive as their 1st parameter the Expat instance for the | |
535 | document they're parsing. | |
536 | .PP | |
537 | It looks for the following routines: | |
538 | .IP "* StartDocument" 4 | |
539 | .IX Item "StartDocument" | |
540 | Called at the start of the parse. | |
541 | .IP "* StartTag" 4 | |
542 | .IX Item "StartTag" | |
543 | Called for every start tag with a second parameter of the element type. The \f(CW$_\fR | |
544 | variable will contain a copy of the tag and the \f(CW%_\fR variable will contain | |
545 | attribute values supplied for that element. | |
546 | .IP "* EndTag" 4 | |
547 | .IX Item "EndTag" | |
548 | Called for every end tag with a second parameter of the element type. The \f(CW$_\fR | |
549 | variable will contain a copy of the end tag. | |
550 | .IP "* Text" 4 | |
551 | .IX Item "Text" | |
552 | Called just before start or end tags with accumulated non-markup text in | |
553 | the \f(CW$_\fR variable. | |
554 | .IP "* \s-1PI\s0" 4 | |
555 | .IX Item "PI" | |
556 | Called for processing instructions. The \f(CW$_\fR variable will contain a copy of | |
557 | the \s-1PI\s0 and the target and data are sent as 2nd and 3rd parameters | |
558 | respectively. | |
559 | .IP "* EndDocument" 4 | |
560 | .IX Item "EndDocument" | |
561 | Called at conclusion of the parse. | |
562 | .SH "ENCODINGS" | |
563 | .IX Header "ENCODINGS" | |
564 | \&\s-1XML\s0 documents may be encoded in character sets other than Unicode as | |
565 | long as they may be mapped into the Unicode character set. Expat has | |
566 | further restrictions on encodings. Read the xmlparse.h header file in | |
567 | the expat distribution to see details on these restrictions. | |
568 | .PP | |
569 | Expat has built-in encodings for: \f(CW\*(C`UTF\-8\*(C'\fR, \f(CW\*(C`ISO\-8859\-1\*(C'\fR, \f(CW\*(C`UTF\-16\*(C'\fR, and | |
570 | \&\f(CW\*(C`US\-ASCII\*(C'\fR. Encodings are set either through the \s-1XML\s0 declaration | |
571 | encoding attribute or through the ProtocolEncoding option to XML::Parser | |
572 | or XML::Parser::Expat. | |
573 | .PP | |
574 | For encodings other than the built\-ins, expat calls the function | |
575 | load_encoding in the Expat package with the encoding name. This function | |
576 | looks for a file in the path list \f(CW@XML::Parser::Expat::Encoding_Path\fR, that | |
577 | matches the lower-cased name with a '.enc' extension. The first one it | |
578 | finds, it loads. | |
579 | .PP | |
580 | If you wish to build your own encoding maps, check out the XML::Encoding | |
581 | module from \s-1CPAN\s0. | |
582 | .SH "AUTHORS" | |
583 | .IX Header "AUTHORS" | |
584 | Larry Wall <\fIlarry@wall.org\fR> wrote version 1.0. | |
585 | .PP | |
586 | Clark Cooper <\fIcoopercc@netheaven.com\fR> picked up support, changed the \s-1API\s0 | |
587 | for this version (2.x), provided documentation, | |
588 | and added some standard package features. | |
589 | .PP | |
590 | Matt Sergeant <\fImatt@sergeant.org\fR> is now maintaining XML::Parser |