Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "XML::Simple 3" | |
132 | .TH XML::Simple 3 "2004-11-19" "perl v5.8.8" "User Contributed Perl Documentation" | |
133 | .SH "NAME" | |
134 | XML::Simple \- Easy API to maintain XML (esp config files) | |
135 | .SH "SYNOPSIS" | |
136 | .IX Header "SYNOPSIS" | |
137 | .Vb 1 | |
138 | \& use XML::Simple; | |
139 | .Ve | |
140 | .PP | |
141 | .Vb 1 | |
142 | \& my $ref = XMLin([<xml file or string>] [, <options>]); | |
143 | .Ve | |
144 | .PP | |
145 | .Vb 1 | |
146 | \& my $xml = XMLout($hashref [, <options>]); | |
147 | .Ve | |
148 | .PP | |
149 | Or the object oriented way: | |
150 | .PP | |
151 | .Vb 1 | |
152 | \& require XML::Simple; | |
153 | .Ve | |
154 | .PP | |
155 | .Vb 1 | |
156 | \& my $xs = XML::Simple->new(options); | |
157 | .Ve | |
158 | .PP | |
159 | .Vb 1 | |
160 | \& my $ref = $xs->XMLin([<xml file or string>] [, <options>]); | |
161 | .Ve | |
162 | .PP | |
163 | .Vb 1 | |
164 | \& my $xml = $xs->XMLout($hashref [, <options>]); | |
165 | .Ve | |
166 | .PP | |
167 | (or see \*(L"\s-1SAX\s0 \s-1SUPPORT\s0\*(R" for 'the \s-1SAX\s0 way'). | |
168 | .PP | |
169 | To catch common errors: | |
170 | .PP | |
171 | .Vb 1 | |
172 | \& use XML::Simple qw(:strict); | |
173 | .Ve | |
174 | .PP | |
175 | (see \*(L"\s-1STRICT\s0 \s-1MODE\s0\*(R" for more details). | |
176 | .SH "QUICK START" | |
177 | .IX Header "QUICK START" | |
178 | Say you have a script called \fBfoo\fR and a file of configuration options | |
179 | called \fBfoo.xml\fR containing this: | |
180 | .PP | |
181 | .Vb 13 | |
182 | \& <config logdir="/var/log/foo/" debugfile="/tmp/foo.debug"> | |
183 | \& <server name="sahara" osname="solaris" osversion="2.6"> | |
184 | \& <address>10.0.0.101</address> | |
185 | \& <address>10.0.1.101</address> | |
186 | \& </server> | |
187 | \& <server name="gobi" osname="irix" osversion="6.5"> | |
188 | \& <address>10.0.0.102</address> | |
189 | \& </server> | |
190 | \& <server name="kalahari" osname="linux" osversion="2.0.34"> | |
191 | \& <address>10.0.0.103</address> | |
192 | \& <address>10.0.1.103</address> | |
193 | \& </server> | |
194 | \& </config> | |
195 | .Ve | |
196 | .PP | |
197 | The following lines of code in \fBfoo\fR: | |
198 | .PP | |
199 | .Vb 1 | |
200 | \& use XML::Simple; | |
201 | .Ve | |
202 | .PP | |
203 | .Vb 1 | |
204 | \& my $config = XMLin(); | |
205 | .Ve | |
206 | .PP | |
207 | will 'slurp' the configuration options into the hashref \f(CW$config\fR (because no | |
208 | arguments are passed to \f(CW\*(C`XMLin()\*(C'\fR the name and location of the \s-1XML\s0 file will | |
209 | be inferred from name and location of the script). You can dump out the | |
210 | contents of the hashref using Data::Dumper: | |
211 | .PP | |
212 | .Vb 1 | |
213 | \& use Data::Dumper; | |
214 | .Ve | |
215 | .PP | |
216 | .Vb 1 | |
217 | \& print Dumper($config); | |
218 | .Ve | |
219 | .PP | |
220 | which will produce something like this (formatting has been adjusted for | |
221 | brevity): | |
222 | .PP | |
223 | .Vb 21 | |
224 | \& { | |
225 | \& 'logdir' => '/var/log/foo/', | |
226 | \& 'debugfile' => '/tmp/foo.debug', | |
227 | \& 'server' => { | |
228 | \& 'sahara' => { | |
229 | \& 'osversion' => '2.6', | |
230 | \& 'osname' => 'solaris', | |
231 | \& 'address' => [ '10.0.0.101', '10.0.1.101' ] | |
232 | \& }, | |
233 | \& 'gobi' => { | |
234 | \& 'osversion' => '6.5', | |
235 | \& 'osname' => 'irix', | |
236 | \& 'address' => '10.0.0.102' | |
237 | \& }, | |
238 | \& 'kalahari' => { | |
239 | \& 'osversion' => '2.0.34', | |
240 | \& 'osname' => 'linux', | |
241 | \& 'address' => [ '10.0.0.103', '10.0.1.103' ] | |
242 | \& } | |
243 | \& } | |
244 | \& } | |
245 | .Ve | |
246 | .PP | |
247 | Your script could then access the name of the log directory like this: | |
248 | .PP | |
249 | .Vb 1 | |
250 | \& print $config->{logdir}; | |
251 | .Ve | |
252 | .PP | |
253 | similarly, the second address on the server 'kalahari' could be referenced as: | |
254 | .PP | |
255 | .Vb 1 | |
256 | \& print $config->{server}->{kalahari}->{address}->[1]; | |
257 | .Ve | |
258 | .PP | |
259 | What could be simpler? (Rhetorical). | |
260 | .PP | |
261 | For simple requirements, that's really all there is to it. If you want to | |
262 | store your \s-1XML\s0 in a different directory or file, or pass it in as a string or | |
263 | even pass it in via some derivative of an IO::Handle, you'll need to check out | |
264 | \&\*(L"\s-1OPTIONS\s0\*(R". If you want to turn off or tweak the array folding feature (that | |
265 | neat little transformation that produced \f(CW$config\fR\->{server}) you'll find options | |
266 | for that as well. | |
267 | .PP | |
268 | If you want to generate \s-1XML\s0 (for example to write a modified version of | |
269 | \&\f(CW$config\fR back out as \s-1XML\s0), check out \f(CW\*(C`XMLout()\*(C'\fR. | |
270 | .PP | |
271 | If your needs are not so simple, this may not be the module for you. In that | |
272 | case, you might want to read \*(L"\s-1WHERE\s0 \s-1TO\s0 \s-1FROM\s0 \s-1HERE\s0?\*(R". | |
273 | .SH "DESCRIPTION" | |
274 | .IX Header "DESCRIPTION" | |
275 | The XML::Simple module provides a simple \s-1API\s0 layer on top of an underlying \s-1XML\s0 | |
276 | parsing module (either XML::Parser or one of the \s-1SAX2\s0 parser modules). Two | |
277 | functions are exported: \f(CW\*(C`XMLin()\*(C'\fR and \f(CW\*(C`XMLout()\*(C'\fR. Note: you can explicitly | |
278 | request the lower case versions of the function names: \f(CW\*(C`xml_in()\*(C'\fR and | |
279 | \&\f(CW\*(C`xml_out()\*(C'\fR. | |
280 | .PP | |
281 | The simplest approach is to call these two functions directly, but an | |
282 | optional object oriented interface (see \*(L"\s-1OPTIONAL\s0 \s-1OO\s0 \s-1INTERFACE\s0\*(R" below) | |
283 | allows them to be called as methods of an \fBXML::Simple\fR object. The object | |
284 | interface can also be used at either end of a \s-1SAX\s0 pipeline. | |
285 | .Sh "\fIXMLin()\fP" | |
286 | .IX Subsection "XMLin()" | |
287 | Parses \s-1XML\s0 formatted data and returns a reference to a data structure which | |
288 | contains the same information in a more readily accessible form. (Skip | |
289 | down to \*(L"\s-1EXAMPLES\s0\*(R" below, for more sample code). | |
290 | .PP | |
291 | \&\f(CW\*(C`XMLin()\*(C'\fR accepts an optional \s-1XML\s0 specifier followed by zero or more 'name => | |
292 | value' option pairs. The \s-1XML\s0 specifier can be one of the following: | |
293 | .IP "A filename" 4 | |
294 | .IX Item "A filename" | |
295 | If the filename contains no directory components \f(CW\*(C`XMLin()\*(C'\fR will look for the | |
296 | file in each directory in the SearchPath (see \*(L"\s-1OPTIONS\s0\*(R" below) or in the | |
297 | current directory if the SearchPath option is not defined. eg: | |
298 | .Sp | |
299 | .Vb 1 | |
300 | \& $ref = XMLin('/etc/params.xml'); | |
301 | .Ve | |
302 | .Sp | |
303 | Note, the filename '\-' can be used to parse from \s-1STDIN\s0. | |
304 | .IP "undef" 4 | |
305 | .IX Item "undef" | |
306 | If there is no \s-1XML\s0 specifier, \f(CW\*(C`XMLin()\*(C'\fR will check the script directory and | |
307 | each of the SearchPath directories for a file with the same name as the script | |
308 | but with the extension '.xml'. Note: if you wish to specify options, you | |
309 | must specify the value 'undef'. eg: | |
310 | .Sp | |
311 | .Vb 1 | |
312 | \& $ref = XMLin(undef, ForceArray => 1); | |
313 | .Ve | |
314 | .IP "A string of \s-1XML\s0" 4 | |
315 | .IX Item "A string of XML" | |
316 | A string containing \s-1XML\s0 (recognised by the presence of '<' and '>' characters) | |
317 | will be parsed directly. eg: | |
318 | .Sp | |
319 | .Vb 1 | |
320 | \& $ref = XMLin('<opt username="bob" password="flurp" />'); | |
321 | .Ve | |
322 | .IP "An IO::Handle object" 4 | |
323 | .IX Item "An IO::Handle object" | |
324 | An IO::Handle object will be read to \s-1EOF\s0 and its contents parsed. eg: | |
325 | .Sp | |
326 | .Vb 2 | |
327 | \& $fh = IO::File->new('/etc/params.xml'); | |
328 | \& $ref = XMLin($fh); | |
329 | .Ve | |
330 | .Sh "\fIXMLout()\fP" | |
331 | .IX Subsection "XMLout()" | |
332 | Takes a data structure (generally a hashref) and returns an \s-1XML\s0 encoding of | |
333 | that structure. If the resulting \s-1XML\s0 is parsed using \f(CW\*(C`XMLin()\*(C'\fR, it should | |
334 | return a data structure equivalent to the original (see caveats below). | |
335 | .PP | |
336 | The \f(CW\*(C`XMLout()\*(C'\fR function can also be used to output the \s-1XML\s0 as \s-1SAX\s0 events | |
337 | (see the \f(CW\*(C`Handler\*(C'\fR option and \*(L"\s-1SAX\s0 \s-1SUPPORT\s0\*(R" for more details). | |
338 | .PP | |
339 | When translating hashes to \s-1XML\s0, hash keys which have a leading '\-' will be | |
340 | silently skipped. This is the approved method for marking elements of a | |
341 | data structure which should be ignored by \f(CW\*(C`XMLout\*(C'\fR. (Note: If these items | |
342 | were not skipped the key names would be emitted as element or attribute names | |
343 | with a leading '\-' which would not be valid \s-1XML\s0). | |
344 | .Sh "Caveats" | |
345 | .IX Subsection "Caveats" | |
346 | Some care is required in creating data structures which will be passed to | |
347 | \&\f(CW\*(C`XMLout()\*(C'\fR. Hash keys from the data structure will be encoded as either \s-1XML\s0 | |
348 | element names or attribute names. Therefore, you should use hash key names | |
349 | which conform to the relatively strict \s-1XML\s0 naming rules: | |
350 | .PP | |
351 | Names in \s-1XML\s0 must begin with a letter. The remaining characters may be | |
352 | letters, digits, hyphens (\-), underscores (_) or full stops (.). It is also | |
353 | allowable to include one colon (:) in an element name but this should only be | |
354 | used when working with namespaces (\fBXML::Simple\fR can only usefully work with | |
355 | namespaces when teamed with a \s-1SAX\s0 Parser). | |
356 | .PP | |
357 | You can use other punctuation characters in hash values (just not in hash | |
358 | keys) however \fBXML::Simple\fR does not support dumping binary data. | |
359 | .PP | |
360 | If you break these rules, the current implementation of \f(CW\*(C`XMLout()\*(C'\fR will | |
361 | simply emit non-compliant \s-1XML\s0 which will be rejected if you try to read it | |
362 | back in. (A later version of \fBXML::Simple\fR might take a more proactive | |
363 | approach). | |
364 | .PP | |
365 | Note also that although you can nest hashes and arrays to arbitrary levels, | |
366 | circular data structures are not supported and will cause \f(CW\*(C`XMLout()\*(C'\fR to die. | |
367 | .PP | |
368 | If you wish to 'round\-trip' arbitrary data structures from Perl to \s-1XML\s0 and back | |
369 | to Perl, then you should probably disable array folding (using the KeyAttr | |
370 | option) both with \f(CW\*(C`XMLout()\*(C'\fR and with \f(CW\*(C`XMLin()\*(C'\fR. If you still don't get the | |
371 | expected results, you may prefer to use XML::Dumper which is designed for | |
372 | exactly that purpose. | |
373 | .PP | |
374 | Refer to \*(L"\s-1WHERE\s0 \s-1TO\s0 \s-1FROM\s0 \s-1HERE\s0?\*(R" if \f(CW\*(C`XMLout()\*(C'\fR is too simple for your needs. | |
375 | .SH "OPTIONS" | |
376 | .IX Header "OPTIONS" | |
377 | \&\fBXML::Simple\fR supports a number of options (in fact as each release of | |
378 | \&\fBXML::Simple\fR adds more options, the module's claim to the name 'Simple' | |
379 | becomes increasingly tenuous). If you find yourself repeatedly having to | |
380 | specify the same options, you might like to investigate \*(L"\s-1OPTIONAL\s0 \s-1OO\s0 \s-1INTERFACE\s0\*(R" below. | |
381 | .PP | |
382 | If you can't be bothered reading the documentation, refer to | |
383 | \&\*(L"\s-1STRICT\s0 \s-1MODE\s0\*(R" to automatically catch common mistakes. | |
384 | .PP | |
385 | Because there are so many options, it's hard for new users to know which ones | |
386 | are important, so here are the two you really need to know about: | |
387 | .IP "\(bu" 4 | |
388 | check out \f(CW\*(C`ForceArray\*(C'\fR because you'll almost certainly want to turn it on | |
389 | .IP "\(bu" 4 | |
390 | make sure you know what the \f(CW\*(C`KeyAttr\*(C'\fR option does and what its default value is | |
391 | because it may surprise you otherwise (note in particular that 'KeyAttr' | |
392 | affects both \f(CW\*(C`XMLin\*(C'\fR and \f(CW\*(C`XMLout\*(C'\fR) | |
393 | .PP | |
394 | The option name headings below have a trailing 'comment' \- a hash followed by | |
395 | two pieces of metadata: | |
396 | .IP "\(bu" 4 | |
397 | Options are marked with '\fIin\fR' if they are recognised by \f(CW\*(C`XMLin()\*(C'\fR and | |
398 | \&'\fIout\fR' if they are recognised by \f(CW\*(C`XMLout()\*(C'\fR. | |
399 | .IP "\(bu" 4 | |
400 | Each option is also flagged to indicate whether it is: | |
401 | .Sp | |
402 | .Vb 7 | |
403 | \& 'important' - don't use the module until you understand this one | |
404 | \& 'handy' - you can skip this on the first time through | |
405 | \& 'advanced' - you can skip this on the second time through | |
406 | \& 'SAX only' - don't worry about this unless you're using SAX (or | |
407 | \& alternatively if you need this, you also need SAX) | |
408 | \& 'seldom used' - you'll probably never use this unless you were the | |
409 | \& person that requested the feature | |
410 | .Ve | |
411 | .PP | |
412 | The options are listed alphabetically: | |
413 | .PP | |
414 | Note: option names are no longer case sensitive so you can use the mixed case | |
415 | versions shown here; all lower case as required by versions 2.03 and earlier; | |
416 | or you can add underscores between the words (eg: key_attr). | |
417 | .Sh "AttrIndent => 1 \fI# out \- handy\fP" | |
418 | .IX Subsection "AttrIndent => 1 # out - handy" | |
419 | When you are using \f(CW\*(C`XMLout()\*(C'\fR, enable this option to have attributes printed | |
420 | one-per-line with sensible indentation rather than all on one line. | |
421 | .Sh "Cache => [ cache schemes ] \fI# in \- advanced\fP" | |
422 | .IX Subsection "Cache => [ cache schemes ] # in - advanced" | |
423 | Because loading the \fBXML::Parser\fR module and parsing an \s-1XML\s0 file can consume a | |
424 | significant number of \s-1CPU\s0 cycles, it is often desirable to cache the output of | |
425 | \&\f(CW\*(C`XMLin()\*(C'\fR for later reuse. | |
426 | .PP | |
427 | When parsing from a named file, \fBXML::Simple\fR supports a number of caching | |
428 | schemes. The 'Cache' option may be used to specify one or more schemes (using | |
429 | an anonymous array). Each scheme will be tried in turn in the hope of finding | |
430 | a cached pre-parsed representation of the \s-1XML\s0 file. If no cached copy is | |
431 | found, the file will be parsed and the first cache scheme in the list will be | |
432 | used to save a copy of the results. The following cache schemes have been | |
433 | implemented: | |
434 | .IP "storable" 4 | |
435 | .IX Item "storable" | |
436 | Utilises \fBStorable.pm\fR to read/write a cache file with the same name as the | |
437 | \&\s-1XML\s0 file but with the extension .stor | |
438 | .IP "memshare" 4 | |
439 | .IX Item "memshare" | |
440 | When a file is first parsed, a copy of the resulting data structure is retained | |
441 | in memory in the \fBXML::Simple\fR module's namespace. Subsequent calls to parse | |
442 | the same file will return a reference to this structure. This cached version | |
443 | will persist only for the life of the Perl interpreter (which in the case of | |
444 | mod_perl for example, may be some significant time). | |
445 | .Sp | |
446 | Because each caller receives a reference to the same data structure, a change | |
447 | made by one caller will be visible to all. For this reason, the reference | |
448 | returned should be treated as read\-only. | |
449 | .IP "memcopy" 4 | |
450 | .IX Item "memcopy" | |
451 | This scheme works identically to 'memshare' (above) except that each caller | |
452 | receives a reference to a new data structure which is a copy of the cached | |
453 | version. Copying the data structure will add a little processing overhead, | |
454 | therefore this scheme should only be used where the caller intends to modify | |
455 | the data structure (or wishes to protect itself from others who might). This | |
456 | scheme uses \fBStorable.pm\fR to perform the copy. | |
457 | .PP | |
458 | Warning! The memory-based caching schemes compare the timestamp on the file to | |
459 | the time when it was last parsed. If the file is stored on an \s-1NFS\s0 filesystem | |
460 | (or other network share) and the clock on the file server is not exactly | |
461 | synchronised with the clock where your script is run, updates to the source \s-1XML\s0 | |
462 | file may appear to be ignored. | |
463 | .Sh "ContentKey => 'keyname' \fI# in+out \- seldom used\fP" | |
464 | .IX Subsection "ContentKey => 'keyname' # in+out - seldom used" | |
465 | When text content is parsed to a hash value, this option lets you specify a | |
466 | name for the hash key to override the default 'content'. So for example: | |
467 | .PP | |
468 | .Vb 1 | |
469 | \& XMLin('<opt one="1">Text</opt>', ContentKey => 'text') | |
470 | .Ve | |
471 | .PP | |
472 | will parse to: | |
473 | .PP | |
474 | .Vb 1 | |
475 | \& { 'one' => 1, 'text' => 'Text' } | |
476 | .Ve | |
477 | .PP | |
478 | instead of: | |
479 | .PP | |
480 | .Vb 1 | |
481 | \& { 'one' => 1, 'content' => 'Text' } | |
482 | .Ve | |
483 | .PP | |
484 | \&\f(CW\*(C`XMLout()\*(C'\fR will also honour the value of this option when converting a hashref | |
485 | to \s-1XML\s0. | |
486 | .PP | |
487 | You can also prefix your selected key name with a '\-' character to have | |
488 | \&\f(CW\*(C`XMLin()\*(C'\fR try a little harder to eliminate unnecessary 'content' keys after | |
489 | array folding. For example: | |
490 | .PP | |
491 | .Vb 6 | |
492 | \& XMLin( | |
493 | \& '<opt><item name="one">First</item><item name="two">Second</item></opt>', | |
494 | \& KeyAttr => {item => 'name'}, | |
495 | \& ForceArray => [ 'item' ], | |
496 | \& ContentKey => '-content' | |
497 | \& ) | |
498 | .Ve | |
499 | .PP | |
500 | will parse to: | |
501 | .PP | |
502 | .Vb 6 | |
503 | \& { | |
504 | \& 'item' => { | |
505 | \& 'one' => 'First', | |
506 | \& 'two' => 'Second' | |
507 | \& } | |
508 | \& } | |
509 | .Ve | |
510 | .PP | |
511 | rather than this (without the '\-'): | |
512 | .PP | |
513 | .Vb 6 | |
514 | \& { | |
515 | \& 'item' => { | |
516 | \& 'one' => { 'content' => 'First' }, | |
517 | \& 'two' => { 'content' => 'Second' } | |
518 | \& } | |
519 | \& } | |
520 | .Ve | |
521 | .Sh "DataHandler => code_ref \fI# in \- \s-1SAX\s0 only\fP" | |
522 | .IX Subsection "DataHandler => code_ref # in - SAX only" | |
523 | When you use an \fBXML::Simple\fR object as a \s-1SAX\s0 handler, it will return a | |
524 | \&'simple tree' data structure in the same format as \f(CW\*(C`XMLin()\*(C'\fR would return. If | |
525 | this option is set (to a subroutine reference), then when the tree is built the | |
526 | subroutine will be called and passed two arguments: a reference to the | |
527 | \&\fBXML::Simple\fR object and a reference to the data tree. The return value from | |
528 | the subroutine will be returned to the \s-1SAX\s0 driver. (See \*(L"\s-1SAX\s0 \s-1SUPPORT\s0\*(R" for | |
529 | more details). | |
530 | .Sh "ForceArray => 1 \fI# in \- important\fP" | |
531 | .IX Subsection "ForceArray => 1 # in - important" | |
532 | This option should be set to '1' to force nested elements to be represented | |
533 | as arrays even when there is only one. Eg, with ForceArray enabled, this | |
534 | \&\s-1XML:\s0 | |
535 | .PP | |
536 | .Vb 3 | |
537 | \& <opt> | |
538 | \& <name>value</name> | |
539 | \& </opt> | |
540 | .Ve | |
541 | .PP | |
542 | would parse to this: | |
543 | .PP | |
544 | .Vb 5 | |
545 | \& { | |
546 | \& 'name' => [ | |
547 | \& 'value' | |
548 | \& ] | |
549 | \& } | |
550 | .Ve | |
551 | .PP | |
552 | instead of this (the default): | |
553 | .PP | |
554 | .Vb 3 | |
555 | \& { | |
556 | \& 'name' => 'value' | |
557 | \& } | |
558 | .Ve | |
559 | .PP | |
560 | This option is especially useful if the data structure is likely to be written | |
561 | back out as \s-1XML\s0 and the default behaviour of rolling single nested elements up | |
562 | into attributes is not desirable. | |
563 | .PP | |
564 | If you are using the array folding feature, you should almost certainly enable | |
565 | this option. If you do not, single nested elements will not be parsed to | |
566 | arrays and therefore will not be candidates for folding to a hash. (Given that | |
567 | the default value of 'KeyAttr' enables array folding, the default value of this | |
568 | option should probably have been enabled too \- sorry). | |
569 | .Sh "ForceArray => [ names ] \fI# in \- important\fP" | |
570 | .IX Subsection "ForceArray => [ names ] # in - important" | |
571 | This alternative (and preferred) form of the 'ForceArray' option allows you to | |
572 | specify a list of element names which should always be forced into an array | |
573 | representation, rather than the 'all or nothing' approach above. | |
574 | .PP | |
575 | It is also possible (since version 2.05) to include compiled regular | |
576 | expressions in the list \- any element names which match the pattern will be | |
577 | forced to arrays. If the list contains only a single regex, then it is not | |
578 | necessary to enclose it in an arrayref. Eg: | |
579 | .PP | |
580 | .Vb 1 | |
581 | \& ForceArray => qr/_list$/ | |
582 | .Ve | |
583 | .Sh "ForceContent => 1 \fI# in \- seldom used\fP" | |
584 | .IX Subsection "ForceContent => 1 # in - seldom used" | |
585 | When \f(CW\*(C`XMLin()\*(C'\fR parses elements which have text content as well as attributes, | |
586 | the text content must be represented as a hash value rather than a simple | |
587 | scalar. This option allows you to force text content to always parse to | |
588 | a hash value even when there are no attributes. So for example: | |
589 | .PP | |
590 | .Vb 1 | |
591 | \& XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1) | |
592 | .Ve | |
593 | .PP | |
594 | will parse to: | |
595 | .PP | |
596 | .Vb 4 | |
597 | \& { | |
598 | \& 'x' => { 'content' => 'text1' }, | |
599 | \& 'y' => { 'a' => 2, 'content' => 'text2' } | |
600 | \& } | |
601 | .Ve | |
602 | .PP | |
603 | instead of: | |
604 | .PP | |
605 | .Vb 4 | |
606 | \& { | |
607 | \& 'x' => 'text1', | |
608 | \& 'y' => { 'a' => 2, 'content' => 'text2' } | |
609 | \& } | |
610 | .Ve | |
611 | .Sh "GroupTags => { grouping tag => grouped tag } \fI# in+out \- handy\fP" | |
612 | .IX Subsection "GroupTags => { grouping tag => grouped tag } # in+out - handy" | |
613 | You can use this option to eliminate extra levels of indirection in your Perl | |
614 | data structure. For example this \s-1XML:\s0 | |
615 | .PP | |
616 | .Vb 7 | |
617 | \& <opt> | |
618 | \& <searchpath> | |
619 | \& <dir>/usr/bin</dir> | |
620 | \& <dir>/usr/local/bin</dir> | |
621 | \& <dir>/usr/X11/bin</dir> | |
622 | \& </searchpath> | |
623 | \& </opt> | |
624 | .Ve | |
625 | .PP | |
626 | Would normally be read into a structure like this: | |
627 | .PP | |
628 | .Vb 5 | |
629 | \& { | |
630 | \& searchpath => { | |
631 | \& dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] | |
632 | \& } | |
633 | \& } | |
634 | .Ve | |
635 | .PP | |
636 | But when read in with the appropriate value for 'GroupTags': | |
637 | .PP | |
638 | .Vb 1 | |
639 | \& my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' }); | |
640 | .Ve | |
641 | .PP | |
642 | It will return this simpler structure: | |
643 | .PP | |
644 | .Vb 3 | |
645 | \& { | |
646 | \& searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] | |
647 | \& } | |
648 | .Ve | |
649 | .PP | |
650 | The grouping element (\f(CW\*(C`<searchpath>\*(C'\fR in the example) must not contain any | |
651 | attributes or elements other than the grouped element. | |
652 | .PP | |
653 | You can specify multiple 'grouping element' to 'grouped element' mappings in | |
654 | the same hashref. If this option is combined with \f(CW\*(C`KeyAttr\*(C'\fR, the array | |
655 | folding will occur first and then the grouped element names will be eliminated. | |
656 | .PP | |
657 | \&\f(CW\*(C`XMLout\*(C'\fR will also use the grouptag mappings to re-introduce the tags around | |
658 | the grouped elements. Beware though that this will occur in all places that | |
659 | the 'grouping tag' name occurs \- you probably don't want to use the same name | |
660 | for elements as well as attributes. | |
661 | .Sh "Handler => object_ref \fI# out \- \s-1SAX\s0 only\fP" | |
662 | .IX Subsection "Handler => object_ref # out - SAX only" | |
663 | Use the 'Handler' option to have \f(CW\*(C`XMLout()\*(C'\fR generate \s-1SAX\s0 events rather than | |
664 | returning a string of \s-1XML\s0. For more details see \*(L"\s-1SAX\s0 \s-1SUPPORT\s0\*(R" below. | |
665 | .PP | |
666 | Note: the current implementation of this option generates a string of \s-1XML\s0 | |
667 | and uses a \s-1SAX\s0 parser to translate it into \s-1SAX\s0 events. The normal encoding | |
668 | rules apply here \- your data must be \s-1UTF8\s0 encoded unless you specify an | |
669 | alternative encoding via the 'XMLDecl' option; and by the time the data reaches | |
670 | the handler object, it will be in \s-1UTF8\s0 form regardless of the encoding you | |
671 | supply. A future implementation of this option may generate the events | |
672 | directly. | |
673 | .Sh "KeepRoot => 1 \fI# in+out \- handy\fP" | |
674 | .IX Subsection "KeepRoot => 1 # in+out - handy" | |
675 | In its attempt to return a data structure free of superfluous detail and | |
676 | unnecessary levels of indirection, \f(CW\*(C`XMLin()\*(C'\fR normally discards the root | |
677 | element name. Setting the 'KeepRoot' option to '1' will cause the root element | |
678 | name to be retained. So after executing this code: | |
679 | .PP | |
680 | .Vb 1 | |
681 | \& $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1) | |
682 | .Ve | |
683 | .PP | |
684 | You'll be able to reference the tempdir as | |
685 | \&\f(CW\*(C`$config\->{config}\->{tempdir}\*(C'\fR instead of the default | |
686 | \&\f(CW\*(C`$config\->{tempdir}\*(C'\fR. | |
687 | .PP | |
688 | Similarly, setting the 'KeepRoot' option to '1' will tell \f(CW\*(C`XMLout()\*(C'\fR that the | |
689 | data structure already contains a root element name and it is not necessary to | |
690 | add another. | |
691 | .Sh "KeyAttr => [ list ] \fI# in+out \- important\fP" | |
692 | .IX Subsection "KeyAttr => [ list ] # in+out - important" | |
693 | This option controls the 'array folding' feature which translates nested | |
694 | elements from an array to a hash. It also controls the 'unfolding' of hashes | |
695 | to arrays. | |
696 | .PP | |
697 | For example, this \s-1XML:\s0 | |
698 | .PP | |
699 | .Vb 4 | |
700 | \& <opt> | |
701 | \& <user login="grep" fullname="Gary R Epstein" /> | |
702 | \& <user login="stty" fullname="Simon T Tyson" /> | |
703 | \& </opt> | |
704 | .Ve | |
705 | .PP | |
706 | would, by default, parse to this: | |
707 | .PP | |
708 | .Vb 12 | |
709 | \& { | |
710 | \& 'user' => [ | |
711 | \& { | |
712 | \& 'login' => 'grep', | |
713 | \& 'fullname' => 'Gary R Epstein' | |
714 | \& }, | |
715 | \& { | |
716 | \& 'login' => 'stty', | |
717 | \& 'fullname' => 'Simon T Tyson' | |
718 | \& } | |
719 | \& ] | |
720 | \& } | |
721 | .Ve | |
722 | .PP | |
723 | If the option 'KeyAttr => \*(L"login\*(R"' were used to specify that the 'login' | |
724 | attribute is a key, the same \s-1XML\s0 would parse to: | |
725 | .PP | |
726 | .Vb 10 | |
727 | \& { | |
728 | \& 'user' => { | |
729 | \& 'stty' => { | |
730 | \& 'fullname' => 'Simon T Tyson' | |
731 | \& }, | |
732 | \& 'grep' => { | |
733 | \& 'fullname' => 'Gary R Epstein' | |
734 | \& } | |
735 | \& } | |
736 | \& } | |
737 | .Ve | |
738 | .PP | |
739 | The key attribute names should be supplied in an arrayref if there is more | |
740 | than one. \f(CW\*(C`XMLin()\*(C'\fR will attempt to match attribute names in the order | |
741 | supplied. \f(CW\*(C`XMLout()\*(C'\fR will use the first attribute name supplied when | |
742 | \&'unfolding' a hash into an array. | |
743 | .PP | |
744 | Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do | |
745 | not want folding on input or unfolding on output you must set this option | |
746 | to an empty list to disable the feature. | |
747 | .PP | |
748 | Note 2: If you wish to use this option, you should also enable the | |
749 | \&\f(CW\*(C`ForceArray\*(C'\fR option. Without 'ForceArray', a single nested element will be | |
750 | rolled up into a scalar rather than an array and therefore will not be folded | |
751 | (since only arrays get folded). | |
752 | .Sh "KeyAttr => { list } \fI# in+out \- important\fP" | |
753 | .IX Subsection "KeyAttr => { list } # in+out - important" | |
754 | This alternative (and preferred) method of specifying the key attributes | |
755 | allows more fine grained control over which elements are folded and on which | |
756 | attributes. For example the option 'KeyAttr => { package => 'id' }' will cause | |
757 | any package elements to be folded on the 'id' attribute. No other elements | |
758 | which have an 'id' attribute will be folded at all. | |
759 | .PP | |
760 | Note: \f(CW\*(C`XMLin()\*(C'\fR will generate a warning (or a fatal error in \*(L"\s-1STRICT\s0 \s-1MODE\s0\*(R") | |
761 | if this syntax is used and an element which does not have the specified key | |
762 | attribute is encountered (eg: a 'package' element without an 'id' attribute, to | |
763 | use the example above). Warnings will only be generated if \fB\-w\fR is in force. | |
764 | .PP | |
765 | Two further variations are made possible by prefixing a '+' or a '\-' character | |
766 | to the attribute name: | |
767 | .PP | |
768 | The option 'KeyAttr => { user => \*(L"+login\*(R" }' will cause this \s-1XML:\s0 | |
769 | .PP | |
770 | .Vb 4 | |
771 | \& <opt> | |
772 | \& <user login="grep" fullname="Gary R Epstein" /> | |
773 | \& <user login="stty" fullname="Simon T Tyson" /> | |
774 | \& </opt> | |
775 | .Ve | |
776 | .PP | |
777 | to parse to this data structure: | |
778 | .PP | |
779 | .Vb 12 | |
780 | \& { | |
781 | \& 'user' => { | |
782 | \& 'stty' => { | |
783 | \& 'fullname' => 'Simon T Tyson', | |
784 | \& 'login' => 'stty' | |
785 | \& }, | |
786 | \& 'grep' => { | |
787 | \& 'fullname' => 'Gary R Epstein', | |
788 | \& 'login' => 'grep' | |
789 | \& } | |
790 | \& } | |
791 | \& } | |
792 | .Ve | |
793 | .PP | |
794 | The '+' indicates that the value of the key attribute should be copied rather | |
795 | than moved to the folded hash key. | |
796 | .PP | |
797 | A '\-' prefix would produce this result: | |
798 | .PP | |
799 | .Vb 12 | |
800 | \& { | |
801 | \& 'user' => { | |
802 | \& 'stty' => { | |
803 | \& 'fullname' => 'Simon T Tyson', | |
804 | \& '-login' => 'stty' | |
805 | \& }, | |
806 | \& 'grep' => { | |
807 | \& 'fullname' => 'Gary R Epstein', | |
808 | \& '-login' => 'grep' | |
809 | \& } | |
810 | \& } | |
811 | \& } | |
812 | .Ve | |
813 | .PP | |
814 | As described earlier, \f(CW\*(C`XMLout\*(C'\fR will ignore hash keys starting with a '\-'. | |
815 | .Sh "NoAttr => 1 \fI# in+out \- handy\fP" | |
816 | .IX Subsection "NoAttr => 1 # in+out - handy" | |
817 | When used with \f(CW\*(C`XMLout()\*(C'\fR, the generated \s-1XML\s0 will contain no attributes. | |
818 | All hash key/values will be represented as nested elements instead. | |
819 | .PP | |
820 | When used with \f(CW\*(C`XMLin()\*(C'\fR, any attributes in the \s-1XML\s0 will be ignored. | |
821 | .Sh "NoEscape => 1 \fI# out \- seldom used\fP" | |
822 | .IX Subsection "NoEscape => 1 # out - seldom used" | |
823 | By default, \f(CW\*(C`XMLout()\*(C'\fR will translate the characters '<', '>', '&' and | |
824 | \&'"' to '&lt;', '&gt;', '&amp;' and '&quot;' respectively. Use this option to | |
825 | suppress escaping (presumably because you've already escaped the data in some | |
826 | more sophisticated manner). | |
827 | .Sh "NoIndent => 1 \fI# out \- seldom used\fP" | |
828 | .IX Subsection "NoIndent => 1 # out - seldom used" | |
829 | Set this option to 1 to disable \f(CW\*(C`XMLout()\*(C'\fR's default 'pretty printing' mode. | |
830 | With this option enabled, the \s-1XML\s0 output will all be on one line (unless there | |
831 | are newlines in the data) \- this may be easier for downstream processing. | |
832 | .Sh "NoSort => 1 \fI# out \- seldom used\fP" | |
833 | .IX Subsection "NoSort => 1 # out - seldom used" | |
834 | Newer versions of XML::Simple sort elements and attributes alphabetically (*), | |
835 | by default. Enable this option to suppress the sorting \- possibly for | |
836 | backwards compatibility. | |
837 | .PP | |
838 | * Actually, sorting is alphabetical but 'key' attribute or element names (as in | |
839 | \&'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements | |
840 | are sorted alphabetically by the value of the key field. | |
841 | .Sh "NormaliseSpace => 0 | 1 | 2 \fI# in \- handy\fP" | |
842 | .IX Subsection "NormaliseSpace => 0 | 1 | 2 # in - handy" | |
843 | This option controls how whitespace in text content is handled. Recognised | |
844 | values for the option are: | |
845 | .IP "\(bu" 4 | |
846 | 0 = (default) whitespace is passed through unaltered (except of course for the | |
847 | normalisation of whitespace in attribute values which is mandated by the \s-1XML\s0 | |
848 | recommendation) | |
849 | .IP "\(bu" 4 | |
850 | 1 = whitespace is normalised in any value used as a hash key (normalising means | |
851 | removing leading and trailing whitespace and collapsing sequences of whitespace | |
852 | characters to a single space) | |
853 | .IP "\(bu" 4 | |
854 | 2 = whitespace is normalised in all text content | |
855 | .PP | |
856 | Note: you can spell this option with a 'z' if that is more natural for you. | |
857 | .Sh "NSExpand => 1 \fI# in+out handy \- \s-1SAX\s0 only\fP" | |
858 | .IX Subsection "NSExpand => 1 # in+out handy - SAX only" | |
859 | This option controls namespace expansion \- the translation of element and | |
860 | attribute names of the form 'prefix:name' to '{uri}name'. For example the | |
861 | element name 'xsl:template' might be expanded to: | |
862 | \&'{http://www.w3.org/1999/XSL/Transform}template'. | |
863 | .PP | |
864 | By default, \f(CW\*(C`XMLin()\*(C'\fR will return element names and attribute names exactly as | |
865 | they appear in the \s-1XML\s0. Setting this option to 1 will cause all element and | |
866 | attribute names to be expanded to include their namespace URI. | |
867 | .PP | |
868 | \&\fINote: You must be using a \s-1SAX\s0 parser for this option to work (ie: it does not | |
869 | work with XML::Parser)\fR. | |
870 | .PP | |
871 | This option also controls whether \f(CW\*(C`XMLout()\*(C'\fR performs the reverse translation | |
872 | from '{uri}name' back to 'prefix:name'. The default is no translation. If | |
873 | your data contains expanded names, you should set this option to 1 otherwise | |
874 | \&\f(CW\*(C`XMLout\*(C'\fR will emit \s-1XML\s0 which is not well formed. | |
875 | .PP | |
876 | \&\fINote: You must have the XML::NamespaceSupport module installed if you want | |
877 | \&\f(CI\*(C`XMLout()\*(C'\fI to translate URIs back to prefixes\fR. | |
878 | .Sh "NumericEscape => 0 | 1 | 2 \fI# out \- handy\fP" | |
879 | .IX Subsection "NumericEscape => 0 | 1 | 2 # out - handy" | |
880 | Use this option to have 'high' (non\-ASCII) characters in your Perl data | |
881 | structure converted to numeric entities (eg: &#8364;) in the \s-1XML\s0 output. Three | |
882 | levels are possible: | |
883 | .PP | |
884 | 0 \- default: no numeric escaping (\s-1OK\s0 if you're writing out \s-1UTF8\s0) | |
885 | .PP | |
886 | 1 \- only characters above 0xFF are escaped (ie: characters in the 0x80\-FF range are not escaped), possibly useful with \s-1ISO8859\-1\s0 output | |
887 | .PP | |
888 | 2 \- all characters above 0x7F are escaped (good for plain \s-1ASCII\s0 output) | |
889 | .Sh "OutputFile => <file specifier> \fI# out \- handy\fP" | |
890 | .IX Subsection "OutputFile => <file specifier> # out - handy" | |
891 | The default behaviour of \f(CW\*(C`XMLout()\*(C'\fR is to return the \s-1XML\s0 as a string. If you | |
892 | wish to write the \s-1XML\s0 to a file, simply supply the filename using the | |
893 | \&'OutputFile' option. | |
894 | .PP | |
895 | This option also accepts an \s-1IO\s0 handle object \- especially useful in Perl 5.8.0 | |
896 | and later for output using an encoding other than \s-1UTF\-8\s0, eg: | |
897 | .PP | |
898 | .Vb 2 | |
899 | \& open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!"; | |
900 | \& XMLout($ref, OutputFile => $fh); | |
901 | .Ve | |
902 | .PP | |
903 | Note, XML::Simple does not require that the object you pass in to the | |
904 | OutputFile option inherits from IO::Handle \- it simply assumes the object | |
905 | supports a \f(CW\*(C`print\*(C'\fR method. | |
906 | .Sh "ParserOpts => [ XML::Parser Options ] \fI# in \- don't use this\fP" | |
907 | .IX Subsection "ParserOpts => [ XML::Parser Options ] # in - don't use this" | |
908 | \&\fINote: This option is now officially deprecated. If you find it useful, email | |
909 | the author with an example of what you use it for. Do not use this option to | |
910 | set the ProtocolEncoding, that's just plain wrong \- fix the \s-1XML\s0\fR. | |
911 | .PP | |
912 | This option allows you to pass parameters to the constructor of the underlying | |
913 | XML::Parser object (which of course assumes you're not using \s-1SAX\s0). | |
914 | .Sh "RootName => 'string' \fI# out \- handy\fP" | |
915 | .IX Subsection "RootName => 'string' # out - handy" | |
916 | By default, when \f(CW\*(C`XMLout()\*(C'\fR generates \s-1XML\s0, the root element will be named | |
917 | \&'opt'. This option allows you to specify an alternative name. | |
918 | .PP | |
919 | Specifying either undef or the empty string for the RootName option will | |
920 | produce \s-1XML\s0 with no root elements. In most cases the resulting \s-1XML\s0 fragment | |
921 | will not be 'well formed' and therefore could not be read back in by \f(CW\*(C`XMLin()\*(C'\fR. | |
922 | Nevertheless, the option has been found to be useful in certain circumstances. | |
923 | .Sh "SearchPath => [ list ] \fI# in \- handy\fP" | |
924 | .IX Subsection "SearchPath => [ list ] # in - handy" | |
925 | If you pass \f(CW\*(C`XMLin()\*(C'\fR a filename, but the filename includes no directory | |
926 | component, you can use this option to specify which directories should be | |
927 | searched to locate the file. You might use this option to search first in the | |
928 | user's home directory, then in a global directory such as /etc. | |
929 | .PP | |
930 | If a filename is provided to \f(CW\*(C`XMLin()\*(C'\fR but SearchPath is not defined, the | |
931 | file is assumed to be in the current directory. | |
932 | .PP | |
933 | If the first parameter to \f(CW\*(C`XMLin()\*(C'\fR is undefined, the default SearchPath | |
934 | will contain only the directory in which the script itself is located. | |
935 | Otherwise the default SearchPath will be empty. | |
936 | .Sh "SuppressEmpty => 1 | '' | undef \fI# in+out \- handy\fP" | |
937 | .IX Subsection "SuppressEmpty => 1 | '' | undef # in+out - handy" | |
938 | This option controls what \f(CW\*(C`XMLin()\*(C'\fR should do with empty elements (no | |
939 | attributes and no content). The default behaviour is to represent them as | |
940 | empty hashes. Setting this option to a true value (eg: 1) will cause empty | |
941 | elements to be skipped altogether. Setting the option to 'undef' or the empty | |
942 | string will cause empty elements to be represented as the undefined value or | |
943 | the empty string respectively. The latter two alternatives are a little | |
944 | easier to test for in your code than a hash with no keys. | |
945 | .PP | |
946 | The option also controls what \f(CW\*(C`XMLout()\*(C'\fR does with undefined values. Setting | |
947 | the option to undef causes undefined values to be output as empty elements | |
948 | (rather than empty attributes), it also suppresses the generation of warnings | |
949 | about undefined values. Setting the option to a true value (eg: 1) causes | |
950 | undefined values to be skipped altogether on output. | |
951 | .Sh "ValueAttr => [ names ] \fI# in \- handy\fP" | |
952 | .IX Subsection "ValueAttr => [ names ] # in - handy" | |
953 | Use this option to deal with elements which always have a single attribute and no | |
954 | content. Eg: | |
955 | .PP | |
956 | .Vb 4 | |
957 | \& <opt> | |
958 | \& <colour value="red" /> | |
959 | \& <size value="XXL" /> | |
960 | \& </opt> | |
961 | .Ve | |
962 | .PP | |
963 | Setting \f(CW\*(C`ValueAttr => [ 'value' ]\*(C'\fR will cause the above \s-1XML\s0 to parse to: | |
964 | .PP | |
965 | .Vb 4 | |
966 | \& { | |
967 | \& colour => 'red', | |
968 | \& size => 'XXL' | |
969 | \& } | |
970 | .Ve | |
971 | .PP | |
972 | instead of this (the default): | |
973 | .PP | |
974 | .Vb 4 | |
975 | \& { | |
976 | \& colour => { value => 'red' }, | |
977 | \& size => { value => 'XXL' } | |
978 | \& } | |
979 | .Ve | |
980 | .PP | |
981 | Note: This form of the ValueAttr option is not compatible with \f(CW\*(C`XMLout()\*(C'\fR \- | |
982 | since the attribute name is discarded at parse time, the original \s-1XML\s0 cannot be | |
983 | reconstructed. | |
984 | .Sh "ValueAttr => { element => attribute, ... } \fI# in+out \- handy\fP" | |
985 | .IX Subsection "ValueAttr => { element => attribute, ... } # in+out - handy" | |
986 | This (preferred) form of the ValueAttr option requires you to specify both | |
987 | the element and the attribute names. This is not only safer, it also allows | |
988 | the original \s-1XML\s0 to be reconstructed by \f(CW\*(C`XMLout()\*(C'\fR. | |
989 | .PP | |
990 | Note: You probably don't want to use this option and the NoAttr option at the | |
991 | same time. | |
992 | .Sh "Variables => { name => value } \fI# in \- handy\fP" | |
993 | .IX Subsection "Variables => { name => value } # in - handy" | |
994 | This option allows variables in the \s-1XML\s0 to be expanded when the file is read. | |
995 | (there is no facility for putting the variable names back if you regenerate | |
996 | \&\s-1XML\s0 using \f(CW\*(C`XMLout\*(C'\fR). | |
997 | .PP | |
998 | A 'variable' is any text of the form \f(CW\*(C`${name}\*(C'\fR which occurs in an attribute | |
999 | value or in the text content of an element. If 'name' matches a key in the | |
1000 | supplied hashref, \f(CW\*(C`${name}\*(C'\fR will be replaced with the corresponding value from | |
1001 | the hashref. If no matching key is found, the variable will not be replaced. | |
1002 | Names must match the regex: \f(CW\*(C`[\ew.]+\*(C'\fR (ie: only 'word' characters and dots are | |
1003 | allowed). | |
1004 | .Sh "VarAttr => 'attr_name' \fI# in \- handy\fP" | |
1005 | .IX Subsection "VarAttr => 'attr_name' # in - handy" | |
1006 | In addition to the variables defined using \f(CW\*(C`Variables\*(C'\fR, this option allows | |
1007 | variables to be defined in the \s-1XML\s0. A variable definition consists of an | |
1008 | element with an attribute called 'attr_name' (the value of the \f(CW\*(C`VarAttr\*(C'\fR | |
1009 | option). The value of the attribute will be used as the variable name and the | |
1010 | text content of the element will be used as the value. A variable defined in | |
1011 | this way will override a variable defined using the \f(CW\*(C`Variables\*(C'\fR option. For | |
1012 | example: | |
1013 | .PP | |
1014 | .Vb 7 | |
1015 | \& XMLin( '<opt> | |
1016 | \& <dir name="prefix">/usr/local/apache</dir> | |
1017 | \& <dir name="exec_prefix">${prefix}</dir> | |
1018 | \& <dir name="bindir">${exec_prefix}/bin</dir> | |
1019 | \& </opt>', | |
1020 | \& VarAttr => 'name', ContentKey => '-content' | |
1021 | \& ); | |
1022 | .Ve | |
1023 | .PP | |
1024 | produces the following data structure: | |
1025 | .PP | |
1026 | .Vb 7 | |
1027 | \& { | |
1028 | \& dir => { | |
1029 | \& prefix => '/usr/local/apache', | |
1030 | \& exec_prefix => '/usr/local/apache', | |
1031 | \& bindir => '/usr/local/apache/bin', | |
1032 | \& } | |
1033 | \& } | |
1034 | .Ve | |
1035 | .Sh "XMLDecl => 1 or XMLDecl => 'string' \fI# out \- handy\fP" | |
1036 | .IX Subsection "XMLDecl => 1 or XMLDecl => 'string' # out - handy" | |
1037 | If you want the output from \f(CW\*(C`XMLout()\*(C'\fR to start with the optional \s-1XML\s0 | |
1038 | declaration, simply set the option to '1'. The default \s-1XML\s0 declaration is: | |
1039 | .PP | |
1040 | .Vb 1 | |
1041 | \& <?xml version='1.0' standalone='yes'?> | |
1042 | .Ve | |
1043 | .PP | |
1044 | If you want some other string (for example to declare an encoding value), set | |
1045 | the value of this option to the complete string you require. | |
1046 | .SH "OPTIONAL OO INTERFACE" | |
1047 | .IX Header "OPTIONAL OO INTERFACE" | |
1048 | The procedural interface is both simple and convenient however there are a | |
1049 | couple of reasons why you might prefer to use the object oriented (\s-1OO\s0) | |
1050 | interface: | |
1051 | .IP "\(bu" 4 | |
1052 | to define a set of default values which should be used on all subsequent calls | |
1053 | to \f(CW\*(C`XMLin()\*(C'\fR or \f(CW\*(C`XMLout()\*(C'\fR | |
1054 | .IP "\(bu" 4 | |
1055 | to override methods in \fBXML::Simple\fR to provide customised behaviour | |
1056 | .PP | |
1057 | The default values for the options described above are unlikely to suit | |
1058 | everyone. The \s-1OO\s0 interface allows you to effectively override \fBXML::Simple\fR's | |
1059 | defaults with your preferred values. It works like this: | |
1060 | .PP | |
1061 | First create an XML::Simple parser object with your preferred defaults: | |
1062 | .PP | |
1063 | .Vb 1 | |
1064 | \& my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1); | |
1065 | .Ve | |
1066 | .PP | |
1067 | then call \f(CW\*(C`XMLin()\*(C'\fR or \f(CW\*(C`XMLout()\*(C'\fR as a method of that object: | |
1068 | .PP | |
1069 | .Vb 2 | |
1070 | \& my $ref = $xs->XMLin($xml); | |
1071 | \& my $xml = $xs->XMLout($ref); | |
1072 | .Ve | |
1073 | .PP | |
1074 | You can also specify options when you make the method calls and these values | |
1075 | will be merged with the values specified when the object was created. Values | |
1076 | specified in a method call take precedence. | |
1077 | .PP | |
1078 | Overriding methods is a more advanced topic but might be useful if for example | |
1079 | you wished to provide an alternative routine for escaping character data (the | |
1080 | escape_value method) or for building the initial parse tree (the build_tree | |
1081 | method). | |
1082 | .PP | |
1083 | Note: when called as methods, the \f(CW\*(C`XMLin()\*(C'\fR and \f(CW\*(C`XMLout()\*(C'\fR routines may be | |
1084 | called as \f(CW\*(C`xml_in()\*(C'\fR or \f(CW\*(C`xml_out()\*(C'\fR. The method names are aliased so the | |
1085 | only difference is the aesthetics. | |
1086 | .SH "STRICT MODE" | |
1087 | .IX Header "STRICT MODE" | |
1088 | If you import the \fBXML::Simple\fR routines like this: | |
1089 | .PP | |
1090 | .Vb 1 | |
1091 | \& use XML::Simple qw(:strict); | |
1092 | .Ve | |
1093 | .PP | |
1094 | the following common mistakes will be detected and treated as fatal errors: | |
1095 | .IP "\(bu" 4 | |
1096 | Failing to explicitly set the \f(CW\*(C`KeyAttr\*(C'\fR option \- if you can't be bothered | |
1097 | reading about this option, turn it off with: KeyAttr => [ ] | |
1098 | .IP "\(bu" 4 | |
1099 | Failing to explicitly set the \f(CW\*(C`ForceArray\*(C'\fR option \- if you can't be bothered | |
1100 | reading about this option, set it to the safest mode with: ForceArray => 1 | |
1101 | .IP "\(bu" 4 | |
1102 | Setting ForceArray to an array, but failing to list all the elements from the | |
1103 | KeyAttr hash. | |
1104 | .IP "\(bu" 4 | |
1105 | Data error \- KeyAttr is set to say { part => 'partnum' } but the \s-1XML\s0 contains | |
1106 | one or more <part> elements without a 'partnum' attribute (or nested | |
1107 | element). Note: if strict mode is not set but \-w is, this condition triggers a | |
1108 | warning. | |
1109 | .IP "\(bu" 4 | |
1110 | Data error \- as above, but value of key attribute (eg: partnum) is not a | |
1111 | scalar string (due to nested elements etc). This will also trigger a warning | |
1112 | if strict mode is not enabled. | |
1113 | .SH "SAX SUPPORT" | |
1114 | .IX Header "SAX SUPPORT" | |
1115 | From version 1.08_01, \fBXML::Simple\fR includes support for \s-1SAX\s0 (the Simple \s-1API\s0 | |
1116 | for \s-1XML\s0) \- specifically \s-1SAX2\s0. | |
1117 | .PP | |
1118 | In a typical \s-1SAX\s0 application, an \s-1XML\s0 parser (or \s-1SAX\s0 'driver') module generates | |
1119 | \&\s-1SAX\s0 events (start of element, character data, end of element, etc) as it parses | |
1120 | an \s-1XML\s0 document and a 'handler' module processes the events to extract the | |
1121 | required data. This simple model allows for some interesting and powerful | |
1122 | possibilities: | |
1123 | .IP "\(bu" 4 | |
1124 | Applications written to the \s-1SAX\s0 \s-1API\s0 can extract data from huge \s-1XML\s0 documents | |
1125 | without the memory overheads of a \s-1DOM\s0 or tree \s-1API\s0. | |
1126 | .IP "\(bu" 4 | |
1127 | The \s-1SAX\s0 \s-1API\s0 allows for plug and play interchange of parser modules without | |
1128 | having to change your code to fit a new module's \s-1API\s0. A number of \s-1SAX\s0 parsers | |
1129 | are available with capabilities ranging from extreme portability to blazing | |
1130 | performance. | |
1131 | .IP "\(bu" 4 | |
1132 | A \s-1SAX\s0 'filter' module can implement both a handler interface for receiving | |
1133 | data and a generator interface for passing modified data on to a downstream | |
1134 | handler. Filters can be chained together in 'pipelines'. | |
1135 | .IP "\(bu" 4 | |
1136 | One filter module might split a data stream to direct data to two or more | |
1137 | downstream handlers. | |
1138 | .IP "\(bu" 4 | |
1139 | Generating \s-1SAX\s0 events is not the exclusive preserve of \s-1XML\s0 parsing modules. | |
1140 | For example, a module might extract data from a relational database using \s-1DBI\s0 | |
1141 | and pass it on to a \s-1SAX\s0 pipeline for filtering and formatting. | |
1142 | .PP | |
1143 | \&\fBXML::Simple\fR can operate at either end of a \s-1SAX\s0 pipeline. For example, | |
1144 | you can take a data structure in the form of a hashref and pass it into a | |
1145 | \&\s-1SAX\s0 pipeline using the 'Handler' option on \f(CW\*(C`XMLout()\*(C'\fR: | |
1146 | .PP | |
1147 | .Vb 3 | |
1148 | \& use XML::Simple; | |
1149 | \& use Some::SAX::Filter; | |
1150 | \& use XML::SAX::Writer; | |
1151 | .Ve | |
1152 | .PP | |
1153 | .Vb 3 | |
1154 | \& my $ref = { | |
1155 | \& .... # your data here | |
1156 | \& }; | |
1157 | .Ve | |
1158 | .PP | |
1159 | .Vb 4 | |
1160 | \& my $writer = XML::SAX::Writer->new(); | |
1161 | \& my $filter = Some::SAX::Filter->new(Handler => $writer); | |
1162 | \& my $simple = XML::Simple->new(Handler => $filter); | |
1163 | \& $simple->XMLout($ref); | |
1164 | .Ve | |
1165 | .PP | |
1166 | You can also put \fBXML::Simple\fR at the opposite end of the pipeline to take | |
1167 | advantage of the simple 'tree' data structure once the relevant data has been | |
1168 | isolated through filtering: | |
1169 | .PP | |
1170 | .Vb 3 | |
1171 | \& use XML::SAX; | |
1172 | \& use Some::SAX::Filter; | |
1173 | \& use XML::Simple; | |
1174 | .Ve | |
1175 | .PP | |
1176 | .Vb 3 | |
1177 | \& my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']); | |
1178 | \& my $filter = Some::SAX::Filter->new(Handler => $simple); | |
1179 | \& my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); | |
1180 | .Ve | |
1181 | .PP | |
1182 | .Vb 1 | |
1183 | \& my $ref = $parser->parse_uri('some_huge_file.xml'); | |
1184 | .Ve | |
1185 | .PP | |
1186 | .Vb 1 | |
1187 | \& print $ref->{part}->{'555-1234'}; | |
1188 | .Ve | |
1189 | .PP | |
1190 | You can build a filter by using an XML::Simple object as a handler and setting | |
1191 | its DataHandler option to point to a routine which takes the resulting tree, | |
1192 | modifies it and sends it off as \s-1SAX\s0 events to a downstream handler: | |
1193 | .PP | |
1194 | .Vb 5 | |
1195 | \& my $writer = XML::SAX::Writer->new(); | |
1196 | \& my $filter = XML::Simple->new( | |
1197 | \& DataHandler => sub { | |
1198 | \& my $simple = shift; | |
1199 | \& my $data = shift; | |
1200 | .Ve | |
1201 | .PP | |
1202 | .Vb 1 | |
1203 | \& # Modify $data here | |
1204 | .Ve | |
1205 | .PP | |
1206 | .Vb 4 | |
1207 | \& $simple->XMLout($data, Handler => $writer); | |
1208 | \& } | |
1209 | \& ); | |
1210 | \& my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); | |
1211 | .Ve | |
1212 | .PP | |
1213 | .Vb 1 | |
1214 | \& $parser->parse_uri($filename); | |
1215 | .Ve | |
1216 | .PP | |
1217 | \&\fINote: In this last example, the 'Handler' option was specified in the call to | |
1218 | \&\f(CI\*(C`XMLout()\*(C'\fI but it could also have been specified in the constructor\fR. | |
1219 | .SH "ENVIRONMENT" | |
1220 | .IX Header "ENVIRONMENT" | |
1221 | If you don't care which parser module \fBXML::Simple\fR uses then skip this | |
1222 | section entirely (it looks more complicated than it really is). | |
1223 | .PP | |
1224 | \&\fBXML::Simple\fR will default to using a \fB\s-1SAX\s0\fR parser if one is available or | |
1225 | \&\fBXML::Parser\fR if \s-1SAX\s0 is not available. | |
1226 | .PP | |
1227 | You can dictate which parser module is used by setting either the environment | |
1228 | variable '\s-1XML_SIMPLE_PREFERRED_PARSER\s0' or the package variable | |
1229 | \&\f(CW$XML::Simple::PREFERRED_PARSER\fR to contain the module name. The following rules | |
1230 | are used: | |
1231 | .IP "\(bu" 4 | |
1232 | The package variable takes precedence over the environment variable if both are defined. To force \fBXML::Simple\fR to ignore the environment settings and use | |
1233 | its default rules, you can set the package variable to an empty string. | |
1234 | .IP "\(bu" 4 | |
1235 | If the 'preferred parser' is set to the string 'XML::Parser', then | |
1236 | XML::Parser will be used (or \f(CW\*(C`XMLin()\*(C'\fR will die if XML::Parser is not | |
1237 | installed). | |
1238 | .IP "\(bu" 4 | |
1239 | If the 'preferred parser' is set to some other value, then it is assumed to be | |
1240 | the name of a \s-1SAX\s0 parser module and is passed to XML::SAX::ParserFactory. | |
1241 | If \s-1XML::SAX\s0 is not installed, or the requested parser module is not | |
1242 | installed, then \f(CW\*(C`XMLin()\*(C'\fR will die. | |
1243 | .IP "\(bu" 4 | |
1244 | If the 'preferred parser' is not defined at all (the normal default | |
1245 | state), an attempt will be made to load \s-1XML::SAX\s0. If \s-1XML::SAX\s0 is | |
1246 | installed, then a parser module will be selected according to | |
1247 | XML::SAX::ParserFactory's normal rules (which typically means the last \s-1SAX\s0 | |
1248 | parser installed). | |
1249 | .IP "\(bu" 4 | |
1250 | If the 'preferred parser' is not defined and \fB\s-1XML::SAX\s0\fR is not | |
1251 | installed, then \fBXML::Parser\fR will be used. \f(CW\*(C`XMLin()\*(C'\fR will die if | |
1252 | XML::Parser is not installed. | |
1253 | .PP | |
1254 | Note: The \fB\s-1XML::SAX\s0\fR distribution includes an \s-1XML\s0 parser written entirely in | |
1255 | Perl. It is very portable but it is not very fast. You should consider | |
1256 | installing XML::LibXML or XML::SAX::Expat if they are available for your | |
1257 | platform. | |
1258 | .SH "ERROR HANDLING" | |
1259 | .IX Header "ERROR HANDLING" | |
1260 | The \s-1XML\s0 standard is very clear on the issue of non-compliant documents. An | |
1261 | error in parsing any single element (for example a missing end tag) must cause | |
1262 | the whole document to be rejected. \fBXML::Simple\fR will die with an appropriate | |
1263 | message if it encounters a parsing error. | |
1264 | .PP | |
1265 | If dying is not appropriate for your application, you should arrange to call | |
1266 | \&\f(CW\*(C`XMLin()\*(C'\fR in an eval block and look for errors in $@. eg: | |
1267 | .PP | |
1268 | .Vb 2 | |
1269 | \& my $config = eval { XMLin() }; | |
1270 | \& PopUpMessage($@) if($@); | |
1271 | .Ve | |
1272 | .PP | |
1273 | Note, there is a common misconception that use of \fBeval\fR will significantly | |
1274 | slow down a script. While that may be true when the code being eval'd is in a | |
1275 | string, it is not true of code like the sample above. | |
1276 | .SH "EXAMPLES" | |
1277 | .IX Header "EXAMPLES" | |
1278 | When \f(CW\*(C`XMLin()\*(C'\fR reads the following very simple piece of \s-1XML:\s0 | |
1279 | .PP | |
1280 | .Vb 1 | |
1281 | \& <opt username="testuser" password="frodo"></opt> | |
1282 | .Ve | |
1283 | .PP | |
1284 | it returns the following data structure: | |
1285 | .PP | |
1286 | .Vb 4 | |
1287 | \& { | |
1288 | \& 'username' => 'testuser', | |
1289 | \& 'password' => 'frodo' | |
1290 | \& } | |
1291 | .Ve | |
1292 | .PP | |
1293 | The identical result could have been produced with this alternative \s-1XML:\s0 | |
1294 | .PP | |
1295 | .Vb 1 | |
1296 | \& <opt username="testuser" password="frodo" /> | |
1297 | .Ve | |
1298 | .PP | |
1299 | Or this (although see 'ForceArray' option for variations): | |
1300 | .PP | |
1301 | .Vb 4 | |
1302 | \& <opt> | |
1303 | \& <username>testuser</username> | |
1304 | \& <password>frodo</password> | |
1305 | \& </opt> | |
1306 | .Ve | |
1307 | .PP | |
1308 | Repeated nested elements are represented as anonymous arrays: | |
1309 | .PP | |
1310 | .Vb 9 | |
1311 | \& <opt> | |
1312 | \& <person firstname="Joe" lastname="Smith"> | |
1313 | \& <email>joe@smith.com</email> | |
1314 | \& <email>jsmith@yahoo.com</email> | |
1315 | \& </person> | |
1316 | \& <person firstname="Bob" lastname="Smith"> | |
1317 | \& <email>bob@smith.com</email> | |
1318 | \& </person> | |
1319 | \& </opt> | |
1320 | .Ve | |
1321 | .PP | |
1322 | .Vb 17 | |
1323 | \& { | |
1324 | \& 'person' => [ | |
1325 | \& { | |
1326 | \& 'email' => [ | |
1327 | \& 'joe@smith.com', | |
1328 | \& 'jsmith@yahoo.com' | |
1329 | \& ], | |
1330 | \& 'firstname' => 'Joe', | |
1331 | \& 'lastname' => 'Smith' | |
1332 | \& }, | |
1333 | \& { | |
1334 | \& 'email' => 'bob@smith.com', | |
1335 | \& 'firstname' => 'Bob', | |
1336 | \& 'lastname' => 'Smith' | |
1337 | \& } | |
1338 | \& ] | |
1339 | \& } | |
1340 | .Ve | |
1341 | .PP | |
1342 | Nested elements with a recognised key attribute are transformed (folded) from | |
1343 | an array into a hash keyed on the value of that attribute (see the \f(CW\*(C`KeyAttr\*(C'\fR | |
1344 | option): | |
1345 | .PP | |
1346 | .Vb 5 | |
1347 | \& <opt> | |
1348 | \& <person key="jsmith" firstname="Joe" lastname="Smith" /> | |
1349 | \& <person key="tsmith" firstname="Tom" lastname="Smith" /> | |
1350 | \& <person key="jbloggs" firstname="Joe" lastname="Bloggs" /> | |
1351 | \& </opt> | |
1352 | .Ve | |
1353 | .PP | |
1354 | .Vb 16 | |
1355 | \& { | |
1356 | \& 'person' => { | |
1357 | \& 'jbloggs' => { | |
1358 | \& 'firstname' => 'Joe', | |
1359 | \& 'lastname' => 'Bloggs' | |
1360 | \& }, | |
1361 | \& 'tsmith' => { | |
1362 | \& 'firstname' => 'Tom', | |
1363 | \& 'lastname' => 'Smith' | |
1364 | \& }, | |
1365 | \& 'jsmith' => { | |
1366 | \& 'firstname' => 'Joe', | |
1367 | \& 'lastname' => 'Smith' | |
1368 | \& } | |
1369 | \& } | |
1370 | \& } | |
1371 | .Ve | |
1372 | .PP | |
1373 | The <anon> tag can be used to form anonymous arrays: | |
1374 | .PP | |
1375 | .Vb 6 | |
1376 | \& <opt> | |
1377 | \& <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head> | |
1378 | \& <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data> | |
1379 | \& <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data> | |
1380 | \& <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data> | |
1381 | \& </opt> | |
1382 | .Ve | |
1383 | .PP | |
1384 | .Vb 10 | |
1385 | \& { | |
1386 | \& 'head' => [ | |
1387 | \& [ 'Col 1', 'Col 2', 'Col 3' ] | |
1388 | \& ], | |
1389 | \& 'data' => [ | |
1390 | \& [ 'R1C1', 'R1C2', 'R1C3' ], | |
1391 | \& [ 'R2C1', 'R2C2', 'R2C3' ], | |
1392 | \& [ 'R3C1', 'R3C2', 'R3C3' ] | |
1393 | \& ] | |
1394 | \& } | |
1395 | .Ve | |
1396 | .PP | |
1397 | Anonymous arrays can be nested to arbitrary levels and as a special case, if | |
1398 | the surrounding tags for an \s-1XML\s0 document contain only an anonymous array the | |
1399 | arrayref will be returned directly rather than the usual hashref: | |
1400 | .PP | |
1401 | .Vb 5 | |
1402 | \& <opt> | |
1403 | \& <anon><anon>Col 1</anon><anon>Col 2</anon></anon> | |
1404 | \& <anon><anon>R1C1</anon><anon>R1C2</anon></anon> | |
1405 | \& <anon><anon>R2C1</anon><anon>R2C2</anon></anon> | |
1406 | \& </opt> | |
1407 | .Ve | |
1408 | .PP | |
1409 | .Vb 5 | |
1410 | \& [ | |
1411 | \& [ 'Col 1', 'Col 2' ], | |
1412 | \& [ 'R1C1', 'R1C2' ], | |
1413 | \& [ 'R2C1', 'R2C2' ] | |
1414 | \& ] | |
1415 | .Ve | |
1416 | .PP | |
1417 | Elements which only contain text content will simply be represented as a | |
1418 | scalar. Where an element has both attributes and text content, the element | |
1419 | will be represented as a hashref with the text content in the 'content' key | |
1420 | (see the \f(CW\*(C`ContentKey\*(C'\fR option): | |
1421 | .PP | |
1422 | .Vb 4 | |
1423 | \& <opt> | |
1424 | \& <one>first</one> | |
1425 | \& <two attr="value">second</two> | |
1426 | \& </opt> | |
1427 | .Ve | |
1428 | .PP | |
1429 | .Vb 4 | |
1430 | \& { | |
1431 | \& 'one' => 'first', | |
1432 | \& 'two' => { 'attr' => 'value', 'content' => 'second' } | |
1433 | \& } | |
1434 | .Ve | |
1435 | .PP | |
1436 | Mixed content (elements which contain both text content and nested elements) | |
1437 | will not be represented in a useful way \- element order and significant | |
1438 | whitespace will be lost. If you need to work with mixed content, then | |
1439 | XML::Simple is not the right tool for your job \- check out the next section. | |
1440 | .SH "WHERE TO FROM HERE?" | |
1441 | .IX Header "WHERE TO FROM HERE?" | |
1442 | \&\fBXML::Simple\fR is able to present a simple \s-1API\s0 because it makes some | |
1443 | assumptions on your behalf. These include: | |
1444 | .IP "\(bu" 4 | |
1445 | You're not interested in text content consisting only of whitespace | |
1446 | .IP "\(bu" 4 | |
1447 | You don't mind that when things get slurped into a hash the order is lost | |
1448 | .IP "\(bu" 4 | |
1449 | You don't want fine-grained control of the formatting of generated \s-1XML\s0 | |
1450 | .IP "\(bu" 4 | |
1451 | You would never use a hash key that was not a legal \s-1XML\s0 element name | |
1452 | .IP "\(bu" 4 | |
1453 | You don't need help converting between different encodings | |
1454 | .PP | |
1455 | In a serious \s-1XML\s0 project, you'll probably outgrow these assumptions fairly | |
1456 | quickly. This section of the document used to offer some advice on choosing a | |
1457 | more powerful option. That advice has now grown into the 'Perl\-XML \s-1FAQ\s0' | |
1458 | document which you can find at: <http://perl\-xml.sourceforge.net/faq/> | |
1459 | .PP | |
1460 | The advice in the \s-1FAQ\s0 boils down to a quick explanation of tree versus | |
1461 | event based parsers and then recommends: | |
1462 | .PP | |
1463 | For event based parsing, use \s-1SAX\s0 (do not set out to write any new code for | |
1464 | XML::Parser's handler \s-1API\s0 \- it is obsolete). | |
1465 | .PP | |
1466 | For tree-based parsing, you could choose between the 'Perlish' approach of | |
1467 | XML::Twig and more standards based \s-1DOM\s0 implementations \- preferably one with | |
1468 | XPath support. | |
1469 | .SH "SEE ALSO" | |
1470 | .IX Header "SEE ALSO" | |
1471 | \&\fBXML::Simple\fR requires either XML::Parser or \s-1XML::SAX\s0. | |
1472 | .PP | |
1473 | To generate documents with namespaces, XML::NamespaceSupport is required. | |
1474 | .PP | |
1475 | The optional caching functions require Storable. | |
1476 | .PP | |
1477 | Answers to Frequently Asked Questions about XML::Simple are bundled with this | |
1478 | distribution as: XML::Simple::FAQ | |
1479 | .SH "COPYRIGHT" | |
1480 | .IX Header "COPYRIGHT" | |
1481 | Copyright 1999\-2004 Grant McLean <grantm@cpan.org> | |
1482 | .PP | |
1483 | This library is free software; you can redistribute it and/or modify it | |
1484 | under the same terms as Perl itself. |