Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "XML::Simple::FAQ 3" | |
132 | .TH XML::Simple::FAQ 3 "2004-11-19" "perl v5.8.8" "User Contributed Perl Documentation" | |
133 | .SH "Frequently Asked Questions about XML::Simple" | |
134 | .IX Header "Frequently Asked Questions about XML::Simple" | |
135 | .SH "Basics" | |
136 | .IX Header "Basics" | |
137 | .Sh "What is XML::Simple designed to be used for?" | |
138 | .IX Subsection "What is XML::Simple designed to be used for?" | |
139 | XML::Simple is a Perl module that was originally developed as a tool for | |
140 | reading and writing configuration data in \s-1XML\s0 format. You can use it for | |
141 | many other purposes that involve storing and retrieving structured data in | |
142 | \&\s-1XML\s0. | |
143 | .PP | |
144 | You might also find XML::Simple a good starting point for playing with \s-1XML\s0 | |
145 | from Perl. It doesn't have a steep learning curve and if you outgrow its | |
146 | capabilities there are plenty of other Perl/XML modules to 'step up' to. | |
147 | .Sh "Why store configuration data in \s-1XML\s0 anyway?" | |
148 | .IX Subsection "Why store configuration data in XML anyway?" | |
149 | The many advantages of using \s-1XML\s0 format for configuration data include: | |
150 | .IP "\(bu" 4 | |
151 | Using existing \s-1XML\s0 parsing tools requires less development time, is easier | |
152 | and more robust than developing your own config file parsing code | |
153 | .IP "\(bu" 4 | |
154 | \&\s-1XML\s0 can represent relationships between pieces of data, such as nesting of | |
155 | sections to arbitrary levels (not easily done with .INI files for example) | |
156 | .IP "\(bu" 4 | |
157 | \&\s-1XML\s0 is basically just text, so you can easily edit a config file (easier than | |
158 | editing a Win32 registry) | |
159 | .IP "\(bu" 4 | |
160 | \&\s-1XML\s0 provides standard solutions for handling character sets and encoding | |
161 | beyond basic \s-1ASCII\s0 (important for internationalization) | |
162 | .IP "\(bu" 4 | |
163 | If it becomes necessary to change your configuration file format, there are | |
164 | many tools available for performing transformations on \s-1XML\s0 files | |
165 | .IP "\(bu" 4 | |
166 | \&\s-1XML\s0 is an open standard (the world does not need more proprietary binary | |
167 | file formats) | |
168 | .IP "\(bu" 4 | |
169 | Taking the extra step of developing a \s-1DTD\s0 allows the format of configuration | |
170 | files to be validated before your program reads them (not directly supported | |
171 | by XML::Simple) | |
172 | .IP "\(bu" 4 | |
173 | Combining a \s-1DTD\s0 with a good \s-1XML\s0 editor can give you a \s-1GUI\s0 config editor for | |
174 | minimal coding effort | |
175 | .Sh "What isn't XML::Simple good for?" | |
176 | .IX Subsection "What isn't XML::Simple good for?" | |
177 | The main limitation of XML::Simple is that it does not work with 'mixed | |
178 | content' (see the next question). If you consider that your \s-1XML\s0 files contain |
179 | marked up text rather than structured data, you should probably use another | |
180 | module. | |
181 | .PP | |
182 | If you are working with very large \s-1XML\s0 files, XML::Simple's approach of | |
183 | representing the whole file in memory as a 'tree' data structure may not be | |
184 | suitable. | |
185 | .Sh "What is mixed content?" | |
186 | .IX Subsection "What is mixed content?" | |
187 | Consider this example \s-1XML:\s0 | |
188 | .PP | |
189 | .Vb 3 | |
190 | \& <document> | |
191 | \& <para>This is <em>mixed</em> content.</para> | |
192 | \& </document> | |
193 | .Ve | |
194 | .PP | |
195 | This is said to be mixed content, because the <para> element contains | |
196 | both character data (text content) and nested elements. | |
197 | .PP | |
198 | Here's some more \s-1XML:\s0 | |
199 | .PP | |
200 | .Vb 5 | |
201 | \& <person> | |
202 | \& <first_name>Joe</first_name> | |
203 | \& <last_name>Bloggs</last_name> | |
204 | \& <dob>25-April-1969</dob> | |
205 | \& </person> | |
206 | .Ve | |
207 | .PP | |
208 | This second example is not generally considered to be mixed content. The | |
209 | <first_name>, <last_name> and <dob> elements contain | |
210 | only character data and the <person> element contains only nested | |
211 | elements. (Note: Strictly speaking, the whitespace between the nested | |
212 | elements is character data, but it is ignored by XML::Simple). | |
213 | .Sh "Why doesn't XML::Simple handle mixed content?" | |
214 | .IX Subsection "Why doesn't XML::Simple handle mixed content?" | |
215 | Because if it did, it would no longer be simple :\-) | |
216 | .PP | |
217 | Seriously though, there are plenty of excellent modules that allow you to | |
218 | work with mixed content in a variety of ways. Handling mixed content | |
219 | correctly is not easy and by ignoring these issues, XML::Simple is able to | |
220 | present an \s-1API\s0 without a steep learning curve. | |
221 | .Sh "Which Perl modules do handle mixed content?" | |
222 | .IX Subsection "Which Perl modules do handle mixed content?" | |
223 | Every one of them except XML::Simple :\-) | |
224 | .PP | |
225 | If you're looking for a recommendation, I'd suggest you look at the Perl-XML | |
226 | \&\s-1FAQ\s0 at: | |
227 | .PP | |
228 | .Vb 1 | |
229 | \& http://perl-xml.sourceforge.net/faq/ | |
230 | .Ve | |
231 | .SH "Installation" | |
232 | .IX Header "Installation" | |
233 | .Sh "How do I install XML::Simple?" | |
234 | .IX Subsection "How do I install XML::Simple?" | |
235 | If you're running ActiveState Perl, you've probably already got XML::Simple | |
236 | (although you may want to upgrade to version 1.09 or better for \s-1SAX\s0 support). | |
237 | .PP | |
238 | If you do need to install XML::Simple, you'll need to install an \s-1XML\s0 parser | |
239 | module first. Install either XML::Parser (which you may have already) or | |
240 | \&\s-1XML::SAX\s0. If you install both, \s-1XML::SAX\s0 will be used by default. | |
241 | .PP | |
242 | Once you have a parser installed ... | |
243 | .PP | |
244 | On Unix systems, try: | |
245 | .PP | |
246 | .Vb 1 | |
247 | \& perl -MCPAN -e 'install XML::Simple' | |
248 | .Ve | |
249 | .PP | |
250 | If that doesn't work, download the latest distribution from | |
251 | ftp://ftp.cpan.org/pub/CPAN/authors/id/G/GR/GRANTM , unpack it and run these | |
252 | commands: | |
253 | .PP | |
254 | .Vb 4 | |
255 | \& perl Makefile.PL | |
256 | \& make | |
257 | \& make test | |
258 | \& make install | |
259 | .Ve | |
260 | .PP | |
261 | On Win32, if you have a recent build of ActiveState Perl (618 or better) try | |
262 | this command: | |
263 | .PP | |
264 | .Vb 1 | |
265 | \& ppm install XML::Simple | |
266 | .Ve | |
267 | .PP | |
268 | If that doesn't work, you really only need the Simple.pm file, so extract it | |
269 | from the .tar.gz file (eg: using WinZIP) and save it in the \esite\elib\eXML | |
270 | directory under your Perl installation (typically C:\ePerl). | |
271 | .Sh "I'm trying to install XML::Simple and 'make test' fails" | |
272 | .IX Subsection "I'm trying to install XML::Simple and 'make test' fails" | |
273 | Is the directory where you've unpacked XML::Simple mounted from a file server | |
274 | using \s-1NFS\s0, \s-1SMB\s0 or some other network file sharing? If so, that may cause | |
275 | errors in the following test scripts: |
276 | .PP | |
277 | .Vb 3 | |
278 | \& 3_Storable.t | |
279 | \& 4_MemShare.t | |
280 | \& 5_MemCopy.t | |
281 | .Ve | |
282 | .PP | |
283 | The test suite is designed to exercise the boundary conditions of all | |
284 | XML::Simple's functionality and these three scripts exercise the caching | |
285 | functions. If XML::Simple is asked to parse a file for which it has a cached | |
286 | copy of a previous parse, then it compares the timestamp on the \s-1XML\s0 file with | |
287 | the timestamp on the cached copy. If the cached copy is *newer* then it will | |
288 | be used. If the cached copy is older or the same age then the file is | |
289 | re\-parsed. The test scripts will get confused by networked filesystems if | |
290 | the workstation and server system clocks are not synchronised (to the | |
291 | second). | |
292 | .PP | |
293 | If you get an error in one of these three test scripts but you don't plan to | |
294 | use the caching options (they're not enabled by default), then go right ahead | |
295 | and run 'make install'. If you do plan to use caching, then try unpacking | |
296 | the distribution on local disk and doing the build/test there. | |
297 | .PP | |
298 | It's probably not a good idea to use the caching options with networked | |
299 | filesystems in production. If the file server's clock is ahead of the local | |
300 | clock, XML::Simple will re-parse files when it could have used the cached | |
301 | copy. However if the local clock is ahead of the file server clock and a | |
302 | file is changed immediately after it is cached, the old cached copy will be | |
303 | used. | |
304 | .PP | |
305 | Is one of the three test scripts (above) failing but you're not running on | |
306 | a network filesystem? Are you running Win32? If so, you may be seeing a bug | |
307 | in Win32 where writes to a file do not affect its modification timestamp. |
308 | .PP | |
309 | If none of these scenarios match your situation, please confirm you're | |
310 | running the latest version of XML::Simple and then email the output of | |
311 | \&'make test' to me at grantm@cpan.org | |
312 | .Sh "Why is XML::Simple so slow?" | |
313 | .IX Subsection "Why is XML::Simple so slow?" | |
314 | If you find that XML::Simple is very slow reading \s-1XML\s0, the most likely reason | |
315 | is that you have \s-1XML::SAX\s0 installed but no additional \s-1SAX\s0 parser module. The | |
316 | \&\s-1XML::SAX\s0 distribution includes an \s-1XML\s0 parser written entirely in Perl. This is | |
317 | very portable but not very fast. For better performance install either | |
318 | XML::SAX::Expat or XML::LibXML. | |
319 | .SH "Usage" | |
320 | .IX Header "Usage" | |
321 | .Sh "How do I use XML::Simple?" | |
322 | .IX Subsection "How do I use XML::Simple?" | |
323 | If you had an \s-1XML\s0 document called /etc/appconfig/foo.xml you could 'slurp' it | |
324 | into a simple data structure (typically a hashref) with these lines of code: | |
325 | .PP | |
326 | .Vb 1 | |
327 | \& use XML::Simple; | |
328 | .Ve | |
329 | .PP | |
330 | .Vb 1 | |
331 | \& my $config = XMLin('/etc/appconfig/foo.xml'); | |
332 | .Ve | |
333 | .PP | |
334 | The \fIXMLin()\fR function accepts options after the filename. | |
335 | .Sh "There are so many options, which ones do I really need to know about?" | |
336 | .IX Subsection "There are so many options, which ones do I really need to know about?" | |
337 | Although you can get by without using any options, you shouldn't even | |
338 | consider using XML::Simple in production until you know what these two | |
339 | options do: | |
340 | .IP "\(bu" 4 | |
341 | forcearray | |
342 | .IP "\(bu" 4 | |
343 | keyattr | |
344 | .PP | |
345 | The reason you really need to read about them is because the default values | |
346 | for these options will trip you up if you don't. Although everyone agrees | |
347 | that these defaults are not ideal, there is not wide agreement on what they | |
348 | should be changed to. The answer therefore is to read about them (see below) | |
349 | and select values which are right for you. | |
350 | .Sh "What is the forcearray option all about?" | |
351 | .IX Subsection "What is the forcearray option all about?" | |
352 | Consider this \s-1XML\s0 in a file called ./person.xml: | |
353 | .PP | |
354 | .Vb 7 | |
355 | \& <person> | |
356 | \& <first_name>Joe</first_name> | |
357 | \& <last_name>Bloggs</last_name> | |
358 | \& <hobbie>bungy jumping</hobbie> | |
359 | \& <hobbie>sky diving</hobbie> | |
360 | \& <hobbie>knitting</hobbie> | |
361 | \& </person> | |
362 | .Ve | |
363 | .PP | |
364 | You could read it in with this line: | |
365 | .PP | |
366 | .Vb 1 | |
367 | \& my $person = XMLin('./person.xml'); | |
368 | .Ve | |
369 | .PP | |
370 | Which would give you a data structure like this: | |
371 | .PP | |
372 | .Vb 5 | |
373 | \& $person = { | |
374 | \& 'first_name' => 'Joe', | |
375 | \& 'last_name' => 'Bloggs', | |
376 | \& 'hobbie' => [ 'bungy jumping', 'sky diving', 'knitting' ] | |
377 | \& }; | |
378 | .Ve | |
379 | .PP | |
380 | The <first_name> and <last_name> elements are represented as | |
381 | simple scalar values which you could refer to like this: | |
382 | .PP | |
383 | .Vb 1 | |
384 | \& print "$person->{first_name} $person->{last_name}\en"; | |
385 | .Ve | |
386 | .PP | |
387 | The <hobbie> elements are represented as an array \- since there is | |
388 | more than one. You could refer to the first one like this: | |
389 | .PP | |
390 | .Vb 1 | |
391 | \& print $person->{hobbie}->[0], "\en"; | |
392 | .Ve | |
393 | .PP | |
394 | Or the whole lot like this: | |
395 | .PP | |
396 | .Vb 1 | |
397 | \& print join(', ', @{$person->{hobbie}} ), "\en"; | |
398 | .Ve | |
399 | .PP | |
400 | The catch is, that these last two lines of code will only work for people | |
401 | who have more than one hobbie. If there is only one <hobbie> | |
402 | element, it will be represented as a simple scalar (just like | |
403 | <first_name> and <last_name>). Which might lead you to write | |
404 | code like this: | |
405 | .PP | |
406 | .Vb 6 | |
407 | \& if(ref($person->{hobbie})) { | |
408 | \& print join(', ', @{$person->{hobbie}} ), "\en"; | |
409 | \& } | |
410 | \& else { | |
411 | \& print $person->{hobbie}, "\en"; | |
412 | \& } | |
413 | .Ve | |
414 | .PP | |
415 | Don't do that. | |
416 | .PP | |
417 | One alternative approach is to set the forcearray option to a true value: | |
418 | .PP | |
419 | .Vb 1 | |
420 | \& my $person = XMLin('./person.xml', forcearray => 1); | |
421 | .Ve | |
422 | .PP | |
423 | Which will give you a data structure like this: | |
424 | .PP | |
425 | .Vb 5 | |
426 | \& $person = { | |
427 | \& 'first_name' => [ 'Joe' ], | |
428 | \& 'last_name' => [ 'Bloggs' ], | |
429 | \& 'hobbie' => [ 'bungy jumping', 'sky diving', 'knitting' ] | |
430 | \& }; | |
431 | .Ve | |
432 | .PP | |
433 | Then you can use this line to refer to the whole list of hobbies even if there |
434 | was only one: | |
435 | .PP | |
436 | .Vb 1 | |
437 | \& print join(', ', @{$person->{hobbie}} ), "\en"; | |
438 | .Ve | |
439 | .PP | |
440 | The downside of this approach is that the <first_name> and | |
441 | <last_name> elements will also always be represented as arrays even | |
442 | though there will never be more than one: | |
443 | .PP | |
444 | .Vb 1 | |
445 | \& print "$person->{first_name}->[0] $person->{last_name}->[0]\en"; | |
446 | .Ve | |
447 | .PP | |
448 | This might be \s-1OK\s0 if you change the \s-1XML\s0 to use attributes for things that | |
449 | will always be singular and nested elements for things that may be plural: | |
450 | .PP | |
451 | .Vb 3 | |
452 | \& <person first_name="Jane" last_name="Bloggs"> | |
453 | \& <hobbie>motorcycle maintenance</hobbie> | |
454 | \& </person> | |
455 | .Ve | |
456 | .PP | |
457 | On the other hand, if you prefer not to use attributes, then you could | |
458 | specify that any <hobbie> elements should always be represented as | |
459 | arrays and all other nested elements should be simple scalar values unless | |
460 | there is more than one: | |
461 | .PP | |
462 | .Vb 1 | |
463 | \& my $person = XMLin('./person.xml', forcearray => [ 'hobbie' ]); | |
464 | .Ve | |
465 | .PP | |
466 | The forcearray option accepts a list of element names which should always | |
467 | be forced to an array representation: | |
468 | .PP | |
469 | .Vb 1 | |
470 | \& forcearray => [ qw(hobbie qualification childs_name) ] | |
471 | .Ve | |
472 | .PP | |
473 | See the XML::Simple manual page for more information. | |
474 | .Sh "What is the keyattr option all about?" | |
475 | .IX Subsection "What is the keyattr option all about?" | |
476 | Consider this sample \s-1XML:\s0 | |
477 | .PP | |
478 | .Vb 5 | |
479 | \& <catalog> | |
480 | \& <part partnum="1842334" desc="High pressure flange" price="24.50" /> | |
481 | \& <part partnum="9344675" desc="Threaded gasket" price="9.25" /> | |
482 | \& <part partnum="5634896" desc="Low voltage washer" price="12.00" /> | |
483 | \& </catalog> | |
484 | .Ve | |
485 | .PP | |
486 | You could slurp it in with this code: | |
487 | .PP | |
488 | .Vb 1 | |
489 | \& my $catalog = XMLin('./catalog.xml'); | |
490 | .Ve | |
491 | .PP | |
492 | Which would return a data structure like this: | |
493 | .PP | |
494 | .Vb 19 | |
495 | \& $catalog = { | |
496 | \& 'part' => [ | |
497 | \& { | |
498 | \& 'partnum' => '1842334', | |
499 | \& 'desc' => 'High pressure flange', | |
500 | \& 'price' => '24.50' | |
501 | \& }, | |
502 | \& { | |
503 | \& 'partnum' => '9344675', | |
504 | \& 'desc' => 'Threaded gasket', | |
505 | \& 'price' => '9.25' | |
506 | \& }, | |
507 | \& { | |
508 | \& 'partnum' => '5634896', | |
509 | \& 'desc' => 'Low voltage washer', | |
510 | \& 'price' => '12.00' | |
511 | \& } | |
512 | \& ] | |
513 | \& }; | |
514 | .Ve | |
515 | .PP | |
516 | Then you could access the description of the first part in the catalog | |
517 | with this code: | |
518 | .PP | |
519 | .Vb 1 | |
520 | \& print $catalog->{part}->[0]->{desc}, "\en"; | |
521 | .Ve | |
522 | .PP | |
523 | However, if you wanted to access the description of the part with the | |
524 | part number of \*(L"9344675\*(R" then you'd have to code a loop like this: | |
525 | .PP | |
526 | .Vb 6 | |
527 | \& foreach my $part (@{$catalog->{part}}) { | |
528 | \& if($part->{partnum} eq '9344675') { | |
529 | \& print $part->{desc}, "\en"; | |
530 | \& last; | |
531 | \& } | |
532 | \& } | |
533 | .Ve | |
534 | .PP | |
535 | The knowledge that each <part> element has a unique partnum attribute | |
536 | allows you to eliminate this search. You can pass this knowledge on to | |
537 | XML::Simple like this: | |
538 | .PP | |
539 | .Vb 1 | |
540 | \& my $catalog = XMLin($xml, keyattr => ['partnum']); | |
541 | .Ve | |
542 | .PP | |
543 | Which will return a data structure like this: | |
544 | .PP | |
545 | .Vb 7 | |
546 | \& $catalog = { | |
547 | \& 'part' => { | |
548 | \& '5634896' => { 'desc' => 'Low voltage washer', 'price' => '12.00' }, | |
549 | \& '1842334' => { 'desc' => 'High pressure flange', 'price' => '24.50' }, | |
550 | \& '9344675' => { 'desc' => 'Threaded gasket', 'price' => '9.25' } | |
551 | \& } | |
552 | \& }; | |
553 | .Ve | |
554 | .PP | |
555 | XML::Simple has been able to transform \f(CW$catalog\fR\->{part} from an arrayref to | |
556 | a hashref (keyed on partnum). This transformation is called 'array folding'. | |
557 | .PP | |
558 | Through the use of array folding, you can now index directly to the | |
559 | description of the part you want: | |
560 | .PP | |
561 | .Vb 1 | |
562 | \& print $catalog->{part}->{9344675}->{desc}, "\en"; | |
563 | .Ve | |
564 | .PP | |
565 | The 'keyattr' option also enables array folding when the unique key is in a | |
566 | nested element rather than an attribute. eg: | |
567 | .PP | |
568 | .Vb 17 | |
569 | \& <catalog> | |
570 | \& <part> | |
571 | \& <partnum>1842334</partnum> | |
572 | \& <desc>High pressure flange</desc> | |
573 | \& <price>24.50</price> | |
574 | \& </part> | |
575 | \& <part> | |
576 | \& <partnum>9344675</partnum> | |
577 | \& <desc>Threaded gasket</desc> | |
578 | \& <price>9.25</price> | |
579 | \& </part> | |
580 | \& <part> | |
581 | \& <partnum>5634896</partnum> | |
582 | \& <desc>Low voltage washer</desc> | |
583 | \& <price>12.00</price> | |
584 | \& </part> | |
585 | \& </catalog> | |
586 | .Ve | |
587 | .PP | |
588 | See the XML::Simple manual page for more information. | |
589 | .Sh "So what's the catch with 'keyattr'?" | |
590 | .IX Subsection "So what's the catch with 'keyattr'?" | |
591 | One thing to watch out for is that you might get array folding even if you | |
592 | don't supply the keyattr option. The default value for this option is: | |
593 | .PP | |
594 | .Vb 1 | |
595 | \& [ 'name', 'key', 'id'] | |
596 | .Ve | |
597 | .PP | |
598 | Which means if your \s-1XML\s0 elements have a 'name', 'key' or 'id' attribute (or | |
599 | nested element) then they may get folded on those values. This means that | |
600 | you can take advantage of array folding simply through careful choice of | |
601 | attribute names. On the other hand, if you really don't want array folding at all, |
602 | you'll need to set 'keyattr' to an empty list: |
603 | .PP | |
604 | .Vb 1 | |
605 | \& my $ref = XMLin($xml, keyattr => []); | |
606 | .Ve | |
607 | .PP | |
608 | A second 'gotcha' is that array folding only works on arrays. That might | |
609 | seem obvious, but if there's only one record in your \s-1XML\s0 and you didn't set | |
610 | the 'forcearray' option then it won't be represented as an array and | |
611 | consequently won't get folded into a hash. The moral is that if you're | |
612 | using array folding, you should always turn on the forcearray option. | |
613 | .PP | |
614 | You probably want to be as specific as you can be too. For instance, the | |
615 | safest way to parse the <catalog> example above would be: | |
616 | .PP | |
617 | .Vb 2 | |
618 | \& my $catalog = XMLin($xml, keyattr => { part => 'partnum'}, | |
619 | \& forcearray => ['part']); | |
620 | .Ve | |
621 | .PP | |
622 | By using the hashref for keyattr, you can specify that only <part> | |
623 | elements should be folded on the 'partnum' attribute (and that the | |
624 | <part> elements should not be folded on any other attribute). | |
625 | .PP | |
626 | By supplying a list of element names for forcearray, you're ensuring that | |
627 | folding will work even if there's only one <part>. You're also | |
628 | ensuring that if the 'partnum' unique key is supplied in a nested element | |
629 | then that element won't get forced to an array too. | |
630 | .Sh "How do I know what my data structure should look like?" | |
631 | .IX Subsection "How do I know what my data structure should look like?" | |
632 | The rules are fairly straightforward: | |
633 | .IP "\(bu" 4 | |
634 | each element gets represented as a hash | |
635 | .IP "\(bu" 4 | |
636 | unless it contains only text, in which case it'll be a simple scalar value | |
637 | .IP "\(bu" 4 | |
638 | or unless there's more than one element with the same name, in which case | |
639 | they'll be represented as an array | |
640 | .IP "\(bu" 4 | |
641 | unless you've got array folding enabled, in which case they'll be folded into | |
642 | a hash | |
643 | .IP "\(bu" 4 | |
644 | empty elements (no text contents \fBand\fR no attributes) will either be | |
645 | represented as an empty hash, an empty string or undef \- depending on the value | |
646 | of the 'suppressempty' option. | |
647 | .PP | |
648 | If you're in any doubt, use Data::Dumper, eg: | |
649 | .PP | |
650 | .Vb 2 | |
651 | \& use XML::Simple; | |
652 | \& use Data::Dumper; | |
653 | .Ve | |
654 | .PP | |
655 | .Vb 1 | |
656 | \& my $ref = XMLin($xml); | |
657 | .Ve | |
658 | .PP | |
659 | .Vb 1 | |
660 | \& print Dumper($ref); | |
661 | .Ve | |
662 | .Sh "I'm getting 'Use of uninitialized value' warnings" | |
663 | .IX Subsection "I'm getting 'Use of uninitialized value' warnings" | |
664 | You're probably trying to index into a non-existent hash key \- try |
665 | Data::Dumper. | |
666 | .Sh "I'm getting a 'Not an \s-1ARRAY\s0 reference' error" | |
667 | .IX Subsection "I'm getting a 'Not an ARRAY reference' error" | |
668 | Something that you expect to be an array is not. The two most likely causes | |
669 | are that you forgot to use 'forcearray' or that the array got folded into a | |
670 | hash \- try Data::Dumper. | |
671 | .Sh "I'm getting a 'No such array field' error" | |
672 | .IX Subsection "I'm getting a 'No such array field' error" | |
673 | Something that you expect to be a hash is actually an array. Perhaps array | |
674 | folding failed because one element was missing the key attribute \- try | |
675 | Data::Dumper. | |
676 | .Sh "I'm getting an 'Out of memory' error" | |
677 | .IX Subsection "I'm getting an 'Out of memory' error" | |
678 | Something in the data structure is not as you expect and Perl may be trying | |
679 | unsuccessfully to autovivify things \- try Data::Dumper. | |
680 | .PP | |
681 | If you're already using Data::Dumper, try calling \fIDumper()\fR immediately after | |
682 | \&\fIXMLin()\fR \- ie: before you attempt to access anything in the data structure. | |
683 | .Sh "My element order is getting jumbled up" | |
684 | .IX Subsection "My element order is getting jumbled up" | |
685 | If you read an \s-1XML\s0 file with \fIXMLin()\fR and then write it back out with | |
686 | \&\fIXMLout()\fR, the order of the elements will likely be different. (However, if | |
687 | you read the file back in with \fIXMLin()\fR you'll get the same Perl data | |
688 | structure). | |
689 | .PP | |
690 | The reordering happens because XML::Simple uses hashrefs to store your data | |
691 | and Perl hashes do not really have any order. | |
692 | .PP | |
693 | It is possible that a future version of XML::Simple will use Tie::IxHash | |
694 | to store the data in hashrefs which do retain the order. However this will | |
695 | not fix all cases of element order being lost. | |
696 | .PP | |
697 | If your application really is sensitive to element order, don't use | |
698 | XML::Simple (and don't put order-sensitive values in attributes). | |
699 | .Sh "XML::Simple turns nested elements into attributes" | |
700 | .IX Subsection "XML::Simple turns nested elements into attributes" | |
701 | If you read an \s-1XML\s0 file with \fIXMLin()\fR and then write it back out with | |
702 | \&\fIXMLout()\fR, some data which was originally stored in nested elements may end up | |
703 | in attributes. (However, if you read the file back in with \fIXMLin()\fR you'll | |
704 | get the same Perl data structure). | |
705 | .PP | |
706 | There are a number of ways you might handle this: | |
707 | .IP "\(bu" 4 | |
708 | use the 'forcearray' option with \fIXMLin()\fR | |
709 | .IP "\(bu" 4 | |
710 | use the 'noattr' option with \fIXMLout()\fR | |
711 | .IP "\(bu" 4 | |
712 | live with it | |
713 | .IP "\(bu" 4 | |
714 | don't use XML::Simple | |
715 | .Sh "Why does \fIXMLout()\fP insert <name> elements (or attributes)?" | |
716 | .IX Subsection "Why does XMLout() insert <name> elements (or attributes)?" | |
717 | Try setting keyattr => []. | |
718 | .PP | |
719 | When you call \fIXMLin()\fR to read \s-1XML\s0, the 'keyattr' option controls whether arrays | |
720 | get 'folded' into hashes. Similarly, when you call \fIXMLout()\fR, the 'keyattr' | |
721 | option controls whether hashes get 'unfolded' into arrays. As described above, | |
722 | \&'keyattr' is enabled by default. | |
723 | .Sh "Why are empty elements represented as empty hashes?" | |
724 | .IX Subsection "Why are empty elements represented as empty hashes?" | |
725 | An element is always represented as a hash unless it contains only text, in | |
726 | which case it is represented as a scalar string. | |
727 | .PP | |
728 | If you would prefer empty elements to be represented as empty strings or the | |
729 | undefined value, set the 'suppressempty' option to '' or undef respectively. | |
730 | .Sh "Why is ParserOpts deprecated?" | |
731 | .IX Subsection "Why is ParserOpts deprecated?" | |
732 | The \f(CW\*(C`ParserOpts\*(C'\fR option is a remnant of the time when XML::Simple only worked | |
733 | with the XML::Parser \s-1API\s0. Its value is completely ignored if you're using a | |
734 | \&\s-1SAX\s0 parser, so writing code which relied on it would bar you from taking | |
735 | advantage of \s-1SAX\s0. | |
736 | .PP | |
737 | Even if you are using XML::Parser, it is seldom necessary to pass options to | |
738 | the parser object. A number of people have written to say they use this option | |
739 | to set XML::Parser's \f(CW\*(C`ProtocolEncoding\*(C'\fR option. Don't do that, it's wrong, | |
740 | Wrong, \s-1WRONG\s0! Fix the \s-1XML\s0 document so that it's well-formed and you won't have | |
741 | a problem. | |
742 | .PP | |
743 | Having said all of that, as long as XML::Simple continues to support the | |
744 | XML::Parser \s-1API\s0, this option will not be removed. There are currently no plans | |
745 | to remove support for the XML::Parser \s-1API\s0. |