| 1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
| 2 | .\" |
| 3 | .\" Standard preamble: |
| 4 | .\" ======================================================================== |
| 5 | .de Sh \" Subsection heading |
| 6 | .br |
| 7 | .if t .Sp |
| 8 | .ne 5 |
| 9 | .PP |
| 10 | \fB\\$1\fR |
| 11 | .PP |
| 12 | .. |
| 13 | .de Sp \" Vertical space (when we can't use .PP) |
| 14 | .if t .sp .5v |
| 15 | .if n .sp |
| 16 | .. |
| 17 | .de Vb \" Begin verbatim text |
| 18 | .ft CW |
| 19 | .nf |
| 20 | .ne \\$1 |
| 21 | .. |
| 22 | .de Ve \" End verbatim text |
| 23 | .ft R |
| 24 | .fi |
| 25 | .. |
| 26 | .\" Set up some character translations and predefined strings. \*(-- will |
| 27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left |
| 28 | .\" double quote, and \*(R" will give a right double quote. | will give a |
| 29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to |
| 30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' |
| 31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. |
| 32 | .tr \(*W-|\(bv\*(Tr |
| 33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' |
| 34 | .ie n \{\ |
| 35 | . ds -- \(*W- |
| 36 | . ds PI pi |
| 37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch |
| 38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch |
| 39 | . ds L" "" |
| 40 | . ds R" "" |
| 41 | . ds C` "" |
| 42 | . ds C' "" |
| 43 | 'br\} |
| 44 | .el\{\ |
| 45 | . ds -- \|\(em\| |
| 46 | . ds PI \(*p |
| 47 | . ds L" `` |
| 48 | . ds R" '' |
| 49 | 'br\} |
| 50 | .\" |
| 51 | .\" If the F register is turned on, we'll generate index entries on stderr for |
| 52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index |
| 53 | .\" entries marked with X<> in POD. Of course, you'll have to process the |
| 54 | .\" output yourself in some meaningful fashion. |
| 55 | .if \nF \{\ |
| 56 | . de IX |
| 57 | . tm Index:\\$1\t\\n%\t"\\$2" |
| 58 | .. |
| 59 | . nr % 0 |
| 60 | . rr F |
| 61 | .\} |
| 62 | .\" |
| 63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes |
| 64 | .\" way too many mistakes in technical documents. |
| 65 | .hy 0 |
| 66 | .if n .na |
| 67 | .\" |
| 68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). |
| 69 | .\" Fear. Run. Save yourself. No user-serviceable parts. |
| 70 | . \" fudge factors for nroff and troff |
| 71 | .if n \{\ |
| 72 | . ds #H 0 |
| 73 | . ds #V .8m |
| 74 | . ds #F .3m |
| 75 | . ds #[ \f1 |
| 76 | . ds #] \fP |
| 77 | .\} |
| 78 | .if t \{\ |
| 79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) |
| 80 | . ds #V .6m |
| 81 | . ds #F 0 |
| 82 | . ds #[ \& |
| 83 | . ds #] \& |
| 84 | .\} |
| 85 | . \" simple accents for nroff and troff |
| 86 | .if n \{\ |
| 87 | . ds ' \& |
| 88 | . ds ` \& |
| 89 | . ds ^ \& |
| 90 | . ds , \& |
| 91 | . ds ~ ~ |
| 92 | . ds / |
| 93 | .\} |
| 94 | .if t \{\ |
| 95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" |
| 96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' |
| 97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' |
| 98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' |
| 99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' |
| 100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' |
| 101 | .\} |
| 102 | . \" troff and (daisy-wheel) nroff accents |
| 103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' |
| 104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' |
| 105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] |
| 106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' |
| 107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' |
| 108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] |
| 109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] |
| 110 | .ds ae a\h'-(\w'a'u*4/10)'e |
| 111 | .ds Ae A\h'-(\w'A'u*4/10)'E |
| 112 | . \" corrections for vroff |
| 113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' |
| 114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' |
| 115 | . \" for low resolution devices (crt and lpr) |
| 116 | .if \n(.H>23 .if \n(.V>19 \ |
| 117 | \{\ |
| 118 | . ds : e |
| 119 | . ds 8 ss |
| 120 | . ds o a |
| 121 | . ds d- d\h'-1'\(ga |
| 122 | . ds D- D\h'-1'\(hy |
| 123 | . ds th \o'bp' |
| 124 | . ds Th \o'LP' |
| 125 | . ds ae ae |
| 126 | . ds Ae AE |
| 127 | .\} |
| 128 | .rm #[ #] #H #V #F C |
| 129 | .\" ======================================================================== |
| 130 | .\" |
| 131 | .IX Title "XML::Simple::FAQ 3" |
| 132 | .TH XML::Simple::FAQ 3 "2004-11-19" "perl v5.8.8" "User Contributed Perl Documentation" |
| 133 | .SH "Frequently Asked Questions about XML::Simple" |
| 134 | .IX Header "Frequently Asked Questions about XML::Simple" |
| 135 | .SH "Basics" |
| 136 | .IX Header "Basics" |
| 137 | .Sh "What is XML::Simple designed to be used for?" |
| 138 | .IX Subsection "What is XML::Simple designed to be used for?" |
| 139 | XML::Simple is a Perl module that was originally developed as a tool for |
| 140 | reading and writing configuration data in \s-1XML\s0 format. You can use it for |
| 141 | many other purposes that involve storing and retrieving structured data in |
| 142 | \&\s-1XML\s0. |
| 143 | .PP |
| 144 | You might also find XML::Simple a good starting point for playing with \s-1XML\s0 |
| 145 | from Perl. It doesn't have a steep learning curve and if you outgrow its |
| 146 | capabilities there are plenty of other Perl/XML modules to 'step up' to. |
| 147 | .Sh "Why store configuration data in \s-1XML\s0 anyway?" |
| 148 | .IX Subsection "Why store configuration data in XML anyway?" |
| 149 | The many advantages of using \s-1XML\s0 format for configuration data include: |
| 150 | .IP "\(bu" 4 |
| 151 | Using existing \s-1XML\s0 parsing tools requires less development time, is easier |
| 152 | and more robust than developing your own config file parsing code |
| 153 | .IP "\(bu" 4 |
| 154 | \&\s-1XML\s0 can represent relationships between pieces of data, such as nesting of |
| 155 | sections to arbitrary levels (not easily done with .INI files for example) |
| 156 | .IP "\(bu" 4 |
| 157 | \&\s-1XML\s0 is basically just text, so you can easily edit a config file (easier than |
| 158 | editing a Win32 registry) |
| 159 | .IP "\(bu" 4 |
| 160 | \&\s-1XML\s0 provides standard solutions for handling character sets and encoding |
| 161 | beyond basic \s-1ASCII\s0 (important for internationalization) |
| 162 | .IP "\(bu" 4 |
| 163 | If it becomes necessary to change your configuration file format, there are |
| 164 | many tools available for performing transformations on \s-1XML\s0 files |
| 165 | .IP "\(bu" 4 |
| 166 | \&\s-1XML\s0 is an open standard (the world does not need more proprietary binary |
| 167 | file formats) |
| 168 | .IP "\(bu" 4 |
| 169 | Taking the extra step of developing a \s-1DTD\s0 allows the format of configuration |
| 170 | files to be validated before your program reads them (not directly supported |
| 171 | by XML::Simple) |
| 172 | .IP "\(bu" 4 |
| 173 | Combining a \s-1DTD\s0 with a good \s-1XML\s0 editor can give you a \s-1GUI\s0 config editor for |
| 174 | minimal coding effort |
| 175 | .Sh "What isn't XML::Simple good for?" |
| 176 | .IX Subsection "What isn't XML::Simple good for?" |
| 177 | The main limitation of XML::Simple is that it does not work with 'mixed |
| 178 | content' (see the next question). If you consider your \s-1XML\s0 files to contain |
| 179 | marked up text rather than structured data, you should probably use another |
| 180 | module. |
| 181 | .PP |
| 182 | If you are working with very large \s-1XML\s0 files, XML::Simple's approach of |
| 183 | representing the whole file in memory as a 'tree' data structure may not be |
| 184 | suitable. |
| 185 | .Sh "What is mixed content?" |
| 186 | .IX Subsection "What is mixed content?" |
| 187 | Consider this example \s-1XML:\s0 |
| 188 | .PP |
| 189 | .Vb 3 |
| 190 | \& <document> |
| 191 | \& <para>This is <em>mixed</em> content.</para> |
| 192 | \& </document> |
| 193 | .Ve |
| 194 | .PP |
| 195 | This is said to be mixed content, because the <para> element contains |
| 196 | both character data (text content) and nested elements. |
| 197 | .PP |
| 198 | Here's some more \s-1XML:\s0 |
| 199 | .PP |
| 200 | .Vb 5 |
| 201 | \& <person> |
| 202 | \& <first_name>Joe</first_name> |
| 203 | \& <last_name>Bloggs</last_name> |
| 204 | \& <dob>25-April-1969</dob> |
| 205 | \& </person> |
| 206 | .Ve |
| 207 | .PP |
| 208 | This second example is not generally considered to be mixed content. The |
| 209 | <first_name>, <last_name> and <dob> elements contain |
| 210 | only character data and the <person> element contains only nested |
| 211 | elements. (Note: Strictly speaking, the whitespace between the nested |
| 212 | elements is character data, but it is ignored by XML::Simple). |
| 213 | .Sh "Why doesn't XML::Simple handle mixed content?" |
| 214 | .IX Subsection "Why doesn't XML::Simple handle mixed content?" |
| 215 | Because if it did, it would no longer be simple :\-) |
| 216 | .PP |
| 217 | Seriously though, there are plenty of excellent modules that allow you to |
| 218 | work with mixed content in a variety of ways. Handling mixed content |
| 219 | correctly is not easy and by ignoring these issues, XML::Simple is able to |
| 220 | present an \s-1API\s0 without a steep learning curve. |
| 221 | .Sh "Which Perl modules do handle mixed content?" |
| 222 | .IX Subsection "Which Perl modules do handle mixed content?" |
| 223 | Every one of them except XML::Simple :\-) |
| 224 | .PP |
| 225 | If you're looking for a recommendation, I'd suggest you look at the Perl-XML |
| 226 | \&\s-1FAQ\s0 at: |
| 227 | .PP |
| 228 | .Vb 1 |
| 229 | \& http://perl-xml.sourceforge.net/faq/ |
| 230 | .Ve |
| 231 | .SH "Installation" |
| 232 | .IX Header "Installation" |
| 233 | .Sh "How do I install XML::Simple?" |
| 234 | .IX Subsection "How do I install XML::Simple?" |
| 235 | If you're running ActiveState Perl, you've probably already got XML::Simple |
| 236 | (although you may want to upgrade to version 1.09 or better for \s-1SAX\s0 support). |
| 237 | .PP |
| 238 | If you do need to install XML::Simple, you'll need to install an \s-1XML\s0 parser |
| 239 | module first. Install either XML::Parser (which you may have already) or |
| 240 | \&\s-1XML::SAX\s0. If you install both, \s-1XML::SAX\s0 will be used by default. |
| 241 | .PP |
| 242 | Once you have a parser installed ... |
| 243 | .PP |
| 244 | On Unix systems, try: |
| 245 | .PP |
| 246 | .Vb 1 |
| 247 | \& perl -MCPAN -e 'install XML::Simple' |
| 248 | .Ve |
| 249 | .PP |
| 250 | If that doesn't work, download the latest distribution from |
| 251 | ftp://ftp.cpan.org/pub/CPAN/authors/id/G/GR/GRANTM , unpack it and run these |
| 252 | commands: |
| 253 | .PP |
| 254 | .Vb 4 |
| 255 | \& perl Makefile.PL |
| 256 | \& make |
| 257 | \& make test |
| 258 | \& make install |
| 259 | .Ve |
| 260 | .PP |
| 261 | On Win32, if you have a recent build of ActiveState Perl (618 or better) try |
| 262 | this command: |
| 263 | .PP |
| 264 | .Vb 1 |
| 265 | \& ppm install XML::Simple |
| 266 | .Ve |
| 267 | .PP |
| 268 | If that doesn't work, you really only need the Simple.pm file, so extract it |
| 269 | from the .tar.gz file (eg: using WinZIP) and save it in the \esite\elib\eXML |
| 270 | directory under your Perl installation (typically C:\ePerl). |
| 271 | .Sh "I'm trying to install XML::Simple and 'make test' fails" |
| 272 | .IX Subsection "I'm trying to install XML::Simple and 'make test' fails" |
| 273 | Is the directory where you've unpacked XML::Simple mounted from a file server |
| 274 | using \s-1NFS\s0, \s-1SMB\s0 or some other network file sharing? If so, that may cause |
| 275 | errors in the following test scripts: |
| 276 | .PP |
| 277 | .Vb 3 |
| 278 | \& 3_Storable.t |
| 279 | \& 4_MemShare.t |
| 280 | \& 5_MemCopy.t |
| 281 | .Ve |
| 282 | .PP |
| 283 | The test suite is designed to exercise the boundary conditions of all |
| 284 | XML::Simple's functionality and these three scripts exercise the caching |
| 285 | functions. If XML::Simple is asked to parse a file for which it has a cached |
| 286 | copy of a previous parse, then it compares the timestamp on the \s-1XML\s0 file with |
| 287 | the timestamp on the cached copy. If the cached copy is *newer* then it will |
| 288 | be used. If the cached copy is older or the same age then the file is |
| 289 | re\-parsed. The test scripts will get confused by networked filesystems if |
| 290 | the workstation and server system clocks are not synchronised (to the |
| 291 | second). |
| 292 | .PP |
| 293 | If you get an error in one of these three test scripts but you don't plan to |
| 294 | use the caching options (they're not enabled by default), then go right ahead |
| 295 | and run 'make install'. If you do plan to use caching, then try unpacking |
| 296 | the distribution on local disk and doing the build/test there. |
| 297 | .PP |
| 298 | It's probably not a good idea to use the caching options with networked |
| 299 | filesystems in production. If the file server's clock is ahead of the local |
| 300 | clock, XML::Simple will re-parse files when it could have used the cached |
| 301 | copy. However if the local clock is ahead of the file server clock and a |
| 302 | file is changed immediately after it is cached, the old cached copy will be |
| 303 | used. |
| 304 | .PP |
| 305 | Is one of the three test scripts (above) failing but you're not running on |
| 306 | a network filesystem? Are you running Win32? If so, you may be seeing a bug |
| 307 | in Win32 where writes to a file do not affect its modification timestamp. |
| 308 | .PP |
| 309 | If none of these scenarios match your situation, please confirm you're |
| 310 | running the latest version of XML::Simple and then email the output of |
| 311 | \&'make test' to me at grantm@cpan.org |
| 312 | .Sh "Why is XML::Simple so slow?" |
| 313 | .IX Subsection "Why is XML::Simple so slow?" |
| 314 | If you find that XML::Simple is very slow reading \s-1XML\s0, the most likely reason |
| 315 | is that you have \s-1XML::SAX\s0 installed but no additional \s-1SAX\s0 parser module. The |
| 316 | \&\s-1XML::SAX\s0 distribution includes an \s-1XML\s0 parser written entirely in Perl. This is |
| 317 | very portable but not very fast. For better performance install either |
| 318 | XML::SAX::Expat or XML::LibXML. |
| 319 | .SH "Usage" |
| 320 | .IX Header "Usage" |
| 321 | .Sh "How do I use XML::Simple?" |
| 322 | .IX Subsection "How do I use XML::Simple?" |
| 323 | If you had an \s-1XML\s0 document called /etc/appconfig/foo.xml you could 'slurp' it |
| 324 | into a simple data structure (typically a hashref) with these lines of code: |
| 325 | .PP |
| 326 | .Vb 1 |
| 327 | \& use XML::Simple; |
| 328 | .Ve |
| 329 | .PP |
| 330 | .Vb 1 |
| 331 | \& my $config = XMLin('/etc/appconfig/foo.xml'); |
| 332 | .Ve |
| 333 | .PP |
| 334 | The \fIXMLin()\fR function accepts options after the filename. |
| 335 | .Sh "There are so many options, which ones do I really need to know about?" |
| 336 | .IX Subsection "There are so many options, which ones do I really need to know about?" |
| 337 | Although you can get by without using any options, you shouldn't even |
| 338 | consider using XML::Simple in production until you know what these two |
| 339 | options do: |
| 340 | .IP "\(bu" 4 |
| 341 | forcearray |
| 342 | .IP "\(bu" 4 |
| 343 | keyattr |
| 344 | .PP |
| 345 | The reason you really need to read about them is because the default values |
| 346 | for these options will trip you up if you don't. Although everyone agrees |
| 347 | that these defaults are not ideal, there is not wide agreement on what they |
| 348 | should be changed to. The answer therefore is to read about them (see below) |
| 349 | and select values which are right for you. |
| 350 | .Sh "What is the forcearray option all about?" |
| 351 | .IX Subsection "What is the forcearray option all about?" |
| 352 | Consider this \s-1XML\s0 in a file called ./person.xml: |
| 353 | .PP |
| 354 | .Vb 7 |
| 355 | \& <person> |
| 356 | \& <first_name>Joe</first_name> |
| 357 | \& <last_name>Bloggs</last_name> |
| 358 | \& <hobbie>bungy jumping</hobbie> |
| 359 | \& <hobbie>sky diving</hobbie> |
| 360 | \& <hobbie>knitting</hobbie> |
| 361 | \& </person> |
| 362 | .Ve |
| 363 | .PP |
| 364 | You could read it in with this line: |
| 365 | .PP |
| 366 | .Vb 1 |
| 367 | \& my $person = XMLin('./person.xml'); |
| 368 | .Ve |
| 369 | .PP |
| 370 | Which would give you a data structure like this: |
| 371 | .PP |
| 372 | .Vb 5 |
| 373 | \& $person = { |
| 374 | \& 'first_name' => 'Joe', |
| 375 | \& 'last_name' => 'Bloggs', |
| 376 | \& 'hobbie' => [ 'bungy jumping', 'sky diving', 'knitting' ] |
| 377 | \& }; |
| 378 | .Ve |
| 379 | .PP |
| 380 | The <first_name> and <last_name> elements are represented as |
| 381 | simple scalar values which you could refer to like this: |
| 382 | .PP |
| 383 | .Vb 1 |
| 384 | \& print "$person->{first_name} $person->{last_name}\en"; |
| 385 | .Ve |
| 386 | .PP |
| 387 | The <hobbie> elements are represented as an array \- since there is |
| 388 | more than one. You could refer to the first one like this: |
| 389 | .PP |
| 390 | .Vb 1 |
| 391 | \& print $person->{hobbie}->[0], "\en"; |
| 392 | .Ve |
| 393 | .PP |
| 394 | Or the whole lot like this: |
| 395 | .PP |
| 396 | .Vb 1 |
| 397 | \& print join(', ', @{$person->{hobbie}} ), "\en"; |
| 398 | .Ve |
| 399 | .PP |
| 400 | The catch is that these last two lines of code will only work for people |
| 401 | who have more than one hobby. If there is only one <hobbie> |
| 402 | element, it will be represented as a simple scalar (just like |
| 403 | <first_name> and <last_name>). Which might lead you to write |
| 404 | code like this: |
| 405 | .PP |
| 406 | .Vb 6 |
| 407 | \& if(ref($person->{hobbie})) { |
| 408 | \& print join(', ', @{$person->{hobbie}} ), "\en"; |
| 409 | \& } |
| 410 | \& else { |
| 411 | \& print $person->{hobbie}, "\en"; |
| 412 | \& } |
| 413 | .Ve |
| 414 | .PP |
| 415 | Don't do that. |
| 416 | .PP |
| 417 | One alternative approach is to set the forcearray option to a true value: |
| 418 | .PP |
| 419 | .Vb 1 |
| 420 | \& my $person = XMLin('./person.xml', forcearray => 1); |
| 421 | .Ve |
| 422 | .PP |
| 423 | Which will give you a data structure like this: |
| 424 | .PP |
| 425 | .Vb 5 |
| 426 | \& $person = { |
| 427 | \& 'first_name' => [ 'Joe' ], |
| 428 | \& 'last_name' => [ 'Bloggs' ], |
| 429 | \& 'hobbie' => [ 'bungy jumping', 'sky diving', 'knitting' ] |
| 430 | \& }; |
| 431 | .Ve |
| 432 | .PP |
| 433 | Then you can use this line to refer to the whole list of hobbies even if there |
| 434 | was only one: |
| 435 | .PP |
| 436 | .Vb 1 |
| 437 | \& print join(', ', @{$person->{hobbie}} ), "\en"; |
| 438 | .Ve |
| 439 | .PP |
| 440 | The downside of this approach is that the <first_name> and |
| 441 | <last_name> elements will also always be represented as arrays even |
| 442 | though there will never be more than one: |
| 443 | .PP |
| 444 | .Vb 1 |
| 445 | \& print "$person->{first_name}->[0] $person->{last_name}->[0]\en"; |
| 446 | .Ve |
| 447 | .PP |
| 448 | This might be \s-1OK\s0 if you change the \s-1XML\s0 to use attributes for things that |
| 449 | will always be singular and nested elements for things that may be plural: |
| 450 | .PP |
| 451 | .Vb 3 |
| 452 | \& <person first_name="Jane" last_name="Bloggs"> |
| 453 | \& <hobbie>motorcycle maintenance</hobbie> |
| 454 | \& </person> |
| 455 | .Ve |
| 456 | .PP |
| 457 | On the other hand, if you prefer not to use attributes, then you could |
| 458 | specify that any <hobbie> elements should always be represented as |
| 459 | arrays and all other nested elements should be simple scalar values unless |
| 460 | there is more than one: |
| 461 | .PP |
| 462 | .Vb 1 |
| 463 | \& my $person = XMLin('./person.xml', forcearray => [ 'hobbie' ]); |
| 464 | .Ve |
| 465 | .PP |
| 466 | The forcearray option accepts a list of element names which should always |
| 467 | be forced to an array representation: |
| 468 | .PP |
| 469 | .Vb 1 |
| 470 | \& forcearray => [ qw(hobbie qualification childs_name) ] |
| 471 | .Ve |
| 472 | .PP |
| 473 | See the XML::Simple manual page for more information. |
| 474 | .Sh "What is the keyattr option all about?" |
| 475 | .IX Subsection "What is the keyattr option all about?" |
| 476 | Consider this sample \s-1XML:\s0 |
| 477 | .PP |
| 478 | .Vb 5 |
| 479 | \& <catalog> |
| 480 | \& <part partnum="1842334" desc="High pressure flange" price="24.50" /> |
| 481 | \& <part partnum="9344675" desc="Threaded gasket" price="9.25" /> |
| 482 | \& <part partnum="5634896" desc="Low voltage washer" price="12.00" /> |
| 483 | \& </catalog> |
| 484 | .Ve |
| 485 | .PP |
| 486 | You could slurp it in with this code: |
| 487 | .PP |
| 488 | .Vb 1 |
| 489 | \& my $catalog = XMLin('./catalog.xml'); |
| 490 | .Ve |
| 491 | .PP |
| 492 | Which would return a data structure like this: |
| 493 | .PP |
| 494 | .Vb 19 |
| 495 | \& $catalog = { |
| 496 | \& 'part' => [ |
| 497 | \& { |
| 498 | \& 'partnum' => '1842334', |
| 499 | \& 'desc' => 'High pressure flange', |
| 500 | \& 'price' => '24.50' |
| 501 | \& }, |
| 502 | \& { |
| 503 | \& 'partnum' => '9344675', |
| 504 | \& 'desc' => 'Threaded gasket', |
| 505 | \& 'price' => '9.25' |
| 506 | \& }, |
| 507 | \& { |
| 508 | \& 'partnum' => '5634896', |
| 509 | \& 'desc' => 'Low voltage washer', |
| 510 | \& 'price' => '12.00' |
| 511 | \& } |
| 512 | \& ] |
| 513 | \& }; |
| 514 | .Ve |
| 515 | .PP |
| 516 | Then you could access the description of the first part in the catalog |
| 517 | with this code: |
| 518 | .PP |
| 519 | .Vb 1 |
| 520 | \& print $catalog->{part}->[0]->{desc}, "\en"; |
| 521 | .Ve |
| 522 | .PP |
| 523 | However, if you wanted to access the description of the part with the |
| 524 | part number of \*(L"9344675\*(R" then you'd have to code a loop like this: |
| 525 | .PP |
| 526 | .Vb 6 |
| 527 | \& foreach my $part (@{$catalog->{part}}) { |
| 528 | \& if($part->{partnum} eq '9344675') { |
| 529 | \& print $part->{desc}, "\en"; |
| 530 | \& last; |
| 531 | \& } |
| 532 | \& } |
| 533 | .Ve |
| 534 | .PP |
| 535 | The knowledge that each <part> element has a unique partnum attribute |
| 536 | allows you to eliminate this search. You can pass this knowledge on to |
| 537 | XML::Simple like this: |
| 538 | .PP |
| 539 | .Vb 1 |
| 540 | \& my $catalog = XMLin($xml, keyattr => ['partnum']); |
| 541 | .Ve |
| 542 | .PP |
| 543 | Which will return a data structure like this: |
| 544 | .PP |
| 545 | .Vb 7 |
| 546 | \& $catalog = { |
| 547 | \& 'part' => { |
| 548 | \& '5634896' => { 'desc' => 'Low voltage washer', 'price' => '12.00' }, |
| 549 | \& '1842334' => { 'desc' => 'High pressure flange', 'price' => '24.50' }, |
| 550 | \& '9344675' => { 'desc' => 'Threaded gasket', 'price' => '9.25' } |
| 551 | \& } |
| 552 | \& }; |
| 553 | .Ve |
| 554 | .PP |
| 555 | XML::Simple has been able to transform \f(CW$catalog\fR\->{part} from an arrayref to |
| 556 | a hashref (keyed on partnum). This transformation is called 'array folding'. |
| 557 | .PP |
| 558 | Through the use of array folding, you can now index directly to the |
| 559 | description of the part you want: |
| 560 | .PP |
| 561 | .Vb 1 |
| 562 | \& print $catalog->{part}->{9344675}->{desc}, "\en"; |
| 563 | .Ve |
| 564 | .PP |
| 565 | The 'keyattr' option also enables array folding when the unique key is in a |
| 566 | nested element rather than an attribute. eg: |
| 567 | .PP |
| 568 | .Vb 17 |
| 569 | \& <catalog> |
| 570 | \& <part> |
| 571 | \& <partnum>1842334</partnum> |
| 572 | \& <desc>High pressure flange</desc> |
| 573 | \& <price>24.50</price> |
| 574 | \& </part> |
| 575 | \& <part> |
| 576 | \& <partnum>9344675</partnum> |
| 577 | \& <desc>Threaded gasket</desc> |
| 578 | \& <price>9.25</price> |
| 579 | \& </part> |
| 580 | \& <part> |
| 581 | \& <partnum>5634896</partnum> |
| 582 | \& <desc>Low voltage washer</desc> |
| 583 | \& <price>12.00</price> |
| 584 | \& </part> |
| 585 | \& </catalog> |
| 586 | .Ve |
| 587 | .PP |
| 588 | See the XML::Simple manual page for more information. |
| 589 | .Sh "So what's the catch with 'keyattr'?" |
| 590 | .IX Subsection "So what's the catch with 'keyattr'?" |
| 591 | One thing to watch out for is that you might get array folding even if you |
| 592 | don't supply the keyattr option. The default value for this option is: |
| 593 | .PP |
| 594 | .Vb 1 |
| 595 | \& [ 'name', 'key', 'id'] |
| 596 | .Ve |
| 597 | .PP |
| 598 | Which means if your \s-1XML\s0 elements have a 'name', 'key' or 'id' attribute (or |
| 599 | nested element) then they may get folded on those values. This means that |
| 600 | you can take advantage of array folding simply through careful choice of |
| 601 | attribute names. On the other hand, if you really don't want array folding at all, |
| 602 | you'll need to set 'keyattr' to an empty list: |
| 603 | .PP |
| 604 | .Vb 1 |
| 605 | \& my $ref = XMLin($xml, keyattr => []); |
| 606 | .Ve |
| 607 | .PP |
| 608 | A second 'gotcha' is that array folding only works on arrays. That might |
| 609 | seem obvious, but if there's only one record in your \s-1XML\s0 and you didn't set |
| 610 | the 'forcearray' option then it won't be represented as an array and |
| 611 | consequently won't get folded into a hash. The moral is that if you're |
| 612 | using array folding, you should always turn on the forcearray option. |
| 613 | .PP |
| 614 | You probably want to be as specific as you can be too. For instance, the |
| 615 | safest way to parse the <catalog> example above would be: |
| 616 | .PP |
| 617 | .Vb 2 |
| 618 | \& my $catalog = XMLin($xml, keyattr => { part => 'partnum'}, |
| 619 | \& forcearray => ['part']); |
| 620 | .Ve |
| 621 | .PP |
| 622 | By using the hashref for keyattr, you can specify that only <part> |
| 623 | elements should be folded on the 'partnum' attribute (and that the |
| 624 | <part> elements should not be folded on any other attribute). |
| 625 | .PP |
| 626 | By supplying a list of element names for forcearray, you're ensuring that |
| 627 | folding will work even if there's only one <part>. You're also |
| 628 | ensuring that if the 'partnum' unique key is supplied in a nested element |
| 629 | then that element won't get forced to an array too. |
| 630 | .Sh "How do I know what my data structure should look like?" |
| 631 | .IX Subsection "How do I know what my data structure should look like?" |
| 632 | The rules are fairly straightforward: |
| 633 | .IP "\(bu" 4 |
| 634 | each element gets represented as a hash |
| 635 | .IP "\(bu" 4 |
| 636 | unless it contains only text, in which case it'll be a simple scalar value |
| 637 | .IP "\(bu" 4 |
| 638 | or unless there's more than one element with the same name, in which case |
| 639 | they'll be represented as an array |
| 640 | .IP "\(bu" 4 |
| 641 | unless you've got array folding enabled, in which case they'll be folded into |
| 642 | a hash |
| 643 | .IP "\(bu" 4 |
| 644 | empty elements (no text contents \fBand\fR no attributes) will either be |
| 645 | represented as an empty hash, an empty string or undef \- depending on the value |
| 646 | of the 'suppressempty' option. |
| 647 | .PP |
| 648 | If you're in any doubt, use Data::Dumper, eg: |
| 649 | .PP |
| 650 | .Vb 2 |
| 651 | \& use XML::Simple; |
| 652 | \& use Data::Dumper; |
| 653 | .Ve |
| 654 | .PP |
| 655 | .Vb 1 |
| 656 | \& my $ref = XMLin($xml); |
| 657 | .Ve |
| 658 | .PP |
| 659 | .Vb 1 |
| 660 | \& print Dumper($ref); |
| 661 | .Ve |
| 662 | .Sh "I'm getting 'Use of uninitialized value' warnings" |
| 663 | .IX Subsection "I'm getting 'Use of uninitialized value' warnings" |
| 664 | You're probably trying to index into a non-existent hash key \- try |
| 665 | Data::Dumper. |
| 666 | .Sh "I'm getting a 'Not an \s-1ARRAY\s0 reference' error" |
| 667 | .IX Subsection "I'm getting a 'Not an ARRAY reference' error" |
| 668 | Something that you expect to be an array is not. The two most likely causes |
| 669 | are that you forgot to use 'forcearray' or that the array got folded into a |
| 670 | hash \- try Data::Dumper. |
| 671 | .Sh "I'm getting a 'No such array field' error" |
| 672 | .IX Subsection "I'm getting a 'No such array field' error" |
| 673 | Something that you expect to be a hash is actually an array. Perhaps array |
| 674 | folding failed because one element was missing the key attribute \- try |
| 675 | Data::Dumper. |
| 676 | .Sh "I'm getting an 'Out of memory' error" |
| 677 | .IX Subsection "I'm getting an 'Out of memory' error" |
| 678 | Something in the data structure is not as you expect and Perl may be trying |
| 679 | unsuccessfully to autovivify things \- try Data::Dumper. |
| 680 | .PP |
| 681 | If you're already using Data::Dumper, try calling \fIDumper()\fR immediately after |
| 682 | \&\fIXMLin()\fR \- ie: before you attempt to access anything in the data structure. |
| 683 | .Sh "My element order is getting jumbled up" |
| 684 | .IX Subsection "My element order is getting jumbled up" |
| 685 | If you read an \s-1XML\s0 file with \fIXMLin()\fR and then write it back out with |
| 686 | \&\fIXMLout()\fR, the order of the elements will likely be different. (However, if |
| 687 | you read the file back in with \fIXMLin()\fR you'll get the same Perl data |
| 688 | structure). |
| 689 | .PP |
| 690 | The reordering happens because XML::Simple uses hashrefs to store your data |
| 691 | and Perl hashes do not really have any order. |
| 692 | .PP |
| 693 | It is possible that a future version of XML::Simple will use Tie::IxHash |
| 694 | to store the data in hashrefs which do retain the order. However this will |
| 695 | not fix all cases of element order being lost. |
| 696 | .PP |
| 697 | If your application really is sensitive to element order, don't use |
| 698 | XML::Simple (and don't put order-sensitive values in attributes). |
| 699 | .Sh "XML::Simple turns nested elements into attributes" |
| 700 | .IX Subsection "XML::Simple turns nested elements into attributes" |
| 701 | If you read an \s-1XML\s0 file with \fIXMLin()\fR and then write it back out with |
| 702 | \&\fIXMLout()\fR, some data which was originally stored in nested elements may end up |
| 703 | in attributes. (However, if you read the file back in with \fIXMLin()\fR you'll |
| 704 | get the same Perl data structure). |
| 705 | .PP |
| 706 | There are a number of ways you might handle this: |
| 707 | .IP "\(bu" 4 |
| 708 | use the 'forcearray' option with \fIXMLin()\fR |
| 709 | .IP "\(bu" 4 |
| 710 | use the 'noattr' option with \fIXMLout()\fR |
| 711 | .IP "\(bu" 4 |
| 712 | live with it |
| 713 | .IP "\(bu" 4 |
| 714 | don't use XML::Simple |
| 715 | .Sh "Why does \fIXMLout()\fP insert <name> elements (or attributes)?" |
| 716 | .IX Subsection "Why does XMLout() insert <name> elements (or attributes)?" |
| 717 | Try setting keyattr => []. |
| 718 | .PP |
| 719 | When you call \fIXMLin()\fR to read \s-1XML\s0, the 'keyattr' option controls whether arrays |
| 720 | get 'folded' into hashes. Similarly, when you call \fIXMLout()\fR, the 'keyattr' |
| 721 | option controls whether hashes get 'unfolded' into arrays. As described above, |
| 722 | \&'keyattr' is enabled by default. |
| 723 | .Sh "Why are empty elements represented as empty hashes?" |
| 724 | .IX Subsection "Why are empty elements represented as empty hashes?" |
| 725 | An element is always represented as a hash unless it contains only text, in |
| 726 | which case it is represented as a scalar string. |
| 727 | .PP |
| 728 | If you would prefer empty elements to be represented as empty strings or the |
| 729 | undefined value, set the 'suppressempty' option to '' or undef respectively. |
| 730 | .Sh "Why is ParserOpts deprecated?" |
| 731 | .IX Subsection "Why is ParserOpts deprecated?" |
| 732 | The \f(CW\*(C`ParserOpts\*(C'\fR option is a remnant of the time when XML::Simple only worked |
| 733 | with the XML::Parser \s-1API\s0. Its value is completely ignored if you're using a |
| 734 | \&\s-1SAX\s0 parser, so writing code which relied on it would bar you from taking |
| 735 | advantage of \s-1SAX\s0. |
| 736 | .PP |
| 737 | Even if you are using XML::Parser, it is seldom necessary to pass options to |
| 738 | the parser object. A number of people have written to say they use this option |
| 739 | to set XML::Parser's \f(CW\*(C`ProtocolEncoding\*(C'\fR option. Don't do that, it's wrong, |
| 740 | Wrong, \s-1WRONG\s0! Fix the \s-1XML\s0 document so that it's well-formed and you won't have |
| 741 | a problem. |
| 742 | .PP |
| 743 | Having said all of that, as long as XML::Simple continues to support the |
| 744 | XML::Parser \s-1API\s0, this option will not be removed. There are currently no plans |
| 745 | to remove support for the XML::Parser \s-1API\s0. |