| 1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
| 2 | .\" |
| 3 | .\" Standard preamble: |
| 4 | .\" ======================================================================== |
| 5 | .de Sh \" Subsection heading |
| 6 | .br |
| 7 | .if t .Sp |
| 8 | .ne 5 |
| 9 | .PP |
| 10 | \fB\\$1\fR |
| 11 | .PP |
| 12 | .. |
| 13 | .de Sp \" Vertical space (when we can't use .PP) |
| 14 | .if t .sp .5v |
| 15 | .if n .sp |
| 16 | .. |
| 17 | .de Vb \" Begin verbatim text |
| 18 | .ft CW |
| 19 | .nf |
| 20 | .ne \\$1 |
| 21 | .. |
| 22 | .de Ve \" End verbatim text |
| 23 | .ft R |
| 24 | .fi |
| 25 | .. |
| 26 | .\" Set up some character translations and predefined strings. \*(-- will |
| 27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left |
| 28 | .\" double quote, and \*(R" will give a right double quote. | will give a |
| 29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to |
| 30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' |
| 31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. |
| 32 | .tr \(*W-|\(bv\*(Tr |
| 33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' |
| 34 | .ie n \{\ |
| 35 | . ds -- \(*W- |
| 36 | . ds PI pi |
| 37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch |
| 38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch |
| 39 | . ds L" "" |
| 40 | . ds R" "" |
| 41 | . ds C` "" |
| 42 | . ds C' "" |
| 43 | 'br\} |
| 44 | .el\{\ |
| 45 | . ds -- \|\(em\| |
| 46 | . ds PI \(*p |
| 47 | . ds L" `` |
| 48 | . ds R" '' |
| 49 | 'br\} |
| 50 | .\" |
| 51 | .\" If the F register is turned on, we'll generate index entries on stderr for |
| 52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index |
| 53 | .\" entries marked with X<> in POD. Of course, you'll have to process the |
| 54 | .\" output yourself in some meaningful fashion. |
| 55 | .if \nF \{\ |
| 56 | . de IX |
| 57 | . tm Index:\\$1\t\\n%\t"\\$2" |
| 58 | .. |
| 59 | . nr % 0 |
| 60 | . rr F |
| 61 | .\} |
| 62 | .\" |
| 63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes |
| 64 | .\" way too many mistakes in technical documents. |
| 65 | .hy 0 |
| 66 | .if n .na |
| 67 | .\" |
| 68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). |
| 69 | .\" Fear. Run. Save yourself. No user-serviceable parts. |
| 70 | . \" fudge factors for nroff and troff |
| 71 | .if n \{\ |
| 72 | . ds #H 0 |
| 73 | . ds #V .8m |
| 74 | . ds #F .3m |
| 75 | . ds #[ \f1 |
| 76 | . ds #] \fP |
| 77 | .\} |
| 78 | .if t \{\ |
| 79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) |
| 80 | . ds #V .6m |
| 81 | . ds #F 0 |
| 82 | . ds #[ \& |
| 83 | . ds #] \& |
| 84 | .\} |
| 85 | . \" simple accents for nroff and troff |
| 86 | .if n \{\ |
| 87 | . ds ' \& |
| 88 | . ds ` \& |
| 89 | . ds ^ \& |
| 90 | . ds , \& |
| 91 | . ds ~ ~ |
| 92 | . ds / |
| 93 | .\} |
| 94 | .if t \{\ |
| 95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" |
| 96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' |
| 97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' |
| 98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' |
| 99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' |
| 100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' |
| 101 | .\} |
| 102 | . \" troff and (daisy-wheel) nroff accents |
| 103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' |
| 104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' |
| 105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] |
| 106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' |
| 107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' |
| 108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] |
| 109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] |
| 110 | .ds ae a\h'-(\w'a'u*4/10)'e |
| 111 | .ds Ae A\h'-(\w'A'u*4/10)'E |
| 112 | . \" corrections for vroff |
| 113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' |
| 114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' |
| 115 | . \" for low resolution devices (crt and lpr) |
| 116 | .if \n(.H>23 .if \n(.V>19 \ |
| 117 | \{\ |
| 118 | . ds : e |
| 119 | . ds 8 ss |
| 120 | . ds o a |
| 121 | . ds d- d\h'-1'\(ga |
| 122 | . ds D- D\h'-1'\(hy |
| 123 | . ds th \o'bp' |
| 124 | . ds Th \o'LP' |
| 125 | . ds ae ae |
| 126 | . ds Ae AE |
| 127 | .\} |
| 128 | .rm #[ #] #H #V #F C |
| 129 | .\" ======================================================================== |
| 130 | .\" |
| 131 | .IX Title "XML::Simple 3" |
| 132 | .TH XML::Simple 3 "2004-11-19" "perl v5.8.8" "User Contributed Perl Documentation" |
| 133 | .SH "NAME" |
| 134 | XML::Simple \- Easy API to maintain XML (esp config files) |
| 135 | .SH "SYNOPSIS" |
| 136 | .IX Header "SYNOPSIS" |
| 137 | .Vb 1 |
| 138 | \& use XML::Simple; |
| 139 | .Ve |
| 140 | .PP |
| 141 | .Vb 1 |
| 142 | \& my $ref = XMLin([<xml file or string>] [, <options>]); |
| 143 | .Ve |
| 144 | .PP |
| 145 | .Vb 1 |
| 146 | \& my $xml = XMLout($hashref [, <options>]); |
| 147 | .Ve |
| 148 | .PP |
| 149 | Or the object oriented way: |
| 150 | .PP |
| 151 | .Vb 1 |
| 152 | \& require XML::Simple; |
| 153 | .Ve |
| 154 | .PP |
| 155 | .Vb 1 |
| 156 | \& my $xs = XML::Simple->new(options); |
| 157 | .Ve |
| 158 | .PP |
| 159 | .Vb 1 |
| 160 | \& my $ref = $xs->XMLin([<xml file or string>] [, <options>]); |
| 161 | .Ve |
| 162 | .PP |
| 163 | .Vb 1 |
| 164 | \& my $xml = $xs->XMLout($hashref [, <options>]); |
| 165 | .Ve |
| 166 | .PP |
| 167 | (or see \*(L"\s-1SAX\s0 \s-1SUPPORT\s0\*(R" for 'the \s-1SAX\s0 way'). |
| 168 | .PP |
| 169 | To catch common errors: |
| 170 | .PP |
| 171 | .Vb 1 |
| 172 | \& use XML::Simple qw(:strict); |
| 173 | .Ve |
| 174 | .PP |
| 175 | (see \*(L"\s-1STRICT\s0 \s-1MODE\s0\*(R" for more details). |
| 176 | .SH "QUICK START" |
| 177 | .IX Header "QUICK START" |
| 178 | Say you have a script called \fBfoo\fR and a file of configuration options |
| 179 | called \fBfoo.xml\fR containing this: |
| 180 | .PP |
| 181 | .Vb 13 |
| 182 | \& <config logdir="/var/log/foo/" debugfile="/tmp/foo.debug"> |
| 183 | \& <server name="sahara" osname="solaris" osversion="2.6"> |
| 184 | \& <address>10.0.0.101</address> |
| 185 | \& <address>10.0.1.101</address> |
| 186 | \& </server> |
| 187 | \& <server name="gobi" osname="irix" osversion="6.5"> |
| 188 | \& <address>10.0.0.102</address> |
| 189 | \& </server> |
| 190 | \& <server name="kalahari" osname="linux" osversion="2.0.34"> |
| 191 | \& <address>10.0.0.103</address> |
| 192 | \& <address>10.0.1.103</address> |
| 193 | \& </server> |
| 194 | \& </config> |
| 195 | .Ve |
| 196 | .PP |
| 197 | The following lines of code in \fBfoo\fR: |
| 198 | .PP |
| 199 | .Vb 1 |
| 200 | \& use XML::Simple; |
| 201 | .Ve |
| 202 | .PP |
| 203 | .Vb 1 |
| 204 | \& my $config = XMLin(); |
| 205 | .Ve |
| 206 | .PP |
| 207 | will 'slurp' the configuration options into the hashref \f(CW$config\fR (because no |
| 208 | arguments are passed to \f(CW\*(C`XMLin()\*(C'\fR the name and location of the \s-1XML\s0 file will |
| 209 | be inferred from name and location of the script). You can dump out the |
| 210 | contents of the hashref using Data::Dumper: |
| 211 | .PP |
| 212 | .Vb 1 |
| 213 | \& use Data::Dumper; |
| 214 | .Ve |
| 215 | .PP |
| 216 | .Vb 1 |
| 217 | \& print Dumper($config); |
| 218 | .Ve |
| 219 | .PP |
| 220 | which will produce something like this (formatting has been adjusted for |
| 221 | brevity): |
| 222 | .PP |
| 223 | .Vb 21 |
| 224 | \& { |
| 225 | \& 'logdir' => '/var/log/foo/', |
| 226 | \& 'debugfile' => '/tmp/foo.debug', |
| 227 | \& 'server' => { |
| 228 | \& 'sahara' => { |
| 229 | \& 'osversion' => '2.6', |
| 230 | \& 'osname' => 'solaris', |
| 231 | \& 'address' => [ '10.0.0.101', '10.0.1.101' ] |
| 232 | \& }, |
| 233 | \& 'gobi' => { |
| 234 | \& 'osversion' => '6.5', |
| 235 | \& 'osname' => 'irix', |
| 236 | \& 'address' => '10.0.0.102' |
| 237 | \& }, |
| 238 | \& 'kalahari' => { |
| 239 | \& 'osversion' => '2.0.34', |
| 240 | \& 'osname' => 'linux', |
| 241 | \& 'address' => [ '10.0.0.103', '10.0.1.103' ] |
| 242 | \& } |
| 243 | \& } |
| 244 | \& } |
| 245 | .Ve |
| 246 | .PP |
| 247 | Your script could then access the name of the log directory like this: |
| 248 | .PP |
| 249 | .Vb 1 |
| 250 | \& print $config->{logdir}; |
| 251 | .Ve |
| 252 | .PP |
| 253 | similarly, the second address on the server 'kalahari' could be referenced as: |
| 254 | .PP |
| 255 | .Vb 1 |
| 256 | \& print $config->{server}->{kalahari}->{address}->[1]; |
| 257 | .Ve |
| 258 | .PP |
| 259 | What could be simpler? (Rhetorical). |
| 260 | .PP |
| 261 | For simple requirements, that's really all there is to it. If you want to |
| 262 | store your \s-1XML\s0 in a different directory or file, or pass it in as a string or |
| 263 | even pass it in via some derivative of an IO::Handle, you'll need to check out |
| 264 | \&\*(L"\s-1OPTIONS\s0\*(R". If you want to turn off or tweak the array folding feature (that |
| 265 | neat little transformation that produced \f(CW$config\fR\->{server}) you'll find options |
| 266 | for that as well. |
| 267 | .PP |
| 268 | If you want to generate \s-1XML\s0 (for example to write a modified version of |
| 269 | \&\f(CW$config\fR back out as \s-1XML\s0), check out \f(CW\*(C`XMLout()\*(C'\fR. |
| 270 | .PP |
| 271 | If your needs are not so simple, this may not be the module for you. In that |
| 272 | case, you might want to read \*(L"\s-1WHERE\s0 \s-1TO\s0 \s-1FROM\s0 \s-1HERE\s0?\*(R". |
| 273 | .SH "DESCRIPTION" |
| 274 | .IX Header "DESCRIPTION" |
| 275 | The XML::Simple module provides a simple \s-1API\s0 layer on top of an underlying \s-1XML\s0 |
| 276 | parsing module (either XML::Parser or one of the \s-1SAX2\s0 parser modules). Two |
| 277 | functions are exported: \f(CW\*(C`XMLin()\*(C'\fR and \f(CW\*(C`XMLout()\*(C'\fR. Note: you can explicitly |
| 278 | request the lower case versions of the function names: \f(CW\*(C`xml_in()\*(C'\fR and |
| 279 | \&\f(CW\*(C`xml_out()\*(C'\fR. |
| 280 | .PP |
| 281 | The simplest approach is to call these two functions directly, but an |
| 282 | optional object oriented interface (see \*(L"\s-1OPTIONAL\s0 \s-1OO\s0 \s-1INTERFACE\s0\*(R" below) |
| 283 | allows them to be called as methods of an \fBXML::Simple\fR object. The object |
| 284 | interface can also be used at either end of a \s-1SAX\s0 pipeline. |
| 285 | .Sh "\fIXMLin()\fP" |
| 286 | .IX Subsection "XMLin()" |
| 287 | Parses \s-1XML\s0 formatted data and returns a reference to a data structure which |
| 288 | contains the same information in a more readily accessible form. (Skip |
| 289 | down to \*(L"\s-1EXAMPLES\s0\*(R" below, for more sample code). |
| 290 | .PP |
| 291 | \&\f(CW\*(C`XMLin()\*(C'\fR accepts an optional \s-1XML\s0 specifier followed by zero or more 'name => |
| 292 | value' option pairs. The \s-1XML\s0 specifier can be one of the following: |
| 293 | .IP "A filename" 4 |
| 294 | .IX Item "A filename" |
| 295 | If the filename contains no directory components \f(CW\*(C`XMLin()\*(C'\fR will look for the |
| 296 | file in each directory in the SearchPath (see \*(L"\s-1OPTIONS\s0\*(R" below) or in the |
| 297 | current directory if the SearchPath option is not defined. eg: |
| 298 | .Sp |
| 299 | .Vb 1 |
| 300 | \& $ref = XMLin('/etc/params.xml'); |
| 301 | .Ve |
| 302 | .Sp |
| 303 | Note, the filename '\-' can be used to parse from \s-1STDIN\s0. |
| 304 | .IP "undef" 4 |
| 305 | .IX Item "undef" |
| 306 | If there is no \s-1XML\s0 specifier, \f(CW\*(C`XMLin()\*(C'\fR will check the script directory and |
| 307 | each of the SearchPath directories for a file with the same name as the script |
| 308 | but with the extension '.xml'. Note: if you wish to specify options, you |
| 309 | must specify the value 'undef'. eg: |
| 310 | .Sp |
| 311 | .Vb 1 |
| 312 | \& $ref = XMLin(undef, ForceArray => 1); |
| 313 | .Ve |
| 314 | .IP "A string of \s-1XML\s0" 4 |
| 315 | .IX Item "A string of XML" |
| 316 | A string containing \s-1XML\s0 (recognised by the presence of '<' and '>' characters) |
| 317 | will be parsed directly. eg: |
| 318 | .Sp |
| 319 | .Vb 1 |
| 320 | \& $ref = XMLin('<opt username="bob" password="flurp" />'); |
| 321 | .Ve |
| 322 | .IP "An IO::Handle object" 4 |
| 323 | .IX Item "An IO::Handle object" |
| 324 | An IO::Handle object will be read to \s-1EOF\s0 and its contents parsed. eg: |
| 325 | .Sp |
| 326 | .Vb 2 |
| 327 | \& $fh = IO::File->new('/etc/params.xml'); |
| 328 | \& $ref = XMLin($fh); |
| 329 | .Ve |
| 330 | .Sh "\fIXMLout()\fP" |
| 331 | .IX Subsection "XMLout()" |
| 332 | Takes a data structure (generally a hashref) and returns an \s-1XML\s0 encoding of |
| 333 | that structure. If the resulting \s-1XML\s0 is parsed using \f(CW\*(C`XMLin()\*(C'\fR, it should |
| 334 | return a data structure equivalent to the original (see caveats below). |
| 335 | .PP |
| 336 | The \f(CW\*(C`XMLout()\*(C'\fR function can also be used to output the \s-1XML\s0 as \s-1SAX\s0 events |
| 337 | (see the \f(CW\*(C`Handler\*(C'\fR option and \*(L"\s-1SAX\s0 \s-1SUPPORT\s0\*(R" for more details). |
| 338 | .PP |
| 339 | When translating hashes to \s-1XML\s0, hash keys which have a leading '\-' will be |
| 340 | silently skipped. This is the approved method for marking elements of a |
| 341 | data structure which should be ignored by \f(CW\*(C`XMLout\*(C'\fR. (Note: If these items |
| 342 | were not skipped the key names would be emitted as element or attribute names |
| 343 | with a leading '\-' which would not be valid \s-1XML\s0). |
| 344 | .Sh "Caveats" |
| 345 | .IX Subsection "Caveats" |
| 346 | Some care is required in creating data structures which will be passed to |
| 347 | \&\f(CW\*(C`XMLout()\*(C'\fR. Hash keys from the data structure will be encoded as either \s-1XML\s0 |
| 348 | element names or attribute names. Therefore, you should use hash key names |
| 349 | which conform to the relatively strict \s-1XML\s0 naming rules: |
| 350 | .PP |
| 351 | Names in \s-1XML\s0 must begin with a letter. The remaining characters may be |
| 352 | letters, digits, hyphens (\-), underscores (_) or full stops (.). It is also |
| 353 | allowable to include one colon (:) in an element name but this should only be |
| 354 | used when working with namespaces (\fBXML::Simple\fR can only usefully work with |
| 355 | namespaces when teamed with a \s-1SAX\s0 Parser). |
| 356 | .PP |
| 357 | You can use other punctuation characters in hash values (just not in hash |
| 358 | keys) however \fBXML::Simple\fR does not support dumping binary data. |
| 359 | .PP |
| 360 | If you break these rules, the current implementation of \f(CW\*(C`XMLout()\*(C'\fR will |
| 361 | simply emit non-compliant \s-1XML\s0 which will be rejected if you try to read it |
| 362 | back in. (A later version of \fBXML::Simple\fR might take a more proactive |
| 363 | approach). |
| 364 | .PP |
| 365 | Note also that although you can nest hashes and arrays to arbitrary levels, |
| 366 | circular data structures are not supported and will cause \f(CW\*(C`XMLout()\*(C'\fR to die. |
| 367 | .PP |
| 368 | If you wish to 'round\-trip' arbitrary data structures from Perl to \s-1XML\s0 and back |
| 369 | to Perl, then you should probably disable array folding (using the KeyAttr |
| 370 | option) both with \f(CW\*(C`XMLout()\*(C'\fR and with \f(CW\*(C`XMLin()\*(C'\fR. If you still don't get the |
| 371 | expected results, you may prefer to use XML::Dumper which is designed for |
| 372 | exactly that purpose. |
| 373 | .PP |
| 374 | Refer to \*(L"\s-1WHERE\s0 \s-1TO\s0 \s-1FROM\s0 \s-1HERE\s0?\*(R" if \f(CW\*(C`XMLout()\*(C'\fR is too simple for your needs. |
| 375 | .SH "OPTIONS" |
| 376 | .IX Header "OPTIONS" |
| 377 | \&\fBXML::Simple\fR supports a number of options (in fact as each release of |
| 378 | \&\fBXML::Simple\fR adds more options, the module's claim to the name 'Simple' |
| 379 | becomes increasingly tenuous). If you find yourself repeatedly having to |
| 380 | specify the same options, you might like to investigate \*(L"\s-1OPTIONAL\s0 \s-1OO\s0 \s-1INTERFACE\s0\*(R" below. |
| 381 | .PP |
| 382 | If you can't be bothered reading the documentation, refer to |
| 383 | \&\*(L"\s-1STRICT\s0 \s-1MODE\s0\*(R" to automatically catch common mistakes. |
| 384 | .PP |
| 385 | Because there are so many options, it's hard for new users to know which ones |
| 386 | are important, so here are the two you really need to know about: |
| 387 | .IP "\(bu" 4 |
| 388 | check out \f(CW\*(C`ForceArray\*(C'\fR because you'll almost certainly want to turn it on |
| 389 | .IP "\(bu" 4 |
| 390 | make sure you know what the \f(CW\*(C`KeyAttr\*(C'\fR option does and what its default value is |
| 391 | because it may surprise you otherwise (note in particular that 'KeyAttr' |
| 392 | affects both \f(CW\*(C`XMLin\*(C'\fR and \f(CW\*(C`XMLout\*(C'\fR) |
| 393 | .PP |
| 394 | The option name headings below have a trailing 'comment' \- a hash followed by |
| 395 | two pieces of metadata: |
| 396 | .IP "\(bu" 4 |
| 397 | Options are marked with '\fIin\fR' if they are recognised by \f(CW\*(C`XMLin()\*(C'\fR and |
| 398 | \&'\fIout\fR' if they are recognised by \f(CW\*(C`XMLout()\*(C'\fR. |
| 399 | .IP "\(bu" 4 |
| 400 | Each option is also flagged to indicate whether it is: |
| 401 | .Sp |
| 402 | .Vb 7 |
| 403 | \& 'important' - don't use the module until you understand this one |
| 404 | \& 'handy' - you can skip this on the first time through |
| 405 | \& 'advanced' - you can skip this on the second time through |
| 406 | \& 'SAX only' - don't worry about this unless you're using SAX (or |
| 407 | \& alternatively if you need this, you also need SAX) |
| 408 | \& 'seldom used' - you'll probably never use this unless you were the |
| 409 | \& person that requested the feature |
| 410 | .Ve |
| 411 | .PP |
| 412 | The options are listed alphabetically: |
| 413 | .PP |
| 414 | Note: option names are no longer case sensitive so you can use the mixed case |
| 415 | versions shown here; all lower case as required by versions 2.03 and earlier; |
| 416 | or you can add underscores between the words (eg: key_attr). |
| 417 | .Sh "AttrIndent => 1 \fI# out \- handy\fP" |
| 418 | .IX Subsection "AttrIndent => 1 # out - handy" |
| 419 | When you are using \f(CW\*(C`XMLout()\*(C'\fR, enable this option to have attributes printed |
| 420 | one-per-line with sensible indentation rather than all on one line. |
| 421 | .Sh "Cache => [ cache schemes ] \fI# in \- advanced\fP" |
| 422 | .IX Subsection "Cache => [ cache schemes ] # in - advanced" |
| 423 | Because loading the \fBXML::Parser\fR module and parsing an \s-1XML\s0 file can consume a |
| 424 | significant number of \s-1CPU\s0 cycles, it is often desirable to cache the output of |
| 425 | \&\f(CW\*(C`XMLin()\*(C'\fR for later reuse. |
| 426 | .PP |
| 427 | When parsing from a named file, \fBXML::Simple\fR supports a number of caching |
| 428 | schemes. The 'Cache' option may be used to specify one or more schemes (using |
| 429 | an anonymous array). Each scheme will be tried in turn in the hope of finding |
| 430 | a cached pre-parsed representation of the \s-1XML\s0 file. If no cached copy is |
| 431 | found, the file will be parsed and the first cache scheme in the list will be |
| 432 | used to save a copy of the results. The following cache schemes have been |
| 433 | implemented: |
| 434 | .IP "storable" 4 |
| 435 | .IX Item "storable" |
| 436 | Utilises \fBStorable.pm\fR to read/write a cache file with the same name as the |
| 437 | \&\s-1XML\s0 file but with the extension .stor |
| 438 | .IP "memshare" 4 |
| 439 | .IX Item "memshare" |
| 440 | When a file is first parsed, a copy of the resulting data structure is retained |
| 441 | in memory in the \fBXML::Simple\fR module's namespace. Subsequent calls to parse |
| 442 | the same file will return a reference to this structure. This cached version |
| 443 | will persist only for the life of the Perl interpreter (which in the case of |
| 444 | mod_perl for example, may be some significant time). |
| 445 | .Sp |
| 446 | Because each caller receives a reference to the same data structure, a change |
| 447 | made by one caller will be visible to all. For this reason, the reference |
| 448 | returned should be treated as read\-only. |
| 449 | .IP "memcopy" 4 |
| 450 | .IX Item "memcopy" |
| 451 | This scheme works identically to 'memshare' (above) except that each caller |
| 452 | receives a reference to a new data structure which is a copy of the cached |
| 453 | version. Copying the data structure will add a little processing overhead, |
| 454 | therefore this scheme should only be used where the caller intends to modify |
| 455 | the data structure (or wishes to protect itself from others who might). This |
| 456 | scheme uses \fBStorable.pm\fR to perform the copy. |
| 457 | .PP |
| 458 | Warning! The memory-based caching schemes compare the timestamp on the file to |
| 459 | the time when it was last parsed. If the file is stored on an \s-1NFS\s0 filesystem |
| 460 | (or other network share) and the clock on the file server is not exactly |
| 461 | synchronised with the clock where your script is run, updates to the source \s-1XML\s0 |
| 462 | file may appear to be ignored. |
| 463 | .Sh "ContentKey => 'keyname' \fI# in+out \- seldom used\fP" |
| 464 | .IX Subsection "ContentKey => 'keyname' # in+out - seldom used" |
| 465 | When text content is parsed to a hash value, this option lets you specify a |
| 466 | name for the hash key to override the default 'content'. So for example: |
| 467 | .PP |
| 468 | .Vb 1 |
| 469 | \& XMLin('<opt one="1">Text</opt>', ContentKey => 'text') |
| 470 | .Ve |
| 471 | .PP |
| 472 | will parse to: |
| 473 | .PP |
| 474 | .Vb 1 |
| 475 | \& { 'one' => 1, 'text' => 'Text' } |
| 476 | .Ve |
| 477 | .PP |
| 478 | instead of: |
| 479 | .PP |
| 480 | .Vb 1 |
| 481 | \& { 'one' => 1, 'content' => 'Text' } |
| 482 | .Ve |
| 483 | .PP |
| 484 | \&\f(CW\*(C`XMLout()\*(C'\fR will also honour the value of this option when converting a hashref |
| 485 | to \s-1XML\s0. |
| 486 | .PP |
| 487 | You can also prefix your selected key name with a '\-' character to have |
| 488 | \&\f(CW\*(C`XMLin()\*(C'\fR try a little harder to eliminate unnecessary 'content' keys after |
| 489 | array folding. For example: |
| 490 | .PP |
| 491 | .Vb 6 |
| 492 | \& XMLin( |
| 493 | \& '<opt><item name="one">First</item><item name="two">Second</item></opt>', |
| 494 | \& KeyAttr => {item => 'name'}, |
| 495 | \& ForceArray => [ 'item' ], |
| 496 | \& ContentKey => '-content' |
| 497 | \& ) |
| 498 | .Ve |
| 499 | .PP |
| 500 | will parse to: |
| 501 | .PP |
| 502 | .Vb 6 |
| 503 | \& { |
| 504 | \& 'item' => { |
| 505 | \& 'one' => 'First', |
| 506 | \& 'two' => 'Second' |
| 507 | \& } |
| 508 | \& } |
| 509 | .Ve |
| 510 | .PP |
| 511 | rather than this (without the '\-'): |
| 512 | .PP |
| 513 | .Vb 6 |
| 514 | \& { |
| 515 | \& 'item' => { |
| 516 | \& 'one' => { 'content' => 'First' }, |
| 517 | \& 'two' => { 'content' => 'Second' } |
| 518 | \& } |
| 519 | \& } |
| 520 | .Ve |
| 521 | .Sh "DataHandler => code_ref \fI# in \- \s-1SAX\s0 only\fP" |
| 522 | .IX Subsection "DataHandler => code_ref # in - SAX only" |
| 523 | When you use an \fBXML::Simple\fR object as a \s-1SAX\s0 handler, it will return a |
| 524 | \&'simple tree' data structure in the same format as \f(CW\*(C`XMLin()\*(C'\fR would return. If |
| 525 | this option is set (to a subroutine reference), then when the tree is built the |
| 526 | subroutine will be called and passed two arguments: a reference to the |
| 527 | \&\fBXML::Simple\fR object and a reference to the data tree. The return value from |
| 528 | the subroutine will be returned to the \s-1SAX\s0 driver. (See \*(L"\s-1SAX\s0 \s-1SUPPORT\s0\*(R" for |
| 529 | more details). |
| 530 | .Sh "ForceArray => 1 \fI# in \- important\fP" |
| 531 | .IX Subsection "ForceArray => 1 # in - important" |
| 532 | This option should be set to '1' to force nested elements to be represented |
| 533 | as arrays even when there is only one. Eg, with ForceArray enabled, this |
| 534 | \&\s-1XML:\s0 |
| 535 | .PP |
| 536 | .Vb 3 |
| 537 | \& <opt> |
| 538 | \& <name>value</name> |
| 539 | \& </opt> |
| 540 | .Ve |
| 541 | .PP |
| 542 | would parse to this: |
| 543 | .PP |
| 544 | .Vb 5 |
| 545 | \& { |
| 546 | \& 'name' => [ |
| 547 | \& 'value' |
| 548 | \& ] |
| 549 | \& } |
| 550 | .Ve |
| 551 | .PP |
| 552 | instead of this (the default): |
| 553 | .PP |
| 554 | .Vb 3 |
| 555 | \& { |
| 556 | \& 'name' => 'value' |
| 557 | \& } |
| 558 | .Ve |
| 559 | .PP |
| 560 | This option is especially useful if the data structure is likely to be written |
| 561 | back out as \s-1XML\s0 and the default behaviour of rolling single nested elements up |
| 562 | into attributes is not desirable. |
| 563 | .PP |
| 564 | If you are using the array folding feature, you should almost certainly enable |
| 565 | this option. If you do not, single nested elements will not be parsed to |
| 566 | arrays and therefore will not be candidates for folding to a hash. (Given that |
| 567 | the default value of 'KeyAttr' enables array folding, the default value of this |
| 568 | option should probably have been enabled too \- sorry). |
| 569 | .Sh "ForceArray => [ names ] \fI# in \- important\fP" |
| 570 | .IX Subsection "ForceArray => [ names ] # in - important" |
| 571 | This alternative (and preferred) form of the 'ForceArray' option allows you to |
| 572 | specify a list of element names which should always be forced into an array |
| 573 | representation, rather than the 'all or nothing' approach above. |
| 574 | .PP |
| 575 | It is also possible (since version 2.05) to include compiled regular |
| 576 | expressions in the list \- any element names which match the pattern will be |
| 577 | forced to arrays. If the list contains only a single regex, then it is not |
| 578 | necessary to enclose it in an arrayref. Eg: |
| 579 | .PP |
| 580 | .Vb 1 |
| 581 | \& ForceArray => qr/_list$/ |
| 582 | .Ve |
| 583 | .Sh "ForceContent => 1 \fI# in \- seldom used\fP" |
| 584 | .IX Subsection "ForceContent => 1 # in - seldom used" |
| 585 | When \f(CW\*(C`XMLin()\*(C'\fR parses elements which have text content as well as attributes, |
| 586 | the text content must be represented as a hash value rather than a simple |
| 587 | scalar. This option allows you to force text content to always parse to |
| 588 | a hash value even when there are no attributes. So for example: |
| 589 | .PP |
| 590 | .Vb 1 |
| 591 | \& XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1) |
| 592 | .Ve |
| 593 | .PP |
| 594 | will parse to: |
| 595 | .PP |
| 596 | .Vb 4 |
| 597 | \& { |
| 598 | \& 'x' => { 'content' => 'text1' }, |
| 599 | \& 'y' => { 'a' => 2, 'content' => 'text2' } |
| 600 | \& } |
| 601 | .Ve |
| 602 | .PP |
| 603 | instead of: |
| 604 | .PP |
| 605 | .Vb 4 |
| 606 | \& { |
| 607 | \& 'x' => 'text1', |
| 608 | \& 'y' => { 'a' => 2, 'content' => 'text2' } |
| 609 | \& } |
| 610 | .Ve |
| 611 | .Sh "GroupTags => { grouping tag => grouped tag } \fI# in+out \- handy\fP" |
| 612 | .IX Subsection "GroupTags => { grouping tag => grouped tag } # in+out - handy" |
| 613 | You can use this option to eliminate extra levels of indirection in your Perl |
| 614 | data structure. For example this \s-1XML:\s0 |
| 615 | .PP |
| 616 | .Vb 7 |
| 617 | \& <opt> |
| 618 | \& <searchpath> |
| 619 | \& <dir>/usr/bin</dir> |
| 620 | \& <dir>/usr/local/bin</dir> |
| 621 | \& <dir>/usr/X11/bin</dir> |
| 622 | \& </searchpath> |
| 623 | \& </opt> |
| 624 | .Ve |
| 625 | .PP |
| 626 | Would normally be read into a structure like this: |
| 627 | .PP |
| 628 | .Vb 5 |
| 629 | \& { |
| 630 | \& searchpath => { |
| 631 | \& dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] |
| 632 | \& } |
| 633 | \& } |
| 634 | .Ve |
| 635 | .PP |
| 636 | But when read in with the appropriate value for 'GroupTags': |
| 637 | .PP |
| 638 | .Vb 1 |
| 639 | \& my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' }); |
| 640 | .Ve |
| 641 | .PP |
| 642 | It will return this simpler structure: |
| 643 | .PP |
| 644 | .Vb 3 |
| 645 | \& { |
| 646 | \& searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] |
| 647 | \& } |
| 648 | .Ve |
| 649 | .PP |
| 650 | The grouping element (\f(CW\*(C`<searchpath>\*(C'\fR in the example) must not contain any |
| 651 | attributes or elements other than the grouped element. |
| 652 | .PP |
| 653 | You can specify multiple 'grouping element' to 'grouped element' mappings in |
| 654 | the same hashref. If this option is combined with \f(CW\*(C`KeyAttr\*(C'\fR, the array |
| 655 | folding will occur first and then the grouped element names will be eliminated. |
| 656 | .PP |
| 657 | \&\f(CW\*(C`XMLout\*(C'\fR will also use the grouptag mappings to re-introduce the tags around |
| 658 | the grouped elements. Beware though that this will occur in all places that |
| 659 | the 'grouping tag' name occurs \- you probably don't want to use the same name |
| 660 | for elements as well as attributes. |
| 661 | .Sh "Handler => object_ref \fI# out \- \s-1SAX\s0 only\fP" |
| 662 | .IX Subsection "Handler => object_ref # out - SAX only" |
| 663 | Use the 'Handler' option to have \f(CW\*(C`XMLout()\*(C'\fR generate \s-1SAX\s0 events rather than |
| 664 | returning a string of \s-1XML\s0. For more details see \*(L"\s-1SAX\s0 \s-1SUPPORT\s0\*(R" below. |
| 665 | .PP |
| 666 | Note: the current implementation of this option generates a string of \s-1XML\s0 |
| 667 | and uses a \s-1SAX\s0 parser to translate it into \s-1SAX\s0 events. The normal encoding |
| 668 | rules apply here \- your data must be \s-1UTF8\s0 encoded unless you specify an |
| 669 | alternative encoding via the 'XMLDecl' option; and by the time the data reaches |
| 670 | the handler object, it will be in \s-1UTF8\s0 form regardless of the encoding you |
| 671 | supply. A future implementation of this option may generate the events |
| 672 | directly. |
| 673 | .Sh "KeepRoot => 1 \fI# in+out \- handy\fP" |
| 674 | .IX Subsection "KeepRoot => 1 # in+out - handy" |
| 675 | In its attempt to return a data structure free of superfluous detail and |
| 676 | unnecessary levels of indirection, \f(CW\*(C`XMLin()\*(C'\fR normally discards the root |
| 677 | element name. Setting the 'KeepRoot' option to '1' will cause the root element |
| 678 | name to be retained. So after executing this code: |
| 679 | .PP |
| 680 | .Vb 1 |
| 681 | \& $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1) |
| 682 | .Ve |
| 683 | .PP |
| 684 | You'll be able to reference the tempdir as |
| 685 | \&\f(CW\*(C`$config\->{config}\->{tempdir}\*(C'\fR instead of the default |
| 686 | \&\f(CW\*(C`$config\->{tempdir}\*(C'\fR. |
| 687 | .PP |
| 688 | Similarly, setting the 'KeepRoot' option to '1' will tell \f(CW\*(C`XMLout()\*(C'\fR that the |
| 689 | data structure already contains a root element name and it is not necessary to |
| 690 | add another. |
| 691 | .Sh "KeyAttr => [ list ] \fI# in+out \- important\fP" |
| 692 | .IX Subsection "KeyAttr => [ list ] # in+out - important" |
| 693 | This option controls the 'array folding' feature which translates nested |
| 694 | elements from an array to a hash. It also controls the 'unfolding' of hashes |
| 695 | to arrays. |
| 696 | .PP |
| 697 | For example, this \s-1XML:\s0 |
| 698 | .PP |
| 699 | .Vb 4 |
| 700 | \& <opt> |
| 701 | \& <user login="grep" fullname="Gary R Epstein" /> |
| 702 | \& <user login="stty" fullname="Simon T Tyson" /> |
| 703 | \& </opt> |
| 704 | .Ve |
| 705 | .PP |
| 706 | would, by default, parse to this: |
| 707 | .PP |
| 708 | .Vb 12 |
| 709 | \& { |
| 710 | \& 'user' => [ |
| 711 | \& { |
| 712 | \& 'login' => 'grep', |
| 713 | \& 'fullname' => 'Gary R Epstein' |
| 714 | \& }, |
| 715 | \& { |
| 716 | \& 'login' => 'stty', |
| 717 | \& 'fullname' => 'Simon T Tyson' |
| 718 | \& } |
| 719 | \& ] |
| 720 | \& } |
| 721 | .Ve |
| 722 | .PP |
| 723 | If the option 'KeyAttr => \*(L"login\*(R"' were used to specify that the 'login' |
| 724 | attribute is a key, the same \s-1XML\s0 would parse to: |
| 725 | .PP |
| 726 | .Vb 10 |
| 727 | \& { |
| 728 | \& 'user' => { |
| 729 | \& 'stty' => { |
| 730 | \& 'fullname' => 'Simon T Tyson' |
| 731 | \& }, |
| 732 | \& 'grep' => { |
| 733 | \& 'fullname' => 'Gary R Epstein' |
| 734 | \& } |
| 735 | \& } |
| 736 | \& } |
| 737 | .Ve |
| 738 | .PP |
| 739 | The key attribute names should be supplied in an arrayref if there is more |
| 740 | than one. \f(CW\*(C`XMLin()\*(C'\fR will attempt to match attribute names in the order |
| 741 | supplied. \f(CW\*(C`XMLout()\*(C'\fR will use the first attribute name supplied when |
| 742 | \&'unfolding' a hash into an array. |
| 743 | .PP |
| 744 | Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do |
| 745 | not want folding on input or unfolding on output you must set this option |
| 746 | to an empty list to disable the feature. |
| 747 | .PP |
| 748 | Note 2: If you wish to use this option, you should also enable the |
| 749 | \&\f(CW\*(C`ForceArray\*(C'\fR option. Without 'ForceArray', a single nested element will be |
| 750 | rolled up into a scalar rather than an array and therefore will not be folded |
| 751 | (since only arrays get folded). |
| 752 | .Sh "KeyAttr => { list } \fI# in+out \- important\fP" |
| 753 | .IX Subsection "KeyAttr => { list } # in+out - important" |
| 754 | This alternative (and preferred) method of specifying the key attributes |
| 755 | allows more fine grained control over which elements are folded and on which |
| 756 | attributes. For example the option 'KeyAttr => { package => 'id' }' will cause |
| 757 | any package elements to be folded on the 'id' attribute. No other elements |
| 758 | which have an 'id' attribute will be folded at all. |
| 759 | .PP |
| 760 | Note: \f(CW\*(C`XMLin()\*(C'\fR will generate a warning (or a fatal error in \*(L"\s-1STRICT\s0 \s-1MODE\s0\*(R") |
| 761 | if this syntax is used and an element which does not have the specified key |
| 762 | attribute is encountered (eg: a 'package' element without an 'id' attribute, to |
| 763 | use the example above). Warnings will only be generated if \fB\-w\fR is in force. |
| 764 | .PP |
| 765 | Two further variations are made possible by prefixing a '+' or a '\-' character |
| 766 | to the attribute name: |
| 767 | .PP |
| 768 | The option 'KeyAttr => { user => \*(L"+login\*(R" }' will cause this \s-1XML:\s0 |
| 769 | .PP |
| 770 | .Vb 4 |
| 771 | \& <opt> |
| 772 | \& <user login="grep" fullname="Gary R Epstein" /> |
| 773 | \& <user login="stty" fullname="Simon T Tyson" /> |
| 774 | \& </opt> |
| 775 | .Ve |
| 776 | .PP |
| 777 | to parse to this data structure: |
| 778 | .PP |
| 779 | .Vb 12 |
| 780 | \& { |
| 781 | \& 'user' => { |
| 782 | \& 'stty' => { |
| 783 | \& 'fullname' => 'Simon T Tyson', |
| 784 | \& 'login' => 'stty' |
| 785 | \& }, |
| 786 | \& 'grep' => { |
| 787 | \& 'fullname' => 'Gary R Epstein', |
| 788 | \& 'login' => 'grep' |
| 789 | \& } |
| 790 | \& } |
| 791 | \& } |
| 792 | .Ve |
| 793 | .PP |
| 794 | The '+' indicates that the value of the key attribute should be copied rather |
| 795 | than moved to the folded hash key. |
| 796 | .PP |
| 797 | A '\-' prefix would produce this result: |
| 798 | .PP |
| 799 | .Vb 12 |
| 800 | \& { |
| 801 | \& 'user' => { |
| 802 | \& 'stty' => { |
| 803 | \& 'fullname' => 'Simon T Tyson', |
| 804 | \& '-login' => 'stty' |
| 805 | \& }, |
| 806 | \& 'grep' => { |
| 807 | \& 'fullname' => 'Gary R Epstein', |
| 808 | \& '-login' => 'grep' |
| 809 | \& } |
| 810 | \& } |
| 811 | \& } |
| 812 | .Ve |
| 813 | .PP |
| 814 | As described earlier, \f(CW\*(C`XMLout\*(C'\fR will ignore hash keys starting with a '\-'. |
| 815 | .Sh "NoAttr => 1 \fI# in+out \- handy\fP" |
| 816 | .IX Subsection "NoAttr => 1 # in+out - handy" |
| 817 | When used with \f(CW\*(C`XMLout()\*(C'\fR, the generated \s-1XML\s0 will contain no attributes. |
| 818 | All hash key/values will be represented as nested elements instead. |
| 819 | .PP |
| 820 | When used with \f(CW\*(C`XMLin()\*(C'\fR, any attributes in the \s-1XML\s0 will be ignored. |
| 821 | .Sh "NoEscape => 1 \fI# out \- seldom used\fP" |
| 822 | .IX Subsection "NoEscape => 1 # out - seldom used" |
| 823 | By default, \f(CW\*(C`XMLout()\*(C'\fR will translate the characters '<', '>', '&' and |
| 824 | \&'"' to '&lt;', '&gt;', '&amp;' and '&quot;' respectively. Use this option to |
| 825 | suppress escaping (presumably because you've already escaped the data in some |
| 826 | more sophisticated manner). |
| 827 | .Sh "NoIndent => 1 \fI# out \- seldom used\fP" |
| 828 | .IX Subsection "NoIndent => 1 # out - seldom used" |
| 829 | Set this option to 1 to disable \f(CW\*(C`XMLout()\*(C'\fR's default 'pretty printing' mode. |
| 830 | With this option enabled, the \s-1XML\s0 output will all be on one line (unless there |
| 831 | are newlines in the data) \- this may be easier for downstream processing. |
| 832 | .Sh "NoSort => 1 \fI# out \- seldom used\fP" |
| 833 | .IX Subsection "NoSort => 1 # out - seldom used" |
| 834 | Newer versions of XML::Simple sort elements and attributes alphabetically (*), |
| 835 | by default. Enable this option to suppress the sorting \- possibly for |
| 836 | backwards compatibility. |
| 837 | .PP |
| 838 | * Actually, sorting is alphabetical but 'key' attribute or element names (as in |
| 839 | \&'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements |
| 840 | are sorted alphabetically by the value of the key field. |
| 841 | .Sh "NormaliseSpace => 0 | 1 | 2 \fI# in \- handy\fP" |
| 842 | .IX Subsection "NormaliseSpace => 0 | 1 | 2 # in - handy" |
| 843 | This option controls how whitespace in text content is handled. Recognised |
| 844 | values for the option are: |
| 845 | .IP "\(bu" 4 |
| 846 | 0 = (default) whitespace is passed through unaltered (except of course for the |
| 847 | normalisation of whitespace in attribute values which is mandated by the \s-1XML\s0 |
| 848 | recommendation) |
| 849 | .IP "\(bu" 4 |
| 850 | 1 = whitespace is normalised in any value used as a hash key (normalising means |
| 851 | removing leading and trailing whitespace and collapsing sequences of whitespace |
| 852 | characters to a single space) |
| 853 | .IP "\(bu" 4 |
| 854 | 2 = whitespace is normalised in all text content |
| 855 | .PP |
| 856 | Note: you can spell this option with a 'z' if that is more natural for you. |
| 857 | .Sh "NSExpand => 1 \fI# in+out handy \- \s-1SAX\s0 only\fP" |
| 858 | .IX Subsection "NSExpand => 1 # in+out handy - SAX only" |
| 859 | This option controls namespace expansion \- the translation of element and |
| 860 | attribute names of the form 'prefix:name' to '{uri}name'. For example the |
| 861 | element name 'xsl:template' might be expanded to: |
| 862 | \&'{http://www.w3.org/1999/XSL/Transform}template'. |
| 863 | .PP |
| 864 | By default, \f(CW\*(C`XMLin()\*(C'\fR will return element names and attribute names exactly as |
| 865 | they appear in the \s-1XML\s0. Setting this option to 1 will cause all element and |
| 866 | attribute names to be expanded to include their namespace \s-1URI\s0. |
| 867 | .PP |
| 868 | \&\fINote: You must be using a \s-1SAX\s0 parser for this option to work (ie: it does not |
| 869 | work with XML::Parser)\fR. |
| 870 | .PP |
| 871 | This option also controls whether \f(CW\*(C`XMLout()\*(C'\fR performs the reverse translation |
| 872 | from '{uri}name' back to 'prefix:name'. The default is no translation. If |
| 873 | your data contains expanded names, you should set this option to 1 otherwise |
| 874 | \&\f(CW\*(C`XMLout\*(C'\fR will emit \s-1XML\s0 which is not well formed. |
| 875 | .PP |
| 876 | \&\fINote: You must have the XML::NamespaceSupport module installed if you want |
| 877 | \&\f(CI\*(C`XMLout()\*(C'\fI to translate URIs back to prefixes\fR. |
| 878 | .Sh "NumericEscape => 0 | 1 | 2 \fI# out \- handy\fP" |
| 879 | .IX Subsection "NumericEscape => 0 | 1 | 2 # out - handy" |
| 880 | Use this option to have 'high' (non\-ASCII) characters in your Perl data |
| 881 | structure converted to numeric entities (eg: &#8364;) in the \s-1XML\s0 output. Three |
| 882 | levels are possible: |
| 883 | .PP |
| 884 | 0 \- default: no numeric escaping (\s-1OK\s0 if you're writing out \s-1UTF8\s0) |
| 885 | .PP |
| 886 | 1 \- only characters above 0xFF are escaped (ie: characters in the 0x80\-FF range are not escaped), possibly useful with \s-1ISO8859\-1\s0 output |
| 887 | .PP |
| 888 | 2 \- all characters above 0x7F are escaped (good for plain \s-1ASCII\s0 output) |
| 889 | .Sh "OutputFile => <file specifier> \fI# out \- handy\fP" |
| 890 | .IX Subsection "OutputFile => <file specifier> # out - handy" |
| 891 | The default behaviour of \f(CW\*(C`XMLout()\*(C'\fR is to return the \s-1XML\s0 as a string. If you |
| 892 | wish to write the \s-1XML\s0 to a file, simply supply the filename using the |
| 893 | \&'OutputFile' option. |
| 894 | .PP |
| 895 | This option also accepts an \s-1IO\s0 handle object \- especially useful in Perl 5.8.0 |
| 896 | and later for output using an encoding other than \s-1UTF\-8\s0, eg: |
| 897 | .PP |
| 898 | .Vb 2 |
| 899 | \& open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!"; |
| 900 | \& XMLout($ref, OutputFile => $fh); |
| 901 | .Ve |
| 902 | .PP |
| 903 | Note, XML::Simple does not require that the object you pass in to the |
| 904 | OutputFile option inherits from IO::Handle \- it simply assumes the object |
| 905 | supports a \f(CW\*(C`print\*(C'\fR method. |
| 906 | .Sh "ParserOpts => [ XML::Parser Options ] \fI# in \- don't use this\fP" |
| 907 | .IX Subsection "ParserOpts => [ XML::Parser Options ] # in - don't use this" |
| 908 | \&\fINote: This option is now officially deprecated. If you find it useful, email |
| 909 | the author with an example of what you use it for. Do not use this option to |
| 910 | set the ProtocolEncoding, that's just plain wrong \- fix the \s-1XML\s0\fR. |
| 911 | .PP |
| 912 | This option allows you to pass parameters to the constructor of the underlying |
| 913 | XML::Parser object (which of course assumes you're not using \s-1SAX\s0). |
| 914 | .Sh "RootName => 'string' \fI# out \- handy\fP" |
| 915 | .IX Subsection "RootName => 'string' # out - handy" |
| 916 | By default, when \f(CW\*(C`XMLout()\*(C'\fR generates \s-1XML\s0, the root element will be named |
| 917 | \&'opt'. This option allows you to specify an alternative name. |
| 918 | .PP |
| 919 | Specifying either undef or the empty string for the RootName option will |
| 920 | produce \s-1XML\s0 with no root elements. In most cases the resulting \s-1XML\s0 fragment |
| 921 | will not be 'well formed' and therefore could not be read back in by \f(CW\*(C`XMLin()\*(C'\fR. |
| 922 | Nevertheless, the option has been found to be useful in certain circumstances. |
| 923 | .Sh "SearchPath => [ list ] \fI# in \- handy\fP" |
| 924 | .IX Subsection "SearchPath => [ list ] # in - handy" |
| 925 | If you pass \f(CW\*(C`XMLin()\*(C'\fR a filename, but the filename includes no directory |
| 926 | component, you can use this option to specify which directories should be |
| 927 | searched to locate the file. You might use this option to search first in the |
| 928 | user's home directory, then in a global directory such as /etc. |
| 929 | .PP |
| 930 | If a filename is provided to \f(CW\*(C`XMLin()\*(C'\fR but SearchPath is not defined, the |
| 931 | file is assumed to be in the current directory. |
| 932 | .PP |
| 933 | If the first parameter to \f(CW\*(C`XMLin()\*(C'\fR is undefined, the default SearchPath |
| 934 | will contain only the directory in which the script itself is located. |
| 935 | Otherwise the default SearchPath will be empty. |
| 936 | .Sh "SuppressEmpty => 1 | '' | undef \fI# in+out \- handy\fP" |
| 937 | .IX Subsection "SuppressEmpty => 1 | '' | undef # in+out - handy" |
| 938 | This option controls what \f(CW\*(C`XMLin()\*(C'\fR should do with empty elements (no |
| 939 | attributes and no content). The default behaviour is to represent them as |
| 940 | empty hashes. Setting this option to a true value (eg: 1) will cause empty |
| 941 | elements to be skipped altogether. Setting the option to 'undef' or the empty |
| 942 | string will cause empty elements to be represented as the undefined value or |
| 943 | the empty string respectively. The latter two alternatives are a little |
| 944 | easier to test for in your code than a hash with no keys. |
| 945 | .PP |
| 946 | The option also controls what \f(CW\*(C`XMLout()\*(C'\fR does with undefined values. Setting |
| 947 | the option to undef causes undefined values to be output as empty elements |
| 948 | (rather than empty attributes); it also suppresses the generation of warnings |
| 949 | about undefined values. Setting the option to a true value (eg: 1) causes |
| 950 | undefined values to be skipped altogether on output. |
| 951 | .Sh "ValueAttr => [ names ] \fI# in \- handy\fP" |
| 952 | .IX Subsection "ValueAttr => [ names ] # in - handy" |
| 953 | Use this option to deal with elements which always have a single attribute and no |
| 954 | content. Eg: |
| 955 | .PP |
| 956 | .Vb 4 |
| 957 | \& <opt> |
| 958 | \& <colour value="red" /> |
| 959 | \& <size value="XXL" /> |
| 960 | \& </opt> |
| 961 | .Ve |
| 962 | .PP |
| 963 | Setting \f(CW\*(C`ValueAttr => [ 'value' ]\*(C'\fR will cause the above \s-1XML\s0 to parse to: |
| 964 | .PP |
| 965 | .Vb 4 |
| 966 | \& { |
| 967 | \& colour => 'red', |
| 968 | \& size => 'XXL' |
| 969 | \& } |
| 970 | .Ve |
| 971 | .PP |
| 972 | instead of this (the default): |
| 973 | .PP |
| 974 | .Vb 4 |
| 975 | \& { |
| 976 | \& colour => { value => 'red' }, |
| 977 | \& size => { value => 'XXL' } |
| 978 | \& } |
| 979 | .Ve |
| 980 | .PP |
| 981 | Note: This form of the ValueAttr option is not compatible with \f(CW\*(C`XMLout()\*(C'\fR \- |
| 982 | since the attribute name is discarded at parse time, the original \s-1XML\s0 cannot be |
| 983 | reconstructed. |
| 984 | .Sh "ValueAttr => { element => attribute, ... } \fI# in+out \- handy\fP" |
| 985 | .IX Subsection "ValueAttr => { element => attribute, ... } # in+out - handy" |
| 986 | This (preferred) form of the ValueAttr option requires you to specify both |
| 987 | the element and the attribute names. This is not only safer, it also allows |
| 988 | the original \s-1XML\s0 to be reconstructed by \f(CW\*(C`XMLout()\*(C'\fR. |
| 989 | .PP |
| 990 | Note: You probably don't want to use this option and the NoAttr option at the |
| 991 | same time. |
| 992 | .Sh "Variables => { name => value } \fI# in \- handy\fP" |
| 993 | .IX Subsection "Variables => { name => value } # in - handy" |
| 994 | This option allows variables in the \s-1XML\s0 to be expanded when the file is read |
| 995 | (there is no facility for putting the variable names back if you regenerate |
| 996 | \&\s-1XML\s0 using \f(CW\*(C`XMLout\*(C'\fR). |
| 997 | .PP |
| 998 | A 'variable' is any text of the form \f(CW\*(C`${name}\*(C'\fR which occurs in an attribute |
| 999 | value or in the text content of an element. If 'name' matches a key in the |
| 1000 | supplied hashref, \f(CW\*(C`${name}\*(C'\fR will be replaced with the corresponding value from |
| 1001 | the hashref. If no matching key is found, the variable will not be replaced. |
| 1002 | Names must match the regex: \f(CW\*(C`[\ew.]+\*(C'\fR (ie: only 'word' characters and dots are |
| 1003 | allowed). |
| 1004 | .Sh "VarAttr => 'attr_name' \fI# in \- handy\fP" |
| 1005 | .IX Subsection "VarAttr => 'attr_name' # in - handy" |
| 1006 | In addition to the variables defined using \f(CW\*(C`Variables\*(C'\fR, this option allows |
| 1007 | variables to be defined in the \s-1XML\s0. A variable definition consists of an |
| 1008 | element with an attribute called 'attr_name' (the value of the \f(CW\*(C`VarAttr\*(C'\fR |
| 1009 | option). The value of the attribute will be used as the variable name and the |
| 1010 | text content of the element will be used as the value. A variable defined in |
| 1011 | this way will override a variable defined using the \f(CW\*(C`Variables\*(C'\fR option. For |
| 1012 | example: |
| 1013 | .PP |
| 1014 | .Vb 7 |
| 1015 | \& XMLin( '<opt> |
| 1016 | \& <dir name="prefix">/usr/local/apache</dir> |
| 1017 | \& <dir name="exec_prefix">${prefix}</dir> |
| 1018 | \& <dir name="bindir">${exec_prefix}/bin</dir> |
| 1019 | \& </opt>', |
| 1020 | \& VarAttr => 'name', ContentKey => '-content' |
| 1021 | \& ); |
| 1022 | .Ve |
| 1023 | .PP |
| 1024 | produces the following data structure: |
| 1025 | .PP |
| 1026 | .Vb 7 |
| 1027 | \& { |
| 1028 | \& dir => { |
| 1029 | \& prefix => '/usr/local/apache', |
| 1030 | \& exec_prefix => '/usr/local/apache', |
| 1031 | \& bindir => '/usr/local/apache/bin', |
| 1032 | \& } |
| 1033 | \& } |
| 1034 | .Ve |
| 1035 | .Sh "XMLDecl => 1 or XMLDecl => 'string' \fI# out \- handy\fP" |
| 1036 | .IX Subsection "XMLDecl => 1 or XMLDecl => 'string' # out - handy" |
| 1037 | If you want the output from \f(CW\*(C`XMLout()\*(C'\fR to start with the optional \s-1XML\s0 |
| 1038 | declaration, simply set the option to '1'. The default \s-1XML\s0 declaration is: |
| 1039 | .PP |
| 1040 | .Vb 1 |
| 1041 | \& <?xml version='1.0' standalone='yes'?> |
| 1042 | .Ve |
| 1043 | .PP |
| 1044 | If you want some other string (for example to declare an encoding value), set |
| 1045 | the value of this option to the complete string you require. |
| 1046 | .SH "OPTIONAL OO INTERFACE" |
| 1047 | .IX Header "OPTIONAL OO INTERFACE" |
| 1048 | The procedural interface is both simple and convenient; however, there are a |
| 1049 | couple of reasons why you might prefer to use the object oriented (\s-1OO\s0) |
| 1050 | interface: |
| 1051 | .IP "\(bu" 4 |
| 1052 | to define a set of default values which should be used on all subsequent calls |
| 1053 | to \f(CW\*(C`XMLin()\*(C'\fR or \f(CW\*(C`XMLout()\*(C'\fR |
| 1054 | .IP "\(bu" 4 |
| 1055 | to override methods in \fBXML::Simple\fR to provide customised behaviour |
| 1056 | .PP |
| 1057 | The default values for the options described above are unlikely to suit |
| 1058 | everyone. The \s-1OO\s0 interface allows you to effectively override \fBXML::Simple\fR's |
| 1059 | defaults with your preferred values. It works like this: |
| 1060 | .PP |
| 1061 | First create an XML::Simple parser object with your preferred defaults: |
| 1062 | .PP |
| 1063 | .Vb 1 |
| 1064 | \& my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1); |
| 1065 | .Ve |
| 1066 | .PP |
| 1067 | then call \f(CW\*(C`XMLin()\*(C'\fR or \f(CW\*(C`XMLout()\*(C'\fR as a method of that object: |
| 1068 | .PP |
| 1069 | .Vb 2 |
| 1070 | \& my $ref = $xs->XMLin($xml); |
| 1071 | \& my $xml = $xs->XMLout($ref); |
| 1072 | .Ve |
| 1073 | .PP |
| 1074 | You can also specify options when you make the method calls and these values |
| 1075 | will be merged with the values specified when the object was created. Values |
| 1076 | specified in a method call take precedence. |
| 1077 | .PP |
| 1078 | Overriding methods is a more advanced topic but might be useful if for example |
| 1079 | you wished to provide an alternative routine for escaping character data (the |
| 1080 | escape_value method) or for building the initial parse tree (the build_tree |
| 1081 | method). |
| 1082 | .PP |
| 1083 | Note: when called as methods, the \f(CW\*(C`XMLin()\*(C'\fR and \f(CW\*(C`XMLout()\*(C'\fR routines may be |
| 1084 | called as \f(CW\*(C`xml_in()\*(C'\fR or \f(CW\*(C`xml_out()\*(C'\fR. The method names are aliased so the |
| 1085 | only difference is the aesthetics. |
| 1086 | .SH "STRICT MODE" |
| 1087 | .IX Header "STRICT MODE" |
| 1088 | If you import the \fBXML::Simple\fR routines like this: |
| 1089 | .PP |
| 1090 | .Vb 1 |
| 1091 | \& use XML::Simple qw(:strict); |
| 1092 | .Ve |
| 1093 | .PP |
| 1094 | the following common mistakes will be detected and treated as fatal errors: |
| 1095 | .IP "\(bu" 4 |
| 1096 | Failing to explicitly set the \f(CW\*(C`KeyAttr\*(C'\fR option \- if you can't be bothered |
| 1097 | reading about this option, turn it off with: KeyAttr => [ ] |
| 1098 | .IP "\(bu" 4 |
| 1099 | Failing to explicitly set the \f(CW\*(C`ForceArray\*(C'\fR option \- if you can't be bothered |
| 1100 | reading about this option, set it to the safest mode with: ForceArray => 1 |
| 1101 | .IP "\(bu" 4 |
| 1102 | Setting ForceArray to an array, but failing to list all the elements from the |
| 1103 | KeyAttr hash. |
| 1104 | .IP "\(bu" 4 |
| 1105 | Data error \- KeyAttr is set to say { part => 'partnum' } but the \s-1XML\s0 contains |
| 1106 | one or more <part> elements without a 'partnum' attribute (or nested |
| 1107 | element). Note: if strict mode is not set but \-w is, this condition triggers a |
| 1108 | warning. |
| 1109 | .IP "\(bu" 4 |
| 1110 | Data error \- as above, but value of key attribute (eg: partnum) is not a |
| 1111 | scalar string (due to nested elements etc). This will also trigger a warning |
| 1112 | if strict mode is not enabled. |
| 1113 | .SH "SAX SUPPORT" |
| 1114 | .IX Header "SAX SUPPORT" |
| 1115 | From version 1.08_01, \fBXML::Simple\fR includes support for \s-1SAX\s0 (the Simple \s-1API\s0 |
| 1116 | for \s-1XML\s0) \- specifically \s-1SAX2\s0. |
| 1117 | .PP |
| 1118 | In a typical \s-1SAX\s0 application, an \s-1XML\s0 parser (or \s-1SAX\s0 'driver') module generates |
| 1119 | \&\s-1SAX\s0 events (start of element, character data, end of element, etc) as it parses |
| 1120 | an \s-1XML\s0 document and a 'handler' module processes the events to extract the |
| 1121 | required data. This simple model allows for some interesting and powerful |
| 1122 | possibilities: |
| 1123 | .IP "\(bu" 4 |
| 1124 | Applications written to the \s-1SAX\s0 \s-1API\s0 can extract data from huge \s-1XML\s0 documents |
| 1125 | without the memory overheads of a \s-1DOM\s0 or tree \s-1API\s0. |
| 1126 | .IP "\(bu" 4 |
| 1127 | The \s-1SAX\s0 \s-1API\s0 allows for plug and play interchange of parser modules without |
| 1128 | having to change your code to fit a new module's \s-1API\s0. A number of \s-1SAX\s0 parsers |
| 1129 | are available with capabilities ranging from extreme portability to blazing |
| 1130 | performance. |
| 1131 | .IP "\(bu" 4 |
| 1132 | A \s-1SAX\s0 'filter' module can implement both a handler interface for receiving |
| 1133 | data and a generator interface for passing modified data on to a downstream |
| 1134 | handler. Filters can be chained together in 'pipelines'. |
| 1135 | .IP "\(bu" 4 |
| 1136 | One filter module might split a data stream to direct data to two or more |
| 1137 | downstream handlers. |
| 1138 | .IP "\(bu" 4 |
| 1139 | Generating \s-1SAX\s0 events is not the exclusive preserve of \s-1XML\s0 parsing modules. |
| 1140 | For example, a module might extract data from a relational database using \s-1DBI\s0 |
| 1141 | and pass it on to a \s-1SAX\s0 pipeline for filtering and formatting. |
| 1142 | .PP |
| 1143 | \&\fBXML::Simple\fR can operate at either end of a \s-1SAX\s0 pipeline. For example, |
| 1144 | you can take a data structure in the form of a hashref and pass it into a |
| 1145 | \&\s-1SAX\s0 pipeline using the 'Handler' option on \f(CW\*(C`XMLout()\*(C'\fR: |
| 1146 | .PP |
| 1147 | .Vb 3 |
| 1148 | \& use XML::Simple; |
| 1149 | \& use Some::SAX::Filter; |
| 1150 | \& use XML::SAX::Writer; |
| 1151 | .Ve |
| 1152 | .PP |
| 1153 | .Vb 3 |
| 1154 | \& my $ref = { |
| 1155 | \& .... # your data here |
| 1156 | \& }; |
| 1157 | .Ve |
| 1158 | .PP |
| 1159 | .Vb 4 |
| 1160 | \& my $writer = XML::SAX::Writer->new(); |
| 1161 | \& my $filter = Some::SAX::Filter->new(Handler => $writer); |
| 1162 | \& my $simple = XML::Simple->new(Handler => $filter); |
| 1163 | \& $simple->XMLout($ref); |
| 1164 | .Ve |
| 1165 | .PP |
| 1166 | You can also put \fBXML::Simple\fR at the opposite end of the pipeline to take |
| 1167 | advantage of the simple 'tree' data structure once the relevant data has been |
| 1168 | isolated through filtering: |
| 1169 | .PP |
| 1170 | .Vb 3 |
| 1171 | \& use XML::SAX; |
| 1172 | \& use Some::SAX::Filter; |
| 1173 | \& use XML::Simple; |
| 1174 | .Ve |
| 1175 | .PP |
| 1176 | .Vb 3 |
| 1177 | \& my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']); |
| 1178 | \& my $filter = Some::SAX::Filter->new(Handler => $simple); |
| 1179 | \& my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); |
| 1180 | .Ve |
| 1181 | .PP |
| 1182 | .Vb 1 |
| 1183 | \& my $ref = $parser->parse_uri('some_huge_file.xml'); |
| 1184 | .Ve |
| 1185 | .PP |
| 1186 | .Vb 1 |
| 1187 | \& print $ref->{part}->{'555-1234'}; |
| 1188 | .Ve |
| 1189 | .PP |
| 1190 | You can build a filter by using an XML::Simple object as a handler and setting |
| 1191 | its DataHandler option to point to a routine which takes the resulting tree, |
| 1192 | modifies it and sends it off as \s-1SAX\s0 events to a downstream handler: |
| 1193 | .PP |
| 1194 | .Vb 5 |
| 1195 | \& my $writer = XML::SAX::Writer->new(); |
| 1196 | \& my $filter = XML::Simple->new( |
| 1197 | \& DataHandler => sub { |
| 1198 | \& my $simple = shift; |
| 1199 | \& my $data = shift; |
| 1200 | .Ve |
| 1201 | .PP |
| 1202 | .Vb 1 |
| 1203 | \& # Modify $data here |
| 1204 | .Ve |
| 1205 | .PP |
| 1206 | .Vb 4 |
| 1207 | \& $simple->XMLout($data, Handler => $writer); |
| 1208 | \& } |
| 1209 | \& ); |
| 1210 | \& my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); |
| 1211 | .Ve |
| 1212 | .PP |
| 1213 | .Vb 1 |
| 1214 | \& $parser->parse_uri($filename); |
| 1215 | .Ve |
| 1216 | .PP |
| 1217 | \&\fINote: In this last example, the 'Handler' option was specified in the call to |
| 1218 | \&\f(CI\*(C`XMLout()\*(C'\fI but it could also have been specified in the constructor\fR. |
| 1219 | .SH "ENVIRONMENT" |
| 1220 | .IX Header "ENVIRONMENT" |
| 1221 | If you don't care which parser module \fBXML::Simple\fR uses then skip this |
| 1222 | section entirely (it looks more complicated than it really is). |
| 1223 | .PP |
| 1224 | \&\fBXML::Simple\fR will default to using a \fB\s-1SAX\s0\fR parser if one is available or |
| 1225 | \&\fBXML::Parser\fR if \s-1SAX\s0 is not available. |
| 1226 | .PP |
| 1227 | You can dictate which parser module is used by setting either the environment |
| 1228 | variable '\s-1XML_SIMPLE_PREFERRED_PARSER\s0' or the package variable |
| 1229 | \&\f(CW$XML::Simple::PREFERRED_PARSER\fR to contain the module name. The following rules |
| 1230 | are used: |
| 1231 | .IP "\(bu" 4 |
| 1232 | The package variable takes precedence over the environment variable if both are defined. To force \fBXML::Simple\fR to ignore the environment settings and use |
| 1233 | its default rules, you can set the package variable to an empty string. |
| 1234 | .IP "\(bu" 4 |
| 1235 | If the 'preferred parser' is set to the string 'XML::Parser', then |
| 1236 | XML::Parser will be used (or \f(CW\*(C`XMLin()\*(C'\fR will die if XML::Parser is not |
| 1237 | installed). |
| 1238 | .IP "\(bu" 4 |
| 1239 | If the 'preferred parser' is set to some other value, then it is assumed to be |
| 1240 | the name of a \s-1SAX\s0 parser module and is passed to XML::SAX::ParserFactory. |
| 1241 | If \s-1XML::SAX\s0 is not installed, or the requested parser module is not |
| 1242 | installed, then \f(CW\*(C`XMLin()\*(C'\fR will die. |
| 1243 | .IP "\(bu" 4 |
| 1244 | If the 'preferred parser' is not defined at all (the normal default |
| 1245 | state), an attempt will be made to load \s-1XML::SAX\s0. If \s-1XML::SAX\s0 is |
| 1246 | installed, then a parser module will be selected according to |
| 1247 | XML::SAX::ParserFactory's normal rules (which typically means the last \s-1SAX\s0 |
| 1248 | parser installed). |
| 1249 | .IP "\(bu" 4 |
| 1250 | If the 'preferred parser' is not defined and \fB\s-1XML::SAX\s0\fR is not |
| 1251 | installed, then \fBXML::Parser\fR will be used. \f(CW\*(C`XMLin()\*(C'\fR will die if |
| 1252 | XML::Parser is not installed. |
| 1253 | .PP |
| 1254 | Note: The \fB\s-1XML::SAX\s0\fR distribution includes an \s-1XML\s0 parser written entirely in |
| 1255 | Perl. It is very portable but it is not very fast. You should consider |
| 1256 | installing XML::LibXML or XML::SAX::Expat if they are available for your |
| 1257 | platform. |
| 1258 | .SH "ERROR HANDLING" |
| 1259 | .IX Header "ERROR HANDLING" |
| 1260 | The \s-1XML\s0 standard is very clear on the issue of non-compliant documents. An |
| 1261 | error in parsing any single element (for example a missing end tag) must cause |
| 1262 | the whole document to be rejected. \fBXML::Simple\fR will die with an appropriate |
| 1263 | message if it encounters a parsing error. |
| 1264 | .PP |
| 1265 | If dying is not appropriate for your application, you should arrange to call |
| 1266 | \&\f(CW\*(C`XMLin()\*(C'\fR in an eval block and look for errors in $@. eg: |
| 1267 | .PP |
| 1268 | .Vb 2 |
| 1269 | \& my $config = eval { XMLin() }; |
| 1270 | \& PopUpMessage($@) if($@); |
| 1271 | .Ve |
| 1272 | .PP |
| 1273 | Note, there is a common misconception that use of \fBeval\fR will significantly |
| 1274 | slow down a script. While that may be true when the code being eval'd is in a |
| 1275 | string, it is not true of code like the sample above. |
| 1276 | .SH "EXAMPLES" |
| 1277 | .IX Header "EXAMPLES" |
| 1278 | When \f(CW\*(C`XMLin()\*(C'\fR reads the following very simple piece of \s-1XML:\s0 |
| 1279 | .PP |
| 1280 | .Vb 1 |
| 1281 | \& <opt username="testuser" password="frodo"></opt> |
| 1282 | .Ve |
| 1283 | .PP |
| 1284 | it returns the following data structure: |
| 1285 | .PP |
| 1286 | .Vb 4 |
| 1287 | \& { |
| 1288 | \& 'username' => 'testuser', |
| 1289 | \& 'password' => 'frodo' |
| 1290 | \& } |
| 1291 | .Ve |
| 1292 | .PP |
| 1293 | The identical result could have been produced with this alternative \s-1XML:\s0 |
| 1294 | .PP |
| 1295 | .Vb 1 |
| 1296 | \& <opt username="testuser" password="frodo" /> |
| 1297 | .Ve |
| 1298 | .PP |
| 1299 | Or this (although see 'ForceArray' option for variations): |
| 1300 | .PP |
| 1301 | .Vb 4 |
| 1302 | \& <opt> |
| 1303 | \& <username>testuser</username> |
| 1304 | \& <password>frodo</password> |
| 1305 | \& </opt> |
| 1306 | .Ve |
| 1307 | .PP |
| 1308 | Repeated nested elements are represented as anonymous arrays: |
| 1309 | .PP |
| 1310 | .Vb 9 |
| 1311 | \& <opt> |
| 1312 | \& <person firstname="Joe" lastname="Smith"> |
| 1313 | \& <email>joe@smith.com</email> |
| 1314 | \& <email>jsmith@yahoo.com</email> |
| 1315 | \& </person> |
| 1316 | \& <person firstname="Bob" lastname="Smith"> |
| 1317 | \& <email>bob@smith.com</email> |
| 1318 | \& </person> |
| 1319 | \& </opt> |
| 1320 | .Ve |
| 1321 | .PP |
| 1322 | .Vb 17 |
| 1323 | \& { |
| 1324 | \& 'person' => [ |
| 1325 | \& { |
| 1326 | \& 'email' => [ |
| 1327 | \& 'joe@smith.com', |
| 1328 | \& 'jsmith@yahoo.com' |
| 1329 | \& ], |
| 1330 | \& 'firstname' => 'Joe', |
| 1331 | \& 'lastname' => 'Smith' |
| 1332 | \& }, |
| 1333 | \& { |
| 1334 | \& 'email' => 'bob@smith.com', |
| 1335 | \& 'firstname' => 'Bob', |
| 1336 | \& 'lastname' => 'Smith' |
| 1337 | \& } |
| 1338 | \& ] |
| 1339 | \& } |
| 1340 | .Ve |
| 1341 | .PP |
| 1342 | Nested elements with a recognised key attribute are transformed (folded) from |
| 1343 | an array into a hash keyed on the value of that attribute (see the \f(CW\*(C`KeyAttr\*(C'\fR |
| 1344 | option): |
| 1345 | .PP |
| 1346 | .Vb 5 |
| 1347 | \& <opt> |
| 1348 | \& <person key="jsmith" firstname="Joe" lastname="Smith" /> |
| 1349 | \& <person key="tsmith" firstname="Tom" lastname="Smith" /> |
| 1350 | \& <person key="jbloggs" firstname="Joe" lastname="Bloggs" /> |
| 1351 | \& </opt> |
| 1352 | .Ve |
| 1353 | .PP |
| 1354 | .Vb 16 |
| 1355 | \& { |
| 1356 | \& 'person' => { |
| 1357 | \& 'jbloggs' => { |
| 1358 | \& 'firstname' => 'Joe', |
| 1359 | \& 'lastname' => 'Bloggs' |
| 1360 | \& }, |
| 1361 | \& 'tsmith' => { |
| 1362 | \& 'firstname' => 'Tom', |
| 1363 | \& 'lastname' => 'Smith' |
| 1364 | \& }, |
| 1365 | \& 'jsmith' => { |
| 1366 | \& 'firstname' => 'Joe', |
| 1367 | \& 'lastname' => 'Smith' |
| 1368 | \& } |
| 1369 | \& } |
| 1370 | \& } |
| 1371 | .Ve |
| 1372 | .PP |
| 1373 | The <anon> tag can be used to form anonymous arrays: |
| 1374 | .PP |
| 1375 | .Vb 6 |
| 1376 | \& <opt> |
| 1377 | \& <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head> |
| 1378 | \& <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data> |
| 1379 | \& <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data> |
| 1380 | \& <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data> |
| 1381 | \& </opt> |
| 1382 | .Ve |
| 1383 | .PP |
| 1384 | .Vb 10 |
| 1385 | \& { |
| 1386 | \& 'head' => [ |
| 1387 | \& [ 'Col 1', 'Col 2', 'Col 3' ] |
| 1388 | \& ], |
| 1389 | \& 'data' => [ |
| 1390 | \& [ 'R1C1', 'R1C2', 'R1C3' ], |
| 1391 | \& [ 'R2C1', 'R2C2', 'R2C3' ], |
| 1392 | \& [ 'R3C1', 'R3C2', 'R3C3' ] |
| 1393 | \& ] |
| 1394 | \& } |
| 1395 | .Ve |
| 1396 | .PP |
| 1397 | Anonymous arrays can be nested to arbitrary levels and as a special case, if |
| 1398 | the surrounding tags for an \s-1XML\s0 document contain only an anonymous array the |
| 1399 | arrayref will be returned directly rather than the usual hashref: |
| 1400 | .PP |
| 1401 | .Vb 5 |
| 1402 | \& <opt> |
| 1403 | \& <anon><anon>Col 1</anon><anon>Col 2</anon></anon> |
| 1404 | \& <anon><anon>R1C1</anon><anon>R1C2</anon></anon> |
| 1405 | \& <anon><anon>R2C1</anon><anon>R2C2</anon></anon> |
| 1406 | \& </opt> |
| 1407 | .Ve |
| 1408 | .PP |
| 1409 | .Vb 5 |
| 1410 | \& [ |
| 1411 | \& [ 'Col 1', 'Col 2' ], |
| 1412 | \& [ 'R1C1', 'R1C2' ], |
| 1413 | \& [ 'R2C1', 'R2C2' ] |
| 1414 | \& ] |
| 1415 | .Ve |
| 1416 | .PP |
| 1417 | Elements which only contain text content will simply be represented as a |
| 1418 | scalar. Where an element has both attributes and text content, the element |
| 1419 | will be represented as a hashref with the text content in the 'content' key |
| 1420 | (see the \f(CW\*(C`ContentKey\*(C'\fR option): |
| 1421 | .PP |
| 1422 | .Vb 4 |
| 1423 | \& <opt> |
| 1424 | \& <one>first</one> |
| 1425 | \& <two attr="value">second</two> |
| 1426 | \& </opt> |
| 1427 | .Ve |
| 1428 | .PP |
| 1429 | .Vb 4 |
| 1430 | \& { |
| 1431 | \& 'one' => 'first', |
| 1432 | \& 'two' => { 'attr' => 'value', 'content' => 'second' } |
| 1433 | \& } |
| 1434 | .Ve |
| 1435 | .PP |
| 1436 | Mixed content (elements which contain both text content and nested elements) |
| 1437 | will not be represented in a useful way \- element order and significant |
| 1438 | whitespace will be lost. If you need to work with mixed content, then |
| 1439 | XML::Simple is not the right tool for your job \- check out the next section. |
| 1440 | .SH "WHERE TO FROM HERE?" |
| 1441 | .IX Header "WHERE TO FROM HERE?" |
| 1442 | \&\fBXML::Simple\fR is able to present a simple \s-1API\s0 because it makes some |
| 1443 | assumptions on your behalf. These include: |
| 1444 | .IP "\(bu" 4 |
| 1445 | You're not interested in text content consisting only of whitespace |
| 1446 | .IP "\(bu" 4 |
| 1447 | You don't mind that when things get slurped into a hash the order is lost |
| 1448 | .IP "\(bu" 4 |
| 1449 | You don't want fine-grained control of the formatting of generated \s-1XML\s0 |
| 1450 | .IP "\(bu" 4 |
| 1451 | You would never use a hash key that was not a legal \s-1XML\s0 element name |
| 1452 | .IP "\(bu" 4 |
| 1453 | You don't need help converting between different encodings |
| 1454 | .PP |
| 1455 | In a serious \s-1XML\s0 project, you'll probably outgrow these assumptions fairly |
| 1456 | quickly. This section of the document used to offer some advice on choosing a |
| 1457 | more powerful option. That advice has now grown into the 'Perl\-XML \s-1FAQ\s0' |
| 1458 | document which you can find at: <http://perl\-xml.sourceforge.net/faq/> |
| 1459 | .PP |
| 1460 | The advice in the \s-1FAQ\s0 boils down to a quick explanation of tree versus |
| 1461 | event based parsers and then recommends: |
| 1462 | .PP |
| 1463 | For event-based parsing, use \s-1SAX\s0 (do not set out to write any new code for |
| 1464 | XML::Parser's handler \s-1API\s0 \- it is obsolete). |
| 1465 | .PP |
| 1466 | For tree-based parsing, you could choose between the 'Perlish' approach of |
| 1467 | XML::Twig and more standards-based \s-1DOM\s0 implementations \- preferably one with |
| 1468 | XPath support. |
| 1469 | .SH "SEE ALSO" |
| 1470 | .IX Header "SEE ALSO" |
| 1471 | \&\fBXML::Simple\fR requires either XML::Parser or \s-1XML::SAX\s0. |
| 1472 | .PP |
| 1473 | To generate documents with namespaces, XML::NamespaceSupport is required. |
| 1474 | .PP |
| 1475 | The optional caching functions require Storable. |
| 1476 | .PP |
| 1477 | Answers to Frequently Asked Questions about XML::Simple are bundled with this |
| 1478 | distribution as: XML::Simple::FAQ |
| 1479 | .SH "COPYRIGHT" |
| 1480 | .IX Header "COPYRIGHT" |
| 1481 | Copyright 1999\-2004 Grant McLean <grantm@cpan.org> |
| 1482 | .PP |
| 1483 | This library is free software; you can redistribute it and/or modify it |
| 1484 | under the same terms as Perl itself. |