Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | # $Id: Simple.pm,v 1.34 2006/10/30 08:28:13 grantm Exp $ |
2 | ||
3 | package XML::Simple; | |
4 | ||
5 | =head1 NAME | |
6 | ||
7 | XML::Simple - Easy API to maintain XML (esp config files) | |
8 | ||
9 | =head1 SYNOPSIS | |
10 | ||
11 | use XML::Simple; | |
12 | ||
13 | my $ref = XMLin([<xml file or string>] [, <options>]); | |
14 | ||
15 | my $xml = XMLout($hashref [, <options>]); | |
16 | ||
17 | Or the object oriented way: | |
18 | ||
19 | require XML::Simple; | |
20 | ||
21 | my $xs = XML::Simple->new(options); | |
22 | ||
23 | my $ref = $xs->XMLin([<xml file or string>] [, <options>]); | |
24 | ||
25 | my $xml = $xs->XMLout($hashref [, <options>]); | |
26 | ||
27 | (or see L<"SAX SUPPORT"> for 'the SAX way'). | |
28 | ||
29 | To catch common errors: | |
30 | ||
31 | use XML::Simple qw(:strict); | |
32 | ||
33 | (see L<"STRICT MODE"> for more details). | |
34 | ||
35 | =cut | |
36 | ||
37 | # See after __END__ for more POD documentation | |
38 | ||
39 | ||
40 | # Load essentials here, other modules loaded on demand later | |
41 | ||
42 | use strict; | |
43 | use Carp; | |
44 | require Exporter; | |
45 | ||
46 | ||
##############################################################################
# Package variables, option tables and built-in defaults
#

use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER);

$VERSION          = '2.16';
@ISA              = qw(Exporter);
@EXPORT           = qw(XMLin XMLout);
@EXPORT_OK        = qw(xml_in xml_out);
$PREFERRED_PARSER = undef;      # consulted by build_tree() when defined

# Switched on by import() when the ':strict' tag is supplied

my $StrictMode = 0;

# Option names (lowercased, underscores removed) recognised on input ...

my @KnownOptIn = qw(
  keyattr keeproot forcecontent contentkey noattr
  searchpath forcearray cache suppressempty parseropts
  grouptags nsexpand datahandler varattr variables
  normalisespace normalizespace valueattr
);

# ... and on output

my @KnownOptOut = qw(
  keyattr keeproot contentkey noattr
  rootname xmldecl outputfile noescape suppressempty
  grouptags nsexpand handler noindent attrindent nosort
  valueattr numericescape
);

# Defaults applied by handle_options() when the caller supplies no value

my @DefKeyAttr    = ('name', 'key', 'id');
my $DefRootName   = 'opt';
my $DefContentKey = 'content';
my $DefXmlDecl    = "<?xml version='1.0' standalone='yes'?>";

my $xmlns_ns       = 'http://www.w3.org/2000/xmlns/';
my $bad_def_ns_jcn = join('', '{', $xmlns_ns, '}');   # LibXML::SAX workaround


##############################################################################
# Storage for the in-memory caching schemes ('memshare' and 'memcopy').
# Each is keyed on filename; each value is [ time stored, data ].
#

my %MemShareCache;
my %MemCopyCache;
86 | ||
87 | ||
##############################################################################
# Sub: import()
#
# Stands in front of Exporter's import so that the ':strict' pseudo-tag can be
# recognised.  Seeing ':strict' switches on the file-wide $StrictMode flag
# (nothing here ever switches it off again); every other argument is handed
# on to Exporter unchanged so the symbols land in the caller's package.
#

sub import {
  my(@export_args);

  foreach my $arg (@_) {
    if($arg =~ /^:strict$/) {
      $StrictMode = 1;
    }
    else {
      push(@export_args, $arg);
    }
  }

  # Level 1 == the package which called import()

  __PACKAGE__->export_to_level(1, @export_args);
}
102 | ||
103 | ||
##############################################################################
# Constructor for optional object interface.
#
# Takes a list of name => value pairs to be used as default options for later
# XMLin()/XMLout() calls on the object.  Option names are matched without
# regard to case or underscores.  Croaks if an odd number of arguments is
# supplied or if a name is not a known input or output option.
#
# Returns a blessed hashref holding the normalised defaults under 'def_opt'.
#

sub new {
  my($class, @args) = @_;

  croak "Default options must be name=>value pairs (odd number supplied)"
    if(@args % 2);

  my %is_known = map { $_ => 1 } (@KnownOptIn, @KnownOptOut);

  my %supplied = @args;
  my %defaults;
  foreach my $name (keys(%supplied)) {
    (my $canonical = lc($name)) =~ s/_//g;
    croak "Unrecognised option: $name" unless($is_known{$canonical});
    $defaults{$canonical} = $supplied{$name};
  }

  return(bless({ def_opt => \%defaults }, $class));
}
130 | ||
131 | ||
##############################################################################
# Sub: _get_object()
#
# Helper for XMLin() and XMLout(), which may be called either as methods or
# as plain functions.  If the first argument is an XML::Simple object (or
# subclass) it is removed from @_ and returned; otherwise @_ is left alone
# and a new object with no default options is returned.
#
# Callers invoke this as '&_get_object;' so that @_ is shared - the shift
# below therefore removes the object from the *caller's* argument list.
#

sub _get_object {
  return(shift(@_))
    if($_[0] and UNIVERSAL::isa($_[0], 'XML::Simple'));

  return(XML::Simple->new());
}
150 | ||
##############################################################################
# Sub/Method: XMLin()
#
# Exported routine for slurping XML into a hashref - see pod for info.
#
# May be called as object method or as a plain function.
#
# Expects one arg for the source XML, optionally followed by a number of
# name => value option pairs.  The source may be:
#
#   undef           - look for <scriptname>.xml, searching the script's own
#                     directory first
#   a string containing something that looks like a tag - parsed as XML
#   '-'             - XML is read from STDIN
#   a reference     - passed through to the parser (eg: an IO::Handle)
#   anything else   - treated as a filename and located via find_xml_file()
#
# Caching is only attempted when parsing from a file: each configured scheme
# is asked for a cached copy (first hit wins) and a freshly parsed result is
# written back using the first configured scheme only.
#
# Returns the collapsed data structure.
#

sub XMLin {
  my $self = &_get_object;      # note, @_ is passed implicitly

  my $string = shift;

  $self->handle_options('in', @_);


  # If no XML or filename supplied, look for scriptname.xml in script directory

  unless(defined($string)) {

    # Translate scriptname[.suffix] to scriptname.xml

    require File::Basename;

    my($ScriptName, $ScriptDir, $Extension) =
      File::Basename::fileparse($0, '\.[^\.]+');

    $string = $ScriptName . '.xml';


    # Add script directory to searchpath.  A new array is built rather than
    # unshifting onto the existing one, because that arrayref may belong to
    # the caller or to the defaults given to the constructor - modifying it
    # in place would make the search path grow on every call.

    if($ScriptDir) {
      $self->{opt}->{searchpath} =
        [ $ScriptDir, @{$self->{opt}->{searchpath}} ];
    }
  }


  # Are we parsing from a file?  If so, is there a valid cache available?

  my $filename;
  unless($string =~ m{<.*?>}s  or  ref($string)  or  $string eq '-') {

    require File::Basename;
    require File::Spec;

    $filename = $self->find_xml_file($string, @{$self->{opt}->{searchpath}});

    if($self->{opt}->{cache}) {
      foreach my $scheme (@{$self->{opt}->{cache}}) {
        my $method = 'cache_read_' . $scheme;
        my $cached = $self->$method($filename);
        return($cached) if($cached);
      }
    }
  }
  else {
    delete($self->{opt}->{cache});    # caching only applies to files
    if($string eq '-') {
      # Read from standard input

      local($/) = undef;
      $string = <STDIN>;
    }
  }


  # Parsing is required, so let's get on with it

  my $tree = $self->build_tree($filename, ref($string) ? $string : \$string);
  undef($string);

  # Now work some magic on the resulting parse tree

  my($ref);
  if($self->{opt}->{keeproot}) {
    $ref = $self->collapse({}, @$tree);
  }
  else {
    $ref = $self->collapse(@{$tree->[1]});
  }

  if($self->{opt}->{cache}) {
    my $method = 'cache_write_' . $self->{opt}->{cache}->[0];
    $self->$method($ref, $filename);
  }

  return($ref);
}
243 | ||
244 | ||
##############################################################################
# Method: build_tree()
#
# This routine will be called if there is no suitable pre-parsed tree in a
# cache.  It parses the XML and returns an XML::Parser 'Tree' style data
# structure (summarised in the comments for the collapse() routine below).
#
# XML::Simple requires the services of another module that knows how to parse
# XML.  The choice is made as follows:
#
#  * $PREFERRED_PARSER, if defined, otherwise the XML_SIMPLE_PREFERRED_PARSER
#    environment variable, names the preferred parser
#  * if that preference is 'XML::Parser', build_tree_xml_parser() is used
#  * otherwise XML::SAX is loaded; if it cannot be loaded the routine falls
#    back to build_tree_xml_parser(), unless some parser was expressly
#    requested, in which case it croaks
#
# Arguments (after the object): a filename, then a 'string'.  When the
# filename is true it is what gets parsed; otherwise the 'string' is used.
# The 'string' might be a string of XML (passed by reference to save memory)
# or it might be some other kind of reference (eg: an IO::Handle), which is
# handed to the parser as a file.
#

sub build_tree {
  my($self, $filename, $string) = @_;

  my $preferred_parser = defined($PREFERRED_PARSER)
                       ? $PREFERRED_PARSER
                       : ($ENV{XML_SIMPLE_PREFERRED_PARSER} || '');

  return($self->build_tree_xml_parser($filename, $string))
    if($preferred_parser eq 'XML::Parser');

  eval { require XML::SAX; };      # We didn't need it until now
  if($@) {
    # No XML::SAX - only fall back if no SAX parser was expressly requested

    croak "XMLin() could not load XML::SAX" if($preferred_parser);

    return($self->build_tree_xml_parser($filename, $string));
  }

  $XML::SAX::ParserPackage = $preferred_parser if($preferred_parser);

  my $sp = XML::SAX::ParserFactory->parser(Handler => $self);

  $self->{nocollapse} = 1;

  my($tree);
  if($filename) {
    $tree = $sp->parse_uri($filename);
  }
  elsif(ref($string) && ref($string) ne 'SCALAR') {
    $tree = $sp->parse_file($string);
  }
  else {
    $tree = $sp->parse_string($$string);
  }

  return($tree);
}
305 | ||
306 | ||
##############################################################################
# Method: build_tree_xml_parser()
#
# Parses with XML::Parser rather than XML::SAX.  build_tree() calls this when
# XML::Parser was specifically requested or when XML::SAX could not be
# loaded.  It takes the same arguments as build_tree() (filename, then a
# reference to the XML) and returns the same data structure (XML::Parser
# 'Tree' style).
#
# Croaks if XML::Parser cannot be loaded or the file cannot be opened; emits
# a warning if the 'nsexpand' option is set.
#

sub build_tree_xml_parser {
  my($self, $filename, $string) = @_;

  eval {
    local($^W) = 0;      # Suppress warning from Expat.pm re File::Spec::load()
    require XML::Parser; # We didn't need it until now
  };
  croak "XMLin() requires either XML::SAX or XML::Parser" if($@);

  carp "'nsexpand' option requires XML::SAX" if($self->{opt}->{nsexpand});

  my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}});

  unless($filename) {
    my $tree = $xp->parse($$string);
    return($tree);
  }

  # The file is opened here and the handle passed to parse(), rather than
  # calling parsefile() - changed due to prob w/mod_perl

  local(*XML_FILE);
  open(XML_FILE, '<', $filename) || croak qq($filename - $!);
  my $tree = $xp->parse(*XML_FILE);
  close(XML_FILE);

  return($tree);
}
348 | ||
349 | ||
##############################################################################
# Method: cache_write_storable()
#
# Saves a parsed data structure to disk using Storable.  The cache file name
# is derived from the source XML filename by storable_filename().
#
# Arguments: the data structure, then the source XML filename.
#

sub cache_write_storable {
  my($self, $data, $filename) = @_;

  my $cachefile = $self->storable_filename($filename);

  require Storable;           # We didn't need it until now

  # The locking variant is used everywhere except VMS.  If the call fails for
  # you, your Storable.pm is old - upgrade

  my $store = ('VMS' eq $^O) ? \&Storable::nstore : \&Storable::lock_nstore;

  $store->($data, $cachefile);
}
373 | ||
374 | ||
##############################################################################
# Method: cache_read_storable()
#
# Reads a cached data structure back from disk using Storable.  Takes the
# source XML filename and returns the cached structure, or nothing at all
# unless the cache file is readable and has a later modification time than
# the source XML file.
#

sub cache_read_storable {
  my($self, $filename) = @_;

  my $cachefile = $self->storable_filename($filename);

  return unless(-r $cachefile);

  my $cache_mtime  = (stat($cachefile))[9];
  my $source_mtime = (stat($filename))[9];
  return unless($cache_mtime > $source_mtime);

  require Storable;           # We didn't need it until now

  # The locking variant is used everywhere except VMS

  return(Storable::retrieve($cachefile)) if('VMS' eq $^O);

  return(Storable::lock_retrieve($cachefile));
}
401 | ||
402 | ||
##############################################################################
# Method: storable_filename()
#
# Maps a source XML filename onto the name of the file used to hold the
# Storable cache for it: a trailing '.xml' (if there is one) is removed and
# '.stor' is appended.
#

sub storable_filename {
  my($self, $xml_filename) = @_;

  (my $stor_filename = $xml_filename) =~ s{(\.xml)?$}{.stor};

  return $stor_filename;
}
417 | ||
418 | ||
##############################################################################
# Method: cache_write_memshare()
#
# Remembers the supplied data structure reference in the file-wide
# %MemShareCache hash, keyed on filename and stamped with the current time.
# The reference itself is stored - no copy is made.
#

sub cache_write_memshare {
  my($self, $data, $filename) = @_;

  my @entry = (time(), $data);

  $MemShareCache{$filename} = \@entry;
}
431 | ||
432 | ||
##############################################################################
# Method: cache_read_memshare()
#
# Looks in the file-wide %MemShareCache hash for a parsed version of the
# named file.  Returns the stored reference, or nothing unless there is an
# entry which was stored after the file was last modified.
#

sub cache_read_memshare {
  my($self, $filename) = @_;

  my $entry = $MemShareCache{$filename};
  return unless($entry);

  my($stored_at, $data) = @$entry;
  return unless($stored_at > (stat($filename))[9]);

  return($data);
}
448 | ||
449 | ||
##############################################################################
# Method: cache_write_memcopy()
#
# Stores a deep copy (made with Storable::dclone) of the supplied data
# structure in the file-wide %MemCopyCache hash, keyed on filename and
# stamped with the current time.
#

sub cache_write_memcopy {
  my($self, $data, $filename) = @_;

  require Storable;           # We didn't need it until now

  my @entry = (time(), Storable::dclone($data));

  $MemCopyCache{$filename} = \@entry;
}
464 | ||
465 | ||
##############################################################################
# Method: cache_read_memcopy()
#
# Looks in the file-wide %MemCopyCache hash for a parsed version of the named
# file.  Returns a reference to a fresh deep copy of the cached structure, or
# nothing unless there is an entry which was stored after the file was last
# modified.
#

sub cache_read_memcopy {
  my($self, $filename) = @_;

  my $entry = $MemCopyCache{$filename};
  return unless($entry);

  my($stored_at, $data) = @$entry;
  return unless($stored_at > (stat($filename))[9]);

  return(Storable::dclone($data));
}
482 | ||
483 | ||
##############################################################################
# Sub/Method: XMLout()
#
# Exported routine for 'unslurping' a data structure out to XML.
#
# May be called as object method or as a plain function.
#
# Expects a reference to a data structure and an optional list of option
# name => value pairs.  Croaks if called with no data argument.
#
# What is returned depends on the options:
#
#   outputfile => reference  - the XML is printed to it and the result of
#                              that print() call is returned
#   outputfile => filename   - the XML is written to the named file; croaks
#                              if the file cannot be opened, written or
#                              closed; returns a true value
#   handler => object        - the XML is parsed by an XML::SAX parser which
#                              has the object as its handler, and whatever
#                              that parse returns is returned
#   otherwise                - the XML is returned as a string
#

sub XMLout {
  my $self = &_get_object;      # note, @_ is passed implicitly

  croak "XMLout() requires at least one argument" unless(@_);
  my $ref = shift;

  $self->handle_options('out', @_);


  # If namespace expansion is set, XML::NamespaceSupport is required

  if($self->{opt}->{nsexpand}) {
    require XML::NamespaceSupport;
    $self->{nsup}      = XML::NamespaceSupport->new();
    $self->{ns_prefix} = 'aaa';
  }


  # Wrap top level arrayref in a hash

  if(UNIVERSAL::isa($ref, 'ARRAY')) {
    $ref = { anon => $ref };
  }


  # Extract rootname from top level hash if keeproot enabled

  if($self->{opt}->{keeproot}) {
    my(@keys) = keys(%$ref);
    if(@keys == 1) {
      $ref = $ref->{$keys[0]};
      $self->{opt}->{rootname} = $keys[0];
    }
  }

  # Ensure there are no top level attributes if we're not adding root elements
  # (each non-reference value is wrapped in an arrayref)

  elsif($self->{opt}->{rootname} eq '') {
    if(UNIVERSAL::isa($ref, 'HASH')) {
      my $refsave = $ref;
      $ref = {};
      foreach (keys(%$refsave)) {
        if(ref($refsave->{$_})) {
          $ref->{$_} = $refsave->{$_};
        }
        else {
          $ref->{$_} = [ $refsave->{$_} ];
        }
      }
    }
  }


  # Encode the hashref and write to file if necessary

  $self->{_ancestors} = [];
  my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, '');
  delete $self->{_ancestors};

  if($self->{opt}->{xmldecl}) {
    $xml = $self->{opt}->{xmldecl} . "\n" . $xml;
  }

  if($self->{opt}->{outputfile}) {
    if(ref($self->{opt}->{outputfile})) {
      my $fh = $self->{opt}->{outputfile};

      # A plain glob reference has no print() method until IO::Handle is
      # loaded

      if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) {
        eval { require IO::Handle; };
        croak $@ if $@;
      }
      return($fh->print($xml));
    }
    else {
      local(*OUT);
      open(OUT, '>', "$self->{opt}->{outputfile}") ||
        croak "open($self->{opt}->{outputfile}): $!";
      binmode(OUT, ':utf8') if($] >= 5.008);

      # Low precedence 'or' is essential here: with '||' the test would be
      # applied to $xml rather than to the result of print(), so a failed
      # write would go unnoticed.  Buffered output may not actually hit the
      # disk until close(), so that is checked as well.

      print OUT $xml or croak "print: $!";
      close(OUT) or croak "close($self->{opt}->{outputfile}): $!";
    }
  }
  elsif($self->{opt}->{handler}) {
    require XML::SAX;
    my $sp = XML::SAX::ParserFactory->parser(
      Handler => $self->{opt}->{handler}
    );
    return($sp->parse_string($xml));
  }
  else {
    return($xml);
  }
}
585 | ||
586 | ||
##############################################################################
# Method: handle_options()
#
# Helper routine for both XMLin() and XMLout().  Both routines handle their
# first argument and assume all other args are options handled by this routine.
# Saves a hash of options in $self->{opt}.
#
# If default options were passed to the constructor, they will be retrieved
# here and merged with options supplied to the method call (options supplied
# to the method call take precedence).
#
# First argument should be the string 'in' or the string 'out'.
#
# Remaining arguments should be name=>value pairs.  Option names are matched
# without regard to case or underscores.  Sets up default values for options
# not supplied.  Unrecognised options are a fatal error, as is an odd number
# of arguments.
#
# On return the following are guaranteed in $self->{opt}:
#
#   rootname       - defined (possibly the empty string)
#   contentkey     - set; a leading '-' is stripped and flagged by setting
#                    'collapseagain'
#   normalisespace - defined ('normalizespace' is accepted as an alias)
#   searchpath     - an arrayref
#   cache          - if set, an arrayref of lowercase scheme names, each of
#                    which has a matching cache_read_<scheme> method
#   parseropts     - set (an empty arrayref by default)
#   forcearray     - 0, 1 or a hashref of element names (with any regexes
#                    collected in an arrayref under the '_regex' key)
#   keyattr        - an arrayref, a hashref of elem => [ attr, prefix ], or
#                    absent if an empty arrayref was supplied
#   valueattrlist  - a lookup hash, if 'valueattr' was supplied as an arrayref
#
# NOTE(review): $self->{_var_values} is only ever assigned here, never
# cleared, so it looks as though a value from an earlier call on the same
# object would survive a later call made without 'variables' or 'varattr' -
# confirm whether that is intended.
#

sub handle_options {
  my $self = shift;
  my $dirn = shift;


  # Determine valid options based on context

  my %known_opt;
  if($dirn eq 'in') {
    @known_opt{@KnownOptIn} = @KnownOptIn;
  }
  else {
    @known_opt{@KnownOptOut} = @KnownOptOut;
  }


  # Store supplied options in hashref and weed out invalid ones

  if(@_ % 2) {
    croak "Options must be name=>value pairs (odd number supplied)";
  }
  my %raw_opt  = @_;
  my $opt      = {};
  $self->{opt} = $opt;

  while(my($key, $val) = each %raw_opt) {
    my $lkey = lc($key);
    $lkey =~ s/_//g;
    croak "Unrecognised option: $key" unless($known_opt{$lkey});
    $opt->{$lkey} = $val;
  }


  # Merge in options passed to constructor

  foreach (keys(%known_opt)) {
    unless(exists($opt->{$_})) {
      if(exists($self->{def_opt}->{$_})) {
        $opt->{$_} = $self->{def_opt}->{$_};
      }
    }
  }


  # Set sensible defaults if not supplied

  if(exists($opt->{rootname})) {
    unless(defined($opt->{rootname})) {
      $opt->{rootname} = '';
    }
  }
  else {
    $opt->{rootname} = $DefRootName;
  }

  if($opt->{xmldecl}  and  $opt->{xmldecl} eq '1') {
    $opt->{xmldecl} = $DefXmlDecl;
  }

  if(exists($opt->{contentkey})) {
    if($opt->{contentkey} =~ m{^-(.*)$}) {
      $opt->{contentkey}    = $1;
      $opt->{collapseagain} = 1;
    }
  }
  else {
    $opt->{contentkey} = $DefContentKey;
  }

  unless(exists($opt->{normalisespace})) {
    $opt->{normalisespace} = $opt->{normalizespace};
  }
  $opt->{normalisespace} = 0 unless(defined($opt->{normalisespace}));

  # Cleanups for values assumed to be arrays later

  if($opt->{searchpath}) {
    unless(ref($opt->{searchpath})) {
      $opt->{searchpath} = [ $opt->{searchpath} ];
    }
  }
  else {
    $opt->{searchpath} = [ ];
  }

  if($opt->{cache}) {

    # Scheme names are lowercased into a new array.  The supplied arrayref
    # may belong to the caller or to the constructor defaults, so it must
    # not be modified in place.

    my @schemes = ref($opt->{cache}) ? @{$opt->{cache}} : ($opt->{cache});
    $opt->{cache} = [ map { lc($_) } @schemes ];

    foreach my $scheme (@{$opt->{cache}}) {
      my $method = 'cache_read_' . $scheme;
      croak "Unsupported caching scheme: $scheme"
        unless($self->can($method));
    }
  }

  if(exists($opt->{parseropts})) {
    if($^W) {
      carp "Warning: " .
           "'ParserOpts' is deprecated, contact the author if you need it";
    }
  }
  else {
    $opt->{parseropts} = [ ];
  }


  # Special cleanup for {forcearray} which could be regex, arrayref or boolean
  # or left to default to 0

  if(exists($opt->{forcearray})) {
    if(ref($opt->{forcearray}) eq 'Regexp') {
      $opt->{forcearray} = [ $opt->{forcearray} ];
    }

    if(ref($opt->{forcearray}) eq 'ARRAY') {
      my @force_list = @{$opt->{forcearray}};
      if(@force_list) {
        $opt->{forcearray} = {};
        foreach my $tag (@force_list) {
          if(ref($tag) eq 'Regexp') {
            push @{$opt->{forcearray}->{_regex}}, $tag;
          }
          else {
            $opt->{forcearray}->{$tag} = 1;
          }
        }
      }
      else {
        $opt->{forcearray} = 0;
      }
    }
    else {
      $opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 );
    }
  }
  else {
    if($StrictMode  and  $dirn eq 'in') {
      croak "No value specified for 'ForceArray' option in call to XML$dirn()";
    }
    $opt->{forcearray} = 0;
  }


  # Special cleanup for {keyattr} which could be arrayref or hashref or left
  # to default to arrayref

  if(exists($opt->{keyattr}))  {
    if(ref($opt->{keyattr})) {
      if(ref($opt->{keyattr}) eq 'HASH') {

        # Make a copy so we can mess with it

        $opt->{keyattr} = { %{$opt->{keyattr}} };


        # Convert keyattr => { elem => '+attr' }
        # to keyattr => { elem => [ 'attr', '+' ] }

        foreach my $el (keys(%{$opt->{keyattr}})) {
          if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) {
            $opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ];

            # In strict mode, every element named in KeyAttr must also be
            # covered by ForceArray

            if($StrictMode  and  $dirn eq 'in') {
              next if($opt->{forcearray} == 1);
              next if(ref($opt->{forcearray}) eq 'HASH'
                      and $opt->{forcearray}->{$el});
              croak "<$el> set in KeyAttr but not in ForceArray";
            }
          }
          else {
            delete($opt->{keyattr}->{$el}); # Never reached (famous last words?)
          }
        }
      }
      else {
        if(@{$opt->{keyattr}} == 0) {
          delete($opt->{keyattr});
        }
      }
    }
    else {
      $opt->{keyattr} = [ $opt->{keyattr} ];
    }
  }
  else  {
    if($StrictMode) {
      croak "No value specified for 'KeyAttr' option in call to XML$dirn()";
    }
    $opt->{keyattr} = [ @DefKeyAttr ];
  }


  # Special cleanup for {valueattr} which could be arrayref or hashref.
  # An arrayref is replaced by a lookup hash stored as {valueattrlist};
  # anything else is left as supplied.

  if(exists($opt->{valueattr})) {
    if(ref($opt->{valueattr}) eq 'ARRAY') {
      $opt->{valueattrlist} = {};
      $opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} });
    }
  }

  # make sure there's nothing weird in {grouptags}
  # (must be a hashref and no key may be mapped to itself)

  if($opt->{grouptags}) {
    croak "Illegal value for 'GroupTags' option - expected a hashref"
      unless UNIVERSAL::isa($opt->{grouptags}, 'HASH');

    while(my($key, $val) = each %{$opt->{grouptags}}) {
      next if $key ne $val;
      croak "Bad value in GroupTags: '$key' => '$val'";
    }
  }


  # Check the {variables} option is valid and initialise variables hash

  if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) {
    croak "Illegal value for 'Variables' option - expected a hashref";
  }

  if($opt->{variables}) {
    $self->{_var_values} = { %{$opt->{variables}} };
  }
  elsif($opt->{varattr}) {
    $self->{_var_values} = {};
  }

}
832 | ||
833 | ||
##############################################################################
# Method: find_xml_file()
#
# Helper routine for XMLin().
# Takes a filename followed by a list of directories and works out where the
# file lives:
#
#  * a name which includes a directory component is checked as-is and the
#    search path is not consulted
#  * a bare name is looked for in each directory of the search path in turn
#  * if no search path was given at all, the name is checked as-is (ie:
#    relative to the current directory)
#
# Returns the pathname of the first match; croaks on failure.
#
# File::Basename and File::Spec must already have been loaded by the caller.
#

sub find_xml_file {
  my($self, $file, @search_path) = @_;

  my($filename, $filedir) = File::Basename::fileparse($file);

  if($filename eq $file) {           # Bare filename - try each directory
    foreach my $dir (@search_path) {
      my $candidate = File::Spec->catfile($dir, $file);
      return($candidate) if(-e $candidate);
    }
  }
  elsif(-e $file) {                  # Ignore searchpath if dir component
    return($file);
  }

  croak "Could not find $file in ", join(':', @search_path)
    if(@search_path);

  # If user did not supply a search path, default to current directory

  return($file) if(-e $file);

  croak "File does not exist: $file";
}
872 | ||
873 | ||
874 | ############################################################################## | |
875 | # Method: collapse() | |
876 | # | |
877 | # Helper routine for XMLin(). This routine really comprises the 'smarts' (or | |
878 | # value add) of this module. | |
879 | # | |
880 | # Takes the parse tree that XML::Parser produced from the supplied XML and | |
881 | # recurses through it 'collapsing' unnecessary levels of indirection (nested | |
882 | # arrays etc) to produce a data structure that is easier to work with. | |
883 | # | |
884 | # Elements in the original parser tree are represented as an element name | |
885 | # followed by an arrayref. The first element of the array is a hashref | |
886 | # containing the attributes. The rest of the array contains a list of any | |
887 | # nested elements as name+arrayref pairs: | |
888 | # | |
889 | # <element name>, [ { <attribute hashref> }, <element name>, [ ... ], ... ] | |
890 | # | |
891 | # The special element name '0' (zero) flags text content. | |
892 | # | |
893 | # This routine cuts down the noise by discarding any text content consisting of | |
894 | # only whitespace and then moves the nested elements into the attribute hash | |
895 | # using the name of the nested element as the hash key and the collapsed | |
896 | # version of the nested element as the value. Multiple nested elements with | |
897 | # the same name will initially be represented as an arrayref, but this may be | |
898 | # 'folded' into a hashref depending on the value of the keyattr option. | |
899 | # | |
900 | ||
901 | sub collapse { | |
902 | my $self = shift; | |
903 | ||
904 | ||
905 | # Start with the hash of attributes | |
906 | ||
907 | my $attr = shift; | |
908 | if($self->{opt}->{noattr}) { # Discard if 'noattr' set | |
909 | $attr = {}; | |
910 | } | |
911 | elsif($self->{opt}->{normalisespace} == 2) { | |
912 | while(my($key, $value) = each %$attr) { | |
913 | $attr->{$key} = $self->normalise_space($value) | |
914 | } | |
915 | } | |
916 | ||
917 | ||
918 | # Do variable substitutions | |
919 | ||
920 | if(my $var = $self->{_var_values}) { | |
921 | while(my($key, $val) = each(%$attr)) { | |
922 | $val =~ s{\$\{([\w.]+)\}}{ $self->get_var($1) }ge; | |
923 | $attr->{$key} = $val; | |
924 | } | |
925 | } | |
926 | ||
927 | ||
928 | # Roll up 'value' attributes (but only if no nested elements) | |
929 | ||
930 | if(!@_ and keys %$attr == 1) { | |
931 | my($k) = keys %$attr; | |
932 | if($self->{opt}->{valueattrlist} and $self->{opt}->{valueattrlist}->{$k}) { | |
933 | return $attr->{$k}; | |
934 | } | |
935 | } | |
936 | ||
937 | ||
938 | # Add any nested elements | |
939 | ||
940 | my($key, $val); | |
941 | while(@_) { | |
942 | $key = shift; | |
943 | $val = shift; | |
944 | ||
945 | if(ref($val)) { | |
946 | $val = $self->collapse(@$val); | |
947 | next if(!defined($val) and $self->{opt}->{suppressempty}); | |
948 | } | |
949 | elsif($key eq '0') { | |
950 | next if($val =~ m{^\s*$}s); # Skip all whitespace content | |
951 | ||
952 | $val = $self->normalise_space($val) | |
953 | if($self->{opt}->{normalisespace} == 2); | |
954 | ||
955 | # do variable substitutions | |
956 | ||
957 | if(my $var = $self->{_var_values}) { | |
958 | $val =~ s{\$\{(\w+)\}}{ $self->get_var($1) }ge; | |
959 | } | |
960 | ||
961 | ||
962 | # look for variable definitions | |
963 | ||
964 | if(my $var = $self->{opt}->{varattr}) { | |
965 | if(exists $attr->{$var}) { | |
966 | $self->set_var($attr->{$var}, $val); | |
967 | } | |
968 | } | |
969 | ||
970 | ||
971 | # Collapse text content in element with no attributes to a string | |
972 | ||
973 | if(!%$attr and !@_) { | |
974 | return($self->{opt}->{forcecontent} ? | |
975 | { $self->{opt}->{contentkey} => $val } : $val | |
976 | ); | |
977 | } | |
978 | $key = $self->{opt}->{contentkey}; | |
979 | } | |
980 | ||
981 | ||
982 | # Combine duplicate attributes into arrayref if required | |
983 | ||
984 | if(exists($attr->{$key})) { | |
985 | if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) { | |
986 | push(@{$attr->{$key}}, $val); | |
987 | } | |
988 | else { | |
989 | $attr->{$key} = [ $attr->{$key}, $val ]; | |
990 | } | |
991 | } | |
992 | elsif(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { | |
993 | $attr->{$key} = [ $val ]; | |
994 | } | |
995 | else { | |
996 | if( $key ne $self->{opt}->{contentkey} | |
997 | and ( | |
998 | ($self->{opt}->{forcearray} == 1) | |
999 | or ( | |
1000 | (ref($self->{opt}->{forcearray}) eq 'HASH') | |
1001 | and ( | |
1002 | $self->{opt}->{forcearray}->{$key} | |
1003 | or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}}) | |
1004 | ) | |
1005 | ) | |
1006 | ) | |
1007 | ) { | |
1008 | $attr->{$key} = [ $val ]; | |
1009 | } | |
1010 | else { | |
1011 | $attr->{$key} = $val; | |
1012 | } | |
1013 | } | |
1014 | ||
1015 | } | |
1016 | ||
1017 | ||
1018 | # Turn arrayrefs into hashrefs if key fields present | |
1019 | ||
1020 | if($self->{opt}->{keyattr}) { | |
1021 | while(($key,$val) = each %$attr) { | |
1022 | if(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { | |
1023 | $attr->{$key} = $self->array_to_hash($key, $val); | |
1024 | } | |
1025 | } | |
1026 | } | |
1027 | ||
1028 | ||
1029 | # disintermediate grouped tags | |
1030 | ||
1031 | if($self->{opt}->{grouptags}) { | |
1032 | while(my($key, $val) = each(%$attr)) { | |
1033 | next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); | |
1034 | next unless(exists($self->{opt}->{grouptags}->{$key})); | |
1035 | ||
1036 | my($child_key, $child_val) = %$val; | |
1037 | ||
1038 | if($self->{opt}->{grouptags}->{$key} eq $child_key) { | |
1039 | $attr->{$key}= $child_val; | |
1040 | } | |
1041 | } | |
1042 | } | |
1043 | ||
1044 | ||
1045 | # Fold hashes containing a single anonymous array up into just the array | |
1046 | ||
1047 | my $count = scalar keys %$attr; | |
1048 | if($count == 1 | |
1049 | and exists $attr->{anon} | |
1050 | and UNIVERSAL::isa($attr->{anon}, 'ARRAY') | |
1051 | ) { | |
1052 | return($attr->{anon}); | |
1053 | } | |
1054 | ||
1055 | ||
1056 | # Do the right thing if hash is empty, otherwise just return it | |
1057 | ||
1058 | if(!%$attr and exists($self->{opt}->{suppressempty})) { | |
1059 | if(defined($self->{opt}->{suppressempty}) and | |
1060 | $self->{opt}->{suppressempty} eq '') { | |
1061 | return(''); | |
1062 | } | |
1063 | return(undef); | |
1064 | } | |
1065 | ||
1066 | ||
1067 | # Roll up named elements with named nested 'value' attributes | |
1068 | ||
1069 | if($self->{opt}->{valueattr}) { | |
1070 | while(my($key, $val) = each(%$attr)) { | |
1071 | next unless($self->{opt}->{valueattr}->{$key}); | |
1072 | next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); | |
1073 | my($k) = keys %$val; | |
1074 | next unless($k eq $self->{opt}->{valueattr}->{$key}); | |
1075 | $attr->{$key} = $val->{$k}; | |
1076 | } | |
1077 | } | |
1078 | ||
1079 | return($attr) | |
1080 | ||
1081 | } | |
1082 | ||
1083 | ||
1084 | ############################################################################## | |
1085 | # Method: set_var() | |
1086 | # | |
1087 | # Called when a variable definition is encountered in the XML. (A variable | |
1088 | # definition looks like <element attrname="name">value</element> where attrname | |
1089 | # matches the varattr setting). | |
1090 | # | |
1091 | ||
sub set_var {
  my $self  = shift;
  my $name  = shift;
  my $value = shift;

  # Record (or overwrite) the variable in the per-object table that
  # get_var() reads from.  The stored value is also the return value.
  return($self->{_var_values}->{$name} = $value);
}
1097 | ||
1098 | ||
1099 | ############################################################################## | |
1100 | # Method: get_var() | |
1101 | # | |
1102 | # Called during variable substitution to get the value for the named variable. | |
1103 | # | |
1104 | ||
sub get_var {
  my $self = shift;
  my $name = shift;

  my $value = $self->{_var_values}->{$name};

  # An unknown (or undefined) variable is left in place: the original
  # '${name}' reference text is handed back so the substitution is a no-op.
  return(defined($value) ? $value : join('', '${', $name, '}'));
}
1113 | ||
1114 | ||
1115 | ############################################################################## | |
1116 | # Method: normalise_space() | |
1117 | # | |
1118 | # Strips leading and trailing whitespace and collapses sequences of whitespace | |
1119 | # characters to a single space. | |
1120 | # | |
1121 | ||
sub normalise_space {
  my $self = shift;
  my $text = shift;

  # Work on the private copy via $_ so the three edits read as a pipeline:
  # trim the front, trim the back, then squeeze each run of two or more
  # whitespace characters down to one space.
  for($text) {
    s/^\s+//s;
    s/\s+$//s;
    s/\s\s+/ /sg;
  }

  return $text;
}
1131 | ||
1132 | ||
1133 | ############################################################################## | |
1134 | # Method: array_to_hash() | |
1135 | # | |
1136 | # Helper routine for collapse(). | |
1137 | # Attempts to 'fold' an array of hashes into a hash of hashes. Returns a | |
1138 | # reference to the hash on success or the original array if folding is | |
1139 | # not possible. Behaviour is controlled by 'keyattr' option. | |
1140 | # | |
1141 | ||
sub array_to_hash {
  my($self, $name, $arrayref) = @_;

  # The result container always comes from the new_hashref() hook, and the
  # hook is invoked up front even if folding is abandoned further down.
  my $hashref = $self->new_hashref;


  # keyattr => { element => [ keyname, flag ] }

  if(ref($self->{opt}->{keyattr}) eq 'HASH') {
    return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name}));

    my($field, $flag) = @{$self->{opt}->{keyattr}->{$name}};

    foreach my $elem (@$arrayref) {

      # Every element must be a hash carrying the key field, otherwise the
      # whole array is handed back untouched (or we die in strict mode).

      unless(UNIVERSAL::isa($elem, 'HASH') and exists($elem->{$field})) {
        croak "<$name> element has no '$field' key attribute" if($StrictMode);
        carp "Warning: <$name> element has no '$field' key attribute" if($^W);
        return($arrayref);
      }

      my $id = $elem->{$field};

      # The key value is used as a hash key, so it has to be a plain scalar.

      if(ref($id)) {
        if($StrictMode) {
          croak "<$name> element has non-scalar '$field' key attribute";
        }
        if($^W) {
          carp "Warning: <$name> element has non-scalar '$field' key attribute";
        }
        return($arrayref);
      }

      if($self->{opt}->{normalisespace} == 1) {
        $id = $self->normalise_space($id);
      }

      # Store a shallow copy, then adjust the copy according to the flag:
      #   '-'  keep the key value under a "-name" key as well
      #   '+'  leave the key field in place (it is removed otherwise)

      $hashref->{$id} = { %$elem };
      my $entry = $hashref->{$id};
      $entry->{"-$field"} = $entry->{$field} if($flag eq '-');
      delete $entry->{$field} unless($flag eq '+');
    }
  }


  # keyattr => [ candidate, candidate, ... ]

  else {
    ELEMENT: foreach my $elem (@$arrayref) {
      return($arrayref) unless(UNIVERSAL::isa($elem, 'HASH'));

      # The first candidate with a defined value in this element wins

      foreach my $field (@{$self->{opt}->{keyattr}}) {
        next unless(defined($elem->{$field}));

        my $id = $elem->{$field};
        return($arrayref) if(ref($id));

        if($self->{opt}->{normalisespace} == 1) {
          $id = $self->normalise_space($id);
        }

        $hashref->{$id} = { %$elem };
        delete $hashref->{$id}->{$field};
        next ELEMENT;
      }

      return($arrayref);    # No keyfield matched
    }
  }


  # Optionally flatten entries which are now left with only a content key

  if($self->{opt}->{collapseagain}) {
    $hashref = $self->collapse_content($hashref);
  }

  return($hashref);
}
1216 | ||
1217 | ||
1218 | ############################################################################## | |
1219 | # Method: new_hashref() | |
1220 | # | |
1221 | # This is a hook routine for overriding in a sub-class. Some people believe | |
1222 | # that using Tie::IxHash here will solve order-loss problems. | |
1223 | # | |
1224 | ||
sub new_hashref {
  my($self, @pairs) = @_;

  # Default implementation: an ordinary hash seeded with any key/value pairs
  # that were passed in.  Subclasses may override this hook.
  my %fresh = @pairs;

  return \%fresh;
}
1230 | ||
1231 | ||
1232 | ############################################################################## | |
1233 | # Method: collapse_content() | |
1234 | # | |
1235 | # Helper routine for array_to_hash | |
1236 | # | |
1237 | # Arguments expected are: | |
1238 | # - an XML::Simple object | |
1239 | # - a hashref | |
1240 | # the hashref is a former array, turned into a hash by array_to_hash because | |
1241 | # of the presence of key attributes | |
1242 | # at this point collapse_content converts over-complicated structures like | |
1243 | # dir => { libexecdir => { content => '$exec_prefix/libexec' }, | |
1244 | # localstatedir => { content => '$prefix' }, | |
1245 | # } | |
1246 | # into | |
1247 | # dir => { libexecdir => '$exec_prefix/libexec', | |
1248 | # localstatedir => '$prefix', | |
1249 | # } | |
1250 | ||
sub collapse_content {
  my($self, $hashref) = @_;

  my $contentkey = $self->{opt}->{contentkey};

  # Pass 1: all-or-nothing check.  Unless every value is a plain hash whose
  # one and only key is the content key, hand the structure back unchanged.
  foreach my $entry (values %$hashref) {
    return $hashref if(ref($entry) ne 'HASH');
    return $hashref if(keys(%$entry) != 1);
    return $hashref if(!exists($entry->{$contentkey}));
  }

  # Pass 2: replace each single-key wrapper hash with the value it wraps.
  foreach my $id (keys %$hashref) {
    my $wrapper = $hashref->{$id};
    $hashref->{$id} = $wrapper->{$contentkey};
  }

  return $hashref;
}
1272 | ||
1273 | ||
1274 | ############################################################################## | |
1275 | # Method: value_to_xml() | |
1276 | # | |
1277 | # Helper routine for XMLout() - recurses through a data structure building up | |
1278 | # and returning an XML representation of that structure as a string. | |
1279 | # | |
1280 | # Arguments expected are: | |
1281 | # - the data structure to be encoded (usually a reference) | |
1282 | # - the XML tag name to use for this item | |
1283 | # - a string of spaces for use as the current indent level | |
1284 | # | |
1285 | ||
sub value_to_xml {
  # Recursively serialise one value to an XML string.
  #
  # Arguments (after the invocant):
  #   $ref    - the value to encode: a plain scalar, a hashref or an arrayref
  #   $name   - element name to wrap the value in (may be undef or '')
  #   $indent - whitespace prefix for the current nesting level
  #
  # Returns the XML text.  Croaks on circular structures and on reference
  # types other than HASH/ARRAY.

  my $self = shift;;


  # Grab the other arguments

  my($ref, $name, $indent) = @_;

  # 'and' binds more loosely than '?:', so this reads as
  #   defined($name) and ($name ne '' ? 1 : 0)
  # i.e. $named is true only for a defined, non-empty name.

  my $named = (defined($name) and $name ne '' ? 1 : 0);

  my $nl = "\n";

  # The root call is recognised by its empty indent string; this must be
  # worked out before the noindent option blanks $indent below.

  my $is_root = $indent eq '' ? 1 : 0; # Warning, dirty hack!
  if($self->{opt}->{noindent}) {
    $indent = '';
    $nl = '';
  }


  # Convert to XML

  if(ref($ref)) {
    # $self->{_ancestors} holds the references currently being serialised
    # (pushed here, popped at the end of this routine); meeting one of them
    # again means the data structure loops back on itself.
    croak "circular data structures not supported"
      if(grep($_ == $ref, @{$self->{_ancestors}}));
    push @{$self->{_ancestors}}, $ref;
  }
  else {
    # Plain scalar: emit <name>value</name>, or just the raw (unescaped)
    # value followed by the newline string when there is no element name.
    if($named) {
      return(join('',
        $indent, '<', $name, '>',
        ($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)),
        '</', $name, ">", $nl
      ));
    }
    else {
      return("$ref$nl");
    }
  }


  # Unfold hash to array if possible

  if(UNIVERSAL::isa($ref, 'HASH') # It is a hash
    and keys %$ref # and it's not empty
    and $self->{opt}->{keyattr} # and folding is enabled
    and !$is_root # and it's not the root element
  ) {
    $ref = $self->hash_to_array($name, $ref);
  }


  my @result = ();
  my($key, $value);


  # Handle hashrefs

  if(UNIVERSAL::isa($ref, 'HASH')) {

    # Reintermediate grouped values if applicable
    # (work on a copy_hash() copy so the caller's data is not modified, then
    # wrap each grouped value in a one-key hash named by the grouptags option)

    if($self->{opt}->{grouptags}) {
      $ref = $self->copy_hash($ref);
      while(my($key, $val) = each %$ref) {
        if($self->{opt}->{grouptags}->{$key}) {
          $ref->{$key} = { $self->{opt}->{grouptags}->{$key} => $val };
        }
      }
    }


    # Scan for namespace declaration attributes
    # NOTE(review): $self->{nsup} looks like a namespace-support helper object
    # (presumably XML::NamespaceSupport) set up elsewhere - confirm.

    my $nsdecls = '';
    my $default_ns_uri;
    if($self->{nsup}) {
      $ref = $self->copy_hash($ref);
      $self->{nsup}->push_context();

      # Look for default namespace declaration first

      if(exists($ref->{xmlns})) {
        $self->{nsup}->declare_prefix('', $ref->{xmlns});
        $nsdecls .= qq( xmlns="$ref->{xmlns}");
        delete($ref->{xmlns});
      }
      $default_ns_uri = $self->{nsup}->get_uri('');


      # Then check all the other keys
      # (keys in the $xmlns_ns namespace are prefix declarations: register
      # them, emit an xmlns:prefix attribute and drop them from the data)

      foreach my $qname (keys(%$ref)) {
        my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
        if($uri) {
          if($uri eq $xmlns_ns) {
            $self->{nsup}->declare_prefix($lname, $ref->{$qname});
            $nsdecls .= qq( xmlns:$lname="$ref->{$qname}");
            delete($ref->{$qname});
          }
        }
      }

      # Translate any remaining Clarkian names
      # ({uri}local becomes 'local' in the default namespace, otherwise
      # 'prefix:local', declaring a generated prefix when none is known)

      foreach my $qname (keys(%$ref)) {
        my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
        if($uri) {
          if($default_ns_uri and $uri eq $default_ns_uri) {
            $ref->{$lname} = $ref->{$qname};
            delete($ref->{$qname});
          }
          else {
            my $prefix = $self->{nsup}->get_prefix($uri);
            unless($prefix) {
              # $self->{nsup}->declare_prefix(undef, $uri);
              # $prefix = $self->{nsup}->get_prefix($uri);
              # Post-increment: use the current counter value as the prefix
              # and advance it (its starting value is set outside this sub).
              $prefix = $self->{ns_prefix}++;
              $self->{nsup}->declare_prefix($prefix, $uri);
              $nsdecls .= qq( xmlns:$prefix="$uri");
            }
            $ref->{"$prefix:$lname"} = $ref->{$qname};
            delete($ref->{$qname});
          }
        }
      }
    }


    # @result collects the start tag and its attributes; @nested collects
    # already-serialised child elements; $text_content stays undef unless
    # the element has (possibly empty) text content.

    my @nested = ();
    my $text_content = undef;
    if($named) {
      push @result, $indent, '<', $name, $nsdecls;
    }

    if(keys %$ref) {
      my $first_arg = 1;
      foreach my $key ($self->sorted_keys($name, $ref)) {
        my $value = $ref->{$key};

        # Keys with a leading '-' are never written out
        next if(substr($key, 0, 1) eq '-');

        # Undefined values: dropped when suppressempty is true; otherwise a
        # warning is issued under -w (unless suppressempty was explicitly
        # given as undef) and the value becomes empty text content, an empty
        # hash (suppressempty option present) or an empty string.
        if(!defined($value)) {
          next if $self->{opt}->{suppressempty};
          unless(exists($self->{opt}->{suppressempty})
            and !defined($self->{opt}->{suppressempty})
          ) {
            carp 'Use of uninitialized value' if($^W);
          }
          if($key eq $self->{opt}->{contentkey}) {
            $text_content = '';
          }
          else {
            $value = exists($self->{opt}->{suppressempty}) ? {} : '';
          }
        }

        # valueattr: wrap a scalar in a one-key hash so it is written as a
        # child element carrying the value in the configured attribute

        if(!ref($value)
          and $self->{opt}->{valueattr}
          and $self->{opt}->{valueattr}->{$key}
        ) {
          $value = { $self->{opt}->{valueattr}->{$key} => $value };
        }

        # References (and everything, when noattr is set) become nested
        # elements; remaining scalars become text content or attributes.

        if(ref($value) or $self->{opt}->{noattr}) {
          push @nested,
            $self->value_to_xml($value, $key, "$indent ");
        }
        else {
          $value = $self->escape_value($value) unless($self->{opt}->{noescape});
          if($key eq $self->{opt}->{contentkey}) {
            $text_content = $value;
          }
          else {
            # With attrindent, every attribute after the first starts on a
            # new line, padded to line up under the first one
            push @result, "\n$indent " . ' ' x length($name)
              if($self->{opt}->{attrindent} and !$first_arg);
            push @result, ' ', $key, '="', $value , '"';
            $first_arg = 0;
          }
        }
      }
    }
    else {
      # Empty hash: written as an element with empty text content
      $text_content = '';
    }

    # Close the element: <name ...>text/children</name> when there is any
    # content, otherwise the self-closing form.

    if(@nested or defined($text_content)) {
      if($named) {
        push @result, ">";
        if(defined($text_content)) {
          push @result, $text_content;
          # The first child follows the text directly, so strip its
          # leading indentation
          $nested[0] =~ s/^\s+// if(@nested);
        }
        else {
          push @result, $nl;
        }
        if(@nested) {
          push @result, @nested, $indent;
        }
        push @result, '</', $name, ">", $nl;
      }
      else {
        push @result, @nested; # Special case if no root elements
      }
    }
    else {
      push @result, " />", $nl;
    }
    $self->{nsup}->pop_context() if($self->{nsup});
  }


  # Handle arrayrefs
  # (each item is written as its own <name> element: scalars directly,
  # hashes via recursion, and any other reference wrapped in <name> with
  # the inner value serialised under the element name 'anon')

  elsif(UNIVERSAL::isa($ref, 'ARRAY')) {
    foreach $value (@$ref) {
      next if !defined($value) and $self->{opt}->{suppressempty};
      if(!ref($value)) {
        push @result,
          $indent, '<', $name, '>',
          ($self->{opt}->{noescape} ? $value : $self->escape_value($value)),
          '</', $name, ">$nl";
      }
      elsif(UNIVERSAL::isa($value, 'HASH')) {
        push @result, $self->value_to_xml($value, $name, $indent);
      }
      else {
        push @result,
          $indent, '<', $name, ">$nl",
          $self->value_to_xml($value, 'anon', "$indent "),
          $indent, '</', $name, ">$nl";
      }
    }
  }

  else {
    croak "Can't encode a value of type: " . ref($ref);
  }


  # Done with this reference: take it off the ancestor stack again

  pop @{$self->{_ancestors}} if(ref($ref));

  return(join('', @result));
}
1527 | ||
1528 | ||
1529 | ############################################################################## | |
1530 | # Method: sorted_keys() | |
1531 | # | |
1532 | # Returns the keys of the referenced hash sorted into alphabetical order, but | |
1533 | # with the 'key' key (as in KeyAttr) first, if there is one. | |
1534 | # | |
1535 | ||
sub sorted_keys {
  my $self = shift;
  my($name, $ref) = @_;

  # With the nosort option the hash's own key order is used as-is

  if($self->{opt}->{nosort}) {
    return keys %$ref;
  }

  # Work on a copy so the chosen leading key can be removed before sorting

  my %remaining = %$ref;
  my $keyattr = $self->{opt}->{keyattr};
  my @leading;

  if(ref $keyattr eq 'HASH') {
    # Per-element key attribute: [ keyname, flag ] looked up by element name
    if(exists $keyattr->{$name}) {
      my $id_field = $keyattr->{$name}->[0];
      if(exists $remaining{$id_field}) {
        @leading = ($id_field);
        delete $remaining{$id_field};
      }
    }
  }
  elsif(ref $keyattr eq 'ARRAY') {
    # List of candidate key attributes: the first one present goes up front
    foreach my $candidate (@$keyattr) {
      next unless(exists $remaining{$candidate});
      @leading = ($candidate);
      delete $remaining{$candidate};
      last;
    }
  }

  return(@leading, sort keys %remaining);
}
1564 | ||
1565 | ############################################################################## | |
1566 | # Method: escape_value() | |
1567 | # | |
1568 | # Helper routine for automatically escaping values for XMLout(). | |
1569 | # Expects a scalar data value. Returns escaped version. | |
1570 | # | |
1571 | ||
sub escape_value {
  # Escape a scalar for inclusion in XML text content or in a double-quoted
  # attribute value.
  #
  # Arguments (after the invocant):
  #   $data - the scalar to escape (undef is treated as the empty string)
  #
  # Returns the escaped string.  When the numericescape option is set, the
  # result is additionally passed through numeric_escape().

  my($self, $data) = @_;

  return '' unless(defined($data));

  # Replace the four markup-significant characters with the corresponding
  # predefined entity references.  A single pass over the string means an
  # ampersand produced by one replacement can never be escaped a second time.

  my %entity_name = (
    '&' => 'amp',
    '<' => 'lt',
    '>' => 'gt',
    '"' => 'quot',
  );
  $data =~ s/([&<>"])/&$entity_name{$1};/sg;

  my $level = $self->{opt}->{numericescape} or return $data;

  return $self->numeric_escape($data, $level);
}
1586 | ||
sub numeric_escape {
  my $self  = shift;
  my $data  = shift;
  my $level = shift;   # accepted from the caller but not consulted below

  use utf8; # required for 5.6

  # The numericescape option picks which characters are rewritten as decimal
  # character references: '2' covers everything above 0x7F, any other value
  # only the characters above 0xFF.

  my $to_escape = ($self->{opt}->{numericescape} eq '2')
                ? qr/([^\x00-\x7F])/
                : qr/([^\x00-\xFF])/;

  $data =~ s/$to_escape/'&#' . ord($1) . ';'/gse;

  return $data;
}
1601 | ||
1602 | ||
1603 | ############################################################################## | |
1604 | # Method: hash_to_array() | |
1605 | # | |
1606 | # Helper routine for value_to_xml(). | |
1607 | # Attempts to 'unfold' a hash of hashes into an array of hashes. Returns a | |
1608 | # reference to the array on success or the original hash if unfolding is | |
1609 | # not possible. | |
1610 | # | |
1611 | ||
sub hash_to_array {
  my($self, $parent, $hashref) = @_;

  # Walk the outer keys in sorted order unless the nosort option is set

  my @ids = keys %$hashref;
  @ids = sort @ids unless($self->{opt}->{nosort});

  my @unfolded;

  foreach my $id (@ids) {
    my $entry = $hashref->{$id};

    # Only a hash of hashes can be unfolded; anything else is returned as-is

    return($hashref) unless(UNIVERSAL::isa($entry, 'HASH'));

    if(ref($self->{opt}->{keyattr}) eq 'HASH') {
      # keyattr => { element => [ keyname, flag ] }: the parent element must
      # have an entry, whose key name receives the outer hash key
      my $spec = $self->{opt}->{keyattr}->{$parent};
      return($hashref) unless(defined($spec));

      push @unfolded, $self->copy_hash($entry, $spec->[0] => $id);
    }
    else {
      # keyattr => [ ... ]: the first listed name receives the outer hash
      # key (a same-named key inside the entry takes precedence)
      push @unfolded, { $self->{opt}->{keyattr}->[0] => $id, %$entry };
    }
  }

  return(\@unfolded);
}
1639 | ||
1640 | ||
1641 | ############################################################################## | |
1642 | # Method: copy_hash() | |
1643 | # | |
1644 | # Helper routine for hash_to_array(). When unfolding a hash of hashes into | |
1645 | # an array of hashes, we need to copy the key from the outer hash into the | |
1646 | # inner hash. This routine makes a copy of the original hash so we don't | |
1647 | # destroy the original data structure. You might wish to override this | |
1648 | # method if you're using tied hashes and don't want them to get untied. | |
1649 | # | |
1650 | ||
sub copy_hash {
  my $self = shift;
  my $orig = shift;

  # Anything left in @_ is a list of extra key/value pairs.  They are laid
  # down first, so a key that also exists in the original hash ends up with
  # the original's value.
  my %copy = (@_, %$orig);

  return \%copy;
}
1656 | ||
1657 | ############################################################################## | |
1658 | # Methods required for building trees from SAX events | |
1659 | ############################################################################## | |
1660 | ||
sub start_document {
  my($self) = @_;

  # When used as a SAX handler the options may not have been processed yet

  if(!$self->{opt}) {
    $self->handle_options('in');
  }

  # Fresh parse state: an empty stack of enclosing lists, plus a new root
  # list which is both the tree being built and the current insertion point

  my $root = [];
  $self->{lists} = [];
  $self->{tree} = $root;
  $self->{curlist} = $root;
}
1669 | ||
1670 | ||
sub start_element {
  my($self, $element) = @_;

  my $ns_expand = $self->{opt}->{nsexpand};


  # Work out the element name: the raw name, or with nsexpand the local
  # name prefixed by '{namespace-uri}' when the element has a namespace

  my $tag;
  if($ns_expand) {
    $tag = $element->{LocalName} || '';
    $tag = '{' . $element->{NamespaceURI} . '}' . $tag
      if($element->{NamespaceURI});
  }
  else {
    $tag = $element->{Name};
  }


  # Flatten the attribute records into a simple name => value hash

  my %attributes;
  if($element->{Attributes}) { # Might be undef
    foreach my $attr (values %{$element->{Attributes}}) {
      my $attr_name;
      if($ns_expand) {
        $attr_name = $attr->{LocalName} || '';
        $attr_name = '{' . $attr->{NamespaceURI} . '}' . $attr_name
          if($attr->{NamespaceURI});

        # The expanded form of a default namespace declaration is mapped
        # back to the plain 'xmlns' attribute name
        $attr_name = 'xmlns' if($attr_name eq $bad_def_ns_jcn);
      }
      else {
        $attr_name = $attr->{Name};
      }
      $attributes{$attr_name} = $attr->{Value};
    }
  }


  # Each element is a list whose first item is its attribute hash.  Remember
  # the enclosing list, append 'name => list' to it, then descend.

  my $children = [ \%attributes ];
  push @{ $self->{lists} }, $self->{curlist};
  push @{ $self->{curlist} }, $tag, $children;
  $self->{curlist} = $children;
}
1703 | ||
1704 | ||
sub characters {
  my($self, $chars) = @_;

  my $text  = $chars->{Data};
  my $clist = $self->{curlist};
  my $last  = $#$clist;

  # Text is stored in the current list under the pseudo-name '0'.  If the
  # most recent entry is already a text entry the new data continues it.

  my $continues_text = ($last > 0 and $clist->[$last - 1] eq '0');

  if(!$continues_text) {
    push @$clist, 0 => $text;
  }
  else {
    $clist->[$last] .= $text;
  }
}
1720 | ||
1721 | ||
sub end_element {
  my($self) = @_;

  # Leaving an element: the enclosing list, saved on the stack by
  # start_element(), becomes the current list again.
  my $enclosing = pop @{ $self->{lists} };

  $self->{curlist} = $enclosing;
}
1727 | ||
1728 | ||
sub end_document {
  my($self) = @_;

  # Dismantle the parse state, keeping hold of the finished tree

  delete @{$self}{qw(curlist lists)};
  my $tree = delete $self->{tree};


  # Return tree as-is to XMLin()

  return($tree) if($self->{nocollapse});


  # Or collapse it before returning it to SAX parser class: with keeproot
  # the root element is kept as a child of an attribute-less wrapper,
  # otherwise only the root element's own contents are collapsed

  my @to_collapse = $self->{opt}->{keeproot} ? (+{}, @$tree) : @{$tree->[1]};
  $tree = $self->collapse(@to_collapse);

  # An optional datahandler callback gets the last word on the result

  my $handler = $self->{opt}->{datahandler};
  return($handler->($self, $tree)) if($handler);

  return($tree);
}
1759 | ||
# Lower-case aliases for XMLin() and XMLout(); these are the names listed in
# @EXPORT_OK, so they are only imported on explicit request.
*xml_in = \&XMLin;
*xml_out = \&XMLout;

# A module file has to finish with a true value for require/use to succeed.
1;
1764 | ||
1765 | __END__ | |
1766 | ||
1767 | =head1 QUICK START | |
1768 | ||
1769 | Say you have a script called B<foo> and a file of configuration options | |
1770 | called B<foo.xml> containing this: | |
1771 | ||
1772 | <config logdir="/var/log/foo/" debugfile="/tmp/foo.debug"> | |
1773 | <server name="sahara" osname="solaris" osversion="2.6"> | |
1774 | <address>10.0.0.101</address> | |
1775 | <address>10.0.1.101</address> | |
1776 | </server> | |
1777 | <server name="gobi" osname="irix" osversion="6.5"> | |
1778 | <address>10.0.0.102</address> | |
1779 | </server> | |
1780 | <server name="kalahari" osname="linux" osversion="2.0.34"> | |
1781 | <address>10.0.0.103</address> | |
1782 | <address>10.0.1.103</address> | |
1783 | </server> | |
1784 | </config> | |
1785 | ||
1786 | The following lines of code in B<foo>: | |
1787 | ||
1788 | use XML::Simple; | |
1789 | ||
1790 | my $config = XMLin(); | |
1791 | ||
1792 | will 'slurp' the configuration options into the hashref $config (because no | |
1793 | arguments are passed to C<XMLin()> the name and location of the XML file will | |
1794 | be inferred from name and location of the script). You can dump out the | |
1795 | contents of the hashref using Data::Dumper: | |
1796 | ||
1797 | use Data::Dumper; | |
1798 | ||
1799 | print Dumper($config); | |
1800 | ||
1801 | which will produce something like this (formatting has been adjusted for | |
1802 | brevity): | |
1803 | ||
1804 | { | |
1805 | 'logdir' => '/var/log/foo/', | |
1806 | 'debugfile' => '/tmp/foo.debug', | |
1807 | 'server' => { | |
1808 | 'sahara' => { | |
1809 | 'osversion' => '2.6', | |
1810 | 'osname' => 'solaris', | |
1811 | 'address' => [ '10.0.0.101', '10.0.1.101' ] | |
1812 | }, | |
1813 | 'gobi' => { | |
1814 | 'osversion' => '6.5', | |
1815 | 'osname' => 'irix', | |
1816 | 'address' => '10.0.0.102' | |
1817 | }, | |
1818 | 'kalahari' => { | |
1819 | 'osversion' => '2.0.34', | |
1820 | 'osname' => 'linux', | |
1821 | 'address' => [ '10.0.0.103', '10.0.1.103' ] | |
1822 | } | |
1823 | } | |
1824 | } | |
1825 | ||
1826 | Your script could then access the name of the log directory like this: | |
1827 | ||
1828 | print $config->{logdir}; | |
1829 | ||
1830 | similarly, the second address on the server 'kalahari' could be referenced as: | |
1831 | ||
1832 | print $config->{server}->{kalahari}->{address}->[1]; | |
1833 | ||
1834 | What could be simpler? (Rhetorical). | |
1835 | ||
1836 | For simple requirements, that's really all there is to it. If you want to | |
1837 | store your XML in a different directory or file, or pass it in as a string or | |
1838 | even pass it in via some derivative of an IO::Handle, you'll need to check out | |
1839 | L<"OPTIONS">. If you want to turn off or tweak the array folding feature (that | |
1840 | neat little transformation that produced $config->{server}) you'll find options | |
1841 | for that as well. | |
1842 | ||
1843 | If you want to generate XML (for example to write a modified version of | |
1844 | $config back out as XML), check out C<XMLout()>. | |
1845 | ||
1846 | If your needs are not so simple, this may not be the module for you. In that | |
1847 | case, you might want to read L<"WHERE TO FROM HERE?">. | |
1848 | ||
1849 | =head1 DESCRIPTION | |
1850 | ||
1851 | The XML::Simple module provides a simple API layer on top of an underlying XML | |
1852 | parsing module (either XML::Parser or one of the SAX2 parser modules). Two | |
1853 | functions are exported: C<XMLin()> and C<XMLout()>. Note: you can explicitly | |
1854 | request the lower case versions of the function names: C<xml_in()> and | |
1855 | C<xml_out()>. | |
1856 | ||
1857 | The simplest approach is to call these two functions directly, but an | |
1858 | optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below) | |
1859 | allows them to be called as methods of an B<XML::Simple> object. The object | |
1860 | interface can also be used at either end of a SAX pipeline. | |
1861 | ||
1862 | =head2 XMLin() | |
1863 | ||
1864 | Parses XML formatted data and returns a reference to a data structure which | |
1865 | contains the same information in a more readily accessible form. (Skip | |
1866 | down to L<"EXAMPLES"> below, for more sample code). | |
1867 | ||
1868 | C<XMLin()> accepts an optional XML specifier followed by zero or more 'name => | |
1869 | value' option pairs. The XML specifier can be one of the following: | |
1870 | ||
1871 | =over 4 | |
1872 | ||
1873 | =item A filename | |
1874 | ||
1875 | If the filename contains no directory components C<XMLin()> will look for the | |
1876 | file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the | |
1877 | current directory if the SearchPath option is not defined. eg: | |
1878 | ||
1879 | $ref = XMLin('/etc/params.xml'); | |
1880 | ||
1881 | Note, the filename '-' can be used to parse from STDIN. | |
1882 | ||
1883 | =item undef | |
1884 | ||
1885 | If there is no XML specifier, C<XMLin()> will check the script directory and | |
1886 | each of the SearchPath directories for a file with the same name as the script | |
1887 | but with the extension '.xml'. Note: if you wish to specify options, you | |
1888 | must specify the value 'undef'. eg: | |
1889 | ||
1890 | $ref = XMLin(undef, ForceArray => 1); | |
1891 | ||
1892 | =item A string of XML | |
1893 | ||
1894 | A string containing XML (recognised by the presence of '<' and '>' characters) | |
1895 | will be parsed directly. eg: | |
1896 | ||
1897 | $ref = XMLin('<opt username="bob" password="flurp" />'); | |
1898 | ||
1899 | =item An IO::Handle object | |
1900 | ||
1901 | An IO::Handle object will be read to EOF and its contents parsed. eg: | |
1902 | ||
1903 | $fh = IO::File->new('/etc/params.xml'); | |
1904 | $ref = XMLin($fh); | |
1905 | ||
1906 | =back | |
1907 | ||
1908 | =head2 XMLout() | |
1909 | ||
1910 | Takes a data structure (generally a hashref) and returns an XML encoding of | |
1911 | that structure. If the resulting XML is parsed using C<XMLin()>, it should | |
1912 | return a data structure equivalent to the original (see caveats below). | |
1913 | ||
1914 | The C<XMLout()> function can also be used to output the XML as SAX events | |
1915 | (see the C<Handler> option and L<"SAX SUPPORT"> for more details). | |
1916 | ||
1917 | When translating hashes to XML, hash keys which have a leading '-' will be | |
1918 | silently skipped. This is the approved method for marking elements of a | |
1919 | data structure which should be ignored by C<XMLout>. (Note: If these items | |
1920 | were not skipped the key names would be emitted as element or attribute names | |
1921 | with a leading '-' which would not be valid XML). | |
1922 | ||
1923 | =head2 Caveats | |
1924 | ||
1925 | Some care is required in creating data structures which will be passed to | |
1926 | C<XMLout()>. Hash keys from the data structure will be encoded as either XML | |
1927 | element names or attribute names. Therefore, you should use hash key names | |
1928 | which conform to the relatively strict XML naming rules: | |
1929 | ||
1930 | Names in XML must begin with a letter. The remaining characters may be | |
1931 | letters, digits, hyphens (-), underscores (_) or full stops (.). It is also | |
1932 | allowable to include one colon (:) in an element name but this should only be | |
1933 | used when working with namespaces (B<XML::Simple> can only usefully work with | |
1934 | namespaces when teamed with a SAX Parser). | |
1935 | ||
1936 | You can use other punctuation characters in hash values (just not in hash | |
1937 | keys) however B<XML::Simple> does not support dumping binary data. | |
1938 | ||
1939 | If you break these rules, the current implementation of C<XMLout()> will | |
1940 | simply emit non-compliant XML which will be rejected if you try to read it | |
1941 | back in. (A later version of B<XML::Simple> might take a more proactive | |
1942 | approach). | |
1943 | ||
1944 | Note also that although you can nest hashes and arrays to arbitrary levels, | |
1945 | circular data structures are not supported and will cause C<XMLout()> to die. | |
1946 | ||
1947 | If you wish to 'round-trip' arbitrary data structures from Perl to XML and back | |
1948 | to Perl, then you should probably disable array folding (using the KeyAttr | |
1949 | option) both with C<XMLout()> and with C<XMLin()>. If you still don't get the | |
1950 | expected results, you may prefer to use L<XML::Dumper> which is designed for | |
1951 | exactly that purpose. | |
1952 | ||
1953 | Refer to L<"WHERE TO FROM HERE?"> if C<XMLout()> is too simple for your needs. | |
1954 | ||
1955 | ||
1956 | =head1 OPTIONS | |
1957 | ||
1958 | B<XML::Simple> supports a number of options (in fact as each release of | |
1959 | B<XML::Simple> adds more options, the module's claim to the name 'Simple' | |
1960 | becomes increasingly tenuous). If you find yourself repeatedly having to | |
1961 | specify the same options, you might like to investigate L<"OPTIONAL OO | |
1962 | INTERFACE"> below. | |
1963 | ||
1964 | If you can't be bothered reading the documentation, refer to | |
1965 | L<"STRICT MODE"> to automatically catch common mistakes. | |
1966 | ||
1967 | Because there are so many options, it's hard for new users to know which ones | |
1968 | are important, so here are the two you really need to know about: | |
1969 | ||
1970 | =over 4 | |
1971 | ||
1972 | =item * | |
1973 | ||
1974 | check out C<ForceArray> because you'll almost certainly want to turn it on | |
1975 | ||
1976 | =item * | |
1977 | ||
1978 | make sure you know what the C<KeyAttr> option does and what its default value is | |
1979 | because it may surprise you otherwise (note in particular that 'KeyAttr' | |
1980 | affects both C<XMLin> and C<XMLout>) | |
1981 | ||
1982 | =back | |
1983 | ||
1984 | The option name headings below have a trailing 'comment' - a hash followed by | |
1985 | two pieces of metadata: | |
1986 | ||
1987 | =over 4 | |
1988 | ||
1989 | =item * | |
1990 | ||
1991 | Options are marked with 'I<in>' if they are recognised by C<XMLin()> and | |
1992 | 'I<out>' if they are recognised by C<XMLout()>. | |
1993 | ||
1994 | =item * | |
1995 | ||
1996 | Each option is also flagged to indicate whether it is: | |
1997 | ||
1998 | 'important' - don't use the module until you understand this one | |
1999 | 'handy' - you can skip this on the first time through | |
2000 | 'advanced' - you can skip this on the second time through | |
2001 | 'SAX only' - don't worry about this unless you're using SAX (or | |
2002 | alternatively if you need this, you also need SAX) | |
2003 | 'seldom used' - you'll probably never use this unless you were the | |
2004 | person that requested the feature | |
2005 | ||
2006 | =back | |
2007 | ||
2008 | The options are listed alphabetically: | |
2009 | ||
2010 | Note: option names are no longer case sensitive so you can use the mixed case | |
2011 | versions shown here; all lower case as required by versions 2.03 and earlier; | |
2012 | or you can add underscores between the words (eg: key_attr). | |
2013 | ||
2014 | ||
2015 | =head2 AttrIndent => 1 I<# out - handy> | |
2016 | ||
2017 | When you are using C<XMLout()>, enable this option to have attributes printed | |
2018 | one-per-line with sensible indentation rather than all on one line. | |
2019 | ||
2020 | =head2 Cache => [ cache schemes ] I<# in - advanced> | |
2021 | ||
2022 | Because loading the B<XML::Parser> module and parsing an XML file can consume a | |
2023 | significant number of CPU cycles, it is often desirable to cache the output of | |
2024 | C<XMLin()> for later reuse. | |
2025 | ||
2026 | When parsing from a named file, B<XML::Simple> supports a number of caching | |
2027 | schemes. The 'Cache' option may be used to specify one or more schemes (using | |
2028 | an anonymous array). Each scheme will be tried in turn in the hope of finding | |
2029 | a cached pre-parsed representation of the XML file. If no cached copy is | |
2030 | found, the file will be parsed and the first cache scheme in the list will be | |
2031 | used to save a copy of the results. The following cache schemes have been | |
2032 | implemented: | |
2033 | ||
2034 | =over 4 | |
2035 | ||
2036 | =item storable | |
2037 | ||
2038 | Utilises B<Storable.pm> to read/write a cache file with the same name as the | |
2039 | XML file but with the extension .stor | |
2040 | ||
2041 | =item memshare | |
2042 | ||
2043 | When a file is first parsed, a copy of the resulting data structure is retained | |
2044 | in memory in the B<XML::Simple> module's namespace. Subsequent calls to parse | |
2045 | the same file will return a reference to this structure. This cached version | |
2046 | will persist only for the life of the Perl interpreter (which in the case of | |
2047 | mod_perl for example, may be some significant time). | |
2048 | ||
2049 | Because each caller receives a reference to the same data structure, a change | |
2050 | made by one caller will be visible to all. For this reason, the reference | |
2051 | returned should be treated as read-only. | |
2052 | ||
2053 | =item memcopy | |
2054 | ||
2055 | This scheme works identically to 'memshare' (above) except that each caller | |
2056 | receives a reference to a new data structure which is a copy of the cached | |
2057 | version. Copying the data structure will add a little processing overhead, | |
2058 | therefore this scheme should only be used where the caller intends to modify | |
2059 | the data structure (or wishes to protect itself from others who might). This | |
2060 | scheme uses B<Storable.pm> to perform the copy. | |
2061 | ||
2062 | =back | |
2063 | ||
2064 | Warning! The memory-based caching schemes compare the timestamp on the file to | |
2065 | the time when it was last parsed. If the file is stored on an NFS filesystem | |
2066 | (or other network share) and the clock on the file server is not exactly | |
2067 | synchronised with the clock where your script is run, updates to the source XML | |
2068 | file may appear to be ignored. | |
2069 | ||
2070 | =head2 ContentKey => 'keyname' I<# in+out - seldom used> | |
2071 | ||
2072 | When text content is parsed to a hash value, this option lets you specify a | |
2073 | name for the hash key to override the default 'content'. So for example: | |
2074 | ||
2075 | XMLin('<opt one="1">Text</opt>', ContentKey => 'text') | |
2076 | ||
2077 | will parse to: | |
2078 | ||
2079 | { 'one' => 1, 'text' => 'Text' } | |
2080 | ||
2081 | instead of: | |
2082 | ||
2083 | { 'one' => 1, 'content' => 'Text' } | |
2084 | ||
2085 | C<XMLout()> will also honour the value of this option when converting a hashref | |
2086 | to XML. | |
2087 | ||
2088 | You can also prefix your selected key name with a '-' character to have | |
2089 | C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after | |
2090 | array folding. For example: | |
2091 | ||
2092 | XMLin( | |
2093 | '<opt><item name="one">First</item><item name="two">Second</item></opt>', | |
2094 | KeyAttr => {item => 'name'}, | |
2095 | ForceArray => [ 'item' ], | |
2096 | ContentKey => '-content' | |
2097 | ) | |
2098 | ||
2099 | will parse to: | |
2100 | ||
2101 | { | |
2102 | 'item' => { | |
2103 | 'one' => 'First', | |
2104 | 'two' => 'Second' | |
2105 | } | |
2106 | } | |
2107 | ||
2108 | rather than this (without the '-'): | |
2109 | ||
2110 | { | |
2111 | 'item' => { | |
2112 | 'one' => { 'content' => 'First' }, | |
2113 | 'two' => { 'content' => 'Second' } | |
2114 | } | |
2115 | } | |
2116 | ||
2117 | =head2 DataHandler => code_ref I<# in - SAX only> | |
2118 | ||
2119 | When you use an B<XML::Simple> object as a SAX handler, it will return a | |
2120 | 'simple tree' data structure in the same format as C<XMLin()> would return. If | |
2121 | this option is set (to a subroutine reference), then when the tree is built the | |
2122 | subroutine will be called and passed two arguments: a reference to the | |
2123 | B<XML::Simple> object and a reference to the data tree. The return value from | |
2124 | the subroutine will be returned to the SAX driver. (See L<"SAX SUPPORT"> for | |
2125 | more details). | |
2126 | ||
2127 | =head2 ForceArray => 1 I<# in - important> | |
2128 | ||
2129 | This option should be set to '1' to force nested elements to be represented | |
2130 | as arrays even when there is only one. Eg, with ForceArray enabled, this | |
2131 | XML: | |
2132 | ||
2133 | <opt> | |
2134 | <name>value</name> | |
2135 | </opt> | |
2136 | ||
2137 | would parse to this: | |
2138 | ||
2139 | { | |
2140 | 'name' => [ | |
2141 | 'value' | |
2142 | ] | |
2143 | } | |
2144 | ||
2145 | instead of this (the default): | |
2146 | ||
2147 | { | |
2148 | 'name' => 'value' | |
2149 | } | |
2150 | ||
2151 | This option is especially useful if the data structure is likely to be written | |
2152 | back out as XML and the default behaviour of rolling single nested elements up | |
2153 | into attributes is not desirable. | |
2154 | ||
2155 | If you are using the array folding feature, you should almost certainly enable | |
2156 | this option. If you do not, single nested elements will not be parsed to | |
2157 | arrays and therefore will not be candidates for folding to a hash. (Given that | |
2158 | the default value of 'KeyAttr' enables array folding, the default value of this | |
2159 | option should probably also have been enabled too - sorry). | |
2160 | ||
2161 | =head2 ForceArray => [ names ] I<# in - important> | |
2162 | ||
2163 | This alternative (and preferred) form of the 'ForceArray' option allows you to | |
2164 | specify a list of element names which should always be forced into an array | |
2165 | representation, rather than the 'all or nothing' approach above. | |
2166 | ||
2167 | It is also possible (since version 2.05) to include compiled regular | |
2168 | expressions in the list - any element names which match the pattern will be | |
2169 | forced to arrays. If the list contains only a single regex, then it is not | |
2170 | necessary to enclose it in an arrayref. Eg: | |
2171 | ||
2172 | ForceArray => qr/_list$/ | |
2173 | ||
2174 | =head2 ForceContent => 1 I<# in - seldom used> | |
2175 | ||
2176 | When C<XMLin()> parses elements which have text content as well as attributes, | |
2177 | the text content must be represented as a hash value rather than a simple | |
2178 | scalar. This option allows you to force text content to always parse to | |
2179 | a hash value even when there are no attributes. So for example: | |
2180 | ||
2181 | XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1) | |
2182 | ||
2183 | will parse to: | |
2184 | ||
2185 | { | |
2186 | 'x' => { 'content' => 'text1' }, | |
2187 | 'y' => { 'a' => 2, 'content' => 'text2' } | |
2188 | } | |
2189 | ||
2190 | instead of: | |
2191 | ||
2192 | { | |
2193 | 'x' => 'text1', | |
2194 | 'y' => { 'a' => 2, 'content' => 'text2' } | |
2195 | } | |
2196 | ||
2197 | =head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy> | |
2198 | ||
2199 | You can use this option to eliminate extra levels of indirection in your Perl | |
2200 | data structure. For example this XML: | |
2201 | ||
2202 | <opt> | |
2203 | <searchpath> | |
2204 | <dir>/usr/bin</dir> | |
2205 | <dir>/usr/local/bin</dir> | |
2206 | <dir>/usr/X11/bin</dir> | |
2207 | </searchpath> | |
2208 | </opt> | |
2209 | ||
2210 | Would normally be read into a structure like this: | |
2211 | ||
2212 | { | |
2213 | searchpath => { | |
2214 | dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] | |
2215 | } | |
2216 | } | |
2217 | ||
2218 | But when read in with the appropriate value for 'GroupTags': | |
2219 | ||
2220 | my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' }); | |
2221 | ||
2222 | It will return this simpler structure: | |
2223 | ||
2224 | { | |
2225 | searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] | |
2226 | } | |
2227 | ||
2228 | The grouping element (C<< <searchpath> >> in the example) must not contain any | |
2229 | attributes or elements other than the grouped element. | |
2230 | ||
2231 | You can specify multiple 'grouping element' to 'grouped element' mappings in | |
2232 | the same hashref. If this option is combined with C<KeyAttr>, the array | |
2233 | folding will occur first and then the grouped element names will be eliminated. | |
2234 | ||
2235 | C<XMLout> will also use the grouptag mappings to re-introduce the tags around | |
2236 | the grouped elements. Beware though that this will occur in all places that | |
2237 | the 'grouping tag' name occurs - you probably don't want to use the same name | |
2238 | for elements as well as attributes. | |
2239 | ||
2240 | =head2 Handler => object_ref I<# out - SAX only> | |
2241 | ||
2242 | Use the 'Handler' option to have C<XMLout()> generate SAX events rather than | |
2243 | returning a string of XML. For more details see L<"SAX SUPPORT"> below. | |
2244 | ||
2245 | Note: the current implementation of this option generates a string of XML | |
2246 | and uses a SAX parser to translate it into SAX events. The normal encoding | |
2247 | rules apply here - your data must be UTF8 encoded unless you specify an | |
2248 | alternative encoding via the 'XMLDecl' option; and by the time the data reaches | |
2249 | the handler object, it will be in UTF8 form regardless of the encoding you | |
2250 | supply. A future implementation of this option may generate the events | |
2251 | directly. | |
2252 | ||
2253 | =head2 KeepRoot => 1 I<# in+out - handy> | |
2254 | ||
2255 | In its attempt to return a data structure free of superfluous detail and | |
2256 | unnecessary levels of indirection, C<XMLin()> normally discards the root | |
2257 | element name. Setting the 'KeepRoot' option to '1' will cause the root element | |
2258 | name to be retained. So after executing this code: | |
2259 | ||
2260 | $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1) | |
2261 | ||
2262 | You'll be able to reference the tempdir as | |
2263 | C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default | |
2264 | C<$config-E<gt>{tempdir}>. | |
2265 | ||
2266 | Similarly, setting the 'KeepRoot' option to '1' will tell C<XMLout()> that the | |
2267 | data structure already contains a root element name and it is not necessary to | |
2268 | add another. | |
2269 | ||
2270 | =head2 KeyAttr => [ list ] I<# in+out - important> | |
2271 | ||
2272 | This option controls the 'array folding' feature which translates nested | |
2273 | elements from an array to a hash. It also controls the 'unfolding' of hashes | |
2274 | to arrays. | |
2275 | ||
2276 | For example, this XML: | |
2277 | ||
2278 | <opt> | |
2279 | <user login="grep" fullname="Gary R Epstein" /> | |
2280 | <user login="stty" fullname="Simon T Tyson" /> | |
2281 | </opt> | |
2282 | ||
2283 | would, by default, parse to this: | |
2284 | ||
2285 | { | |
2286 | 'user' => [ | |
2287 | { | |
2288 | 'login' => 'grep', | |
2289 | 'fullname' => 'Gary R Epstein' | |
2290 | }, | |
2291 | { | |
2292 | 'login' => 'stty', | |
2293 | 'fullname' => 'Simon T Tyson' | |
2294 | } | |
2295 | ] | |
2296 | } | |
2297 | ||
2298 | If the option 'KeyAttr => "login"' were used to specify that the 'login' | |
2299 | attribute is a key, the same XML would parse to: | |
2300 | ||
2301 | { | |
2302 | 'user' => { | |
2303 | 'stty' => { | |
2304 | 'fullname' => 'Simon T Tyson' | |
2305 | }, | |
2306 | 'grep' => { | |
2307 | 'fullname' => 'Gary R Epstein' | |
2308 | } | |
2309 | } | |
2310 | } | |
2311 | ||
2312 | The key attribute names should be supplied in an arrayref if there is more | |
2313 | than one. C<XMLin()> will attempt to match attribute names in the order | |
2314 | supplied. C<XMLout()> will use the first attribute name supplied when | |
2315 | 'unfolding' a hash into an array. | |
2316 | ||
2317 | Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do | |
2318 | not want folding on input or unfolding on output you must set this option | |
2319 | to an empty list to disable the feature. | |
2320 | ||
2321 | Note 2: If you wish to use this option, you should also enable the | |
2322 | C<ForceArray> option. Without 'ForceArray', a single nested element will be | |
2323 | rolled up into a scalar rather than an array and therefore will not be folded | |
2324 | (since only arrays get folded). | |
2325 | ||
2326 | =head2 KeyAttr => { list } I<# in+out - important> | |
2327 | ||
2328 | This alternative (and preferred) method of specifying the key attributes | |
2329 | allows more fine grained control over which elements are folded and on which | |
2330 | attributes. For example the option 'KeyAttr => { package => 'id' }' will cause | |
2331 | any package elements to be folded on the 'id' attribute. No other elements | |
2332 | which have an 'id' attribute will be folded at all. | |
2333 | ||
2334 | Note: C<XMLin()> will generate a warning (or a fatal error in L<"STRICT MODE">) | |
2335 | if this syntax is used and an element which does not have the specified key | |
2336 | attribute is encountered (eg: a 'package' element without an 'id' attribute, to | |
2337 | use the example above). Warnings will only be generated if B<-w> is in force. | |
2338 | ||
2339 | Two further variations are made possible by prefixing a '+' or a '-' character | |
2340 | to the attribute name: | |
2341 | ||
2342 | The option 'KeyAttr => { user => "+login" }' will cause this XML: | |
2343 | ||
2344 | <opt> | |
2345 | <user login="grep" fullname="Gary R Epstein" /> | |
2346 | <user login="stty" fullname="Simon T Tyson" /> | |
2347 | </opt> | |
2348 | ||
2349 | to parse to this data structure: | |
2350 | ||
2351 | { | |
2352 | 'user' => { | |
2353 | 'stty' => { | |
2354 | 'fullname' => 'Simon T Tyson', | |
2355 | 'login' => 'stty' | |
2356 | }, | |
2357 | 'grep' => { | |
2358 | 'fullname' => 'Gary R Epstein', | |
2359 | 'login' => 'grep' | |
2360 | } | |
2361 | } | |
2362 | } | |
2363 | ||
2364 | The '+' indicates that the value of the key attribute should be copied rather | |
2365 | than moved to the folded hash key. | |
2366 | ||
2367 | A '-' prefix would produce this result: | |
2368 | ||
2369 | { | |
2370 | 'user' => { | |
2371 | 'stty' => { | |
2372 | 'fullname' => 'Simon T Tyson', | |
2373 | '-login' => 'stty' | |
2374 | }, | |
2375 | 'grep' => { | |
2376 | 'fullname' => 'Gary R Epstein', | |
2377 | '-login' => 'grep' | |
2378 | } | |
2379 | } | |
2380 | } | |
2381 | ||
2382 | As described earlier, C<XMLout> will ignore hash keys starting with a '-'. | |
2383 | ||
2384 | =head2 NoAttr => 1 I<# in+out - handy> | |
2385 | ||
2386 | When used with C<XMLout()>, the generated XML will contain no attributes. | |
2387 | All hash key/values will be represented as nested elements instead. | |
2388 | ||
2389 | When used with C<XMLin()>, any attributes in the XML will be ignored. | |
2390 | ||
2391 | =head2 NoEscape => 1 I<# out - seldom used> | |
2392 | ||
2393 | By default, C<XMLout()> will translate the characters 'E<lt>', 'E<gt>', '&' and | |
2394 | '"' to '&lt;', '&gt;', '&amp;' and '&quot;' respectively. Use this option to | |
2395 | suppress escaping (presumably because you've already escaped the data in some | |
2396 | more sophisticated manner). | |
2397 | ||
2398 | =head2 NoIndent => 1 I<# out - seldom used> | |
2399 | ||
2400 | Set this option to 1 to disable C<XMLout()>'s default 'pretty printing' mode. | |
2401 | With this option enabled, the XML output will all be on one line (unless there | |
2402 | are newlines in the data) - this may be easier for downstream processing. | |
2403 | ||
2404 | =head2 NoSort => 1 I<# out - seldom used> | |
2405 | ||
2406 | Newer versions of XML::Simple sort elements and attributes alphabetically (*), | |
2407 | by default. Enable this option to suppress the sorting - possibly for | |
2408 | backwards compatibility. | |
2409 | ||
2410 | * Actually, sorting is alphabetical but 'key' attribute or element names (as in | |
2411 | 'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements | |
2412 | are sorted alphabetically by the value of the key field. | |
2413 | ||
2414 | =head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy> | |
2415 | ||
2416 | This option controls how whitespace in text content is handled. Recognised | |
2417 | values for the option are: | |
2418 | ||
2419 | =over 4 | |
2420 | ||
2421 | =item * | |
2422 | ||
2423 | 0 = (default) whitespace is passed through unaltered (except of course for the | |
2424 | normalisation of whitespace in attribute values which is mandated by the XML | |
2425 | recommendation) | |
2426 | ||
2427 | =item * | |
2428 | ||
2429 | 1 = whitespace is normalised in any value used as a hash key (normalising means | |
2430 | removing leading and trailing whitespace and collapsing sequences of whitespace | |
2431 | characters to a single space) | |
2432 | ||
2433 | =item * | |
2434 | ||
2435 | 2 = whitespace is normalised in all text content | |
2436 | ||
2437 | =back | |
2438 | ||
2439 | Note: you can spell this option with a 'z' if that is more natural for you. | |
2440 | ||
2441 | =head2 NSExpand => 1 I<# in+out handy - SAX only> | |
2442 | ||
2443 | This option controls namespace expansion - the translation of element and | |
2444 | attribute names of the form 'prefix:name' to '{uri}name'. For example the | |
2445 | element name 'xsl:template' might be expanded to: | |
2446 | '{http://www.w3.org/1999/XSL/Transform}template'. | |
2447 | ||
2448 | By default, C<XMLin()> will return element names and attribute names exactly as | |
2449 | they appear in the XML. Setting this option to 1 will cause all element and | |
2450 | attribute names to be expanded to include their namespace URI. | |
2451 | ||
2452 | I<Note: You must be using a SAX parser for this option to work (ie: it does not | |
2453 | work with XML::Parser)>. | |
2454 | ||
2455 | This option also controls whether C<XMLout()> performs the reverse translation | |
2456 | from '{uri}name' back to 'prefix:name'. The default is no translation. If | |
2457 | your data contains expanded names, you should set this option to 1 otherwise | |
2458 | C<XMLout> will emit XML which is not well formed. | |
2459 | ||
2460 | I<Note: You must have the XML::NamespaceSupport module installed if you want | |
2461 | C<XMLout()> to translate URIs back to prefixes>. | |
2462 | ||
2463 | =head2 NumericEscape => 0 | 1 | 2 I<# out - handy> | |
2464 | ||
2465 | Use this option to have 'high' (non-ASCII) characters in your Perl data | |
2466 | structure converted to numeric entities (eg: &#8364;) in the XML output. Three | |
2467 | levels are possible: | |
2468 | ||
2469 | 0 - default: no numeric escaping (OK if you're writing out UTF8) | |
2470 | ||
2471 | 1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output | |
2472 | ||
2473 | 2 - all characters above 0x7F are escaped (good for plain ASCII output) | |
2474 | ||
2475 | =head2 OutputFile => <file specifier> I<# out - handy> | |
2476 | ||
2477 | The default behaviour of C<XMLout()> is to return the XML as a string. If you | |
2478 | wish to write the XML to a file, simply supply the filename using the | |
2479 | 'OutputFile' option. | |
2480 | ||
2481 | This option also accepts an IO handle object - especially useful in Perl 5.8.0 | |
2482 | and later for output using an encoding other than UTF-8, eg: | |
2483 | ||
2484 | open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!"; | |
2485 | XMLout($ref, OutputFile => $fh); | |
2486 | ||
2487 | Note, XML::Simple does not require that the object you pass in to the | |
2488 | OutputFile option inherits from L<IO::Handle> - it simply assumes the object | |
2489 | supports a C<print> method. | |
2490 | ||
2491 | =head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this> | |
2492 | ||
2493 | I<Note: This option is now officially deprecated. If you find it useful, email | |
2494 | the author with an example of what you use it for. Do not use this option to | |
2495 | set the ProtocolEncoding, that's just plain wrong - fix the XML>. | |
2496 | ||
2497 | This option allows you to pass parameters to the constructor of the underlying | |
2498 | XML::Parser object (which of course assumes you're not using SAX). | |
2499 | ||
2500 | =head2 RootName => 'string' I<# out - handy> | |
2501 | ||
2502 | By default, when C<XMLout()> generates XML, the root element will be named | |
2503 | 'opt'. This option allows you to specify an alternative name. | |
2504 | ||
2505 | Specifying either undef or the empty string for the RootName option will | |
2506 | produce XML with no root elements. In most cases the resulting XML fragment | |
2507 | will not be 'well formed' and therefore could not be read back in by C<XMLin()>. | |
2508 | Nevertheless, the option has been found to be useful in certain circumstances. | |
2509 | ||
2510 | =head2 SearchPath => [ list ] I<# in - handy> | |
2511 | ||
2512 | If you pass C<XMLin()> a filename, but the filename includes no directory | |
2513 | component, you can use this option to specify which directories should be | |
2514 | searched to locate the file. You might use this option to search first in the | |
2515 | user's home directory, then in a global directory such as /etc. | |
2516 | ||
2517 | If a filename is provided to C<XMLin()> but SearchPath is not defined, the | |
2518 | file is assumed to be in the current directory. | |
2519 | ||
2520 | If the first parameter to C<XMLin()> is undefined, the default SearchPath | |
2521 | will contain only the directory in which the script itself is located. | |
2522 | Otherwise the default SearchPath will be empty. | |
2523 | ||
2524 | =head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy> | |
2525 | ||
2526 | This option controls what C<XMLin()> should do with empty elements (no | |
2527 | attributes and no content). The default behaviour is to represent them as | |
2528 | empty hashes. Setting this option to a true value (eg: 1) will cause empty | |
2529 | elements to be skipped altogether. Setting the option to 'undef' or the empty | |
2530 | string will cause empty elements to be represented as the undefined value or | |
2531 | the empty string respectively. The latter two alternatives are a little | |
2532 | easier to test for in your code than a hash with no keys. | |
2533 | ||
2534 | The option also controls what C<XMLout()> does with undefined values. Setting | |
2535 | the option to undef causes undefined values to be output as empty elements | |
2536 | (rather than empty attributes), it also suppresses the generation of warnings | |
2537 | about undefined values. Setting the option to a true value (eg: 1) causes | |
2538 | undefined values to be skipped altogether on output. | |
2539 | ||
2540 | =head2 ValueAttr => [ names ] I<# in - handy> | |
2541 | ||
2542 | Use this option to deal with elements which always have a single attribute and no | |
2543 | content. Eg: | |
2544 | ||
2545 | <opt> | |
2546 | <colour value="red" /> | |
2547 | <size value="XXL" /> | |
2548 | </opt> | |
2549 | ||
2550 | Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to: | |
2551 | ||
2552 | { | |
2553 | colour => 'red', | |
2554 | size => 'XXL' | |
2555 | } | |
2556 | ||
2557 | instead of this (the default): | |
2558 | ||
2559 | { | |
2560 | colour => { value => 'red' }, | |
2561 | size => { value => 'XXL' } | |
2562 | } | |
2563 | ||
2564 | Note: This form of the ValueAttr option is not compatible with C<XMLout()> - | |
2565 | since the attribute name is discarded at parse time, the original XML cannot be | |
2566 | reconstructed. | |
2567 | ||
2568 | =head2 ValueAttr => { element => attribute, ... } I<# in+out - handy> | |
2569 | ||
2570 | This (preferred) form of the ValueAttr option requires you to specify both | |
2571 | the element and the attribute names. This is not only safer, it also allows | |
2572 | the original XML to be reconstructed by C<XMLout()>. | |
2573 | ||
2574 | Note: You probably don't want to use this option and the NoAttr option at the | |
2575 | same time. | |
2576 | ||
2577 | =head2 Variables => { name => value } I<# in - handy> | |
2578 | ||
2579 | This option allows variables in the XML to be expanded when the file is read. | |
2580 | (there is no facility for putting the variable names back if you regenerate | |
2581 | XML using C<XMLout>). | |
2582 | ||
2583 | A 'variable' is any text of the form C<${name}> which occurs in an attribute | |
2584 | value or in the text content of an element. If 'name' matches a key in the | |
2585 | supplied hashref, C<${name}> will be replaced with the corresponding value from | |
2586 | the hashref. If no matching key is found, the variable will not be replaced. | |
2587 | Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are | |
2588 | allowed). | |
2589 | ||
2590 | =head2 VarAttr => 'attr_name' I<# in - handy> | |
2591 | ||
2592 | In addition to the variables defined using C<Variables>, this option allows | |
2593 | variables to be defined in the XML. A variable definition consists of an | |
2594 | element with an attribute called 'attr_name' (the value of the C<VarAttr> | |
2595 | option). The value of the attribute will be used as the variable name and the | |
2596 | text content of the element will be used as the value. A variable defined in | |
2597 | this way will override a variable defined using the C<Variables> option. For | |
2598 | example: | |
2599 | ||
2600 | XMLin( '<opt> | |
2601 | <dir name="prefix">/usr/local/apache</dir> | |
2602 | <dir name="exec_prefix">${prefix}</dir> | |
2603 | <dir name="bindir">${exec_prefix}/bin</dir> | |
2604 | </opt>', | |
2605 | VarAttr => 'name', ContentKey => '-content' | |
2606 | ); | |
2607 | ||
2608 | produces the following data structure: | |
2609 | ||
2610 | { | |
2611 | dir => { | |
2612 | prefix => '/usr/local/apache', | |
2613 | exec_prefix => '/usr/local/apache', | |
2614 | bindir => '/usr/local/apache/bin', | |
2615 | } | |
2616 | } | |
2617 | ||
2618 | =head2 XMLDecl => 1 or XMLDecl => 'string' I<# out - handy> | |
2619 | ||
2620 | If you want the output from C<XMLout()> to start with the optional XML | |
2621 | declaration, simply set the option to '1'. The default XML declaration is: | |
2622 | ||
2623 | <?xml version='1.0' standalone='yes'?> | |
2624 | ||
2625 | If you want some other string (for example to declare an encoding value), set | |
2626 | the value of this option to the complete string you require. | |
2627 | ||
2628 | ||
2629 | =head1 OPTIONAL OO INTERFACE | |
2630 | ||
2631 | The procedural interface is both simple and convenient however there are a | |
2632 | couple of reasons why you might prefer to use the object oriented (OO) | |
2633 | interface: | |
2634 | ||
2635 | =over 4 | |
2636 | ||
2637 | =item * | |
2638 | ||
2639 | to define a set of default values which should be used on all subsequent calls | |
2640 | to C<XMLin()> or C<XMLout()> | |
2641 | ||
2642 | =item * | |
2643 | ||
2644 | to override methods in B<XML::Simple> to provide customised behaviour | |
2645 | ||
2646 | =back | |
2647 | ||
2648 | The default values for the options described above are unlikely to suit | |
2649 | everyone. The OO interface allows you to effectively override B<XML::Simple>'s | |
2650 | defaults with your preferred values. It works like this: | |
2651 | ||
2652 | First create an XML::Simple parser object with your preferred defaults: | |
2653 | ||
2654 | my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1); | |
2655 | ||
2656 | then call C<XMLin()> or C<XMLout()> as a method of that object: | |
2657 | ||
2658 | my $ref = $xs->XMLin($xml); | |
2659 | my $xml = $xs->XMLout($ref); | |
2660 | ||
2661 | You can also specify options when you make the method calls and these values | |
2662 | will be merged with the values specified when the object was created. Values | |
2663 | specified in a method call take precedence. | |
2664 | ||
2665 | Overriding methods is a more advanced topic but might be useful if for example | |
2666 | you wished to provide an alternative routine for escaping character data (the | |
2667 | escape_value method) or for building the initial parse tree (the build_tree | |
2668 | method). | |
2669 | ||
2670 | Note: when called as methods, the C<XMLin()> and C<XMLout()> routines may be | |
2671 | called as C<xml_in()> or C<xml_out()>. The method names are aliased so the | |
2672 | only difference is the aesthetics. | |
2673 | ||
2674 | =head1 STRICT MODE | |
2675 | ||
2676 | If you import the B<XML::Simple> routines like this: | |
2677 | ||
2678 | use XML::Simple qw(:strict); | |
2679 | ||
2680 | the following common mistakes will be detected and treated as fatal errors: | |
2681 | ||
2682 | =over 4 | |
2683 | ||
2684 | =item * | |
2685 | ||
2686 | Failing to explicitly set the C<KeyAttr> option - if you can't be bothered | |
2687 | reading about this option, turn it off with: KeyAttr => [ ] | |
2688 | ||
2689 | =item * | |
2690 | ||
2691 | Failing to explicitly set the C<ForceArray> option - if you can't be bothered | |
2692 | reading about this option, set it to the safest mode with: ForceArray => 1 | |
2693 | ||
2694 | =item * | |
2695 | ||
2696 | Setting ForceArray to an array, but failing to list all the elements from the | |
2697 | KeyAttr hash. | |
2698 | ||
2699 | =item * | |
2700 | ||
2701 | Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains | |
2702 | one or more E<lt>partE<gt> elements without a 'partnum' attribute (or nested | |
2703 | element). Note: if strict mode is not set but -w is, this condition triggers a | |
2704 | warning. | |
2705 | ||
2706 | =item * | |
2707 | ||
2708 | Data error - as above, but value of key attribute (eg: partnum) is not a | |
2709 | scalar string (due to nested elements etc). This will also trigger a warning | |
2710 | if strict mode is not enabled. | |
2711 | ||
2712 | =back | |
2713 | ||
2714 | =head1 SAX SUPPORT | |
2715 | ||
2716 | From version 1.08_01, B<XML::Simple> includes support for SAX (the Simple API | |
2717 | for XML) - specifically SAX2. | |
2718 | ||
2719 | In a typical SAX application, an XML parser (or SAX 'driver') module generates | |
2720 | SAX events (start of element, character data, end of element, etc) as it parses | |
2721 | an XML document and a 'handler' module processes the events to extract the | |
2722 | required data. This simple model allows for some interesting and powerful | |
2723 | possibilities: | |
2724 | ||
2725 | =over 4 | |
2726 | ||
2727 | =item * | |
2728 | ||
2729 | Applications written to the SAX API can extract data from huge XML documents | |
2730 | without the memory overheads of a DOM or tree API. | |
2731 | ||
2732 | =item * | |
2733 | ||
2734 | The SAX API allows for plug and play interchange of parser modules without | |
2735 | having to change your code to fit a new module's API. A number of SAX parsers | |
2736 | are available with capabilities ranging from extreme portability to blazing | |
2737 | performance. | |
2738 | ||
2739 | =item * | |
2740 | ||
2741 | A SAX 'filter' module can implement both a handler interface for receiving | |
2742 | data and a generator interface for passing modified data on to a downstream | |
2743 | handler. Filters can be chained together in 'pipelines'. | |
2744 | ||
2745 | =item * | |
2746 | ||
2747 | One filter module might split a data stream to direct data to two or more | |
2748 | downstream handlers. | |
2749 | ||
2750 | =item * | |
2751 | ||
2752 | Generating SAX events is not the exclusive preserve of XML parsing modules. | |
2753 | For example, a module might extract data from a relational database using DBI | |
2754 | and pass it on to a SAX pipeline for filtering and formatting. | |
2755 | ||
2756 | =back | |
2757 | ||
2758 | B<XML::Simple> can operate at either end of a SAX pipeline. For example, | |
2759 | you can take a data structure in the form of a hashref and pass it into a | |
2760 | SAX pipeline using the 'Handler' option on C<XMLout()>: | |
2761 | ||
2762 | use XML::Simple; | |
2763 | use Some::SAX::Filter; | |
2764 | use XML::SAX::Writer; | |
2765 | ||
2766 | my $ref = { | |
2767 | .... # your data here | |
2768 | }; | |
2769 | ||
2770 | my $writer = XML::SAX::Writer->new(); | |
2771 | my $filter = Some::SAX::Filter->new(Handler => $writer); | |
2772 | my $simple = XML::Simple->new(Handler => $filter); | |
2773 | $simple->XMLout($ref); | |
2774 | ||
2775 | You can also put B<XML::Simple> at the opposite end of the pipeline to take | |
2776 | advantage of the simple 'tree' data structure once the relevant data has been | |
2777 | isolated through filtering: | |
2778 | ||
2779 | use XML::SAX; | |
2780 | use Some::SAX::Filter; | |
2781 | use XML::Simple; | |
2782 | ||
2783 | my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']); | |
2784 | my $filter = Some::SAX::Filter->new(Handler => $simple); | |
2785 | my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); | |
2786 | ||
2787 | my $ref = $parser->parse_uri('some_huge_file.xml'); | |
2788 | ||
2789 | print $ref->{part}->{'555-1234'}; | |
2790 | ||
2791 | You can build a filter by using an XML::Simple object as a handler and setting | |
2792 | its DataHandler option to point to a routine which takes the resulting tree, | |
2793 | modifies it and sends it off as SAX events to a downstream handler: | |
2794 | ||
2795 | my $writer = XML::SAX::Writer->new(); | |
2796 | my $filter = XML::Simple->new( | |
2797 | DataHandler => sub { | |
2798 | my $simple = shift; | |
2799 | my $data = shift; | |
2800 | ||
2801 | # Modify $data here | |
2802 | ||
2803 | $simple->XMLout($data, Handler => $writer); | |
2804 | } | |
2805 | ); | |
2806 | my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); | |
2807 | ||
2808 | $parser->parse_uri($filename); | |
2809 | ||
2810 | I<Note: In this last example, the 'Handler' option was specified in the call to | |
2811 | C<XMLout()> but it could also have been specified in the constructor>. | |
2812 | ||
2813 | =head1 ENVIRONMENT | |
2814 | ||
2815 | If you don't care which parser module B<XML::Simple> uses then skip this | |
2816 | section entirely (it looks more complicated than it really is). | |
2817 | ||
2818 | B<XML::Simple> will default to using a B<SAX> parser if one is available or | |
2819 | B<XML::Parser> if SAX is not available. | |
2820 | ||
2821 | You can dictate which parser module is used by setting either the environment | |
2822 | variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable | |
2823 | $XML::Simple::PREFERRED_PARSER to contain the module name. The following rules | |
2824 | are used: | |
2825 | ||
2826 | =over 4 | |
2827 | ||
2828 | =item * | |
2829 | ||
2830 | The package variable takes precedence over the environment variable if both are defined. To force B<XML::Simple> to ignore the environment settings and use | |
2831 | its default rules, you can set the package variable to an empty string. | |
2832 | ||
2833 | =item * | |
2834 | ||
2835 | If the 'preferred parser' is set to the string 'XML::Parser', then | |
2836 | L<XML::Parser> will be used (or C<XMLin()> will die if L<XML::Parser> is not | |
2837 | installed). | |
2838 | ||
2839 | =item * | |
2840 | ||
2841 | If the 'preferred parser' is set to some other value, then it is assumed to be | |
2842 | the name of a SAX parser module and is passed to L<XML::SAX::ParserFactory>. | |
2843 | If L<XML::SAX> is not installed, or the requested parser module is not | |
2844 | installed, then C<XMLin()> will die. | |
2845 | ||
2846 | =item * | |
2847 | ||
2848 | If the 'preferred parser' is not defined at all (the normal default | |
2849 | state), an attempt will be made to load L<XML::SAX>. If L<XML::SAX> is | |
2850 | installed, then a parser module will be selected according to | |
2851 | L<XML::SAX::ParserFactory>'s normal rules (which typically means the last SAX | |
2852 | parser installed). | |
2853 | ||
2854 | =item * | |
2855 | ||
2856 | If the 'preferred parser' is not defined and B<XML::SAX> is not | |
2857 | installed, then B<XML::Parser> will be used. C<XMLin()> will die if | |
2858 | L<XML::Parser> is not installed. | |
2859 | ||
2860 | =back | |
2861 | ||
2862 | Note: The B<XML::SAX> distribution includes an XML parser written entirely in | |
2863 | Perl. It is very portable but it is not very fast. You should consider | |
2864 | installing L<XML::LibXML> or L<XML::SAX::Expat> if they are available for your | |
2865 | platform. | |
2866 | ||
2867 | =head1 ERROR HANDLING | |
2868 | ||
2869 | The XML standard is very clear on the issue of non-compliant documents. An | |
2870 | error in parsing any single element (for example a missing end tag) must cause | |
2871 | the whole document to be rejected. B<XML::Simple> will die with an appropriate | |
2872 | message if it encounters a parsing error. | |
2873 | ||
2874 | If dying is not appropriate for your application, you should arrange to call | |
2875 | C<XMLin()> in an eval block and look for errors in $@. eg: | |
2876 | ||
2877 | my $config = eval { XMLin() }; | |
2878 | PopUpMessage($@) if($@); | |
2879 | ||
2880 | Note, there is a common misconception that use of B<eval> will significantly | |
2881 | slow down a script. While that may be true when the code being eval'd is in a | |
2882 | string, it is not true of code like the sample above. | |
2883 | ||
2884 | =head1 EXAMPLES | |
2885 | ||
2886 | When C<XMLin()> reads the following very simple piece of XML: | |
2887 | ||
2888 | <opt username="testuser" password="frodo"></opt> | |
2889 | ||
2890 | it returns the following data structure: | |
2891 | ||
2892 | { | |
2893 | 'username' => 'testuser', | |
2894 | 'password' => 'frodo' | |
2895 | } | |
2896 | ||
2897 | The identical result could have been produced with this alternative XML: | |
2898 | ||
2899 | <opt username="testuser" password="frodo" /> | |
2900 | ||
2901 | Or this (although see 'ForceArray' option for variations): | |
2902 | ||
2903 | <opt> | |
2904 | <username>testuser</username> | |
2905 | <password>frodo</password> | |
2906 | </opt> | |
2907 | ||
2908 | Repeated nested elements are represented as anonymous arrays: | |
2909 | ||
2910 | <opt> | |
2911 | <person firstname="Joe" lastname="Smith"> | |
2912 | <email>joe@smith.com</email> | |
2913 | <email>jsmith@yahoo.com</email> | |
2914 | </person> | |
2915 | <person firstname="Bob" lastname="Smith"> | |
2916 | <email>bob@smith.com</email> | |
2917 | </person> | |
2918 | </opt> | |
2919 | ||
2920 | { | |
2921 | 'person' => [ | |
2922 | { | |
2923 | 'email' => [ | |
2924 | 'joe@smith.com', | |
2925 | 'jsmith@yahoo.com' | |
2926 | ], | |
2927 | 'firstname' => 'Joe', | |
2928 | 'lastname' => 'Smith' | |
2929 | }, | |
2930 | { | |
2931 | 'email' => 'bob@smith.com', | |
2932 | 'firstname' => 'Bob', | |
2933 | 'lastname' => 'Smith' | |
2934 | } | |
2935 | ] | |
2936 | } | |
2937 | ||
2938 | Nested elements with a recognised key attribute are transformed (folded) from | |
2939 | an array into a hash keyed on the value of that attribute (see the C<KeyAttr> | |
2940 | option): | |
2941 | ||
2942 | <opt> | |
2943 | <person key="jsmith" firstname="Joe" lastname="Smith" /> | |
2944 | <person key="tsmith" firstname="Tom" lastname="Smith" /> | |
2945 | <person key="jbloggs" firstname="Joe" lastname="Bloggs" /> | |
2946 | </opt> | |
2947 | ||
2948 | { | |
2949 | 'person' => { | |
2950 | 'jbloggs' => { | |
2951 | 'firstname' => 'Joe', | |
2952 | 'lastname' => 'Bloggs' | |
2953 | }, | |
2954 | 'tsmith' => { | |
2955 | 'firstname' => 'Tom', | |
2956 | 'lastname' => 'Smith' | |
2957 | }, | |
2958 | 'jsmith' => { | |
2959 | 'firstname' => 'Joe', | |
2960 | 'lastname' => 'Smith' | |
2961 | } | |
2962 | } | |
2963 | } | |
2964 | ||
2965 | ||
2966 | The <anon> tag can be used to form anonymous arrays: | |
2967 | ||
2968 | <opt> | |
2969 | <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head> | |
2970 | <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data> | |
2971 | <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data> | |
2972 | <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data> | |
2973 | </opt> | |
2974 | ||
2975 | { | |
2976 | 'head' => [ | |
2977 | [ 'Col 1', 'Col 2', 'Col 3' ] | |
2978 | ], | |
2979 | 'data' => [ | |
2980 | [ 'R1C1', 'R1C2', 'R1C3' ], | |
2981 | [ 'R2C1', 'R2C2', 'R2C3' ], | |
2982 | [ 'R3C1', 'R3C2', 'R3C3' ] | |
2983 | ] | |
2984 | } | |
2985 | ||
2986 | Anonymous arrays can be nested to arbitrary levels and as a special case, if | |
2987 | the surrounding tags for an XML document contain only an anonymous array the | |
2988 | arrayref will be returned directly rather than the usual hashref: | |
2989 | ||
2990 | <opt> | |
2991 | <anon><anon>Col 1</anon><anon>Col 2</anon></anon> | |
2992 | <anon><anon>R1C1</anon><anon>R1C2</anon></anon> | |
2993 | <anon><anon>R2C1</anon><anon>R2C2</anon></anon> | |
2994 | </opt> | |
2995 | ||
2996 | [ | |
2997 | [ 'Col 1', 'Col 2' ], | |
2998 | [ 'R1C1', 'R1C2' ], | |
2999 | [ 'R2C1', 'R2C2' ] | |
3000 | ] | |
3001 | ||
3002 | Elements which only contain text content will simply be represented as a | |
3003 | scalar. Where an element has both attributes and text content, the element | |
3004 | will be represented as a hashref with the text content in the 'content' key | |
3005 | (see the C<ContentKey> option): | |
3006 | ||
3007 | <opt> | |
3008 | <one>first</one> | |
3009 | <two attr="value">second</two> | |
3010 | </opt> | |
3011 | ||
3012 | { | |
3013 | 'one' => 'first', | |
3014 | 'two' => { 'attr' => 'value', 'content' => 'second' } | |
3015 | } | |
3016 | ||
3017 | Mixed content (elements which contain both text content and nested elements) | |
3018 | will not be represented in a useful way - element order and significant | |
3019 | whitespace will be lost. If you need to work with mixed content, then | |
3020 | XML::Simple is not the right tool for your job - check out the next section. | |
3021 | ||
3022 | =head1 WHERE TO FROM HERE? | |
3023 | ||
3024 | B<XML::Simple> is able to present a simple API because it makes some | |
3025 | assumptions on your behalf. These include: | |
3026 | ||
3027 | =over 4 | |
3028 | ||
3029 | =item * | |
3030 | ||
3031 | You're not interested in text content consisting only of whitespace | |
3032 | ||
3033 | =item * | |
3034 | ||
3035 | You don't mind that when things get slurped into a hash the order is lost | |
3036 | ||
3037 | =item * | |
3038 | ||
3039 | You don't want fine-grained control of the formatting of generated XML | |
3040 | ||
3041 | =item * | |
3042 | ||
3043 | You would never use a hash key that was not a legal XML element name | |
3044 | ||
3045 | =item * | |
3046 | ||
3047 | You don't need help converting between different encodings | |
3048 | ||
3049 | =back | |
3050 | ||
3051 | In a serious XML project, you'll probably outgrow these assumptions fairly | |
3052 | quickly. This section of the document used to offer some advice on choosing a | |
3053 | more powerful option. That advice has now grown into the 'Perl-XML FAQ' | |
3054 | document which you can find at: L<http://perl-xml.sourceforge.net/faq/> | |
3055 | ||
3056 | The advice in the FAQ boils down to a quick explanation of tree versus | |
3057 | event based parsers and then recommends: | |
3058 | ||
3059 | For event based parsing, use SAX (do not set out to write any new code for | |
3060 | XML::Parser's handler API - it is obsolete). | |
3061 | ||
3062 | For tree-based parsing, you could choose between the 'Perlish' approach of | |
3063 | L<XML::Twig> and more standards based DOM implementations - preferably one with | |
3064 | XPath support. | |
3065 | ||
3066 | ||
3067 | =head1 SEE ALSO | |
3068 | ||
3069 | B<XML::Simple> requires either L<XML::Parser> or L<XML::SAX>. | |
3070 | ||
3071 | To generate documents with namespaces, L<XML::NamespaceSupport> is required. | |
3072 | ||
3073 | The optional caching functions require L<Storable>. | |
3074 | ||
3075 | Answers to Frequently Asked Questions about XML::Simple are bundled with this | |
3076 | distribution as: L<XML::Simple::FAQ> | |
3077 | ||
3078 | =head1 COPYRIGHT | |
3079 | ||
3080 | Copyright 1999-2004 Grant McLean E<lt>grantm@cpan.orgE<gt> | |
3081 | ||
3082 | This library is free software; you can redistribute it and/or modify it | |
3083 | under the same terms as Perl itself. | |
3084 | ||
3085 | =cut | |
3086 | ||
3087 |