git.subgeniuskitty.com - OpenSPARC-T2-SAM/.git/blame_incremental - sam-t2/devtools/v8plus/lib/perl5/site

... / ...

Commit	Line	Data
	1	# $Id: Simple.pm,v 1.34 2006/10/30 08:28:13 grantm Exp $
	2
	3	package XML::Simple;
	4
	5	=head1 NAME
	6
	7	XML::Simple - Easy API to maintain XML (esp config files)
	8
	9	=head1 SYNOPSIS
	10
	11	use XML::Simple;
	12
	13	my $ref = XMLin([<xml file or string>] [, <options>]);
	14
	15	my $xml = XMLout($hashref [, <options>]);
	16
	17	Or the object oriented way:
	18
	19	require XML::Simple;
	20
	21	my $xs = XML::Simple->new(options);
	22
	23	my $ref = $xs->XMLin([<xml file or string>] [, <options>]);
	24
	25	my $xml = $xs->XMLout($hashref [, <options>]);
	26
	27	(or see L<"SAX SUPPORT"> for 'the SAX way').
	28
	29	To catch common errors:
	30
	31	use XML::Simple qw(:strict);
	32
	33	(see L<"STRICT MODE"> for more details).
	34
	35	=cut
	36
	37	# See after __END__ for more POD documentation
	38
	39
	40	# Load essentials here, other modules loaded on demand later
	41
	42	use strict;
	43	use Carp;
	44	require Exporter;
	45
	46
	47	##############################################################################
	48	# Define some constants
	49	#
	50
		# Package globals: the three Exporter lists, the module version, and
		# $PREFERRED_PARSER, which build_tree() consults when choosing a parser.
	51	use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER);
	52
	53	@ISA = qw(Exporter);
	54	@EXPORT = qw(XMLin XMLout);
		# NOTE(review): xml_in/xml_out are not defined in the part of the file
		# visible here - presumably aliases declared further down; confirm.
	55	@EXPORT_OK = qw(xml_in xml_out);
	56	$VERSION = '2.16';
		# Left undefined by default; build_tree() then falls back to the
		# XML_SIMPLE_PREFERRED_PARSER environment variable.
	57	$PREFERRED_PARSER = undef;
	58
		# Set to 1 by import() when the ':strict' tag appears in the import list.
	59	my $StrictMode = 0;
	60
		# Option names accepted by handle_options() for direction 'in'.  Names are
		# compared after lower-casing and removing underscores.
	61	my @KnownOptIn = qw(keyattr keeproot forcecontent contentkey noattr
	62	searchpath forcearray cache suppressempty parseropts
	63	grouptags nsexpand datahandler varattr variables
	64	normalisespace normalizespace valueattr);
	65
		# Option names accepted by handle_options() for any other direction
		# (XMLout() passes 'out').  new() accepts the union of both lists.
	66	my @KnownOptOut = qw(keyattr keeproot contentkey noattr
	67	rootname xmldecl outputfile noescape suppressempty
	68	grouptags nsexpand handler noindent attrindent nosort
	69	valueattr numericescape);
	70
		# Defaults applied by handle_options(): keyattr list when none is given,
		# root element name, content key, and the declaration used when the
		# xmldecl option is exactly '1'.
	71	my @DefKeyAttr = qw(name key id);
	72	my $DefRootName = qq(opt);
	73	my $DefContentKey = qq(content);
	74	my $DefXmlDecl = qq(<?xml version='1.0' standalone='yes'?>);
	75
		# NOTE(review): neither of the next two values is referenced in the part of
		# the file visible here - presumably used by the namespace handling code.
	76	my $xmlns_ns = 'http://www.w3.org/2000/xmlns/';
	77	my $bad_def_ns_jcn = '{' . $xmlns_ns . '}'; # LibXML::SAX workaround
	78
	79
	80	##############################################################################
	81	# Globals for use by caching routines
	82	#
	83
		# Both caches are keyed by XML filename; each entry is a two element
		# arrayref of [ time the entry was written, parsed data ].  The 'memcopy'
		# cache stores a Storable::dclone() of the data, 'memshare' stores the
		# caller's reference itself.
	84	my %MemShareCache = ();
	85	my %MemCopyCache = ();
	86
	87
	##############################################################################
	# Sub: import()
	#
	# Import hook wrapped around Exporter.  A ':strict' entry in the import list
	# switches on the module-wide strict mode flag; every other argument is
	# forwarded unchanged to Exporter's export_to_level() so that symbols land in
	# the package that issued the 'use'.
	#

	sub import {
	    my @for_exporter;

	    foreach my $arg (@_) {
	        if($arg =~ /^:strict$/) {
	            $StrictMode = 1;        # tag is consumed here, never exported
	        }
	        else {
	            push(@for_exporter, $arg);
	        }
	    }

	    __PACKAGE__->export_to_level(1, @for_exporter);
	}
	102
	103
	##############################################################################
	# Method: new()
	#
	# Constructor for the optional object interface.
	#
	# Takes a list of name => value default options.  Option names are matched
	# case-insensitively and with underscores ignored, and must appear in either
	# the XMLin() or the XMLout() list of known options.  Croaks on an odd number
	# of arguments or on an unrecognised option name.
	#
	# Returns a blessed hashref holding the normalised defaults under 'def_opt'.
	#

	sub new {
	    my $class = shift;

	    croak "Default options must be name=>value pairs (odd number supplied)"
	        if(@_ % 2);

	    # Only key existence matters in this lookup table
	    my %known_opt = map { $_ => undef } (@KnownOptIn, @KnownOptOut);

	    my %raw_opt = @_;
	    my %def_opt;
	    foreach my $key (keys(%raw_opt)) {
	        (my $lkey = lc($key)) =~ s/_//g;
	        croak "Unrecognised option: $key" unless(exists($known_opt{$lkey}));
	        $def_opt{$lkey} = $raw_opt{$key};
	    }

	    return(bless({ def_opt => \%def_opt }, $class));
	}
	130
	131
	##############################################################################
	# Sub: _get_object()
	#
	# Helper for XMLin() and XMLout(), which invoke it as '&_get_object;' so that
	# it operates directly on the caller's @_.
	#
	# If the first argument is (or inherits from) XML::Simple it is shifted off
	# the argument list and returned; otherwise the argument list is left alone
	# and a new XML::Simple object with no default options is returned.
	#

	sub _get_object {
	    return(shift)
	        if($_[0]  and  UNIVERSAL::isa($_[0], 'XML::Simple'));

	    return(XML::Simple->new());
	}
	150
	##############################################################################
	# Sub/Method: XMLin()
	#
	# Exported routine for slurping XML into a hashref - see pod for info.
	#
	# May be called as an object method or as a plain function.
	#
	# Arguments: the XML source, optionally followed by name => value option
	# pairs.  The source may be:
	#   - undef       : <scriptname>.xml is looked up, with the script's directory
	#                   added to the front of the search path
	#   - a string containing '<...>' : treated as literal XML
	#   - a reference : passed through to the parser (e.g. an IO handle)
	#   - '-'         : XML is read from STDIN
	#   - anything else : a filename resolved via find_xml_file()
	#
	# Caching only applies to the filename case.  Returns the collapsed data
	# structure (or the cached copy of it).
	#

	sub XMLin {
	    my $self = &_get_object;      # bare '&' call: @_ is shared with the helper

	    my $source = shift;

	    $self->handle_options('in', @_);


	    # No source supplied: translate scriptname[.suffix] to scriptname.xml and
	    # search the script's own directory first

	    if(!defined($source)) {
	        require File::Basename;

	        my($script_name, $script_dir, $suffix) =
	            File::Basename::fileparse($0, '\.[^\.]+');

	        $source = $script_name . '.xml';

	        unshift(@{$self->{opt}->{searchpath}}, $script_dir) if($script_dir);
	    }


	    # Literal XML, a reference or STDIN cannot be cached; a filename can

	    my $filename;
	    if($source =~ m{<.*?>}s  or  ref($source)  or  $source eq '-') {
	        delete($self->{opt}->{cache});

	        if($source eq '-') {
	            local($/) = undef;    # slurp all of standard input
	            $source = <STDIN>;
	        }
	    }
	    else {
	        require File::Basename;
	        require File::Spec;

	        $filename = $self->find_xml_file($source, @{$self->{opt}->{searchpath}});

	        # Try each configured cache scheme in turn; first hit wins

	        if($self->{opt}->{cache}) {
	            foreach my $scheme (@{$self->{opt}->{cache}}) {
	                my $reader = 'cache_read_' . $scheme;
	                my $cached = $self->$reader($filename);
	                return($cached) if($cached);
	            }
	        }
	    }


	    # Parsing is required, so let's get on with it

	    my $tree = $self->build_tree($filename, ref($source) ? $source : \$source);
	    undef($source);


	    # Collapse the parse tree, keeping the root element only if asked to

	    my $data = $self->{opt}->{keeproot}
	             ? $self->collapse({}, @$tree)
	             : $self->collapse(@{$tree->[1]});


	    # Only the first listed cache scheme is written to

	    if($self->{opt}->{cache}) {
	        my $writer = 'cache_write_' . $self->{opt}->{cache}->[0];
	        $self->$writer($data, $filename);
	    }

	    return($data);
	}
	243
	244
	##############################################################################
	# Method: build_tree()
	#
	# This routine will be called if there is no suitable pre-parsed tree in a
	# cache.  It parses the XML and returns an XML::Parser 'Tree' style data
	# structure (summarised in the comments for the collapse() routine below).
	#
	# XML::Simple requires the services of another module that knows how to parse
	# XML.  If XML::SAX is installed, the default SAX parser will be used,
	# otherwise XML::Parser will be used.  The choice can be forced through
	# $XML::Simple::PREFERRED_PARSER or, when that is undefined, through the
	# XML_SIMPLE_PREFERRED_PARSER environment variable.
	#
	# Arguments:
	#   $filename - name of the file to parse, or a false value
	#   $string   - used only when $filename is false: either a reference to a
	#               scalar holding the XML text, or some other reference (such as
	#               an IO::Handle) which is handed to the parser as a file
	#
	# Croaks if a SAX parser was expressly requested but XML::SAX cannot be loaded.
	#

	sub build_tree {
	    my $self     = shift;
	    my $filename = shift;
	    my $string   = shift;


	    my $preferred_parser = $PREFERRED_PARSER;
	    unless(defined($preferred_parser)) {
	        # Empty string means 'no preference'
	        $preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} || '';
	    }
	    if($preferred_parser eq 'XML::Parser') {
	        return($self->build_tree_xml_parser($filename, $string));
	    }

	    eval { require XML::SAX; };      # We didn't need it until now
	    if($@) {                         # No XML::SAX - fall back to XML::Parser
	        if($preferred_parser) {        # unless a SAX parser was expressly requested
	            croak "XMLin() could not load XML::SAX";
	        }
	        return($self->build_tree_xml_parser($filename, $string));
	    }

	    # Note: this assigns the XML::SAX package global without localising it
	    $XML::SAX::ParserPackage = $preferred_parser if($preferred_parser);

	    my $sp = XML::SAX::ParserFactory->parser(Handler => $self);

	    # NOTE(review): presumably read by this module's SAX handler methods (not
	    # visible here) so the raw tree is returned uncollapsed - confirm.
	    $self->{nocollapse} = 1;

	    my($tree);
	    if($filename) {
	        $tree = $sp->parse_uri($filename);
	    }
	    else {
	        if(ref($string) && ref($string) ne 'SCALAR') {
	            $tree = $sp->parse_file($string);
	        }
	        else {
	            $tree = $sp->parse_string($$string);
	        }
	    }

	    return($tree);
	}
	305
	306
	##############################################################################
	# Method: build_tree_xml_parser()
	#
	# This routine will be called if XML::SAX is not installed, or if XML::Parser
	# was specifically requested.  It takes the same arguments as build_tree() and
	# returns the same data structure (XML::Parser 'Tree' style).
	#
	# Croaks if XML::Parser cannot be loaded or the named file cannot be opened.
	# Emits a warning if the 'nsexpand' option is set, since that needs XML::SAX.
	#

	sub build_tree_xml_parser {
	    my $self     = shift;
	    my $filename = shift;
	    my $string   = shift;


	    eval {
	        local($^W) = 0;      # Suppress warning from Expat.pm re File::Spec::load()
	        require XML::Parser; # We didn't need it until now
	    };
	    if($@) {
	        croak "XMLin() requires either XML::SAX or XML::Parser";
	    }

	    if($self->{opt}->{nsexpand}) {
	        carp "'nsexpand' option requires XML::SAX";
	    }

	    my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}});
	    my($tree);
	    if($filename) {
	        # $tree = $xp->parsefile($filename);  # Changed due to prob w/mod_perl
	        # A lexical handle is used so no package-level glob is touched
	        open(my $xml_fh, '<', $filename) || croak qq($filename - $!);
	        $tree = $xp->parse($xml_fh);
	        close($xml_fh);
	    }
	    else {
	        $tree = $xp->parse($$string);
	    }

	    return($tree);
	}
	348
	349
	##############################################################################
	# Method: cache_write_storable()
	#
	# Saves a parsed data structure to the on-disk cache file that belongs to the
	# given XML filename (see storable_filename()).  Uses Storable::nstore() on
	# VMS and Storable::lock_nstore() everywhere else.
	#

	sub cache_write_storable {
	    my($self, $data, $filename) = @_;

	    my $cache_path = $self->storable_filename($filename);

	    require Storable;       # We didn't need it until now

	    # If lock_nstore is missing, your Storable.pm is old - upgrade
	    my $store = ('VMS' eq $^O) ? \&Storable::nstore : \&Storable::lock_nstore;

	    return($store->($data, $cache_path));
	}
	373
	374
	##############################################################################
	# Method: cache_read_storable()
	#
	# Reads a previously cached data structure back with Storable.  Returns
	# nothing unless the cache file is readable and its modification time is
	# strictly newer than that of the source XML file.  Uses Storable::retrieve()
	# on VMS and Storable::lock_retrieve() everywhere else.
	#

	sub cache_read_storable {
	    my($self, $filename) = @_;

	    my $cache_path = $self->storable_filename($filename);

	    return unless(-r $cache_path);

	    my $cache_mtime  = (stat($cache_path))[9];
	    my $source_mtime = (stat($filename))[9];
	    return unless($cache_mtime > $source_mtime);

	    require Storable;       # We didn't need it until now

	    return('VMS' eq $^O
	        ? Storable::retrieve($cache_path)
	        : Storable::lock_retrieve($cache_path)
	    );
	}
	401
	402
	##############################################################################
	# Method: storable_filename()
	#
	# Maps the name of a source XML file to the name of its Storable cache file:
	# a trailing '.xml' (if there is one) is dropped and '.stor' is appended.
	#

	sub storable_filename {
	    my($self, $source_name) = @_;

	    (my $stor_name = $source_name) =~ s{(\.xml)?$}{.stor};

	    return($stor_name);
	}
	417
	418
	##############################################################################
	# Method: cache_write_memshare()
	#
	# Records the supplied data structure reference (not a copy) in the
	# module-wide 'memshare' cache under the given filename, stamped with the
	# current time.
	#

	sub cache_write_memshare {
	    my $self     = shift;
	    my $data     = shift;
	    my $filename = shift;

	    my $entry = [ time(), $data ];

	    return($MemShareCache{$filename} = $entry);
	}
	431
	432
	##############################################################################
	# Method: cache_read_memshare()
	#
	# Looks up the given filename in the module-wide 'memshare' cache.  Returns
	# the stored reference if an entry exists and was written strictly after the
	# file's last modification time; otherwise returns nothing.
	#

	sub cache_read_memshare {
	    my($self, $filename) = @_;

	    my $entry = $MemShareCache{$filename} or return;

	    return unless($entry->[0] > (stat($filename))[9]);

	    return($entry->[1]);
	}
	448
	449
	##############################################################################
	# Method: cache_write_memcopy()
	#
	# Stores a deep copy (Storable::dclone) of the supplied data structure in the
	# module-wide 'memcopy' cache under the given filename, stamped with the
	# current time.
	#

	sub cache_write_memcopy {
	    my $self     = shift;
	    my $data     = shift;
	    my $filename = shift;

	    require Storable;       # We didn't need it until now

	    my $entry = [ time(), Storable::dclone($data) ];

	    return($MemCopyCache{$filename} = $entry);
	}
	464
	465
	##############################################################################
	# Method: cache_read_memcopy()
	#
	# Looks up the given filename in the module-wide 'memcopy' cache.  If an entry
	# exists and was written strictly after the file's last modification time, a
	# fresh deep copy of the cached data is returned; otherwise returns nothing.
	#

	sub cache_read_memcopy {
	    my($self, $filename) = @_;

	    my $entry = $MemCopyCache{$filename} or return;

	    return unless($entry->[0] > (stat($filename))[9]);

	    return(Storable::dclone($entry->[1]));
	}
	482
	483
	##############################################################################
	# Sub/Method: XMLout()
	#
	# Exported routine for 'unslurping' a data structure out to XML.
	#
	# May be called as an object method or as a plain function.
	#
	# Expects a reference to a data structure and an optional list of option
	# name => value pairs.
	#
	# Returns:
	#   - the result of $fh->print() when 'outputfile' is a reference
	#   - a true value when 'outputfile' is a filename (croaks if the file cannot
	#     be opened, written or closed)
	#   - the result of the SAX parser's parse_string() when 'handler' is set
	#   - otherwise the XML text itself
	#

	sub XMLout {
	    my $self = &_get_object;      # note, @_ is passed implicitly

	    croak "XMLout() requires at least one argument" unless(@_);
	    my $ref = shift;

	    $self->handle_options('out', @_);


	    # If namespace expansion is set, XML::NamespaceSupport is required

	    if($self->{opt}->{nsexpand}) {
	        require XML::NamespaceSupport;
	        $self->{nsup} = XML::NamespaceSupport->new();
	        # NOTE(review): presumably the seed for generated namespace prefixes -
	        # the code that consumes it is not visible here
	        $self->{ns_prefix} = 'aaa';
	    }


	    # Wrap top level arrayref in a hash

	    if(UNIVERSAL::isa($ref, 'ARRAY')) {
	        $ref = { anon => $ref };
	    }


	    # Extract rootname from top level hash if keeproot enabled

	    if($self->{opt}->{keeproot}) {
	        my(@keys) = keys(%$ref);
	        if(@keys == 1) {
	            $ref = $ref->{$keys[0]};
	            $self->{opt}->{rootname} = $keys[0];
	        }
	    }

	    # Ensure there are no top level attributes if we're not adding root elements:
	    # every scalar value is wrapped in an arrayref

	    elsif($self->{opt}->{rootname} eq '') {
	        if(UNIVERSAL::isa($ref, 'HASH')) {
	            my $refsave = $ref;
	            $ref = {};
	            foreach (keys(%$refsave)) {
	                if(ref($refsave->{$_})) {
	                    $ref->{$_} = $refsave->{$_};
	                }
	                else {
	                    $ref->{$_} = [ $refsave->{$_} ];
	                }
	            }
	        }
	    }


	    # Encode the hashref and write to file if necessary

	    $self->{_ancestors} = [];
	    my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, '');
	    delete $self->{_ancestors};

	    if($self->{opt}->{xmldecl}) {
	        $xml = $self->{opt}->{xmldecl} . "\n" . $xml;
	    }

	    if($self->{opt}->{outputfile}) {
	        if(ref($self->{opt}->{outputfile})) {
	            my $fh = $self->{opt}->{outputfile};
	            # A plain glob gains a print() method once IO::Handle is loaded
	            if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) {
	                eval { require IO::Handle; };
	                croak $@ if $@;
	            }
	            return($fh->print($xml));
	        }
	        else {
	            open(my $out, '>', "$self->{opt}->{outputfile}") ||
	                croak "open($self->{opt}->{outputfile}): $!";
	            binmode($out, ':utf8') if($] >= 5.008);
	            # Low precedence 'or' is required here: with '||' the croak would
	            # bind to $xml and fire on falsy XML text instead of a failed print
	            print $out $xml or croak "print: $!";
	            # Buffered write errors only surface when the handle is closed
	            close($out) or croak "close: $!";
	        }
	    }
	    elsif($self->{opt}->{handler}) {
	        require XML::SAX;
	        my $sp = XML::SAX::ParserFactory->parser(
	            Handler => $self->{opt}->{handler}
	        );
	        return($sp->parse_string($xml));
	    }
	    else {
	        return($xml);
	    }
	}
	585
	586
	##############################################################################
	# Method: handle_options()
	#
	# Helper routine for both XMLin() and XMLout().  Both routines handle their
	# first argument and assume all other args are options handled by this routine.
	# Saves a hash of normalised options in $self->{opt}.
	#
	# If default options were passed to the constructor, they will be retrieved
	# here and merged with options supplied to the method call (options supplied
	# to the call take precedence).
	#
	# First argument should be the string 'in' or the string 'out'.
	#
	# Remaining arguments should be name=>value pairs.  Option names are matched
	# case-insensitively with underscores ignored.  Sets up default values for
	# options not supplied.  Croaks on an odd number of arguments, an
	# unrecognised option, an unsupported cache scheme, a malformed 'GroupTags'
	# or 'Variables' value, and (in strict mode) on missing 'ForceArray' /
	# 'KeyAttr' settings.
	#

	sub handle_options {
	    my $self = shift;
	    my $dirn = shift;


	    # Determine valid options based on context

	    my %known_opt;
	    if($dirn eq 'in') {
	        @known_opt{@KnownOptIn} = @KnownOptIn;
	    }
	    else {
	        @known_opt{@KnownOptOut} = @KnownOptOut;
	    }


	    # Store supplied options in hashref and weed out invalid ones

	    if(@_ % 2) {
	        croak "Options must be name=>value pairs (odd number supplied)";
	    }
	    my %raw_opt  = @_;
	    my $opt      = {};
	    $self->{opt} = $opt;

	    while(my($key, $val) = each %raw_opt) {
	        my $lkey = lc($key);
	        $lkey =~ s/_//g;
	        croak "Unrecognised option: $key" unless($known_opt{$lkey});
	        $opt->{$lkey} = $val;
	    }


	    # Merge in options passed to constructor

	    foreach (keys(%known_opt)) {
	        unless(exists($opt->{$_})) {
	            if(exists($self->{def_opt}->{$_})) {
	                $opt->{$_} = $self->{def_opt}->{$_};
	            }
	        }
	    }


	    # Set sensible defaults if not supplied

	    if(exists($opt->{rootname})) {
	        unless(defined($opt->{rootname})) {
	            $opt->{rootname} = '';
	        }
	    }
	    else {
	        $opt->{rootname} = $DefRootName;
	    }

	    if($opt->{xmldecl}  and  $opt->{xmldecl} eq '1') {
	        $opt->{xmldecl} = $DefXmlDecl;
	    }

	    # A leading '-' on the content key is stripped and remembered as the
	    # 'collapseagain' flag

	    if(exists($opt->{contentkey})) {
	        if($opt->{contentkey} =~ m{^-(.*)$}) {
	            $opt->{contentkey}    = $1;
	            $opt->{collapseagain} = 1;
	        }
	    }
	    else {
	        $opt->{contentkey} = $DefContentKey;
	    }

	    # Accept the US spelling as a fallback for the UK one

	    unless(exists($opt->{normalisespace})) {
	        $opt->{normalisespace} = $opt->{normalizespace};
	    }
	    $opt->{normalisespace} = 0 unless(defined($opt->{normalisespace}));

	    # Cleanups for values assumed to be arrays later

	    if($opt->{searchpath}) {
	        unless(ref($opt->{searchpath})) {
	            $opt->{searchpath} = [ $opt->{searchpath} ];
	        }
	    }
	    else {
	        $opt->{searchpath} = [ ];
	    }

	    if($opt->{cache}  and  !ref($opt->{cache})) {
	        $opt->{cache} = [ $opt->{cache} ];
	    }
	    if($opt->{cache}) {
	        $_ = lc($_) foreach (@{$opt->{cache}});
	        foreach my $scheme (@{$opt->{cache}}) {
	            my $method = 'cache_read_' . $scheme;
	            croak "Unsupported caching scheme: $scheme"
	                unless($self->can($method));
	        }
	    }

	    if(exists($opt->{parseropts})) {
	        if($^W) {
	            carp "Warning: " .
	                "'ParserOpts' is deprecated, contact the author if you need it";
	        }
	    }
	    else {
	        $opt->{parseropts} = [ ];
	    }


	    # Special cleanup for {forcearray} which could be regex, arrayref or boolean
	    # or left to default to 0.  A non-empty list becomes a hash of tag names
	    # with any regexes collected under the '_regex' key.

	    if(exists($opt->{forcearray})) {
	        if(ref($opt->{forcearray}) eq 'Regexp') {
	            $opt->{forcearray} = [ $opt->{forcearray} ];
	        }

	        if(ref($opt->{forcearray}) eq 'ARRAY') {
	            my @force_list = @{$opt->{forcearray}};
	            if(@force_list) {
	                $opt->{forcearray} = {};
	                foreach my $tag (@force_list) {
	                    if(ref($tag) eq 'Regexp') {
	                        push @{$opt->{forcearray}->{_regex}}, $tag;
	                    }
	                    else {
	                        $opt->{forcearray}->{$tag} = 1;
	                    }
	                }
	            }
	            else {
	                $opt->{forcearray} = 0;
	            }
	        }
	        else {
	            $opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 );
	        }
	    }
	    else {
	        if($StrictMode  and  $dirn eq 'in') {
	            croak "No value specified for 'ForceArray' option in call to XML$dirn()";
	        }
	        $opt->{forcearray} = 0;
	    }


	    # Special cleanup for {keyattr} which could be arrayref or hashref or left
	    # to default to arrayref

	    if(exists($opt->{keyattr})) {
	        if(ref($opt->{keyattr})) {
	            if(ref($opt->{keyattr}) eq 'HASH') {

	                # Make a copy so we can mess with it

	                $opt->{keyattr} = { %{$opt->{keyattr}} };


	                # Convert keyattr => { elem => '+attr' }
	                # to      keyattr => { elem => [ 'attr', '+' ] }
	                #
	                # The alternation must be an unescaped '|' so that a single
	                # leading '+' or '-' is captured as the flag.

	                foreach my $el (keys(%{$opt->{keyattr}})) {
	                    if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) {
	                        $opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ];
	                        if($StrictMode  and  $dirn eq 'in') {
	                            next if($opt->{forcearray} == 1);
	                            next if(ref($opt->{forcearray}) eq 'HASH'
	                                    and $opt->{forcearray}->{$el});
	                            croak "<$el> set in KeyAttr but not in ForceArray";
	                        }
	                    }
	                    else {
	                        delete($opt->{keyattr}->{$el}); # Never reached (famous last words?)
	                    }
	                }
	            }
	            else {
	                if(@{$opt->{keyattr}} == 0) {
	                    delete($opt->{keyattr});
	                }
	            }
	        }
	        else {
	            $opt->{keyattr} = [ $opt->{keyattr} ];
	        }
	    }
	    else {
	        if($StrictMode) {
	            croak "No value specified for 'KeyAttr' option in call to XML$dirn()";
	        }
	        $opt->{keyattr} = [ @DefKeyAttr ];
	    }


	    # Special cleanup for {valueattr} which could be arrayref or hashref.
	    # The arrayref form is moved to {valueattrlist} as a lookup hash.

	    if(exists($opt->{valueattr})) {
	        if(ref($opt->{valueattr}) eq 'ARRAY') {
	            $opt->{valueattrlist} = {};
	            $opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} });
	        }
	    }

	    # make sure there's nothing weird in {grouptags}: a group tag may not have
	    # the same name as the tag it groups

	    if($opt->{grouptags}) {
	        croak "Illegal value for 'GroupTags' option - expected a hashref"
	            unless UNIVERSAL::isa($opt->{grouptags}, 'HASH');

	        while(my($key, $val) = each %{$opt->{grouptags}}) {
	            next if $key ne $val;
	            croak "Bad value in GroupTags: '$key' => '$val'";
	        }
	    }


	    # Check the {variables} option is valid and initialise variables hash

	    if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) {
	        croak "Illegal value for 'Variables' option - expected a hashref";
	    }

	    if($opt->{variables}) {
	        $self->{_var_values} = { %{$opt->{variables}} };
	    }
	    elsif($opt->{varattr}) {
	        $self->{_var_values} = {};
	    }

	}
	832
	833
	##############################################################################
	# Method: find_xml_file()
	#
	# Helper routine for XMLin().
	#
	# Takes a filename followed by a list of directories.  A name with a directory
	# component is checked as given and the search path is not consulted; a bare
	# name is looked for in each listed directory in order.  With an empty search
	# path the name is checked relative to the current directory.
	#
	# Returns the pathname of the first match; croaks if nothing is found.
	# Relies on File::Basename and File::Spec having been loaded by the caller.
	#

	sub find_xml_file {
	    my($self, $file, @search_path) = @_;


	    my($basename) = File::Basename::fileparse($file);

	    if($basename eq $file) {
	        foreach my $dir (@search_path) {
	            my $candidate = File::Spec->catfile($dir, $file);
	            return($candidate) if(-e $candidate);
	        }
	    }
	    else {                            # Ignore searchpath if dir component
	        return($file) if(-e $file);
	    }

	    # If user did not supply a search path, default to current directory

	    unless(@search_path) {
	        return($file) if(-e $file);
	        croak "File does not exist: $file";
	    }

	    croak "Could not find $file in ", join(':', @search_path);
	}
	872
	873
	##############################################################################
	# Method: collapse()
	#
	# Helper routine for XMLin().  This routine really comprises the 'smarts' (or
	# value add) of this module.
	#
	# Takes the parse tree that XML::Parser produced from the supplied XML and
	# recurses through it 'collapsing' unnecessary levels of indirection (nested
	# arrays etc) to produce a data structure that is easier to work with.
	#
	# Elements in the original parser tree are represented as an element name
	# followed by an arrayref.  The first element of the array is a hashref
	# containing the attributes.  The rest of the array contains a list of any
	# nested elements as name+arrayref pairs:
	#
	#  <element name>, [ { <attribute hashref> }, <element name>, [ ... ], ... ]
	#
	# The special element name '0' (zero) flags text content.
	#
	# This routine cuts down the noise by discarding any text content consisting of
	# only whitespace and then moves the nested elements into the attribute hash
	# using the name of the nested element as the hash key and the collapsed
	# version of the nested element as the value.  Multiple nested elements with
	# the same name will initially be represented as an arrayref, but this may be
	# 'folded' into a hashref depending on the value of the keyattr option.
	#
	# Arguments: the attribute hashref followed by the name/value pairs of the
	# element's content.  Returns the collapsed value, which may be a hashref, an
	# arrayref, a plain string, or undef / '' for an empty element when the
	# 'suppressempty' option is in effect.
	#
	# Variable references of the form ${name} are expanded in attribute values
	# and in text content alike; names may contain word characters and dots.
	#

	sub collapse {
	    my $self = shift;


	    # Start with the hash of attributes

	    my $attr = shift;
	    if($self->{opt}->{noattr}) {                    # Discard if 'noattr' set
	        $attr = {};
	    }
	    elsif($self->{opt}->{normalisespace} == 2) {
	        while(my($key, $value) = each %$attr) {
	            $attr->{$key} = $self->normalise_space($value)
	        }
	    }


	    # Do variable substitutions

	    if(my $var = $self->{_var_values}) {
	        while(my($key, $val) = each(%$attr)) {
	            $val =~ s{\$\{([\w.]+)\}}{ $self->get_var($1) }ge;
	            $attr->{$key} = $val;
	        }
	    }


	    # Roll up 'value' attributes (but only if no nested elements)

	    if(!@_  and  keys %$attr == 1) {
	        my($k) = keys %$attr;
	        if($self->{opt}->{valueattrlist}  and  $self->{opt}->{valueattrlist}->{$k}) {
	            return $attr->{$k};
	        }
	    }


	    # Add any nested elements

	    my($key, $val);
	    while(@_) {
	        $key = shift;
	        $val = shift;

	        if(ref($val)) {
	            $val = $self->collapse(@$val);
	            next if(!defined($val)  and  $self->{opt}->{suppressempty});
	        }
	        elsif($key eq '0') {
	            next if($val =~ m{^\s*$}s);  # Skip all whitespace content

	            $val = $self->normalise_space($val)
	                if($self->{opt}->{normalisespace} == 2);

	            # do variable substitutions (same name syntax as for attributes)

	            if(my $var = $self->{_var_values}) {
	                $val =~ s{\$\{([\w.]+)\}}{ $self->get_var($1) }ge;
	            }


	            # look for variable definitions

	            if(my $var = $self->{opt}->{varattr}) {
	                if(exists $attr->{$var}) {
	                    $self->set_var($attr->{$var}, $val);
	                }
	            }


	            # Collapse text content in element with no attributes to a string

	            if(!%$attr  and  !@_) {
	                return($self->{opt}->{forcecontent} ?
	                    { $self->{opt}->{contentkey} => $val } : $val
	                );
	            }
	            $key = $self->{opt}->{contentkey};
	        }


	        # Combine duplicate attributes into arrayref if required

	        if(exists($attr->{$key})) {
	            if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) {
	                push(@{$attr->{$key}}, $val);
	            }
	            else {
	                $attr->{$key} = [ $attr->{$key}, $val ];
	            }
	        }
	        elsif(defined($val)  and  UNIVERSAL::isa($val, 'ARRAY')) {
	            $attr->{$key} = [ $val ];
	        }
	        else {
	            # First occurrence: wrap in an array only if forcearray applies to
	            # this tag (globally, by name, or by regex) and it is not the
	            # content key
	            if( $key ne $self->{opt}->{contentkey}
	                and (
	                    ($self->{opt}->{forcearray} == 1)
	                    or (
	                        (ref($self->{opt}->{forcearray}) eq 'HASH')
	                        and (
	                            $self->{opt}->{forcearray}->{$key}
	                            or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}})
	                        )
	                    )
	                )
	            ) {
	                $attr->{$key} = [ $val ];
	            }
	            else {
	                $attr->{$key} = $val;
	            }
	        }

	    }


	    # Turn arrayrefs into hashrefs if key fields present

	    if($self->{opt}->{keyattr}) {
	        while(($key,$val) = each %$attr) {
	            if(defined($val)  and  UNIVERSAL::isa($val, 'ARRAY')) {
	                $attr->{$key} = $self->array_to_hash($key, $val);
	            }
	        }
	    }


	    # disintermediate grouped tags

	    if($self->{opt}->{grouptags}) {
	        while(my($key, $val) = each(%$attr)) {
	            next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
	            next unless(exists($self->{opt}->{grouptags}->{$key}));

	            my($child_key, $child_val) = %$val;

	            if($self->{opt}->{grouptags}->{$key} eq $child_key) {
	                $attr->{$key}= $child_val;
	            }
	        }
	    }


	    # Fold hashes containing a single anonymous array up into just the array

	    my $count = scalar keys %$attr;
	    if($count == 1
	        and  exists $attr->{anon}
	        and  UNIVERSAL::isa($attr->{anon}, 'ARRAY')
	    ) {
	        return($attr->{anon});
	    }


	    # Do the right thing if hash is empty, otherwise just return it

	    if(!%$attr  and  exists($self->{opt}->{suppressempty})) {
	        if(defined($self->{opt}->{suppressempty})  and
	           $self->{opt}->{suppressempty} eq '') {
	            return('');
	        }
	        return(undef);
	    }


	    # Roll up named elements with named nested 'value' attributes

	    if($self->{opt}->{valueattr}) {
	        while(my($key, $val) = each(%$attr)) {
	            next unless($self->{opt}->{valueattr}->{$key});
	            next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
	            my($k) = keys %$val;
	            next unless($k eq $self->{opt}->{valueattr}->{$key});
	            $attr->{$key} = $val->{$k};
	        }
	    }

	    return($attr)

	}
	1082
	1083
	##############################################################################
	# Method: set_var()
	#
	# Called when a variable definition is encountered in the XML.  (A variable
	# definition looks like <element attrname="name">value</element> where attrname
	# matches the varattr setting).  Stores the value under the given name in the
	# object's table of variable values.
	#

	sub set_var {
	    my $self = shift;
	    my($name, $value) = @_;

	    return($self->{_var_values}->{$name} = $value);
	}
	1097
	1098
	##############################################################################
	# Method: get_var()
	#
	# Called during variable substitution to get the value for the named variable.
	# If no defined value is stored under that name, the original '${name}'
	# reference text is returned so the input is left unchanged.
	#

	sub get_var {
	    my $self = shift;
	    my $name = shift;

	    my $value = $self->{_var_values}->{$name};

	    return(defined($value) ? $value : join('', '${', $name, '}'));
	}
	1113
	1114
	##############################################################################
	# Method: normalise_space()
	#
	# Returns a copy of the supplied text with leading and trailing whitespace
	# removed and every run of two or more whitespace characters replaced by a
	# single space.
	#

	sub normalise_space {
	    my($self, $text) = @_;

	    for($text) {
	        s/^\s+//s;
	        s/\s+$//s;
	        s/\s\s+/ /sg;
	    }

	    return($text);
	}
	1131
	1132
	##############################################################################
	# Method: array_to_hash()
	#
	# Helper routine for collapse().
	#
	# Attempts to 'fold' an array of hashes into a hash of hashes, keyed on the
	# value of a key attribute.  Returns a reference to the new hash on success or
	# the original arrayref if folding is not possible.  Behaviour is controlled
	# by the 'keyattr' option:
	#
	#   keyattr => { name => [ attr, flag ] }
	#       Only elements called 'name' are folded, using 'attr' as the key.  The
	#       key attribute is removed from each folded hash unless flag is '+';
	#       with flag '-' a copy is kept under '-attr'.  An element without a
	#       scalar key attribute stops the folding (fatal in strict mode, a
	#       warning when $^W is set).
	#
	#   keyattr => [ attr, ... ]
	#       For each element the first listed attribute with a defined value is
	#       used as the key and removed from the folded hash.
	#
	# Arguments: element name, arrayref of collapsed elements.
	#

	sub array_to_hash {
	    my($self, $name, $arrayref) = @_;

	    my $hashref = $self->new_hashref;
	    my $keyattr = $self->{opt}->{keyattr};


	    # Handle keyattr => { .... }

	    if(ref($keyattr) eq 'HASH') {
	        return($arrayref) unless(exists($keyattr->{$name}));
	        my($key, $flag) = @{$keyattr->{$name}};

	        foreach my $elem (@$arrayref) {
	            unless(UNIVERSAL::isa($elem, 'HASH') and exists($elem->{$key})) {
	                croak "<$name> element has no '$key' key attribute" if($StrictMode);
	                carp "Warning: <$name> element has no '$key' key attribute" if($^W);
	                return($arrayref);
	            }

	            my $val = $elem->{$key};
	            if(ref($val)) {
	                croak "<$name> element has non-scalar '$key' key attribute"
	                    if($StrictMode);
	                carp "Warning: <$name> element has non-scalar '$key' key attribute"
	                    if($^W);
	                return($arrayref);
	            }
	            $val = $self->normalise_space($val)
	                if($self->{opt}->{normalisespace} == 1);

	            # Store a shallow copy first, then adjust it in place
	            $hashref->{$val} = { %$elem };
	            $hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-');
	            delete $hashref->{$val}->{$key} unless($flag eq '+');
	        }
	    }


	    # Or assume keyattr => [ .... ]

	    else {
	        ELEMENT: foreach my $elem (@$arrayref) {
	            return($arrayref) unless(UNIVERSAL::isa($elem, 'HASH'));

	            foreach my $key (@$keyattr) {
	                next unless(defined($elem->{$key}));

	                my $val = $elem->{$key};
	                return($arrayref) if(ref($val));
	                $val = $self->normalise_space($val)
	                    if($self->{opt}->{normalisespace} == 1);
	                $hashref->{$val} = { %$elem };
	                delete $hashref->{$val}->{$key};
	                next ELEMENT;
	            }

	            return($arrayref);    # No keyfield matched
	        }
	    }

	    # collapse any hashes which now only have a 'content' key

	    if($self->{opt}->{collapseagain}) {
	        $hashref = $self->collapse_content($hashref);
	    }

	    return($hashref);
	}
	1216
	1217
	1218	##############################################################################
	1219	# Method: new_hashref()
	1220	#
	1221	# This is a hook routine for overriding in a sub-class. Some people believe
	1222	# that using Tie::IxHash here will solve order-loss problems.
	1223	#
	1224
	1225	sub new_hashref {
	1226	my $self = shift;
	1227
	1228	return { @_ };
	1229	}
	1230
	1231
	1232	##############################################################################
	1233	# Method: collapse_content()
	1234	#
	1235	# Helper routine for array_to_hash
	1236	#
	1237	# Arguments expected are:
	1238	# - an XML::Simple object
	1239	# - a hasref
	1240	# the hashref is a former array, turned into a hash by array_to_hash because
	1241	# of the presence of key attributes
	1242	# at this point collapse_content avoids over-complicated structures like
	1243	# dir => { libexecdir => { content => '$exec_prefix/libexec' },
	1244	# localstatedir => { content => '$prefix' },
	1245	# }
	1246	# into
	1247	# dir => { libexecdir => '$exec_prefix/libexec',
	1248	# localstatedir => '$prefix',
	1249	# }
	1250
	1251	sub collapse_content {
	1252	my $self = shift;
	1253	my $hashref = shift;
	1254
	1255	my $contentkey = $self->{opt}->{contentkey};
	1256
	1257	# first go through the values,checking that they are fit to collapse
	1258	foreach my $val (values %$hashref) {
	1259	return $hashref unless ( (ref($val) eq 'HASH')
	1260	and (keys %$val == 1)
	1261	and (exists $val->{$contentkey})
	1262	);
	1263	}
	1264
	1265	# now collapse them
	1266	foreach my $key (keys %$hashref) {
	1267	$hashref->{$key}= $hashref->{$key}->{$contentkey};
	1268	}
	1269
	1270	return $hashref;
	1271	}
	1272
	1273
	1274	##############################################################################
	1275	# Method: value_to_xml()
	1276	#
	1277	# Helper routine for XMLout() - recurses through a data structure building up
	1278	# and returning an XML representation of that structure as a string.
	1279	#
	1280	# Arguments expected are:
	1281	# - the data structure to be encoded (usually a reference)
	1282	# - the XML tag name to use for this item
	1283	# - a string of spaces for use as the current indent level (an empty string is taken to mean this is the root element)
	1284	# Croaks if the data structure is circular or contains a reference which is neither a hash nor an array.
	1285
	1286	sub value_to_xml {
	1287	my $self = shift;;
	1288
	1289
	1290	# Grab the other arguments
	1291
	1292	my($ref, $name, $indent) = @_;
	1293	# $named is true only when a non-empty tag name was supplied
	1294	my $named = (defined($name) and $name ne '' ? 1 : 0);
	1295
	1296	my $nl = "\n";
	1297	# The root element is recognised by its empty indent string, so this test must come before 'noindent' clears $indent
	1298	my $is_root = $indent eq '' ? 1 : 0; # Warning, dirty hack!
	1299	if($self->{opt}->{noindent}) {
	1300	$indent = '';
	1301	$nl = '';
	1302	}
	1303
	1304
	1305	# Convert to XML (references are pushed onto the _ancestors stack so that cycles can be detected; plain scalars are emitted straight away)
	1306
	1307	if(ref($ref)) {
	1308	croak "circular data structures not supported"
	1309	if(grep($_ == $ref, @{$self->{_ancestors}}));
	1310	push @{$self->{_ancestors}}, $ref;
	1311	}
	1312	else {
	1313	if($named) {
	1314	return(join('',
	1315	$indent, '<', $name, '>',
	1316	($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)),
	1317	'</', $name, ">", $nl
	1318	));
	1319	}
	1320	else {
	1321	return("$ref$nl");
	1322	}
	1323	}
	1324
	1325
	1326	# Unfold hash to array if possible
	1327
	1328	if(UNIVERSAL::isa($ref, 'HASH') # It is a hash
	1329	and keys %$ref # and it's not empty
	1330	and $self->{opt}->{keyattr} # and folding is enabled
	1331	and !$is_root # and it's not the root element
	1332	) {
	1333	$ref = $self->hash_to_array($name, $ref);
	1334	}
	1335
	1336
	1337	my @result = ();
	1338	my($key, $value);
	1339
	1340
	1341	# Handle hashrefs
	1342
	1343	if(UNIVERSAL::isa($ref, 'HASH')) {
	1344
	1345	# Reintermediate grouped values if applicable (working on the hash returned by copy_hash() rather than on the caller's hash)
	1346
	1347	if($self->{opt}->{grouptags}) {
	1348	$ref = $self->copy_hash($ref);
	1349	while(my($key, $val) = each %$ref) {
	1350	if($self->{opt}->{grouptags}->{$key}) {
	1351	$ref->{$key} = { $self->{opt}->{grouptags}->{$key} => $val };
	1352	}
	1353	}
	1354	}
	1355
	1356
	1357	# Scan for namespace declaration attributes (only when a namespace support object is held in $self->{nsup})
	1358
	1359	my $nsdecls = '';
	1360	my $default_ns_uri;
	1361	if($self->{nsup}) {
	1362	$ref = $self->copy_hash($ref);
	1363	$self->{nsup}->push_context();
	1364
	1365	# Look for default namespace declaration first
	1366
	1367	if(exists($ref->{xmlns})) {
	1368	$self->{nsup}->declare_prefix('', $ref->{xmlns});
	1369	$nsdecls .= qq( xmlns="$ref->{xmlns}");
	1370	delete($ref->{xmlns});
	1371	}
	1372	$default_ns_uri = $self->{nsup}->get_uri('');
	1373
	1374
	1375	# Then check all the other keys
	1376
	1377	foreach my $qname (keys(%$ref)) {
	1378	my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
	1379	if($uri) {
	1380	if($uri eq $xmlns_ns) {
	1381	$self->{nsup}->declare_prefix($lname, $ref->{$qname});
	1382	$nsdecls .= qq( xmlns:$lname="$ref->{$qname}");
	1383	delete($ref->{$qname});
	1384	}
	1385	}
	1386	}
	1387
	1388	# Translate any remaining Clarkian names
	1389
	1390	foreach my $qname (keys(%$ref)) {
	1391	my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
	1392	if($uri) {
	1393	if($default_ns_uri and $uri eq $default_ns_uri) {
	1394	$ref->{$lname} = $ref->{$qname};
	1395	delete($ref->{$qname});
	1396	}
	1397	else {
	1398	my $prefix = $self->{nsup}->get_prefix($uri);
	1399	unless($prefix) {
	1400	# $self->{nsup}->declare_prefix(undef, $uri);
	1401	# $prefix = $self->{nsup}->get_prefix($uri);
	1402	$prefix = $self->{ns_prefix}++;
	1403	$self->{nsup}->declare_prefix($prefix, $uri);
	1404	$nsdecls .= qq( xmlns:$prefix="$uri");
	1405	}
	1406	$ref->{"$prefix:$lname"} = $ref->{$qname};
	1407	delete($ref->{$qname});
	1408	}
	1409	}
	1410	}
	1411	}
	1412
	1413	# Scalar values become attributes (or text content for the content key); references, or every value when 'noattr' is set, become nested elements
	1414	my @nested = ();
	1415	my $text_content = undef;
	1416	if($named) {
	1417	push @result, $indent, '<', $name, $nsdecls;
	1418	}
	1419
	1420	if(keys %$ref) {
	1421	my $first_arg = 1;
	1422	foreach my $key ($self->sorted_keys($name, $ref)) {
	1423	my $value = $ref->{$key};
	1424	next if(substr($key, 0, 1) eq '-'); # keys with a leading '-' are never output
	1425	if(!defined($value)) {
	1426	next if $self->{opt}->{suppressempty};
	1427	unless(exists($self->{opt}->{suppressempty})
	1428	and !defined($self->{opt}->{suppressempty})
	1429	) {
	1430	carp 'Use of uninitialized value' if($^W);
	1431	}
	1432	if($key eq $self->{opt}->{contentkey}) {
	1433	$text_content = '';
	1434	}
	1435	else {
	1436	$value = exists($self->{opt}->{suppressempty}) ? {} : '';
	1437	}
	1438	}
	1439
	1440	if(!ref($value)
	1441	and $self->{opt}->{valueattr}
	1442	and $self->{opt}->{valueattr}->{$key}
	1443	) {
	1444	$value = { $self->{opt}->{valueattr}->{$key} => $value };
	1445	}
	1446
	1447	if(ref($value) or $self->{opt}->{noattr}) {
	1448	push @nested,
	1449	$self->value_to_xml($value, $key, "$indent ");
	1450	}
	1451	else {
	1452	$value = $self->escape_value($value) unless($self->{opt}->{noescape});
	1453	if($key eq $self->{opt}->{contentkey}) {
	1454	$text_content = $value;
	1455	}
	1456	else {
	1457	push @result, "\n$indent " . ' ' x length($name)
	1458	if($self->{opt}->{attrindent} and !$first_arg);
	1459	push @result, ' ', $key, '="', $value , '"';
	1460	$first_arg = 0;
	1461	}
	1462	}
	1463	}
	1464	}
	1465	else {
	1466	$text_content = '';
	1467	}
	1468
	1469	if(@nested or defined($text_content)) {
	1470	if($named) {
	1471	push @result, ">";
	1472	if(defined($text_content)) {
	1473	push @result, $text_content;
	1474	$nested[0] =~ s/^\s+// if(@nested);
	1475	}
	1476	else {
	1477	push @result, $nl;
	1478	}
	1479	if(@nested) {
	1480	push @result, @nested, $indent;
	1481	}
	1482	push @result, '</', $name, ">", $nl;
	1483	}
	1484	else {
	1485	push @result, @nested; # Special case if no root elements
	1486	}
	1487	}
	1488	else {
	1489	push @result, " />", $nl;
	1490	}
	1491	$self->{nsup}->pop_context() if($self->{nsup}); # balances the push_context() call above
	1492	}
	1493
	1494
	1495	# Handle arrayrefs (each item in the array is output as its own <$name> element)
	1496
	1497	elsif(UNIVERSAL::isa($ref, 'ARRAY')) {
	1498	foreach $value (@$ref) {
	1499	next if !defined($value) and $self->{opt}->{suppressempty};
	1500	if(!ref($value)) {
	1501	push @result,
	1502	$indent, '<', $name, '>',
	1503	($self->{opt}->{noescape} ? $value : $self->escape_value($value)),
	1504	'</', $name, ">$nl";
	1505	}
	1506	elsif(UNIVERSAL::isa($value, 'HASH')) {
	1507	push @result, $self->value_to_xml($value, $name, $indent);
	1508	}
	1509	else {
	1510	push @result,
	1511	$indent, '<', $name, ">$nl",
	1512	$self->value_to_xml($value, 'anon', "$indent "),
	1513	$indent, '</', $name, ">$nl";
	1514	}
	1515	}
	1516	}
	1517
	1518	else {
	1519	croak "Can't encode a value of type: " . ref($ref);
	1520	}
	1521
	1522	# Finished with this reference so take it off the cycle detection stack
	1523	pop @{$self->{_ancestors}} if(ref($ref));
	1524
	1525	return(join('', @result));
	1526	}
	1527
	1528
	1529	##############################################################################
	1530	# Method: sorted_keys()
	1531	#
	1532	# Returns the keys of the referenced hash sorted into alphabetical order, but
	1533	# with the 'key' key (as in KeyAttr) first, if there is one.
	1534	#
	1535
	1536	sub sorted_keys {
	1537	my($self, $name, $ref) = @_;
	1538
	1539	return keys %$ref if $self->{opt}->{nosort};
	1540
	1541	my %hash = %$ref;
	1542	my $keyattr = $self->{opt}->{keyattr};
	1543
	1544	my @key;
	1545
	1546	if(ref $keyattr eq 'HASH') {
	1547	if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) {
	1548	push @key, $keyattr->{$name}->[0];
	1549	delete $hash{$keyattr->{$name}->[0]};
	1550	}
	1551	}
	1552	elsif(ref $keyattr eq 'ARRAY') {
	1553	foreach (@{$keyattr}) {
	1554	if(exists $hash{$_}) {
	1555	push @key, $_;
	1556	delete $hash{$_};
	1557	last;
	1558	}
	1559	}
	1560	}
	1561
	1562	return(@key, sort keys %hash);
	1563	}
	1564
	1565	##############################################################################
	1566	# Method: escape_value()
	1567	#
	1568	# Helper routine for automatically escaping values for XMLout().
	1569	# Expects a scalar data value. Returns escaped version.
	1570	#
	1571
	1572	sub escape_value {
	1573	my($self, $data) = @_;
	1574
	1575	return '' unless(defined($data));
	1576
	1577	$data =~ s/&/&/sg;
	1578	$data =~ s/</</sg;
	1579	$data =~ s/>/>/sg;
	1580	$data =~ s/"/"/sg;
	1581
	1582	my $level = $self->{opt}->{numericescape} or return $data;
	1583
	1584	return $self->numeric_escape($data, $level);
	1585	}
	1586
	1587	sub numeric_escape {
	1588	my($self, $data, $level) = @_;
	1589
	1590	use utf8; # required for 5.6
	1591
	1592	if($self->{opt}->{numericescape} eq '2') {
	1593	$data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse;
	1594	}
	1595	else {
	1596	$data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse;
	1597	}
	1598
	1599	return $data;
	1600	}
	1601
	1602
	1603	##############################################################################
	1604	# Method: hash_to_array()
	1605	#
	1606	# Helper routine for value_to_xml().
	1607	# Attempts to 'unfold' a hash of hashes into an array of hashes. Returns a
	1608	# reference to the array on success or the original hash if unfolding is
	1609	# not possible.
	1610	#
	1611
	1612	sub hash_to_array {
	1613	my $self = shift;
	1614	my $parent = shift;
	1615	my $hashref = shift;
	1616
	1617	my $arrayref = [];
	1618
	1619	my($key, $value);
	1620
	1621	my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref;
	1622	foreach $key (@keys) {
	1623	$value = $hashref->{$key};
	1624	return($hashref) unless(UNIVERSAL::isa($value, 'HASH'));
	1625
	1626	if(ref($self->{opt}->{keyattr}) eq 'HASH') {
	1627	return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent}));
	1628	push @$arrayref, $self->copy_hash(
	1629	$value, $self->{opt}->{keyattr}->{$parent}->[0] => $key
	1630	);
	1631	}
	1632	else {
	1633	push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value });
	1634	}
	1635	}
	1636
	1637	return($arrayref);
	1638	}
	1639
	1640
	1641	##############################################################################
	1642	# Method: copy_hash()
	1643	#
	1644	# Helper routine for hash_to_array(). When unfolding a hash of hashes into
	1645	# an array of hashes, we need to copy the key from the outer hash into the
	1646	# inner hash. This routine makes a copy of the original hash so we don't
	1647	# destroy the original data structure. You might wish to override this
	1648	# method if you're using tied hashes and don't want them to get untied.
	1649	#
	1650
	1651	sub copy_hash {
	1652	my($self, $orig, @extra) = @_;
	1653
	1654	return { @extra, %$orig };
	1655	}
	1656
	1657	##############################################################################
	1658	# Methods required for building trees from SAX events
	1659	##############################################################################
	1660
	1661	sub start_document {
	1662	my $self = shift;
	1663
	1664	$self->handle_options('in') unless($self->{opt});
	1665
	1666	$self->{lists} = [];
	1667	$self->{curlist} = $self->{tree} = [];
	1668	}
	1669
	1670
	1671	sub start_element {
	1672	my $self = shift;
	1673	my $element = shift;
	1674
	1675	my $name = $element->{Name};
	1676	if($self->{opt}->{nsexpand}) {
	1677	$name = $element->{LocalName} \|\| '';
	1678	if($element->{NamespaceURI}) {
	1679	$name = '{' . $element->{NamespaceURI} . '}' . $name;
	1680	}
	1681	}
	1682	my $attributes = {};
	1683	if($element->{Attributes}) { # Might be undef
	1684	foreach my $attr (values %{$element->{Attributes}}) {
	1685	if($self->{opt}->{nsexpand}) {
	1686	my $name = $attr->{LocalName} \|\| '';
	1687	if($attr->{NamespaceURI}) {
	1688	$name = '{' . $attr->{NamespaceURI} . '}' . $name
	1689	}
	1690	$name = 'xmlns' if($name eq $bad_def_ns_jcn);
	1691	$attributes->{$name} = $attr->{Value};
	1692	}
	1693	else {
	1694	$attributes->{$attr->{Name}} = $attr->{Value};
	1695	}
	1696	}
	1697	}
	1698	my $newlist = [ $attributes ];
	1699	push @{ $self->{lists} }, $self->{curlist};
	1700	push @{ $self->{curlist} }, $name => $newlist;
	1701	$self->{curlist} = $newlist;
	1702	}
	1703
	1704
	1705	sub characters {
	1706	my $self = shift;
	1707	my $chars = shift;
	1708
	1709	my $text = $chars->{Data};
	1710	my $clist = $self->{curlist};
	1711	my $pos = $#$clist;
	1712
	1713	if ($pos > 0 and $clist->[$pos - 1] eq '0') {
	1714	$clist->[$pos] .= $text;
	1715	}
	1716	else {
	1717	push @$clist, 0 => $text;
	1718	}
	1719	}
	1720
	1721
	1722	sub end_element {
	1723	my $self = shift;
	1724
	1725	$self->{curlist} = pop @{ $self->{lists} };
	1726	}
	1727
	1728
	1729	sub end_document {
	1730	my $self = shift;
	1731
	1732	delete($self->{curlist});
	1733	delete($self->{lists});
	1734
	1735	my $tree = $self->{tree};
	1736	delete($self->{tree});
	1737
	1738
	1739	# Return tree as-is to XMLin()
	1740
	1741	return($tree) if($self->{nocollapse});
	1742
	1743
	1744	# Or collapse it before returning it to SAX parser class
	1745
	1746	if($self->{opt}->{keeproot}) {
	1747	$tree = $self->collapse({}, @$tree);
	1748	}
	1749	else {
	1750	$tree = $self->collapse(@{$tree->[1]});
	1751	}
	1752
	1753	if($self->{opt}->{datahandler}) {
	1754	return($self->{opt}->{datahandler}->($self, $tree));
	1755	}
	1756
	1757	return($tree);
	1758	}
	1759
	1760	*xml_in = \&XMLin; # lower case alias for XMLin() (available on request via @EXPORT_OK)
	1761	*xml_out = \&XMLout; # lower case alias for XMLout() (available on request via @EXPORT_OK)
	1762
	1763	1; # the module file must finish with a true value
	1764
	1765	__END__
	1766
	1767	=head1 QUICK START
	1768
	1769	Say you have a script called B<foo> and a file of configuration options
	1770	called B<foo.xml> containing this:
	1771
	1772	<config logdir="/var/log/foo/" debugfile="/tmp/foo.debug">
	1773	<server name="sahara" osname="solaris" osversion="2.6">
	1774	<address>10.0.0.101</address>
	1775	<address>10.0.1.101</address>
	1776	</server>
	1777	<server name="gobi" osname="irix" osversion="6.5">
	1778	<address>10.0.0.102</address>
	1779	</server>
	1780	<server name="kalahari" osname="linux" osversion="2.0.34">
	1781	<address>10.0.0.103</address>
	1782	<address>10.0.1.103</address>
	1783	</server>
	1784	</config>
	1785
	1786	The following lines of code in B<foo>:
	1787
	1788	use XML::Simple;
	1789
	1790	my $config = XMLin();
	1791
	1792	will 'slurp' the configuration options into the hashref $config (because no
	1793	arguments are passed to C<XMLin()> the name and location of the XML file will
	1794	be inferred from name and location of the script). You can dump out the
	1795	contents of the hashref using Data::Dumper:
	1796
	1797	use Data::Dumper;
	1798
	1799	print Dumper($config);
	1800
	1801	which will produce something like this (formatting has been adjusted for
	1802	brevity):
	1803
	1804	{
	1805	'logdir' => '/var/log/foo/',
	1806	'debugfile' => '/tmp/foo.debug',
	1807	'server' => {
	1808	'sahara' => {
	1809	'osversion' => '2.6',
	1810	'osname' => 'solaris',
	1811	'address' => [ '10.0.0.101', '10.0.1.101' ]
	1812	},
	1813	'gobi' => {
	1814	'osversion' => '6.5',
	1815	'osname' => 'irix',
	1816	'address' => '10.0.0.102'
	1817	},
	1818	'kalahari' => {
	1819	'osversion' => '2.0.34',
	1820	'osname' => 'linux',
	1821	'address' => [ '10.0.0.103', '10.0.1.103' ]
	1822	}
	1823	}
	1824	}
	1825
	1826	Your script could then access the name of the log directory like this:
	1827
	1828	print $config->{logdir};
	1829
	1830	similarly, the second address on the server 'kalahari' could be referenced as:
	1831
	1832	print $config->{server}->{kalahari}->{address}->[1];
	1833
	1834	What could be simpler? (Rhetorical).
	1835
	1836	For simple requirements, that's really all there is to it. If you want to
	1837	store your XML in a different directory or file, or pass it in as a string or
	1838	even pass it in via some derivative of an IO::Handle, you'll need to check out
	1839	L<"OPTIONS">. If you want to turn off or tweak the array folding feature (that
	1840	neat little transformation that produced $config->{server}) you'll find options
	1841	for that as well.
	1842
	1843	If you want to generate XML (for example to write a modified version of
	1844	$config back out as XML), check out C<XMLout()>.
	1845
	1846	If your needs are not so simple, this may not be the module for you. In that
	1847	case, you might want to read L<"WHERE TO FROM HERE?">.
	1848
	1849	=head1 DESCRIPTION
	1850
	1851	The XML::Simple module provides a simple API layer on top of an underlying XML
	1852	parsing module (either XML::Parser or one of the SAX2 parser modules). Two
	1853	functions are exported: C<XMLin()> and C<XMLout()>. Note: you can explicitly
	1854	request the lower case versions of the function names: C<xml_in()> and
	1855	C<xml_out()>.
	1856
	1857	The simplest approach is to call these two functions directly, but an
	1858	optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below)
	1859	allows them to be called as methods of an B<XML::Simple> object. The object
	1860	interface can also be used at either end of a SAX pipeline.
	1861
	1862	=head2 XMLin()
	1863
	1864	Parses XML formatted data and returns a reference to a data structure which
	1865	contains the same information in a more readily accessible form. (Skip
	1866	down to L<"EXAMPLES"> below, for more sample code).
	1867
	1868	C<XMLin()> accepts an optional XML specifier followed by zero or more 'name =>
	1869	value' option pairs. The XML specifier can be one of the following:
	1870
	1871	=over 4
	1872
	1873	=item A filename
	1874
	1875	If the filename contains no directory components C<XMLin()> will look for the
	1876	file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the
	1877	current directory if the SearchPath option is not defined. eg:
	1878
	1879	$ref = XMLin('/etc/params.xml');
	1880
	1881	Note, the filename '-' can be used to parse from STDIN.
	1882
	1883	=item undef
	1884
	1885	If there is no XML specifier, C<XMLin()> will check the script directory and
	1886	each of the SearchPath directories for a file with the same name as the script
	1887	but with the extension '.xml'. Note: if you wish to specify options, you
	1888	must specify the value 'undef'. eg:
	1889
	1890	$ref = XMLin(undef, ForceArray => 1);
	1891
	1892	=item A string of XML
	1893
	1894	A string containing XML (recognised by the presence of '<' and '>' characters)
	1895	will be parsed directly. eg:
	1896
	1897	$ref = XMLin('<opt username="bob" password="flurp" />');
	1898
	1899	=item An IO::Handle object
	1900
	1901	An IO::Handle object will be read to EOF and its contents parsed. eg:
	1902
	1903	$fh = IO::File->new('/etc/params.xml');
	1904	$ref = XMLin($fh);
	1905
	1906	=back
	1907
	1908	=head2 XMLout()
	1909
	1910	Takes a data structure (generally a hashref) and returns an XML encoding of
	1911	that structure. If the resulting XML is parsed using C<XMLin()>, it should
	1912	return a data structure equivalent to the original (see caveats below).
	1913
	1914	The C<XMLout()> function can also be used to output the XML as SAX events
	1915	(see the C<Handler> option and L<"SAX SUPPORT"> for more details).
	1916
	1917	When translating hashes to XML, hash keys which have a leading '-' will be
	1918	silently skipped. This is the approved method for marking elements of a
	1919	data structure which should be ignored by C<XMLout>. (Note: If these items
	1920	were not skipped the key names would be emitted as element or attribute names
	1921	with a leading '-' which would not be valid XML).
	1922
	1923	=head2 Caveats
	1924
	1925	Some care is required in creating data structures which will be passed to
	1926	C<XMLout()>. Hash keys from the data structure will be encoded as either XML
	1927	element names or attribute names. Therefore, you should use hash key names
	1928	which conform to the relatively strict XML naming rules:
	1929
	1930	Names in XML must begin with a letter. The remaining characters may be
	1931	letters, digits, hyphens (-), underscores (_) or full stops (.). It is also
	1932	allowable to include one colon (:) in an element name but this should only be
	1933	used when working with namespaces (B<XML::Simple> can only usefully work with
	1934	namespaces when teamed with a SAX Parser).
	1935
	1936	You can use other punctuation characters in hash values (just not in hash
	1937	keys) however B<XML::Simple> does not support dumping binary data.
	1938
	1939	If you break these rules, the current implementation of C<XMLout()> will
	1940	simply emit non-compliant XML which will be rejected if you try to read it
	1941	back in. (A later version of B<XML::Simple> might take a more proactive
	1942	approach).
	1943
	1944	Note also that although you can nest hashes and arrays to arbitrary levels,
	1945	circular data structures are not supported and will cause C<XMLout()> to die.
	1946
	1947	If you wish to 'round-trip' arbitrary data structures from Perl to XML and back
	1948	to Perl, then you should probably disable array folding (using the KeyAttr
	1949	option) both with C<XMLout()> and with C<XMLin()>. If you still don't get the
	1950	expected results, you may prefer to use L<XML::Dumper> which is designed for
	1951	exactly that purpose.
	1952
	1953	Refer to L<"WHERE TO FROM HERE?"> if C<XMLout()> is too simple for your needs.
	1954
	1955
	1956	=head1 OPTIONS
	1957
	1958	B<XML::Simple> supports a number of options (in fact as each release of
	1959	B<XML::Simple> adds more options, the module's claim to the name 'Simple'
	1960	becomes increasingly tenuous). If you find yourself repeatedly having to
	1961	specify the same options, you might like to investigate L<"OPTIONAL OO
	1962	INTERFACE"> below.
	1963
	1964	If you can't be bothered reading the documentation, refer to
	1965	L<"STRICT MODE"> to automatically catch common mistakes.
	1966
	1967	Because there are so many options, it's hard for new users to know which ones
	1968	are important, so here are the two you really need to know about:
	1969
	1970	=over 4
	1971
	1972	=item *
	1973
	1974	check out C<ForceArray> because you'll almost certainly want to turn it on
	1975
	1976	=item *
	1977
	1978	make sure you know what the C<KeyAttr> option does and what its default value is
	1979	because it may surprise you otherwise (note in particular that 'KeyAttr'
	1980	affects both C<XMLin> and C<XMLout>)
	1981
	1982	=back
	1983
	1984	The option name headings below have a trailing 'comment' - a hash followed by
	1985	two pieces of metadata:
	1986
	1987	=over 4
	1988
	1989	=item *
	1990
	1991	Options are marked with 'I<in>' if they are recognised by C<XMLin()> and
	1992	'I<out>' if they are recognised by C<XMLout()>.
	1993
	1994	=item *
	1995
	1996	Each option is also flagged to indicate whether it is:
	1997
	1998	'important' - don't use the module until you understand this one
	1999	'handy' - you can skip this on the first time through
	2000	'advanced' - you can skip this on the second time through
	2001	'SAX only' - don't worry about this unless you're using SAX (or
	2002	alternatively if you need this, you also need SAX)
	2003	'seldom used' - you'll probably never use this unless you were the
	2004	person that requested the feature
	2005
	2006	=back
	2007
	2008	The options are listed alphabetically:
	2009
	2010	Note: option names are no longer case sensitive so you can use the mixed case
	2011	versions shown here; all lower case as required by versions 2.03 and earlier;
	2012	or you can add underscores between the words (eg: key_attr).
	2013
	2014
	2015	=head2 AttrIndent => 1 I<# out - handy>
	2016
	2017	When you are using C<XMLout()>, enable this option to have attributes printed
	2018	one-per-line with sensible indentation rather than all on one line.
	2019
	2020	=head2 Cache => [ cache schemes ] I<# in - advanced>
	2021
	2022	Because loading the B<XML::Parser> module and parsing an XML file can consume a
	2023	significant number of CPU cycles, it is often desirable to cache the output of
	2024	C<XMLin()> for later reuse.
	2025
	2026	When parsing from a named file, B<XML::Simple> supports a number of caching
	2027	schemes. The 'Cache' option may be used to specify one or more schemes (using
	2028	an anonymous array). Each scheme will be tried in turn in the hope of finding
	2029	a cached pre-parsed representation of the XML file. If no cached copy is
	2030	found, the file will be parsed and the first cache scheme in the list will be
	2031	used to save a copy of the results. The following cache schemes have been
	2032	implemented:
	2033
	2034	=over 4
	2035
	2036	=item storable
	2037
	2038	Utilises B<Storable.pm> to read/write a cache file with the same name as the
	2039	XML file but with the extension .stor
	2040
	2041	=item memshare
	2042
	2043	When a file is first parsed, a copy of the resulting data structure is retained
	2044	in memory in the B<XML::Simple> module's namespace. Subsequent calls to parse
	2045	the same file will return a reference to this structure. This cached version
	2046	will persist only for the life of the Perl interpreter (which in the case of
	2047	mod_perl for example, may be some significant time).
	2048
	2049	Because each caller receives a reference to the same data structure, a change
	2050	made by one caller will be visible to all. For this reason, the reference
	2051	returned should be treated as read-only.
	2052
	2053	=item memcopy
	2054
	2055	This scheme works identically to 'memshare' (above) except that each caller
	2056	receives a reference to a new data structure which is a copy of the cached
	2057	version. Copying the data structure will add a little processing overhead,
	2058	therefore this scheme should only be used where the caller intends to modify
	2059	the data structure (or wishes to protect itself from others who might). This
	2060	scheme uses B<Storable.pm> to perform the copy.
	2061
	2062	=back
	2063
	2064	Warning! The memory-based caching schemes compare the timestamp on the file to
	2065	the time when it was last parsed. If the file is stored on an NFS filesystem
	2066	(or other network share) and the clock on the file server is not exactly
	2067	synchronised with the clock where your script is run, updates to the source XML
	2068	file may appear to be ignored.
	2069
	2070	=head2 ContentKey => 'keyname' I<# in+out - seldom used>
	2071
	2072	When text content is parsed to a hash value, this option lets you specify a
	2073	name for the hash key to override the default 'content'. So for example:
	2074
	2075	XMLin('<opt one="1">Text</opt>', ContentKey => 'text')
	2076
	2077	will parse to:
	2078
	2079	{ 'one' => 1, 'text' => 'Text' }
	2080
	2081	instead of:
	2082
	2083	{ 'one' => 1, 'content' => 'Text' }
	2084
	2085	C<XMLout()> will also honour the value of this option when converting a hashref
	2086	to XML.
	2087
	2088	You can also prefix your selected key name with a '-' character to have
	2089	C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after
	2090	array folding. For example:
	2091
	2092	XMLin(
	2093	'<opt><item name="one">First</item><item name="two">Second</item></opt>',
	2094	KeyAttr => {item => 'name'},
	2095	ForceArray => [ 'item' ],
	2096	ContentKey => '-content'
	2097	)
	2098
	2099	will parse to:
	2100
	2101	{
	2102	'item' => {
	2103	'one' => 'First',
	2104	'two' => 'Second'
	2105	}
	2106	}
	2107
	2108	rather than this (without the '-'):
	2109
	2110	{
	2111	'item' => {
	2112	'one' => { 'content' => 'First' },
	2113	'two' => { 'content' => 'Second' }
	2114	}
	2115	}
	2116
	2117	=head2 DataHandler => code_ref I<# in - SAX only>
	2118
	2119	When you use an B<XML::Simple> object as a SAX handler, it will return a
	2120	'simple tree' data structure in the same format as C<XMLin()> would return. If
	2121	this option is set (to a subroutine reference), then when the tree is built the
	2122	subroutine will be called and passed two arguments: a reference to the
	2123	B<XML::Simple> object and a reference to the data tree. The return value from
	2124	the subroutine will be returned to the SAX driver. (See L<"SAX SUPPORT"> for
	2125	more details).
	2126
	2127	=head2 ForceArray => 1 I<# in - important>
	2128
	2129	This option should be set to '1' to force nested elements to be represented
	2130	as arrays even when there is only one. Eg, with ForceArray enabled, this
	2131	XML:
	2132
	2133	<opt>
	2134	<name>value</name>
	2135	</opt>
	2136
	2137	would parse to this:
	2138
	2139	{
	2140	'name' => [
	2141	'value'
	2142	]
	2143	}
	2144
	2145	instead of this (the default):
	2146
	2147	{
	2148	'name' => 'value'
	2149	}
	2150
	2151	This option is especially useful if the data structure is likely to be written
	2152	back out as XML and the default behaviour of rolling single nested elements up
	2153	into attributes is not desirable.
	2154
	2155	If you are using the array folding feature, you should almost certainly enable
	2156	this option. If you do not, single nested elements will not be parsed to
	2157	arrays and therefore will not be candidates for folding to a hash. (Given that
	2158	the default value of 'KeyAttr' enables array folding, the default value of this
	2159	option should probably also have been enabled too - sorry).
	2160
	2161	=head2 ForceArray => [ names ] I<# in - important>
	2162
	2163	This alternative (and preferred) form of the 'ForceArray' option allows you to
	2164	specify a list of element names which should always be forced into an array
	2165	representation, rather than the 'all or nothing' approach above.
	2166
	2167	It is also possible (since version 2.05) to include compiled regular
	2168	expressions in the list - any element names which match the pattern will be
	2169	forced to arrays. If the list contains only a single regex, then it is not
	2170	necessary to enclose it in an arrayref. Eg:
	2171
	2172	ForceArray => qr/_list$/
	2173
	2174	=head2 ForceContent => 1 I<# in - seldom used>
	2175
	2176	When C<XMLin()> parses elements which have text content as well as attributes,
	2177	the text content must be represented as a hash value rather than a simple
	2178	scalar. This option allows you to force text content to always parse to
	2179	a hash value even when there are no attributes. So for example:
	2180
	2181	XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1)
	2182
	2183	will parse to:
	2184
	2185	{
	2186	'x' => { 'content' => 'text1' },
	2187	'y' => { 'a' => 2, 'content' => 'text2' }
	2188	}
	2189
	2190	instead of:
	2191
	2192	{
	2193	'x' => 'text1',
	2194	'y' => { 'a' => 2, 'content' => 'text2' }
	2195	}
	2196
	2197	=head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy>
	2198
	2199	You can use this option to eliminate extra levels of indirection in your Perl
	2200	data structure. For example this XML:
	2201
	2202	<opt>
	2203	<searchpath>
	2204	<dir>/usr/bin</dir>
	2205	<dir>/usr/local/bin</dir>
	2206	<dir>/usr/X11/bin</dir>
	2207	</searchpath>
	2208	</opt>
	2209
	2210	Would normally be read into a structure like this:
	2211
	2212	{
	2213	searchpath => {
	2214	dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
	2215	}
	2216	}
	2217
	2218	But when read in with the appropriate value for 'GroupTags':
	2219
	2220	my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' });
	2221
	2222	It will return this simpler structure:
	2223
	2224	{
	2225	searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
	2226	}
	2227
	2228	The grouping element (C<< <searchpath> >> in the example) must not contain any
	2229	attributes or elements other than the grouped element.
	2230
	2231	You can specify multiple 'grouping element' to 'grouped element' mappings in
	2232	the same hashref. If this option is combined with C<KeyAttr>, the array
	2233	folding will occur first and then the grouped element names will be eliminated.
	2234
	2235	C<XMLout> will also use the grouptag mappings to re-introduce the tags around
	2236	the grouped elements. Beware though that this will occur in all places that
	2237	the 'grouping tag' name occurs - you probably don't want to use the same name
	2238	for elements as well as attributes.
	2239
	2240	=head2 Handler => object_ref I<# out - SAX only>
	2241
	2242	Use the 'Handler' option to have C<XMLout()> generate SAX events rather than
	2243	returning a string of XML. For more details see L<"SAX SUPPORT"> below.
	2244
	2245	Note: the current implementation of this option generates a string of XML
	2246	and uses a SAX parser to translate it into SAX events. The normal encoding
	2247	rules apply here - your data must be UTF8 encoded unless you specify an
	2248	alternative encoding via the 'XMLDecl' option; and by the time the data reaches
	2249	the handler object, it will be in UTF8 form regardless of the encoding you
	2250	supply. A future implementation of this option may generate the events
	2251	directly.
	2252
	2253	=head2 KeepRoot => 1 I<# in+out - handy>
	2254
	2255	In its attempt to return a data structure free of superfluous detail and
	2256	unnecessary levels of indirection, C<XMLin()> normally discards the root
	2257	element name. Setting the 'KeepRoot' option to '1' will cause the root element
	2258	name to be retained. So after executing this code:
	2259
	2260	$config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1)
	2261
	2262	You'll be able to reference the tempdir as
	2263	C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default
	2264	C<$config-E<gt>{tempdir}>.
	2265
	2266	Similarly, setting the 'KeepRoot' option to '1' will tell C<XMLout()> that the
	2267	data structure already contains a root element name and it is not necessary to
	2268	add another.
	2269
	2270	=head2 KeyAttr => [ list ] I<# in+out - important>
	2271
	2272	This option controls the 'array folding' feature which translates nested
	2273	elements from an array to a hash. It also controls the 'unfolding' of hashes
	2274	to arrays.
	2275
	2276	For example, this XML:
	2277
	2278	<opt>
	2279	<user login="grep" fullname="Gary R Epstein" />
	2280	<user login="stty" fullname="Simon T Tyson" />
	2281	</opt>
	2282
	2283	would, by default, parse to this:
	2284
	2285	{
	2286	'user' => [
	2287	{
	2288	'login' => 'grep',
	2289	'fullname' => 'Gary R Epstein'
	2290	},
	2291	{
	2292	'login' => 'stty',
	2293	'fullname' => 'Simon T Tyson'
	2294	}
	2295	]
	2296	}
	2297
	2298	If the option 'KeyAttr => "login"' were used to specify that the 'login'
	2299	attribute is a key, the same XML would parse to:
	2300
	2301	{
	2302	'user' => {
	2303	'stty' => {
	2304	'fullname' => 'Simon T Tyson'
	2305	},
	2306	'grep' => {
	2307	'fullname' => 'Gary R Epstein'
	2308	}
	2309	}
	2310	}
	2311
	2312	The key attribute names should be supplied in an arrayref if there is more
	2313	than one. C<XMLin()> will attempt to match attribute names in the order
	2314	supplied. C<XMLout()> will use the first attribute name supplied when
	2315	'unfolding' a hash into an array.
	2316
	2317	Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do
	2318	not want folding on input or unfolding on output you must set this option
	2319	to an empty list to disable the feature.
	2320
	2321	Note 2: If you wish to use this option, you should also enable the
	2322	C<ForceArray> option. Without 'ForceArray', a single nested element will be
	2323	rolled up into a scalar rather than an array and therefore will not be folded
	2324	(since only arrays get folded).
	2325
	2326	=head2 KeyAttr => { list } I<# in+out - important>
	2327
	2328	This alternative (and preferred) method of specifying the key attributes
	2329	allows more fine grained control over which elements are folded and on which
	2330	attributes. For example the option 'KeyAttr => { package => "id" }' will cause
	2331	any package elements to be folded on the 'id' attribute. No other elements
	2332	which have an 'id' attribute will be folded at all.
	2333
	2334	Note: C<XMLin()> will generate a warning (or a fatal error in L<"STRICT MODE">)
	2335	if this syntax is used and an element which does not have the specified key
	2336	attribute is encountered (eg: a 'package' element without an 'id' attribute, to
	2337	use the example above). Warnings will only be generated if B<-w> is in force.
	2338
	2339	Two further variations are made possible by prefixing a '+' or a '-' character
	2340	to the attribute name:
	2341
	2342	The option 'KeyAttr => { user => "+login" }' will cause this XML:
	2343
	2344	<opt>
	2345	<user login="grep" fullname="Gary R Epstein" />
	2346	<user login="stty" fullname="Simon T Tyson" />
	2347	</opt>
	2348
	2349	to parse to this data structure:
	2350
	2351	{
	2352	'user' => {
	2353	'stty' => {
	2354	'fullname' => 'Simon T Tyson',
	2355	'login' => 'stty'
	2356	},
	2357	'grep' => {
	2358	'fullname' => 'Gary R Epstein',
	2359	'login' => 'grep'
	2360	}
	2361	}
	2362	}
	2363
	2364	The '+' indicates that the value of the key attribute should be copied rather
	2365	than moved to the folded hash key.
	2366
	2367	A '-' prefix would produce this result:
	2368
	2369	{
	2370	'user' => {
	2371	'stty' => {
	2372	'fullname' => 'Simon T Tyson',
	2373	'-login' => 'stty'
	2374	},
	2375	'grep' => {
	2376	'fullname' => 'Gary R Epstein',
	2377	'-login' => 'grep'
	2378	}
	2379	}
	2380	}
	2381
	2382	As described earlier, C<XMLout> will ignore hash keys starting with a '-'.
	2383
	2384	=head2 NoAttr => 1 I<# in+out - handy>
	2385
	2386	When used with C<XMLout()>, the generated XML will contain no attributes.
	2387	All hash key/values will be represented as nested elements instead.
	2388
	2389	When used with C<XMLin()>, any attributes in the XML will be ignored.
	2390
	2391	=head2 NoEscape => 1 I<# out - seldom used>
	2392
	2393	By default, C<XMLout()> will translate the characters 'E<lt>', 'E<gt>', '&' and
	2394	'"' to '&lt;', '&gt;', '&amp;' and '&quot;' respectively. Use this option to
	2395	suppress escaping (presumably because you've already escaped the data in some
	2396	more sophisticated manner).
	2397
	2398	=head2 NoIndent => 1 I<# out - seldom used>
	2399
	2400	Set this option to 1 to disable C<XMLout()>'s default 'pretty printing' mode.
	2401	With this option enabled, the XML output will all be on one line (unless there
	2402	are newlines in the data) - this may be easier for downstream processing.
	2403
	2404	=head2 NoSort => 1 I<# out - seldom used>
	2405
	2406	Newer versions of XML::Simple sort elements and attributes alphabetically (*),
	2407	by default. Enable this option to suppress the sorting - possibly for
	2408	backwards compatibility.
	2409
	2410	* Actually, sorting is alphabetical but 'key' attribute or element names (as in
	2411	'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements
	2412	are sorted alphabetically by the value of the key field.
	2413
	2414	=head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy>
	2415
	2416	This option controls how whitespace in text content is handled. Recognised
	2417	values for the option are:
	2418
	2419	=over 4
	2420
	2421	=item *
	2422
	2423	0 = (default) whitespace is passed through unaltered (except of course for the
	2424	normalisation of whitespace in attribute values which is mandated by the XML
	2425	recommendation)
	2426
	2427	=item *
	2428
	2429	1 = whitespace is normalised in any value used as a hash key (normalising means
	2430	removing leading and trailing whitespace and collapsing sequences of whitespace
	2431	characters to a single space)
	2432
	2433	=item *
	2434
	2435	2 = whitespace is normalised in all text content
	2436
	2437	=back
	2438
	2439	Note: you can spell this option with a 'z' if that is more natural for you.
	2440
	2441	=head2 NSExpand => 1 I<# in+out handy - SAX only>
	2442
	2443	This option controls namespace expansion - the translation of element and
	2444	attribute names of the form 'prefix:name' to '{uri}name'. For example the
	2445	element name 'xsl:template' might be expanded to:
	2446	'{http://www.w3.org/1999/XSL/Transform}template'.
	2447
	2448	By default, C<XMLin()> will return element names and attribute names exactly as
	2449	they appear in the XML. Setting this option to 1 will cause all element and
	2450	attribute names to be expanded to include their namespace URI.
	2451
	2452	I<Note: You must be using a SAX parser for this option to work (ie: it does not
	2453	work with XML::Parser)>.
	2454
	2455	This option also controls whether C<XMLout()> performs the reverse translation
	2456	from '{uri}name' back to 'prefix:name'. The default is no translation. If
	2457	your data contains expanded names, you should set this option to 1 otherwise
	2458	C<XMLout> will emit XML which is not well formed.
	2459
	2460	I<Note: You must have the XML::NamespaceSupport module installed if you want
	2461	C<XMLout()> to translate URIs back to prefixes>.
	2462
	2463	=head2 NumericEscape => 0 | 1 | 2 I<# out - handy>
	2464
	2465	Use this option to have 'high' (non-ASCII) characters in your Perl data
	2466	structure converted to numeric entities (eg: &#x20AC;) in the XML output. Three
	2467	levels are possible:
	2468
	2469	0 - default: no numeric escaping (OK if you're writing out UTF8)
	2470
	2471	1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output
	2472
	2473	2 - all characters above 0x7F are escaped (good for plain ASCII output)
	2474
	2475	=head2 OutputFile => <file specifier> I<# out - handy>
	2476
	2477	The default behaviour of C<XMLout()> is to return the XML as a string. If you
	2478	wish to write the XML to a file, simply supply the filename using the
	2479	'OutputFile' option.
	2480
	2481	This option also accepts an IO handle object - especially useful in Perl 5.8.0
	2482	and later for output using an encoding other than UTF-8, eg:
	2483
	2484	open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!";
	2485	XMLout($ref, OutputFile => $fh);
	2486
	2487	Note, XML::Simple does not require that the object you pass in to the
	2488	OutputFile option inherits from L<IO::Handle> - it simply assumes the object
	2489	supports a C<print> method.
	2490
	2491	=head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this>
	2492
	2493	I<Note: This option is now officially deprecated. If you find it useful, email
	2494	the author with an example of what you use it for. Do not use this option to
	2495	set the ProtocolEncoding, that's just plain wrong - fix the XML>.
	2496
	2497	This option allows you to pass parameters to the constructor of the underlying
	2498	XML::Parser object (which of course assumes you're not using SAX).
	2499
	2500	=head2 RootName => 'string' I<# out - handy>
	2501
	2502	By default, when C<XMLout()> generates XML, the root element will be named
	2503	'opt'. This option allows you to specify an alternative name.
	2504
	2505	Specifying either undef or the empty string for the RootName option will
	2506	produce XML with no root elements. In most cases the resulting XML fragment
	2507	will not be 'well formed' and therefore could not be read back in by C<XMLin()>.
	2508	Nevertheless, the option has been found to be useful in certain circumstances.
	2509
	2510	=head2 SearchPath => [ list ] I<# in - handy>
	2511
	2512	If you pass C<XMLin()> a filename, but the filename includes no directory
	2513	component, you can use this option to specify which directories should be
	2514	searched to locate the file. You might use this option to search first in the
	2515	user's home directory, then in a global directory such as /etc.
	2516
	2517	If a filename is provided to C<XMLin()> but SearchPath is not defined, the
	2518	file is assumed to be in the current directory.
	2519
	2520	If the first parameter to C<XMLin()> is undefined, the default SearchPath
	2521	will contain only the directory in which the script itself is located.
	2522	Otherwise the default SearchPath will be empty.
	2523
	2524	=head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy>
	2525
	2526	This option controls what C<XMLin()> should do with empty elements (no
	2527	attributes and no content). The default behaviour is to represent them as
	2528	empty hashes. Setting this option to a true value (eg: 1) will cause empty
	2529	elements to be skipped altogether. Setting the option to 'undef' or the empty
	2530	string will cause empty elements to be represented as the undefined value or
	2531	the empty string respectively. The latter two alternatives are a little
	2532	easier to test for in your code than a hash with no keys.
	2533
	2534	The option also controls what C<XMLout()> does with undefined values. Setting
	2535	the option to undef causes undefined values to be output as empty elements
	2536	(rather than empty attributes), it also suppresses the generation of warnings
	2537	about undefined values. Setting the option to a true value (eg: 1) causes
	2538	undefined values to be skipped altogether on output.
	2539
	2540	=head2 ValueAttr => [ names ] I<# in - handy>
	2541
	2542	Use this option to deal with elements which always have a single attribute and no
	2543	content. Eg:
	2544
	2545	<opt>
	2546	<colour value="red" />
	2547	<size value="XXL" />
	2548	</opt>
	2549
	2550	Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to:
	2551
	2552	{
	2553	colour => 'red',
	2554	size => 'XXL'
	2555	}
	2556
	2557	instead of this (the default):
	2558
	2559	{
	2560	colour => { value => 'red' },
	2561	size => { value => 'XXL' }
	2562	}
	2563
	2564	Note: This form of the ValueAttr option is not compatible with C<XMLout()> -
	2565	since the attribute name is discarded at parse time, the original XML cannot be
	2566	reconstructed.
	2567
	2568	=head2 ValueAttr => { element => attribute, ... } I<# in+out - handy>
	2569
	2570	This (preferred) form of the ValueAttr option requires you to specify both
	2571	the element and the attribute names. This is not only safer, it also allows
	2572	the original XML to be reconstructed by C<XMLout()>.
	2573
	2574	Note: You probably don't want to use this option and the NoAttr option at the
	2575	same time.
	2576
	2577	=head2 Variables => { name => value } I<# in - handy>
	2578
	2579	This option allows variables in the XML to be expanded when the file is read.
	2580	(there is no facility for putting the variable names back if you regenerate
	2581	XML using C<XMLout>).
	2582
	2583	A 'variable' is any text of the form C<${name}> which occurs in an attribute
	2584	value or in the text content of an element. If 'name' matches a key in the
	2585	supplied hashref, C<${name}> will be replaced with the corresponding value from
	2586	the hashref. If no matching key is found, the variable will not be replaced.
	2587	Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are
	2588	allowed).
	2589
	2590	=head2 VarAttr => 'attr_name' I<# in - handy>
	2591
	2592	In addition to the variables defined using C<Variables>, this option allows
	2593	variables to be defined in the XML. A variable definition consists of an
	2594	element with an attribute called 'attr_name' (the value of the C<VarAttr>
	2595	option). The value of the attribute will be used as the variable name and the
	2596	text content of the element will be used as the value. A variable defined in
	2597	this way will override a variable defined using the C<Variables> option. For
	2598	example:
	2599
	2600	XMLin( '<opt>
	2601	<dir name="prefix">/usr/local/apache</dir>
	2602	<dir name="exec_prefix">${prefix}</dir>
	2603	<dir name="bindir">${exec_prefix}/bin</dir>
	2604	</opt>',
	2605	VarAttr => 'name', ContentKey => '-content'
	2606	);
	2607
	2608	produces the following data structure:
	2609
	2610	{
	2611	dir => {
	2612	prefix => '/usr/local/apache',
	2613	exec_prefix => '/usr/local/apache',
	2614	bindir => '/usr/local/apache/bin',
	2615	}
	2616	}
	2617
	2618	=head2 XMLDecl => 1 or XMLDecl => 'string' I<# out - handy>
	2619
	2620	If you want the output from C<XMLout()> to start with the optional XML
	2621	declaration, simply set the option to '1'. The default XML declaration is:
	2622
	2623	<?xml version='1.0' standalone='yes'?>
	2624
	2625	If you want some other string (for example to declare an encoding value), set
	2626	the value of this option to the complete string you require.
	2627
	2628
	2629	=head1 OPTIONAL OO INTERFACE
	2630
	2631	The procedural interface is both simple and convenient; however, there are a
	2632	couple of reasons why you might prefer to use the object oriented (OO)
	2633	interface:
	2634
	2635	=over 4
	2636
	2637	=item *
	2638
	2639	to define a set of default values which should be used on all subsequent calls
	2640	to C<XMLin()> or C<XMLout()>
	2641
	2642	=item *
	2643
	2644	to override methods in B<XML::Simple> to provide customised behaviour
	2645
	2646	=back
	2647
	2648	The default values for the options described above are unlikely to suit
	2649	everyone. The OO interface allows you to effectively override B<XML::Simple>'s
	2650	defaults with your preferred values. It works like this:
	2651
	2652	First create an XML::Simple parser object with your preferred defaults:
	2653
	2654	my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1);
	2655
	2656	then call C<XMLin()> or C<XMLout()> as a method of that object:
	2657
	2658	my $ref = $xs->XMLin($xml);
	2659	my $xml = $xs->XMLout($ref);
	2660
	2661	You can also specify options when you make the method calls and these values
	2662	will be merged with the values specified when the object was created. Values
	2663	specified in a method call take precedence.
	2664
	2665	Overriding methods is a more advanced topic but might be useful if for example
	2666	you wished to provide an alternative routine for escaping character data (the
	2667	escape_value method) or for building the initial parse tree (the build_tree
	2668	method).
	2669
	2670	Note: when called as methods, the C<XMLin()> and C<XMLout()> routines may be
	2671	called as C<xml_in()> or C<xml_out()>. The method names are aliased so the
	2672	only difference is the aesthetics.
	2673
	2674	=head1 STRICT MODE
	2675
	2676	If you import the B<XML::Simple> routines like this:
	2677
	2678	use XML::Simple qw(:strict);
	2679
	2680	the following common mistakes will be detected and treated as fatal errors:
	2681
	2682	=over 4
	2683
	2684	=item *
	2685
	2686	Failing to explicitly set the C<KeyAttr> option - if you can't be bothered
	2687	reading about this option, turn it off with: KeyAttr => [ ]
	2688
	2689	=item *
	2690
	2691	Failing to explicitly set the C<ForceArray> option - if you can't be bothered
	2692	reading about this option, set it to the safest mode with: ForceArray => 1
	2693
	2694	=item *
	2695
	2696	Setting ForceArray to an array, but failing to list all the elements from the
	2697	KeyAttr hash.
	2698
	2699	=item *
	2700
	2701	Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains
	2702	one or more E<lt>partE<gt> elements without a 'partnum' attribute (or nested
	2703	element). Note: if strict mode is not set but -w is, this condition triggers a
	2704	warning.
	2705
	2706	=item *
	2707
	2708	Data error - as above, but value of key attribute (eg: partnum) is not a
	2709	scalar string (due to nested elements etc). This will also trigger a warning
	2710	if strict mode is not enabled.
	2711
	2712	=back
	2713
	2714	=head1 SAX SUPPORT
	2715
	2716	From version 1.08_01, B<XML::Simple> includes support for SAX (the Simple API
	2717	for XML) - specifically SAX2.
	2718
	2719	In a typical SAX application, an XML parser (or SAX 'driver') module generates
	2720	SAX events (start of element, character data, end of element, etc) as it parses
	2721	an XML document and a 'handler' module processes the events to extract the
	2722	required data. This simple model allows for some interesting and powerful
	2723	possibilities:
	2724
	2725	=over 4
	2726
	2727	=item *
	2728
	2729	Applications written to the SAX API can extract data from huge XML documents
	2730	without the memory overheads of a DOM or tree API.
	2731
	2732	=item *
	2733
	2734	The SAX API allows for plug and play interchange of parser modules without
	2735	having to change your code to fit a new module's API. A number of SAX parsers
	2736	are available with capabilities ranging from extreme portability to blazing
	2737	performance.
	2738
	2739	=item *
	2740
	2741	A SAX 'filter' module can implement both a handler interface for receiving
	2742	data and a generator interface for passing modified data on to a downstream
	2743	handler. Filters can be chained together in 'pipelines'.
	2744
	2745	=item *
	2746
	2747	One filter module might split a data stream to direct data to two or more
	2748	downstream handlers.
	2749
	2750	=item *
	2751
	2752	Generating SAX events is not the exclusive preserve of XML parsing modules.
	2753	For example, a module might extract data from a relational database using DBI
	2754	and pass it on to a SAX pipeline for filtering and formatting.
	2755
	2756	=back
	2757
	2758	B<XML::Simple> can operate at either end of a SAX pipeline. For example,
	2759	you can take a data structure in the form of a hashref and pass it into a
	2760	SAX pipeline using the 'Handler' option on C<XMLout()>:
	2761
	2762	use XML::Simple;
	2763	use Some::SAX::Filter;
	2764	use XML::SAX::Writer;
	2765
	2766	my $ref = {
	2767	.... # your data here
	2768	};
	2769
	2770	my $writer = XML::SAX::Writer->new();
	2771	my $filter = Some::SAX::Filter->new(Handler => $writer);
	2772	my $simple = XML::Simple->new(Handler => $filter);
	2773	$simple->XMLout($ref);
	2774
	2775	You can also put B<XML::Simple> at the opposite end of the pipeline to take
	2776	advantage of the simple 'tree' data structure once the relevant data has been
	2777	isolated through filtering:
	2778
	2779	use XML::SAX;
	2780	use Some::SAX::Filter;
	2781	use XML::Simple;
	2782
	2783	my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']);
	2784	my $filter = Some::SAX::Filter->new(Handler => $simple);
	2785	my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
	2786
	2787	my $ref = $parser->parse_uri('some_huge_file.xml');
	2788
	2789	print $ref->{part}->{'555-1234'};
	2790
	2791	You can build a filter by using an XML::Simple object as a handler and setting
	2792	its DataHandler option to point to a routine which takes the resulting tree,
	2793	modifies it and sends it off as SAX events to a downstream handler:
	2794
	2795	my $writer = XML::SAX::Writer->new();
	2796	my $filter = XML::Simple->new(
	2797	DataHandler => sub {
	2798	my $simple = shift;
	2799	my $data = shift;
	2800
	2801	# Modify $data here
	2802
	2803	$simple->XMLout($data, Handler => $writer);
	2804	}
	2805	);
	2806	my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
	2807
	2808	$parser->parse_uri($filename);
	2809
	2810	I<Note: In this last example, the 'Handler' option was specified in the call to
	2811	C<XMLout()> but it could also have been specified in the constructor>.
	2812
	2813	=head1 ENVIRONMENT
	2814
	2815	If you don't care which parser module B<XML::Simple> uses then skip this
	2816	section entirely (it looks more complicated than it really is).
	2817
	2818	B<XML::Simple> will default to using a B<SAX> parser if one is available or
	2819	B<XML::Parser> if SAX is not available.
	2820
	2821	You can dictate which parser module is used by setting either the environment
	2822	variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable
	2823	$XML::Simple::PREFERRED_PARSER to contain the module name. The following rules
	2824	are used:
	2825
	2826	=over 4
	2827
	2828	=item *
	2829
	2830	The package variable takes precedence over the environment variable if both are defined. To force B<XML::Simple> to ignore the environment settings and use
	2831	its default rules, you can set the package variable to an empty string.
	2832
	2833	=item *
	2834
	2835	If the 'preferred parser' is set to the string 'XML::Parser', then
	2836	L<XML::Parser> will be used (or C<XMLin()> will die if L<XML::Parser> is not
	2837	installed).
	2838
	2839	=item *
	2840
	2841	If the 'preferred parser' is set to some other value, then it is assumed to be
	2842	the name of a SAX parser module and is passed to L<XML::SAX::ParserFactory>.
	2843	If L<XML::SAX> is not installed, or the requested parser module is not
	2844	installed, then C<XMLin()> will die.
	2845
	2846	=item *
	2847
	2848	If the 'preferred parser' is not defined at all (the normal default
	2849	state), an attempt will be made to load L<XML::SAX>. If L<XML::SAX> is
	2850	installed, then a parser module will be selected according to
	2851	L<XML::SAX::ParserFactory>'s normal rules (which typically means the last SAX
	2852	parser installed).
	2853
	2854	=item *
	2855
	2856	If the 'preferred parser' is not defined and B<XML::SAX> is not
	2857	installed, then B<XML::Parser> will be used. C<XMLin()> will die if
	2858	L<XML::Parser> is not installed.
	2859
	2860	=back
	2861
	2862	Note: The B<XML::SAX> distribution includes an XML parser written entirely in
	2863	Perl. It is very portable but it is not very fast. You should consider
	2864	installing L<XML::LibXML> or L<XML::SAX::Expat> if they are available for your
	2865	platform.
	2866
	2867	=head1 ERROR HANDLING
	2868
	2869	The XML standard is very clear on the issue of non-compliant documents. An
	2870	error in parsing any single element (for example a missing end tag) must cause
	2871	the whole document to be rejected. B<XML::Simple> will die with an appropriate
	2872	message if it encounters a parsing error.
	2873
	2874	If dying is not appropriate for your application, you should arrange to call
	2875	C<XMLin()> in an eval block and look for errors in $@. eg:
	2876
	2877	my $config = eval { XMLin() };
	2878	PopUpMessage($@) if($@);
	2879
	2880	Note, there is a common misconception that use of B<eval> will significantly
	2881	slow down a script. While that may be true when the code being eval'd is in a
	2882	string, it is not true of code like the sample above.
	2883
	2884	=head1 EXAMPLES
	2885
	2886	When C<XMLin()> reads the following very simple piece of XML:
	2887
	2888	<opt username="testuser" password="frodo"></opt>
	2889
	2890	it returns the following data structure:
	2891
	2892	{
	2893	'username' => 'testuser',
	2894	'password' => 'frodo'
	2895	}
	2896
	2897	The identical result could have been produced with this alternative XML:
	2898
	2899	<opt username="testuser" password="frodo" />
	2900
	2901	Or this (although see 'ForceArray' option for variations):
	2902
	2903	<opt>
	2904	<username>testuser</username>
	2905	<password>frodo</password>
	2906	</opt>
	2907
	2908	Repeated nested elements are represented as anonymous arrays:
	2909
	2910	<opt>
	2911	<person firstname="Joe" lastname="Smith">
	2912	<email>joe@smith.com</email>
	2913	<email>jsmith@yahoo.com</email>
	2914	</person>
	2915	<person firstname="Bob" lastname="Smith">
	2916	<email>bob@smith.com</email>
	2917	</person>
	2918	</opt>
	2919
	2920	{
	2921	'person' => [
	2922	{
	2923	'email' => [
	2924	'joe@smith.com',
	2925	'jsmith@yahoo.com'
	2926	],
	2927	'firstname' => 'Joe',
	2928	'lastname' => 'Smith'
	2929	},
	2930	{
	2931	'email' => 'bob@smith.com',
	2932	'firstname' => 'Bob',
	2933	'lastname' => 'Smith'
	2934	}
	2935	]
	2936	}
	2937
	2938	Nested elements with a recognised key attribute are transformed (folded) from
	2939	an array into a hash keyed on the value of that attribute (see the C<KeyAttr>
	2940	option):
	2941
	2942	<opt>
	2943	<person key="jsmith" firstname="Joe" lastname="Smith" />
	2944	<person key="tsmith" firstname="Tom" lastname="Smith" />
	2945	<person key="jbloggs" firstname="Joe" lastname="Bloggs" />
	2946	</opt>
	2947
	2948	{
	2949	'person' => {
	2950	'jbloggs' => {
	2951	'firstname' => 'Joe',
	2952	'lastname' => 'Bloggs'
	2953	},
	2954	'tsmith' => {
	2955	'firstname' => 'Tom',
	2956	'lastname' => 'Smith'
	2957	},
	2958	'jsmith' => {
	2959	'firstname' => 'Joe',
	2960	'lastname' => 'Smith'
	2961	}
	2962	}
	2963	}
	2964
	2965
	2966	The <anon> tag can be used to form anonymous arrays:
	2967
	2968	<opt>
	2969	<head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head>
	2970	<data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data>
	2971	<data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data>
	2972	<data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data>
	2973	</opt>
	2974
	2975	{
	2976	'head' => [
	2977	[ 'Col 1', 'Col 2', 'Col 3' ]
	2978	],
	2979	'data' => [
	2980	[ 'R1C1', 'R1C2', 'R1C3' ],
	2981	[ 'R2C1', 'R2C2', 'R2C3' ],
	2982	[ 'R3C1', 'R3C2', 'R3C3' ]
	2983	]
	2984	}
	2985
	2986	Anonymous arrays can be nested to arbitrary levels and as a special case, if
	2987	the surrounding tags for an XML document contain only an anonymous array the
	2988	arrayref will be returned directly rather than the usual hashref:
	2989
	2990	<opt>
	2991	<anon><anon>Col 1</anon><anon>Col 2</anon></anon>
	2992	<anon><anon>R1C1</anon><anon>R1C2</anon></anon>
	2993	<anon><anon>R2C1</anon><anon>R2C2</anon></anon>
	2994	</opt>
	2995
	2996	[
	2997	[ 'Col 1', 'Col 2' ],
	2998	[ 'R1C1', 'R1C2' ],
	2999	[ 'R2C1', 'R2C2' ]
	3000	]
	3001
	3002	Elements which only contain text content will simply be represented as a
	3003	scalar. Where an element has both attributes and text content, the element
	3004	will be represented as a hashref with the text content in the 'content' key
	3005	(see the C<ContentKey> option):
	3006
	3007	<opt>
	3008	<one>first</one>
	3009	<two attr="value">second</two>
	3010	</opt>
	3011
	3012	{
	3013	'one' => 'first',
	3014	'two' => { 'attr' => 'value', 'content' => 'second' }
	3015	}
	3016
	3017	Mixed content (elements which contain both text content and nested elements)
	3018	will not be represented in a useful way - element order and significant
	3019	whitespace will be lost. If you need to work with mixed content, then
	3020	XML::Simple is not the right tool for your job - check out the next section.
	3021
	3022	=head1 WHERE TO FROM HERE?
	3023
	3024	B<XML::Simple> is able to present a simple API because it makes some
	3025	assumptions on your behalf. These include:
	3026
	3027	=over 4
	3028
	3029	=item *
	3030
	3031	You're not interested in text content consisting only of whitespace
	3032
	3033	=item *
	3034
	3035	You don't mind that when things get slurped into a hash the order is lost
	3036
	3037	=item *
	3038
	3039	You don't want fine-grained control of the formatting of generated XML
	3040
	3041	=item *
	3042
	3043	You would never use a hash key that was not a legal XML element name
	3044
	3045	=item *
	3046
	3047	You don't need help converting between different encodings
	3048
	3049	=back
	3050
	3051	In a serious XML project, you'll probably outgrow these assumptions fairly
	3052	quickly. This section of the document used to offer some advice on choosing a
	3053	more powerful option. That advice has now grown into the 'Perl-XML FAQ'
	3054	document which you can find at: L<http://perl-xml.sourceforge.net/faq/>
	3055
	3056	The advice in the FAQ boils down to a quick explanation of tree versus
	3057	event based parsers and then recommends:
	3058
	3059	For event based parsing, use SAX (do not set out to write any new code for
	3060	XML::Parser's handler API - it is obsolete).
	3061
	3062	For tree-based parsing, you could choose between the 'Perlish' approach of
	3063	L<XML::Twig> and more standards based DOM implementations - preferably one with
	3064	XPath support.
	3065
	3066
	3067	=head1 SEE ALSO
	3068
	3069	B<XML::Simple> requires either L<XML::Parser> or L<XML::SAX>.
	3070
	3071	To generate documents with namespaces, L<XML::NamespaceSupport> is required.
	3072
	3073	The optional caching functions require L<Storable>.
	3074
	3075	Answers to Frequently Asked Questions about XML::Simple are bundled with this
	3076	distribution as: L<XML::Simple::FAQ>
	3077
	3078	=head1 COPYRIGHT
	3079
	3080	Copyright 1999-2004 Grant McLean E<lt>grantm@cpan.orgE<gt>
	3081
	3082	This library is free software; you can redistribute it and/or modify it
	3083	under the same terms as Perl itself.
	3084
	3085	=cut
	3086
	3087