Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / devtools / amd64 / man / man3 / XML::Simple.3
CommitLineData
920dae64
AT
1.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
2.\"
3.\" Standard preamble:
4.\" ========================================================================
5.de Sh \" Subsection heading
6.br
7.if t .Sp
8.ne 5
9.PP
10\fB\\$1\fR
11.PP
12..
13.de Sp \" Vertical space (when we can't use .PP)
14.if t .sp .5v
15.if n .sp
16..
17.de Vb \" Begin verbatim text
18.ft CW
19.nf
20.ne \\$1
21..
22.de Ve \" End verbatim text
23.ft R
24.fi
25..
26.\" Set up some character translations and predefined strings. \*(-- will
27.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
28.\" double quote, and \*(R" will give a right double quote. | will give a
29.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
30.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
31.\" expand to `' in nroff, nothing in troff, for use with C<>.
32.tr \(*W-|\(bv\*(Tr
33.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
34.ie n \{\
35. ds -- \(*W-
36. ds PI pi
37. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
38. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
39. ds L" ""
40. ds R" ""
41. ds C` ""
42. ds C' ""
43'br\}
44.el\{\
45. ds -- \|\(em\|
46. ds PI \(*p
47. ds L" ``
48. ds R" ''
49'br\}
50.\"
51.\" If the F register is turned on, we'll generate index entries on stderr for
52.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
53.\" entries marked with X<> in POD. Of course, you'll have to process the
54.\" output yourself in some meaningful fashion.
55.if \nF \{\
56. de IX
57. tm Index:\\$1\t\\n%\t"\\$2"
58..
59. nr % 0
60. rr F
61.\}
62.\"
63.\" For nroff, turn off justification. Always turn off hyphenation; it makes
64.\" way too many mistakes in technical documents.
65.hy 0
66.if n .na
67.\"
68.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
69.\" Fear. Run. Save yourself. No user-serviceable parts.
70. \" fudge factors for nroff and troff
71.if n \{\
72. ds #H 0
73. ds #V .8m
74. ds #F .3m
75. ds #[ \f1
76. ds #] \fP
77.\}
78.if t \{\
79. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
80. ds #V .6m
81. ds #F 0
82. ds #[ \&
83. ds #] \&
84.\}
85. \" simple accents for nroff and troff
86.if n \{\
87. ds ' \&
88. ds ` \&
89. ds ^ \&
90. ds , \&
91. ds ~ ~
92. ds /
93.\}
94.if t \{\
95. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
96. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
97. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
98. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
99. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
100. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
101.\}
102. \" troff and (daisy-wheel) nroff accents
103.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
104.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
105.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
106.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
107.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
108.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
109.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
110.ds ae a\h'-(\w'a'u*4/10)'e
111.ds Ae A\h'-(\w'A'u*4/10)'E
112. \" corrections for vroff
113.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
114.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
115. \" for low resolution devices (crt and lpr)
116.if \n(.H>23 .if \n(.V>19 \
117\{\
118. ds : e
119. ds 8 ss
120. ds o a
121. ds d- d\h'-1'\(ga
122. ds D- D\h'-1'\(hy
123. ds th \o'bp'
124. ds Th \o'LP'
125. ds ae ae
126. ds Ae AE
127.\}
128.rm #[ #] #H #V #F C
129.\" ========================================================================
130.\"
131.IX Title "XML::Simple 3"
132.TH XML::Simple 3 "2004-11-19" "perl v5.8.8" "User Contributed Perl Documentation"
133.SH "NAME"
134XML::Simple \- Easy API to maintain XML (esp config files)
135.SH "SYNOPSIS"
136.IX Header "SYNOPSIS"
137.Vb 1
138\& use XML::Simple;
139.Ve
140.PP
141.Vb 1
142\& my $ref = XMLin([<xml file or string>] [, <options>]);
143.Ve
144.PP
145.Vb 1
146\& my $xml = XMLout($hashref [, <options>]);
147.Ve
148.PP
149Or the object oriented way:
150.PP
151.Vb 1
152\& require XML::Simple;
153.Ve
154.PP
155.Vb 1
156\& my $xs = XML::Simple->new(options);
157.Ve
158.PP
159.Vb 1
160\& my $ref = $xs->XMLin([<xml file or string>] [, <options>]);
161.Ve
162.PP
163.Vb 1
164\& my $xml = $xs->XMLout($hashref [, <options>]);
165.Ve
166.PP
167(or see \*(L"\s-1SAX\s0 \s-1SUPPORT\s0\*(R" for 'the \s-1SAX\s0 way').
168.PP
169To catch common errors:
170.PP
171.Vb 1
172\& use XML::Simple qw(:strict);
173.Ve
174.PP
175(see \*(L"\s-1STRICT\s0 \s-1MODE\s0\*(R" for more details).
176.SH "QUICK START"
177.IX Header "QUICK START"
178Say you have a script called \fBfoo\fR and a file of configuration options
179called \fBfoo.xml\fR containing this:
180.PP
181.Vb 13
182\& <config logdir="/var/log/foo/" debugfile="/tmp/foo.debug">
183\& <server name="sahara" osname="solaris" osversion="2.6">
184\& <address>10.0.0.101</address>
185\& <address>10.0.1.101</address>
186\& </server>
187\& <server name="gobi" osname="irix" osversion="6.5">
188\& <address>10.0.0.102</address>
189\& </server>
190\& <server name="kalahari" osname="linux" osversion="2.0.34">
191\& <address>10.0.0.103</address>
192\& <address>10.0.1.103</address>
193\& </server>
194\& </config>
195.Ve
196.PP
197The following lines of code in \fBfoo\fR:
198.PP
199.Vb 1
200\& use XML::Simple;
201.Ve
202.PP
203.Vb 1
204\& my $config = XMLin();
205.Ve
206.PP
207will 'slurp' the configuration options into the hashref \f(CW$config\fR (because no
208arguments are passed to \f(CW\*(C`XMLin()\*(C'\fR the name and location of the \s-1XML\s0 file will
209be inferred from name and location of the script). You can dump out the
210contents of the hashref using Data::Dumper:
211.PP
212.Vb 1
213\& use Data::Dumper;
214.Ve
215.PP
216.Vb 1
217\& print Dumper($config);
218.Ve
219.PP
220which will produce something like this (formatting has been adjusted for
221brevity):
222.PP
223.Vb 21
224\& {
225\& 'logdir' => '/var/log/foo/',
226\& 'debugfile' => '/tmp/foo.debug',
227\& 'server' => {
228\& 'sahara' => {
229\& 'osversion' => '2.6',
230\& 'osname' => 'solaris',
231\& 'address' => [ '10.0.0.101', '10.0.1.101' ]
232\& },
233\& 'gobi' => {
234\& 'osversion' => '6.5',
235\& 'osname' => 'irix',
236\& 'address' => '10.0.0.102'
237\& },
238\& 'kalahari' => {
239\& 'osversion' => '2.0.34',
240\& 'osname' => 'linux',
241\& 'address' => [ '10.0.0.103', '10.0.1.103' ]
242\& }
243\& }
244\& }
245.Ve
246.PP
247Your script could then access the name of the log directory like this:
248.PP
249.Vb 1
250\& print $config->{logdir};
251.Ve
252.PP
253similarly, the second address on the server 'kalahari' could be referenced as:
254.PP
255.Vb 1
256\& print $config->{server}->{kalahari}->{address}->[1];
257.Ve
258.PP
259What could be simpler? (Rhetorical).
260.PP
261For simple requirements, that's really all there is to it. If you want to
262store your \s-1XML\s0 in a different directory or file, or pass it in as a string or
263even pass it in via some derivative of an IO::Handle, you'll need to check out
264\&\*(L"\s-1OPTIONS\s0\*(R". If you want to turn off or tweak the array folding feature (that
265neat little transformation that produced \f(CW$config\fR\->{server}) you'll find options
266for that as well.
267.PP
268If you want to generate \s-1XML\s0 (for example to write a modified version of
269\&\f(CW$config\fR back out as \s-1XML\s0), check out \f(CW\*(C`XMLout()\*(C'\fR.
270.PP
271If your needs are not so simple, this may not be the module for you. In that
272case, you might want to read \*(L"\s-1WHERE\s0 \s-1TO\s0 \s-1FROM\s0 \s-1HERE\s0?\*(R".
273.SH "DESCRIPTION"
274.IX Header "DESCRIPTION"
275The XML::Simple module provides a simple \s-1API\s0 layer on top of an underlying \s-1XML\s0
276parsing module (either XML::Parser or one of the \s-1SAX2\s0 parser modules). Two
277functions are exported: \f(CW\*(C`XMLin()\*(C'\fR and \f(CW\*(C`XMLout()\*(C'\fR. Note: you can explicitly
278request the lower case versions of the function names: \f(CW\*(C`xml_in()\*(C'\fR and
279\&\f(CW\*(C`xml_out()\*(C'\fR.
280.PP
281The simplest approach is to call these two functions directly, but an
282optional object oriented interface (see \*(L"\s-1OPTIONAL\s0 \s-1OO\s0 \s-1INTERFACE\s0\*(R" below)
283allows them to be called as methods of an \fBXML::Simple\fR object. The object
284interface can also be used at either end of a \s-1SAX\s0 pipeline.
285.Sh "\fIXMLin()\fP"
286.IX Subsection "XMLin()"
287Parses \s-1XML\s0 formatted data and returns a reference to a data structure which
288contains the same information in a more readily accessible form. (Skip
289down to \*(L"\s-1EXAMPLES\s0\*(R" below, for more sample code).
290.PP
291\&\f(CW\*(C`XMLin()\*(C'\fR accepts an optional \s-1XML\s0 specifier followed by zero or more 'name =>
292value' option pairs. The \s-1XML\s0 specifier can be one of the following:
293.IP "A filename" 4
294.IX Item "A filename"
295If the filename contains no directory components \f(CW\*(C`XMLin()\*(C'\fR will look for the
296file in each directory in the SearchPath (see \*(L"\s-1OPTIONS\s0\*(R" below) or in the
297current directory if the SearchPath option is not defined. eg:
298.Sp
299.Vb 1
300\& $ref = XMLin('/etc/params.xml');
301.Ve
302.Sp
303Note, the filename '\-' can be used to parse from \s-1STDIN\s0.
304.IP "undef" 4
305.IX Item "undef"
306If there is no \s-1XML\s0 specifier, \f(CW\*(C`XMLin()\*(C'\fR will check the script directory and
307each of the SearchPath directories for a file with the same name as the script
308but with the extension '.xml'. Note: if you wish to specify options, you
309must specify the value 'undef'. eg:
310.Sp
311.Vb 1
312\& $ref = XMLin(undef, ForceArray => 1);
313.Ve
314.IP "A string of \s-1XML\s0" 4
315.IX Item "A string of XML"
316A string containing \s-1XML\s0 (recognised by the presence of '<' and '>' characters)
317will be parsed directly. eg:
318.Sp
319.Vb 1
320\& $ref = XMLin('<opt username="bob" password="flurp" />');
321.Ve
322.IP "An IO::Handle object" 4
323.IX Item "An IO::Handle object"
324An IO::Handle object will be read to \s-1EOF\s0 and its contents parsed. eg:
325.Sp
326.Vb 2
327\& $fh = IO::File->new('/etc/params.xml');
328\& $ref = XMLin($fh);
329.Ve
330.Sh "\fIXMLout()\fP"
331.IX Subsection "XMLout()"
332Takes a data structure (generally a hashref) and returns an \s-1XML\s0 encoding of
333that structure. If the resulting \s-1XML\s0 is parsed using \f(CW\*(C`XMLin()\*(C'\fR, it should
334return a data structure equivalent to the original (see caveats below).
335.PP
336The \f(CW\*(C`XMLout()\*(C'\fR function can also be used to output the \s-1XML\s0 as \s-1SAX\s0 events
337(see the \f(CW\*(C`Handler\*(C'\fR option and \*(L"\s-1SAX\s0 \s-1SUPPORT\s0\*(R" for more details).
338.PP
339When translating hashes to \s-1XML\s0, hash keys which have a leading '\-' will be
340silently skipped. This is the approved method for marking elements of a
341data structure which should be ignored by \f(CW\*(C`XMLout\*(C'\fR. (Note: If these items
342were not skipped the key names would be emitted as element or attribute names
343with a leading '\-' which would not be valid \s-1XML\s0).
344.Sh "Caveats"
345.IX Subsection "Caveats"
346Some care is required in creating data structures which will be passed to
347\&\f(CW\*(C`XMLout()\*(C'\fR. Hash keys from the data structure will be encoded as either \s-1XML\s0
348element names or attribute names. Therefore, you should use hash key names
349which conform to the relatively strict \s-1XML\s0 naming rules:
350.PP
351Names in \s-1XML\s0 must begin with a letter. The remaining characters may be
352letters, digits, hyphens (\-), underscores (_) or full stops (.). It is also
353allowable to include one colon (:) in an element name but this should only be
354used when working with namespaces (\fBXML::Simple\fR can only usefully work with
355namespaces when teamed with a \s-1SAX\s0 Parser).
356.PP
357You can use other punctuation characters in hash values (just not in hash
358keys) however \fBXML::Simple\fR does not support dumping binary data.
359.PP
360If you break these rules, the current implementation of \f(CW\*(C`XMLout()\*(C'\fR will
361simply emit non-compliant \s-1XML\s0 which will be rejected if you try to read it
362back in. (A later version of \fBXML::Simple\fR might take a more proactive
363approach).
364.PP
365Note also that although you can nest hashes and arrays to arbitrary levels,
366circular data structures are not supported and will cause \f(CW\*(C`XMLout()\*(C'\fR to die.
367.PP
368If you wish to 'round\-trip' arbitrary data structures from Perl to \s-1XML\s0 and back
369to Perl, then you should probably disable array folding (using the KeyAttr
370option) both with \f(CW\*(C`XMLout()\*(C'\fR and with \f(CW\*(C`XMLin()\*(C'\fR. If you still don't get the
371expected results, you may prefer to use XML::Dumper which is designed for
372exactly that purpose.
373.PP
374Refer to \*(L"\s-1WHERE\s0 \s-1TO\s0 \s-1FROM\s0 \s-1HERE\s0?\*(R" if \f(CW\*(C`XMLout()\*(C'\fR is too simple for your needs.
375.SH "OPTIONS"
376.IX Header "OPTIONS"
377\&\fBXML::Simple\fR supports a number of options (in fact as each release of
378\&\fBXML::Simple\fR adds more options, the module's claim to the name 'Simple'
379becomes increasingly tenuous). If you find yourself repeatedly having to
380specify the same options, you might like to investigate \*(L"\s-1OPTIONAL\s0 \s-1OO\s0 \s-1INTERFACE\s0\*(R" below.
381.PP
382If you can't be bothered reading the documentation, refer to
383\&\*(L"\s-1STRICT\s0 \s-1MODE\s0\*(R" to automatically catch common mistakes.
384.PP
385Because there are so many options, it's hard for new users to know which ones
386are important, so here are the two you really need to know about:
387.IP "\(bu" 4
388check out \f(CW\*(C`ForceArray\*(C'\fR because you'll almost certainly want to turn it on
389.IP "\(bu" 4
390make sure you know what the \f(CW\*(C`KeyAttr\*(C'\fR option does and what its default value is
391because it may surprise you otherwise (note in particular that 'KeyAttr'
392affects both \f(CW\*(C`XMLin\*(C'\fR and \f(CW\*(C`XMLout\*(C'\fR)
393.PP
394The option name headings below have a trailing 'comment' \- a hash followed by
395two pieces of metadata:
396.IP "\(bu" 4
397Options are marked with '\fIin\fR' if they are recognised by \f(CW\*(C`XMLin()\*(C'\fR and
398\&'\fIout\fR' if they are recognised by \f(CW\*(C`XMLout()\*(C'\fR.
399.IP "\(bu" 4
400Each option is also flagged to indicate whether it is:
401.Sp
402.Vb 7
403\& 'important' - don't use the module until you understand this one
404\& 'handy' - you can skip this on the first time through
405\& 'advanced' - you can skip this on the second time through
406\& 'SAX only' - don't worry about this unless you're using SAX (or
407\& alternatively if you need this, you also need SAX)
408\& 'seldom used' - you'll probably never use this unless you were the
409\& person that requested the feature
410.Ve
411.PP
412The options are listed alphabetically:
413.PP
414Note: option names are no longer case sensitive so you can use the mixed case
415versions shown here; all lower case as required by versions 2.03 and earlier;
416or you can add underscores between the words (eg: key_attr).
417.Sh "AttrIndent => 1 \fI# out \- handy\fP"
418.IX Subsection "AttrIndent => 1 # out - handy"
419When you are using \f(CW\*(C`XMLout()\*(C'\fR, enable this option to have attributes printed
420one-per-line with sensible indentation rather than all on one line.
421.Sh "Cache => [ cache schemes ] \fI# in \- advanced\fP"
422.IX Subsection "Cache => [ cache schemes ] # in - advanced"
423Because loading the \fBXML::Parser\fR module and parsing an \s-1XML\s0 file can consume a
424significant number of \s-1CPU\s0 cycles, it is often desirable to cache the output of
425\&\f(CW\*(C`XMLin()\*(C'\fR for later reuse.
426.PP
427When parsing from a named file, \fBXML::Simple\fR supports a number of caching
428schemes. The 'Cache' option may be used to specify one or more schemes (using
429an anonymous array). Each scheme will be tried in turn in the hope of finding
430a cached pre-parsed representation of the \s-1XML\s0 file. If no cached copy is
431found, the file will be parsed and the first cache scheme in the list will be
432used to save a copy of the results. The following cache schemes have been
433implemented:
434.IP "storable" 4
435.IX Item "storable"
436Utilises \fBStorable.pm\fR to read/write a cache file with the same name as the
437\&\s-1XML\s0 file but with the extension .stor
438.IP "memshare" 4
439.IX Item "memshare"
440When a file is first parsed, a copy of the resulting data structure is retained
441in memory in the \fBXML::Simple\fR module's namespace. Subsequent calls to parse
442the same file will return a reference to this structure. This cached version
443will persist only for the life of the Perl interpreter (which in the case of
444mod_perl for example, may be some significant time).
445.Sp
446Because each caller receives a reference to the same data structure, a change
447made by one caller will be visible to all. For this reason, the reference
448returned should be treated as read\-only.
449.IP "memcopy" 4
450.IX Item "memcopy"
451This scheme works identically to 'memshare' (above) except that each caller
452receives a reference to a new data structure which is a copy of the cached
453version. Copying the data structure will add a little processing overhead,
454therefore this scheme should only be used where the caller intends to modify
455the data structure (or wishes to protect itself from others who might). This
456scheme uses \fBStorable.pm\fR to perform the copy.
457.PP
458Warning! The memory-based caching schemes compare the timestamp on the file to
459the time when it was last parsed. If the file is stored on an \s-1NFS\s0 filesystem
460(or other network share) and the clock on the file server is not exactly
461synchronised with the clock where your script is run, updates to the source \s-1XML\s0
462file may appear to be ignored.
463.Sh "ContentKey => 'keyname' \fI# in+out \- seldom used\fP"
464.IX Subsection "ContentKey => 'keyname' # in+out - seldom used"
465When text content is parsed to a hash value, this option lets you specify a
466name for the hash key to override the default 'content'. So for example:
467.PP
468.Vb 1
469\& XMLin('<opt one="1">Text</opt>', ContentKey => 'text')
470.Ve
471.PP
472will parse to:
473.PP
474.Vb 1
475\& { 'one' => 1, 'text' => 'Text' }
476.Ve
477.PP
478instead of:
479.PP
480.Vb 1
481\& { 'one' => 1, 'content' => 'Text' }
482.Ve
483.PP
484\&\f(CW\*(C`XMLout()\*(C'\fR will also honour the value of this option when converting a hashref
485to \s-1XML\s0.
486.PP
487You can also prefix your selected key name with a '\-' character to have
488\&\f(CW\*(C`XMLin()\*(C'\fR try a little harder to eliminate unnecessary 'content' keys after
489array folding. For example:
490.PP
491.Vb 6
492\& XMLin(
493\& '<opt><item name="one">First</item><item name="two">Second</item></opt>',
494\& KeyAttr => {item => 'name'},
495\& ForceArray => [ 'item' ],
496\& ContentKey => '-content'
497\& )
498.Ve
499.PP
500will parse to:
501.PP
502.Vb 6
503\& {
504\& 'item' => {
505\& 'one' => 'First',
506\& 'two' => 'Second'
507\& }
508\& }
509.Ve
510.PP
511rather than this (without the '\-'):
512.PP
513.Vb 6
514\& {
515\& 'item' => {
516\& 'one' => { 'content' => 'First' },
517\& 'two' => { 'content' => 'Second' }
518\& }
519\& }
520.Ve
521.Sh "DataHandler => code_ref \fI# in \- \s-1SAX\s0 only\fP"
522.IX Subsection "DataHandler => code_ref # in - SAX only"
523When you use an \fBXML::Simple\fR object as a \s-1SAX\s0 handler, it will return a
524\&'simple tree' data structure in the same format as \f(CW\*(C`XMLin()\*(C'\fR would return. If
525this option is set (to a subroutine reference), then when the tree is built the
526subroutine will be called and passed two arguments: a reference to the
527\&\fBXML::Simple\fR object and a reference to the data tree. The return value from
528the subroutine will be returned to the \s-1SAX\s0 driver. (See \*(L"\s-1SAX\s0 \s-1SUPPORT\s0\*(R" for
529more details).
530.Sh "ForceArray => 1 \fI# in \- important\fP"
531.IX Subsection "ForceArray => 1 # in - important"
532This option should be set to '1' to force nested elements to be represented
533as arrays even when there is only one. Eg, with ForceArray enabled, this
534\&\s-1XML:\s0
535.PP
536.Vb 3
537\& <opt>
538\& <name>value</name>
539\& </opt>
540.Ve
541.PP
542would parse to this:
543.PP
544.Vb 5
545\& {
546\& 'name' => [
547\& 'value'
548\& ]
549\& }
550.Ve
551.PP
552instead of this (the default):
553.PP
554.Vb 3
555\& {
556\& 'name' => 'value'
557\& }
558.Ve
559.PP
560This option is especially useful if the data structure is likely to be written
561back out as \s-1XML\s0 and the default behaviour of rolling single nested elements up
562into attributes is not desirable.
563.PP
564If you are using the array folding feature, you should almost certainly enable
565this option. If you do not, single nested elements will not be parsed to
566arrays and therefore will not be candidates for folding to a hash. (Given that
567the default value of 'KeyAttr' enables array folding, the default value of this
568option should probably have been enabled too \- sorry).
569.Sh "ForceArray => [ names ] \fI# in \- important\fP"
570.IX Subsection "ForceArray => [ names ] # in - important"
571This alternative (and preferred) form of the 'ForceArray' option allows you to
572specify a list of element names which should always be forced into an array
573representation, rather than the 'all or nothing' approach above.
574.PP
575It is also possible (since version 2.05) to include compiled regular
576expressions in the list \- any element names which match the pattern will be
577forced to arrays. If the list contains only a single regex, then it is not
578necessary to enclose it in an arrayref. Eg:
579.PP
580.Vb 1
581\& ForceArray => qr/_list$/
582.Ve
583.Sh "ForceContent => 1 \fI# in \- seldom used\fP"
584.IX Subsection "ForceContent => 1 # in - seldom used"
585When \f(CW\*(C`XMLin()\*(C'\fR parses elements which have text content as well as attributes,
586the text content must be represented as a hash value rather than a simple
587scalar. This option allows you to force text content to always parse to
588a hash value even when there are no attributes. So for example:
589.PP
590.Vb 1
591\& XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1)
592.Ve
593.PP
594will parse to:
595.PP
596.Vb 4
597\& {
598\& 'x' => { 'content' => 'text1' },
599\& 'y' => { 'a' => 2, 'content' => 'text2' }
600\& }
601.Ve
602.PP
603instead of:
604.PP
605.Vb 4
606\& {
607\& 'x' => 'text1',
608\& 'y' => { 'a' => 2, 'content' => 'text2' }
609\& }
610.Ve
611.Sh "GroupTags => { grouping tag => grouped tag } \fI# in+out \- handy\fP"
612.IX Subsection "GroupTags => { grouping tag => grouped tag } # in+out - handy"
613You can use this option to eliminate extra levels of indirection in your Perl
614data structure. For example this \s-1XML:\s0
615.PP
616.Vb 7
617\& <opt>
618\& <searchpath>
619\& <dir>/usr/bin</dir>
620\& <dir>/usr/local/bin</dir>
621\& <dir>/usr/X11/bin</dir>
622\& </searchpath>
623\& </opt>
624.Ve
625.PP
626Would normally be read into a structure like this:
627.PP
628.Vb 5
629\& {
630\& searchpath => {
631\& dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
632\& }
633\& }
634.Ve
635.PP
636But when read in with the appropriate value for 'GroupTags':
637.PP
638.Vb 1
639\& my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' });
640.Ve
641.PP
642It will return this simpler structure:
643.PP
644.Vb 3
645\& {
646\& searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
647\& }
648.Ve
649.PP
650The grouping element (\f(CW\*(C`<searchpath>\*(C'\fR in the example) must not contain any
651attributes or elements other than the grouped element.
652.PP
653You can specify multiple 'grouping element' to 'grouped element' mappings in
654the same hashref. If this option is combined with \f(CW\*(C`KeyAttr\*(C'\fR, the array
655folding will occur first and then the grouped element names will be eliminated.
656.PP
657\&\f(CW\*(C`XMLout\*(C'\fR will also use the grouptag mappings to re-introduce the tags around
658the grouped elements. Beware though that this will occur in all places that
659the 'grouping tag' name occurs \- you probably don't want to use the same name
660for elements as well as attributes.
661.Sh "Handler => object_ref \fI# out \- \s-1SAX\s0 only\fP"
662.IX Subsection "Handler => object_ref # out - SAX only"
663Use the 'Handler' option to have \f(CW\*(C`XMLout()\*(C'\fR generate \s-1SAX\s0 events rather than
664returning a string of \s-1XML\s0. For more details see \*(L"\s-1SAX\s0 \s-1SUPPORT\s0\*(R" below.
665.PP
666Note: the current implementation of this option generates a string of \s-1XML\s0
667and uses a \s-1SAX\s0 parser to translate it into \s-1SAX\s0 events. The normal encoding
668rules apply here \- your data must be \s-1UTF8\s0 encoded unless you specify an
669alternative encoding via the 'XMLDecl' option; and by the time the data reaches
670the handler object, it will be in \s-1UTF8\s0 form regardless of the encoding you
671supply. A future implementation of this option may generate the events
672directly.
673.Sh "KeepRoot => 1 \fI# in+out \- handy\fP"
674.IX Subsection "KeepRoot => 1 # in+out - handy"
675In its attempt to return a data structure free of superfluous detail and
676unnecessary levels of indirection, \f(CW\*(C`XMLin()\*(C'\fR normally discards the root
677element name. Setting the 'KeepRoot' option to '1' will cause the root element
678name to be retained. So after executing this code:
679.PP
680.Vb 1
681\& $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1)
682.Ve
683.PP
684You'll be able to reference the tempdir as
685\&\f(CW\*(C`$config\->{config}\->{tempdir}\*(C'\fR instead of the default
686\&\f(CW\*(C`$config\->{tempdir}\*(C'\fR.
687.PP
688Similarly, setting the 'KeepRoot' option to '1' will tell \f(CW\*(C`XMLout()\*(C'\fR that the
689data structure already contains a root element name and it is not necessary to
690add another.
691.Sh "KeyAttr => [ list ] \fI# in+out \- important\fP"
692.IX Subsection "KeyAttr => [ list ] # in+out - important"
693This option controls the 'array folding' feature which translates nested
694elements from an array to a hash. It also controls the 'unfolding' of hashes
695to arrays.
696.PP
697For example, this \s-1XML:\s0
698.PP
699.Vb 4
700\& <opt>
701\& <user login="grep" fullname="Gary R Epstein" />
702\& <user login="stty" fullname="Simon T Tyson" />
703\& </opt>
704.Ve
705.PP
706would, by default, parse to this:
707.PP
708.Vb 12
709\& {
710\& 'user' => [
711\& {
712\& 'login' => 'grep',
713\& 'fullname' => 'Gary R Epstein'
714\& },
715\& {
716\& 'login' => 'stty',
717\& 'fullname' => 'Simon T Tyson'
718\& }
719\& ]
720\& }
721.Ve
722.PP
723If the option 'KeyAttr => \*(L"login\*(R"' were used to specify that the 'login'
724attribute is a key, the same \s-1XML\s0 would parse to:
725.PP
726.Vb 10
727\& {
728\& 'user' => {
729\& 'stty' => {
730\& 'fullname' => 'Simon T Tyson'
731\& },
732\& 'grep' => {
733\& 'fullname' => 'Gary R Epstein'
734\& }
735\& }
736\& }
737.Ve
738.PP
739The key attribute names should be supplied in an arrayref if there is more
740than one. \f(CW\*(C`XMLin()\*(C'\fR will attempt to match attribute names in the order
741supplied. \f(CW\*(C`XMLout()\*(C'\fR will use the first attribute name supplied when
742\&'unfolding' a hash into an array.
743.PP
744Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do
745not want folding on input or unfolding on output you must set this option
746to an empty list to disable the feature.
747.PP
748Note 2: If you wish to use this option, you should also enable the
749\&\f(CW\*(C`ForceArray\*(C'\fR option. Without 'ForceArray', a single nested element will be
750rolled up into a scalar rather than an array and therefore will not be folded
751(since only arrays get folded).
752.Sh "KeyAttr => { list } \fI# in+out \- important\fP"
753.IX Subsection "KeyAttr => { list } # in+out - important"
754This alternative (and preferred) method of specifying the key attributes
755allows more fine grained control over which elements are folded and on which
756attributes. For example the option 'KeyAttr => { package => 'id' }' will cause
757any package elements to be folded on the 'id' attribute. No other elements
758which have an 'id' attribute will be folded at all.
759.PP
760Note: \f(CW\*(C`XMLin()\*(C'\fR will generate a warning (or a fatal error in \*(L"\s-1STRICT\s0 \s-1MODE\s0\*(R")
761if this syntax is used and an element which does not have the specified key
762attribute is encountered (eg: a 'package' element without an 'id' attribute, to
763use the example above). Warnings will only be generated if \fB\-w\fR is in force.
764.PP
765Two further variations are made possible by prefixing a '+' or a '\-' character
766to the attribute name:
767.PP
768The option 'KeyAttr => { user => \*(L"+login\*(R" }' will cause this \s-1XML:\s0
769.PP
770.Vb 4
771\& <opt>
772\& <user login="grep" fullname="Gary R Epstein" />
773\& <user login="stty" fullname="Simon T Tyson" />
774\& </opt>
775.Ve
776.PP
777to parse to this data structure:
778.PP
779.Vb 12
780\& {
781\& 'user' => {
782\& 'stty' => {
783\& 'fullname' => 'Simon T Tyson',
784\& 'login' => 'stty'
785\& },
786\& 'grep' => {
787\& 'fullname' => 'Gary R Epstein',
788\& 'login' => 'grep'
789\& }
790\& }
791\& }
792.Ve
793.PP
794The '+' indicates that the value of the key attribute should be copied rather
795than moved to the folded hash key.
796.PP
797A '\-' prefix would produce this result:
798.PP
799.Vb 12
800\& {
801\& 'user' => {
802\& 'stty' => {
803\& 'fullname' => 'Simon T Tyson',
804\& '-login' => 'stty'
805\& },
806\& 'grep' => {
807\& 'fullname' => 'Gary R Epstein',
808\& '-login' => 'grep'
809\& }
810\& }
811\& }
812.Ve
813.PP
814As described earlier, \f(CW\*(C`XMLout\*(C'\fR will ignore hash keys starting with a '\-'.
815.Sh "NoAttr => 1 \fI# in+out \- handy\fP"
816.IX Subsection "NoAttr => 1 # in+out - handy"
817When used with \f(CW\*(C`XMLout()\*(C'\fR, the generated \s-1XML\s0 will contain no attributes.
818All hash key/values will be represented as nested elements instead.
819.PP
820When used with \f(CW\*(C`XMLin()\*(C'\fR, any attributes in the \s-1XML\s0 will be ignored.
821.Sh "NoEscape => 1 \fI# out \- seldom used\fP"
822.IX Subsection "NoEscape => 1 # out - seldom used"
823By default, \f(CW\*(C`XMLout()\*(C'\fR will translate the characters '<', '>', '&' and
824\&'"' to '&lt;', '&gt;', '&amp;' and '&quot;' respectively. Use this option to
825suppress escaping (presumably because you've already escaped the data in some
826more sophisticated manner).
827.Sh "NoIndent => 1 \fI# out \- seldom used\fP"
828.IX Subsection "NoIndent => 1 # out - seldom used"
829Set this option to 1 to disable \f(CW\*(C`XMLout()\*(C'\fR's default 'pretty printing' mode.
830With this option enabled, the \s-1XML\s0 output will all be on one line (unless there
831are newlines in the data) \- this may be easier for downstream processing.
832.Sh "NoSort => 1 \fI# out \- seldom used\fP"
833.IX Subsection "NoSort => 1 # out - seldom used"
834Newer versions of XML::Simple sort elements and attributes alphabetically (*),
835by default. Enable this option to suppress the sorting \- possibly for
836backwards compatibility.
837.PP
838* Actually, sorting is alphabetical but 'key' attribute or element names (as in
839\&'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements
840are sorted alphabetically by the value of the key field.
841.Sh "NormaliseSpace => 0 | 1 | 2 \fI# in \- handy\fP"
842.IX Subsection "NormaliseSpace => 0 | 1 | 2 # in - handy"
843This option controls how whitespace in text content is handled. Recognised
844values for the option are:
845.IP "\(bu" 4
8460 = (default) whitespace is passed through unaltered (except of course for the
847normalisation of whitespace in attribute values which is mandated by the \s-1XML\s0
848recommendation)
849.IP "\(bu" 4
8501 = whitespace is normalised in any value used as a hash key (normalising means
851removing leading and trailing whitespace and collapsing sequences of whitespace
852characters to a single space)
853.IP "\(bu" 4
8542 = whitespace is normalised in all text content
855.PP
856Note: you can spell this option with a 'z' if that is more natural for you.
857.Sh "NSExpand => 1 \fI# in+out handy \- \s-1SAX\s0 only\fP"
858.IX Subsection "NSExpand => 1 # in+out handy - SAX only"
859This option controls namespace expansion \- the translation of element and
860attribute names of the form 'prefix:name' to '{uri}name'. For example the
861element name 'xsl:template' might be expanded to:
862\&'{http://www.w3.org/1999/XSL/Transform}template'.
863.PP
864By default, \f(CW\*(C`XMLin()\*(C'\fR will return element names and attribute names exactly as
865they appear in the \s-1XML\s0. Setting this option to 1 will cause all element and
866attribute names to be expanded to include their namespace \s-1URI\s0.
867.PP
868\&\fINote: You must be using a \s-1SAX\s0 parser for this option to work (ie: it does not
869work with XML::Parser)\fR.
870.PP
871This option also controls whether \f(CW\*(C`XMLout()\*(C'\fR performs the reverse translation
872from '{uri}name' back to 'prefix:name'. The default is no translation. If
873your data contains expanded names, you should set this option to 1 otherwise
874\&\f(CW\*(C`XMLout\*(C'\fR will emit \s-1XML\s0 which is not well formed.
875.PP
876\&\fINote: You must have the XML::NamespaceSupport module installed if you want
877\&\f(CI\*(C`XMLout()\*(C'\fI to translate URIs back to prefixes\fR.
878.Sh "NumericEscape => 0 | 1 | 2 \fI# out \- handy\fP"
879.IX Subsection "NumericEscape => 0 | 1 | 2 # out - handy"
880Use this option to have 'high' (non\-ASCII) characters in your Perl data
881structure converted to numeric entities (eg: &#8364;) in the \s-1XML\s0 output. Three
882levels are possible:
883.PP
8840 \- default: no numeric escaping (\s-1OK\s0 if you're writing out \s-1UTF8\s0)
885.PP
8861 \- only characters above 0xFF are escaped (ie: characters in the 0x80\-FF range are not escaped), possibly useful with \s-1ISO8859\-1\s0 output
887.PP
8882 \- all characters above 0x7F are escaped (good for plain \s-1ASCII\s0 output)
889.Sh "OutputFile => <file specifier> \fI# out \- handy\fP"
890.IX Subsection "OutputFile => <file specifier> # out - handy"
891The default behaviour of \f(CW\*(C`XMLout()\*(C'\fR is to return the \s-1XML\s0 as a string. If you
892wish to write the \s-1XML\s0 to a file, simply supply the filename using the
893\&'OutputFile' option.
894.PP
895This option also accepts an \s-1IO\s0 handle object \- especially useful in Perl 5.8.0
896and later for output using an encoding other than \s-1UTF\-8\s0, eg:
897.PP
898.Vb 2
899\& open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!";
900\& XMLout($ref, OutputFile => $fh);
901.Ve
902.PP
903Note, XML::Simple does not require that the object you pass in to the
904OutputFile option inherits from IO::Handle \- it simply assumes the object
905supports a \f(CW\*(C`print\*(C'\fR method.
906.Sh "ParserOpts => [ XML::Parser Options ] \fI# in \- don't use this\fP"
907.IX Subsection "ParserOpts => [ XML::Parser Options ] # in - don't use this"
908\&\fINote: This option is now officially deprecated. If you find it useful, email
909the author with an example of what you use it for. Do not use this option to
910set the ProtocolEncoding, that's just plain wrong \- fix the \s-1XML\s0\fR.
911.PP
912This option allows you to pass parameters to the constructor of the underlying
913XML::Parser object (which of course assumes you're not using \s-1SAX\s0).
914.Sh "RootName => 'string' \fI# out \- handy\fP"
915.IX Subsection "RootName => 'string' # out - handy"
916By default, when \f(CW\*(C`XMLout()\*(C'\fR generates \s-1XML\s0, the root element will be named
917\&'opt'. This option allows you to specify an alternative name.
918.PP
919Specifying either undef or the empty string for the RootName option will
920produce \s-1XML\s0 with no root elements. In most cases the resulting \s-1XML\s0 fragment
921will not be 'well formed' and therefore could not be read back in by \f(CW\*(C`XMLin()\*(C'\fR.
922Nevertheless, the option has been found to be useful in certain circumstances.
923.Sh "SearchPath => [ list ] \fI# in \- handy\fP"
924.IX Subsection "SearchPath => [ list ] # in - handy"
925If you pass \f(CW\*(C`XMLin()\*(C'\fR a filename, but the filename includes no directory
926component, you can use this option to specify which directories should be
927searched to locate the file. You might use this option to search first in the
928user's home directory, then in a global directory such as /etc.
929.PP
930If a filename is provided to \f(CW\*(C`XMLin()\*(C'\fR but SearchPath is not defined, the
931file is assumed to be in the current directory.
932.PP
933If the first parameter to \f(CW\*(C`XMLin()\*(C'\fR is undefined, the default SearchPath
934will contain only the directory in which the script itself is located.
935Otherwise the default SearchPath will be empty.
936.Sh "SuppressEmpty => 1 | '' | undef \fI# in+out \- handy\fP"
937.IX Subsection "SuppressEmpty => 1 | '' | undef # in+out - handy"
938This option controls what \f(CW\*(C`XMLin()\*(C'\fR should do with empty elements (no
939attributes and no content). The default behaviour is to represent them as
940empty hashes. Setting this option to a true value (eg: 1) will cause empty
941elements to be skipped altogether. Setting the option to 'undef' or the empty
942string will cause empty elements to be represented as the undefined value or
943the empty string respectively. The latter two alternatives are a little
944easier to test for in your code than a hash with no keys.
945.PP
946The option also controls what \f(CW\*(C`XMLout()\*(C'\fR does with undefined values. Setting
947the option to undef causes undefined values to be output as empty elements
948(rather than empty attributes); it also suppresses the generation of warnings
949about undefined values. Setting the option to a true value (eg: 1) causes
950undefined values to be skipped altogether on output.
951.Sh "ValueAttr => [ names ] \fI# in \- handy\fP"
952.IX Subsection "ValueAttr => [ names ] # in - handy"
953Use this option to deal with elements which always have a single attribute and no
954content. Eg:
955.PP
956.Vb 4
957\& <opt>
958\& <colour value="red" />
959\& <size value="XXL" />
960\& </opt>
961.Ve
962.PP
963Setting \f(CW\*(C`ValueAttr => [ 'value' ]\*(C'\fR will cause the above \s-1XML\s0 to parse to:
964.PP
965.Vb 4
966\& {
967\& colour => 'red',
968\& size => 'XXL'
969\& }
970.Ve
971.PP
972instead of this (the default):
973.PP
974.Vb 4
975\& {
976\& colour => { value => 'red' },
977\& size => { value => 'XXL' }
978\& }
979.Ve
980.PP
981Note: This form of the ValueAttr option is not compatible with \f(CW\*(C`XMLout()\*(C'\fR \-
982since the attribute name is discarded at parse time, the original \s-1XML\s0 cannot be
983reconstructed.
984.Sh "ValueAttr => { element => attribute, ... } \fI# in+out \- handy\fP"
985.IX Subsection "ValueAttr => { element => attribute, ... } # in+out - handy"
986This (preferred) form of the ValueAttr option requires you to specify both
987the element and the attribute names. This is not only safer, it also allows
988the original \s-1XML\s0 to be reconstructed by \f(CW\*(C`XMLout()\*(C'\fR.
989.PP
990Note: You probably don't want to use this option and the NoAttr option at the
991same time.
992.Sh "Variables => { name => value } \fI# in \- handy\fP"
993.IX Subsection "Variables => { name => value } # in - handy"
994This option allows variables in the \s-1XML\s0 to be expanded when the file is read
995(there is no facility for putting the variable names back if you regenerate
996\&\s-1XML\s0 using \f(CW\*(C`XMLout\*(C'\fR).
997.PP
998A 'variable' is any text of the form \f(CW\*(C`${name}\*(C'\fR which occurs in an attribute
999value or in the text content of an element. If 'name' matches a key in the
1000supplied hashref, \f(CW\*(C`${name}\*(C'\fR will be replaced with the corresponding value from
1001the hashref. If no matching key is found, the variable will not be replaced.
1002Names must match the regex: \f(CW\*(C`[\ew.]+\*(C'\fR (ie: only 'word' characters and dots are
1003allowed).
1004.Sh "VarAttr => 'attr_name' \fI# in \- handy\fP"
1005.IX Subsection "VarAttr => 'attr_name' # in - handy"
1006In addition to the variables defined using \f(CW\*(C`Variables\*(C'\fR, this option allows
1007variables to be defined in the \s-1XML\s0. A variable definition consists of an
1008element with an attribute called 'attr_name' (the value of the \f(CW\*(C`VarAttr\*(C'\fR
1009option). The value of the attribute will be used as the variable name and the
1010text content of the element will be used as the value. A variable defined in
1011this way will override a variable defined using the \f(CW\*(C`Variables\*(C'\fR option. For
1012example:
1013.PP
1014.Vb 7
1015\& XMLin( '<opt>
1016\& <dir name="prefix">/usr/local/apache</dir>
1017\& <dir name="exec_prefix">${prefix}</dir>
1018\& <dir name="bindir">${exec_prefix}/bin</dir>
1019\& </opt>',
1020\& VarAttr => 'name', ContentKey => '-content'
1021\& );
1022.Ve
1023.PP
1024produces the following data structure:
1025.PP
1026.Vb 7
1027\& {
1028\& dir => {
1029\& prefix => '/usr/local/apache',
1030\& exec_prefix => '/usr/local/apache',
1031\& bindir => '/usr/local/apache/bin',
1032\& }
1033\& }
1034.Ve
1035.Sh "XMLDecl => 1 or XMLDecl => 'string' \fI# out \- handy\fP"
1036.IX Subsection "XMLDecl => 1 or XMLDecl => 'string' # out - handy"
1037If you want the output from \f(CW\*(C`XMLout()\*(C'\fR to start with the optional \s-1XML\s0
1038declaration, simply set the option to '1'. The default \s-1XML\s0 declaration is:
1039.PP
1040.Vb 1
1041\& <?xml version='1.0' standalone='yes'?>
1042.Ve
1043.PP
1044If you want some other string (for example to declare an encoding value), set
1045the value of this option to the complete string you require.
1046.SH "OPTIONAL OO INTERFACE"
1047.IX Header "OPTIONAL OO INTERFACE"
1048The procedural interface is both simple and convenient; however, there are a
1049couple of reasons why you might prefer to use the object oriented (\s-1OO\s0)
1050interface:
1051.IP "\(bu" 4
1052to define a set of default values which should be used on all subsequent calls
1053to \f(CW\*(C`XMLin()\*(C'\fR or \f(CW\*(C`XMLout()\*(C'\fR
1054.IP "\(bu" 4
1055to override methods in \fBXML::Simple\fR to provide customised behaviour
1056.PP
1057The default values for the options described above are unlikely to suit
1058everyone. The \s-1OO\s0 interface allows you to effectively override \fBXML::Simple\fR's
1059defaults with your preferred values. It works like this:
1060.PP
1061First create an XML::Simple parser object with your preferred defaults:
1062.PP
1063.Vb 1
1064\& my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1);
1065.Ve
1066.PP
1067then call \f(CW\*(C`XMLin()\*(C'\fR or \f(CW\*(C`XMLout()\*(C'\fR as a method of that object:
1068.PP
1069.Vb 2
1070\& my $ref = $xs->XMLin($xml);
1071\& my $xml = $xs->XMLout($ref);
1072.Ve
1073.PP
1074You can also specify options when you make the method calls and these values
1075will be merged with the values specified when the object was created. Values
1076specified in a method call take precedence.
1077.PP
1078Overriding methods is a more advanced topic but might be useful if for example
1079you wished to provide an alternative routine for escaping character data (the
1080escape_value method) or for building the initial parse tree (the build_tree
1081method).
1082.PP
1083Note: when called as methods, the \f(CW\*(C`XMLin()\*(C'\fR and \f(CW\*(C`XMLout()\*(C'\fR routines may be
1084called as \f(CW\*(C`xml_in()\*(C'\fR or \f(CW\*(C`xml_out()\*(C'\fR. The method names are aliased so the
1085only difference is the aesthetics.
1086.SH "STRICT MODE"
1087.IX Header "STRICT MODE"
1088If you import the \fBXML::Simple\fR routines like this:
1089.PP
1090.Vb 1
1091\& use XML::Simple qw(:strict);
1092.Ve
1093.PP
1094the following common mistakes will be detected and treated as fatal errors:
1095.IP "\(bu" 4
1096Failing to explicitly set the \f(CW\*(C`KeyAttr\*(C'\fR option \- if you can't be bothered
1097reading about this option, turn it off with: KeyAttr => [ ]
1098.IP "\(bu" 4
1099Failing to explicitly set the \f(CW\*(C`ForceArray\*(C'\fR option \- if you can't be bothered
1100reading about this option, set it to the safest mode with: ForceArray => 1
1101.IP "\(bu" 4
1102Setting ForceArray to an array, but failing to list all the elements from the
1103KeyAttr hash.
1104.IP "\(bu" 4
1105Data error \- KeyAttr is set to say { part => 'partnum' } but the \s-1XML\s0 contains
1106one or more <part> elements without a 'partnum' attribute (or nested
1107element). Note: if strict mode is not set but \-w is, this condition triggers a
1108warning.
1109.IP "\(bu" 4
1110Data error \- as above, but value of key attribute (eg: partnum) is not a
1111scalar string (due to nested elements etc). This will also trigger a warning
1112if strict mode is not enabled.
1113.SH "SAX SUPPORT"
1114.IX Header "SAX SUPPORT"
1115From version 1.08_01, \fBXML::Simple\fR includes support for \s-1SAX\s0 (the Simple \s-1API\s0
1116for \s-1XML\s0) \- specifically \s-1SAX2\s0.
1117.PP
1118In a typical \s-1SAX\s0 application, an \s-1XML\s0 parser (or \s-1SAX\s0 'driver') module generates
1119\&\s-1SAX\s0 events (start of element, character data, end of element, etc) as it parses
1120an \s-1XML\s0 document and a 'handler' module processes the events to extract the
1121required data. This simple model allows for some interesting and powerful
1122possibilities:
1123.IP "\(bu" 4
1124Applications written to the \s-1SAX\s0 \s-1API\s0 can extract data from huge \s-1XML\s0 documents
1125without the memory overheads of a \s-1DOM\s0 or tree \s-1API\s0.
1126.IP "\(bu" 4
1127The \s-1SAX\s0 \s-1API\s0 allows for plug and play interchange of parser modules without
1128having to change your code to fit a new module's \s-1API\s0. A number of \s-1SAX\s0 parsers
1129are available with capabilities ranging from extreme portability to blazing
1130performance.
1131.IP "\(bu" 4
1132A \s-1SAX\s0 'filter' module can implement both a handler interface for receiving
1133data and a generator interface for passing modified data on to a downstream
1134handler. Filters can be chained together in 'pipelines'.
1135.IP "\(bu" 4
1136One filter module might split a data stream to direct data to two or more
1137downstream handlers.
1138.IP "\(bu" 4
1139Generating \s-1SAX\s0 events is not the exclusive preserve of \s-1XML\s0 parsing modules.
1140For example, a module might extract data from a relational database using \s-1DBI\s0
1141and pass it on to a \s-1SAX\s0 pipeline for filtering and formatting.
1142.PP
1143\&\fBXML::Simple\fR can operate at either end of a \s-1SAX\s0 pipeline. For example,
1144you can take a data structure in the form of a hashref and pass it into a
1145\&\s-1SAX\s0 pipeline using the 'Handler' option on \f(CW\*(C`XMLout()\*(C'\fR:
1146.PP
1147.Vb 3
1148\& use XML::Simple;
1149\& use Some::SAX::Filter;
1150\& use XML::SAX::Writer;
1151.Ve
1152.PP
1153.Vb 3
1154\& my $ref = {
1155\& .... # your data here
1156\& };
1157.Ve
1158.PP
1159.Vb 4
1160\& my $writer = XML::SAX::Writer->new();
1161\& my $filter = Some::SAX::Filter->new(Handler => $writer);
1162\& my $simple = XML::Simple->new(Handler => $filter);
1163\& $simple->XMLout($ref);
1164.Ve
1165.PP
1166You can also put \fBXML::Simple\fR at the opposite end of the pipeline to take
1167advantage of the simple 'tree' data structure once the relevant data has been
1168isolated through filtering:
1169.PP
1170.Vb 3
1171\& use XML::SAX;
1172\& use Some::SAX::Filter;
1173\& use XML::Simple;
1174.Ve
1175.PP
1176.Vb 3
1177\& my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']);
1178\& my $filter = Some::SAX::Filter->new(Handler => $simple);
1179\& my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
1180.Ve
1181.PP
1182.Vb 1
1183\& my $ref = $parser->parse_uri('some_huge_file.xml');
1184.Ve
1185.PP
1186.Vb 1
1187\& print $ref->{part}->{'555-1234'};
1188.Ve
1189.PP
1190You can build a filter by using an XML::Simple object as a handler and setting
1191its DataHandler option to point to a routine which takes the resulting tree,
1192modifies it and sends it off as \s-1SAX\s0 events to a downstream handler:
1193.PP
1194.Vb 5
1195\& my $writer = XML::SAX::Writer->new();
1196\& my $filter = XML::Simple->new(
1197\& DataHandler => sub {
1198\& my $simple = shift;
1199\& my $data = shift;
1200.Ve
1201.PP
1202.Vb 1
1203\& # Modify $data here
1204.Ve
1205.PP
1206.Vb 4
1207\& $simple->XMLout($data, Handler => $writer);
1208\& }
1209\& );
1210\& my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
1211.Ve
1212.PP
1213.Vb 1
1214\& $parser->parse_uri($filename);
1215.Ve
1216.PP
1217\&\fINote: In this last example, the 'Handler' option was specified in the call to
1218\&\f(CI\*(C`XMLout()\*(C'\fI but it could also have been specified in the constructor\fR.
1219.SH "ENVIRONMENT"
1220.IX Header "ENVIRONMENT"
1221If you don't care which parser module \fBXML::Simple\fR uses then skip this
1222section entirely (it looks more complicated than it really is).
1223.PP
1224\&\fBXML::Simple\fR will default to using a \fB\s-1SAX\s0\fR parser if one is available or
1225\&\fBXML::Parser\fR if \s-1SAX\s0 is not available.
1226.PP
1227You can dictate which parser module is used by setting either the environment
1228variable '\s-1XML_SIMPLE_PREFERRED_PARSER\s0' or the package variable
1229\&\f(CW$XML::Simple::PREFERRED_PARSER\fR to contain the module name. The following rules
1230are used:
1231.IP "\(bu" 4
1232The package variable takes precedence over the environment variable if both are defined. To force \fBXML::Simple\fR to ignore the environment settings and use
1233its default rules, you can set the package variable to an empty string.
1234.IP "\(bu" 4
1235If the 'preferred parser' is set to the string 'XML::Parser', then
1236XML::Parser will be used (or \f(CW\*(C`XMLin()\*(C'\fR will die if XML::Parser is not
1237installed).
1238.IP "\(bu" 4
1239If the 'preferred parser' is set to some other value, then it is assumed to be
1240the name of a \s-1SAX\s0 parser module and is passed to XML::SAX::ParserFactory.
1241If \s-1XML::SAX\s0 is not installed, or the requested parser module is not
1242installed, then \f(CW\*(C`XMLin()\*(C'\fR will die.
1243.IP "\(bu" 4
1244If the 'preferred parser' is not defined at all (the normal default
1245state), an attempt will be made to load \s-1XML::SAX\s0. If \s-1XML::SAX\s0 is
1246installed, then a parser module will be selected according to
1247XML::SAX::ParserFactory's normal rules (which typically means the last \s-1SAX\s0
1248parser installed).
1249.IP "\(bu" 4
1250If the 'preferred parser' is not defined and \fB\s-1XML::SAX\s0\fR is not
1251installed, then \fBXML::Parser\fR will be used. \f(CW\*(C`XMLin()\*(C'\fR will die if
1252XML::Parser is not installed.
1253.PP
1254Note: The \fB\s-1XML::SAX\s0\fR distribution includes an \s-1XML\s0 parser written entirely in
1255Perl. It is very portable but it is not very fast. You should consider
1256installing XML::LibXML or XML::SAX::Expat if they are available for your
1257platform.
1258.SH "ERROR HANDLING"
1259.IX Header "ERROR HANDLING"
1260The \s-1XML\s0 standard is very clear on the issue of non-compliant documents. An
1261error in parsing any single element (for example a missing end tag) must cause
1262the whole document to be rejected. \fBXML::Simple\fR will die with an appropriate
1263message if it encounters a parsing error.
1264.PP
1265If dying is not appropriate for your application, you should arrange to call
1266\&\f(CW\*(C`XMLin()\*(C'\fR in an eval block and look for errors in $@. eg:
1267.PP
1268.Vb 2
1269\& my $config = eval { XMLin() };
1270\& PopUpMessage($@) if($@);
1271.Ve
1272.PP
1273Note, there is a common misconception that use of \fBeval\fR will significantly
1274slow down a script. While that may be true when the code being eval'd is in a
1275string, it is not true of code like the sample above.
1276.SH "EXAMPLES"
1277.IX Header "EXAMPLES"
1278When \f(CW\*(C`XMLin()\*(C'\fR reads the following very simple piece of \s-1XML:\s0
1279.PP
1280.Vb 1
1281\& <opt username="testuser" password="frodo"></opt>
1282.Ve
1283.PP
1284it returns the following data structure:
1285.PP
1286.Vb 4
1287\& {
1288\& 'username' => 'testuser',
1289\& 'password' => 'frodo'
1290\& }
1291.Ve
1292.PP
1293The identical result could have been produced with this alternative \s-1XML:\s0
1294.PP
1295.Vb 1
1296\& <opt username="testuser" password="frodo" />
1297.Ve
1298.PP
1299Or this (although see 'ForceArray' option for variations):
1300.PP
1301.Vb 4
1302\& <opt>
1303\& <username>testuser</username>
1304\& <password>frodo</password>
1305\& </opt>
1306.Ve
1307.PP
1308Repeated nested elements are represented as anonymous arrays:
1309.PP
1310.Vb 9
1311\& <opt>
1312\& <person firstname="Joe" lastname="Smith">
1313\& <email>joe@smith.com</email>
1314\& <email>jsmith@yahoo.com</email>
1315\& </person>
1316\& <person firstname="Bob" lastname="Smith">
1317\& <email>bob@smith.com</email>
1318\& </person>
1319\& </opt>
1320.Ve
1321.PP
1322.Vb 17
1323\& {
1324\& 'person' => [
1325\& {
1326\& 'email' => [
1327\& 'joe@smith.com',
1328\& 'jsmith@yahoo.com'
1329\& ],
1330\& 'firstname' => 'Joe',
1331\& 'lastname' => 'Smith'
1332\& },
1333\& {
1334\& 'email' => 'bob@smith.com',
1335\& 'firstname' => 'Bob',
1336\& 'lastname' => 'Smith'
1337\& }
1338\& ]
1339\& }
1340.Ve
1341.PP
1342Nested elements with a recognised key attribute are transformed (folded) from
1343an array into a hash keyed on the value of that attribute (see the \f(CW\*(C`KeyAttr\*(C'\fR
1344option):
1345.PP
1346.Vb 5
1347\& <opt>
1348\& <person key="jsmith" firstname="Joe" lastname="Smith" />
1349\& <person key="tsmith" firstname="Tom" lastname="Smith" />
1350\& <person key="jbloggs" firstname="Joe" lastname="Bloggs" />
1351\& </opt>
1352.Ve
1353.PP
1354.Vb 16
1355\& {
1356\& 'person' => {
1357\& 'jbloggs' => {
1358\& 'firstname' => 'Joe',
1359\& 'lastname' => 'Bloggs'
1360\& },
1361\& 'tsmith' => {
1362\& 'firstname' => 'Tom',
1363\& 'lastname' => 'Smith'
1364\& },
1365\& 'jsmith' => {
1366\& 'firstname' => 'Joe',
1367\& 'lastname' => 'Smith'
1368\& }
1369\& }
1370\& }
1371.Ve
1372.PP
1373The <anon> tag can be used to form anonymous arrays:
1374.PP
1375.Vb 6
1376\& <opt>
1377\& <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head>
1378\& <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data>
1379\& <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data>
1380\& <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data>
1381\& </opt>
1382.Ve
1383.PP
1384.Vb 10
1385\& {
1386\& 'head' => [
1387\& [ 'Col 1', 'Col 2', 'Col 3' ]
1388\& ],
1389\& 'data' => [
1390\& [ 'R1C1', 'R1C2', 'R1C3' ],
1391\& [ 'R2C1', 'R2C2', 'R2C3' ],
1392\& [ 'R3C1', 'R3C2', 'R3C3' ]
1393\& ]
1394\& }
1395.Ve
1396.PP
1397Anonymous arrays can be nested to arbitrary levels and as a special case, if
1398the surrounding tags for an \s-1XML\s0 document contain only an anonymous array the
1399arrayref will be returned directly rather than the usual hashref:
1400.PP
1401.Vb 5
1402\& <opt>
1403\& <anon><anon>Col 1</anon><anon>Col 2</anon></anon>
1404\& <anon><anon>R1C1</anon><anon>R1C2</anon></anon>
1405\& <anon><anon>R2C1</anon><anon>R2C2</anon></anon>
1406\& </opt>
1407.Ve
1408.PP
1409.Vb 5
1410\& [
1411\& [ 'Col 1', 'Col 2' ],
1412\& [ 'R1C1', 'R1C2' ],
1413\& [ 'R2C1', 'R2C2' ]
1414\& ]
1415.Ve
1416.PP
1417Elements which only contain text content will simply be represented as a
1418scalar. Where an element has both attributes and text content, the element
1419will be represented as a hashref with the text content in the 'content' key
1420(see the \f(CW\*(C`ContentKey\*(C'\fR option):
1421.PP
1422.Vb 4
1423\& <opt>
1424\& <one>first</one>
1425\& <two attr="value">second</two>
1426\& </opt>
1427.Ve
1428.PP
1429.Vb 4
1430\& {
1431\& 'one' => 'first',
1432\& 'two' => { 'attr' => 'value', 'content' => 'second' }
1433\& }
1434.Ve
1435.PP
1436Mixed content (elements which contain both text content and nested elements)
1437will not be represented in a useful way \- element order and significant
1438whitespace will be lost. If you need to work with mixed content, then
1439XML::Simple is not the right tool for your job \- check out the next section.
1440.SH "WHERE TO FROM HERE?"
1441.IX Header "WHERE TO FROM HERE?"
1442\&\fBXML::Simple\fR is able to present a simple \s-1API\s0 because it makes some
1443assumptions on your behalf. These include:
1444.IP "\(bu" 4
1445You're not interested in text content consisting only of whitespace
1446.IP "\(bu" 4
1447You don't mind that when things get slurped into a hash the order is lost
1448.IP "\(bu" 4
1449You don't want fine-grained control of the formatting of generated \s-1XML\s0
1450.IP "\(bu" 4
1451You would never use a hash key that was not a legal \s-1XML\s0 element name
1452.IP "\(bu" 4
1453You don't need help converting between different encodings
1454.PP
1455In a serious \s-1XML\s0 project, you'll probably outgrow these assumptions fairly
1456quickly. This section of the document used to offer some advice on choosing a
1457more powerful option. That advice has now grown into the 'Perl\-XML \s-1FAQ\s0'
1458document which you can find at: <http://perl\-xml.sourceforge.net/faq/>
1459.PP
1460The advice in the \s-1FAQ\s0 boils down to a quick explanation of tree versus
1461event based parsers and then recommends:
1462.PP
1463For event based parsing, use \s-1SAX\s0 (do not set out to write any new code for
1464XML::Parser's handler \s-1API\s0 \- it is obsolete).
1465.PP
1466For tree-based parsing, you could choose between the 'Perlish' approach of
1467XML::Twig and more standards based \s-1DOM\s0 implementations \- preferably one with
1468XPath support.
1469.SH "SEE ALSO"
1470.IX Header "SEE ALSO"
1471\&\fBXML::Simple\fR requires either XML::Parser or \s-1XML::SAX\s0.
1472.PP
1473To generate documents with namespaces, XML::NamespaceSupport is required.
1474.PP
1475The optional caching functions require Storable.
1476.PP
1477Answers to Frequently Asked Questions about XML::Simple are bundled with this
1478distribution as: XML::Simple::FAQ
1479.SH "COPYRIGHT"
1480.IX Header "COPYRIGHT"
1481Copyright 1999\-2004 Grant McLean <grantm@cpan.org>
1482.PP
1483This library is free software; you can redistribute it and/or modify it
1484under the same terms as Perl itself.