Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | # $Id: Tree.pm,v 1.2 2003/07/31 07:54:51 matt Exp $ |
2 | ||
package XML::Parser::Style::Tree;

# Enable strictures so that typos and accidental symbolic references in the
# handlers of this package are reported instead of silently misbehaving.
use strict;
use warnings;

# Flag 'Tree' in XML::Parser's table of built-in styles. The name is fully
# qualified because the hash belongs to the XML::Parser package, not this one.
$XML::Parser::Built_In_Styles{Tree} = 1;
5 | ||
# Init handler: prepare the parser object for building a tree.
#
#   $parser - hash-based parser object that carries the build state.
#
# Sets up three slots:
#   Lists   - empty stack of enclosing content lists,
#   Tree    - the (initially empty) top-level list that becomes the result,
#   Curlist - the list currently receiving nodes; starts as the very same
#             array as Tree.
sub Init {
    my ($parser) = @_;

    my $root = [];
    $parser->{Lists}   = [];
    $parser->{Tree}    = $root;
    $parser->{Curlist} = $root;
}
11 | ||
# Start handler: open a new element node.
#
#   $parser - parser object holding the Lists / Curlist build state.
#   $name   - tag name of the element being opened.
#   @attrs  - flat list of attribute name/value pairs.
#
# The element's content list begins with a hash of its attributes. The
# enclosing list is remembered on the Lists stack, the (tag, content) pair is
# appended to it, and the new content list becomes the current list.
sub Start {
    my ($parser, $name, @attrs) = @_;

    my $content = [ {@attrs} ];

    push @{ $parser->{Lists} },   $parser->{Curlist};
    push @{ $parser->{Curlist} }, $name, $content;

    $parser->{Curlist} = $content;
}
20 | ||
# End handler: close the current element.
#
#   $parser - parser object holding the Lists / Curlist build state.
#
# Any further arguments (the tag name) are not needed: the enclosing content
# list is simply popped off the Lists stack and made current again.
sub End {
    my ($parser) = @_;

    my $enclosing = pop @{ $parser->{Lists} };
    $parser->{Curlist} = $enclosing;
}
26 | ||
# Char handler: record character data in the current content list.
#
#   $parser - parser object; its Curlist slot is the list being filled.
#   $chunk  - the piece of text to record.
#
# Text nodes are stored as the pair (0, string). When the last pair in the
# list is already a text node, the chunk is appended to that string;
# otherwise a new (0, $chunk) pair is pushed.
sub Char {
    my ($parser, $chunk) = @_;

    my $nodes = $parser->{Curlist};
    my $last  = $#$nodes;

    # The slot just before the last one holds the tag of the last pair.
    my $ends_in_text = ($last > 0 and $nodes->[ $last - 1 ] eq '0');

    if (not $ends_in_text) {
        return push @$nodes, 0, $chunk;
    }

    return $nodes->[$last] .= $chunk;
}
39 | ||
# Final handler: discard the build-time bookkeeping and hand back the tree.
#
#   $parser - parser object holding the Tree, Curlist and Lists slots.
#
# Returns the value of the Tree slot, which is left in place on the object.
sub Final {
    my ($parser) = @_;

    delete @{$parser}{qw(Curlist Lists)};

    return $parser->{Tree};
}
46 | ||
47 | 1; | |
48 | __END__ | |
49 | ||
50 | =head1 NAME | |
51 | ||
52 | XML::Parser::Style::Tree - Tree style parser | |
53 | ||
54 | =head1 SYNOPSIS | |
55 | ||
56 | use XML::Parser; | |
57 | my $p = XML::Parser->new(Style => 'Tree'); | |
58 | my $tree = $p->parsefile('foo.xml'); | |
59 | ||
60 | =head1 DESCRIPTION | |
61 | ||
62 | This module implements XML::Parser's Tree style parser. | |
63 | ||
64 | When parsing a document, C<parse()> will return a parse tree for the | |
65 | document. Each node in the tree | |
66 | takes the form of a tag, content pair. Text nodes are represented with | |
67 | a pseudo-tag of "0" and the string that is their content. For elements, | |
68 | the content is an array reference. The first item in the array is a | |
69 | (possibly empty) hash reference containing attributes. The remainder of | |
70 | the array is a sequence of tag-content pairs representing the content | |
71 | of the element. | |
72 | ||
73 | So for example the result of parsing: | |
74 | ||
75 | <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> | |
76 | ||
77 | would be: | |
78 | Tag Content | |
79 | ================================================================== | |
80 | [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], | |
81 | bar, [ {}, 0, "Howdy", ref, [{}]], | |
82 | 0, "do" | |
83 | ] | |
84 | ] | |
85 | ||
86 | The root element "foo" has 3 children: a "head" element, a "bar" | |
87 | element and the text "do". After the empty attribute hash, these are | |
88 | represented in its contents by 3 tag-content pairs. | |
89 | ||
90 | =cut |