Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | # Very simple test - Parse a file and print what happens |
2 | ||
3 | # XXX TypeErrors on calling handlers, or on bad return values from a | |
4 | # handler, are obscure and unhelpful. | |
5 | ||
6 | import pyexpat | |
7 | from xml.parsers import expat | |
8 | ||
9 | from test.test_support import sortdict, TestFailed | |
10 | ||
class Outputter:
    """Collection of Expat callbacks that report each event on stdout.

    Every method carries the name of the parser attribute it is meant to
    be assigned to.
    """

    def StartElementHandler(self, name, attrs):
        # The heading ends in a tab, so nothing separates it from the name.
        print('Start element:\n\t%s %s' % (repr(name), sortdict(attrs)))

    def EndElementHandler(self, name):
        print('End element:\n\t' + repr(name))

    def CharacterDataHandler(self, data):
        # Whitespace-only chunks are not reported.
        text = data.strip()
        if not text:
            return
        print('Character data:')
        print('\t' + repr(text))

    def ProcessingInstructionHandler(self, target, data):
        print('PI:\n\t%s %s' % (repr(target), repr(data)))

    def StartNamespaceDeclHandler(self, prefix, uri):
        print('NS decl:\n\t%s %s' % (repr(prefix), repr(uri)))

    def EndNamespaceDeclHandler(self, prefix):
        print('End of NS decl:\n\t' + repr(prefix))

    def StartCdataSectionHandler(self):
        print('Start of CDATA section')

    def EndCdataSectionHandler(self):
        print('End of CDATA section')

    def CommentHandler(self, text):
        print('Comment:\n\t' + repr(text))

    def NotationDeclHandler(self, *args):
        # The unpacking only verifies that exactly four values were passed.
        name, base, sysid, pubid = args
        print('Notation declared: %s' % (args,))

    def UnparsedEntityDeclHandler(self, *args):
        # The unpacking only verifies that exactly five values were passed.
        entityName, base, systemId, publicId, notationName = args
        print('Unparsed entity decl:\n\t%s' % (args,))

    def NotStandaloneHandler(self, userData):
        print('Not standalone')
        return 1

    def ExternalEntityRefHandler(self, *args):
        # The unpacking only verifies that exactly four values were passed;
        # the first one (the context) is left out of the report.
        context, base, sysId, pubId = args
        print('External entity ref: %s' % (args[1:],))
        return 1

    def DefaultHandler(self, userData):
        # Deliberately silent.
        pass

    def DefaultHandlerExpand(self, userData):
        # Deliberately silent.
        pass
64 | ||
65 | ||
def confirm(ok):
    """Print "OK." when *ok* is true and "Not OK." otherwise."""
    verdict = "Not OK."
    if ok:
        verdict = "OK."
    print(verdict)
71 | ||
out = Outputter()
parser = expat.ParserCreate(namespace_separator='!')

# Test getting/setting returns_unicode, ordered_attributes and
# specified_attributes.  Each flag goes through the same sequence of
# assignments; after assigning 2 the attribute is expected to read back as 1.
for flag in ('returns_unicode', 'ordered_attributes', 'specified_attributes'):
    for assigned, reported in ((0, 0), (1, 1), (2, 1), (0, 0)):
        setattr(parser, flag, assigned)
        confirm(getattr(parser, flag) == reported)
92 | ||
# Names of the Outputter callbacks that get installed on a parser.
HANDLER_NAMES = [
    'StartElementHandler',
    'EndElementHandler',
    'CharacterDataHandler',
    'ProcessingInstructionHandler',
    'UnparsedEntityDeclHandler',
    'NotationDeclHandler',
    'StartNamespaceDeclHandler',
    'EndNamespaceDeclHandler',
    'CommentHandler',
    'StartCdataSectionHandler',
    'EndCdataSectionHandler',
    'DefaultHandler',
    'DefaultHandlerExpand',
    #'NotStandaloneHandler',
    'ExternalEntityRefHandler',
]
for name in HANDLER_NAMES:
    callback = getattr(out, name)
    setattr(parser, name, callback)
106 | ||
# XML document used by the parsing runs.  The literal is kept pure ASCII:
# the two non-ASCII characters are written as numeric character references.
# That keeps this source file free of undeclared non-ASCII bytes and keeps
# the document consistent with the iso-8859-1 encoding it declares (U+1F40,
# i.e. &#8000;, has no ISO-8859-1 representation at all).
data = '''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
'''
128 | ||
def _report_parse_error(failed_parser):
    """Print the error code, message and position stored on *failed_parser*."""
    code = failed_parser.ErrorCode
    print('** Error %s %s' % (code, expat.ErrorString(code)))
    print('** Line %s' % (failed_parser.ErrorLineNumber,))
    print('** Column %s' % (failed_parser.ErrorColumnNumber,))
    print('** Byte %s' % (failed_parser.ErrorByteIndex,))


def _install_output_handlers(target):
    """Attach every callback listed in HANDLER_NAMES from `out` to *target*."""
    for handler_name in HANDLER_NAMES:
        setattr(target, handler_name, getattr(out, handler_name))


# Produce UTF-8 output
parser.returns_unicode = 0
try:
    parser.Parse(data, 1)
except expat.error:
    _report_parse_error(parser)

# Try the parse again, this time producing Unicode output
parser = expat.ParserCreate(namespace_separator='!')
parser.returns_unicode = 1
_install_output_handlers(parser)
try:
    parser.Parse(data, 1)
except expat.error:
    _report_parse_error(parser)

# Try parsing a file
parser = expat.ParserCreate(namespace_separator='!')
parser.returns_unicode = 1
_install_output_handlers(parser)
import StringIO
# Named xml_file rather than `file` so the builtin is not shadowed.
xml_file = StringIO.StringIO(data)
try:
    parser.ParseFile(xml_file)
except expat.error:
    _report_parse_error(parser)
168 | ||
169 | ||
170 | # Tests that make sure we get errors when the namespace_separator value | |
171 | # is illegal, and that we don't for good values: | |
172 | ||
173 | print "Testing constructor for proper handling of namespace_separator values:" | |
174 | expat.ParserCreate() | |
175 | expat.ParserCreate(namespace_separator=None) | |
176 | expat.ParserCreate(namespace_separator=' ') | |
177 | print "Legal values tested o.k." | |
178 | try: | |
179 | expat.ParserCreate(namespace_separator=42) | |
180 | except TypeError, e: | |
181 | print "Caught expected TypeError:" | |
182 | print e | |
183 | else: | |
184 | print "Failed to catch expected TypeError." | |
185 | ||
186 | try: | |
187 | expat.ParserCreate(namespace_separator='too long') | |
188 | except ValueError, e: | |
189 | print "Caught expected ValueError:" | |
190 | print e | |
191 | else: | |
192 | print "Failed to catch expected ValueError." | |
193 | ||
194 | # ParserCreate() needs to accept a namespace_separator of zero length | |
195 | # to satisfy the requirements of RDF applications that are required | |
196 | # to simply glue together the namespace URI and the localname. Though | |
197 | # considered a wart of the RDF specifications, it needs to be supported. | |
198 | # | |
199 | # See XML-SIG mailing list thread starting with | |
200 | # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html | |
201 | # | |
202 | expat.ParserCreate(namespace_separator='') # too short | |
203 | ||
# Test the interning machinery.
p = expat.ParserCreate()
L = []


def collector(name, *args):
    """Append the element name of a start or end event to L."""
    L.append(name)


p.StartElementHandler = p.EndElementHandler = collector
p.Parse("<e> <e/> <e></e> </e>", 1)
tag = L[0]
if len(L) != 6:
    print("L should only contain 6 entries; found %s" % (len(L),))
for entry in L:
    if entry is tag:
        continue
    # One entry that is a different object is enough to report the problem.
    print("expected L to contain many references to the same string "
          "(it didn't)")
    print("L = " + repr(L))
    break
221 | ||
222 | # Tests of the buffer_text attribute. | |
223 | import sys | |
224 | ||
class TextCollector:
    """Record parser events as strings for the buffer_text tests.

    Character data is stored verbatim; start tags, end tags and comments
    are stored as "<name>", "</name>" and "<!--text-->".  The collected
    events are available in the `stuff` list.
    """

    def __init__(self, parser):
        # Remember the parser so StartElementHandler toggles buffering on
        # the parser this collector was created for, instead of relying on
        # a module-level variable that happens to be called `parser`.
        self.parser = parser
        self.stuff = []

    def check(self, expected, label):
        """Fail via require() with *label* unless stuff equals *expected*."""
        require(self.stuff == expected,
                "%s\nstuff = %r\nexpected = %r"
                % (label, self.stuff, map(unicode, expected)))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A buffer-text="yes"/"no" attribute switches buffering on or off;
        # any other value (or no attribute) leaves the setting alone.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)
250 | ||
def require(cond, label):
    """Raise TestFailed(label) when *cond* is false.

    Similar to confirm(), but produces no output.
    """
    if cond:
        return
    raise TestFailed(label)
255 | ||
def setup(handlers=()):
    """Create a parser with text buffering switched on, plus its collector.

    handlers -- names of TextCollector callbacks to install on the parser
                in addition to CharacterDataHandler, which is always
                installed.  The default is an immutable empty tuple so no
                mutable object is shared between calls.

    Returns the (parser, handler) pair.  Raises TestFailed (through
    require()) if a new parser reports buffer_text as enabled.
    """
    parser = expat.ParserCreate()
    require(not parser.buffer_text,
            "buffer_text not disabled by default")
    parser.buffer_text = 1
    handler = TextCollector(parser)
    parser.CharacterDataHandler = handler.CharacterDataHandler
    for name in handlers:
        setattr(parser, name, getattr(handler, name))
    return parser, handler
266 | ||
# Only the character-data callback is installed here; the three text
# pieces are expected to be delivered as a single string.
parser, handler = setup()
require(parser.buffer_text,
        "text buffering either not acknowledged or not enabled")
document = "<a>1<b/>2<c/>3</a>"
expected = ["123"]
parser.Parse(document, 1)
handler.check(expected, "buffered text not properly collapsed")

# XXX This test exposes more detail of Expat's text chunking than we
# XXX like, but it tests what we need to concisely.
parser, handler = setup(["StartElementHandler"])
document = "<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>"
expected = ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"]
parser.Parse(document, 1)
handler.check(expected, "buffering control not reacting as expected")
280 | ||
# The markup characters inside the text have to be written as entity
# references: a literal "<2>" is not well-formed XML, so Parse() would
# raise instead of delivering the text "1<2> \n 3" the check expects.
parser, handler = setup()
parser.Parse("<a>1<b/>&lt;2&gt;<c/> \n 3</a>", 1)
handler.check(["1<2> \n 3"],
              "buffered text not properly collapsed")
285 | ||
# Each entry: (extra handler names, keep the character-data callback?,
#              document, expected events, failure label)
SPLIT_CASES = [
    (["StartElementHandler"], 1,
     "<a>1<b/>2<c/>3</a>",
     ["<a>", "1", "<b>", "2", "<c>", "3"],
     "buffered text not properly split"),

    (["StartElementHandler", "EndElementHandler"], 0,
     "<a>1<b/>2<c/>3</a>",
     ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"],
     "huh?"),

    (["StartElementHandler", "EndElementHandler"], 1,
     "<a>1<b></b>2<c/>3</a>",
     ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"],
     "huh?"),

    (["CommentHandler", "EndElementHandler", "StartElementHandler"], 1,
     "<a>1<b/>2<c></c>345</a> ",
     ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
     "buffered text not properly split"),

    (["CommentHandler", "EndElementHandler", "StartElementHandler"], 1,
     "<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ",
     ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
      "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
     "buffered text not properly split"),
]
for names, keep_text, document, expected, label in SPLIT_CASES:
    # `parser` and `handler` are rebound at module level for every case,
    # exactly as the individual assignments did.
    parser, handler = setup(names)
    if not keep_text:
        parser.CharacterDataHandler = None
    parser.Parse(document, 1)
    handler.check(expected, label)
314 | ||
# Test handling of exception from callback:
def StartElementHandler(name, attrs):
    """Always raise RuntimeError carrying the element name."""
    raise RuntimeError(name)


parser = expat.ParserCreate()
parser.StartElementHandler = StartElementHandler

try:
    parser.Parse("<a><b><c/></b></a>", 1)
except RuntimeError:
    # sys is imported earlier in this module; fetch the active exception.
    e = sys.exc_info()[1]
    if e.args[0] != "a":
        print("Expected RuntimeError for element 'a'; found %r" % e.args[0])
else:
    print("Expected RuntimeError for 'a'")
329 | ||
330 | # Test Current* members: | |
class PositionTest:
    """Compare the parser's Current* attributes with a list of expectations.

    expected_list holds one (event, byte index, line number, column number)
    tuple per element event, where event is 's' for a start tag and 'e' for
    an end tag.  The instance installs itself as the parser's start and end
    element handler.
    """

    def __init__(self, expected_list, parser):
        self.expected_list = expected_list
        self.upto = 0           # index of the next expected event
        self.parser = parser
        parser.StartElementHandler = self.StartElementHandler
        parser.EndElementHandler = self.EndElementHandler

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Require the current parser position to match the next expectation."""
        source = self.parser
        pos = (event,
               source.CurrentByteIndex,
               source.CurrentLineNumber,
               source.CurrentColumnNumber)
        index = self.upto
        require(index < len(self.expected_list),
                'too many parser events')
        expected = self.expected_list[index]
        require(pos == expected,
                'expected position %s, got %s' % (expected, pos))
        self.upto = index + 1
357 | ||
358 | ||
# The expected (event, byte, line, column) tuples only hold when every
# nesting level of the document is indented by exactly one space, e.g.
# <b> at byte 5 / column 1 and <c/> at byte 11 / column 2.  The document
# is therefore spelled with explicit newlines and spaces, so its layout
# cannot be altered by re-indenting this file.
parser = expat.ParserCreate()
handler = PositionTest([('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                        ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)],
                       parser)
parser.Parse("<a>\n"
             " <b>\n"
             "  <c/>\n"
             " </b>\n"
             "</a>", 1)