Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | """An XML Reader is the SAX 2 name for an XML parser. XML Parsers |
2 | should be based on this code. """ | |
3 | ||
4 | import handler | |
5 | ||
6 | from _exceptions import SAXNotSupportedException, SAXNotRecognizedException | |
7 | ||
8 | ||
9 | # ===== XMLREADER ===== | |
10 | ||
class XMLReader:
    """Callback-driven interface for reading an XML document.

    A SAX2 driver for an XML parser must implement this interface.  It
    lets an application query and set parser features and properties,
    register the handlers that receive document events, and start a
    parse.

    Every SAX interface is assumed to be synchronous: a parse method
    must not return before parsing has finished, and a reader must let
    each event-handler callback return before it reports the next
    event.
    """

    def __init__(self):
        # Start out with the default handler objects supplied by the
        # handler module; applications replace them via the setters.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document given a system identifier or InputSource.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

    # --- handler registration -------------------------------------------

    def getContentHandler(self):
        """Return the ContentHandler currently registered."""
        return self._cont_handler

    def setContentHandler(self, handler):
        """Register a new object that receives document content events."""
        self._cont_handler = handler

    def getDTDHandler(self):
        """Return the DTD handler currently registered."""
        return self._dtd_handler

    def setDTDHandler(self, handler):
        """Register an object that receives basic DTD-related events."""
        self._dtd_handler = handler

    def getEntityResolver(self):
        """Return the EntityResolver currently registered."""
        return self._ent_handler

    def setEntityResolver(self, resolver):
        """Register an object that resolves external entities."""
        self._ent_handler = resolver

    def getErrorHandler(self):
        """Return the ErrorHandler currently registered."""
        return self._err_handler

    def setErrorHandler(self, handler):
        """Register an object that receives error-message events."""
        self._err_handler = handler

    # --- locale, features and properties --------------------------------

    def setLocale(self, locale):
        """Let an application choose the locale for errors and warnings.

        A SAX parser need not localize its errors and warnings, but it
        must throw a SAX exception when it cannot support the requested
        locale.  Applications may ask for a locale change in the middle
        of a parse.

        This base implementation always raises
        SAXNotSupportedException.
        """
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Look up and return the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException.
        """
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException.
        """
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def getProperty(self, name):
        """Look up and return the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException.
        """
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setProperty(self, name, value):
        """Set the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException.
        """
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)
90 | ||
class IncrementalParser(XMLReader):
    """XMLReader extension that adds support for incremental parsing.

    Three extra methods (feed, close and reset) are added to the
    XMLReader interface.  Supporting this interface is optional, since
    not every underlying XML parser can parse incrementally.

    A freshly instantiated parser is ready to accept data from the feed
    method immediately.  Once parsing has been finished with a call to
    close, the reset method must be called to make the parser ready to
    accept new data, either from feed or through the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the
    XMLReader interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers.
    """

    def __init__(self, bufsize=2**16):
        """Initialize the parser.

        bufsize is the size argument passed to each read() call that
        parse() makes on the input stream.
        """
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse a whole document by feeding it to the parser in chunks.

        source is handed to saxutils.prepare_input_source, and the
        object it returns is used from then on; prepareParser is called
        with it before any data is read.  The stream returned by
        getByteStream() is then read bufsize units at a time, every
        non-empty chunk is passed to feed(), and close() is called once
        the stream is exhausted.
        """
        # NOTE(review): prepare_input_source presumably turns a system
        # identifier into an InputSource with an open byte stream --
        # confirm against saxutils.
        import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        # End of input is detected by truthiness rather than by
        # comparing against "": a stream that signals EOF with another
        # empty value (for example b"") would never compare equal to ""
        # and the loop would not terminate.
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        self.close()

    def feed(self, data):
        """Give the raw XML data in the data parameter to the parser.

        The parser parses the data and emits the corresponding events.
        XML constructs are allowed to be split across several calls to
        feed.

        feed may raise SAXException.  This base implementation always
        raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """Hook called by parse() so the driver can prepare for parsing.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """Notify the parser that there are no more data.

        Called when the entire XML document has been passed to the
        parser through the feed method.  This allows the parser to do
        the final checks on the document and empty the internal data
        buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException.  This base implementation always
        raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """Make the parser ready to parse new documents after close.

        The results of calling parse or feed after close without
        calling reset are undefined.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")
160 | ||
161 | # ===== LOCATOR ===== | |
162 | ||
class Locator:
    """Interface that ties a SAX event to a location in the document.

    Results from a locator object are valid only during calls to
    DocumentHandler methods; at any other time, the results are
    unpredictable.

    The methods of this base class return fixed placeholder values:
    -1 for the line and column numbers, None for the identifiers.
    """

    def getColumnNumber(self):
        """Return the column number where the current event ends."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event."""
        return None
184 | ||
185 | # ===== INPUTSOURCE ===== | |
186 | ||
class InputSource:
    """Bundle of the information an XMLReader needs to read an entity.

    An instance may carry the public identifier, the system identifier,
    a byte stream (possibly with character encoding information) and/or
    the character stream of an entity.

    Applications create objects of this class to pass to the
    XMLReader.parse method and to return from
    EntityResolver.resolveEntity.

    An InputSource belongs to the application: the XMLReader is not
    allowed to modify InputSource objects passed to it from the
    application, although it may make copies and modify those.
    """

    def __init__(self, system_id = None):
        # Only the system identifier can be given at construction time;
        # everything else starts out unset.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    # --- identifiers ------------------------------------------------------

    def setPublicId(self, public_id):
        """Set the public identifier of this InputSource."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this InputSource."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Set the system identifier of this InputSource."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this InputSource."""
        return self.__system_id

    # --- encoding and streams ---------------------------------------------

    def setEncoding(self, encoding):
        """Set the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding attribute of the InputSource is ignored if the
        InputSource also contains a character stream.
        """
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this InputSource."""
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream for this input source.

        The stream is a Python file-like object which does not perform
        byte-to-character conversion.

        The SAX parser will ignore this if there is also a character
        stream specified, but it will use a byte stream in preference
        to opening a URI connection itself.

        If the application knows the character encoding of the byte
        stream, it should set it with the setEncoding method.
        """
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        The getEncoding method will return the character encoding for
        this byte stream, or None if unknown.
        """
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source.

        The stream must be a Python 2.0 Unicode-wrapped file-like that
        performs conversion to Unicode strings.

        If there is a character stream specified, the SAX parser will
        ignore any byte stream and will not attempt to open a URI
        connection to the system identifier.
        """
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream for this input source."""
        return self.__charfile
273 | ||
274 | # ===== ATTRIBUTESIMPL ===== | |
275 | ||
class AttributesImpl:
    """Non-namespace-aware attribute collection.

    Wraps a {name : value} mapping and exposes it both through the SAX
    attribute accessors (getValue, getNames, ...) and through a
    read-only, dictionary-like interface (len(), [], keys(), get(),
    items(), values(), membership tests).

    Names and qualified names are the same thing in this class.
    """

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}.  The mapping is
        stored as given, not copied.
        """
        self._attrs = attrs

    def getLength(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def getType(self, name):
        """Return the attribute type, which is always "CDATA".

        name is not looked up, so no error is raised for unknown names.
        """
        return "CDATA"

    def getValue(self, name):
        """Return the value of attribute name; raise KeyError if absent."""
        return self._attrs[name]

    def getValueByQName(self, name):
        """Return the value for qualified name name; KeyError if absent."""
        return self._attrs[name]

    def getNameByQName(self, name):
        """Return the name for a qualified name (the name itself).

        Raises KeyError when no such attribute exists.
        """
        # "in" and the call form of raise behave like the older has_key
        # and "raise KeyError, name" spellings, which newer Python
        # versions no longer accept.
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        """Return the qualified name for a name (the name itself).

        Raises KeyError when no such attribute exists.
        """
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        """Return the attribute names."""
        return self._attrs.keys()

    def getQNames(self):
        """Return the qualified attribute names (same as getNames)."""
        return self._attrs.keys()

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        """Return the attribute names."""
        return self._attrs.keys()

    def has_key(self, name):
        """Return a true value if an attribute called name exists."""
        return name in self._attrs

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        """Return the value of name, or alternative if it is absent."""
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new instance of the same class.

        The new object is built from the very same mapping object; the
        mapping itself is not duplicated.
        """
        return self.__class__(self._attrs)

    def items(self):
        """Return the (name, value) pairs."""
        return self._attrs.items()

    def values(self):
        """Return the attribute values."""
        return self._attrs.values()
338 | ||
339 | # ===== ATTRIBUTESNSIMPL ===== | |
340 | ||
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Attributes are keyed by (ns_uri, lname) pairs; a second mapping
    records the qualified name belonging to each pair.  Everything not
    overridden here is inherited from AttributesImpl and operates on
    the (ns_uri, lname) keys.
    """

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}.

        Both mappings are stored as given, not copied.
        """
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is name.

        Raises KeyError when no attribute has that qualified name.
        """
        # The qnames mapping goes from key to qname, so a reverse lookup
        # has to scan its entries.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        # Call form of raise: same exception as "raise KeyError, name",
        # but also accepted by Python versions that dropped that syntax.
        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) pair for the qualified name name.

        Raises KeyError when no attribute has that qualified name.
        """
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name for an (ns_uri, lname) pair.

        Raises KeyError when the pair is not in the qnames mapping.
        """
        return self._qnames[name]

    def getQNames(self):
        """Return the qualified names of the attributes."""
        return self._qnames.values()

    def copy(self):
        """Return a new instance of the same class.

        The new object is built from the very same two mapping objects;
        the mappings themselves are not duplicated.
        """
        return self.__class__(self._attrs, self._qnames)
373 | ||
374 | ||
def _test():
    """Smoke test: instantiate each class that has a no-argument constructor."""
    for cls in (XMLReader, IncrementalParser, Locator):
        cls()


if __name__ == "__main__":
    _test()