Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | """\ |
2 | A library of useful helper classes to the SAX classes, for the | |
3 | convenience of application and driver writers. | |
4 | """ | |
5 | ||
6 | import os, urlparse, urllib, types | |
7 | import handler | |
8 | import xmlreader | |
9 | ||
# Types that prepare_input_source() treats as "a system identifier given as
# a string".  Builds without Unicode support have no types.UnicodeType, so
# it is only added when the attribute exists.
_StringTypes = [types.StringType]
if hasattr(types, "UnicodeType"):
    _StringTypes.append(types.UnicodeType)
14 | ||
# Probe for the "xmlcharrefreplace" codec error handler.  When it is
# available, characters that cannot be encoded are written as numeric
# character references; otherwise encoding falls back to "strict".
try:
    from codecs import xmlcharrefreplace_errors
except ImportError:
    _error_handling = "strict"
else:
    _error_handling = "xmlcharrefreplace"
    # The imported name was only needed for the probe itself.
    del xmlcharrefreplace_errors
23 | ||
def __dict_replace(s, d):
    """Return s with every key of d replaced by its mapped value.

    Replacements are applied one after another in the dictionary's
    iteration order, each on the result of the previous one.
    """
    for old in d:
        s = s.replace(old, d[old])
    return s
29 | ||
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The entities dictionary is only read, never modified.  Returns the
    escaped string.
    """

    # The ampersand must be handled first: the entity references produced
    # below contain "&" themselves and must not be escaped a second time.
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
45 | ||
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The entities dictionary is only read, never modified.  Returns the
    unescaped string.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # The ampersand must be handled last: doing it earlier would turn
    # "&amp;lt;" into "&lt;" and then, wrongly, into "<".
    return data.replace("&amp;", "&")
59 | ||
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The double-quote character will be escaped as
    well, if necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the value including its surrounding quote characters.
    """
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both kinds of quote occur: delimit with double quotes and
            # replace the embedded double quotes by an entity reference.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            # Only double quotes occur: single-quote delimiters avoid
            # any further escaping.
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data
80 | ||
81 | ||
class XMLGenerator(handler.ContentHandler):
    """Content handler that writes the SAX events it receives as XML text.

    out is the object whose write() method receives the output; it
    defaults to sys.stdout.  encoding is the name put into the XML
    declaration and used to encode any text that is not a plain str.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that still
        # have to be declared on the next start tag.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write text to the output, encoding it first if it is not a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Build the qualified name for a (namespace uri, local name) pair.

        Names without a namespace, and names whose namespace is bound as
        the default namespace (prefix None), are returned unprefixed.
        Raises KeyError if the namespace has no prefix mapping in scope.
        """
        uri, localname = name[0], name[1]
        if uri:
            # The "xml" prefix is bound to this namespace by definition; it
            # need not be declared, so it is normally absent from the
            # current context.
            if uri == 'http://www.w3.org/XML/1998/namespace':
                return 'xml:' + localname
            prefix = self._current_context[uri]
            if prefix:
                return prefix + ":" + localname
        return localname

    # ContentHandler methods

    def startDocument(self):
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a snapshot of the mappings in force before this one, so that
        # endPrefixMapping() can restore them.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        self._write('<' + self._qname(name))

        # Emit the namespace declarations collected since the last start
        # tag.  A mapping without a prefix declares the default namespace.
        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name),
                                    quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        self._write(content)

    def processingInstruction(self, target, data):
        self._write('<?%s %s?>' % (target, data))
158 | ||
159 | ||
class XMLFilterBase(xmlreader.XMLReader):
    """Pass-through filter placed between an XMLReader and the
    application's event handlers.

    Every configuration request is forwarded to the parent reader and
    every event is forwarded to the registered handler without change.
    Subclasses override individual methods to alter the event stream or
    the configuration requests on their way through.
    """

    def __init__(self, parent=None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods -- forwarded to the registered error handler

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods -- forwarded to the registered content handler

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods -- forwarded to the registered DTD handler

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods -- forwarded to the registered entity resolver

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods -- forwarded to the parent reader

    def parse(self, source):
        """Register this filter as all four handlers of the parent
        reader, then let the parent parse source."""
        upstream = self._parent
        upstream.setContentHandler(self)
        upstream.setErrorHandler(self)
        upstream.setEntityResolver(self)
        upstream.setDTDHandler(self)
        upstream.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the reader this filter forwards requests to."""
        return self._parent

    def setParent(self, parent):
        """Set the reader this filter forwards requests to."""
        self._parent = parent
268 | ||
269 | # --- Utility functions | |
270 | ||
def prepare_input_source(source, base = ""):
    """Return a fully resolved InputSource object ready for reading.

    source may be a system identifier given as a string, an object with
    a read() method, or an InputSource.  base is an optional base URL
    (or path) against which a relative system identifier is resolved.

    If the resulting InputSource has no byte stream, one is opened: from
    the local file named by the system identifier when such a file
    exists, otherwise by fetching the system identifier as a URL joined
    onto base.
    """

    # isinstance() rather than an exact type comparison, so that
    # subclasses of the string types are recognised as system identifiers.
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        # File objects expose their path as .name; use it as system id.
        if hasattr(stream, "name"):
            source.setSystemId(stream.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            stream = open(sysidfilename, "rb")
        else:
            # Not a local file: resolve against base and fetch as a URL.
            source.setSystemId(urlparse.urljoin(base, sysid))
            stream = urllib.urlopen(source.getSystemId())

        source.setByteStream(stream)

    return source