Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | """Implementation of the DOM Level 3 'LS-Load' feature.""" |
2 | ||
3 | import copy | |
4 | import xml.dom | |
5 | ||
6 | from xml.dom.minicompat import * | |
7 | ||
8 | from xml.dom.NodeFilter import NodeFilter | |
9 | ||
10 | ||
11 | __all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"] | |
12 | ||
13 | ||
class Options:
    """Holder for the per-feature settings of a DOMBuilder.

    A DOMBuilder keeps one instance of this class and hands a copy of it
    to the ExpatBuilder machinery, so every feature the builder knows
    about has a corresponding attribute here.
    """

    # The DOMBuilder class in LoadSave restricts which of these values
    # may actually be changed through the DOM Level 3 LoadSave feature.

    # Settings that are switched on by default.
    namespaces = 1
    namespace_declarations = True
    external_parameter_entities = True
    external_general_entities = True
    external_dtd_subset = True
    create_entity_ref_nodes = True
    entities = True
    whitespace_in_element_content = True
    cdata_sections = True
    comments = True
    charset_overrides_xml_encoding = True

    # Settings that are switched off by default.
    validation = False
    validate_if_schema = False
    validate = False
    datatype_normalization = False
    infoset = False
    supported_mediatypes_only = False

    # Collaborators; DOMBuilder.parse() fills these in on its copy.
    errorHandler = None
    filter = None
44 | ||
45 | ||
class DOMBuilder:
    """Synchronous document builder driven by DOM Level 3 Load features.

    Feature values are stored on an Options instance (self._options); a
    copy of it, together with the current filter and error handler, is
    handed to xml.dom.expatbuilder when a document is parsed.
    """

    entityResolver = None
    errorHandler = None
    filter = None

    ACTION_REPLACE = 1
    ACTION_APPEND_AS_CHILDREN = 2
    ACTION_INSERT_AFTER = 3
    ACTION_INSERT_BEFORE = 4

    _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
                      ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)

    def __init__(self):
        self._options = Options()

    def _get_entityResolver(self):
        """Return the entity resolver used by parseURI(), or None."""
        return self.entityResolver

    def _set_entityResolver(self, entityResolver):
        """Install the entity resolver used by parseURI()."""
        self.entityResolver = entityResolver

    def _get_errorHandler(self):
        """Return the error handler passed along to the parser options."""
        return self.errorHandler

    def _set_errorHandler(self, errorHandler):
        """Install the error handler passed along to the parser options."""
        self.errorHandler = errorHandler

    def _get_filter(self):
        """Return the filter passed along to the parser options."""
        return self.filter

    def _set_filter(self, filter):
        """Install the filter passed along to the parser options."""
        self.filter = filter

    def setFeature(self, name, state):
        """Turn the feature *name* on or off according to *state*.

        Raises xml.dom.NotFoundErr when the feature is unknown and
        xml.dom.NotSupportedErr when the feature is known but cannot be
        set to the requested state.
        """
        if not self.supportsFeature(name):
            raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
        # The _settings table is keyed on 0/1, so normalise any truth value.
        state = int(bool(state))
        try:
            settings = self._settings[(_name_xform(name), state)]
        except KeyError:
            raise xml.dom.NotSupportedErr(
                "unsupported feature: %r" % (name,))
        # One feature may imply several option changes (see "infoset").
        for option, value in settings:
            setattr(self._options, option, value)

    def supportsFeature(self, name):
        """Return true if *name* corresponds to an attribute of the options."""
        return hasattr(self._options, _name_xform(name))

    def canSetFeature(self, name, state):
        """Return true if setFeature(name, state) would be accepted."""
        key = (_name_xform(name), int(bool(state)))
        return key in self._settings

    # This dictionary maps from (feature,value) to a list of
    # (option,value) pairs that should be set on the Options object.
    # If a (feature,value) setting is not in this dictionary, it is
    # not supported by the DOMBuilder.
    #
    _settings = {
        ("namespace_declarations", 0): [
            ("namespace_declarations", 0)],
        ("namespace_declarations", 1): [
            ("namespace_declarations", 1)],
        ("validation", 0): [
            ("validation", 0)],
        ("external_general_entities", 0): [
            ("external_general_entities", 0)],
        ("external_general_entities", 1): [
            ("external_general_entities", 1)],
        ("external_parameter_entities", 0): [
            ("external_parameter_entities", 0)],
        ("external_parameter_entities", 1): [
            ("external_parameter_entities", 1)],
        ("validate_if_schema", 0): [
            ("validate_if_schema", 0)],
        ("create_entity_ref_nodes", 0): [
            ("create_entity_ref_nodes", 0)],
        ("create_entity_ref_nodes", 1): [
            ("create_entity_ref_nodes", 1)],
        ("entities", 0): [
            ("create_entity_ref_nodes", 0),
            ("entities", 0)],
        ("entities", 1): [
            ("entities", 1)],
        ("whitespace_in_element_content", 0): [
            ("whitespace_in_element_content", 0)],
        ("whitespace_in_element_content", 1): [
            ("whitespace_in_element_content", 1)],
        ("cdata_sections", 0): [
            ("cdata_sections", 0)],
        ("cdata_sections", 1): [
            ("cdata_sections", 1)],
        ("comments", 0): [
            ("comments", 0)],
        ("comments", 1): [
            ("comments", 1)],
        ("charset_overrides_xml_encoding", 0): [
            ("charset_overrides_xml_encoding", 0)],
        ("charset_overrides_xml_encoding", 1): [
            ("charset_overrides_xml_encoding", 1)],
        ("infoset", 0): [],
        ("infoset", 1): [
            ("namespace_declarations", 0),
            ("validate_if_schema", 0),
            ("create_entity_ref_nodes", 0),
            ("entities", 0),
            ("cdata_sections", 0),
            ("datatype_normalization", 1),
            ("whitespace_in_element_content", 1),
            ("comments", 1),
            ("charset_overrides_xml_encoding", 1)],
        ("supported_mediatypes_only", 0): [
            ("supported_mediatypes_only", 0)],
        ("namespaces", 0): [
            ("namespaces", 0)],
        ("namespaces", 1): [
            ("namespaces", 1)],
    }

    def getFeature(self, name):
        """Return the current value of the feature *name*.

        Raises xml.dom.NotFoundErr when the feature is not known.
        """
        xname = _name_xform(name)
        try:
            return getattr(self._options, xname)
        except AttributeError:
            # Fallback for an options object without an "infoset"
            # attribute: derive the value from the individual options.
            # Compare the normalised name, as every other lookup does.
            if xname == "infoset":
                options = self._options
                return (options.datatype_normalization
                        and options.whitespace_in_element_content
                        and options.comments
                        and options.charset_overrides_xml_encoding
                        and not (options.namespace_declarations
                                 or options.validate_if_schema
                                 or options.create_entity_ref_nodes
                                 or options.entities
                                 or options.cdata_sections))
            raise xml.dom.NotFoundErr("feature %s not known" % repr(name))

    def parseURI(self, uri):
        """Resolve *uri* to an input source and parse it.

        Uses self.entityResolver when one is installed, otherwise a
        fresh DOMEntityResolver.
        """
        if self.entityResolver:
            input = self.entityResolver.resolveEntity(None, uri)
        else:
            input = DOMEntityResolver().resolveEntity(None, uri)
        return self.parse(input)

    def parse(self, input):
        """Parse the input source *input* and return the builder's result.

        The byte stream of *input* is used when present; otherwise the
        stream is opened from input.systemId if that is set.
        """
        # Work on a copy so per-parse settings never leak into _options.
        options = copy.copy(self._options)
        options.filter = self.filter
        options.errorHandler = self.errorHandler
        fp = input.byteStream
        # The system identifier belongs to the input source; the options
        # object has no such attribute.
        if fp is None and input.systemId:
            import urllib2
            fp = urllib2.urlopen(input.systemId)
        return self._parse_bytestream(fp, options)

    def parseWithContext(self, input, cnode, action):
        """Not implemented; only validates *action*.

        Raises ValueError for an action outside _legal_actions and
        NotImplementedError otherwise.
        """
        if action not in self._legal_actions:
            raise ValueError("not a legal action")
        raise NotImplementedError("Haven't written this yet...")

    def _parse_bytestream(self, stream, options):
        """Parse *stream* with an expatbuilder configured by *options*."""
        import xml.dom.expatbuilder
        builder = xml.dom.expatbuilder.makeBuilder(options)
        return builder.parseFile(stream)
208 | ||
209 | ||
210 | def _name_xform(name): | |
211 | return name.lower().replace('-', '_') | |
212 | ||
213 | ||
class DOMEntityResolver(NewStyle):
    """Default resolver that turns a system identifier into an input source."""

    __slots__ = '_opener',

    def resolveEntity(self, publicId, systemId):
        """Open *systemId* and return a populated DOMInputSource.

        The returned source carries the identifiers, the opened byte
        stream, the encoding reported by the transport (if any) and a
        base URI derived from *systemId*.
        """
        assert systemId is not None
        result = DOMInputSource()
        result.publicId = publicId
        result.systemId = systemId
        result.byteStream = self._get_opener().open(systemId)

        # Use the encoding if the transport provided one.
        result.encoding = self._guess_media_encoding(result)

        # Determine the base URI if we can.
        import posixpath
        import urlparse
        scheme, netloc, path, params, query, fragment = \
            urlparse.urlparse(systemId)
        # XXX should we check the scheme here as well?
        if path and not path.endswith("/"):
            path = posixpath.dirname(path) + "/"
        result.baseURI = urlparse.urlunparse(
            (scheme, netloc, path, params, query, fragment))

        return result

    def _get_opener(self):
        """Return the opener, creating it on first use."""
        if not hasattr(self, "_opener"):
            self._opener = self._create_opener()
        return self._opener

    def _create_opener(self):
        """Build the urllib2 opener used to fetch entities."""
        import urllib2
        return urllib2.build_opener()

    def _guess_media_encoding(self, source):
        """Return the lower-cased charset from the Content-Type, or None."""
        headers = source.byteStream.info()
        if not headers.has_key("Content-Type"):
            return None
        for item in headers.getplist():
            if item.startswith("charset="):
                return item.split("=", 1)[1].lower()
        return None
256 | ||
257 | ||
class DOMInputSource(NewStyle):
    """Plain record describing where the data for a parse comes from.

    Every field starts out as None; the _get_*/_set_* methods simply
    read and write the attribute of the same name.
    """

    __slots__ = ('byteStream', 'characterStream', 'stringData',
                 'encoding', 'publicId', 'systemId', 'baseURI')

    def __init__(self):
        # Initialise every declared field, in declaration order.
        for field in DOMInputSource.__slots__:
            setattr(self, field, None)

    # byteStream
    def _get_byteStream(self):
        return self.byteStream

    def _set_byteStream(self, byteStream):
        self.byteStream = byteStream

    # characterStream
    def _get_characterStream(self):
        return self.characterStream

    def _set_characterStream(self, characterStream):
        self.characterStream = characterStream

    # stringData
    def _get_stringData(self):
        return self.stringData

    def _set_stringData(self, data):
        self.stringData = data

    # encoding
    def _get_encoding(self):
        return self.encoding

    def _set_encoding(self, encoding):
        self.encoding = encoding

    # publicId
    def _get_publicId(self):
        return self.publicId

    def _set_publicId(self, publicId):
        self.publicId = publicId

    # systemId
    def _get_systemId(self):
        return self.systemId

    def _set_systemId(self, systemId):
        self.systemId = systemId

    # baseURI
    def _get_baseURI(self):
        return self.baseURI

    def _set_baseURI(self, uri):
        self.baseURI = uri
305 | ||
306 | ||
class DOMBuilderFilter:
    """Accept-everything filter that can be subclassed to tailor how a
    DOM instance is constructed.
    """

    # This class is a convenience only: a concrete filter may provide
    # acceptNode() and startContainer() itself.  Deriving from this
    # class makes it easy to override just one of the two.

    FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP, FILTER_INTERRUPT = 1, 2, 3, 4

    whatToShow = NodeFilter.SHOW_ALL

    def _get_whatToShow(self):
        """Return the whatToShow value of this filter."""
        return self.whatToShow

    def acceptNode(self, element):
        """Accept *element* unconditionally."""
        return self.FILTER_ACCEPT

    def startContainer(self, element):
        """Accept the container *element* unconditionally."""
        return self.FILTER_ACCEPT


# NodeFilter was only needed for the class attribute above.
del NodeFilter
334 | ||
335 | ||
336 | class DocumentLS: | |
337 | """Mixin to create documents that conform to the load/save spec.""" | |
338 | ||
339 | async = False | |
340 | ||
341 | def _get_async(self): | |
342 | return False | |
343 | def _set_async(self, async): | |
344 | if async: | |
345 | raise xml.dom.NotSupportedErr( | |
346 | "asynchronous document loading is not supported") | |
347 | ||
348 | def abort(self): | |
349 | # What does it mean to "clear" a document? Does the | |
350 | # documentElement disappear? | |
351 | raise NotImplementedError( | |
352 | "haven't figured out what this means yet") | |
353 | ||
354 | def load(self, uri): | |
355 | raise NotImplementedError("haven't written this yet") | |
356 | ||
357 | def loadXML(self, source): | |
358 | raise NotImplementedError("haven't written this yet") | |
359 | ||
360 | def saveXML(self, snode): | |
361 | if snode is None: | |
362 | snode = self | |
363 | elif snode.ownerDocument is not self: | |
364 | raise xml.dom.WrongDocumentErr() | |
365 | return snode.toxml() | |
366 | ||
367 | ||
class DOMImplementationLS:
    """Factory mixin for the load/save objects of a DOM implementation."""

    MODE_SYNCHRONOUS = 1
    MODE_ASYNCHRONOUS = 2

    def createDOMBuilder(self, mode, schemaType):
        """Return a new DOMBuilder for synchronous *mode*.

        Raises xml.dom.NotSupportedErr when *schemaType* is given or
        asynchronous mode is requested, and ValueError for any other
        mode value.
        """
        if schemaType is not None:
            raise xml.dom.NotSupportedErr(
                "schemaType not yet supported")
        if mode == self.MODE_SYNCHRONOUS:
            return DOMBuilder()
        elif mode == self.MODE_ASYNCHRONOUS:
            raise xml.dom.NotSupportedErr(
                "asynchronous builders are not supported")
        else:
            raise ValueError("unknown value for mode")

    def createDOMWriter(self):
        """Not available; always raises NotImplementedError."""
        raise NotImplementedError(
            "the writer interface hasn't been written yet!")

    def createDOMInputSource(self):
        """Return a new, empty DOMInputSource."""
        return DOMInputSource()