minidom.py -- a lightweight DOM implementation.
parseString("<foo><bar/></foo>")
* convenience methods for getting elements and text.
* bring some of the writer and linearizer code into conformance with this
from xml
.dom
import EMPTY_NAMESPACE
, EMPTY_PREFIX
, XMLNS_NAMESPACE
, domreg
from xml
.dom
.minicompat
import *
from xml
.dom
.xmlbuilder
import DOMImplementationLS
, DocumentLS
# This is used by the ID-cache invalidation checks; the list isn't
# actually complete, since the nodes being checked will never be the
# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is
# the node being added or removed, not the node being modified.)
_nodeTypes_with_children
= (xml
.dom
.Node
.ELEMENT_NODE
,
xml
.dom
.Node
.ENTITY_REFERENCE_NODE
)
class Node(xml
.dom
.Node
, GetattrMagic
):
namespaceURI
= None # this is non-null only for elements and attributes
prefix
= EMPTY_PREFIX
# non-null only for NS elements and attributes
def toxml(self, encoding=None):
    """Serialize this node with no added indentation or newlines.

    Delegates to ``toprettyxml`` using empty indent and newline strings
    and forwards ``encoding`` unchanged.
    """
    no_indent = ""
    no_newline = ""
    return self.toprettyxml(no_indent, no_newline, encoding)
def toprettyxml(self
, indent
="\t", newl
="\n", encoding
= None):
# indent = the indentation string to prepend, per level
# newl = the newline string to append
# Can't use codecs.getwriter to preserve 2.0 compatibility
writer
= codecs
.lookup(encoding
)[3](writer
)
if self
.nodeType
== Node
.DOCUMENT_NODE
:
# Can pass encoding only to document, to put it into XML header
self
.writexml(writer
, "", indent
, newl
, encoding
)
self
.writexml(writer
, "", indent
, newl
)
def _get_childNodes(self
):
def _get_firstChild(self):
    """Return the first entry of ``childNodes``, or None when it is empty.

    Indexing an empty child list would raise IndexError; the property is
    documented as "First child node, or None", so emptiness is checked
    first.
    """
    children = self.childNodes
    if children:
        return children[0]
    return None
def _get_lastChild(self):
    """Return the last entry of ``childNodes``, or None when it is empty.

    Indexing an empty child list would raise IndexError; the property is
    documented as "Last child node, or None", so emptiness is checked
    first.
    """
    children = self.childNodes
    if children:
        return children[-1]
    return None
def insertBefore(self
, newChild
, refChild
):
if newChild
.nodeType
== self
.DOCUMENT_FRAGMENT_NODE
:
for c
in tuple(newChild
.childNodes
):
self
.insertBefore(c
, refChild
)
### The DOM does not clearly specify what to return in this case
if newChild
.nodeType
not in self
._child
_node
_types
:
raise xml
.dom
.HierarchyRequestErr(
"%s cannot be child of %s" % (repr(newChild
), repr(self
)))
if newChild
.parentNode
is not None:
newChild
.parentNode
.removeChild(newChild
)
self
.appendChild(newChild
)
index
= self
.childNodes
.index(refChild
)
raise xml
.dom
.NotFoundErr()
if newChild
.nodeType
in _nodeTypes_with_children
:
self
.childNodes
.insert(index
, newChild
)
newChild
.nextSibling
= refChild
refChild
.previousSibling
= newChild
node
= self
.childNodes
[index
-1]
node
.nextSibling
= newChild
newChild
.previousSibling
= node
newChild
.previousSibling
= None
newChild
.parentNode
= self
def appendChild(self
, node
):
if node
.nodeType
== self
.DOCUMENT_FRAGMENT_NODE
:
for c
in tuple(node
.childNodes
):
### The DOM does not clearly specify what to return in this case
if node
.nodeType
not in self
._child
_node
_types
:
raise xml
.dom
.HierarchyRequestErr(
"%s cannot be child of %s" % (repr(node
), repr(self
)))
elif node
.nodeType
in _nodeTypes_with_children
:
if node
.parentNode
is not None:
node
.parentNode
.removeChild(node
)
_append_child(self
, node
)
def replaceChild(self
, newChild
, oldChild
):
if newChild
.nodeType
== self
.DOCUMENT_FRAGMENT_NODE
:
refChild
= oldChild
.nextSibling
self
.removeChild(oldChild
)
return self
.insertBefore(newChild
, refChild
)
if newChild
.nodeType
not in self
._child
_node
_types
:
raise xml
.dom
.HierarchyRequestErr(
"%s cannot be child of %s" % (repr(newChild
), repr(self
)))
if newChild
.parentNode
is not None:
newChild
.parentNode
.removeChild(newChild
)
index
= self
.childNodes
.index(oldChild
)
raise xml
.dom
.NotFoundErr()
self
.childNodes
[index
] = newChild
newChild
.parentNode
= self
oldChild
.parentNode
= None
if (newChild
.nodeType
in _nodeTypes_with_children
or oldChild
.nodeType
in _nodeTypes_with_children
):
newChild
.nextSibling
= oldChild
.nextSibling
newChild
.previousSibling
= oldChild
.previousSibling
oldChild
.nextSibling
= None
oldChild
.previousSibling
= None
if newChild
.previousSibling
:
newChild
.previousSibling
.nextSibling
= newChild
newChild
.nextSibling
.previousSibling
= newChild
def removeChild(self
, oldChild
):
self
.childNodes
.remove(oldChild
)
raise xml
.dom
.NotFoundErr()
if oldChild
.nextSibling
is not None:
oldChild
.nextSibling
.previousSibling
= oldChild
.previousSibling
if oldChild
.previousSibling
is not None:
oldChild
.previousSibling
.nextSibling
= oldChild
.nextSibling
oldChild
.nextSibling
= oldChild
.previousSibling
= None
if oldChild
.nodeType
in _nodeTypes_with_children
:
oldChild
.parentNode
= None
for child
in self
.childNodes
:
if child
.nodeType
== Node
.TEXT_NODE
:
if data
and L
and L
[-1].nodeType
== child
.nodeType
:
node
.data
= node
.data
+ child
.data
node
.nextSibling
= child
.nextSibling
L
[-1].nextSibling
= child
child
.previousSibling
= L
[-1]
child
.previousSibling
= None
# empty text node; discard
L
[-1].nextSibling
= child
child
.previousSibling
= L
[-1]
child
.previousSibling
= None
if child
.nodeType
== Node
.ELEMENT_NODE
:
def cloneNode(self, deep):
    """Return the result of ``_clone_node`` for this node.

    The third argument is this node's ``ownerDocument``; when that is
    false-y the node itself is passed instead.
    """
    owner = self.ownerDocument
    if not owner:
        owner = self
    return _clone_node(self, deep, owner)
def isSupported(self, feature, version):
    """Ask the owner document's implementation about a feature.

    Returns whatever ``ownerDocument.implementation.hasFeature`` returns
    for ``feature`` and ``version``.
    """
    implementation = self.ownerDocument.implementation
    return implementation.hasFeature(feature, version)
def _get_localName(self
):
# Overridden in Element and Attr where localName can be Non-Null
# Node interfaces from Level 3 (WD 9 April 2002)
def isSameNode(self
, other
):
def getInterface(self
, feature
):
if self
.isSupported(feature
, None):
# The "user data" functions use a dictionary that is only present
# if some user data has been set, so be careful not to assume it
def getUserData(self
, key
):
return self
._user
_data
[key
][0]
except (AttributeError, KeyError):
def setUserData(self
, key
, data
, handler
):
# ignore handlers passed for None
def _call_user_data_handler(self, operation, src, dst):
    """Invoke the registered user-data handlers for ``operation``.

    Does nothing when the node has no ``_user_data`` attribute.  Each
    entry of ``_user_data`` maps a key to a ``(data, handler)`` pair;
    ``handler.handle(operation, key, data, src, dst)`` is called for
    every entry whose handler is not None.
    """
    if not hasattr(self, "_user_data"):
        return
    for key, (data, handler) in self._user_data.items():
        # Entries stored without a handler have nothing to notify;
        # calling .handle on None would raise AttributeError.
        if handler is None:
            continue
        handler.handle(operation, key, data, src, dst)
self
.parentNode
= self
.ownerDocument
= None
for child
in self
.childNodes
:
self
.childNodes
= NodeList()
self
.previousSibling
= None
defproperty(Node
, "firstChild", doc
="First child node, or None.")
defproperty(Node
, "lastChild", doc
="Last child node, or None.")
defproperty(Node
, "localName", doc
="Namespace-local name of this node.")
def _append_child(self
, node
):
# fast path with fewer checks; usable by DOM builders if careful
childNodes
= self
.childNodes
node
.__dict
__["previousSibling"] = last
last
.__dict
__["nextSibling"] = node
node
.__dict
__["parentNode"] = self
# return True iff node is part of a document tree
if node
.nodeType
== Node
.DOCUMENT_NODE
:
def _write_data(writer, data):
    """Write ``data`` to ``writer`` with XML markup characters escaped.

    ``&``, ``<``, ``"`` and ``>`` are replaced by their predefined entity
    references.  ``&`` is handled first so the ampersands introduced by
    the other replacements are not escaped a second time.
    """
    data = data.replace("&", "&amp;").replace("<", "&lt;")
    data = data.replace("\"", "&quot;").replace(">", "&gt;")
    writer.write(data)
def _get_elements_by_tagName_helper(parent, name, rc):
    """Collect the descendants of ``parent`` that match ``name`` into ``rc``.

    Walks ``parent.childNodes`` depth-first.  A node is appended to ``rc``
    when it is an element and ``name`` is ``"*"`` or equals its
    ``tagName``.  Every child is descended into, so matches appear in
    document order.  ``rc`` is returned because ``getElementsByTagName``
    returns this helper's result directly.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE and \
                (name == "*" or node.tagName == name):
            rc.append(node)
        _get_elements_by_tagName_helper(node, name, rc)
    return rc
def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
    """Collect namespace-matching descendant elements of ``parent`` into ``rc``.

    An element matches when ``localName`` is ``"*"`` or equals its
    ``localName`` and ``nsURI`` is ``"*"`` or equals its ``namespaceURI``.
    Only element children are descended into.  ``rc`` is returned because
    ``getElementsByTagNameNS`` returns this helper's result directly.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            if ((localName == "*" or node.localName == localName) and
                    (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc)
    return rc
class DocumentFragment(Node
):
nodeType
= Node
.DOCUMENT_FRAGMENT_NODE
nodeName
= "#document-fragment"
_child_node_types
= (Node
.ELEMENT_NODE
,
Node
.ENTITY_REFERENCE_NODE
,
Node
.PROCESSING_INSTRUCTION_NODE
,
self
.childNodes
= NodeList()
nodeType
= Node
.ATTRIBUTE_NODE
_child_node_types
= (Node
.TEXT_NODE
, Node
.ENTITY_REFERENCE_NODE
)
def __init__(self
, qName
, namespaceURI
=EMPTY_NAMESPACE
, localName
=None,
# skip setattr for performance
d
["nodeName"] = d
["name"] = qName
d
["namespaceURI"] = namespaceURI
d
['childNodes'] = NodeList()
# Add the single child node that represents the value of the attr
self
.childNodes
.append(Text())
# nodeValue and value are set elsewhere
def _get_localName(self):
    """Return the part of ``nodeName`` after its first colon.

    When ``nodeName`` contains no colon the whole name is returned.
    """
    pieces = self.nodeName.split(":", 1)
    return pieces[-1]
def _get_specified(self
):
def __setattr__(self
, name
, value
):
if name
in ("value", "nodeValue"):
d
["value"] = d
["nodeValue"] = value
d2
= self
.childNodes
[0].__dict
__
d2
["data"] = d2
["nodeValue"] = value
if self
.ownerElement
is not None:
_clear_id_cache(self
.ownerElement
)
elif name
in ("name", "nodeName"):
d
["name"] = d
["nodeName"] = value
if self
.ownerElement
is not None:
_clear_id_cache(self
.ownerElement
)
def _set_prefix(self
, prefix
):
nsuri
= self
.namespaceURI
if nsuri
and nsuri
!= XMLNS_NAMESPACE
:
raise xml
.dom
.NamespaceErr(
"illegal use of 'xmlns' prefix for the wrong namespace")
newName
= "%s:%s" % (prefix
, self
.localName
)
_clear_id_cache(self
.ownerElement
)
d
['nodeName'] = d
['name'] = newName
def _set_value(self
, value
):
d
['value'] = d
['nodeValue'] = value
_clear_id_cache(self
.ownerElement
)
self
.childNodes
[0].data
= value
# This implementation does not call the base implementation
# since most of that is not needed, and the expense of the
# method call is not warranted. We duplicate the removal of
# children, but that's all we needed from the base class.
del elem
._attrs
[self
.nodeName
]
del elem
._attrsNS
[(self
.namespaceURI
, self
.localName
)]
elem
._magic
_id
_nodes
-= 1
self
.ownerDocument
._magic
_id
_count
-= 1
for child
in self
.childNodes
:
if doc
is None or elem
is None:
info
= doc
._get
_elem
_info
(elem
)
return info
.isIdNS(self
.namespaceURI
, self
.localName
)
return info
.isId(self
.nodeName
)
def _get_schemaType(self
):
if doc
is None or elem
is None:
info
= doc
._get
_elem
_info
(elem
)
return info
.getAttributeTypeNS(self
.namespaceURI
, self
.localName
)
return info
.getAttributeType(self
.nodeName
)
defproperty(Attr
, "isId", doc
="True if this attribute is an ID.")
defproperty(Attr
, "localName", doc
="Namespace-local name of this attribute.")
defproperty(Attr
, "schemaType", doc
="Schema type for this attribute.")
class NamedNodeMap(NewStyle
, GetattrMagic
):
"""The attribute list is a transient interface to the underlying
dictionaries. Mutations here will change the underlying element's
Ordering is imposed artificially and does not reflect the order of
attributes as found in an input document.
__slots__
= ('_attrs', '_attrsNS', '_ownerElement')
def __init__(self, attrs, attrsNS, ownerElement):
    """Bind the map to an element's attribute dictionaries.

    ``attrs`` and ``attrsNS`` are kept by reference in the ``_attrs`` and
    ``_attrsNS`` slots, which the other methods of this class read and
    mutate.  ``ownerElement`` is kept in ``_ownerElement``.
    """
    self._attrs = attrs
    self._attrsNS = attrsNS
    self._ownerElement = ownerElement
return self
[self
._attrs
.keys()[index
]]
for node
in self
._attrs
.values():
L
.append((node
.nodeName
, node
.value
))
for node
in self
._attrs
.values():
L
.append(((node
.namespaceURI
, node
.localName
), node
.value
))
if isinstance(key
, StringTypes
):
return self
._attrs
.has_key(key
)
return self
._attrsNS
.has_key(key
)
return self
._attrs
.keys()
return self
._attrsNS
.keys()
return self
._attrs
.values()
def get(self, name, value=None):
    """Return the entry of ``_attrs`` stored under ``name``.

    ``value`` is returned when ``name`` is not present.
    """
    by_name = self._attrs
    return by_name.get(name, value)
def __cmp__(self
, other
):
if self
._attrs
is getattr(other
, "_attrs", None):
return cmp(id(self
), id(other
))
def __getitem__(self, attname_or_tuple):
    """Look up an attribute node by name or by tuple key.

    A key that is an instance of ``_TupleType`` is looked up in
    ``_attrsNS``; any other key is looked up in ``_attrs``.  A missing
    key raises KeyError from the underlying mapping.
    """
    if isinstance(attname_or_tuple, _TupleType):
        table = self._attrsNS
    else:
        table = self._attrs
    return table[attname_or_tuple]
def __setitem__(self
, attname
, value
):
if isinstance(value
, StringTypes
):
node
= self
._attrs
[attname
]
node
.ownerDocument
= self
._ownerElement
.ownerDocument
if not isinstance(value
, Attr
):
raise TypeError, "value must be a string or Attr object"
def getNamedItem(self
, name
):
def getNamedItemNS(self, namespaceURI, localName):
    """Return the node stored under ``(namespaceURI, localName)``, or None.

    A missing key yields None rather than KeyError, which matches how
    ``Element.getAttributeNodeNS`` reads the same dictionary.
    """
    return self._attrsNS.get((namespaceURI, localName))
def removeNamedItem(self
, name
):
n
= self
.getNamedItem(name
)
_clear_id_cache(self
._ownerElement
)
del self
._attrs
[n
.nodeName
]
del self
._attrsNS
[(n
.namespaceURI
, n
.localName
)]
if n
.__dict
__.has_key('ownerElement'):
n
.__dict
__['ownerElement'] = None
raise xml
.dom
.NotFoundErr()
def removeNamedItemNS(self
, namespaceURI
, localName
):
n
= self
.getNamedItemNS(namespaceURI
, localName
)
_clear_id_cache(self
._ownerElement
)
del self
._attrsNS
[(n
.namespaceURI
, n
.localName
)]
del self
._attrs
[n
.nodeName
]
if n
.__dict
__.has_key('ownerElement'):
n
.__dict
__['ownerElement'] = None
raise xml
.dom
.NotFoundErr()
def setNamedItem(self
, node
):
if not isinstance(node
, Attr
):
raise xml
.dom
.HierarchyRequestErr(
"%s cannot be child of %s" % (repr(node
), repr(self
)))
old
= self
._attrs
.get(node
.name
)
self
._attrs
[node
.name
] = node
self
._attrsNS
[(node
.namespaceURI
, node
.localName
)] = node
node
.ownerElement
= self
._ownerElement
_clear_id_cache(node
.ownerElement
)
def setNamedItemNS(self, node):
    """Namespace-aware alias: forwards ``node`` to ``setNamedItem``."""
    outcome = self.setNamedItem(node)
    return outcome
def __delitem__(self
, attname_or_tuple
):
node
= self
[attname_or_tuple
]
_clear_id_cache(node
.ownerElement
)
return self
._attrs
, self
._attrsNS
, self
._ownerElement
def __setstate__(self, state):
    """Restore ``_attrs``, ``_attrsNS`` and ``_ownerElement`` from ``state``.

    ``state`` must unpack into exactly three items, in that order.
    """
    attrs, attrs_ns, owner = state
    self._attrs = attrs
    self._attrsNS = attrs_ns
    self._ownerElement = owner
defproperty(NamedNodeMap
, "length",
doc
="Number of nodes in the NamedNodeMap.")
AttributeList
= NamedNodeMap
class TypeInfo(NewStyle
):
__slots__
= 'namespace', 'name'
def __init__(self, namespace, name):
    """Record the namespace and name that identify this type.

    Both values are stored: ``name`` is one of the declared slots and is
    read when the object's representation is built.
    """
    self.namespace = namespace
    self.name = name
return "<TypeInfo %r (from %r)>" % (self
.name
, self
.namespace
)
return "<TypeInfo %r>" % self
.name
def _get_namespace(self
):
_no_type
= TypeInfo(None, None)
nodeType
= Node
.ELEMENT_NODE
_child_node_types
= (Node
.ELEMENT_NODE
,
Node
.PROCESSING_INSTRUCTION_NODE
,
Node
.ENTITY_REFERENCE_NODE
)
def __init__(self
, tagName
, namespaceURI
=EMPTY_NAMESPACE
, prefix
=None,
self
.tagName
= self
.nodeName
= tagName
self
.namespaceURI
= namespaceURI
self
.childNodes
= NodeList()
self
._attrs
= {} # attributes are double-indexed:
self
._attrsNS
= {} # tagName -> Attribute
# URI,localName -> Attribute
# in the future: consider lazy generation
# of attribute objects this is too tricky
# for now because of headaches with
def _get_localName(self):
    """Return the part of ``tagName`` after its first colon.

    When ``tagName`` contains no colon the whole name is returned.
    """
    pieces = self.tagName.split(":", 1)
    return pieces[-1]
for attr
in self
._attrs
.values():
def getAttribute(self, attname):
    """Return the value of the attribute named ``attname``.

    An empty string is returned when the element has no such attribute,
    as the DOM ``getAttribute`` contract specifies, instead of letting
    the dictionary lookup raise KeyError.
    """
    try:
        return self._attrs[attname].value
    except KeyError:
        return ""
def getAttributeNS(self, namespaceURI, localName):
    """Return the value of the attribute keyed by namespace and local name.

    An empty string is returned when the element has no such attribute,
    as the DOM ``getAttributeNS`` contract specifies, instead of letting
    the dictionary lookup raise KeyError.
    """
    try:
        return self._attrsNS[(namespaceURI, localName)].value
    except KeyError:
        return ""
def setAttribute(self
, attname
, value
):
attr
= self
.getAttributeNode(attname
)
d
["value"] = d
["nodeValue"] = value
d
["ownerDocument"] = self
.ownerDocument
self
.setAttributeNode(attr
)
elif value
!= attr
.value
:
d
["value"] = d
["nodeValue"] = value
def setAttributeNS(self
, namespaceURI
, qualifiedName
, value
):
prefix
, localname
= _nssplit(qualifiedName
)
attr
= self
.getAttributeNodeNS(namespaceURI
, localname
)
attr
= Attr(qualifiedName
, namespaceURI
, localname
, prefix
)
d
["nodeName"] = qualifiedName
d
["value"] = d
["nodeValue"] = value
d
["ownerDocument"] = self
.ownerDocument
self
.setAttributeNode(attr
)
d
["value"] = d
["nodeValue"] = value
if attr
.prefix
!= prefix
:
d
["nodeName"] = qualifiedName
def getAttributeNode(self, attrname):
    """Return the attribute node stored under ``attrname``, or None."""
    by_name = self._attrs
    return by_name.get(attrname)
def getAttributeNodeNS(self, namespaceURI, localName):
    """Return the attribute node keyed by namespace and local name, or None."""
    key = (namespaceURI, localName)
    return self._attrsNS.get(key)
def setAttributeNode(self
, attr
):
if attr
.ownerElement
not in (None, self
):
raise xml
.dom
.InuseAttributeErr("attribute node already owned")
old1
= self
._attrs
.get(attr
.name
, None)
self
.removeAttributeNode(old1
)
old2
= self
._attrsNS
.get((attr
.namespaceURI
, attr
.localName
), None)
if old2
is not None and old2
is not old1
:
self
.removeAttributeNode(old2
)
_set_attribute_node(self
, attr
)
# It might have already been part of this node, in which case
# it doesn't represent a change, and should not be returned.
setAttributeNodeNS
= setAttributeNode
def removeAttribute(self
, name
):
raise xml
.dom
.NotFoundErr()
self
.removeAttributeNode(attr
)
def removeAttributeNS(self
, namespaceURI
, localName
):
attr
= self
._attrsNS
[(namespaceURI
, localName
)]
raise xml
.dom
.NotFoundErr()
self
.removeAttributeNode(attr
)
def removeAttributeNode(self
, node
):
raise xml
.dom
.NotFoundErr()
raise xml
.dom
.NotFoundErr()
# Restore this since the node is still useful and otherwise
node
.ownerDocument
= self
.ownerDocument
removeAttributeNodeNS
= removeAttributeNode
def hasAttribute(self, name):
    """Return True when ``_attrs`` holds an attribute called ``name``.

    The ``in`` operator replaces the deprecated ``dict.has_key`` method;
    the result is the same on every interpreter that provides both.
    """
    return name in self._attrs
def hasAttributeNS(self, namespaceURI, localName):
    """Return True when ``_attrsNS`` has the ``(namespaceURI, localName)`` key.

    The ``in`` operator replaces the deprecated ``dict.has_key`` method;
    the result is the same on every interpreter that provides both.
    """
    return (namespaceURI, localName) in self._attrsNS
def getElementsByTagName(self, name):
    """Search below this element for elements named ``name``.

    Hands a fresh ``NodeList`` to ``_get_elements_by_tagName_helper`` and
    returns that helper's result.
    """
    collected = NodeList()
    return _get_elements_by_tagName_helper(self, name, collected)
def getElementsByTagNameNS(self, namespaceURI, localName):
    """Search below this element by namespace URI and local name.

    Hands a fresh ``NodeList`` to ``_get_elements_by_tagName_ns_helper``
    and returns that helper's result.
    """
    collected = NodeList()
    return _get_elements_by_tagName_ns_helper(
        self, namespaceURI, localName, collected)
return "<DOM Element: %s at %#x>" % (self
.tagName
, id(self
))
def writexml(self
, writer
, indent
="", addindent
="", newl
=""):
# indent = current indentation
# addindent = indentation to add to higher levels
writer
.write(indent
+"<" + self
.tagName
)
attrs
= self
._get
_attributes
()
writer
.write(" %s=\"" % a_name
)
_write_data(writer
, attrs
[a_name
].value
)
writer
.write(">%s"%(newl))
for node
in self
.childNodes
:
node
.writexml(writer
,indent
+addindent
,addindent
,newl
)
writer
.write("%s</%s>%s" % (indent
,self
.tagName
,newl
))
writer
.write("/>%s"%(newl))
def _get_attributes(self):
    """Build a new ``NamedNodeMap`` over this element's attribute tables.

    The map receives ``_attrs`` and ``_attrsNS`` themselves (not copies)
    together with this element as its owner.
    """
    by_name = self._attrs
    by_ns = self._attrsNS
    return NamedNodeMap(by_name, by_ns, self)
# DOM Level 3 attributes, based on the 22 Oct 2002 draft
def setIdAttribute(self, name):
    """Look up the attribute node called ``name`` and mark it as an ID.

    The lookup result, possibly None, is passed to ``setIdAttributeNode``.
    """
    self.setIdAttributeNode(self.getAttributeNode(name))
def setIdAttributeNS(self, namespaceURI, localName):
    """Look up an attribute node by namespace and mark it as an ID.

    The lookup result, possibly None, is passed to ``setIdAttributeNode``.
    """
    self.setIdAttributeNode(
        self.getAttributeNodeNS(namespaceURI, localName))
def setIdAttributeNode(self
, idAttr
):
if idAttr
is None or not self
.isSameNode(idAttr
.ownerElement
):
raise xml
.dom
.NotFoundErr()
if _get_containing_entref(self
) is not None:
raise xml
.dom
.NoModificationAllowedErr()
idAttr
.__dict
__['_is_id'] = True
self
._magic
_id
_nodes
+= 1
self
.ownerDocument
._magic
_id
_count
+= 1
defproperty(Element
, "attributes",
doc
="NamedNodeMap of attributes on the element.")
defproperty(Element
, "localName",
doc
="Namespace-local name of this element.")
def _set_attribute_node(element, attr):
    """Register ``attr`` in both attribute tables of ``element``.

    The node is stored under its ``name`` in ``_attrs`` and under
    ``(namespaceURI, localName)`` in ``_attrsNS``; then the attribute's
    ``ownerElement`` entry is written directly into its ``__dict__``.
    """
    ns_key = (attr.namespaceURI, attr.localName)
    element._attrs[attr.name] = attr
    element._attrsNS[ns_key] = attr

    # This creates a circular reference, but Element.unlink()
    # breaks the cycle since the references to the attribute
    # dictionaries are tossed.
    attr_state = attr.__dict__
    attr_state['ownerElement'] = element
"""Mixin that makes childless-ness easy to implement and avoids
the complexity of the Node methods that deal with children.
childNodes
= EmptyNodeList()
def _get_firstChild(self
):
def _get_lastChild(self
):
def appendChild(self, node):
    """Always refuse to append a child.

    Raises xml.dom.HierarchyRequestErr with a message built from this
    node's ``nodeName``.
    """
    message = self.nodeName + " nodes cannot have children"
    raise xml.dom.HierarchyRequestErr(message)
def insertBefore(self, newChild, refChild):
    """Always refuse to insert a child.

    Raises xml.dom.HierarchyRequestErr with a message built from this
    node's ``nodeName``.
    """
    message = self.nodeName + " nodes do not have children"
    raise xml.dom.HierarchyRequestErr(message)
def removeChild(self, oldChild):
    """Always fail: there is never a child to remove.

    Raises xml.dom.NotFoundErr with a message built from this node's
    ``nodeName``.
    """
    message = self.nodeName + " nodes do not have children"
    raise xml.dom.NotFoundErr(message)
def replaceChild(self, newChild, oldChild):
    """Always refuse to replace a child.

    Raises xml.dom.HierarchyRequestErr with a message built from this
    node's ``nodeName``.
    """
    message = self.nodeName + " nodes do not have children"
    raise xml.dom.HierarchyRequestErr(message)
class ProcessingInstruction(Childless, Node):
    """Processing-instruction node, serialized as ``<?target data?>``.

    ``target`` mirrors ``nodeName`` and ``data`` mirrors ``nodeValue``:
    assigning either member of a pair updates both.
    """
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        """Store ``target`` and ``data`` under both of their names."""
        self.target = self.nodeName = target
        self.data = self.nodeValue = data

    def _set_data(self, value):
        """Set ``data`` and ``nodeValue`` to ``value``."""
        # Write into the instance dictionary directly; ``d`` must be
        # bound here, otherwise the stores below raise NameError.
        d = self.__dict__
        d['data'] = d['nodeValue'] = value

    def _set_target(self, value):
        """Set ``target`` and ``nodeName`` to ``value``."""
        d = self.__dict__
        d['target'] = d['nodeName'] = value

    def __setattr__(self, name, value):
        """Keep the paired attributes in sync on plain assignment."""
        if name == "data" or name == "nodeValue":
            self.__dict__['data'] = self.__dict__['nodeValue'] = value
        elif name == "target" or name == "nodeName":
            self.__dict__['target'] = self.__dict__['nodeName'] = value
        else:
            self.__dict__[name] = value

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write ``indent``, the ``<?target data?>`` form, then ``newl``.

        ``addindent`` is accepted for signature compatibility and unused.
        """
        writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl))
class CharacterData(Childless
, Node
):
return self
.__dict
__['data']
def _set_data(self, data):
    """Set both ``data`` and ``nodeValue`` to ``data``.

    The values are written straight into the instance dictionary.
    """
    # ``d`` must be bound before use; without this line the stores
    # below raise NameError.
    d = self.__dict__
    d['data'] = d['nodeValue'] = data
_get_nodeValue
= _get_data
_set_nodeValue
= _set_data
def __setattr__(self, name, value):
    """Store ``value`` under ``name`` in the instance dictionary.

    Assigning ``data`` or ``nodeValue`` first writes ``value`` under both
    of those names; the named attribute is then stored in every case.
    """
    state = self.__dict__
    if name in ("data", "nodeValue"):
        state['data'] = value
        state['nodeValue'] = value
    state[name] = value
return "<DOM %s node \"%s%s\">" % (
self
.__class
__.__name
__, data
[0:10], dotdotdot
)
def substringData(self
, offset
, count
):
raise xml
.dom
.IndexSizeErr("offset cannot be negative")
if offset
>= len(self
.data
):
raise xml
.dom
.IndexSizeErr("offset cannot be beyond end of data")
raise xml
.dom
.IndexSizeErr("count cannot be negative")
return self
.data
[offset
:offset
+count
]
def appendData(self, arg):
    """Append ``arg`` to the end of ``data``.

    The concatenation is assigned back to ``self.data``.
    """
    current = self.data
    self.data = current + arg
def insertData(self
, offset
, arg
):
raise xml
.dom
.IndexSizeErr("offset cannot be negative")
if offset
>= len(self
.data
):
raise xml
.dom
.IndexSizeErr("offset cannot be beyond end of data")
self
.data
[:offset
], arg
, self
.data
[offset
:])
def deleteData(self
, offset
, count
):
raise xml
.dom
.IndexSizeErr("offset cannot be negative")
if offset
>= len(self
.data
):
raise xml
.dom
.IndexSizeErr("offset cannot be beyond end of data")
raise xml
.dom
.IndexSizeErr("count cannot be negative")
self
.data
= self
.data
[:offset
] + self
.data
[offset
+count
:]
def replaceData(self
, offset
, count
, arg
):
raise xml
.dom
.IndexSizeErr("offset cannot be negative")
if offset
>= len(self
.data
):
raise xml
.dom
.IndexSizeErr("offset cannot be beyond end of data")
raise xml
.dom
.IndexSizeErr("count cannot be negative")
self
.data
[:offset
], arg
, self
.data
[offset
+count
:])
defproperty(CharacterData
, "length", doc
="Length of the string data.")
class Text(CharacterData
):
# Make sure we don't add an instance __dict__ if we don't already
# have one, at least when that's possible:
# XXX this does not work, CharacterData is an old-style class
nodeType
= Node
.TEXT_NODE
def splitText(self
, offset
):
if offset
< 0 or offset
> len(self
.data
):
raise xml
.dom
.IndexSizeErr("illegal offset value")
newText
= self
.__class
__()
newText
.data
= self
.data
[offset
:]
newText
.ownerDocument
= self
.ownerDocument
if self
.parentNode
and self
in self
.parentNode
.childNodes
:
self
.parentNode
.appendChild(newText
)
self
.parentNode
.insertBefore(newText
, next
)
self
.data
= self
.data
[:offset
]
def writexml(self, writer, indent="", addindent="", newl=""):
    """Write this node's text, wrapped in ``indent`` and ``newl``.

    The three pieces are joined first and the result is handed to
    ``_write_data``.  ``addindent`` is unused.
    """
    text = "%s%s%s" % (indent, self.data, newl)
    _write_data(writer, text)
# DOM Level 3 (WD 9 April 2002)
def _get_wholeText(self
):
if n
.nodeType
in (Node
.TEXT_NODE
, Node
.CDATA_SECTION_NODE
):
if n
.nodeType
in (Node
.TEXT_NODE
, Node
.CDATA_SECTION_NODE
):
def replaceWholeText(self
, content
):
# XXX This needs to be seriously changed if minidom ever
# supports EntityReference nodes.
if n
.nodeType
in (Node
.TEXT_NODE
, Node
.CDATA_SECTION_NODE
):
if n
.nodeType
in (Node
.TEXT_NODE
, Node
.CDATA_SECTION_NODE
):
def _get_isWhitespaceInElementContent(self
):
elem
= _get_containing_element(self
)
info
= self
.ownerDocument
._get
_elem
_info
(elem
)
return info
.isElementContent()
defproperty(Text
, "isWhitespaceInElementContent",
doc
="True iff this text node contains only whitespace"
" and is in element content.")
defproperty(Text
, "wholeText",
doc
="The text of all logically-adjacent text nodes.")
def _get_containing_element(node
):
if c
.nodeType
== Node
.ELEMENT_NODE
:
def _get_containing_entref(node
):
if c
.nodeType
== Node
.ENTITY_REFERENCE_NODE
:
class Comment(Childless, CharacterData):
    """Comment node, serialized as ``<!--data-->``."""
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        """Store ``data`` as both ``data`` and ``nodeValue``."""
        self.data = self.nodeValue = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write ``indent``, the comment markup, then ``newl``.

        Raises ValueError when the data contains ``--``: that sequence
        cannot appear inside an XML comment, so writing it would produce
        a malformed document.  This mirrors the ``]]>`` check in
        ``CDATASection.writexml``.  ``addindent`` is unused.
        """
        if "--" in self.data:
            raise ValueError("'--' is not allowed in a comment node")
        writer.write("%s<!--%s-->%s" % (indent, self.data, newl))
class CDATASection(Text):
    """CDATA section node, serialized as ``<![CDATA[data]]>``."""
    # Make sure we don't add an instance __dict__ if we don't already
    # have one, at least when that's possible:
    # XXX this does not work, Text is an old-style class

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the section to ``writer``.

        Raises ValueError when the data contains ``]]>``.  ``indent``,
        ``addindent`` and ``newl`` are not used.
        """
        if "]]>" in self.data:
            raise ValueError("']]>' not allowed in a CDATA section")
        markup = "<![CDATA[%s]]>" % self.data
        writer.write(markup)
class ReadOnlySequentialNamedNodeMap(NewStyle
, GetattrMagic
):
def __init__(self
, seq
=()):
# seq should be a list or tuple
def getNamedItem(self
, name
):
def getNamedItemNS(self
, namespaceURI
, localName
):
if n
.namespaceURI
== namespaceURI
and n
.localName
== localName
:
def __getitem__(self
, name_or_tuple
):
if isinstance(name_or_tuple
, _TupleType
):
node
= self
.getNamedItemNS(*name_or_tuple
)
node
= self
.getNamedItem(name_or_tuple
)
raise KeyError, name_or_tuple
def removeNamedItem(self, name):
    """Always refuse: raises xml.dom.NoModificationAllowedErr."""
    reason = "NamedNodeMap instance is read-only"
    raise xml.dom.NoModificationAllowedErr(reason)
def removeNamedItemNS(self, namespaceURI, localName):
    """Always refuse: raises xml.dom.NoModificationAllowedErr."""
    reason = "NamedNodeMap instance is read-only"
    raise xml.dom.NoModificationAllowedErr(reason)
def setNamedItem(self, node):
    """Always refuse: raises xml.dom.NoModificationAllowedErr."""
    reason = "NamedNodeMap instance is read-only"
    raise xml.dom.NoModificationAllowedErr(reason)
def setNamedItemNS(self, node):
    """Always refuse: raises xml.dom.NoModificationAllowedErr."""
    reason = "NamedNodeMap instance is read-only"
    raise xml.dom.NoModificationAllowedErr(reason)
def __setstate__(self
, state
):
defproperty(ReadOnlySequentialNamedNodeMap
, "length",
doc
="Number of entries in the NamedNodeMap.")
"""Mix-in class that supports the publicId and systemId attributes."""
# XXX this does not work, this is an old-style class
# __slots__ = 'publicId', 'systemId'
def _identified_mixin_init(self
, publicId
, systemId
):
class DocumentType(Identified
, Childless
, Node
):
nodeType
= Node
.DOCUMENT_TYPE_NODE
def __init__(self
, qualifiedName
):
self
.entities
= ReadOnlySequentialNamedNodeMap()
self
.notations
= ReadOnlySequentialNamedNodeMap()
prefix
, localname
= _nssplit(qualifiedName
)
self
.nodeName
= self
.name
def _get_internalSubset(self):
    """Accessor returning the ``internalSubset`` attribute unchanged."""
    subset = self.internalSubset
    return subset
def cloneNode(self
, deep
):
if self
.ownerDocument
is None:
clone
= DocumentType(None)
clone
.nodeName
= self
.name
operation
= xml
.dom
.UserDataHandler
.NODE_CLONED
clone
.notations
._seq
= []
for n
in self
.notations
._seq
:
notation
= Notation(n
.nodeName
, n
.publicId
, n
.systemId
)
clone
.notations
._seq
.append(notation
)
n
._call
_user
_data
_handler
(operation
, n
, notation
)
for e
in self
.entities
._seq
:
entity
= Entity(e
.nodeName
, e
.publicId
, e
.systemId
,
entity
.actualEncoding
= e
.actualEncoding
entity
.encoding
= e
.encoding
entity
.version
= e
.version
clone
.entities
._seq
.append(entity
)
e
._call
_user
_data
_handler
(operation
, n
, entity
)
self
._call
_user
_data
_handler
(operation
, self
, clone
)
def writexml(self
, writer
, indent
="", addindent
="", newl
=""):
writer
.write("<!DOCTYPE ")
writer
.write("%s PUBLIC '%s'%s '%s'"
% (newl
, self
.publicId
, newl
, self
.systemId
))
writer
.write("%s SYSTEM '%s'" % (newl
, self
.systemId
))
if self
.internalSubset
is not None:
writer
.write(self
.internalSubset
)
class Entity(Identified, Node):
    """Entity node; its child list cannot be changed through the DOM API."""
    nodeType = Node.ENTITY_NODE

    def __init__(self, name, publicId, systemId, notation):
        """Create an entity called ``name``.

        ``name`` is stored as ``nodeName`` (it is read back under that
        attribute when a document type clones its entities), ``notation``
        as ``notationName``; the public and system identifiers are passed
        to ``_identified_mixin_init``.
        """
        self.nodeName = name
        self.notationName = notation
        self.childNodes = NodeList()
        self._identified_mixin_init(publicId, systemId)

    def _get_actualEncoding(self):
        """Accessor returning the ``actualEncoding`` attribute."""
        return self.actualEncoding

    def appendChild(self, newChild):
        """Always refuse: raises xml.dom.HierarchyRequestErr."""
        raise xml.dom.HierarchyRequestErr(
            "cannot append children to an entity node")

    def insertBefore(self, newChild, refChild):
        """Always refuse: raises xml.dom.HierarchyRequestErr."""
        raise xml.dom.HierarchyRequestErr(
            "cannot insert children below an entity node")

    def removeChild(self, oldChild):
        """Always refuse: raises xml.dom.HierarchyRequestErr."""
        raise xml.dom.HierarchyRequestErr(
            "cannot remove children from an entity node")

    def replaceChild(self, newChild, oldChild):
        """Always refuse: raises xml.dom.HierarchyRequestErr."""
        raise xml.dom.HierarchyRequestErr(
            "cannot replace children of an entity node")
class Notation(Identified, Childless, Node):
    """Notation node carrying a name plus public/system identifiers."""
    nodeType = Node.NOTATION_NODE

    def __init__(self, name, publicId, systemId):
        """Create a notation called ``name``.

        ``name`` is stored as ``nodeName`` (it is read back under that
        attribute when a document type clones its notations); the public
        and system identifiers are passed to ``_identified_mixin_init``.
        """
        self.nodeName = name
        self._identified_mixin_init(publicId, systemId)
class DOMImplementation(DOMImplementationLS
):
_features
= [("core", "1.0"),
def hasFeature(self, feature, version):
    """Report whether ``(feature, version)`` is listed in ``_features``.

    The feature name is lower-cased before the lookup; ``version`` is
    compared as given.
    """
    wanted = (feature.lower(), version)
    return wanted in self._features
def createDocument(self
, namespaceURI
, qualifiedName
, doctype
):
if doctype
and doctype
.parentNode
is not None:
raise xml
.dom
.WrongDocumentErr(
"doctype object owned by another DOM tree")
doc
= self
._create
_document
()
add_root_element
= not (namespaceURI
is None
and qualifiedName
is None
if not qualifiedName
and add_root_element
:
# The spec is unclear what to raise here; SyntaxErr
# would be the other obvious candidate. Since Xerces raises
# InvalidCharacterErr, and since SyntaxErr is not listed
# for createDocument, that seems to be the better choice.
# XXX: need to check for illegal characters here and in
# DOM Level III clears this up when talking about the return value
# of this function. If namespaceURI, qName and DocType are
# Null the document is returned without a document element
# Otherwise if doctype or namespaceURI are not None
# Then we go back to the above problem
raise xml
.dom
.InvalidCharacterErr("Element with no name")
prefix
, localname
= _nssplit(qualifiedName
)
and namespaceURI
!= "http://www.w3.org/XML/1998/namespace":
raise xml
.dom
.NamespaceErr("illegal use of 'xml' prefix")
if prefix
and not namespaceURI
:
raise xml
.dom
.NamespaceErr(
"illegal use of prefix without namespaces")
element
= doc
.createElementNS(namespaceURI
, qualifiedName
)
doctype
.parentNode
= doctype
.ownerDocument
= doc
doc
.implementation
= self
def createDocumentType(self, qualifiedName, publicId, systemId):
    """Create and return a ``DocumentType`` node.

    The node is built from ``qualifiedName`` and then given the supplied
    ``publicId`` and ``systemId``.  It is returned to the caller; without
    the return the freshly built node would simply be discarded.
    """
    doctype = DocumentType(qualifiedName)
    doctype.publicId = publicId
    doctype.systemId = systemId
    return doctype
# DOM Level 3 (WD 9 April 2002)
def getInterface(self
, feature
):
if self
.hasFeature(feature
, None):
def _create_document(self
):
class ElementInfo(NewStyle
):
"""Object that represents content-model information for an element.
This implementation is not expected to be used in practice; DOM
builders should provide implementations which do the right thing
using information available to it.
def __init__(self
, name
):
def getAttributeType(self
, aname
):
def getAttributeTypeNS(self
, namespaceURI
, localName
):
def isElementContent(self
):
"""Returns true iff this element is declared to have an EMPTY
"""Returns true iff the named attribute is a DTD-style ID."""
def isIdNS(self
, namespaceURI
, localName
):
"""Returns true iff the identified attribute is a DTD-style ID."""
def __setstate__(self
, state
):
def _clear_id_cache(node
):
if node
.nodeType
== Node
.DOCUMENT_NODE
:
node
._id
_search
_stack
= None
node
.ownerDocument
._id
_cache
.clear()
node
.ownerDocument
._id
_search
_stack
= None
class Document(Node
, DocumentLS
):
_child_node_types
= (Node
.ELEMENT_NODE
, Node
.PROCESSING_INSTRUCTION_NODE
,
Node
.COMMENT_NODE
, Node
.DOCUMENT_TYPE_NODE
)
nodeType
= Node
.DOCUMENT_NODE
previousSibling
= nextSibling
= None
implementation
= DOMImplementation()
# Document attributes from Level 3 (WD 9 April 2002)
strictErrorChecking
= False
self
.childNodes
= NodeList()
# mapping of (namespaceURI, localName) -> ElementInfo
# and tagName -> ElementInfo
self
._id
_search
_stack
= None
def _get_elem_info(self, element):
    """Return the ``_elem_info`` entry registered for ``element``, or None.

    The table is keyed by ``(namespaceURI, localName)`` for namespaced
    elements and by ``tagName`` otherwise, so the key is chosen to match:
    an element with a false-y ``namespaceURI`` is looked up by tag name.
    """
    if element.namespaceURI:
        key = element.namespaceURI, element.localName
    else:
        key = element.tagName
    return self._elem_info.get(key)
def _get_actualEncoding(self):
    """Accessor returning the ``actualEncoding`` attribute unchanged."""
    encoding_in_use = self.actualEncoding
    return encoding_in_use
def _get_documentURI(self
):
def _get_errorHandler(self
):
def _get_standalone(self
):
def _get_strictErrorChecking(self):
    """Accessor returning the ``strictErrorChecking`` attribute unchanged."""
    flag = self.strictErrorChecking
    return flag
def appendChild(self, node):
    """Append ``node`` to the document after validating it.

    Raises xml.dom.HierarchyRequestErr when the node's type is not listed
    in ``_child_node_types``, or when ``node`` is an element and
    ``_get_documentElement()`` still reports one after ``node`` has been
    detached from its previous parent.  Otherwise the result of
    ``Node.appendChild`` is returned.
    """
    allowed = self._child_node_types
    if node.nodeType not in allowed:
        raise xml.dom.HierarchyRequestErr(
            "%s cannot be child of %s" % (repr(node), repr(self)))

    previous_parent = node.parentNode
    if previous_parent is not None:
        # This needs to be done before the next test since this
        # may *be* the document element, in which case it should
        # end up re-ordered to the end.
        previous_parent.removeChild(node)

    is_element = node.nodeType == Node.ELEMENT_NODE
    if is_element and self._get_documentElement():
        raise xml.dom.HierarchyRequestErr(
            "two document elements disallowed")
    return Node.appendChild(self, node)
def removeChild(self, oldChild):
    """Remove *oldChild* from this document's children and return it.

    The removed node's sibling and parent links are reset to None.

    Raises xml.dom.NotFoundErr if *oldChild* is not a child of this
    document.
    """
    try:
        self.childNodes.remove(oldChild)
    except ValueError:
        # The child list raises ValueError for a missing item; report it
        # as the DOM-level error instead.
        raise xml.dom.NotFoundErr()
    oldChild.nextSibling = oldChild.previousSibling = None
    oldChild.parentNode = None
    if self.documentElement is oldChild:
        self.documentElement = None

    return oldChild
def _get_documentElement(self):
    """Return the first child that is an element node, or None.

    The children are scanned in order; a document with no element child
    yields None.
    """
    for node in self.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            return node
    return None
if self
.doctype
is not None:
def cloneNode(self
, deep
):
clone
= self
.implementation
.createDocument(None, None, None)
clone
.encoding
= self
.encoding
clone
.standalone
= self
.standalone
clone
.version
= self
.version
for n
in self
.childNodes
:
childclone
= _clone_node(n
, deep
, clone
)
assert childclone
.ownerDocument
.isSameNode(clone
)
clone
.childNodes
.append(childclone
)
if childclone
.nodeType
== Node
.DOCUMENT_NODE
:
assert clone
.documentElement
is None
elif childclone
.nodeType
== Node
.DOCUMENT_TYPE_NODE
:
assert clone
.doctype
is None
clone
.doctype
= childclone
childclone
.parentNode
= clone
self
._call
_user
_data
_handler
(xml
.dom
.UserDataHandler
.NODE_CLONED
,
def createDocumentFragment(self
):
def createElement(self
, tagName
):
def createTextNode(self
, data
):
if not isinstance(data
, StringTypes
):
raise TypeError, "node contents must be a string"
def createCDATASection(self
, data
):
if not isinstance(data
, StringTypes
):
raise TypeError, "node contents must be a string"
def createComment(self
, data
):
def createProcessingInstruction(self
, target
, data
):
p
= ProcessingInstruction(target
, data
)
def createAttribute(self
, qName
):
def createElementNS(self
, namespaceURI
, qualifiedName
):
prefix
, localName
= _nssplit(qualifiedName
)
e
= Element(qualifiedName
, namespaceURI
, prefix
)
def createAttributeNS(self
, namespaceURI
, qualifiedName
):
prefix
, localName
= _nssplit(qualifiedName
)
a
= Attr(qualifiedName
, namespaceURI
, localName
, prefix
)
# A couple of implementation-specific helpers to create node types
# not supported by the W3C DOM specs:
def _create_entity(self
, name
, publicId
, systemId
, notationName
):
e
= Entity(name
, publicId
, systemId
, notationName
)
def _create_notation(self
, name
, publicId
, systemId
):
n
= Notation(name
, publicId
, systemId
)
def getElementById(self
, id):
if self
._id
_cache
.has_key(id):
return self
._id
_cache
[id]
if not (self
._elem
_info
or self
._magic
_id
_count
):
stack
= self
._id
_search
_stack
# we never searched before, or the cache has been cleared
stack
= [self
.documentElement
]
self
._id
_search
_stack
= stack
# Previous search was completed and cache is still valid;
# add child elements to stack for continued searching
stack
.extend([child
for child
in node
.childNodes
if child
.nodeType
in _nodeTypes_with_children
])
info
= self
._get
_elem
_info
(node
)
# We have to process all ID attributes before
# returning in order to get all the attributes set to
# be IDs using Element.setIdAttribute*().
for attr
in node
.attributes
.values():
if info
.isIdNS(attr
.namespaceURI
, attr
.localName
):
self
._id
_cache
[attr
.value
] = node
elif not node
._magic
_id
_nodes
:
elif info
.isId(attr
.name
):
self
._id
_cache
[attr
.value
] = node
elif not node
._magic
_id
_nodes
:
self
._id
_cache
[attr
.value
] = node
elif node
._magic
_id
_nodes
== 1:
elif node
._magic
_id
_nodes
:
for attr
in node
.attributes
.values():
self
._id
_cache
[attr
.value
] = node
def getElementsByTagName(self, name):
    """Return a NodeList of the elements under this document matching *name*.

    The search is delegated to ``_get_elements_by_tagName_helper``, which
    fills a fresh ``NodeList`` starting from this document.
    """
    return _get_elements_by_tagName_helper(self, name, NodeList())
def getElementsByTagNameNS(self, namespaceURI, localName):
    """Return a NodeList of elements matching *namespaceURI* and *localName*.

    The search is delegated to ``_get_elements_by_tagName_ns_helper``,
    which fills a fresh ``NodeList`` starting from this document.
    """
    return _get_elements_by_tagName_ns_helper(
        self, namespaceURI, localName, NodeList())
def isSupported(self, feature, version):
    """Report whether this document's implementation has *feature*.

    The question is forwarded unchanged to
    ``self.implementation.hasFeature(feature, version)``.
    """
    return self.implementation.hasFeature(feature, version)
def importNode(self, node, deep):
    """Return a copy of *node* that is owned by this document.

    The copy is produced by ``_clone_node(node, deep, self)``; *deep* is
    passed through unchanged.

    Raises xml.dom.NotSupportedErr when *node* is a document or a
    document type node.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        raise xml.dom.NotSupportedErr("cannot import document nodes")
    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
        raise xml.dom.NotSupportedErr("cannot import document type nodes")
    return _clone_node(node, deep, self)
def writexml(self
, writer
, indent
="", addindent
="", newl
="",
writer
.write('<?xml version="1.0" ?>'+newl
)
writer
.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding
, newl
))
for node
in self
.childNodes
:
node
.writexml(writer
, indent
, addindent
, newl
)
# DOM Level 3 (WD 9 April 2002)
def renameNode(self
, n
, namespaceURI
, name
):
if n
.ownerDocument
is not self
:
raise xml
.dom
.WrongDocumentErr(
"cannot rename nodes from other documents;\n"
"expected %s,\nfound %s" % (self
, n
.ownerDocument
))
if n
.nodeType
not in (Node
.ELEMENT_NODE
, Node
.ATTRIBUTE_NODE
):
raise xml
.dom
.NotSupportedErr(
"renameNode() only applies to element and attribute nodes")
if namespaceURI
!= EMPTY_NAMESPACE
:
prefix
, localName
= name
.split(':', 1)
and namespaceURI
!= xml
.dom
.XMLNS_NAMESPACE
):
raise xml
.dom
.NamespaceErr(
"illegal use of 'xmlns' prefix")
and namespaceURI
!= xml
.dom
.XMLNS_NAMESPACE
and n
.nodeType
== Node
.ATTRIBUTE_NODE
):
raise xml
.dom
.NamespaceErr(
"illegal use of the 'xmlns' attribute")
if n
.nodeType
== Node
.ATTRIBUTE_NODE
:
element
.removeAttributeNode(n
)
d
['localName'] = localName
d
['namespaceURI'] = namespaceURI
if n
.nodeType
== Node
.ELEMENT_NODE
:
element
.setAttributeNode(n
)
element
.setIdAttributeNode(n
)
# It's not clear from a semantic perspective whether we should
# call the user data handlers for the NODE_RENAMED event since
# we're re-using the existing node. The draft spec has been
# interpreted as meaning "no, don't call the handler unless a
defproperty(Document
, "documentElement",
doc
="Top-level element of this document.")
def _clone_node(node
, deep
, newOwnerDocument
):
Clone a node and give it the new owner document.
Called by Node.cloneNode and Document.importNode
if node
.ownerDocument
.isSameNode(newOwnerDocument
):
operation
= xml
.dom
.UserDataHandler
.NODE_CLONED
operation
= xml
.dom
.UserDataHandler
.NODE_IMPORTED
if node
.nodeType
== Node
.ELEMENT_NODE
:
clone
= newOwnerDocument
.createElementNS(node
.namespaceURI
,
for attr
in node
.attributes
.values():
clone
.setAttributeNS(attr
.namespaceURI
, attr
.nodeName
, attr
.value
)
a
= clone
.getAttributeNodeNS(attr
.namespaceURI
, attr
.localName
)
a
.specified
= attr
.specified
for child
in node
.childNodes
:
c
= _clone_node(child
, deep
, newOwnerDocument
)
elif node
.nodeType
== Node
.DOCUMENT_FRAGMENT_NODE
:
clone
= newOwnerDocument
.createDocumentFragment()
for child
in node
.childNodes
:
c
= _clone_node(child
, deep
, newOwnerDocument
)
elif node
.nodeType
== Node
.TEXT_NODE
:
clone
= newOwnerDocument
.createTextNode(node
.data
)
elif node
.nodeType
== Node
.CDATA_SECTION_NODE
:
clone
= newOwnerDocument
.createCDATASection(node
.data
)
elif node
.nodeType
== Node
.PROCESSING_INSTRUCTION_NODE
:
clone
= newOwnerDocument
.createProcessingInstruction(node
.target
,
elif node
.nodeType
== Node
.COMMENT_NODE
:
clone
= newOwnerDocument
.createComment(node
.data
)
elif node
.nodeType
== Node
.ATTRIBUTE_NODE
:
clone
= newOwnerDocument
.createAttributeNS(node
.namespaceURI
,
elif node
.nodeType
== Node
.DOCUMENT_TYPE_NODE
:
assert node
.ownerDocument
is not newOwnerDocument
operation
= xml
.dom
.UserDataHandler
.NODE_IMPORTED
clone
= newOwnerDocument
.implementation
.createDocumentType(
node
.name
, node
.publicId
, node
.systemId
)
clone
.ownerDocument
= newOwnerDocument
clone
.notations
._seq
= []
for n
in node
.notations
._seq
:
notation
= Notation(n
.nodeName
, n
.publicId
, n
.systemId
)
notation
.ownerDocument
= newOwnerDocument
clone
.notations
._seq
.append(notation
)
if hasattr(n
, '_call_user_data_handler'):
n
._call
_user
_data
_handler
(operation
, n
, notation
)
for e
in node
.entities
._seq
:
entity
= Entity(e
.nodeName
, e
.publicId
, e
.systemId
,
entity
.actualEncoding
= e
.actualEncoding
entity
.encoding
= e
.encoding
entity
.version
= e
.version
entity
.ownerDocument
= newOwnerDocument
clone
.entities
._seq
.append(entity
)
if hasattr(e
, '_call_user_data_handler'):
e
._call
_user
_data
_handler
(operation
, n
, entity
)
# Note the cloning of Document and DocumentType nodes is
# implementation specific.  minidom handles those cases
# directly in the cloneNode() methods.
raise xml
.dom
.NotSupportedErr("Cannot clone node %s" % repr(node
))
# Check for _call_user_data_handler() since this could conceivably
# used with other DOM implementations (one of the FourThought
if hasattr(node
, '_call_user_data_handler'):
node
._call
_user
_data
_handler
(operation
, node
, clone
)
def _nssplit(qualifiedName
):
fields
= qualifiedName
.split(':', 1)
# we can't use cStringIO since it doesn't support Unicode strings
from StringIO
import StringIO
def _do_pulldom_parse(func, args, kwargs):
    """Build a complete DOM tree through a pulldom-style event source.

    *func* is called as ``func(*args, **kwargs)`` and must return an
    event stream.  The node of the first event is taken as the root,
    expanded in place via ``expandNode``, and returned to the caller.
    """
    events = func(*args, **kwargs)
    toktype, rootNode = events.getEvent()
    events.expandNode(rootNode)
    return rootNode
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object.

    With no explicit *parser* and no *bufsize*, the document is built by
    ``xml.dom.expatbuilder``.  Otherwise parsing goes through
    ``xml.dom.pulldom`` so that the supplied parser and buffer size are
    passed along.
    """
    if parser is None and not bufsize:
        from xml.dom import expatbuilder
        return expatbuilder.parse(file)
    else:
        from xml.dom import pulldom
        return _do_pulldom_parse(pulldom.parse, (file,),
                                 {'parser': parser, 'bufsize': bufsize})
def parseString(string, parser=None):
    """Parse a string containing an XML document into a DOM.

    With no explicit *parser* the document is built by
    ``xml.dom.expatbuilder``; otherwise parsing goes through
    ``xml.dom.pulldom`` with the supplied parser, mirroring parse().
    """
    if parser is None:
        from xml.dom import expatbuilder
        return expatbuilder.parseString(string)
    else:
        from xml.dom import pulldom
        return _do_pulldom_parse(pulldom.parseString, (string,),
                                 {'parser': parser})
def getDOMImplementation(features=None):
    """Return minidom's DOM implementation if it supports *features*.

    *features* may be omitted, a feature string (parsed with
    ``domreg._parse_feature_string``), or an iterable of
    ``(feature, version)`` pairs.  If any requested feature is not
    reported by ``Document.implementation.hasFeature``, None is
    returned; otherwise ``Document.implementation`` is returned.
    """
    if features:
        if isinstance(features, StringTypes):
            features = domreg._parse_feature_string(features)
        for f, v in features:
            if not Document.implementation.hasFeature(f, v):
                return None
    return Document.implementation