diff options
Diffstat (limited to 'jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders')
5 files changed, 1449 insertions, 0 deletions
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/__init__.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/__init__.py new file mode 100644 index 0000000..6a6b2a4 --- /dev/null +++ b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/__init__.py @@ -0,0 +1,76 @@ +"""A collection of modules for building different kinds of tree from +HTML documents. + +To create a treebuilder for a new type of tree, you need to do +implement several things: + +1) A set of classes for various types of elements: Document, Doctype, +Comment, Element. These must implement the interface of +_base.treebuilders.Node (although comment nodes have a different +signature for their constructor, see treebuilders.etree.Comment) +Textual content may also be implemented as another node type, or not, as +your tree implementation requires. + +2) A treebuilder object (called TreeBuilder by convention) that +inherits from treebuilders._base.TreeBuilder. This has 4 required attributes: +documentClass - the class to use for the bottommost node of a document +elementClass - the class to use for HTML Elements +commentClass - the class to use for comments +doctypeClass - the class to use for doctypes +It also has one required method: +getDocument - Returns the root node of the complete document tree + +3) If you wish to run the unit tests, you must also create a +testSerializer method on your treebuilder which accepts a node and +returns a string containing Node and its children serialized according +to the format used in the unittests +""" + +from __future__ import absolute_import, division, unicode_literals + +from ..utils import default_etree + +treeBuilderCache = {} + + +def getTreeBuilder(treeType, implementation=None, **kwargs): + """Get a TreeBuilder class for various types of tree with built-in support + + treeType - the name of the tree type required (case-insensitive). Supported + values are: + + "dom" - A generic builder for DOM implementations, defaulting to + a xml.dom.minidom based implementation. + "etree" - A generic builder for tree implementations exposing an + ElementTree-like interface, defaulting to + xml.etree.cElementTree if available and + xml.etree.ElementTree if not. + "lxml" - A etree-based builder for lxml.etree, handling + limitations of lxml's implementation. + + implementation - (Currently applies to the "etree" and "dom" tree types). A + module implementing the tree type e.g. + xml.etree.ElementTree or xml.etree.cElementTree.""" + + treeType = treeType.lower() + if treeType not in treeBuilderCache: + if treeType == "dom": + from . import dom + # Come up with a sane default (pref. from the stdlib) + if implementation is None: + from xml.dom import minidom + implementation = minidom + # NEVER cache here, caching is done in the dom submodule + return dom.getDomModule(implementation, **kwargs).TreeBuilder + elif treeType == "lxml": + from . import etree_lxml + treeBuilderCache[treeType] = etree_lxml.TreeBuilder + elif treeType == "etree": + from . import etree + if implementation is None: + implementation = default_etree + # NEVER cache here, caching is done in the etree submodule + return etree.getETreeModule(implementation, **kwargs).TreeBuilder + else: + raise ValueError("""Unrecognised treebuilder "%s" """ % treeType) + return treeBuilderCache.get(treeType) diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/_base.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/_base.py new file mode 100644 index 0000000..970c9ad --- /dev/null +++ b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/_base.py @@ -0,0 +1,377 @@ +from __future__ import absolute_import, division, unicode_literals +from pip._vendor.six import text_type + +from ..constants import scopingElements, tableInsertModeElements, namespaces + +# The scope markers are inserted when entering object elements, +# marquees, table cells, and table captions, and are used to prevent formatting +# from "leaking" into tables, object elements, and marquees. +Marker = None + +listElementsMap = { + None: (frozenset(scopingElements), False), + "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False), + "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"), + (namespaces["html"], "ul")])), False), + "table": (frozenset([(namespaces["html"], "html"), + (namespaces["html"], "table")]), False), + "select": (frozenset([(namespaces["html"], "optgroup"), + (namespaces["html"], "option")]), True) +} + + +class Node(object): + def __init__(self, name): + """Node representing an item in the tree. + name - The tag name associated with the node + parent - The parent of the current node (or None for the document node) + value - The value of the current node (applies to text nodes and + comments + attributes - a dict holding name, value pairs for attributes of the node + childNodes - a list of child nodes of the current node. This must + include all elements but not necessarily other node types + _flags - A list of miscellaneous flags that can be set on the node + """ + self.name = name + self.parent = None + self.value = None + self.attributes = {} + self.childNodes = [] + self._flags = [] + + def __str__(self): + attributesStr = " ".join(["%s=\"%s\"" % (name, value) + for name, value in + self.attributes.items()]) + if attributesStr: + return "<%s %s>" % (self.name, attributesStr) + else: + return "<%s>" % (self.name) + + def __repr__(self): + return "<%s>" % (self.name) + + def appendChild(self, node): + """Insert node as a child of the current node + """ + raise NotImplementedError + + def insertText(self, data, insertBefore=None): + """Insert data as text in the current node, positioned before the + start of node insertBefore or to the end of the node's text. + """ + raise NotImplementedError + + def insertBefore(self, node, refNode): + """Insert node as a child of the current node, before refNode in the + list of child nodes. Raises ValueError if refNode is not a child of + the current node""" + raise NotImplementedError + + def removeChild(self, node): + """Remove node from the children of the current node + """ + raise NotImplementedError + + def reparentChildren(self, newParent): + """Move all the children of the current node to newParent. + This is needed so that trees that don't store text as nodes move the + text in the correct way + """ + # XXX - should this method be made more general? + for child in self.childNodes: + newParent.appendChild(child) + self.childNodes = [] + + def cloneNode(self): + """Return a shallow copy of the current node i.e. a node with the same + name and attributes but with no parent or child nodes + """ + raise NotImplementedError + + def hasContent(self): + """Return true if the node has children or text, false otherwise + """ + raise NotImplementedError + + +class ActiveFormattingElements(list): + def append(self, node): + equalCount = 0 + if node != Marker: + for element in self[::-1]: + if element == Marker: + break + if self.nodesEqual(element, node): + equalCount += 1 + if equalCount == 3: + self.remove(element) + break + list.append(self, node) + + def nodesEqual(self, node1, node2): + if not node1.nameTuple == node2.nameTuple: + return False + + if not node1.attributes == node2.attributes: + return False + + return True + + +class TreeBuilder(object): + """Base treebuilder implementation + documentClass - the class to use for the bottommost node of a document + elementClass - the class to use for HTML Elements + commentClass - the class to use for comments + doctypeClass - the class to use for doctypes + """ + + # Document class + documentClass = None + + # The class to use for creating a node + elementClass = None + + # The class to use for creating comments + commentClass = None + + # The class to use for creating doctypes + doctypeClass = None + + # Fragment class + fragmentClass = None + + def __init__(self, namespaceHTMLElements): + if namespaceHTMLElements: + self.defaultNamespace = "http://www.w3.org/1999/xhtml" + else: + self.defaultNamespace = None + self.reset() + + def reset(self): + self.openElements = [] + self.activeFormattingElements = ActiveFormattingElements() + + # XXX - rename these to headElement, formElement + self.headPointer = None + self.formPointer = None + + self.insertFromTable = False + + self.document = self.documentClass() + + def elementInScope(self, target, variant=None): + + # If we pass a node in we match that. if we pass a string + # match any node with that name + exactNode = hasattr(target, "nameTuple") + + listElements, invert = listElementsMap[variant] + + for node in reversed(self.openElements): + if (node.name == target and not exactNode or + node == target and exactNode): + return True + elif (invert ^ (node.nameTuple in listElements)): + return False + + assert False # We should never reach this point + + def reconstructActiveFormattingElements(self): + # Within this algorithm the order of steps described in the + # specification is not quite the same as the order of steps in the + # code. It should still do the same though. + + # Step 1: stop the algorithm when there's nothing to do. + if not self.activeFormattingElements: + return + + # Step 2 and step 3: we start with the last element. So i is -1. + i = len(self.activeFormattingElements) - 1 + entry = self.activeFormattingElements[i] + if entry == Marker or entry in self.openElements: + return + + # Step 6 + while entry != Marker and entry not in self.openElements: + if i == 0: + # This will be reset to 0 below + i = -1 + break + i -= 1 + # Step 5: let entry be one earlier in the list. + entry = self.activeFormattingElements[i] + + while True: + # Step 7 + i += 1 + + # Step 8 + entry = self.activeFormattingElements[i] + clone = entry.cloneNode() # Mainly to get a new copy of the attributes + + # Step 9 + element = self.insertElement({"type": "StartTag", + "name": clone.name, + "namespace": clone.namespace, + "data": clone.attributes}) + + # Step 10 + self.activeFormattingElements[i] = element + + # Step 11 + if element == self.activeFormattingElements[-1]: + break + + def clearActiveFormattingElements(self): + entry = self.activeFormattingElements.pop() + while self.activeFormattingElements and entry != Marker: + entry = self.activeFormattingElements.pop() + + def elementInActiveFormattingElements(self, name): + """Check if an element exists between the end of the active + formatting elements and the last marker. If it does, return it, else + return false""" + + for item in self.activeFormattingElements[::-1]: + # Check for Marker first because if it's a Marker it doesn't have a + # name attribute. + if item == Marker: + break + elif item.name == name: + return item + return False + + def insertRoot(self, token): + element = self.createElement(token) + self.openElements.append(element) + self.document.appendChild(element) + + def insertDoctype(self, token): + name = token["name"] + publicId = token["publicId"] + systemId = token["systemId"] + + doctype = self.doctypeClass(name, publicId, systemId) + self.document.appendChild(doctype) + + def insertComment(self, token, parent=None): + if parent is None: + parent = self.openElements[-1] + parent.appendChild(self.commentClass(token["data"])) + + def createElement(self, token): + """Create an element but don't insert it anywhere""" + name = token["name"] + namespace = token.get("namespace", self.defaultNamespace) + element = self.elementClass(name, namespace) + element.attributes = token["data"] + return element + + def _getInsertFromTable(self): + return self._insertFromTable + + def _setInsertFromTable(self, value): + """Switch the function used to insert an element from the + normal one to the misnested table one and back again""" + self._insertFromTable = value + if value: + self.insertElement = self.insertElementTable + else: + self.insertElement = self.insertElementNormal + + insertFromTable = property(_getInsertFromTable, _setInsertFromTable) + + def insertElementNormal(self, token): + name = token["name"] + assert isinstance(name, text_type), "Element %s not unicode" % name + namespace = token.get("namespace", self.defaultNamespace) + element = self.elementClass(name, namespace) + element.attributes = token["data"] + self.openElements[-1].appendChild(element) + self.openElements.append(element) + return element + + def insertElementTable(self, token): + """Create an element and insert it into the tree""" + element = self.createElement(token) + if self.openElements[-1].name not in tableInsertModeElements: + return self.insertElementNormal(token) + else: + # We should be in the InTable mode. This means we want to do + # special magic element rearranging + parent, insertBefore = self.getTableMisnestedNodePosition() + if insertBefore is None: + parent.appendChild(element) + else: + parent.insertBefore(element, insertBefore) + self.openElements.append(element) + return element + + def insertText(self, data, parent=None): + """Insert text data.""" + if parent is None: + parent = self.openElements[-1] + + if (not self.insertFromTable or (self.insertFromTable and + self.openElements[-1].name + not in tableInsertModeElements)): + parent.insertText(data) + else: + # We should be in the InTable mode. This means we want to do + # special magic element rearranging + parent, insertBefore = self.getTableMisnestedNodePosition() + parent.insertText(data, insertBefore) + + def getTableMisnestedNodePosition(self): + """Get the foster parent element, and sibling to insert before + (or None) when inserting a misnested table node""" + # The foster parent element is the one which comes before the most + # recently opened table element + # XXX - this is really inelegant + lastTable = None + fosterParent = None + insertBefore = None + for elm in self.openElements[::-1]: + if elm.name == "table": + lastTable = elm + break + if lastTable: + # XXX - we should really check that this parent is actually a + # node here + if lastTable.parent: + fosterParent = lastTable.parent + insertBefore = lastTable + else: + fosterParent = self.openElements[ + self.openElements.index(lastTable) - 1] + else: + fosterParent = self.openElements[0] + return fosterParent, insertBefore + + def generateImpliedEndTags(self, exclude=None): + name = self.openElements[-1].name + # XXX td, th and tr are not actually needed + if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) + and name != exclude): + self.openElements.pop() + # XXX This is not entirely what the specification says. We should + # investigate it more closely. + self.generateImpliedEndTags(exclude) + + def getDocument(self): + "Return the final tree" + return self.document + + def getFragment(self): + "Return the final fragment" + # assert self.innerHTML + fragment = self.fragmentClass() + self.openElements[0].reparentChildren(fragment) + return fragment + + def testSerializer(self, node): + """Serialize the subtree of node in the format required by unit tests + node - the node from which to start serializing""" + raise NotImplementedError diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/dom.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/dom.py new file mode 100644 index 0000000..f9e0d76 --- /dev/null +++ b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/dom.py @@ -0,0 +1,290 @@ +from __future__ import absolute_import, division, unicode_literals + + +from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE +import weakref + +from . import _base +from .. import constants +from ..constants import namespaces +from ..utils import moduleFactoryFactory + + +def getDomBuilder(DomImplementation): + Dom = DomImplementation + + class AttrList(object): + def __init__(self, element): + self.element = element + + def __iter__(self): + return list(self.element.attributes.items()).__iter__() + + def __setitem__(self, name, value): + self.element.setAttribute(name, value) + + def __len__(self): + return len(list(self.element.attributes.items())) + + def items(self): + return [(item[0], item[1]) for item in + list(self.element.attributes.items())] + + def keys(self): + return list(self.element.attributes.keys()) + + def __getitem__(self, name): + return self.element.getAttribute(name) + + def __contains__(self, name): + if isinstance(name, tuple): + raise NotImplementedError + else: + return self.element.hasAttribute(name) + + class NodeBuilder(_base.Node): + def __init__(self, element): + _base.Node.__init__(self, element.nodeName) + self.element = element + + namespace = property(lambda self: hasattr(self.element, "namespaceURI") + and self.element.namespaceURI or None) + + def appendChild(self, node): + node.parent = self + self.element.appendChild(node.element) + + def insertText(self, data, insertBefore=None): + text = self.element.ownerDocument.createTextNode(data) + if insertBefore: + self.element.insertBefore(text, insertBefore.element) + else: + self.element.appendChild(text) + + def insertBefore(self, node, refNode): + self.element.insertBefore(node.element, refNode.element) + node.parent = self + + def removeChild(self, node): + if node.element.parentNode == self.element: + self.element.removeChild(node.element) + node.parent = None + + def reparentChildren(self, newParent): + while self.element.hasChildNodes(): + child = self.element.firstChild + self.element.removeChild(child) + newParent.element.appendChild(child) + self.childNodes = [] + + def getAttributes(self): + return AttrList(self.element) + + def setAttributes(self, attributes): + if attributes: + for name, value in list(attributes.items()): + if isinstance(name, tuple): + if name[0] is not None: + qualifiedName = (name[0] + ":" + name[1]) + else: + qualifiedName = name[1] + self.element.setAttributeNS(name[2], qualifiedName, + value) + else: + self.element.setAttribute( + name, value) + attributes = property(getAttributes, setAttributes) + + def cloneNode(self): + return NodeBuilder(self.element.cloneNode(False)) + + def hasContent(self): + return self.element.hasChildNodes() + + def getNameTuple(self): + if self.namespace is None: + return namespaces["html"], self.name + else: + return self.namespace, self.name + + nameTuple = property(getNameTuple) + + class TreeBuilder(_base.TreeBuilder): + def documentClass(self): + self.dom = Dom.getDOMImplementation().createDocument(None, None, None) + return weakref.proxy(self) + + def insertDoctype(self, token): + name = token["name"] + publicId = token["publicId"] + systemId = token["systemId"] + + domimpl = Dom.getDOMImplementation() + doctype = domimpl.createDocumentType(name, publicId, systemId) + self.document.appendChild(NodeBuilder(doctype)) + if Dom == minidom: + doctype.ownerDocument = self.dom + + def elementClass(self, name, namespace=None): + if namespace is None and self.defaultNamespace is None: + node = self.dom.createElement(name) + else: + node = self.dom.createElementNS(namespace, name) + + return NodeBuilder(node) + + def commentClass(self, data): + return NodeBuilder(self.dom.createComment(data)) + + def fragmentClass(self): + return NodeBuilder(self.dom.createDocumentFragment()) + + def appendChild(self, node): + self.dom.appendChild(node.element) + + def testSerializer(self, element): + return testSerializer(element) + + def getDocument(self): + return self.dom + + def getFragment(self): + return _base.TreeBuilder.getFragment(self).element + + def insertText(self, data, parent=None): + data = data + if parent != self: + _base.TreeBuilder.insertText(self, data, parent) + else: + # HACK: allow text nodes as children of the document node + if hasattr(self.dom, '_child_node_types'): + if not Node.TEXT_NODE in self.dom._child_node_types: + self.dom._child_node_types = list(self.dom._child_node_types) + self.dom._child_node_types.append(Node.TEXT_NODE) + self.dom.appendChild(self.dom.createTextNode(data)) + + implementation = DomImplementation + name = None + + def testSerializer(element): + element.normalize() + rv = [] + + def serializeElement(element, indent=0): + if element.nodeType == Node.DOCUMENT_TYPE_NODE: + if element.name: + if element.publicId or element.systemId: + publicId = element.publicId or "" + systemId = element.systemId or "" + rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" % + (' ' * indent, element.name, publicId, systemId)) + else: + rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, element.name)) + else: + rv.append("|%s<!DOCTYPE >" % (' ' * indent,)) + elif element.nodeType == Node.DOCUMENT_NODE: + rv.append("#document") + elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE: + rv.append("#document-fragment") + elif element.nodeType == Node.COMMENT_NODE: + rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue)) + elif element.nodeType == Node.TEXT_NODE: + rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue)) + else: + if (hasattr(element, "namespaceURI") and + element.namespaceURI is not None): + name = "%s %s" % (constants.prefixes[element.namespaceURI], + element.nodeName) + else: + name = element.nodeName + rv.append("|%s<%s>" % (' ' * indent, name)) + if element.hasAttributes(): + attributes = [] + for i in range(len(element.attributes)): + attr = element.attributes.item(i) + name = attr.nodeName + value = attr.value + ns = attr.namespaceURI + if ns: + name = "%s %s" % (constants.prefixes[ns], attr.localName) + else: + name = attr.nodeName + attributes.append((name, value)) + + for name, value in sorted(attributes): + rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + indent += 2 + for child in element.childNodes: + serializeElement(child, indent) + serializeElement(element, 0) + + return "\n".join(rv) + + def dom2sax(node, handler, nsmap={'xml': XML_NAMESPACE}): + if node.nodeType == Node.ELEMENT_NODE: + if not nsmap: + handler.startElement(node.nodeName, node.attributes) + for child in node.childNodes: + dom2sax(child, handler, nsmap) + handler.endElement(node.nodeName) + else: + attributes = dict(node.attributes.itemsNS()) + + # gather namespace declarations + prefixes = [] + for attrname in list(node.attributes.keys()): + attr = node.getAttributeNode(attrname) + if (attr.namespaceURI == XMLNS_NAMESPACE or + (attr.namespaceURI is None and attr.nodeName.startswith('xmlns'))): + prefix = (attr.nodeName != 'xmlns' and attr.nodeName or None) + handler.startPrefixMapping(prefix, attr.nodeValue) + prefixes.append(prefix) + nsmap = nsmap.copy() + nsmap[prefix] = attr.nodeValue + del attributes[(attr.namespaceURI, attr.nodeName)] + + # apply namespace declarations + for attrname in list(node.attributes.keys()): + attr = node.getAttributeNode(attrname) + if attr.namespaceURI is None and ':' in attr.nodeName: + prefix = attr.nodeName.split(':')[0] + if prefix in nsmap: + del attributes[(attr.namespaceURI, attr.nodeName)] + attributes[(nsmap[prefix], attr.nodeName)] = attr.nodeValue + + # SAX events + ns = node.namespaceURI or nsmap.get(None, None) + handler.startElementNS((ns, node.nodeName), node.nodeName, attributes) + for child in node.childNodes: + dom2sax(child, handler, nsmap) + handler.endElementNS((ns, node.nodeName), node.nodeName) + for prefix in prefixes: + handler.endPrefixMapping(prefix) + + elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]: + handler.characters(node.nodeValue) + + elif node.nodeType == Node.DOCUMENT_NODE: + handler.startDocument() + for child in node.childNodes: + dom2sax(child, handler, nsmap) + handler.endDocument() + + elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: + for child in node.childNodes: + dom2sax(child, handler, nsmap) + + else: + # ATTRIBUTE_NODE + # ENTITY_NODE + # PROCESSING_INSTRUCTION_NODE + # COMMENT_NODE + # DOCUMENT_TYPE_NODE + # NOTATION_NODE + pass + + return locals() + + +# The actual means to get a module! +getDomModule = moduleFactoryFactory(getDomBuilder) diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree.py new file mode 100644 index 0000000..48fead7 --- /dev/null +++ b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree.py @@ -0,0 +1,337 @@ +from __future__ import absolute_import, division, unicode_literals +from pip._vendor.six import text_type + +import re + +from . import _base +from .. import ihatexml +from .. import constants +from ..constants import namespaces +from ..utils import moduleFactoryFactory + +tag_regexp = re.compile("{([^}]*)}(.*)") + + +def getETreeBuilder(ElementTreeImplementation, fullTree=False): + ElementTree = ElementTreeImplementation + ElementTreeCommentType = ElementTree.Comment("asd").tag + + class Element(_base.Node): + def __init__(self, name, namespace=None): + self._name = name + self._namespace = namespace + self._element = ElementTree.Element(self._getETreeTag(name, + namespace)) + if namespace is None: + self.nameTuple = namespaces["html"], self._name + else: + self.nameTuple = self._namespace, self._name + self.parent = None + self._childNodes = [] + self._flags = [] + + def _getETreeTag(self, name, namespace): + if namespace is None: + etree_tag = name + else: + etree_tag = "{%s}%s" % (namespace, name) + return etree_tag + + def _setName(self, name): + self._name = name + self._element.tag = self._getETreeTag(self._name, self._namespace) + + def _getName(self): + return self._name + + name = property(_getName, _setName) + + def _setNamespace(self, namespace): + self._namespace = namespace + self._element.tag = self._getETreeTag(self._name, self._namespace) + + def _getNamespace(self): + return self._namespace + + namespace = property(_getNamespace, _setNamespace) + + def _getAttributes(self): + return self._element.attrib + + def _setAttributes(self, attributes): + # Delete existing attributes first + # XXX - there may be a better way to do this... + for key in list(self._element.attrib.keys()): + del self._element.attrib[key] + for key, value in attributes.items(): + if isinstance(key, tuple): + name = "{%s}%s" % (key[2], key[1]) + else: + name = key + self._element.set(name, value) + + attributes = property(_getAttributes, _setAttributes) + + def _getChildNodes(self): + return self._childNodes + + def _setChildNodes(self, value): + del self._element[:] + self._childNodes = [] + for element in value: + self.insertChild(element) + + childNodes = property(_getChildNodes, _setChildNodes) + + def hasContent(self): + """Return true if the node has children or text""" + return bool(self._element.text or len(self._element)) + + def appendChild(self, node): + self._childNodes.append(node) + self._element.append(node._element) + node.parent = self + + def insertBefore(self, node, refNode): + index = list(self._element).index(refNode._element) + self._element.insert(index, node._element) + node.parent = self + + def removeChild(self, node): + self._element.remove(node._element) + node.parent = None + + def insertText(self, data, insertBefore=None): + if not(len(self._element)): + if not self._element.text: + self._element.text = "" + self._element.text += data + elif insertBefore is None: + # Insert the text as the tail of the last child element + if not self._element[-1].tail: + self._element[-1].tail = "" + self._element[-1].tail += data + else: + # Insert the text before the specified node + children = list(self._element) + index = children.index(insertBefore._element) + if index > 0: + if not self._element[index - 1].tail: + self._element[index - 1].tail = "" + self._element[index - 1].tail += data + else: + if not self._element.text: + self._element.text = "" + self._element.text += data + + def cloneNode(self): + element = type(self)(self.name, self.namespace) + for name, value in self.attributes.items(): + element.attributes[name] = value + return element + + def reparentChildren(self, newParent): + if newParent.childNodes: + newParent.childNodes[-1]._element.tail += self._element.text + else: + if not newParent._element.text: + newParent._element.text = "" + if self._element.text is not None: + newParent._element.text += self._element.text + self._element.text = "" + _base.Node.reparentChildren(self, newParent) + + class Comment(Element): + def __init__(self, data): + # Use the superclass constructor to set all properties on the + # wrapper element + self._element = ElementTree.Comment(data) + self.parent = None + self._childNodes = [] + self._flags = [] + + def _getData(self): + return self._element.text + + def _setData(self, value): + self._element.text = value + + data = property(_getData, _setData) + + class DocumentType(Element): + def __init__(self, name, publicId, systemId): + Element.__init__(self, "<!DOCTYPE>") + self._element.text = name + self.publicId = publicId + self.systemId = systemId + + def _getPublicId(self): + return self._element.get("publicId", "") + + def _setPublicId(self, value): + if value is not None: + self._element.set("publicId", value) + + publicId = property(_getPublicId, _setPublicId) + + def _getSystemId(self): + return self._element.get("systemId", "") + + def _setSystemId(self, value): + if value is not None: + self._element.set("systemId", value) + + systemId = property(_getSystemId, _setSystemId) + + class Document(Element): + def __init__(self): + Element.__init__(self, "DOCUMENT_ROOT") + + class DocumentFragment(Element): + def __init__(self): + Element.__init__(self, "DOCUMENT_FRAGMENT") + + def testSerializer(element): + rv = [] + + def serializeElement(element, indent=0): + if not(hasattr(element, "tag")): + element = element.getroot() + if element.tag == "<!DOCTYPE>": + if element.get("publicId") or element.get("systemId"): + publicId = element.get("publicId") or "" + systemId = element.get("systemId") or "" + rv.append("""<!DOCTYPE %s "%s" "%s">""" % + (element.text, publicId, systemId)) + else: + rv.append("<!DOCTYPE %s>" % (element.text,)) + elif element.tag == "DOCUMENT_ROOT": + rv.append("#document") + if element.text is not None: + rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + if element.tail is not None: + raise TypeError("Document node cannot have tail") + if hasattr(element, "attrib") and len(element.attrib): + raise TypeError("Document node cannot have attributes") + elif element.tag == ElementTreeCommentType: + rv.append("|%s<!-- %s -->" % (' ' * indent, element.text)) + else: + assert isinstance(element.tag, text_type), \ + "Expected unicode, got %s, %s" % (type(element.tag), element.tag) + nsmatch = tag_regexp.match(element.tag) + + if nsmatch is None: + name = element.tag + else: + ns, name = nsmatch.groups() + prefix = constants.prefixes[ns] + name = "%s %s" % (prefix, name) + rv.append("|%s<%s>" % (' ' * indent, name)) + + if hasattr(element, "attrib"): + attributes = [] + for name, value in element.attrib.items(): + nsmatch = tag_regexp.match(name) + if nsmatch is not None: + ns, name = nsmatch.groups() + prefix = constants.prefixes[ns] + attr_string = "%s %s" % (prefix, name) + else: + attr_string = name + attributes.append((attr_string, value)) + + for name, value in sorted(attributes): + rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + if element.text: + rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + indent += 2 + for child in element: + serializeElement(child, indent) + if element.tail: + rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) + serializeElement(element, 0) + + return "\n".join(rv) + + def tostring(element): + """Serialize an element and its child nodes to a string""" + rv = [] + filter = ihatexml.InfosetFilter() + + def serializeElement(element): + if isinstance(element, ElementTree.ElementTree): + element = element.getroot() + + if element.tag == "<!DOCTYPE>": + if element.get("publicId") or element.get("systemId"): + publicId = element.get("publicId") or "" + systemId = element.get("systemId") or "" + rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" % + (element.text, publicId, systemId)) + else: + rv.append("<!DOCTYPE %s>" % (element.text,)) + elif element.tag == "DOCUMENT_ROOT": + if element.text is not None: + rv.append(element.text) + if element.tail is not None: + raise TypeError("Document node cannot have tail") + if hasattr(element, "attrib") and len(element.attrib): + raise TypeError("Document node cannot have attributes") + + for child in element: + serializeElement(child) + + elif element.tag == ElementTreeCommentType: + rv.append("<!--%s-->" % (element.text,)) + else: + # This is assumed to be an ordinary element + if not element.attrib: + rv.append("<%s>" % (filter.fromXmlName(element.tag),)) + else: + attr = " ".join(["%s=\"%s\"" % ( + filter.fromXmlName(name), value) + for name, value in element.attrib.items()]) + rv.append("<%s %s>" % (element.tag, attr)) + if element.text: + rv.append(element.text) + + for child in element: + serializeElement(child) + + rv.append("</%s>" % (element.tag,)) + + if element.tail: + rv.append(element.tail) + + serializeElement(element) + + return "".join(rv) + + class TreeBuilder(_base.TreeBuilder): + documentClass = Document + doctypeClass = DocumentType + elementClass = Element + commentClass = Comment + fragmentClass = DocumentFragment + implementation = ElementTreeImplementation + + def testSerializer(self, element): + return testSerializer(element) + + def getDocument(self): + if fullTree: + return self.document._element + else: + if self.defaultNamespace is not None: + return self.document._element.find( + "{%s}html" % self.defaultNamespace) + else: + return self.document._element.find("html") + + def getFragment(self): + return _base.TreeBuilder.getFragment(self)._element + + return locals() + + +getETreeModule = moduleFactoryFactory(getETreeBuilder) diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py new file mode 100644 index 0000000..35d08ef --- /dev/null +++ b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py @@ -0,0 +1,369 @@ +"""Module for supporting the lxml.etree library. The idea here is to use as much +of the native library as possible, without using fragile hacks like custom element +names that break between releases. The downside of this is that we cannot represent +all possible trees; specifically the following are known to cause problems: + +Text or comments as siblings of the root element +Docypes with no name + +When any of these things occur, we emit a DataLossWarning +""" + +from __future__ import absolute_import, division, unicode_literals + +import warnings +import re +import sys + +from . import _base +from ..constants import DataLossWarning +from .. import constants +from . import etree as etree_builders +from .. import ihatexml + +import lxml.etree as etree + + +fullTree = True +tag_regexp = re.compile("{([^}]*)}(.*)") + +comment_type = etree.Comment("asd").tag + + +class DocumentType(object): + def __init__(self, name, publicId, systemId): + self.name = name + self.publicId = publicId + self.systemId = systemId + + +class Document(object): + def __init__(self): + self._elementTree = None + self._childNodes = [] + + def appendChild(self, element): + self._elementTree.getroot().addnext(element._element) + + def _getChildNodes(self): + return self._childNodes + + childNodes = property(_getChildNodes) + + +def testSerializer(element): + rv = [] + finalText = None + infosetFilter = ihatexml.InfosetFilter() + + def serializeElement(element, indent=0): + if not hasattr(element, "tag"): + if hasattr(element, "getroot"): + # Full tree case + rv.append("#document") + if element.docinfo.internalDTD: + if not (element.docinfo.public_id or + element.docinfo.system_url): + dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name + else: + dtd_str = """<!DOCTYPE %s "%s" "%s">""" % ( + element.docinfo.root_name, + element.docinfo.public_id, + element.docinfo.system_url) + rv.append("|%s%s" % (' ' * (indent + 2), dtd_str)) + next_element = element.getroot() + while next_element.getprevious() is not None: + next_element = next_element.getprevious() + while next_element is not None: + serializeElement(next_element, indent + 2) + next_element = next_element.getnext() + elif isinstance(element, str) or isinstance(element, bytes): + # Text in a fragment + assert isinstance(element, str) or sys.version_info.major == 2 + rv.append("|%s\"%s\"" % (' ' * indent, element)) + else: + # Fragment case + rv.append("#document-fragment") + for next_element in element: + serializeElement(next_element, indent + 2) + elif element.tag == comment_type: + rv.append("|%s<!-- %s -->" % (' ' * indent, element.text)) + if hasattr(element, "tail") and element.tail: + rv.append("|%s\"%s\"" % (' ' * indent, element.tail)) + else: + assert isinstance(element, etree._Element) + nsmatch = etree_builders.tag_regexp.match(element.tag) + if nsmatch is not None: + ns = nsmatch.group(1) + tag = nsmatch.group(2) + prefix = constants.prefixes[ns] + rv.append("|%s<%s %s>" % (' ' * indent, prefix, + infosetFilter.fromXmlName(tag))) + else: + rv.append("|%s<%s>" % (' ' * indent, + infosetFilter.fromXmlName(element.tag))) + + if hasattr(element, "attrib"): + attributes = [] + for name, value in element.attrib.items(): + nsmatch = tag_regexp.match(name) + if nsmatch is not None: + ns, name = nsmatch.groups() + name = infosetFilter.fromXmlName(name) + prefix = constants.prefixes[ns] + attr_string = "%s %s" % (prefix, name) + else: + attr_string = infosetFilter.fromXmlName(name) + attributes.append((attr_string, value)) + + for name, value in sorted(attributes): + rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + + if element.text: + rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + indent += 2 + for child in element: + serializeElement(child, indent) + if hasattr(element, "tail") and element.tail: + rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) + serializeElement(element, 0) + + if finalText is not None: + rv.append("|%s\"%s\"" % (' ' * 2, finalText)) + + return "\n".join(rv) + + +def tostring(element): + """Serialize an element and its child nodes to a string""" + rv = [] + finalText = None + + def serializeElement(element): + if not hasattr(element, "tag"): + if element.docinfo.internalDTD: + if element.docinfo.doctype: + dtd_str = element.docinfo.doctype + else: + dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name + rv.append(dtd_str) + serializeElement(element.getroot()) + + elif element.tag == comment_type: + rv.append("<!--%s-->" % (element.text,)) + + else: + # This is assumed to be an ordinary element + if not element.attrib: + rv.append("<%s>" % (element.tag,)) + else: + attr = " ".join(["%s=\"%s\"" % (name, value) + for name, value in element.attrib.items()]) + rv.append("<%s %s>" % (element.tag, attr)) + if element.text: + rv.append(element.text) + + for child in element: + serializeElement(child) + + rv.append("</%s>" % (element.tag,)) + + if hasattr(element, "tail") and element.tail: + rv.append(element.tail) + + serializeElement(element) + + if finalText is not None: + rv.append("%s\"" % (' ' * 2, finalText)) + + return "".join(rv) + + +class TreeBuilder(_base.TreeBuilder): + documentClass = Document + doctypeClass = DocumentType + elementClass = None + commentClass = None + fragmentClass = Document + implementation = etree + + def __init__(self, namespaceHTMLElements, fullTree=False): + builder = etree_builders.getETreeModule(etree, fullTree=fullTree) + infosetFilter = self.infosetFilter = ihatexml.InfosetFilter() + self.namespaceHTMLElements = namespaceHTMLElements + + class Attributes(dict): + def __init__(self, element, value={}): + self._element = element + dict.__init__(self, value) + for key, value in self.items(): + if isinstance(key, tuple): + name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) + else: + name = infosetFilter.coerceAttribute(key) + self._element._element.attrib[name] = value + + def __setitem__(self, key, value): + dict.__setitem__(self, key, value) + if isinstance(key, tuple): + name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) + else: + name = infosetFilter.coerceAttribute(key) + self._element._element.attrib[name] = value + + class Element(builder.Element): + def __init__(self, name, namespace): + name = infosetFilter.coerceElement(name) + builder.Element.__init__(self, name, namespace=namespace) + self._attributes = Attributes(self) + + def _setName(self, name): + self._name = infosetFilter.coerceElement(name) + self._element.tag = self._getETreeTag( + self._name, self._namespace) + + def _getName(self): + return infosetFilter.fromXmlName(self._name) + + name = property(_getName, _setName) + + def _getAttributes(self): + return self._attributes + + def _setAttributes(self, attributes): + self._attributes = Attributes(self, attributes) + + attributes = property(_getAttributes, _setAttributes) + + def insertText(self, data, insertBefore=None): + data = infosetFilter.coerceCharacters(data) + builder.Element.insertText(self, data, insertBefore) + + def appendChild(self, child): + builder.Element.appendChild(self, child) + + class Comment(builder.Comment): + def __init__(self, data): + data = infosetFilter.coerceComment(data) + builder.Comment.__init__(self, data) + + def _setData(self, data): + data = infosetFilter.coerceComment(data) + self._element.text = data + + def _getData(self): + return self._element.text + + data = property(_getData, _setData) + + self.elementClass = Element + self.commentClass = builder.Comment + # self.fragmentClass = builder.DocumentFragment + _base.TreeBuilder.__init__(self, namespaceHTMLElements) + + def reset(self): + _base.TreeBuilder.reset(self) + self.insertComment = self.insertCommentInitial + self.initial_comments = [] + self.doctype = None + + def testSerializer(self, element): + return testSerializer(element) + + def getDocument(self): + if fullTree: + return self.document._elementTree + else: + return self.document._elementTree.getroot() + + def getFragment(self): + fragment = [] + element = self.openElements[0]._element + if element.text: + fragment.append(element.text) + fragment.extend(list(element)) + if element.tail: + fragment.append(element.tail) + return fragment + + def insertDoctype(self, token): + name = token["name"] + publicId = token["publicId"] + systemId = token["systemId"] + + if not name: + warnings.warn("lxml cannot represent empty doctype", DataLossWarning) + self.doctype = None + else: + coercedName = self.infosetFilter.coerceElement(name) + if coercedName != name: + warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning) + + doctype = self.doctypeClass(coercedName, publicId, systemId) + self.doctype = doctype + + def insertCommentInitial(self, data, parent=None): + self.initial_comments.append(data) + + def insertCommentMain(self, data, parent=None): + if (parent == self.document and + self.document._elementTree.getroot()[-1].tag == comment_type): + warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning) + super(TreeBuilder, self).insertComment(data, parent) + + def insertRoot(self, token): + """Create the document root""" + # Because of the way libxml2 works, it doesn't seem to be possible to + # alter information like the doctype after the tree has been parsed. + # Therefore we need to use the built-in parser to create our iniial + # tree, after which we can add elements like normal + docStr = "" + if self.doctype: + assert self.doctype.name + docStr += "<!DOCTYPE %s" % self.doctype.name + if (self.doctype.publicId is not None or + self.doctype.systemId is not None): + docStr += (' PUBLIC "%s" ' % + (self.infosetFilter.coercePubid(self.doctype.publicId or ""))) + if self.doctype.systemId: + sysid = self.doctype.systemId + if sysid.find("'") >= 0 and sysid.find('"') >= 0: + warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning) + sysid = sysid.replace("'", 'U00027') + if sysid.find("'") >= 0: + docStr += '"%s"' % sysid + else: + docStr += "'%s'" % sysid + else: + docStr += "''" + docStr += ">" + if self.doctype.name != token["name"]: + warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning) + docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>" + root = etree.fromstring(docStr) + + # Append the initial comments: + for comment_token in self.initial_comments: + root.addprevious(etree.Comment(comment_token["data"])) + + # Create the root document and add the ElementTree to it + self.document = self.documentClass() + self.document._elementTree = root.getroottree() + + # Give the root element the right name + name = token["name"] + namespace = token.get("namespace", self.defaultNamespace) + if namespace is None: + etree_tag = name + else: + etree_tag = "{%s}%s" % (namespace, name) + root.tag = etree_tag + + # Add the root element to the internal child/open data structures + root_element = self.elementClass(name, namespace) + root_element._element = root + self.document._childNodes.append(root_element) + self.openElements.append(root_element) + + # Reset to the default insert comment function + self.insertComment = self.insertCommentMain |