diff options
Diffstat (limited to 'jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders')
5 files changed, 0 insertions, 1449 deletions
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/__init__.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/__init__.py deleted file mode 100644 index 6a6b2a4..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/__init__.py +++ /dev/null @@ -1,76 +0,0 @@ -"""A collection of modules for building different kinds of tree from -HTML documents. - -To create a treebuilder for a new type of tree, you need to do -implement several things: - -1) A set of classes for various types of elements: Document, Doctype, -Comment, Element. These must implement the interface of -_base.treebuilders.Node (although comment nodes have a different -signature for their constructor, see treebuilders.etree.Comment) -Textual content may also be implemented as another node type, or not, as -your tree implementation requires. - -2) A treebuilder object (called TreeBuilder by convention) that -inherits from treebuilders._base.TreeBuilder. 
This has 4 required attributes: -documentClass - the class to use for the bottommost node of a document -elementClass - the class to use for HTML Elements -commentClass - the class to use for comments -doctypeClass - the class to use for doctypes -It also has one required method: -getDocument - Returns the root node of the complete document tree - -3) If you wish to run the unit tests, you must also create a -testSerializer method on your treebuilder which accepts a node and -returns a string containing Node and its children serialized according -to the format used in the unittests -""" - -from __future__ import absolute_import, division, unicode_literals - -from ..utils import default_etree - -treeBuilderCache = {} - - -def getTreeBuilder(treeType, implementation=None, **kwargs): - """Get a TreeBuilder class for various types of tree with built-in support - - treeType - the name of the tree type required (case-insensitive). Supported - values are: - - "dom" - A generic builder for DOM implementations, defaulting to - a xml.dom.minidom based implementation. - "etree" - A generic builder for tree implementations exposing an - ElementTree-like interface, defaulting to - xml.etree.cElementTree if available and - xml.etree.ElementTree if not. - "lxml" - A etree-based builder for lxml.etree, handling - limitations of lxml's implementation. - - implementation - (Currently applies to the "etree" and "dom" tree types). A - module implementing the tree type e.g. - xml.etree.ElementTree or xml.etree.cElementTree.""" - - treeType = treeType.lower() - if treeType not in treeBuilderCache: - if treeType == "dom": - from . import dom - # Come up with a sane default (pref. from the stdlib) - if implementation is None: - from xml.dom import minidom - implementation = minidom - # NEVER cache here, caching is done in the dom submodule - return dom.getDomModule(implementation, **kwargs).TreeBuilder - elif treeType == "lxml": - from . 
import etree_lxml - treeBuilderCache[treeType] = etree_lxml.TreeBuilder - elif treeType == "etree": - from . import etree - if implementation is None: - implementation = default_etree - # NEVER cache here, caching is done in the etree submodule - return etree.getETreeModule(implementation, **kwargs).TreeBuilder - else: - raise ValueError("""Unrecognised treebuilder "%s" """ % treeType) - return treeBuilderCache.get(treeType) diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/_base.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/_base.py deleted file mode 100644 index 970c9ad..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/_base.py +++ /dev/null @@ -1,377 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from pip._vendor.six import text_type - -from ..constants import scopingElements, tableInsertModeElements, namespaces - -# The scope markers are inserted when entering object elements, -# marquees, table cells, and table captions, and are used to prevent formatting -# from "leaking" into tables, object elements, and marquees. -Marker = None - -listElementsMap = { - None: (frozenset(scopingElements), False), - "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False), - "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"), - (namespaces["html"], "ul")])), False), - "table": (frozenset([(namespaces["html"], "html"), - (namespaces["html"], "table")]), False), - "select": (frozenset([(namespaces["html"], "optgroup"), - (namespaces["html"], "option")]), True) -} - - -class Node(object): - def __init__(self, name): - """Node representing an item in the tree. 
- name - The tag name associated with the node - parent - The parent of the current node (or None for the document node) - value - The value of the current node (applies to text nodes and - comments - attributes - a dict holding name, value pairs for attributes of the node - childNodes - a list of child nodes of the current node. This must - include all elements but not necessarily other node types - _flags - A list of miscellaneous flags that can be set on the node - """ - self.name = name - self.parent = None - self.value = None - self.attributes = {} - self.childNodes = [] - self._flags = [] - - def __str__(self): - attributesStr = " ".join(["%s=\"%s\"" % (name, value) - for name, value in - self.attributes.items()]) - if attributesStr: - return "<%s %s>" % (self.name, attributesStr) - else: - return "<%s>" % (self.name) - - def __repr__(self): - return "<%s>" % (self.name) - - def appendChild(self, node): - """Insert node as a child of the current node - """ - raise NotImplementedError - - def insertText(self, data, insertBefore=None): - """Insert data as text in the current node, positioned before the - start of node insertBefore or to the end of the node's text. - """ - raise NotImplementedError - - def insertBefore(self, node, refNode): - """Insert node as a child of the current node, before refNode in the - list of child nodes. Raises ValueError if refNode is not a child of - the current node""" - raise NotImplementedError - - def removeChild(self, node): - """Remove node from the children of the current node - """ - raise NotImplementedError - - def reparentChildren(self, newParent): - """Move all the children of the current node to newParent. - This is needed so that trees that don't store text as nodes move the - text in the correct way - """ - # XXX - should this method be made more general? - for child in self.childNodes: - newParent.appendChild(child) - self.childNodes = [] - - def cloneNode(self): - """Return a shallow copy of the current node i.e. 
a node with the same - name and attributes but with no parent or child nodes - """ - raise NotImplementedError - - def hasContent(self): - """Return true if the node has children or text, false otherwise - """ - raise NotImplementedError - - -class ActiveFormattingElements(list): - def append(self, node): - equalCount = 0 - if node != Marker: - for element in self[::-1]: - if element == Marker: - break - if self.nodesEqual(element, node): - equalCount += 1 - if equalCount == 3: - self.remove(element) - break - list.append(self, node) - - def nodesEqual(self, node1, node2): - if not node1.nameTuple == node2.nameTuple: - return False - - if not node1.attributes == node2.attributes: - return False - - return True - - -class TreeBuilder(object): - """Base treebuilder implementation - documentClass - the class to use for the bottommost node of a document - elementClass - the class to use for HTML Elements - commentClass - the class to use for comments - doctypeClass - the class to use for doctypes - """ - - # Document class - documentClass = None - - # The class to use for creating a node - elementClass = None - - # The class to use for creating comments - commentClass = None - - # The class to use for creating doctypes - doctypeClass = None - - # Fragment class - fragmentClass = None - - def __init__(self, namespaceHTMLElements): - if namespaceHTMLElements: - self.defaultNamespace = "http://www.w3.org/1999/xhtml" - else: - self.defaultNamespace = None - self.reset() - - def reset(self): - self.openElements = [] - self.activeFormattingElements = ActiveFormattingElements() - - # XXX - rename these to headElement, formElement - self.headPointer = None - self.formPointer = None - - self.insertFromTable = False - - self.document = self.documentClass() - - def elementInScope(self, target, variant=None): - - # If we pass a node in we match that. 
if we pass a string - # match any node with that name - exactNode = hasattr(target, "nameTuple") - - listElements, invert = listElementsMap[variant] - - for node in reversed(self.openElements): - if (node.name == target and not exactNode or - node == target and exactNode): - return True - elif (invert ^ (node.nameTuple in listElements)): - return False - - assert False # We should never reach this point - - def reconstructActiveFormattingElements(self): - # Within this algorithm the order of steps described in the - # specification is not quite the same as the order of steps in the - # code. It should still do the same though. - - # Step 1: stop the algorithm when there's nothing to do. - if not self.activeFormattingElements: - return - - # Step 2 and step 3: we start with the last element. So i is -1. - i = len(self.activeFormattingElements) - 1 - entry = self.activeFormattingElements[i] - if entry == Marker or entry in self.openElements: - return - - # Step 6 - while entry != Marker and entry not in self.openElements: - if i == 0: - # This will be reset to 0 below - i = -1 - break - i -= 1 - # Step 5: let entry be one earlier in the list. 
- entry = self.activeFormattingElements[i] - - while True: - # Step 7 - i += 1 - - # Step 8 - entry = self.activeFormattingElements[i] - clone = entry.cloneNode() # Mainly to get a new copy of the attributes - - # Step 9 - element = self.insertElement({"type": "StartTag", - "name": clone.name, - "namespace": clone.namespace, - "data": clone.attributes}) - - # Step 10 - self.activeFormattingElements[i] = element - - # Step 11 - if element == self.activeFormattingElements[-1]: - break - - def clearActiveFormattingElements(self): - entry = self.activeFormattingElements.pop() - while self.activeFormattingElements and entry != Marker: - entry = self.activeFormattingElements.pop() - - def elementInActiveFormattingElements(self, name): - """Check if an element exists between the end of the active - formatting elements and the last marker. If it does, return it, else - return false""" - - for item in self.activeFormattingElements[::-1]: - # Check for Marker first because if it's a Marker it doesn't have a - # name attribute. 
- if item == Marker: - break - elif item.name == name: - return item - return False - - def insertRoot(self, token): - element = self.createElement(token) - self.openElements.append(element) - self.document.appendChild(element) - - def insertDoctype(self, token): - name = token["name"] - publicId = token["publicId"] - systemId = token["systemId"] - - doctype = self.doctypeClass(name, publicId, systemId) - self.document.appendChild(doctype) - - def insertComment(self, token, parent=None): - if parent is None: - parent = self.openElements[-1] - parent.appendChild(self.commentClass(token["data"])) - - def createElement(self, token): - """Create an element but don't insert it anywhere""" - name = token["name"] - namespace = token.get("namespace", self.defaultNamespace) - element = self.elementClass(name, namespace) - element.attributes = token["data"] - return element - - def _getInsertFromTable(self): - return self._insertFromTable - - def _setInsertFromTable(self, value): - """Switch the function used to insert an element from the - normal one to the misnested table one and back again""" - self._insertFromTable = value - if value: - self.insertElement = self.insertElementTable - else: - self.insertElement = self.insertElementNormal - - insertFromTable = property(_getInsertFromTable, _setInsertFromTable) - - def insertElementNormal(self, token): - name = token["name"] - assert isinstance(name, text_type), "Element %s not unicode" % name - namespace = token.get("namespace", self.defaultNamespace) - element = self.elementClass(name, namespace) - element.attributes = token["data"] - self.openElements[-1].appendChild(element) - self.openElements.append(element) - return element - - def insertElementTable(self, token): - """Create an element and insert it into the tree""" - element = self.createElement(token) - if self.openElements[-1].name not in tableInsertModeElements: - return self.insertElementNormal(token) - else: - # We should be in the InTable mode. 
This means we want to do - # special magic element rearranging - parent, insertBefore = self.getTableMisnestedNodePosition() - if insertBefore is None: - parent.appendChild(element) - else: - parent.insertBefore(element, insertBefore) - self.openElements.append(element) - return element - - def insertText(self, data, parent=None): - """Insert text data.""" - if parent is None: - parent = self.openElements[-1] - - if (not self.insertFromTable or (self.insertFromTable and - self.openElements[-1].name - not in tableInsertModeElements)): - parent.insertText(data) - else: - # We should be in the InTable mode. This means we want to do - # special magic element rearranging - parent, insertBefore = self.getTableMisnestedNodePosition() - parent.insertText(data, insertBefore) - - def getTableMisnestedNodePosition(self): - """Get the foster parent element, and sibling to insert before - (or None) when inserting a misnested table node""" - # The foster parent element is the one which comes before the most - # recently opened table element - # XXX - this is really inelegant - lastTable = None - fosterParent = None - insertBefore = None - for elm in self.openElements[::-1]: - if elm.name == "table": - lastTable = elm - break - if lastTable: - # XXX - we should really check that this parent is actually a - # node here - if lastTable.parent: - fosterParent = lastTable.parent - insertBefore = lastTable - else: - fosterParent = self.openElements[ - self.openElements.index(lastTable) - 1] - else: - fosterParent = self.openElements[0] - return fosterParent, insertBefore - - def generateImpliedEndTags(self, exclude=None): - name = self.openElements[-1].name - # XXX td, th and tr are not actually needed - if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) - and name != exclude): - self.openElements.pop() - # XXX This is not entirely what the specification says. We should - # investigate it more closely. 
- self.generateImpliedEndTags(exclude) - - def getDocument(self): - "Return the final tree" - return self.document - - def getFragment(self): - "Return the final fragment" - # assert self.innerHTML - fragment = self.fragmentClass() - self.openElements[0].reparentChildren(fragment) - return fragment - - def testSerializer(self, node): - """Serialize the subtree of node in the format required by unit tests - node - the node from which to start serializing""" - raise NotImplementedError diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/dom.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/dom.py deleted file mode 100644 index f9e0d76..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/dom.py +++ /dev/null @@ -1,290 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - - -from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE -import weakref - -from . import _base -from .. 
import constants -from ..constants import namespaces -from ..utils import moduleFactoryFactory - - -def getDomBuilder(DomImplementation): - Dom = DomImplementation - - class AttrList(object): - def __init__(self, element): - self.element = element - - def __iter__(self): - return list(self.element.attributes.items()).__iter__() - - def __setitem__(self, name, value): - self.element.setAttribute(name, value) - - def __len__(self): - return len(list(self.element.attributes.items())) - - def items(self): - return [(item[0], item[1]) for item in - list(self.element.attributes.items())] - - def keys(self): - return list(self.element.attributes.keys()) - - def __getitem__(self, name): - return self.element.getAttribute(name) - - def __contains__(self, name): - if isinstance(name, tuple): - raise NotImplementedError - else: - return self.element.hasAttribute(name) - - class NodeBuilder(_base.Node): - def __init__(self, element): - _base.Node.__init__(self, element.nodeName) - self.element = element - - namespace = property(lambda self: hasattr(self.element, "namespaceURI") - and self.element.namespaceURI or None) - - def appendChild(self, node): - node.parent = self - self.element.appendChild(node.element) - - def insertText(self, data, insertBefore=None): - text = self.element.ownerDocument.createTextNode(data) - if insertBefore: - self.element.insertBefore(text, insertBefore.element) - else: - self.element.appendChild(text) - - def insertBefore(self, node, refNode): - self.element.insertBefore(node.element, refNode.element) - node.parent = self - - def removeChild(self, node): - if node.element.parentNode == self.element: - self.element.removeChild(node.element) - node.parent = None - - def reparentChildren(self, newParent): - while self.element.hasChildNodes(): - child = self.element.firstChild - self.element.removeChild(child) - newParent.element.appendChild(child) - self.childNodes = [] - - def getAttributes(self): - return AttrList(self.element) - - def 
setAttributes(self, attributes): - if attributes: - for name, value in list(attributes.items()): - if isinstance(name, tuple): - if name[0] is not None: - qualifiedName = (name[0] + ":" + name[1]) - else: - qualifiedName = name[1] - self.element.setAttributeNS(name[2], qualifiedName, - value) - else: - self.element.setAttribute( - name, value) - attributes = property(getAttributes, setAttributes) - - def cloneNode(self): - return NodeBuilder(self.element.cloneNode(False)) - - def hasContent(self): - return self.element.hasChildNodes() - - def getNameTuple(self): - if self.namespace is None: - return namespaces["html"], self.name - else: - return self.namespace, self.name - - nameTuple = property(getNameTuple) - - class TreeBuilder(_base.TreeBuilder): - def documentClass(self): - self.dom = Dom.getDOMImplementation().createDocument(None, None, None) - return weakref.proxy(self) - - def insertDoctype(self, token): - name = token["name"] - publicId = token["publicId"] - systemId = token["systemId"] - - domimpl = Dom.getDOMImplementation() - doctype = domimpl.createDocumentType(name, publicId, systemId) - self.document.appendChild(NodeBuilder(doctype)) - if Dom == minidom: - doctype.ownerDocument = self.dom - - def elementClass(self, name, namespace=None): - if namespace is None and self.defaultNamespace is None: - node = self.dom.createElement(name) - else: - node = self.dom.createElementNS(namespace, name) - - return NodeBuilder(node) - - def commentClass(self, data): - return NodeBuilder(self.dom.createComment(data)) - - def fragmentClass(self): - return NodeBuilder(self.dom.createDocumentFragment()) - - def appendChild(self, node): - self.dom.appendChild(node.element) - - def testSerializer(self, element): - return testSerializer(element) - - def getDocument(self): - return self.dom - - def getFragment(self): - return _base.TreeBuilder.getFragment(self).element - - def insertText(self, data, parent=None): - data = data - if parent != self: - 
_base.TreeBuilder.insertText(self, data, parent) - else: - # HACK: allow text nodes as children of the document node - if hasattr(self.dom, '_child_node_types'): - if not Node.TEXT_NODE in self.dom._child_node_types: - self.dom._child_node_types = list(self.dom._child_node_types) - self.dom._child_node_types.append(Node.TEXT_NODE) - self.dom.appendChild(self.dom.createTextNode(data)) - - implementation = DomImplementation - name = None - - def testSerializer(element): - element.normalize() - rv = [] - - def serializeElement(element, indent=0): - if element.nodeType == Node.DOCUMENT_TYPE_NODE: - if element.name: - if element.publicId or element.systemId: - publicId = element.publicId or "" - systemId = element.systemId or "" - rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" % - (' ' * indent, element.name, publicId, systemId)) - else: - rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, element.name)) - else: - rv.append("|%s<!DOCTYPE >" % (' ' * indent,)) - elif element.nodeType == Node.DOCUMENT_NODE: - rv.append("#document") - elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE: - rv.append("#document-fragment") - elif element.nodeType == Node.COMMENT_NODE: - rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue)) - elif element.nodeType == Node.TEXT_NODE: - rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue)) - else: - if (hasattr(element, "namespaceURI") and - element.namespaceURI is not None): - name = "%s %s" % (constants.prefixes[element.namespaceURI], - element.nodeName) - else: - name = element.nodeName - rv.append("|%s<%s>" % (' ' * indent, name)) - if element.hasAttributes(): - attributes = [] - for i in range(len(element.attributes)): - attr = element.attributes.item(i) - name = attr.nodeName - value = attr.value - ns = attr.namespaceURI - if ns: - name = "%s %s" % (constants.prefixes[ns], attr.localName) - else: - name = attr.nodeName - attributes.append((name, value)) - - for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' ' * 
(indent + 2), name, value)) - indent += 2 - for child in element.childNodes: - serializeElement(child, indent) - serializeElement(element, 0) - - return "\n".join(rv) - - def dom2sax(node, handler, nsmap={'xml': XML_NAMESPACE}): - if node.nodeType == Node.ELEMENT_NODE: - if not nsmap: - handler.startElement(node.nodeName, node.attributes) - for child in node.childNodes: - dom2sax(child, handler, nsmap) - handler.endElement(node.nodeName) - else: - attributes = dict(node.attributes.itemsNS()) - - # gather namespace declarations - prefixes = [] - for attrname in list(node.attributes.keys()): - attr = node.getAttributeNode(attrname) - if (attr.namespaceURI == XMLNS_NAMESPACE or - (attr.namespaceURI is None and attr.nodeName.startswith('xmlns'))): - prefix = (attr.nodeName != 'xmlns' and attr.nodeName or None) - handler.startPrefixMapping(prefix, attr.nodeValue) - prefixes.append(prefix) - nsmap = nsmap.copy() - nsmap[prefix] = attr.nodeValue - del attributes[(attr.namespaceURI, attr.nodeName)] - - # apply namespace declarations - for attrname in list(node.attributes.keys()): - attr = node.getAttributeNode(attrname) - if attr.namespaceURI is None and ':' in attr.nodeName: - prefix = attr.nodeName.split(':')[0] - if prefix in nsmap: - del attributes[(attr.namespaceURI, attr.nodeName)] - attributes[(nsmap[prefix], attr.nodeName)] = attr.nodeValue - - # SAX events - ns = node.namespaceURI or nsmap.get(None, None) - handler.startElementNS((ns, node.nodeName), node.nodeName, attributes) - for child in node.childNodes: - dom2sax(child, handler, nsmap) - handler.endElementNS((ns, node.nodeName), node.nodeName) - for prefix in prefixes: - handler.endPrefixMapping(prefix) - - elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]: - handler.characters(node.nodeValue) - - elif node.nodeType == Node.DOCUMENT_NODE: - handler.startDocument() - for child in node.childNodes: - dom2sax(child, handler, nsmap) - handler.endDocument() - - elif node.nodeType == 
Node.DOCUMENT_FRAGMENT_NODE: - for child in node.childNodes: - dom2sax(child, handler, nsmap) - - else: - # ATTRIBUTE_NODE - # ENTITY_NODE - # PROCESSING_INSTRUCTION_NODE - # COMMENT_NODE - # DOCUMENT_TYPE_NODE - # NOTATION_NODE - pass - - return locals() - - -# The actual means to get a module! -getDomModule = moduleFactoryFactory(getDomBuilder) diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree.py deleted file mode 100644 index 48fead7..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree.py +++ /dev/null @@ -1,337 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from pip._vendor.six import text_type - -import re - -from . import _base -from .. import ihatexml -from .. import constants -from ..constants import namespaces -from ..utils import moduleFactoryFactory - -tag_regexp = re.compile("{([^}]*)}(.*)") - - -def getETreeBuilder(ElementTreeImplementation, fullTree=False): - ElementTree = ElementTreeImplementation - ElementTreeCommentType = ElementTree.Comment("asd").tag - - class Element(_base.Node): - def __init__(self, name, namespace=None): - self._name = name - self._namespace = namespace - self._element = ElementTree.Element(self._getETreeTag(name, - namespace)) - if namespace is None: - self.nameTuple = namespaces["html"], self._name - else: - self.nameTuple = self._namespace, self._name - self.parent = None - self._childNodes = [] - self._flags = [] - - def _getETreeTag(self, name, namespace): - if namespace is None: - etree_tag = name - else: - etree_tag = "{%s}%s" % (namespace, name) - return etree_tag - - def _setName(self, name): - self._name = name - self._element.tag = self._getETreeTag(self._name, self._namespace) - - def _getName(self): - return self._name - - name = property(_getName, _setName) - - def 
_setNamespace(self, namespace): - self._namespace = namespace - self._element.tag = self._getETreeTag(self._name, self._namespace) - - def _getNamespace(self): - return self._namespace - - namespace = property(_getNamespace, _setNamespace) - - def _getAttributes(self): - return self._element.attrib - - def _setAttributes(self, attributes): - # Delete existing attributes first - # XXX - there may be a better way to do this... - for key in list(self._element.attrib.keys()): - del self._element.attrib[key] - for key, value in attributes.items(): - if isinstance(key, tuple): - name = "{%s}%s" % (key[2], key[1]) - else: - name = key - self._element.set(name, value) - - attributes = property(_getAttributes, _setAttributes) - - def _getChildNodes(self): - return self._childNodes - - def _setChildNodes(self, value): - del self._element[:] - self._childNodes = [] - for element in value: - self.insertChild(element) - - childNodes = property(_getChildNodes, _setChildNodes) - - def hasContent(self): - """Return true if the node has children or text""" - return bool(self._element.text or len(self._element)) - - def appendChild(self, node): - self._childNodes.append(node) - self._element.append(node._element) - node.parent = self - - def insertBefore(self, node, refNode): - index = list(self._element).index(refNode._element) - self._element.insert(index, node._element) - node.parent = self - - def removeChild(self, node): - self._element.remove(node._element) - node.parent = None - - def insertText(self, data, insertBefore=None): - if not(len(self._element)): - if not self._element.text: - self._element.text = "" - self._element.text += data - elif insertBefore is None: - # Insert the text as the tail of the last child element - if not self._element[-1].tail: - self._element[-1].tail = "" - self._element[-1].tail += data - else: - # Insert the text before the specified node - children = list(self._element) - index = children.index(insertBefore._element) - if index > 0: - if not 
self._element[index - 1].tail: - self._element[index - 1].tail = "" - self._element[index - 1].tail += data - else: - if not self._element.text: - self._element.text = "" - self._element.text += data - - def cloneNode(self): - element = type(self)(self.name, self.namespace) - for name, value in self.attributes.items(): - element.attributes[name] = value - return element - - def reparentChildren(self, newParent): - if newParent.childNodes: - newParent.childNodes[-1]._element.tail += self._element.text - else: - if not newParent._element.text: - newParent._element.text = "" - if self._element.text is not None: - newParent._element.text += self._element.text - self._element.text = "" - _base.Node.reparentChildren(self, newParent) - - class Comment(Element): - def __init__(self, data): - # Use the superclass constructor to set all properties on the - # wrapper element - self._element = ElementTree.Comment(data) - self.parent = None - self._childNodes = [] - self._flags = [] - - def _getData(self): - return self._element.text - - def _setData(self, value): - self._element.text = value - - data = property(_getData, _setData) - - class DocumentType(Element): - def __init__(self, name, publicId, systemId): - Element.__init__(self, "<!DOCTYPE>") - self._element.text = name - self.publicId = publicId - self.systemId = systemId - - def _getPublicId(self): - return self._element.get("publicId", "") - - def _setPublicId(self, value): - if value is not None: - self._element.set("publicId", value) - - publicId = property(_getPublicId, _setPublicId) - - def _getSystemId(self): - return self._element.get("systemId", "") - - def _setSystemId(self, value): - if value is not None: - self._element.set("systemId", value) - - systemId = property(_getSystemId, _setSystemId) - - class Document(Element): - def __init__(self): - Element.__init__(self, "DOCUMENT_ROOT") - - class DocumentFragment(Element): - def __init__(self): - Element.__init__(self, "DOCUMENT_FRAGMENT") - - def 
testSerializer(element): - rv = [] - - def serializeElement(element, indent=0): - if not(hasattr(element, "tag")): - element = element.getroot() - if element.tag == "<!DOCTYPE>": - if element.get("publicId") or element.get("systemId"): - publicId = element.get("publicId") or "" - systemId = element.get("systemId") or "" - rv.append("""<!DOCTYPE %s "%s" "%s">""" % - (element.text, publicId, systemId)) - else: - rv.append("<!DOCTYPE %s>" % (element.text,)) - elif element.tag == "DOCUMENT_ROOT": - rv.append("#document") - if element.text is not None: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) - if element.tail is not None: - raise TypeError("Document node cannot have tail") - if hasattr(element, "attrib") and len(element.attrib): - raise TypeError("Document node cannot have attributes") - elif element.tag == ElementTreeCommentType: - rv.append("|%s<!-- %s -->" % (' ' * indent, element.text)) - else: - assert isinstance(element.tag, text_type), \ - "Expected unicode, got %s, %s" % (type(element.tag), element.tag) - nsmatch = tag_regexp.match(element.tag) - - if nsmatch is None: - name = element.tag - else: - ns, name = nsmatch.groups() - prefix = constants.prefixes[ns] - name = "%s %s" % (prefix, name) - rv.append("|%s<%s>" % (' ' * indent, name)) - - if hasattr(element, "attrib"): - attributes = [] - for name, value in element.attrib.items(): - nsmatch = tag_regexp.match(name) - if nsmatch is not None: - ns, name = nsmatch.groups() - prefix = constants.prefixes[ns] - attr_string = "%s %s" % (prefix, name) - else: - attr_string = name - attributes.append((attr_string, value)) - - for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) - if element.text: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) - indent += 2 - for child in element: - serializeElement(child, indent) - if element.tail: - rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) - serializeElement(element, 0) - - return 
"\n".join(rv) - - def tostring(element): - """Serialize an element and its child nodes to a string""" - rv = [] - filter = ihatexml.InfosetFilter() - - def serializeElement(element): - if isinstance(element, ElementTree.ElementTree): - element = element.getroot() - - if element.tag == "<!DOCTYPE>": - if element.get("publicId") or element.get("systemId"): - publicId = element.get("publicId") or "" - systemId = element.get("systemId") or "" - rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" % - (element.text, publicId, systemId)) - else: - rv.append("<!DOCTYPE %s>" % (element.text,)) - elif element.tag == "DOCUMENT_ROOT": - if element.text is not None: - rv.append(element.text) - if element.tail is not None: - raise TypeError("Document node cannot have tail") - if hasattr(element, "attrib") and len(element.attrib): - raise TypeError("Document node cannot have attributes") - - for child in element: - serializeElement(child) - - elif element.tag == ElementTreeCommentType: - rv.append("<!--%s-->" % (element.text,)) - else: - # This is assumed to be an ordinary element - if not element.attrib: - rv.append("<%s>" % (filter.fromXmlName(element.tag),)) - else: - attr = " ".join(["%s=\"%s\"" % ( - filter.fromXmlName(name), value) - for name, value in element.attrib.items()]) - rv.append("<%s %s>" % (element.tag, attr)) - if element.text: - rv.append(element.text) - - for child in element: - serializeElement(child) - - rv.append("</%s>" % (element.tag,)) - - if element.tail: - rv.append(element.tail) - - serializeElement(element) - - return "".join(rv) - - class TreeBuilder(_base.TreeBuilder): - documentClass = Document - doctypeClass = DocumentType - elementClass = Element - commentClass = Comment - fragmentClass = DocumentFragment - implementation = ElementTreeImplementation - - def testSerializer(self, element): - return testSerializer(element) - - def getDocument(self): - if fullTree: - return self.document._element - else: - if self.defaultNamespace is not None: - return 
self.document._element.find( - "{%s}html" % self.defaultNamespace) - else: - return self.document._element.find("html") - - def getFragment(self): - return _base.TreeBuilder.getFragment(self)._element - - return locals() - - -getETreeModule = moduleFactoryFactory(getETreeBuilder) diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py deleted file mode 100644 index 35d08ef..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py +++ /dev/null @@ -1,369 +0,0 @@ -"""Module for supporting the lxml.etree library. The idea here is to use as much -of the native library as possible, without using fragile hacks like custom element -names that break between releases. The downside of this is that we cannot represent -all possible trees; specifically the following are known to cause problems: - -Text or comments as siblings of the root element -Docypes with no name - -When any of these things occur, we emit a DataLossWarning -""" - -from __future__ import absolute_import, division, unicode_literals - -import warnings -import re -import sys - -from . import _base -from ..constants import DataLossWarning -from .. import constants -from . import etree as etree_builders -from .. 
import ihatexml

import lxml.etree as etree


# Module-level flag read by TreeBuilder.getDocument().
fullTree = True
# Splits a "{namespace}localname" qualified name into its two parts.
tag_regexp = re.compile("{([^}]*)}(.*)")

# The value lxml stores in ``.tag`` for comment nodes; used to tell comments
# apart from ordinary elements.
comment_type = etree.Comment("asd").tag


class DocumentType(object):
    """Plain record holding the pieces of a doctype declaration."""

    def __init__(self, name, publicId, systemId):
        self.name = name
        self.publicId = publicId
        self.systemId = systemId


class Document(object):
    """Document (and fragment) node wrapping an lxml ElementTree.

    ``_elementTree`` is filled in by ``TreeBuilder.insertRoot``;
    ``_childNodes`` holds the builder-level child wrappers.
    """

    def __init__(self):
        self._elementTree = None
        self._childNodes = []

    def appendChild(self, element):
        """Append ``element`` after the last top-level sibling of the root.

        Inserting directly after the root each time would place later nodes
        *before* earlier ones, so walk to the last following sibling first.
        """
        last = self._elementTree.getroot()
        for sibling in last.itersiblings():
            last = sibling
        last.addnext(element._element)

    def _getChildNodes(self):
        return self._childNodes

    childNodes = property(_getChildNodes)


def testSerializer(element):
    """Serialize a tree, fragment list or element to the unit-test format.

    Each node is emitted on its own line, prefixed with ``|`` and indented
    by two spaces per nesting level.  Returns the lines joined with
    newlines.
    """
    rv = []
    infosetFilter = ihatexml.InfosetFilter()

    def serializeElement(element, indent=0):
        if not hasattr(element, "tag"):
            if hasattr(element, "getroot"):
                # Full tree case
                rv.append("#document")
                if element.docinfo.internalDTD:
                    if not (element.docinfo.public_id or
                            element.docinfo.system_url):
                        dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                    else:
                        dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
                            element.docinfo.root_name,
                            element.docinfo.public_id,
                            element.docinfo.system_url)
                    rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
                # Rewind to the first top-level node (comments may precede
                # the root), then serialize every top-level sibling in order.
                next_element = element.getroot()
                while next_element.getprevious() is not None:
                    next_element = next_element.getprevious()
                while next_element is not None:
                    serializeElement(next_element, indent + 2)
                    next_element = next_element.getnext()
            elif isinstance(element, (str, bytes)):
                # Text in a fragment
                assert isinstance(element, str) or sys.version_info.major == 2
                rv.append("|%s\"%s\"" % (' ' * indent, element))
            else:
                # Fragment case
                rv.append("#document-fragment")
                for next_element in element:
                    serializeElement(next_element, indent + 2)
        elif element.tag == comment_type:
            rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
            if hasattr(element, "tail") and element.tail:
                rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
        else:
            assert isinstance(element, etree._Element)
            nsmatch = etree_builders.tag_regexp.match(element.tag)
            if nsmatch is not None:
                ns = nsmatch.group(1)
                tag = nsmatch.group(2)
                prefix = constants.prefixes[ns]
                rv.append("|%s<%s %s>" % (' ' * indent, prefix,
                                          infosetFilter.fromXmlName(tag)))
            else:
                rv.append("|%s<%s>" % (' ' * indent,
                                       infosetFilter.fromXmlName(element.tag)))

            if hasattr(element, "attrib"):
                attributes = []
                for name, value in element.attrib.items():
                    nsmatch = tag_regexp.match(name)
                    if nsmatch is not None:
                        ns, name = nsmatch.groups()
                        name = infosetFilter.fromXmlName(name)
                        prefix = constants.prefixes[ns]
                        attr_string = "%s %s" % (prefix, name)
                    else:
                        attr_string = infosetFilter.fromXmlName(name)
                    attributes.append((attr_string, value))

                # Sorted so the output does not depend on attribute order.
                for name, value in sorted(attributes):
                    rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))

            if element.text:
                rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
            indent += 2
            for child in element:
                serializeElement(child, indent)
            if hasattr(element, "tail") and element.tail:
                # The tail belongs to the parent's level, hence indent - 2.
                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
    serializeElement(element, 0)

    return "\n".join(rv)


def tostring(element):
    """Serialize an element and its child nodes to a string"""
    rv = []

    def serializeElement(element):
        if not hasattr(element, "tag"):
            # An ElementTree: emit the doctype (if any), then the root.
            if element.docinfo.internalDTD:
                if element.docinfo.doctype:
                    dtd_str = element.docinfo.doctype
                else:
                    dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                rv.append(dtd_str)
            serializeElement(element.getroot())

        elif element.tag == comment_type:
            rv.append("<!--%s-->" % (element.text,))

        else:
            # This is assumed to be an ordinary element
            if not element.attrib:
                rv.append("<%s>" % (element.tag,))
            else:
                attr = " ".join(["%s=\"%s\"" % (name, value)
                                 for name, value in element.attrib.items()])
                rv.append("<%s %s>" % (element.tag, attr))
            if element.text:
                rv.append(element.text)

            for child in element:
                serializeElement(child)

            rv.append("</%s>" % (element.tag,))

        if hasattr(element, "tail") and element.tail:
            rv.append(element.tail)

    serializeElement(element)

    return "".join(rv)


class TreeBuilder(_base.TreeBuilder):
    """Tree builder producing lxml.etree trees.

    ``elementClass`` and ``commentClass`` are created per instance in
    ``__init__`` because they close over the instance's infoset filter.
    """

    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None
    commentClass = None
    fragmentClass = Document
    implementation = etree

    def __init__(self, namespaceHTMLElements, fullTree=False):
        """Set up the element/comment wrapper classes for this builder.

        ``fullTree`` is forwarded to ``etree_builders.getETreeModule``.
        NOTE(review): ``getDocument`` reads the module-level ``fullTree``
        flag, not this argument - confirm that is intended.
        """
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
        self.namespaceHTMLElements = namespaceHTMLElements

        def attributeXmlName(key):
            # Tuple keys carry the local name at index 1 and the namespace
            # at index 2; plain keys are bare attribute names.
            if isinstance(key, tuple):
                return "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
            return infosetFilter.coerceAttribute(key)

        class Attributes(dict):
            """dict that mirrors every item into the lxml element's attrib."""

            def __init__(self, element, value=None):
                self._element = element
                dict.__init__(self, {} if value is None else value)
                for key, item in self.items():
                    self._element._element.attrib[attributeXmlName(key)] = item

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                self._element._element.attrib[attributeXmlName(key)] = value

        class Element(builder.Element):
            """Element wrapper that coerces names and text via the filter."""

            def __init__(self, name, namespace):
                name = infosetFilter.coerceElement(name)
                builder.Element.__init__(self, name, namespace=namespace)
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = infosetFilter.coerceElement(name)
                self._element.tag = self._getETreeTag(
                    self._name, self._namespace)

            def _getName(self):
                return infosetFilter.fromXmlName(self._name)

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)

            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = infosetFilter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)

        class Comment(builder.Comment):
            """Comment wrapper that passes its data through the filter."""

            def __init__(self, data):
                data = infosetFilter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = infosetFilter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        # Use the coercing subclass defined above; registering
        # builder.Comment here would leave that subclass unused.
        self.commentClass = Comment
        # self.fragmentClass = builder.DocumentFragment
        _base.TreeBuilder.__init__(self, namespaceHTMLElements)

    def reset(self):
        """Reset builder state; comments are buffered until the root exists."""
        _base.TreeBuilder.reset(self)
        self.insertComment = self.insertCommentInitial
        self.initial_comments = []
        self.doctype = None

    def testSerializer(self, element):
        return testSerializer(element)

    def getDocument(self):
        """Return the ElementTree, or its root when ``fullTree`` is false."""
        if fullTree:
            return self.document._elementTree
        else:
            return self.document._elementTree.getroot()

    def getFragment(self):
        """Return the first open element's content as a flat list.

        The list holds the element's leading text (if any), its child
        elements, and its tail text (if any), in that order.
        """
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        fragment.extend(list(element))
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, token):
        """Record the doctype from ``token`` for later use by insertRoot.

        Emits a DataLossWarning when the name is empty or had to be coerced.
        """
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        if not name:
            warnings.warn("lxml cannot represent empty doctype", DataLossWarning)
            self.doctype = None
        else:
            coercedName = self.infosetFilter.coerceElement(name)
            if coercedName != name:
                warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning)

            doctype = self.doctypeClass(coercedName, publicId, systemId)
            self.doctype = doctype

    def insertCommentInitial(self, data, parent=None):
        """Buffer a comment token seen before the root element exists."""
        self.initial_comments.append(data)

    def insertCommentMain(self, data, parent=None):
        """Insert a comment once the root element has been created."""
        if (parent == self.document and
                self.document._elementTree.getroot()[-1].tag == comment_type):
            warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
        super(TreeBuilder, self).insertComment(data, parent)

    def insertRoot(self, token):
        """Create the document root"""
        # Because of the way libxml2 works, it doesn't seem to be possible to
        # alter information like the doctype after the tree has been parsed.
        # Therefore we need to use the built-in parser to create our initial
        # tree, after which we can add elements like normal
        docStr = ""
        if self.doctype:
            assert self.doctype.name
            docStr += "<!DOCTYPE %s" % self.doctype.name
            if (self.doctype.publicId is not None or
                    self.doctype.systemId is not None):
                docStr += (' PUBLIC "%s" ' %
                           (self.infosetFilter.coercePubid(self.doctype.publicId or "")))
                if self.doctype.systemId:
                    sysid = self.doctype.systemId
                    if "'" in sysid and '"' in sysid:
                        warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning)
                        sysid = sysid.replace("'", 'U00027')
                    # Quote with whichever quote character the id does not use.
                    if "'" in sysid:
                        docStr += '"%s"' % sysid
                    else:
                        docStr += "'%s'" % sysid
                else:
                    docStr += "''"
            docStr += ">"
            if self.doctype.name != token["name"]:
                warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning)
        # Placeholder root; it is renamed to the real tag below.
        docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"
        root = etree.fromstring(docStr)

        # Append the initial comments:
        for comment_token in self.initial_comments:
            root.addprevious(etree.Comment(comment_token["data"]))

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Give the root element the right name
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        if namespace is None:
            etree_tag = name
        else:
            etree_tag = "{%s}%s" % (namespace, name)
        root.tag = etree_tag

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name, namespace)
        root_element._element = root
        self.document._childNodes.append(root_element)
        self.openElements.append(root_element)

        # Reset to the default insert comment function
        self.insertComment = self.insertCommentMain