From 04387deeab820e75c6d37d2ddd5b514cb7bcfd9e Mon Sep 17 00:00:00 2001 From: Pavel Aharoni Date: Sun, 11 Jun 2017 14:33:57 +0300 Subject: [SDC-32] separate Tosca Parser from DC Change-Id: I7e7f31ff2bd92fec22031f75b7051d129a21d01b Signed-off-by: Pavel Aharoni --- .../_vendor/html5lib/treebuilders/etree_lxml.py | 369 --------------------- 1 file changed, 369 deletions(-) delete mode 100644 jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py (limited to 'jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py') diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py deleted file mode 100644 index 35d08ef..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py +++ /dev/null @@ -1,369 +0,0 @@ -"""Module for supporting the lxml.etree library. The idea here is to use as much -of the native library as possible, without using fragile hacks like custom element -names that break between releases. The downside of this is that we cannot represent -all possible trees; specifically the following are known to cause problems: - -Text or comments as siblings of the root element -Docypes with no name - -When any of these things occur, we emit a DataLossWarning -""" - -from __future__ import absolute_import, division, unicode_literals - -import warnings -import re -import sys - -from . import _base -from ..constants import DataLossWarning -from .. import constants -from . import etree as etree_builders -from .. import ihatexml - -import lxml.etree as etree - - -fullTree = True -tag_regexp = re.compile("{([^}]*)}(.*)") - -comment_type = etree.Comment("asd").tag - - -class DocumentType(object): - def __init__(self, name, publicId, systemId): - self.name = name - self.publicId = publicId - self.systemId = systemId - - -class Document(object): - def __init__(self): - self._elementTree = None - self._childNodes = [] - - def appendChild(self, element): - self._elementTree.getroot().addnext(element._element) - - def _getChildNodes(self): - return self._childNodes - - childNodes = property(_getChildNodes) - - -def testSerializer(element): - rv = [] - finalText = None - infosetFilter = ihatexml.InfosetFilter() - - def serializeElement(element, indent=0): - if not hasattr(element, "tag"): - if hasattr(element, "getroot"): - # Full tree case - rv.append("#document") - if element.docinfo.internalDTD: - if not (element.docinfo.public_id or - element.docinfo.system_url): - dtd_str = "" % element.docinfo.root_name - else: - dtd_str = """""" % ( - element.docinfo.root_name, - element.docinfo.public_id, - element.docinfo.system_url) - rv.append("|%s%s" % (' ' * (indent + 2), dtd_str)) - next_element = element.getroot() - while next_element.getprevious() is not None: - next_element = next_element.getprevious() - while next_element is not None: - serializeElement(next_element, indent + 2) - next_element = next_element.getnext() - elif isinstance(element, str) or isinstance(element, bytes): - # Text in a fragment - assert isinstance(element, str) or sys.version_info.major == 2 - rv.append("|%s\"%s\"" % (' ' * indent, element)) - else: - # Fragment case - rv.append("#document-fragment") - for next_element in element: - serializeElement(next_element, indent + 2) - elif element.tag == comment_type: - rv.append("|%s" % (' ' * indent, element.text)) - if hasattr(element, "tail") and element.tail: - rv.append("|%s\"%s\"" % (' ' * indent, element.tail)) - else: - assert isinstance(element, etree._Element) - nsmatch = etree_builders.tag_regexp.match(element.tag) - if nsmatch is not None: - ns = nsmatch.group(1) - tag = nsmatch.group(2) - prefix = constants.prefixes[ns] - rv.append("|%s<%s %s>" % (' ' * indent, prefix, - infosetFilter.fromXmlName(tag))) - else: - rv.append("|%s<%s>" % (' ' * indent, - infosetFilter.fromXmlName(element.tag))) - - if hasattr(element, "attrib"): - attributes = [] - for name, value in element.attrib.items(): - nsmatch = tag_regexp.match(name) - if nsmatch is not None: - ns, name = nsmatch.groups() - name = infosetFilter.fromXmlName(name) - prefix = constants.prefixes[ns] - attr_string = "%s %s" % (prefix, name) - else: - attr_string = infosetFilter.fromXmlName(name) - attributes.append((attr_string, value)) - - for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) - - if element.text: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) - indent += 2 - for child in element: - serializeElement(child, indent) - if hasattr(element, "tail") and element.tail: - rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) - serializeElement(element, 0) - - if finalText is not None: - rv.append("|%s\"%s\"" % (' ' * 2, finalText)) - - return "\n".join(rv) - - -def tostring(element): - """Serialize an element and its child nodes to a string""" - rv = [] - finalText = None - - def serializeElement(element): - if not hasattr(element, "tag"): - if element.docinfo.internalDTD: - if element.docinfo.doctype: - dtd_str = element.docinfo.doctype - else: - dtd_str = "" % element.docinfo.root_name - rv.append(dtd_str) - serializeElement(element.getroot()) - - elif element.tag == comment_type: - rv.append("" % (element.text,)) - - else: - # This is assumed to be an ordinary element - if not element.attrib: - rv.append("<%s>" % (element.tag,)) - else: - attr = " ".join(["%s=\"%s\"" % (name, value) - for name, value in element.attrib.items()]) - rv.append("<%s %s>" % (element.tag, attr)) - if element.text: - rv.append(element.text) - - for child in element: - serializeElement(child) - - rv.append("" % (element.tag,)) - - if hasattr(element, "tail") and element.tail: - rv.append(element.tail) - - serializeElement(element) - - if finalText is not None: - rv.append("%s\"" % (' ' * 2, finalText)) - - return "".join(rv) - - -class TreeBuilder(_base.TreeBuilder): - documentClass = Document - doctypeClass = DocumentType - elementClass = None - commentClass = None - fragmentClass = Document - implementation = etree - - def __init__(self, namespaceHTMLElements, fullTree=False): - builder = etree_builders.getETreeModule(etree, fullTree=fullTree) - infosetFilter = self.infosetFilter = ihatexml.InfosetFilter() - self.namespaceHTMLElements = namespaceHTMLElements - - class Attributes(dict): - def __init__(self, element, value={}): - self._element = element - dict.__init__(self, value) - for key, value in self.items(): - if isinstance(key, tuple): - name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) - else: - name = infosetFilter.coerceAttribute(key) - self._element._element.attrib[name] = value - - def __setitem__(self, key, value): - dict.__setitem__(self, key, value) - if isinstance(key, tuple): - name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) - else: - name = infosetFilter.coerceAttribute(key) - self._element._element.attrib[name] = value - - class Element(builder.Element): - def __init__(self, name, namespace): - name = infosetFilter.coerceElement(name) - builder.Element.__init__(self, name, namespace=namespace) - self._attributes = Attributes(self) - - def _setName(self, name): - self._name = infosetFilter.coerceElement(name) - self._element.tag = self._getETreeTag( - self._name, self._namespace) - - def _getName(self): - return infosetFilter.fromXmlName(self._name) - - name = property(_getName, _setName) - - def _getAttributes(self): - return self._attributes - - def _setAttributes(self, attributes): - self._attributes = Attributes(self, attributes) - - attributes = property(_getAttributes, _setAttributes) - - def insertText(self, data, insertBefore=None): - data = infosetFilter.coerceCharacters(data) - builder.Element.insertText(self, data, insertBefore) - - def appendChild(self, child): - builder.Element.appendChild(self, child) - - class Comment(builder.Comment): - def __init__(self, data): - data = infosetFilter.coerceComment(data) - builder.Comment.__init__(self, data) - - def _setData(self, data): - data = infosetFilter.coerceComment(data) - self._element.text = data - - def _getData(self): - return self._element.text - - data = property(_getData, _setData) - - self.elementClass = Element - self.commentClass = builder.Comment - # self.fragmentClass = builder.DocumentFragment - _base.TreeBuilder.__init__(self, namespaceHTMLElements) - - def reset(self): - _base.TreeBuilder.reset(self) - self.insertComment = self.insertCommentInitial - self.initial_comments = [] - self.doctype = None - - def testSerializer(self, element): - return testSerializer(element) - - def getDocument(self): - if fullTree: - return self.document._elementTree - else: - return self.document._elementTree.getroot() - - def getFragment(self): - fragment = [] - element = self.openElements[0]._element - if element.text: - fragment.append(element.text) - fragment.extend(list(element)) - if element.tail: - fragment.append(element.tail) - return fragment - - def insertDoctype(self, token): - name = token["name"] - publicId = token["publicId"] - systemId = token["systemId"] - - if not name: - warnings.warn("lxml cannot represent empty doctype", DataLossWarning) - self.doctype = None - else: - coercedName = self.infosetFilter.coerceElement(name) - if coercedName != name: - warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning) - - doctype = self.doctypeClass(coercedName, publicId, systemId) - self.doctype = doctype - - def insertCommentInitial(self, data, parent=None): - self.initial_comments.append(data) - - def insertCommentMain(self, data, parent=None): - if (parent == self.document and - self.document._elementTree.getroot()[-1].tag == comment_type): - warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning) - super(TreeBuilder, self).insertComment(data, parent) - - def insertRoot(self, token): - """Create the document root""" - # Because of the way libxml2 works, it doesn't seem to be possible to - # alter information like the doctype after the tree has been parsed. - # Therefore we need to use the built-in parser to create our iniial - # tree, after which we can add elements like normal - docStr = "" - if self.doctype: - assert self.doctype.name - docStr += "= 0 and sysid.find('"') >= 0: - warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning) - sysid = sysid.replace("'", 'U00027') - if sysid.find("'") >= 0: - docStr += '"%s"' % sysid - else: - docStr += "'%s'" % sysid - else: - docStr += "''" - docStr += ">" - if self.doctype.name != token["name"]: - warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning) - docStr += "" - root = etree.fromstring(docStr) - - # Append the initial comments: - for comment_token in self.initial_comments: - root.addprevious(etree.Comment(comment_token["data"])) - - # Create the root document and add the ElementTree to it - self.document = self.documentClass() - self.document._elementTree = root.getroottree() - - # Give the root element the right name - name = token["name"] - namespace = token.get("namespace", self.defaultNamespace) - if namespace is None: - etree_tag = name - else: - etree_tag = "{%s}%s" % (namespace, name) - root.tag = etree_tag - - # Add the root element to the internal child/open data structures - root_element = self.elementClass(name, namespace) - root_element._element = root - self.document._childNodes.append(root_element) - self.openElements.append(root_element) - - # Reset to the default insert comment function - self.insertComment = self.insertCommentMain -- cgit 1.2.3-korg