diff options
Diffstat (limited to 'jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers')
7 files changed, 0 insertions, 770 deletions
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py deleted file mode 100644 index 18124e7..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py +++ /dev/null @@ -1,57 +0,0 @@ -"""A collection of modules for iterating through different kinds of -tree, generating tokens identical to those produced by the tokenizer -module. - -To create a tree walker for a new type of tree, you need to do -implement a tree walker object (called TreeWalker by convention) that -implements a 'serialize' method taking a tree as sole argument and -returning an iterator generating tokens. -""" - -from __future__ import absolute_import, division, unicode_literals - -import sys - -from ..utils import default_etree - -treeWalkerCache = {} - - -def getTreeWalker(treeType, implementation=None, **kwargs): - """Get a TreeWalker class for various types of tree with built-in support - - treeType - the name of the tree type required (case-insensitive). Supported - values are: - - "dom" - The xml.dom.minidom DOM implementation - "pulldom" - The xml.dom.pulldom event stream - "etree" - A generic walker for tree implementations exposing an - elementtree-like interface (known to work with - ElementTree, cElementTree and lxml.etree). - "lxml" - Optimized walker for lxml.etree - "genshi" - a Genshi stream - - implementation - (Currently applies to the "etree" tree type only). A module - implementing the tree type e.g. xml.etree.ElementTree or - cElementTree.""" - - treeType = treeType.lower() - if treeType not in treeWalkerCache: - if treeType in ("dom", "pulldom"): - name = "%s.%s" % (__name__, treeType) - __import__(name) - mod = sys.modules[name] - treeWalkerCache[treeType] = mod.TreeWalker - elif treeType == "genshi": - from . import genshistream - treeWalkerCache[treeType] = genshistream.TreeWalker - elif treeType == "lxml": - from . import lxmletree - treeWalkerCache[treeType] = lxmletree.TreeWalker - elif treeType == "etree": - from . import etree - if implementation is None: - implementation = default_etree - # XXX: NEVER cache here, caching is done in the etree submodule - return etree.getETreeModule(implementation, **kwargs).TreeWalker - return treeWalkerCache.get(treeType) diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/_base.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/_base.py deleted file mode 100644 index a202359..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/_base.py +++ /dev/null @@ -1,196 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from pip._vendor.six import text_type - -import gettext -_ = gettext.gettext - -from ..constants import voidElements, spaceCharacters -spaceCharacters = "".join(spaceCharacters) - - -class TreeWalker(object): - def __init__(self, tree): - self.tree = tree - - def __iter__(self): - raise NotImplementedError - - def error(self, msg): - return {"type": "SerializeError", "data": msg} - - def emptyTag(self, namespace, name, attrs, hasChildren=False): - assert namespace is None or isinstance(namespace, text_type), type(namespace) - assert isinstance(name, text_type), type(name) - assert all((namespace is None or isinstance(namespace, text_type)) and - isinstance(name, text_type) and - isinstance(value, text_type) - for (namespace, name), value in attrs.items()) - - yield {"type": "EmptyTag", "name": name, - "namespace": namespace, - "data": attrs} - if hasChildren: - yield self.error(_("Void element has children")) - - def startTag(self, namespace, name, attrs): - assert namespace is None or isinstance(namespace, text_type), type(namespace) - assert isinstance(name, text_type), type(name) - assert all((namespace is None or isinstance(namespace, text_type)) and - isinstance(name, text_type) and - isinstance(value, text_type) - for (namespace, name), value in attrs.items()) - - return {"type": "StartTag", - "name": name, - "namespace": namespace, - "data": attrs} - - def endTag(self, namespace, name): - assert namespace is None or isinstance(namespace, text_type), type(namespace) - assert isinstance(name, text_type), type(namespace) - - return {"type": "EndTag", - "name": name, - "namespace": namespace, - "data": {}} - - def text(self, data): - assert isinstance(data, text_type), type(data) - - data = data - middle = data.lstrip(spaceCharacters) - left = data[:len(data) - len(middle)] - if left: - yield {"type": "SpaceCharacters", "data": left} - data = middle - middle = data.rstrip(spaceCharacters) - right = data[len(middle):] - if middle: - yield {"type": "Characters", "data": middle} - if right: - yield {"type": "SpaceCharacters", "data": right} - - def comment(self, data): - assert isinstance(data, text_type), type(data) - - return {"type": "Comment", "data": data} - - def doctype(self, name, publicId=None, systemId=None, correct=True): - assert name is None or isinstance(name, text_type), type(name) - assert publicId is None or isinstance(publicId, text_type), type(publicId) - assert systemId is None or isinstance(systemId, text_type), type(systemId) - - return {"type": "Doctype", - "name": name if name is not None else "", - "publicId": publicId, - "systemId": systemId, - "correct": correct} - - def entity(self, name): - assert isinstance(name, text_type), type(name) - - return {"type": "Entity", "name": name} - - def unknown(self, nodeType): - return self.error(_("Unknown node type: ") + nodeType) - - -class RecursiveTreeWalker(TreeWalker): - def walkChildren(self, node): - raise NotImplementedError - - def element(self, node, namespace, name, attrs, hasChildren): - if name in voidElements: - for token in self.emptyTag(namespace, name, attrs, hasChildren): - yield token - else: - yield self.startTag(name, attrs) - if hasChildren: - for token in self.walkChildren(node): - yield token - yield self.endTag(name) - -from xml.dom import Node - -DOCUMENT = Node.DOCUMENT_NODE -DOCTYPE = Node.DOCUMENT_TYPE_NODE -TEXT = Node.TEXT_NODE -ELEMENT = Node.ELEMENT_NODE -COMMENT = Node.COMMENT_NODE -ENTITY = Node.ENTITY_NODE -UNKNOWN = "<#UNKNOWN#>" - - -class NonRecursiveTreeWalker(TreeWalker): - def getNodeDetails(self, node): - raise NotImplementedError - - def getFirstChild(self, node): - raise NotImplementedError - - def getNextSibling(self, node): - raise NotImplementedError - - def getParentNode(self, node): - raise NotImplementedError - - def __iter__(self): - currentNode = self.tree - while currentNode is not None: - details = self.getNodeDetails(currentNode) - type, details = details[0], details[1:] - hasChildren = False - - if type == DOCTYPE: - yield self.doctype(*details) - - elif type == TEXT: - for token in self.text(*details): - yield token - - elif type == ELEMENT: - namespace, name, attributes, hasChildren = details - if name in voidElements: - for token in self.emptyTag(namespace, name, attributes, - hasChildren): - yield token - hasChildren = False - else: - yield self.startTag(namespace, name, attributes) - - elif type == COMMENT: - yield self.comment(details[0]) - - elif type == ENTITY: - yield self.entity(details[0]) - - elif type == DOCUMENT: - hasChildren = True - - else: - yield self.unknown(details[0]) - - if hasChildren: - firstChild = self.getFirstChild(currentNode) - else: - firstChild = None - - if firstChild is not None: - currentNode = firstChild - else: - while currentNode is not None: - details = self.getNodeDetails(currentNode) - type, details = details[0], details[1:] - if type == ELEMENT: - namespace, name, attributes, hasChildren = details - if name not in voidElements: - yield self.endTag(namespace, name) - if self.tree is currentNode: - currentNode = None - break - nextSibling = self.getNextSibling(currentNode) - if nextSibling is not None: - currentNode = nextSibling - break - else: - currentNode = self.getParentNode(currentNode) diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/dom.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/dom.py deleted file mode 100644 index a01287a..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/dom.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from xml.dom import Node - -import gettext -_ = gettext.gettext - -from . import _base - - -class TreeWalker(_base.NonRecursiveTreeWalker): - def getNodeDetails(self, node): - if node.nodeType == Node.DOCUMENT_TYPE_NODE: - return _base.DOCTYPE, node.name, node.publicId, node.systemId - - elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - return _base.TEXT, node.nodeValue - - elif node.nodeType == Node.ELEMENT_NODE: - attrs = {} - for attr in list(node.attributes.keys()): - attr = node.getAttributeNode(attr) - if attr.namespaceURI: - attrs[(attr.namespaceURI, attr.localName)] = attr.value - else: - attrs[(None, attr.name)] = attr.value - return (_base.ELEMENT, node.namespaceURI, node.nodeName, - attrs, node.hasChildNodes()) - - elif node.nodeType == Node.COMMENT_NODE: - return _base.COMMENT, node.nodeValue - - elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE): - return (_base.DOCUMENT,) - - else: - return _base.UNKNOWN, node.nodeType - - def getFirstChild(self, node): - return node.firstChild - - def getNextSibling(self, node): - return node.nextSibling - - def getParentNode(self, node): - return node.parentNode diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/etree.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/etree.py deleted file mode 100644 index 88fb981..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/etree.py +++ /dev/null @@ -1,131 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import gettext -_ = gettext.gettext - -import re - -from pip._vendor.six import text_type - -from . import _base -from ..utils import moduleFactoryFactory - -tag_regexp = re.compile("{([^}]*)}(.*)") - - -def getETreeBuilder(ElementTreeImplementation): - ElementTree = ElementTreeImplementation - ElementTreeCommentType = ElementTree.Comment("asd").tag - - class TreeWalker(_base.NonRecursiveTreeWalker): - """Given the particular ElementTree representation, this implementation, - to avoid using recursion, returns "nodes" as tuples with the following - content: - - 1. The current element - - 2. The index of the element relative to its parent - - 3. A stack of ancestor elements - - 4. A flag "text", "tail" or None to indicate if the current node is a - text node; either the text or tail of the current element (1) - """ - def getNodeDetails(self, node): - if isinstance(node, tuple): # It might be the root Element - elt, key, parents, flag = node - if flag in ("text", "tail"): - return _base.TEXT, getattr(elt, flag) - else: - node = elt - - if not(hasattr(node, "tag")): - node = node.getroot() - - if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"): - return (_base.DOCUMENT,) - - elif node.tag == "<!DOCTYPE>": - return (_base.DOCTYPE, node.text, - node.get("publicId"), node.get("systemId")) - - elif node.tag == ElementTreeCommentType: - return _base.COMMENT, node.text - - else: - assert type(node.tag) == text_type, type(node.tag) - # This is assumed to be an ordinary element - match = tag_regexp.match(node.tag) - if match: - namespace, tag = match.groups() - else: - namespace = None - tag = node.tag - attrs = {} - for name, value in list(node.attrib.items()): - match = tag_regexp.match(name) - if match: - attrs[(match.group(1), match.group(2))] = value - else: - attrs[(None, name)] = value - return (_base.ELEMENT, namespace, tag, - attrs, len(node) or node.text) - - def getFirstChild(self, node): - if isinstance(node, tuple): - element, key, parents, flag = node - else: - element, key, parents, flag = node, None, [], None - - if flag in ("text", "tail"): - return None - else: - if element.text: - return element, key, parents, "text" - elif len(element): - parents.append(element) - return element[0], 0, parents, None - else: - return None - - def getNextSibling(self, node): - if isinstance(node, tuple): - element, key, parents, flag = node - else: - return None - - if flag == "text": - if len(element): - parents.append(element) - return element[0], 0, parents, None - else: - return None - else: - if element.tail and flag != "tail": - return element, key, parents, "tail" - elif key < len(parents[-1]) - 1: - return parents[-1][key + 1], key + 1, parents, None - else: - return None - - def getParentNode(self, node): - if isinstance(node, tuple): - element, key, parents, flag = node - else: - return None - - if flag == "text": - if not parents: - return element - else: - return element, key, parents, None - else: - parent = parents.pop() - if not parents: - return parent - else: - return parent, list(parents[-1]).index(parent), parents, None - - return locals() - -getETreeModule = moduleFactoryFactory(getETreeBuilder) diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/genshistream.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/genshistream.py deleted file mode 100644 index f559c45..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/genshistream.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from genshi.core import QName -from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT -from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT - -from . import _base - -from ..constants import voidElements, namespaces - - -class TreeWalker(_base.TreeWalker): - def __iter__(self): - # Buffer the events so we can pass in the following one - previous = None - for event in self.tree: - if previous is not None: - for token in self.tokens(previous, event): - yield token - previous = event - - # Don't forget the final event! - if previous is not None: - for token in self.tokens(previous, None): - yield token - - def tokens(self, event, next): - kind, data, pos = event - if kind == START: - tag, attribs = data - name = tag.localname - namespace = tag.namespace - converted_attribs = {} - for k, v in attribs: - if isinstance(k, QName): - converted_attribs[(k.namespace, k.localname)] = v - else: - converted_attribs[(None, k)] = v - - if namespace == namespaces["html"] and name in voidElements: - for token in self.emptyTag(namespace, name, converted_attribs, - not next or next[0] != END - or next[1] != tag): - yield token - else: - yield self.startTag(namespace, name, converted_attribs) - - elif kind == END: - name = data.localname - namespace = data.namespace - if name not in voidElements: - yield self.endTag(namespace, name) - - elif kind == COMMENT: - yield self.comment(data) - - elif kind == TEXT: - for token in self.text(data): - yield token - - elif kind == DOCTYPE: - yield self.doctype(*data) - - elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS, - START_CDATA, END_CDATA, PI): - pass - - else: - yield self.unknown(kind) diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/lxmletree.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/lxmletree.py deleted file mode 100644 index 4373383..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/lxmletree.py +++ /dev/null @@ -1,208 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from pip._vendor.six import text_type - -from lxml import etree -from ..treebuilders.etree import tag_regexp - -from gettext import gettext -_ = gettext - -from . import _base - -from .. import ihatexml - - -def ensure_str(s): - if s is None: - return None - elif isinstance(s, text_type): - return s - else: - return s.decode("utf-8", "strict") - - -class Root(object): - def __init__(self, et): - self.elementtree = et - self.children = [] - if et.docinfo.internalDTD: - self.children.append(Doctype(self, - ensure_str(et.docinfo.root_name), - ensure_str(et.docinfo.public_id), - ensure_str(et.docinfo.system_url))) - root = et.getroot() - node = root - - while node.getprevious() is not None: - node = node.getprevious() - while node is not None: - self.children.append(node) - node = node.getnext() - - self.text = None - self.tail = None - - def __getitem__(self, key): - return self.children[key] - - def getnext(self): - return None - - def __len__(self): - return 1 - - -class Doctype(object): - def __init__(self, root_node, name, public_id, system_id): - self.root_node = root_node - self.name = name - self.public_id = public_id - self.system_id = system_id - - self.text = None - self.tail = None - - def getnext(self): - return self.root_node.children[1] - - -class FragmentRoot(Root): - def __init__(self, children): - self.children = [FragmentWrapper(self, child) for child in children] - self.text = self.tail = None - - def getnext(self): - return None - - -class FragmentWrapper(object): - def __init__(self, fragment_root, obj): - self.root_node = fragment_root - self.obj = obj - if hasattr(self.obj, 'text'): - self.text = ensure_str(self.obj.text) - else: - self.text = None - if hasattr(self.obj, 'tail'): - self.tail = ensure_str(self.obj.tail) - else: - self.tail = None - self.isstring = isinstance(obj, str) or isinstance(obj, bytes) - # Support for bytes here is Py2 - if self.isstring: - self.obj = ensure_str(self.obj) - - def __getattr__(self, name): - return getattr(self.obj, name) - - def getnext(self): - siblings = self.root_node.children - idx = siblings.index(self) - if idx < len(siblings) - 1: - return siblings[idx + 1] - else: - return None - - def __getitem__(self, key): - return self.obj[key] - - def __bool__(self): - return bool(self.obj) - - def getparent(self): - return None - - def __str__(self): - return str(self.obj) - - def __unicode__(self): - return str(self.obj) - - def __len__(self): - return len(self.obj) - - -class TreeWalker(_base.NonRecursiveTreeWalker): - def __init__(self, tree): - if hasattr(tree, "getroot"): - tree = Root(tree) - elif isinstance(tree, list): - tree = FragmentRoot(tree) - _base.NonRecursiveTreeWalker.__init__(self, tree) - self.filter = ihatexml.InfosetFilter() - - def getNodeDetails(self, node): - if isinstance(node, tuple): # Text node - node, key = node - assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key - return _base.TEXT, ensure_str(getattr(node, key)) - - elif isinstance(node, Root): - return (_base.DOCUMENT,) - - elif isinstance(node, Doctype): - return _base.DOCTYPE, node.name, node.public_id, node.system_id - - elif isinstance(node, FragmentWrapper) and node.isstring: - return _base.TEXT, node.obj - - elif node.tag == etree.Comment: - return _base.COMMENT, ensure_str(node.text) - - elif node.tag == etree.Entity: - return _base.ENTITY, ensure_str(node.text)[1:-1] # strip &; - - else: - # This is assumed to be an ordinary element - match = tag_regexp.match(ensure_str(node.tag)) - if match: - namespace, tag = match.groups() - else: - namespace = None - tag = ensure_str(node.tag) - attrs = {} - for name, value in list(node.attrib.items()): - name = ensure_str(name) - value = ensure_str(value) - match = tag_regexp.match(name) - if match: - attrs[(match.group(1), match.group(2))] = value - else: - attrs[(None, name)] = value - return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag), - attrs, len(node) > 0 or node.text) - - def getFirstChild(self, node): - assert not isinstance(node, tuple), _("Text nodes have no children") - - assert len(node) or node.text, "Node has no children" - if node.text: - return (node, "text") - else: - return node[0] - - def getNextSibling(self, node): - if isinstance(node, tuple): # Text node - node, key = node - assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key - if key == "text": - # XXX: we cannot use a "bool(node) and node[0] or None" construct here - # because node[0] might evaluate to False if it has no child element - if len(node): - return node[0] - else: - return None - else: # tail - return node.getnext() - - return (node, "tail") if node.tail else node.getnext() - - def getParentNode(self, node): - if isinstance(node, tuple): # Text node - node, key = node - assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key - if key == "text": - return node - # else: fallback to "normal" processing - - return node.getparent() diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/pulldom.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/pulldom.py deleted file mode 100644 index 0b0f515..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/pulldom.py +++ /dev/null @@ -1,63 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \ - COMMENT, IGNORABLE_WHITESPACE, CHARACTERS - -from . import _base - -from ..constants import voidElements - - -class TreeWalker(_base.TreeWalker): - def __iter__(self): - ignore_until = None - previous = None - for event in self.tree: - if previous is not None and \ - (ignore_until is None or previous[1] is ignore_until): - if previous[1] is ignore_until: - ignore_until = None - for token in self.tokens(previous, event): - yield token - if token["type"] == "EmptyTag": - ignore_until = previous[1] - previous = event - if ignore_until is None or previous[1] is ignore_until: - for token in self.tokens(previous, None): - yield token - elif ignore_until is not None: - raise ValueError("Illformed DOM event stream: void element without END_ELEMENT") - - def tokens(self, event, next): - type, node = event - if type == START_ELEMENT: - name = node.nodeName - namespace = node.namespaceURI - attrs = {} - for attr in list(node.attributes.keys()): - attr = node.getAttributeNode(attr) - attrs[(attr.namespaceURI, attr.localName)] = attr.value - if name in voidElements: - for token in self.emptyTag(namespace, - name, - attrs, - not next or next[1] is not node): - yield token - else: - yield self.startTag(namespace, name, attrs) - - elif type == END_ELEMENT: - name = node.nodeName - namespace = node.namespaceURI - if name not in voidElements: - yield self.endTag(namespace, name) - - elif type == COMMENT: - yield self.comment(node.nodeValue) - - elif type in (IGNORABLE_WHITESPACE, CHARACTERS): - for token in self.text(node.nodeValue): - yield token - - else: - yield self.unknown(type) |