summaryrefslogtreecommitdiffstats
path: root/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers
diff options
context:
space:
mode:
Diffstat (limited to 'jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers')
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py57
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/_base.py196
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/dom.py46
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/etree.py131
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/genshistream.py69
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/lxmletree.py208
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/pulldom.py63
7 files changed, 0 insertions, 770 deletions
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py
deleted file mode 100644
index 18124e7..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py
+++ /dev/null
@@ -1,57 +0,0 @@
-"""A collection of modules for iterating through different kinds of
-tree, generating tokens identical to those produced by the tokenizer
-module.
-
-To create a tree walker for a new type of tree, you need to do
-implement a tree walker object (called TreeWalker by convention) that
-implements a 'serialize' method taking a tree as sole argument and
-returning an iterator generating tokens.
-"""
-
-from __future__ import absolute_import, division, unicode_literals
-
-import sys
-
-from ..utils import default_etree
-
-treeWalkerCache = {}
-
-
-def getTreeWalker(treeType, implementation=None, **kwargs):
- """Get a TreeWalker class for various types of tree with built-in support
-
- treeType - the name of the tree type required (case-insensitive). Supported
- values are:
-
- "dom" - The xml.dom.minidom DOM implementation
- "pulldom" - The xml.dom.pulldom event stream
- "etree" - A generic walker for tree implementations exposing an
- elementtree-like interface (known to work with
- ElementTree, cElementTree and lxml.etree).
- "lxml" - Optimized walker for lxml.etree
- "genshi" - a Genshi stream
-
- implementation - (Currently applies to the "etree" tree type only). A module
- implementing the tree type e.g. xml.etree.ElementTree or
- cElementTree."""
-
- treeType = treeType.lower()
- if treeType not in treeWalkerCache:
- if treeType in ("dom", "pulldom"):
- name = "%s.%s" % (__name__, treeType)
- __import__(name)
- mod = sys.modules[name]
- treeWalkerCache[treeType] = mod.TreeWalker
- elif treeType == "genshi":
- from . import genshistream
- treeWalkerCache[treeType] = genshistream.TreeWalker
- elif treeType == "lxml":
- from . import lxmletree
- treeWalkerCache[treeType] = lxmletree.TreeWalker
- elif treeType == "etree":
- from . import etree
- if implementation is None:
- implementation = default_etree
- # XXX: NEVER cache here, caching is done in the etree submodule
- return etree.getETreeModule(implementation, **kwargs).TreeWalker
- return treeWalkerCache.get(treeType)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/_base.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/_base.py
deleted file mode 100644
index a202359..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/_base.py
+++ /dev/null
@@ -1,196 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
-
-import gettext
-_ = gettext.gettext
-
-from ..constants import voidElements, spaceCharacters
-spaceCharacters = "".join(spaceCharacters)
-
-
-class TreeWalker(object):
- def __init__(self, tree):
- self.tree = tree
-
- def __iter__(self):
- raise NotImplementedError
-
- def error(self, msg):
- return {"type": "SerializeError", "data": msg}
-
- def emptyTag(self, namespace, name, attrs, hasChildren=False):
- assert namespace is None or isinstance(namespace, text_type), type(namespace)
- assert isinstance(name, text_type), type(name)
- assert all((namespace is None or isinstance(namespace, text_type)) and
- isinstance(name, text_type) and
- isinstance(value, text_type)
- for (namespace, name), value in attrs.items())
-
- yield {"type": "EmptyTag", "name": name,
- "namespace": namespace,
- "data": attrs}
- if hasChildren:
- yield self.error(_("Void element has children"))
-
- def startTag(self, namespace, name, attrs):
- assert namespace is None or isinstance(namespace, text_type), type(namespace)
- assert isinstance(name, text_type), type(name)
- assert all((namespace is None or isinstance(namespace, text_type)) and
- isinstance(name, text_type) and
- isinstance(value, text_type)
- for (namespace, name), value in attrs.items())
-
- return {"type": "StartTag",
- "name": name,
- "namespace": namespace,
- "data": attrs}
-
- def endTag(self, namespace, name):
- assert namespace is None or isinstance(namespace, text_type), type(namespace)
- assert isinstance(name, text_type), type(namespace)
-
- return {"type": "EndTag",
- "name": name,
- "namespace": namespace,
- "data": {}}
-
- def text(self, data):
- assert isinstance(data, text_type), type(data)
-
- data = data
- middle = data.lstrip(spaceCharacters)
- left = data[:len(data) - len(middle)]
- if left:
- yield {"type": "SpaceCharacters", "data": left}
- data = middle
- middle = data.rstrip(spaceCharacters)
- right = data[len(middle):]
- if middle:
- yield {"type": "Characters", "data": middle}
- if right:
- yield {"type": "SpaceCharacters", "data": right}
-
- def comment(self, data):
- assert isinstance(data, text_type), type(data)
-
- return {"type": "Comment", "data": data}
-
- def doctype(self, name, publicId=None, systemId=None, correct=True):
- assert name is None or isinstance(name, text_type), type(name)
- assert publicId is None or isinstance(publicId, text_type), type(publicId)
- assert systemId is None or isinstance(systemId, text_type), type(systemId)
-
- return {"type": "Doctype",
- "name": name if name is not None else "",
- "publicId": publicId,
- "systemId": systemId,
- "correct": correct}
-
- def entity(self, name):
- assert isinstance(name, text_type), type(name)
-
- return {"type": "Entity", "name": name}
-
- def unknown(self, nodeType):
- return self.error(_("Unknown node type: ") + nodeType)
-
-
-class RecursiveTreeWalker(TreeWalker):
- def walkChildren(self, node):
- raise NotImplementedError
-
- def element(self, node, namespace, name, attrs, hasChildren):
- if name in voidElements:
- for token in self.emptyTag(namespace, name, attrs, hasChildren):
- yield token
- else:
- yield self.startTag(name, attrs)
- if hasChildren:
- for token in self.walkChildren(node):
- yield token
- yield self.endTag(name)
-
-from xml.dom import Node
-
-DOCUMENT = Node.DOCUMENT_NODE
-DOCTYPE = Node.DOCUMENT_TYPE_NODE
-TEXT = Node.TEXT_NODE
-ELEMENT = Node.ELEMENT_NODE
-COMMENT = Node.COMMENT_NODE
-ENTITY = Node.ENTITY_NODE
-UNKNOWN = "<#UNKNOWN#>"
-
-
-class NonRecursiveTreeWalker(TreeWalker):
- def getNodeDetails(self, node):
- raise NotImplementedError
-
- def getFirstChild(self, node):
- raise NotImplementedError
-
- def getNextSibling(self, node):
- raise NotImplementedError
-
- def getParentNode(self, node):
- raise NotImplementedError
-
- def __iter__(self):
- currentNode = self.tree
- while currentNode is not None:
- details = self.getNodeDetails(currentNode)
- type, details = details[0], details[1:]
- hasChildren = False
-
- if type == DOCTYPE:
- yield self.doctype(*details)
-
- elif type == TEXT:
- for token in self.text(*details):
- yield token
-
- elif type == ELEMENT:
- namespace, name, attributes, hasChildren = details
- if name in voidElements:
- for token in self.emptyTag(namespace, name, attributes,
- hasChildren):
- yield token
- hasChildren = False
- else:
- yield self.startTag(namespace, name, attributes)
-
- elif type == COMMENT:
- yield self.comment(details[0])
-
- elif type == ENTITY:
- yield self.entity(details[0])
-
- elif type == DOCUMENT:
- hasChildren = True
-
- else:
- yield self.unknown(details[0])
-
- if hasChildren:
- firstChild = self.getFirstChild(currentNode)
- else:
- firstChild = None
-
- if firstChild is not None:
- currentNode = firstChild
- else:
- while currentNode is not None:
- details = self.getNodeDetails(currentNode)
- type, details = details[0], details[1:]
- if type == ELEMENT:
- namespace, name, attributes, hasChildren = details
- if name not in voidElements:
- yield self.endTag(namespace, name)
- if self.tree is currentNode:
- currentNode = None
- break
- nextSibling = self.getNextSibling(currentNode)
- if nextSibling is not None:
- currentNode = nextSibling
- break
- else:
- currentNode = self.getParentNode(currentNode)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/dom.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/dom.py
deleted file mode 100644
index a01287a..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/dom.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from xml.dom import Node
-
-import gettext
-_ = gettext.gettext
-
-from . import _base
-
-
-class TreeWalker(_base.NonRecursiveTreeWalker):
- def getNodeDetails(self, node):
- if node.nodeType == Node.DOCUMENT_TYPE_NODE:
- return _base.DOCTYPE, node.name, node.publicId, node.systemId
-
- elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
- return _base.TEXT, node.nodeValue
-
- elif node.nodeType == Node.ELEMENT_NODE:
- attrs = {}
- for attr in list(node.attributes.keys()):
- attr = node.getAttributeNode(attr)
- if attr.namespaceURI:
- attrs[(attr.namespaceURI, attr.localName)] = attr.value
- else:
- attrs[(None, attr.name)] = attr.value
- return (_base.ELEMENT, node.namespaceURI, node.nodeName,
- attrs, node.hasChildNodes())
-
- elif node.nodeType == Node.COMMENT_NODE:
- return _base.COMMENT, node.nodeValue
-
- elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
- return (_base.DOCUMENT,)
-
- else:
- return _base.UNKNOWN, node.nodeType
-
- def getFirstChild(self, node):
- return node.firstChild
-
- def getNextSibling(self, node):
- return node.nextSibling
-
- def getParentNode(self, node):
- return node.parentNode
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/etree.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/etree.py
deleted file mode 100644
index 88fb981..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/etree.py
+++ /dev/null
@@ -1,131 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import gettext
-_ = gettext.gettext
-
-import re
-
-from pip._vendor.six import text_type
-
-from . import _base
-from ..utils import moduleFactoryFactory
-
-tag_regexp = re.compile("{([^}]*)}(.*)")
-
-
-def getETreeBuilder(ElementTreeImplementation):
- ElementTree = ElementTreeImplementation
- ElementTreeCommentType = ElementTree.Comment("asd").tag
-
- class TreeWalker(_base.NonRecursiveTreeWalker):
- """Given the particular ElementTree representation, this implementation,
- to avoid using recursion, returns "nodes" as tuples with the following
- content:
-
- 1. The current element
-
- 2. The index of the element relative to its parent
-
- 3. A stack of ancestor elements
-
- 4. A flag "text", "tail" or None to indicate if the current node is a
- text node; either the text or tail of the current element (1)
- """
- def getNodeDetails(self, node):
- if isinstance(node, tuple): # It might be the root Element
- elt, key, parents, flag = node
- if flag in ("text", "tail"):
- return _base.TEXT, getattr(elt, flag)
- else:
- node = elt
-
- if not(hasattr(node, "tag")):
- node = node.getroot()
-
- if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
- return (_base.DOCUMENT,)
-
- elif node.tag == "<!DOCTYPE>":
- return (_base.DOCTYPE, node.text,
- node.get("publicId"), node.get("systemId"))
-
- elif node.tag == ElementTreeCommentType:
- return _base.COMMENT, node.text
-
- else:
- assert type(node.tag) == text_type, type(node.tag)
- # This is assumed to be an ordinary element
- match = tag_regexp.match(node.tag)
- if match:
- namespace, tag = match.groups()
- else:
- namespace = None
- tag = node.tag
- attrs = {}
- for name, value in list(node.attrib.items()):
- match = tag_regexp.match(name)
- if match:
- attrs[(match.group(1), match.group(2))] = value
- else:
- attrs[(None, name)] = value
- return (_base.ELEMENT, namespace, tag,
- attrs, len(node) or node.text)
-
- def getFirstChild(self, node):
- if isinstance(node, tuple):
- element, key, parents, flag = node
- else:
- element, key, parents, flag = node, None, [], None
-
- if flag in ("text", "tail"):
- return None
- else:
- if element.text:
- return element, key, parents, "text"
- elif len(element):
- parents.append(element)
- return element[0], 0, parents, None
- else:
- return None
-
- def getNextSibling(self, node):
- if isinstance(node, tuple):
- element, key, parents, flag = node
- else:
- return None
-
- if flag == "text":
- if len(element):
- parents.append(element)
- return element[0], 0, parents, None
- else:
- return None
- else:
- if element.tail and flag != "tail":
- return element, key, parents, "tail"
- elif key < len(parents[-1]) - 1:
- return parents[-1][key + 1], key + 1, parents, None
- else:
- return None
-
- def getParentNode(self, node):
- if isinstance(node, tuple):
- element, key, parents, flag = node
- else:
- return None
-
- if flag == "text":
- if not parents:
- return element
- else:
- return element, key, parents, None
- else:
- parent = parents.pop()
- if not parents:
- return parent
- else:
- return parent, list(parents[-1]).index(parent), parents, None
-
- return locals()
-
-getETreeModule = moduleFactoryFactory(getETreeBuilder)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/genshistream.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/genshistream.py
deleted file mode 100644
index f559c45..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/genshistream.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from genshi.core import QName
-from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
-from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
-
-from . import _base
-
-from ..constants import voidElements, namespaces
-
-
-class TreeWalker(_base.TreeWalker):
- def __iter__(self):
- # Buffer the events so we can pass in the following one
- previous = None
- for event in self.tree:
- if previous is not None:
- for token in self.tokens(previous, event):
- yield token
- previous = event
-
- # Don't forget the final event!
- if previous is not None:
- for token in self.tokens(previous, None):
- yield token
-
- def tokens(self, event, next):
- kind, data, pos = event
- if kind == START:
- tag, attribs = data
- name = tag.localname
- namespace = tag.namespace
- converted_attribs = {}
- for k, v in attribs:
- if isinstance(k, QName):
- converted_attribs[(k.namespace, k.localname)] = v
- else:
- converted_attribs[(None, k)] = v
-
- if namespace == namespaces["html"] and name in voidElements:
- for token in self.emptyTag(namespace, name, converted_attribs,
- not next or next[0] != END
- or next[1] != tag):
- yield token
- else:
- yield self.startTag(namespace, name, converted_attribs)
-
- elif kind == END:
- name = data.localname
- namespace = data.namespace
- if name not in voidElements:
- yield self.endTag(namespace, name)
-
- elif kind == COMMENT:
- yield self.comment(data)
-
- elif kind == TEXT:
- for token in self.text(data):
- yield token
-
- elif kind == DOCTYPE:
- yield self.doctype(*data)
-
- elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS,
- START_CDATA, END_CDATA, PI):
- pass
-
- else:
- yield self.unknown(kind)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/lxmletree.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/lxmletree.py
deleted file mode 100644
index 4373383..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/lxmletree.py
+++ /dev/null
@@ -1,208 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
-
-from lxml import etree
-from ..treebuilders.etree import tag_regexp
-
-from gettext import gettext
-_ = gettext
-
-from . import _base
-
-from .. import ihatexml
-
-
-def ensure_str(s):
- if s is None:
- return None
- elif isinstance(s, text_type):
- return s
- else:
- return s.decode("utf-8", "strict")
-
-
-class Root(object):
- def __init__(self, et):
- self.elementtree = et
- self.children = []
- if et.docinfo.internalDTD:
- self.children.append(Doctype(self,
- ensure_str(et.docinfo.root_name),
- ensure_str(et.docinfo.public_id),
- ensure_str(et.docinfo.system_url)))
- root = et.getroot()
- node = root
-
- while node.getprevious() is not None:
- node = node.getprevious()
- while node is not None:
- self.children.append(node)
- node = node.getnext()
-
- self.text = None
- self.tail = None
-
- def __getitem__(self, key):
- return self.children[key]
-
- def getnext(self):
- return None
-
- def __len__(self):
- return 1
-
-
-class Doctype(object):
- def __init__(self, root_node, name, public_id, system_id):
- self.root_node = root_node
- self.name = name
- self.public_id = public_id
- self.system_id = system_id
-
- self.text = None
- self.tail = None
-
- def getnext(self):
- return self.root_node.children[1]
-
-
-class FragmentRoot(Root):
- def __init__(self, children):
- self.children = [FragmentWrapper(self, child) for child in children]
- self.text = self.tail = None
-
- def getnext(self):
- return None
-
-
-class FragmentWrapper(object):
- def __init__(self, fragment_root, obj):
- self.root_node = fragment_root
- self.obj = obj
- if hasattr(self.obj, 'text'):
- self.text = ensure_str(self.obj.text)
- else:
- self.text = None
- if hasattr(self.obj, 'tail'):
- self.tail = ensure_str(self.obj.tail)
- else:
- self.tail = None
- self.isstring = isinstance(obj, str) or isinstance(obj, bytes)
- # Support for bytes here is Py2
- if self.isstring:
- self.obj = ensure_str(self.obj)
-
- def __getattr__(self, name):
- return getattr(self.obj, name)
-
- def getnext(self):
- siblings = self.root_node.children
- idx = siblings.index(self)
- if idx < len(siblings) - 1:
- return siblings[idx + 1]
- else:
- return None
-
- def __getitem__(self, key):
- return self.obj[key]
-
- def __bool__(self):
- return bool(self.obj)
-
- def getparent(self):
- return None
-
- def __str__(self):
- return str(self.obj)
-
- def __unicode__(self):
- return str(self.obj)
-
- def __len__(self):
- return len(self.obj)
-
-
-class TreeWalker(_base.NonRecursiveTreeWalker):
- def __init__(self, tree):
- if hasattr(tree, "getroot"):
- tree = Root(tree)
- elif isinstance(tree, list):
- tree = FragmentRoot(tree)
- _base.NonRecursiveTreeWalker.__init__(self, tree)
- self.filter = ihatexml.InfosetFilter()
-
- def getNodeDetails(self, node):
- if isinstance(node, tuple): # Text node
- node, key = node
- assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
- return _base.TEXT, ensure_str(getattr(node, key))
-
- elif isinstance(node, Root):
- return (_base.DOCUMENT,)
-
- elif isinstance(node, Doctype):
- return _base.DOCTYPE, node.name, node.public_id, node.system_id
-
- elif isinstance(node, FragmentWrapper) and node.isstring:
- return _base.TEXT, node.obj
-
- elif node.tag == etree.Comment:
- return _base.COMMENT, ensure_str(node.text)
-
- elif node.tag == etree.Entity:
- return _base.ENTITY, ensure_str(node.text)[1:-1] # strip &;
-
- else:
- # This is assumed to be an ordinary element
- match = tag_regexp.match(ensure_str(node.tag))
- if match:
- namespace, tag = match.groups()
- else:
- namespace = None
- tag = ensure_str(node.tag)
- attrs = {}
- for name, value in list(node.attrib.items()):
- name = ensure_str(name)
- value = ensure_str(value)
- match = tag_regexp.match(name)
- if match:
- attrs[(match.group(1), match.group(2))] = value
- else:
- attrs[(None, name)] = value
- return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
- attrs, len(node) > 0 or node.text)
-
- def getFirstChild(self, node):
- assert not isinstance(node, tuple), _("Text nodes have no children")
-
- assert len(node) or node.text, "Node has no children"
- if node.text:
- return (node, "text")
- else:
- return node[0]
-
- def getNextSibling(self, node):
- if isinstance(node, tuple): # Text node
- node, key = node
- assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
- if key == "text":
- # XXX: we cannot use a "bool(node) and node[0] or None" construct here
- # because node[0] might evaluate to False if it has no child element
- if len(node):
- return node[0]
- else:
- return None
- else: # tail
- return node.getnext()
-
- return (node, "tail") if node.tail else node.getnext()
-
- def getParentNode(self, node):
- if isinstance(node, tuple): # Text node
- node, key = node
- assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
- if key == "text":
- return node
- # else: fallback to "normal" processing
-
- return node.getparent()
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/pulldom.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/pulldom.py
deleted file mode 100644
index 0b0f515..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/pulldom.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
- COMMENT, IGNORABLE_WHITESPACE, CHARACTERS
-
-from . import _base
-
-from ..constants import voidElements
-
-
-class TreeWalker(_base.TreeWalker):
- def __iter__(self):
- ignore_until = None
- previous = None
- for event in self.tree:
- if previous is not None and \
- (ignore_until is None or previous[1] is ignore_until):
- if previous[1] is ignore_until:
- ignore_until = None
- for token in self.tokens(previous, event):
- yield token
- if token["type"] == "EmptyTag":
- ignore_until = previous[1]
- previous = event
- if ignore_until is None or previous[1] is ignore_until:
- for token in self.tokens(previous, None):
- yield token
- elif ignore_until is not None:
- raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")
-
- def tokens(self, event, next):
- type, node = event
- if type == START_ELEMENT:
- name = node.nodeName
- namespace = node.namespaceURI
- attrs = {}
- for attr in list(node.attributes.keys()):
- attr = node.getAttributeNode(attr)
- attrs[(attr.namespaceURI, attr.localName)] = attr.value
- if name in voidElements:
- for token in self.emptyTag(namespace,
- name,
- attrs,
- not next or next[1] is not node):
- yield token
- else:
- yield self.startTag(namespace, name, attrs)
-
- elif type == END_ELEMENT:
- name = node.nodeName
- namespace = node.namespaceURI
- if name not in voidElements:
- yield self.endTag(namespace, name)
-
- elif type == COMMENT:
- yield self.comment(node.nodeValue)
-
- elif type in (IGNORABLE_WHITESPACE, CHARACTERS):
- for token in self.text(node.nodeValue):
- yield token
-
- else:
- yield self.unknown(type)