diff options
Diffstat (limited to 'jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters')
8 files changed, 0 insertions, 445 deletions
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/__init__.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/__init__.py +++ /dev/null diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/_base.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/_base.py deleted file mode 100644 index c7dbaed..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/_base.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - - -class Filter(object): - def __init__(self, source): - self.source = source - - def __iter__(self): - return iter(self.source) - - def __getattr__(self, name): - return getattr(self.source, name) diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py deleted file mode 100644 index fed6996..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py +++ /dev/null @@ -1,20 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import _base - -try: - from collections import OrderedDict -except ImportError: - from ordereddict import OrderedDict - - -class Filter(_base.Filter): - def __iter__(self): - for token in _base.Filter.__iter__(self): - if token["type"] in ("StartTag", "EmptyTag"): - attrs = OrderedDict() - for name, value in sorted(token["data"].items(), - key=lambda x: x[0]): - attrs[name] = value - token["data"] = attrs - yield token diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py deleted file mode 100644 index ca33b70..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import _base - - -class Filter(_base.Filter): - def __init__(self, source, encoding): - _base.Filter.__init__(self, source) - self.encoding = encoding - - def __iter__(self): - state = "pre_head" - meta_found = (self.encoding is None) - pending = [] - - for token in _base.Filter.__iter__(self): - type = token["type"] - if type == "StartTag": - if token["name"].lower() == "head": - state = "in_head" - - elif type == "EmptyTag": - if token["name"].lower() == "meta": - # replace charset with actual encoding - has_http_equiv_content_type = False - for (namespace, name), value in token["data"].items(): - if namespace is not None: - continue - elif name.lower() == 'charset': - token["data"][(namespace, name)] = self.encoding - meta_found = True - break - elif name == 'http-equiv' and value.lower() == 'content-type': - has_http_equiv_content_type = True - else: - if has_http_equiv_content_type and (None, "content") in token["data"]: - token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding - meta_found = True - - elif token["name"].lower() == "head" and not meta_found: - # insert meta into empty head - yield {"type": "StartTag", "name": "head", - "data": token["data"]} - yield {"type": "EmptyTag", "name": "meta", - "data": {(None, "charset"): self.encoding}} - yield {"type": "EndTag", "name": "head"} - meta_found = True - continue - - elif type == "EndTag": - if token["name"].lower() == "head" and pending: - # insert meta into head (if necessary) and flush pending queue - yield pending.pop(0) - if not meta_found: - yield {"type": "EmptyTag", "name": "meta", - "data": {(None, "charset"): self.encoding}} - while pending: - yield pending.pop(0) - meta_found = True - state = "post_head" - - if state == "in_head": - pending.append(token) - else: - yield token diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/lint.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/lint.py deleted file mode 100644 index 83ad639..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/lint.py +++ /dev/null @@ -1,93 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from gettext import gettext -_ = gettext - -from . import _base -from ..constants import cdataElements, rcdataElements, voidElements - -from ..constants import spaceCharacters -spaceCharacters = "".join(spaceCharacters) - - -class LintError(Exception): - pass - - -class Filter(_base.Filter): - def __iter__(self): - open_elements = [] - contentModelFlag = "PCDATA" - for token in _base.Filter.__iter__(self): - type = token["type"] - if type in ("StartTag", "EmptyTag"): - name = token["name"] - if contentModelFlag != "PCDATA": - raise LintError(_("StartTag not in PCDATA content model flag: %s") % name) - if not isinstance(name, str): - raise LintError(_("Tag name is not a string: %r") % name) - if not name: - raise LintError(_("Empty tag name")) - if type == "StartTag" and name in voidElements: - raise LintError(_("Void element reported as StartTag token: %s") % name) - elif type == "EmptyTag" and name not in voidElements: - raise LintError(_("Non-void element reported as EmptyTag token: %s") % token["name"]) - if type == "StartTag": - open_elements.append(name) - for name, value in token["data"]: - if not isinstance(name, str): - raise LintError(_("Attribute name is not a string: %r") % name) - if not name: - raise LintError(_("Empty attribute name")) - if not isinstance(value, str): - raise LintError(_("Attribute value is not a string: %r") % value) - if name in cdataElements: - contentModelFlag = "CDATA" - elif name in rcdataElements: - contentModelFlag = "RCDATA" - elif name == "plaintext": - contentModelFlag = "PLAINTEXT" - - elif type == "EndTag": - name = token["name"] - if not isinstance(name, str): - raise LintError(_("Tag name is not a string: %r") % name) - if not name: - raise LintError(_("Empty tag name")) - if name in voidElements: - raise LintError(_("Void element reported as EndTag token: %s") % name) - start_name = open_elements.pop() - if start_name != name: - raise LintError(_("EndTag (%s) does not match StartTag (%s)") % (name, start_name)) - contentModelFlag = "PCDATA" - - elif type == "Comment": - if contentModelFlag != "PCDATA": - raise LintError(_("Comment not in PCDATA content model flag")) - - elif type in ("Characters", "SpaceCharacters"): - data = token["data"] - if not isinstance(data, str): - raise LintError(_("Attribute name is not a string: %r") % data) - if not data: - raise LintError(_("%s token with empty data") % type) - if type == "SpaceCharacters": - data = data.strip(spaceCharacters) - if data: - raise LintError(_("Non-space character(s) found in SpaceCharacters token: ") % data) - - elif type == "Doctype": - name = token["name"] - if contentModelFlag != "PCDATA": - raise LintError(_("Doctype not in PCDATA content model flag: %s") % name) - if not isinstance(name, str): - raise LintError(_("Tag name is not a string: %r") % name) - # XXX: what to do with token["data"] ? - - elif type in ("ParseError", "SerializeError"): - pass - - else: - raise LintError(_("Unknown token type: %s") % type) - - yield token diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py deleted file mode 100644 index fefe0b3..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py +++ /dev/null @@ -1,205 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import _base - - -class Filter(_base.Filter): - def slider(self): - previous1 = previous2 = None - for token in self.source: - if previous1 is not None: - yield previous2, previous1, token - previous2 = previous1 - previous1 = token - yield previous2, previous1, None - - def __iter__(self): - for previous, token, next in self.slider(): - type = token["type"] - if type == "StartTag": - if (token["data"] or - not self.is_optional_start(token["name"], previous, next)): - yield token - elif type == "EndTag": - if not self.is_optional_end(token["name"], next): - yield token - else: - yield token - - def is_optional_start(self, tagname, previous, next): - type = next and next["type"] or None - if tagname in 'html': - # An html element's start tag may be omitted if the first thing - # inside the html element is not a space character or a comment. - return type not in ("Comment", "SpaceCharacters") - elif tagname == 'head': - # A head element's start tag may be omitted if the first thing - # inside the head element is an element. - # XXX: we also omit the start tag if the head element is empty - if type in ("StartTag", "EmptyTag"): - return True - elif type == "EndTag": - return next["name"] == "head" - elif tagname == 'body': - # A body element's start tag may be omitted if the first thing - # inside the body element is not a space character or a comment, - # except if the first thing inside the body element is a script - # or style element and the node immediately preceding the body - # element is a head element whose end tag has been omitted. - if type in ("Comment", "SpaceCharacters"): - return False - elif type == "StartTag": - # XXX: we do not look at the preceding event, so we never omit - # the body element's start tag if it's followed by a script or - # a style element. - return next["name"] not in ('script', 'style') - else: - return True - elif tagname == 'colgroup': - # A colgroup element's start tag may be omitted if the first thing - # inside the colgroup element is a col element, and if the element - # is not immediately preceeded by another colgroup element whose - # end tag has been omitted. - if type in ("StartTag", "EmptyTag"): - # XXX: we do not look at the preceding event, so instead we never - # omit the colgroup element's end tag when it is immediately - # followed by another colgroup element. See is_optional_end. - return next["name"] == "col" - else: - return False - elif tagname == 'tbody': - # A tbody element's start tag may be omitted if the first thing - # inside the tbody element is a tr element, and if the element is - # not immediately preceeded by a tbody, thead, or tfoot element - # whose end tag has been omitted. - if type == "StartTag": - # omit the thead and tfoot elements' end tag when they are - # immediately followed by a tbody element. See is_optional_end. - if previous and previous['type'] == 'EndTag' and \ - previous['name'] in ('tbody', 'thead', 'tfoot'): - return False - return next["name"] == 'tr' - else: - return False - return False - - def is_optional_end(self, tagname, next): - type = next and next["type"] or None - if tagname in ('html', 'head', 'body'): - # An html element's end tag may be omitted if the html element - # is not immediately followed by a space character or a comment. - return type not in ("Comment", "SpaceCharacters") - elif tagname in ('li', 'optgroup', 'tr'): - # A li element's end tag may be omitted if the li element is - # immediately followed by another li element or if there is - # no more content in the parent element. - # An optgroup element's end tag may be omitted if the optgroup - # element is immediately followed by another optgroup element, - # or if there is no more content in the parent element. - # A tr element's end tag may be omitted if the tr element is - # immediately followed by another tr element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] == tagname - else: - return type == "EndTag" or type is None - elif tagname in ('dt', 'dd'): - # A dt element's end tag may be omitted if the dt element is - # immediately followed by another dt element or a dd element. - # A dd element's end tag may be omitted if the dd element is - # immediately followed by another dd element or a dt element, - # or if there is no more content in the parent element. - if type == "StartTag": - return next["name"] in ('dt', 'dd') - elif tagname == 'dd': - return type == "EndTag" or type is None - else: - return False - elif tagname == 'p': - # A p element's end tag may be omitted if the p element is - # immediately followed by an address, article, aside, - # blockquote, datagrid, dialog, dir, div, dl, fieldset, - # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu, - # nav, ol, p, pre, section, table, or ul, element, or if - # there is no more content in the parent element. - if type in ("StartTag", "EmptyTag"): - return next["name"] in ('address', 'article', 'aside', - 'blockquote', 'datagrid', 'dialog', - 'dir', 'div', 'dl', 'fieldset', 'footer', - 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', - 'header', 'hr', 'menu', 'nav', 'ol', - 'p', 'pre', 'section', 'table', 'ul') - else: - return type == "EndTag" or type is None - elif tagname == 'option': - # An option element's end tag may be omitted if the option - # element is immediately followed by another option element, - # or if it is immediately followed by an <code>optgroup</code> - # element, or if there is no more content in the parent - # element. - if type == "StartTag": - return next["name"] in ('option', 'optgroup') - else: - return type == "EndTag" or type is None - elif tagname in ('rt', 'rp'): - # An rt element's end tag may be omitted if the rt element is - # immediately followed by an rt or rp element, or if there is - # no more content in the parent element. - # An rp element's end tag may be omitted if the rp element is - # immediately followed by an rt or rp element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] in ('rt', 'rp') - else: - return type == "EndTag" or type is None - elif tagname == 'colgroup': - # A colgroup element's end tag may be omitted if the colgroup - # element is not immediately followed by a space character or - # a comment. - if type in ("Comment", "SpaceCharacters"): - return False - elif type == "StartTag": - # XXX: we also look for an immediately following colgroup - # element. See is_optional_start. - return next["name"] != 'colgroup' - else: - return True - elif tagname in ('thead', 'tbody'): - # A thead element's end tag may be omitted if the thead element - # is immediately followed by a tbody or tfoot element. - # A tbody element's end tag may be omitted if the tbody element - # is immediately followed by a tbody or tfoot element, or if - # there is no more content in the parent element. - # A tfoot element's end tag may be omitted if the tfoot element - # is immediately followed by a tbody element, or if there is no - # more content in the parent element. - # XXX: we never omit the end tag when the following element is - # a tbody. See is_optional_start. - if type == "StartTag": - return next["name"] in ['tbody', 'tfoot'] - elif tagname == 'tbody': - return type == "EndTag" or type is None - else: - return False - elif tagname == 'tfoot': - # A tfoot element's end tag may be omitted if the tfoot element - # is immediately followed by a tbody element, or if there is no - # more content in the parent element. - # XXX: we never omit the end tag when the following element is - # a tbody. See is_optional_start. - if type == "StartTag": - return next["name"] == 'tbody' - else: - return type == "EndTag" or type is None - elif tagname in ('td', 'th'): - # A td element's end tag may be omitted if the td element is - # immediately followed by a td or th element, or if there is - # no more content in the parent element. - # A th element's end tag may be omitted if the th element is - # immediately followed by a td or th element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] in ('td', 'th') - else: - return type == "EndTag" or type is None - return False diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/sanitizer.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/sanitizer.py deleted file mode 100644 index b206b54..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/sanitizer.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import _base -from ..sanitizer import HTMLSanitizerMixin - - -class Filter(_base.Filter, HTMLSanitizerMixin): - def __iter__(self): - for token in _base.Filter.__iter__(self): - token = self.sanitize_token(token) - if token: - yield token diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/whitespace.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/whitespace.py deleted file mode 100644 index dfc60ee..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/whitespace.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import re - -from . import _base -from ..constants import rcdataElements, spaceCharacters -spaceCharacters = "".join(spaceCharacters) - -SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) - - -class Filter(_base.Filter): - - spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) - - def __iter__(self): - preserve = 0 - for token in _base.Filter.__iter__(self): - type = token["type"] - if type == "StartTag" \ - and (preserve or token["name"] in self.spacePreserveElements): - preserve += 1 - - elif type == "EndTag" and preserve: - preserve -= 1 - - elif not preserve and type == "SpaceCharacters" and token["data"]: - # Test on token["data"] above to not introduce spaces where there were not - token["data"] = " " - - elif not preserve and type == "Characters": - token["data"] = collapse_spaces(token["data"]) - - yield token - - -def collapse_spaces(text): - return SPACES_REGEX.sub(' ', text) |