aboutsummaryrefslogtreecommitdiffstats
path: root/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters
diff options
context:
space:
mode:
Diffstat (limited to 'jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters')
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/__init__.py0
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/_base.py12
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py20
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py65
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/lint.py93
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py205
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/sanitizer.py12
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/whitespace.py38
8 files changed, 0 insertions, 445 deletions
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/__init__.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/__init__.py
+++ /dev/null
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/_base.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/_base.py
deleted file mode 100644
index c7dbaed..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/_base.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-
-class Filter(object):
- def __init__(self, source):
- self.source = source
-
- def __iter__(self):
- return iter(self.source)
-
- def __getattr__(self, name):
- return getattr(self.source, name)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py
deleted file mode 100644
index fed6996..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import _base
-
-try:
- from collections import OrderedDict
-except ImportError:
- from ordereddict import OrderedDict
-
-
-class Filter(_base.Filter):
- def __iter__(self):
- for token in _base.Filter.__iter__(self):
- if token["type"] in ("StartTag", "EmptyTag"):
- attrs = OrderedDict()
- for name, value in sorted(token["data"].items(),
- key=lambda x: x[0]):
- attrs[name] = value
- token["data"] = attrs
- yield token
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py
deleted file mode 100644
index ca33b70..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import _base
-
-
-class Filter(_base.Filter):
- def __init__(self, source, encoding):
- _base.Filter.__init__(self, source)
- self.encoding = encoding
-
- def __iter__(self):
- state = "pre_head"
- meta_found = (self.encoding is None)
- pending = []
-
- for token in _base.Filter.__iter__(self):
- type = token["type"]
- if type == "StartTag":
- if token["name"].lower() == "head":
- state = "in_head"
-
- elif type == "EmptyTag":
- if token["name"].lower() == "meta":
- # replace charset with actual encoding
- has_http_equiv_content_type = False
- for (namespace, name), value in token["data"].items():
- if namespace is not None:
- continue
- elif name.lower() == 'charset':
- token["data"][(namespace, name)] = self.encoding
- meta_found = True
- break
- elif name == 'http-equiv' and value.lower() == 'content-type':
- has_http_equiv_content_type = True
- else:
- if has_http_equiv_content_type and (None, "content") in token["data"]:
- token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
- meta_found = True
-
- elif token["name"].lower() == "head" and not meta_found:
- # insert meta into empty head
- yield {"type": "StartTag", "name": "head",
- "data": token["data"]}
- yield {"type": "EmptyTag", "name": "meta",
- "data": {(None, "charset"): self.encoding}}
- yield {"type": "EndTag", "name": "head"}
- meta_found = True
- continue
-
- elif type == "EndTag":
- if token["name"].lower() == "head" and pending:
- # insert meta into head (if necessary) and flush pending queue
- yield pending.pop(0)
- if not meta_found:
- yield {"type": "EmptyTag", "name": "meta",
- "data": {(None, "charset"): self.encoding}}
- while pending:
- yield pending.pop(0)
- meta_found = True
- state = "post_head"
-
- if state == "in_head":
- pending.append(token)
- else:
- yield token
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/lint.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/lint.py
deleted file mode 100644
index 83ad639..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/lint.py
+++ /dev/null
@@ -1,93 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from gettext import gettext
-_ = gettext
-
-from . import _base
-from ..constants import cdataElements, rcdataElements, voidElements
-
-from ..constants import spaceCharacters
-spaceCharacters = "".join(spaceCharacters)
-
-
-class LintError(Exception):
- pass
-
-
-class Filter(_base.Filter):
- def __iter__(self):
- open_elements = []
- contentModelFlag = "PCDATA"
- for token in _base.Filter.__iter__(self):
- type = token["type"]
- if type in ("StartTag", "EmptyTag"):
- name = token["name"]
- if contentModelFlag != "PCDATA":
- raise LintError(_("StartTag not in PCDATA content model flag: %s") % name)
- if not isinstance(name, str):
- raise LintError(_("Tag name is not a string: %r") % name)
- if not name:
- raise LintError(_("Empty tag name"))
- if type == "StartTag" and name in voidElements:
- raise LintError(_("Void element reported as StartTag token: %s") % name)
- elif type == "EmptyTag" and name not in voidElements:
- raise LintError(_("Non-void element reported as EmptyTag token: %s") % token["name"])
- if type == "StartTag":
- open_elements.append(name)
- for name, value in token["data"]:
- if not isinstance(name, str):
- raise LintError(_("Attribute name is not a string: %r") % name)
- if not name:
- raise LintError(_("Empty attribute name"))
- if not isinstance(value, str):
- raise LintError(_("Attribute value is not a string: %r") % value)
- if name in cdataElements:
- contentModelFlag = "CDATA"
- elif name in rcdataElements:
- contentModelFlag = "RCDATA"
- elif name == "plaintext":
- contentModelFlag = "PLAINTEXT"
-
- elif type == "EndTag":
- name = token["name"]
- if not isinstance(name, str):
- raise LintError(_("Tag name is not a string: %r") % name)
- if not name:
- raise LintError(_("Empty tag name"))
- if name in voidElements:
- raise LintError(_("Void element reported as EndTag token: %s") % name)
- start_name = open_elements.pop()
- if start_name != name:
- raise LintError(_("EndTag (%s) does not match StartTag (%s)") % (name, start_name))
- contentModelFlag = "PCDATA"
-
- elif type == "Comment":
- if contentModelFlag != "PCDATA":
- raise LintError(_("Comment not in PCDATA content model flag"))
-
- elif type in ("Characters", "SpaceCharacters"):
- data = token["data"]
- if not isinstance(data, str):
- raise LintError(_("Attribute name is not a string: %r") % data)
- if not data:
- raise LintError(_("%s token with empty data") % type)
- if type == "SpaceCharacters":
- data = data.strip(spaceCharacters)
- if data:
- raise LintError(_("Non-space character(s) found in SpaceCharacters token: ") % data)
-
- elif type == "Doctype":
- name = token["name"]
- if contentModelFlag != "PCDATA":
- raise LintError(_("Doctype not in PCDATA content model flag: %s") % name)
- if not isinstance(name, str):
- raise LintError(_("Tag name is not a string: %r") % name)
- # XXX: what to do with token["data"] ?
-
- elif type in ("ParseError", "SerializeError"):
- pass
-
- else:
- raise LintError(_("Unknown token type: %s") % type)
-
- yield token
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py
deleted file mode 100644
index fefe0b3..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py
+++ /dev/null
@@ -1,205 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import _base
-
-
-class Filter(_base.Filter):
- def slider(self):
- previous1 = previous2 = None
- for token in self.source:
- if previous1 is not None:
- yield previous2, previous1, token
- previous2 = previous1
- previous1 = token
- yield previous2, previous1, None
-
- def __iter__(self):
- for previous, token, next in self.slider():
- type = token["type"]
- if type == "StartTag":
- if (token["data"] or
- not self.is_optional_start(token["name"], previous, next)):
- yield token
- elif type == "EndTag":
- if not self.is_optional_end(token["name"], next):
- yield token
- else:
- yield token
-
- def is_optional_start(self, tagname, previous, next):
- type = next and next["type"] or None
- if tagname in 'html':
- # An html element's start tag may be omitted if the first thing
- # inside the html element is not a space character or a comment.
- return type not in ("Comment", "SpaceCharacters")
- elif tagname == 'head':
- # A head element's start tag may be omitted if the first thing
- # inside the head element is an element.
- # XXX: we also omit the start tag if the head element is empty
- if type in ("StartTag", "EmptyTag"):
- return True
- elif type == "EndTag":
- return next["name"] == "head"
- elif tagname == 'body':
- # A body element's start tag may be omitted if the first thing
- # inside the body element is not a space character or a comment,
- # except if the first thing inside the body element is a script
- # or style element and the node immediately preceding the body
- # element is a head element whose end tag has been omitted.
- if type in ("Comment", "SpaceCharacters"):
- return False
- elif type == "StartTag":
- # XXX: we do not look at the preceding event, so we never omit
- # the body element's start tag if it's followed by a script or
- # a style element.
- return next["name"] not in ('script', 'style')
- else:
- return True
- elif tagname == 'colgroup':
- # A colgroup element's start tag may be omitted if the first thing
- # inside the colgroup element is a col element, and if the element
- # is not immediately preceeded by another colgroup element whose
- # end tag has been omitted.
- if type in ("StartTag", "EmptyTag"):
- # XXX: we do not look at the preceding event, so instead we never
- # omit the colgroup element's end tag when it is immediately
- # followed by another colgroup element. See is_optional_end.
- return next["name"] == "col"
- else:
- return False
- elif tagname == 'tbody':
- # A tbody element's start tag may be omitted if the first thing
- # inside the tbody element is a tr element, and if the element is
- # not immediately preceeded by a tbody, thead, or tfoot element
- # whose end tag has been omitted.
- if type == "StartTag":
- # omit the thead and tfoot elements' end tag when they are
- # immediately followed by a tbody element. See is_optional_end.
- if previous and previous['type'] == 'EndTag' and \
- previous['name'] in ('tbody', 'thead', 'tfoot'):
- return False
- return next["name"] == 'tr'
- else:
- return False
- return False
-
- def is_optional_end(self, tagname, next):
- type = next and next["type"] or None
- if tagname in ('html', 'head', 'body'):
- # An html element's end tag may be omitted if the html element
- # is not immediately followed by a space character or a comment.
- return type not in ("Comment", "SpaceCharacters")
- elif tagname in ('li', 'optgroup', 'tr'):
- # A li element's end tag may be omitted if the li element is
- # immediately followed by another li element or if there is
- # no more content in the parent element.
- # An optgroup element's end tag may be omitted if the optgroup
- # element is immediately followed by another optgroup element,
- # or if there is no more content in the parent element.
- # A tr element's end tag may be omitted if the tr element is
- # immediately followed by another tr element, or if there is
- # no more content in the parent element.
- if type == "StartTag":
- return next["name"] == tagname
- else:
- return type == "EndTag" or type is None
- elif tagname in ('dt', 'dd'):
- # A dt element's end tag may be omitted if the dt element is
- # immediately followed by another dt element or a dd element.
- # A dd element's end tag may be omitted if the dd element is
- # immediately followed by another dd element or a dt element,
- # or if there is no more content in the parent element.
- if type == "StartTag":
- return next["name"] in ('dt', 'dd')
- elif tagname == 'dd':
- return type == "EndTag" or type is None
- else:
- return False
- elif tagname == 'p':
- # A p element's end tag may be omitted if the p element is
- # immediately followed by an address, article, aside,
- # blockquote, datagrid, dialog, dir, div, dl, fieldset,
- # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
- # nav, ol, p, pre, section, table, or ul, element, or if
- # there is no more content in the parent element.
- if type in ("StartTag", "EmptyTag"):
- return next["name"] in ('address', 'article', 'aside',
- 'blockquote', 'datagrid', 'dialog',
- 'dir', 'div', 'dl', 'fieldset', 'footer',
- 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
- 'header', 'hr', 'menu', 'nav', 'ol',
- 'p', 'pre', 'section', 'table', 'ul')
- else:
- return type == "EndTag" or type is None
- elif tagname == 'option':
- # An option element's end tag may be omitted if the option
- # element is immediately followed by another option element,
- # or if it is immediately followed by an <code>optgroup</code>
- # element, or if there is no more content in the parent
- # element.
- if type == "StartTag":
- return next["name"] in ('option', 'optgroup')
- else:
- return type == "EndTag" or type is None
- elif tagname in ('rt', 'rp'):
- # An rt element's end tag may be omitted if the rt element is
- # immediately followed by an rt or rp element, or if there is
- # no more content in the parent element.
- # An rp element's end tag may be omitted if the rp element is
- # immediately followed by an rt or rp element, or if there is
- # no more content in the parent element.
- if type == "StartTag":
- return next["name"] in ('rt', 'rp')
- else:
- return type == "EndTag" or type is None
- elif tagname == 'colgroup':
- # A colgroup element's end tag may be omitted if the colgroup
- # element is not immediately followed by a space character or
- # a comment.
- if type in ("Comment", "SpaceCharacters"):
- return False
- elif type == "StartTag":
- # XXX: we also look for an immediately following colgroup
- # element. See is_optional_start.
- return next["name"] != 'colgroup'
- else:
- return True
- elif tagname in ('thead', 'tbody'):
- # A thead element's end tag may be omitted if the thead element
- # is immediately followed by a tbody or tfoot element.
- # A tbody element's end tag may be omitted if the tbody element
- # is immediately followed by a tbody or tfoot element, or if
- # there is no more content in the parent element.
- # A tfoot element's end tag may be omitted if the tfoot element
- # is immediately followed by a tbody element, or if there is no
- # more content in the parent element.
- # XXX: we never omit the end tag when the following element is
- # a tbody. See is_optional_start.
- if type == "StartTag":
- return next["name"] in ['tbody', 'tfoot']
- elif tagname == 'tbody':
- return type == "EndTag" or type is None
- else:
- return False
- elif tagname == 'tfoot':
- # A tfoot element's end tag may be omitted if the tfoot element
- # is immediately followed by a tbody element, or if there is no
- # more content in the parent element.
- # XXX: we never omit the end tag when the following element is
- # a tbody. See is_optional_start.
- if type == "StartTag":
- return next["name"] == 'tbody'
- else:
- return type == "EndTag" or type is None
- elif tagname in ('td', 'th'):
- # A td element's end tag may be omitted if the td element is
- # immediately followed by a td or th element, or if there is
- # no more content in the parent element.
- # A th element's end tag may be omitted if the th element is
- # immediately followed by a td or th element, or if there is
- # no more content in the parent element.
- if type == "StartTag":
- return next["name"] in ('td', 'th')
- else:
- return type == "EndTag" or type is None
- return False
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/sanitizer.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/sanitizer.py
deleted file mode 100644
index b206b54..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/sanitizer.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import _base
-from ..sanitizer import HTMLSanitizerMixin
-
-
-class Filter(_base.Filter, HTMLSanitizerMixin):
- def __iter__(self):
- for token in _base.Filter.__iter__(self):
- token = self.sanitize_token(token)
- if token:
- yield token
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/whitespace.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/whitespace.py
deleted file mode 100644
index dfc60ee..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/whitespace.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import re
-
-from . import _base
-from ..constants import rcdataElements, spaceCharacters
-spaceCharacters = "".join(spaceCharacters)
-
-SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
-
-
-class Filter(_base.Filter):
-
- spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
-
- def __iter__(self):
- preserve = 0
- for token in _base.Filter.__iter__(self):
- type = token["type"]
- if type == "StartTag" \
- and (preserve or token["name"] in self.spacePreserveElements):
- preserve += 1
-
- elif type == "EndTag" and preserve:
- preserve -= 1
-
- elif not preserve and type == "SpaceCharacters" and token["data"]:
- # Test on token["data"] above to not introduce spaces where there were not
- token["data"] = " "
-
- elif not preserve and type == "Characters":
- token["data"] = collapse_spaces(token["data"])
-
- yield token
-
-
-def collapse_spaces(text):
- return SPACES_REGEX.sub(' ', text)