summaryrefslogtreecommitdiffstats
path: root/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib
diff options
context:
space:
mode:
Diffstat (limited to 'jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib')
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/__init__.py23
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/constants.py3086
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/__init__.py0
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/_base.py12
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py20
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py65
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/lint.py93
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py205
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/sanitizer.py12
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/whitespace.py38
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/html5parser.py2725
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/ihatexml.py285
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/inputstream.py905
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/sanitizer.py271
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/__init__.py16
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/htmlserializer.py309
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/tokenizer.py1731
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/__init__.py76
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/_base.py377
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/dom.py290
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree.py337
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py369
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py57
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/_base.py196
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/dom.py46
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/etree.py131
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/genshistream.py69
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/lxmletree.py208
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/pulldom.py63
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/__init__.py12
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/_base.py37
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/datrie.py44
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/py.py67
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/utils.py78
34 files changed, 0 insertions, 12253 deletions
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/__init__.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/__init__.py
deleted file mode 100644
index 10e2b74..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/__init__.py
+++ /dev/null
@@ -1,23 +0,0 @@
-"""
-HTML parsing library based on the WHATWG "HTML5"
-specification. The parser is designed to be compatible with existing
-HTML found in the wild and implements well-defined error recovery that
-is largely compatible with modern desktop web browsers.
-
-Example usage:
-
-import html5lib
-f = open("my_document.html")
-tree = html5lib.parse(f)
-"""
-
-from __future__ import absolute_import, division, unicode_literals
-
-from .html5parser import HTMLParser, parse, parseFragment
-from .treebuilders import getTreeBuilder
-from .treewalkers import getTreeWalker
-from .serializer import serialize
-
-__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
- "getTreeWalker", "serialize"]
-__version__ = "1.0b1"
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/constants.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/constants.py
deleted file mode 100644
index 1866dd7..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/constants.py
+++ /dev/null
@@ -1,3086 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import string
-import gettext
-_ = gettext.gettext
-
-EOF = None
-
-E = {
- "null-character":
- _("Null character in input stream, replaced with U+FFFD."),
- "invalid-codepoint":
- _("Invalid codepoint in stream."),
- "incorrectly-placed-solidus":
- _("Solidus (/) incorrectly placed in tag."),
- "incorrect-cr-newline-entity":
- _("Incorrect CR newline entity, replaced with LF."),
- "illegal-windows-1252-entity":
- _("Entity used with illegal number (windows-1252 reference)."),
- "cant-convert-numeric-entity":
- _("Numeric entity couldn't be converted to character "
- "(codepoint U+%(charAsInt)08x)."),
- "illegal-codepoint-for-numeric-entity":
- _("Numeric entity represents an illegal codepoint: "
- "U+%(charAsInt)08x."),
- "numeric-entity-without-semicolon":
- _("Numeric entity didn't end with ';'."),
- "expected-numeric-entity-but-got-eof":
- _("Numeric entity expected. Got end of file instead."),
- "expected-numeric-entity":
- _("Numeric entity expected but none found."),
- "named-entity-without-semicolon":
- _("Named entity didn't end with ';'."),
- "expected-named-entity":
- _("Named entity expected. Got none."),
- "attributes-in-end-tag":
- _("End tag contains unexpected attributes."),
- 'self-closing-flag-on-end-tag':
- _("End tag contains unexpected self-closing flag."),
- "expected-tag-name-but-got-right-bracket":
- _("Expected tag name. Got '>' instead."),
- "expected-tag-name-but-got-question-mark":
- _("Expected tag name. Got '?' instead. (HTML doesn't "
- "support processing instructions.)"),
- "expected-tag-name":
- _("Expected tag name. Got something else instead"),
- "expected-closing-tag-but-got-right-bracket":
- _("Expected closing tag. Got '>' instead. Ignoring '</>'."),
- "expected-closing-tag-but-got-eof":
- _("Expected closing tag. Unexpected end of file."),
- "expected-closing-tag-but-got-char":
- _("Expected closing tag. Unexpected character '%(data)s' found."),
- "eof-in-tag-name":
- _("Unexpected end of file in the tag name."),
- "expected-attribute-name-but-got-eof":
- _("Unexpected end of file. Expected attribute name instead."),
- "eof-in-attribute-name":
- _("Unexpected end of file in attribute name."),
- "invalid-character-in-attribute-name":
- _("Invalid character in attribute name"),
- "duplicate-attribute":
- _("Dropped duplicate attribute on tag."),
- "expected-end-of-tag-name-but-got-eof":
- _("Unexpected end of file. Expected = or end of tag."),
- "expected-attribute-value-but-got-eof":
- _("Unexpected end of file. Expected attribute value."),
- "expected-attribute-value-but-got-right-bracket":
- _("Expected attribute value. Got '>' instead."),
- 'equals-in-unquoted-attribute-value':
- _("Unexpected = in unquoted attribute"),
- 'unexpected-character-in-unquoted-attribute-value':
- _("Unexpected character in unquoted attribute"),
- "invalid-character-after-attribute-name":
- _("Unexpected character after attribute name."),
- "unexpected-character-after-attribute-value":
- _("Unexpected character after attribute value."),
- "eof-in-attribute-value-double-quote":
- _("Unexpected end of file in attribute value (\")."),
- "eof-in-attribute-value-single-quote":
- _("Unexpected end of file in attribute value (')."),
- "eof-in-attribute-value-no-quotes":
- _("Unexpected end of file in attribute value."),
- "unexpected-EOF-after-solidus-in-tag":
- _("Unexpected end of file in tag. Expected >"),
- "unexpected-character-after-solidus-in-tag":
- _("Unexpected character after / in tag. Expected >"),
- "expected-dashes-or-doctype":
- _("Expected '--' or 'DOCTYPE'. Not found."),
- "unexpected-bang-after-double-dash-in-comment":
- _("Unexpected ! after -- in comment"),
- "unexpected-space-after-double-dash-in-comment":
- _("Unexpected space after -- in comment"),
- "incorrect-comment":
- _("Incorrect comment."),
- "eof-in-comment":
- _("Unexpected end of file in comment."),
- "eof-in-comment-end-dash":
- _("Unexpected end of file in comment (-)"),
- "unexpected-dash-after-double-dash-in-comment":
- _("Unexpected '-' after '--' found in comment."),
- "eof-in-comment-double-dash":
- _("Unexpected end of file in comment (--)."),
- "eof-in-comment-end-space-state":
- _("Unexpected end of file in comment."),
- "eof-in-comment-end-bang-state":
- _("Unexpected end of file in comment."),
- "unexpected-char-in-comment":
- _("Unexpected character in comment found."),
- "need-space-after-doctype":
- _("No space after literal string 'DOCTYPE'."),
- "expected-doctype-name-but-got-right-bracket":
- _("Unexpected > character. Expected DOCTYPE name."),
- "expected-doctype-name-but-got-eof":
- _("Unexpected end of file. Expected DOCTYPE name."),
- "eof-in-doctype-name":
- _("Unexpected end of file in DOCTYPE name."),
- "eof-in-doctype":
- _("Unexpected end of file in DOCTYPE."),
- "expected-space-or-right-bracket-in-doctype":
- _("Expected space or '>'. Got '%(data)s'"),
- "unexpected-end-of-doctype":
- _("Unexpected end of DOCTYPE."),
- "unexpected-char-in-doctype":
- _("Unexpected character in DOCTYPE."),
- "eof-in-innerhtml":
- _("XXX innerHTML EOF"),
- "unexpected-doctype":
- _("Unexpected DOCTYPE. Ignored."),
- "non-html-root":
- _("html needs to be the first start tag."),
- "expected-doctype-but-got-eof":
- _("Unexpected End of file. Expected DOCTYPE."),
- "unknown-doctype":
- _("Erroneous DOCTYPE."),
- "expected-doctype-but-got-chars":
- _("Unexpected non-space characters. Expected DOCTYPE."),
- "expected-doctype-but-got-start-tag":
- _("Unexpected start tag (%(name)s). Expected DOCTYPE."),
- "expected-doctype-but-got-end-tag":
- _("Unexpected end tag (%(name)s). Expected DOCTYPE."),
- "end-tag-after-implied-root":
- _("Unexpected end tag (%(name)s) after the (implied) root element."),
- "expected-named-closing-tag-but-got-eof":
- _("Unexpected end of file. Expected end tag (%(name)s)."),
- "two-heads-are-not-better-than-one":
- _("Unexpected start tag head in existing head. Ignored."),
- "unexpected-end-tag":
- _("Unexpected end tag (%(name)s). Ignored."),
- "unexpected-start-tag-out-of-my-head":
- _("Unexpected start tag (%(name)s) that can be in head. Moved."),
- "unexpected-start-tag":
- _("Unexpected start tag (%(name)s)."),
- "missing-end-tag":
- _("Missing end tag (%(name)s)."),
- "missing-end-tags":
- _("Missing end tags (%(name)s)."),
- "unexpected-start-tag-implies-end-tag":
- _("Unexpected start tag (%(startName)s) "
- "implies end tag (%(endName)s)."),
- "unexpected-start-tag-treated-as":
- _("Unexpected start tag (%(originalName)s). Treated as %(newName)s."),
- "deprecated-tag":
- _("Unexpected start tag %(name)s. Don't use it!"),
- "unexpected-start-tag-ignored":
- _("Unexpected start tag %(name)s. Ignored."),
- "expected-one-end-tag-but-got-another":
- _("Unexpected end tag (%(gotName)s). "
- "Missing end tag (%(expectedName)s)."),
- "end-tag-too-early":
- _("End tag (%(name)s) seen too early. Expected other end tag."),
- "end-tag-too-early-named":
- _("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."),
- "end-tag-too-early-ignored":
- _("End tag (%(name)s) seen too early. Ignored."),
- "adoption-agency-1.1":
- _("End tag (%(name)s) violates step 1, "
- "paragraph 1 of the adoption agency algorithm."),
- "adoption-agency-1.2":
- _("End tag (%(name)s) violates step 1, "
- "paragraph 2 of the adoption agency algorithm."),
- "adoption-agency-1.3":
- _("End tag (%(name)s) violates step 1, "
- "paragraph 3 of the adoption agency algorithm."),
- "adoption-agency-4.4":
- _("End tag (%(name)s) violates step 4, "
- "paragraph 4 of the adoption agency algorithm."),
- "unexpected-end-tag-treated-as":
- _("Unexpected end tag (%(originalName)s). Treated as %(newName)s."),
- "no-end-tag":
- _("This element (%(name)s) has no end tag."),
- "unexpected-implied-end-tag-in-table":
- _("Unexpected implied end tag (%(name)s) in the table phase."),
- "unexpected-implied-end-tag-in-table-body":
- _("Unexpected implied end tag (%(name)s) in the table body phase."),
- "unexpected-char-implies-table-voodoo":
- _("Unexpected non-space characters in "
- "table context caused voodoo mode."),
- "unexpected-hidden-input-in-table":
- _("Unexpected input with type hidden in table context."),
- "unexpected-form-in-table":
- _("Unexpected form in table context."),
- "unexpected-start-tag-implies-table-voodoo":
- _("Unexpected start tag (%(name)s) in "
- "table context caused voodoo mode."),
- "unexpected-end-tag-implies-table-voodoo":
- _("Unexpected end tag (%(name)s) in "
- "table context caused voodoo mode."),
- "unexpected-cell-in-table-body":
- _("Unexpected table cell start tag (%(name)s) "
- "in the table body phase."),
- "unexpected-cell-end-tag":
- _("Got table cell end tag (%(name)s) "
- "while required end tags are missing."),
- "unexpected-end-tag-in-table-body":
- _("Unexpected end tag (%(name)s) in the table body phase. Ignored."),
- "unexpected-implied-end-tag-in-table-row":
- _("Unexpected implied end tag (%(name)s) in the table row phase."),
- "unexpected-end-tag-in-table-row":
- _("Unexpected end tag (%(name)s) in the table row phase. Ignored."),
- "unexpected-select-in-select":
- _("Unexpected select start tag in the select phase "
- "treated as select end tag."),
- "unexpected-input-in-select":
- _("Unexpected input start tag in the select phase."),
- "unexpected-start-tag-in-select":
- _("Unexpected start tag token (%(name)s in the select phase. "
- "Ignored."),
- "unexpected-end-tag-in-select":
- _("Unexpected end tag (%(name)s) in the select phase. Ignored."),
- "unexpected-table-element-start-tag-in-select-in-table":
- _("Unexpected table element start tag (%(name)s) in the select in table phase."),
- "unexpected-table-element-end-tag-in-select-in-table":
- _("Unexpected table element end tag (%(name)s) in the select in table phase."),
- "unexpected-char-after-body":
- _("Unexpected non-space characters in the after body phase."),
- "unexpected-start-tag-after-body":
- _("Unexpected start tag token (%(name)s)"
- " in the after body phase."),
- "unexpected-end-tag-after-body":
- _("Unexpected end tag token (%(name)s)"
- " in the after body phase."),
- "unexpected-char-in-frameset":
- _("Unexpected characters in the frameset phase. Characters ignored."),
- "unexpected-start-tag-in-frameset":
- _("Unexpected start tag token (%(name)s)"
- " in the frameset phase. Ignored."),
- "unexpected-frameset-in-frameset-innerhtml":
- _("Unexpected end tag token (frameset) "
- "in the frameset phase (innerHTML)."),
- "unexpected-end-tag-in-frameset":
- _("Unexpected end tag token (%(name)s)"
- " in the frameset phase. Ignored."),
- "unexpected-char-after-frameset":
- _("Unexpected non-space characters in the "
- "after frameset phase. Ignored."),
- "unexpected-start-tag-after-frameset":
- _("Unexpected start tag (%(name)s)"
- " in the after frameset phase. Ignored."),
- "unexpected-end-tag-after-frameset":
- _("Unexpected end tag (%(name)s)"
- " in the after frameset phase. Ignored."),
- "unexpected-end-tag-after-body-innerhtml":
- _("Unexpected end tag after body(innerHtml)"),
- "expected-eof-but-got-char":
- _("Unexpected non-space characters. Expected end of file."),
- "expected-eof-but-got-start-tag":
- _("Unexpected start tag (%(name)s)"
- ". Expected end of file."),
- "expected-eof-but-got-end-tag":
- _("Unexpected end tag (%(name)s)"
- ". Expected end of file."),
- "eof-in-table":
- _("Unexpected end of file. Expected table content."),
- "eof-in-select":
- _("Unexpected end of file. Expected select content."),
- "eof-in-frameset":
- _("Unexpected end of file. Expected frameset content."),
- "eof-in-script-in-script":
- _("Unexpected end of file. Expected script content."),
- "eof-in-foreign-lands":
- _("Unexpected end of file. Expected foreign content"),
- "non-void-element-with-trailing-solidus":
- _("Trailing solidus not allowed on element %(name)s"),
- "unexpected-html-element-in-foreign-content":
- _("Element %(name)s not allowed in a non-html context"),
- "unexpected-end-tag-before-html":
- _("Unexpected end tag (%(name)s) before html."),
- "XXX-undefined-error":
- _("Undefined error (this sucks and should be fixed)"),
-}
-
-namespaces = {
- "html": "http://www.w3.org/1999/xhtml",
- "mathml": "http://www.w3.org/1998/Math/MathML",
- "svg": "http://www.w3.org/2000/svg",
- "xlink": "http://www.w3.org/1999/xlink",
- "xml": "http://www.w3.org/XML/1998/namespace",
- "xmlns": "http://www.w3.org/2000/xmlns/"
-}
-
-scopingElements = frozenset((
- (namespaces["html"], "applet"),
- (namespaces["html"], "caption"),
- (namespaces["html"], "html"),
- (namespaces["html"], "marquee"),
- (namespaces["html"], "object"),
- (namespaces["html"], "table"),
- (namespaces["html"], "td"),
- (namespaces["html"], "th"),
- (namespaces["mathml"], "mi"),
- (namespaces["mathml"], "mo"),
- (namespaces["mathml"], "mn"),
- (namespaces["mathml"], "ms"),
- (namespaces["mathml"], "mtext"),
- (namespaces["mathml"], "annotation-xml"),
- (namespaces["svg"], "foreignObject"),
- (namespaces["svg"], "desc"),
- (namespaces["svg"], "title"),
-))
-
-formattingElements = frozenset((
- (namespaces["html"], "a"),
- (namespaces["html"], "b"),
- (namespaces["html"], "big"),
- (namespaces["html"], "code"),
- (namespaces["html"], "em"),
- (namespaces["html"], "font"),
- (namespaces["html"], "i"),
- (namespaces["html"], "nobr"),
- (namespaces["html"], "s"),
- (namespaces["html"], "small"),
- (namespaces["html"], "strike"),
- (namespaces["html"], "strong"),
- (namespaces["html"], "tt"),
- (namespaces["html"], "u")
-))
-
-specialElements = frozenset((
- (namespaces["html"], "address"),
- (namespaces["html"], "applet"),
- (namespaces["html"], "area"),
- (namespaces["html"], "article"),
- (namespaces["html"], "aside"),
- (namespaces["html"], "base"),
- (namespaces["html"], "basefont"),
- (namespaces["html"], "bgsound"),
- (namespaces["html"], "blockquote"),
- (namespaces["html"], "body"),
- (namespaces["html"], "br"),
- (namespaces["html"], "button"),
- (namespaces["html"], "caption"),
- (namespaces["html"], "center"),
- (namespaces["html"], "col"),
- (namespaces["html"], "colgroup"),
- (namespaces["html"], "command"),
- (namespaces["html"], "dd"),
- (namespaces["html"], "details"),
- (namespaces["html"], "dir"),
- (namespaces["html"], "div"),
- (namespaces["html"], "dl"),
- (namespaces["html"], "dt"),
- (namespaces["html"], "embed"),
- (namespaces["html"], "fieldset"),
- (namespaces["html"], "figure"),
- (namespaces["html"], "footer"),
- (namespaces["html"], "form"),
- (namespaces["html"], "frame"),
- (namespaces["html"], "frameset"),
- (namespaces["html"], "h1"),
- (namespaces["html"], "h2"),
- (namespaces["html"], "h3"),
- (namespaces["html"], "h4"),
- (namespaces["html"], "h5"),
- (namespaces["html"], "h6"),
- (namespaces["html"], "head"),
- (namespaces["html"], "header"),
- (namespaces["html"], "hr"),
- (namespaces["html"], "html"),
- (namespaces["html"], "iframe"),
- # Note that image is commented out in the spec as "this isn't an
- # element that can end up on the stack, so it doesn't matter,"
- (namespaces["html"], "image"),
- (namespaces["html"], "img"),
- (namespaces["html"], "input"),
- (namespaces["html"], "isindex"),
- (namespaces["html"], "li"),
- (namespaces["html"], "link"),
- (namespaces["html"], "listing"),
- (namespaces["html"], "marquee"),
- (namespaces["html"], "menu"),
- (namespaces["html"], "meta"),
- (namespaces["html"], "nav"),
- (namespaces["html"], "noembed"),
- (namespaces["html"], "noframes"),
- (namespaces["html"], "noscript"),
- (namespaces["html"], "object"),
- (namespaces["html"], "ol"),
- (namespaces["html"], "p"),
- (namespaces["html"], "param"),
- (namespaces["html"], "plaintext"),
- (namespaces["html"], "pre"),
- (namespaces["html"], "script"),
- (namespaces["html"], "section"),
- (namespaces["html"], "select"),
- (namespaces["html"], "style"),
- (namespaces["html"], "table"),
- (namespaces["html"], "tbody"),
- (namespaces["html"], "td"),
- (namespaces["html"], "textarea"),
- (namespaces["html"], "tfoot"),
- (namespaces["html"], "th"),
- (namespaces["html"], "thead"),
- (namespaces["html"], "title"),
- (namespaces["html"], "tr"),
- (namespaces["html"], "ul"),
- (namespaces["html"], "wbr"),
- (namespaces["html"], "xmp"),
- (namespaces["svg"], "foreignObject")
-))
-
-htmlIntegrationPointElements = frozenset((
- (namespaces["mathml"], "annotaion-xml"),
- (namespaces["svg"], "foreignObject"),
- (namespaces["svg"], "desc"),
- (namespaces["svg"], "title")
-))
-
-mathmlTextIntegrationPointElements = frozenset((
- (namespaces["mathml"], "mi"),
- (namespaces["mathml"], "mo"),
- (namespaces["mathml"], "mn"),
- (namespaces["mathml"], "ms"),
- (namespaces["mathml"], "mtext")
-))
-
-spaceCharacters = frozenset((
- "\t",
- "\n",
- "\u000C",
- " ",
- "\r"
-))
-
-tableInsertModeElements = frozenset((
- "table",
- "tbody",
- "tfoot",
- "thead",
- "tr"
-))
-
-asciiLowercase = frozenset(string.ascii_lowercase)
-asciiUppercase = frozenset(string.ascii_uppercase)
-asciiLetters = frozenset(string.ascii_letters)
-digits = frozenset(string.digits)
-hexDigits = frozenset(string.hexdigits)
-
-asciiUpper2Lower = dict([(ord(c), ord(c.lower()))
- for c in string.ascii_uppercase])
-
-# Heading elements need to be ordered
-headingElements = (
- "h1",
- "h2",
- "h3",
- "h4",
- "h5",
- "h6"
-)
-
-voidElements = frozenset((
- "base",
- "command",
- "event-source",
- "link",
- "meta",
- "hr",
- "br",
- "img",
- "embed",
- "param",
- "area",
- "col",
- "input",
- "source",
- "track"
-))
-
-cdataElements = frozenset(('title', 'textarea'))
-
-rcdataElements = frozenset((
- 'style',
- 'script',
- 'xmp',
- 'iframe',
- 'noembed',
- 'noframes',
- 'noscript'
-))
-
-booleanAttributes = {
- "": frozenset(("irrelevant",)),
- "style": frozenset(("scoped",)),
- "img": frozenset(("ismap",)),
- "audio": frozenset(("autoplay", "controls")),
- "video": frozenset(("autoplay", "controls")),
- "script": frozenset(("defer", "async")),
- "details": frozenset(("open",)),
- "datagrid": frozenset(("multiple", "disabled")),
- "command": frozenset(("hidden", "disabled", "checked", "default")),
- "hr": frozenset(("noshade")),
- "menu": frozenset(("autosubmit",)),
- "fieldset": frozenset(("disabled", "readonly")),
- "option": frozenset(("disabled", "readonly", "selected")),
- "optgroup": frozenset(("disabled", "readonly")),
- "button": frozenset(("disabled", "autofocus")),
- "input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")),
- "select": frozenset(("disabled", "readonly", "autofocus", "multiple")),
- "output": frozenset(("disabled", "readonly")),
-}
-
-# entitiesWindows1252 has to be _ordered_ and needs to have an index. It
-# therefore can't be a frozenset.
-entitiesWindows1252 = (
- 8364, # 0x80 0x20AC EURO SIGN
- 65533, # 0x81 UNDEFINED
- 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK
- 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK
- 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK
- 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS
- 8224, # 0x86 0x2020 DAGGER
- 8225, # 0x87 0x2021 DOUBLE DAGGER
- 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
- 8240, # 0x89 0x2030 PER MILLE SIGN
- 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON
- 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
- 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE
- 65533, # 0x8D UNDEFINED
- 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON
- 65533, # 0x8F UNDEFINED
- 65533, # 0x90 UNDEFINED
- 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK
- 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK
- 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK
- 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK
- 8226, # 0x95 0x2022 BULLET
- 8211, # 0x96 0x2013 EN DASH
- 8212, # 0x97 0x2014 EM DASH
- 732, # 0x98 0x02DC SMALL TILDE
- 8482, # 0x99 0x2122 TRADE MARK SIGN
- 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON
- 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
- 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE
- 65533, # 0x9D UNDEFINED
- 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON
- 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
-)
-
-xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;'))
-
-entities = {
- "AElig": "\xc6",
- "AElig;": "\xc6",
- "AMP": "&",
- "AMP;": "&",
- "Aacute": "\xc1",
- "Aacute;": "\xc1",
- "Abreve;": "\u0102",
- "Acirc": "\xc2",
- "Acirc;": "\xc2",
- "Acy;": "\u0410",
- "Afr;": "\U0001d504",
- "Agrave": "\xc0",
- "Agrave;": "\xc0",
- "Alpha;": "\u0391",
- "Amacr;": "\u0100",
- "And;": "\u2a53",
- "Aogon;": "\u0104",
- "Aopf;": "\U0001d538",
- "ApplyFunction;": "\u2061",
- "Aring": "\xc5",
- "Aring;": "\xc5",
- "Ascr;": "\U0001d49c",
- "Assign;": "\u2254",
- "Atilde": "\xc3",
- "Atilde;": "\xc3",
- "Auml": "\xc4",
- "Auml;": "\xc4",
- "Backslash;": "\u2216",
- "Barv;": "\u2ae7",
- "Barwed;": "\u2306",
- "Bcy;": "\u0411",
- "Because;": "\u2235",
- "Bernoullis;": "\u212c",
- "Beta;": "\u0392",
- "Bfr;": "\U0001d505",
- "Bopf;": "\U0001d539",
- "Breve;": "\u02d8",
- "Bscr;": "\u212c",
- "Bumpeq;": "\u224e",
- "CHcy;": "\u0427",
- "COPY": "\xa9",
- "COPY;": "\xa9",
- "Cacute;": "\u0106",
- "Cap;": "\u22d2",
- "CapitalDifferentialD;": "\u2145",
- "Cayleys;": "\u212d",
- "Ccaron;": "\u010c",
- "Ccedil": "\xc7",
- "Ccedil;": "\xc7",
- "Ccirc;": "\u0108",
- "Cconint;": "\u2230",
- "Cdot;": "\u010a",
- "Cedilla;": "\xb8",
- "CenterDot;": "\xb7",
- "Cfr;": "\u212d",
- "Chi;": "\u03a7",
- "CircleDot;": "\u2299",
- "CircleMinus;": "\u2296",
- "CirclePlus;": "\u2295",
- "CircleTimes;": "\u2297",
- "ClockwiseContourIntegral;": "\u2232",
- "CloseCurlyDoubleQuote;": "\u201d",
- "CloseCurlyQuote;": "\u2019",
- "Colon;": "\u2237",
- "Colone;": "\u2a74",
- "Congruent;": "\u2261",
- "Conint;": "\u222f",
- "ContourIntegral;": "\u222e",
- "Copf;": "\u2102",
- "Coproduct;": "\u2210",
- "CounterClockwiseContourIntegral;": "\u2233",
- "Cross;": "\u2a2f",
- "Cscr;": "\U0001d49e",
- "Cup;": "\u22d3",
- "CupCap;": "\u224d",
- "DD;": "\u2145",
- "DDotrahd;": "\u2911",
- "DJcy;": "\u0402",
- "DScy;": "\u0405",
- "DZcy;": "\u040f",
- "Dagger;": "\u2021",
- "Darr;": "\u21a1",
- "Dashv;": "\u2ae4",
- "Dcaron;": "\u010e",
- "Dcy;": "\u0414",
- "Del;": "\u2207",
- "Delta;": "\u0394",
- "Dfr;": "\U0001d507",
- "DiacriticalAcute;": "\xb4",
- "DiacriticalDot;": "\u02d9",
- "DiacriticalDoubleAcute;": "\u02dd",
- "DiacriticalGrave;": "`",
- "DiacriticalTilde;": "\u02dc",
- "Diamond;": "\u22c4",
- "DifferentialD;": "\u2146",
- "Dopf;": "\U0001d53b",
- "Dot;": "\xa8",
- "DotDot;": "\u20dc",
- "DotEqual;": "\u2250",
- "DoubleContourIntegral;": "\u222f",
- "DoubleDot;": "\xa8",
- "DoubleDownArrow;": "\u21d3",
- "DoubleLeftArrow;": "\u21d0",
- "DoubleLeftRightArrow;": "\u21d4",
- "DoubleLeftTee;": "\u2ae4",
- "DoubleLongLeftArrow;": "\u27f8",
- "DoubleLongLeftRightArrow;": "\u27fa",
- "DoubleLongRightArrow;": "\u27f9",
- "DoubleRightArrow;": "\u21d2",
- "DoubleRightTee;": "\u22a8",
- "DoubleUpArrow;": "\u21d1",
- "DoubleUpDownArrow;": "\u21d5",
- "DoubleVerticalBar;": "\u2225",
- "DownArrow;": "\u2193",
- "DownArrowBar;": "\u2913",
- "DownArrowUpArrow;": "\u21f5",
- "DownBreve;": "\u0311",
- "DownLeftRightVector;": "\u2950",
- "DownLeftTeeVector;": "\u295e",
- "DownLeftVector;": "\u21bd",
- "DownLeftVectorBar;": "\u2956",
- "DownRightTeeVector;": "\u295f",
- "DownRightVector;": "\u21c1",
- "DownRightVectorBar;": "\u2957",
- "DownTee;": "\u22a4",
- "DownTeeArrow;": "\u21a7",
- "Downarrow;": "\u21d3",
- "Dscr;": "\U0001d49f",
- "Dstrok;": "\u0110",
- "ENG;": "\u014a",
- "ETH": "\xd0",
- "ETH;": "\xd0",
- "Eacute": "\xc9",
- "Eacute;": "\xc9",
- "Ecaron;": "\u011a",
- "Ecirc": "\xca",
- "Ecirc;": "\xca",
- "Ecy;": "\u042d",
- "Edot;": "\u0116",
- "Efr;": "\U0001d508",
- "Egrave": "\xc8",
- "Egrave;": "\xc8",
- "Element;": "\u2208",
- "Emacr;": "\u0112",
- "EmptySmallSquare;": "\u25fb",
- "EmptyVerySmallSquare;": "\u25ab",
- "Eogon;": "\u0118",
- "Eopf;": "\U0001d53c",
- "Epsilon;": "\u0395",
- "Equal;": "\u2a75",
- "EqualTilde;": "\u2242",
- "Equilibrium;": "\u21cc",
- "Escr;": "\u2130",
- "Esim;": "\u2a73",
- "Eta;": "\u0397",
- "Euml": "\xcb",
- "Euml;": "\xcb",
- "Exists;": "\u2203",
- "ExponentialE;": "\u2147",
- "Fcy;": "\u0424",
- "Ffr;": "\U0001d509",
- "FilledSmallSquare;": "\u25fc",
- "FilledVerySmallSquare;": "\u25aa",
- "Fopf;": "\U0001d53d",
- "ForAll;": "\u2200",
- "Fouriertrf;": "\u2131",
- "Fscr;": "\u2131",
- "GJcy;": "\u0403",
- "GT": ">",
- "GT;": ">",
- "Gamma;": "\u0393",
- "Gammad;": "\u03dc",
- "Gbreve;": "\u011e",
- "Gcedil;": "\u0122",
- "Gcirc;": "\u011c",
- "Gcy;": "\u0413",
- "Gdot;": "\u0120",
- "Gfr;": "\U0001d50a",
- "Gg;": "\u22d9",
- "Gopf;": "\U0001d53e",
- "GreaterEqual;": "\u2265",
- "GreaterEqualLess;": "\u22db",
- "GreaterFullEqual;": "\u2267",
- "GreaterGreater;": "\u2aa2",
- "GreaterLess;": "\u2277",
- "GreaterSlantEqual;": "\u2a7e",
- "GreaterTilde;": "\u2273",
- "Gscr;": "\U0001d4a2",
- "Gt;": "\u226b",
- "HARDcy;": "\u042a",
- "Hacek;": "\u02c7",
- "Hat;": "^",
- "Hcirc;": "\u0124",
- "Hfr;": "\u210c",
- "HilbertSpace;": "\u210b",
- "Hopf;": "\u210d",
- "HorizontalLine;": "\u2500",
- "Hscr;": "\u210b",
- "Hstrok;": "\u0126",
- "HumpDownHump;": "\u224e",
- "HumpEqual;": "\u224f",
- "IEcy;": "\u0415",
- "IJlig;": "\u0132",
- "IOcy;": "\u0401",
- "Iacute": "\xcd",
- "Iacute;": "\xcd",
- "Icirc": "\xce",
- "Icirc;": "\xce",
- "Icy;": "\u0418",
- "Idot;": "\u0130",
- "Ifr;": "\u2111",
- "Igrave": "\xcc",
- "Igrave;": "\xcc",
- "Im;": "\u2111",
- "Imacr;": "\u012a",
- "ImaginaryI;": "\u2148",
- "Implies;": "\u21d2",
- "Int;": "\u222c",
- "Integral;": "\u222b",
- "Intersection;": "\u22c2",
- "InvisibleComma;": "\u2063",
- "InvisibleTimes;": "\u2062",
- "Iogon;": "\u012e",
- "Iopf;": "\U0001d540",
- "Iota;": "\u0399",
- "Iscr;": "\u2110",
- "Itilde;": "\u0128",
- "Iukcy;": "\u0406",
- "Iuml": "\xcf",
- "Iuml;": "\xcf",
- "Jcirc;": "\u0134",
- "Jcy;": "\u0419",
- "Jfr;": "\U0001d50d",
- "Jopf;": "\U0001d541",
- "Jscr;": "\U0001d4a5",
- "Jsercy;": "\u0408",
- "Jukcy;": "\u0404",
- "KHcy;": "\u0425",
- "KJcy;": "\u040c",
- "Kappa;": "\u039a",
- "Kcedil;": "\u0136",
- "Kcy;": "\u041a",
- "Kfr;": "\U0001d50e",
- "Kopf;": "\U0001d542",
- "Kscr;": "\U0001d4a6",
- "LJcy;": "\u0409",
- "LT": "<",
- "LT;": "<",
- "Lacute;": "\u0139",
- "Lambda;": "\u039b",
- "Lang;": "\u27ea",
- "Laplacetrf;": "\u2112",
- "Larr;": "\u219e",
- "Lcaron;": "\u013d",
- "Lcedil;": "\u013b",
- "Lcy;": "\u041b",
- "LeftAngleBracket;": "\u27e8",
- "LeftArrow;": "\u2190",
- "LeftArrowBar;": "\u21e4",
- "LeftArrowRightArrow;": "\u21c6",
- "LeftCeiling;": "\u2308",
- "LeftDoubleBracket;": "\u27e6",
- "LeftDownTeeVector;": "\u2961",
- "LeftDownVector;": "\u21c3",
- "LeftDownVectorBar;": "\u2959",
- "LeftFloor;": "\u230a",
- "LeftRightArrow;": "\u2194",
- "LeftRightVector;": "\u294e",
- "LeftTee;": "\u22a3",
- "LeftTeeArrow;": "\u21a4",
- "LeftTeeVector;": "\u295a",
- "LeftTriangle;": "\u22b2",
- "LeftTriangleBar;": "\u29cf",
- "LeftTriangleEqual;": "\u22b4",
- "LeftUpDownVector;": "\u2951",
- "LeftUpTeeVector;": "\u2960",
- "LeftUpVector;": "\u21bf",
- "LeftUpVectorBar;": "\u2958",
- "LeftVector;": "\u21bc",
- "LeftVectorBar;": "\u2952",
- "Leftarrow;": "\u21d0",
- "Leftrightarrow;": "\u21d4",
- "LessEqualGreater;": "\u22da",
- "LessFullEqual;": "\u2266",
- "LessGreater;": "\u2276",
- "LessLess;": "\u2aa1",
- "LessSlantEqual;": "\u2a7d",
- "LessTilde;": "\u2272",
- "Lfr;": "\U0001d50f",
- "Ll;": "\u22d8",
- "Lleftarrow;": "\u21da",
- "Lmidot;": "\u013f",
- "LongLeftArrow;": "\u27f5",
- "LongLeftRightArrow;": "\u27f7",
- "LongRightArrow;": "\u27f6",
- "Longleftarrow;": "\u27f8",
- "Longleftrightarrow;": "\u27fa",
- "Longrightarrow;": "\u27f9",
- "Lopf;": "\U0001d543",
- "LowerLeftArrow;": "\u2199",
- "LowerRightArrow;": "\u2198",
- "Lscr;": "\u2112",
- "Lsh;": "\u21b0",
- "Lstrok;": "\u0141",
- "Lt;": "\u226a",
- "Map;": "\u2905",
- "Mcy;": "\u041c",
- "MediumSpace;": "\u205f",
- "Mellintrf;": "\u2133",
- "Mfr;": "\U0001d510",
- "MinusPlus;": "\u2213",
- "Mopf;": "\U0001d544",
- "Mscr;": "\u2133",
- "Mu;": "\u039c",
- "NJcy;": "\u040a",
- "Nacute;": "\u0143",
- "Ncaron;": "\u0147",
- "Ncedil;": "\u0145",
- "Ncy;": "\u041d",
- "NegativeMediumSpace;": "\u200b",
- "NegativeThickSpace;": "\u200b",
- "NegativeThinSpace;": "\u200b",
- "NegativeVeryThinSpace;": "\u200b",
- "NestedGreaterGreater;": "\u226b",
- "NestedLessLess;": "\u226a",
- "NewLine;": "\n",
- "Nfr;": "\U0001d511",
- "NoBreak;": "\u2060",
- "NonBreakingSpace;": "\xa0",
- "Nopf;": "\u2115",
- "Not;": "\u2aec",
- "NotCongruent;": "\u2262",
- "NotCupCap;": "\u226d",
- "NotDoubleVerticalBar;": "\u2226",
- "NotElement;": "\u2209",
- "NotEqual;": "\u2260",
- "NotEqualTilde;": "\u2242\u0338",
- "NotExists;": "\u2204",
- "NotGreater;": "\u226f",
- "NotGreaterEqual;": "\u2271",
- "NotGreaterFullEqual;": "\u2267\u0338",
- "NotGreaterGreater;": "\u226b\u0338",
- "NotGreaterLess;": "\u2279",
- "NotGreaterSlantEqual;": "\u2a7e\u0338",
- "NotGreaterTilde;": "\u2275",
- "NotHumpDownHump;": "\u224e\u0338",
- "NotHumpEqual;": "\u224f\u0338",
- "NotLeftTriangle;": "\u22ea",
- "NotLeftTriangleBar;": "\u29cf\u0338",
- "NotLeftTriangleEqual;": "\u22ec",
- "NotLess;": "\u226e",
- "NotLessEqual;": "\u2270",
- "NotLessGreater;": "\u2278",
- "NotLessLess;": "\u226a\u0338",
- "NotLessSlantEqual;": "\u2a7d\u0338",
- "NotLessTilde;": "\u2274",
- "NotNestedGreaterGreater;": "\u2aa2\u0338",
- "NotNestedLessLess;": "\u2aa1\u0338",
- "NotPrecedes;": "\u2280",
- "NotPrecedesEqual;": "\u2aaf\u0338",
- "NotPrecedesSlantEqual;": "\u22e0",
- "NotReverseElement;": "\u220c",
- "NotRightTriangle;": "\u22eb",
- "NotRightTriangleBar;": "\u29d0\u0338",
- "NotRightTriangleEqual;": "\u22ed",
- "NotSquareSubset;": "\u228f\u0338",
- "NotSquareSubsetEqual;": "\u22e2",
- "NotSquareSuperset;": "\u2290\u0338",
- "NotSquareSupersetEqual;": "\u22e3",
- "NotSubset;": "\u2282\u20d2",
- "NotSubsetEqual;": "\u2288",
- "NotSucceeds;": "\u2281",
- "NotSucceedsEqual;": "\u2ab0\u0338",
- "NotSucceedsSlantEqual;": "\u22e1",
- "NotSucceedsTilde;": "\u227f\u0338",
- "NotSuperset;": "\u2283\u20d2",
- "NotSupersetEqual;": "\u2289",
- "NotTilde;": "\u2241",
- "NotTildeEqual;": "\u2244",
- "NotTildeFullEqual;": "\u2247",
- "NotTildeTilde;": "\u2249",
- "NotVerticalBar;": "\u2224",
- "Nscr;": "\U0001d4a9",
- "Ntilde": "\xd1",
- "Ntilde;": "\xd1",
- "Nu;": "\u039d",
- "OElig;": "\u0152",
- "Oacute": "\xd3",
- "Oacute;": "\xd3",
- "Ocirc": "\xd4",
- "Ocirc;": "\xd4",
- "Ocy;": "\u041e",
- "Odblac;": "\u0150",
- "Ofr;": "\U0001d512",
- "Ograve": "\xd2",
- "Ograve;": "\xd2",
- "Omacr;": "\u014c",
- "Omega;": "\u03a9",
- "Omicron;": "\u039f",
- "Oopf;": "\U0001d546",
- "OpenCurlyDoubleQuote;": "\u201c",
- "OpenCurlyQuote;": "\u2018",
- "Or;": "\u2a54",
- "Oscr;": "\U0001d4aa",
- "Oslash": "\xd8",
- "Oslash;": "\xd8",
- "Otilde": "\xd5",
- "Otilde;": "\xd5",
- "Otimes;": "\u2a37",
- "Ouml": "\xd6",
- "Ouml;": "\xd6",
- "OverBar;": "\u203e",
- "OverBrace;": "\u23de",
- "OverBracket;": "\u23b4",
- "OverParenthesis;": "\u23dc",
- "PartialD;": "\u2202",
- "Pcy;": "\u041f",
- "Pfr;": "\U0001d513",
- "Phi;": "\u03a6",
- "Pi;": "\u03a0",
- "PlusMinus;": "\xb1",
- "Poincareplane;": "\u210c",
- "Popf;": "\u2119",
- "Pr;": "\u2abb",
- "Precedes;": "\u227a",
- "PrecedesEqual;": "\u2aaf",
- "PrecedesSlantEqual;": "\u227c",
- "PrecedesTilde;": "\u227e",
- "Prime;": "\u2033",
- "Product;": "\u220f",
- "Proportion;": "\u2237",
- "Proportional;": "\u221d",
- "Pscr;": "\U0001d4ab",
- "Psi;": "\u03a8",
- "QUOT": "\"",
- "QUOT;": "\"",
- "Qfr;": "\U0001d514",
- "Qopf;": "\u211a",
- "Qscr;": "\U0001d4ac",
- "RBarr;": "\u2910",
- "REG": "\xae",
- "REG;": "\xae",
- "Racute;": "\u0154",
- "Rang;": "\u27eb",
- "Rarr;": "\u21a0",
- "Rarrtl;": "\u2916",
- "Rcaron;": "\u0158",
- "Rcedil;": "\u0156",
- "Rcy;": "\u0420",
- "Re;": "\u211c",
- "ReverseElement;": "\u220b",
- "ReverseEquilibrium;": "\u21cb",
- "ReverseUpEquilibrium;": "\u296f",
- "Rfr;": "\u211c",
- "Rho;": "\u03a1",
- "RightAngleBracket;": "\u27e9",
- "RightArrow;": "\u2192",
- "RightArrowBar;": "\u21e5",
- "RightArrowLeftArrow;": "\u21c4",
- "RightCeiling;": "\u2309",
- "RightDoubleBracket;": "\u27e7",
- "RightDownTeeVector;": "\u295d",
- "RightDownVector;": "\u21c2",
- "RightDownVectorBar;": "\u2955",
- "RightFloor;": "\u230b",
- "RightTee;": "\u22a2",
- "RightTeeArrow;": "\u21a6",
- "RightTeeVector;": "\u295b",
- "RightTriangle;": "\u22b3",
- "RightTriangleBar;": "\u29d0",
- "RightTriangleEqual;": "\u22b5",
- "RightUpDownVector;": "\u294f",
- "RightUpTeeVector;": "\u295c",
- "RightUpVector;": "\u21be",
- "RightUpVectorBar;": "\u2954",
- "RightVector;": "\u21c0",
- "RightVectorBar;": "\u2953",
- "Rightarrow;": "\u21d2",
- "Ropf;": "\u211d",
- "RoundImplies;": "\u2970",
- "Rrightarrow;": "\u21db",
- "Rscr;": "\u211b",
- "Rsh;": "\u21b1",
- "RuleDelayed;": "\u29f4",
- "SHCHcy;": "\u0429",
- "SHcy;": "\u0428",
- "SOFTcy;": "\u042c",
- "Sacute;": "\u015a",
- "Sc;": "\u2abc",
- "Scaron;": "\u0160",
- "Scedil;": "\u015e",
- "Scirc;": "\u015c",
- "Scy;": "\u0421",
- "Sfr;": "\U0001d516",
- "ShortDownArrow;": "\u2193",
- "ShortLeftArrow;": "\u2190",
- "ShortRightArrow;": "\u2192",
- "ShortUpArrow;": "\u2191",
- "Sigma;": "\u03a3",
- "SmallCircle;": "\u2218",
- "Sopf;": "\U0001d54a",
- "Sqrt;": "\u221a",
- "Square;": "\u25a1",
- "SquareIntersection;": "\u2293",
- "SquareSubset;": "\u228f",
- "SquareSubsetEqual;": "\u2291",
- "SquareSuperset;": "\u2290",
- "SquareSupersetEqual;": "\u2292",
- "SquareUnion;": "\u2294",
- "Sscr;": "\U0001d4ae",
- "Star;": "\u22c6",
- "Sub;": "\u22d0",
- "Subset;": "\u22d0",
- "SubsetEqual;": "\u2286",
- "Succeeds;": "\u227b",
- "SucceedsEqual;": "\u2ab0",
- "SucceedsSlantEqual;": "\u227d",
- "SucceedsTilde;": "\u227f",
- "SuchThat;": "\u220b",
- "Sum;": "\u2211",
- "Sup;": "\u22d1",
- "Superset;": "\u2283",
- "SupersetEqual;": "\u2287",
- "Supset;": "\u22d1",
- "THORN": "\xde",
- "THORN;": "\xde",
- "TRADE;": "\u2122",
- "TSHcy;": "\u040b",
- "TScy;": "\u0426",
- "Tab;": "\t",
- "Tau;": "\u03a4",
- "Tcaron;": "\u0164",
- "Tcedil;": "\u0162",
- "Tcy;": "\u0422",
- "Tfr;": "\U0001d517",
- "Therefore;": "\u2234",
- "Theta;": "\u0398",
- "ThickSpace;": "\u205f\u200a",
- "ThinSpace;": "\u2009",
- "Tilde;": "\u223c",
- "TildeEqual;": "\u2243",
- "TildeFullEqual;": "\u2245",
- "TildeTilde;": "\u2248",
- "Topf;": "\U0001d54b",
- "TripleDot;": "\u20db",
- "Tscr;": "\U0001d4af",
- "Tstrok;": "\u0166",
- "Uacute": "\xda",
- "Uacute;": "\xda",
- "Uarr;": "\u219f",
- "Uarrocir;": "\u2949",
- "Ubrcy;": "\u040e",
- "Ubreve;": "\u016c",
- "Ucirc": "\xdb",
- "Ucirc;": "\xdb",
- "Ucy;": "\u0423",
- "Udblac;": "\u0170",
- "Ufr;": "\U0001d518",
- "Ugrave": "\xd9",
- "Ugrave;": "\xd9",
- "Umacr;": "\u016a",
- "UnderBar;": "_",
- "UnderBrace;": "\u23df",
- "UnderBracket;": "\u23b5",
- "UnderParenthesis;": "\u23dd",
- "Union;": "\u22c3",
- "UnionPlus;": "\u228e",
- "Uogon;": "\u0172",
- "Uopf;": "\U0001d54c",
- "UpArrow;": "\u2191",
- "UpArrowBar;": "\u2912",
- "UpArrowDownArrow;": "\u21c5",
- "UpDownArrow;": "\u2195",
- "UpEquilibrium;": "\u296e",
- "UpTee;": "\u22a5",
- "UpTeeArrow;": "\u21a5",
- "Uparrow;": "\u21d1",
- "Updownarrow;": "\u21d5",
- "UpperLeftArrow;": "\u2196",
- "UpperRightArrow;": "\u2197",
- "Upsi;": "\u03d2",
- "Upsilon;": "\u03a5",
- "Uring;": "\u016e",
- "Uscr;": "\U0001d4b0",
- "Utilde;": "\u0168",
- "Uuml": "\xdc",
- "Uuml;": "\xdc",
- "VDash;": "\u22ab",
- "Vbar;": "\u2aeb",
- "Vcy;": "\u0412",
- "Vdash;": "\u22a9",
- "Vdashl;": "\u2ae6",
- "Vee;": "\u22c1",
- "Verbar;": "\u2016",
- "Vert;": "\u2016",
- "VerticalBar;": "\u2223",
- "VerticalLine;": "|",
- "VerticalSeparator;": "\u2758",
- "VerticalTilde;": "\u2240",
- "VeryThinSpace;": "\u200a",
- "Vfr;": "\U0001d519",
- "Vopf;": "\U0001d54d",
- "Vscr;": "\U0001d4b1",
- "Vvdash;": "\u22aa",
- "Wcirc;": "\u0174",
- "Wedge;": "\u22c0",
- "Wfr;": "\U0001d51a",
- "Wopf;": "\U0001d54e",
- "Wscr;": "\U0001d4b2",
- "Xfr;": "\U0001d51b",
- "Xi;": "\u039e",
- "Xopf;": "\U0001d54f",
- "Xscr;": "\U0001d4b3",
- "YAcy;": "\u042f",
- "YIcy;": "\u0407",
- "YUcy;": "\u042e",
- "Yacute": "\xdd",
- "Yacute;": "\xdd",
- "Ycirc;": "\u0176",
- "Ycy;": "\u042b",
- "Yfr;": "\U0001d51c",
- "Yopf;": "\U0001d550",
- "Yscr;": "\U0001d4b4",
- "Yuml;": "\u0178",
- "ZHcy;": "\u0416",
- "Zacute;": "\u0179",
- "Zcaron;": "\u017d",
- "Zcy;": "\u0417",
- "Zdot;": "\u017b",
- "ZeroWidthSpace;": "\u200b",
- "Zeta;": "\u0396",
- "Zfr;": "\u2128",
- "Zopf;": "\u2124",
- "Zscr;": "\U0001d4b5",
- "aacute": "\xe1",
- "aacute;": "\xe1",
- "abreve;": "\u0103",
- "ac;": "\u223e",
- "acE;": "\u223e\u0333",
- "acd;": "\u223f",
- "acirc": "\xe2",
- "acirc;": "\xe2",
- "acute": "\xb4",
- "acute;": "\xb4",
- "acy;": "\u0430",
- "aelig": "\xe6",
- "aelig;": "\xe6",
- "af;": "\u2061",
- "afr;": "\U0001d51e",
- "agrave": "\xe0",
- "agrave;": "\xe0",
- "alefsym;": "\u2135",
- "aleph;": "\u2135",
- "alpha;": "\u03b1",
- "amacr;": "\u0101",
- "amalg;": "\u2a3f",
- "amp": "&",
- "amp;": "&",
- "and;": "\u2227",
- "andand;": "\u2a55",
- "andd;": "\u2a5c",
- "andslope;": "\u2a58",
- "andv;": "\u2a5a",
- "ang;": "\u2220",
- "ange;": "\u29a4",
- "angle;": "\u2220",
- "angmsd;": "\u2221",
- "angmsdaa;": "\u29a8",
- "angmsdab;": "\u29a9",
- "angmsdac;": "\u29aa",
- "angmsdad;": "\u29ab",
- "angmsdae;": "\u29ac",
- "angmsdaf;": "\u29ad",
- "angmsdag;": "\u29ae",
- "angmsdah;": "\u29af",
- "angrt;": "\u221f",
- "angrtvb;": "\u22be",
- "angrtvbd;": "\u299d",
- "angsph;": "\u2222",
- "angst;": "\xc5",
- "angzarr;": "\u237c",
- "aogon;": "\u0105",
- "aopf;": "\U0001d552",
- "ap;": "\u2248",
- "apE;": "\u2a70",
- "apacir;": "\u2a6f",
- "ape;": "\u224a",
- "apid;": "\u224b",
- "apos;": "'",
- "approx;": "\u2248",
- "approxeq;": "\u224a",
- "aring": "\xe5",
- "aring;": "\xe5",
- "ascr;": "\U0001d4b6",
- "ast;": "*",
- "asymp;": "\u2248",
- "asympeq;": "\u224d",
- "atilde": "\xe3",
- "atilde;": "\xe3",
- "auml": "\xe4",
- "auml;": "\xe4",
- "awconint;": "\u2233",
- "awint;": "\u2a11",
- "bNot;": "\u2aed",
- "backcong;": "\u224c",
- "backepsilon;": "\u03f6",
- "backprime;": "\u2035",
- "backsim;": "\u223d",
- "backsimeq;": "\u22cd",
- "barvee;": "\u22bd",
- "barwed;": "\u2305",
- "barwedge;": "\u2305",
- "bbrk;": "\u23b5",
- "bbrktbrk;": "\u23b6",
- "bcong;": "\u224c",
- "bcy;": "\u0431",
- "bdquo;": "\u201e",
- "becaus;": "\u2235",
- "because;": "\u2235",
- "bemptyv;": "\u29b0",
- "bepsi;": "\u03f6",
- "bernou;": "\u212c",
- "beta;": "\u03b2",
- "beth;": "\u2136",
- "between;": "\u226c",
- "bfr;": "\U0001d51f",
- "bigcap;": "\u22c2",
- "bigcirc;": "\u25ef",
- "bigcup;": "\u22c3",
- "bigodot;": "\u2a00",
- "bigoplus;": "\u2a01",
- "bigotimes;": "\u2a02",
- "bigsqcup;": "\u2a06",
- "bigstar;": "\u2605",
- "bigtriangledown;": "\u25bd",
- "bigtriangleup;": "\u25b3",
- "biguplus;": "\u2a04",
- "bigvee;": "\u22c1",
- "bigwedge;": "\u22c0",
- "bkarow;": "\u290d",
- "blacklozenge;": "\u29eb",
- "blacksquare;": "\u25aa",
- "blacktriangle;": "\u25b4",
- "blacktriangledown;": "\u25be",
- "blacktriangleleft;": "\u25c2",
- "blacktriangleright;": "\u25b8",
- "blank;": "\u2423",
- "blk12;": "\u2592",
- "blk14;": "\u2591",
- "blk34;": "\u2593",
- "block;": "\u2588",
- "bne;": "=\u20e5",
- "bnequiv;": "\u2261\u20e5",
- "bnot;": "\u2310",
- "bopf;": "\U0001d553",
- "bot;": "\u22a5",
- "bottom;": "\u22a5",
- "bowtie;": "\u22c8",
- "boxDL;": "\u2557",
- "boxDR;": "\u2554",
- "boxDl;": "\u2556",
- "boxDr;": "\u2553",
- "boxH;": "\u2550",
- "boxHD;": "\u2566",
- "boxHU;": "\u2569",
- "boxHd;": "\u2564",
- "boxHu;": "\u2567",
- "boxUL;": "\u255d",
- "boxUR;": "\u255a",
- "boxUl;": "\u255c",
- "boxUr;": "\u2559",
- "boxV;": "\u2551",
- "boxVH;": "\u256c",
- "boxVL;": "\u2563",
- "boxVR;": "\u2560",
- "boxVh;": "\u256b",
- "boxVl;": "\u2562",
- "boxVr;": "\u255f",
- "boxbox;": "\u29c9",
- "boxdL;": "\u2555",
- "boxdR;": "\u2552",
- "boxdl;": "\u2510",
- "boxdr;": "\u250c",
- "boxh;": "\u2500",
- "boxhD;": "\u2565",
- "boxhU;": "\u2568",
- "boxhd;": "\u252c",
- "boxhu;": "\u2534",
- "boxminus;": "\u229f",
- "boxplus;": "\u229e",
- "boxtimes;": "\u22a0",
- "boxuL;": "\u255b",
- "boxuR;": "\u2558",
- "boxul;": "\u2518",
- "boxur;": "\u2514",
- "boxv;": "\u2502",
- "boxvH;": "\u256a",
- "boxvL;": "\u2561",
- "boxvR;": "\u255e",
- "boxvh;": "\u253c",
- "boxvl;": "\u2524",
- "boxvr;": "\u251c",
- "bprime;": "\u2035",
- "breve;": "\u02d8",
- "brvbar": "\xa6",
- "brvbar;": "\xa6",
- "bscr;": "\U0001d4b7",
- "bsemi;": "\u204f",
- "bsim;": "\u223d",
- "bsime;": "\u22cd",
- "bsol;": "\\",
- "bsolb;": "\u29c5",
- "bsolhsub;": "\u27c8",
- "bull;": "\u2022",
- "bullet;": "\u2022",
- "bump;": "\u224e",
- "bumpE;": "\u2aae",
- "bumpe;": "\u224f",
- "bumpeq;": "\u224f",
- "cacute;": "\u0107",
- "cap;": "\u2229",
- "capand;": "\u2a44",
- "capbrcup;": "\u2a49",
- "capcap;": "\u2a4b",
- "capcup;": "\u2a47",
- "capdot;": "\u2a40",
- "caps;": "\u2229\ufe00",
- "caret;": "\u2041",
- "caron;": "\u02c7",
- "ccaps;": "\u2a4d",
- "ccaron;": "\u010d",
- "ccedil": "\xe7",
- "ccedil;": "\xe7",
- "ccirc;": "\u0109",
- "ccups;": "\u2a4c",
- "ccupssm;": "\u2a50",
- "cdot;": "\u010b",
- "cedil": "\xb8",
- "cedil;": "\xb8",
- "cemptyv;": "\u29b2",
- "cent": "\xa2",
- "cent;": "\xa2",
- "centerdot;": "\xb7",
- "cfr;": "\U0001d520",
- "chcy;": "\u0447",
- "check;": "\u2713",
- "checkmark;": "\u2713",
- "chi;": "\u03c7",
- "cir;": "\u25cb",
- "cirE;": "\u29c3",
- "circ;": "\u02c6",
- "circeq;": "\u2257",
- "circlearrowleft;": "\u21ba",
- "circlearrowright;": "\u21bb",
- "circledR;": "\xae",
- "circledS;": "\u24c8",
- "circledast;": "\u229b",
- "circledcirc;": "\u229a",
- "circleddash;": "\u229d",
- "cire;": "\u2257",
- "cirfnint;": "\u2a10",
- "cirmid;": "\u2aef",
- "cirscir;": "\u29c2",
- "clubs;": "\u2663",
- "clubsuit;": "\u2663",
- "colon;": ":",
- "colone;": "\u2254",
- "coloneq;": "\u2254",
- "comma;": ",",
- "commat;": "@",
- "comp;": "\u2201",
- "compfn;": "\u2218",
- "complement;": "\u2201",
- "complexes;": "\u2102",
- "cong;": "\u2245",
- "congdot;": "\u2a6d",
- "conint;": "\u222e",
- "copf;": "\U0001d554",
- "coprod;": "\u2210",
- "copy": "\xa9",
- "copy;": "\xa9",
- "copysr;": "\u2117",
- "crarr;": "\u21b5",
- "cross;": "\u2717",
- "cscr;": "\U0001d4b8",
- "csub;": "\u2acf",
- "csube;": "\u2ad1",
- "csup;": "\u2ad0",
- "csupe;": "\u2ad2",
- "ctdot;": "\u22ef",
- "cudarrl;": "\u2938",
- "cudarrr;": "\u2935",
- "cuepr;": "\u22de",
- "cuesc;": "\u22df",
- "cularr;": "\u21b6",
- "cularrp;": "\u293d",
- "cup;": "\u222a",
- "cupbrcap;": "\u2a48",
- "cupcap;": "\u2a46",
- "cupcup;": "\u2a4a",
- "cupdot;": "\u228d",
- "cupor;": "\u2a45",
- "cups;": "\u222a\ufe00",
- "curarr;": "\u21b7",
- "curarrm;": "\u293c",
- "curlyeqprec;": "\u22de",
- "curlyeqsucc;": "\u22df",
- "curlyvee;": "\u22ce",
- "curlywedge;": "\u22cf",
- "curren": "\xa4",
- "curren;": "\xa4",
- "curvearrowleft;": "\u21b6",
- "curvearrowright;": "\u21b7",
- "cuvee;": "\u22ce",
- "cuwed;": "\u22cf",
- "cwconint;": "\u2232",
- "cwint;": "\u2231",
- "cylcty;": "\u232d",
- "dArr;": "\u21d3",
- "dHar;": "\u2965",
- "dagger;": "\u2020",
- "daleth;": "\u2138",
- "darr;": "\u2193",
- "dash;": "\u2010",
- "dashv;": "\u22a3",
- "dbkarow;": "\u290f",
- "dblac;": "\u02dd",
- "dcaron;": "\u010f",
- "dcy;": "\u0434",
- "dd;": "\u2146",
- "ddagger;": "\u2021",
- "ddarr;": "\u21ca",
- "ddotseq;": "\u2a77",
- "deg": "\xb0",
- "deg;": "\xb0",
- "delta;": "\u03b4",
- "demptyv;": "\u29b1",
- "dfisht;": "\u297f",
- "dfr;": "\U0001d521",
- "dharl;": "\u21c3",
- "dharr;": "\u21c2",
- "diam;": "\u22c4",
- "diamond;": "\u22c4",
- "diamondsuit;": "\u2666",
- "diams;": "\u2666",
- "die;": "\xa8",
- "digamma;": "\u03dd",
- "disin;": "\u22f2",
- "div;": "\xf7",
- "divide": "\xf7",
- "divide;": "\xf7",
- "divideontimes;": "\u22c7",
- "divonx;": "\u22c7",
- "djcy;": "\u0452",
- "dlcorn;": "\u231e",
- "dlcrop;": "\u230d",
- "dollar;": "$",
- "dopf;": "\U0001d555",
- "dot;": "\u02d9",
- "doteq;": "\u2250",
- "doteqdot;": "\u2251",
- "dotminus;": "\u2238",
- "dotplus;": "\u2214",
- "dotsquare;": "\u22a1",
- "doublebarwedge;": "\u2306",
- "downarrow;": "\u2193",
- "downdownarrows;": "\u21ca",
- "downharpoonleft;": "\u21c3",
- "downharpoonright;": "\u21c2",
- "drbkarow;": "\u2910",
- "drcorn;": "\u231f",
- "drcrop;": "\u230c",
- "dscr;": "\U0001d4b9",
- "dscy;": "\u0455",
- "dsol;": "\u29f6",
- "dstrok;": "\u0111",
- "dtdot;": "\u22f1",
- "dtri;": "\u25bf",
- "dtrif;": "\u25be",
- "duarr;": "\u21f5",
- "duhar;": "\u296f",
- "dwangle;": "\u29a6",
- "dzcy;": "\u045f",
- "dzigrarr;": "\u27ff",
- "eDDot;": "\u2a77",
- "eDot;": "\u2251",
- "eacute": "\xe9",
- "eacute;": "\xe9",
- "easter;": "\u2a6e",
- "ecaron;": "\u011b",
- "ecir;": "\u2256",
- "ecirc": "\xea",
- "ecirc;": "\xea",
- "ecolon;": "\u2255",
- "ecy;": "\u044d",
- "edot;": "\u0117",
- "ee;": "\u2147",
- "efDot;": "\u2252",
- "efr;": "\U0001d522",
- "eg;": "\u2a9a",
- "egrave": "\xe8",
- "egrave;": "\xe8",
- "egs;": "\u2a96",
- "egsdot;": "\u2a98",
- "el;": "\u2a99",
- "elinters;": "\u23e7",
- "ell;": "\u2113",
- "els;": "\u2a95",
- "elsdot;": "\u2a97",
- "emacr;": "\u0113",
- "empty;": "\u2205",
- "emptyset;": "\u2205",
- "emptyv;": "\u2205",
- "emsp13;": "\u2004",
- "emsp14;": "\u2005",
- "emsp;": "\u2003",
- "eng;": "\u014b",
- "ensp;": "\u2002",
- "eogon;": "\u0119",
- "eopf;": "\U0001d556",
- "epar;": "\u22d5",
- "eparsl;": "\u29e3",
- "eplus;": "\u2a71",
- "epsi;": "\u03b5",
- "epsilon;": "\u03b5",
- "epsiv;": "\u03f5",
- "eqcirc;": "\u2256",
- "eqcolon;": "\u2255",
- "eqsim;": "\u2242",
- "eqslantgtr;": "\u2a96",
- "eqslantless;": "\u2a95",
- "equals;": "=",
- "equest;": "\u225f",
- "equiv;": "\u2261",
- "equivDD;": "\u2a78",
- "eqvparsl;": "\u29e5",
- "erDot;": "\u2253",
- "erarr;": "\u2971",
- "escr;": "\u212f",
- "esdot;": "\u2250",
- "esim;": "\u2242",
- "eta;": "\u03b7",
- "eth": "\xf0",
- "eth;": "\xf0",
- "euml": "\xeb",
- "euml;": "\xeb",
- "euro;": "\u20ac",
- "excl;": "!",
- "exist;": "\u2203",
- "expectation;": "\u2130",
- "exponentiale;": "\u2147",
- "fallingdotseq;": "\u2252",
- "fcy;": "\u0444",
- "female;": "\u2640",
- "ffilig;": "\ufb03",
- "fflig;": "\ufb00",
- "ffllig;": "\ufb04",
- "ffr;": "\U0001d523",
- "filig;": "\ufb01",
- "fjlig;": "fj",
- "flat;": "\u266d",
- "fllig;": "\ufb02",
- "fltns;": "\u25b1",
- "fnof;": "\u0192",
- "fopf;": "\U0001d557",
- "forall;": "\u2200",
- "fork;": "\u22d4",
- "forkv;": "\u2ad9",
- "fpartint;": "\u2a0d",
- "frac12": "\xbd",
- "frac12;": "\xbd",
- "frac13;": "\u2153",
- "frac14": "\xbc",
- "frac14;": "\xbc",
- "frac15;": "\u2155",
- "frac16;": "\u2159",
- "frac18;": "\u215b",
- "frac23;": "\u2154",
- "frac25;": "\u2156",
- "frac34": "\xbe",
- "frac34;": "\xbe",
- "frac35;": "\u2157",
- "frac38;": "\u215c",
- "frac45;": "\u2158",
- "frac56;": "\u215a",
- "frac58;": "\u215d",
- "frac78;": "\u215e",
- "frasl;": "\u2044",
- "frown;": "\u2322",
- "fscr;": "\U0001d4bb",
- "gE;": "\u2267",
- "gEl;": "\u2a8c",
- "gacute;": "\u01f5",
- "gamma;": "\u03b3",
- "gammad;": "\u03dd",
- "gap;": "\u2a86",
- "gbreve;": "\u011f",
- "gcirc;": "\u011d",
- "gcy;": "\u0433",
- "gdot;": "\u0121",
- "ge;": "\u2265",
- "gel;": "\u22db",
- "geq;": "\u2265",
- "geqq;": "\u2267",
- "geqslant;": "\u2a7e",
- "ges;": "\u2a7e",
- "gescc;": "\u2aa9",
- "gesdot;": "\u2a80",
- "gesdoto;": "\u2a82",
- "gesdotol;": "\u2a84",
- "gesl;": "\u22db\ufe00",
- "gesles;": "\u2a94",
- "gfr;": "\U0001d524",
- "gg;": "\u226b",
- "ggg;": "\u22d9",
- "gimel;": "\u2137",
- "gjcy;": "\u0453",
- "gl;": "\u2277",
- "glE;": "\u2a92",
- "gla;": "\u2aa5",
- "glj;": "\u2aa4",
- "gnE;": "\u2269",
- "gnap;": "\u2a8a",
- "gnapprox;": "\u2a8a",
- "gne;": "\u2a88",
- "gneq;": "\u2a88",
- "gneqq;": "\u2269",
- "gnsim;": "\u22e7",
- "gopf;": "\U0001d558",
- "grave;": "`",
- "gscr;": "\u210a",
- "gsim;": "\u2273",
- "gsime;": "\u2a8e",
- "gsiml;": "\u2a90",
- "gt": ">",
- "gt;": ">",
- "gtcc;": "\u2aa7",
- "gtcir;": "\u2a7a",
- "gtdot;": "\u22d7",
- "gtlPar;": "\u2995",
- "gtquest;": "\u2a7c",
- "gtrapprox;": "\u2a86",
- "gtrarr;": "\u2978",
- "gtrdot;": "\u22d7",
- "gtreqless;": "\u22db",
- "gtreqqless;": "\u2a8c",
- "gtrless;": "\u2277",
- "gtrsim;": "\u2273",
- "gvertneqq;": "\u2269\ufe00",
- "gvnE;": "\u2269\ufe00",
- "hArr;": "\u21d4",
- "hairsp;": "\u200a",
- "half;": "\xbd",
- "hamilt;": "\u210b",
- "hardcy;": "\u044a",
- "harr;": "\u2194",
- "harrcir;": "\u2948",
- "harrw;": "\u21ad",
- "hbar;": "\u210f",
- "hcirc;": "\u0125",
- "hearts;": "\u2665",
- "heartsuit;": "\u2665",
- "hellip;": "\u2026",
- "hercon;": "\u22b9",
- "hfr;": "\U0001d525",
- "hksearow;": "\u2925",
- "hkswarow;": "\u2926",
- "hoarr;": "\u21ff",
- "homtht;": "\u223b",
- "hookleftarrow;": "\u21a9",
- "hookrightarrow;": "\u21aa",
- "hopf;": "\U0001d559",
- "horbar;": "\u2015",
- "hscr;": "\U0001d4bd",
- "hslash;": "\u210f",
- "hstrok;": "\u0127",
- "hybull;": "\u2043",
- "hyphen;": "\u2010",
- "iacute": "\xed",
- "iacute;": "\xed",
- "ic;": "\u2063",
- "icirc": "\xee",
- "icirc;": "\xee",
- "icy;": "\u0438",
- "iecy;": "\u0435",
- "iexcl": "\xa1",
- "iexcl;": "\xa1",
- "iff;": "\u21d4",
- "ifr;": "\U0001d526",
- "igrave": "\xec",
- "igrave;": "\xec",
- "ii;": "\u2148",
- "iiiint;": "\u2a0c",
- "iiint;": "\u222d",
- "iinfin;": "\u29dc",
- "iiota;": "\u2129",
- "ijlig;": "\u0133",
- "imacr;": "\u012b",
- "image;": "\u2111",
- "imagline;": "\u2110",
- "imagpart;": "\u2111",
- "imath;": "\u0131",
- "imof;": "\u22b7",
- "imped;": "\u01b5",
- "in;": "\u2208",
- "incare;": "\u2105",
- "infin;": "\u221e",
- "infintie;": "\u29dd",
- "inodot;": "\u0131",
- "int;": "\u222b",
- "intcal;": "\u22ba",
- "integers;": "\u2124",
- "intercal;": "\u22ba",
- "intlarhk;": "\u2a17",
- "intprod;": "\u2a3c",
- "iocy;": "\u0451",
- "iogon;": "\u012f",
- "iopf;": "\U0001d55a",
- "iota;": "\u03b9",
- "iprod;": "\u2a3c",
- "iquest": "\xbf",
- "iquest;": "\xbf",
- "iscr;": "\U0001d4be",
- "isin;": "\u2208",
- "isinE;": "\u22f9",
- "isindot;": "\u22f5",
- "isins;": "\u22f4",
- "isinsv;": "\u22f3",
- "isinv;": "\u2208",
- "it;": "\u2062",
- "itilde;": "\u0129",
- "iukcy;": "\u0456",
- "iuml": "\xef",
- "iuml;": "\xef",
- "jcirc;": "\u0135",
- "jcy;": "\u0439",
- "jfr;": "\U0001d527",
- "jmath;": "\u0237",
- "jopf;": "\U0001d55b",
- "jscr;": "\U0001d4bf",
- "jsercy;": "\u0458",
- "jukcy;": "\u0454",
- "kappa;": "\u03ba",
- "kappav;": "\u03f0",
- "kcedil;": "\u0137",
- "kcy;": "\u043a",
- "kfr;": "\U0001d528",
- "kgreen;": "\u0138",
- "khcy;": "\u0445",
- "kjcy;": "\u045c",
- "kopf;": "\U0001d55c",
- "kscr;": "\U0001d4c0",
- "lAarr;": "\u21da",
- "lArr;": "\u21d0",
- "lAtail;": "\u291b",
- "lBarr;": "\u290e",
- "lE;": "\u2266",
- "lEg;": "\u2a8b",
- "lHar;": "\u2962",
- "lacute;": "\u013a",
- "laemptyv;": "\u29b4",
- "lagran;": "\u2112",
- "lambda;": "\u03bb",
- "lang;": "\u27e8",
- "langd;": "\u2991",
- "langle;": "\u27e8",
- "lap;": "\u2a85",
- "laquo": "\xab",
- "laquo;": "\xab",
- "larr;": "\u2190",
- "larrb;": "\u21e4",
- "larrbfs;": "\u291f",
- "larrfs;": "\u291d",
- "larrhk;": "\u21a9",
- "larrlp;": "\u21ab",
- "larrpl;": "\u2939",
- "larrsim;": "\u2973",
- "larrtl;": "\u21a2",
- "lat;": "\u2aab",
- "latail;": "\u2919",
- "late;": "\u2aad",
- "lates;": "\u2aad\ufe00",
- "lbarr;": "\u290c",
- "lbbrk;": "\u2772",
- "lbrace;": "{",
- "lbrack;": "[",
- "lbrke;": "\u298b",
- "lbrksld;": "\u298f",
- "lbrkslu;": "\u298d",
- "lcaron;": "\u013e",
- "lcedil;": "\u013c",
- "lceil;": "\u2308",
- "lcub;": "{",
- "lcy;": "\u043b",
- "ldca;": "\u2936",
- "ldquo;": "\u201c",
- "ldquor;": "\u201e",
- "ldrdhar;": "\u2967",
- "ldrushar;": "\u294b",
- "ldsh;": "\u21b2",
- "le;": "\u2264",
- "leftarrow;": "\u2190",
- "leftarrowtail;": "\u21a2",
- "leftharpoondown;": "\u21bd",
- "leftharpoonup;": "\u21bc",
- "leftleftarrows;": "\u21c7",
- "leftrightarrow;": "\u2194",
- "leftrightarrows;": "\u21c6",
- "leftrightharpoons;": "\u21cb",
- "leftrightsquigarrow;": "\u21ad",
- "leftthreetimes;": "\u22cb",
- "leg;": "\u22da",
- "leq;": "\u2264",
- "leqq;": "\u2266",
- "leqslant;": "\u2a7d",
- "les;": "\u2a7d",
- "lescc;": "\u2aa8",
- "lesdot;": "\u2a7f",
- "lesdoto;": "\u2a81",
- "lesdotor;": "\u2a83",
- "lesg;": "\u22da\ufe00",
- "lesges;": "\u2a93",
- "lessapprox;": "\u2a85",
- "lessdot;": "\u22d6",
- "lesseqgtr;": "\u22da",
- "lesseqqgtr;": "\u2a8b",
- "lessgtr;": "\u2276",
- "lesssim;": "\u2272",
- "lfisht;": "\u297c",
- "lfloor;": "\u230a",
- "lfr;": "\U0001d529",
- "lg;": "\u2276",
- "lgE;": "\u2a91",
- "lhard;": "\u21bd",
- "lharu;": "\u21bc",
- "lharul;": "\u296a",
- "lhblk;": "\u2584",
- "ljcy;": "\u0459",
- "ll;": "\u226a",
- "llarr;": "\u21c7",
- "llcorner;": "\u231e",
- "llhard;": "\u296b",
- "lltri;": "\u25fa",
- "lmidot;": "\u0140",
- "lmoust;": "\u23b0",
- "lmoustache;": "\u23b0",
- "lnE;": "\u2268",
- "lnap;": "\u2a89",
- "lnapprox;": "\u2a89",
- "lne;": "\u2a87",
- "lneq;": "\u2a87",
- "lneqq;": "\u2268",
- "lnsim;": "\u22e6",
- "loang;": "\u27ec",
- "loarr;": "\u21fd",
- "lobrk;": "\u27e6",
- "longleftarrow;": "\u27f5",
- "longleftrightarrow;": "\u27f7",
- "longmapsto;": "\u27fc",
- "longrightarrow;": "\u27f6",
- "looparrowleft;": "\u21ab",
- "looparrowright;": "\u21ac",
- "lopar;": "\u2985",
- "lopf;": "\U0001d55d",
- "loplus;": "\u2a2d",
- "lotimes;": "\u2a34",
- "lowast;": "\u2217",
- "lowbar;": "_",
- "loz;": "\u25ca",
- "lozenge;": "\u25ca",
- "lozf;": "\u29eb",
- "lpar;": "(",
- "lparlt;": "\u2993",
- "lrarr;": "\u21c6",
- "lrcorner;": "\u231f",
- "lrhar;": "\u21cb",
- "lrhard;": "\u296d",
- "lrm;": "\u200e",
- "lrtri;": "\u22bf",
- "lsaquo;": "\u2039",
- "lscr;": "\U0001d4c1",
- "lsh;": "\u21b0",
- "lsim;": "\u2272",
- "lsime;": "\u2a8d",
- "lsimg;": "\u2a8f",
- "lsqb;": "[",
- "lsquo;": "\u2018",
- "lsquor;": "\u201a",
- "lstrok;": "\u0142",
- "lt": "<",
- "lt;": "<",
- "ltcc;": "\u2aa6",
- "ltcir;": "\u2a79",
- "ltdot;": "\u22d6",
- "lthree;": "\u22cb",
- "ltimes;": "\u22c9",
- "ltlarr;": "\u2976",
- "ltquest;": "\u2a7b",
- "ltrPar;": "\u2996",
- "ltri;": "\u25c3",
- "ltrie;": "\u22b4",
- "ltrif;": "\u25c2",
- "lurdshar;": "\u294a",
- "luruhar;": "\u2966",
- "lvertneqq;": "\u2268\ufe00",
- "lvnE;": "\u2268\ufe00",
- "mDDot;": "\u223a",
- "macr": "\xaf",
- "macr;": "\xaf",
- "male;": "\u2642",
- "malt;": "\u2720",
- "maltese;": "\u2720",
- "map;": "\u21a6",
- "mapsto;": "\u21a6",
- "mapstodown;": "\u21a7",
- "mapstoleft;": "\u21a4",
- "mapstoup;": "\u21a5",
- "marker;": "\u25ae",
- "mcomma;": "\u2a29",
- "mcy;": "\u043c",
- "mdash;": "\u2014",
- "measuredangle;": "\u2221",
- "mfr;": "\U0001d52a",
- "mho;": "\u2127",
- "micro": "\xb5",
- "micro;": "\xb5",
- "mid;": "\u2223",
- "midast;": "*",
- "midcir;": "\u2af0",
- "middot": "\xb7",
- "middot;": "\xb7",
- "minus;": "\u2212",
- "minusb;": "\u229f",
- "minusd;": "\u2238",
- "minusdu;": "\u2a2a",
- "mlcp;": "\u2adb",
- "mldr;": "\u2026",
- "mnplus;": "\u2213",
- "models;": "\u22a7",
- "mopf;": "\U0001d55e",
- "mp;": "\u2213",
- "mscr;": "\U0001d4c2",
- "mstpos;": "\u223e",
- "mu;": "\u03bc",
- "multimap;": "\u22b8",
- "mumap;": "\u22b8",
- "nGg;": "\u22d9\u0338",
- "nGt;": "\u226b\u20d2",
- "nGtv;": "\u226b\u0338",
- "nLeftarrow;": "\u21cd",
- "nLeftrightarrow;": "\u21ce",
- "nLl;": "\u22d8\u0338",
- "nLt;": "\u226a\u20d2",
- "nLtv;": "\u226a\u0338",
- "nRightarrow;": "\u21cf",
- "nVDash;": "\u22af",
- "nVdash;": "\u22ae",
- "nabla;": "\u2207",
- "nacute;": "\u0144",
- "nang;": "\u2220\u20d2",
- "nap;": "\u2249",
- "napE;": "\u2a70\u0338",
- "napid;": "\u224b\u0338",
- "napos;": "\u0149",
- "napprox;": "\u2249",
- "natur;": "\u266e",
- "natural;": "\u266e",
- "naturals;": "\u2115",
- "nbsp": "\xa0",
- "nbsp;": "\xa0",
- "nbump;": "\u224e\u0338",
- "nbumpe;": "\u224f\u0338",
- "ncap;": "\u2a43",
- "ncaron;": "\u0148",
- "ncedil;": "\u0146",
- "ncong;": "\u2247",
- "ncongdot;": "\u2a6d\u0338",
- "ncup;": "\u2a42",
- "ncy;": "\u043d",
- "ndash;": "\u2013",
- "ne;": "\u2260",
- "neArr;": "\u21d7",
- "nearhk;": "\u2924",
- "nearr;": "\u2197",
- "nearrow;": "\u2197",
- "nedot;": "\u2250\u0338",
- "nequiv;": "\u2262",
- "nesear;": "\u2928",
- "nesim;": "\u2242\u0338",
- "nexist;": "\u2204",
- "nexists;": "\u2204",
- "nfr;": "\U0001d52b",
- "ngE;": "\u2267\u0338",
- "nge;": "\u2271",
- "ngeq;": "\u2271",
- "ngeqq;": "\u2267\u0338",
- "ngeqslant;": "\u2a7e\u0338",
- "nges;": "\u2a7e\u0338",
- "ngsim;": "\u2275",
- "ngt;": "\u226f",
- "ngtr;": "\u226f",
- "nhArr;": "\u21ce",
- "nharr;": "\u21ae",
- "nhpar;": "\u2af2",
- "ni;": "\u220b",
- "nis;": "\u22fc",
- "nisd;": "\u22fa",
- "niv;": "\u220b",
- "njcy;": "\u045a",
- "nlArr;": "\u21cd",
- "nlE;": "\u2266\u0338",
- "nlarr;": "\u219a",
- "nldr;": "\u2025",
- "nle;": "\u2270",
- "nleftarrow;": "\u219a",
- "nleftrightarrow;": "\u21ae",
- "nleq;": "\u2270",
- "nleqq;": "\u2266\u0338",
- "nleqslant;": "\u2a7d\u0338",
- "nles;": "\u2a7d\u0338",
- "nless;": "\u226e",
- "nlsim;": "\u2274",
- "nlt;": "\u226e",
- "nltri;": "\u22ea",
- "nltrie;": "\u22ec",
- "nmid;": "\u2224",
- "nopf;": "\U0001d55f",
- "not": "\xac",
- "not;": "\xac",
- "notin;": "\u2209",
- "notinE;": "\u22f9\u0338",
- "notindot;": "\u22f5\u0338",
- "notinva;": "\u2209",
- "notinvb;": "\u22f7",
- "notinvc;": "\u22f6",
- "notni;": "\u220c",
- "notniva;": "\u220c",
- "notnivb;": "\u22fe",
- "notnivc;": "\u22fd",
- "npar;": "\u2226",
- "nparallel;": "\u2226",
- "nparsl;": "\u2afd\u20e5",
- "npart;": "\u2202\u0338",
- "npolint;": "\u2a14",
- "npr;": "\u2280",
- "nprcue;": "\u22e0",
- "npre;": "\u2aaf\u0338",
- "nprec;": "\u2280",
- "npreceq;": "\u2aaf\u0338",
- "nrArr;": "\u21cf",
- "nrarr;": "\u219b",
- "nrarrc;": "\u2933\u0338",
- "nrarrw;": "\u219d\u0338",
- "nrightarrow;": "\u219b",
- "nrtri;": "\u22eb",
- "nrtrie;": "\u22ed",
- "nsc;": "\u2281",
- "nsccue;": "\u22e1",
- "nsce;": "\u2ab0\u0338",
- "nscr;": "\U0001d4c3",
- "nshortmid;": "\u2224",
- "nshortparallel;": "\u2226",
- "nsim;": "\u2241",
- "nsime;": "\u2244",
- "nsimeq;": "\u2244",
- "nsmid;": "\u2224",
- "nspar;": "\u2226",
- "nsqsube;": "\u22e2",
- "nsqsupe;": "\u22e3",
- "nsub;": "\u2284",
- "nsubE;": "\u2ac5\u0338",
- "nsube;": "\u2288",
- "nsubset;": "\u2282\u20d2",
- "nsubseteq;": "\u2288",
- "nsubseteqq;": "\u2ac5\u0338",
- "nsucc;": "\u2281",
- "nsucceq;": "\u2ab0\u0338",
- "nsup;": "\u2285",
- "nsupE;": "\u2ac6\u0338",
- "nsupe;": "\u2289",
- "nsupset;": "\u2283\u20d2",
- "nsupseteq;": "\u2289",
- "nsupseteqq;": "\u2ac6\u0338",
- "ntgl;": "\u2279",
- "ntilde": "\xf1",
- "ntilde;": "\xf1",
- "ntlg;": "\u2278",
- "ntriangleleft;": "\u22ea",
- "ntrianglelefteq;": "\u22ec",
- "ntriangleright;": "\u22eb",
- "ntrianglerighteq;": "\u22ed",
- "nu;": "\u03bd",
- "num;": "#",
- "numero;": "\u2116",
- "numsp;": "\u2007",
- "nvDash;": "\u22ad",
- "nvHarr;": "\u2904",
- "nvap;": "\u224d\u20d2",
- "nvdash;": "\u22ac",
- "nvge;": "\u2265\u20d2",
- "nvgt;": ">\u20d2",
- "nvinfin;": "\u29de",
- "nvlArr;": "\u2902",
- "nvle;": "\u2264\u20d2",
- "nvlt;": "<\u20d2",
- "nvltrie;": "\u22b4\u20d2",
- "nvrArr;": "\u2903",
- "nvrtrie;": "\u22b5\u20d2",
- "nvsim;": "\u223c\u20d2",
- "nwArr;": "\u21d6",
- "nwarhk;": "\u2923",
- "nwarr;": "\u2196",
- "nwarrow;": "\u2196",
- "nwnear;": "\u2927",
- "oS;": "\u24c8",
- "oacute": "\xf3",
- "oacute;": "\xf3",
- "oast;": "\u229b",
- "ocir;": "\u229a",
- "ocirc": "\xf4",
- "ocirc;": "\xf4",
- "ocy;": "\u043e",
- "odash;": "\u229d",
- "odblac;": "\u0151",
- "odiv;": "\u2a38",
- "odot;": "\u2299",
- "odsold;": "\u29bc",
- "oelig;": "\u0153",
- "ofcir;": "\u29bf",
- "ofr;": "\U0001d52c",
- "ogon;": "\u02db",
- "ograve": "\xf2",
- "ograve;": "\xf2",
- "ogt;": "\u29c1",
- "ohbar;": "\u29b5",
- "ohm;": "\u03a9",
- "oint;": "\u222e",
- "olarr;": "\u21ba",
- "olcir;": "\u29be",
- "olcross;": "\u29bb",
- "oline;": "\u203e",
- "olt;": "\u29c0",
- "omacr;": "\u014d",
- "omega;": "\u03c9",
- "omicron;": "\u03bf",
- "omid;": "\u29b6",
- "ominus;": "\u2296",
- "oopf;": "\U0001d560",
- "opar;": "\u29b7",
- "operp;": "\u29b9",
- "oplus;": "\u2295",
- "or;": "\u2228",
- "orarr;": "\u21bb",
- "ord;": "\u2a5d",
- "order;": "\u2134",
- "orderof;": "\u2134",
- "ordf": "\xaa",
- "ordf;": "\xaa",
- "ordm": "\xba",
- "ordm;": "\xba",
- "origof;": "\u22b6",
- "oror;": "\u2a56",
- "orslope;": "\u2a57",
- "orv;": "\u2a5b",
- "oscr;": "\u2134",
- "oslash": "\xf8",
- "oslash;": "\xf8",
- "osol;": "\u2298",
- "otilde": "\xf5",
- "otilde;": "\xf5",
- "otimes;": "\u2297",
- "otimesas;": "\u2a36",
- "ouml": "\xf6",
- "ouml;": "\xf6",
- "ovbar;": "\u233d",
- "par;": "\u2225",
- "para": "\xb6",
- "para;": "\xb6",
- "parallel;": "\u2225",
- "parsim;": "\u2af3",
- "parsl;": "\u2afd",
- "part;": "\u2202",
- "pcy;": "\u043f",
- "percnt;": "%",
- "period;": ".",
- "permil;": "\u2030",
- "perp;": "\u22a5",
- "pertenk;": "\u2031",
- "pfr;": "\U0001d52d",
- "phi;": "\u03c6",
- "phiv;": "\u03d5",
- "phmmat;": "\u2133",
- "phone;": "\u260e",
- "pi;": "\u03c0",
- "pitchfork;": "\u22d4",
- "piv;": "\u03d6",
- "planck;": "\u210f",
- "planckh;": "\u210e",
- "plankv;": "\u210f",
- "plus;": "+",
- "plusacir;": "\u2a23",
- "plusb;": "\u229e",
- "pluscir;": "\u2a22",
- "plusdo;": "\u2214",
- "plusdu;": "\u2a25",
- "pluse;": "\u2a72",
- "plusmn": "\xb1",
- "plusmn;": "\xb1",
- "plussim;": "\u2a26",
- "plustwo;": "\u2a27",
- "pm;": "\xb1",
- "pointint;": "\u2a15",
- "popf;": "\U0001d561",
- "pound": "\xa3",
- "pound;": "\xa3",
- "pr;": "\u227a",
- "prE;": "\u2ab3",
- "prap;": "\u2ab7",
- "prcue;": "\u227c",
- "pre;": "\u2aaf",
- "prec;": "\u227a",
- "precapprox;": "\u2ab7",
- "preccurlyeq;": "\u227c",
- "preceq;": "\u2aaf",
- "precnapprox;": "\u2ab9",
- "precneqq;": "\u2ab5",
- "precnsim;": "\u22e8",
- "precsim;": "\u227e",
- "prime;": "\u2032",
- "primes;": "\u2119",
- "prnE;": "\u2ab5",
- "prnap;": "\u2ab9",
- "prnsim;": "\u22e8",
- "prod;": "\u220f",
- "profalar;": "\u232e",
- "profline;": "\u2312",
- "profsurf;": "\u2313",
- "prop;": "\u221d",
- "propto;": "\u221d",
- "prsim;": "\u227e",
- "prurel;": "\u22b0",
- "pscr;": "\U0001d4c5",
- "psi;": "\u03c8",
- "puncsp;": "\u2008",
- "qfr;": "\U0001d52e",
- "qint;": "\u2a0c",
- "qopf;": "\U0001d562",
- "qprime;": "\u2057",
- "qscr;": "\U0001d4c6",
- "quaternions;": "\u210d",
- "quatint;": "\u2a16",
- "quest;": "?",
- "questeq;": "\u225f",
- "quot": "\"",
- "quot;": "\"",
- "rAarr;": "\u21db",
- "rArr;": "\u21d2",
- "rAtail;": "\u291c",
- "rBarr;": "\u290f",
- "rHar;": "\u2964",
- "race;": "\u223d\u0331",
- "racute;": "\u0155",
- "radic;": "\u221a",
- "raemptyv;": "\u29b3",
- "rang;": "\u27e9",
- "rangd;": "\u2992",
- "range;": "\u29a5",
- "rangle;": "\u27e9",
- "raquo": "\xbb",
- "raquo;": "\xbb",
- "rarr;": "\u2192",
- "rarrap;": "\u2975",
- "rarrb;": "\u21e5",
- "rarrbfs;": "\u2920",
- "rarrc;": "\u2933",
- "rarrfs;": "\u291e",
- "rarrhk;": "\u21aa",
- "rarrlp;": "\u21ac",
- "rarrpl;": "\u2945",
- "rarrsim;": "\u2974",
- "rarrtl;": "\u21a3",
- "rarrw;": "\u219d",
- "ratail;": "\u291a",
- "ratio;": "\u2236",
- "rationals;": "\u211a",
- "rbarr;": "\u290d",
- "rbbrk;": "\u2773",
- "rbrace;": "}",
- "rbrack;": "]",
- "rbrke;": "\u298c",
- "rbrksld;": "\u298e",
- "rbrkslu;": "\u2990",
- "rcaron;": "\u0159",
- "rcedil;": "\u0157",
- "rceil;": "\u2309",
- "rcub;": "}",
- "rcy;": "\u0440",
- "rdca;": "\u2937",
- "rdldhar;": "\u2969",
- "rdquo;": "\u201d",
- "rdquor;": "\u201d",
- "rdsh;": "\u21b3",
- "real;": "\u211c",
- "realine;": "\u211b",
- "realpart;": "\u211c",
- "reals;": "\u211d",
- "rect;": "\u25ad",
- "reg": "\xae",
- "reg;": "\xae",
- "rfisht;": "\u297d",
- "rfloor;": "\u230b",
- "rfr;": "\U0001d52f",
- "rhard;": "\u21c1",
- "rharu;": "\u21c0",
- "rharul;": "\u296c",
- "rho;": "\u03c1",
- "rhov;": "\u03f1",
- "rightarrow;": "\u2192",
- "rightarrowtail;": "\u21a3",
- "rightharpoondown;": "\u21c1",
- "rightharpoonup;": "\u21c0",
- "rightleftarrows;": "\u21c4",
- "rightleftharpoons;": "\u21cc",
- "rightrightarrows;": "\u21c9",
- "rightsquigarrow;": "\u219d",
- "rightthreetimes;": "\u22cc",
- "ring;": "\u02da",
- "risingdotseq;": "\u2253",
- "rlarr;": "\u21c4",
- "rlhar;": "\u21cc",
- "rlm;": "\u200f",
- "rmoust;": "\u23b1",
- "rmoustache;": "\u23b1",
- "rnmid;": "\u2aee",
- "roang;": "\u27ed",
- "roarr;": "\u21fe",
- "robrk;": "\u27e7",
- "ropar;": "\u2986",
- "ropf;": "\U0001d563",
- "roplus;": "\u2a2e",
- "rotimes;": "\u2a35",
- "rpar;": ")",
- "rpargt;": "\u2994",
- "rppolint;": "\u2a12",
- "rrarr;": "\u21c9",
- "rsaquo;": "\u203a",
- "rscr;": "\U0001d4c7",
- "rsh;": "\u21b1",
- "rsqb;": "]",
- "rsquo;": "\u2019",
- "rsquor;": "\u2019",
- "rthree;": "\u22cc",
- "rtimes;": "\u22ca",
- "rtri;": "\u25b9",
- "rtrie;": "\u22b5",
- "rtrif;": "\u25b8",
- "rtriltri;": "\u29ce",
- "ruluhar;": "\u2968",
- "rx;": "\u211e",
- "sacute;": "\u015b",
- "sbquo;": "\u201a",
- "sc;": "\u227b",
- "scE;": "\u2ab4",
- "scap;": "\u2ab8",
- "scaron;": "\u0161",
- "sccue;": "\u227d",
- "sce;": "\u2ab0",
- "scedil;": "\u015f",
- "scirc;": "\u015d",
- "scnE;": "\u2ab6",
- "scnap;": "\u2aba",
- "scnsim;": "\u22e9",
- "scpolint;": "\u2a13",
- "scsim;": "\u227f",
- "scy;": "\u0441",
- "sdot;": "\u22c5",
- "sdotb;": "\u22a1",
- "sdote;": "\u2a66",
- "seArr;": "\u21d8",
- "searhk;": "\u2925",
- "searr;": "\u2198",
- "searrow;": "\u2198",
- "sect": "\xa7",
- "sect;": "\xa7",
- "semi;": ";",
- "seswar;": "\u2929",
- "setminus;": "\u2216",
- "setmn;": "\u2216",
- "sext;": "\u2736",
- "sfr;": "\U0001d530",
- "sfrown;": "\u2322",
- "sharp;": "\u266f",
- "shchcy;": "\u0449",
- "shcy;": "\u0448",
- "shortmid;": "\u2223",
- "shortparallel;": "\u2225",
- "shy": "\xad",
- "shy;": "\xad",
- "sigma;": "\u03c3",
- "sigmaf;": "\u03c2",
- "sigmav;": "\u03c2",
- "sim;": "\u223c",
- "simdot;": "\u2a6a",
- "sime;": "\u2243",
- "simeq;": "\u2243",
- "simg;": "\u2a9e",
- "simgE;": "\u2aa0",
- "siml;": "\u2a9d",
- "simlE;": "\u2a9f",
- "simne;": "\u2246",
- "simplus;": "\u2a24",
- "simrarr;": "\u2972",
- "slarr;": "\u2190",
- "smallsetminus;": "\u2216",
- "smashp;": "\u2a33",
- "smeparsl;": "\u29e4",
- "smid;": "\u2223",
- "smile;": "\u2323",
- "smt;": "\u2aaa",
- "smte;": "\u2aac",
- "smtes;": "\u2aac\ufe00",
- "softcy;": "\u044c",
- "sol;": "/",
- "solb;": "\u29c4",
- "solbar;": "\u233f",
- "sopf;": "\U0001d564",
- "spades;": "\u2660",
- "spadesuit;": "\u2660",
- "spar;": "\u2225",
- "sqcap;": "\u2293",
- "sqcaps;": "\u2293\ufe00",
- "sqcup;": "\u2294",
- "sqcups;": "\u2294\ufe00",
- "sqsub;": "\u228f",
- "sqsube;": "\u2291",
- "sqsubset;": "\u228f",
- "sqsubseteq;": "\u2291",
- "sqsup;": "\u2290",
- "sqsupe;": "\u2292",
- "sqsupset;": "\u2290",
- "sqsupseteq;": "\u2292",
- "squ;": "\u25a1",
- "square;": "\u25a1",
- "squarf;": "\u25aa",
- "squf;": "\u25aa",
- "srarr;": "\u2192",
- "sscr;": "\U0001d4c8",
- "ssetmn;": "\u2216",
- "ssmile;": "\u2323",
- "sstarf;": "\u22c6",
- "star;": "\u2606",
- "starf;": "\u2605",
- "straightepsilon;": "\u03f5",
- "straightphi;": "\u03d5",
- "strns;": "\xaf",
- "sub;": "\u2282",
- "subE;": "\u2ac5",
- "subdot;": "\u2abd",
- "sube;": "\u2286",
- "subedot;": "\u2ac3",
- "submult;": "\u2ac1",
- "subnE;": "\u2acb",
- "subne;": "\u228a",
- "subplus;": "\u2abf",
- "subrarr;": "\u2979",
- "subset;": "\u2282",
- "subseteq;": "\u2286",
- "subseteqq;": "\u2ac5",
- "subsetneq;": "\u228a",
- "subsetneqq;": "\u2acb",
- "subsim;": "\u2ac7",
- "subsub;": "\u2ad5",
- "subsup;": "\u2ad3",
- "succ;": "\u227b",
- "succapprox;": "\u2ab8",
- "succcurlyeq;": "\u227d",
- "succeq;": "\u2ab0",
- "succnapprox;": "\u2aba",
- "succneqq;": "\u2ab6",
- "succnsim;": "\u22e9",
- "succsim;": "\u227f",
- "sum;": "\u2211",
- "sung;": "\u266a",
- "sup1": "\xb9",
- "sup1;": "\xb9",
- "sup2": "\xb2",
- "sup2;": "\xb2",
- "sup3": "\xb3",
- "sup3;": "\xb3",
- "sup;": "\u2283",
- "supE;": "\u2ac6",
- "supdot;": "\u2abe",
- "supdsub;": "\u2ad8",
- "supe;": "\u2287",
- "supedot;": "\u2ac4",
- "suphsol;": "\u27c9",
- "suphsub;": "\u2ad7",
- "suplarr;": "\u297b",
- "supmult;": "\u2ac2",
- "supnE;": "\u2acc",
- "supne;": "\u228b",
- "supplus;": "\u2ac0",
- "supset;": "\u2283",
- "supseteq;": "\u2287",
- "supseteqq;": "\u2ac6",
- "supsetneq;": "\u228b",
- "supsetneqq;": "\u2acc",
- "supsim;": "\u2ac8",
- "supsub;": "\u2ad4",
- "supsup;": "\u2ad6",
- "swArr;": "\u21d9",
- "swarhk;": "\u2926",
- "swarr;": "\u2199",
- "swarrow;": "\u2199",
- "swnwar;": "\u292a",
- "szlig": "\xdf",
- "szlig;": "\xdf",
- "target;": "\u2316",
- "tau;": "\u03c4",
- "tbrk;": "\u23b4",
- "tcaron;": "\u0165",
- "tcedil;": "\u0163",
- "tcy;": "\u0442",
- "tdot;": "\u20db",
- "telrec;": "\u2315",
- "tfr;": "\U0001d531",
- "there4;": "\u2234",
- "therefore;": "\u2234",
- "theta;": "\u03b8",
- "thetasym;": "\u03d1",
- "thetav;": "\u03d1",
- "thickapprox;": "\u2248",
- "thicksim;": "\u223c",
- "thinsp;": "\u2009",
- "thkap;": "\u2248",
- "thksim;": "\u223c",
- "thorn": "\xfe",
- "thorn;": "\xfe",
- "tilde;": "\u02dc",
- "times": "\xd7",
- "times;": "\xd7",
- "timesb;": "\u22a0",
- "timesbar;": "\u2a31",
- "timesd;": "\u2a30",
- "tint;": "\u222d",
- "toea;": "\u2928",
- "top;": "\u22a4",
- "topbot;": "\u2336",
- "topcir;": "\u2af1",
- "topf;": "\U0001d565",
- "topfork;": "\u2ada",
- "tosa;": "\u2929",
- "tprime;": "\u2034",
- "trade;": "\u2122",
- "triangle;": "\u25b5",
- "triangledown;": "\u25bf",
- "triangleleft;": "\u25c3",
- "trianglelefteq;": "\u22b4",
- "triangleq;": "\u225c",
- "triangleright;": "\u25b9",
- "trianglerighteq;": "\u22b5",
- "tridot;": "\u25ec",
- "trie;": "\u225c",
- "triminus;": "\u2a3a",
- "triplus;": "\u2a39",
- "trisb;": "\u29cd",
- "tritime;": "\u2a3b",
- "trpezium;": "\u23e2",
- "tscr;": "\U0001d4c9",
- "tscy;": "\u0446",
- "tshcy;": "\u045b",
- "tstrok;": "\u0167",
- "twixt;": "\u226c",
- "twoheadleftarrow;": "\u219e",
- "twoheadrightarrow;": "\u21a0",
- "uArr;": "\u21d1",
- "uHar;": "\u2963",
- "uacute": "\xfa",
- "uacute;": "\xfa",
- "uarr;": "\u2191",
- "ubrcy;": "\u045e",
- "ubreve;": "\u016d",
- "ucirc": "\xfb",
- "ucirc;": "\xfb",
- "ucy;": "\u0443",
- "udarr;": "\u21c5",
- "udblac;": "\u0171",
- "udhar;": "\u296e",
- "ufisht;": "\u297e",
- "ufr;": "\U0001d532",
- "ugrave": "\xf9",
- "ugrave;": "\xf9",
- "uharl;": "\u21bf",
- "uharr;": "\u21be",
- "uhblk;": "\u2580",
- "ulcorn;": "\u231c",
- "ulcorner;": "\u231c",
- "ulcrop;": "\u230f",
- "ultri;": "\u25f8",
- "umacr;": "\u016b",
- "uml": "\xa8",
- "uml;": "\xa8",
- "uogon;": "\u0173",
- "uopf;": "\U0001d566",
- "uparrow;": "\u2191",
- "updownarrow;": "\u2195",
- "upharpoonleft;": "\u21bf",
- "upharpoonright;": "\u21be",
- "uplus;": "\u228e",
- "upsi;": "\u03c5",
- "upsih;": "\u03d2",
- "upsilon;": "\u03c5",
- "upuparrows;": "\u21c8",
- "urcorn;": "\u231d",
- "urcorner;": "\u231d",
- "urcrop;": "\u230e",
- "uring;": "\u016f",
- "urtri;": "\u25f9",
- "uscr;": "\U0001d4ca",
- "utdot;": "\u22f0",
- "utilde;": "\u0169",
- "utri;": "\u25b5",
- "utrif;": "\u25b4",
- "uuarr;": "\u21c8",
- "uuml": "\xfc",
- "uuml;": "\xfc",
- "uwangle;": "\u29a7",
- "vArr;": "\u21d5",
- "vBar;": "\u2ae8",
- "vBarv;": "\u2ae9",
- "vDash;": "\u22a8",
- "vangrt;": "\u299c",
- "varepsilon;": "\u03f5",
- "varkappa;": "\u03f0",
- "varnothing;": "\u2205",
- "varphi;": "\u03d5",
- "varpi;": "\u03d6",
- "varpropto;": "\u221d",
- "varr;": "\u2195",
- "varrho;": "\u03f1",
- "varsigma;": "\u03c2",
- "varsubsetneq;": "\u228a\ufe00",
- "varsubsetneqq;": "\u2acb\ufe00",
- "varsupsetneq;": "\u228b\ufe00",
- "varsupsetneqq;": "\u2acc\ufe00",
- "vartheta;": "\u03d1",
- "vartriangleleft;": "\u22b2",
- "vartriangleright;": "\u22b3",
- "vcy;": "\u0432",
- "vdash;": "\u22a2",
- "vee;": "\u2228",
- "veebar;": "\u22bb",
- "veeeq;": "\u225a",
- "vellip;": "\u22ee",
- "verbar;": "|",
- "vert;": "|",
- "vfr;": "\U0001d533",
- "vltri;": "\u22b2",
- "vnsub;": "\u2282\u20d2",
- "vnsup;": "\u2283\u20d2",
- "vopf;": "\U0001d567",
- "vprop;": "\u221d",
- "vrtri;": "\u22b3",
- "vscr;": "\U0001d4cb",
- "vsubnE;": "\u2acb\ufe00",
- "vsubne;": "\u228a\ufe00",
- "vsupnE;": "\u2acc\ufe00",
- "vsupne;": "\u228b\ufe00",
- "vzigzag;": "\u299a",
- "wcirc;": "\u0175",
- "wedbar;": "\u2a5f",
- "wedge;": "\u2227",
- "wedgeq;": "\u2259",
- "weierp;": "\u2118",
- "wfr;": "\U0001d534",
- "wopf;": "\U0001d568",
- "wp;": "\u2118",
- "wr;": "\u2240",
- "wreath;": "\u2240",
- "wscr;": "\U0001d4cc",
- "xcap;": "\u22c2",
- "xcirc;": "\u25ef",
- "xcup;": "\u22c3",
- "xdtri;": "\u25bd",
- "xfr;": "\U0001d535",
- "xhArr;": "\u27fa",
- "xharr;": "\u27f7",
- "xi;": "\u03be",
- "xlArr;": "\u27f8",
- "xlarr;": "\u27f5",
- "xmap;": "\u27fc",
- "xnis;": "\u22fb",
- "xodot;": "\u2a00",
- "xopf;": "\U0001d569",
- "xoplus;": "\u2a01",
- "xotime;": "\u2a02",
- "xrArr;": "\u27f9",
- "xrarr;": "\u27f6",
- "xscr;": "\U0001d4cd",
- "xsqcup;": "\u2a06",
- "xuplus;": "\u2a04",
- "xutri;": "\u25b3",
- "xvee;": "\u22c1",
- "xwedge;": "\u22c0",
- "yacute": "\xfd",
- "yacute;": "\xfd",
- "yacy;": "\u044f",
- "ycirc;": "\u0177",
- "ycy;": "\u044b",
- "yen": "\xa5",
- "yen;": "\xa5",
- "yfr;": "\U0001d536",
- "yicy;": "\u0457",
- "yopf;": "\U0001d56a",
- "yscr;": "\U0001d4ce",
- "yucy;": "\u044e",
- "yuml": "\xff",
- "yuml;": "\xff",
- "zacute;": "\u017a",
- "zcaron;": "\u017e",
- "zcy;": "\u0437",
- "zdot;": "\u017c",
- "zeetrf;": "\u2128",
- "zeta;": "\u03b6",
- "zfr;": "\U0001d537",
- "zhcy;": "\u0436",
- "zigrarr;": "\u21dd",
- "zopf;": "\U0001d56b",
- "zscr;": "\U0001d4cf",
- "zwj;": "\u200d",
- "zwnj;": "\u200c",
-}
-
-replacementCharacters = {
- 0x0: "\uFFFD",
- 0x0d: "\u000D",
- 0x80: "\u20AC",
- 0x81: "\u0081",
- 0x81: "\u0081",
- 0x82: "\u201A",
- 0x83: "\u0192",
- 0x84: "\u201E",
- 0x85: "\u2026",
- 0x86: "\u2020",
- 0x87: "\u2021",
- 0x88: "\u02C6",
- 0x89: "\u2030",
- 0x8A: "\u0160",
- 0x8B: "\u2039",
- 0x8C: "\u0152",
- 0x8D: "\u008D",
- 0x8E: "\u017D",
- 0x8F: "\u008F",
- 0x90: "\u0090",
- 0x91: "\u2018",
- 0x92: "\u2019",
- 0x93: "\u201C",
- 0x94: "\u201D",
- 0x95: "\u2022",
- 0x96: "\u2013",
- 0x97: "\u2014",
- 0x98: "\u02DC",
- 0x99: "\u2122",
- 0x9A: "\u0161",
- 0x9B: "\u203A",
- 0x9C: "\u0153",
- 0x9D: "\u009D",
- 0x9E: "\u017E",
- 0x9F: "\u0178",
-}
-
-encodings = {
- '437': 'cp437',
- '850': 'cp850',
- '852': 'cp852',
- '855': 'cp855',
- '857': 'cp857',
- '860': 'cp860',
- '861': 'cp861',
- '862': 'cp862',
- '863': 'cp863',
- '865': 'cp865',
- '866': 'cp866',
- '869': 'cp869',
- 'ansix341968': 'ascii',
- 'ansix341986': 'ascii',
- 'arabic': 'iso8859-6',
- 'ascii': 'ascii',
- 'asmo708': 'iso8859-6',
- 'big5': 'big5',
- 'big5hkscs': 'big5hkscs',
- 'chinese': 'gbk',
- 'cp037': 'cp037',
- 'cp1026': 'cp1026',
- 'cp154': 'ptcp154',
- 'cp367': 'ascii',
- 'cp424': 'cp424',
- 'cp437': 'cp437',
- 'cp500': 'cp500',
- 'cp775': 'cp775',
- 'cp819': 'windows-1252',
- 'cp850': 'cp850',
- 'cp852': 'cp852',
- 'cp855': 'cp855',
- 'cp857': 'cp857',
- 'cp860': 'cp860',
- 'cp861': 'cp861',
- 'cp862': 'cp862',
- 'cp863': 'cp863',
- 'cp864': 'cp864',
- 'cp865': 'cp865',
- 'cp866': 'cp866',
- 'cp869': 'cp869',
- 'cp936': 'gbk',
- 'cpgr': 'cp869',
- 'cpis': 'cp861',
- 'csascii': 'ascii',
- 'csbig5': 'big5',
- 'cseuckr': 'cp949',
- 'cseucpkdfmtjapanese': 'euc_jp',
- 'csgb2312': 'gbk',
- 'cshproman8': 'hp-roman8',
- 'csibm037': 'cp037',
- 'csibm1026': 'cp1026',
- 'csibm424': 'cp424',
- 'csibm500': 'cp500',
- 'csibm855': 'cp855',
- 'csibm857': 'cp857',
- 'csibm860': 'cp860',
- 'csibm861': 'cp861',
- 'csibm863': 'cp863',
- 'csibm864': 'cp864',
- 'csibm865': 'cp865',
- 'csibm866': 'cp866',
- 'csibm869': 'cp869',
- 'csiso2022jp': 'iso2022_jp',
- 'csiso2022jp2': 'iso2022_jp_2',
- 'csiso2022kr': 'iso2022_kr',
- 'csiso58gb231280': 'gbk',
- 'csisolatin1': 'windows-1252',
- 'csisolatin2': 'iso8859-2',
- 'csisolatin3': 'iso8859-3',
- 'csisolatin4': 'iso8859-4',
- 'csisolatin5': 'windows-1254',
- 'csisolatin6': 'iso8859-10',
- 'csisolatinarabic': 'iso8859-6',
- 'csisolatincyrillic': 'iso8859-5',
- 'csisolatingreek': 'iso8859-7',
- 'csisolatinhebrew': 'iso8859-8',
- 'cskoi8r': 'koi8-r',
- 'csksc56011987': 'cp949',
- 'cspc775baltic': 'cp775',
- 'cspc850multilingual': 'cp850',
- 'cspc862latinhebrew': 'cp862',
- 'cspc8codepage437': 'cp437',
- 'cspcp852': 'cp852',
- 'csptcp154': 'ptcp154',
- 'csshiftjis': 'shift_jis',
- 'csunicode11utf7': 'utf-7',
- 'cyrillic': 'iso8859-5',
- 'cyrillicasian': 'ptcp154',
- 'ebcdiccpbe': 'cp500',
- 'ebcdiccpca': 'cp037',
- 'ebcdiccpch': 'cp500',
- 'ebcdiccphe': 'cp424',
- 'ebcdiccpnl': 'cp037',
- 'ebcdiccpus': 'cp037',
- 'ebcdiccpwt': 'cp037',
- 'ecma114': 'iso8859-6',
- 'ecma118': 'iso8859-7',
- 'elot928': 'iso8859-7',
- 'eucjp': 'euc_jp',
- 'euckr': 'cp949',
- 'extendedunixcodepackedformatforjapanese': 'euc_jp',
- 'gb18030': 'gb18030',
- 'gb2312': 'gbk',
- 'gb231280': 'gbk',
- 'gbk': 'gbk',
- 'greek': 'iso8859-7',
- 'greek8': 'iso8859-7',
- 'hebrew': 'iso8859-8',
- 'hproman8': 'hp-roman8',
- 'hzgb2312': 'hz',
- 'ibm037': 'cp037',
- 'ibm1026': 'cp1026',
- 'ibm367': 'ascii',
- 'ibm424': 'cp424',
- 'ibm437': 'cp437',
- 'ibm500': 'cp500',
- 'ibm775': 'cp775',
- 'ibm819': 'windows-1252',
- 'ibm850': 'cp850',
- 'ibm852': 'cp852',
- 'ibm855': 'cp855',
- 'ibm857': 'cp857',
- 'ibm860': 'cp860',
- 'ibm861': 'cp861',
- 'ibm862': 'cp862',
- 'ibm863': 'cp863',
- 'ibm864': 'cp864',
- 'ibm865': 'cp865',
- 'ibm866': 'cp866',
- 'ibm869': 'cp869',
- 'iso2022jp': 'iso2022_jp',
- 'iso2022jp2': 'iso2022_jp_2',
- 'iso2022kr': 'iso2022_kr',
- 'iso646irv1991': 'ascii',
- 'iso646us': 'ascii',
- 'iso88591': 'windows-1252',
- 'iso885910': 'iso8859-10',
- 'iso8859101992': 'iso8859-10',
- 'iso885911987': 'windows-1252',
- 'iso885913': 'iso8859-13',
- 'iso885914': 'iso8859-14',
- 'iso8859141998': 'iso8859-14',
- 'iso885915': 'iso8859-15',
- 'iso885916': 'iso8859-16',
- 'iso8859162001': 'iso8859-16',
- 'iso88592': 'iso8859-2',
- 'iso885921987': 'iso8859-2',
- 'iso88593': 'iso8859-3',
- 'iso885931988': 'iso8859-3',
- 'iso88594': 'iso8859-4',
- 'iso885941988': 'iso8859-4',
- 'iso88595': 'iso8859-5',
- 'iso885951988': 'iso8859-5',
- 'iso88596': 'iso8859-6',
- 'iso885961987': 'iso8859-6',
- 'iso88597': 'iso8859-7',
- 'iso885971987': 'iso8859-7',
- 'iso88598': 'iso8859-8',
- 'iso885981988': 'iso8859-8',
- 'iso88599': 'windows-1254',
- 'iso885991989': 'windows-1254',
- 'isoceltic': 'iso8859-14',
- 'isoir100': 'windows-1252',
- 'isoir101': 'iso8859-2',
- 'isoir109': 'iso8859-3',
- 'isoir110': 'iso8859-4',
- 'isoir126': 'iso8859-7',
- 'isoir127': 'iso8859-6',
- 'isoir138': 'iso8859-8',
- 'isoir144': 'iso8859-5',
- 'isoir148': 'windows-1254',
- 'isoir149': 'cp949',
- 'isoir157': 'iso8859-10',
- 'isoir199': 'iso8859-14',
- 'isoir226': 'iso8859-16',
- 'isoir58': 'gbk',
- 'isoir6': 'ascii',
- 'koi8r': 'koi8-r',
- 'koi8u': 'koi8-u',
- 'korean': 'cp949',
- 'ksc5601': 'cp949',
- 'ksc56011987': 'cp949',
- 'ksc56011989': 'cp949',
- 'l1': 'windows-1252',
- 'l10': 'iso8859-16',
- 'l2': 'iso8859-2',
- 'l3': 'iso8859-3',
- 'l4': 'iso8859-4',
- 'l5': 'windows-1254',
- 'l6': 'iso8859-10',
- 'l8': 'iso8859-14',
- 'latin1': 'windows-1252',
- 'latin10': 'iso8859-16',
- 'latin2': 'iso8859-2',
- 'latin3': 'iso8859-3',
- 'latin4': 'iso8859-4',
- 'latin5': 'windows-1254',
- 'latin6': 'iso8859-10',
- 'latin8': 'iso8859-14',
- 'latin9': 'iso8859-15',
- 'ms936': 'gbk',
- 'mskanji': 'shift_jis',
- 'pt154': 'ptcp154',
- 'ptcp154': 'ptcp154',
- 'r8': 'hp-roman8',
- 'roman8': 'hp-roman8',
- 'shiftjis': 'shift_jis',
- 'tis620': 'cp874',
- 'unicode11utf7': 'utf-7',
- 'us': 'ascii',
- 'usascii': 'ascii',
- 'utf16': 'utf-16',
- 'utf16be': 'utf-16-be',
- 'utf16le': 'utf-16-le',
- 'utf8': 'utf-8',
- 'windows1250': 'cp1250',
- 'windows1251': 'cp1251',
- 'windows1252': 'cp1252',
- 'windows1253': 'cp1253',
- 'windows1254': 'cp1254',
- 'windows1255': 'cp1255',
- 'windows1256': 'cp1256',
- 'windows1257': 'cp1257',
- 'windows1258': 'cp1258',
- 'windows936': 'gbk',
- 'x-x-big5': 'big5'}
-
-tokenTypes = {
- "Doctype": 0,
- "Characters": 1,
- "SpaceCharacters": 2,
- "StartTag": 3,
- "EndTag": 4,
- "EmptyTag": 5,
- "Comment": 6,
- "ParseError": 7
-}
-
-tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],
- tokenTypes["EmptyTag"]))
-
-
-prefixes = dict([(v, k) for k, v in namespaces.items()])
-prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
-
-
-class DataLossWarning(UserWarning):
- pass
-
-
-class ReparseException(Exception):
- pass
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/__init__.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/__init__.py
+++ /dev/null
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/_base.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/_base.py
deleted file mode 100644
index c7dbaed..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/_base.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-
-class Filter(object):
- def __init__(self, source):
- self.source = source
-
- def __iter__(self):
- return iter(self.source)
-
- def __getattr__(self, name):
- return getattr(self.source, name)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py
deleted file mode 100644
index fed6996..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import _base
-
-try:
- from collections import OrderedDict
-except ImportError:
- from ordereddict import OrderedDict
-
-
-class Filter(_base.Filter):
- def __iter__(self):
- for token in _base.Filter.__iter__(self):
- if token["type"] in ("StartTag", "EmptyTag"):
- attrs = OrderedDict()
- for name, value in sorted(token["data"].items(),
- key=lambda x: x[0]):
- attrs[name] = value
- token["data"] = attrs
- yield token
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py
deleted file mode 100644
index ca33b70..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import _base
-
-
-class Filter(_base.Filter):
- def __init__(self, source, encoding):
- _base.Filter.__init__(self, source)
- self.encoding = encoding
-
- def __iter__(self):
- state = "pre_head"
- meta_found = (self.encoding is None)
- pending = []
-
- for token in _base.Filter.__iter__(self):
- type = token["type"]
- if type == "StartTag":
- if token["name"].lower() == "head":
- state = "in_head"
-
- elif type == "EmptyTag":
- if token["name"].lower() == "meta":
- # replace charset with actual encoding
- has_http_equiv_content_type = False
- for (namespace, name), value in token["data"].items():
- if namespace is not None:
- continue
- elif name.lower() == 'charset':
- token["data"][(namespace, name)] = self.encoding
- meta_found = True
- break
- elif name == 'http-equiv' and value.lower() == 'content-type':
- has_http_equiv_content_type = True
- else:
- if has_http_equiv_content_type and (None, "content") in token["data"]:
- token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
- meta_found = True
-
- elif token["name"].lower() == "head" and not meta_found:
- # insert meta into empty head
- yield {"type": "StartTag", "name": "head",
- "data": token["data"]}
- yield {"type": "EmptyTag", "name": "meta",
- "data": {(None, "charset"): self.encoding}}
- yield {"type": "EndTag", "name": "head"}
- meta_found = True
- continue
-
- elif type == "EndTag":
- if token["name"].lower() == "head" and pending:
- # insert meta into head (if necessary) and flush pending queue
- yield pending.pop(0)
- if not meta_found:
- yield {"type": "EmptyTag", "name": "meta",
- "data": {(None, "charset"): self.encoding}}
- while pending:
- yield pending.pop(0)
- meta_found = True
- state = "post_head"
-
- if state == "in_head":
- pending.append(token)
- else:
- yield token
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/lint.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/lint.py
deleted file mode 100644
index 83ad639..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/lint.py
+++ /dev/null
@@ -1,93 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from gettext import gettext
-_ = gettext
-
-from . import _base
-from ..constants import cdataElements, rcdataElements, voidElements
-
-from ..constants import spaceCharacters
-spaceCharacters = "".join(spaceCharacters)
-
-
-class LintError(Exception):
- pass
-
-
-class Filter(_base.Filter):
- def __iter__(self):
- open_elements = []
- contentModelFlag = "PCDATA"
- for token in _base.Filter.__iter__(self):
- type = token["type"]
- if type in ("StartTag", "EmptyTag"):
- name = token["name"]
- if contentModelFlag != "PCDATA":
- raise LintError(_("StartTag not in PCDATA content model flag: %s") % name)
- if not isinstance(name, str):
- raise LintError(_("Tag name is not a string: %r") % name)
- if not name:
- raise LintError(_("Empty tag name"))
- if type == "StartTag" and name in voidElements:
- raise LintError(_("Void element reported as StartTag token: %s") % name)
- elif type == "EmptyTag" and name not in voidElements:
- raise LintError(_("Non-void element reported as EmptyTag token: %s") % token["name"])
- if type == "StartTag":
- open_elements.append(name)
- for name, value in token["data"]:
- if not isinstance(name, str):
- raise LintError(_("Attribute name is not a string: %r") % name)
- if not name:
- raise LintError(_("Empty attribute name"))
- if not isinstance(value, str):
- raise LintError(_("Attribute value is not a string: %r") % value)
- if name in cdataElements:
- contentModelFlag = "CDATA"
- elif name in rcdataElements:
- contentModelFlag = "RCDATA"
- elif name == "plaintext":
- contentModelFlag = "PLAINTEXT"
-
- elif type == "EndTag":
- name = token["name"]
- if not isinstance(name, str):
- raise LintError(_("Tag name is not a string: %r") % name)
- if not name:
- raise LintError(_("Empty tag name"))
- if name in voidElements:
- raise LintError(_("Void element reported as EndTag token: %s") % name)
- start_name = open_elements.pop()
- if start_name != name:
- raise LintError(_("EndTag (%s) does not match StartTag (%s)") % (name, start_name))
- contentModelFlag = "PCDATA"
-
- elif type == "Comment":
- if contentModelFlag != "PCDATA":
- raise LintError(_("Comment not in PCDATA content model flag"))
-
- elif type in ("Characters", "SpaceCharacters"):
- data = token["data"]
- if not isinstance(data, str):
- raise LintError(_("Attribute name is not a string: %r") % data)
- if not data:
- raise LintError(_("%s token with empty data") % type)
- if type == "SpaceCharacters":
- data = data.strip(spaceCharacters)
- if data:
- raise LintError(_("Non-space character(s) found in SpaceCharacters token: ") % data)
-
- elif type == "Doctype":
- name = token["name"]
- if contentModelFlag != "PCDATA":
- raise LintError(_("Doctype not in PCDATA content model flag: %s") % name)
- if not isinstance(name, str):
- raise LintError(_("Tag name is not a string: %r") % name)
- # XXX: what to do with token["data"] ?
-
- elif type in ("ParseError", "SerializeError"):
- pass
-
- else:
- raise LintError(_("Unknown token type: %s") % type)
-
- yield token
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py
deleted file mode 100644
index fefe0b3..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py
+++ /dev/null
@@ -1,205 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import _base
-
-
-class Filter(_base.Filter):
- def slider(self):
- previous1 = previous2 = None
- for token in self.source:
- if previous1 is not None:
- yield previous2, previous1, token
- previous2 = previous1
- previous1 = token
- yield previous2, previous1, None
-
- def __iter__(self):
- for previous, token, next in self.slider():
- type = token["type"]
- if type == "StartTag":
- if (token["data"] or
- not self.is_optional_start(token["name"], previous, next)):
- yield token
- elif type == "EndTag":
- if not self.is_optional_end(token["name"], next):
- yield token
- else:
- yield token
-
- def is_optional_start(self, tagname, previous, next):
- type = next and next["type"] or None
- if tagname in 'html':
- # An html element's start tag may be omitted if the first thing
- # inside the html element is not a space character or a comment.
- return type not in ("Comment", "SpaceCharacters")
- elif tagname == 'head':
- # A head element's start tag may be omitted if the first thing
- # inside the head element is an element.
- # XXX: we also omit the start tag if the head element is empty
- if type in ("StartTag", "EmptyTag"):
- return True
- elif type == "EndTag":
- return next["name"] == "head"
- elif tagname == 'body':
- # A body element's start tag may be omitted if the first thing
- # inside the body element is not a space character or a comment,
- # except if the first thing inside the body element is a script
- # or style element and the node immediately preceding the body
- # element is a head element whose end tag has been omitted.
- if type in ("Comment", "SpaceCharacters"):
- return False
- elif type == "StartTag":
- # XXX: we do not look at the preceding event, so we never omit
- # the body element's start tag if it's followed by a script or
- # a style element.
- return next["name"] not in ('script', 'style')
- else:
- return True
- elif tagname == 'colgroup':
- # A colgroup element's start tag may be omitted if the first thing
- # inside the colgroup element is a col element, and if the element
- # is not immediately preceeded by another colgroup element whose
- # end tag has been omitted.
- if type in ("StartTag", "EmptyTag"):
- # XXX: we do not look at the preceding event, so instead we never
- # omit the colgroup element's end tag when it is immediately
- # followed by another colgroup element. See is_optional_end.
- return next["name"] == "col"
- else:
- return False
- elif tagname == 'tbody':
- # A tbody element's start tag may be omitted if the first thing
- # inside the tbody element is a tr element, and if the element is
- # not immediately preceeded by a tbody, thead, or tfoot element
- # whose end tag has been omitted.
- if type == "StartTag":
- # omit the thead and tfoot elements' end tag when they are
- # immediately followed by a tbody element. See is_optional_end.
- if previous and previous['type'] == 'EndTag' and \
- previous['name'] in ('tbody', 'thead', 'tfoot'):
- return False
- return next["name"] == 'tr'
- else:
- return False
- return False
-
- def is_optional_end(self, tagname, next):
- type = next and next["type"] or None
- if tagname in ('html', 'head', 'body'):
- # An html element's end tag may be omitted if the html element
- # is not immediately followed by a space character or a comment.
- return type not in ("Comment", "SpaceCharacters")
- elif tagname in ('li', 'optgroup', 'tr'):
- # A li element's end tag may be omitted if the li element is
- # immediately followed by another li element or if there is
- # no more content in the parent element.
- # An optgroup element's end tag may be omitted if the optgroup
- # element is immediately followed by another optgroup element,
- # or if there is no more content in the parent element.
- # A tr element's end tag may be omitted if the tr element is
- # immediately followed by another tr element, or if there is
- # no more content in the parent element.
- if type == "StartTag":
- return next["name"] == tagname
- else:
- return type == "EndTag" or type is None
- elif tagname in ('dt', 'dd'):
- # A dt element's end tag may be omitted if the dt element is
- # immediately followed by another dt element or a dd element.
- # A dd element's end tag may be omitted if the dd element is
- # immediately followed by another dd element or a dt element,
- # or if there is no more content in the parent element.
- if type == "StartTag":
- return next["name"] in ('dt', 'dd')
- elif tagname == 'dd':
- return type == "EndTag" or type is None
- else:
- return False
- elif tagname == 'p':
- # A p element's end tag may be omitted if the p element is
- # immediately followed by an address, article, aside,
- # blockquote, datagrid, dialog, dir, div, dl, fieldset,
- # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
- # nav, ol, p, pre, section, table, or ul, element, or if
- # there is no more content in the parent element.
- if type in ("StartTag", "EmptyTag"):
- return next["name"] in ('address', 'article', 'aside',
- 'blockquote', 'datagrid', 'dialog',
- 'dir', 'div', 'dl', 'fieldset', 'footer',
- 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
- 'header', 'hr', 'menu', 'nav', 'ol',
- 'p', 'pre', 'section', 'table', 'ul')
- else:
- return type == "EndTag" or type is None
- elif tagname == 'option':
- # An option element's end tag may be omitted if the option
- # element is immediately followed by another option element,
- # or if it is immediately followed by an <code>optgroup</code>
- # element, or if there is no more content in the parent
- # element.
- if type == "StartTag":
- return next["name"] in ('option', 'optgroup')
- else:
- return type == "EndTag" or type is None
- elif tagname in ('rt', 'rp'):
- # An rt element's end tag may be omitted if the rt element is
- # immediately followed by an rt or rp element, or if there is
- # no more content in the parent element.
- # An rp element's end tag may be omitted if the rp element is
- # immediately followed by an rt or rp element, or if there is
- # no more content in the parent element.
- if type == "StartTag":
- return next["name"] in ('rt', 'rp')
- else:
- return type == "EndTag" or type is None
- elif tagname == 'colgroup':
- # A colgroup element's end tag may be omitted if the colgroup
- # element is not immediately followed by a space character or
- # a comment.
- if type in ("Comment", "SpaceCharacters"):
- return False
- elif type == "StartTag":
- # XXX: we also look for an immediately following colgroup
- # element. See is_optional_start.
- return next["name"] != 'colgroup'
- else:
- return True
- elif tagname in ('thead', 'tbody'):
- # A thead element's end tag may be omitted if the thead element
- # is immediately followed by a tbody or tfoot element.
- # A tbody element's end tag may be omitted if the tbody element
- # is immediately followed by a tbody or tfoot element, or if
- # there is no more content in the parent element.
- # A tfoot element's end tag may be omitted if the tfoot element
- # is immediately followed by a tbody element, or if there is no
- # more content in the parent element.
- # XXX: we never omit the end tag when the following element is
- # a tbody. See is_optional_start.
- if type == "StartTag":
- return next["name"] in ['tbody', 'tfoot']
- elif tagname == 'tbody':
- return type == "EndTag" or type is None
- else:
- return False
- elif tagname == 'tfoot':
- # A tfoot element's end tag may be omitted if the tfoot element
- # is immediately followed by a tbody element, or if there is no
- # more content in the parent element.
- # XXX: we never omit the end tag when the following element is
- # a tbody. See is_optional_start.
- if type == "StartTag":
- return next["name"] == 'tbody'
- else:
- return type == "EndTag" or type is None
- elif tagname in ('td', 'th'):
- # A td element's end tag may be omitted if the td element is
- # immediately followed by a td or th element, or if there is
- # no more content in the parent element.
- # A th element's end tag may be omitted if the th element is
- # immediately followed by a td or th element, or if there is
- # no more content in the parent element.
- if type == "StartTag":
- return next["name"] in ('td', 'th')
- else:
- return type == "EndTag" or type is None
- return False
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/sanitizer.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/sanitizer.py
deleted file mode 100644
index b206b54..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/sanitizer.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import _base
-from ..sanitizer import HTMLSanitizerMixin
-
-
-class Filter(_base.Filter, HTMLSanitizerMixin):
- def __iter__(self):
- for token in _base.Filter.__iter__(self):
- token = self.sanitize_token(token)
- if token:
- yield token
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/whitespace.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/whitespace.py
deleted file mode 100644
index dfc60ee..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/whitespace.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import re
-
-from . import _base
-from ..constants import rcdataElements, spaceCharacters
-spaceCharacters = "".join(spaceCharacters)
-
-SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
-
-
-class Filter(_base.Filter):
-
- spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
-
- def __iter__(self):
- preserve = 0
- for token in _base.Filter.__iter__(self):
- type = token["type"]
- if type == "StartTag" \
- and (preserve or token["name"] in self.spacePreserveElements):
- preserve += 1
-
- elif type == "EndTag" and preserve:
- preserve -= 1
-
- elif not preserve and type == "SpaceCharacters" and token["data"]:
- # Test on token["data"] above to not introduce spaces where there were not
- token["data"] = " "
-
- elif not preserve and type == "Characters":
- token["data"] = collapse_spaces(token["data"])
-
- yield token
-
-
-def collapse_spaces(text):
- return SPACES_REGEX.sub(' ', text)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/html5parser.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/html5parser.py
deleted file mode 100644
index 8a5acfe..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/html5parser.py
+++ /dev/null
@@ -1,2725 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import with_metaclass
-
-import types
-
-from . import inputstream
-from . import tokenizer
-
-from . import treebuilders
-from .treebuilders._base import Marker
-
-from . import utils
-from . import constants
-from .constants import spaceCharacters, asciiUpper2Lower
-from .constants import specialElements
-from .constants import headingElements
-from .constants import cdataElements, rcdataElements
-from .constants import tokenTypes, ReparseException, namespaces
-from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
-
-
-def parse(doc, treebuilder="etree", encoding=None,
- namespaceHTMLElements=True):
- """Parse a string or file-like object into a tree"""
- tb = treebuilders.getTreeBuilder(treebuilder)
- p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
- return p.parse(doc, encoding=encoding)
-
-
-def parseFragment(doc, container="div", treebuilder="etree", encoding=None,
- namespaceHTMLElements=True):
- tb = treebuilders.getTreeBuilder(treebuilder)
- p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
- return p.parseFragment(doc, container=container, encoding=encoding)
-
-
-def method_decorator_metaclass(function):
- class Decorated(type):
- def __new__(meta, classname, bases, classDict):
- for attributeName, attribute in classDict.items():
- if isinstance(attribute, types.FunctionType):
- attribute = function(attribute)
-
- classDict[attributeName] = attribute
- return type.__new__(meta, classname, bases, classDict)
- return Decorated
-
-
-class HTMLParser(object):
- """HTML parser. Generates a tree structure from a stream of (possibly
- malformed) HTML"""
-
- def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
- strict=False, namespaceHTMLElements=True, debug=False):
- """
- strict - raise an exception when a parse error is encountered
-
- tree - a treebuilder class controlling the type of tree that will be
- returned. Built in treebuilders can be accessed through
- html5lib.treebuilders.getTreeBuilder(treeType)
-
- tokenizer - a class that provides a stream of tokens to the treebuilder.
- This may be replaced for e.g. a sanitizer which converts some tags to
- text
- """
-
- # Raise an exception on the first error encountered
- self.strict = strict
-
- if tree is None:
- tree = treebuilders.getTreeBuilder("etree")
- self.tree = tree(namespaceHTMLElements)
- self.tokenizer_class = tokenizer
- self.errors = []
-
- self.phases = dict([(name, cls(self, self.tree)) for name, cls in
- getPhases(debug).items()])
-
- def _parse(self, stream, innerHTML=False, container="div",
- encoding=None, parseMeta=True, useChardet=True, **kwargs):
-
- self.innerHTMLMode = innerHTML
- self.container = container
- self.tokenizer = self.tokenizer_class(stream, encoding=encoding,
- parseMeta=parseMeta,
- useChardet=useChardet,
- parser=self, **kwargs)
- self.reset()
-
- while True:
- try:
- self.mainLoop()
- break
- except ReparseException:
- self.reset()
-
- def reset(self):
- self.tree.reset()
- self.firstStartTag = False
- self.errors = []
- self.log = [] # only used with debug mode
- # "quirks" / "limited quirks" / "no quirks"
- self.compatMode = "no quirks"
-
- if self.innerHTMLMode:
- self.innerHTML = self.container.lower()
-
- if self.innerHTML in cdataElements:
- self.tokenizer.state = self.tokenizer.rcdataState
- elif self.innerHTML in rcdataElements:
- self.tokenizer.state = self.tokenizer.rawtextState
- elif self.innerHTML == 'plaintext':
- self.tokenizer.state = self.tokenizer.plaintextState
- else:
- # state already is data state
- # self.tokenizer.state = self.tokenizer.dataState
- pass
- self.phase = self.phases["beforeHtml"]
- self.phase.insertHtmlElement()
- self.resetInsertionMode()
- else:
- self.innerHTML = False
- self.phase = self.phases["initial"]
-
- self.lastPhase = None
-
- self.beforeRCDataPhase = None
-
- self.framesetOK = True
-
- def isHTMLIntegrationPoint(self, element):
- if (element.name == "annotation-xml" and
- element.namespace == namespaces["mathml"]):
- return ("encoding" in element.attributes and
- element.attributes["encoding"].translate(
- asciiUpper2Lower) in
- ("text/html", "application/xhtml+xml"))
- else:
- return (element.namespace, element.name) in htmlIntegrationPointElements
-
- def isMathMLTextIntegrationPoint(self, element):
- return (element.namespace, element.name) in mathmlTextIntegrationPointElements
-
- def mainLoop(self):
- CharactersToken = tokenTypes["Characters"]
- SpaceCharactersToken = tokenTypes["SpaceCharacters"]
- StartTagToken = tokenTypes["StartTag"]
- EndTagToken = tokenTypes["EndTag"]
- CommentToken = tokenTypes["Comment"]
- DoctypeToken = tokenTypes["Doctype"]
- ParseErrorToken = tokenTypes["ParseError"]
-
- for token in self.normalizedTokens():
- new_token = token
- while new_token is not None:
- currentNode = self.tree.openElements[-1] if self.tree.openElements else None
- currentNodeNamespace = currentNode.namespace if currentNode else None
- currentNodeName = currentNode.name if currentNode else None
-
- type = new_token["type"]
-
- if type == ParseErrorToken:
- self.parseError(new_token["data"], new_token.get("datavars", {}))
- new_token = None
- else:
- if (len(self.tree.openElements) == 0 or
- currentNodeNamespace == self.tree.defaultNamespace or
- (self.isMathMLTextIntegrationPoint(currentNode) and
- ((type == StartTagToken and
- token["name"] not in frozenset(["mglyph", "malignmark"])) or
- type in (CharactersToken, SpaceCharactersToken))) or
- (currentNodeNamespace == namespaces["mathml"] and
- currentNodeName == "annotation-xml" and
- token["name"] == "svg") or
- (self.isHTMLIntegrationPoint(currentNode) and
- type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
- phase = self.phase
- else:
- phase = self.phases["inForeignContent"]
-
- if type == CharactersToken:
- new_token = phase.processCharacters(new_token)
- elif type == SpaceCharactersToken:
- new_token = phase.processSpaceCharacters(new_token)
- elif type == StartTagToken:
- new_token = phase.processStartTag(new_token)
- elif type == EndTagToken:
- new_token = phase.processEndTag(new_token)
- elif type == CommentToken:
- new_token = phase.processComment(new_token)
- elif type == DoctypeToken:
- new_token = phase.processDoctype(new_token)
-
- if (type == StartTagToken and token["selfClosing"]
- and not token["selfClosingAcknowledged"]):
- self.parseError("non-void-element-with-trailing-solidus",
- {"name": token["name"]})
-
- # When the loop finishes it's EOF
- reprocess = True
- phases = []
- while reprocess:
- phases.append(self.phase)
- reprocess = self.phase.processEOF()
- if reprocess:
- assert self.phase not in phases
-
- def normalizedTokens(self):
- for token in self.tokenizer:
- yield self.normalizeToken(token)
-
- def parse(self, stream, encoding=None, parseMeta=True, useChardet=True):
- """Parse a HTML document into a well-formed tree
-
- stream - a filelike object or string containing the HTML to be parsed
-
- The optional encoding parameter must be a string that indicates
- the encoding. If specified, that encoding will be used,
- regardless of any BOM or later declaration (such as in a meta
- element)
- """
- self._parse(stream, innerHTML=False, encoding=encoding,
- parseMeta=parseMeta, useChardet=useChardet)
- return self.tree.getDocument()
-
- def parseFragment(self, stream, container="div", encoding=None,
- parseMeta=False, useChardet=True):
- """Parse a HTML fragment into a well-formed tree fragment
-
- container - name of the element we're setting the innerHTML property
- if set to None, default to 'div'
-
- stream - a filelike object or string containing the HTML to be parsed
-
- The optional encoding parameter must be a string that indicates
- the encoding. If specified, that encoding will be used,
- regardless of any BOM or later declaration (such as in a meta
- element)
- """
- self._parse(stream, True, container=container, encoding=encoding)
- return self.tree.getFragment()
-
- def parseError(self, errorcode="XXX-undefined-error", datavars={}):
- # XXX The idea is to make errorcode mandatory.
- self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
- if self.strict:
- raise ParseError
-
- def normalizeToken(self, token):
- """ HTML5 specific normalizations to the token stream """
-
- if token["type"] == tokenTypes["StartTag"]:
- token["data"] = dict(token["data"][::-1])
-
- return token
-
- def adjustMathMLAttributes(self, token):
- replacements = {"definitionurl": "definitionURL"}
- for k, v in replacements.items():
- if k in token["data"]:
- token["data"][v] = token["data"][k]
- del token["data"][k]
-
- def adjustSVGAttributes(self, token):
- replacements = {
- "attributename": "attributeName",
- "attributetype": "attributeType",
- "basefrequency": "baseFrequency",
- "baseprofile": "baseProfile",
- "calcmode": "calcMode",
- "clippathunits": "clipPathUnits",
- "contentscripttype": "contentScriptType",
- "contentstyletype": "contentStyleType",
- "diffuseconstant": "diffuseConstant",
- "edgemode": "edgeMode",
- "externalresourcesrequired": "externalResourcesRequired",
- "filterres": "filterRes",
- "filterunits": "filterUnits",
- "glyphref": "glyphRef",
- "gradienttransform": "gradientTransform",
- "gradientunits": "gradientUnits",
- "kernelmatrix": "kernelMatrix",
- "kernelunitlength": "kernelUnitLength",
- "keypoints": "keyPoints",
- "keysplines": "keySplines",
- "keytimes": "keyTimes",
- "lengthadjust": "lengthAdjust",
- "limitingconeangle": "limitingConeAngle",
- "markerheight": "markerHeight",
- "markerunits": "markerUnits",
- "markerwidth": "markerWidth",
- "maskcontentunits": "maskContentUnits",
- "maskunits": "maskUnits",
- "numoctaves": "numOctaves",
- "pathlength": "pathLength",
- "patterncontentunits": "patternContentUnits",
- "patterntransform": "patternTransform",
- "patternunits": "patternUnits",
- "pointsatx": "pointsAtX",
- "pointsaty": "pointsAtY",
- "pointsatz": "pointsAtZ",
- "preservealpha": "preserveAlpha",
- "preserveaspectratio": "preserveAspectRatio",
- "primitiveunits": "primitiveUnits",
- "refx": "refX",
- "refy": "refY",
- "repeatcount": "repeatCount",
- "repeatdur": "repeatDur",
- "requiredextensions": "requiredExtensions",
- "requiredfeatures": "requiredFeatures",
- "specularconstant": "specularConstant",
- "specularexponent": "specularExponent",
- "spreadmethod": "spreadMethod",
- "startoffset": "startOffset",
- "stddeviation": "stdDeviation",
- "stitchtiles": "stitchTiles",
- "surfacescale": "surfaceScale",
- "systemlanguage": "systemLanguage",
- "tablevalues": "tableValues",
- "targetx": "targetX",
- "targety": "targetY",
- "textlength": "textLength",
- "viewbox": "viewBox",
- "viewtarget": "viewTarget",
- "xchannelselector": "xChannelSelector",
- "ychannelselector": "yChannelSelector",
- "zoomandpan": "zoomAndPan"
- }
- for originalName in list(token["data"].keys()):
- if originalName in replacements:
- svgName = replacements[originalName]
- token["data"][svgName] = token["data"][originalName]
- del token["data"][originalName]
-
- def adjustForeignAttributes(self, token):
- replacements = {
- "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
- "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),
- "xlink:href": ("xlink", "href", namespaces["xlink"]),
- "xlink:role": ("xlink", "role", namespaces["xlink"]),
- "xlink:show": ("xlink", "show", namespaces["xlink"]),
- "xlink:title": ("xlink", "title", namespaces["xlink"]),
- "xlink:type": ("xlink", "type", namespaces["xlink"]),
- "xml:base": ("xml", "base", namespaces["xml"]),
- "xml:lang": ("xml", "lang", namespaces["xml"]),
- "xml:space": ("xml", "space", namespaces["xml"]),
- "xmlns": (None, "xmlns", namespaces["xmlns"]),
- "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
- }
-
- for originalName in token["data"].keys():
- if originalName in replacements:
- foreignName = replacements[originalName]
- token["data"][foreignName] = token["data"][originalName]
- del token["data"][originalName]
-
- def reparseTokenNormal(self, token):
- self.parser.phase()
-
- def resetInsertionMode(self):
- # The name of this method is mostly historical. (It's also used in the
- # specification.)
- last = False
- newModes = {
- "select": "inSelect",
- "td": "inCell",
- "th": "inCell",
- "tr": "inRow",
- "tbody": "inTableBody",
- "thead": "inTableBody",
- "tfoot": "inTableBody",
- "caption": "inCaption",
- "colgroup": "inColumnGroup",
- "table": "inTable",
- "head": "inBody",
- "body": "inBody",
- "frameset": "inFrameset",
- "html": "beforeHead"
- }
- for node in self.tree.openElements[::-1]:
- nodeName = node.name
- new_phase = None
- if node == self.tree.openElements[0]:
- assert self.innerHTML
- last = True
- nodeName = self.innerHTML
- # Check for conditions that should only happen in the innerHTML
- # case
- if nodeName in ("select", "colgroup", "head", "html"):
- assert self.innerHTML
-
- if not last and node.namespace != self.tree.defaultNamespace:
- continue
-
- if nodeName in newModes:
- new_phase = self.phases[newModes[nodeName]]
- break
- elif last:
- new_phase = self.phases["inBody"]
- break
-
- self.phase = new_phase
-
- def parseRCDataRawtext(self, token, contentType):
- """Generic RCDATA/RAWTEXT Parsing algorithm
- contentType - RCDATA or RAWTEXT
- """
- assert contentType in ("RAWTEXT", "RCDATA")
-
- self.tree.insertElement(token)
-
- if contentType == "RAWTEXT":
- self.tokenizer.state = self.tokenizer.rawtextState
- else:
- self.tokenizer.state = self.tokenizer.rcdataState
-
- self.originalPhase = self.phase
-
- self.phase = self.phases["text"]
-
-
-def getPhases(debug):
- def log(function):
- """Logger that records which phase processes each token"""
- type_names = dict((value, key) for key, value in
- constants.tokenTypes.items())
-
- def wrapped(self, *args, **kwargs):
- if function.__name__.startswith("process") and len(args) > 0:
- token = args[0]
- try:
- info = {"type": type_names[token['type']]}
- except:
- raise
- if token['type'] in constants.tagTokenTypes:
- info["name"] = token['name']
-
- self.parser.log.append((self.parser.tokenizer.state.__name__,
- self.parser.phase.__class__.__name__,
- self.__class__.__name__,
- function.__name__,
- info))
- return function(self, *args, **kwargs)
- else:
- return function(self, *args, **kwargs)
- return wrapped
-
- def getMetaclass(use_metaclass, metaclass_func):
- if use_metaclass:
- return method_decorator_metaclass(metaclass_func)
- else:
- return type
-
- class Phase(with_metaclass(getMetaclass(debug, log))):
- """Base class for helper object that implements each phase of processing
- """
-
- def __init__(self, parser, tree):
- self.parser = parser
- self.tree = tree
-
- def processEOF(self):
- raise NotImplementedError
-
- def processComment(self, token):
- # For most phases the following is correct. Where it's not it will be
- # overridden.
- self.tree.insertComment(token, self.tree.openElements[-1])
-
- def processDoctype(self, token):
- self.parser.parseError("unexpected-doctype")
-
- def processCharacters(self, token):
- self.tree.insertText(token["data"])
-
- def processSpaceCharacters(self, token):
- self.tree.insertText(token["data"])
-
- def processStartTag(self, token):
- return self.startTagHandler[token["name"]](token)
-
- def startTagHtml(self, token):
- if not self.parser.firstStartTag and token["name"] == "html":
- self.parser.parseError("non-html-root")
- # XXX Need a check here to see if the first start tag token emitted is
- # this token... If it's not, invoke self.parser.parseError().
- for attr, value in token["data"].items():
- if attr not in self.tree.openElements[0].attributes:
- self.tree.openElements[0].attributes[attr] = value
- self.parser.firstStartTag = False
-
- def processEndTag(self, token):
- return self.endTagHandler[token["name"]](token)
-
- class InitialPhase(Phase):
- def processSpaceCharacters(self, token):
- pass
-
- def processComment(self, token):
- self.tree.insertComment(token, self.tree.document)
-
- def processDoctype(self, token):
- name = token["name"]
- publicId = token["publicId"]
- systemId = token["systemId"]
- correct = token["correct"]
-
- if (name != "html" or publicId is not None or
- systemId is not None and systemId != "about:legacy-compat"):
- self.parser.parseError("unknown-doctype")
-
- if publicId is None:
- publicId = ""
-
- self.tree.insertDoctype(token)
-
- if publicId != "":
- publicId = publicId.translate(asciiUpper2Lower)
-
- if (not correct or token["name"] != "html"
- or publicId.startswith(
- ("+//silmaril//dtd html pro v0r11 19970101//",
- "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
- "-//as//dtd html 3.0 aswedit + extensions//",
- "-//ietf//dtd html 2.0 level 1//",
- "-//ietf//dtd html 2.0 level 2//",
- "-//ietf//dtd html 2.0 strict level 1//",
- "-//ietf//dtd html 2.0 strict level 2//",
- "-//ietf//dtd html 2.0 strict//",
- "-//ietf//dtd html 2.0//",
- "-//ietf//dtd html 2.1e//",
- "-//ietf//dtd html 3.0//",
- "-//ietf//dtd html 3.2 final//",
- "-//ietf//dtd html 3.2//",
- "-//ietf//dtd html 3//",
- "-//ietf//dtd html level 0//",
- "-//ietf//dtd html level 1//",
- "-//ietf//dtd html level 2//",
- "-//ietf//dtd html level 3//",
- "-//ietf//dtd html strict level 0//",
- "-//ietf//dtd html strict level 1//",
- "-//ietf//dtd html strict level 2//",
- "-//ietf//dtd html strict level 3//",
- "-//ietf//dtd html strict//",
- "-//ietf//dtd html//",
- "-//metrius//dtd metrius presentational//",
- "-//microsoft//dtd internet explorer 2.0 html strict//",
- "-//microsoft//dtd internet explorer 2.0 html//",
- "-//microsoft//dtd internet explorer 2.0 tables//",
- "-//microsoft//dtd internet explorer 3.0 html strict//",
- "-//microsoft//dtd internet explorer 3.0 html//",
- "-//microsoft//dtd internet explorer 3.0 tables//",
- "-//netscape comm. corp.//dtd html//",
- "-//netscape comm. corp.//dtd strict html//",
- "-//o'reilly and associates//dtd html 2.0//",
- "-//o'reilly and associates//dtd html extended 1.0//",
- "-//o'reilly and associates//dtd html extended relaxed 1.0//",
- "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
- "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
- "-//spyglass//dtd html 2.0 extended//",
- "-//sq//dtd html 2.0 hotmetal + extensions//",
- "-//sun microsystems corp.//dtd hotjava html//",
- "-//sun microsystems corp.//dtd hotjava strict html//",
- "-//w3c//dtd html 3 1995-03-24//",
- "-//w3c//dtd html 3.2 draft//",
- "-//w3c//dtd html 3.2 final//",
- "-//w3c//dtd html 3.2//",
- "-//w3c//dtd html 3.2s draft//",
- "-//w3c//dtd html 4.0 frameset//",
- "-//w3c//dtd html 4.0 transitional//",
- "-//w3c//dtd html experimental 19960712//",
- "-//w3c//dtd html experimental 970421//",
- "-//w3c//dtd w3 html//",
- "-//w3o//dtd w3 html 3.0//",
- "-//webtechs//dtd mozilla html 2.0//",
- "-//webtechs//dtd mozilla html//"))
- or publicId in
- ("-//w3o//dtd w3 html strict 3.0//en//",
- "-/w3c/dtd html 4.0 transitional/en",
- "html")
- or publicId.startswith(
- ("-//w3c//dtd html 4.01 frameset//",
- "-//w3c//dtd html 4.01 transitional//")) and
- systemId is None
- or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
- self.parser.compatMode = "quirks"
- elif (publicId.startswith(
- ("-//w3c//dtd xhtml 1.0 frameset//",
- "-//w3c//dtd xhtml 1.0 transitional//"))
- or publicId.startswith(
- ("-//w3c//dtd html 4.01 frameset//",
- "-//w3c//dtd html 4.01 transitional//")) and
- systemId is not None):
- self.parser.compatMode = "limited quirks"
-
- self.parser.phase = self.parser.phases["beforeHtml"]
-
- def anythingElse(self):
- self.parser.compatMode = "quirks"
- self.parser.phase = self.parser.phases["beforeHtml"]
-
- def processCharacters(self, token):
- self.parser.parseError("expected-doctype-but-got-chars")
- self.anythingElse()
- return token
-
- def processStartTag(self, token):
- self.parser.parseError("expected-doctype-but-got-start-tag",
- {"name": token["name"]})
- self.anythingElse()
- return token
-
- def processEndTag(self, token):
- self.parser.parseError("expected-doctype-but-got-end-tag",
- {"name": token["name"]})
- self.anythingElse()
- return token
-
- def processEOF(self):
- self.parser.parseError("expected-doctype-but-got-eof")
- self.anythingElse()
- return True
-
class BeforeHtmlPhase(Phase):
    """Token handlers used until a root <html> element has been inserted."""

    # helper methods
    def insertHtmlElement(self):
        """Insert an implied <html> root and switch to "beforeHead"."""
        rootToken = impliedTagToken("html", "StartTag")
        self.tree.insertRoot(rootToken)
        self.parser.phase = self.parser.phases["beforeHead"]

    # other
    def processEOF(self):
        """Create the implied root at end of input; returns True."""
        self.insertHtmlElement()
        return True

    def processComment(self, token):
        """Attach the comment directly to the document node."""
        document = self.tree.document
        self.tree.insertComment(token, document)

    def processSpaceCharacters(self, token):
        """Whitespace is ignored in this phase."""
        pass

    def processCharacters(self, token):
        """Create the implied root and return the token unchanged."""
        self.insertHtmlElement()
        return token

    def processStartTag(self, token):
        """Create the implied root and return the token unchanged.

        An explicit <html> start tag additionally sets
        ``parser.firstStartTag``.
        """
        isHtml = token["name"] == "html"
        if isHtml:
            self.parser.firstStartTag = True
        self.insertHtmlElement()
        return token

    def processEndTag(self, token):
        """Handle an end tag seen before any root element exists.

        </head>, </body>, </html> and </br> create the implied root and
        the token is returned; any other end tag is reported as a parse
        error and nothing is returned.
        """
        name = token["name"]
        if name in ("head", "body", "html", "br"):
            self.insertHtmlElement()
            return token
        self.parser.parseError("unexpected-end-tag-before-html",
                               {"name": name})
-
class BeforeHeadPhase(Phase):
    """Token handlers used after the root element and before <head>."""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startHandlers = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("head", self.startTagHead),
        ])
        startHandlers.default = self.startTagOther
        self.startTagHandler = startHandlers

        endHandlers = utils.MethodDispatcher([
            (("head", "body", "html", "br"), self.endTagImplyHead),
        ])
        endHandlers.default = self.endTagOther
        self.endTagHandler = endHandlers

    def processEOF(self):
        """Insert an implied <head> at end of input; returns True."""
        self.startTagHead(impliedTagToken("head", "StartTag"))
        return True

    def processSpaceCharacters(self, token):
        """Whitespace is ignored in this phase."""
        pass

    def processCharacters(self, token):
        """Insert an implied <head> and return the token unchanged."""
        self.startTagHead(impliedTagToken("head", "StartTag"))
        return token

    def startTagHtml(self, token):
        """Delegate <html> to the "inBody" phase and return its result."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        """Insert the head element, remember it, and switch to "inHead"."""
        tree = self.tree
        tree.insertElement(token)
        # The element just inserted is the current node.
        tree.headPointer = tree.openElements[-1]
        self.parser.phase = self.parser.phases["inHead"]

    def startTagOther(self, token):
        """Insert an implied <head> and return the token unchanged."""
        self.startTagHead(impliedTagToken("head", "StartTag"))
        return token

    def endTagImplyHead(self, token):
        """Insert an implied <head> and return the token unchanged."""
        self.startTagHead(impliedTagToken("head", "StartTag"))
        return token

    def endTagOther(self, token):
        """Report any other end tag as a parse error."""
        details = {"name": token["name"]}
        self.parser.parseError("end-tag-after-implied-root", details)
-
class InHeadPhase(Phase):
    """Token handlers for content inside <head>."""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startHandlers = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("title", self.startTagTitle),
            (("noscript", "noframes", "style"),
             self.startTagNoScriptNoFramesStyle),
            ("script", self.startTagScript),
            (("base", "basefont", "bgsound", "command", "link"),
             self.startTagBaseLinkCommand),
            ("meta", self.startTagMeta),
            ("head", self.startTagHead),
        ])
        startHandlers.default = self.startTagOther
        self.startTagHandler = startHandlers

        endHandlers = utils.MethodDispatcher([
            ("head", self.endTagHead),
            (("br", "html", "body"), self.endTagHtmlBodyBr),
        ])
        endHandlers.default = self.endTagOther
        self.endTagHandler = endHandlers

    # the real thing
    def processEOF(self):
        """Close the head at end of input; returns True."""
        self.anythingElse()
        return True

    def processCharacters(self, token):
        """Close the head and return the token unchanged."""
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        """Delegate <html> to the "inBody" phase and return its result."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        """A second <head> is a parse error and is otherwise ignored."""
        self.parser.parseError("two-heads-are-not-better-than-one")

    def startTagBaseLinkCommand(self, token):
        """Insert the element and pop it again straight away."""
        tree = self.tree
        tree.insertElement(token)
        tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagMeta(self, token):
        """Insert <meta> and, if the encoding is tentative, act on it.

        When the stream's encoding is still marked "tentative", a
        ``charset`` attribute, or a ``content`` attribute accompanied by
        ``http-equiv="content-type"``, is passed on to
        ``stream.changeEncoding``.
        """
        tree = self.tree
        tree.insertElement(token)
        tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

        stream = self.parser.tokenizer.stream
        if stream.charEncoding[1] != "tentative":
            return

        attributes = token["data"]
        if "charset" in attributes:
            stream.changeEncoding(attributes["charset"])
            return

        declaresContentType = (
            "content" in attributes and
            "http-equiv" in attributes and
            attributes["http-equiv"].lower() == "content-type")
        if declaresContentType:
            # Encoding as UTF-8 here is a hack: ideally the abstract
            # Unicode string would be handed to ContentAttrParser, but
            # UTF-8 can encode every character and is an ASCII superset,
            # so it works.
            rawContent = attributes["content"].encode("utf-8")
            data = inputstream.EncodingBytes(rawContent)
            codec = inputstream.ContentAttrParser(data).parse()
            stream.changeEncoding(codec)

    def startTagTitle(self, token):
        """Parse <title> content as RCDATA."""
        self.parser.parseRCDataRawtext(token, "RCDATA")

    def startTagNoScriptNoFramesStyle(self, token):
        """Parse <noscript>, <noframes> and <style> content as RAWTEXT."""
        # Need to decide whether to implement the scripting-disabled case
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagScript(self, token):
        """Insert <script> and switch tokenizer and parser to text mode."""
        parser = self.parser
        self.tree.insertElement(token)
        parser.tokenizer.state = parser.tokenizer.scriptDataState
        # Remember the phase to return to once the text phase is done.
        parser.originalPhase = parser.phase
        parser.phase = parser.phases["text"]

    def startTagOther(self, token):
        """Close the head and return the token unchanged."""
        self.anythingElse()
        return token

    def endTagHead(self, token):
        """Pop the head element and switch to "afterHead"."""
        node = self.parser.tree.openElements.pop()
        assert node.name == "head", "Expected head got %s" % node.name
        self.parser.phase = self.parser.phases["afterHead"]

    def endTagHtmlBodyBr(self, token):
        """Close the head and return the token unchanged."""
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report any other end tag as a parse error."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", details)

    def anythingElse(self):
        """Act as if a </head> end tag had been seen."""
        self.endTagHead(impliedTagToken("head"))
-
- # XXX If we implement a parser for which scripting is disabled we need to
- # implement this phase.
- #
- # class InHeadNoScriptPhase(Phase):
class AfterHeadPhase(Phase):
    """Token handlers used between </head> and the body or frameset."""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startHandlers = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("body", self.startTagBody),
            ("frameset", self.startTagFrameset),
            (("base", "basefont", "bgsound", "link", "meta", "noframes",
              "script", "style", "title"),
             self.startTagFromHead),
            ("head", self.startTagHead),
        ])
        startHandlers.default = self.startTagOther
        self.startTagHandler = startHandlers

        endHandlers = utils.MethodDispatcher([
            (("body", "html", "br"), self.endTagHtmlBodyBr),
        ])
        endHandlers.default = self.endTagOther
        self.endTagHandler = endHandlers

    def processEOF(self):
        """Insert an implied <body> at end of input; returns True."""
        self.anythingElse()
        return True

    def processCharacters(self, token):
        """Insert an implied <body> and return the token unchanged."""
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        """Delegate <html> to the "inBody" phase and return its result."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagBody(self, token):
        """Insert an explicit <body> and switch to "inBody"."""
        parser = self.parser
        parser.framesetOK = False
        self.tree.insertElement(token)
        parser.phase = parser.phases["inBody"]

    def startTagFrameset(self, token):
        """Insert <frameset> and switch to "inFrameset"."""
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inFrameset"]

    def startTagFromHead(self, token):
        """Process a head-only element that appeared after </head>.

        The remembered head element is pushed back onto the stack of open
        elements, the token is processed by the "inHead" phase, and the
        topmost element named "head" is then removed from the stack again.
        """
        self.parser.parseError("unexpected-start-tag-out-of-my-head",
                               {"name": token["name"]})
        openElements = self.tree.openElements
        openElements.append(self.tree.headPointer)
        self.parser.phases["inHead"].processStartTag(token)
        # Search from the top of the stack; stop at the first match so the
        # list is not iterated further after being modified.
        for candidate in reversed(self.tree.openElements):
            if candidate.name == "head":
                self.tree.openElements.remove(candidate)
                break

    def startTagHead(self, token):
        """A <head> start tag here is a parse error and is ignored."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag", details)

    def startTagOther(self, token):
        """Insert an implied <body> and return the token unchanged."""
        self.anythingElse()
        return token

    def endTagHtmlBodyBr(self, token):
        """Insert an implied <body> and return the token unchanged."""
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report any other end tag as a parse error."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", details)

    def anythingElse(self):
        """Insert an implied <body>, switch to "inBody", allow framesets."""
        parser = self.parser
        self.tree.insertElement(impliedTagToken("body", "StartTag"))
        parser.phase = parser.phases["inBody"]
        parser.framesetOK = True
- self.parser.framesetOK = True
-
class InBodyPhase(Phase):
    """Token handlers for content inside <body>.

    http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
    the really-really-really-very crazy mode
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        # Keep a ref to this for special handling of whitespace in <pre>:
        # processSpaceCharacters is temporarily rebound on the instance and
        # restored from this reference.
        self.processSpaceCharactersNonPre = self.processSpaceCharacters

        # Entries are applied in order, so a tag name listed twice ends up
        # bound to the handler of its last entry ("noframes" below).
        startHandlers = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("base", "basefont", "bgsound", "command", "link", "meta",
              "noframes", "script", "style", "title"),
             self.startTagProcessInHead),
            ("body", self.startTagBody),
            ("frameset", self.startTagFrameset),
            (("address", "article", "aside", "blockquote", "center",
              "details", "dir", "div", "dl", "fieldset", "figcaption",
              "figure", "footer", "header", "hgroup", "main", "menu", "nav",
              "ol", "p", "section", "summary", "ul"),
             self.startTagCloseP),
            (headingElements, self.startTagHeading),
            (("pre", "listing"), self.startTagPreListing),
            ("form", self.startTagForm),
            (("li", "dd", "dt"), self.startTagListItem),
            ("plaintext", self.startTagPlaintext),
            ("a", self.startTagA),
            (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
              "strong", "tt", "u"), self.startTagFormatting),
            ("nobr", self.startTagNobr),
            ("button", self.startTagButton),
            (("applet", "marquee", "object"),
             self.startTagAppletMarqueeObject),
            ("xmp", self.startTagXmp),
            ("table", self.startTagTable),
            (("area", "br", "embed", "img", "keygen", "wbr"),
             self.startTagVoidFormatting),
            (("param", "source", "track"), self.startTagParamSource),
            ("input", self.startTagInput),
            ("hr", self.startTagHr),
            ("image", self.startTagImage),
            ("isindex", self.startTagIsIndex),
            ("textarea", self.startTagTextarea),
            ("iframe", self.startTagIFrame),
            (("noembed", "noframes", "noscript"), self.startTagRawtext),
            ("select", self.startTagSelect),
            (("rp", "rt"), self.startTagRpRt),
            (("option", "optgroup"), self.startTagOpt),
            ("math", self.startTagMath),
            ("svg", self.startTagSvg),
            (("caption", "col", "colgroup", "frame", "head",
              "tbody", "td", "tfoot", "th", "thead",
              "tr"), self.startTagMisplaced),
        ])
        startHandlers.default = self.startTagOther
        self.startTagHandler = startHandlers

        endHandlers = utils.MethodDispatcher([
            ("body", self.endTagBody),
            ("html", self.endTagHtml),
            (("address", "article", "aside", "blockquote", "button",
              "center", "details", "dialog", "dir", "div", "dl", "fieldset",
              "figcaption", "figure", "footer", "header", "hgroup",
              "listing", "main", "menu", "nav", "ol", "pre", "section",
              "summary", "ul"), self.endTagBlock),
            ("form", self.endTagForm),
            ("p", self.endTagP),
            (("dd", "dt", "li"), self.endTagListItem),
            (headingElements, self.endTagHeading),
            (("a", "b", "big", "code", "em", "font", "i", "nobr", "s",
              "small", "strike", "strong", "tt", "u"),
             self.endTagFormatting),
            (("applet", "marquee", "object"),
             self.endTagAppletMarqueeObject),
            ("br", self.endTagBr),
        ])
        endHandlers.default = self.endTagOther
        self.endTagHandler = endHandlers

    def isMatchingFormattingElement(self, node1, node2):
        """Return True if both nodes agree in name, namespace and attributes."""
        if node1.name != node2.name:
            return False
        if node1.namespace != node2.namespace:
            return False
        if len(node1.attributes) != len(node2.attributes):
            return False
        # Same number of attributes: compare them pairwise in sorted order.
        pairs = zip(sorted(node1.attributes.items()),
                    sorted(node2.attributes.items()))
        return all(first == second for first, second in pairs)

    # helper
    def addFormattingElement(self, token):
        """Insert a formatting element and record it as active.

        If three matching elements already follow the last Marker in the
        list of active formatting elements, the earliest of them is
        removed before the new element is appended.
        """
        self.tree.insertElement(token)
        element = self.tree.openElements[-1]

        matches = []
        for candidate in reversed(self.tree.activeFormattingElements):
            if candidate is Marker:
                break
            if self.isMatchingFormattingElement(candidate, element):
                matches.append(candidate)

        assert len(matches) <= 3
        if len(matches) == 3:
            # matches was collected newest-first, so [-1] is the oldest.
            self.tree.activeFormattingElements.remove(matches[-1])
        self.tree.activeFormattingElements.append(element)

    # the real deal
    def processEOF(self):
        """Report a parse error if an unexpected element is still open."""
        allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td",
                                      "tfoot", "th", "thead", "tr", "body",
                                      "html"))
        stillOpen = reversed(self.tree.openElements)
        if any(node.name not in allowed_elements for node in stillOpen):
            self.parser.parseError("expected-closing-tag-but-got-eof")
        # Stop parsing

    def processSpaceCharactersDropNewline(self, token):
        """Whitespace handler that drops one leading newline.

        Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
        want to drop leading newlines.  The regular whitespace handler is
        restored as soon as this one has run.
        """
        self.processSpaceCharacters = self.processSpaceCharactersNonPre
        data = token["data"]
        if data.startswith("\n"):
            current = self.tree.openElements[-1]
            if (current.name in ("pre", "listing", "textarea") and
                    not current.hasContent()):
                data = data[1:]
        if data:
            self.tree.reconstructActiveFormattingElements()
            self.tree.insertText(data)

    def processCharacters(self, token):
        """Insert character data; non-space data clears framesetOK."""
        text = token["data"]
        if text == "\u0000":
            # The tokenizer should always emit null on its own
            return
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertText(text)
        # This must be bad for performance
        if self.parser.framesetOK:
            if any(char not in spaceCharacters for char in text):
                self.parser.framesetOK = False

    def processSpaceCharacters(self, token):
        """Insert whitespace after reconstructing formatting elements."""
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertText(token["data"])

    def startTagProcessInHead(self, token):
        """Delegate to the "inHead" phase and return its result."""
        inHead = self.parser.phases["inHead"]
        return inHead.processStartTag(token)

    def startTagBody(self, token):
        """Merge attributes of a stray <body> into the existing body."""
        self.parser.parseError("unexpected-start-tag", {"name": "body"})
        openElements = self.tree.openElements
        if len(openElements) == 1 or openElements[1].name != "body":
            assert self.parser.innerHTML
            return
        self.parser.framesetOK = False
        body = openElements[1]
        for attr, value in token["data"].items():
            # Attributes already present on the body are left alone.
            if attr not in body.attributes:
                body.attributes[attr] = value

    def startTagFrameset(self, token):
        """Replace the body with a frameset if framesets are still allowed."""
        self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
        openElements = self.tree.openElements
        if len(openElements) == 1 or openElements[1].name != "body":
            assert self.parser.innerHTML
        elif self.parser.framesetOK:
            body = openElements[1]
            if body.parent:
                body.parent.removeChild(body)
            while openElements[-1].name != "html":
                openElements.pop()
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inFrameset"]

    def _closePInButtonScope(self):
        """Close an open <p> (button scope) via an implied </p>."""
        if self.tree.elementInScope("p", variant="button"):
            self.endTagP(impliedTagToken("p"))

    def startTagCloseP(self, token):
        """Close any open <p>, then insert the element."""
        self._closePInButtonScope()
        self.tree.insertElement(token)

    def startTagPreListing(self, token):
        """Insert <pre>/<listing> and arm the leading-newline handler."""
        self._closePInButtonScope()
        self.tree.insertElement(token)
        self.parser.framesetOK = False
        self.processSpaceCharacters = self.processSpaceCharactersDropNewline

    def startTagForm(self, token):
        """Insert <form> unless a form is already being tracked."""
        if self.tree.formPointer:
            self.parser.parseError("unexpected-start-tag", {"name": "form"})
            return
        self._closePInButtonScope()
        self.tree.insertElement(token)
        self.tree.formPointer = self.tree.openElements[-1]

    def startTagListItem(self, token):
        """Insert <li>/<dd>/<dt>, first closing an open sibling item."""
        self.parser.framesetOK = False

        stopNamesMap = {"li": ["li"],
                        "dt": ["dt", "dd"],
                        "dd": ["dt", "dd"]}
        stopNames = stopNamesMap[token["name"]]
        for node in reversed(self.tree.openElements):
            if node.name in stopNames:
                self.parser.phase.processEndTag(
                    impliedTagToken(node.name, "EndTag"))
                break
            # Stop searching at a special element other than these three.
            isBarrier = (node.nameTuple in specialElements and
                         node.name not in ("address", "div", "p"))
            if isBarrier:
                break

        if self.tree.elementInScope("p", variant="button"):
            self.parser.phase.processEndTag(
                impliedTagToken("p", "EndTag"))

        self.tree.insertElement(token)

    def startTagPlaintext(self, token):
        """Insert <plaintext> and switch the tokenizer to plaintext state."""
        self._closePInButtonScope()
        self.tree.insertElement(token)
        tokenizer = self.parser.tokenizer
        tokenizer.state = tokenizer.plaintextState

    def startTagHeading(self, token):
        """Insert a heading, popping a heading that is the current node."""
        self._closePInButtonScope()
        if self.tree.openElements[-1].name in headingElements:
            self.parser.parseError("unexpected-start-tag",
                                   {"name": token["name"]})
            self.tree.openElements.pop()
        self.tree.insertElement(token)

    def startTagA(self, token):
        """Insert <a>, first closing an <a> that is still active."""
        activeA = self.tree.elementInActiveFormattingElements("a")
        if activeA:
            self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                   {"startName": "a", "endName": "a"})
            self.endTagFormatting(impliedTagToken("a"))
            # Drop whatever is left of the old element from both lists.
            if activeA in self.tree.openElements:
                self.tree.openElements.remove(activeA)
            if activeA in self.tree.activeFormattingElements:
                self.tree.activeFormattingElements.remove(activeA)
        self.tree.reconstructActiveFormattingElements()
        self.addFormattingElement(token)

    def startTagFormatting(self, token):
        """Insert a formatting element and record it as active."""
        self.tree.reconstructActiveFormattingElements()
        self.addFormattingElement(token)

    def startTagNobr(self, token):
        """Insert <nobr>, first closing a <nobr> that is in scope."""
        self.tree.reconstructActiveFormattingElements()
        if self.tree.elementInScope("nobr"):
            self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                   {"startName": "nobr", "endName": "nobr"})
            self.processEndTag(impliedTagToken("nobr"))
            # XXX Need tests that trigger the following
            self.tree.reconstructActiveFormattingElements()
        self.addFormattingElement(token)

    def startTagButton(self, token):
        """Insert <button>; a nested one closes the outer button first.

        In the nested case the token is returned after the implied end tag
        has been processed; otherwise nothing is returned.
        """
        if self.tree.elementInScope("button"):
            self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                   {"startName": "button",
                                    "endName": "button"})
            self.processEndTag(impliedTagToken("button"))
            return token
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)
        self.parser.framesetOK = False

    def startTagAppletMarqueeObject(self, token):
        """Insert the element and push a Marker onto the active list."""
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)
        self.tree.activeFormattingElements.append(Marker)
        self.parser.framesetOK = False

    def startTagXmp(self, token):
        """Parse <xmp> content as RAWTEXT."""
        self._closePInButtonScope()
        self.tree.reconstructActiveFormattingElements()
        self.parser.framesetOK = False
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagTable(self, token):
        """Insert <table> and switch to "inTable".

        Outside quirks mode an open <p> in button scope is closed first.
        """
        parser = self.parser
        if (parser.compatMode != "quirks" and
                self.tree.elementInScope("p", variant="button")):
            self.processEndTag(impliedTagToken("p"))
        self.tree.insertElement(token)
        parser.framesetOK = False
        parser.phase = parser.phases["inTable"]

    def startTagVoidFormatting(self, token):
        """Insert a void element, pop it at once and clear framesetOK."""
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True
        self.parser.framesetOK = False

    def startTagInput(self, token):
        """Insert <input>; type=hidden leaves framesetOK untouched."""
        previousFramesetOK = self.parser.framesetOK
        self.startTagVoidFormatting(token)
        attributes = token["data"]
        if "type" in attributes:
            inputType = attributes["type"].translate(asciiUpper2Lower)
            if inputType == "hidden":
                # input type=hidden doesn't change framesetOK
                self.parser.framesetOK = previousFramesetOK

    def startTagParamSource(self, token):
        """Insert the element and pop it again straight away."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagHr(self, token):
        """Close any open <p>, insert <hr> and pop it at once."""
        self._closePInButtonScope()
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True
        self.parser.framesetOK = False

    def startTagImage(self, token):
        """Treat <image> as <img>, reporting a parse error."""
        # No really...
        self.parser.parseError("unexpected-start-tag-treated-as",
                               {"originalName": "image", "newName": "img"})
        imgToken = impliedTagToken("img", "StartTag",
                                   attributes=token["data"],
                                   selfClosing=token["selfClosing"])
        self.processStartTag(imgToken)

    def startTagIsIndex(self, token):
        """Expand <isindex> into form, hr, label, prompt text and input.

        Nothing is generated when a form is already being tracked.
        """
        self.parser.parseError("deprecated-tag", {"name": "isindex"})
        if self.tree.formPointer:
            return

        data = token["data"]
        form_attrs = {}
        if "action" in data:
            form_attrs["action"] = data["action"]
        self.processStartTag(impliedTagToken("form", "StartTag",
                                             attributes=form_attrs))
        self.processStartTag(impliedTagToken("hr", "StartTag"))
        self.processStartTag(impliedTagToken("label", "StartTag"))

        # XXX Localization ...
        defaultPrompt = "This is a searchable index. Enter search keywords: "
        prompt = data["prompt"] if "prompt" in data else defaultPrompt
        self.processCharacters(
            {"type": tokenTypes["Characters"], "data": prompt})

        # The input gets every isindex attribute except the two consumed
        # above, plus a fixed name.
        attributes = data.copy()
        for consumed in ("action", "prompt"):
            if consumed in attributes:
                del attributes[consumed]
        attributes["name"] = "isindex"
        self.processStartTag(impliedTagToken("input", "StartTag",
                                             attributes=attributes,
                                             selfClosing=token["selfClosing"]))
        self.processEndTag(impliedTagToken("label"))
        self.processStartTag(impliedTagToken("hr", "StartTag"))
        self.processEndTag(impliedTagToken("form"))

    def startTagTextarea(self, token):
        """Insert <textarea> and switch the tokenizer to RCDATA."""
        self.tree.insertElement(token)
        tokenizer = self.parser.tokenizer
        tokenizer.state = tokenizer.rcdataState
        self.processSpaceCharacters = self.processSpaceCharactersDropNewline
        self.parser.framesetOK = False

    def startTagIFrame(self, token):
        """Clear framesetOK and parse <iframe> content as RAWTEXT."""
        self.parser.framesetOK = False
        self.startTagRawtext(token)

    def startTagRawtext(self, token):
        """iframe, noembed noframes, noscript(if scripting enabled)"""
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagOpt(self, token):
        """Insert <option>/<optgroup>, closing an open <option> first."""
        currentName = self.tree.openElements[-1].name
        if currentName == "option":
            self.parser.phase.processEndTag(impliedTagToken("option"))
        self.tree.reconstructActiveFormattingElements()
        self.parser.tree.insertElement(token)

    def startTagSelect(self, token):
        """Insert <select> and pick the matching select phase."""
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)
        parser = self.parser
        parser.framesetOK = False
        phases = parser.phases
        tablePhases = (phases["inTable"],
                       phases["inCaption"],
                       phases["inColumnGroup"],
                       phases["inTableBody"],
                       phases["inRow"],
                       phases["inCell"])
        if parser.phase in tablePhases:
            parser.phase = phases["inSelectInTable"]
        else:
            parser.phase = phases["inSelect"]

    def startTagRpRt(self, token):
        """Insert <rp>/<rt>, generating implied end tags inside <ruby>."""
        if self.tree.elementInScope("ruby"):
            self.tree.generateImpliedEndTags()
            if self.tree.openElements[-1].name != "ruby":
                self.parser.parseError()
        self.tree.insertElement(token)

    def startTagMath(self, token):
        """Insert a <math> element in the MathML namespace."""
        self.tree.reconstructActiveFormattingElements()
        self.parser.adjustMathMLAttributes(token)
        self.parser.adjustForeignAttributes(token)
        token["namespace"] = namespaces["mathml"]
        self.tree.insertElement(token)
        # Need to get the parse error right for the case where the token
        # has a namespace not equal to the xmlns attribute
        if token["selfClosing"]:
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

    def startTagSvg(self, token):
        """Insert an <svg> element in the SVG namespace."""
        self.tree.reconstructActiveFormattingElements()
        self.parser.adjustSVGAttributes(token)
        self.parser.adjustForeignAttributes(token)
        token["namespace"] = namespaces["svg"]
        self.tree.insertElement(token)
        # Need to get the parse error right for the case where the token
        # has a namespace not equal to the xmlns attribute
        if token["selfClosing"]:
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

    def startTagMisplaced(self, token):
        """Ignore a start tag that has no business in the body.

        Registered in ``__init__`` for "caption", "col", "colgroup",
        "frame", "head", "tbody", "td", "tfoot", "th", "thead" and "tr";
        the token is dropped after a parse error has been recorded.
        """
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-ignored", details)

    def startTagOther(self, token):
        """Insert any other element after reconstructing formatting."""
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)

    def endTagP(self, token):
        """Close the nearest <p>, creating an empty one if none is open."""
        if not self.tree.elementInScope("p", variant="button"):
            # No <p> to close: insert one, report the error, then close it.
            self.startTagCloseP(impliedTagToken("p", "StartTag"))
            self.parser.parseError("unexpected-end-tag", {"name": "p"})
            self.endTagP(impliedTagToken("p", "EndTag"))
            return
        self.tree.generateImpliedEndTags("p")
        if self.tree.openElements[-1].name != "p":
            self.parser.parseError("unexpected-end-tag", {"name": "p"})
        popped = self.tree.openElements.pop()
        while popped.name != "p":
            popped = self.tree.openElements.pop()

    def endTagBody(self, token):
        """Handle </body>: check the open elements, switch to "afterBody"."""
        if not self.tree.elementInScope("body"):
            self.parser.parseError()
            return
        if self.tree.openElements[-1].name != "body":
            mayStayOpen = frozenset(("dd", "dt", "li", "optgroup",
                                     "option", "p", "rp", "rt",
                                     "tbody", "td", "tfoot",
                                     "th", "thead", "tr", "body",
                                     "html"))
            for node in self.tree.openElements[2:]:
                if node.name not in mayStayOpen:
                    # Not sure this is the correct name for the parse error
                    self.parser.parseError(
                        "expected-one-end-tag-but-got-another",
                        {"expectedName": "body", "gotName": node.name})
                    # Only the first offender is reported.
                    break
        self.parser.phase = self.parser.phases["afterBody"]

    def endTagHtml(self, token):
        """Handle </html> as an implied </body>.

        The token is returned only when a body element was in scope.
        """
        # We repeat the test for the body end tag token being ignored here
        if not self.tree.elementInScope("body"):
            return None
        self.endTagBody(impliedTagToken("body"))
        return token

    def endTagBlock(self, token):
        """Close a block-level element named by the end tag."""
        name = token["name"]
        # Put us back in the right whitespace handling mode
        if name == "pre":
            self.processSpaceCharacters = self.processSpaceCharactersNonPre
        inScope = self.tree.elementInScope(name)
        if inScope:
            self.tree.generateImpliedEndTags()
        if self.tree.openElements[-1].name != name:
            self.parser.parseError("end-tag-too-early", {"name": name})
        if inScope:
            popped = self.tree.openElements.pop()
            while popped.name != name:
                popped = self.tree.openElements.pop()

    def endTagForm(self, token):
        """Close the tracked form element and clear the form pointer."""
        form = self.tree.formPointer
        self.tree.formPointer = None
        if form is None or not self.tree.elementInScope(form):
            self.parser.parseError("unexpected-end-tag",
                                   {"name": "form"})
            return
        self.tree.generateImpliedEndTags()
        if self.tree.openElements[-1] != form:
            self.parser.parseError("end-tag-too-early-ignored",
                                   {"name": "form"})
        self.tree.openElements.remove(form)

    def endTagListItem(self, token):
        """Close <li> (list scope) or <dd>/<dt> (default scope)."""
        name = token["name"]
        variant = "list" if name == "li" else None
        if not self.tree.elementInScope(name, variant=variant):
            self.parser.parseError("unexpected-end-tag", {"name": name})
            return
        self.tree.generateImpliedEndTags(exclude=name)
        if self.tree.openElements[-1].name != name:
            self.parser.parseError(
                "end-tag-too-early",
                {"name": name})
        popped = self.tree.openElements.pop()
        while popped.name != name:
            popped = self.tree.openElements.pop()

    def endTagHeading(self, token):
        """Close the nearest open heading of any level."""
        inScope = self.tree.elementInScope
        if any(inScope(level) for level in headingElements):
            self.tree.generateImpliedEndTags()
        if self.tree.openElements[-1].name != token["name"]:
            self.parser.parseError("end-tag-too-early",
                                   {"name": token["name"]})

        # The scope test is repeated after implied end tags were generated.
        if any(inScope(level) for level in headingElements):
            popped = self.tree.openElements.pop()
            while popped.name not in headingElements:
                popped = self.tree.openElements.pop()

    def endTagFormatting(self, token):
        """The much-feared adoption agency algorithm"""
        # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
        # XXX Better parseError messages appreciated.
        tree = self.tree
        name = token["name"]

        # Steps 1-3: the outer loop runs at most eight times.
        for _outerPass in range(8):

            # Step 4: the formatting element is the last entry in the list
            # of active formatting elements that lies after the last scope
            # marker (if any) and has the same tag name as the token.
            formattingElement = tree.elementInActiveFormattingElements(name)
            if (not formattingElement or
                    (formattingElement in tree.openElements and
                     not tree.elementInScope(formattingElement.name))):
                # No usable node: act as for "any other end tag".
                self.endTagOther(token)
                return

            # The node exists but is not on the stack of open elements:
            # parse error; remove it from the list and abort.
            elif formattingElement not in tree.openElements:
                self.parser.parseError("adoption-agency-1.2", {"name": name})
                tree.activeFormattingElements.remove(formattingElement)
                return

            # The node is on the stack but not in scope: parse error;
            # ignore the token and abort.
            elif not tree.elementInScope(formattingElement.name):
                self.parser.parseError("adoption-agency-4.4", {"name": name})
                return

            # The node is on the stack and in scope.  It is a parse error
            # if it is not the current node, but the algorithm continues
            # either way.
            elif formattingElement != tree.openElements[-1]:
                self.parser.parseError("adoption-agency-1.3", {"name": name})

            # Step 5: the furthest block is the topmost special-category
            # node below the formatting element.  There might not be one.
            afeIndex = tree.openElements.index(formattingElement)
            furthestBlock = None
            for candidate in tree.openElements[afeIndex:]:
                if candidate.nameTuple in specialElements:
                    furthestBlock = candidate
                    break

            # Step 6: without a furthest block, pop the stack up to and
            # including the formatting element, drop it from the active
            # list, and abort.
            if furthestBlock is None:
                element = tree.openElements.pop()
                while element != formattingElement:
                    element = tree.openElements.pop()
                tree.activeFormattingElements.remove(element)
                return

            # Step 7
            commonAncestor = tree.openElements[afeIndex - 1]

            # Step 8: the bookmark records where the replacement is to be
            # reinserted into the active list (step 14).  It can move in
            # step 9.7.
            bookmark = tree.activeFormattingElements.index(formattingElement)

            # Step 9
            lastNode = node = furthestBlock
            index = tree.openElements.index(node)
            for _innerPass in range(3):
                # Node is element before node in open elements
                index -= 1
                node = tree.openElements[index]
                if node not in tree.activeFormattingElements:
                    tree.openElements.remove(node)
                    continue
                # Step 9.6
                if node == formattingElement:
                    break
                # Step 9.7
                if lastNode == furthestBlock:
                    bookmark = tree.activeFormattingElements.index(node) + 1
                # Step 9.8: replace node with a clone in both lists.
                clone = node.cloneNode()
                activeList = tree.activeFormattingElements
                activeList[activeList.index(node)] = clone
                openList = tree.openElements
                openList[openList.index(node)] = clone
                node = clone
                # Step 9.9: move lastNode under node, detaching it from
                # its current parent, if any.
                if lastNode.parent:
                    lastNode.parent.removeChild(lastNode)
                node.appendChild(lastNode)
                # Step 9.10
                lastNode = node

            # Step 10: foster parent lastNode if the common ancestor is a
            # table, tbody, tfoot, thead, or tr.
            if lastNode.parent:
                lastNode.parent.removeChild(lastNode)

            tableParts = frozenset(("table", "tbody", "tfoot", "thead", "tr"))
            if commonAncestor.name in tableParts:
                parent, insertBefore = tree.getTableMisnestedNodePosition()
                parent.insertBefore(lastNode, insertBefore)
            else:
                commonAncestor.appendChild(lastNode)

            # Step 11
            clone = formattingElement.cloneNode()

            # Step 12
            furthestBlock.reparentChildren(clone)

            # Step 13
            furthestBlock.appendChild(clone)

            # Step 14
            tree.activeFormattingElements.remove(formattingElement)
            tree.activeFormattingElements.insert(bookmark, clone)

            # Step 15
            tree.openElements.remove(formattingElement)
            tree.openElements.insert(
                tree.openElements.index(furthestBlock) + 1, clone)

    def endTagAppletMarqueeObject(self, token):
        """Close the element and clear the active list up to its Marker."""
        name = token["name"]
        if self.tree.elementInScope(name):
            self.tree.generateImpliedEndTags()
        if self.tree.openElements[-1].name != name:
            self.parser.parseError("end-tag-too-early", {"name": name})

        if self.tree.elementInScope(name):
            popped = self.tree.openElements.pop()
            while popped.name != name:
                popped = self.tree.openElements.pop()
            self.tree.clearActiveFormattingElements()

    def endTagBr(self, token):
        """Treat </br> as a <br> element, reporting a parse error."""
        self.parser.parseError("unexpected-end-tag-treated-as",
                               {"originalName": "br",
                                "newName": "br element"})
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(impliedTagToken("br", "StartTag"))
        self.tree.openElements.pop()

    def endTagOther(self, token):
        """Close the nearest open element with the end tag's name.

        The stack is searched from the current node upwards; the search
        gives up with a parse error at the first special element that does
        not match.
        """
        name = token["name"]
        for node in reversed(self.tree.openElements[:]):
            if node.name == name:
                self.tree.generateImpliedEndTags(exclude=name)
                if self.tree.openElements[-1].name != name:
                    self.parser.parseError("unexpected-end-tag",
                                           {"name": name})
                # Pop everything up to and including the matched node.
                while self.tree.openElements.pop() != node:
                    pass
                break
            if node.nameTuple in specialElements:
                self.parser.parseError("unexpected-end-tag", {"name": name})
                break
-
class TextPhase(Phase):
    """Phase that collects text until an end tag closes the current node.

    Every end tag pops the current node and restores
    ``parser.originalPhase``; start tags are not expected in this phase.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        # No start tag has a dedicated handler here.
        startTags = utils.MethodDispatcher([])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        endTags = utils.MethodDispatcher([
            ("script", self.endTagScript),
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    def processCharacters(self, token):
        """Append the token's character data to the tree."""
        text = token["data"]
        self.tree.insertText(text)

    def processEOF(self):
        """Report the unclosed element, pop it and resume the saved phase.

        Returns True so the EOF is reprocessed by the restored phase.
        """
        tree = self.tree
        parser = self.parser
        unclosedName = tree.openElements[-1].name
        parser.parseError("expected-named-closing-tag-but-got-eof",
                          {"name": unclosedName})
        tree.openElements.pop()
        parser.phase = parser.originalPhase
        return True

    def startTagOther(self, token):
        """Start tags must never reach this phase."""
        assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']

    def endTagScript(self, token):
        """Pop the ``script`` element and resume the saved phase."""
        closed = self.tree.openElements.pop()
        assert closed.name == "script"
        self.parser.phase = self.parser.originalPhase
        # Everything else the specification describes for this end tag
        # only matters when document.write is supported.

    def endTagOther(self, token):
        """Pop the current node and resume the saved phase."""
        parser = self.parser
        self.tree.openElements.pop()
        parser.phase = parser.originalPhase
-
class InTablePhase(Phase):
    """Token handling for the "in table" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-table
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        rowGroupTags = ("tbody", "tfoot", "thead")
        cellOrRowTags = ("td", "th", "tr")
        styleScriptTags = ("style", "script")
        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("caption", self.startTagCaption),
            ("colgroup", self.startTagColgroup),
            ("col", self.startTagCol),
            (rowGroupTags, self.startTagRowGroup),
            (cellOrRowTags, self.startTagImplyTbody),
            ("table", self.startTagTable),
            (styleScriptTags, self.startTagStyleScript),
            ("input", self.startTagInput),
            ("form", self.startTagForm),
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        ignoredEndTags = ("body", "caption", "col", "colgroup", "html",
                          "tbody", "td", "tfoot", "th", "thead", "tr")
        endTags = utils.MethodDispatcher([
            ("table", self.endTagTable),
            (ignoredEndTags, self.endTagIgnore),
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    # helper methods
    def clearStackToTableContext(self):
        """Pop open elements until the current node is ``table`` or ``html``.

        Implements "clear the stack back to a table context".  Ending on
        ``html`` corresponds to the innerHTML case.  No parse error is
        reported for the popped elements.
        """
        tree = self.tree
        while True:
            if tree.openElements[-1].name in ("table", "html"):
                break
            tree.openElements.pop()

    # processing methods
    def processEOF(self):
        """Report EOF inside a table unless only ``html`` remains open."""
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-table")
        # Stop parsing

    def processSpaceCharacters(self, token):
        """Switch to the "inTableText" phase and let it take the token."""
        parser = self.parser
        previousPhase = parser.phase
        textPhase = parser.phases["inTableText"]
        parser.phase = textPhase
        textPhase.originalPhase = previousPhase
        textPhase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Switch to the "inTableText" phase and let it take the token."""
        parser = self.parser
        previousPhase = parser.phase
        textPhase = parser.phases["inTableText"]
        parser.phase = textPhase
        textPhase.originalPhase = previousPhase
        textPhase.processCharacters(token)

    def insertText(self, token):
        """Insert text via "inBody" with ``insertFromTable`` enabled.

        Callers only get here with at least one non-whitespace character.
        """
        tree = self.tree
        tree.insertFromTable = True
        self.parser.phases["inBody"].processCharacters(token)
        tree.insertFromTable = False

    def startTagCaption(self, token):
        """Open a caption, push a formatting marker, enter "inCaption"."""
        tree = self.tree
        self.clearStackToTableContext()
        tree.activeFormattingElements.append(Marker)
        tree.insertElement(token)
        self.parser.phase = self.parser.phases["inCaption"]

    def startTagColgroup(self, token):
        """Open a colgroup and enter "inColumnGroup"."""
        self.clearStackToTableContext()
        self.tree.insertElement(token)
        parser = self.parser
        parser.phase = parser.phases["inColumnGroup"]

    def startTagCol(self, token):
        """Imply a ``<colgroup>`` start tag, then reprocess the token."""
        impliedColgroup = impliedTagToken("colgroup", "StartTag")
        self.startTagColgroup(impliedColgroup)
        return token

    def startTagRowGroup(self, token):
        """Open a row group element and enter "inTableBody"."""
        self.clearStackToTableContext()
        self.tree.insertElement(token)
        parser = self.parser
        parser.phase = parser.phases["inTableBody"]

    def startTagImplyTbody(self, token):
        """Imply a ``<tbody>`` start tag, then reprocess the token."""
        impliedTbody = impliedTagToken("tbody", "StartTag")
        self.startTagRowGroup(impliedTbody)
        return token

    def startTagTable(self, token):
        """Treat a nested ``<table>`` as an implied ``</table>``.

        The token is returned for reprocessing unless parsing innerHTML.
        """
        parser = self.parser
        parser.parseError("unexpected-start-tag-implies-end-tag",
                          {"startName": "table", "endName": "table"})
        parser.phase.processEndTag(impliedTagToken("table"))
        if parser.innerHTML:
            return None
        return token

    def startTagStyleScript(self, token):
        """Delegate to the "inHead" phase."""
        inHead = self.parser.phases["inHead"]
        return inHead.processStartTag(token)

    def startTagInput(self, token):
        """Insert hidden inputs in place; treat any other input generically."""
        attributes = token["data"]
        isHidden = ("type" in attributes and
                    attributes["type"].translate(asciiUpper2Lower) == "hidden")
        if not isHidden:
            self.startTagOther(token)
            return

        self.parser.parseError("unexpected-hidden-input-in-table")
        self.tree.insertElement(token)
        # XXX associate with form
        self.tree.openElements.pop()

    def startTagForm(self, token):
        """Report the misplaced form; record it if no form pointer is set."""
        tree = self.tree
        self.parser.parseError("unexpected-form-in-table")
        if tree.formPointer is not None:
            return
        tree.insertElement(token)
        tree.formPointer = tree.openElements[-1]
        tree.openElements.pop()

    def startTagOther(self, token):
        """Process the start tag via "inBody" with ``insertFromTable`` set."""
        tree = self.tree
        self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
        # Do the table magic!
        tree.insertFromTable = True
        self.parser.phases["inBody"].processStartTag(token)
        tree.insertFromTable = False

    def endTagTable(self, token):
        """Close the table in table scope and reset the insertion mode."""
        tree = self.tree
        parser = self.parser
        if not tree.elementInScope("table", variant="table"):
            # innerHTML case
            assert parser.innerHTML
            parser.parseError()
            return

        tree.generateImpliedEndTags()
        currentName = tree.openElements[-1].name
        if currentName != "table":
            parser.parseError("end-tag-too-early-named",
                              {"gotName": "table",
                               "expectedName": currentName})
        # Drop everything above the table, then the table itself.
        while tree.openElements[-1].name != "table":
            tree.openElements.pop()
        tree.openElements.pop()
        parser.resetInsertionMode()

    def endTagIgnore(self, token):
        """Report and ignore an end tag that is not allowed here."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", errorData)

    def endTagOther(self, token):
        """Process the end tag via "inBody" with ``insertFromTable`` set."""
        tree = self.tree
        self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
        # Do the table magic!
        tree.insertFromTable = True
        self.parser.phases["inBody"].processEndTag(token)
        tree.insertFromTable = False
-
class InTableTextPhase(Phase):
    """Buffers character tokens seen inside a table.

    The buffered tokens are flushed, and the phase stored in
    ``originalPhase`` is restored, as soon as any non-character token
    arrives.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)
        # Pending character tokens, in arrival order.
        self.characterTokens = []
        # Phase to return to; assigned by whoever switches to this phase.
        self.originalPhase = None

    def flushCharacters(self):
        """Emit the buffered text and empty the buffer.

        Text with any non-space character goes through the "inTable"
        phase's ``insertText``; non-empty all-space text is inserted into
        the tree directly.
        """
        text = "".join(pending["data"] for pending in self.characterTokens)
        hasNonSpace = any(char not in spaceCharacters for char in text)
        if hasNonSpace:
            merged = {"type": tokenTypes["Characters"], "data": text}
            self.parser.phases["inTable"].insertText(merged)
        elif text:
            self.tree.insertText(text)
        self.characterTokens = []

    def processComment(self, token):
        """Flush, restore the original phase, and reprocess the token."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

    def processEOF(self):
        """Flush, restore the original phase, and reprocess the EOF."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return True

    def processCharacters(self, token):
        """Buffer the token, dropping a lone U+0000 character."""
        if token["data"] != "\u0000":
            self.characterTokens.append(token)

    def processSpaceCharacters(self, token):
        """Buffer a whitespace token."""
        # pretty sure we should never reach here
        self.characterTokens.append(token)
        # assert False

    def processStartTag(self, token):
        """Flush, restore the original phase, and reprocess the token."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

    def processEndTag(self, token):
        """Flush, restore the original phase, and reprocess the token."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token
-
class InCaptionPhase(Phase):
    """Token handling for the "in caption" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-caption
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        tableStartTags = ("caption", "col", "colgroup", "tbody", "td",
                          "tfoot", "th", "thead", "tr")
        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (tableStartTags, self.startTagTableElement),
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        ignoredEndTags = ("body", "col", "colgroup", "html", "tbody", "td",
                          "tfoot", "th", "thead", "tr")
        endTags = utils.MethodDispatcher([
            ("caption", self.endTagCaption),
            ("table", self.endTagTable),
            (ignoredEndTags, self.endTagIgnore),
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    def ignoreEndTagCaption(self):
        """Return True when no ``caption`` element is in table scope."""
        captionInScope = self.tree.elementInScope("caption", variant="table")
        return not captionInScope

    def processEOF(self):
        """Delegate EOF handling to "inBody" (its result is discarded)."""
        inBody = self.parser.phases["inBody"]
        inBody.processEOF()

    def processCharacters(self, token):
        """Delegate character tokens to "inBody"."""
        inBody = self.parser.phases["inBody"]
        return inBody.processCharacters(token)

    def startTagTableElement(self, token):
        """Imply ``</caption>``; reprocess the token if that was honoured."""
        parser = self.parser
        parser.parseError()
        # XXX Have to duplicate logic here to find out if the tag is ignored
        captionWasOpen = not self.ignoreEndTagCaption()
        parser.phase.processEndTag(impliedTagToken("caption"))
        return token if captionWasOpen else None

    def startTagOther(self, token):
        """Delegate any other start tag to "inBody"."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def endTagCaption(self, token):
        """Close the caption and return to the "inTable" phase."""
        parser = self.parser
        tree = self.tree
        if self.ignoreEndTagCaption():
            # innerHTML case
            assert parser.innerHTML
            parser.parseError()
            return

        # AT this code is quite similar to endTagTable in "InTable"
        tree.generateImpliedEndTags()
        currentName = tree.openElements[-1].name
        if currentName != "caption":
            parser.parseError("expected-one-end-tag-but-got-another",
                              {"gotName": "caption",
                               "expectedName": currentName})
        # Drop everything above the caption, then the caption itself.
        while tree.openElements[-1].name != "caption":
            tree.openElements.pop()
        tree.openElements.pop()
        tree.clearActiveFormattingElements()
        parser.phase = parser.phases["inTable"]

    def endTagTable(self, token):
        """Imply ``</caption>``; reprocess the token if that was honoured."""
        parser = self.parser
        parser.parseError()
        captionWasOpen = not self.ignoreEndTagCaption()
        parser.phase.processEndTag(impliedTagToken("caption"))
        return token if captionWasOpen else None

    def endTagIgnore(self, token):
        """Report and ignore an end tag that is not allowed here."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", errorData)

    def endTagOther(self, token):
        """Delegate any other end tag to "inBody"."""
        inBody = self.parser.phases["inBody"]
        return inBody.processEndTag(token)
-
class InColumnGroupPhase(Phase):
    """Token handling for the "in column group" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-column
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("col", self.startTagCol),
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        endTags = utils.MethodDispatcher([
            ("colgroup", self.endTagColgroup),
            ("col", self.endTagCol),
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    def ignoreEndTagColgroup(self):
        """Return True when the current node is the ``html`` element."""
        currentName = self.tree.openElements[-1].name
        return currentName == "html"

    def processEOF(self):
        """Close the colgroup at EOF; return True to reprocess the EOF."""
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
            return None

        wasIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        if wasIgnored:
            return None
        return True

    def processCharacters(self, token):
        """Imply ``</colgroup>``; reprocess the token if it was honoured."""
        wasIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return None if wasIgnored else token

    def startTagCol(self, token):
        """Insert a ``col`` element and pop it straight away."""
        tree = self.tree
        tree.insertElement(token)
        tree.openElements.pop()

    def startTagOther(self, token):
        """Imply ``</colgroup>``; reprocess the token if it was honoured."""
        wasIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return None if wasIgnored else token

    def endTagColgroup(self, token):
        """Pop the colgroup and return to the "inTable" phase."""
        parser = self.parser
        if not self.ignoreEndTagColgroup():
            self.tree.openElements.pop()
            parser.phase = parser.phases["inTable"]
            return
        # innerHTML case
        assert parser.innerHTML
        parser.parseError()

    def endTagCol(self, token):
        """``col`` never takes an end tag; report it."""
        self.parser.parseError("no-end-tag", {"name": "col"})

    def endTagOther(self, token):
        """Imply ``</colgroup>``; reprocess the token if it was honoured."""
        wasIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return None if wasIgnored else token
-
class InTableBodyPhase(Phase):
    """Token handling for the "in table body" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-table0
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        cellTags = ("td", "th")
        tableStartTags = ("caption", "col", "colgroup", "tbody", "tfoot",
                          "thead")
        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("tr", self.startTagTr),
            (cellTags, self.startTagTableCell),
            (tableStartTags, self.startTagTableOther),
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        rowGroupTags = ("tbody", "tfoot", "thead")
        ignoredEndTags = ("body", "caption", "col", "colgroup", "html", "td",
                          "th", "tr")
        endTags = utils.MethodDispatcher([
            (rowGroupTags, self.endTagTableRowGroup),
            ("table", self.endTagTable),
            (ignoredEndTags, self.endTagIgnore),
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    # helper methods
    def clearStackToTableBodyContext(self):
        """Pop open elements until a row group or ``html`` is current.

        No parse error is reported for the popped elements.  Ending on
        ``html`` is only expected when parsing innerHTML.
        """
        tree = self.tree
        stopNames = ("tbody", "tfoot", "thead", "html")
        while True:
            if tree.openElements[-1].name in stopNames:
                break
            tree.openElements.pop()
        if tree.openElements[-1].name == "html":
            assert self.parser.innerHTML

    # the rest
    def processEOF(self):
        """Delegate EOF handling to "inTable" (its result is discarded)."""
        inTable = self.parser.phases["inTable"]
        inTable.processEOF()

    def processSpaceCharacters(self, token):
        """Delegate whitespace tokens to "inTable"."""
        inTable = self.parser.phases["inTable"]
        return inTable.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Delegate character tokens to "inTable"."""
        inTable = self.parser.phases["inTable"]
        return inTable.processCharacters(token)

    def startTagTr(self, token):
        """Open a row and enter the "inRow" phase."""
        self.clearStackToTableBodyContext()
        self.tree.insertElement(token)
        parser = self.parser
        parser.phase = parser.phases["inRow"]

    def startTagTableCell(self, token):
        """Imply a ``<tr>`` start tag, then reprocess the cell token."""
        self.parser.parseError("unexpected-cell-in-table-body",
                               {"name": token["name"]})
        impliedRow = impliedTagToken("tr", "StartTag")
        self.startTagTr(impliedRow)
        return token

    def startTagTableOther(self, token):
        """Close the open row group, then reprocess the token."""
        # XXX AT Any ideas on how to share this with endTagTable?
        tree = self.tree
        rowGroupOpen = any(tree.elementInScope(name, variant="table")
                           for name in ("tbody", "thead", "tfoot"))
        if not rowGroupOpen:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None

        self.clearStackToTableBodyContext()
        currentName = tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(currentName))
        return token

    def startTagOther(self, token):
        """Delegate any other start tag to "inTable"."""
        inTable = self.parser.phases["inTable"]
        return inTable.processStartTag(token)

    def endTagTableRowGroup(self, token):
        """Close the named row group and return to "inTable"."""
        name = token["name"]
        if not self.tree.elementInScope(name, variant="table"):
            self.parser.parseError("unexpected-end-tag-in-table-body",
                                   {"name": name})
            return

        self.clearStackToTableBodyContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        """Close the open row group, then reprocess the ``</table>``."""
        tree = self.tree
        rowGroupOpen = any(tree.elementInScope(name, variant="table")
                           for name in ("tbody", "thead", "tfoot"))
        if not rowGroupOpen:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None

        self.clearStackToTableBodyContext()
        currentName = tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(currentName))
        return token

    def endTagIgnore(self, token):
        """Report and ignore an end tag that is not allowed here."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-table-body", errorData)

    def endTagOther(self, token):
        """Delegate any other end tag to "inTable"."""
        inTable = self.parser.phases["inTable"]
        return inTable.processEndTag(token)
-
class InRowPhase(Phase):
    """Token handling for the "in row" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-row
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        cellTags = ("td", "th")
        tableStartTags = ("caption", "col", "colgroup", "tbody", "tfoot",
                          "thead", "tr")
        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (cellTags, self.startTagTableCell),
            (tableStartTags, self.startTagTableOther),
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        rowGroupTags = ("tbody", "tfoot", "thead")
        ignoredEndTags = ("body", "caption", "col", "colgroup", "html", "td",
                          "th")
        endTags = utils.MethodDispatcher([
            ("tr", self.endTagTr),
            ("table", self.endTagTable),
            (rowGroupTags, self.endTagTableRowGroup),
            (ignoredEndTags, self.endTagIgnore),
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    # helper methods (XXX unify this with other table helper methods)
    def clearStackToTableRowContext(self):
        """Pop open elements until ``tr`` or ``html`` is the current node.

        A parse error is reported for every element popped.
        """
        tree = self.tree
        while True:
            currentName = tree.openElements[-1].name
            if currentName in ("tr", "html"):
                break
            self.parser.parseError("unexpected-implied-end-tag-in-table-row",
                                   {"name": currentName})
            tree.openElements.pop()

    def ignoreEndTagTr(self):
        """Return True when no ``tr`` element is in table scope."""
        rowInScope = self.tree.elementInScope("tr", variant="table")
        return not rowInScope

    # the rest
    def processEOF(self):
        """Delegate EOF handling to "inTable" (its result is discarded)."""
        inTable = self.parser.phases["inTable"]
        inTable.processEOF()

    def processSpaceCharacters(self, token):
        """Delegate whitespace tokens to "inTable"."""
        inTable = self.parser.phases["inTable"]
        return inTable.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Delegate character tokens to "inTable"."""
        inTable = self.parser.phases["inTable"]
        return inTable.processCharacters(token)

    def startTagTableCell(self, token):
        """Open a cell, enter "inCell" and push a formatting marker."""
        tree = self.tree
        self.clearStackToTableRowContext()
        tree.insertElement(token)
        self.parser.phase = self.parser.phases["inCell"]
        tree.activeFormattingElements.append(Marker)

    def startTagTableOther(self, token):
        """Imply ``</tr>``; reprocess the token if that was honoured."""
        rowWasOpen = not self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # XXX how are we sure it's always ignored in the innerHTML case?
        return token if rowWasOpen else None

    def startTagOther(self, token):
        """Delegate any other start tag to "inTable"."""
        inTable = self.parser.phases["inTable"]
        return inTable.processStartTag(token)

    def endTagTr(self, token):
        """Close the row and return to the "inTableBody" phase."""
        parser = self.parser
        if self.ignoreEndTagTr():
            # innerHTML case
            assert parser.innerHTML
            parser.parseError()
            return

        self.clearStackToTableRowContext()
        self.tree.openElements.pop()
        parser.phase = parser.phases["inTableBody"]

    def endTagTable(self, token):
        """Imply ``</tr>``; reprocess the token if that was honoured."""
        rowWasOpen = not self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # Reprocess the current tag if the tr end tag was not ignored
        # XXX how are we sure it's always ignored in the innerHTML case?
        return token if rowWasOpen else None

    def endTagTableRowGroup(self, token):
        """Close the row first when the named row group is in table scope."""
        if not self.tree.elementInScope(token["name"], variant="table"):
            self.parser.parseError()
            return None
        self.endTagTr(impliedTagToken("tr"))
        return token

    def endTagIgnore(self, token):
        """Report and ignore an end tag that is not allowed here."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-table-row", errorData)

    def endTagOther(self, token):
        """Delegate any other end tag to "inTable"."""
        inTable = self.parser.phases["inTable"]
        return inTable.processEndTag(token)
-
class InCellPhase(Phase):
    """Token handling for the "in cell" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-cell
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        tableStartTags = ("caption", "col", "colgroup", "tbody", "td",
                          "tfoot", "th", "thead", "tr")
        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (tableStartTags, self.startTagTableOther),
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        cellTags = ("td", "th")
        ignoredEndTags = ("body", "caption", "col", "colgroup", "html")
        implyingEndTags = ("table", "tbody", "tfoot", "thead", "tr")
        endTags = utils.MethodDispatcher([
            (cellTags, self.endTagTableCell),
            (ignoredEndTags, self.endTagIgnore),
            (implyingEndTags, self.endTagImply),
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    # helper
    def closeCell(self):
        """Close whichever of ``td`` / ``th`` is in table scope (td first)."""
        inScope = self.tree.elementInScope
        if inScope("td", variant="table"):
            self.endTagTableCell(impliedTagToken("td"))
        elif inScope("th", variant="table"):
            self.endTagTableCell(impliedTagToken("th"))

    # the rest
    def processEOF(self):
        """Delegate EOF handling to "inBody" (its result is discarded)."""
        inBody = self.parser.phases["inBody"]
        inBody.processEOF()

    def processCharacters(self, token):
        """Delegate character tokens to "inBody"."""
        inBody = self.parser.phases["inBody"]
        return inBody.processCharacters(token)

    def startTagTableOther(self, token):
        """Close the open cell, then reprocess the token."""
        inScope = self.tree.elementInScope
        cellOpen = (inScope("td", variant="table") or
                    inScope("th", variant="table"))
        if not cellOpen:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def startTagOther(self, token):
        """Delegate any other start tag to "inBody"."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def endTagTableCell(self, token):
        """Close the named cell and return to the "inRow" phase."""
        name = token["name"]
        tree = self.tree
        if not tree.elementInScope(name, variant="table"):
            self.parser.parseError("unexpected-end-tag", {"name": name})
            return

        tree.generateImpliedEndTags(name)
        if tree.openElements[-1].name == name:
            tree.openElements.pop()
        else:
            self.parser.parseError("unexpected-cell-end-tag",
                                   {"name": name})
            # Pop up to and including the cell being closed.
            while tree.openElements.pop().name != name:
                pass
        tree.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases["inRow"]

    def endTagIgnore(self, token):
        """Report and ignore an end tag that is not allowed here."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", errorData)

    def endTagImply(self, token):
        """Close the cell first when the named element is in table scope."""
        if not self.tree.elementInScope(token["name"], variant="table"):
            # sometimes innerHTML case
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def endTagOther(self, token):
        """Delegate any other end tag to "inBody"."""
        inBody = self.parser.phases["inBody"]
        return inBody.processEndTag(token)
-
class InSelectPhase(Phase):
    """Token handling for the "in select" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-select
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        formControlTags = ("input", "keygen", "textarea")
        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("option", self.startTagOption),
            ("optgroup", self.startTagOptgroup),
            ("select", self.startTagSelect),
            (formControlTags, self.startTagInput),
            ("script", self.startTagScript),
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        endTags = utils.MethodDispatcher([
            ("option", self.endTagOption),
            ("optgroup", self.endTagOptgroup),
            ("select", self.endTagSelect),
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    def processEOF(self):
        """Report EOF inside a select unless only ``html`` remains open."""
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-select")

    def processCharacters(self, token):
        """Insert character data, dropping a lone U+0000 character."""
        text = token["data"]
        if text == "\u0000":
            return
        self.tree.insertText(token["data"])

    def startTagOption(self, token):
        """Insert an option, implying ``</option>`` for an open one."""
        tree = self.tree
        # We need to imply </option> if <option> is the current node.
        if tree.openElements[-1].name == "option":
            tree.openElements.pop()
        tree.insertElement(token)

    def startTagOptgroup(self, token):
        """Insert an optgroup, closing an open option and optgroup first."""
        tree = self.tree
        for impliedName in ("option", "optgroup"):
            if tree.openElements[-1].name == impliedName:
                tree.openElements.pop()
        tree.insertElement(token)

    def startTagSelect(self, token):
        """Treat a nested ``<select>`` as ``</select>``."""
        self.parser.parseError("unexpected-select-in-select")
        impliedEnd = impliedTagToken("select")
        self.endTagSelect(impliedEnd)

    def startTagInput(self, token):
        """Close the select, then reprocess the form-control token."""
        self.parser.parseError("unexpected-input-in-select")
        if not self.tree.elementInScope("select", variant="select"):
            assert self.parser.innerHTML
            return None
        self.endTagSelect(impliedTagToken("select"))
        return token

    def startTagScript(self, token):
        """Delegate to the "inHead" phase."""
        inHead = self.parser.phases["inHead"]
        return inHead.processStartTag(token)

    def startTagOther(self, token):
        """Report and ignore any other start tag."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-in-select", errorData)

    def endTagOption(self, token):
        """Pop the current option, or report a stray ``</option>``."""
        tree = self.tree
        if tree.openElements[-1].name != "option":
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "option"})
            return
        tree.openElements.pop()

    def endTagOptgroup(self, token):
        """Close an optgroup, and an option directly inside it."""
        tree = self.tree
        # </optgroup> implicitly closes <option>
        optionInsideGroup = (tree.openElements[-1].name == "option" and
                             tree.openElements[-2].name == "optgroup")
        if optionInsideGroup:
            tree.openElements.pop()
        # It also closes </optgroup>, but nothing else
        if tree.openElements[-1].name != "optgroup":
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "optgroup"})
            return
        tree.openElements.pop()

    def endTagSelect(self, token):
        """Pop through the select element and reset the insertion mode."""
        tree = self.tree
        parser = self.parser
        if not tree.elementInScope("select", variant="select"):
            # innerHTML case
            assert parser.innerHTML
            parser.parseError()
            return

        while tree.openElements.pop().name != "select":
            pass
        parser.resetInsertionMode()

    def endTagOther(self, token):
        """Report and ignore any other end tag."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-select", errorData)
-
class InSelectInTablePhase(Phase):
    """Token handling for a ``select`` nested in a table.

    Table-related tags close the select; everything else is delegated to
    the "inSelect" phase.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTags = utils.MethodDispatcher([
            (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
             self.startTagTable),
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        endTags = utils.MethodDispatcher([
            (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
             self.endTagTable),
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    def processEOF(self):
        """Delegate EOF handling to "inSelect" (its result is discarded)."""
        inSelect = self.parser.phases["inSelect"]
        inSelect.processEOF()

    def processCharacters(self, token):
        """Delegate character tokens to "inSelect"."""
        inSelect = self.parser.phases["inSelect"]
        return inSelect.processCharacters(token)

    def startTagTable(self, token):
        """Imply ``</select>``, then reprocess the table-related token."""
        self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", {"name": token["name"]})
        impliedEnd = impliedTagToken("select")
        self.endTagOther(impliedEnd)
        return token

    def startTagOther(self, token):
        """Delegate any other start tag to "inSelect"."""
        inSelect = self.parser.phases["inSelect"]
        return inSelect.processStartTag(token)

    def endTagTable(self, token):
        """Imply ``</select>`` when the named element is in table scope."""
        self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]})
        if not self.tree.elementInScope(token["name"], variant="table"):
            return None
        self.endTagOther(impliedTagToken("select"))
        return token

    def endTagOther(self, token):
        """Delegate any other end tag to "inSelect"."""
        inSelect = self.parser.phases["inSelect"]
        return inSelect.processEndTag(token)
-
class InForeignContentPhase(Phase):
    """Token handling while the current node is in a foreign namespace.

    Start tags listed in ``breakoutElements`` (and ``font`` carrying a
    ``color``, ``face`` or ``size`` attribute) pop the foreign elements
    and are reprocessed; other start tags are inserted in the current
    node's namespace.
    """

    breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                  "center", "code", "dd", "div", "dl", "dt",
                                  "em", "embed", "h1", "h2", "h3",
                                  "h4", "h5", "h6", "head", "hr", "i", "img",
                                  "li", "listing", "menu", "meta", "nobr",
                                  "ol", "p", "pre", "ruby", "s", "small",
                                  "span", "strong", "strike", "sub", "sup",
                                  "table", "tt", "u", "ul", "var"])

    # Lowercased SVG tag name -> canonical mixed-case spelling.  Kept at
    # class level so the table is built once rather than on every call.
    _svgTagNames = {"altglyph": "altGlyph",
                    "altglyphdef": "altGlyphDef",
                    "altglyphitem": "altGlyphItem",
                    "animatecolor": "animateColor",
                    "animatemotion": "animateMotion",
                    "animatetransform": "animateTransform",
                    "clippath": "clipPath",
                    "feblend": "feBlend",
                    "fecolormatrix": "feColorMatrix",
                    "fecomponenttransfer": "feComponentTransfer",
                    "fecomposite": "feComposite",
                    "feconvolvematrix": "feConvolveMatrix",
                    "fediffuselighting": "feDiffuseLighting",
                    "fedisplacementmap": "feDisplacementMap",
                    "fedistantlight": "feDistantLight",
                    "feflood": "feFlood",
                    "fefunca": "feFuncA",
                    "fefuncb": "feFuncB",
                    "fefuncg": "feFuncG",
                    "fefuncr": "feFuncR",
                    "fegaussianblur": "feGaussianBlur",
                    "feimage": "feImage",
                    "femerge": "feMerge",
                    "femergenode": "feMergeNode",
                    "femorphology": "feMorphology",
                    "feoffset": "feOffset",
                    "fepointlight": "fePointLight",
                    "fespecularlighting": "feSpecularLighting",
                    "fespotlight": "feSpotLight",
                    "fetile": "feTile",
                    "feturbulence": "feTurbulence",
                    "foreignobject": "foreignObject",
                    "glyphref": "glyphRef",
                    "lineargradient": "linearGradient",
                    "radialgradient": "radialGradient",
                    "textpath": "textPath"}

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

    def adjustSVGTagNames(self, token):
        """Rewrite ``token["name"]`` in place to its mixed-case SVG name.

        Names without an entry in the replacement table are left as is.
        """
        name = token["name"]
        if name in self._svgTagNames:
            token["name"] = self._svgTagNames[name]

    def processCharacters(self, token):
        """Insert characters, replacing a lone U+0000 with U+FFFD.

        Any non-space character also clears ``parser.framesetOK``.
        """
        if token["data"] == "\u0000":
            token["data"] = "\uFFFD"
        elif (self.parser.framesetOK and
              any(char not in spaceCharacters for char in token["data"])):
            self.parser.framesetOK = False
        Phase.processCharacters(self, token)

    def processStartTag(self, token):
        """Insert a foreign element, or break out back to HTML content.

        Returns the token for reprocessing in the breakout case and None
        otherwise.
        """
        currentNode = self.tree.openElements[-1]
        if (token["name"] in self.breakoutElements or
            (token["name"] == "font" and
             set(token["data"].keys()) & set(["color", "face", "size"]))):
            self.parser.parseError("unexpected-html-element-in-foreign-content",
                                   {"name": token["name"]})
            # Pop until the current node is in the default namespace or is
            # an HTML / MathML text integration point.
            while (self.tree.openElements[-1].namespace !=
                   self.tree.defaultNamespace and
                   not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
                   not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
                self.tree.openElements.pop()
            return token

        else:
            if currentNode.namespace == namespaces["mathml"]:
                self.parser.adjustMathMLAttributes(token)
            elif currentNode.namespace == namespaces["svg"]:
                self.adjustSVGTagNames(token)
                self.parser.adjustSVGAttributes(token)
            self.parser.adjustForeignAttributes(token)
            token["namespace"] = currentNode.namespace
            self.tree.insertElement(token)
            if token["selfClosing"]:
                self.tree.openElements.pop()
                token["selfClosingAcknowledged"] = True

    def processEndTag(self, token):
        """Close the matching foreign element, or defer to the HTML phase.

        Returns None when a matching element was found and popped;
        otherwise returns whatever the current phase's ``processEndTag``
        returns once a default-namespace node is reached.
        """
        nodeIndex = len(self.tree.openElements) - 1
        node = self.tree.openElements[-1]
        # Compare against the lowercased node name, as the matching loop
        # below does, so that closing a mixed-case SVG element (e.g.
        # "foreignObject") is not reported as an unexpected end tag.
        if node.name.translate(asciiUpper2Lower) != token["name"]:
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        while True:
            if node.name.translate(asciiUpper2Lower) == token["name"]:
                # XXX this isn't in the spec but it seems necessary
                if self.parser.phase == self.parser.phases["inTableText"]:
                    self.parser.phase.flushCharacters()
                    self.parser.phase = self.parser.phase.originalPhase
                # Pop up to and including the matched node.
                while self.tree.openElements.pop() != node:
                    assert self.tree.openElements
                new_token = None
                break
            nodeIndex -= 1

            node = self.tree.openElements[nodeIndex]
            if node.namespace != self.tree.defaultNamespace:
                continue
            else:
                new_token = self.parser.phase.processEndTag(token)
                break
        return new_token
-
class AfterBodyPhase(Phase):
    """Token handling once the body has been closed.

    Most unexpected tokens are reported and then reprocessed in the
    "inBody" phase.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        endTags = utils.MethodDispatcher([("html", self.endTagHtml)])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    def processEOF(self):
        """Nothing to do at EOF: stop parsing."""
        return None

    def processComment(self, token):
        """Attach the comment to the first element on the open stack."""
        # This is needed because data is to be appended to the <html> element
        # here and not to whatever is currently open.
        rootElement = self.tree.openElements[0]
        self.tree.insertComment(token, rootElement)

    def processCharacters(self, token):
        """Report stray characters and reprocess them in "inBody"."""
        parser = self.parser
        parser.parseError("unexpected-char-after-body")
        parser.phase = parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        """Delegate ``<html>`` to the "inBody" phase."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagOther(self, token):
        """Report a stray start tag and reprocess it in "inBody"."""
        parser = self.parser
        parser.parseError("unexpected-start-tag-after-body",
                          {"name": token["name"]})
        parser.phase = parser.phases["inBody"]
        return token

    def endTagHtml(self, name):
        """Move on to "afterAfterBody" unless parsing innerHTML."""
        parser = self.parser
        if not parser.innerHTML:
            parser.phase = parser.phases["afterAfterBody"]
            return
        parser.parseError("unexpected-end-tag-after-body-innerhtml")

    def endTagOther(self, token):
        """Report a stray end tag and reprocess it in "inBody"."""
        parser = self.parser
        parser.parseError("unexpected-end-tag-after-body",
                          {"name": token["name"]})
        parser.phase = parser.phases["inBody"]
        return token
-
class InFramesetPhase(Phase):
    """Token handling for the "in frameset" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("frameset", self.startTagFrameset),
            ("frame", self.startTagFrame),
            ("noframes", self.startTagNoframes),
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        endTags = utils.MethodDispatcher([
            ("frameset", self.endTagFrameset),
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    def processEOF(self):
        """Report EOF inside a frameset unless only ``html`` remains open."""
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-frameset")

    def processCharacters(self, token):
        """Report and drop character data."""
        self.parser.parseError("unexpected-char-in-frameset")

    def startTagFrameset(self, token):
        """Insert a nested frameset element."""
        self.tree.insertElement(token)

    def startTagFrame(self, token):
        """Insert a frame element and pop it straight away."""
        tree = self.tree
        tree.insertElement(token)
        tree.openElements.pop()

    def startTagNoframes(self, token):
        """Delegate ``<noframes>`` to the "inBody" phase."""
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagOther(self, token):
        """Report and ignore any other start tag."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-in-frameset", errorData)

    def endTagFrameset(self, token):
        """Pop the frameset; switch phase once no frameset is current."""
        tree = self.tree
        parser = self.parser
        if tree.openElements[-1].name != "html":
            tree.openElements.pop()
        else:
            # innerHTML case
            parser.parseError("unexpected-frameset-in-frameset-innerhtml")

        # If we're not in innerHTML mode and the current node is not a
        # "frameset" element (anymore) then switch.
        if parser.innerHTML:
            return
        if tree.openElements[-1].name != "frameset":
            parser.phase = parser.phases["afterFrameset"]

    def endTagOther(self, token):
        """Report and ignore any other end tag."""
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-frameset", errorData)
-
- class AfterFramesetPhase(Phase):
- # http://www.whatwg.org/specs/web-apps/current-work/#after3
- def __init__(self, parser, tree):
- Phase.__init__(self, parser, tree)
-
- self.startTagHandler = utils.MethodDispatcher([
- ("html", self.startTagHtml),
- ("noframes", self.startTagNoframes)
- ])
- self.startTagHandler.default = self.startTagOther
-
- self.endTagHandler = utils.MethodDispatcher([
- ("html", self.endTagHtml)
- ])
- self.endTagHandler.default = self.endTagOther
-
- def processEOF(self):
- # Stop parsing
- pass
-
- def processCharacters(self, token):
- self.parser.parseError("unexpected-char-after-frameset")
-
- def startTagNoframes(self, token):
- return self.parser.phases["inHead"].processStartTag(token)
-
- def startTagOther(self, token):
- self.parser.parseError("unexpected-start-tag-after-frameset",
- {"name": token["name"]})
-
- def endTagHtml(self, token):
- self.parser.phase = self.parser.phases["afterAfterFrameset"]
-
- def endTagOther(self, token):
- self.parser.parseError("unexpected-end-tag-after-frameset",
- {"name": token["name"]})
-
- class AfterAfterBodyPhase(Phase):
- def __init__(self, parser, tree):
- Phase.__init__(self, parser, tree)
-
- self.startTagHandler = utils.MethodDispatcher([
- ("html", self.startTagHtml)
- ])
- self.startTagHandler.default = self.startTagOther
-
- def processEOF(self):
- pass
-
- def processComment(self, token):
- self.tree.insertComment(token, self.tree.document)
-
- def processSpaceCharacters(self, token):
- return self.parser.phases["inBody"].processSpaceCharacters(token)
-
- def processCharacters(self, token):
- self.parser.parseError("expected-eof-but-got-char")
- self.parser.phase = self.parser.phases["inBody"]
- return token
-
- def startTagHtml(self, token):
- return self.parser.phases["inBody"].processStartTag(token)
-
- def startTagOther(self, token):
- self.parser.parseError("expected-eof-but-got-start-tag",
- {"name": token["name"]})
- self.parser.phase = self.parser.phases["inBody"]
- return token
-
- def processEndTag(self, token):
- self.parser.parseError("expected-eof-but-got-end-tag",
- {"name": token["name"]})
- self.parser.phase = self.parser.phases["inBody"]
- return token
-
- class AfterAfterFramesetPhase(Phase):
- def __init__(self, parser, tree):
- Phase.__init__(self, parser, tree)
-
- self.startTagHandler = utils.MethodDispatcher([
- ("html", self.startTagHtml),
- ("noframes", self.startTagNoFrames)
- ])
- self.startTagHandler.default = self.startTagOther
-
- def processEOF(self):
- pass
-
- def processComment(self, token):
- self.tree.insertComment(token, self.tree.document)
-
- def processSpaceCharacters(self, token):
- return self.parser.phases["inBody"].processSpaceCharacters(token)
-
- def processCharacters(self, token):
- self.parser.parseError("expected-eof-but-got-char")
-
- def startTagHtml(self, token):
- return self.parser.phases["inBody"].processStartTag(token)
-
- def startTagNoFrames(self, token):
- return self.parser.phases["inHead"].processStartTag(token)
-
- def startTagOther(self, token):
- self.parser.parseError("expected-eof-but-got-start-tag",
- {"name": token["name"]})
-
- def processEndTag(self, token):
- self.parser.parseError("expected-eof-but-got-end-tag",
- {"name": token["name"]})
-
- return {
- "initial": InitialPhase,
- "beforeHtml": BeforeHtmlPhase,
- "beforeHead": BeforeHeadPhase,
- "inHead": InHeadPhase,
- # XXX "inHeadNoscript": InHeadNoScriptPhase,
- "afterHead": AfterHeadPhase,
- "inBody": InBodyPhase,
- "text": TextPhase,
- "inTable": InTablePhase,
- "inTableText": InTableTextPhase,
- "inCaption": InCaptionPhase,
- "inColumnGroup": InColumnGroupPhase,
- "inTableBody": InTableBodyPhase,
- "inRow": InRowPhase,
- "inCell": InCellPhase,
- "inSelect": InSelectPhase,
- "inSelectInTable": InSelectInTablePhase,
- "inForeignContent": InForeignContentPhase,
- "afterBody": AfterBodyPhase,
- "inFrameset": InFramesetPhase,
- "afterFrameset": AfterFramesetPhase,
- "afterAfterBody": AfterAfterBodyPhase,
- "afterAfterFrameset": AfterAfterFramesetPhase,
- # XXX after after frameset
- }
-
-
-def impliedTagToken(name, type="EndTag", attributes=None,
- selfClosing=False):
- if attributes is None:
- attributes = {}
- return {"type": tokenTypes[type], "name": name, "data": attributes,
- "selfClosing": selfClosing}
-
-
-class ParseError(Exception):
- """Error in parsed document"""
- pass
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/ihatexml.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/ihatexml.py
deleted file mode 100644
index 0fc7930..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/ihatexml.py
+++ /dev/null
@@ -1,285 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import re
-import warnings
-
-from .constants import DataLossWarning
-
-baseChar = """
-[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
-[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] |
-[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] |
-[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 |
-[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] |
-[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] |
-[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] |
-[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] |
-[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 |
-[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] |
-[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] |
-[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D |
-[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] |
-[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] |
-[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] |
-[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] |
-[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] |
-[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] |
-[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 |
-[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] |
-[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] |
-[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] |
-[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] |
-[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] |
-[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] |
-[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] |
-[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] |
-[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] |
-[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] |
-[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A |
-#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 |
-#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] |
-#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] |
-[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] |
-[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C |
-#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 |
-[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] |
-[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] |
-[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 |
-[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] |
-[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B |
-#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE |
-[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] |
-[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 |
-[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] |
-[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
-
-ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
-
-combiningCharacter = """
-[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] |
-[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 |
-[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] |
-[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] |
-#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] |
-[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] |
-[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 |
-#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] |
-[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC |
-[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] |
-#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] |
-[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] |
-[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] |
-[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] |
-[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] |
-[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] |
-#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 |
-[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] |
-#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] |
-[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] |
-[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] |
-#x3099 | #x309A"""
-
-digit = """
-[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] |
-[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] |
-[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] |
-[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
-
-extender = """
-#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
-#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
-
-letter = " | ".join([baseChar, ideographic])
-
-# Without the
-name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
- extender])
-nameFirst = " | ".join([letter, "_"])
-
-reChar = re.compile(r"#x([\d|A-F]{4,4})")
-reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
-
-
-def charStringToList(chars):
- charRanges = [item.strip() for item in chars.split(" | ")]
- rv = []
- for item in charRanges:
- foundMatch = False
- for regexp in (reChar, reCharRange):
- match = regexp.match(item)
- if match is not None:
- rv.append([hexToInt(item) for item in match.groups()])
- if len(rv[-1]) == 1:
- rv[-1] = rv[-1] * 2
- foundMatch = True
- break
- if not foundMatch:
- assert len(item) == 1
-
- rv.append([ord(item)] * 2)
- rv = normaliseCharList(rv)
- return rv
-
-
-def normaliseCharList(charList):
- charList = sorted(charList)
- for item in charList:
- assert item[1] >= item[0]
- rv = []
- i = 0
- while i < len(charList):
- j = 1
- rv.append(charList[i])
- while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1:
- rv[-1][1] = charList[i + j][1]
- j += 1
- i += j
- return rv
-
-# We don't really support characters above the BMP :(
-max_unicode = int("FFFF", 16)
-
-
-def missingRanges(charList):
- rv = []
- if charList[0] != 0:
- rv.append([0, charList[0][0] - 1])
- for i, item in enumerate(charList[:-1]):
- rv.append([item[1] + 1, charList[i + 1][0] - 1])
- if charList[-1][1] != max_unicode:
- rv.append([charList[-1][1] + 1, max_unicode])
- return rv
-
-
-def listToRegexpStr(charList):
- rv = []
- for item in charList:
- if item[0] == item[1]:
- rv.append(escapeRegexp(chr(item[0])))
- else:
- rv.append(escapeRegexp(chr(item[0])) + "-" +
- escapeRegexp(chr(item[1])))
- return "[%s]" % "".join(rv)
-
-
-def hexToInt(hex_str):
- return int(hex_str, 16)
-
-
-def escapeRegexp(string):
- specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
- "[", "]", "|", "(", ")", "-")
- for char in specialCharacters:
- string = string.replace(char, "\\" + char)
-
- return string
-
-# output from the above
-nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u
0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
-
-nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u
3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
-
-# Simpler things
-nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]")
-
-
-class InfosetFilter(object):
- replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
-
- def __init__(self, replaceChars=None,
- dropXmlnsLocalName=False,
- dropXmlnsAttrNs=False,
- preventDoubleDashComments=False,
- preventDashAtCommentEnd=False,
- replaceFormFeedCharacters=True,
- preventSingleQuotePubid=False):
-
- self.dropXmlnsLocalName = dropXmlnsLocalName
- self.dropXmlnsAttrNs = dropXmlnsAttrNs
-
- self.preventDoubleDashComments = preventDoubleDashComments
- self.preventDashAtCommentEnd = preventDashAtCommentEnd
-
- self.replaceFormFeedCharacters = replaceFormFeedCharacters
-
- self.preventSingleQuotePubid = preventSingleQuotePubid
-
- self.replaceCache = {}
-
- def coerceAttribute(self, name, namespace=None):
- if self.dropXmlnsLocalName and name.startswith("xmlns:"):
- warnings.warn("Attributes cannot begin with xmlns", DataLossWarning)
- return None
- elif (self.dropXmlnsAttrNs and
- namespace == "http://www.w3.org/2000/xmlns/"):
- warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning)
- return None
- else:
- return self.toXmlName(name)
-
- def coerceElement(self, name, namespace=None):
- return self.toXmlName(name)
-
- def coerceComment(self, data):
- if self.preventDoubleDashComments:
- while "--" in data:
- warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
- data = data.replace("--", "- -")
- return data
-
- def coerceCharacters(self, data):
- if self.replaceFormFeedCharacters:
- for i in range(data.count("\x0C")):
- warnings.warn("Text cannot contain U+000C", DataLossWarning)
- data = data.replace("\x0C", " ")
- # Other non-xml characters
- return data
-
- def coercePubid(self, data):
- dataOutput = data
- for char in nonPubidCharRegexp.findall(data):
- warnings.warn("Coercing non-XML pubid", DataLossWarning)
- replacement = self.getReplacementCharacter(char)
- dataOutput = dataOutput.replace(char, replacement)
- if self.preventSingleQuotePubid and dataOutput.find("'") >= 0:
- warnings.warn("Pubid cannot contain single quote", DataLossWarning)
- dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'"))
- return dataOutput
-
- def toXmlName(self, name):
- nameFirst = name[0]
- nameRest = name[1:]
- m = nonXmlNameFirstBMPRegexp.match(nameFirst)
- if m:
- warnings.warn("Coercing non-XML name", DataLossWarning)
- nameFirstOutput = self.getReplacementCharacter(nameFirst)
- else:
- nameFirstOutput = nameFirst
-
- nameRestOutput = nameRest
- replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
- for char in replaceChars:
- warnings.warn("Coercing non-XML name", DataLossWarning)
- replacement = self.getReplacementCharacter(char)
- nameRestOutput = nameRestOutput.replace(char, replacement)
- return nameFirstOutput + nameRestOutput
-
- def getReplacementCharacter(self, char):
- if char in self.replaceCache:
- replacement = self.replaceCache[char]
- else:
- replacement = self.escapeChar(char)
- return replacement
-
- def fromXmlName(self, name):
- for item in set(self.replacementRegexp.findall(name)):
- name = name.replace(item, self.unescapeChar(item))
- return name
-
- def escapeChar(self, char):
- replacement = "U%05X" % ord(char)
- self.replaceCache[char] = replacement
- return replacement
-
- def unescapeChar(self, charcode):
- return chr(int(charcode[1:], 16))
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/inputstream.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/inputstream.py
deleted file mode 100644
index dc39ad0..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/inputstream.py
+++ /dev/null
@@ -1,905 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
-
-import codecs
-import platform
-import re
-
-from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
-from .constants import encodings, ReparseException
-from . import utils
-
-from io import StringIO
-
-try:
- from io import BytesIO
-except ImportError:
- BytesIO = StringIO
-
-try:
- from io import BufferedIOBase
-except ImportError:
- class BufferedIOBase(object):
- pass
-
-# Non-unicode versions of constants for use in the pre-parser
-spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
-asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
-asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
-spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
-
-invalid_unicode_template = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF%s]"
-
-if platform.python_implementation() == "Jython":
- # Jython does not allow the use of solitary surrogate escapes
- # (\uD800-\uDFFF) in literals or other usage. This is because it
- # uses UTF-16, which is based on the use of such surrogates.
- invalid_unicode_re = re.compile(invalid_unicode_template % "")
-else:
- # Instead use one extra step of indirection and create surrogates with
- # unichr
- invalid_unicode_re = re.compile(invalid_unicode_template % (
- "%s-%s" % (unichr(0xD800), unichr(0xDFFF)),))
-
-non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
- 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
- 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
- 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
- 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
- 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
- 0x10FFFE, 0x10FFFF])
-
-ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
-
-# Cache for charsUntil()
-charsUntilRegEx = {}
-
-
-class BufferedStream:
- """Buffering for streams that do not have buffering of their own
-
- The buffer is implemented as a list of chunks on the assumption that
- joining many strings will be slow since it is O(n**2)
- """
-
- def __init__(self, stream):
- self.stream = stream
- self.buffer = []
- self.position = [-1, 0] # chunk number, offset
-
- def tell(self):
- pos = 0
- for chunk in self.buffer[:self.position[0]]:
- pos += len(chunk)
- pos += self.position[1]
- return pos
-
- def seek(self, pos):
- assert pos < self._bufferedBytes()
- offset = pos
- i = 0
- while len(self.buffer[i]) < offset:
- offset -= pos
- i += 1
- self.position = [i, offset]
-
- def read(self, bytes):
- if not self.buffer:
- return self._readStream(bytes)
- elif (self.position[0] == len(self.buffer) and
- self.position[1] == len(self.buffer[-1])):
- return self._readStream(bytes)
- else:
- return self._readFromBuffer(bytes)
-
- def _bufferedBytes(self):
- return sum([len(item) for item in self.buffer])
-
- def _readStream(self, bytes):
- data = self.stream.read(bytes)
- self.buffer.append(data)
- self.position[0] += 1
- self.position[1] = len(data)
- return data
-
- def _readFromBuffer(self, bytes):
- remainingBytes = bytes
- rv = []
- bufferIndex = self.position[0]
- bufferOffset = self.position[1]
- while bufferIndex < len(self.buffer) and remainingBytes != 0:
- assert remainingBytes > 0
- bufferedData = self.buffer[bufferIndex]
-
- if remainingBytes <= len(bufferedData) - bufferOffset:
- bytesToRead = remainingBytes
- self.position = [bufferIndex, bufferOffset + bytesToRead]
- else:
- bytesToRead = len(bufferedData) - bufferOffset
- self.position = [bufferIndex, len(bufferedData)]
- bufferIndex += 1
- rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
- remainingBytes -= bytesToRead
-
- bufferOffset = 0
-
- if remainingBytes:
- rv.append(self._readStream(remainingBytes))
-
- return "".join(rv)
-
-
-def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
- if hasattr(source, "read"):
- isUnicode = isinstance(source.read(0), text_type)
- else:
- isUnicode = isinstance(source, text_type)
-
- if isUnicode:
- if encoding is not None:
- raise TypeError("Cannot explicitly set an encoding with a unicode string")
-
- return HTMLUnicodeInputStream(source)
- else:
- return HTMLBinaryInputStream(source, encoding, parseMeta, chardet)
-
-
-class HTMLUnicodeInputStream:
- """Provides a unicode stream of characters to the HTMLTokenizer.
-
- This class takes care of character encoding and removing or replacing
- incorrect byte-sequences and also provides column and line tracking.
-
- """
-
- _defaultChunkSize = 10240
-
- def __init__(self, source):
- """Initialises the HTMLInputStream.
-
- HTMLInputStream(source, [encoding]) -> Normalized stream from source
- for use by html5lib.
-
- source can be either a file-object, local filename or a string.
-
- The optional encoding parameter must be a string that indicates
- the encoding. If specified, that encoding will be used,
- regardless of any BOM or later declaration (such as in a meta
- element)
-
- parseMeta - Look for a <meta> element containing encoding information
-
- """
-
- # Craziness
- if platform.python_implementation() == "Jython":
- # By its nature Jython's UTF-16 support does not allow
- # surrogate errors, so no need to do this check.
- self.reportCharacterErrors = None
- self.replaceCharactersRegexp = None
- elif len("\U0010FFFF") == 1:
- self.reportCharacterErrors = self.characterErrorsUCS4
- self.replaceCharactersRegexp = re.compile("{}".format(
- "[{}-{}]".format(unichr(0xD800), unichr(0xDFFF))))
- else:
- self.reportCharacterErrors = self.characterErrorsUCS2
- self.replaceCharactersRegexp = re.compile("{}".format(
- "([{}-{}](?![{}-{})|(?<![{}-{}])[{}-{}])".format(
- unichr(0xD800), unichr(0xDBFF), unichr(0xDC00), unichr(0xDFFF),
- unichr(0xD800), unichr(0xDBFF), unichr(0xDC00), unichr(0xDFFF))))
-
- # List of where new lines occur
- self.newLines = [0]
-
- self.charEncoding = ("utf-8", "certain")
- self.dataStream = self.openStream(source)
-
- self.reset()
-
- def reset(self):
- self.chunk = ""
- self.chunkSize = 0
- self.chunkOffset = 0
- self.errors = []
-
- # number of (complete) lines in previous chunks
- self.prevNumLines = 0
- # number of columns in the last line of the previous chunk
- self.prevNumCols = 0
-
- # Deal with CR LF and surrogates split over chunk boundaries
- self._bufferedCharacter = None
-
- def openStream(self, source):
- """Produces a file object from source.
-
- source can be either a file object, local filename or a string.
-
- """
- # Already a file object
- if hasattr(source, 'read'):
- stream = source
- else:
- stream = StringIO(source)
-
- return stream
-
- def _position(self, offset):
- chunk = self.chunk
- nLines = chunk.count('\n', 0, offset)
- positionLine = self.prevNumLines + nLines
- lastLinePos = chunk.rfind('\n', 0, offset)
- if lastLinePos == -1:
- positionColumn = self.prevNumCols + offset
- else:
- positionColumn = offset - (lastLinePos + 1)
- return (positionLine, positionColumn)
-
- def position(self):
- """Returns (line, col) of the current position in the stream."""
- line, col = self._position(self.chunkOffset)
- return (line + 1, col)
-
- def char(self):
- """ Read one character from the stream or queue if available. Return
- EOF when EOF is reached.
- """
- # Read a new chunk from the input stream if necessary
- if self.chunkOffset >= self.chunkSize:
- if not self.readChunk():
- return EOF
-
- chunkOffset = self.chunkOffset
- char = self.chunk[chunkOffset]
- self.chunkOffset = chunkOffset + 1
-
- return char
-
- def readChunk(self, chunkSize=None):
- if chunkSize is None:
- chunkSize = self._defaultChunkSize
-
- self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)
-
- self.chunk = ""
- self.chunkSize = 0
- self.chunkOffset = 0
-
- data = self.dataStream.read(chunkSize)
-
- # Deal with CR LF and surrogates broken across chunks
- if self._bufferedCharacter:
- data = self._bufferedCharacter + data
- self._bufferedCharacter = None
- elif not data:
- # We have no more data, bye-bye stream
- return False
-
- if len(data) > 1:
- lastv = ord(data[-1])
- if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
- self._bufferedCharacter = data[-1]
- data = data[:-1]
-
- if platform.python_implementation() != "Jython":
- # data is already Unicode, so Jython already has dealt
- # with any surrogate character errors, no need to go here
- self.reportCharacterErrors(data)
-
- # Replace invalid characters
- # Note U+0000 is dealt with in the tokenizer
- data = self.replaceCharactersRegexp.sub("\ufffd", data)
-
- data = data.replace("\r\n", "\n")
- data = data.replace("\r", "\n")
-
- self.chunk = data
- self.chunkSize = len(data)
-
- return True
-
- def characterErrorsUCS4(self, data):
- for i in range(len(invalid_unicode_re.findall(data))):
- self.errors.append("invalid-codepoint")
-
- def characterErrorsUCS2(self, data):
- # Someone picked the wrong compile option
- # You lose
- skip = False
- for match in invalid_unicode_re.finditer(data):
- if skip:
- continue
- codepoint = ord(match.group())
- pos = match.start()
- # Pretty sure there should be endianness issues here
- if utils.isSurrogatePair(data[pos:pos + 2]):
- # We have a surrogate pair!
- char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
- if char_val in non_bmp_invalid_codepoints:
- self.errors.append("invalid-codepoint")
- skip = True
- elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and
- pos == len(data) - 1):
- self.errors.append("invalid-codepoint")
- else:
- skip = False
- self.errors.append("invalid-codepoint")
-
- def charsUntil(self, characters, opposite=False):
- """ Returns a string of characters from the stream up to but not
- including any character in 'characters' or EOF. 'characters' must be
- a container that supports the 'in' method and iteration over its
- characters.
- """
-
- # Use a cache of regexps to find the required characters
- try:
- chars = charsUntilRegEx[(characters, opposite)]
- except KeyError:
- if __debug__:
- for c in characters:
- assert(ord(c) < 128)
- regex = "".join(["\\x%02x" % ord(c) for c in characters])
- if not opposite:
- regex = "^%s" % regex
- chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)
-
- rv = []
-
- while True:
- # Find the longest matching prefix
- m = chars.match(self.chunk, self.chunkOffset)
- if m is None:
- # If nothing matched, and it wasn't because we ran out of chunk,
- # then stop
- if self.chunkOffset != self.chunkSize:
- break
- else:
- end = m.end()
- # If not the whole chunk matched, return everything
- # up to the part that didn't match
- if end != self.chunkSize:
- rv.append(self.chunk[self.chunkOffset:end])
- self.chunkOffset = end
- break
- # If the whole remainder of the chunk matched,
- # use it all and read the next chunk
- rv.append(self.chunk[self.chunkOffset:])
- if not self.readChunk():
- # Reached EOF
- break
-
- r = "".join(rv)
- return r
-
- def unget(self, char):
- # Only one character is allowed to be ungotten at once - it must
- # be consumed again before any further call to unget
- if char is not None:
- if self.chunkOffset == 0:
- # unget is called quite rarely, so it's a good idea to do
- # more work here if it saves a bit of work in the frequently
- # called char and charsUntil.
- # So, just prepend the ungotten character onto the current
- # chunk:
- self.chunk = char + self.chunk
- self.chunkSize += 1
- else:
- self.chunkOffset -= 1
- assert self.chunk[self.chunkOffset] == char
-
-
-class HTMLBinaryInputStream(HTMLUnicodeInputStream):
- """Provides a unicode stream of characters to the HTMLTokenizer.
-
- This class takes care of character encoding and removing or replacing
- incorrect byte-sequences and also provides column and line tracking.
-
- """
-
- def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
- """Initialises the HTMLInputStream.
-
- HTMLInputStream(source, [encoding]) -> Normalized stream from source
- for use by html5lib.
-
- source can be either a file-object, local filename or a string.
-
- The optional encoding parameter must be a string that indicates
- the encoding. If specified, that encoding will be used,
- regardless of any BOM or later declaration (such as in a meta
- element)
-
- parseMeta - Look for a <meta> element containing encoding information
-
- """
- # Raw Stream - for unicode objects this will encode to utf-8 and set
- # self.charEncoding as appropriate
- self.rawStream = self.openStream(source)
-
- HTMLUnicodeInputStream.__init__(self, self.rawStream)
-
- self.charEncoding = (codecName(encoding), "certain")
-
- # Encoding Information
- # Number of bytes to use when looking for a meta element with
- # encoding information
- self.numBytesMeta = 512
- # Number of bytes to use when using detecting encoding using chardet
- self.numBytesChardet = 100
- # Encoding to use if no other information can be found
- self.defaultEncoding = "windows-1252"
-
- # Detect encoding iff no explicit "transport level" encoding is supplied
- if (self.charEncoding[0] is None):
- self.charEncoding = self.detectEncoding(parseMeta, chardet)
-
- # Call superclass
- self.reset()
-
- def reset(self):
- self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream,
- 'replace')
- HTMLUnicodeInputStream.reset(self)
-
- def openStream(self, source):
- """Produces a file object from source.
-
- source can be either a file object, local filename or a string.
-
- """
- # Already a file object
- if hasattr(source, 'read'):
- stream = source
- else:
- stream = BytesIO(source)
-
- try:
- stream.seek(stream.tell())
- except:
- stream = BufferedStream(stream)
-
- return stream
-
- def detectEncoding(self, parseMeta=True, chardet=True):
- # First look for a BOM
- # This will also read past the BOM if present
- encoding = self.detectBOM()
- confidence = "certain"
- # If there is no BOM need to look for meta elements with encoding
- # information
- if encoding is None and parseMeta:
- encoding = self.detectEncodingMeta()
- confidence = "tentative"
- # Guess with chardet, if avaliable
- if encoding is None and chardet:
- confidence = "tentative"
- try:
- try:
- from charade.universaldetector import UniversalDetector
- except ImportError:
- from chardet.universaldetector import UniversalDetector
- buffers = []
- detector = UniversalDetector()
- while not detector.done:
- buffer = self.rawStream.read(self.numBytesChardet)
- assert isinstance(buffer, bytes)
- if not buffer:
- break
- buffers.append(buffer)
- detector.feed(buffer)
- detector.close()
- encoding = detector.result['encoding']
- self.rawStream.seek(0)
- except ImportError:
- pass
- # If all else fails use the default encoding
- if encoding is None:
- confidence = "tentative"
- encoding = self.defaultEncoding
-
- # Substitute for equivalent encodings:
- encodingSub = {"iso-8859-1": "windows-1252"}
-
- if encoding.lower() in encodingSub:
- encoding = encodingSub[encoding.lower()]
-
- return encoding, confidence
-
- def changeEncoding(self, newEncoding):
- assert self.charEncoding[1] != "certain"
- newEncoding = codecName(newEncoding)
- if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"):
- newEncoding = "utf-8"
- if newEncoding is None:
- return
- elif newEncoding == self.charEncoding[0]:
- self.charEncoding = (self.charEncoding[0], "certain")
- else:
- self.rawStream.seek(0)
- self.reset()
- self.charEncoding = (newEncoding, "certain")
- raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
-
- def detectBOM(self):
- """Attempts to detect at BOM at the start of the stream. If
- an encoding can be determined from the BOM return the name of the
- encoding otherwise return None"""
- bomDict = {
- codecs.BOM_UTF8: 'utf-8',
- codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
- codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
- }
-
- # Go to beginning of file and read in 4 bytes
- string = self.rawStream.read(4)
- assert isinstance(string, bytes)
-
- # Try detecting the BOM using bytes from the string
- encoding = bomDict.get(string[:3]) # UTF-8
- seek = 3
- if not encoding:
- # Need to detect UTF-32 before UTF-16
- encoding = bomDict.get(string) # UTF-32
- seek = 4
- if not encoding:
- encoding = bomDict.get(string[:2]) # UTF-16
- seek = 2
-
- # Set the read position past the BOM if one was found, otherwise
- # set it to the start of the stream
- self.rawStream.seek(encoding and seek or 0)
-
- return encoding
-
- def detectEncodingMeta(self):
- """Report the encoding declared by the meta element
- """
- buffer = self.rawStream.read(self.numBytesMeta)
- assert isinstance(buffer, bytes)
- parser = EncodingParser(buffer)
- self.rawStream.seek(0)
- encoding = parser.getEncoding()
-
- if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
- encoding = "utf-8"
-
- return encoding
-
-
-class EncodingBytes(bytes):
- """String-like object with an associated position and various extra methods
- If the position is ever greater than the string length then an exception is
- raised"""
- def __new__(self, value):
- assert isinstance(value, bytes)
- return bytes.__new__(self, value.lower())
-
- def __init__(self, value):
- self._position = -1
-
- def __iter__(self):
- return self
-
- def __next__(self):
- p = self._position = self._position + 1
- if p >= len(self):
- raise StopIteration
- elif p < 0:
- raise TypeError
- return self[p:p + 1]
-
- def next(self):
- # Py2 compat
- return self.__next__()
-
- def previous(self):
- p = self._position
- if p >= len(self):
- raise StopIteration
- elif p < 0:
- raise TypeError
- self._position = p = p - 1
- return self[p:p + 1]
-
- def setPosition(self, position):
- if self._position >= len(self):
- raise StopIteration
- self._position = position
-
- def getPosition(self):
- if self._position >= len(self):
- raise StopIteration
- if self._position >= 0:
- return self._position
- else:
- return None
-
- position = property(getPosition, setPosition)
-
- def getCurrentByte(self):
- return self[self.position:self.position + 1]
-
- currentByte = property(getCurrentByte)
-
- def skip(self, chars=spaceCharactersBytes):
- """Skip past a list of characters"""
- p = self.position # use property for the error-checking
- while p < len(self):
- c = self[p:p + 1]
- if c not in chars:
- self._position = p
- return c
- p += 1
- self._position = p
- return None
-
- def skipUntil(self, chars):
- p = self.position
- while p < len(self):
- c = self[p:p + 1]
- if c in chars:
- self._position = p
- return c
- p += 1
- self._position = p
- return None
-
- def matchBytes(self, bytes):
- """Look for a sequence of bytes at the start of a string. If the bytes
- are found return True and advance the position to the byte after the
- match. Otherwise return False and leave the position alone"""
- p = self.position
- data = self[p:p + len(bytes)]
- rv = data.startswith(bytes)
- if rv:
- self.position += len(bytes)
- return rv
-
- def jumpTo(self, bytes):
- """Look for the next sequence of bytes matching a given sequence. If
- a match is found advance the position to the last byte of the match"""
- newPosition = self[self.position:].find(bytes)
- if newPosition > -1:
- # XXX: This is ugly, but I can't see a nicer way to fix this.
- if self._position == -1:
- self._position = 0
- self._position += (newPosition + len(bytes) - 1)
- return True
- else:
- raise StopIteration
-
-
-class EncodingParser(object):
- """Mini parser for detecting character encoding from meta elements"""
-
- def __init__(self, data):
- """string - the data to work on for encoding detection"""
- self.data = EncodingBytes(data)
- self.encoding = None
-
- def getEncoding(self):
- methodDispatch = (
- (b"<!--", self.handleComment),
- (b"<meta", self.handleMeta),
- (b"</", self.handlePossibleEndTag),
- (b"<!", self.handleOther),
- (b"<?", self.handleOther),
- (b"<", self.handlePossibleStartTag))
- for byte in self.data:
- keepParsing = True
- for key, method in methodDispatch:
- if self.data.matchBytes(key):
- try:
- keepParsing = method()
- break
- except StopIteration:
- keepParsing = False
- break
- if not keepParsing:
- break
-
- return self.encoding
-
- def handleComment(self):
- """Skip over comments"""
- return self.data.jumpTo(b"-->")
-
- def handleMeta(self):
- if self.data.currentByte not in spaceCharactersBytes:
- # if we have <meta not followed by a space so just keep going
- return True
- # We have a valid meta element we want to search for attributes
- hasPragma = False
- pendingEncoding = None
- while True:
- # Try to find the next attribute after the current position
- attr = self.getAttribute()
- if attr is None:
- return True
- else:
- if attr[0] == b"http-equiv":
- hasPragma = attr[1] == b"content-type"
- if hasPragma and pendingEncoding is not None:
- self.encoding = pendingEncoding
- return False
- elif attr[0] == b"charset":
- tentativeEncoding = attr[1]
- codec = codecName(tentativeEncoding)
- if codec is not None:
- self.encoding = codec
- return False
- elif attr[0] == b"content":
- contentParser = ContentAttrParser(EncodingBytes(attr[1]))
- tentativeEncoding = contentParser.parse()
- if tentativeEncoding is not None:
- codec = codecName(tentativeEncoding)
- if codec is not None:
- if hasPragma:
- self.encoding = codec
- return False
- else:
- pendingEncoding = codec
-
- def handlePossibleStartTag(self):
- return self.handlePossibleTag(False)
-
- def handlePossibleEndTag(self):
- next(self.data)
- return self.handlePossibleTag(True)
-
- def handlePossibleTag(self, endTag):
- data = self.data
- if data.currentByte not in asciiLettersBytes:
- # If the next byte is not an ascii letter either ignore this
- # fragment (possible start tag case) or treat it according to
- # handleOther
- if endTag:
- data.previous()
- self.handleOther()
- return True
-
- c = data.skipUntil(spacesAngleBrackets)
- if c == b"<":
- # return to the first step in the overall "two step" algorithm
- # reprocessing the < byte
- data.previous()
- else:
- # Read all attributes
- attr = self.getAttribute()
- while attr is not None:
- attr = self.getAttribute()
- return True
-
- def handleOther(self):
- return self.data.jumpTo(b">")
-
- def getAttribute(self):
- """Return a name,value pair for the next attribute in the stream,
- if one is found, or None"""
- data = self.data
- # Step 1 (skip chars)
- c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
- assert c is None or len(c) == 1
- # Step 2
- if c in (b">", None):
- return None
- # Step 3
- attrName = []
- attrValue = []
- # Step 4 attribute name
- while True:
- if c == b"=" and attrName:
- break
- elif c in spaceCharactersBytes:
- # Step 6!
- c = data.skip()
- break
- elif c in (b"/", b">"):
- return b"".join(attrName), b""
- elif c in asciiUppercaseBytes:
- attrName.append(c.lower())
- elif c is None:
- return None
- else:
- attrName.append(c)
- # Step 5
- c = next(data)
- # Step 7
- if c != b"=":
- data.previous()
- return b"".join(attrName), b""
- # Step 8
- next(data)
- # Step 9
- c = data.skip()
- # Step 10
- if c in (b"'", b'"'):
- # 10.1
- quoteChar = c
- while True:
- # 10.2
- c = next(data)
- # 10.3
- if c == quoteChar:
- next(data)
- return b"".join(attrName), b"".join(attrValue)
- # 10.4
- elif c in asciiUppercaseBytes:
- attrValue.append(c.lower())
- # 10.5
- else:
- attrValue.append(c)
- elif c == b">":
- return b"".join(attrName), b""
- elif c in asciiUppercaseBytes:
- attrValue.append(c.lower())
- elif c is None:
- return None
- else:
- attrValue.append(c)
- # Step 11
- while True:
- c = next(data)
- if c in spacesAngleBrackets:
- return b"".join(attrName), b"".join(attrValue)
- elif c in asciiUppercaseBytes:
- attrValue.append(c.lower())
- elif c is None:
- return None
- else:
- attrValue.append(c)
-
-
-class ContentAttrParser(object):
- def __init__(self, data):
- assert isinstance(data, bytes)
- self.data = data
-
- def parse(self):
- try:
- # Check if the attr name is charset
- # otherwise return
- self.data.jumpTo(b"charset")
- self.data.position += 1
- self.data.skip()
- if not self.data.currentByte == b"=":
- # If there is no = sign keep looking for attrs
- return None
- self.data.position += 1
- self.data.skip()
- # Look for an encoding between matching quote marks
- if self.data.currentByte in (b'"', b"'"):
- quoteMark = self.data.currentByte
- self.data.position += 1
- oldPosition = self.data.position
- if self.data.jumpTo(quoteMark):
- return self.data[oldPosition:self.data.position]
- else:
- return None
- else:
- # Unquoted value
- oldPosition = self.data.position
- try:
- self.data.skipUntil(spaceCharactersBytes)
- return self.data[oldPosition:self.data.position]
- except StopIteration:
- # Return the whole remaining value
- return self.data[oldPosition:]
- except StopIteration:
- return None
-
-
-def codecName(encoding):
- """Return the python codec name corresponding to an encoding or None if the
- string doesn't correspond to a valid encoding."""
- if isinstance(encoding, bytes):
- try:
- encoding = encoding.decode("ascii")
- except UnicodeDecodeError:
- return None
- if encoding:
- canonicalName = ascii_punctuation_re.sub("", encoding).lower()
- return encodings.get(canonicalName, None)
- else:
- return None
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/sanitizer.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/sanitizer.py
deleted file mode 100644
index 71dc521..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/sanitizer.py
+++ /dev/null
@@ -1,271 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import re
-from xml.sax.saxutils import escape, unescape
-
-from .tokenizer import HTMLTokenizer
-from .constants import tokenTypes
-
-
-class HTMLSanitizerMixin(object):
- """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
-
- acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
- 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
- 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
- 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
- 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
- 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
- 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
- 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
- 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
- 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
- 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
- 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
- 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
-
- mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
- 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
- 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
- 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
- 'munderover', 'none']
-
- svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
- 'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
- 'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
- 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
- 'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
- 'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
-
- acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
- 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
- 'background', 'balance', 'bgcolor', 'bgproperties', 'border',
- 'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
- 'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
- 'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
- 'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
- 'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
- 'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
- 'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
- 'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
- 'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
- 'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
- 'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
- 'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
- 'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
- 'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
- 'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
- 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
- 'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
- 'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
- 'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
- 'width', 'wrap', 'xml:lang']
-
- mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
- 'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
- 'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
- 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
- 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
- 'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
- 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
- 'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
- 'xlink:type', 'xmlns', 'xmlns:xlink']
-
- svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
- 'arabic-form', 'ascent', 'attributeName', 'attributeType',
- 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
- 'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
- 'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
- 'fill-opacity', 'fill-rule', 'font-family', 'font-size',
- 'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
- 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
- 'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
- 'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
- 'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
- 'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
- 'opacity', 'orient', 'origin', 'overline-position',
- 'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
- 'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
- 'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
- 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
- 'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
- 'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
- 'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
- 'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
- 'transform', 'type', 'u1', 'u2', 'underline-position',
- 'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
- 'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
- 'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
- 'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
- 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
- 'y1', 'y2', 'zoomAndPan']
-
- attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
- 'xlink:href', 'xml:base']
-
- svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
- 'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
- 'mask', 'stroke']
-
- svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
- 'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
- 'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
- 'set', 'use']
-
- acceptable_css_properties = ['azimuth', 'background-color',
- 'border-bottom-color', 'border-collapse', 'border-color',
- 'border-left-color', 'border-right-color', 'border-top-color', 'clear',
- 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
- 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
- 'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
- 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
- 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
- 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
- 'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
- 'white-space', 'width']
-
- acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
- 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
- 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
- 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
- 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
- 'transparent', 'underline', 'white', 'yellow']
-
- acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
- 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
- 'stroke-opacity']
-
- acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
- 'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
- 'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
- 'ssh', 'sftp', 'rtsp', 'afs']
-
- # subclasses may define their own versions of these constants
- allowed_elements = acceptable_elements + mathml_elements + svg_elements
- allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
- allowed_css_properties = acceptable_css_properties
- allowed_css_keywords = acceptable_css_keywords
- allowed_svg_properties = acceptable_svg_properties
- allowed_protocols = acceptable_protocols
-
- # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
- # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
- # attributes are parsed, and a restricted set, # specified by
- # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
- # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
- # in ALLOWED_PROTOCOLS are allowed.
- #
- # sanitize_html('<script> do_nasty_stuff() </script>')
- # => &lt;script> do_nasty_stuff() &lt;/script>
- # sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
- # => <a>Click here for $100</a>
- def sanitize_token(self, token):
-
- # accommodate filters which use token_type differently
- token_type = token["type"]
- if token_type in list(tokenTypes.keys()):
- token_type = tokenTypes[token_type]
-
- if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
- tokenTypes["EmptyTag"]):
- if token["name"] in self.allowed_elements:
- return self.allowed_token(token, token_type)
- else:
- return self.disallowed_token(token, token_type)
- elif token_type == tokenTypes["Comment"]:
- pass
- else:
- return token
-
- def allowed_token(self, token, token_type):
- if "data" in token:
- attrs = dict([(name, val) for name, val in
- token["data"][::-1]
- if name in self.allowed_attributes])
- for attr in self.attr_val_is_uri:
- if attr not in attrs:
- continue
- val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
- unescape(attrs[attr])).lower()
- # remove replacement characters from unescaped characters
- val_unescaped = val_unescaped.replace("\ufffd", "")
- if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
- (val_unescaped.split(':')[0] not in
- self.allowed_protocols)):
- del attrs[attr]
- for attr in self.svg_attr_val_allows_ref:
- if attr in attrs:
- attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
- ' ',
- unescape(attrs[attr]))
- if (token["name"] in self.svg_allow_local_href and
- 'xlink:href' in attrs and re.search('^\s*[^#\s].*',
- attrs['xlink:href'])):
- del attrs['xlink:href']
- if 'style' in attrs:
- attrs['style'] = self.sanitize_css(attrs['style'])
- token["data"] = [[name, val] for name, val in list(attrs.items())]
- return token
-
- def disallowed_token(self, token, token_type):
- if token_type == tokenTypes["EndTag"]:
- token["data"] = "</%s>" % token["name"]
- elif token["data"]:
- attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]])
- token["data"] = "<%s%s>" % (token["name"], attrs)
- else:
- token["data"] = "<%s>" % token["name"]
- if token.get("selfClosing"):
- token["data"] = token["data"][:-1] + "/>"
-
- if token["type"] in list(tokenTypes.keys()):
- token["type"] = "Characters"
- else:
- token["type"] = tokenTypes["Characters"]
-
- del token["name"]
- return token
-
- def sanitize_css(self, style):
- # disallow urls
- style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
-
- # gauntlet
- if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
- return ''
- if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
- return ''
-
- clean = []
- for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
- if not value:
- continue
- if prop.lower() in self.allowed_css_properties:
- clean.append(prop + ': ' + value + ';')
- elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
- 'padding']:
- for keyword in value.split():
- if not keyword in self.acceptable_css_keywords and \
- not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
- break
- else:
- clean.append(prop + ': ' + value + ';')
- elif prop.lower() in self.allowed_svg_properties:
- clean.append(prop + ': ' + value + ';')
-
- return ' '.join(clean)
-
-
-class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
- def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
- lowercaseElementName=False, lowercaseAttrName=False, parser=None):
- # Change case matching defaults as we only output lowercase html anyway
- # This solution doesn't seem ideal...
- HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
- lowercaseElementName, lowercaseAttrName, parser=parser)
-
- def __iter__(self):
- for token in HTMLTokenizer.__iter__(self):
- token = self.sanitize_token(token)
- if token:
- yield token
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/__init__.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/__init__.py
deleted file mode 100644
index 8380839..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from .. import treewalkers
-
-from .htmlserializer import HTMLSerializer
-
-
-def serialize(input, tree="etree", format="html", encoding=None,
- **serializer_opts):
- # XXX: Should we cache this?
- walker = treewalkers.getTreeWalker(tree)
- if format == "html":
- s = HTMLSerializer(**serializer_opts)
- else:
- raise ValueError("type must be html")
- return s.render(walker(input), encoding)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/htmlserializer.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/htmlserializer.py
deleted file mode 100644
index 08b60df..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/htmlserializer.py
+++ /dev/null
@@ -1,309 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
-
-import gettext
-_ = gettext.gettext
-
-try:
- from functools import reduce
-except ImportError:
- pass
-
-from ..constants import voidElements, booleanAttributes, spaceCharacters
-from ..constants import rcdataElements, entities, xmlEntities
-from .. import utils
-from xml.sax.saxutils import escape
-
-spaceCharacters = "".join(spaceCharacters)
-
-try:
- from codecs import register_error, xmlcharrefreplace_errors
-except ImportError:
- unicode_encode_errors = "strict"
-else:
- unicode_encode_errors = "htmlentityreplace"
-
- encode_entity_map = {}
- is_ucs4 = len("\U0010FFFF") == 1
- for k, v in list(entities.items()):
- # skip multi-character entities
- if ((is_ucs4 and len(v) > 1) or
- (not is_ucs4 and len(v) > 2)):
- continue
- if v != "&":
- if len(v) == 2:
- v = utils.surrogatePairToCodepoint(v)
- else:
- v = ord(v)
- if not v in encode_entity_map or k.islower():
- # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
- encode_entity_map[v] = k
-
- def htmlentityreplace_errors(exc):
- if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
- res = []
- codepoints = []
- skip = False
- for i, c in enumerate(exc.object[exc.start:exc.end]):
- if skip:
- skip = False
- continue
- index = i + exc.start
- if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
- codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
- skip = True
- else:
- codepoint = ord(c)
- codepoints.append(codepoint)
- for cp in codepoints:
- e = encode_entity_map.get(cp)
- if e:
- res.append("&")
- res.append(e)
- if not e.endswith(";"):
- res.append(";")
- else:
- res.append("&#x%s;" % (hex(cp)[2:]))
- return ("".join(res), exc.end)
- else:
- return xmlcharrefreplace_errors(exc)
-
- register_error(unicode_encode_errors, htmlentityreplace_errors)
-
- del register_error
-
-
-class HTMLSerializer(object):
-
- # attribute quoting options
- quote_attr_values = False
- quote_char = '"'
- use_best_quote_char = True
-
- # tag syntax options
- omit_optional_tags = True
- minimize_boolean_attributes = True
- use_trailing_solidus = False
- space_before_trailing_solidus = True
-
- # escaping options
- escape_lt_in_attrs = False
- escape_rcdata = False
- resolve_entities = True
-
- # miscellaneous options
- inject_meta_charset = True
- strip_whitespace = False
- sanitize = False
-
- options = ("quote_attr_values", "quote_char", "use_best_quote_char",
- "minimize_boolean_attributes", "use_trailing_solidus",
- "space_before_trailing_solidus", "omit_optional_tags",
- "strip_whitespace", "inject_meta_charset", "escape_lt_in_attrs",
- "escape_rcdata", "resolve_entities", "sanitize")
-
- def __init__(self, **kwargs):
- """Initialize HTMLSerializer.
-
- Keyword options (default given first unless specified) include:
-
- inject_meta_charset=True|False
- Whether it insert a meta element to define the character set of the
- document.
- quote_attr_values=True|False
- Whether to quote attribute values that don't require quoting
- per HTML5 parsing rules.
- quote_char=u'"'|u"'"
- Use given quote character for attribute quoting. Default is to
- use double quote unless attribute value contains a double quote,
- in which case single quotes are used instead.
- escape_lt_in_attrs=False|True
- Whether to escape < in attribute values.
- escape_rcdata=False|True
- Whether to escape characters that need to be escaped within normal
- elements within rcdata elements such as style.
- resolve_entities=True|False
- Whether to resolve named character entities that appear in the
- source tree. The XML predefined entities &lt; &gt; &amp; &quot; &apos;
- are unaffected by this setting.
- strip_whitespace=False|True
- Whether to remove semantically meaningless whitespace. (This
- compresses all whitespace to a single space except within pre.)
- minimize_boolean_attributes=True|False
- Shortens boolean attributes to give just the attribute value,
- for example <input disabled="disabled"> becomes <input disabled>.
- use_trailing_solidus=False|True
- Includes a close-tag slash at the end of the start tag of void
- elements (empty elements whose end tag is forbidden). E.g. <hr/>.
- space_before_trailing_solidus=True|False
- Places a space immediately before the closing slash in a tag
- using a trailing solidus. E.g. <hr />. Requires use_trailing_solidus.
- sanitize=False|True
- Strip all unsafe or unknown constructs from output.
- See `html5lib user documentation`_
- omit_optional_tags=True|False
- Omit start/end tags that are optional.
-
- .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
- """
- if 'quote_char' in kwargs:
- self.use_best_quote_char = False
- for attr in self.options:
- setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
- self.errors = []
- self.strict = False
-
- def encode(self, string):
- assert(isinstance(string, text_type))
- if self.encoding:
- return string.encode(self.encoding, unicode_encode_errors)
- else:
- return string
-
- def encodeStrict(self, string):
- assert(isinstance(string, text_type))
- if self.encoding:
- return string.encode(self.encoding, "strict")
- else:
- return string
-
- def serialize(self, treewalker, encoding=None):
- self.encoding = encoding
- in_cdata = False
- self.errors = []
- if encoding and self.inject_meta_charset:
- from ..filters.inject_meta_charset import Filter
- treewalker = Filter(treewalker, encoding)
- # XXX: WhitespaceFilter should be used before OptionalTagFilter
- # for maximum efficiently of this latter filter
- if self.strip_whitespace:
- from ..filters.whitespace import Filter
- treewalker = Filter(treewalker)
- if self.sanitize:
- from ..filters.sanitizer import Filter
- treewalker = Filter(treewalker)
- if self.omit_optional_tags:
- from ..filters.optionaltags import Filter
- treewalker = Filter(treewalker)
- for token in treewalker:
- type = token["type"]
- if type == "Doctype":
- doctype = "<!DOCTYPE %s" % token["name"]
-
- if token["publicId"]:
- doctype += ' PUBLIC "%s"' % token["publicId"]
- elif token["systemId"]:
- doctype += " SYSTEM"
- if token["systemId"]:
- if token["systemId"].find('"') >= 0:
- if token["systemId"].find("'") >= 0:
- self.serializeError(_("System identifer contains both single and double quote characters"))
- quote_char = "'"
- else:
- quote_char = '"'
- doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)
-
- doctype += ">"
- yield self.encodeStrict(doctype)
-
- elif type in ("Characters", "SpaceCharacters"):
- if type == "SpaceCharacters" or in_cdata:
- if in_cdata and token["data"].find("</") >= 0:
- self.serializeError(_("Unexpected </ in CDATA"))
- yield self.encode(token["data"])
- else:
- yield self.encode(escape(token["data"]))
-
- elif type in ("StartTag", "EmptyTag"):
- name = token["name"]
- yield self.encodeStrict("<%s" % name)
- if name in rcdataElements and not self.escape_rcdata:
- in_cdata = True
- elif in_cdata:
- self.serializeError(_("Unexpected child element of a CDATA element"))
- for (attr_namespace, attr_name), attr_value in token["data"].items():
- # TODO: Add namespace support here
- k = attr_name
- v = attr_value
- yield self.encodeStrict(' ')
-
- yield self.encodeStrict(k)
- if not self.minimize_boolean_attributes or \
- (k not in booleanAttributes.get(name, tuple())
- and k not in booleanAttributes.get("", tuple())):
- yield self.encodeStrict("=")
- if self.quote_attr_values or not v:
- quote_attr = True
- else:
- quote_attr = reduce(lambda x, y: x or (y in v),
- spaceCharacters + ">\"'=", False)
- v = v.replace("&", "&amp;")
- if self.escape_lt_in_attrs:
- v = v.replace("<", "&lt;")
- if quote_attr:
- quote_char = self.quote_char
- if self.use_best_quote_char:
- if "'" in v and '"' not in v:
- quote_char = '"'
- elif '"' in v and "'" not in v:
- quote_char = "'"
- if quote_char == "'":
- v = v.replace("'", "&#39;")
- else:
- v = v.replace('"', "&quot;")
- yield self.encodeStrict(quote_char)
- yield self.encode(v)
- yield self.encodeStrict(quote_char)
- else:
- yield self.encode(v)
- if name in voidElements and self.use_trailing_solidus:
- if self.space_before_trailing_solidus:
- yield self.encodeStrict(" /")
- else:
- yield self.encodeStrict("/")
- yield self.encode(">")
-
- elif type == "EndTag":
- name = token["name"]
- if name in rcdataElements:
- in_cdata = False
- elif in_cdata:
- self.serializeError(_("Unexpected child element of a CDATA element"))
- yield self.encodeStrict("</%s>" % name)
-
- elif type == "Comment":
- data = token["data"]
- if data.find("--") >= 0:
- self.serializeError(_("Comment contains --"))
- yield self.encodeStrict("<!--%s-->" % token["data"])
-
- elif type == "Entity":
- name = token["name"]
- key = name + ";"
- if not key in entities:
- self.serializeError(_("Entity %s not recognized" % name))
- if self.resolve_entities and key not in xmlEntities:
- data = entities[key]
- else:
- data = "&%s;" % name
- yield self.encodeStrict(data)
-
- else:
- self.serializeError(token["data"])
-
- def render(self, treewalker, encoding=None):
- if encoding:
- return b"".join(list(self.serialize(treewalker, encoding)))
- else:
- return "".join(list(self.serialize(treewalker)))
-
- def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
- # XXX The idea is to make data mandatory.
- self.errors.append(data)
- if self.strict:
- raise SerializeError
-
-
-def SerializeError(Exception):
- """Error in serialized tree"""
- pass
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/tokenizer.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/tokenizer.py
deleted file mode 100644
index 7977457..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/tokenizer.py
+++ /dev/null
@@ -1,1731 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-try:
- chr = unichr # flake8: noqa
-except NameError:
- pass
-
-from collections import deque
-
-from .constants import spaceCharacters
-from .constants import entities
-from .constants import asciiLetters, asciiUpper2Lower
-from .constants import digits, hexDigits, EOF
-from .constants import tokenTypes, tagTokenTypes
-from .constants import replacementCharacters
-
-from .inputstream import HTMLInputStream
-
-from .trie import Trie
-
-entitiesTrie = Trie(entities)
-
-
-class HTMLTokenizer(object):
- """ This class takes care of tokenizing HTML.
-
- * self.currentToken
- Holds the token that is currently being processed.
-
- * self.state
- Holds a reference to the method to be invoked... XXX
-
- * self.stream
- Points to HTMLInputStream object.
- """
-
- def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
- lowercaseElementName=True, lowercaseAttrName=True, parser=None):
-
- self.stream = HTMLInputStream(stream, encoding, parseMeta, useChardet)
- self.parser = parser
-
- # Perform case conversions?
- self.lowercaseElementName = lowercaseElementName
- self.lowercaseAttrName = lowercaseAttrName
-
- # Setup the initial tokenizer state
- self.escapeFlag = False
- self.lastFourChars = []
- self.state = self.dataState
- self.escape = False
-
- # The current token being created
- self.currentToken = None
- super(HTMLTokenizer, self).__init__()
-
- def __iter__(self):
- """ This is where the magic happens.
-
- We do our usually processing through the states and when we have a token
- to return we yield the token which pauses processing until the next token
- is requested.
- """
- self.tokenQueue = deque([])
- # Start processing. When EOF is reached self.state will return False
- # instead of True and the loop will terminate.
- while self.state():
- while self.stream.errors:
- yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)}
- while self.tokenQueue:
- yield self.tokenQueue.popleft()
-
- def consumeNumberEntity(self, isHex):
- """This function returns either U+FFFD or the character based on the
- decimal or hexadecimal representation. It also discards ";" if present.
- If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
- """
-
- allowed = digits
- radix = 10
- if isHex:
- allowed = hexDigits
- radix = 16
-
- charStack = []
-
- # Consume all the characters that are in range while making sure we
- # don't hit an EOF.
- c = self.stream.char()
- while c in allowed and c is not EOF:
- charStack.append(c)
- c = self.stream.char()
-
- # Convert the set of characters consumed to an int.
- charAsInt = int("".join(charStack), radix)
-
- # Certain characters get replaced with others
- if charAsInt in replacementCharacters:
- char = replacementCharacters[charAsInt]
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "illegal-codepoint-for-numeric-entity",
- "datavars": {"charAsInt": charAsInt}})
- elif ((0xD800 <= charAsInt <= 0xDFFF) or
- (charAsInt > 0x10FFFF)):
- char = "\uFFFD"
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "illegal-codepoint-for-numeric-entity",
- "datavars": {"charAsInt": charAsInt}})
- else:
- # Should speed up this check somehow (e.g. move the set to a constant)
- if ((0x0001 <= charAsInt <= 0x0008) or
- (0x000E <= charAsInt <= 0x001F) or
- (0x007F <= charAsInt <= 0x009F) or
- (0xFDD0 <= charAsInt <= 0xFDEF) or
- charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE,
- 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
- 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
- 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
- 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE,
- 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE,
- 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
- 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE,
- 0xFFFFF, 0x10FFFE, 0x10FFFF])):
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data":
- "illegal-codepoint-for-numeric-entity",
- "datavars": {"charAsInt": charAsInt}})
- try:
- # Try/except needed as UCS-2 Python builds' unichar only works
- # within the BMP.
- char = chr(charAsInt)
- except ValueError:
- v = charAsInt - 0x10000
- char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF))
-
- # Discard the ; if present. Otherwise, put it back on the queue and
- # invoke parseError on parser.
- if c != ";":
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "numeric-entity-without-semicolon"})
- self.stream.unget(c)
-
- return char
-
- def consumeEntity(self, allowedChar=None, fromAttribute=False):
- # Initialise to the default output for when no entity is matched
- output = "&"
-
- charStack = [self.stream.char()]
- if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&")
- or (allowedChar is not None and allowedChar == charStack[0])):
- self.stream.unget(charStack[0])
-
- elif charStack[0] == "#":
- # Read the next character to see if it's hex or decimal
- hex = False
- charStack.append(self.stream.char())
- if charStack[-1] in ("x", "X"):
- hex = True
- charStack.append(self.stream.char())
-
- # charStack[-1] should be the first digit
- if (hex and charStack[-1] in hexDigits) \
- or (not hex and charStack[-1] in digits):
- # At least one digit found, so consume the whole number
- self.stream.unget(charStack[-1])
- output = self.consumeNumberEntity(hex)
- else:
- # No digits found
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data": "expected-numeric-entity"})
- self.stream.unget(charStack.pop())
- output = "&" + "".join(charStack)
-
- else:
- # At this point in the process might have named entity. Entities
- # are stored in the global variable "entities".
- #
- # Consume characters and compare to these to a substring of the
- # entity names in the list until the substring no longer matches.
- while (charStack[-1] is not EOF):
- if not entitiesTrie.has_keys_with_prefix("".join(charStack)):
- break
- charStack.append(self.stream.char())
-
- # At this point we have a string that starts with some characters
- # that may match an entity
- # Try to find the longest entity the string will match to take care
- # of &noti for instance.
- try:
- entityName = entitiesTrie.longest_prefix("".join(charStack[:-1]))
- entityLength = len(entityName)
- except KeyError:
- entityName = None
-
- if entityName is not None:
- if entityName[-1] != ";":
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "named-entity-without-semicolon"})
- if (entityName[-1] != ";" and fromAttribute and
- (charStack[entityLength] in asciiLetters or
- charStack[entityLength] in digits or
- charStack[entityLength] == "=")):
- self.stream.unget(charStack.pop())
- output = "&" + "".join(charStack)
- else:
- output = entities[entityName]
- self.stream.unget(charStack.pop())
- output += "".join(charStack[entityLength:])
- else:
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "expected-named-entity"})
- self.stream.unget(charStack.pop())
- output = "&" + "".join(charStack)
-
- if fromAttribute:
- self.currentToken["data"][-1][1] += output
- else:
- if output in spaceCharacters:
- tokenType = "SpaceCharacters"
- else:
- tokenType = "Characters"
- self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output})
-
- def processEntityInAttribute(self, allowedChar):
- """This method replaces the need for "entityInAttributeValueState".
- """
- self.consumeEntity(allowedChar=allowedChar, fromAttribute=True)
-
- def emitCurrentToken(self):
- """This method is a generic handler for emitting the tags. It also sets
- the state to "data" because that's what's needed after a token has been
- emitted.
- """
- token = self.currentToken
- # Add token to the queue to be yielded
- if (token["type"] in tagTokenTypes):
- if self.lowercaseElementName:
- token["name"] = token["name"].translate(asciiUpper2Lower)
- if token["type"] == tokenTypes["EndTag"]:
- if token["data"]:
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data": "attributes-in-end-tag"})
- if token["selfClosing"]:
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data": "self-closing-flag-on-end-tag"})
- self.tokenQueue.append(token)
- self.state = self.dataState
-
- # Below are the various tokenizer states worked out.
- def dataState(self):
- data = self.stream.char()
- if data == "&":
- self.state = self.entityDataState
- elif data == "<":
- self.state = self.tagOpenState
- elif data == "\u0000":
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data": "invalid-codepoint"})
- self.tokenQueue.append({"type": tokenTypes["Characters"],
- "data": "\u0000"})
- elif data is EOF:
- # Tokenization ends.
- return False
- elif data in spaceCharacters:
- # Directly after emitting a token you switch back to the "data
- # state". At that point spaceCharacters are important so they are
- # emitted separately.
- self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
- data + self.stream.charsUntil(spaceCharacters, True)})
- # No need to update lastFourChars here, since the first space will
- # have already been appended to lastFourChars and will have broken
- # any <!-- or --> sequences
- else:
- chars = self.stream.charsUntil(("&", "<", "\u0000"))
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
- data + chars})
- return True
-
- def entityDataState(self):
- self.consumeEntity()
- self.state = self.dataState
- return True
-
- def rcdataState(self):
- data = self.stream.char()
- if data == "&":
- self.state = self.characterReferenceInRcdata
- elif data == "<":
- self.state = self.rcdataLessThanSignState
- elif data == EOF:
- # Tokenization ends.
- return False
- elif data == "\u0000":
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data": "invalid-codepoint"})
- self.tokenQueue.append({"type": tokenTypes["Characters"],
- "data": "\uFFFD"})
- elif data in spaceCharacters:
- # Directly after emitting a token you switch back to the "data
- # state". At that point spaceCharacters are important so they are
- # emitted separately.
- self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
- data + self.stream.charsUntil(spaceCharacters, True)})
- # No need to update lastFourChars here, since the first space will
- # have already been appended to lastFourChars and will have broken
- # any <!-- or --> sequences
- else:
- chars = self.stream.charsUntil(("&", "<", "\u0000"))
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
- data + chars})
- return True
-
- def characterReferenceInRcdata(self):
- self.consumeEntity()
- self.state = self.rcdataState
- return True
-
- def rawtextState(self):
- data = self.stream.char()
- if data == "<":
- self.state = self.rawtextLessThanSignState
- elif data == "\u0000":
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data": "invalid-codepoint"})
- self.tokenQueue.append({"type": tokenTypes["Characters"],
- "data": "\uFFFD"})
- elif data == EOF:
- # Tokenization ends.
- return False
- else:
- chars = self.stream.charsUntil(("<", "\u0000"))
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
- data + chars})
- return True
-
- def scriptDataState(self):
- data = self.stream.char()
- if data == "<":
- self.state = self.scriptDataLessThanSignState
- elif data == "\u0000":
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data": "invalid-codepoint"})
- self.tokenQueue.append({"type": tokenTypes["Characters"],
- "data": "\uFFFD"})
- elif data == EOF:
- # Tokenization ends.
- return False
- else:
- chars = self.stream.charsUntil(("<", "\u0000"))
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
- data + chars})
- return True
-
- def plaintextState(self):
- data = self.stream.char()
- if data == EOF:
- # Tokenization ends.
- return False
- elif data == "\u0000":
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data": "invalid-codepoint"})
- self.tokenQueue.append({"type": tokenTypes["Characters"],
- "data": "\uFFFD"})
- else:
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
- data + self.stream.charsUntil("\u0000")})
- return True
-
- def tagOpenState(self):
- data = self.stream.char()
- if data == "!":
- self.state = self.markupDeclarationOpenState
- elif data == "/":
- self.state = self.closeTagOpenState
- elif data in asciiLetters:
- self.currentToken = {"type": tokenTypes["StartTag"],
- "name": data, "data": [],
- "selfClosing": False,
- "selfClosingAcknowledged": False}
- self.state = self.tagNameState
- elif data == ">":
- # XXX In theory it could be something besides a tag name. But
- # do we really care?
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "expected-tag-name-but-got-right-bracket"})
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"})
- self.state = self.dataState
- elif data == "?":
- # XXX In theory it could be something besides a tag name. But
- # do we really care?
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "expected-tag-name-but-got-question-mark"})
- self.stream.unget(data)
- self.state = self.bogusCommentState
- else:
- # XXX
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "expected-tag-name"})
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
- self.stream.unget(data)
- self.state = self.dataState
- return True
-
- def closeTagOpenState(self):
- data = self.stream.char()
- if data in asciiLetters:
- self.currentToken = {"type": tokenTypes["EndTag"], "name": data,
- "data": [], "selfClosing": False}
- self.state = self.tagNameState
- elif data == ">":
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "expected-closing-tag-but-got-right-bracket"})
- self.state = self.dataState
- elif data is EOF:
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "expected-closing-tag-but-got-eof"})
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
- self.state = self.dataState
- else:
- # XXX data can be _'_...
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "expected-closing-tag-but-got-char",
- "datavars": {"data": data}})
- self.stream.unget(data)
- self.state = self.bogusCommentState
- return True
-
- def tagNameState(self):
- data = self.stream.char()
- if data in spaceCharacters:
- self.state = self.beforeAttributeNameState
- elif data == ">":
- self.emitCurrentToken()
- elif data is EOF:
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "eof-in-tag-name"})
- self.state = self.dataState
- elif data == "/":
- self.state = self.selfClosingStartTagState
- elif data == "\u0000":
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data": "invalid-codepoint"})
- self.currentToken["name"] += "\uFFFD"
- else:
- self.currentToken["name"] += data
- # (Don't use charsUntil here, because tag names are
- # very short and it's faster to not do anything fancy)
- return True
-
- def rcdataLessThanSignState(self):
- data = self.stream.char()
- if data == "/":
- self.temporaryBuffer = ""
- self.state = self.rcdataEndTagOpenState
- else:
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
- self.stream.unget(data)
- self.state = self.rcdataState
- return True
-
- def rcdataEndTagOpenState(self):
- data = self.stream.char()
- if data in asciiLetters:
- self.temporaryBuffer += data
- self.state = self.rcdataEndTagNameState
- else:
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
- self.stream.unget(data)
- self.state = self.rcdataState
- return True
-
- def rcdataEndTagNameState(self):
- appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
- data = self.stream.char()
- if data in spaceCharacters and appropriate:
- self.currentToken = {"type": tokenTypes["EndTag"],
- "name": self.temporaryBuffer,
- "data": [], "selfClosing": False}
- self.state = self.beforeAttributeNameState
- elif data == "/" and appropriate:
- self.currentToken = {"type": tokenTypes["EndTag"],
- "name": self.temporaryBuffer,
- "data": [], "selfClosing": False}
- self.state = self.selfClosingStartTagState
- elif data == ">" and appropriate:
- self.currentToken = {"type": tokenTypes["EndTag"],
- "name": self.temporaryBuffer,
- "data": [], "selfClosing": False}
- self.emitCurrentToken()
- self.state = self.dataState
- elif data in asciiLetters:
- self.temporaryBuffer += data
- else:
- self.tokenQueue.append({"type": tokenTypes["Characters"],
- "data": "</" + self.temporaryBuffer})
- self.stream.unget(data)
- self.state = self.rcdataState
- return True
-
- def rawtextLessThanSignState(self):
- data = self.stream.char()
- if data == "/":
- self.temporaryBuffer = ""
- self.state = self.rawtextEndTagOpenState
- else:
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
- self.stream.unget(data)
- self.state = self.rawtextState
- return True
-
- def rawtextEndTagOpenState(self):
- data = self.stream.char()
- if data in asciiLetters:
- self.temporaryBuffer += data
- self.state = self.rawtextEndTagNameState
- else:
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
- self.stream.unget(data)
- self.state = self.rawtextState
- return True
-
- def rawtextEndTagNameState(self):
- appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
- data = self.stream.char()
- if data in spaceCharacters and appropriate:
- self.currentToken = {"type": tokenTypes["EndTag"],
- "name": self.temporaryBuffer,
- "data": [], "selfClosing": False}
- self.state = self.beforeAttributeNameState
- elif data == "/" and appropriate:
- self.currentToken = {"type": tokenTypes["EndTag"],
- "name": self.temporaryBuffer,
- "data": [], "selfClosing": False}
- self.state = self.selfClosingStartTagState
- elif data == ">" and appropriate:
- self.currentToken = {"type": tokenTypes["EndTag"],
- "name": self.temporaryBuffer,
- "data": [], "selfClosing": False}
- self.emitCurrentToken()
- self.state = self.dataState
- elif data in asciiLetters:
- self.temporaryBuffer += data
- else:
- self.tokenQueue.append({"type": tokenTypes["Characters"],
- "data": "</" + self.temporaryBuffer})
- self.stream.unget(data)
- self.state = self.rawtextState
- return True
-
- def scriptDataLessThanSignState(self):
- data = self.stream.char()
- if data == "/":
- self.temporaryBuffer = ""
- self.state = self.scriptDataEndTagOpenState
- elif data == "!":
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<!"})
- self.state = self.scriptDataEscapeStartState
- else:
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
- self.stream.unget(data)
- self.state = self.scriptDataState
- return True
-
- def scriptDataEndTagOpenState(self):
- data = self.stream.char()
- if data in asciiLetters:
- self.temporaryBuffer += data
- self.state = self.scriptDataEndTagNameState
- else:
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
- self.stream.unget(data)
- self.state = self.scriptDataState
- return True
-
- def scriptDataEndTagNameState(self):
- appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
- data = self.stream.char()
- if data in spaceCharacters and appropriate:
- self.currentToken = {"type": tokenTypes["EndTag"],
- "name": self.temporaryBuffer,
- "data": [], "selfClosing": False}
- self.state = self.beforeAttributeNameState
- elif data == "/" and appropriate:
- self.currentToken = {"type": tokenTypes["EndTag"],
- "name": self.temporaryBuffer,
- "data": [], "selfClosing": False}
- self.state = self.selfClosingStartTagState
- elif data == ">" and appropriate:
- self.currentToken = {"type": tokenTypes["EndTag"],
- "name": self.temporaryBuffer,
- "data": [], "selfClosing": False}
- self.emitCurrentToken()
- self.state = self.dataState
- elif data in asciiLetters:
- self.temporaryBuffer += data
- else:
- self.tokenQueue.append({"type": tokenTypes["Characters"],
- "data": "</" + self.temporaryBuffer})
- self.stream.unget(data)
- self.state = self.scriptDataState
- return True
-
- def scriptDataEscapeStartState(self):
- data = self.stream.char()
- if data == "-":
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
- self.state = self.scriptDataEscapeStartDashState
- else:
- self.stream.unget(data)
- self.state = self.scriptDataState
- return True
-
- def scriptDataEscapeStartDashState(self):
- data = self.stream.char()
- if data == "-":
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
- self.state = self.scriptDataEscapedDashDashState
- else:
- self.stream.unget(data)
- self.state = self.scriptDataState
- return True
-
- def scriptDataEscapedState(self):
- data = self.stream.char()
- if data == "-":
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
- self.state = self.scriptDataEscapedDashState
- elif data == "<":
- self.state = self.scriptDataEscapedLessThanSignState
- elif data == "\u0000":
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data": "invalid-codepoint"})
- self.tokenQueue.append({"type": tokenTypes["Characters"],
- "data": "\uFFFD"})
- elif data == EOF:
- self.state = self.dataState
- else:
- chars = self.stream.charsUntil(("<", "-", "\u0000"))
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
- data + chars})
- return True
-
- def scriptDataEscapedDashState(self):
- data = self.stream.char()
- if data == "-":
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
- self.state = self.scriptDataEscapedDashDashState
- elif data == "<":
- self.state = self.scriptDataEscapedLessThanSignState
- elif data == "\u0000":
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data": "invalid-codepoint"})
- self.tokenQueue.append({"type": tokenTypes["Characters"],
- "data": "\uFFFD"})
- self.state = self.scriptDataEscapedState
- elif data == EOF:
- self.state = self.dataState
- else:
- self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
- self.state = self.scriptDataEscapedState
- return True
-
def scriptDataEscapedDashDashState(self):
    """Handle the character following "--" in escaped script data.

    Transitions, in the order they are tested:
      * "-"       -> emit "-" and stay in this state
      * "<"       -> go to scriptDataEscapedLessThanSignState
      * ">"       -> emit ">" and go to scriptDataState
      * U+0000    -> emit an "invalid-codepoint" parse error and U+FFFD,
                     then go to scriptDataEscapedState
      * EOF       -> go to dataState
      * otherwise -> emit the character and go to scriptDataEscapedState

    EOF is compared by identity, matching the other state handlers.
    Always returns True.
    """
    data = self.stream.char()
    if data == "-":
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
    elif data == "<":
        self.state = self.scriptDataEscapedLessThanSignState
    elif data == ">":
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"})
        self.state = self.scriptDataState
    elif data == "\u0000":
        self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                "data": "invalid-codepoint"})
        self.tokenQueue.append({"type": tokenTypes["Characters"],
                                "data": "\uFFFD"})
        self.state = self.scriptDataEscapedState
    elif data is EOF:
        self.state = self.dataState
    else:
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
        self.state = self.scriptDataEscapedState
    return True
-
def scriptDataEscapedLessThanSignState(self):
    """Handle the character following "<" in escaped script data.

    "/" clears temporaryBuffer and moves to the end-tag-open state.  An
    ASCII letter is emitted together with the "<", seeds temporaryBuffer
    and moves to scriptDataDoubleEscapeStartState.  Anything else emits
    the "<" alone, pushes the input back and returns to
    scriptDataEscapedState.  Always returns True.
    """
    char = self.stream.char()
    if char == "/":
        self.temporaryBuffer = ""
        self.state = self.scriptDataEscapedEndTagOpenState
        return True
    if char in asciiLetters:
        self.tokenQueue.append(
            {"type": tokenTypes["Characters"], "data": "<" + char})
        self.temporaryBuffer = char
        self.state = self.scriptDataDoubleEscapeStartState
        return True
    self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
    self.stream.unget(char)
    self.state = self.scriptDataEscapedState
    return True
-
def scriptDataEscapedEndTagOpenState(self):
    """Handle the character following "</" in escaped script data.

    An ASCII letter becomes the start of temporaryBuffer and moves to
    scriptDataEscapedEndTagNameState.  Otherwise "</" is emitted as
    character data, the input is pushed back and the tokenizer returns to
    scriptDataEscapedState.  Always returns True.
    """
    char = self.stream.char()
    if char not in asciiLetters:
        self.tokenQueue.append(
            {"type": tokenTypes["Characters"], "data": "</"})
        self.stream.unget(char)
        self.state = self.scriptDataEscapedState
        return True
    self.temporaryBuffer = char
    self.state = self.scriptDataEscapedEndTagNameState
    return True
-
def scriptDataEscapedEndTagNameState(self):
    """Accumulate a candidate end-tag name inside escaped script data.

    The buffered name is "appropriate" when a current token exists and its
    name equals temporaryBuffer, compared case-insensitively.  For an
    appropriate name, whitespace, "/" or ">" replaces the current token
    with an EndTag token named after temporaryBuffer and continues in the
    matching tag state (">" emits it and returns to dataState).  ASCII
    letters extend temporaryBuffer.  Everything else emits "</" plus the
    buffer as character data, pushes the input back and returns to
    scriptDataEscapedState.  Always returns True.
    """
    appropriate = (self.currentToken and
                   self.currentToken["name"].lower() ==
                   self.temporaryBuffer.lower())
    char = self.stream.char()
    if appropriate and (char in spaceCharacters or char == "/" or char == ">"):
        # All three terminators start from the same fresh end-tag token.
        self.currentToken = {"type": tokenTypes["EndTag"],
                             "name": self.temporaryBuffer,
                             "data": [], "selfClosing": False}
        if char in spaceCharacters:
            self.state = self.beforeAttributeNameState
        elif char == "/":
            self.state = self.selfClosingStartTagState
        else:
            self.emitCurrentToken()
            self.state = self.dataState
    elif char in asciiLetters:
        self.temporaryBuffer += char
    else:
        self.tokenQueue.append({"type": tokenTypes["Characters"],
                                "data": "</" + self.temporaryBuffer})
        self.stream.unget(char)
        self.state = self.scriptDataEscapedState
    return True
-
def scriptDataDoubleEscapeStartState(self):
    """Collect a tag name that may open a double-escaped script section.

    Whitespace, "/" or ">" is emitted as character data; the tokenizer
    then enters scriptDataDoubleEscapedState when temporaryBuffer spells
    "script" (case-insensitively) and scriptDataEscapedState otherwise.
    ASCII letters are emitted and appended to temporaryBuffer.  Any other
    input is pushed back and the tokenizer returns to
    scriptDataEscapedState.  Always returns True.
    """
    char = self.stream.char()
    terminators = spaceCharacters | frozenset(("/", ">"))
    if char in terminators:
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": char})
        self.state = (self.scriptDataDoubleEscapedState
                      if self.temporaryBuffer.lower() == "script"
                      else self.scriptDataEscapedState)
    elif char in asciiLetters:
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": char})
        self.temporaryBuffer += char
    else:
        self.stream.unget(char)
        self.state = self.scriptDataEscapedState
    return True
-
def scriptDataDoubleEscapedState(self):
    """Handle one character of double-escaped script data.

    Transitions, in the order they are tested:
      * "-"       -> emit "-" and go to scriptDataDoubleEscapedDashState
      * "<"       -> emit "<" and go to
                     scriptDataDoubleEscapedLessThanSignState
      * U+0000    -> emit an "invalid-codepoint" parse error and U+FFFD
      * EOF       -> emit an "eof-in-script-in-script" parse error and go
                     to dataState
      * otherwise -> emit the character unchanged

    EOF is compared by identity, matching the other state handlers.
    Always returns True.
    """
    data = self.stream.char()
    if data == "-":
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
        self.state = self.scriptDataDoubleEscapedDashState
    elif data == "<":
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
        self.state = self.scriptDataDoubleEscapedLessThanSignState
    elif data == "\u0000":
        self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                "data": "invalid-codepoint"})
        self.tokenQueue.append({"type": tokenTypes["Characters"],
                                "data": "\uFFFD"})
    elif data is EOF:
        self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                "eof-in-script-in-script"})
        self.state = self.dataState
    else:
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
    return True
-
def scriptDataDoubleEscapedDashState(self):
    """Handle the character following "-" in double-escaped script data.

    Transitions, in the order they are tested:
      * "-"       -> emit "-" and go to
                     scriptDataDoubleEscapedDashDashState
      * "<"       -> emit "<" and go to
                     scriptDataDoubleEscapedLessThanSignState
      * U+0000    -> emit an "invalid-codepoint" parse error and U+FFFD,
                     then go to scriptDataDoubleEscapedState
      * EOF       -> emit an "eof-in-script-in-script" parse error and go
                     to dataState
      * otherwise -> emit the character and go to
                     scriptDataDoubleEscapedState

    EOF is compared by identity, matching the other state handlers.
    Always returns True.
    """
    data = self.stream.char()
    if data == "-":
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
        self.state = self.scriptDataDoubleEscapedDashDashState
    elif data == "<":
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
        self.state = self.scriptDataDoubleEscapedLessThanSignState
    elif data == "\u0000":
        self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                "data": "invalid-codepoint"})
        self.tokenQueue.append({"type": tokenTypes["Characters"],
                                "data": "\uFFFD"})
        self.state = self.scriptDataDoubleEscapedState
    elif data is EOF:
        self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                "eof-in-script-in-script"})
        self.state = self.dataState
    else:
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
        self.state = self.scriptDataDoubleEscapedState
    return True
-
def scriptDataDoubleEscapedDashDashState(self):
    """Handle the character following "--" in double-escaped script data.

    Transitions, in the order they are tested:
      * "-"       -> emit "-" and stay in this state
      * "<"       -> emit "<" and go to
                     scriptDataDoubleEscapedLessThanSignState
      * ">"       -> emit ">" and go to scriptDataState
      * U+0000    -> emit an "invalid-codepoint" parse error and U+FFFD,
                     then go to scriptDataDoubleEscapedState
      * EOF       -> emit an "eof-in-script-in-script" parse error and go
                     to dataState
      * otherwise -> emit the character and go to
                     scriptDataDoubleEscapedState

    EOF is compared by identity, matching the other state handlers.
    Always returns True.
    """
    data = self.stream.char()
    if data == "-":
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
    elif data == "<":
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
        self.state = self.scriptDataDoubleEscapedLessThanSignState
    elif data == ">":
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"})
        self.state = self.scriptDataState
    elif data == "\u0000":
        self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                "data": "invalid-codepoint"})
        self.tokenQueue.append({"type": tokenTypes["Characters"],
                                "data": "\uFFFD"})
        self.state = self.scriptDataDoubleEscapedState
    elif data is EOF:
        self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                "eof-in-script-in-script"})
        self.state = self.dataState
    else:
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
        self.state = self.scriptDataDoubleEscapedState
    return True
-
def scriptDataDoubleEscapedLessThanSignState(self):
    """Handle the character following "<" in double-escaped script data.

    "/" is emitted as character data, temporaryBuffer is cleared and the
    tokenizer moves to scriptDataDoubleEscapeEndState.  Any other input is
    pushed back and handled by scriptDataDoubleEscapedState.  Always
    returns True.
    """
    char = self.stream.char()
    if char != "/":
        self.stream.unget(char)
        self.state = self.scriptDataDoubleEscapedState
        return True
    self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"})
    self.temporaryBuffer = ""
    self.state = self.scriptDataDoubleEscapeEndState
    return True
-
def scriptDataDoubleEscapeEndState(self):
    """Collect a tag name that may close a double-escaped script section.

    Whitespace, "/" or ">" is emitted as character data; the tokenizer
    then drops back to scriptDataEscapedState when temporaryBuffer spells
    "script" (case-insensitively) and stays double-escaped otherwise.
    ASCII letters are emitted and appended to temporaryBuffer.  Any other
    input is pushed back and handled by scriptDataDoubleEscapedState.
    Always returns True.
    """
    char = self.stream.char()
    terminators = spaceCharacters | frozenset(("/", ">"))
    if char in terminators:
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": char})
        self.state = (self.scriptDataEscapedState
                      if self.temporaryBuffer.lower() == "script"
                      else self.scriptDataDoubleEscapedState)
    elif char in asciiLetters:
        self.tokenQueue.append({"type": tokenTypes["Characters"], "data": char})
        self.temporaryBuffer += char
    else:
        self.stream.unget(char)
        self.state = self.scriptDataDoubleEscapedState
    return True
-
def beforeAttributeNameState(self):
    """Skip whitespace inside a tag and start the next attribute, if any.

    Whitespace runs are consumed.  ">" emits the current token and "/"
    moves to selfClosingStartTagState.  EOF reports
    "expected-attribute-name-but-got-eof" and returns to dataState.  Every
    other character opens a new [name, value] pair on the current token
    and moves to attributeNameState; quotes, "=" and "<" additionally
    report "invalid-character-in-attribute-name", and U+0000 is reported
    as "invalid-codepoint" and stored as U+FFFD.  Always returns True.
    """
    char = self.stream.char()
    if char in spaceCharacters:
        self.stream.charsUntil(spaceCharacters, True)
    elif char in asciiLetters:
        self.currentToken["data"].append([char, ""])
        self.state = self.attributeNameState
    elif char == ">":
        self.emitCurrentToken()
    elif char == "/":
        self.state = self.selfClosingStartTagState
    elif char in ("'", '"', "=", "<"):
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-character-in-attribute-name",
        })
        self.currentToken["data"].append([char, ""])
        self.state = self.attributeNameState
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["data"].append(["\uFFFD", ""])
        self.state = self.attributeNameState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "expected-attribute-name-but-got-eof",
        })
        self.state = self.dataState
    else:
        self.currentToken["data"].append([char, ""])
        self.state = self.attributeNameState
    return True
-
def attributeNameState(self):
    """Extend the name of the attribute currently being read.

    Characters that belong to the name are appended to the last
    [name, value] pair of the current token and the tokenizer stays in
    this state.  "=", whitespace, "/", ">" and EOF end the name; at that
    point the name is lower-cased when lowercaseAttrName is set and a
    "duplicate-attribute" parse error is queued if an earlier attribute
    of the same token already uses it.  For ">" the token is emitted only
    after that check has run.  Always returns True.
    """
    char = self.stream.char()
    stayInState = False
    emitAfterChecks = False
    if char == "=":
        self.state = self.beforeAttributeValueState
    elif char in asciiLetters:
        attribute = self.currentToken["data"][-1]
        attribute[0] += char + self.stream.charsUntil(asciiLetters, True)
        stayInState = True
    elif char == ">":
        # Emitting right away would convert the attribute list before the
        # duplicate check below has looked at it, so emission is deferred.
        emitAfterChecks = True
    elif char in spaceCharacters:
        self.state = self.afterAttributeNameState
    elif char == "/":
        self.state = self.selfClosingStartTagState
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["data"][-1][0] += "\uFFFD"
        stayInState = True
    elif char in ("'", '"', "<"):
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-character-in-attribute-name",
        })
        self.currentToken["data"][-1][0] += char
        stayInState = True
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-attribute-name",
        })
        self.state = self.dataState
    else:
        self.currentToken["data"][-1][0] += char
        stayInState = True

    if not stayInState:
        # Duplicate attributes are not removed here -- that is left to
        # token emission -- but the parse error is reported now.
        if self.lowercaseAttrName:
            self.currentToken["data"][-1][0] = (
                self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
        attributes = self.currentToken["data"]
        finishedName = attributes[-1][0]
        if any(finishedName == name for name, value in attributes[:-1]):
            self.tokenQueue.append({
                "type": tokenTypes["ParseError"],
                "data": "duplicate-attribute",
            })
        # Deferred emission for the ">" case above.
        if emitAfterChecks:
            self.emitCurrentToken()
    return True
-
def afterAttributeNameState(self):
    """Decide what follows a finished attribute name.

    Whitespace runs are consumed; "=" moves to beforeAttributeValueState;
    ">" emits the current token; "/" moves to selfClosingStartTagState;
    EOF reports "expected-end-of-tag-but-got-eof" and returns to
    dataState.  Any other character starts a new [name, value] pair and
    moves to attributeNameState, with a parse error for quotes and "<"
    and a U+FFFD substitution (plus "invalid-codepoint") for U+0000.
    Always returns True.
    """
    char = self.stream.char()
    if char in spaceCharacters:
        self.stream.charsUntil(spaceCharacters, True)
    elif char == "=":
        self.state = self.beforeAttributeValueState
    elif char == ">":
        self.emitCurrentToken()
    elif char in asciiLetters:
        self.currentToken["data"].append([char, ""])
        self.state = self.attributeNameState
    elif char == "/":
        self.state = self.selfClosingStartTagState
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["data"].append(["\uFFFD", ""])
        self.state = self.attributeNameState
    elif char in ("'", '"', "<"):
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-character-after-attribute-name",
        })
        self.currentToken["data"].append([char, ""])
        self.state = self.attributeNameState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "expected-end-of-tag-but-got-eof",
        })
        self.state = self.dataState
    else:
        self.currentToken["data"].append([char, ""])
        self.state = self.attributeNameState
    return True
-
def beforeAttributeValueState(self):
    """Pick the attribute-value sub-state from the first value character.

    Whitespace runs are consumed.  A double or single quote selects the
    matching quoted-value state.  "&" is pushed back and handled by the
    unquoted-value state.  ">" reports
    "expected-attribute-value-but-got-right-bracket" and emits the token.
    EOF reports "expected-attribute-value-but-got-eof" and returns to
    dataState.  All other characters begin an unquoted value; U+0000 is
    stored as U+FFFD with "invalid-codepoint", and "=", "<" and "`" are
    stored with "equals-in-unquoted-attribute-value".  Always returns
    True.
    """
    char = self.stream.char()
    if char in spaceCharacters:
        self.stream.charsUntil(spaceCharacters, True)
    elif char == '"':
        self.state = self.attributeValueDoubleQuotedState
    elif char == "&":
        self.state = self.attributeValueUnQuotedState
        self.stream.unget(char)
    elif char == "'":
        self.state = self.attributeValueSingleQuotedState
    elif char == ">":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "expected-attribute-value-but-got-right-bracket",
        })
        self.emitCurrentToken()
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["data"][-1][1] += "\uFFFD"
        self.state = self.attributeValueUnQuotedState
    elif char in ("=", "<", "`"):
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "equals-in-unquoted-attribute-value",
        })
        self.currentToken["data"][-1][1] += char
        self.state = self.attributeValueUnQuotedState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "expected-attribute-value-but-got-eof",
        })
        self.state = self.dataState
    else:
        self.currentToken["data"][-1][1] += char
        self.state = self.attributeValueUnQuotedState
    return True
-
def attributeValueDoubleQuotedState(self):
    """Read a chunk of a double-quoted attribute value.

    The closing quote moves to afterAttributeValueState.  "&" hands off
    to processEntityInAttribute('"').  U+0000 is reported as
    "invalid-codepoint" and stored as U+FFFD.  EOF reports
    "eof-in-attribute-value-double-quote" and returns to dataState.  Any
    other character is appended to the value together with everything up
    to the next quote, "&" or U+0000.  Always returns True.
    """
    char = self.stream.char()
    if char == '"':
        self.state = self.afterAttributeValueState
    elif char == "&":
        self.processEntityInAttribute('"')
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["data"][-1][1] += "\uFFFD"
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-attribute-value-double-quote",
        })
        self.state = self.dataState
    else:
        attribute = self.currentToken["data"][-1]
        attribute[1] += char + self.stream.charsUntil(('"', "&", "\u0000"))
    return True
-
def attributeValueSingleQuotedState(self):
    """Read a chunk of a single-quoted attribute value.

    The closing quote moves to afterAttributeValueState.  "&" hands off
    to processEntityInAttribute("'").  U+0000 is reported as
    "invalid-codepoint" and stored as U+FFFD.  EOF reports
    "eof-in-attribute-value-single-quote" and returns to dataState.  Any
    other character is appended to the value together with everything up
    to the next quote, "&" or U+0000.  Always returns True.
    """
    char = self.stream.char()
    if char == "'":
        self.state = self.afterAttributeValueState
    elif char == "&":
        self.processEntityInAttribute("'")
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["data"][-1][1] += "\uFFFD"
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-attribute-value-single-quote",
        })
        self.state = self.dataState
    else:
        attribute = self.currentToken["data"][-1]
        attribute[1] += char + self.stream.charsUntil(("'", "&", "\u0000"))
    return True
-
def attributeValueUnQuotedState(self):
    """Read a chunk of an unquoted attribute value.

    Whitespace ends the value and moves to beforeAttributeNameState; ">"
    emits the current token; "&" hands off to
    processEntityInAttribute(">").  Quotes, "=", "<" and "`" are kept in
    the value but reported as
    "unexpected-character-in-unquoted-attribute-value".  U+0000 is
    reported as "invalid-codepoint" and stored as U+FFFD.  EOF reports
    "eof-in-attribute-value-no-quotes" and returns to dataState.  Other
    characters are appended along with the run up to the next special
    character.  Always returns True.
    """
    char = self.stream.char()
    if char in spaceCharacters:
        self.state = self.beforeAttributeNameState
    elif char == "&":
        self.processEntityInAttribute(">")
    elif char == ">":
        self.emitCurrentToken()
    elif char in ('"', "'", "=", "<", "`"):
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-character-in-unquoted-attribute-value",
        })
        self.currentToken["data"][-1][1] += char
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["data"][-1][1] += "\uFFFD"
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-attribute-value-no-quotes",
        })
        self.state = self.dataState
    else:
        attribute = self.currentToken["data"][-1]
        attribute[1] += char + self.stream.charsUntil(
            frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) |
            spaceCharacters)
    return True
-
def afterAttributeValueState(self):
    """Handle the character directly after a quoted attribute value.

    Whitespace moves to beforeAttributeNameState, ">" emits the current
    token and "/" moves to selfClosingStartTagState.  EOF reports
    "unexpected-EOF-after-attribute-value", is pushed back and the
    tokenizer returns to dataState.  Anything else reports
    "unexpected-character-after-attribute-value", is pushed back and is
    reprocessed by beforeAttributeNameState.  Always returns True.
    """
    char = self.stream.char()
    if char in spaceCharacters:
        self.state = self.beforeAttributeNameState
    elif char == ">":
        self.emitCurrentToken()
    elif char == "/":
        self.state = self.selfClosingStartTagState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-EOF-after-attribute-value",
        })
        self.stream.unget(char)
        self.state = self.dataState
    else:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-character-after-attribute-value",
        })
        self.stream.unget(char)
        self.state = self.beforeAttributeNameState
    return True
-
def selfClosingStartTagState(self):
    """Handle the character following "/" inside a tag.

    ">" marks the current token as self-closing and emits it.  EOF
    reports "unexpected-EOF-after-solidus-in-tag", is pushed back and the
    tokenizer returns to dataState.  Anything else reports
    "unexpected-character-after-solidus-in-tag", is pushed back and is
    reprocessed by beforeAttributeNameState.  Always returns True.
    """
    char = self.stream.char()
    if char == ">":
        self.currentToken["selfClosing"] = True
        self.emitCurrentToken()
        return True
    if char is EOF:
        error = "unexpected-EOF-after-solidus-in-tag"
        following = self.dataState
    else:
        error = "unexpected-character-after-solidus-in-tag"
        following = self.beforeAttributeNameState
    self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": error})
    self.stream.unget(char)
    self.state = following
    return True
-
def bogusCommentState(self):
    """Turn everything up to the next ">" into a single Comment token.

    The text is read with charsUntil(">"), U+0000 is replaced by U+FFFD,
    and the result is queued as a Comment token.  One further character
    (the ">" itself, or EOF) is then consumed and the tokenizer returns
    to dataState.  Always returns True.
    """
    text = self.stream.charsUntil(">").replace("\u0000", "\uFFFD")
    comment = {"type": tokenTypes["Comment"], "data": text}
    self.tokenQueue.append(comment)

    # Discard the terminator that stopped charsUntil.
    self.stream.char()
    self.state = self.dataState
    return True
-
def markupDeclarationOpenState(self):
    """Classify the construct that follows "<!".

    * "--" starts a Comment token and moves to commentStartState.
    * "doctype" (any letter case) starts a Doctype token and moves to
      doctypeState.
    * "[CDATA[" moves to cdataSectionState, but only when a parser is
      attached and its innermost open element is outside the tree's
      default namespace.

    If none of these match, "expected-dashes-or-doctype" is reported,
    every consumed character is pushed back onto the stream and the
    tokenizer continues in bogusCommentState.  Always returns True.
    """
    consumed = [self.stream.char()]
    if consumed[-1] == "-":
        consumed.append(self.stream.char())
        if consumed[-1] == "-":
            self.currentToken = {"type": tokenTypes["Comment"], "data": ""}
            self.state = self.commentStartState
            return True
    elif consumed[-1] in ('d', 'D'):
        for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'),
                         ('y', 'Y'), ('p', 'P'), ('e', 'E')):
            consumed.append(self.stream.char())
            if consumed[-1] not in expected:
                break
        else:
            # Every remaining letter of "doctype" matched.
            self.currentToken = {"type": tokenTypes["Doctype"],
                                 "name": "",
                                 "publicId": None, "systemId": None,
                                 "correct": True}
            self.state = self.doctypeState
            return True
    elif (consumed[-1] == "[" and
          self.parser is not None and
          self.parser.tree.openElements and
          self.parser.tree.openElements[-1].namespace !=
          self.parser.tree.defaultNamespace):
        for expected in ["C", "D", "A", "T", "A", "["]:
            consumed.append(self.stream.char())
            if consumed[-1] != expected:
                break
        else:
            self.state = self.cdataSectionState
            return True

    self.tokenQueue.append({
        "type": tokenTypes["ParseError"],
        "data": "expected-dashes-or-doctype",
    })

    # Push the lookahead back, last-read character first.
    while consumed:
        self.stream.unget(consumed.pop())
    self.state = self.bogusCommentState
    return True
-
def commentStartState(self):
    """Handle the first character after the comment opener.

    "-" moves to commentStartDashState.  U+0000 is reported as
    "invalid-codepoint" and stored as U+FFFD.  ">" reports
    "incorrect-comment", and EOF reports "eof-in-comment"; both queue the
    comment token and return to dataState.  Any other character is
    appended to the comment and the tokenizer moves to commentState.
    Always returns True.
    """
    char = self.stream.char()
    if char == "-":
        self.state = self.commentStartDashState
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["data"] += "\uFFFD"
    elif char == ">":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "incorrect-comment",
        })
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-comment",
        })
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    else:
        self.currentToken["data"] += char
        self.state = self.commentState
    return True
-
def commentStartDashState(self):
    """Handle the character after a "-" at the very start of a comment.

    A second "-" moves to commentEndState.  U+0000 is reported as
    "invalid-codepoint" and "-" plus U+FFFD is appended.  ">" reports
    "incorrect-comment", and EOF reports "eof-in-comment"; both queue the
    comment token and return to dataState.  Any other character is
    appended after a "-" and the tokenizer moves to commentState.  Always
    returns True.
    """
    char = self.stream.char()
    if char == "-":
        self.state = self.commentEndState
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["data"] += "-\uFFFD"
    elif char == ">":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "incorrect-comment",
        })
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-comment",
        })
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    else:
        self.currentToken["data"] += "-" + char
        self.state = self.commentState
    return True
-
def commentState(self):
    """Read a chunk of comment text.

    "-" moves to commentEndDashState.  U+0000 is reported as
    "invalid-codepoint" and stored as U+FFFD.  EOF reports
    "eof-in-comment", queues the comment token and returns to dataState.
    Any other character is appended together with everything up to the
    next "-" or U+0000.  Always returns True.
    """
    char = self.stream.char()
    if char == "-":
        self.state = self.commentEndDashState
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["data"] += "\uFFFD"
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-comment",
        })
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    else:
        comment = self.currentToken
        comment["data"] += char + self.stream.charsUntil(("-", "\u0000"))
    return True
-
def commentEndDashState(self):
    """Handle the character after a single "-" inside a comment.

    A second "-" moves to commentEndState.  U+0000 is reported as
    "invalid-codepoint", "-" plus U+FFFD is appended and the tokenizer
    returns to commentState.  EOF reports "eof-in-comment-end-dash",
    queues the comment token and returns to dataState.  Any other
    character is appended after a "-" and the tokenizer returns to
    commentState.  Always returns True.
    """
    char = self.stream.char()
    if char == "-":
        self.state = self.commentEndState
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["data"] += "-\uFFFD"
        self.state = self.commentState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-comment-end-dash",
        })
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    else:
        self.currentToken["data"] += "-" + char
        self.state = self.commentState
    return True
-
def commentEndState(self):
    """Handle the character after "--" inside a comment.

    ">" queues the comment token and returns to dataState.  U+0000 is
    reported as "invalid-codepoint", "--" plus U+FFFD is appended and the
    tokenizer returns to commentState.  "!" reports
    "unexpected-bang-after-double-dash-in-comment" and moves to
    commentEndBangState.  A further "-" reports
    "unexpected-dash-after-double-dash-in-comment" and is appended.  EOF
    reports "eof-in-comment-double-dash", queues the token and returns to
    dataState.  Anything else reports "unexpected-char-in-comment", is
    appended after "--" and returns to commentState.  Always returns
    True.
    """
    char = self.stream.char()
    if char == ">":
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["data"] += "--\uFFFD"
        self.state = self.commentState
    elif char == "!":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-bang-after-double-dash-in-comment",
        })
        self.state = self.commentEndBangState
    elif char == "-":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-dash-after-double-dash-in-comment",
        })
        self.currentToken["data"] += char
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-comment-double-dash",
        })
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    else:
        # XXX
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-char-in-comment",
        })
        self.currentToken["data"] += "--" + char
        self.state = self.commentState
    return True
-
def commentEndBangState(self):
    """Handle the character after "--!" inside a comment.

    ">" queues the comment token and returns to dataState.  "-" appends
    "--!" and moves to commentEndDashState.  U+0000 is reported as
    "invalid-codepoint", "--!" plus U+FFFD is appended and the tokenizer
    returns to commentState.  EOF reports "eof-in-comment-end-bang-state",
    queues the token and returns to dataState.  Any other character is
    appended after "--!" and the tokenizer returns to commentState.
    Always returns True.
    """
    char = self.stream.char()
    if char == ">":
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    elif char == "-":
        self.currentToken["data"] += "--!"
        self.state = self.commentEndDashState
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["data"] += "--!\uFFFD"
        self.state = self.commentState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-comment-end-bang-state",
        })
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    else:
        self.currentToken["data"] += "--!" + char
        self.state = self.commentState
    return True
-
def doctypeState(self):
    """Handle the character right after the DOCTYPE keyword.

    Whitespace moves to beforeDoctypeNameState.  EOF reports
    "expected-doctype-name-but-got-eof", marks the doctype token as not
    correct, queues it and returns to dataState.  Anything else reports
    "need-space-after-doctype", is pushed back and is reprocessed by
    beforeDoctypeNameState.  Always returns True.
    """
    char = self.stream.char()
    if char in spaceCharacters:
        self.state = self.beforeDoctypeNameState
        return True
    if char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "expected-doctype-name-but-got-eof",
        })
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
        return True
    self.tokenQueue.append({
        "type": tokenTypes["ParseError"],
        "data": "need-space-after-doctype",
    })
    self.stream.unget(char)
    self.state = self.beforeDoctypeNameState
    return True
-
def beforeDoctypeNameState(self):
    """Skip whitespace and read the first character of the doctype name.

    Whitespace is ignored.  ">" reports
    "expected-doctype-name-but-got-right-bracket" and EOF reports
    "expected-doctype-name-but-got-eof"; both mark the token as not
    correct, queue it and return to dataState.  U+0000 is reported as
    "invalid-codepoint" and the name starts as U+FFFD.  Any other
    character becomes the start of the name.  The last two cases move to
    doctypeNameState.  Always returns True.
    """
    char = self.stream.char()
    if char in spaceCharacters:
        return True
    if char == ">":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "expected-doctype-name-but-got-right-bracket",
        })
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["name"] = "\uFFFD"
        self.state = self.doctypeNameState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "expected-doctype-name-but-got-eof",
        })
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    else:
        self.currentToken["name"] = char
        self.state = self.doctypeNameState
    return True
-
def doctypeNameState(self):
    """Accumulate the doctype name one character at a time.

    Whitespace lower-cases the name and moves to afterDoctypeNameState.
    ">" lower-cases the name, queues the token and returns to dataState.
    U+0000 is reported as "invalid-codepoint" and appended as U+FFFD.
    EOF reports "eof-in-doctype-name", marks the token as not correct,
    lower-cases the name, queues the token and returns to dataState.  Any
    other character is appended to the name.  Always returns True.
    """
    char = self.stream.char()
    token = self.currentToken
    if char in spaceCharacters:
        token["name"] = token["name"].translate(asciiUpper2Lower)
        self.state = self.afterDoctypeNameState
    elif char == ">":
        token["name"] = token["name"].translate(asciiUpper2Lower)
        self.tokenQueue.append(token)
        self.state = self.dataState
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        token["name"] += "\uFFFD"
        self.state = self.doctypeNameState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-doctype-name",
        })
        token["correct"] = False
        token["name"] = token["name"].translate(asciiUpper2Lower)
        self.tokenQueue.append(token)
        self.state = self.dataState
    else:
        token["name"] += char
    return True
-
def afterDoctypeNameState(self):
    """Look for ">", PUBLIC or SYSTEM after the doctype name.

    Whitespace is ignored.  ">" queues the token and returns to
    dataState.  EOF marks the token as not correct, reports
    "eof-in-doctype", queues the token and returns to dataState.  The
    keyword "public" or "system" (any letter case) moves to the matching
    after-keyword state.  Anything else reports
    "expected-space-or-right-bracket-in-doctype", marks the token as not
    correct and moves to bogusDoctypeState.  Always returns True.
    """
    char = self.stream.char()
    if char in spaceCharacters:
        return True
    if char == ">":
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
        return True
    if char is EOF:
        self.currentToken["correct"] = False
        self.stream.unget(char)
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-doctype",
        })
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
        return True

    if char in ("p", "P"):
        for expected in (("u", "U"), ("b", "B"), ("l", "L"),
                         ("i", "I"), ("c", "C")):
            char = self.stream.char()
            if char not in expected:
                break
        else:
            self.state = self.afterDoctypePublicKeywordState
            return True
    elif char in ("s", "S"):
        for expected in (("y", "Y"), ("s", "S"), ("t", "T"),
                         ("e", "E"), ("m", "M")):
            char = self.stream.char()
            if char not in expected:
                break
        else:
            self.state = self.afterDoctypeSystemKeywordState
            return True

    # Characters consumed before the current one were all letters, so
    # they are junk inside the bogus doctype and can be dropped.  Only
    # the most recent character might be ">" or EOF, so only it is
    # pushed back.
    self.stream.unget(char)
    self.tokenQueue.append({
        "type": tokenTypes["ParseError"],
        "data": "expected-space-or-right-bracket-in-doctype",
        "datavars": {"data": char},
    })
    self.currentToken["correct"] = False
    self.state = self.bogusDoctypeState
    return True
-
def afterDoctypePublicKeywordState(self):
    """Handle the character right after the PUBLIC keyword.

    Whitespace moves to beforeDoctypePublicIdentifierState.  A quote
    reports "unexpected-char-in-doctype", is pushed back and is
    reprocessed by that same state.  EOF reports "eof-in-doctype", marks
    the token as not correct, queues it and returns to dataState.
    Anything else is pushed back and reprocessed by
    beforeDoctypePublicIdentifierState.  Always returns True.
    """
    char = self.stream.char()
    if char in spaceCharacters:
        self.state = self.beforeDoctypePublicIdentifierState
    elif char in ("'", '"'):
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-char-in-doctype",
        })
        self.stream.unget(char)
        self.state = self.beforeDoctypePublicIdentifierState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-doctype",
        })
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    else:
        self.stream.unget(char)
        self.state = self.beforeDoctypePublicIdentifierState
    return True
-
def beforeDoctypePublicIdentifierState(self):
    """Skip whitespace and open the doctype public identifier.

    Whitespace is ignored.  A double or single quote resets publicId to
    "" and moves to the matching quoted-identifier state.  ">" reports
    "unexpected-end-of-doctype" and EOF reports "eof-in-doctype"; both
    mark the token as not correct, queue it and return to dataState.
    Anything else reports "unexpected-char-in-doctype", marks the token
    as not correct and moves to bogusDoctypeState.  Always returns True.
    """
    char = self.stream.char()
    if char in spaceCharacters:
        return True
    if char == '"':
        self.currentToken["publicId"] = ""
        self.state = self.doctypePublicIdentifierDoubleQuotedState
    elif char == "'":
        self.currentToken["publicId"] = ""
        self.state = self.doctypePublicIdentifierSingleQuotedState
    elif char == ">":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-end-of-doctype",
        })
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-doctype",
        })
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    else:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-char-in-doctype",
        })
        self.currentToken["correct"] = False
        self.state = self.bogusDoctypeState
    return True
-
def doctypePublicIdentifierDoubleQuotedState(self):
    """Read one character of a double-quoted doctype public identifier.

    The closing quote moves to afterDoctypePublicIdentifierState.  U+0000
    is reported as "invalid-codepoint" and appended as U+FFFD.  ">"
    reports "unexpected-end-of-doctype" and EOF reports "eof-in-doctype";
    both mark the token as not correct, queue it and return to dataState.
    Any other character is appended to publicId.  Always returns True.
    """
    char = self.stream.char()
    if char == '"':
        self.state = self.afterDoctypePublicIdentifierState
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["publicId"] += "\uFFFD"
    elif char == ">":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-end-of-doctype",
        })
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-doctype",
        })
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    else:
        self.currentToken["publicId"] += char
    return True
-
def doctypePublicIdentifierSingleQuotedState(self):
    """Read one character of a single-quoted doctype public identifier.

    The closing quote moves to afterDoctypePublicIdentifierState.  U+0000
    is reported as "invalid-codepoint" and appended as U+FFFD.  ">"
    reports "unexpected-end-of-doctype" and EOF reports "eof-in-doctype";
    both mark the token as not correct, queue it and return to dataState.
    Any other character is appended to publicId.  Always returns True.
    """
    char = self.stream.char()
    if char == "'":
        self.state = self.afterDoctypePublicIdentifierState
    elif char == "\u0000":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "invalid-codepoint",
        })
        self.currentToken["publicId"] += "\uFFFD"
    elif char == ">":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-end-of-doctype",
        })
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-doctype",
        })
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    else:
        self.currentToken["publicId"] += char
    return True
-
def afterDoctypePublicIdentifierState(self):
    """Handle the character right after the closing public-identifier quote.

    Whitespace moves to betweenDoctypePublicAndSystemIdentifiersState.
    ">" queues the token and returns to dataState.  A quote reports
    "unexpected-char-in-doctype", resets systemId to "" and moves to the
    matching system-identifier state.  EOF reports "eof-in-doctype",
    marks the token as not correct, queues it and returns to dataState.
    Anything else reports "unexpected-char-in-doctype", marks the token
    as not correct and moves to bogusDoctypeState.  Always returns True.
    """
    char = self.stream.char()
    if char in spaceCharacters:
        self.state = self.betweenDoctypePublicAndSystemIdentifiersState
    elif char == ">":
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    elif char == '"':
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-char-in-doctype",
        })
        self.currentToken["systemId"] = ""
        self.state = self.doctypeSystemIdentifierDoubleQuotedState
    elif char == "'":
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-char-in-doctype",
        })
        self.currentToken["systemId"] = ""
        self.state = self.doctypeSystemIdentifierSingleQuotedState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-doctype",
        })
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    else:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-char-in-doctype",
        })
        self.currentToken["correct"] = False
        self.state = self.bogusDoctypeState
    return True
-
def betweenDoctypePublicAndSystemIdentifiersState(self):
    """Handle input between the doctype public and system identifiers.

    Transitions, in the order they are tested:
      * whitespace -> ignored
      * ">"        -> queue the doctype token and go to dataState
      * '"' / "'"  -> reset systemId to "" and go to the matching
                      quoted system-identifier state
      * EOF        -> emit an "eof-in-doctype" parse error, mark the token
                      as not correct, queue it and go to dataState
      * otherwise  -> emit an "unexpected-char-in-doctype" parse error,
                      mark the token as not correct and go to
                      bogusDoctypeState

    EOF is compared by identity, matching the other doctype handlers.
    Always returns True.
    """
    data = self.stream.char()
    if data in spaceCharacters:
        pass
    elif data == ">":
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    elif data == '"':
        self.currentToken["systemId"] = ""
        self.state = self.doctypeSystemIdentifierDoubleQuotedState
    elif data == "'":
        self.currentToken["systemId"] = ""
        self.state = self.doctypeSystemIdentifierSingleQuotedState
    elif data is EOF:
        self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                "eof-in-doctype"})
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    else:
        self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                "unexpected-char-in-doctype"})
        self.currentToken["correct"] = False
        self.state = self.bogusDoctypeState
    return True
-
def afterDoctypeSystemKeywordState(self):
    """Handle the character right after the SYSTEM keyword.

    Whitespace moves to beforeDoctypeSystemIdentifierState.  A quote
    reports "unexpected-char-in-doctype", is pushed back and is
    reprocessed by that same state.  EOF reports "eof-in-doctype", marks
    the token as not correct, queues it and returns to dataState.
    Anything else is pushed back and reprocessed by
    beforeDoctypeSystemIdentifierState.  Always returns True.
    """
    char = self.stream.char()
    if char in spaceCharacters:
        self.state = self.beforeDoctypeSystemIdentifierState
    elif char in ("'", '"'):
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "unexpected-char-in-doctype",
        })
        self.stream.unget(char)
        self.state = self.beforeDoctypeSystemIdentifierState
    elif char is EOF:
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-doctype",
        })
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
    else:
        self.stream.unget(char)
        self.state = self.beforeDoctypeSystemIdentifierState
    return True
-
- def beforeDoctypeSystemIdentifierState(self):
- data = self.stream.char()
- if data in spaceCharacters:
- pass
- elif data == "\"":
- self.currentToken["systemId"] = ""
- self.state = self.doctypeSystemIdentifierDoubleQuotedState
- elif data == "'":
- self.currentToken["systemId"] = ""
- self.state = self.doctypeSystemIdentifierSingleQuotedState
- elif data == ">":
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "unexpected-char-in-doctype"})
- self.currentToken["correct"] = False
- self.tokenQueue.append(self.currentToken)
- self.state = self.dataState
- elif data is EOF:
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "eof-in-doctype"})
- self.currentToken["correct"] = False
- self.tokenQueue.append(self.currentToken)
- self.state = self.dataState
- else:
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "unexpected-char-in-doctype"})
- self.currentToken["correct"] = False
- self.state = self.bogusDoctypeState
- return True
-
- def doctypeSystemIdentifierDoubleQuotedState(self):
- data = self.stream.char()
- if data == "\"":
- self.state = self.afterDoctypeSystemIdentifierState
- elif data == "\u0000":
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data": "invalid-codepoint"})
- self.currentToken["systemId"] += "\uFFFD"
- elif data == ">":
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "unexpected-end-of-doctype"})
- self.currentToken["correct"] = False
- self.tokenQueue.append(self.currentToken)
- self.state = self.dataState
- elif data is EOF:
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "eof-in-doctype"})
- self.currentToken["correct"] = False
- self.tokenQueue.append(self.currentToken)
- self.state = self.dataState
- else:
- self.currentToken["systemId"] += data
- return True
-
- def doctypeSystemIdentifierSingleQuotedState(self):
- data = self.stream.char()
- if data == "'":
- self.state = self.afterDoctypeSystemIdentifierState
- elif data == "\u0000":
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data": "invalid-codepoint"})
- self.currentToken["systemId"] += "\uFFFD"
- elif data == ">":
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "unexpected-end-of-doctype"})
- self.currentToken["correct"] = False
- self.tokenQueue.append(self.currentToken)
- self.state = self.dataState
- elif data is EOF:
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "eof-in-doctype"})
- self.currentToken["correct"] = False
- self.tokenQueue.append(self.currentToken)
- self.state = self.dataState
- else:
- self.currentToken["systemId"] += data
- return True
-
- def afterDoctypeSystemIdentifierState(self):
- data = self.stream.char()
- if data in spaceCharacters:
- pass
- elif data == ">":
- self.tokenQueue.append(self.currentToken)
- self.state = self.dataState
- elif data is EOF:
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "eof-in-doctype"})
- self.currentToken["correct"] = False
- self.tokenQueue.append(self.currentToken)
- self.state = self.dataState
- else:
- self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
- "unexpected-char-in-doctype"})
- self.state = self.bogusDoctypeState
- return True
-
- def bogusDoctypeState(self):
- data = self.stream.char()
- if data == ">":
- self.tokenQueue.append(self.currentToken)
- self.state = self.dataState
- elif data is EOF:
- # XXX EMIT
- self.stream.unget(data)
- self.tokenQueue.append(self.currentToken)
- self.state = self.dataState
- else:
- pass
- return True
-
- def cdataSectionState(self):
- data = []
- while True:
- data.append(self.stream.charsUntil("]"))
- data.append(self.stream.charsUntil(">"))
- char = self.stream.char()
- if char == EOF:
- break
- else:
- assert char == ">"
- if data[-1][-2:] == "]]":
- data[-1] = data[-1][:-2]
- break
- else:
- data.append(char)
-
- data = "".join(data)
- # Deal with null here rather than in the parser
- nullCount = data.count("\u0000")
- if nullCount > 0:
- for i in range(nullCount):
- self.tokenQueue.append({"type": tokenTypes["ParseError"],
- "data": "invalid-codepoint"})
- data = data.replace("\u0000", "\uFFFD")
- if data:
- self.tokenQueue.append({"type": tokenTypes["Characters"],
- "data": data})
- self.state = self.dataState
- return True
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/__init__.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/__init__.py
deleted file mode 100644
index 6a6b2a4..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/__init__.py
+++ /dev/null
@@ -1,76 +0,0 @@
-"""A collection of modules for building different kinds of tree from
-HTML documents.
-
-To create a treebuilder for a new type of tree, you need to do
-implement several things:
-
-1) A set of classes for various types of elements: Document, Doctype,
-Comment, Element. These must implement the interface of
-_base.treebuilders.Node (although comment nodes have a different
-signature for their constructor, see treebuilders.etree.Comment)
-Textual content may also be implemented as another node type, or not, as
-your tree implementation requires.
-
-2) A treebuilder object (called TreeBuilder by convention) that
-inherits from treebuilders._base.TreeBuilder. This has 4 required attributes:
-documentClass - the class to use for the bottommost node of a document
-elementClass - the class to use for HTML Elements
-commentClass - the class to use for comments
-doctypeClass - the class to use for doctypes
-It also has one required method:
-getDocument - Returns the root node of the complete document tree
-
-3) If you wish to run the unit tests, you must also create a
-testSerializer method on your treebuilder which accepts a node and
-returns a string containing Node and its children serialized according
-to the format used in the unittests
-"""
-
-from __future__ import absolute_import, division, unicode_literals
-
-from ..utils import default_etree
-
-treeBuilderCache = {}
-
-
-def getTreeBuilder(treeType, implementation=None, **kwargs):
- """Get a TreeBuilder class for various types of tree with built-in support
-
- treeType - the name of the tree type required (case-insensitive). Supported
- values are:
-
- "dom" - A generic builder for DOM implementations, defaulting to
- a xml.dom.minidom based implementation.
- "etree" - A generic builder for tree implementations exposing an
- ElementTree-like interface, defaulting to
- xml.etree.cElementTree if available and
- xml.etree.ElementTree if not.
- "lxml" - A etree-based builder for lxml.etree, handling
- limitations of lxml's implementation.
-
- implementation - (Currently applies to the "etree" and "dom" tree types). A
- module implementing the tree type e.g.
- xml.etree.ElementTree or xml.etree.cElementTree."""
-
- treeType = treeType.lower()
- if treeType not in treeBuilderCache:
- if treeType == "dom":
- from . import dom
- # Come up with a sane default (pref. from the stdlib)
- if implementation is None:
- from xml.dom import minidom
- implementation = minidom
- # NEVER cache here, caching is done in the dom submodule
- return dom.getDomModule(implementation, **kwargs).TreeBuilder
- elif treeType == "lxml":
- from . import etree_lxml
- treeBuilderCache[treeType] = etree_lxml.TreeBuilder
- elif treeType == "etree":
- from . import etree
- if implementation is None:
- implementation = default_etree
- # NEVER cache here, caching is done in the etree submodule
- return etree.getETreeModule(implementation, **kwargs).TreeBuilder
- else:
- raise ValueError("""Unrecognised treebuilder "%s" """ % treeType)
- return treeBuilderCache.get(treeType)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/_base.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/_base.py
deleted file mode 100644
index 970c9ad..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/_base.py
+++ /dev/null
@@ -1,377 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
-
-from ..constants import scopingElements, tableInsertModeElements, namespaces
-
-# The scope markers are inserted when entering object elements,
-# marquees, table cells, and table captions, and are used to prevent formatting
-# from "leaking" into tables, object elements, and marquees.
-Marker = None
-
-listElementsMap = {
- None: (frozenset(scopingElements), False),
- "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
- "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"),
- (namespaces["html"], "ul")])), False),
- "table": (frozenset([(namespaces["html"], "html"),
- (namespaces["html"], "table")]), False),
- "select": (frozenset([(namespaces["html"], "optgroup"),
- (namespaces["html"], "option")]), True)
-}
-
-
-class Node(object):
- def __init__(self, name):
- """Node representing an item in the tree.
- name - The tag name associated with the node
- parent - The parent of the current node (or None for the document node)
- value - The value of the current node (applies to text nodes and
- comments
- attributes - a dict holding name, value pairs for attributes of the node
- childNodes - a list of child nodes of the current node. This must
- include all elements but not necessarily other node types
- _flags - A list of miscellaneous flags that can be set on the node
- """
- self.name = name
- self.parent = None
- self.value = None
- self.attributes = {}
- self.childNodes = []
- self._flags = []
-
- def __str__(self):
- attributesStr = " ".join(["%s=\"%s\"" % (name, value)
- for name, value in
- self.attributes.items()])
- if attributesStr:
- return "<%s %s>" % (self.name, attributesStr)
- else:
- return "<%s>" % (self.name)
-
- def __repr__(self):
- return "<%s>" % (self.name)
-
- def appendChild(self, node):
- """Insert node as a child of the current node
- """
- raise NotImplementedError
-
- def insertText(self, data, insertBefore=None):
- """Insert data as text in the current node, positioned before the
- start of node insertBefore or to the end of the node's text.
- """
- raise NotImplementedError
-
- def insertBefore(self, node, refNode):
- """Insert node as a child of the current node, before refNode in the
- list of child nodes. Raises ValueError if refNode is not a child of
- the current node"""
- raise NotImplementedError
-
- def removeChild(self, node):
- """Remove node from the children of the current node
- """
- raise NotImplementedError
-
- def reparentChildren(self, newParent):
- """Move all the children of the current node to newParent.
- This is needed so that trees that don't store text as nodes move the
- text in the correct way
- """
- # XXX - should this method be made more general?
- for child in self.childNodes:
- newParent.appendChild(child)
- self.childNodes = []
-
- def cloneNode(self):
- """Return a shallow copy of the current node i.e. a node with the same
- name and attributes but with no parent or child nodes
- """
- raise NotImplementedError
-
- def hasContent(self):
- """Return true if the node has children or text, false otherwise
- """
- raise NotImplementedError
-
-
-class ActiveFormattingElements(list):
- def append(self, node):
- equalCount = 0
- if node != Marker:
- for element in self[::-1]:
- if element == Marker:
- break
- if self.nodesEqual(element, node):
- equalCount += 1
- if equalCount == 3:
- self.remove(element)
- break
- list.append(self, node)
-
- def nodesEqual(self, node1, node2):
- if not node1.nameTuple == node2.nameTuple:
- return False
-
- if not node1.attributes == node2.attributes:
- return False
-
- return True
-
-
-class TreeBuilder(object):
- """Base treebuilder implementation
- documentClass - the class to use for the bottommost node of a document
- elementClass - the class to use for HTML Elements
- commentClass - the class to use for comments
- doctypeClass - the class to use for doctypes
- """
-
- # Document class
- documentClass = None
-
- # The class to use for creating a node
- elementClass = None
-
- # The class to use for creating comments
- commentClass = None
-
- # The class to use for creating doctypes
- doctypeClass = None
-
- # Fragment class
- fragmentClass = None
-
- def __init__(self, namespaceHTMLElements):
- if namespaceHTMLElements:
- self.defaultNamespace = "http://www.w3.org/1999/xhtml"
- else:
- self.defaultNamespace = None
- self.reset()
-
- def reset(self):
- self.openElements = []
- self.activeFormattingElements = ActiveFormattingElements()
-
- # XXX - rename these to headElement, formElement
- self.headPointer = None
- self.formPointer = None
-
- self.insertFromTable = False
-
- self.document = self.documentClass()
-
- def elementInScope(self, target, variant=None):
-
- # If we pass a node in we match that. if we pass a string
- # match any node with that name
- exactNode = hasattr(target, "nameTuple")
-
- listElements, invert = listElementsMap[variant]
-
- for node in reversed(self.openElements):
- if (node.name == target and not exactNode or
- node == target and exactNode):
- return True
- elif (invert ^ (node.nameTuple in listElements)):
- return False
-
- assert False # We should never reach this point
-
- def reconstructActiveFormattingElements(self):
- # Within this algorithm the order of steps described in the
- # specification is not quite the same as the order of steps in the
- # code. It should still do the same though.
-
- # Step 1: stop the algorithm when there's nothing to do.
- if not self.activeFormattingElements:
- return
-
- # Step 2 and step 3: we start with the last element. So i is -1.
- i = len(self.activeFormattingElements) - 1
- entry = self.activeFormattingElements[i]
- if entry == Marker or entry in self.openElements:
- return
-
- # Step 6
- while entry != Marker and entry not in self.openElements:
- if i == 0:
- # This will be reset to 0 below
- i = -1
- break
- i -= 1
- # Step 5: let entry be one earlier in the list.
- entry = self.activeFormattingElements[i]
-
- while True:
- # Step 7
- i += 1
-
- # Step 8
- entry = self.activeFormattingElements[i]
- clone = entry.cloneNode() # Mainly to get a new copy of the attributes
-
- # Step 9
- element = self.insertElement({"type": "StartTag",
- "name": clone.name,
- "namespace": clone.namespace,
- "data": clone.attributes})
-
- # Step 10
- self.activeFormattingElements[i] = element
-
- # Step 11
- if element == self.activeFormattingElements[-1]:
- break
-
- def clearActiveFormattingElements(self):
- entry = self.activeFormattingElements.pop()
- while self.activeFormattingElements and entry != Marker:
- entry = self.activeFormattingElements.pop()
-
- def elementInActiveFormattingElements(self, name):
- """Check if an element exists between the end of the active
- formatting elements and the last marker. If it does, return it, else
- return false"""
-
- for item in self.activeFormattingElements[::-1]:
- # Check for Marker first because if it's a Marker it doesn't have a
- # name attribute.
- if item == Marker:
- break
- elif item.name == name:
- return item
- return False
-
- def insertRoot(self, token):
- element = self.createElement(token)
- self.openElements.append(element)
- self.document.appendChild(element)
-
- def insertDoctype(self, token):
- name = token["name"]
- publicId = token["publicId"]
- systemId = token["systemId"]
-
- doctype = self.doctypeClass(name, publicId, systemId)
- self.document.appendChild(doctype)
-
- def insertComment(self, token, parent=None):
- if parent is None:
- parent = self.openElements[-1]
- parent.appendChild(self.commentClass(token["data"]))
-
- def createElement(self, token):
- """Create an element but don't insert it anywhere"""
- name = token["name"]
- namespace = token.get("namespace", self.defaultNamespace)
- element = self.elementClass(name, namespace)
- element.attributes = token["data"]
- return element
-
- def _getInsertFromTable(self):
- return self._insertFromTable
-
- def _setInsertFromTable(self, value):
- """Switch the function used to insert an element from the
- normal one to the misnested table one and back again"""
- self._insertFromTable = value
- if value:
- self.insertElement = self.insertElementTable
- else:
- self.insertElement = self.insertElementNormal
-
- insertFromTable = property(_getInsertFromTable, _setInsertFromTable)
-
- def insertElementNormal(self, token):
- name = token["name"]
- assert isinstance(name, text_type), "Element %s not unicode" % name
- namespace = token.get("namespace", self.defaultNamespace)
- element = self.elementClass(name, namespace)
- element.attributes = token["data"]
- self.openElements[-1].appendChild(element)
- self.openElements.append(element)
- return element
-
- def insertElementTable(self, token):
- """Create an element and insert it into the tree"""
- element = self.createElement(token)
- if self.openElements[-1].name not in tableInsertModeElements:
- return self.insertElementNormal(token)
- else:
- # We should be in the InTable mode. This means we want to do
- # special magic element rearranging
- parent, insertBefore = self.getTableMisnestedNodePosition()
- if insertBefore is None:
- parent.appendChild(element)
- else:
- parent.insertBefore(element, insertBefore)
- self.openElements.append(element)
- return element
-
- def insertText(self, data, parent=None):
- """Insert text data."""
- if parent is None:
- parent = self.openElements[-1]
-
- if (not self.insertFromTable or (self.insertFromTable and
- self.openElements[-1].name
- not in tableInsertModeElements)):
- parent.insertText(data)
- else:
- # We should be in the InTable mode. This means we want to do
- # special magic element rearranging
- parent, insertBefore = self.getTableMisnestedNodePosition()
- parent.insertText(data, insertBefore)
-
- def getTableMisnestedNodePosition(self):
- """Get the foster parent element, and sibling to insert before
- (or None) when inserting a misnested table node"""
- # The foster parent element is the one which comes before the most
- # recently opened table element
- # XXX - this is really inelegant
- lastTable = None
- fosterParent = None
- insertBefore = None
- for elm in self.openElements[::-1]:
- if elm.name == "table":
- lastTable = elm
- break
- if lastTable:
- # XXX - we should really check that this parent is actually a
- # node here
- if lastTable.parent:
- fosterParent = lastTable.parent
- insertBefore = lastTable
- else:
- fosterParent = self.openElements[
- self.openElements.index(lastTable) - 1]
- else:
- fosterParent = self.openElements[0]
- return fosterParent, insertBefore
-
- def generateImpliedEndTags(self, exclude=None):
- name = self.openElements[-1].name
- # XXX td, th and tr are not actually needed
- if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
- and name != exclude):
- self.openElements.pop()
- # XXX This is not entirely what the specification says. We should
- # investigate it more closely.
- self.generateImpliedEndTags(exclude)
-
- def getDocument(self):
- "Return the final tree"
- return self.document
-
- def getFragment(self):
- "Return the final fragment"
- # assert self.innerHTML
- fragment = self.fragmentClass()
- self.openElements[0].reparentChildren(fragment)
- return fragment
-
- def testSerializer(self, node):
- """Serialize the subtree of node in the format required by unit tests
- node - the node from which to start serializing"""
- raise NotImplementedError
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/dom.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/dom.py
deleted file mode 100644
index f9e0d76..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/dom.py
+++ /dev/null
@@ -1,290 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-
-from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE
-import weakref
-
-from . import _base
-from .. import constants
-from ..constants import namespaces
-from ..utils import moduleFactoryFactory
-
-
-def getDomBuilder(DomImplementation):
- Dom = DomImplementation
-
- class AttrList(object):
- def __init__(self, element):
- self.element = element
-
- def __iter__(self):
- return list(self.element.attributes.items()).__iter__()
-
- def __setitem__(self, name, value):
- self.element.setAttribute(name, value)
-
- def __len__(self):
- return len(list(self.element.attributes.items()))
-
- def items(self):
- return [(item[0], item[1]) for item in
- list(self.element.attributes.items())]
-
- def keys(self):
- return list(self.element.attributes.keys())
-
- def __getitem__(self, name):
- return self.element.getAttribute(name)
-
- def __contains__(self, name):
- if isinstance(name, tuple):
- raise NotImplementedError
- else:
- return self.element.hasAttribute(name)
-
- class NodeBuilder(_base.Node):
- def __init__(self, element):
- _base.Node.__init__(self, element.nodeName)
- self.element = element
-
- namespace = property(lambda self: hasattr(self.element, "namespaceURI")
- and self.element.namespaceURI or None)
-
- def appendChild(self, node):
- node.parent = self
- self.element.appendChild(node.element)
-
- def insertText(self, data, insertBefore=None):
- text = self.element.ownerDocument.createTextNode(data)
- if insertBefore:
- self.element.insertBefore(text, insertBefore.element)
- else:
- self.element.appendChild(text)
-
- def insertBefore(self, node, refNode):
- self.element.insertBefore(node.element, refNode.element)
- node.parent = self
-
- def removeChild(self, node):
- if node.element.parentNode == self.element:
- self.element.removeChild(node.element)
- node.parent = None
-
- def reparentChildren(self, newParent):
- while self.element.hasChildNodes():
- child = self.element.firstChild
- self.element.removeChild(child)
- newParent.element.appendChild(child)
- self.childNodes = []
-
- def getAttributes(self):
- return AttrList(self.element)
-
- def setAttributes(self, attributes):
- if attributes:
- for name, value in list(attributes.items()):
- if isinstance(name, tuple):
- if name[0] is not None:
- qualifiedName = (name[0] + ":" + name[1])
- else:
- qualifiedName = name[1]
- self.element.setAttributeNS(name[2], qualifiedName,
- value)
- else:
- self.element.setAttribute(
- name, value)
- attributes = property(getAttributes, setAttributes)
-
- def cloneNode(self):
- return NodeBuilder(self.element.cloneNode(False))
-
- def hasContent(self):
- return self.element.hasChildNodes()
-
- def getNameTuple(self):
- if self.namespace is None:
- return namespaces["html"], self.name
- else:
- return self.namespace, self.name
-
- nameTuple = property(getNameTuple)
-
- class TreeBuilder(_base.TreeBuilder):
- def documentClass(self):
- self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
- return weakref.proxy(self)
-
- def insertDoctype(self, token):
- name = token["name"]
- publicId = token["publicId"]
- systemId = token["systemId"]
-
- domimpl = Dom.getDOMImplementation()
- doctype = domimpl.createDocumentType(name, publicId, systemId)
- self.document.appendChild(NodeBuilder(doctype))
- if Dom == minidom:
- doctype.ownerDocument = self.dom
-
- def elementClass(self, name, namespace=None):
- if namespace is None and self.defaultNamespace is None:
- node = self.dom.createElement(name)
- else:
- node = self.dom.createElementNS(namespace, name)
-
- return NodeBuilder(node)
-
- def commentClass(self, data):
- return NodeBuilder(self.dom.createComment(data))
-
- def fragmentClass(self):
- return NodeBuilder(self.dom.createDocumentFragment())
-
- def appendChild(self, node):
- self.dom.appendChild(node.element)
-
- def testSerializer(self, element):
- return testSerializer(element)
-
- def getDocument(self):
- return self.dom
-
- def getFragment(self):
- return _base.TreeBuilder.getFragment(self).element
-
- def insertText(self, data, parent=None):
- data = data
- if parent != self:
- _base.TreeBuilder.insertText(self, data, parent)
- else:
- # HACK: allow text nodes as children of the document node
- if hasattr(self.dom, '_child_node_types'):
- if not Node.TEXT_NODE in self.dom._child_node_types:
- self.dom._child_node_types = list(self.dom._child_node_types)
- self.dom._child_node_types.append(Node.TEXT_NODE)
- self.dom.appendChild(self.dom.createTextNode(data))
-
- implementation = DomImplementation
- name = None
-
- def testSerializer(element):
- element.normalize()
- rv = []
-
- def serializeElement(element, indent=0):
- if element.nodeType == Node.DOCUMENT_TYPE_NODE:
- if element.name:
- if element.publicId or element.systemId:
- publicId = element.publicId or ""
- systemId = element.systemId or ""
- rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
- (' ' * indent, element.name, publicId, systemId))
- else:
- rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, element.name))
- else:
- rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
- elif element.nodeType == Node.DOCUMENT_NODE:
- rv.append("#document")
- elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
- rv.append("#document-fragment")
- elif element.nodeType == Node.COMMENT_NODE:
- rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue))
- elif element.nodeType == Node.TEXT_NODE:
- rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue))
- else:
- if (hasattr(element, "namespaceURI") and
- element.namespaceURI is not None):
- name = "%s %s" % (constants.prefixes[element.namespaceURI],
- element.nodeName)
- else:
- name = element.nodeName
- rv.append("|%s<%s>" % (' ' * indent, name))
- if element.hasAttributes():
- attributes = []
- for i in range(len(element.attributes)):
- attr = element.attributes.item(i)
- name = attr.nodeName
- value = attr.value
- ns = attr.namespaceURI
- if ns:
- name = "%s %s" % (constants.prefixes[ns], attr.localName)
- else:
- name = attr.nodeName
- attributes.append((name, value))
-
- for name, value in sorted(attributes):
- rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
- indent += 2
- for child in element.childNodes:
- serializeElement(child, indent)
- serializeElement(element, 0)
-
- return "\n".join(rv)
-
- def dom2sax(node, handler, nsmap={'xml': XML_NAMESPACE}):
- if node.nodeType == Node.ELEMENT_NODE:
- if not nsmap:
- handler.startElement(node.nodeName, node.attributes)
- for child in node.childNodes:
- dom2sax(child, handler, nsmap)
- handler.endElement(node.nodeName)
- else:
- attributes = dict(node.attributes.itemsNS())
-
- # gather namespace declarations
- prefixes = []
- for attrname in list(node.attributes.keys()):
- attr = node.getAttributeNode(attrname)
- if (attr.namespaceURI == XMLNS_NAMESPACE or
- (attr.namespaceURI is None and attr.nodeName.startswith('xmlns'))):
- prefix = (attr.nodeName != 'xmlns' and attr.nodeName or None)
- handler.startPrefixMapping(prefix, attr.nodeValue)
- prefixes.append(prefix)
- nsmap = nsmap.copy()
- nsmap[prefix] = attr.nodeValue
- del attributes[(attr.namespaceURI, attr.nodeName)]
-
- # apply namespace declarations
- for attrname in list(node.attributes.keys()):
- attr = node.getAttributeNode(attrname)
- if attr.namespaceURI is None and ':' in attr.nodeName:
- prefix = attr.nodeName.split(':')[0]
- if prefix in nsmap:
- del attributes[(attr.namespaceURI, attr.nodeName)]
- attributes[(nsmap[prefix], attr.nodeName)] = attr.nodeValue
-
- # SAX events
- ns = node.namespaceURI or nsmap.get(None, None)
- handler.startElementNS((ns, node.nodeName), node.nodeName, attributes)
- for child in node.childNodes:
- dom2sax(child, handler, nsmap)
- handler.endElementNS((ns, node.nodeName), node.nodeName)
- for prefix in prefixes:
- handler.endPrefixMapping(prefix)
-
- elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]:
- handler.characters(node.nodeValue)
-
- elif node.nodeType == Node.DOCUMENT_NODE:
- handler.startDocument()
- for child in node.childNodes:
- dom2sax(child, handler, nsmap)
- handler.endDocument()
-
- elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
- for child in node.childNodes:
- dom2sax(child, handler, nsmap)
-
- else:
- # ATTRIBUTE_NODE
- # ENTITY_NODE
- # PROCESSING_INSTRUCTION_NODE
- # COMMENT_NODE
- # DOCUMENT_TYPE_NODE
- # NOTATION_NODE
- pass
-
- return locals()
-
-
-# The actual means to get a module!
-getDomModule = moduleFactoryFactory(getDomBuilder)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree.py
deleted file mode 100644
index 48fead7..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree.py
+++ /dev/null
@@ -1,337 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
-
-import re
-
-from . import _base
-from .. import ihatexml
-from .. import constants
-from ..constants import namespaces
-from ..utils import moduleFactoryFactory
-
-tag_regexp = re.compile("{([^}]*)}(.*)")
-
-
-def getETreeBuilder(ElementTreeImplementation, fullTree=False):
- ElementTree = ElementTreeImplementation
- ElementTreeCommentType = ElementTree.Comment("asd").tag
-
- class Element(_base.Node):
- def __init__(self, name, namespace=None):
- self._name = name
- self._namespace = namespace
- self._element = ElementTree.Element(self._getETreeTag(name,
- namespace))
- if namespace is None:
- self.nameTuple = namespaces["html"], self._name
- else:
- self.nameTuple = self._namespace, self._name
- self.parent = None
- self._childNodes = []
- self._flags = []
-
- def _getETreeTag(self, name, namespace):
- if namespace is None:
- etree_tag = name
- else:
- etree_tag = "{%s}%s" % (namespace, name)
- return etree_tag
-
- def _setName(self, name):
- self._name = name
- self._element.tag = self._getETreeTag(self._name, self._namespace)
-
- def _getName(self):
- return self._name
-
- name = property(_getName, _setName)
-
- def _setNamespace(self, namespace):
- self._namespace = namespace
- self._element.tag = self._getETreeTag(self._name, self._namespace)
-
- def _getNamespace(self):
- return self._namespace
-
- namespace = property(_getNamespace, _setNamespace)
-
- def _getAttributes(self):
- return self._element.attrib
-
- def _setAttributes(self, attributes):
- # Delete existing attributes first
- # XXX - there may be a better way to do this...
- for key in list(self._element.attrib.keys()):
- del self._element.attrib[key]
- for key, value in attributes.items():
- if isinstance(key, tuple):
- name = "{%s}%s" % (key[2], key[1])
- else:
- name = key
- self._element.set(name, value)
-
- attributes = property(_getAttributes, _setAttributes)
-
- def _getChildNodes(self):
- return self._childNodes
-
- def _setChildNodes(self, value):
- del self._element[:]
- self._childNodes = []
- for element in value:
- self.insertChild(element)
-
- childNodes = property(_getChildNodes, _setChildNodes)
-
- def hasContent(self):
- """Return true if the node has children or text"""
- return bool(self._element.text or len(self._element))
-
- def appendChild(self, node):
- self._childNodes.append(node)
- self._element.append(node._element)
- node.parent = self
-
- def insertBefore(self, node, refNode):
- index = list(self._element).index(refNode._element)
- self._element.insert(index, node._element)
- node.parent = self
-
- def removeChild(self, node):
- self._element.remove(node._element)
- node.parent = None
-
- def insertText(self, data, insertBefore=None):
- if not(len(self._element)):
- if not self._element.text:
- self._element.text = ""
- self._element.text += data
- elif insertBefore is None:
- # Insert the text as the tail of the last child element
- if not self._element[-1].tail:
- self._element[-1].tail = ""
- self._element[-1].tail += data
- else:
- # Insert the text before the specified node
- children = list(self._element)
- index = children.index(insertBefore._element)
- if index > 0:
- if not self._element[index - 1].tail:
- self._element[index - 1].tail = ""
- self._element[index - 1].tail += data
- else:
- if not self._element.text:
- self._element.text = ""
- self._element.text += data
-
- def cloneNode(self):
- element = type(self)(self.name, self.namespace)
- for name, value in self.attributes.items():
- element.attributes[name] = value
- return element
-
- def reparentChildren(self, newParent):
- if newParent.childNodes:
- newParent.childNodes[-1]._element.tail += self._element.text
- else:
- if not newParent._element.text:
- newParent._element.text = ""
- if self._element.text is not None:
- newParent._element.text += self._element.text
- self._element.text = ""
- _base.Node.reparentChildren(self, newParent)
-
- class Comment(Element):
- def __init__(self, data):
- # Use the superclass constructor to set all properties on the
- # wrapper element
- self._element = ElementTree.Comment(data)
- self.parent = None
- self._childNodes = []
- self._flags = []
-
- def _getData(self):
- return self._element.text
-
- def _setData(self, value):
- self._element.text = value
-
- data = property(_getData, _setData)
-
- class DocumentType(Element):
- def __init__(self, name, publicId, systemId):
- Element.__init__(self, "<!DOCTYPE>")
- self._element.text = name
- self.publicId = publicId
- self.systemId = systemId
-
- def _getPublicId(self):
- return self._element.get("publicId", "")
-
- def _setPublicId(self, value):
- if value is not None:
- self._element.set("publicId", value)
-
- publicId = property(_getPublicId, _setPublicId)
-
- def _getSystemId(self):
- return self._element.get("systemId", "")
-
- def _setSystemId(self, value):
- if value is not None:
- self._element.set("systemId", value)
-
- systemId = property(_getSystemId, _setSystemId)
-
- class Document(Element):
- def __init__(self):
- Element.__init__(self, "DOCUMENT_ROOT")
-
- class DocumentFragment(Element):
- def __init__(self):
- Element.__init__(self, "DOCUMENT_FRAGMENT")
-
- def testSerializer(element):
- rv = []
-
- def serializeElement(element, indent=0):
- if not(hasattr(element, "tag")):
- element = element.getroot()
- if element.tag == "<!DOCTYPE>":
- if element.get("publicId") or element.get("systemId"):
- publicId = element.get("publicId") or ""
- systemId = element.get("systemId") or ""
- rv.append("""<!DOCTYPE %s "%s" "%s">""" %
- (element.text, publicId, systemId))
- else:
- rv.append("<!DOCTYPE %s>" % (element.text,))
- elif element.tag == "DOCUMENT_ROOT":
- rv.append("#document")
- if element.text is not None:
- rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
- if element.tail is not None:
- raise TypeError("Document node cannot have tail")
- if hasattr(element, "attrib") and len(element.attrib):
- raise TypeError("Document node cannot have attributes")
- elif element.tag == ElementTreeCommentType:
- rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
- else:
- assert isinstance(element.tag, text_type), \
- "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
- nsmatch = tag_regexp.match(element.tag)
-
- if nsmatch is None:
- name = element.tag
- else:
- ns, name = nsmatch.groups()
- prefix = constants.prefixes[ns]
- name = "%s %s" % (prefix, name)
- rv.append("|%s<%s>" % (' ' * indent, name))
-
- if hasattr(element, "attrib"):
- attributes = []
- for name, value in element.attrib.items():
- nsmatch = tag_regexp.match(name)
- if nsmatch is not None:
- ns, name = nsmatch.groups()
- prefix = constants.prefixes[ns]
- attr_string = "%s %s" % (prefix, name)
- else:
- attr_string = name
- attributes.append((attr_string, value))
-
- for name, value in sorted(attributes):
- rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
- if element.text:
- rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
- indent += 2
- for child in element:
- serializeElement(child, indent)
- if element.tail:
- rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
- serializeElement(element, 0)
-
- return "\n".join(rv)
-
- def tostring(element):
- """Serialize an element and its child nodes to a string"""
- rv = []
- filter = ihatexml.InfosetFilter()
-
- def serializeElement(element):
- if isinstance(element, ElementTree.ElementTree):
- element = element.getroot()
-
- if element.tag == "<!DOCTYPE>":
- if element.get("publicId") or element.get("systemId"):
- publicId = element.get("publicId") or ""
- systemId = element.get("systemId") or ""
- rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
- (element.text, publicId, systemId))
- else:
- rv.append("<!DOCTYPE %s>" % (element.text,))
- elif element.tag == "DOCUMENT_ROOT":
- if element.text is not None:
- rv.append(element.text)
- if element.tail is not None:
- raise TypeError("Document node cannot have tail")
- if hasattr(element, "attrib") and len(element.attrib):
- raise TypeError("Document node cannot have attributes")
-
- for child in element:
- serializeElement(child)
-
- elif element.tag == ElementTreeCommentType:
- rv.append("<!--%s-->" % (element.text,))
- else:
- # This is assumed to be an ordinary element
- if not element.attrib:
- rv.append("<%s>" % (filter.fromXmlName(element.tag),))
- else:
- attr = " ".join(["%s=\"%s\"" % (
- filter.fromXmlName(name), value)
- for name, value in element.attrib.items()])
- rv.append("<%s %s>" % (element.tag, attr))
- if element.text:
- rv.append(element.text)
-
- for child in element:
- serializeElement(child)
-
- rv.append("</%s>" % (element.tag,))
-
- if element.tail:
- rv.append(element.tail)
-
- serializeElement(element)
-
- return "".join(rv)
-
- class TreeBuilder(_base.TreeBuilder):
- documentClass = Document
- doctypeClass = DocumentType
- elementClass = Element
- commentClass = Comment
- fragmentClass = DocumentFragment
- implementation = ElementTreeImplementation
-
- def testSerializer(self, element):
- return testSerializer(element)
-
- def getDocument(self):
- if fullTree:
- return self.document._element
- else:
- if self.defaultNamespace is not None:
- return self.document._element.find(
- "{%s}html" % self.defaultNamespace)
- else:
- return self.document._element.find("html")
-
- def getFragment(self):
- return _base.TreeBuilder.getFragment(self)._element
-
- return locals()
-
-
-getETreeModule = moduleFactoryFactory(getETreeBuilder)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py
deleted file mode 100644
index 35d08ef..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py
+++ /dev/null
@@ -1,369 +0,0 @@
-"""Module for supporting the lxml.etree library. The idea here is to use as much
-of the native library as possible, without using fragile hacks like custom element
-names that break between releases. The downside of this is that we cannot represent
-all possible trees; specifically the following are known to cause problems:
-
-Text or comments as siblings of the root element
-Docypes with no name
-
-When any of these things occur, we emit a DataLossWarning
-"""
-
-from __future__ import absolute_import, division, unicode_literals
-
-import warnings
-import re
-import sys
-
-from . import _base
-from ..constants import DataLossWarning
-from .. import constants
-from . import etree as etree_builders
-from .. import ihatexml
-
-import lxml.etree as etree
-
-
-fullTree = True
-tag_regexp = re.compile("{([^}]*)}(.*)")
-
-comment_type = etree.Comment("asd").tag
-
-
-class DocumentType(object):
- def __init__(self, name, publicId, systemId):
- self.name = name
- self.publicId = publicId
- self.systemId = systemId
-
-
-class Document(object):
- def __init__(self):
- self._elementTree = None
- self._childNodes = []
-
- def appendChild(self, element):
- self._elementTree.getroot().addnext(element._element)
-
- def _getChildNodes(self):
- return self._childNodes
-
- childNodes = property(_getChildNodes)
-
-
-def testSerializer(element):
- rv = []
- finalText = None
- infosetFilter = ihatexml.InfosetFilter()
-
- def serializeElement(element, indent=0):
- if not hasattr(element, "tag"):
- if hasattr(element, "getroot"):
- # Full tree case
- rv.append("#document")
- if element.docinfo.internalDTD:
- if not (element.docinfo.public_id or
- element.docinfo.system_url):
- dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
- else:
- dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
- element.docinfo.root_name,
- element.docinfo.public_id,
- element.docinfo.system_url)
- rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
- next_element = element.getroot()
- while next_element.getprevious() is not None:
- next_element = next_element.getprevious()
- while next_element is not None:
- serializeElement(next_element, indent + 2)
- next_element = next_element.getnext()
- elif isinstance(element, str) or isinstance(element, bytes):
- # Text in a fragment
- assert isinstance(element, str) or sys.version_info.major == 2
- rv.append("|%s\"%s\"" % (' ' * indent, element))
- else:
- # Fragment case
- rv.append("#document-fragment")
- for next_element in element:
- serializeElement(next_element, indent + 2)
- elif element.tag == comment_type:
- rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
- if hasattr(element, "tail") and element.tail:
- rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
- else:
- assert isinstance(element, etree._Element)
- nsmatch = etree_builders.tag_regexp.match(element.tag)
- if nsmatch is not None:
- ns = nsmatch.group(1)
- tag = nsmatch.group(2)
- prefix = constants.prefixes[ns]
- rv.append("|%s<%s %s>" % (' ' * indent, prefix,
- infosetFilter.fromXmlName(tag)))
- else:
- rv.append("|%s<%s>" % (' ' * indent,
- infosetFilter.fromXmlName(element.tag)))
-
- if hasattr(element, "attrib"):
- attributes = []
- for name, value in element.attrib.items():
- nsmatch = tag_regexp.match(name)
- if nsmatch is not None:
- ns, name = nsmatch.groups()
- name = infosetFilter.fromXmlName(name)
- prefix = constants.prefixes[ns]
- attr_string = "%s %s" % (prefix, name)
- else:
- attr_string = infosetFilter.fromXmlName(name)
- attributes.append((attr_string, value))
-
- for name, value in sorted(attributes):
- rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
-
- if element.text:
- rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
- indent += 2
- for child in element:
- serializeElement(child, indent)
- if hasattr(element, "tail") and element.tail:
- rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
- serializeElement(element, 0)
-
- if finalText is not None:
- rv.append("|%s\"%s\"" % (' ' * 2, finalText))
-
- return "\n".join(rv)
-
-
-def tostring(element):
- """Serialize an element and its child nodes to a string"""
- rv = []
- finalText = None
-
- def serializeElement(element):
- if not hasattr(element, "tag"):
- if element.docinfo.internalDTD:
- if element.docinfo.doctype:
- dtd_str = element.docinfo.doctype
- else:
- dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
- rv.append(dtd_str)
- serializeElement(element.getroot())
-
- elif element.tag == comment_type:
- rv.append("<!--%s-->" % (element.text,))
-
- else:
- # This is assumed to be an ordinary element
- if not element.attrib:
- rv.append("<%s>" % (element.tag,))
- else:
- attr = " ".join(["%s=\"%s\"" % (name, value)
- for name, value in element.attrib.items()])
- rv.append("<%s %s>" % (element.tag, attr))
- if element.text:
- rv.append(element.text)
-
- for child in element:
- serializeElement(child)
-
- rv.append("</%s>" % (element.tag,))
-
- if hasattr(element, "tail") and element.tail:
- rv.append(element.tail)
-
- serializeElement(element)
-
- if finalText is not None:
- rv.append("%s\"" % (' ' * 2, finalText))
-
- return "".join(rv)
-
-
-class TreeBuilder(_base.TreeBuilder):
- documentClass = Document
- doctypeClass = DocumentType
- elementClass = None
- commentClass = None
- fragmentClass = Document
- implementation = etree
-
- def __init__(self, namespaceHTMLElements, fullTree=False):
- builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
- infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
- self.namespaceHTMLElements = namespaceHTMLElements
-
- class Attributes(dict):
- def __init__(self, element, value={}):
- self._element = element
- dict.__init__(self, value)
- for key, value in self.items():
- if isinstance(key, tuple):
- name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
- else:
- name = infosetFilter.coerceAttribute(key)
- self._element._element.attrib[name] = value
-
- def __setitem__(self, key, value):
- dict.__setitem__(self, key, value)
- if isinstance(key, tuple):
- name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
- else:
- name = infosetFilter.coerceAttribute(key)
- self._element._element.attrib[name] = value
-
- class Element(builder.Element):
- def __init__(self, name, namespace):
- name = infosetFilter.coerceElement(name)
- builder.Element.__init__(self, name, namespace=namespace)
- self._attributes = Attributes(self)
-
- def _setName(self, name):
- self._name = infosetFilter.coerceElement(name)
- self._element.tag = self._getETreeTag(
- self._name, self._namespace)
-
- def _getName(self):
- return infosetFilter.fromXmlName(self._name)
-
- name = property(_getName, _setName)
-
- def _getAttributes(self):
- return self._attributes
-
- def _setAttributes(self, attributes):
- self._attributes = Attributes(self, attributes)
-
- attributes = property(_getAttributes, _setAttributes)
-
- def insertText(self, data, insertBefore=None):
- data = infosetFilter.coerceCharacters(data)
- builder.Element.insertText(self, data, insertBefore)
-
- def appendChild(self, child):
- builder.Element.appendChild(self, child)
-
- class Comment(builder.Comment):
- def __init__(self, data):
- data = infosetFilter.coerceComment(data)
- builder.Comment.__init__(self, data)
-
- def _setData(self, data):
- data = infosetFilter.coerceComment(data)
- self._element.text = data
-
- def _getData(self):
- return self._element.text
-
- data = property(_getData, _setData)
-
- self.elementClass = Element
- self.commentClass = builder.Comment
- # self.fragmentClass = builder.DocumentFragment
- _base.TreeBuilder.__init__(self, namespaceHTMLElements)
-
- def reset(self):
- _base.TreeBuilder.reset(self)
- self.insertComment = self.insertCommentInitial
- self.initial_comments = []
- self.doctype = None
-
- def testSerializer(self, element):
- return testSerializer(element)
-
- def getDocument(self):
- if fullTree:
- return self.document._elementTree
- else:
- return self.document._elementTree.getroot()
-
- def getFragment(self):
- fragment = []
- element = self.openElements[0]._element
- if element.text:
- fragment.append(element.text)
- fragment.extend(list(element))
- if element.tail:
- fragment.append(element.tail)
- return fragment
-
- def insertDoctype(self, token):
- name = token["name"]
- publicId = token["publicId"]
- systemId = token["systemId"]
-
- if not name:
- warnings.warn("lxml cannot represent empty doctype", DataLossWarning)
- self.doctype = None
- else:
- coercedName = self.infosetFilter.coerceElement(name)
- if coercedName != name:
- warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning)
-
- doctype = self.doctypeClass(coercedName, publicId, systemId)
- self.doctype = doctype
-
- def insertCommentInitial(self, data, parent=None):
- self.initial_comments.append(data)
-
- def insertCommentMain(self, data, parent=None):
- if (parent == self.document and
- self.document._elementTree.getroot()[-1].tag == comment_type):
- warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
- super(TreeBuilder, self).insertComment(data, parent)
-
- def insertRoot(self, token):
- """Create the document root"""
- # Because of the way libxml2 works, it doesn't seem to be possible to
- # alter information like the doctype after the tree has been parsed.
- # Therefore we need to use the built-in parser to create our iniial
- # tree, after which we can add elements like normal
- docStr = ""
- if self.doctype:
- assert self.doctype.name
- docStr += "<!DOCTYPE %s" % self.doctype.name
- if (self.doctype.publicId is not None or
- self.doctype.systemId is not None):
- docStr += (' PUBLIC "%s" ' %
- (self.infosetFilter.coercePubid(self.doctype.publicId or "")))
- if self.doctype.systemId:
- sysid = self.doctype.systemId
- if sysid.find("'") >= 0 and sysid.find('"') >= 0:
- warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning)
- sysid = sysid.replace("'", 'U00027')
- if sysid.find("'") >= 0:
- docStr += '"%s"' % sysid
- else:
- docStr += "'%s'" % sysid
- else:
- docStr += "''"
- docStr += ">"
- if self.doctype.name != token["name"]:
- warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning)
- docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"
- root = etree.fromstring(docStr)
-
- # Append the initial comments:
- for comment_token in self.initial_comments:
- root.addprevious(etree.Comment(comment_token["data"]))
-
- # Create the root document and add the ElementTree to it
- self.document = self.documentClass()
- self.document._elementTree = root.getroottree()
-
- # Give the root element the right name
- name = token["name"]
- namespace = token.get("namespace", self.defaultNamespace)
- if namespace is None:
- etree_tag = name
- else:
- etree_tag = "{%s}%s" % (namespace, name)
- root.tag = etree_tag
-
- # Add the root element to the internal child/open data structures
- root_element = self.elementClass(name, namespace)
- root_element._element = root
- self.document._childNodes.append(root_element)
- self.openElements.append(root_element)
-
- # Reset to the default insert comment function
- self.insertComment = self.insertCommentMain
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py
deleted file mode 100644
index 18124e7..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py
+++ /dev/null
@@ -1,57 +0,0 @@
-"""A collection of modules for iterating through different kinds of
-tree, generating tokens identical to those produced by the tokenizer
-module.
-
-To create a tree walker for a new type of tree, you need to do
-implement a tree walker object (called TreeWalker by convention) that
-implements a 'serialize' method taking a tree as sole argument and
-returning an iterator generating tokens.
-"""
-
-from __future__ import absolute_import, division, unicode_literals
-
-import sys
-
-from ..utils import default_etree
-
-treeWalkerCache = {}
-
-
-def getTreeWalker(treeType, implementation=None, **kwargs):
- """Get a TreeWalker class for various types of tree with built-in support
-
- treeType - the name of the tree type required (case-insensitive). Supported
- values are:
-
- "dom" - The xml.dom.minidom DOM implementation
- "pulldom" - The xml.dom.pulldom event stream
- "etree" - A generic walker for tree implementations exposing an
- elementtree-like interface (known to work with
- ElementTree, cElementTree and lxml.etree).
- "lxml" - Optimized walker for lxml.etree
- "genshi" - a Genshi stream
-
- implementation - (Currently applies to the "etree" tree type only). A module
- implementing the tree type e.g. xml.etree.ElementTree or
- cElementTree."""
-
- treeType = treeType.lower()
- if treeType not in treeWalkerCache:
- if treeType in ("dom", "pulldom"):
- name = "%s.%s" % (__name__, treeType)
- __import__(name)
- mod = sys.modules[name]
- treeWalkerCache[treeType] = mod.TreeWalker
- elif treeType == "genshi":
- from . import genshistream
- treeWalkerCache[treeType] = genshistream.TreeWalker
- elif treeType == "lxml":
- from . import lxmletree
- treeWalkerCache[treeType] = lxmletree.TreeWalker
- elif treeType == "etree":
- from . import etree
- if implementation is None:
- implementation = default_etree
- # XXX: NEVER cache here, caching is done in the etree submodule
- return etree.getETreeModule(implementation, **kwargs).TreeWalker
- return treeWalkerCache.get(treeType)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/_base.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/_base.py
deleted file mode 100644
index a202359..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/_base.py
+++ /dev/null
@@ -1,196 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
-
-import gettext
-_ = gettext.gettext
-
-from ..constants import voidElements, spaceCharacters
-spaceCharacters = "".join(spaceCharacters)
-
-
-class TreeWalker(object):
- def __init__(self, tree):
- self.tree = tree
-
- def __iter__(self):
- raise NotImplementedError
-
- def error(self, msg):
- return {"type": "SerializeError", "data": msg}
-
- def emptyTag(self, namespace, name, attrs, hasChildren=False):
- assert namespace is None or isinstance(namespace, text_type), type(namespace)
- assert isinstance(name, text_type), type(name)
- assert all((namespace is None or isinstance(namespace, text_type)) and
- isinstance(name, text_type) and
- isinstance(value, text_type)
- for (namespace, name), value in attrs.items())
-
- yield {"type": "EmptyTag", "name": name,
- "namespace": namespace,
- "data": attrs}
- if hasChildren:
- yield self.error(_("Void element has children"))
-
- def startTag(self, namespace, name, attrs):
- assert namespace is None or isinstance(namespace, text_type), type(namespace)
- assert isinstance(name, text_type), type(name)
- assert all((namespace is None or isinstance(namespace, text_type)) and
- isinstance(name, text_type) and
- isinstance(value, text_type)
- for (namespace, name), value in attrs.items())
-
- return {"type": "StartTag",
- "name": name,
- "namespace": namespace,
- "data": attrs}
-
- def endTag(self, namespace, name):
- assert namespace is None or isinstance(namespace, text_type), type(namespace)
- assert isinstance(name, text_type), type(namespace)
-
- return {"type": "EndTag",
- "name": name,
- "namespace": namespace,
- "data": {}}
-
- def text(self, data):
- assert isinstance(data, text_type), type(data)
-
- data = data
- middle = data.lstrip(spaceCharacters)
- left = data[:len(data) - len(middle)]
- if left:
- yield {"type": "SpaceCharacters", "data": left}
- data = middle
- middle = data.rstrip(spaceCharacters)
- right = data[len(middle):]
- if middle:
- yield {"type": "Characters", "data": middle}
- if right:
- yield {"type": "SpaceCharacters", "data": right}
-
- def comment(self, data):
- assert isinstance(data, text_type), type(data)
-
- return {"type": "Comment", "data": data}
-
- def doctype(self, name, publicId=None, systemId=None, correct=True):
- assert name is None or isinstance(name, text_type), type(name)
- assert publicId is None or isinstance(publicId, text_type), type(publicId)
- assert systemId is None or isinstance(systemId, text_type), type(systemId)
-
- return {"type": "Doctype",
- "name": name if name is not None else "",
- "publicId": publicId,
- "systemId": systemId,
- "correct": correct}
-
- def entity(self, name):
- assert isinstance(name, text_type), type(name)
-
- return {"type": "Entity", "name": name}
-
- def unknown(self, nodeType):
- return self.error(_("Unknown node type: ") + nodeType)
-
-
-class RecursiveTreeWalker(TreeWalker):
- def walkChildren(self, node):
- raise NotImplementedError
-
- def element(self, node, namespace, name, attrs, hasChildren):
- if name in voidElements:
- for token in self.emptyTag(namespace, name, attrs, hasChildren):
- yield token
- else:
- yield self.startTag(name, attrs)
- if hasChildren:
- for token in self.walkChildren(node):
- yield token
- yield self.endTag(name)
-
-from xml.dom import Node
-
-DOCUMENT = Node.DOCUMENT_NODE
-DOCTYPE = Node.DOCUMENT_TYPE_NODE
-TEXT = Node.TEXT_NODE
-ELEMENT = Node.ELEMENT_NODE
-COMMENT = Node.COMMENT_NODE
-ENTITY = Node.ENTITY_NODE
-UNKNOWN = "<#UNKNOWN#>"
-
-
-class NonRecursiveTreeWalker(TreeWalker):
- def getNodeDetails(self, node):
- raise NotImplementedError
-
- def getFirstChild(self, node):
- raise NotImplementedError
-
- def getNextSibling(self, node):
- raise NotImplementedError
-
- def getParentNode(self, node):
- raise NotImplementedError
-
- def __iter__(self):
- currentNode = self.tree
- while currentNode is not None:
- details = self.getNodeDetails(currentNode)
- type, details = details[0], details[1:]
- hasChildren = False
-
- if type == DOCTYPE:
- yield self.doctype(*details)
-
- elif type == TEXT:
- for token in self.text(*details):
- yield token
-
- elif type == ELEMENT:
- namespace, name, attributes, hasChildren = details
- if name in voidElements:
- for token in self.emptyTag(namespace, name, attributes,
- hasChildren):
- yield token
- hasChildren = False
- else:
- yield self.startTag(namespace, name, attributes)
-
- elif type == COMMENT:
- yield self.comment(details[0])
-
- elif type == ENTITY:
- yield self.entity(details[0])
-
- elif type == DOCUMENT:
- hasChildren = True
-
- else:
- yield self.unknown(details[0])
-
- if hasChildren:
- firstChild = self.getFirstChild(currentNode)
- else:
- firstChild = None
-
- if firstChild is not None:
- currentNode = firstChild
- else:
- while currentNode is not None:
- details = self.getNodeDetails(currentNode)
- type, details = details[0], details[1:]
- if type == ELEMENT:
- namespace, name, attributes, hasChildren = details
- if name not in voidElements:
- yield self.endTag(namespace, name)
- if self.tree is currentNode:
- currentNode = None
- break
- nextSibling = self.getNextSibling(currentNode)
- if nextSibling is not None:
- currentNode = nextSibling
- break
- else:
- currentNode = self.getParentNode(currentNode)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/dom.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/dom.py
deleted file mode 100644
index a01287a..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/dom.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from xml.dom import Node
-
-import gettext
-_ = gettext.gettext
-
-from . import _base
-
-
-class TreeWalker(_base.NonRecursiveTreeWalker):
- def getNodeDetails(self, node):
- if node.nodeType == Node.DOCUMENT_TYPE_NODE:
- return _base.DOCTYPE, node.name, node.publicId, node.systemId
-
- elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
- return _base.TEXT, node.nodeValue
-
- elif node.nodeType == Node.ELEMENT_NODE:
- attrs = {}
- for attr in list(node.attributes.keys()):
- attr = node.getAttributeNode(attr)
- if attr.namespaceURI:
- attrs[(attr.namespaceURI, attr.localName)] = attr.value
- else:
- attrs[(None, attr.name)] = attr.value
- return (_base.ELEMENT, node.namespaceURI, node.nodeName,
- attrs, node.hasChildNodes())
-
- elif node.nodeType == Node.COMMENT_NODE:
- return _base.COMMENT, node.nodeValue
-
- elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
- return (_base.DOCUMENT,)
-
- else:
- return _base.UNKNOWN, node.nodeType
-
- def getFirstChild(self, node):
- return node.firstChild
-
- def getNextSibling(self, node):
- return node.nextSibling
-
- def getParentNode(self, node):
- return node.parentNode
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/etree.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/etree.py
deleted file mode 100644
index 88fb981..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/etree.py
+++ /dev/null
@@ -1,131 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import gettext
-_ = gettext.gettext
-
-import re
-
-from pip._vendor.six import text_type
-
-from . import _base
-from ..utils import moduleFactoryFactory
-
-tag_regexp = re.compile("{([^}]*)}(.*)")
-
-
-def getETreeBuilder(ElementTreeImplementation):
- ElementTree = ElementTreeImplementation
- ElementTreeCommentType = ElementTree.Comment("asd").tag
-
- class TreeWalker(_base.NonRecursiveTreeWalker):
- """Given the particular ElementTree representation, this implementation,
- to avoid using recursion, returns "nodes" as tuples with the following
- content:
-
- 1. The current element
-
- 2. The index of the element relative to its parent
-
- 3. A stack of ancestor elements
-
- 4. A flag "text", "tail" or None to indicate if the current node is a
- text node; either the text or tail of the current element (1)
- """
- def getNodeDetails(self, node):
- if isinstance(node, tuple): # It might be the root Element
- elt, key, parents, flag = node
- if flag in ("text", "tail"):
- return _base.TEXT, getattr(elt, flag)
- else:
- node = elt
-
- if not(hasattr(node, "tag")):
- node = node.getroot()
-
- if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
- return (_base.DOCUMENT,)
-
- elif node.tag == "<!DOCTYPE>":
- return (_base.DOCTYPE, node.text,
- node.get("publicId"), node.get("systemId"))
-
- elif node.tag == ElementTreeCommentType:
- return _base.COMMENT, node.text
-
- else:
- assert type(node.tag) == text_type, type(node.tag)
- # This is assumed to be an ordinary element
- match = tag_regexp.match(node.tag)
- if match:
- namespace, tag = match.groups()
- else:
- namespace = None
- tag = node.tag
- attrs = {}
- for name, value in list(node.attrib.items()):
- match = tag_regexp.match(name)
- if match:
- attrs[(match.group(1), match.group(2))] = value
- else:
- attrs[(None, name)] = value
- return (_base.ELEMENT, namespace, tag,
- attrs, len(node) or node.text)
-
- def getFirstChild(self, node):
- if isinstance(node, tuple):
- element, key, parents, flag = node
- else:
- element, key, parents, flag = node, None, [], None
-
- if flag in ("text", "tail"):
- return None
- else:
- if element.text:
- return element, key, parents, "text"
- elif len(element):
- parents.append(element)
- return element[0], 0, parents, None
- else:
- return None
-
- def getNextSibling(self, node):
- if isinstance(node, tuple):
- element, key, parents, flag = node
- else:
- return None
-
- if flag == "text":
- if len(element):
- parents.append(element)
- return element[0], 0, parents, None
- else:
- return None
- else:
- if element.tail and flag != "tail":
- return element, key, parents, "tail"
- elif key < len(parents[-1]) - 1:
- return parents[-1][key + 1], key + 1, parents, None
- else:
- return None
-
- def getParentNode(self, node):
- if isinstance(node, tuple):
- element, key, parents, flag = node
- else:
- return None
-
- if flag == "text":
- if not parents:
- return element
- else:
- return element, key, parents, None
- else:
- parent = parents.pop()
- if not parents:
- return parent
- else:
- return parent, list(parents[-1]).index(parent), parents, None
-
- return locals()
-
-getETreeModule = moduleFactoryFactory(getETreeBuilder)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/genshistream.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/genshistream.py
deleted file mode 100644
index f559c45..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/genshistream.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from genshi.core import QName
-from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
-from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
-
-from . import _base
-
-from ..constants import voidElements, namespaces
-
-
-class TreeWalker(_base.TreeWalker):
- def __iter__(self):
- # Buffer the events so we can pass in the following one
- previous = None
- for event in self.tree:
- if previous is not None:
- for token in self.tokens(previous, event):
- yield token
- previous = event
-
- # Don't forget the final event!
- if previous is not None:
- for token in self.tokens(previous, None):
- yield token
-
- def tokens(self, event, next):
- kind, data, pos = event
- if kind == START:
- tag, attribs = data
- name = tag.localname
- namespace = tag.namespace
- converted_attribs = {}
- for k, v in attribs:
- if isinstance(k, QName):
- converted_attribs[(k.namespace, k.localname)] = v
- else:
- converted_attribs[(None, k)] = v
-
- if namespace == namespaces["html"] and name in voidElements:
- for token in self.emptyTag(namespace, name, converted_attribs,
- not next or next[0] != END
- or next[1] != tag):
- yield token
- else:
- yield self.startTag(namespace, name, converted_attribs)
-
- elif kind == END:
- name = data.localname
- namespace = data.namespace
- if name not in voidElements:
- yield self.endTag(namespace, name)
-
- elif kind == COMMENT:
- yield self.comment(data)
-
- elif kind == TEXT:
- for token in self.text(data):
- yield token
-
- elif kind == DOCTYPE:
- yield self.doctype(*data)
-
- elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS,
- START_CDATA, END_CDATA, PI):
- pass
-
- else:
- yield self.unknown(kind)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/lxmletree.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/lxmletree.py
deleted file mode 100644
index 4373383..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/lxmletree.py
+++ /dev/null
@@ -1,208 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
-
-from lxml import etree
-from ..treebuilders.etree import tag_regexp
-
-from gettext import gettext
-_ = gettext
-
-from . import _base
-
-from .. import ihatexml
-
-
-def ensure_str(s):
- if s is None:
- return None
- elif isinstance(s, text_type):
- return s
- else:
- return s.decode("utf-8", "strict")
-
-
-class Root(object):
- def __init__(self, et):
- self.elementtree = et
- self.children = []
- if et.docinfo.internalDTD:
- self.children.append(Doctype(self,
- ensure_str(et.docinfo.root_name),
- ensure_str(et.docinfo.public_id),
- ensure_str(et.docinfo.system_url)))
- root = et.getroot()
- node = root
-
- while node.getprevious() is not None:
- node = node.getprevious()
- while node is not None:
- self.children.append(node)
- node = node.getnext()
-
- self.text = None
- self.tail = None
-
- def __getitem__(self, key):
- return self.children[key]
-
- def getnext(self):
- return None
-
- def __len__(self):
- return 1
-
-
-class Doctype(object):
- def __init__(self, root_node, name, public_id, system_id):
- self.root_node = root_node
- self.name = name
- self.public_id = public_id
- self.system_id = system_id
-
- self.text = None
- self.tail = None
-
- def getnext(self):
- return self.root_node.children[1]
-
-
-class FragmentRoot(Root):
- def __init__(self, children):
- self.children = [FragmentWrapper(self, child) for child in children]
- self.text = self.tail = None
-
- def getnext(self):
- return None
-
-
-class FragmentWrapper(object):
- def __init__(self, fragment_root, obj):
- self.root_node = fragment_root
- self.obj = obj
- if hasattr(self.obj, 'text'):
- self.text = ensure_str(self.obj.text)
- else:
- self.text = None
- if hasattr(self.obj, 'tail'):
- self.tail = ensure_str(self.obj.tail)
- else:
- self.tail = None
- self.isstring = isinstance(obj, str) or isinstance(obj, bytes)
- # Support for bytes here is Py2
- if self.isstring:
- self.obj = ensure_str(self.obj)
-
- def __getattr__(self, name):
- return getattr(self.obj, name)
-
- def getnext(self):
- siblings = self.root_node.children
- idx = siblings.index(self)
- if idx < len(siblings) - 1:
- return siblings[idx + 1]
- else:
- return None
-
- def __getitem__(self, key):
- return self.obj[key]
-
- def __bool__(self):
- return bool(self.obj)
-
- def getparent(self):
- return None
-
- def __str__(self):
- return str(self.obj)
-
- def __unicode__(self):
- return str(self.obj)
-
- def __len__(self):
- return len(self.obj)
-
-
-class TreeWalker(_base.NonRecursiveTreeWalker):
- def __init__(self, tree):
- if hasattr(tree, "getroot"):
- tree = Root(tree)
- elif isinstance(tree, list):
- tree = FragmentRoot(tree)
- _base.NonRecursiveTreeWalker.__init__(self, tree)
- self.filter = ihatexml.InfosetFilter()
-
- def getNodeDetails(self, node):
- if isinstance(node, tuple): # Text node
- node, key = node
- assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
- return _base.TEXT, ensure_str(getattr(node, key))
-
- elif isinstance(node, Root):
- return (_base.DOCUMENT,)
-
- elif isinstance(node, Doctype):
- return _base.DOCTYPE, node.name, node.public_id, node.system_id
-
- elif isinstance(node, FragmentWrapper) and node.isstring:
- return _base.TEXT, node.obj
-
- elif node.tag == etree.Comment:
- return _base.COMMENT, ensure_str(node.text)
-
- elif node.tag == etree.Entity:
- return _base.ENTITY, ensure_str(node.text)[1:-1] # strip &;
-
- else:
- # This is assumed to be an ordinary element
- match = tag_regexp.match(ensure_str(node.tag))
- if match:
- namespace, tag = match.groups()
- else:
- namespace = None
- tag = ensure_str(node.tag)
- attrs = {}
- for name, value in list(node.attrib.items()):
- name = ensure_str(name)
- value = ensure_str(value)
- match = tag_regexp.match(name)
- if match:
- attrs[(match.group(1), match.group(2))] = value
- else:
- attrs[(None, name)] = value
- return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
- attrs, len(node) > 0 or node.text)
-
- def getFirstChild(self, node):
- assert not isinstance(node, tuple), _("Text nodes have no children")
-
- assert len(node) or node.text, "Node has no children"
- if node.text:
- return (node, "text")
- else:
- return node[0]
-
- def getNextSibling(self, node):
- if isinstance(node, tuple): # Text node
- node, key = node
- assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
- if key == "text":
- # XXX: we cannot use a "bool(node) and node[0] or None" construct here
- # because node[0] might evaluate to False if it has no child element
- if len(node):
- return node[0]
- else:
- return None
- else: # tail
- return node.getnext()
-
- return (node, "tail") if node.tail else node.getnext()
-
- def getParentNode(self, node):
- if isinstance(node, tuple): # Text node
- node, key = node
- assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
- if key == "text":
- return node
- # else: fallback to "normal" processing
-
- return node.getparent()
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/pulldom.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/pulldom.py
deleted file mode 100644
index 0b0f515..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/treewalkers/pulldom.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
- COMMENT, IGNORABLE_WHITESPACE, CHARACTERS
-
-from . import _base
-
-from ..constants import voidElements
-
-
-class TreeWalker(_base.TreeWalker):
- def __iter__(self):
- ignore_until = None
- previous = None
- for event in self.tree:
- if previous is not None and \
- (ignore_until is None or previous[1] is ignore_until):
- if previous[1] is ignore_until:
- ignore_until = None
- for token in self.tokens(previous, event):
- yield token
- if token["type"] == "EmptyTag":
- ignore_until = previous[1]
- previous = event
- if ignore_until is None or previous[1] is ignore_until:
- for token in self.tokens(previous, None):
- yield token
- elif ignore_until is not None:
- raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")
-
- def tokens(self, event, next):
- type, node = event
- if type == START_ELEMENT:
- name = node.nodeName
- namespace = node.namespaceURI
- attrs = {}
- for attr in list(node.attributes.keys()):
- attr = node.getAttributeNode(attr)
- attrs[(attr.namespaceURI, attr.localName)] = attr.value
- if name in voidElements:
- for token in self.emptyTag(namespace,
- name,
- attrs,
- not next or next[1] is not node):
- yield token
- else:
- yield self.startTag(namespace, name, attrs)
-
- elif type == END_ELEMENT:
- name = node.nodeName
- namespace = node.namespaceURI
- if name not in voidElements:
- yield self.endTag(namespace, name)
-
- elif type == COMMENT:
- yield self.comment(node.nodeValue)
-
- elif type in (IGNORABLE_WHITESPACE, CHARACTERS):
- for token in self.text(node.nodeValue):
- yield token
-
- else:
- yield self.unknown(type)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/__init__.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/__init__.py
deleted file mode 100644
index a8cca8a..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/__init__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from .py import Trie as PyTrie
-
-Trie = PyTrie
-
-try:
- from .datrie import Trie as DATrie
-except ImportError:
- pass
-else:
- Trie = DATrie
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/_base.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/_base.py
deleted file mode 100644
index 724486b..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/_base.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from collections import Mapping
-
-
-class Trie(Mapping):
- """Abstract base class for tries"""
-
- def keys(self, prefix=None):
- keys = super().keys()
-
- if prefix is None:
- return set(keys)
-
- # Python 2.6: no set comprehensions
- return set([x for x in keys if x.startswith(prefix)])
-
- def has_keys_with_prefix(self, prefix):
- for key in self.keys():
- if key.startswith(prefix):
- return True
-
- return False
-
- def longest_prefix(self, prefix):
- if prefix in self:
- return prefix
-
- for i in range(1, len(prefix) + 1):
- if prefix[:-i] in self:
- return prefix[:-i]
-
- raise KeyError(prefix)
-
- def longest_prefix_item(self, prefix):
- lprefix = self.longest_prefix(prefix)
- return (lprefix, self[lprefix])
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/datrie.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/datrie.py
deleted file mode 100644
index e2e5f86..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/datrie.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from datrie import Trie as DATrie
-from pip._vendor.six import text_type
-
-from ._base import Trie as ABCTrie
-
-
-class Trie(ABCTrie):
- def __init__(self, data):
- chars = set()
- for key in data.keys():
- if not isinstance(key, text_type):
- raise TypeError("All keys must be strings")
- for char in key:
- chars.add(char)
-
- self._data = DATrie("".join(chars))
- for key, value in data.items():
- self._data[key] = value
-
- def __contains__(self, key):
- return key in self._data
-
- def __len__(self):
- return len(self._data)
-
- def __iter__(self):
- raise NotImplementedError()
-
- def __getitem__(self, key):
- return self._data[key]
-
- def keys(self, prefix=None):
- return self._data.keys(prefix)
-
- def has_keys_with_prefix(self, prefix):
- return self._data.has_keys_with_prefix(prefix)
-
- def longest_prefix(self, prefix):
- return self._data.longest_prefix(prefix)
-
- def longest_prefix_item(self, prefix):
- return self._data.longest_prefix_item(prefix)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/py.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/py.py
deleted file mode 100644
index c178b21..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/trie/py.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
-
-from bisect import bisect_left
-
-from ._base import Trie as ABCTrie
-
-
-class Trie(ABCTrie):
- def __init__(self, data):
- if not all(isinstance(x, text_type) for x in data.keys()):
- raise TypeError("All keys must be strings")
-
- self._data = data
- self._keys = sorted(data.keys())
- self._cachestr = ""
- self._cachepoints = (0, len(data))
-
- def __contains__(self, key):
- return key in self._data
-
- def __len__(self):
- return len(self._data)
-
- def __iter__(self):
- return iter(self._data)
-
- def __getitem__(self, key):
- return self._data[key]
-
- def keys(self, prefix=None):
- if prefix is None or prefix == "" or not self._keys:
- return set(self._keys)
-
- if prefix.startswith(self._cachestr):
- lo, hi = self._cachepoints
- start = i = bisect_left(self._keys, prefix, lo, hi)
- else:
- start = i = bisect_left(self._keys, prefix)
-
- keys = set()
- if start == len(self._keys):
- return keys
-
- while self._keys[i].startswith(prefix):
- keys.add(self._keys[i])
- i += 1
-
- self._cachestr = prefix
- self._cachepoints = (start, i)
-
- return keys
-
- def has_keys_with_prefix(self, prefix):
- if prefix in self._data:
- return True
-
- if prefix.startswith(self._cachestr):
- lo, hi = self._cachepoints
- i = bisect_left(self._keys, prefix, lo, hi)
- else:
- i = bisect_left(self._keys, prefix)
-
- if i == len(self._keys):
- return False
-
- return self._keys[i].startswith(prefix)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/utils.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/utils.py
deleted file mode 100644
index 4e8559d..0000000
--- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/utils.py
+++ /dev/null
@@ -1,78 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from types import ModuleType
-
-try:
- import xml.etree.cElementTree as default_etree
-except ImportError:
- import xml.etree.ElementTree as default_etree
-
-
-class MethodDispatcher(dict):
- """Dict with 2 special properties:
-
- On initiation, keys that are lists, sets or tuples are converted to
- multiple keys so accessing any one of the items in the original
- list-like object returns the matching value
-
- md = MethodDispatcher({("foo", "bar"):"baz"})
- md["foo"] == "baz"
-
- A default value which can be set through the default attribute.
- """
-
- def __init__(self, items=()):
- # Using _dictEntries instead of directly assigning to self is about
- # twice as fast. Please do careful performance testing before changing
- # anything here.
- _dictEntries = []
- for name, value in items:
- if type(name) in (list, tuple, frozenset, set):
- for item in name:
- _dictEntries.append((item, value))
- else:
- _dictEntries.append((name, value))
- dict.__init__(self, _dictEntries)
- self.default = None
-
- def __getitem__(self, key):
- return dict.get(self, key, self.default)
-
-
-# Some utility functions to dal with weirdness around UCS2 vs UCS4
-# python builds
-
-def isSurrogatePair(data):
- return (len(data) == 2 and
- ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and
- ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF)
-
-
-def surrogatePairToCodepoint(data):
- char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 +
- (ord(data[1]) - 0xDC00))
- return char_val
-
-# Module Factory Factory (no, this isn't Java, I know)
-# Here to stop this being duplicated all over the place.
-
-
-def moduleFactoryFactory(factory):
- moduleCache = {}
-
- def moduleFactory(baseModule, *args, **kwargs):
- if isinstance(ModuleType.__name__, type("")):
- name = "_%s_factory" % baseModule.__name__
- else:
- name = b"_%s_factory" % baseModule.__name__
-
- if name in moduleCache:
- return moduleCache[name]
- else:
- mod = ModuleType(name)
- objs = factory(baseModule, *args, **kwargs)
- mod.__dict__.update(objs)
- moduleCache[name] = mod
- return mod
-
- return moduleFactory