From 04387deeab820e75c6d37d2ddd5b514cb7bcfd9e Mon Sep 17 00:00:00 2001 From: Pavel Aharoni Date: Sun, 11 Jun 2017 14:33:57 +0300 Subject: [SDC-32] separate Tosca Parser from DC Change-Id: I7e7f31ff2bd92fec22031f75b7051d129a21d01b Signed-off-by: Pavel Aharoni --- .../pip/_vendor/html5lib/serializer/__init__.py | 16 -- .../_vendor/html5lib/serializer/htmlserializer.py | 309 --------------------- 2 files changed, 325 deletions(-) delete mode 100644 jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/__init__.py delete mode 100644 jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/htmlserializer.py (limited to 'jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer') diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/__init__.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/__init__.py deleted file mode 100644 index 8380839..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from .. import treewalkers - -from .htmlserializer import HTMLSerializer - - -def serialize(input, tree="etree", format="html", encoding=None, - **serializer_opts): - # XXX: Should we cache this? - walker = treewalkers.getTreeWalker(tree) - if format == "html": - s = HTMLSerializer(**serializer_opts) - else: - raise ValueError("type must be html") - return s.render(walker(input), encoding) diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/htmlserializer.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/htmlserializer.py deleted file mode 100644 index 08b60df..0000000 --- a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/serializer/htmlserializer.py +++ /dev/null @@ -1,309 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from pip._vendor.six import text_type - -import gettext -_ = gettext.gettext - -try: - from functools import reduce -except ImportError: - pass - -from ..constants import voidElements, booleanAttributes, spaceCharacters -from ..constants import rcdataElements, entities, xmlEntities -from .. import utils -from xml.sax.saxutils import escape - -spaceCharacters = "".join(spaceCharacters) - -try: - from codecs import register_error, xmlcharrefreplace_errors -except ImportError: - unicode_encode_errors = "strict" -else: - unicode_encode_errors = "htmlentityreplace" - - encode_entity_map = {} - is_ucs4 = len("\U0010FFFF") == 1 - for k, v in list(entities.items()): - # skip multi-character entities - if ((is_ucs4 and len(v) > 1) or - (not is_ucs4 and len(v) > 2)): - continue - if v != "&": - if len(v) == 2: - v = utils.surrogatePairToCodepoint(v) - else: - v = ord(v) - if not v in encode_entity_map or k.islower(): - # prefer < over < and similarly for &, >, etc. - encode_entity_map[v] = k - - def htmlentityreplace_errors(exc): - if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)): - res = [] - codepoints = [] - skip = False - for i, c in enumerate(exc.object[exc.start:exc.end]): - if skip: - skip = False - continue - index = i + exc.start - if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]): - codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2]) - skip = True - else: - codepoint = ord(c) - codepoints.append(codepoint) - for cp in codepoints: - e = encode_entity_map.get(cp) - if e: - res.append("&") - res.append(e) - if not e.endswith(";"): - res.append(";") - else: - res.append("&#x%s;" % (hex(cp)[2:])) - return ("".join(res), exc.end) - else: - return xmlcharrefreplace_errors(exc) - - register_error(unicode_encode_errors, htmlentityreplace_errors) - - del register_error - - -class HTMLSerializer(object): - - # attribute quoting options - quote_attr_values = False - quote_char = '"' - use_best_quote_char = True - - # tag syntax options - omit_optional_tags = True - minimize_boolean_attributes = True - use_trailing_solidus = False - space_before_trailing_solidus = True - - # escaping options - escape_lt_in_attrs = False - escape_rcdata = False - resolve_entities = True - - # miscellaneous options - inject_meta_charset = True - strip_whitespace = False - sanitize = False - - options = ("quote_attr_values", "quote_char", "use_best_quote_char", - "minimize_boolean_attributes", "use_trailing_solidus", - "space_before_trailing_solidus", "omit_optional_tags", - "strip_whitespace", "inject_meta_charset", "escape_lt_in_attrs", - "escape_rcdata", "resolve_entities", "sanitize") - - def __init__(self, **kwargs): - """Initialize HTMLSerializer. - - Keyword options (default given first unless specified) include: - - inject_meta_charset=True|False - Whether it insert a meta element to define the character set of the - document. - quote_attr_values=True|False - Whether to quote attribute values that don't require quoting - per HTML5 parsing rules. - quote_char=u'"'|u"'" - Use given quote character for attribute quoting. Default is to - use double quote unless attribute value contains a double quote, - in which case single quotes are used instead. - escape_lt_in_attrs=False|True - Whether to escape < in attribute values. - escape_rcdata=False|True - Whether to escape characters that need to be escaped within normal - elements within rcdata elements such as style. - resolve_entities=True|False - Whether to resolve named character entities that appear in the - source tree. The XML predefined entities < > & " ' - are unaffected by this setting. - strip_whitespace=False|True - Whether to remove semantically meaningless whitespace. (This - compresses all whitespace to a single space except within pre.) - minimize_boolean_attributes=True|False - Shortens boolean attributes to give just the attribute value, - for example becomes . - use_trailing_solidus=False|True - Includes a close-tag slash at the end of the start tag of void - elements (empty elements whose end tag is forbidden). E.g.
. - space_before_trailing_solidus=True|False - Places a space immediately before the closing slash in a tag - using a trailing solidus. E.g.
. Requires use_trailing_solidus. - sanitize=False|True - Strip all unsafe or unknown constructs from output. - See `html5lib user documentation`_ - omit_optional_tags=True|False - Omit start/end tags that are optional. - - .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation - """ - if 'quote_char' in kwargs: - self.use_best_quote_char = False - for attr in self.options: - setattr(self, attr, kwargs.get(attr, getattr(self, attr))) - self.errors = [] - self.strict = False - - def encode(self, string): - assert(isinstance(string, text_type)) - if self.encoding: - return string.encode(self.encoding, unicode_encode_errors) - else: - return string - - def encodeStrict(self, string): - assert(isinstance(string, text_type)) - if self.encoding: - return string.encode(self.encoding, "strict") - else: - return string - - def serialize(self, treewalker, encoding=None): - self.encoding = encoding - in_cdata = False - self.errors = [] - if encoding and self.inject_meta_charset: - from ..filters.inject_meta_charset import Filter - treewalker = Filter(treewalker, encoding) - # XXX: WhitespaceFilter should be used before OptionalTagFilter - # for maximum efficiently of this latter filter - if self.strip_whitespace: - from ..filters.whitespace import Filter - treewalker = Filter(treewalker) - if self.sanitize: - from ..filters.sanitizer import Filter - treewalker = Filter(treewalker) - if self.omit_optional_tags: - from ..filters.optionaltags import Filter - treewalker = Filter(treewalker) - for token in treewalker: - type = token["type"] - if type == "Doctype": - doctype = "= 0: - if token["systemId"].find("'") >= 0: - self.serializeError(_("System identifer contains both single and double quote characters")) - quote_char = "'" - else: - quote_char = '"' - doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char) - - doctype += ">" - yield self.encodeStrict(doctype) - - elif type in ("Characters", "SpaceCharacters"): - if type == "SpaceCharacters" or in_cdata: - if in_cdata and token["data"].find("= 0: - self.serializeError(_("Unexpected \"'=", False) - v = v.replace("&", "&") - if self.escape_lt_in_attrs: - v = v.replace("<", "<") - if quote_attr: - quote_char = self.quote_char - if self.use_best_quote_char: - if "'" in v and '"' not in v: - quote_char = '"' - elif '"' in v and "'" not in v: - quote_char = "'" - if quote_char == "'": - v = v.replace("'", "'") - else: - v = v.replace('"', """) - yield self.encodeStrict(quote_char) - yield self.encode(v) - yield self.encodeStrict(quote_char) - else: - yield self.encode(v) - if name in voidElements and self.use_trailing_solidus: - if self.space_before_trailing_solidus: - yield self.encodeStrict(" /") - else: - yield self.encodeStrict("/") - yield self.encode(">") - - elif type == "EndTag": - name = token["name"] - if name in rcdataElements: - in_cdata = False - elif in_cdata: - self.serializeError(_("Unexpected child element of a CDATA element")) - yield self.encodeStrict("" % name) - - elif type == "Comment": - data = token["data"] - if data.find("--") >= 0: - self.serializeError(_("Comment contains --")) - yield self.encodeStrict("" % token["data"]) - - elif type == "Entity": - name = token["name"] - key = name + ";" - if not key in entities: - self.serializeError(_("Entity %s not recognized" % name)) - if self.resolve_entities and key not in xmlEntities: - data = entities[key] - else: - data = "&%s;" % name - yield self.encodeStrict(data) - - else: - self.serializeError(token["data"]) - - def render(self, treewalker, encoding=None): - if encoding: - return b"".join(list(self.serialize(treewalker, encoding))) - else: - return "".join(list(self.serialize(treewalker))) - - def serializeError(self, data="XXX ERROR MESSAGE NEEDED"): - # XXX The idea is to make data mandatory. - self.errors.append(data) - if self.strict: - raise SerializeError - - -def SerializeError(Exception): - """Error in serialized tree""" - pass -- cgit 1.2.3-korg