aboutsummaryrefslogtreecommitdiffstats
path: root/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py
diff options
context:
space:
mode:
Diffstat (limited to 'jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py')
-rw-r--r--jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py205
1 files changed, 205 insertions, 0 deletions
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py
new file mode 100644
index 0000000..fefe0b3
--- /dev/null
+++ b/jython-tosca-parser/src/main/resources/Lib/site-packages/pip/_vendor/html5lib/filters/optionaltags.py
@@ -0,0 +1,205 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from . import _base
+
+
+class Filter(_base.Filter):
+ def slider(self):
+ previous1 = previous2 = None
+ for token in self.source:
+ if previous1 is not None:
+ yield previous2, previous1, token
+ previous2 = previous1
+ previous1 = token
+ yield previous2, previous1, None
+
+ def __iter__(self):
+ for previous, token, next in self.slider():
+ type = token["type"]
+ if type == "StartTag":
+ if (token["data"] or
+ not self.is_optional_start(token["name"], previous, next)):
+ yield token
+ elif type == "EndTag":
+ if not self.is_optional_end(token["name"], next):
+ yield token
+ else:
+ yield token
+
+ def is_optional_start(self, tagname, previous, next):
+ type = next and next["type"] or None
+ if tagname in 'html':
+ # An html element's start tag may be omitted if the first thing
+ # inside the html element is not a space character or a comment.
+ return type not in ("Comment", "SpaceCharacters")
+ elif tagname == 'head':
+ # A head element's start tag may be omitted if the first thing
+ # inside the head element is an element.
+ # XXX: we also omit the start tag if the head element is empty
+ if type in ("StartTag", "EmptyTag"):
+ return True
+ elif type == "EndTag":
+ return next["name"] == "head"
+ elif tagname == 'body':
+ # A body element's start tag may be omitted if the first thing
+ # inside the body element is not a space character or a comment,
+ # except if the first thing inside the body element is a script
+ # or style element and the node immediately preceding the body
+ # element is a head element whose end tag has been omitted.
+ if type in ("Comment", "SpaceCharacters"):
+ return False
+ elif type == "StartTag":
+ # XXX: we do not look at the preceding event, so we never omit
+ # the body element's start tag if it's followed by a script or
+ # a style element.
+ return next["name"] not in ('script', 'style')
+ else:
+ return True
+ elif tagname == 'colgroup':
+ # A colgroup element's start tag may be omitted if the first thing
+ # inside the colgroup element is a col element, and if the element
+ # is not immediately preceeded by another colgroup element whose
+ # end tag has been omitted.
+ if type in ("StartTag", "EmptyTag"):
+ # XXX: we do not look at the preceding event, so instead we never
+ # omit the colgroup element's end tag when it is immediately
+ # followed by another colgroup element. See is_optional_end.
+ return next["name"] == "col"
+ else:
+ return False
+ elif tagname == 'tbody':
+ # A tbody element's start tag may be omitted if the first thing
+ # inside the tbody element is a tr element, and if the element is
+ # not immediately preceeded by a tbody, thead, or tfoot element
+ # whose end tag has been omitted.
+ if type == "StartTag":
+ # omit the thead and tfoot elements' end tag when they are
+ # immediately followed by a tbody element. See is_optional_end.
+ if previous and previous['type'] == 'EndTag' and \
+ previous['name'] in ('tbody', 'thead', 'tfoot'):
+ return False
+ return next["name"] == 'tr'
+ else:
+ return False
+ return False
+
+ def is_optional_end(self, tagname, next):
+ type = next and next["type"] or None
+ if tagname in ('html', 'head', 'body'):
+ # An html element's end tag may be omitted if the html element
+ # is not immediately followed by a space character or a comment.
+ return type not in ("Comment", "SpaceCharacters")
+ elif tagname in ('li', 'optgroup', 'tr'):
+ # A li element's end tag may be omitted if the li element is
+ # immediately followed by another li element or if there is
+ # no more content in the parent element.
+ # An optgroup element's end tag may be omitted if the optgroup
+ # element is immediately followed by another optgroup element,
+ # or if there is no more content in the parent element.
+ # A tr element's end tag may be omitted if the tr element is
+ # immediately followed by another tr element, or if there is
+ # no more content in the parent element.
+ if type == "StartTag":
+ return next["name"] == tagname
+ else:
+ return type == "EndTag" or type is None
+ elif tagname in ('dt', 'dd'):
+ # A dt element's end tag may be omitted if the dt element is
+ # immediately followed by another dt element or a dd element.
+ # A dd element's end tag may be omitted if the dd element is
+ # immediately followed by another dd element or a dt element,
+ # or if there is no more content in the parent element.
+ if type == "StartTag":
+ return next["name"] in ('dt', 'dd')
+ elif tagname == 'dd':
+ return type == "EndTag" or type is None
+ else:
+ return False
+ elif tagname == 'p':
+ # A p element's end tag may be omitted if the p element is
+ # immediately followed by an address, article, aside,
+ # blockquote, datagrid, dialog, dir, div, dl, fieldset,
+ # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
+ # nav, ol, p, pre, section, table, or ul, element, or if
+ # there is no more content in the parent element.
+ if type in ("StartTag", "EmptyTag"):
+ return next["name"] in ('address', 'article', 'aside',
+ 'blockquote', 'datagrid', 'dialog',
+ 'dir', 'div', 'dl', 'fieldset', 'footer',
+ 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
+ 'header', 'hr', 'menu', 'nav', 'ol',
+ 'p', 'pre', 'section', 'table', 'ul')
+ else:
+ return type == "EndTag" or type is None
+ elif tagname == 'option':
+ # An option element's end tag may be omitted if the option
+ # element is immediately followed by another option element,
+ # or if it is immediately followed by an <code>optgroup</code>
+ # element, or if there is no more content in the parent
+ # element.
+ if type == "StartTag":
+ return next["name"] in ('option', 'optgroup')
+ else:
+ return type == "EndTag" or type is None
+ elif tagname in ('rt', 'rp'):
+ # An rt element's end tag may be omitted if the rt element is
+ # immediately followed by an rt or rp element, or if there is
+ # no more content in the parent element.
+ # An rp element's end tag may be omitted if the rp element is
+ # immediately followed by an rt or rp element, or if there is
+ # no more content in the parent element.
+ if type == "StartTag":
+ return next["name"] in ('rt', 'rp')
+ else:
+ return type == "EndTag" or type is None
+ elif tagname == 'colgroup':
+ # A colgroup element's end tag may be omitted if the colgroup
+ # element is not immediately followed by a space character or
+ # a comment.
+ if type in ("Comment", "SpaceCharacters"):
+ return False
+ elif type == "StartTag":
+ # XXX: we also look for an immediately following colgroup
+ # element. See is_optional_start.
+ return next["name"] != 'colgroup'
+ else:
+ return True
+ elif tagname in ('thead', 'tbody'):
+ # A thead element's end tag may be omitted if the thead element
+ # is immediately followed by a tbody or tfoot element.
+ # A tbody element's end tag may be omitted if the tbody element
+ # is immediately followed by a tbody or tfoot element, or if
+ # there is no more content in the parent element.
+ # A tfoot element's end tag may be omitted if the tfoot element
+ # is immediately followed by a tbody element, or if there is no
+ # more content in the parent element.
+ # XXX: we never omit the end tag when the following element is
+ # a tbody. See is_optional_start.
+ if type == "StartTag":
+ return next["name"] in ['tbody', 'tfoot']
+ elif tagname == 'tbody':
+ return type == "EndTag" or type is None
+ else:
+ return False
+ elif tagname == 'tfoot':
+ # A tfoot element's end tag may be omitted if the tfoot element
+ # is immediately followed by a tbody element, or if there is no
+ # more content in the parent element.
+ # XXX: we never omit the end tag when the following element is
+ # a tbody. See is_optional_start.
+ if type == "StartTag":
+ return next["name"] == 'tbody'
+ else:
+ return type == "EndTag" or type is None
+ elif tagname in ('td', 'th'):
+ # A td element's end tag may be omitted if the td element is
+ # immediately followed by a td or th element, or if there is
+ # no more content in the parent element.
+ # A th element's end tag may be omitted if the th element is
+ # immediately followed by a td or th element, or if there is
+ # no more content in the parent element.
+ if type == "StartTag":
+ return next["name"] in ('td', 'th')
+ else:
+ return type == "EndTag" or type is None
+ return False