from __future__ import absolute_import, division, unicode_literals from gettext import gettext _ = gettext from . import _base from ..constants import cdataElements, rcdataElements, voidElements from ..constants import spaceCharacters spaceCharacters = "".join(spaceCharacters) class LintError(Exception): pass class Filter(_base.Filter): def __iter__(self): open_elements = [] contentModelFlag = "PCDATA" for token in _base.Filter.__iter__(self): type = token["type"] if type in ("StartTag", "EmptyTag"): name = token["name"] if contentModelFlag != "PCDATA": raise LintError(_("StartTag not in PCDATA content model flag: %s") % name) if not isinstance(name, str): raise LintError(_("Tag name is not a string: %r") % name) if not name: raise LintError(_("Empty tag name")) if type == "StartTag" and name in voidElements: raise LintError(_("Void element reported as StartTag token: %s") % name) elif type == "EmptyTag" and name not in voidElements: raise LintError(_("Non-void element reported as EmptyTag token: %s") % token["name"]) if type == "StartTag": open_elements.append(name) for name, value in token["data"]: if not isinstance(name, str): raise LintError(_("Attribute name is not a string: %r") % name) if not name: raise LintError(_("Empty attribute name")) if not isinstance(value, str): raise LintError(_("Attribute value is not a string: %r") % value) if name in cdataElements: contentModelFlag = "CDATA" elif name in rcdataElements: contentModelFlag = "RCDATA" elif name == "plaintext": contentModelFlag = "PLAINTEXT" elif type == "EndTag": name = token["name"] if not isinstance(name, str): raise LintError(_("Tag name is not a string: %r") % name) if not name: raise LintError(_("Empty tag name")) if name in voidElements: raise LintError(_("Void element reported as EndTag token: %s") % name) start_name = open_elements.pop() if start_name != name: raise LintError(_("EndTag (%s) does not match StartTag (%s)") % (name, start_name)) contentModelFlag = "PCDATA" elif type == "Comment": if contentModelFlag != "PCDATA": raise LintError(_("Comment not in PCDATA content model flag")) elif type in ("Characters", "SpaceCharacters"): data = token["data"] if not isinstance(data, str): raise LintError(_("Attribute name is not a string: %r") % data) if not data: raise LintError(_("%s token with empty data") % type) if type == "SpaceCharacters": data = data.strip(spaceCharacters) if data: raise LintError(_("Non-space character(s) found in SpaceCharacters token: ") % data) elif type == "Doctype": name = token["name"] if contentModelFlag != "PCDATA": raise LintError(_("Doctype not in PCDATA content model flag: %s") % name) if not isinstance(name, str): raise LintError(_("Tag name is not a string: %r") % name) # XXX: what to do with token["data"] ? elif type in ("ParseError", "SerializeError"): pass else: raise LintError(_("Unknown token type: %s") % type) yield token