diff options
Diffstat (limited to 'jython-tosca-parser/src/main/resources/Lib/site-packages/babel-2.3.4-py2.7.egg/babel/messages/extract.py')
-rw-r--r-- | jython-tosca-parser/src/main/resources/Lib/site-packages/babel-2.3.4-py2.7.egg/babel/messages/extract.py | 632 |
1 files changed, 632 insertions, 0 deletions
# -*- coding: utf-8 -*-
"""
    babel.messages.extract
    ~~~~~~~~~~~~~~~~~~~~~~

    Basic infrastructure for extracting localizable messages from source files.

    This module defines an extensible system for collecting localizable message
    strings from a variety of sources. A native extractor for Python source
    files is builtin, extractors for other sources can be added using very
    simple plugins.

    The main entry points into the extraction functionality are the functions
    `extract_from_dir` and `extract_from_file`.

    :copyright: (c) 2013 by the Babel Team.
    :license: BSD, see LICENSE for more details.
"""

import os
from os.path import relpath
import sys
from tokenize import generate_tokens, COMMENT, NAME, OP, STRING

from babel.util import parse_encoding, pathmatch
from babel._compat import PY2, text_type
from textwrap import dedent


# Name of the setuptools entry point group that extractor plugins register in.
GROUP_NAME = 'babel.extractors'

# Maps a translation function name to its argument specification: ``None``
# means "first argument is the message"; a tuple lists the 1-based positions
# of the message arguments, and a nested ``(position, 'c')`` tuple marks the
# position of the context argument.
DEFAULT_KEYWORDS = {
    '_': None,
    'gettext': None,
    'ngettext': (1, 2),
    'ugettext': None,
    'ungettext': (1, 2),
    'dgettext': (2,),
    'dngettext': (2, 3),
    'N_': None,
    'pgettext': ((1, 'c'), 2),
    'npgettext': ((1, 'c'), 2, 3)
}

# Default ``(glob pattern, extraction method)`` pairs used by
# `extract_from_dir`.
DEFAULT_MAPPING = [('**.py', 'python')]

empty_msgid_warning = (
    '%s: warning: Empty msgid.  It is reserved by GNU gettext: gettext("") '
    'returns the header entry with meta information, not the empty string.')


def _strip_comment_tags(comments, tags):
    """Helper function for `extract` that strips comment tags from strings
    in a list of comment lines.  This function operates in-place.

    :param comments: the list of comment lines; it is modified in place
    :param tags: the comment tags to remove from the start of each line
    """
    def _strip(line):
        for tag in tags:
            if line.startswith(tag):
                return line[len(tag):].strip()
        return line
    comments[:] = [_strip(line) for line in comments]


def extract_from_dir(dirname=None, method_map=DEFAULT_MAPPING,
                     options_map=None, keywords=DEFAULT_KEYWORDS,
                     comment_tags=(), callback=None, strip_comment_tags=False):
    """Extract messages from any source files found in the given directory.

    This function generates tuples of the form ``(filename, lineno, message,
    comments, context)``.

    Which extraction method is used per file is determined by the `method_map`
    parameter, which maps extended glob patterns to extraction method names.
    For example, the following is the default mapping:

    >>> method_map = [
    ...     ('**.py', 'python')
    ... ]

    This basically says that files with the filename extension ".py" at any
    level inside the directory should be processed by the "python" extraction
    method. Files that don't match any of the mapping patterns are ignored. See
    the documentation of the `pathmatch` function for details on the pattern
    syntax.

    The following extended mapping would also use the "genshi" extraction
    method on any file in "templates" subdirectory:

    >>> method_map = [
    ...     ('**/templates/**.*', 'genshi'),
    ...     ('**.py', 'python')
    ... ]

    The dictionary provided by the optional `options_map` parameter augments
    these mappings. It uses extended glob patterns as keys, and the values are
    dictionaries mapping options names to option values (both strings).

    The glob patterns of the `options_map` do not necessarily need to be the
    same as those used in the method mapping. For example, while all files in
    the ``templates`` folders in an application may be Genshi applications, the
    options for those files may differ based on extension:

    >>> options_map = {
    ...     '**/templates/**.txt': {
    ...         'template_class': 'genshi.template:TextTemplate',
    ...         'encoding': 'latin-1'
    ...     },
    ...     '**/templates/**.html': {
    ...         'include_attrs': ''
    ...     }
    ... }

    :param dirname: the path to the directory to extract messages from.  If
                    not given the current working directory is used.
    :param method_map: a list of ``(pattern, method)`` tuples that maps
                       extended glob patterns to extraction method names
    :param options_map: a dictionary of additional options (optional)
    :param keywords: a dictionary mapping keywords (i.e. names of functions
                     that should be recognized as translation functions) to
                     tuples that specify which of their arguments contain
                     localizable strings
    :param comment_tags: a list of tags of translator comments to search for
                         and include in the results
    :param callback: a function that is called for every file that messages
                     are extracted from, just before the extraction itself is
                     performed; the function is passed the filename, the name
                     of the extraction method and the options dictionary as
                     positional arguments, in that order
    :param strip_comment_tags: a flag that if set to `True` causes all comment
                               tags to be removed from the collected comments.
    :see: `pathmatch`
    """
    if dirname is None:
        dirname = os.getcwd()
    if options_map is None:
        options_map = {}

    absname = os.path.abspath(dirname)
    for root, dirnames, filenames in os.walk(absname):
        # Prune hidden and private directories.  The list has to be rebuilt
        # through slice assignment: removing entries while iterating over it
        # skips the entry after each removal, and `os.walk` only honours
        # changes made to the very list object it handed out.
        dirnames[:] = sorted(
            subdir for subdir in dirnames
            if not subdir.startswith(('.', '_'))
        )
        filenames.sort()
        for filename in filenames:
            filepath = os.path.join(root, filename).replace(os.sep, '/')

            for message_tuple in check_and_call_extract_file(
                filepath,
                method_map,
                options_map,
                callback,
                keywords,
                comment_tags,
                strip_comment_tags,
                dirpath=absname,
            ):
                yield message_tuple


def check_and_call_extract_file(filepath, method_map, options_map,
                                callback, keywords, comment_tags,
                                strip_comment_tags, dirpath=None):
    """Checks if the given file matches an extraction method mapping, and if
    so, calls extract_from_file.

    Note that the extraction method mappings are based relative to dirpath.
    So, given an absolute path to a file `filepath`, we want to check using
    just the relative path from `dirpath` to `filepath`.

    This function generates tuples of the form ``(filename, lineno, message,
    comments, context)`` where ``filename`` is relative to `dirpath`.  Only
    the first matching entry of `method_map` is used.

    :param filepath: An absolute path to a file that exists.
    :param method_map: a list of ``(pattern, method)`` tuples that maps
                       extended glob patterns to extraction method names
    :param options_map: a dictionary of additional options (optional)
    :param callback: a function that is called for every file that messages
                     are extracted from, just before the extraction itself is
                     performed; the function is passed the filename, the name
                     of the extraction method and the options dictionary as
                     positional arguments, in that order
    :param keywords: a dictionary mapping keywords (i.e. names of functions
                     that should be recognized as translation functions) to
                     tuples that specify which of their arguments contain
                     localizable strings
    :param comment_tags: a list of tags of translator comments to search for
                         and include in the results
    :param strip_comment_tags: a flag that if set to `True` causes all comment
                               tags to be removed from the collected comments.
    :param dirpath: the path to the directory to extract messages from.
    """
    # filename is the relative path from dirpath to the actual file
    filename = relpath(filepath, dirpath)

    for pattern, method in method_map:
        if not pathmatch(pattern, filename):
            continue

        # When several option patterns match, the one iterated last wins.
        options = {}
        for opattern, odict in options_map.items():
            if pathmatch(opattern, filename):
                options = odict
        if callback:
            callback(filename, method, options)
        for message_tuple in extract_from_file(
            method, filepath,
            keywords=keywords,
            comment_tags=comment_tags,
            options=options,
            strip_comment_tags=strip_comment_tags
        ):
            yield (filename, ) + message_tuple

        break


def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
                      comment_tags=(), options=None, strip_comment_tags=False):
    """Extract messages from a specific file.

    This function returns a list of tuples of the form ``(lineno, message,
    comments, context)``.

    :param filename: the path to the file to extract messages from
    :param method: a string specifying the extraction method (e.g. "python")
    :param keywords: a dictionary mapping keywords (i.e. names of functions
                     that should be recognized as translation functions) to
                     tuples that specify which of their arguments contain
                     localizable strings
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param strip_comment_tags: a flag that if set to `True` causes all comment
                               tags to be removed from the collected comments.
    :param options: a dictionary of additional options (optional)
    """
    # The generator is drained inside the ``with`` block so that the file is
    # still open while the extractor reads from it.
    with open(filename, 'rb') as fileobj:
        return list(extract(method, fileobj, keywords, comment_tags, options,
                            strip_comment_tags))


def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
            options=None, strip_comment_tags=False):
    """Extract messages from the given file-like object using the specified
    extraction method.

    This function returns tuples of the form ``(lineno, message, comments,
    context)``.

    The implementation dispatches the actual extraction to plugins, based on the
    value of the ``method`` parameter.

    >>> source = b'''# foo module
    ... def run(argv):
    ...    print(_('Hello, world!'))
    ... '''

    >>> from babel._compat import BytesIO
    >>> for message in extract('python', BytesIO(source)):
    ...     print(message)
    (3, u'Hello, world!', [], None)

    :param method: an extraction method (a callable), or
                   a string specifying the extraction method (e.g. "python");
                   if this is a simple name, the extraction function will be
                   looked up by entry point; if it is an explicit reference
                   to a function (of the form ``package.module:funcname`` or
                   ``package.module.funcname``), the corresponding function
                   will be imported and used
    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a dictionary mapping keywords (i.e. names of functions
                     that should be recognized as translation functions) to
                     tuples that specify which of their arguments contain
                     localizable strings
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :param strip_comment_tags: a flag that if set to `True` causes all comment
                               tags to be removed from the collected comments.
    :raise ValueError: if the extraction method is not registered
    """
    func = None
    if callable(method):
        func = method
    elif ':' in method or '.' in method:
        if ':' not in method:
            lastdot = method.rfind('.')
            module, attrname = method[:lastdot], method[lastdot + 1:]
        else:
            module, attrname = method.split(':', 1)
        func = getattr(__import__(module, {}, {}, [attrname]), attrname)
    else:
        try:
            from pkg_resources import working_set
        except ImportError:
            pass
        else:
            for entry_point in working_set.iter_entry_points(GROUP_NAME,
                                                             method):
                func = entry_point.load(require=True)
                break
        if func is None:
            # if pkg_resources is not available or no usable egg-info was found
            # (see #230), we resort to looking up the builtin extractors
            # directly
            builtin = {
                'ignore': extract_nothing,
                'python': extract_python,
                'javascript': extract_javascript
            }
            func = builtin.get(method)

    if func is None:
        raise ValueError('Unknown extraction method %r' % method)

    results = func(fileobj, keywords.keys(), comment_tags,
                   options=options or {})

    for lineno, funcname, messages, comments in results:
        if funcname:
            spec = keywords[funcname] or (1,)
        else:
            spec = (1,)
        if not isinstance(messages, (list, tuple)):
            messages = [messages]
        if not messages:
            continue

        # Validate the messages against the keyword's specification
        context = None
        msgs = []
        invalid = False
        # last_index is 1 based like the keyword spec
        last_index = len(messages)
        for index in spec:
            if isinstance(index, tuple):
                # ``(position, 'c')`` marks the context argument
                if last_index < index[0]:
                    # Not enough arguments to contain the context
                    invalid = True
                    break
                context = messages[index[0] - 1]
                continue
            if last_index < index:
                # Not enough arguments
                invalid = True
                break
            message = messages[index - 1]
            if message is None:
                invalid = True
                break
            msgs.append(message)
        if invalid:
            continue

        # keyword spec indexes are 1 based, therefore '-1'
        if isinstance(spec[0], tuple):
            # context-aware *gettext method
            first_msg_index = spec[1] - 1
        else:
            first_msg_index = spec[0] - 1
        if not messages[first_msg_index]:
            # An empty string msgid isn't valid, emit a warning
            where = '%s:%i' % (getattr(fileobj, 'name', None) or '(unknown)',
                               lineno)
            sys.stderr.write((empty_msgid_warning % where) + '\n')
            continue

        messages = tuple(msgs)
        if len(messages) == 1:
            messages = messages[0]

        if strip_comment_tags:
            _strip_comment_tags(comments, comment_tags)
        yield lineno, messages, comments, context


def extract_nothing(fileobj, keywords, comment_tags, options):
    """Pseudo extractor that does not actually extract anything, but simply
    returns an empty list.
    """
    return []


def extract_python(fileobj, keywords, comment_tags, options):
    """Extract messages from Python source code.

    It returns an iterator yielding tuples in the following form ``(lineno,
    funcname, message, comments)``.

    :param fileobj: the seekable, file-like object the messages should be
                    extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :rtype: ``iterator``
    """
    funcname = lineno = message_lineno = None
    # -1 means "not inside a translation call"; 0 is the argument list of the
    # translation call itself, larger values are nested parentheses.
    call_stack = -1
    buf = []
    messages = []
    translator_comments = []
    in_def = in_translator_comments = False

    encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1')

    if PY2:
        next_line = fileobj.readline
    else:
        next_line = lambda: fileobj.readline().decode(encoding)

    tokens = generate_tokens(next_line)
    for tok, value, (lineno, _), _, _ in tokens:
        if call_stack == -1 and tok == NAME and value in ('def', 'class'):
            in_def = True
        elif tok == OP and value == '(':
            if in_def:
                # Avoid false positives for declarations such as:
                # def gettext(arg='message'):
                in_def = False
                continue
            if funcname:
                message_lineno = lineno
                call_stack += 1
        elif in_def and tok == OP and value == ':':
            # End of a class definition without parens
            in_def = False
            continue
        elif call_stack == -1 and tok == COMMENT:
            # Strip the comment token from the line
            if PY2:
                value = value.decode(encoding)
            value = value[1:].strip()
            if in_translator_comments and \
                    translator_comments[-1][0] == lineno - 1:
                # We're already inside a translator comment, continue appending
                translator_comments.append((lineno, value))
                continue
            # If execution reaches this point, let's see if comment line
            # starts with one of the comment tags
            if any(value.startswith(tag) for tag in comment_tags):
                in_translator_comments = True
                translator_comments.append((lineno, value))
        elif funcname and call_stack == 0:
            if tok == OP and value == ')':
                if buf:
                    messages.append(''.join(buf))
                    del buf[:]
                else:
                    messages.append(None)

                if len(messages) > 1:
                    messages = tuple(messages)
                else:
                    messages = messages[0]
                # Comments don't apply unless they immediately precede the
                # message
                if translator_comments and \
                        translator_comments[-1][0] < message_lineno - 1:
                    translator_comments = []

                yield (message_lineno, funcname, messages,
                       [comment[1] for comment in translator_comments])

                funcname = lineno = message_lineno = None
                call_stack = -1
                messages = []
                translator_comments = []
                in_translator_comments = False
            elif tok == STRING:
                # Unwrap quotes in a safe manner, maintaining the string's
                # encoding
                # https://sourceforge.net/tracker/?func=detail&atid=355470&
                # aid=617979&group_id=5470
                # NOTE(review): this evaluates a token taken from the scanned
                # source file.  The tokenizer guarantees it is a single string
                # literal and builtins are disabled, but it is still `eval` on
                # file content; only run the extractor on trusted sources.
                value = eval('# coding=%s\n%s' % (str(encoding), value),
                             {'__builtins__': {}}, {})
                if PY2 and not isinstance(value, text_type):
                    value = value.decode(encoding)
                buf.append(value)
            elif tok == OP and value == ',':
                if buf:
                    messages.append(''.join(buf))
                    del buf[:]
                else:
                    messages.append(None)
                if translator_comments:
                    # We have translator comments, and since we're on a
                    # comma(,) user is allowed to break into a new line
                    # Let's increase the last comment's lineno in order
                    # for the comment to still be a valid one
                    old_lineno, old_comment = translator_comments.pop()
                    translator_comments.append((old_lineno + 1, old_comment))
        elif call_stack > 0 and tok == OP and value == ')':
            call_stack -= 1
        elif funcname and call_stack == -1:
            funcname = None
        elif tok == NAME and value in keywords:
            funcname = value


def extract_javascript(fileobj, keywords, comment_tags, options):
    """Extract messages from JavaScript source code.

    It returns an iterator yielding tuples in the following form ``(lineno,
    funcname, message, comments)``.

    :param fileobj: the seekable, file-like object the messages should be
                    extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
                    Supported options are:
                    * `encoding` -- the encoding used to decode the file,
                                    "utf-8" by default.
                    * `jsx` -- set to false to disable JSX/E4X support.
                    * `template_string` -- set to false to disable ES6
                                           template string support.
    """
    from babel.messages.jslexer import Token, tokenize, unquote_string
    funcname = message_lineno = None
    messages = []
    last_argument = None
    translator_comments = []
    concatenate_next = False
    encoding = options.get('encoding', 'utf-8')
    last_token = None
    # -1 means "not inside a translation call"; 0 is the argument list of the
    # translation call itself, larger values are nested parentheses.
    call_stack = -1
    dotted = any('.' in kw for kw in keywords)

    for token in tokenize(
        fileobj.read().decode(encoding),
        jsx=options.get("jsx", True),
        template_string=options.get("template_string", True),
        dotted=dotted
    ):
        if (  # Turn keyword`foo` expressions into keyword("foo") calls:
            funcname and  # have a keyword...
            # we've seen nothing after the keyword...
            (last_token and last_token.type == 'name') and
            token.type == 'template_string'  # this is a template string
        ):
            message_lineno = token.lineno
            messages = [unquote_string(token.value)]
            call_stack = 0
            token = Token('operator', ')', token.lineno)

        if token.type == 'operator' and token.value == '(':
            if funcname:
                message_lineno = token.lineno
                call_stack += 1

        elif call_stack == -1 and token.type == 'linecomment':
            value = token.value[2:].strip()
            if translator_comments and \
               translator_comments[-1][0] == token.lineno - 1:
                translator_comments.append((token.lineno, value))
                continue

            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    translator_comments.append((token.lineno, value.strip()))
                    break

        elif token.type == 'multilinecomment':
            # only one multi-line comment may precede a translation
            translator_comments = []
            value = token.value[2:-2].strip()
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    lines = value.splitlines()
                    if lines:
                        lines[0] = lines[0].strip()
                        lines[1:] = dedent('\n'.join(lines[1:])).splitlines()
                        for offset, line in enumerate(lines):
                            translator_comments.append((token.lineno + offset,
                                                        line))
                    break

        elif funcname and call_stack == 0:
            if token.type == 'operator' and token.value == ')':
                if last_argument is not None:
                    messages.append(last_argument)
                if len(messages) > 1:
                    messages = tuple(messages)
                elif messages:
                    messages = messages[0]
                else:
                    messages = None

                # Comments don't apply unless they immediately precede the
                # message
                if translator_comments and \
                   translator_comments[-1][0] < message_lineno - 1:
                    translator_comments = []

                if messages is not None:
                    yield (message_lineno, funcname, messages,
                           [comment[1] for comment in translator_comments])

                funcname = message_lineno = last_argument = None
                concatenate_next = False
                translator_comments = []
                messages = []
                call_stack = -1

            elif token.type in ('string', 'template_string'):
                new_value = unquote_string(token.value)
                if concatenate_next:
                    last_argument = (last_argument or '') + new_value
                    concatenate_next = False
                else:
                    last_argument = new_value

            elif token.type == 'operator':
                if token.value == ',':
                    if last_argument is not None:
                        messages.append(last_argument)
                        last_argument = None
                    else:
                        messages.append(None)
                    concatenate_next = False
                elif token.value == '+':
                    concatenate_next = True

        elif call_stack > 0 and token.type == 'operator' \
                and token.value == ')':
            call_stack -= 1

        elif funcname and call_stack == -1:
            funcname = None

        elif call_stack == -1 and token.type == 'name' and \
            token.value in keywords and \
            (last_token is None or last_token.type != 'name' or
             last_token.value != 'function'):
            funcname = token.value

        last_token = token