diff options
Diffstat (limited to 'jython-tosca-parser/src/main/resources/Lib/site-packages/babel-2.3.4-py2.7.egg/babel/messages/jslexer.py')
-rw-r--r-- | jython-tosca-parser/src/main/resources/Lib/site-packages/babel-2.3.4-py2.7.egg/babel/messages/jslexer.py | 185 |
1 file changed, 185 insertions, 0 deletions
# -*- coding: utf-8 -*-
"""
    babel.messages.jslexer
    ~~~~~~~~~~~~~~~~~~~~~~

    A simple JavaScript 1.5 lexer which is used for the JavaScript
    extractor.

    :copyright: (c) 2013 by the Babel Team.
    :license: BSD, see LICENSE for more details.
"""
from collections import namedtuple
import re

try:
    from babel._compat import unichr
except ImportError:
    # Backward-compatible fallback: on Python 3 ``chr`` covers all code
    # points, so the module keeps working when ``babel._compat`` is absent.
    unichr = chr

#: All JavaScript operators, longest first so that e.g. ``>>>=`` wins over
#: ``>>>``, ``>>`` and ``>`` when the alternation regex below is built.
operators = sorted([
    '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
    '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
    '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
    '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':'
], key=len, reverse=True)

#: Single-character JavaScript backslash escapes.
escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}

name_re = re.compile(r'[\w$_][\w\d$_]*', re.UNICODE)
dotted_name_re = re.compile(r'[\w$_][\w\d$_.]*[\w\d$_.]', re.UNICODE)
division_re = re.compile(r'/=?')
# NOTE: global inline flags such as ``(?s)`` must sit at the *start* of the
# pattern -- a trailing position is deprecated since Python 3.6 and an
# error on Python 3.11+.
regex_re = re.compile(r'(?s)/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*')
line_re = re.compile(r'(\r\n|\n|\r)')
line_join_re = re.compile(r'\\' + line_re.pattern)
uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')

#: A lexed token: its rule name, the matched text and the 1-based line
#: number at which it starts.
Token = namedtuple('Token', 'type value lineno')

#: Master rule table.  Order matters: ``tokenize`` takes the first match,
#: so e.g. ``dotted_name`` must precede ``name``.  ``None`` rules are
#: matched but never yielded (whitespace, HTML comment openers).
_rules = [
    (None, re.compile(r'(?u)\s+')),
    (None, re.compile(r'<!--.*')),
    ('linecomment', re.compile(r'//.*')),
    ('multilinecomment', re.compile(r'(?us)/\*.*?\*/')),
    ('dotted_name', dotted_name_re),
    ('name', name_re),
    ('number', re.compile(r'''(?x)(
        (?:0|[1-9]\d*)
        (\.\d+)?
        ([eE][-+]?\d+)? |
        (0x[a-fA-F0-9]+)
    )''')),
    ('jsx_tag', re.compile(r'<(?:/?)\w+.+?>', re.I)),  # May be mangled in `get_rules`
    ('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))),
    ('template_string', re.compile(r'''`(?:[^`\\]*(?:\\.[^`\\]*)*)`''', re.UNICODE)),
    ('string', re.compile(r'''(?xs)(
        '(?:[^'\\]*(?:\\.[^'\\]*)*)'  |
        "(?:[^"\\]*(?:\\.[^"\\]*)*)"
    )''')),
]


def get_rules(jsx, dotted, template_string):
    """
    Get a tokenization rule list given the passed syntax options.

    Internal to this module.

    :param jsx: keep the JSX tag rule when true.
    :param dotted: keep the dotted-name rule (renamed to ``'name'``) when
                   true; otherwise dotted names lex as separate tokens.
    :param template_string: keep the ES6 template-string rule when true.
    """
    rules = []
    for token_type, rule in _rules:
        if not jsx and token_type and 'jsx' in token_type:
            continue
        if not template_string and token_type == 'template_string':
            continue
        if token_type == 'dotted_name':
            # Dotted names are reported to callers as plain names.
            if not dotted:
                continue
            token_type = 'name'
        rules.append((token_type, rule))
    return rules


def indicates_division(token):
    """A helper function that helps the tokenizer to decide if the current
    token may be followed by a division operator.
    """
    if token.type == 'operator':
        return token.value in (')', ']', '}', '++', '--')
    return token.type in ('name', 'number', 'string', 'regexp')


def unquote_string(string):
    """Unquote a string with JavaScript rules.  The string has to start with
    string delimiters (``'``, ``"`` or the back-tick/grave accent (for
    template strings).)

    :raises AssertionError: if the string is not properly delimited.
    """
    assert string and string[0] == string[-1] and string[0] in '"\'`', \
        'string provided is not properly delimited'
    # Line continuations (backslash followed by a newline) vanish entirely.
    string = line_join_re.sub('\\1', string[1:-1])
    result = []
    add = result.append
    pos = 0

    while 1:
        # scan for the next escape
        escape_pos = string.find('\\', pos)
        if escape_pos < 0:
            break
        add(string[pos:escape_pos])

        # check which character is escaped
        next_char = string[escape_pos + 1]
        if next_char in escapes:
            add(escapes[next_char])

        # unicode escapes.  tries to consume up to four hexadecimal
        # characters and to interpret them as a unicode code point.
        # If there is no such code point, put all the consumed
        # characters into the string.
        elif next_char in 'uU':
            escaped = uni_escape_re.match(string, escape_pos + 2)
            if escaped is not None:
                escaped_value = escaped.group()
                if len(escaped_value) == 4:
                    try:
                        add(unichr(int(escaped_value, 16)))
                    except ValueError:
                        pass
                    else:
                        pos = escape_pos + 6
                        continue
                # not a full 4-digit escape: emit it verbatim (minus the
                # backslash) and resume after the consumed hex digits.
                add(next_char + escaped_value)
                pos = escaped.end()
                continue
            else:
                add(next_char)

        # bogus escape.  Just remove the backslash.
        else:
            add(next_char)
        pos = escape_pos + 2

    if pos < len(string):
        add(string[pos:])

    return u''.join(result)


def tokenize(source, jsx=True, dotted=True, template_string=True):
    """
    Tokenize JavaScript/JSX source.  Returns a generator of tokens.

    :param jsx: Enable (limited) JSX parsing.
    :param dotted: Read dotted names as single name token.
    :param template_string: Support ES6 template strings
    """
    may_divide = False
    pos = 0
    lineno = 1
    end = len(source)
    rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string)

    while pos < end:
        # handle regular rules first
        for token_type, rule in rules:
            match = rule.match(source, pos)
            if match is not None:
                break
        # if we don't have a match we don't give up yet, but check for
        # division operators or regular expression literals, based on
        # the status of `may_divide` which is determined by the last
        # processed non-whitespace token using `indicates_division`.
        else:
            if may_divide:
                match = division_re.match(source, pos)
                token_type = 'operator'
            else:
                match = regex_re.match(source, pos)
                token_type = 'regexp'
            if match is None:
                # whoops. invalid syntax. jump one char ahead and try again.
                pos += 1
                continue

        token_value = match.group()
        if token_type is not None:
            token = Token(token_type, token_value, lineno)
            may_divide = indicates_division(token)
            yield token
        lineno += len(line_re.findall(token_value))
        pos = match.end()